github.com/weaviate/weaviate@v1.24.6/entities/schema/data_types.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package schema
    13  
    14  import (
    15  	"errors"
    16  	"fmt"
    17  	"strings"
    18  	"unicode"
    19  )
    20  
    21  type DataType string
    22  
    23  const (
    24  	// DataTypeCRef The data type is a cross-reference, it is starting with a capital letter
    25  	DataTypeCRef DataType = "cref"
    26  	// DataTypeText The data type is a value of type string
    27  	DataTypeText DataType = "text"
    28  	// DataTypeInt The data type is a value of type int
    29  	DataTypeInt DataType = "int"
    30  	// DataTypeNumber The data type is a value of type number/float
    31  	DataTypeNumber DataType = "number"
    32  	// DataTypeBoolean The data type is a value of type boolean
    33  	DataTypeBoolean DataType = "boolean"
    34  	// DataTypeDate The data type is a value of type date
    35  	DataTypeDate DataType = "date"
    36  	// DataTypeGeoCoordinates is used to represent geo coordinates, i.e. latitude
    37  	// and longitude pairs of locations on earth
    38  	DataTypeGeoCoordinates DataType = "geoCoordinates"
    39  	// DataTypePhoneNumber represents a parsed/to-be-parsed phone number
    40  	DataTypePhoneNumber DataType = "phoneNumber"
    41  	// DataTypeBlob represents a base64 encoded data
    42  	DataTypeBlob DataType = "blob"
    43  	// DataTypeTextArray The data type is a value of type string array
    44  	DataTypeTextArray DataType = "text[]"
    45  	// DataTypeIntArray The data type is a value of type int array
    46  	DataTypeIntArray DataType = "int[]"
    47  	// DataTypeNumberArray The data type is a value of type number/float array
    48  	DataTypeNumberArray DataType = "number[]"
    49  	// DataTypeBooleanArray The data type is a value of type boolean array
    50  	DataTypeBooleanArray DataType = "boolean[]"
    51  	// DataTypeDateArray The data type is a value of type date array
    52  	DataTypeDateArray DataType = "date[]"
    53  	// DataTypeUUID is a native UUID data type. It is stored in it's raw byte
    54  	// representation and therefore takes up less space than storing a UUID as a
    55  	// string
    56  	DataTypeUUID DataType = "uuid"
    57  	// DataTypeUUIDArray is the array version of DataTypeUUID
    58  	DataTypeUUIDArray DataType = "uuid[]"
    59  
    60  	DataTypeObject      DataType = "object"
    61  	DataTypeObjectArray DataType = "object[]"
    62  
    63  	// deprecated as of v1.19, replaced by DataTypeText + relevant tokenization setting
    64  	// DataTypeString The data type is a value of type string
    65  	DataTypeString DataType = "string"
    66  	// deprecated as of v1.19, replaced by DataTypeTextArray + relevant tokenization setting
    67  	// DataTypeArrayString The data type is a value of type string array
    68  	DataTypeStringArray DataType = "string[]"
    69  )
    70  
    71  func (dt DataType) String() string {
    72  	return string(dt)
    73  }
    74  
    75  func (dt DataType) PropString() []string {
    76  	return []string{dt.String()}
    77  }
    78  
    79  func (dt DataType) AsName() string {
    80  	return strings.ReplaceAll(dt.String(), "[]", "Array")
    81  }
    82  
    83  var PrimitiveDataTypes []DataType = []DataType{
    84  	DataTypeText, DataTypeInt, DataTypeNumber, DataTypeBoolean, DataTypeDate,
    85  	DataTypeGeoCoordinates, DataTypePhoneNumber, DataTypeBlob, DataTypeTextArray,
    86  	DataTypeIntArray, DataTypeNumberArray, DataTypeBooleanArray, DataTypeDateArray,
    87  	DataTypeUUID, DataTypeUUIDArray,
    88  }
    89  
    90  var NestedDataTypes []DataType = []DataType{
    91  	DataTypeObject, DataTypeObjectArray,
    92  }
    93  
    94  var DeprecatedPrimitiveDataTypes []DataType = []DataType{
    95  	// deprecated as of v1.19
    96  	DataTypeString, DataTypeStringArray,
    97  }
    98  
    99  type PropertyKind int
   100  
   101  const (
   102  	PropertyKindPrimitive PropertyKind = 1
   103  	PropertyKindRef       PropertyKind = 2
   104  	PropertyKindNested    PropertyKind = 3
   105  )
   106  
   107  type PropertyDataType interface {
   108  	Kind() PropertyKind
   109  	IsPrimitive() bool
   110  	AsPrimitive() DataType
   111  	IsReference() bool
   112  	Classes() []ClassName
   113  	ContainsClass(name ClassName) bool
   114  	IsNested() bool
   115  	AsNested() DataType
   116  }
   117  
   118  type propertyDataType struct {
   119  	kind          PropertyKind
   120  	primitiveType DataType
   121  	classes       []ClassName
   122  	nestedType    DataType
   123  }
   124  
   125  // IsPropertyLength returns if a string is a filters for property length. They have the form len(*PROPNAME*)
   126  func IsPropertyLength(propName string, offset int) (string, bool) {
   127  	isPropLengthFilter := len(propName) > 4+offset && propName[offset:offset+4] == "len(" && propName[len(propName)-1:] == ")"
   128  
   129  	if isPropLengthFilter {
   130  		return propName[offset+4 : len(propName)-1], isPropLengthFilter
   131  	}
   132  	return "", false
   133  }
   134  
   135  func IsArrayType(dt DataType) (DataType, bool) {
   136  	switch dt {
   137  	case DataTypeStringArray:
   138  		return DataTypeString, true
   139  	case DataTypeTextArray:
   140  		return DataTypeText, true
   141  	case DataTypeNumberArray:
   142  		return DataTypeNumber, true
   143  	case DataTypeIntArray:
   144  		return DataTypeInt, true
   145  	case DataTypeBooleanArray:
   146  		return DataTypeBoolean, true
   147  	case DataTypeDateArray:
   148  		return DataTypeDate, true
   149  	case DataTypeUUIDArray:
   150  		return DataTypeUUID, true
   151  	case DataTypeObjectArray:
   152  		return DataTypeObject, true
   153  	default:
   154  		return "", false
   155  	}
   156  }
   157  
   158  func (p *propertyDataType) Kind() PropertyKind {
   159  	return p.kind
   160  }
   161  
   162  func (p *propertyDataType) IsPrimitive() bool {
   163  	return p.kind == PropertyKindPrimitive
   164  }
   165  
   166  func (p *propertyDataType) AsPrimitive() DataType {
   167  	if !p.IsPrimitive() {
   168  		panic("not primitive type")
   169  	}
   170  
   171  	return p.primitiveType
   172  }
   173  
   174  func (p *propertyDataType) IsReference() bool {
   175  	return p.kind == PropertyKindRef
   176  }
   177  
   178  func (p *propertyDataType) Classes() []ClassName {
   179  	if !p.IsReference() {
   180  		panic("not MultipleRef type")
   181  	}
   182  
   183  	return p.classes
   184  }
   185  
   186  func (p *propertyDataType) ContainsClass(needle ClassName) bool {
   187  	if !p.IsReference() {
   188  		panic("not MultipleRef type")
   189  	}
   190  
   191  	for _, class := range p.classes {
   192  		if class == needle {
   193  			return true
   194  		}
   195  	}
   196  
   197  	return false
   198  }
   199  
   200  func (p *propertyDataType) IsNested() bool {
   201  	return p.kind == PropertyKindNested
   202  }
   203  
   204  func (p *propertyDataType) AsNested() DataType {
   205  	if !p.IsNested() {
   206  		panic("not nested type")
   207  	}
   208  	return p.nestedType
   209  }
   210  
   211  // Based on the schema, return a valid description of the defined datatype
   212  //
   213  // Note that this function will error if referenced classes do not exist. If
   214  // you don't want such validation, use [Schema.FindPropertyDataTypeRelaxedRefs]
   215  // instead and set relax to true
   216  func (s *Schema) FindPropertyDataType(dataType []string) (PropertyDataType, error) {
   217  	return s.FindPropertyDataTypeWithRefs(dataType, false, "")
   218  }
   219  
   220  // Based on the schema, return a valid description of the defined datatype
   221  // If relaxCrossRefValidation is set, there is no check if the referenced class
   222  // exists in the schema. This can be helpful in scenarios, such as restoring
   223  // from a backup where we have no guarantee over the order of class creation.
   224  // If belongingToClass is set and equal to referenced class, check whether class
   225  // exists in the schema is skipped. This is done to allow creating class schema with
   226  // properties referencing to itself. Previously such properties had to be created separately
   227  // only after creation of class schema
   228  func (s *Schema) FindPropertyDataTypeWithRefs(
   229  	dataType []string, relaxCrossRefValidation bool, beloningToClass ClassName,
   230  ) (PropertyDataType, error) {
   231  	if len(dataType) < 1 {
   232  		return nil, errors.New("dataType must have at least one element")
   233  	}
   234  	if len(dataType) == 1 {
   235  		for _, dt := range append(PrimitiveDataTypes, DeprecatedPrimitiveDataTypes...) {
   236  			if dataType[0] == dt.String() {
   237  				return &propertyDataType{
   238  					kind:          PropertyKindPrimitive,
   239  					primitiveType: dt,
   240  				}, nil
   241  			}
   242  		}
   243  		for _, dt := range NestedDataTypes {
   244  			if dataType[0] == dt.String() {
   245  				return &propertyDataType{
   246  					kind:       PropertyKindNested,
   247  					nestedType: dt,
   248  				}, nil
   249  			}
   250  		}
   251  		if len(dataType[0]) == 0 {
   252  			return nil, fmt.Errorf("dataType cannot be an empty string")
   253  		}
   254  		firstLetter := rune(dataType[0][0])
   255  		if unicode.IsLower(firstLetter) {
   256  			return nil, fmt.Errorf("Unknown primitive data type '%s'", dataType[0])
   257  		}
   258  	}
   259  	/* implies len(dataType) > 1, or first element is a class already */
   260  	var classes []ClassName
   261  
   262  	for _, someDataType := range dataType {
   263  		className, err := ValidateClassName(someDataType)
   264  		if err != nil {
   265  			return nil, err
   266  		}
   267  
   268  		if beloningToClass != className && !relaxCrossRefValidation {
   269  			if s.FindClassByName(className) == nil {
   270  				return nil, ErrRefToNonexistentClass
   271  			}
   272  		}
   273  
   274  		classes = append(classes, className)
   275  	}
   276  
   277  	return &propertyDataType{
   278  		kind:    PropertyKindRef,
   279  		classes: classes,
   280  	}, nil
   281  }
   282  
   283  func AsPrimitive(dataType []string) (DataType, bool) {
   284  	if len(dataType) == 1 {
   285  		for _, dt := range append(PrimitiveDataTypes, DeprecatedPrimitiveDataTypes...) {
   286  			if dataType[0] == dt.String() {
   287  				return dt, true
   288  			}
   289  		}
   290  		if len(dataType[0]) == 0 {
   291  			return "", true
   292  		}
   293  	}
   294  	return "", false
   295  }
   296  
   297  func AsNested(dataType []string) (DataType, bool) {
   298  	if len(dataType) == 1 {
   299  		for _, dt := range NestedDataTypes {
   300  			if dataType[0] == dt.String() {
   301  				return dt, true
   302  			}
   303  		}
   304  	}
   305  	return "", false
   306  }
   307  
   308  func IsNested(dataType DataType) bool {
   309  	for _, dt := range NestedDataTypes {
   310  		if dt == dataType {
   311  			return true
   312  		}
   313  	}
   314  	return false
   315  }