github.com/weaviate/weaviate@v1.24.6/entities/schema/data_types.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package schema 13 14 import ( 15 "errors" 16 "fmt" 17 "strings" 18 "unicode" 19 ) 20 21 type DataType string 22 23 const ( 24 // DataTypeCRef The data type is a cross-reference, it is starting with a capital letter 25 DataTypeCRef DataType = "cref" 26 // DataTypeText The data type is a value of type string 27 DataTypeText DataType = "text" 28 // DataTypeInt The data type is a value of type int 29 DataTypeInt DataType = "int" 30 // DataTypeNumber The data type is a value of type number/float 31 DataTypeNumber DataType = "number" 32 // DataTypeBoolean The data type is a value of type boolean 33 DataTypeBoolean DataType = "boolean" 34 // DataTypeDate The data type is a value of type date 35 DataTypeDate DataType = "date" 36 // DataTypeGeoCoordinates is used to represent geo coordinates, i.e. latitude 37 // and longitude pairs of locations on earth 38 DataTypeGeoCoordinates DataType = "geoCoordinates" 39 // DataTypePhoneNumber represents a parsed/to-be-parsed phone number 40 DataTypePhoneNumber DataType = "phoneNumber" 41 // DataTypeBlob represents a base64 encoded data 42 DataTypeBlob DataType = "blob" 43 // DataTypeTextArray The data type is a value of type string array 44 DataTypeTextArray DataType = "text[]" 45 // DataTypeIntArray The data type is a value of type int array 46 DataTypeIntArray DataType = "int[]" 47 // DataTypeNumberArray The data type is a value of type number/float array 48 DataTypeNumberArray DataType = "number[]" 49 // DataTypeBooleanArray The data type is a value of type boolean array 50 DataTypeBooleanArray DataType = "boolean[]" 51 // DataTypeDateArray The data type is a value of type date array 52 DataTypeDateArray DataType = "date[]" 53 // DataTypeUUID is a native UUID data type. It is stored in it's raw byte 54 // representation and therefore takes up less space than storing a UUID as a 55 // string 56 DataTypeUUID DataType = "uuid" 57 // DataTypeUUIDArray is the array version of DataTypeUUID 58 DataTypeUUIDArray DataType = "uuid[]" 59 60 DataTypeObject DataType = "object" 61 DataTypeObjectArray DataType = "object[]" 62 63 // deprecated as of v1.19, replaced by DataTypeText + relevant tokenization setting 64 // DataTypeString The data type is a value of type string 65 DataTypeString DataType = "string" 66 // deprecated as of v1.19, replaced by DataTypeTextArray + relevant tokenization setting 67 // DataTypeArrayString The data type is a value of type string array 68 DataTypeStringArray DataType = "string[]" 69 ) 70 71 func (dt DataType) String() string { 72 return string(dt) 73 } 74 75 func (dt DataType) PropString() []string { 76 return []string{dt.String()} 77 } 78 79 func (dt DataType) AsName() string { 80 return strings.ReplaceAll(dt.String(), "[]", "Array") 81 } 82 83 var PrimitiveDataTypes []DataType = []DataType{ 84 DataTypeText, DataTypeInt, DataTypeNumber, DataTypeBoolean, DataTypeDate, 85 DataTypeGeoCoordinates, DataTypePhoneNumber, DataTypeBlob, DataTypeTextArray, 86 DataTypeIntArray, DataTypeNumberArray, DataTypeBooleanArray, DataTypeDateArray, 87 DataTypeUUID, DataTypeUUIDArray, 88 } 89 90 var NestedDataTypes []DataType = []DataType{ 91 DataTypeObject, DataTypeObjectArray, 92 } 93 94 var DeprecatedPrimitiveDataTypes []DataType = []DataType{ 95 // deprecated as of v1.19 96 DataTypeString, DataTypeStringArray, 97 } 98 99 type PropertyKind int 100 101 const ( 102 PropertyKindPrimitive PropertyKind = 1 103 PropertyKindRef PropertyKind = 2 104 PropertyKindNested PropertyKind = 3 105 ) 106 107 type PropertyDataType interface { 108 Kind() PropertyKind 109 IsPrimitive() bool 110 AsPrimitive() DataType 111 IsReference() bool 112 Classes() []ClassName 113 ContainsClass(name ClassName) bool 114 IsNested() bool 115 AsNested() DataType 116 } 117 118 type propertyDataType struct { 119 kind PropertyKind 120 primitiveType DataType 121 classes []ClassName 122 nestedType DataType 123 } 124 125 // IsPropertyLength returns if a string is a filters for property length. They have the form len(*PROPNAME*) 126 func IsPropertyLength(propName string, offset int) (string, bool) { 127 isPropLengthFilter := len(propName) > 4+offset && propName[offset:offset+4] == "len(" && propName[len(propName)-1:] == ")" 128 129 if isPropLengthFilter { 130 return propName[offset+4 : len(propName)-1], isPropLengthFilter 131 } 132 return "", false 133 } 134 135 func IsArrayType(dt DataType) (DataType, bool) { 136 switch dt { 137 case DataTypeStringArray: 138 return DataTypeString, true 139 case DataTypeTextArray: 140 return DataTypeText, true 141 case DataTypeNumberArray: 142 return DataTypeNumber, true 143 case DataTypeIntArray: 144 return DataTypeInt, true 145 case DataTypeBooleanArray: 146 return DataTypeBoolean, true 147 case DataTypeDateArray: 148 return DataTypeDate, true 149 case DataTypeUUIDArray: 150 return DataTypeUUID, true 151 case DataTypeObjectArray: 152 return DataTypeObject, true 153 default: 154 return "", false 155 } 156 } 157 158 func (p *propertyDataType) Kind() PropertyKind { 159 return p.kind 160 } 161 162 func (p *propertyDataType) IsPrimitive() bool { 163 return p.kind == PropertyKindPrimitive 164 } 165 166 func (p *propertyDataType) AsPrimitive() DataType { 167 if !p.IsPrimitive() { 168 panic("not primitive type") 169 } 170 171 return p.primitiveType 172 } 173 174 func (p *propertyDataType) IsReference() bool { 175 return p.kind == PropertyKindRef 176 } 177 178 func (p *propertyDataType) Classes() []ClassName { 179 if !p.IsReference() { 180 panic("not MultipleRef type") 181 } 182 183 return p.classes 184 } 185 186 func (p *propertyDataType) ContainsClass(needle ClassName) bool { 187 if !p.IsReference() { 188 panic("not MultipleRef type") 189 } 190 191 for _, class := range p.classes { 192 if class == needle { 193 return true 194 } 195 } 196 197 return false 198 } 199 200 func (p *propertyDataType) IsNested() bool { 201 return p.kind == PropertyKindNested 202 } 203 204 func (p *propertyDataType) AsNested() DataType { 205 if !p.IsNested() { 206 panic("not nested type") 207 } 208 return p.nestedType 209 } 210 211 // Based on the schema, return a valid description of the defined datatype 212 // 213 // Note that this function will error if referenced classes do not exist. If 214 // you don't want such validation, use [Schema.FindPropertyDataTypeRelaxedRefs] 215 // instead and set relax to true 216 func (s *Schema) FindPropertyDataType(dataType []string) (PropertyDataType, error) { 217 return s.FindPropertyDataTypeWithRefs(dataType, false, "") 218 } 219 220 // Based on the schema, return a valid description of the defined datatype 221 // If relaxCrossRefValidation is set, there is no check if the referenced class 222 // exists in the schema. This can be helpful in scenarios, such as restoring 223 // from a backup where we have no guarantee over the order of class creation. 224 // If belongingToClass is set and equal to referenced class, check whether class 225 // exists in the schema is skipped. This is done to allow creating class schema with 226 // properties referencing to itself. Previously such properties had to be created separately 227 // only after creation of class schema 228 func (s *Schema) FindPropertyDataTypeWithRefs( 229 dataType []string, relaxCrossRefValidation bool, beloningToClass ClassName, 230 ) (PropertyDataType, error) { 231 if len(dataType) < 1 { 232 return nil, errors.New("dataType must have at least one element") 233 } 234 if len(dataType) == 1 { 235 for _, dt := range append(PrimitiveDataTypes, DeprecatedPrimitiveDataTypes...) { 236 if dataType[0] == dt.String() { 237 return &propertyDataType{ 238 kind: PropertyKindPrimitive, 239 primitiveType: dt, 240 }, nil 241 } 242 } 243 for _, dt := range NestedDataTypes { 244 if dataType[0] == dt.String() { 245 return &propertyDataType{ 246 kind: PropertyKindNested, 247 nestedType: dt, 248 }, nil 249 } 250 } 251 if len(dataType[0]) == 0 { 252 return nil, fmt.Errorf("dataType cannot be an empty string") 253 } 254 firstLetter := rune(dataType[0][0]) 255 if unicode.IsLower(firstLetter) { 256 return nil, fmt.Errorf("Unknown primitive data type '%s'", dataType[0]) 257 } 258 } 259 /* implies len(dataType) > 1, or first element is a class already */ 260 var classes []ClassName 261 262 for _, someDataType := range dataType { 263 className, err := ValidateClassName(someDataType) 264 if err != nil { 265 return nil, err 266 } 267 268 if beloningToClass != className && !relaxCrossRefValidation { 269 if s.FindClassByName(className) == nil { 270 return nil, ErrRefToNonexistentClass 271 } 272 } 273 274 classes = append(classes, className) 275 } 276 277 return &propertyDataType{ 278 kind: PropertyKindRef, 279 classes: classes, 280 }, nil 281 } 282 283 func AsPrimitive(dataType []string) (DataType, bool) { 284 if len(dataType) == 1 { 285 for _, dt := range append(PrimitiveDataTypes, DeprecatedPrimitiveDataTypes...) { 286 if dataType[0] == dt.String() { 287 return dt, true 288 } 289 } 290 if len(dataType[0]) == 0 { 291 return "", true 292 } 293 } 294 return "", false 295 } 296 297 func AsNested(dataType []string) (DataType, bool) { 298 if len(dataType) == 1 { 299 for _, dt := range NestedDataTypes { 300 if dataType[0] == dt.String() { 301 return dt, true 302 } 303 } 304 } 305 return "", false 306 } 307 308 func IsNested(dataType DataType) bool { 309 for _, dt := range NestedDataTypes { 310 if dt == dataType { 311 return true 312 } 313 } 314 return false 315 }