github.com/weaviate/weaviate@v1.24.6/usecases/schema/validation.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package schema 13 14 import ( 15 "context" 16 "fmt" 17 "strings" 18 19 "github.com/pkg/errors" 20 "github.com/weaviate/weaviate/entities/models" 21 "github.com/weaviate/weaviate/entities/schema" 22 "github.com/weaviate/weaviate/usecases/config" 23 ) 24 25 func (m *Manager) validateClassNameUniqueness(name string) error { 26 pred := func(c *models.Class) bool { 27 return strings.EqualFold(name, c.Class) 28 } 29 existingName := "" 30 m.schemaCache.RLockGuard(func() error { 31 if cls := m.schemaCache.unsafeFindClassIf(pred); cls != nil { 32 existingName = cls.Class 33 } 34 return nil 35 }) 36 37 if existingName == "" { 38 return nil 39 } 40 if name != existingName { 41 // It's a permutation 42 return fmt.Errorf( 43 "class name %q already exists as a permutation of: %q. class names must be unique when lowercased", 44 name, existingName) 45 } 46 return fmt.Errorf("class name %q already exists", name) 47 } 48 49 // Check that the format of the name is correct 50 func (m *Manager) validateClassName(ctx context.Context, className string) error { 51 _, err := schema.ValidateClassName(className) 52 return err 53 } 54 55 func (m *Manager) validatePropertyTokenization(tokenization string, propertyDataType schema.PropertyDataType) error { 56 if propertyDataType.IsPrimitive() { 57 primitiveDataType := propertyDataType.AsPrimitive() 58 59 switch primitiveDataType { 60 case schema.DataTypeString, schema.DataTypeStringArray: 61 // deprecated as of v1.19, will be migrated to DataTypeText/DataTypeTextArray 62 switch tokenization { 63 case models.PropertyTokenizationField, models.PropertyTokenizationWord: 64 return nil 65 } 66 case schema.DataTypeText, schema.DataTypeTextArray: 67 switch tokenization { 68 case models.PropertyTokenizationField, models.PropertyTokenizationWord, 69 models.PropertyTokenizationWhitespace, models.PropertyTokenizationLowercase, models.PropertyTokenizationTrigram, models.PropertyTokenizationGse: 70 return nil 71 } 72 default: 73 if tokenization == "" { 74 return nil 75 } 76 return fmt.Errorf("Tokenization is not allowed for data type '%s'", primitiveDataType) 77 } 78 return fmt.Errorf("Tokenization '%s' is not allowed for data type '%s'", tokenization, primitiveDataType) 79 } 80 81 if tokenization == "" { 82 return nil 83 } 84 85 if propertyDataType.IsNested() { 86 return fmt.Errorf("Tokenization is not allowed for object/object[] data types") 87 } 88 return fmt.Errorf("Tokenization is not allowed for reference data type") 89 } 90 91 func (m *Manager) validatePropertyIndexing(prop *models.Property) error { 92 if prop.IndexInverted != nil { 93 if prop.IndexFilterable != nil || prop.IndexSearchable != nil { 94 return fmt.Errorf("`indexInverted` is deprecated and can not be set together with `indexFilterable` or `indexSearchable`") 95 } 96 } 97 98 primitiveDataType, isPrimitive := schema.AsPrimitive(prop.DataType) 99 100 // TODO nested - should not be allowed for blobs (verify backward compat) 101 // if prop.IndexFilterable != nil { 102 // if isPrimitive && primitiveDataType == schema.DataTypeBlob { 103 // return fmt.Errorf("`indexFilterable` is not allowed for blob data type") 104 // } 105 // } 106 107 if prop.IndexSearchable != nil { 108 validateSet := true 109 if isPrimitive { 110 switch primitiveDataType { 111 case schema.DataTypeString, schema.DataTypeStringArray: 112 // string/string[] are migrated to text/text[] later, 113 // at this point they are still valid data types, therefore should be handled here 114 // true or false allowed 115 validateSet = false 116 case schema.DataTypeText, schema.DataTypeTextArray: 117 // true or false allowed 118 validateSet = false 119 default: 120 // do nothing 121 } 122 } 123 124 if validateSet && *prop.IndexSearchable { 125 return fmt.Errorf("`indexSearchable` is not allowed for other than text/text[] data types") 126 } 127 } 128 129 return nil 130 } 131 132 type validatorNestedProperty func(property *models.NestedProperty, 133 primitiveDataType, nestedDataType schema.DataType, 134 isPrimitive, isNested bool, propNamePrefix string) error 135 136 var validatorsNestedProperty = []validatorNestedProperty{ 137 validateNestedPropertyName, 138 validateNestedPropertyDataType, 139 validateNestedPropertyTokenization, 140 validateNestedPropertyIndexFilterable, 141 validateNestedPropertyIndexSearchable, 142 } 143 144 func validateNestedProperties(properties []*models.NestedProperty, propNamePrefix string) error { 145 if len(properties) == 0 { 146 return fmt.Errorf("Property '%s': At least one nested property is required for data type object/object[]", 147 propNamePrefix) 148 } 149 150 for _, property := range properties { 151 primitiveDataType, isPrimitive := schema.AsPrimitive(property.DataType) 152 nestedDataType, isNested := schema.AsNested(property.DataType) 153 154 for _, validator := range validatorsNestedProperty { 155 if err := validator(property, primitiveDataType, nestedDataType, isPrimitive, isNested, propNamePrefix); err != nil { 156 return err 157 } 158 } 159 if isNested { 160 if err := validateNestedProperties(property.NestedProperties, propNamePrefix+"."+property.Name); err != nil { 161 return err 162 } 163 } 164 } 165 return nil 166 } 167 168 func validateNestedPropertyName(property *models.NestedProperty, 169 _, _ schema.DataType, 170 _, _ bool, propNamePrefix string, 171 ) error { 172 return schema.ValidateNestedPropertyName(property.Name, propNamePrefix) 173 } 174 175 func validateNestedPropertyDataType(property *models.NestedProperty, 176 primitiveDataType, _ schema.DataType, 177 isPrimitive, isNested bool, propNamePrefix string, 178 ) error { 179 propName := propNamePrefix + "." + property.Name 180 181 if isPrimitive { 182 // DataTypeString and DataTypeStringArray as deprecated since 1.19 are not allowed 183 switch primitiveDataType { 184 case schema.DataTypeString, schema.DataTypeStringArray: 185 return fmt.Errorf("Property '%s': data type '%s' is deprecated and not allowed as nested property", propName, primitiveDataType) 186 case schema.DataTypeGeoCoordinates, schema.DataTypePhoneNumber: 187 return fmt.Errorf("Property '%s': data type '%s' not allowed as nested property", propName, primitiveDataType) 188 default: 189 // do nothing 190 } 191 return nil 192 } 193 if isNested { 194 return nil 195 } 196 return fmt.Errorf("Property '%s': reference data type not allowed", propName) 197 } 198 199 // Tokenization allowed only for text/text[] data types 200 func validateNestedPropertyTokenization(property *models.NestedProperty, 201 primitiveDataType, _ schema.DataType, 202 isPrimitive, isNested bool, propNamePrefix string, 203 ) error { 204 propName := propNamePrefix + "." + property.Name 205 206 if isPrimitive { 207 switch primitiveDataType { 208 case schema.DataTypeText, schema.DataTypeTextArray: 209 switch property.Tokenization { 210 case models.PropertyTokenizationField, models.PropertyTokenizationWord, 211 models.PropertyTokenizationWhitespace, models.PropertyTokenizationLowercase: 212 return nil 213 } 214 return fmt.Errorf("Property '%s': Tokenization '%s' is not allowed for data type '%s'", 215 propName, property.Tokenization, primitiveDataType) 216 default: 217 if property.Tokenization == "" { 218 return nil 219 } 220 return fmt.Errorf("Property '%s': Tokenization is not allowed for data type '%s'", 221 propName, primitiveDataType) 222 } 223 } 224 if property.Tokenization == "" { 225 return nil 226 } 227 if isNested { 228 return fmt.Errorf("Property '%s': Tokenization is not allowed for object/object[] data types", propName) 229 } 230 return fmt.Errorf("Property '%s': Tokenization is not allowed for reference data type", propName) 231 } 232 233 // indexFilterable allowed for primitive & ref data types 234 func validateNestedPropertyIndexFilterable(property *models.NestedProperty, 235 primitiveDataType, _ schema.DataType, 236 isPrimitive, _ bool, propNamePrefix string, 237 ) error { 238 propName := propNamePrefix + "." + property.Name 239 240 // at this point indexSearchable should be set (either by user or by defaults) 241 if property.IndexFilterable == nil { 242 return fmt.Errorf("Property '%s': `indexFilterable` not set", propName) 243 } 244 245 if isPrimitive && primitiveDataType == schema.DataTypeBlob { 246 if *property.IndexFilterable { 247 return fmt.Errorf("Property: '%s': indexFilterable is not allowed for blob data type", 248 propName) 249 } 250 } 251 252 return nil 253 } 254 255 // indexSearchable allowed for text/text[] data types 256 func validateNestedPropertyIndexSearchable(property *models.NestedProperty, 257 primitiveDataType, _ schema.DataType, 258 isPrimitive, _ bool, propNamePrefix string, 259 ) error { 260 propName := propNamePrefix + "." + property.Name 261 262 // at this point indexSearchable should be set (either by user or by defaults) 263 if property.IndexSearchable == nil { 264 return fmt.Errorf("Property '%s': `indexSearchable` not set", propName) 265 } 266 267 if isPrimitive { 268 switch primitiveDataType { 269 case schema.DataTypeText, schema.DataTypeTextArray: 270 return nil 271 default: 272 // do nothing 273 } 274 } 275 if *property.IndexSearchable { 276 return fmt.Errorf("Property '%s': `indexSearchable` is not allowed for other than text/text[] data types", 277 propName) 278 } 279 280 return nil 281 } 282 283 func (m *Manager) validateVectorSettings(class *models.Class) error { 284 if !hasTargetVectors(class) { 285 if err := m.validateVectorizer(class.Vectorizer); err != nil { 286 return err 287 } 288 if err := m.validateVectorIndexType(class.VectorIndexType); err != nil { 289 return err 290 } 291 return nil 292 } 293 294 if class.Vectorizer != "" { 295 return fmt.Errorf("class.vectorizer %q can not be set if class.vectorConfig is configured", class.Vectorizer) 296 } 297 if class.VectorIndexType != "" { 298 return fmt.Errorf("class.vectorIndexType %q can not be set if class.vectorConfig is configured", class.VectorIndexType) 299 } 300 301 for name, cfg := range class.VectorConfig { 302 // check only if vectorizer correctly configured (map with single key being vectorizer name) 303 // other cases are handled in module config validation 304 if vm, ok := cfg.Vectorizer.(map[string]interface{}); ok && len(vm) == 1 { 305 for vectorizer := range vm { 306 if err := m.validateVectorizer(vectorizer); err != nil { 307 return fmt.Errorf("target vector %q: %w", name, err) 308 } 309 } 310 } 311 if err := m.validateVectorIndexType(cfg.VectorIndexType); err != nil { 312 return fmt.Errorf("target vector %q: %w", name, err) 313 } 314 } 315 return nil 316 } 317 318 func (m *Manager) validateVectorizer(vectorizer string) error { 319 if vectorizer == config.VectorizerModuleNone { 320 return nil 321 } 322 323 if err := m.vectorizerValidator.ValidateVectorizer(vectorizer); err != nil { 324 return errors.Wrap(err, "vectorizer") 325 } 326 327 return nil 328 } 329 330 func (m *Manager) validateVectorIndexType(vectorIndexType string) error { 331 switch vectorIndexType { 332 case "hnsw", "flat": 333 return nil 334 default: 335 return errors.Errorf("unrecognized or unsupported vectorIndexType %q", 336 vectorIndexType) 337 } 338 }