github.com/weaviate/weaviate@v1.24.6/usecases/objects/auto_schema.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package objects 13 14 import ( 15 "context" 16 "encoding/json" 17 "fmt" 18 "strings" 19 "sync" 20 "time" 21 22 "github.com/google/uuid" 23 "github.com/sirupsen/logrus" 24 "github.com/weaviate/weaviate/entities/additional" 25 "github.com/weaviate/weaviate/entities/models" 26 "github.com/weaviate/weaviate/entities/schema" 27 "github.com/weaviate/weaviate/entities/schema/crossref" 28 "github.com/weaviate/weaviate/entities/search" 29 "github.com/weaviate/weaviate/usecases/config" 30 "github.com/weaviate/weaviate/usecases/objects/validation" 31 ) 32 33 type autoSchemaManager struct { 34 mutex sync.RWMutex 35 schemaManager schemaManager 36 vectorRepo VectorRepo 37 config config.AutoSchema 38 logger logrus.FieldLogger 39 } 40 41 func newAutoSchemaManager(schemaManager schemaManager, vectorRepo VectorRepo, 42 config *config.WeaviateConfig, logger logrus.FieldLogger, 43 ) *autoSchemaManager { 44 return &autoSchemaManager{ 45 schemaManager: schemaManager, 46 vectorRepo: vectorRepo, 47 config: config.Config.AutoSchema, 48 logger: logger, 49 } 50 } 51 52 func (m *autoSchemaManager) autoSchema(ctx context.Context, principal *models.Principal, 53 object *models.Object, allowCreateClass bool, 54 ) error { 55 if m.config.Enabled { 56 return m.performAutoSchema(ctx, principal, object, allowCreateClass) 57 } 58 return nil 59 } 60 61 func (m *autoSchemaManager) performAutoSchema(ctx context.Context, principal *models.Principal, 62 object *models.Object, allowCreateClass bool, 63 ) error { 64 m.mutex.Lock() 65 defer m.mutex.Unlock() 66 if object == nil { 67 return fmt.Errorf(validation.ErrorMissingObject) 68 } 69 70 if len(object.Class) == 0 { 71 // stop performing auto schema 72 return fmt.Errorf(validation.ErrorMissingClass) 73 } 74 75 object.Class = schema.UppercaseClassName(object.Class) 76 77 schemaClass, err := m.getClass(principal, object) 78 if err != nil { 79 return err 80 } 81 if schemaClass == nil && !allowCreateClass { 82 return fmt.Errorf("given class does not exist") 83 } 84 properties, err := m.getProperties(object) 85 if err != nil { 86 return err 87 } 88 if schemaClass == nil { 89 return m.createClass(ctx, principal, object.Class, properties) 90 } 91 return m.updateClass(ctx, principal, object.Class, properties, schemaClass.Properties) 92 } 93 94 func (m *autoSchemaManager) getClass(principal *models.Principal, 95 object *models.Object, 96 ) (*models.Class, error) { 97 s, err := m.schemaManager.GetSchema(principal) 98 if err != nil { 99 return nil, err 100 } 101 schemaClass := s.GetClass(schema.ClassName(object.Class)) 102 return schemaClass, nil 103 } 104 105 func (m *autoSchemaManager) createClass(ctx context.Context, principal *models.Principal, 106 className string, properties []*models.Property, 107 ) error { 108 now := time.Now() 109 class := &models.Class{ 110 Class: className, 111 Properties: properties, 112 Description: "This property was generated by Weaviate's auto-schema feature on " + now.Format(time.ANSIC), 113 } 114 m.logger. 115 WithField("auto_schema", "createClass"). 116 Debugf("create class %s", className) 117 return m.schemaManager.AddClass(ctx, principal, class) 118 } 119 120 func (m *autoSchemaManager) updateClass(ctx context.Context, principal *models.Principal, 121 className string, properties []*models.Property, existingProperties []*models.Property, 122 ) error { 123 existingPropertiesIndexMap := map[string]int{} 124 for index := range existingProperties { 125 existingPropertiesIndexMap[existingProperties[index].Name] = index 126 } 127 128 propertiesToAdd := []*models.Property{} 129 propertiesToUpdate := []*models.Property{} 130 for _, prop := range properties { 131 index, exists := existingPropertiesIndexMap[schema.LowercaseFirstLetter(prop.Name)] 132 if !exists { 133 propertiesToAdd = append(propertiesToAdd, prop) 134 } else if _, isNested := schema.AsNested(existingProperties[index].DataType); isNested { 135 mergedNestedProperties, merged := schema.MergeRecursivelyNestedProperties(existingProperties[index].NestedProperties, 136 prop.NestedProperties) 137 if merged { 138 prop.NestedProperties = mergedNestedProperties 139 propertiesToUpdate = append(propertiesToUpdate, prop) 140 } 141 } 142 } 143 for _, newProp := range propertiesToAdd { 144 m.logger. 145 WithField("auto_schema", "updateClass"). 146 Debugf("update class %s add property %s", className, newProp.Name) 147 err := m.schemaManager.AddClassProperty(ctx, principal, className, newProp) 148 if err != nil { 149 return err 150 } 151 } 152 for _, updatedProp := range propertiesToUpdate { 153 m.logger. 154 WithField("auto_schema", "updateClass"). 155 Debugf("update class %s merge object property %s", className, updatedProp.Name) 156 err := m.schemaManager.MergeClassObjectProperty(ctx, principal, className, updatedProp) 157 if err != nil { 158 return err 159 } 160 } 161 return nil 162 } 163 164 func (m *autoSchemaManager) getProperties(object *models.Object) ([]*models.Property, error) { 165 properties := []*models.Property{} 166 if props, ok := object.Properties.(map[string]interface{}); ok { 167 for name, value := range props { 168 now := time.Now() 169 dt, err := m.determineType(value, false) 170 if err != nil { 171 return nil, fmt.Errorf("property '%s' on class '%s': %w", name, object.Class, err) 172 } 173 174 var nestedProperties []*models.NestedProperty 175 if len(dt) == 1 { 176 switch dt[0] { 177 case schema.DataTypeObject: 178 nestedProperties, err = m.determineNestedProperties(value.(map[string]interface{}), now) 179 case schema.DataTypeObjectArray: 180 nestedProperties, err = m.determineNestedPropertiesOfArray(value.([]interface{}), now) 181 default: 182 // do nothing 183 } 184 } 185 if err != nil { 186 return nil, fmt.Errorf("property '%s' on class '%s': %w", name, object.Class, err) 187 } 188 189 property := &models.Property{ 190 Name: name, 191 DataType: m.getDataTypes(dt), 192 Description: "This property was generated by Weaviate's auto-schema feature on " + now.Format(time.ANSIC), 193 NestedProperties: nestedProperties, 194 } 195 properties = append(properties, property) 196 } 197 } 198 return properties, nil 199 } 200 201 func (m *autoSchemaManager) getDataTypes(dataTypes []schema.DataType) []string { 202 dtypes := make([]string, len(dataTypes)) 203 for i := range dataTypes { 204 dtypes[i] = string(dataTypes[i]) 205 } 206 return dtypes 207 } 208 209 func (m *autoSchemaManager) determineType(value interface{}, ofNestedProp bool) ([]schema.DataType, error) { 210 fallbackDataType := []schema.DataType{schema.DataTypeText} 211 fallbackArrayDataType := []schema.DataType{schema.DataTypeTextArray} 212 213 switch typedValue := value.(type) { 214 case string: 215 if _, err := time.Parse(time.RFC3339, typedValue); err == nil { 216 return []schema.DataType{schema.DataType(m.config.DefaultDate)}, nil 217 } 218 if _, err := uuid.Parse(typedValue); err == nil { 219 return []schema.DataType{schema.DataTypeUUID}, nil 220 } 221 if m.config.DefaultString != "" { 222 return []schema.DataType{schema.DataType(m.config.DefaultString)}, nil 223 } 224 return []schema.DataType{schema.DataTypeText}, nil 225 case json.Number: 226 return []schema.DataType{schema.DataType(m.config.DefaultNumber)}, nil 227 case float64: 228 return []schema.DataType{schema.DataTypeNumber}, nil 229 case int64: 230 return []schema.DataType{schema.DataTypeInt}, nil 231 case bool: 232 return []schema.DataType{schema.DataTypeBoolean}, nil 233 case map[string]interface{}: 234 // nested properties does not support phone and geo data types 235 if !ofNestedProp { 236 if dt, ok := m.asGeoCoordinatesType(typedValue); ok { 237 return dt, nil 238 } 239 if dt, ok := m.asPhoneNumber(typedValue); ok { 240 return dt, nil 241 } 242 } 243 return []schema.DataType{schema.DataTypeObject}, nil 244 case []interface{}: 245 if len(typedValue) == 0 { 246 return fallbackArrayDataType, nil 247 } 248 249 refDataTypes := []schema.DataType{} 250 var isRef bool 251 var determinedDataType schema.DataType 252 253 for i := range typedValue { 254 dataType, refDataType, err := m.determineArrayType(typedValue[i], ofNestedProp) 255 if err != nil { 256 return nil, fmt.Errorf("element [%d]: %w", i, err) 257 } 258 if i == 0 { 259 isRef = refDataType != "" 260 determinedDataType = dataType 261 } 262 if dataType != "" { 263 if isRef { 264 return nil, fmt.Errorf("element [%d]: mismatched data type - reference expected, got '%s'", 265 i, asSingleDataType(dataType)) 266 } 267 if dataType != determinedDataType { 268 return nil, fmt.Errorf("element [%d]: mismatched data type - '%s' expected, got '%s'", 269 i, asSingleDataType(determinedDataType), asSingleDataType(dataType)) 270 } 271 } else { 272 if !isRef { 273 return nil, fmt.Errorf("element [%d]: mismatched data type - '%s' expected, got reference", 274 i, asSingleDataType(determinedDataType)) 275 } 276 refDataTypes = append(refDataTypes, refDataType) 277 } 278 } 279 if len(refDataTypes) > 0 { 280 return refDataTypes, nil 281 } 282 return []schema.DataType{determinedDataType}, nil 283 case nil: 284 return fallbackDataType, nil 285 default: 286 allowed := []string{ 287 schema.DataTypeText.String(), 288 schema.DataTypeNumber.String(), 289 schema.DataTypeInt.String(), 290 schema.DataTypeBoolean.String(), 291 schema.DataTypeDate.String(), 292 schema.DataTypeUUID.String(), 293 schema.DataTypeObject.String(), 294 } 295 if !ofNestedProp { 296 allowed = append(allowed, schema.DataTypePhoneNumber.String(), schema.DataTypeGeoCoordinates.String()) 297 } 298 return nil, fmt.Errorf("unrecognized data type of value '%v' - one of '%s' expected", 299 typedValue, strings.Join(allowed, "', '")) 300 } 301 } 302 303 func asSingleDataType(arrayDataType schema.DataType) schema.DataType { 304 if dt, isArray := schema.IsArrayType(arrayDataType); isArray { 305 return dt 306 } 307 return arrayDataType 308 } 309 310 func (m *autoSchemaManager) determineArrayType(value interface{}, ofNestedProp bool, 311 ) (schema.DataType, schema.DataType, error) { 312 switch typedValue := value.(type) { 313 case string: 314 if _, err := time.Parse(time.RFC3339, typedValue); err == nil { 315 return schema.DataTypeDateArray, "", nil 316 } 317 if _, err := uuid.Parse(typedValue); err == nil { 318 return schema.DataTypeUUIDArray, "", nil 319 } 320 if schema.DataType(m.config.DefaultString) == schema.DataTypeString { 321 return schema.DataTypeStringArray, "", nil 322 } 323 return schema.DataTypeTextArray, "", nil 324 case json.Number: 325 if schema.DataType(m.config.DefaultNumber) == schema.DataTypeInt { 326 return schema.DataTypeIntArray, "", nil 327 } 328 return schema.DataTypeNumberArray, "", nil 329 case float64: 330 return schema.DataTypeNumberArray, "", nil 331 case int64: 332 return schema.DataTypeIntArray, "", nil 333 case bool: 334 return schema.DataTypeBooleanArray, "", nil 335 case map[string]interface{}: 336 if ofNestedProp { 337 return schema.DataTypeObjectArray, "", nil 338 } 339 if refDataType, ok := m.asRef(typedValue); ok { 340 return "", refDataType, nil 341 } 342 return schema.DataTypeObjectArray, "", nil 343 default: 344 allowed := []string{ 345 schema.DataTypeText.String(), 346 schema.DataTypeNumber.String(), 347 schema.DataTypeInt.String(), 348 schema.DataTypeBoolean.String(), 349 schema.DataTypeDate.String(), 350 schema.DataTypeUUID.String(), 351 schema.DataTypeObject.String(), 352 } 353 if !ofNestedProp { 354 allowed = append(allowed, schema.DataTypeCRef.String()) 355 } 356 return "", "", fmt.Errorf("unrecognized data type of value '%v' - one of '%s' expected", 357 typedValue, strings.Join(allowed, "', '")) 358 } 359 } 360 361 func (m *autoSchemaManager) asGeoCoordinatesType(val map[string]interface{}) ([]schema.DataType, bool) { 362 if len(val) == 2 { 363 if val["latitude"] != nil && val["longitude"] != nil { 364 return []schema.DataType{schema.DataTypeGeoCoordinates}, true 365 } 366 } 367 return nil, false 368 } 369 370 func (m *autoSchemaManager) asPhoneNumber(val map[string]interface{}) ([]schema.DataType, bool) { 371 if val["input"] != nil { 372 if len(val) == 1 { 373 return []schema.DataType{schema.DataTypePhoneNumber}, true 374 } 375 if len(val) == 2 { 376 if _, ok := val["defaultCountry"]; ok { 377 return []schema.DataType{schema.DataTypePhoneNumber}, true 378 } 379 } 380 } 381 382 return nil, false 383 } 384 385 func (m *autoSchemaManager) asRef(val map[string]interface{}) (schema.DataType, bool) { 386 if v, ok := val["beacon"]; ok { 387 if beacon, ok := v.(string); ok { 388 ref, err := crossref.Parse(beacon) 389 if err == nil { 390 if ref.Class == "" { 391 res, err := m.vectorRepo.ObjectByID(context.Background(), ref.TargetID, search.SelectProperties{}, additional.Properties{}, "") 392 if err == nil && res != nil { 393 return schema.DataType(res.ClassName), true 394 } 395 } else { 396 return schema.DataType(ref.Class), true 397 } 398 } 399 } 400 } 401 return "", false 402 } 403 404 func (m *autoSchemaManager) determineNestedProperties(values map[string]interface{}, now time.Time, 405 ) ([]*models.NestedProperty, error) { 406 i := 0 407 nestedProperties := make([]*models.NestedProperty, len(values)) 408 for name, value := range values { 409 np, err := m.determineNestedProperty(name, value, now) 410 if err != nil { 411 return nil, fmt.Errorf("nested property '%s': %w", name, err) 412 } 413 nestedProperties[i] = np 414 i++ 415 } 416 return nestedProperties, nil 417 } 418 419 func (m *autoSchemaManager) determineNestedProperty(name string, value interface{}, now time.Time, 420 ) (*models.NestedProperty, error) { 421 dt, err := m.determineType(value, true) 422 if err != nil { 423 return nil, err 424 } 425 426 var np []*models.NestedProperty 427 if len(dt) == 1 { 428 switch dt[0] { 429 case schema.DataTypeObject: 430 np, err = m.determineNestedProperties(value.(map[string]interface{}), now) 431 case schema.DataTypeObjectArray: 432 np, err = m.determineNestedPropertiesOfArray(value.([]interface{}), now) 433 default: 434 // do nothing 435 } 436 } 437 if err != nil { 438 return nil, err 439 } 440 441 return &models.NestedProperty{ 442 Name: name, 443 DataType: m.getDataTypes(dt), 444 Description: "This nested property was generated by Weaviate's auto-schema feature on " + 445 now.Format(time.ANSIC), 446 NestedProperties: np, 447 }, nil 448 } 449 450 func (m *autoSchemaManager) determineNestedPropertiesOfArray(valArray []interface{}, now time.Time, 451 ) ([]*models.NestedProperty, error) { 452 if len(valArray) == 0 { 453 return []*models.NestedProperty{}, nil 454 } 455 nestedProperties, err := m.determineNestedProperties(valArray[0].(map[string]interface{}), now) 456 if err != nil { 457 return nil, err 458 } 459 if len(valArray) == 1 { 460 return nestedProperties, nil 461 } 462 463 nestedPropertiesIndexMap := map[string]int{} 464 for index := range nestedProperties { 465 nestedPropertiesIndexMap[nestedProperties[index].Name] = index 466 } 467 468 for i := 1; i < len(valArray); i++ { 469 values := valArray[i].(map[string]interface{}) 470 for name, value := range values { 471 index, ok := nestedPropertiesIndexMap[name] 472 if !ok { 473 np, err := m.determineNestedProperty(name, value, now) 474 if err != nil { 475 return nil, err 476 } 477 nestedPropertiesIndexMap[name] = len(nestedProperties) 478 nestedProperties = append(nestedProperties, np) 479 } else if _, isNested := schema.AsNested(nestedProperties[index].DataType); isNested { 480 np, err := m.determineNestedProperty(name, value, now) 481 if err != nil { 482 return nil, err 483 } 484 if mergedNestedProperties, merged := schema.MergeRecursivelyNestedProperties( 485 nestedProperties[index].NestedProperties, np.NestedProperties, 486 ); merged { 487 nestedProperties[index].NestedProperties = mergedNestedProperties 488 } 489 } 490 } 491 } 492 493 return nestedProperties, nil 494 }