github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/objects.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package inverted 13 14 import ( 15 "encoding/json" 16 "fmt" 17 "time" 18 "unicode/utf8" 19 20 "github.com/go-openapi/strfmt" 21 "github.com/google/uuid" 22 "github.com/pkg/errors" 23 "github.com/weaviate/weaviate/adapters/repos/db/helpers" 24 "github.com/weaviate/weaviate/entities/filters" 25 "github.com/weaviate/weaviate/entities/models" 26 "github.com/weaviate/weaviate/entities/schema" 27 "github.com/weaviate/weaviate/usecases/objects/validation" 28 ) 29 30 func (a *Analyzer) Object(input map[string]any, props []*models.Property, 31 uuid strfmt.UUID, 32 ) ([]Property, error) { 33 propsMap := map[string]*models.Property{} 34 for _, prop := range props { 35 propsMap[prop.Name] = prop 36 } 37 38 properties, err := a.analyzeProps(propsMap, input) 39 if err != nil { 40 return nil, fmt.Errorf("analyze props: %w", err) 41 } 42 43 idProp, err := a.analyzeIDProp(uuid) 44 if err != nil { 45 return nil, fmt.Errorf("analyze uuid prop: %w", err) 46 } 47 properties = append(properties, *idProp) 48 49 tsProps, err := a.analyzeTimestampProps(input) 50 if err != nil { 51 return nil, fmt.Errorf("analyze timestamp props: %w", err) 52 } 53 // tsProps will be nil here if weaviate is 54 // not setup to index by timestamps 55 if tsProps != nil { 56 properties = append(properties, tsProps...) 57 } 58 59 return properties, nil 60 } 61 62 func (a *Analyzer) analyzeProps(propsMap map[string]*models.Property, 63 input map[string]any, 64 ) ([]Property, error) { 65 var out []Property 66 for key, prop := range propsMap { 67 if len(prop.DataType) < 1 { 68 return nil, fmt.Errorf("prop %q has no datatype", prop.Name) 69 } 70 71 if !HasInvertedIndex(prop) { 72 continue 73 } 74 75 if schema.IsBlobDataType(prop.DataType) { 76 continue 77 } 78 79 if schema.IsRefDataType(prop.DataType) { 80 if err := a.extendPropertiesWithReference(&out, prop, input, key); err != nil { 81 return nil, err 82 } 83 } else if schema.IsArrayDataType(prop.DataType) { 84 if err := a.extendPropertiesWithArrayType(&out, prop, input, key); err != nil { 85 return nil, err 86 } 87 } else { 88 if err := a.extendPropertiesWithPrimitive(&out, prop, input, key); err != nil { 89 return nil, err 90 } 91 } 92 93 } 94 return out, nil 95 } 96 97 func (a *Analyzer) analyzeIDProp(id strfmt.UUID) (*Property, error) { 98 value, err := id.MarshalText() 99 if err != nil { 100 return nil, fmt.Errorf("marshal id prop: %w", err) 101 } 102 return &Property{ 103 Name: filters.InternalPropID, 104 Items: []Countable{ 105 { 106 Data: value, 107 }, 108 }, 109 HasFilterableIndex: HasFilterableIndexIdProp, 110 HasSearchableIndex: HasSearchableIndexIdProp, 111 }, nil 112 } 113 114 func (a *Analyzer) analyzeTimestampProps(input map[string]any) ([]Property, error) { 115 createTime, createTimeOK := input[filters.InternalPropCreationTimeUnix] 116 updateTime, updateTimeOK := input[filters.InternalPropLastUpdateTimeUnix] 117 118 var props []Property 119 if createTimeOK { 120 b, err := json.Marshal(createTime) 121 if err != nil { 122 return nil, fmt.Errorf("analyze create timestamp prop: %w", err) 123 } 124 props = append(props, Property{ 125 Name: filters.InternalPropCreationTimeUnix, 126 Items: []Countable{{Data: b}}, 127 HasFilterableIndex: HasFilterableIndexTimestampProp, 128 HasSearchableIndex: HasSearchableIndexTimestampProp, 129 }) 130 } 131 132 if updateTimeOK { 133 b, err := json.Marshal(updateTime) 134 if err != nil { 135 return nil, fmt.Errorf("analyze update timestamp prop: %w", err) 136 } 137 props = append(props, Property{ 138 Name: filters.InternalPropLastUpdateTimeUnix, 139 Items: []Countable{{Data: b}}, 140 HasFilterableIndex: HasFilterableIndexTimestampProp, 141 HasSearchableIndex: HasSearchableIndexTimestampProp, 142 }) 143 } 144 145 return props, nil 146 } 147 148 func (a *Analyzer) extendPropertiesWithArrayType(properties *[]Property, 149 prop *models.Property, input map[string]any, propName string, 150 ) error { 151 value, ok := input[propName] 152 if !ok { 153 // skip any primitive prop that's not set 154 return nil 155 } 156 157 var err error 158 value, err = typedSliceToUntyped(value) 159 if err != nil { 160 return fmt.Errorf("extend properties with array type: %w", err) 161 } 162 163 values, ok := value.([]any) 164 if !ok { 165 // skip any primitive prop that's not set 166 return errors.New("analyze array prop: expected array prop") 167 } 168 169 property, err := a.analyzeArrayProp(prop, values) 170 if err != nil { 171 return fmt.Errorf("analyze array prop: %w", err) 172 } 173 if property == nil { 174 return nil 175 } 176 177 *properties = append(*properties, *property) 178 return nil 179 } 180 181 // extendPropertiesWithPrimitive mutates the passed in properties, by extending 182 // it with an additional property - if applicable 183 func (a *Analyzer) extendPropertiesWithPrimitive(properties *[]Property, 184 prop *models.Property, input map[string]any, propName string, 185 ) error { 186 var property *Property 187 var err error 188 189 value, ok := input[propName] 190 if !ok { 191 // skip any primitive prop that's not set 192 return nil 193 } 194 property, err = a.analyzePrimitiveProp(prop, value) 195 if err != nil { 196 return fmt.Errorf("analyze primitive prop: %w", err) 197 } 198 if property == nil { 199 return nil 200 } 201 202 *properties = append(*properties, *property) 203 return nil 204 } 205 206 func (a *Analyzer) analyzeArrayProp(prop *models.Property, values []any) (*Property, error) { 207 var items []Countable 208 hasFilterableIndex := HasFilterableIndex(prop) 209 hasSearchableIndex := HasSearchableIndex(prop) 210 211 switch dt := schema.DataType(prop.DataType[0]); dt { 212 case schema.DataTypeTextArray: 213 hasFilterableIndex = hasFilterableIndex && !a.isFallbackToSearchable() 214 in, err := stringsFromValues(prop, values) 215 if err != nil { 216 return nil, err 217 } 218 items = a.TextArray(prop.Tokenization, in) 219 case schema.DataTypeIntArray: 220 in := make([]int64, len(values)) 221 for i, value := range values { 222 if asJsonNumber, ok := value.(json.Number); ok { 223 var err error 224 value, err = asJsonNumber.Float64() 225 if err != nil { 226 return nil, err 227 } 228 } 229 230 if asFloat, ok := value.(float64); ok { 231 // unmarshaling from json into a dynamic schema will assume every number 232 // is a float64 233 value = int64(asFloat) 234 } 235 236 asInt, ok := value.(int64) 237 if !ok { 238 return nil, fmt.Errorf("expected property %s to be of type int64, but got %T", prop.Name, value) 239 } 240 in[i] = asInt 241 } 242 243 var err error 244 items, err = a.IntArray(in) 245 if err != nil { 246 return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err) 247 } 248 case schema.DataTypeNumberArray: 249 in := make([]float64, len(values)) 250 for i, value := range values { 251 if asJsonNumber, ok := value.(json.Number); ok { 252 var err error 253 value, err = asJsonNumber.Float64() 254 if err != nil { 255 return nil, err 256 } 257 } 258 259 asFloat, ok := value.(float64) 260 if !ok { 261 return nil, fmt.Errorf("expected property %s to be of type float64, but got %T", prop.Name, value) 262 } 263 in[i] = asFloat 264 } 265 266 var err error 267 items, err = a.FloatArray(in) // convert to int before analyzing 268 if err != nil { 269 return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err) 270 } 271 case schema.DataTypeBooleanArray: 272 in := make([]bool, len(values)) 273 for i, value := range values { 274 asBool, ok := value.(bool) 275 if !ok { 276 return nil, fmt.Errorf("expected property %s to be of type bool, but got %T", prop.Name, value) 277 } 278 in[i] = asBool 279 } 280 281 var err error 282 items, err = a.BoolArray(in) // convert to int before analyzing 283 if err != nil { 284 return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err) 285 } 286 case schema.DataTypeDateArray: 287 in := make([]int64, len(values)) 288 for i, value := range values { 289 // dates can be either a date-string or directly a time object. Try to parse both 290 if asTime, okTime := value.(time.Time); okTime { 291 in[i] = asTime.UnixNano() 292 } else if asString, okString := value.(string); okString { 293 parsedTime, err := time.Parse(time.RFC3339Nano, asString) 294 if err != nil { 295 return nil, fmt.Errorf("parse time: %w", err) 296 } 297 in[i] = parsedTime.UnixNano() 298 } else { 299 return nil, fmt.Errorf("expected property %s to be a time-string or time object, but got %T", prop.Name, value) 300 } 301 } 302 303 var err error 304 items, err = a.IntArray(in) 305 if err != nil { 306 return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err) 307 } 308 case schema.DataTypeUUIDArray: 309 parsed, err := validation.ParseUUIDArray(values) 310 if err != nil { 311 return nil, fmt.Errorf("parse uuid array: %w", err) 312 } 313 314 items, err = a.UUIDArray(parsed) 315 if err != nil { 316 return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err) 317 } 318 319 default: 320 // ignore unsupported prop type 321 return nil, nil 322 } 323 324 return &Property{ 325 Name: prop.Name, 326 Items: items, 327 Length: len(values), 328 HasFilterableIndex: hasFilterableIndex, 329 HasSearchableIndex: hasSearchableIndex, 330 }, nil 331 } 332 333 func stringsFromValues(prop *models.Property, values []any) ([]string, error) { 334 in := make([]string, len(values)) 335 for i, value := range values { 336 asString, ok := value.(string) 337 if !ok { 338 return nil, fmt.Errorf("expected property %s to be of type string, but got %T", prop.Name, value) 339 } 340 in[i] = asString 341 } 342 return in, nil 343 } 344 345 func (a *Analyzer) analyzePrimitiveProp(prop *models.Property, value any) (*Property, error) { 346 var items []Countable 347 propertyLength := -1 // will be overwritten for string/text, signals not to add the other types. 348 hasFilterableIndex := HasFilterableIndex(prop) 349 hasSearchableIndex := HasSearchableIndex(prop) 350 351 switch dt := schema.DataType(prop.DataType[0]); dt { 352 case schema.DataTypeText: 353 hasFilterableIndex = hasFilterableIndex && !a.isFallbackToSearchable() 354 asString, ok := value.(string) 355 if !ok { 356 return nil, fmt.Errorf("expected property %s to be of type string, but got %T", prop.Name, value) 357 } 358 items = a.Text(prop.Tokenization, asString) 359 propertyLength = utf8.RuneCountInString(asString) 360 case schema.DataTypeInt: 361 if asFloat, ok := value.(float64); ok { 362 // unmarshaling from json into a dynamic schema will assume every number 363 // is a float64 364 value = int64(asFloat) 365 } 366 367 if asInt, ok := value.(int); ok { 368 // when merging an existing object we may retrieve an untyped int 369 value = int64(asInt) 370 } 371 372 asInt, ok := value.(int64) 373 if !ok { 374 return nil, fmt.Errorf("expected property %s to be of type int64, but got %T", prop.Name, value) 375 } 376 377 var err error 378 items, err = a.Int(asInt) 379 if err != nil { 380 return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err) 381 } 382 case schema.DataTypeNumber: 383 asFloat, ok := value.(float64) 384 if !ok { 385 return nil, fmt.Errorf("expected property %s to be of type float64, but got %T", prop.Name, value) 386 } 387 388 var err error 389 items, err = a.Float(asFloat) // convert to int before analyzing 390 if err != nil { 391 return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err) 392 } 393 case schema.DataTypeBoolean: 394 asBool, ok := value.(bool) 395 if !ok { 396 return nil, fmt.Errorf("expected property %s to be of type bool, but got %T", prop.Name, value) 397 } 398 399 var err error 400 items, err = a.Bool(asBool) // convert to int before analyzing 401 if err != nil { 402 return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err) 403 } 404 case schema.DataTypeDate: 405 var err error 406 if asString, ok := value.(string); ok { 407 // for example when patching the date may have been loaded as a string 408 value, err = time.Parse(time.RFC3339Nano, asString) 409 if err != nil { 410 return nil, fmt.Errorf("parse stringified timestamp: %w", err) 411 } 412 } 413 asTime, ok := value.(time.Time) 414 if !ok { 415 return nil, fmt.Errorf("expected property %s to be time.Time, but got %T", prop.Name, value) 416 } 417 418 items, err = a.Int(asTime.UnixNano()) 419 if err != nil { 420 return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err) 421 } 422 case schema.DataTypeUUID: 423 var err error 424 425 if asString, ok := value.(string); ok { 426 // for example when patching the uuid may have been loaded as a string 427 value, err = uuid.Parse(asString) 428 if err != nil { 429 return nil, fmt.Errorf("parse stringified uuid: %w", err) 430 } 431 } 432 433 asUUID, ok := value.(uuid.UUID) 434 if !ok { 435 return nil, fmt.Errorf("expected property %s to be uuid.UUID, but got %T", prop.Name, value) 436 } 437 438 items, err = a.UUID(asUUID) 439 if err != nil { 440 return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err) 441 } 442 default: 443 // ignore unsupported prop type 444 return nil, nil 445 } 446 447 return &Property{ 448 Name: prop.Name, 449 Items: items, 450 Length: propertyLength, 451 HasFilterableIndex: hasFilterableIndex, 452 HasSearchableIndex: hasSearchableIndex, 453 }, nil 454 } 455 456 // extendPropertiesWithReference extends the specified properties arrays with 457 // either 1 or 2 entries: If the ref is not set, only the ref-count property 458 // will be added. If the ref is set the ref-prop itself will also be added and 459 // contain all references as values 460 func (a *Analyzer) extendPropertiesWithReference(properties *[]Property, 461 prop *models.Property, input map[string]any, propName string, 462 ) error { 463 value, ok := input[propName] 464 if !ok { 465 // explicitly set zero-value, so we can index for "ref not set" 466 value = make(models.MultipleRef, 0) 467 } 468 469 var asRefs models.MultipleRef 470 asRefs, ok = value.(models.MultipleRef) 471 if !ok { 472 // due to the fix introduced in https://github.com/weaviate/weaviate/pull/2320, 473 // MultipleRef's can appear as empty []any when no actual refs are provided for 474 // an object's reference property. 475 // 476 // if we encounter []any, assume it indicates an empty ref prop, and skip it. 477 _, ok := value.([]any) 478 if !ok { 479 return fmt.Errorf("expected property %q to be of type models.MutlipleRef,"+ 480 " but got %T", prop.Name, value) 481 } 482 return nil 483 } 484 485 property, err := a.analyzeRefPropCount(prop, asRefs) 486 if err != nil { 487 return fmt.Errorf("ref count: %w", err) 488 } 489 490 *properties = append(*properties, *property) 491 492 if len(asRefs) == 0 { 493 return nil 494 } 495 496 property, err = a.analyzeRefProp(prop, asRefs) 497 if err != nil { 498 return fmt.Errorf("refs: %w", err) 499 } 500 501 *properties = append(*properties, *property) 502 return nil 503 } 504 505 func (a *Analyzer) analyzeRefPropCount(prop *models.Property, 506 value models.MultipleRef, 507 ) (*Property, error) { 508 items, err := a.RefCount(value) 509 if err != nil { 510 return nil, fmt.Errorf("analyze ref-property %q: %w", prop.Name, err) 511 } 512 513 return &Property{ 514 Name: helpers.MetaCountProp(prop.Name), 515 Items: items, 516 Length: len(value), 517 HasFilterableIndex: HasFilterableIndex(prop), 518 HasSearchableIndex: HasSearchableIndex(prop), 519 }, nil 520 } 521 522 func (a *Analyzer) analyzeRefProp(prop *models.Property, 523 value models.MultipleRef, 524 ) (*Property, error) { 525 items, err := a.Ref(value) 526 if err != nil { 527 return nil, fmt.Errorf("analyze ref-property %q: %w", prop.Name, err) 528 } 529 530 return &Property{ 531 Name: prop.Name, 532 Items: items, 533 HasFilterableIndex: HasFilterableIndex(prop), 534 HasSearchableIndex: HasSearchableIndex(prop), 535 }, nil 536 } 537 538 func typedSliceToUntyped(in any) ([]any, error) { 539 switch typed := in.(type) { 540 case []any: 541 // nothing to do 542 return typed, nil 543 case []string: 544 return convertToUntyped[string](typed), nil 545 case []int: 546 return convertToUntyped[int](typed), nil 547 case []time.Time: 548 return convertToUntyped[time.Time](typed), nil 549 case []bool: 550 return convertToUntyped[bool](typed), nil 551 case []float64: 552 return convertToUntyped[float64](typed), nil 553 case []uuid.UUID: 554 return convertToUntyped[uuid.UUID](typed), nil 555 default: 556 return nil, errors.Errorf("unsupported type %T", in) 557 } 558 } 559 560 func convertToUntyped[T comparable](in []T) []any { 561 out := make([]any, len(in)) 562 for i := range out { 563 out[i] = in[i] 564 } 565 return out 566 } 567 568 // Indicates whether property should be indexed 569 // Index holds document ids with property of/containing particular value 570 // and number of its occurrences in that property 571 // (index created using bucket of StrategyMapCollection) 572 func HasSearchableIndex(prop *models.Property) bool { 573 switch dt, _ := schema.AsPrimitive(prop.DataType); dt { 574 case schema.DataTypeText, schema.DataTypeTextArray: 575 // by default property has searchable index only for text/text[] props 576 if prop.IndexSearchable == nil { 577 return true 578 } 579 return *prop.IndexSearchable 580 default: 581 return false 582 } 583 } 584 585 // Indicates whether property should be indexed 586 // Index holds document ids with property of/containing particular value 587 // (index created using bucket of StrategyRoaringSet) 588 func HasFilterableIndex(prop *models.Property) bool { 589 // by default property has filterable index 590 if prop.IndexFilterable == nil { 591 return true 592 } 593 return *prop.IndexFilterable 594 } 595 596 func HasInvertedIndex(prop *models.Property) bool { 597 return HasFilterableIndex(prop) || HasSearchableIndex(prop) 598 } 599 600 const ( 601 // always 602 HasFilterableIndexIdProp = true 603 HasSearchableIndexIdProp = false 604 605 // only if index.invertedIndexConfig.IndexTimestamps set 606 HasFilterableIndexTimestampProp = true 607 HasSearchableIndexTimestampProp = false 608 609 // only if property.indexFilterable or property.indexSearchable set 610 HasFilterableIndexMetaCount = true 611 HasSearchableIndexMetaCount = false 612 613 // only if index.invertedIndexConfig.IndexNullState set 614 // and either property.indexFilterable or property.indexSearchable set 615 HasFilterableIndexPropNull = true 616 HasSearchableIndexPropNull = false 617 618 // only if index.invertedIndexConfig.IndexPropertyLength set 619 // and either property.indexFilterable or property.indexSearchable set 620 HasFilterableIndexPropLength = true 621 HasSearchableIndexPropLength = false 622 )