github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/searcher.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package inverted 13 14 import ( 15 "context" 16 "encoding/binary" 17 "fmt" 18 "strconv" 19 "time" 20 21 enterrors "github.com/weaviate/weaviate/entities/errors" 22 23 "github.com/google/uuid" 24 "github.com/sirupsen/logrus" 25 "github.com/weaviate/sroar" 26 "github.com/weaviate/weaviate/adapters/repos/db/helpers" 27 "github.com/weaviate/weaviate/adapters/repos/db/inverted/stopwords" 28 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv" 29 "github.com/weaviate/weaviate/adapters/repos/db/propertyspecific" 30 "github.com/weaviate/weaviate/adapters/repos/db/roaringset" 31 "github.com/weaviate/weaviate/adapters/repos/db/sorter" 32 "github.com/weaviate/weaviate/entities/additional" 33 "github.com/weaviate/weaviate/entities/filters" 34 "github.com/weaviate/weaviate/entities/inverted" 35 "github.com/weaviate/weaviate/entities/models" 36 "github.com/weaviate/weaviate/entities/schema" 37 "github.com/weaviate/weaviate/entities/storobj" 38 "github.com/weaviate/weaviate/usecases/config" 39 ) 40 41 type Searcher struct { 42 logger logrus.FieldLogger 43 store *lsmkv.Store 44 schema schema.Schema 45 classSearcher ClassSearcher // to allow recursive searches on ref-props 46 propIndices propertyspecific.Indices 47 stopwords stopwords.StopwordDetector 48 shardVersion uint16 49 isFallbackToSearchable IsFallbackToSearchable 50 tenant string 51 // nestedCrossRefLimit limits the number of nested cross refs returned for a query 52 nestedCrossRefLimit int64 53 bitmapFactory *roaringset.BitmapFactory 54 } 55 56 func NewSearcher(logger logrus.FieldLogger, store *lsmkv.Store, 57 schema schema.Schema, propIndices propertyspecific.Indices, 58 classSearcher ClassSearcher, stopwords stopwords.StopwordDetector, 59 shardVersion uint16, isFallbackToSearchable IsFallbackToSearchable, 60 tenant string, nestedCrossRefLimit int64, bitmapFactory *roaringset.BitmapFactory, 61 ) *Searcher { 62 return &Searcher{ 63 logger: logger, 64 store: store, 65 schema: schema, 66 propIndices: propIndices, 67 classSearcher: classSearcher, 68 stopwords: stopwords, 69 shardVersion: shardVersion, 70 isFallbackToSearchable: isFallbackToSearchable, 71 tenant: tenant, 72 nestedCrossRefLimit: nestedCrossRefLimit, 73 bitmapFactory: bitmapFactory, 74 } 75 } 76 77 // Objects returns a list of full objects 78 func (s *Searcher) Objects(ctx context.Context, limit int, 79 filter *filters.LocalFilter, sort []filters.Sort, additional additional.Properties, 80 className schema.ClassName, 81 ) ([]*storobj.Object, error) { 82 allowList, err := s.docIDs(ctx, filter, additional, className, limit) 83 if err != nil { 84 return nil, err 85 } 86 87 var it docIDsIterator 88 if len(sort) > 0 { 89 docIDs, err := s.sort(ctx, limit, sort, allowList, className) 90 if err != nil { 91 return nil, fmt.Errorf("sort doc ids: %w", err) 92 } 93 it = newSliceDocIDsIterator(docIDs) 94 } else { 95 it = allowList.Iterator() 96 } 97 98 return s.objectsByDocID(it, additional, limit) 99 } 100 101 func (s *Searcher) sort(ctx context.Context, limit int, sort []filters.Sort, 102 docIDs helpers.AllowList, className schema.ClassName, 103 ) ([]uint64, error) { 104 lsmSorter, err := sorter.NewLSMSorter(s.store, s.schema, className) 105 if err != nil { 106 return nil, err 107 } 108 return lsmSorter.SortDocIDs(ctx, limit, sort, docIDs) 109 } 110 111 func (s *Searcher) objectsByDocID(it docIDsIterator, 112 additional additional.Properties, limit int, 113 ) ([]*storobj.Object, error) { 114 bucket := s.store.Bucket(helpers.ObjectsBucketLSM) 115 if bucket == nil { 116 return nil, fmt.Errorf("objects bucket not found") 117 } 118 119 out := make([]*storobj.Object, it.Len()) 120 docIDBytes := make([]byte, 8) 121 122 // Prevent unbounded iteration 123 if limit == 0 { 124 limit = int(config.DefaultQueryMaximumResults) 125 } 126 127 i := 0 128 for docID, ok := it.Next(); ok; docID, ok = it.Next() { 129 binary.LittleEndian.PutUint64(docIDBytes, docID) 130 res, err := bucket.GetBySecondary(0, docIDBytes) 131 if err != nil { 132 return nil, err 133 } 134 135 if res == nil { 136 continue 137 } 138 139 var unmarshalled *storobj.Object 140 if additional.ReferenceQuery { 141 unmarshalled, err = storobj.FromBinaryUUIDOnly(res) 142 } else { 143 unmarshalled, err = storobj.FromBinaryOptional(res, additional) 144 } 145 if err != nil { 146 return nil, fmt.Errorf("unmarshal data object at position %d: %w", i, err) 147 } 148 149 out[i] = unmarshalled 150 i++ 151 152 if i >= limit { 153 break 154 } 155 } 156 157 return out[:i], nil 158 } 159 160 // DocIDs is similar to Objects, but does not actually resolve the docIDs to 161 // full objects. Instead it returns the pure object id pointers. They can then 162 // be used in a secondary index (e.g. vector index) 163 // 164 // DocID queries does not contain a limit by design, as we won't know if the limit 165 // wouldn't remove the item that is most important for the follow up query. 166 // Imagine the user sets the limit to 1 and the follow-up is a vector search. 167 // If we already limited the allowList to 1, the vector search would be 168 // pointless, as only the first element would be allowed, regardless of which 169 // had the shortest distance 170 func (s *Searcher) DocIDs(ctx context.Context, filter *filters.LocalFilter, 171 additional additional.Properties, className schema.ClassName, 172 ) (helpers.AllowList, error) { 173 allow, err := s.docIDs(ctx, filter, additional, className, 0) 174 if err != nil { 175 return nil, err 176 } 177 // Some filters, such as NotEqual, return a theoretical range of docIDs 178 // which also includes a buffer in the underlying bitmap, to reduce the 179 // overhead of repopulating the base bitmap. Here we can truncate that 180 // buffer to ensure that the caller is receiving only the possible range 181 // of docIDs 182 return allow.Truncate(s.bitmapFactory.ActualMaxVal()), nil 183 } 184 185 func (s *Searcher) docIDs(ctx context.Context, filter *filters.LocalFilter, 186 additional additional.Properties, className schema.ClassName, 187 limit int, 188 ) (helpers.AllowList, error) { 189 pv, err := s.extractPropValuePair(filter.Root, className) 190 if err != nil { 191 return nil, err 192 } 193 194 if err := pv.fetchDocIDs(s, limit); err != nil { 195 return nil, fmt.Errorf("fetch doc ids for prop/value pair: %w", err) 196 } 197 198 dbm, err := pv.mergeDocIDs() 199 if err != nil { 200 return nil, fmt.Errorf("merge doc ids by operator: %w", err) 201 } 202 203 return helpers.NewAllowListFromBitmap(dbm.docIDs), nil 204 } 205 206 func (s *Searcher) extractPropValuePair(filter *filters.Clause, 207 className schema.ClassName, 208 ) (*propValuePair, error) { 209 class := s.schema.FindClassByName(schema.ClassName(className)) 210 if class == nil { 211 return nil, fmt.Errorf("class %q not found", className) 212 } 213 out, err := newPropValuePair(class, s.logger) 214 if err != nil { 215 return nil, fmt.Errorf("new prop value pair: %w", err) 216 } 217 if filter.Operands != nil { 218 // nested filter 219 children, err := s.extractPropValuePairs(filter.Operands, className) 220 if err != nil { 221 return nil, err 222 } 223 out.children = children 224 out.operator = filter.Operator 225 return out, nil 226 } 227 228 if filter.Operator == filters.ContainsAny || filter.Operator == filters.ContainsAll { 229 return s.extractContains(filter.On, filter.Value.Type, filter.Value.Value, filter.Operator, class) 230 } 231 232 // on value or non-nested filter 233 props := filter.On.Slice() 234 propName := props[0] 235 236 if s.onInternalProp(propName) { 237 return s.extractInternalProp(propName, filter.Value.Type, filter.Value.Value, filter.Operator, class) 238 } 239 240 if extractedPropName, ok := schema.IsPropertyLength(propName, 0); ok { 241 property, err := s.schema.GetProperty(className, schema.PropertyName(extractedPropName)) 242 if err != nil { 243 return nil, err 244 } 245 return s.extractPropertyLength(property, filter.Value.Type, filter.Value.Value, filter.Operator, class) 246 } 247 248 property, err := s.schema.GetProperty(className, schema.PropertyName(propName)) 249 if err != nil { 250 return nil, err 251 } 252 253 if s.onRefProp(property) && len(props) != 1 { 254 return s.extractReferenceFilter(property, filter, class) 255 } 256 257 if s.onRefProp(property) && filter.Value.Type == schema.DataTypeInt { 258 // ref prop and int type is a special case, the user is looking for the 259 // reference count as opposed to the content 260 return s.extractReferenceCount(property, filter.Value.Value, filter.Operator, class) 261 } 262 263 if filter.Operator == filters.OperatorIsNull { 264 return s.extractPropertyNull(property, filter.Value.Type, filter.Value.Value, filter.Operator, class) 265 } 266 267 if s.onGeoProp(property) { 268 return s.extractGeoFilter(property, filter.Value.Value, filter.Value.Type, filter.Operator, class) 269 } 270 271 if s.onUUIDProp(property) { 272 return s.extractUUIDFilter(property, filter.Value.Value, filter.Value.Type, filter.Operator, class) 273 } 274 275 if s.onTokenizableProp(property) { 276 return s.extractTokenizableProp(property, filter.Value.Type, filter.Value.Value, filter.Operator, class) 277 } 278 279 return s.extractPrimitiveProp(property, filter.Value.Type, filter.Value.Value, filter.Operator, class) 280 } 281 282 func (s *Searcher) extractPropValuePairs(operands []filters.Clause, className schema.ClassName) ([]*propValuePair, error) { 283 children := make([]*propValuePair, len(operands)) 284 eg := enterrors.NewErrorGroupWrapper(s.logger) 285 // prevent unbounded concurrency, see 286 // https://github.com/weaviate/weaviate/issues/3179 for details 287 eg.SetLimit(2 * _NUMCPU) 288 289 for i, clause := range operands { 290 i, clause := i, clause 291 eg.Go(func() error { 292 child, err := s.extractPropValuePair(&clause, className) 293 if err != nil { 294 return fmt.Errorf("nested clause at pos %d: %w", i, err) 295 } 296 children[i] = child 297 298 return nil 299 }, clause) 300 } 301 if err := eg.Wait(); err != nil { 302 return nil, fmt.Errorf("nested query: %w", err) 303 } 304 return children, nil 305 } 306 307 func (s *Searcher) extractReferenceFilter(prop *models.Property, 308 filter *filters.Clause, class *models.Class, 309 ) (*propValuePair, error) { 310 ctx := context.TODO() 311 return newRefFilterExtractor(s.logger, s.classSearcher, filter, class, prop, s.tenant, s.nestedCrossRefLimit). 312 Do(ctx) 313 } 314 315 func (s *Searcher) extractPrimitiveProp(prop *models.Property, propType schema.DataType, 316 value interface{}, operator filters.Operator, class *models.Class, 317 ) (*propValuePair, error) { 318 var extractValueFn func(in interface{}) ([]byte, error) 319 switch propType { 320 case schema.DataTypeBoolean: 321 extractValueFn = s.extractBoolValue 322 case schema.DataTypeInt: 323 extractValueFn = s.extractIntValue 324 case schema.DataTypeNumber: 325 extractValueFn = s.extractNumberValue 326 case schema.DataTypeDate: 327 extractValueFn = s.extractDateValue 328 case "": 329 return nil, fmt.Errorf("data type cannot be empty") 330 default: 331 return nil, fmt.Errorf("data type %q not supported in query", propType) 332 } 333 334 byteValue, err := extractValueFn(value) 335 if err != nil { 336 return nil, err 337 } 338 339 hasFilterableIndex := HasFilterableIndex(prop) 340 hasSearchableIndex := HasSearchableIndex(prop) 341 342 if !hasFilterableIndex && !hasSearchableIndex { 343 return nil, inverted.NewMissingFilterableIndexError(prop.Name) 344 } 345 346 return &propValuePair{ 347 value: byteValue, 348 prop: prop.Name, 349 operator: operator, 350 hasFilterableIndex: hasFilterableIndex, 351 hasSearchableIndex: hasSearchableIndex, 352 Class: class, 353 }, nil 354 } 355 356 func (s *Searcher) extractReferenceCount(prop *models.Property, value interface{}, 357 operator filters.Operator, class *models.Class, 358 ) (*propValuePair, error) { 359 byteValue, err := s.extractIntCountValue(value) 360 if err != nil { 361 return nil, err 362 } 363 364 hasFilterableIndex := HasFilterableIndexMetaCount && HasInvertedIndex(prop) 365 hasSearchableIndex := HasSearchableIndexMetaCount && HasInvertedIndex(prop) 366 367 if !hasFilterableIndex && !hasSearchableIndex { 368 return nil, inverted.NewMissingFilterableMetaCountIndexError(prop.Name) 369 } 370 371 return &propValuePair{ 372 value: byteValue, 373 prop: helpers.MetaCountProp(prop.Name), 374 operator: operator, 375 hasFilterableIndex: hasFilterableIndex, 376 hasSearchableIndex: hasSearchableIndex, 377 Class: class, 378 }, nil 379 } 380 381 func (s *Searcher) extractGeoFilter(prop *models.Property, value interface{}, 382 valueType schema.DataType, operator filters.Operator, class *models.Class, 383 ) (*propValuePair, error) { 384 if valueType != schema.DataTypeGeoCoordinates { 385 return nil, fmt.Errorf("prop %q is of type geoCoordinates, it can only"+ 386 "be used with geoRange filters", prop.Name) 387 } 388 389 parsed := value.(filters.GeoRange) 390 391 return &propValuePair{ 392 value: nil, // not going to be served by an inverted index 393 valueGeoRange: &parsed, 394 prop: prop.Name, 395 operator: operator, 396 hasFilterableIndex: HasFilterableIndex(prop), 397 hasSearchableIndex: HasSearchableIndex(prop), 398 Class: class, 399 }, nil 400 } 401 402 func (s *Searcher) extractUUIDFilter(prop *models.Property, value interface{}, 403 valueType schema.DataType, operator filters.Operator, class *models.Class, 404 ) (*propValuePair, error) { 405 var byteValue []byte 406 407 switch valueType { 408 case schema.DataTypeText: 409 asStr, ok := value.(string) 410 if !ok { 411 return nil, fmt.Errorf("expected to see uuid as string in filter, got %T", value) 412 } 413 parsed, err := uuid.Parse(asStr) 414 if err != nil { 415 return nil, fmt.Errorf("parse uuid string: %w", err) 416 } 417 byteValue = parsed[:] 418 default: 419 return nil, fmt.Errorf("prop %q is of type uuid, the uuid to filter "+ 420 "on must be specified as a string (e.g. valueText:<uuid>)", prop.Name) 421 } 422 423 hasFilterableIndex := HasFilterableIndex(prop) 424 hasSearchableIndex := HasSearchableIndex(prop) 425 426 if !hasFilterableIndex && !hasSearchableIndex { 427 return nil, inverted.NewMissingFilterableIndexError(prop.Name) 428 } 429 430 return &propValuePair{ 431 value: byteValue, 432 prop: prop.Name, 433 operator: operator, 434 hasFilterableIndex: hasFilterableIndex, 435 hasSearchableIndex: hasSearchableIndex, 436 Class: class, 437 }, nil 438 } 439 440 func (s *Searcher) extractInternalProp(propName string, propType schema.DataType, value interface{}, 441 operator filters.Operator, class *models.Class, 442 ) (*propValuePair, error) { 443 switch propName { 444 case filters.InternalPropBackwardsCompatID, filters.InternalPropID: 445 return s.extractIDProp(propName, propType, value, operator, class) 446 case filters.InternalPropCreationTimeUnix, filters.InternalPropLastUpdateTimeUnix: 447 return s.extractTimestampProp(propName, propType, value, operator, class) 448 default: 449 return nil, fmt.Errorf( 450 "failed to extract internal prop, unsupported internal prop '%s'", propName) 451 } 452 } 453 454 func (s *Searcher) extractIDProp(propName string, propType schema.DataType, 455 value interface{}, operator filters.Operator, class *models.Class, 456 ) (*propValuePair, error) { 457 var byteValue []byte 458 459 switch propType { 460 case schema.DataTypeText: 461 v, ok := value.(string) 462 if !ok { 463 return nil, fmt.Errorf("expected value to be string, got '%T'", value) 464 } 465 byteValue = []byte(v) 466 default: 467 return nil, fmt.Errorf( 468 "failed to extract id prop, unsupported type '%T' for prop '%s'", propType, propName) 469 } 470 471 return &propValuePair{ 472 value: byteValue, 473 prop: filters.InternalPropID, 474 operator: operator, 475 hasFilterableIndex: HasFilterableIndexIdProp, 476 hasSearchableIndex: HasSearchableIndexIdProp, 477 Class: class, 478 }, nil 479 } 480 481 func (s *Searcher) extractTimestampProp(propName string, propType schema.DataType, value interface{}, 482 operator filters.Operator, class *models.Class, 483 ) (*propValuePair, error) { 484 var byteValue []byte 485 486 switch propType { 487 case schema.DataTypeText: 488 v, ok := value.(string) 489 if !ok { 490 return nil, fmt.Errorf("expected value to be string, got '%T'", value) 491 } 492 _, err := strconv.ParseInt(v, 10, 64) 493 if err != nil { 494 return nil, fmt.Errorf("expected value to be timestamp, got '%s'", v) 495 } 496 byteValue = []byte(v) 497 case schema.DataTypeDate: 498 v, ok := value.(string) 499 if !ok { 500 return nil, fmt.Errorf("expected value to be string, got '%T'", value) 501 } 502 t, err := time.Parse(time.RFC3339, v) 503 if err != nil { 504 return nil, fmt.Errorf("trying parse time as RFC3339 string: %w", err) 505 } 506 507 // if propType is a `valueDate`, we need to convert 508 // it to ms before fetching. this is the format by 509 // which our timestamps are indexed 510 byteValue = []byte(strconv.FormatInt(t.UnixMilli(), 10)) 511 default: 512 return nil, fmt.Errorf( 513 "failed to extract timestamp prop, unsupported type '%T' for prop '%s'", propType, propName) 514 } 515 516 return &propValuePair{ 517 value: byteValue, 518 prop: propName, 519 operator: operator, 520 hasFilterableIndex: HasFilterableIndexTimestampProp, // TODO text_rbm_inverted_index & with settings 521 hasSearchableIndex: HasSearchableIndexTimestampProp, // TODO text_rbm_inverted_index & with settings 522 Class: class, 523 }, nil 524 } 525 526 func (s *Searcher) extractTokenizableProp(prop *models.Property, propType schema.DataType, 527 value interface{}, operator filters.Operator, class *models.Class, 528 ) (*propValuePair, error) { 529 var terms []string 530 531 valueString, ok := value.(string) 532 if !ok { 533 return nil, fmt.Errorf("expected value to be string, got '%T'", value) 534 } 535 536 switch propType { 537 case schema.DataTypeText: 538 // if the operator is like, we cannot apply the regular text-splitting 539 // logic as it would remove all wildcard symbols 540 if operator == filters.OperatorLike { 541 terms = helpers.TokenizeWithWildcards(prop.Tokenization, valueString) 542 } else { 543 terms = helpers.Tokenize(prop.Tokenization, valueString) 544 } 545 default: 546 return nil, fmt.Errorf("expected value type to be text, got %v", propType) 547 } 548 549 hasFilterableIndex := HasFilterableIndex(prop) && !s.isFallbackToSearchable() 550 hasSearchableIndex := HasSearchableIndex(prop) 551 552 if !hasFilterableIndex && !hasSearchableIndex { 553 return nil, inverted.NewMissingFilterableIndexError(prop.Name) 554 } 555 556 propValuePairs := make([]*propValuePair, 0, len(terms)) 557 for _, term := range terms { 558 if s.stopwords.IsStopword(term) { 559 continue 560 } 561 propValuePairs = append(propValuePairs, &propValuePair{ 562 value: []byte(term), 563 prop: prop.Name, 564 operator: operator, 565 hasFilterableIndex: hasFilterableIndex, 566 hasSearchableIndex: hasSearchableIndex, 567 Class: class, 568 }) 569 } 570 571 if len(propValuePairs) > 1 { 572 return &propValuePair{operator: filters.OperatorAnd, children: propValuePairs, Class: class}, nil 573 } 574 if len(propValuePairs) == 1 { 575 return propValuePairs[0], nil 576 } 577 return nil, fmt.Errorf("invalid search term, only stopwords provided. " + 578 "Stopwords can be configured in class.invertedIndexConfig.stopwords") 579 } 580 581 func (s *Searcher) extractPropertyLength(prop *models.Property, propType schema.DataType, 582 value interface{}, operator filters.Operator, class *models.Class, 583 ) (*propValuePair, error) { 584 var byteValue []byte 585 586 switch propType { 587 case schema.DataTypeInt: 588 b, err := s.extractIntValue(value) 589 if err != nil { 590 return nil, err 591 } 592 byteValue = b 593 default: 594 return nil, fmt.Errorf( 595 "failed to extract length of prop, unsupported type '%T' for length of prop '%s'", propType, prop.Name) 596 } 597 598 return &propValuePair{ 599 value: byteValue, 600 prop: helpers.PropLength(prop.Name), 601 operator: operator, 602 hasFilterableIndex: HasFilterableIndexPropLength, // TODO text_rbm_inverted_index & with settings 603 hasSearchableIndex: HasSearchableIndexPropLength, // TODO text_rbm_inverted_index & with settings 604 Class: class, 605 }, nil 606 } 607 608 func (s *Searcher) extractPropertyNull(prop *models.Property, propType schema.DataType, 609 value interface{}, operator filters.Operator, class *models.Class, 610 ) (*propValuePair, error) { 611 var valResult []byte 612 613 switch propType { 614 case schema.DataTypeBoolean: 615 b, err := s.extractBoolValue(value) 616 if err != nil { 617 return nil, err 618 } 619 valResult = b 620 default: 621 return nil, fmt.Errorf( 622 "failed to extract null prop, unsupported type '%T' for null prop '%s'", propType, prop.Name) 623 } 624 625 return &propValuePair{ 626 value: valResult, 627 prop: helpers.PropNull(prop.Name), 628 operator: operator, 629 hasFilterableIndex: HasFilterableIndexPropNull, // TODO text_rbm_inverted_index & with settings 630 hasSearchableIndex: HasSearchableIndexPropNull, // TODO text_rbm_inverted_index & with settings 631 Class: class, 632 }, nil 633 } 634 635 func (s *Searcher) extractContains(path *filters.Path, propType schema.DataType, value interface{}, 636 operator filters.Operator, class *models.Class, 637 ) (*propValuePair, error) { 638 var operands []filters.Clause 639 switch propType { 640 case schema.DataTypeText, schema.DataTypeTextArray: 641 valueStringArray, err := s.extractStringArray(value) 642 if err != nil { 643 return nil, err 644 } 645 operands = getContainsOperands(propType, path, valueStringArray) 646 case schema.DataTypeInt, schema.DataTypeIntArray: 647 valueIntArray, err := s.extractIntArray(value) 648 if err != nil { 649 return nil, err 650 } 651 operands = getContainsOperands(propType, path, valueIntArray) 652 case schema.DataTypeNumber, schema.DataTypeNumberArray: 653 valueFloat64Array, err := s.extractFloat64Array(value) 654 if err != nil { 655 return nil, err 656 } 657 operands = getContainsOperands(propType, path, valueFloat64Array) 658 case schema.DataTypeBoolean, schema.DataTypeBooleanArray: 659 valueBooleanArray, err := s.extractBoolArray(value) 660 if err != nil { 661 return nil, err 662 } 663 operands = getContainsOperands(propType, path, valueBooleanArray) 664 case schema.DataTypeDate, schema.DataTypeDateArray: 665 valueDateArray, err := s.extractStringArray(value) 666 if err != nil { 667 return nil, err 668 } 669 operands = getContainsOperands(propType, path, valueDateArray) 670 default: 671 return nil, fmt.Errorf("unsupported type '%T' for '%v' operator", propType, operator) 672 } 673 674 children, err := s.extractPropValuePairs(operands, schema.ClassName(class.Class)) 675 if err != nil { 676 return nil, err 677 } 678 out, err := newPropValuePair(class, s.logger) 679 if err != nil { 680 return nil, fmt.Errorf("new prop value pair: %w", err) 681 } 682 out.children = children 683 // filters.ContainsAny 684 out.operator = filters.OperatorOr 685 if operator == filters.ContainsAll { 686 out.operator = filters.OperatorAnd 687 } 688 out.Class = class 689 return out, nil 690 } 691 692 // TODO: repeated calls to on... aren't too efficient because we iterate over 693 // the schema each time, might be smarter to have a single method that 694 // determines the type and then we switch based on the result. However, the 695 // effect of that should be very small unless the schema is absolutely massive. 696 func (s *Searcher) onRefProp(property *models.Property) bool { 697 return schema.IsRefDataType(property.DataType) 698 } 699 700 // TODO: repeated calls to on... aren't too efficient because we iterate over 701 // the schema each time, might be smarter to have a single method that 702 // determines the type and then we switch based on the result. However, the 703 // effect of that should be very small unless the schema is absolutely massive. 704 func (s *Searcher) onGeoProp(prop *models.Property) bool { 705 return schema.DataType(prop.DataType[0]) == schema.DataTypeGeoCoordinates 706 } 707 708 // Note: A UUID prop is a user-specified prop of type UUID. This has nothing to 709 // do with the primary ID of an object which happens to always be a UUID in 710 // Weaviate v1 711 // 712 // TODO: repeated calls to on... aren't too efficient because we iterate over 713 // the schema each time, might be smarter to have a single method that 714 // determines the type and then we switch based on the result. However, the 715 // effect of that should be very small unless the schema is absolutely massive. 716 func (s *Searcher) onUUIDProp(prop *models.Property) bool { 717 switch dt, _ := schema.AsPrimitive(prop.DataType); dt { 718 case schema.DataTypeUUID, schema.DataTypeUUIDArray: 719 return true 720 default: 721 return false 722 } 723 } 724 725 func (s *Searcher) onInternalProp(propName string) bool { 726 return filters.IsInternalProperty(schema.PropertyName(propName)) 727 } 728 729 func (s *Searcher) onTokenizableProp(prop *models.Property) bool { 730 switch dt, _ := schema.AsPrimitive(prop.DataType); dt { 731 case schema.DataTypeText, schema.DataTypeTextArray: 732 return true 733 default: 734 return false 735 } 736 } 737 738 func (s *Searcher) extractStringArray(value interface{}) ([]string, error) { 739 switch v := value.(type) { 740 case []string: 741 return v, nil 742 case []interface{}: 743 vals := make([]string, len(v)) 744 for i := range v { 745 val, ok := v[i].(string) 746 if !ok { 747 return nil, fmt.Errorf("value[%d] type should be string but is %T", i, v[i]) 748 } 749 vals[i] = val 750 } 751 return vals, nil 752 default: 753 return nil, fmt.Errorf("value type should be []string but is %T", value) 754 } 755 } 756 757 func (s *Searcher) extractIntArray(value interface{}) ([]int, error) { 758 switch v := value.(type) { 759 case []int: 760 return v, nil 761 case []interface{}: 762 vals := make([]int, len(v)) 763 for i := range v { 764 // in this case all number values are unmarshalled to float64, so we need to cast to float64 765 // and then make int 766 val, ok := v[i].(float64) 767 if !ok { 768 return nil, fmt.Errorf("value[%d] type should be float64 but is %T", i, v[i]) 769 } 770 vals[i] = int(val) 771 } 772 return vals, nil 773 default: 774 return nil, fmt.Errorf("value type should be []int but is %T", value) 775 } 776 } 777 778 func (s *Searcher) extractFloat64Array(value interface{}) ([]float64, error) { 779 switch v := value.(type) { 780 case []float64: 781 return v, nil 782 case []interface{}: 783 vals := make([]float64, len(v)) 784 for i := range v { 785 val, ok := v[i].(float64) 786 if !ok { 787 return nil, fmt.Errorf("value[%d] type should be float64 but is %T", i, v[i]) 788 } 789 vals[i] = val 790 } 791 return vals, nil 792 default: 793 return nil, fmt.Errorf("value type should be []float64 but is %T", value) 794 } 795 } 796 797 func (s *Searcher) extractBoolArray(value interface{}) ([]bool, error) { 798 switch v := value.(type) { 799 case []bool: 800 return v, nil 801 case []interface{}: 802 vals := make([]bool, len(v)) 803 for i := range v { 804 val, ok := v[i].(bool) 805 if !ok { 806 return nil, fmt.Errorf("value[%d] type should be bool but is %T", i, v[i]) 807 } 808 vals[i] = val 809 } 810 return vals, nil 811 default: 812 return nil, fmt.Errorf("value type should be []bool but is %T", value) 813 } 814 } 815 816 func getContainsOperands[T any](propType schema.DataType, path *filters.Path, values []T) []filters.Clause { 817 operands := make([]filters.Clause, len(values)) 818 for i := range values { 819 operands[i] = filters.Clause{ 820 Operator: filters.OperatorEqual, 821 On: path, 822 Value: &filters.Value{ 823 Type: propType, 824 Value: values[i], 825 }, 826 } 827 } 828 return operands 829 } 830 831 type docIDsIterator interface { 832 Next() (uint64, bool) 833 Len() int 834 } 835 836 type sliceDocIDsIterator struct { 837 docIDs []uint64 838 pos int 839 } 840 841 func newSliceDocIDsIterator(docIDs []uint64) docIDsIterator { 842 return &sliceDocIDsIterator{docIDs: docIDs, pos: 0} 843 } 844 845 func (it *sliceDocIDsIterator) Next() (uint64, bool) { 846 if it.pos >= len(it.docIDs) { 847 return 0, false 848 } 849 pos := it.pos 850 it.pos++ 851 return it.docIDs[pos], true 852 } 853 854 func (it *sliceDocIDsIterator) Len() int { 855 return len(it.docIDs) 856 } 857 858 type docBitmap struct { 859 docIDs *sroar.Bitmap 860 } 861 862 // newUninitializedDocBitmap can be used whenever we can be sure that the first 863 // user of the docBitmap will set or replace the bitmap, such as a row reader 864 func newUninitializedDocBitmap() docBitmap { 865 return docBitmap{docIDs: nil} 866 } 867 868 func newDocBitmap() docBitmap { 869 return docBitmap{docIDs: sroar.NewBitmap()} 870 } 871 872 func (dbm *docBitmap) count() int { 873 if dbm.docIDs == nil { 874 return 0 875 } 876 return dbm.docIDs.GetCardinality() 877 } 878 879 func (dbm *docBitmap) IDs() []uint64 { 880 if dbm.docIDs == nil { 881 return []uint64{} 882 } 883 return dbm.docIDs.ToArray() 884 } 885 886 func (dbm *docBitmap) IDsWithLimit(limit int) []uint64 { 887 card := dbm.docIDs.GetCardinality() 888 if limit >= card { 889 return dbm.IDs() 890 } 891 892 out := make([]uint64, limit) 893 for i := range out { 894 // safe to ignore error, it can only error if the index is >= cardinality 895 // which we have already ruled out 896 out[i], _ = dbm.docIDs.Select(uint64(i)) 897 } 898 899 return out 900 } 901 902 type docPointerWithScore struct { 903 id uint64 904 frequency float32 905 propLength float32 906 }