github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/searcher.go

github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/searcher.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package inverted
    13  
    14  import (
    15  	"context"
    16  	"encoding/binary"
    17  	"fmt"
    18  	"strconv"
    19  	"time"
    20  
    21  	enterrors "github.com/weaviate/weaviate/entities/errors"
    22  
    23  	"github.com/google/uuid"
    24  	"github.com/sirupsen/logrus"
    25  	"github.com/weaviate/sroar"
    26  	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
    27  	"github.com/weaviate/weaviate/adapters/repos/db/inverted/stopwords"
    28  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv"
    29  	"github.com/weaviate/weaviate/adapters/repos/db/propertyspecific"
    30  	"github.com/weaviate/weaviate/adapters/repos/db/roaringset"
    31  	"github.com/weaviate/weaviate/adapters/repos/db/sorter"
    32  	"github.com/weaviate/weaviate/entities/additional"
    33  	"github.com/weaviate/weaviate/entities/filters"
    34  	"github.com/weaviate/weaviate/entities/inverted"
    35  	"github.com/weaviate/weaviate/entities/models"
    36  	"github.com/weaviate/weaviate/entities/schema"
    37  	"github.com/weaviate/weaviate/entities/storobj"
    38  	"github.com/weaviate/weaviate/usecases/config"
    39  )
    40  
// Searcher turns filters into doc ID allow lists (see DocIDs) and, on
// request, into the full objects those doc IDs point to (see Objects). All of
// its dependencies are injected through NewSearcher.
type Searcher struct {
	logger                 logrus.FieldLogger
	store                  *lsmkv.Store
	schema                 schema.Schema
	classSearcher          ClassSearcher // to allow recursive searches on ref-props
	propIndices            propertyspecific.Indices
	stopwords              stopwords.StopwordDetector
	shardVersion           uint16
	isFallbackToSearchable IsFallbackToSearchable
	tenant                 string
	// nestedCrossRefLimit limits the number of nested cross refs returned for a query
	nestedCrossRefLimit int64
	bitmapFactory       *roaringset.BitmapFactory
}
    55  
    56  func NewSearcher(logger logrus.FieldLogger, store *lsmkv.Store,
    57  	schema schema.Schema, propIndices propertyspecific.Indices,
    58  	classSearcher ClassSearcher, stopwords stopwords.StopwordDetector,
    59  	shardVersion uint16, isFallbackToSearchable IsFallbackToSearchable,
    60  	tenant string, nestedCrossRefLimit int64, bitmapFactory *roaringset.BitmapFactory,
    61  ) *Searcher {
    62  	return &Searcher{
    63  		logger:                 logger,
    64  		store:                  store,
    65  		schema:                 schema,
    66  		propIndices:            propIndices,
    67  		classSearcher:          classSearcher,
    68  		stopwords:              stopwords,
    69  		shardVersion:           shardVersion,
    70  		isFallbackToSearchable: isFallbackToSearchable,
    71  		tenant:                 tenant,
    72  		nestedCrossRefLimit:    nestedCrossRefLimit,
    73  		bitmapFactory:          bitmapFactory,
    74  	}
    75  }
    76  
    77  // Objects returns a list of full objects
    78  func (s *Searcher) Objects(ctx context.Context, limit int,
    79  	filter *filters.LocalFilter, sort []filters.Sort, additional additional.Properties,
    80  	className schema.ClassName,
    81  ) ([]*storobj.Object, error) {
    82  	allowList, err := s.docIDs(ctx, filter, additional, className, limit)
    83  	if err != nil {
    84  		return nil, err
    85  	}
    86  
    87  	var it docIDsIterator
    88  	if len(sort) > 0 {
    89  		docIDs, err := s.sort(ctx, limit, sort, allowList, className)
    90  		if err != nil {
    91  			return nil, fmt.Errorf("sort doc ids: %w", err)
    92  		}
    93  		it = newSliceDocIDsIterator(docIDs)
    94  	} else {
    95  		it = allowList.Iterator()
    96  	}
    97  
    98  	return s.objectsByDocID(it, additional, limit)
    99  }
   100  
   101  func (s *Searcher) sort(ctx context.Context, limit int, sort []filters.Sort,
   102  	docIDs helpers.AllowList, className schema.ClassName,
   103  ) ([]uint64, error) {
   104  	lsmSorter, err := sorter.NewLSMSorter(s.store, s.schema, className)
   105  	if err != nil {
   106  		return nil, err
   107  	}
   108  	return lsmSorter.SortDocIDs(ctx, limit, sort, docIDs)
   109  }
   110  
   111  func (s *Searcher) objectsByDocID(it docIDsIterator,
   112  	additional additional.Properties, limit int,
   113  ) ([]*storobj.Object, error) {
   114  	bucket := s.store.Bucket(helpers.ObjectsBucketLSM)
   115  	if bucket == nil {
   116  		return nil, fmt.Errorf("objects bucket not found")
   117  	}
   118  
   119  	out := make([]*storobj.Object, it.Len())
   120  	docIDBytes := make([]byte, 8)
   121  
   122  	// Prevent unbounded iteration
   123  	if limit == 0 {
   124  		limit = int(config.DefaultQueryMaximumResults)
   125  	}
   126  
   127  	i := 0
   128  	for docID, ok := it.Next(); ok; docID, ok = it.Next() {
   129  		binary.LittleEndian.PutUint64(docIDBytes, docID)
   130  		res, err := bucket.GetBySecondary(0, docIDBytes)
   131  		if err != nil {
   132  			return nil, err
   133  		}
   134  
   135  		if res == nil {
   136  			continue
   137  		}
   138  
   139  		var unmarshalled *storobj.Object
   140  		if additional.ReferenceQuery {
   141  			unmarshalled, err = storobj.FromBinaryUUIDOnly(res)
   142  		} else {
   143  			unmarshalled, err = storobj.FromBinaryOptional(res, additional)
   144  		}
   145  		if err != nil {
   146  			return nil, fmt.Errorf("unmarshal data object at position %d: %w", i, err)
   147  		}
   148  
   149  		out[i] = unmarshalled
   150  		i++
   151  
   152  		if i >= limit {
   153  			break
   154  		}
   155  	}
   156  
   157  	return out[:i], nil
   158  }
   159  
   160  // DocIDs is similar to Objects, but does not actually resolve the docIDs to
   161  // full objects. Instead it returns the pure object id pointers. They can then
   162  // be used in a secondary index (e.g. vector index)
   163  //
   164  // DocID queries does not contain a limit by design, as we won't know if the limit
   165  // wouldn't remove the item that is most important for the follow up query.
   166  // Imagine the user sets the limit to 1 and the follow-up is a vector search.
   167  // If we already limited the allowList to 1, the vector search would be
   168  // pointless, as only the first element would be allowed, regardless of which
   169  // had the shortest distance
   170  func (s *Searcher) DocIDs(ctx context.Context, filter *filters.LocalFilter,
   171  	additional additional.Properties, className schema.ClassName,
   172  ) (helpers.AllowList, error) {
   173  	allow, err := s.docIDs(ctx, filter, additional, className, 0)
   174  	if err != nil {
   175  		return nil, err
   176  	}
   177  	// Some filters, such as NotEqual, return a theoretical range of docIDs
   178  	// which also includes a buffer in the underlying bitmap, to reduce the
   179  	// overhead of repopulating the base bitmap. Here we can truncate that
   180  	// buffer to ensure that the caller is receiving only the possible range
   181  	// of docIDs
   182  	return allow.Truncate(s.bitmapFactory.ActualMaxVal()), nil
   183  }
   184  
   185  func (s *Searcher) docIDs(ctx context.Context, filter *filters.LocalFilter,
   186  	additional additional.Properties, className schema.ClassName,
   187  	limit int,
   188  ) (helpers.AllowList, error) {
   189  	pv, err := s.extractPropValuePair(filter.Root, className)
   190  	if err != nil {
   191  		return nil, err
   192  	}
   193  
   194  	if err := pv.fetchDocIDs(s, limit); err != nil {
   195  		return nil, fmt.Errorf("fetch doc ids for prop/value pair: %w", err)
   196  	}
   197  
   198  	dbm, err := pv.mergeDocIDs()
   199  	if err != nil {
   200  		return nil, fmt.Errorf("merge doc ids by operator: %w", err)
   201  	}
   202  
   203  	return helpers.NewAllowListFromBitmap(dbm.docIDs), nil
   204  }
   205  
   206  func (s *Searcher) extractPropValuePair(filter *filters.Clause,
   207  	className schema.ClassName,
   208  ) (*propValuePair, error) {
   209  	class := s.schema.FindClassByName(schema.ClassName(className))
   210  	if class == nil {
   211  		return nil, fmt.Errorf("class %q not found", className)
   212  	}
   213  	out, err := newPropValuePair(class, s.logger)
   214  	if err != nil {
   215  		return nil, fmt.Errorf("new prop value pair: %w", err)
   216  	}
   217  	if filter.Operands != nil {
   218  		// nested filter
   219  		children, err := s.extractPropValuePairs(filter.Operands, className)
   220  		if err != nil {
   221  			return nil, err
   222  		}
   223  		out.children = children
   224  		out.operator = filter.Operator
   225  		return out, nil
   226  	}
   227  
   228  	if filter.Operator == filters.ContainsAny || filter.Operator == filters.ContainsAll {
   229  		return s.extractContains(filter.On, filter.Value.Type, filter.Value.Value, filter.Operator, class)
   230  	}
   231  
   232  	// on value or non-nested filter
   233  	props := filter.On.Slice()
   234  	propName := props[0]
   235  
   236  	if s.onInternalProp(propName) {
   237  		return s.extractInternalProp(propName, filter.Value.Type, filter.Value.Value, filter.Operator, class)
   238  	}
   239  
   240  	if extractedPropName, ok := schema.IsPropertyLength(propName, 0); ok {
   241  		property, err := s.schema.GetProperty(className, schema.PropertyName(extractedPropName))
   242  		if err != nil {
   243  			return nil, err
   244  		}
   245  		return s.extractPropertyLength(property, filter.Value.Type, filter.Value.Value, filter.Operator, class)
   246  	}
   247  
   248  	property, err := s.schema.GetProperty(className, schema.PropertyName(propName))
   249  	if err != nil {
   250  		return nil, err
   251  	}
   252  
   253  	if s.onRefProp(property) && len(props) != 1 {
   254  		return s.extractReferenceFilter(property, filter, class)
   255  	}
   256  
   257  	if s.onRefProp(property) && filter.Value.Type == schema.DataTypeInt {
   258  		// ref prop and int type is a special case, the user is looking for the
   259  		// reference count as opposed to the content
   260  		return s.extractReferenceCount(property, filter.Value.Value, filter.Operator, class)
   261  	}
   262  
   263  	if filter.Operator == filters.OperatorIsNull {
   264  		return s.extractPropertyNull(property, filter.Value.Type, filter.Value.Value, filter.Operator, class)
   265  	}
   266  
   267  	if s.onGeoProp(property) {
   268  		return s.extractGeoFilter(property, filter.Value.Value, filter.Value.Type, filter.Operator, class)
   269  	}
   270  
   271  	if s.onUUIDProp(property) {
   272  		return s.extractUUIDFilter(property, filter.Value.Value, filter.Value.Type, filter.Operator, class)
   273  	}
   274  
   275  	if s.onTokenizableProp(property) {
   276  		return s.extractTokenizableProp(property, filter.Value.Type, filter.Value.Value, filter.Operator, class)
   277  	}
   278  
   279  	return s.extractPrimitiveProp(property, filter.Value.Type, filter.Value.Value, filter.Operator, class)
   280  }
   281  
   282  func (s *Searcher) extractPropValuePairs(operands []filters.Clause, className schema.ClassName) ([]*propValuePair, error) {
   283  	children := make([]*propValuePair, len(operands))
   284  	eg := enterrors.NewErrorGroupWrapper(s.logger)
   285  	// prevent unbounded concurrency, see
   286  	// https://github.com/weaviate/weaviate/issues/3179 for details
   287  	eg.SetLimit(2 * _NUMCPU)
   288  
   289  	for i, clause := range operands {
   290  		i, clause := i, clause
   291  		eg.Go(func() error {
   292  			child, err := s.extractPropValuePair(&clause, className)
   293  			if err != nil {
   294  				return fmt.Errorf("nested clause at pos %d: %w", i, err)
   295  			}
   296  			children[i] = child
   297  
   298  			return nil
   299  		}, clause)
   300  	}
   301  	if err := eg.Wait(); err != nil {
   302  		return nil, fmt.Errorf("nested query: %w", err)
   303  	}
   304  	return children, nil
   305  }
   306  
   307  func (s *Searcher) extractReferenceFilter(prop *models.Property,
   308  	filter *filters.Clause, class *models.Class,
   309  ) (*propValuePair, error) {
   310  	ctx := context.TODO()
   311  	return newRefFilterExtractor(s.logger, s.classSearcher, filter, class, prop, s.tenant, s.nestedCrossRefLimit).
   312  		Do(ctx)
   313  }
   314  
   315  func (s *Searcher) extractPrimitiveProp(prop *models.Property, propType schema.DataType,
   316  	value interface{}, operator filters.Operator, class *models.Class,
   317  ) (*propValuePair, error) {
   318  	var extractValueFn func(in interface{}) ([]byte, error)
   319  	switch propType {
   320  	case schema.DataTypeBoolean:
   321  		extractValueFn = s.extractBoolValue
   322  	case schema.DataTypeInt:
   323  		extractValueFn = s.extractIntValue
   324  	case schema.DataTypeNumber:
   325  		extractValueFn = s.extractNumberValue
   326  	case schema.DataTypeDate:
   327  		extractValueFn = s.extractDateValue
   328  	case "":
   329  		return nil, fmt.Errorf("data type cannot be empty")
   330  	default:
   331  		return nil, fmt.Errorf("data type %q not supported in query", propType)
   332  	}
   333  
   334  	byteValue, err := extractValueFn(value)
   335  	if err != nil {
   336  		return nil, err
   337  	}
   338  
   339  	hasFilterableIndex := HasFilterableIndex(prop)
   340  	hasSearchableIndex := HasSearchableIndex(prop)
   341  
   342  	if !hasFilterableIndex && !hasSearchableIndex {
   343  		return nil, inverted.NewMissingFilterableIndexError(prop.Name)
   344  	}
   345  
   346  	return &propValuePair{
   347  		value:              byteValue,
   348  		prop:               prop.Name,
   349  		operator:           operator,
   350  		hasFilterableIndex: hasFilterableIndex,
   351  		hasSearchableIndex: hasSearchableIndex,
   352  		Class:              class,
   353  	}, nil
   354  }
   355  
   356  func (s *Searcher) extractReferenceCount(prop *models.Property, value interface{},
   357  	operator filters.Operator, class *models.Class,
   358  ) (*propValuePair, error) {
   359  	byteValue, err := s.extractIntCountValue(value)
   360  	if err != nil {
   361  		return nil, err
   362  	}
   363  
   364  	hasFilterableIndex := HasFilterableIndexMetaCount && HasInvertedIndex(prop)
   365  	hasSearchableIndex := HasSearchableIndexMetaCount && HasInvertedIndex(prop)
   366  
   367  	if !hasFilterableIndex && !hasSearchableIndex {
   368  		return nil, inverted.NewMissingFilterableMetaCountIndexError(prop.Name)
   369  	}
   370  
   371  	return &propValuePair{
   372  		value:              byteValue,
   373  		prop:               helpers.MetaCountProp(prop.Name),
   374  		operator:           operator,
   375  		hasFilterableIndex: hasFilterableIndex,
   376  		hasSearchableIndex: hasSearchableIndex,
   377  		Class:              class,
   378  	}, nil
   379  }
   380  
   381  func (s *Searcher) extractGeoFilter(prop *models.Property, value interface{},
   382  	valueType schema.DataType, operator filters.Operator, class *models.Class,
   383  ) (*propValuePair, error) {
   384  	if valueType != schema.DataTypeGeoCoordinates {
   385  		return nil, fmt.Errorf("prop %q is of type geoCoordinates, it can only"+
   386  			"be used with geoRange filters", prop.Name)
   387  	}
   388  
   389  	parsed := value.(filters.GeoRange)
   390  
   391  	return &propValuePair{
   392  		value:              nil, // not going to be served by an inverted index
   393  		valueGeoRange:      &parsed,
   394  		prop:               prop.Name,
   395  		operator:           operator,
   396  		hasFilterableIndex: HasFilterableIndex(prop),
   397  		hasSearchableIndex: HasSearchableIndex(prop),
   398  		Class:              class,
   399  	}, nil
   400  }
   401  
   402  func (s *Searcher) extractUUIDFilter(prop *models.Property, value interface{},
   403  	valueType schema.DataType, operator filters.Operator, class *models.Class,
   404  ) (*propValuePair, error) {
   405  	var byteValue []byte
   406  
   407  	switch valueType {
   408  	case schema.DataTypeText:
   409  		asStr, ok := value.(string)
   410  		if !ok {
   411  			return nil, fmt.Errorf("expected to see uuid as string in filter, got %T", value)
   412  		}
   413  		parsed, err := uuid.Parse(asStr)
   414  		if err != nil {
   415  			return nil, fmt.Errorf("parse uuid string: %w", err)
   416  		}
   417  		byteValue = parsed[:]
   418  	default:
   419  		return nil, fmt.Errorf("prop %q is of type uuid, the uuid to filter "+
   420  			"on must be specified as a string (e.g. valueText:<uuid>)", prop.Name)
   421  	}
   422  
   423  	hasFilterableIndex := HasFilterableIndex(prop)
   424  	hasSearchableIndex := HasSearchableIndex(prop)
   425  
   426  	if !hasFilterableIndex && !hasSearchableIndex {
   427  		return nil, inverted.NewMissingFilterableIndexError(prop.Name)
   428  	}
   429  
   430  	return &propValuePair{
   431  		value:              byteValue,
   432  		prop:               prop.Name,
   433  		operator:           operator,
   434  		hasFilterableIndex: hasFilterableIndex,
   435  		hasSearchableIndex: hasSearchableIndex,
   436  		Class:              class,
   437  	}, nil
   438  }
   439  
   440  func (s *Searcher) extractInternalProp(propName string, propType schema.DataType, value interface{},
   441  	operator filters.Operator, class *models.Class,
   442  ) (*propValuePair, error) {
   443  	switch propName {
   444  	case filters.InternalPropBackwardsCompatID, filters.InternalPropID:
   445  		return s.extractIDProp(propName, propType, value, operator, class)
   446  	case filters.InternalPropCreationTimeUnix, filters.InternalPropLastUpdateTimeUnix:
   447  		return s.extractTimestampProp(propName, propType, value, operator, class)
   448  	default:
   449  		return nil, fmt.Errorf(
   450  			"failed to extract internal prop, unsupported internal prop '%s'", propName)
   451  	}
   452  }
   453  
   454  func (s *Searcher) extractIDProp(propName string, propType schema.DataType,
   455  	value interface{}, operator filters.Operator, class *models.Class,
   456  ) (*propValuePair, error) {
   457  	var byteValue []byte
   458  
   459  	switch propType {
   460  	case schema.DataTypeText:
   461  		v, ok := value.(string)
   462  		if !ok {
   463  			return nil, fmt.Errorf("expected value to be string, got '%T'", value)
   464  		}
   465  		byteValue = []byte(v)
   466  	default:
   467  		return nil, fmt.Errorf(
   468  			"failed to extract id prop, unsupported type '%T' for prop '%s'", propType, propName)
   469  	}
   470  
   471  	return &propValuePair{
   472  		value:              byteValue,
   473  		prop:               filters.InternalPropID,
   474  		operator:           operator,
   475  		hasFilterableIndex: HasFilterableIndexIdProp,
   476  		hasSearchableIndex: HasSearchableIndexIdProp,
   477  		Class:              class,
   478  	}, nil
   479  }
   480  
   481  func (s *Searcher) extractTimestampProp(propName string, propType schema.DataType, value interface{},
   482  	operator filters.Operator, class *models.Class,
   483  ) (*propValuePair, error) {
   484  	var byteValue []byte
   485  
   486  	switch propType {
   487  	case schema.DataTypeText:
   488  		v, ok := value.(string)
   489  		if !ok {
   490  			return nil, fmt.Errorf("expected value to be string, got '%T'", value)
   491  		}
   492  		_, err := strconv.ParseInt(v, 10, 64)
   493  		if err != nil {
   494  			return nil, fmt.Errorf("expected value to be timestamp, got '%s'", v)
   495  		}
   496  		byteValue = []byte(v)
   497  	case schema.DataTypeDate:
   498  		v, ok := value.(string)
   499  		if !ok {
   500  			return nil, fmt.Errorf("expected value to be string, got '%T'", value)
   501  		}
   502  		t, err := time.Parse(time.RFC3339, v)
   503  		if err != nil {
   504  			return nil, fmt.Errorf("trying parse time as RFC3339 string: %w", err)
   505  		}
   506  
   507  		// if propType is a `valueDate`, we need to convert
   508  		// it to ms before fetching. this is the format by
   509  		// which our timestamps are indexed
   510  		byteValue = []byte(strconv.FormatInt(t.UnixMilli(), 10))
   511  	default:
   512  		return nil, fmt.Errorf(
   513  			"failed to extract timestamp prop, unsupported type '%T' for prop '%s'", propType, propName)
   514  	}
   515  
   516  	return &propValuePair{
   517  		value:              byteValue,
   518  		prop:               propName,
   519  		operator:           operator,
   520  		hasFilterableIndex: HasFilterableIndexTimestampProp, // TODO text_rbm_inverted_index & with settings
   521  		hasSearchableIndex: HasSearchableIndexTimestampProp, // TODO text_rbm_inverted_index & with settings
   522  		Class:              class,
   523  	}, nil
   524  }
   525  
   526  func (s *Searcher) extractTokenizableProp(prop *models.Property, propType schema.DataType,
   527  	value interface{}, operator filters.Operator, class *models.Class,
   528  ) (*propValuePair, error) {
   529  	var terms []string
   530  
   531  	valueString, ok := value.(string)
   532  	if !ok {
   533  		return nil, fmt.Errorf("expected value to be string, got '%T'", value)
   534  	}
   535  
   536  	switch propType {
   537  	case schema.DataTypeText:
   538  		// if the operator is like, we cannot apply the regular text-splitting
   539  		// logic as it would remove all wildcard symbols
   540  		if operator == filters.OperatorLike {
   541  			terms = helpers.TokenizeWithWildcards(prop.Tokenization, valueString)
   542  		} else {
   543  			terms = helpers.Tokenize(prop.Tokenization, valueString)
   544  		}
   545  	default:
   546  		return nil, fmt.Errorf("expected value type to be text, got %v", propType)
   547  	}
   548  
   549  	hasFilterableIndex := HasFilterableIndex(prop) && !s.isFallbackToSearchable()
   550  	hasSearchableIndex := HasSearchableIndex(prop)
   551  
   552  	if !hasFilterableIndex && !hasSearchableIndex {
   553  		return nil, inverted.NewMissingFilterableIndexError(prop.Name)
   554  	}
   555  
   556  	propValuePairs := make([]*propValuePair, 0, len(terms))
   557  	for _, term := range terms {
   558  		if s.stopwords.IsStopword(term) {
   559  			continue
   560  		}
   561  		propValuePairs = append(propValuePairs, &propValuePair{
   562  			value:              []byte(term),
   563  			prop:               prop.Name,
   564  			operator:           operator,
   565  			hasFilterableIndex: hasFilterableIndex,
   566  			hasSearchableIndex: hasSearchableIndex,
   567  			Class:              class,
   568  		})
   569  	}
   570  
   571  	if len(propValuePairs) > 1 {
   572  		return &propValuePair{operator: filters.OperatorAnd, children: propValuePairs, Class: class}, nil
   573  	}
   574  	if len(propValuePairs) == 1 {
   575  		return propValuePairs[0], nil
   576  	}
   577  	return nil, fmt.Errorf("invalid search term, only stopwords provided. " +
   578  		"Stopwords can be configured in class.invertedIndexConfig.stopwords")
   579  }
   580  
   581  func (s *Searcher) extractPropertyLength(prop *models.Property, propType schema.DataType,
   582  	value interface{}, operator filters.Operator, class *models.Class,
   583  ) (*propValuePair, error) {
   584  	var byteValue []byte
   585  
   586  	switch propType {
   587  	case schema.DataTypeInt:
   588  		b, err := s.extractIntValue(value)
   589  		if err != nil {
   590  			return nil, err
   591  		}
   592  		byteValue = b
   593  	default:
   594  		return nil, fmt.Errorf(
   595  			"failed to extract length of prop, unsupported type '%T' for length of prop '%s'", propType, prop.Name)
   596  	}
   597  
   598  	return &propValuePair{
   599  		value:              byteValue,
   600  		prop:               helpers.PropLength(prop.Name),
   601  		operator:           operator,
   602  		hasFilterableIndex: HasFilterableIndexPropLength, // TODO text_rbm_inverted_index & with settings
   603  		hasSearchableIndex: HasSearchableIndexPropLength, // TODO text_rbm_inverted_index & with settings
   604  		Class:              class,
   605  	}, nil
   606  }
   607  
   608  func (s *Searcher) extractPropertyNull(prop *models.Property, propType schema.DataType,
   609  	value interface{}, operator filters.Operator, class *models.Class,
   610  ) (*propValuePair, error) {
   611  	var valResult []byte
   612  
   613  	switch propType {
   614  	case schema.DataTypeBoolean:
   615  		b, err := s.extractBoolValue(value)
   616  		if err != nil {
   617  			return nil, err
   618  		}
   619  		valResult = b
   620  	default:
   621  		return nil, fmt.Errorf(
   622  			"failed to extract null prop, unsupported type '%T' for null prop '%s'", propType, prop.Name)
   623  	}
   624  
   625  	return &propValuePair{
   626  		value:              valResult,
   627  		prop:               helpers.PropNull(prop.Name),
   628  		operator:           operator,
   629  		hasFilterableIndex: HasFilterableIndexPropNull, // TODO text_rbm_inverted_index & with settings
   630  		hasSearchableIndex: HasSearchableIndexPropNull, // TODO text_rbm_inverted_index & with settings
   631  		Class:              class,
   632  	}, nil
   633  }
   634  
   635  func (s *Searcher) extractContains(path *filters.Path, propType schema.DataType, value interface{},
   636  	operator filters.Operator, class *models.Class,
   637  ) (*propValuePair, error) {
   638  	var operands []filters.Clause
   639  	switch propType {
   640  	case schema.DataTypeText, schema.DataTypeTextArray:
   641  		valueStringArray, err := s.extractStringArray(value)
   642  		if err != nil {
   643  			return nil, err
   644  		}
   645  		operands = getContainsOperands(propType, path, valueStringArray)
   646  	case schema.DataTypeInt, schema.DataTypeIntArray:
   647  		valueIntArray, err := s.extractIntArray(value)
   648  		if err != nil {
   649  			return nil, err
   650  		}
   651  		operands = getContainsOperands(propType, path, valueIntArray)
   652  	case schema.DataTypeNumber, schema.DataTypeNumberArray:
   653  		valueFloat64Array, err := s.extractFloat64Array(value)
   654  		if err != nil {
   655  			return nil, err
   656  		}
   657  		operands = getContainsOperands(propType, path, valueFloat64Array)
   658  	case schema.DataTypeBoolean, schema.DataTypeBooleanArray:
   659  		valueBooleanArray, err := s.extractBoolArray(value)
   660  		if err != nil {
   661  			return nil, err
   662  		}
   663  		operands = getContainsOperands(propType, path, valueBooleanArray)
   664  	case schema.DataTypeDate, schema.DataTypeDateArray:
   665  		valueDateArray, err := s.extractStringArray(value)
   666  		if err != nil {
   667  			return nil, err
   668  		}
   669  		operands = getContainsOperands(propType, path, valueDateArray)
   670  	default:
   671  		return nil, fmt.Errorf("unsupported type '%T' for '%v' operator", propType, operator)
   672  	}
   673  
   674  	children, err := s.extractPropValuePairs(operands, schema.ClassName(class.Class))
   675  	if err != nil {
   676  		return nil, err
   677  	}
   678  	out, err := newPropValuePair(class, s.logger)
   679  	if err != nil {
   680  		return nil, fmt.Errorf("new prop value pair: %w", err)
   681  	}
   682  	out.children = children
   683  	// filters.ContainsAny
   684  	out.operator = filters.OperatorOr
   685  	if operator == filters.ContainsAll {
   686  		out.operator = filters.OperatorAnd
   687  	}
   688  	out.Class = class
   689  	return out, nil
   690  }
   691  
   692  // TODO: repeated calls to on... aren't too efficient because we iterate over
   693  // the schema each time, might be smarter to have a single method that
   694  // determines the type and then we switch based on the result. However, the
   695  // effect of that should be very small unless the schema is absolutely massive.
   696  func (s *Searcher) onRefProp(property *models.Property) bool {
   697  	return schema.IsRefDataType(property.DataType)
   698  }
   699  
   700  // TODO: repeated calls to on... aren't too efficient because we iterate over
   701  // the schema each time, might be smarter to have a single method that
   702  // determines the type and then we switch based on the result. However, the
   703  // effect of that should be very small unless the schema is absolutely massive.
   704  func (s *Searcher) onGeoProp(prop *models.Property) bool {
   705  	return schema.DataType(prop.DataType[0]) == schema.DataTypeGeoCoordinates
   706  }
   707  
   708  // Note: A UUID prop is a user-specified prop of type UUID. This has nothing to
   709  // do with the primary ID of an object which happens to always be a UUID in
   710  // Weaviate v1
   711  //
   712  // TODO: repeated calls to on... aren't too efficient because we iterate over
   713  // the schema each time, might be smarter to have a single method that
   714  // determines the type and then we switch based on the result. However, the
   715  // effect of that should be very small unless the schema is absolutely massive.
   716  func (s *Searcher) onUUIDProp(prop *models.Property) bool {
   717  	switch dt, _ := schema.AsPrimitive(prop.DataType); dt {
   718  	case schema.DataTypeUUID, schema.DataTypeUUIDArray:
   719  		return true
   720  	default:
   721  		return false
   722  	}
   723  }
   724  
   725  func (s *Searcher) onInternalProp(propName string) bool {
   726  	return filters.IsInternalProperty(schema.PropertyName(propName))
   727  }
   728  
   729  func (s *Searcher) onTokenizableProp(prop *models.Property) bool {
   730  	switch dt, _ := schema.AsPrimitive(prop.DataType); dt {
   731  	case schema.DataTypeText, schema.DataTypeTextArray:
   732  		return true
   733  	default:
   734  		return false
   735  	}
   736  }
   737  
   738  func (s *Searcher) extractStringArray(value interface{}) ([]string, error) {
   739  	switch v := value.(type) {
   740  	case []string:
   741  		return v, nil
   742  	case []interface{}:
   743  		vals := make([]string, len(v))
   744  		for i := range v {
   745  			val, ok := v[i].(string)
   746  			if !ok {
   747  				return nil, fmt.Errorf("value[%d] type should be string but is %T", i, v[i])
   748  			}
   749  			vals[i] = val
   750  		}
   751  		return vals, nil
   752  	default:
   753  		return nil, fmt.Errorf("value type should be []string but is %T", value)
   754  	}
   755  }
   756  
   757  func (s *Searcher) extractIntArray(value interface{}) ([]int, error) {
   758  	switch v := value.(type) {
   759  	case []int:
   760  		return v, nil
   761  	case []interface{}:
   762  		vals := make([]int, len(v))
   763  		for i := range v {
   764  			// in this case all number values are unmarshalled to float64, so we need to cast to float64
   765  			// and then make int
   766  			val, ok := v[i].(float64)
   767  			if !ok {
   768  				return nil, fmt.Errorf("value[%d] type should be float64 but is %T", i, v[i])
   769  			}
   770  			vals[i] = int(val)
   771  		}
   772  		return vals, nil
   773  	default:
   774  		return nil, fmt.Errorf("value type should be []int but is %T", value)
   775  	}
   776  }
   777  
   778  func (s *Searcher) extractFloat64Array(value interface{}) ([]float64, error) {
   779  	switch v := value.(type) {
   780  	case []float64:
   781  		return v, nil
   782  	case []interface{}:
   783  		vals := make([]float64, len(v))
   784  		for i := range v {
   785  			val, ok := v[i].(float64)
   786  			if !ok {
   787  				return nil, fmt.Errorf("value[%d] type should be float64 but is %T", i, v[i])
   788  			}
   789  			vals[i] = val
   790  		}
   791  		return vals, nil
   792  	default:
   793  		return nil, fmt.Errorf("value type should be []float64 but is %T", value)
   794  	}
   795  }
   796  
   797  func (s *Searcher) extractBoolArray(value interface{}) ([]bool, error) {
   798  	switch v := value.(type) {
   799  	case []bool:
   800  		return v, nil
   801  	case []interface{}:
   802  		vals := make([]bool, len(v))
   803  		for i := range v {
   804  			val, ok := v[i].(bool)
   805  			if !ok {
   806  				return nil, fmt.Errorf("value[%d] type should be bool but is %T", i, v[i])
   807  			}
   808  			vals[i] = val
   809  		}
   810  		return vals, nil
   811  	default:
   812  		return nil, fmt.Errorf("value type should be []bool but is %T", value)
   813  	}
   814  }
   815  
   816  func getContainsOperands[T any](propType schema.DataType, path *filters.Path, values []T) []filters.Clause {
   817  	operands := make([]filters.Clause, len(values))
   818  	for i := range values {
   819  		operands[i] = filters.Clause{
   820  			Operator: filters.OperatorEqual,
   821  			On:       path,
   822  			Value: &filters.Value{
   823  				Type:  propType,
   824  				Value: values[i],
   825  			},
   826  		}
   827  	}
   828  	return operands
   829  }
   830  
// docIDsIterator abstracts over the source of doc IDs that objectsByDocID
// walks through: in Objects this is either an allow list's iterator or a
// sliceDocIDsIterator over sorted doc IDs.
type docIDsIterator interface {
	// Next returns the next doc ID. The bool is false once no further doc ID
	// is available; consumers stop iterating at that point.
	Next() (uint64, bool)
	// Len returns the number of doc IDs of the iterator. objectsByDocID uses
	// it to size its result slice.
	Len() int
}
   835  
// sliceDocIDsIterator is a docIDsIterator over a plain slice of doc IDs.
type sliceDocIDsIterator struct {
	docIDs []uint64
	pos    int // index of the element the next call to Next returns
}
   840  
   841  func newSliceDocIDsIterator(docIDs []uint64) docIDsIterator {
   842  	return &sliceDocIDsIterator{docIDs: docIDs, pos: 0}
   843  }
   844  
   845  func (it *sliceDocIDsIterator) Next() (uint64, bool) {
   846  	if it.pos >= len(it.docIDs) {
   847  		return 0, false
   848  	}
   849  	pos := it.pos
   850  	it.pos++
   851  	return it.docIDs[pos], true
   852  }
   853  
   854  func (it *sliceDocIDsIterator) Len() int {
   855  	return len(it.docIDs)
   856  }
   857  
// docBitmap wraps a bitmap of doc IDs. The bitmap may be nil (see
// newUninitializedDocBitmap).
type docBitmap struct {
	docIDs *sroar.Bitmap
}
   861  
   862  // newUninitializedDocBitmap can be used whenever we can be sure that the first
   863  // user of the docBitmap will set or replace the bitmap, such as a row reader
   864  func newUninitializedDocBitmap() docBitmap {
   865  	return docBitmap{docIDs: nil}
   866  }
   867  
   868  func newDocBitmap() docBitmap {
   869  	return docBitmap{docIDs: sroar.NewBitmap()}
   870  }
   871  
   872  func (dbm *docBitmap) count() int {
   873  	if dbm.docIDs == nil {
   874  		return 0
   875  	}
   876  	return dbm.docIDs.GetCardinality()
   877  }
   878  
   879  func (dbm *docBitmap) IDs() []uint64 {
   880  	if dbm.docIDs == nil {
   881  		return []uint64{}
   882  	}
   883  	return dbm.docIDs.ToArray()
   884  }
   885  
   886  func (dbm *docBitmap) IDsWithLimit(limit int) []uint64 {
   887  	card := dbm.docIDs.GetCardinality()
   888  	if limit >= card {
   889  		return dbm.IDs()
   890  	}
   891  
   892  	out := make([]uint64, limit)
   893  	for i := range out {
   894  		// safe to ignore error, it can only error if the index is >= cardinality
   895  		// which we have already ruled out
   896  		out[i], _ = dbm.docIDs.Select(uint64(i))
   897  	}
   898  
   899  	return out
   900  }
   901  
// docPointerWithScore pairs a doc ID with two float values.
// NOTE(review): judging by the field names these look like the term
// frequency and the property length used for scoring; the type is not used
// in this part of the file, so confirm against its users.
type docPointerWithScore struct {
	id         uint64
	frequency  float32
	propLength float32
}