github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/prop_value_pairs.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package inverted
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"strings"
    18  
    19  	"github.com/sirupsen/logrus"
    20  	enterrors "github.com/weaviate/weaviate/entities/errors"
    21  
    22  	"github.com/pkg/errors"
    23  	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
    24  	"github.com/weaviate/weaviate/adapters/repos/db/roaringset"
    25  	"github.com/weaviate/weaviate/entities/filters"
    26  	"github.com/weaviate/weaviate/entities/models"
    27  )
    28  
    29  type propValuePair struct {
    30  	prop     string
    31  	operator filters.Operator
    32  
    33  	// set for all values that can be served by an inverted index, i.e. anything
    34  	// that's not a geoRange
    35  	value []byte
    36  
    37  	// only set if operator=OperatorWithinGeoRange, as that cannot be served by a
    38  	// byte value from an inverted index
    39  	valueGeoRange      *filters.GeoRange
    40  	docIDs             docBitmap
    41  	children           []*propValuePair
    42  	hasFilterableIndex bool
    43  	hasSearchableIndex bool
    44  	Class              *models.Class // The schema
    45  	logger             logrus.FieldLogger
    46  }
    47  
    48  func newPropValuePair(class *models.Class, logger logrus.FieldLogger) (*propValuePair, error) {
    49  	if class == nil {
    50  		return nil, errors.Errorf("class must not be nil")
    51  	}
    52  	return &propValuePair{logger: logger, docIDs: newDocBitmap(), Class: class}, nil
    53  }
    54  
    55  func (pv *propValuePair) fetchDocIDs(s *Searcher, limit int) error {
    56  	if pv.operator.OnValue() {
    57  
    58  		// TODO text_rbm_inverted_index find better way check whether prop len
    59  		if strings.HasSuffix(pv.prop, filters.InternalPropertyLength) &&
    60  			!pv.Class.InvertedIndexConfig.IndexPropertyLength {
    61  			return errors.Errorf("Property length must be indexed to be filterable! add `IndexPropertyLength: true` to the invertedIndexConfig in %v.  Geo-coordinates, phone numbers and data blobs are not supported by property length.", pv.Class.Class)
    62  		}
    63  
    64  		if pv.operator == filters.OperatorIsNull && !pv.Class.InvertedIndexConfig.IndexNullState {
    65  			return errors.Errorf("Nullstate must be indexed to be filterable! Add `indexNullState: true` to the invertedIndexConfig")
    66  		}
    67  
    68  		if (pv.prop == filters.InternalPropCreationTimeUnix ||
    69  			pv.prop == filters.InternalPropLastUpdateTimeUnix) &&
    70  			!pv.Class.InvertedIndexConfig.IndexTimestamps {
    71  			return errors.Errorf("Timestamps must be indexed to be filterable! Add `IndexTimestamps: true` to the InvertedIndexConfig in %v", pv.Class.Class)
    72  		}
    73  
    74  		var bucketName string
    75  		if pv.hasFilterableIndex {
    76  			bucketName = helpers.BucketFromPropNameLSM(pv.prop)
    77  		} else if pv.hasSearchableIndex {
    78  			bucketName = helpers.BucketSearchableFromPropNameLSM(pv.prop)
    79  		} else {
    80  			return errors.Errorf("bucket for prop %s not found - is it indexed?", pv.prop)
    81  		}
    82  
    83  		b := s.store.Bucket(bucketName)
    84  
    85  		// TODO:  I think we can delete this check entirely.  The bucket will never be nill, and routines should now check if their particular feature is active in the schema.  However, not all those routines have checks yet.
    86  		if b == nil && pv.operator != filters.OperatorWithinGeoRange {
    87  			// a nil bucket is ok for a WithinGeoRange filter, as this query is not
    88  			// served by the inverted index, but propagated to a secondary index in
    89  			// .docPointers()
    90  			return errors.Errorf("bucket for prop %s not found - is it indexed?", pv.prop)
    91  		}
    92  
    93  		ctx := context.TODO() // TODO: pass through instead of spawning new
    94  		dbm, err := s.docBitmap(ctx, b, limit, pv)
    95  		if err != nil {
    96  			return err
    97  		}
    98  		pv.docIDs = dbm
    99  	} else {
   100  		eg := enterrors.NewErrorGroupWrapper(pv.logger)
   101  		// prevent unbounded concurrency, see
   102  		// https://github.com/weaviate/weaviate/issues/3179 for details
   103  		eg.SetLimit(2 * _NUMCPU)
   104  		for i, child := range pv.children {
   105  			i, child := i, child
   106  			eg.Go(func() error {
   107  				// Explicitly set the limit to 0 (=unlimited) as this is a nested filter,
   108  				// otherwise we run into situations where each subfilter on their own
   109  				// runs into the limit, possibly yielding in "less than limit" results
   110  				// after merging.
   111  				err := child.fetchDocIDs(s, 0)
   112  				if err != nil {
   113  					return errors.Wrapf(err, "nested child %d", i)
   114  				}
   115  
   116  				return nil
   117  			})
   118  		}
   119  		if err := eg.Wait(); err != nil {
   120  			return fmt.Errorf("nested query: %w", err)
   121  		}
   122  	}
   123  
   124  	return nil
   125  }
   126  
   127  func (pv *propValuePair) mergeDocIDs() (*docBitmap, error) {
   128  	if pv.operator.OnValue() {
   129  		return &pv.docIDs, nil
   130  	}
   131  
   132  	if pv.operator != filters.OperatorAnd && pv.operator != filters.OperatorOr {
   133  		return nil, fmt.Errorf("unsupported operator: %s", pv.operator.Name())
   134  	}
   135  	if len(pv.children) == 0 {
   136  		return nil, fmt.Errorf("no children for operator: %s", pv.operator.Name())
   137  	}
   138  
   139  	dbms := make([]*docBitmap, len(pv.children))
   140  	for i, child := range pv.children {
   141  		dbm, err := child.mergeDocIDs()
   142  		if err != nil {
   143  			return nil, errors.Wrapf(err, "retrieve doc bitmap of child %d", i)
   144  		}
   145  		dbms[i] = dbm
   146  	}
   147  
   148  	mergeRes := dbms[0].docIDs.Clone()
   149  	mergeFn := mergeRes.And
   150  	if pv.operator == filters.OperatorOr {
   151  		mergeFn = mergeRes.Or
   152  	}
   153  
   154  	for i := 1; i < len(dbms); i++ {
   155  		mergeFn(dbms[i].docIDs)
   156  	}
   157  
   158  	return &docBitmap{
   159  		docIDs: roaringset.Condense(mergeRes),
   160  	}, nil
   161  }