github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/prop_value_pairs.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package inverted 13 14 import ( 15 "context" 16 "fmt" 17 "strings" 18 19 "github.com/sirupsen/logrus" 20 enterrors "github.com/weaviate/weaviate/entities/errors" 21 22 "github.com/pkg/errors" 23 "github.com/weaviate/weaviate/adapters/repos/db/helpers" 24 "github.com/weaviate/weaviate/adapters/repos/db/roaringset" 25 "github.com/weaviate/weaviate/entities/filters" 26 "github.com/weaviate/weaviate/entities/models" 27 ) 28 29 type propValuePair struct { 30 prop string 31 operator filters.Operator 32 33 // set for all values that can be served by an inverted index, i.e. anything 34 // that's not a geoRange 35 value []byte 36 37 // only set if operator=OperatorWithinGeoRange, as that cannot be served by a 38 // byte value from an inverted index 39 valueGeoRange *filters.GeoRange 40 docIDs docBitmap 41 children []*propValuePair 42 hasFilterableIndex bool 43 hasSearchableIndex bool 44 Class *models.Class // The schema 45 logger logrus.FieldLogger 46 } 47 48 func newPropValuePair(class *models.Class, logger logrus.FieldLogger) (*propValuePair, error) { 49 if class == nil { 50 return nil, errors.Errorf("class must not be nil") 51 } 52 return &propValuePair{logger: logger, docIDs: newDocBitmap(), Class: class}, nil 53 } 54 55 func (pv *propValuePair) fetchDocIDs(s *Searcher, limit int) error { 56 if pv.operator.OnValue() { 57 58 // TODO text_rbm_inverted_index find better way check whether prop len 59 if strings.HasSuffix(pv.prop, filters.InternalPropertyLength) && 60 !pv.Class.InvertedIndexConfig.IndexPropertyLength { 61 return errors.Errorf("Property length must be indexed to be filterable! add `IndexPropertyLength: true` to the invertedIndexConfig in %v. Geo-coordinates, phone numbers and data blobs are not supported by property length.", pv.Class.Class) 62 } 63 64 if pv.operator == filters.OperatorIsNull && !pv.Class.InvertedIndexConfig.IndexNullState { 65 return errors.Errorf("Nullstate must be indexed to be filterable! Add `indexNullState: true` to the invertedIndexConfig") 66 } 67 68 if (pv.prop == filters.InternalPropCreationTimeUnix || 69 pv.prop == filters.InternalPropLastUpdateTimeUnix) && 70 !pv.Class.InvertedIndexConfig.IndexTimestamps { 71 return errors.Errorf("Timestamps must be indexed to be filterable! Add `IndexTimestamps: true` to the InvertedIndexConfig in %v", pv.Class.Class) 72 } 73 74 var bucketName string 75 if pv.hasFilterableIndex { 76 bucketName = helpers.BucketFromPropNameLSM(pv.prop) 77 } else if pv.hasSearchableIndex { 78 bucketName = helpers.BucketSearchableFromPropNameLSM(pv.prop) 79 } else { 80 return errors.Errorf("bucket for prop %s not found - is it indexed?", pv.prop) 81 } 82 83 b := s.store.Bucket(bucketName) 84 85 // TODO: I think we can delete this check entirely. The bucket will never be nill, and routines should now check if their particular feature is active in the schema. However, not all those routines have checks yet. 86 if b == nil && pv.operator != filters.OperatorWithinGeoRange { 87 // a nil bucket is ok for a WithinGeoRange filter, as this query is not 88 // served by the inverted index, but propagated to a secondary index in 89 // .docPointers() 90 return errors.Errorf("bucket for prop %s not found - is it indexed?", pv.prop) 91 } 92 93 ctx := context.TODO() // TODO: pass through instead of spawning new 94 dbm, err := s.docBitmap(ctx, b, limit, pv) 95 if err != nil { 96 return err 97 } 98 pv.docIDs = dbm 99 } else { 100 eg := enterrors.NewErrorGroupWrapper(pv.logger) 101 // prevent unbounded concurrency, see 102 // https://github.com/weaviate/weaviate/issues/3179 for details 103 eg.SetLimit(2 * _NUMCPU) 104 for i, child := range pv.children { 105 i, child := i, child 106 eg.Go(func() error { 107 // Explicitly set the limit to 0 (=unlimited) as this is a nested filter, 108 // otherwise we run into situations where each subfilter on their own 109 // runs into the limit, possibly yielding in "less than limit" results 110 // after merging. 111 err := child.fetchDocIDs(s, 0) 112 if err != nil { 113 return errors.Wrapf(err, "nested child %d", i) 114 } 115 116 return nil 117 }) 118 } 119 if err := eg.Wait(); err != nil { 120 return fmt.Errorf("nested query: %w", err) 121 } 122 } 123 124 return nil 125 } 126 127 func (pv *propValuePair) mergeDocIDs() (*docBitmap, error) { 128 if pv.operator.OnValue() { 129 return &pv.docIDs, nil 130 } 131 132 if pv.operator != filters.OperatorAnd && pv.operator != filters.OperatorOr { 133 return nil, fmt.Errorf("unsupported operator: %s", pv.operator.Name()) 134 } 135 if len(pv.children) == 0 { 136 return nil, fmt.Errorf("no children for operator: %s", pv.operator.Name()) 137 } 138 139 dbms := make([]*docBitmap, len(pv.children)) 140 for i, child := range pv.children { 141 dbm, err := child.mergeDocIDs() 142 if err != nil { 143 return nil, errors.Wrapf(err, "retrieve doc bitmap of child %d", i) 144 } 145 dbms[i] = dbm 146 } 147 148 mergeRes := dbms[0].docIDs.Clone() 149 mergeFn := mergeRes.And 150 if pv.operator == filters.OperatorOr { 151 mergeFn = mergeRes.Or 152 } 153 154 for i := 1; i < len(dbms); i++ { 155 mergeFn(dbms[i].docIDs) 156 } 157 158 return &docBitmap{ 159 docIDs: roaringset.Condense(mergeRes), 160 }, nil 161 }