github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/row_reader.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package inverted 13 14 import ( 15 "bytes" 16 "context" 17 "encoding/binary" 18 "fmt" 19 20 "github.com/weaviate/sroar" 21 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv" 22 "github.com/weaviate/weaviate/adapters/repos/db/roaringset" 23 "github.com/weaviate/weaviate/entities/filters" 24 ) 25 26 // RowReader reads one or many row(s) depending on the specified operator 27 type RowReader struct { 28 value []byte 29 bucket *lsmkv.Bucket 30 operator filters.Operator 31 keyOnly bool 32 bitmapFactory *roaringset.BitmapFactory 33 } 34 35 // If keyOnly is set, the RowReader will request key-only cursors wherever 36 // cursors are used, the specified value arguments in the ReadFn will always be 37 // nil 38 func NewRowReader(bucket *lsmkv.Bucket, value []byte, operator filters.Operator, 39 keyOnly bool, bitmapFactory *roaringset.BitmapFactory, 40 ) *RowReader { 41 return &RowReader{ 42 bucket: bucket, 43 value: value, 44 operator: operator, 45 keyOnly: keyOnly, 46 bitmapFactory: bitmapFactory, 47 } 48 } 49 50 // Read a row using the specified ReadFn. If RowReader was created with 51 // keysOnly==true, the values argument in the readFn will always be nil on all 52 // requests involving cursors 53 func (rr *RowReader) Read(ctx context.Context, readFn ReadFn) error { 54 switch rr.operator { 55 case filters.OperatorEqual: 56 return rr.equal(ctx, readFn) 57 case filters.OperatorNotEqual: 58 return rr.notEqual(ctx, readFn) 59 case filters.OperatorGreaterThan: 60 return rr.greaterThan(ctx, readFn, false) 61 case filters.OperatorGreaterThanEqual: 62 return rr.greaterThan(ctx, readFn, true) 63 case filters.OperatorLessThan: 64 return rr.lessThan(ctx, readFn, false) 65 case filters.OperatorLessThanEqual: 66 return rr.lessThan(ctx, readFn, true) 67 case filters.OperatorLike: 68 return rr.like(ctx, readFn) 69 case filters.OperatorIsNull: // we need to fetch a row with a given value (there is only nil and !nil) and can reuse equal to get the correct row 70 return rr.equal(ctx, readFn) 71 default: 72 return fmt.Errorf("operator %v not supported", rr.operator) 73 } 74 } 75 76 // equal is a special case, as we don't need to iterate, but just read a single 77 // row 78 func (rr *RowReader) equal(ctx context.Context, readFn ReadFn) error { 79 v, err := rr.equalHelper(ctx) 80 if err != nil { 81 return err 82 } 83 84 _, err = readFn(rr.value, rr.transformToBitmap(v)) 85 return err 86 } 87 88 func (rr *RowReader) notEqual(ctx context.Context, readFn ReadFn) error { 89 v, err := rr.equalHelper(ctx) 90 if err != nil { 91 return err 92 } 93 94 // Invert the Equal results for an efficient NotEqual 95 inverted := rr.bitmapFactory.GetBitmap() 96 inverted.AndNot(rr.transformToBitmap(v)) 97 _, err = readFn(rr.value, inverted) 98 return err 99 } 100 101 // greaterThan reads from the specified value to the end. The first row is only 102 // included if allowEqual==true, otherwise it starts with the next one 103 func (rr *RowReader) greaterThan(ctx context.Context, readFn ReadFn, 104 allowEqual bool, 105 ) error { 106 c := rr.newCursor() 107 defer c.Close() 108 109 for k, v := c.Seek(rr.value); k != nil; k, v = c.Next() { 110 if err := ctx.Err(); err != nil { 111 return err 112 } 113 114 if bytes.Equal(k, rr.value) && !allowEqual { 115 continue 116 } 117 118 continueReading, err := readFn(k, rr.transformToBitmap(v)) 119 if err != nil { 120 return err 121 } 122 123 if !continueReading { 124 break 125 } 126 } 127 128 return nil 129 } 130 131 // lessThan reads from the very begging to the specified value. The last 132 // matching row is only included if allowEqual==true, otherwise it ends one 133 // prior to that. 134 func (rr *RowReader) lessThan(ctx context.Context, readFn ReadFn, 135 allowEqual bool, 136 ) error { 137 c := rr.newCursor() 138 defer c.Close() 139 140 for k, v := c.First(); k != nil && bytes.Compare(k, rr.value) != 1; k, v = c.Next() { 141 if err := ctx.Err(); err != nil { 142 return err 143 } 144 145 if bytes.Equal(k, rr.value) && !allowEqual { 146 continue 147 } 148 149 continueReading, err := readFn(k, rr.transformToBitmap(v)) 150 if err != nil { 151 return err 152 } 153 154 if !continueReading { 155 break 156 } 157 } 158 159 return nil 160 } 161 162 func (rr *RowReader) like(ctx context.Context, readFn ReadFn) error { 163 like, err := parseLikeRegexp(rr.value) 164 if err != nil { 165 return fmt.Errorf("parse like value: %w", err) 166 } 167 168 c := rr.newCursor() 169 defer c.Close() 170 171 var ( 172 initialK []byte 173 initialV [][]byte 174 ) 175 176 if like.optimizable { 177 initialK, initialV = c.Seek(like.min) 178 } else { 179 initialK, initialV = c.First() 180 } 181 182 for k, v := initialK, initialV; k != nil; k, v = c.Next() { 183 if err := ctx.Err(); err != nil { 184 return err 185 } 186 187 if like.optimizable { 188 // if the query is optimizable, i.e. it doesn't start with a wildcard, we 189 // can abort once we've moved past the point where the fixed characters 190 // no longer match 191 if len(k) < len(like.min) { 192 break 193 } 194 195 if bytes.Compare(like.min, k[:len(like.min)]) == -1 { 196 break 197 } 198 } 199 200 if !like.regexp.Match(k) { 201 continue 202 } 203 204 continueReading, err := readFn(k, rr.transformToBitmap(v)) 205 if err != nil { 206 return err 207 } 208 209 if !continueReading { 210 break 211 } 212 } 213 214 return nil 215 } 216 217 // newCursor will either return a regular cursor - or a key-only cursor if 218 // keyOnly==true 219 func (rr *RowReader) newCursor() *lsmkv.CursorSet { 220 if rr.keyOnly { 221 return rr.bucket.SetCursorKeyOnly() 222 } 223 224 return rr.bucket.SetCursor() 225 } 226 227 func (rr *RowReader) transformToBitmap(ids [][]byte) *sroar.Bitmap { 228 out := sroar.NewBitmap() 229 for _, asBytes := range ids { 230 out.Set(binary.LittleEndian.Uint64(asBytes)) 231 } 232 return out 233 } 234 235 // equalHelper exists, because the Equal and NotEqual operators share this functionality 236 func (rr *RowReader) equalHelper(ctx context.Context) ([][]byte, error) { 237 if err := ctx.Err(); err != nil { 238 return nil, err 239 } 240 241 v, err := rr.bucket.SetList(rr.value) 242 if err != nil { 243 return nil, err 244 } 245 return v, nil 246 }