github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/row_reader_roaring_set.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package inverted 13 14 import ( 15 "bytes" 16 "context" 17 "fmt" 18 19 "github.com/weaviate/sroar" 20 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv" 21 "github.com/weaviate/weaviate/adapters/repos/db/roaringset" 22 "github.com/weaviate/weaviate/entities/filters" 23 ) 24 25 // RowReaderRoaringSet reads one or many row(s) depending on the specified 26 // operator 27 type RowReaderRoaringSet struct { 28 value []byte 29 operator filters.Operator 30 newCursor func() lsmkv.CursorRoaringSet 31 getter func(key []byte) (*sroar.Bitmap, error) 32 bitmapFactory *roaringset.BitmapFactory 33 } 34 35 // If keyOnly is set, the RowReaderRoaringSet will request key-only cursors 36 // wherever cursors are used, the specified value arguments in the 37 // ReadFn will always be empty 38 func NewRowReaderRoaringSet(bucket *lsmkv.Bucket, value []byte, operator filters.Operator, 39 keyOnly bool, bitmapFactory *roaringset.BitmapFactory, 40 ) *RowReaderRoaringSet { 41 getter := bucket.RoaringSetGet 42 newCursor := bucket.CursorRoaringSet 43 if keyOnly { 44 newCursor = bucket.CursorRoaringSetKeyOnly 45 } 46 47 return &RowReaderRoaringSet{ 48 value: value, 49 operator: operator, 50 newCursor: newCursor, 51 getter: getter, 52 bitmapFactory: bitmapFactory, 53 } 54 } 55 56 // ReadFn will be called 1..n times per match. This means it will also 57 // be called on a non-match, in this case v == empty bitmap. 58 // It is up to the caller to decide if that is an error case or not. 59 // 60 // Note that because what we are parsing is an inverted index row, it can 61 // sometimes become confusing what a key and value actually resembles. The 62 // variables k and v are the literal row key and value. So this means, the 63 // data-value as in "less than 17" where 17 would be the "value" is in the key 64 // variable "k". The value will contain bitmap with docIDs having value "k" 65 // 66 // The boolean return argument is a way to stop iteration (e.g. when a limit is 67 // reached) without producing an error. In normal operation always return true, 68 // if false is returned once, the loop is broken. 69 type ReadFn func(k []byte, v *sroar.Bitmap) (bool, error) 70 71 // Read a row using the specified ReadFn. If RowReader was created with 72 // keysOnly==true, the values argument in the readFn will always be nil on all 73 // requests involving cursors 74 func (rr *RowReaderRoaringSet) Read(ctx context.Context, readFn ReadFn) error { 75 switch rr.operator { 76 case filters.OperatorEqual, filters.OperatorIsNull: 77 return rr.equal(ctx, readFn) 78 case filters.OperatorNotEqual: 79 return rr.notEqual(ctx, readFn) 80 case filters.OperatorGreaterThan: 81 return rr.greaterThan(ctx, readFn, false) 82 case filters.OperatorGreaterThanEqual: 83 return rr.greaterThan(ctx, readFn, true) 84 case filters.OperatorLessThan: 85 return rr.lessThan(ctx, readFn, false) 86 case filters.OperatorLessThanEqual: 87 return rr.lessThan(ctx, readFn, true) 88 case filters.OperatorLike: 89 return rr.like(ctx, readFn) 90 default: 91 return fmt.Errorf("operator %v not supported", rr.operator) 92 } 93 } 94 95 // equal is a special case, as we don't need to iterate, but just read a single 96 // row 97 func (rr *RowReaderRoaringSet) equal(ctx context.Context, 98 readFn ReadFn, 99 ) error { 100 v, err := rr.equalHelper(ctx) 101 if err != nil { 102 return err 103 } 104 105 _, err = readFn(rr.value, v) 106 return err 107 } 108 109 func (rr *RowReaderRoaringSet) notEqual(ctx context.Context, 110 readFn ReadFn, 111 ) error { 112 v, err := rr.equalHelper(ctx) 113 if err != nil { 114 return err 115 } 116 117 inverted := rr.bitmapFactory.GetBitmap() 118 inverted.AndNot(v) 119 _, err = readFn(rr.value, inverted) 120 return err 121 } 122 123 // greaterThan reads from the specified value to the end. The first row is only 124 // included if allowEqual==true, otherwise it starts with the next one 125 func (rr *RowReaderRoaringSet) greaterThan(ctx context.Context, 126 readFn ReadFn, allowEqual bool, 127 ) error { 128 c := rr.newCursor() 129 defer c.Close() 130 131 for k, v := c.Seek(rr.value); k != nil; k, v = c.Next() { 132 if err := ctx.Err(); err != nil { 133 return err 134 } 135 136 if bytes.Equal(k, rr.value) && !allowEqual { 137 continue 138 } 139 140 if continueReading, err := readFn(k, v); err != nil { 141 return err 142 } else if !continueReading { 143 break 144 } 145 } 146 147 return nil 148 } 149 150 // lessThan reads from the very begging to the specified value. The last 151 // matching row is only included if allowEqual==true, otherwise it ends one 152 // prior to that. 153 func (rr *RowReaderRoaringSet) lessThan(ctx context.Context, 154 readFn ReadFn, allowEqual bool, 155 ) error { 156 c := rr.newCursor() 157 defer c.Close() 158 159 for k, v := c.First(); k != nil && bytes.Compare(k, rr.value) < 1; k, v = c.Next() { 160 if err := ctx.Err(); err != nil { 161 return err 162 } 163 164 if bytes.Equal(k, rr.value) && !allowEqual { 165 continue 166 } 167 168 if continueReading, err := readFn(k, v); err != nil { 169 return err 170 } else if !continueReading { 171 break 172 } 173 } 174 175 return nil 176 } 177 178 func (rr *RowReaderRoaringSet) like(ctx context.Context, 179 readFn ReadFn, 180 ) error { 181 like, err := parseLikeRegexp(rr.value) 182 if err != nil { 183 return fmt.Errorf("parse like value: %w", err) 184 } 185 186 c := rr.newCursor() 187 defer c.Close() 188 189 var ( 190 initialK []byte 191 initialV *sroar.Bitmap 192 likeMinLen int 193 ) 194 195 if like.optimizable { 196 initialK, initialV = c.Seek(like.min) 197 likeMinLen = len(like.min) 198 } else { 199 initialK, initialV = c.First() 200 } 201 202 for k, v := initialK, initialV; k != nil; k, v = c.Next() { 203 if err := ctx.Err(); err != nil { 204 return err 205 } 206 207 if like.optimizable { 208 // if the query is optimizable, i.e. it doesn't start with a wildcard, we 209 // can abort once we've moved past the point where the fixed characters 210 // no longer match 211 if len(k) < likeMinLen { 212 break 213 } 214 if bytes.Compare(like.min, k[:likeMinLen]) == -1 { 215 break 216 } 217 } 218 219 if !like.regexp.Match(k) { 220 continue 221 } 222 223 if continueReading, err := readFn(k, v); err != nil { 224 return err 225 } else if !continueReading { 226 break 227 } 228 } 229 230 return nil 231 } 232 233 // equalHelper exists, because the Equal and NotEqual operators share this functionality 234 func (rr *RowReaderRoaringSet) equalHelper(ctx context.Context) (*sroar.Bitmap, error) { 235 if err := ctx.Err(); err != nil { 236 return nil, err 237 } 238 239 return rr.getter(rr.value) 240 }