github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/shard_write_inverted_lsm.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package db 13 14 import ( 15 "encoding/binary" 16 "fmt" 17 "math" 18 19 "github.com/pkg/errors" 20 "github.com/weaviate/weaviate/adapters/repos/db/helpers" 21 "github.com/weaviate/weaviate/adapters/repos/db/inverted" 22 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv" 23 ) 24 25 func (s *Shard) extendInvertedIndicesLSM(props []inverted.Property, nilProps []inverted.NilProperty, 26 docID uint64, 27 ) error { 28 for _, prop := range props { 29 if err := s.addToPropertyValueIndex(docID, prop); err != nil { 30 return err 31 } 32 33 // add non-nil properties to the null-state inverted index, but skip internal properties (__meta_count, _id etc) 34 if isMetaCountProperty(prop) || isInternalProperty(prop) { 35 continue 36 } 37 38 // properties where defining a length does not make sense (floats etc.) have a negative entry as length 39 if s.index.invertedIndexConfig.IndexPropertyLength && prop.Length >= 0 { 40 if err := s.addToPropertyLengthIndex(prop.Name, docID, prop.Length); err != nil { 41 return errors.Wrap(err, "add indexed property length") 42 } 43 } 44 45 if s.index.invertedIndexConfig.IndexNullState { 46 if err := s.addToPropertyNullIndex(prop.Name, docID, prop.Length == 0); err != nil { 47 return errors.Wrap(err, "add indexed null state") 48 } 49 } 50 } 51 52 // add nil properties to the nullstate and property length inverted index 53 for _, nilProperty := range nilProps { 54 if s.index.invertedIndexConfig.IndexPropertyLength && nilProperty.AddToPropertyLength { 55 if err := s.addToPropertyLengthIndex(nilProperty.Name, docID, 0); err != nil { 56 return errors.Wrap(err, "add indexed property length") 57 } 58 } 59 60 if s.index.invertedIndexConfig.IndexNullState { 61 if err := s.addToPropertyNullIndex(nilProperty.Name, docID, true); err != nil { 62 return errors.Wrap(err, "add indexed null state") 63 } 64 } 65 } 66 67 return nil 68 } 69 70 func (s *Shard) addToPropertyValueIndex(docID uint64, property inverted.Property) error { 71 if property.HasFilterableIndex { 72 bucketValue := s.store.Bucket(helpers.BucketFromPropNameLSM(property.Name)) 73 if bucketValue == nil { 74 return errors.Errorf("no bucket for prop '%s' found", property.Name) 75 } 76 77 for _, item := range property.Items { 78 key := item.Data 79 if err := s.addToPropertySetBucket(bucketValue, docID, key); err != nil { 80 return errors.Wrapf(err, "failed adding to prop '%s' value bucket", property.Name) 81 } 82 } 83 } 84 85 if property.HasSearchableIndex { 86 bucketValue := s.store.Bucket(helpers.BucketSearchableFromPropNameLSM(property.Name)) 87 if bucketValue == nil { 88 return errors.Errorf("no bucket searchable for prop '%s' found", property.Name) 89 } 90 91 propLen := float32(len(property.Items)) 92 for _, item := range property.Items { 93 key := item.Data 94 pair := s.pairPropertyWithFrequency(docID, item.TermFrequency, propLen) 95 if err := s.addToPropertyMapBucket(bucketValue, pair, key); err != nil { 96 return errors.Wrapf(err, "failed adding to prop '%s' value bucket", property.Name) 97 } 98 } 99 } 100 101 return nil 102 } 103 104 func (s *Shard) addToPropertyLengthIndex(propName string, docID uint64, length int) error { 105 bucketLength := s.store.Bucket(helpers.BucketFromPropNameLengthLSM(propName)) 106 if bucketLength == nil { 107 return errors.Errorf("no bucket for prop '%s' length found", propName) 108 } 109 110 key, err := bucketKeyPropertyLength(length) 111 if err != nil { 112 return errors.Wrapf(err, "failed creating key for prop '%s' length", propName) 113 } 114 if err := s.addToPropertySetBucket(bucketLength, docID, key); err != nil { 115 return errors.Wrapf(err, "failed adding to prop '%s' length bucket", propName) 116 } 117 return nil 118 } 119 120 func (s *Shard) addToPropertyNullIndex(propName string, docID uint64, isNull bool) error { 121 bucketNull := s.store.Bucket(helpers.BucketFromPropNameNullLSM(propName)) 122 if bucketNull == nil { 123 return errors.Errorf("no bucket for prop '%s' null found", propName) 124 } 125 126 key, err := bucketKeyPropertyNull(isNull) 127 if err != nil { 128 return errors.Wrapf(err, "failed creating key for prop '%s' null", propName) 129 } 130 if err := s.addToPropertySetBucket(bucketNull, docID, key); err != nil { 131 return errors.Wrapf(err, "failed adding to prop '%s' null bucket", propName) 132 } 133 return nil 134 } 135 136 func (s *Shard) pairPropertyWithFrequency(docID uint64, freq, propLen float32) lsmkv.MapPair { 137 // 8 bytes for doc id, 4 bytes for frequency, 4 bytes for prop term length 138 buf := make([]byte, 16) 139 140 // Shard Index version 2 requires BigEndian for sorting, if the shard was 141 // built prior assume it uses LittleEndian 142 if s.versioner.Version() < 2 { 143 binary.LittleEndian.PutUint64(buf[0:8], docID) 144 } else { 145 binary.BigEndian.PutUint64(buf[0:8], docID) 146 } 147 binary.LittleEndian.PutUint32(buf[8:12], math.Float32bits(freq)) 148 binary.LittleEndian.PutUint32(buf[12:16], math.Float32bits(propLen)) 149 150 return lsmkv.MapPair{ 151 Key: buf[:8], 152 Value: buf[8:], 153 } 154 } 155 156 func (s *Shard) addToPropertyMapBucket(bucket *lsmkv.Bucket, pair lsmkv.MapPair, key []byte) error { 157 lsmkv.CheckExpectedStrategy(bucket.Strategy(), lsmkv.StrategyMapCollection) 158 159 return bucket.MapSet(key, pair) 160 } 161 162 func (s *Shard) addToPropertySetBucket(bucket *lsmkv.Bucket, docID uint64, key []byte) error { 163 lsmkv.CheckExpectedStrategy(bucket.Strategy(), lsmkv.StrategySetCollection, lsmkv.StrategyRoaringSet) 164 165 if bucket.Strategy() == lsmkv.StrategySetCollection { 166 docIDBytes := make([]byte, 8) 167 binary.LittleEndian.PutUint64(docIDBytes, docID) 168 169 return bucket.SetAdd(key, [][]byte{docIDBytes}) 170 } 171 172 return bucket.RoaringSetAddOne(key, docID) 173 } 174 175 func (s *Shard) batchExtendInvertedIndexItemsLSMNoFrequency(b *lsmkv.Bucket, 176 item inverted.MergeItem, 177 ) error { 178 if b.Strategy() != lsmkv.StrategySetCollection && b.Strategy() != lsmkv.StrategyRoaringSet { 179 panic("prop has no frequency, but bucket does not have 'Set' nor 'RoaringSet' strategy") 180 } 181 182 if b.Strategy() == lsmkv.StrategyRoaringSet { 183 docIDs := make([]uint64, len(item.DocIDs)) 184 for i, idTuple := range item.DocIDs { 185 docIDs[i] = idTuple.DocID 186 } 187 return b.RoaringSetAddList(item.Data, docIDs) 188 } 189 190 docIDs := make([][]byte, len(item.DocIDs)) 191 for i, idTuple := range item.DocIDs { 192 docIDs[i] = make([]byte, 8) 193 binary.LittleEndian.PutUint64(docIDs[i], idTuple.DocID) 194 } 195 196 return b.SetAdd(item.Data, docIDs) 197 } 198 199 func (s *Shard) SetPropertyLengths(props []inverted.Property) error { 200 for _, prop := range props { 201 if !prop.HasSearchableIndex { 202 continue 203 } 204 205 if err := s.GetPropertyLengthTracker().TrackProperty(prop.Name, float32(len(prop.Items))); err != nil { 206 return err 207 } 208 209 } 210 211 return nil 212 } 213 214 func (s *Shard) subtractPropLengths(props []inverted.Property) error { 215 for _, prop := range props { 216 if !prop.HasSearchableIndex { 217 continue 218 } 219 220 if err := s.GetPropertyLengthTracker().UnTrackProperty(prop.Name, float32(len(prop.Items))); err != nil { 221 return err 222 } 223 224 } 225 226 return nil 227 } 228 229 func (s *Shard) extendDimensionTrackerLSM( 230 dimLength int, docID uint64, 231 ) error { 232 return s.addToDimensionBucket(dimLength, docID, "", false) 233 } 234 235 func (s *Shard) extendDimensionTrackerForVecLSM( 236 dimLength int, docID uint64, vecName string, 237 ) error { 238 if vecName == "" { 239 return fmt.Errorf("vector name can not be empty") 240 } 241 return s.addToDimensionBucket(dimLength, docID, vecName, false) 242 } 243 244 // Key (dimensionality) | Value Doc IDs 245 // 128 | 1,2,4,5,17 246 // 128 | 1,2,4,5,17, Tombstone 4, 247 248 func (s *Shard) removeDimensionsLSM( 249 dimLength int, docID uint64, 250 ) error { 251 return s.addToDimensionBucket(dimLength, docID, "", true) 252 } 253 254 func (s *Shard) removeDimensionsForVecLSM( 255 dimLength int, docID uint64, vecName string, 256 ) error { 257 if vecName == "" { 258 return fmt.Errorf("vector name can not be empty") 259 } 260 return s.addToDimensionBucket(dimLength, docID, vecName, true) 261 } 262 263 func (s *Shard) addToDimensionBucket( 264 dimLength int, docID uint64, vecName string, tombstone bool, 265 ) error { 266 b := s.store.Bucket(helpers.DimensionsBucketLSM) 267 if b == nil { 268 return errors.Errorf("no bucket dimensions") 269 } 270 271 tv := []byte(vecName) 272 // 8 bytes for doc id (map key) 273 // 4 bytes for dim count (row key) 274 // len(vecName) bytes for vector name (prefix of row key) 275 buf := make([]byte, 12+len(tv)) 276 binary.LittleEndian.PutUint64(buf[:8], docID) 277 binary.LittleEndian.PutUint32(buf[8+len(tv):], uint32(dimLength)) 278 copy(buf[8:], tv) 279 280 return b.MapSet(buf[8:], lsmkv.MapPair{ 281 Key: buf[:8], 282 Value: []byte{}, 283 Tombstone: tombstone, 284 }) 285 } 286 287 func isMetaCountProperty(property inverted.Property) bool { 288 return len(property.Name) > 12 && property.Name[len(property.Name)-12:] == "__meta_count" 289 } 290 291 func isInternalProperty(property inverted.Property) bool { 292 return property.Name[0] == '_' 293 }