github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/shard_write_inverted_lsm.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package db
    13  
    14  import (
    15  	"encoding/binary"
    16  	"fmt"
    17  	"math"
    18  
    19  	"github.com/pkg/errors"
    20  	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
    21  	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
    22  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv"
    23  )
    24  
    25  func (s *Shard) extendInvertedIndicesLSM(props []inverted.Property, nilProps []inverted.NilProperty,
    26  	docID uint64,
    27  ) error {
    28  	for _, prop := range props {
    29  		if err := s.addToPropertyValueIndex(docID, prop); err != nil {
    30  			return err
    31  		}
    32  
    33  		// add non-nil properties to the null-state inverted index, but skip internal properties (__meta_count, _id etc)
    34  		if isMetaCountProperty(prop) || isInternalProperty(prop) {
    35  			continue
    36  		}
    37  
    38  		// properties where defining a length does not make sense (floats etc.) have a negative entry as length
    39  		if s.index.invertedIndexConfig.IndexPropertyLength && prop.Length >= 0 {
    40  			if err := s.addToPropertyLengthIndex(prop.Name, docID, prop.Length); err != nil {
    41  				return errors.Wrap(err, "add indexed property length")
    42  			}
    43  		}
    44  
    45  		if s.index.invertedIndexConfig.IndexNullState {
    46  			if err := s.addToPropertyNullIndex(prop.Name, docID, prop.Length == 0); err != nil {
    47  				return errors.Wrap(err, "add indexed null state")
    48  			}
    49  		}
    50  	}
    51  
    52  	// add nil properties to the nullstate and property length inverted index
    53  	for _, nilProperty := range nilProps {
    54  		if s.index.invertedIndexConfig.IndexPropertyLength && nilProperty.AddToPropertyLength {
    55  			if err := s.addToPropertyLengthIndex(nilProperty.Name, docID, 0); err != nil {
    56  				return errors.Wrap(err, "add indexed property length")
    57  			}
    58  		}
    59  
    60  		if s.index.invertedIndexConfig.IndexNullState {
    61  			if err := s.addToPropertyNullIndex(nilProperty.Name, docID, true); err != nil {
    62  				return errors.Wrap(err, "add indexed null state")
    63  			}
    64  		}
    65  	}
    66  
    67  	return nil
    68  }
    69  
    70  func (s *Shard) addToPropertyValueIndex(docID uint64, property inverted.Property) error {
    71  	if property.HasFilterableIndex {
    72  		bucketValue := s.store.Bucket(helpers.BucketFromPropNameLSM(property.Name))
    73  		if bucketValue == nil {
    74  			return errors.Errorf("no bucket for prop '%s' found", property.Name)
    75  		}
    76  
    77  		for _, item := range property.Items {
    78  			key := item.Data
    79  			if err := s.addToPropertySetBucket(bucketValue, docID, key); err != nil {
    80  				return errors.Wrapf(err, "failed adding to prop '%s' value bucket", property.Name)
    81  			}
    82  		}
    83  	}
    84  
    85  	if property.HasSearchableIndex {
    86  		bucketValue := s.store.Bucket(helpers.BucketSearchableFromPropNameLSM(property.Name))
    87  		if bucketValue == nil {
    88  			return errors.Errorf("no bucket searchable for prop '%s' found", property.Name)
    89  		}
    90  
    91  		propLen := float32(len(property.Items))
    92  		for _, item := range property.Items {
    93  			key := item.Data
    94  			pair := s.pairPropertyWithFrequency(docID, item.TermFrequency, propLen)
    95  			if err := s.addToPropertyMapBucket(bucketValue, pair, key); err != nil {
    96  				return errors.Wrapf(err, "failed adding to prop '%s' value bucket", property.Name)
    97  			}
    98  		}
    99  	}
   100  
   101  	return nil
   102  }
   103  
   104  func (s *Shard) addToPropertyLengthIndex(propName string, docID uint64, length int) error {
   105  	bucketLength := s.store.Bucket(helpers.BucketFromPropNameLengthLSM(propName))
   106  	if bucketLength == nil {
   107  		return errors.Errorf("no bucket for prop '%s' length found", propName)
   108  	}
   109  
   110  	key, err := bucketKeyPropertyLength(length)
   111  	if err != nil {
   112  		return errors.Wrapf(err, "failed creating key for prop '%s' length", propName)
   113  	}
   114  	if err := s.addToPropertySetBucket(bucketLength, docID, key); err != nil {
   115  		return errors.Wrapf(err, "failed adding to prop '%s' length bucket", propName)
   116  	}
   117  	return nil
   118  }
   119  
   120  func (s *Shard) addToPropertyNullIndex(propName string, docID uint64, isNull bool) error {
   121  	bucketNull := s.store.Bucket(helpers.BucketFromPropNameNullLSM(propName))
   122  	if bucketNull == nil {
   123  		return errors.Errorf("no bucket for prop '%s' null found", propName)
   124  	}
   125  
   126  	key, err := bucketKeyPropertyNull(isNull)
   127  	if err != nil {
   128  		return errors.Wrapf(err, "failed creating key for prop '%s' null", propName)
   129  	}
   130  	if err := s.addToPropertySetBucket(bucketNull, docID, key); err != nil {
   131  		return errors.Wrapf(err, "failed adding to prop '%s' null bucket", propName)
   132  	}
   133  	return nil
   134  }
   135  
   136  func (s *Shard) pairPropertyWithFrequency(docID uint64, freq, propLen float32) lsmkv.MapPair {
   137  	// 8 bytes for doc id, 4 bytes for frequency, 4 bytes for prop term length
   138  	buf := make([]byte, 16)
   139  
   140  	// Shard Index version 2 requires BigEndian for sorting, if the shard was
   141  	// built prior assume it uses LittleEndian
   142  	if s.versioner.Version() < 2 {
   143  		binary.LittleEndian.PutUint64(buf[0:8], docID)
   144  	} else {
   145  		binary.BigEndian.PutUint64(buf[0:8], docID)
   146  	}
   147  	binary.LittleEndian.PutUint32(buf[8:12], math.Float32bits(freq))
   148  	binary.LittleEndian.PutUint32(buf[12:16], math.Float32bits(propLen))
   149  
   150  	return lsmkv.MapPair{
   151  		Key:   buf[:8],
   152  		Value: buf[8:],
   153  	}
   154  }
   155  
   156  func (s *Shard) addToPropertyMapBucket(bucket *lsmkv.Bucket, pair lsmkv.MapPair, key []byte) error {
   157  	lsmkv.CheckExpectedStrategy(bucket.Strategy(), lsmkv.StrategyMapCollection)
   158  
   159  	return bucket.MapSet(key, pair)
   160  }
   161  
   162  func (s *Shard) addToPropertySetBucket(bucket *lsmkv.Bucket, docID uint64, key []byte) error {
   163  	lsmkv.CheckExpectedStrategy(bucket.Strategy(), lsmkv.StrategySetCollection, lsmkv.StrategyRoaringSet)
   164  
   165  	if bucket.Strategy() == lsmkv.StrategySetCollection {
   166  		docIDBytes := make([]byte, 8)
   167  		binary.LittleEndian.PutUint64(docIDBytes, docID)
   168  
   169  		return bucket.SetAdd(key, [][]byte{docIDBytes})
   170  	}
   171  
   172  	return bucket.RoaringSetAddOne(key, docID)
   173  }
   174  
   175  func (s *Shard) batchExtendInvertedIndexItemsLSMNoFrequency(b *lsmkv.Bucket,
   176  	item inverted.MergeItem,
   177  ) error {
   178  	if b.Strategy() != lsmkv.StrategySetCollection && b.Strategy() != lsmkv.StrategyRoaringSet {
   179  		panic("prop has no frequency, but bucket does not have 'Set' nor 'RoaringSet' strategy")
   180  	}
   181  
   182  	if b.Strategy() == lsmkv.StrategyRoaringSet {
   183  		docIDs := make([]uint64, len(item.DocIDs))
   184  		for i, idTuple := range item.DocIDs {
   185  			docIDs[i] = idTuple.DocID
   186  		}
   187  		return b.RoaringSetAddList(item.Data, docIDs)
   188  	}
   189  
   190  	docIDs := make([][]byte, len(item.DocIDs))
   191  	for i, idTuple := range item.DocIDs {
   192  		docIDs[i] = make([]byte, 8)
   193  		binary.LittleEndian.PutUint64(docIDs[i], idTuple.DocID)
   194  	}
   195  
   196  	return b.SetAdd(item.Data, docIDs)
   197  }
   198  
   199  func (s *Shard) SetPropertyLengths(props []inverted.Property) error {
   200  	for _, prop := range props {
   201  		if !prop.HasSearchableIndex {
   202  			continue
   203  		}
   204  
   205  		if err := s.GetPropertyLengthTracker().TrackProperty(prop.Name, float32(len(prop.Items))); err != nil {
   206  			return err
   207  		}
   208  
   209  	}
   210  
   211  	return nil
   212  }
   213  
   214  func (s *Shard) subtractPropLengths(props []inverted.Property) error {
   215  	for _, prop := range props {
   216  		if !prop.HasSearchableIndex {
   217  			continue
   218  		}
   219  
   220  		if err := s.GetPropertyLengthTracker().UnTrackProperty(prop.Name, float32(len(prop.Items))); err != nil {
   221  			return err
   222  		}
   223  
   224  	}
   225  
   226  	return nil
   227  }
   228  
   229  func (s *Shard) extendDimensionTrackerLSM(
   230  	dimLength int, docID uint64,
   231  ) error {
   232  	return s.addToDimensionBucket(dimLength, docID, "", false)
   233  }
   234  
   235  func (s *Shard) extendDimensionTrackerForVecLSM(
   236  	dimLength int, docID uint64, vecName string,
   237  ) error {
   238  	if vecName == "" {
   239  		return fmt.Errorf("vector name can not be empty")
   240  	}
   241  	return s.addToDimensionBucket(dimLength, docID, vecName, false)
   242  }
   243  
// Example row of the dimensions bucket:
// Key (dimensionality) | Value Doc IDs
// 128 | 1,2,4,5,17
// Removing a doc ID (here 4) is recorded as a tombstone entry:
// 128 | 1,2,4,5,17, Tombstone 4
   247  
   248  func (s *Shard) removeDimensionsLSM(
   249  	dimLength int, docID uint64,
   250  ) error {
   251  	return s.addToDimensionBucket(dimLength, docID, "", true)
   252  }
   253  
   254  func (s *Shard) removeDimensionsForVecLSM(
   255  	dimLength int, docID uint64, vecName string,
   256  ) error {
   257  	if vecName == "" {
   258  		return fmt.Errorf("vector name can not be empty")
   259  	}
   260  	return s.addToDimensionBucket(dimLength, docID, vecName, true)
   261  }
   262  
   263  func (s *Shard) addToDimensionBucket(
   264  	dimLength int, docID uint64, vecName string, tombstone bool,
   265  ) error {
   266  	b := s.store.Bucket(helpers.DimensionsBucketLSM)
   267  	if b == nil {
   268  		return errors.Errorf("no bucket dimensions")
   269  	}
   270  
   271  	tv := []byte(vecName)
   272  	// 8 bytes for doc id (map key)
   273  	// 4 bytes for dim count (row key)
   274  	// len(vecName) bytes for vector name (prefix of row key)
   275  	buf := make([]byte, 12+len(tv))
   276  	binary.LittleEndian.PutUint64(buf[:8], docID)
   277  	binary.LittleEndian.PutUint32(buf[8+len(tv):], uint32(dimLength))
   278  	copy(buf[8:], tv)
   279  
   280  	return b.MapSet(buf[8:], lsmkv.MapPair{
   281  		Key:       buf[:8],
   282  		Value:     []byte{},
   283  		Tombstone: tombstone,
   284  	})
   285  }
   286  
   287  func isMetaCountProperty(property inverted.Property) bool {
   288  	return len(property.Name) > 12 && property.Name[len(property.Name)-12:] == "__meta_count"
   289  }
   290  
   291  func isInternalProperty(property inverted.Property) bool {
   292  	return property.Name[0] == '_'
   293  }