github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/shard.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package db
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"io"
    18  	"os"
    19  	"path"
    20  	"sync"
    21  	"time"
    22  
    23  	enterrors "github.com/weaviate/weaviate/entities/errors"
    24  
    25  	"github.com/go-openapi/strfmt"
    26  	"github.com/pkg/errors"
    27  	"github.com/sirupsen/logrus"
    28  	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
    29  	"github.com/weaviate/weaviate/adapters/repos/db/indexcheckpoint"
    30  	"github.com/weaviate/weaviate/adapters/repos/db/indexcounter"
    31  	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
    32  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv"
    33  	"github.com/weaviate/weaviate/adapters/repos/db/propertyspecific"
    34  	"github.com/weaviate/weaviate/adapters/repos/db/roaringset"
    35  	"github.com/weaviate/weaviate/adapters/repos/db/vector/flat"
    36  	"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw"
    37  	"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer"
    38  	"github.com/weaviate/weaviate/adapters/repos/db/vector/noop"
    39  	"github.com/weaviate/weaviate/entities/additional"
    40  	"github.com/weaviate/weaviate/entities/aggregation"
    41  	"github.com/weaviate/weaviate/entities/backup"
    42  	"github.com/weaviate/weaviate/entities/cyclemanager"
    43  	"github.com/weaviate/weaviate/entities/filters"
    44  	"github.com/weaviate/weaviate/entities/models"
    45  	"github.com/weaviate/weaviate/entities/multi"
    46  	"github.com/weaviate/weaviate/entities/schema"
    47  	"github.com/weaviate/weaviate/entities/search"
    48  	"github.com/weaviate/weaviate/entities/searchparams"
    49  	"github.com/weaviate/weaviate/entities/storagestate"
    50  	"github.com/weaviate/weaviate/entities/storobj"
    51  	"github.com/weaviate/weaviate/entities/vectorindex"
    52  	"github.com/weaviate/weaviate/entities/vectorindex/common"
    53  	flatent "github.com/weaviate/weaviate/entities/vectorindex/flat"
    54  	hnswent "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    55  	"github.com/weaviate/weaviate/usecases/monitoring"
    56  	"github.com/weaviate/weaviate/usecases/objects"
    57  	"github.com/weaviate/weaviate/usecases/replica"
    58  )
    59  
// IdLockPoolSize is the number of mutexes in a shard's docIdLock pool. An
// object's UUID is mapped onto one of these locks (see uuidToIdLockPoolId)
// so concurrent writes to different objects rarely contend.
const IdLockPoolSize = 128
    61  
// ShardLike is the contract shared by a fully loaded Shard and any wrapper
// around it (e.g. a lazy-loading shard). It covers object CRUD, search,
// vector-index management, replication hooks, backup and maintenance
// operations on a single shard.
type ShardLike interface {
	Index() *Index                                                                      // Get the parent index
	Name() string                                                                       // Get the shard name
	Store() *lsmkv.Store                                                                // Get the underlying store
	NotifyReady()                                                                       // Set shard status to ready
	GetStatus() storagestate.Status                                                     // Return the shard status
	UpdateStatus(status string) error                                                   // Set shard status
	FindUUIDs(ctx context.Context, filters *filters.LocalFilter) ([]strfmt.UUID, error) // Search and return document ids

	Counter() *indexcounter.Counter
	ObjectCount() int
	ObjectCountAsync() int
	GetPropertyLengthTracker() *inverted.JsonPropertyLengthTracker

	// object CRUD and search
	PutObject(context.Context, *storobj.Object) error
	PutObjectBatch(context.Context, []*storobj.Object) []error
	ObjectByID(ctx context.Context, id strfmt.UUID, props search.SelectProperties, additional additional.Properties) (*storobj.Object, error)
	Exists(ctx context.Context, id strfmt.UUID) (bool, error)
	ObjectSearch(ctx context.Context, limit int, filters *filters.LocalFilter, keywordRanking *searchparams.KeywordRanking, sort []filters.Sort, cursor *filters.Cursor, additional additional.Properties) ([]*storobj.Object, []float32, error)
	ObjectVectorSearch(ctx context.Context, searchVector []float32, targetVector string, targetDist float32, limit int, filters *filters.LocalFilter, sort []filters.Sort, groupBy *searchparams.GroupBy, additional additional.Properties) ([]*storobj.Object, []float32, error)
	UpdateVectorIndexConfig(ctx context.Context, updated schema.VectorIndexConfig) error
	UpdateVectorIndexConfigs(ctx context.Context, updated map[string]schema.VectorIndexConfig) error
	AddReferencesBatch(ctx context.Context, refs objects.BatchReferences) []error
	DeleteObjectBatch(ctx context.Context, ids []strfmt.UUID, dryRun bool) objects.BatchSimpleObjects // Delete many objects by id
	DeleteObject(ctx context.Context, id strfmt.UUID) error                                           // Delete object by id
	MultiObjectByID(ctx context.Context, query []multi.Identifier) ([]*storobj.Object, error)
	ID() string // Get the shard id
	drop() error
	addIDProperty(ctx context.Context) error
	addDimensionsProperty(ctx context.Context) error
	addTimestampProperties(ctx context.Context) error
	createPropertyIndex(ctx context.Context, prop *models.Property, eg *enterrors.ErrorGroupWrapper)
	BeginBackup(ctx context.Context) error
	ListBackupFiles(ctx context.Context, ret *backup.ShardDescriptor) error
	resumeMaintenanceCycles(ctx context.Context) error
	SetPropertyLengths(props []inverted.Property) error
	AnalyzeObject(*storobj.Object) ([]inverted.Property, []inverted.NilProperty, error)

	Aggregate(ctx context.Context, params aggregation.Params) (*aggregation.Result, error)
	MergeObject(ctx context.Context, object objects.MergeDocument) error
	Queue() *IndexQueue
	Queues() map[string]*IndexQueue
	Shutdown(context.Context) error // Shutdown the shard
	// TODO tests only
	ObjectList(ctx context.Context, limit int, sort []filters.Sort, cursor *filters.Cursor,
		additional additional.Properties, className schema.ClassName) ([]*storobj.Object, error) // Search and return objects
	WasDeleted(ctx context.Context, id strfmt.UUID) (bool, error) // Check if an object was deleted
	VectorIndex() VectorIndex                                     // Get the vector index
	VectorIndexes() map[string]VectorIndex                        // Get the vector indexes
	hasTargetVectors() bool
	// TODO tests only
	Versioner() *shardVersioner // Get the shard versioner

	isReadOnly() bool

	// replication hooks: prepare phase of a replicated write
	preparePutObject(context.Context, string, *storobj.Object) replica.SimpleResponse
	preparePutObjects(context.Context, string, []*storobj.Object) replica.SimpleResponse
	prepareMergeObject(context.Context, string, *objects.MergeDocument) replica.SimpleResponse
	prepareDeleteObject(context.Context, string, strfmt.UUID) replica.SimpleResponse
	prepareDeleteObjects(context.Context, string, []strfmt.UUID, bool) replica.SimpleResponse
	prepareAddReferences(context.Context, string, []objects.BatchReference) replica.SimpleResponse

	// replication hooks: commit/abort phase
	commitReplication(context.Context, string, *backupMutex) interface{}
	abortReplication(context.Context, string) replica.SimpleResponse
	reinit(context.Context) error
	filePutter(context.Context, string) (io.WriteCloser, error)

	// TODO tests only
	Dimensions() int // dim(vector)*number vectors
	// TODO tests only
	QuantizedDimensions(segments int) int
	extendDimensionTrackerLSM(dimLength int, docID uint64) error
	extendDimensionTrackerForVecLSM(dimLength int, docID uint64, vecName string) error
	publishDimensionMetrics()

	// inverted-index bucket primitives
	addToPropertySetBucket(bucket *lsmkv.Bucket, docID uint64, key []byte) error
	addToPropertyMapBucket(bucket *lsmkv.Bucket, pair lsmkv.MapPair, key []byte) error
	pairPropertyWithFrequency(docID uint64, freq, propLen float32) lsmkv.MapPair

	setFallbackToSearchable(fallback bool)
	addJobToQueue(job job)
	uuidFromDocID(docID uint64) (strfmt.UUID, error)
	batchDeleteObject(ctx context.Context, id strfmt.UUID) error
	putObjectLSM(object *storobj.Object, idBytes []byte) (objectInsertStatus, error)
	mutableMergeObjectLSM(merge objects.MergeDocument, idBytes []byte) (mutableMergeResult, error)
	deleteFromPropertySetBucket(bucket *lsmkv.Bucket, docID uint64, key []byte) error
	batchExtendInvertedIndexItemsLSMNoFrequency(b *lsmkv.Bucket, item inverted.MergeItem) error
	updatePropertySpecificIndices(object *storobj.Object, status objectInsertStatus) error
	updateVectorIndexIgnoreDelete(vector []float32, status objectInsertStatus) error
	updateVectorIndexesIgnoreDelete(vectors map[string][]float32, status objectInsertStatus) error
	hasGeoIndex() bool

	Metrics() *Metrics
}
   156  
// Shard is the smallest completely-contained index unit. A shard manages
// database files for all the objects it owns. How a shard is determined for a
// target object (e.g. Murmur hash, etc.) is still open at this point
type Shard struct {
	index            *Index // a reference to the underlying index, which in turn contains schema information
	queue            *IndexQueue            // async indexing queue for the legacy (unnamed) vector
	queues           map[string]*IndexQueue // async indexing queues, one per named target vector
	name             string
	store            *lsmkv.Store
	counter          *indexcounter.Counter
	indexCheckpoints *indexcheckpoint.Checkpoints
	vectorIndex      VectorIndex            // legacy (unnamed) vector index
	vectorIndexes    map[string]VectorIndex // vector indexes, one per named target vector
	metrics          *Metrics
	promMetrics      *monitoring.PrometheusMetrics
	propertyIndices  propertyspecific.Indices
	propLenTracker   *inverted.JsonPropertyLengthTracker
	versioner        *shardVersioner

	status              storagestate.Status
	statusLock          sync.Mutex
	propertyIndicesLock sync.RWMutex
	// stopMetrics signals the dimension-tracking goroutine to stop; drop()
	// sends on it when TrackVectorDimensions is enabled
	stopMetrics chan struct{}

	centralJobQueue chan job // reference to queue used by all shards

	// docIdLock is a pool of IdLockPoolSize mutexes; an object's UUID selects
	// one via uuidToIdLockPoolId to serialize writes to the same object
	docIdLock []sync.Mutex
	// replication
	replicationMap pendingReplicaTasks

	// Indicates whether searchable buckets should be used
	// when filterable buckets are missing for text/text[] properties
	// This can happen for db created before v1.19, where
	// only map (now called searchable) buckets were created as inverted
	// indexes for text/text[] props.
	// Now roaring set (filterable) and map (searchable) buckets can
	// coexists for text/text[] props, and by default both are enabled.
	// So despite property's IndexFilterable and IndexSearchable settings
	// being enabled, only searchable bucket exists
	fallbackToSearchable bool

	cycleCallbacks *shardCycleCallbacks
	bitmapFactory  *roaringset.BitmapFactory
}
   201  
   202  func NewShard(ctx context.Context, promMetrics *monitoring.PrometheusMetrics,
   203  	shardName string, index *Index, class *models.Class, jobQueueCh chan job,
   204  	indexCheckpoints *indexcheckpoint.Checkpoints,
   205  ) (*Shard, error) {
   206  	before := time.Now()
   207  	var err error
   208  	s := &Shard{
   209  		index:       index,
   210  		name:        shardName,
   211  		promMetrics: promMetrics,
   212  		metrics: NewMetrics(index.logger, promMetrics,
   213  			string(index.Config.ClassName), shardName),
   214  		stopMetrics:      make(chan struct{}),
   215  		replicationMap:   pendingReplicaTasks{Tasks: make(map[string]replicaTask, 32)},
   216  		centralJobQueue:  jobQueueCh,
   217  		indexCheckpoints: indexCheckpoints,
   218  	}
   219  	s.initCycleCallbacks()
   220  
   221  	s.docIdLock = make([]sync.Mutex, IdLockPoolSize)
   222  
   223  	defer s.metrics.ShardStartup(before)
   224  
   225  	_, err = os.Stat(s.path())
   226  	exists := false
   227  	if err == nil {
   228  		exists = true
   229  	}
   230  
   231  	if err := os.MkdirAll(s.path(), os.ModePerm); err != nil {
   232  		return nil, err
   233  	}
   234  
   235  	if err := s.initNonVector(ctx, class); err != nil {
   236  		return nil, errors.Wrapf(err, "init shard %q", s.ID())
   237  	}
   238  
   239  	if s.hasTargetVectors() {
   240  		if err := s.initTargetVectors(ctx); err != nil {
   241  			return nil, err
   242  		}
   243  		if err := s.initTargetQueues(); err != nil {
   244  			return nil, err
   245  		}
   246  	} else {
   247  		if err := s.initLegacyVector(ctx); err != nil {
   248  			return nil, err
   249  		}
   250  		if err := s.initLegacyQueue(); err != nil {
   251  			return nil, err
   252  		}
   253  	}
   254  
   255  	s.initDimensionTracking()
   256  
   257  	if asyncEnabled() {
   258  		f := func() {
   259  			// preload unindexed objects in the background
   260  			if s.hasTargetVectors() {
   261  				for targetVector, queue := range s.queues {
   262  					err := queue.PreloadShard(s)
   263  					if err != nil {
   264  						queue.Logger.WithError(err).Errorf("preload shard for target vector: %s", targetVector)
   265  					}
   266  				}
   267  			} else {
   268  				err := s.queue.PreloadShard(s)
   269  				if err != nil {
   270  					s.queue.Logger.WithError(err).Error("preload shard")
   271  				}
   272  			}
   273  		}
   274  		enterrors.GoWrapper(f, s.index.logger)
   275  	}
   276  	s.NotifyReady()
   277  
   278  	if exists {
   279  		s.index.logger.Printf("Completed loading shard %s in %s", s.ID(), time.Since(before))
   280  	} else {
   281  		s.index.logger.Printf("Created shard %s in %s", s.ID(), time.Since(before))
   282  	}
   283  	return s, nil
   284  }
   285  
// hasTargetVectors reports whether this shard's class is configured with
// named target vectors (as opposed to the single legacy vector); it simply
// delegates to the package-level helper with the index's configs.
func (s *Shard) hasTargetVectors() bool {
	return hasTargetVectors(s.index.vectorIndexUserConfig, s.index.vectorIndexUserConfigs)
}
   289  
   290  // target vectors and legacy vector are (supposed to be) exclusive
   291  // method allows to distinguish which of them is configured for the class
   292  func hasTargetVectors(cfg schema.VectorIndexConfig, targetCfgs map[string]schema.VectorIndexConfig) bool {
   293  	return len(targetCfgs) != 0
   294  }
   295  
   296  func (s *Shard) initTargetVectors(ctx context.Context) error {
   297  	s.vectorIndexes = make(map[string]VectorIndex)
   298  	for targetVector, vectorIndexConfig := range s.index.vectorIndexUserConfigs {
   299  		vectorIndex, err := s.initVectorIndex(ctx, targetVector, vectorIndexConfig)
   300  		if err != nil {
   301  			return fmt.Errorf("cannot create vector index for %q: %w", targetVector, err)
   302  		}
   303  		s.vectorIndexes[targetVector] = vectorIndex
   304  	}
   305  	return nil
   306  }
   307  
   308  func (s *Shard) initTargetQueues() error {
   309  	s.queues = make(map[string]*IndexQueue)
   310  	for targetVector, vectorIndex := range s.vectorIndexes {
   311  		queue, err := NewIndexQueue(s.ID(), targetVector, s, vectorIndex, s.centralJobQueue,
   312  			s.indexCheckpoints, IndexQueueOptions{Logger: s.index.logger})
   313  		if err != nil {
   314  			return fmt.Errorf("cannot create index queue for %q: %w", targetVector, err)
   315  		}
   316  		s.queues[targetVector] = queue
   317  	}
   318  	return nil
   319  }
   320  
   321  func (s *Shard) initLegacyVector(ctx context.Context) error {
   322  	vectorindex, err := s.initVectorIndex(ctx, "", s.index.vectorIndexUserConfig)
   323  	if err != nil {
   324  		return err
   325  	}
   326  	s.vectorIndex = vectorindex
   327  	return nil
   328  }
   329  
   330  func (s *Shard) initLegacyQueue() error {
   331  	queue, err := NewIndexQueue(s.ID(), "", s, s.vectorIndex, s.centralJobQueue,
   332  		s.indexCheckpoints, IndexQueueOptions{Logger: s.index.logger})
   333  	if err != nil {
   334  		return err
   335  	}
   336  	s.queue = queue
   337  	return nil
   338  }
   339  
   340  func (s *Shard) initVectorIndex(ctx context.Context,
   341  	targetVector string, vectorIndexUserConfig schema.VectorIndexConfig,
   342  ) (VectorIndex, error) {
   343  	var distProv distancer.Provider
   344  
   345  	switch vectorIndexUserConfig.DistanceName() {
   346  	case "", common.DistanceCosine:
   347  		distProv = distancer.NewCosineDistanceProvider()
   348  	case common.DistanceDot:
   349  		distProv = distancer.NewDotProductProvider()
   350  	case common.DistanceL2Squared:
   351  		distProv = distancer.NewL2SquaredProvider()
   352  	case common.DistanceManhattan:
   353  		distProv = distancer.NewManhattanProvider()
   354  	case common.DistanceHamming:
   355  		distProv = distancer.NewHammingProvider()
   356  	default:
   357  		return nil, fmt.Errorf("init vector index: %w",
   358  			errors.Errorf("unrecognized distance metric %q,"+
   359  				"choose one of [\"cosine\", \"dot\", \"l2-squared\", \"manhattan\",\"hamming\"]", vectorIndexUserConfig.DistanceName()))
   360  	}
   361  
   362  	var vectorIndex VectorIndex
   363  
   364  	switch vectorIndexUserConfig.IndexType() {
   365  	case vectorindex.VectorIndexTypeHNSW:
   366  		hnswUserConfig, ok := vectorIndexUserConfig.(hnswent.UserConfig)
   367  		if !ok {
   368  			return nil, errors.Errorf("hnsw vector index: config is not hnsw.UserConfig: %T",
   369  				vectorIndexUserConfig)
   370  		}
   371  
   372  		if hnswUserConfig.Skip {
   373  			vectorIndex = noop.NewIndex()
   374  		} else {
   375  			// starts vector cycles if vector is configured
   376  			s.index.cycleCallbacks.vectorCommitLoggerCycle.Start()
   377  			s.index.cycleCallbacks.vectorTombstoneCleanupCycle.Start()
   378  
   379  			// a shard can actually have multiple vector indexes:
   380  			// - the main index, which is used for all normal object vectors
   381  			// - a geo property index for each geo prop in the schema
   382  			//
   383  			// here we label the main vector index as such.
   384  			vecIdxID := s.vectorIndexID(targetVector)
   385  
   386  			vi, err := hnsw.New(hnsw.Config{
   387  				Logger:               s.index.logger,
   388  				RootPath:             s.path(),
   389  				ID:                   vecIdxID,
   390  				ShardName:            s.name,
   391  				ClassName:            s.index.Config.ClassName.String(),
   392  				PrometheusMetrics:    s.promMetrics,
   393  				VectorForIDThunk:     s.vectorByIndexID,
   394  				TempVectorForIDThunk: s.readVectorByIndexIDIntoSlice,
   395  				DistanceProvider:     distProv,
   396  				MakeCommitLoggerThunk: func() (hnsw.CommitLogger, error) {
   397  					return hnsw.NewCommitLogger(s.path(), vecIdxID,
   398  						s.index.logger, s.cycleCallbacks.vectorCommitLoggerCallbacks)
   399  				},
   400  			}, hnswUserConfig, s.cycleCallbacks.vectorTombstoneCleanupCallbacks,
   401  				s.cycleCallbacks.compactionCallbacks, s.cycleCallbacks.flushCallbacks, s.store)
   402  			if err != nil {
   403  				return nil, errors.Wrapf(err, "init shard %q: hnsw index", s.ID())
   404  			}
   405  			vectorIndex = vi
   406  		}
   407  	case vectorindex.VectorIndexTypeFLAT:
   408  		flatUserConfig, ok := vectorIndexUserConfig.(flatent.UserConfig)
   409  		if !ok {
   410  			return nil, errors.Errorf("flat vector index: config is not flat.UserConfig: %T",
   411  				vectorIndexUserConfig)
   412  		}
   413  		s.index.cycleCallbacks.vectorCommitLoggerCycle.Start()
   414  
   415  		// a shard can actually have multiple vector indexes:
   416  		// - the main index, which is used for all normal object vectors
   417  		// - a geo property index for each geo prop in the schema
   418  		//
   419  		// here we label the main vector index as such.
   420  		vecIdxID := s.vectorIndexID(targetVector)
   421  
   422  		vi, err := flat.New(flat.Config{
   423  			ID:               vecIdxID,
   424  			TargetVector:     targetVector,
   425  			Logger:           s.index.logger,
   426  			DistanceProvider: distProv,
   427  		}, flatUserConfig, s.store)
   428  		if err != nil {
   429  			return nil, errors.Wrapf(err, "init shard %q: flat index", s.ID())
   430  		}
   431  		vectorIndex = vi
   432  	default:
   433  		return nil, fmt.Errorf("Unknown vector index type: %q. Choose one from [\"%s\", \"%s\"]",
   434  			vectorIndexUserConfig.IndexType(), vectorindex.VectorIndexTypeHNSW, vectorindex.VectorIndexTypeFLAT)
   435  	}
   436  	defer vectorIndex.PostStartup()
   437  	return vectorIndex, nil
   438  }
   439  
   440  func (s *Shard) initNonVector(ctx context.Context, class *models.Class) error {
   441  	err := s.initLSMStore(ctx)
   442  	if err != nil {
   443  		return errors.Wrapf(err, "init shard %q: shard db", s.ID())
   444  	}
   445  
   446  	counter, err := indexcounter.New(s.path())
   447  	if err != nil {
   448  		return errors.Wrapf(err, "init shard %q: index counter", s.ID())
   449  	}
   450  	s.counter = counter
   451  	s.bitmapFactory = roaringset.NewBitmapFactory(s.counter.Get, s.index.logger)
   452  
   453  	dataPresent := s.counter.PreviewNext() != 0
   454  	versionPath := path.Join(s.path(), "version")
   455  	versioner, err := newShardVersioner(versionPath, dataPresent)
   456  	if err != nil {
   457  		return errors.Wrapf(err, "init shard %q: check versions", s.ID())
   458  	}
   459  	s.versioner = versioner
   460  
   461  	plPath := path.Join(s.path(), "proplengths")
   462  	tracker, err := inverted.NewJsonPropertyLengthTracker(plPath, s.index.logger)
   463  	if err != nil {
   464  		return errors.Wrapf(err, "init shard %q: prop length tracker", s.ID())
   465  	}
   466  
   467  	s.propLenTracker = tracker
   468  
   469  	if err := s.initProperties(class); err != nil {
   470  		return errors.Wrapf(err, "init shard %q: init per property indices", s.ID())
   471  	}
   472  
   473  	return nil
   474  }
   475  
// ID returns the globally unique shard id, derived from the parent index's
// id and the shard name.
func (s *Shard) ID() string {
	return shardId(s.index.ID(), s.name)
}
   479  
// path returns the shard's root directory on disk, nested under the parent
// index's path.
func (s *Shard) path() string {
	return shardPath(s.index.path(), s.name)
}
   483  
// pathLSM returns the directory holding the shard's lsmkv store ("lsm"
// inside the shard root).
func (s *Shard) pathLSM() string {
	return path.Join(s.path(), "lsm")
}
   487  
   488  func (s *Shard) vectorIndexID(targetVector string) string {
   489  	if targetVector != "" {
   490  		return fmt.Sprintf("vectors_%s", targetVector)
   491  	}
   492  	return "main"
   493  }
   494  
// uuidToIdLockPoolId maps an object's binary uuid onto one of the
// IdLockPoolSize mutexes in s.docIdLock.
// NOTE(review): this indexes byte 15 directly, i.e. it assumes idBytes is a
// full 16-byte uuid — shorter input would panic; confirm callers always pass
// the binary uuid form.
func (s *Shard) uuidToIdLockPoolId(idBytes []byte) uint8 {
	// use the last byte of the uuid to determine which locking-pool a given object should use. The last byte is used
	// as uuids probably often have some kind of order and the last byte will in general be the one that changes the most
	return idBytes[15] % IdLockPoolSize
}
   500  
   501  func (s *Shard) initLSMStore(ctx context.Context) error {
   502  	annotatedLogger := s.index.logger.WithFields(logrus.Fields{
   503  		"shard": s.name,
   504  		"index": s.index.ID(),
   505  		"class": s.index.Config.ClassName,
   506  	})
   507  	var metrics *lsmkv.Metrics
   508  	if s.promMetrics != nil {
   509  		metrics = lsmkv.NewMetrics(s.promMetrics, string(s.index.Config.ClassName), s.name)
   510  	}
   511  
   512  	store, err := lsmkv.New(s.pathLSM(), s.path(), annotatedLogger, metrics,
   513  		s.cycleCallbacks.compactionCallbacks, s.cycleCallbacks.flushCallbacks)
   514  	if err != nil {
   515  		return errors.Wrapf(err, "init lsmkv store at %s", s.pathLSM())
   516  	}
   517  
   518  	opts := []lsmkv.BucketOption{
   519  		lsmkv.WithStrategy(lsmkv.StrategyReplace),
   520  		lsmkv.WithSecondaryIndices(1),
   521  		lsmkv.WithPread(s.index.Config.AvoidMMap),
   522  		lsmkv.WithKeepTombstones(true),
   523  		s.dynamicMemtableSizing(),
   524  		s.memtableDirtyConfig(),
   525  	}
   526  
   527  	if s.metrics != nil && !s.metrics.grouped {
   528  		// If metrics are grouped we cannot observe the count of an individual
   529  		// shard's object store because there is just a single metric. We would
   530  		// override it. See https://github.com/weaviate/weaviate/issues/4396 for
   531  		// details.
   532  		opts = append(opts, lsmkv.WithMonitorCount())
   533  	}
   534  	err = store.CreateOrLoadBucket(ctx, helpers.ObjectsBucketLSM, opts...)
   535  	if err != nil {
   536  		return errors.Wrap(err, "create objects bucket")
   537  	}
   538  
   539  	s.store = store
   540  
   541  	return nil
   542  }
   543  
// IMPORTANT:
// Be advised there exists LazyLoadShard::drop() implementation intended
// to drop shard that was not loaded (instantiated) yet.
// It deletes shard by performing required actions and removing entire shard directory.
// If there is any action that needs to be performed beside files/dirs being removed
// from shard directory, it needs to be reflected as well in LazyLoadShard::drop()
// method to keep drop behaviour consistent.
//
// drop permanently removes the shard: it stops metrics and maintenance
// cycles, shuts down the lsmkv store, then deletes all on-disk components
// (lsm store, counter, version file, queues, vector indexes, prop-length
// tracker, property-specific indices). The teardown order matters: cycle
// callbacks are unregistered and the store shut down before any files are
// removed.
func (s *Shard) drop() error {
	s.metrics.DeleteShardLabels(s.index.Config.ClassName.String(), s.name)
	s.metrics.baseMetrics.StartUnloadingShard(s.index.Config.ClassName.String())
	s.replicationMap.clear()

	if s.index.Config.TrackVectorDimensions {
		// tracking vector dimensions goroutine only works when tracking is enabled
		// that's why we are trying to stop it only in this case
		s.stopMetrics <- struct{}{}
		// send 0 in when index gets dropped
		s.clearDimensionMetrics()
	}

	// bound the whole teardown; callbacks/store shutdown must not hang forever
	ctx, cancel := context.WithTimeout(context.TODO(), 5*time.Second)
	defer cancel()

	// unregister all callbacks at once, in parallel
	if err := cyclemanager.NewCombinedCallbackCtrl(0, s.index.logger,
		s.cycleCallbacks.compactionCallbacksCtrl,
		s.cycleCallbacks.flushCallbacksCtrl,
		s.cycleCallbacks.vectorCombinedCallbacksCtrl,
		s.cycleCallbacks.geoPropsCombinedCallbacksCtrl,
	).Unregister(ctx); err != nil {
		return err
	}

	if err := s.store.Shutdown(ctx); err != nil {
		return errors.Wrap(err, "stop lsmkv store")
	}

	if _, err := os.Stat(s.pathLSM()); err == nil {
		err := os.RemoveAll(s.pathLSM())
		if err != nil {
			return errors.Wrapf(err, "remove lsm store at %s", s.pathLSM())
		}
	}
	// delete indexcount
	err := s.counter.Drop()
	if err != nil {
		return errors.Wrapf(err, "remove indexcount at %s", s.path())
	}

	// delete version
	err = s.versioner.Drop()
	if err != nil {
		return errors.Wrapf(err, "remove version at %s", s.path())
	}

	// queues are dropped before their vector indexes, mirroring init order
	if s.hasTargetVectors() {
		// TODO run in parallel?
		for targetVector, queue := range s.queues {
			if err = queue.Drop(); err != nil {
				return fmt.Errorf("close queue of vector %q at %s: %w", targetVector, s.path(), err)
			}
		}
		for targetVector, vectorIndex := range s.vectorIndexes {
			if err = vectorIndex.Drop(ctx); err != nil {
				return fmt.Errorf("remove vector index of vector %q at %s: %w", targetVector, s.path(), err)
			}
		}
	} else {
		// delete queue cursor
		if err = s.queue.Drop(); err != nil {
			return errors.Wrapf(err, "close queue at %s", s.path())
		}
		// remove vector index
		if err = s.vectorIndex.Drop(ctx); err != nil {
			return errors.Wrapf(err, "remove vector index at %s", s.path())
		}
	}

	// delete property length tracker
	err = s.GetPropertyLengthTracker().Drop()
	if err != nil {
		return errors.Wrapf(err, "remove prop length tracker at %s", s.path())
	}

	s.propertyIndicesLock.Lock()
	err = s.propertyIndices.DropAll(ctx)
	s.propertyIndicesLock.Unlock()
	if err != nil {
		return errors.Wrapf(err, "remove property specific indices at %s", s.path())
	}

	s.metrics.baseMetrics.FinishUnloadingShard(s.index.Config.ClassName.String())

	return nil
}
   639  
   640  func (s *Shard) addIDProperty(ctx context.Context) error {
   641  	if s.isReadOnly() {
   642  		return storagestate.ErrStatusReadOnly
   643  	}
   644  
   645  	return s.store.CreateOrLoadBucket(ctx,
   646  		helpers.BucketFromPropNameLSM(filters.InternalPropID),
   647  		s.memtableDirtyConfig(),
   648  		lsmkv.WithStrategy(lsmkv.StrategySetCollection),
   649  		lsmkv.WithPread(s.index.Config.AvoidMMap))
   650  }
   651  
   652  func (s *Shard) addDimensionsProperty(ctx context.Context) error {
   653  	if s.isReadOnly() {
   654  		return storagestate.ErrStatusReadOnly
   655  	}
   656  
   657  	// Note: this data would fit the "Set" type better, but since the "Map" type
   658  	// is currently optimized better, it is more efficient to use a Map here.
   659  	err := s.store.CreateOrLoadBucket(ctx,
   660  		helpers.DimensionsBucketLSM,
   661  		lsmkv.WithStrategy(lsmkv.StrategyMapCollection),
   662  		lsmkv.WithPread(s.index.Config.AvoidMMap))
   663  	if err != nil {
   664  		return err
   665  	}
   666  
   667  	return nil
   668  }
   669  
   670  func (s *Shard) addTimestampProperties(ctx context.Context) error {
   671  	if s.isReadOnly() {
   672  		return storagestate.ErrStatusReadOnly
   673  	}
   674  
   675  	if err := s.addCreationTimeUnixProperty(ctx); err != nil {
   676  		return err
   677  	}
   678  	if err := s.addLastUpdateTimeUnixProperty(ctx); err != nil {
   679  		return err
   680  	}
   681  
   682  	return nil
   683  }
   684  
   685  func (s *Shard) addCreationTimeUnixProperty(ctx context.Context) error {
   686  	return s.store.CreateOrLoadBucket(ctx,
   687  		helpers.BucketFromPropNameLSM(filters.InternalPropCreationTimeUnix),
   688  		s.memtableDirtyConfig(),
   689  		lsmkv.WithStrategy(lsmkv.StrategyRoaringSet),
   690  		lsmkv.WithPread(s.index.Config.AvoidMMap))
   691  }
   692  
   693  func (s *Shard) addLastUpdateTimeUnixProperty(ctx context.Context) error {
   694  	return s.store.CreateOrLoadBucket(ctx,
   695  		helpers.BucketFromPropNameLSM(filters.InternalPropLastUpdateTimeUnix),
   696  		s.memtableDirtyConfig(),
   697  		lsmkv.WithStrategy(lsmkv.StrategyRoaringSet),
   698  		lsmkv.WithPread(s.index.Config.AvoidMMap))
   699  }
   700  
// memtableDirtyConfig returns the bucket option that flushes a dirty
// memtable after the index-configured number of seconds.
func (s *Shard) memtableDirtyConfig() lsmkv.BucketOption {
	return lsmkv.WithDirtyThreshold(
		time.Duration(s.index.Config.MemtablesFlushDirtyAfter) * time.Second)
}
   705  
// dynamicMemtableSizing returns the bucket option that lets memtables grow
// between the index-configured initial and max sizes, bounded by the
// min/max active-seconds settings.
func (s *Shard) dynamicMemtableSizing() lsmkv.BucketOption {
	return lsmkv.WithDynamicMemtableSizing(
		s.index.Config.MemtablesInitialSizeMB,
		s.index.Config.MemtablesMaxSizeMB,
		s.index.Config.MemtablesMinActiveSeconds,
		s.index.Config.MemtablesMaxActiveSeconds,
	)
}
   714  
// createPropertyIndex schedules creation of the inverted indexes for prop on
// the supplied error group: the value index always, plus null-state and
// property-length indexes when enabled in the class's inverted-index config.
// Properties without an inverted index are skipped entirely.
func (s *Shard) createPropertyIndex(ctx context.Context, prop *models.Property, eg *enterrors.ErrorGroupWrapper) {
	if !inverted.HasInvertedIndex(prop) {
		return
	}

	eg.Go(func() error {
		if err := s.createPropertyValueIndex(ctx, prop); err != nil {
			return errors.Wrapf(err, "create property '%s' value index on shard '%s'", prop.Name, s.ID())
		}

		// the null/length tasks are spawned from inside this eg.Go callback,
		// so they only start once the value index was created successfully
		if s.index.invertedIndexConfig.IndexNullState {
			eg.Go(func() error {
				if err := s.createPropertyNullIndex(ctx, prop); err != nil {
					return errors.Wrapf(err, "create property '%s' null index on shard '%s'", prop.Name, s.ID())
				}
				return nil
			})
		}

		if s.index.invertedIndexConfig.IndexPropertyLength {
			eg.Go(func() error {
				if err := s.createPropertyLengthIndex(ctx, prop); err != nil {
					return errors.Wrapf(err, "create property '%s' length index on shard '%s'", prop.Name, s.ID())
				}
				return nil
			})
		}

		return nil
	})
}
   746  
   747  func (s *Shard) createPropertyValueIndex(ctx context.Context, prop *models.Property) error {
   748  	if s.isReadOnly() {
   749  		return storagestate.ErrStatusReadOnly
   750  	}
   751  
   752  	bucketOpts := []lsmkv.BucketOption{
   753  		s.memtableDirtyConfig(),
   754  		s.dynamicMemtableSizing(),
   755  		lsmkv.WithPread(s.index.Config.AvoidMMap),
   756  	}
   757  
   758  	if inverted.HasFilterableIndex(prop) {
   759  		if dt, _ := schema.AsPrimitive(prop.DataType); dt == schema.DataTypeGeoCoordinates {
   760  			return s.initGeoProp(prop)
   761  		}
   762  
   763  		if schema.IsRefDataType(prop.DataType) {
   764  			if err := s.store.CreateOrLoadBucket(ctx,
   765  				helpers.BucketFromPropNameMetaCountLSM(prop.Name),
   766  				append(bucketOpts, lsmkv.WithStrategy(lsmkv.StrategyRoaringSet))...,
   767  			); err != nil {
   768  				return err
   769  			}
   770  		}
   771  
   772  		if err := s.store.CreateOrLoadBucket(ctx,
   773  			helpers.BucketFromPropNameLSM(prop.Name),
   774  			append(bucketOpts, lsmkv.WithStrategy(lsmkv.StrategyRoaringSet))...,
   775  		); err != nil {
   776  			return err
   777  		}
   778  	}
   779  
   780  	if inverted.HasSearchableIndex(prop) {
   781  		searchableBucketOpts := append(bucketOpts,
   782  			lsmkv.WithStrategy(lsmkv.StrategyMapCollection), lsmkv.WithPread(s.index.Config.AvoidMMap))
   783  		if s.versioner.Version() < 2 {
   784  			searchableBucketOpts = append(searchableBucketOpts, lsmkv.WithLegacyMapSorting())
   785  		}
   786  
   787  		if err := s.store.CreateOrLoadBucket(ctx,
   788  			helpers.BucketSearchableFromPropNameLSM(prop.Name),
   789  			searchableBucketOpts...,
   790  		); err != nil {
   791  			return err
   792  		}
   793  	}
   794  
   795  	return nil
   796  }
   797  
   798  func (s *Shard) createPropertyLengthIndex(ctx context.Context, prop *models.Property) error {
   799  	if s.isReadOnly() {
   800  		return storagestate.ErrStatusReadOnly
   801  	}
   802  
   803  	// some datatypes are not added to the inverted index, so we can skip them here
   804  	switch schema.DataType(prop.DataType[0]) {
   805  	case schema.DataTypeGeoCoordinates, schema.DataTypePhoneNumber, schema.DataTypeBlob, schema.DataTypeInt,
   806  		schema.DataTypeNumber, schema.DataTypeBoolean, schema.DataTypeDate:
   807  		return nil
   808  	default:
   809  	}
   810  
   811  	return s.store.CreateOrLoadBucket(ctx,
   812  		helpers.BucketFromPropNameLengthLSM(prop.Name),
   813  		lsmkv.WithStrategy(lsmkv.StrategyRoaringSet),
   814  		lsmkv.WithPread(s.index.Config.AvoidMMap))
   815  }
   816  
   817  func (s *Shard) createPropertyNullIndex(ctx context.Context, prop *models.Property) error {
   818  	if s.isReadOnly() {
   819  		return storagestate.ErrStatusReadOnly
   820  	}
   821  
   822  	return s.store.CreateOrLoadBucket(ctx,
   823  		helpers.BucketFromPropNameNullLSM(prop.Name),
   824  		lsmkv.WithStrategy(lsmkv.StrategyRoaringSet),
   825  		lsmkv.WithPread(s.index.Config.AvoidMMap))
   826  }
   827  
// UpdateVectorIndexConfig applies an updated vector-index user config to the
// shard's (single) vector index. The shard is marked read-only for the
// duration of the update; the index restores the ready status via the
// supplied callback once the config has been applied.
func (s *Shard) UpdateVectorIndexConfig(ctx context.Context, updated schema.VectorIndexConfig) error {
	if s.isReadOnly() {
		return storagestate.ErrStatusReadOnly
	}

	err := s.UpdateStatus(storagestate.StatusReadOnly.String())
	if err != nil {
		return fmt.Errorf("attempt to mark read-only: %w", err)
	}

	// NOTE(review): the error of the status reset inside the callback is
	// discarded — if it fails, the shard may remain read-only; confirm this
	// is intended
	return s.VectorIndex().UpdateUserConfig(updated, func() {
		s.UpdateStatus(storagestate.StatusReady.String())
	})
}
   842  
   843  func (s *Shard) UpdateVectorIndexConfigs(ctx context.Context, updated map[string]schema.VectorIndexConfig) error {
   844  	if s.isReadOnly() {
   845  		return storagestate.ErrStatusReadOnly
   846  	}
   847  	if err := s.UpdateStatus(storagestate.StatusReadOnly.String()); err != nil {
   848  		return fmt.Errorf("attempt to mark read-only: %w", err)
   849  	}
   850  
   851  	wg := new(sync.WaitGroup)
   852  	var err error
   853  	for targetName, targetCfg := range updated {
   854  		wg.Add(1)
   855  		if err = s.VectorIndexForName(targetName).UpdateUserConfig(targetCfg, wg.Done); err != nil {
   856  			break
   857  		}
   858  	}
   859  
   860  	f := func() {
   861  		wg.Wait()
   862  		s.UpdateStatus(storagestate.StatusReady.String())
   863  	}
   864  	enterrors.GoWrapper(f, s.index.logger)
   865  
   866  	return err
   867  }
   868  
// Shutdown tears the shard down in order: stop the dimension-tracking
// metrics goroutine (if enabled), close the property-length tracker, shut
// down the vector queue(s) and index(es), unregister all cycle callbacks,
// and finally shut down the LSM store. The first error aborts the teardown.
func (s *Shard) Shutdown(ctx context.Context) error {
	if s.index.Config.TrackVectorDimensions {
		// tracking vector dimensions goroutine only works when tracking is enabled
		// that's why we are trying to stop it only in this case
		// NOTE(review): this send blocks until the metrics goroutine receives;
		// confirm the goroutine is guaranteed to be running here
		s.stopMetrics <- struct{}{}
	}

	var err error
	if err = s.GetPropertyLengthTracker().Close(); err != nil {
		return errors.Wrap(err, "close prop length tracker")
	}

	if s.hasTargetVectors() {
		// named (target) vectors: close every queue before flushing/shutting
		// down the corresponding index
		// TODO run in parallel?
		for targetVector, queue := range s.queues {
			if err = queue.Close(); err != nil {
				return fmt.Errorf("shut down vector index queue of vector %q: %w", targetVector, err)
			}
		}
		for targetVector, vectorIndex := range s.vectorIndexes {
			if err = vectorIndex.Flush(); err != nil {
				return fmt.Errorf("flush vector index commitlog of vector %q: %w", targetVector, err)
			}
			if err = vectorIndex.Shutdown(ctx); err != nil {
				return fmt.Errorf("shut down vector index of vector %q: %w", targetVector, err)
			}
		}
	} else {
		// legacy single-vector path
		if err = s.queue.Close(); err != nil {
			return errors.Wrap(err, "shut down vector index queue")
		}
		// to ensure that all commitlog entries are written to disk.
		// otherwise in some cases the tombstone cleanup process'
		// 'RemoveTombstone' entry is not picked up on restarts
		// resulting in perpetually attempting to remove a tombstone
		// which doesn't actually exist anymore
		if err = s.vectorIndex.Flush(); err != nil {
			return errors.Wrap(err, "flush vector index commitlog")
		}
		if err = s.vectorIndex.Shutdown(ctx); err != nil {
			return errors.Wrap(err, "shut down vector index")
		}
	}

	// unregister all callbacks at once, in parallel
	if err = cyclemanager.NewCombinedCallbackCtrl(0, s.index.logger,
		s.cycleCallbacks.compactionCallbacksCtrl,
		s.cycleCallbacks.flushCallbacksCtrl,
		s.cycleCallbacks.vectorCombinedCallbacksCtrl,
		s.cycleCallbacks.geoPropsCombinedCallbacksCtrl,
	).Unregister(ctx); err != nil {
		return err
	}

	// the LSM store goes last so earlier steps can still read/write buckets
	if err = s.store.Shutdown(ctx); err != nil {
		return errors.Wrap(err, "stop lsmkv store")
	}

	return nil
}
   929  
   930  func (s *Shard) NotifyReady() {
   931  	s.initStatus()
   932  	s.index.logger.
   933  		WithField("action", "startup").
   934  		Debugf("shard=%s is ready", s.name)
   935  }
   936  
   937  // ObjectCount returns the exact count at any moment
   938  func (s *Shard) ObjectCount() int {
   939  	b := s.store.Bucket(helpers.ObjectsBucketLSM)
   940  	if b == nil {
   941  		return 0
   942  	}
   943  
   944  	return b.Count()
   945  }
   946  
   947  // ObjectCountAsync returns the eventually consistent "async" count which is
   948  // much cheaper to obtain
   949  func (s *Shard) ObjectCountAsync() int {
   950  	b := s.store.Bucket(helpers.ObjectsBucketLSM)
   951  	if b == nil {
   952  		return 0
   953  	}
   954  
   955  	return b.CountAsync()
   956  }
   957  
// isFallbackToSearchable reports whether filterable lookups should fall back
// to the searchable (map-collection) buckets on this shard.
func (s *Shard) isFallbackToSearchable() bool {
	return s.fallbackToSearchable
}
   961  
   962  func (s *Shard) tenant() string {
   963  	// TODO provide better impl
   964  	if s.index.partitioningEnabled {
   965  		return s.name
   966  	}
   967  	return ""
   968  }
   969  
   970  func shardId(indexId, shardName string) string {
   971  	return fmt.Sprintf("%s_%s", indexId, shardName)
   972  }
   973  
// shardPath returns the on-disk directory of a shard inside its index's
// directory.
// NOTE(review): uses path.Join (slash-separated) rather than filepath.Join —
// fine on Unix, but worth confirming if Windows support matters.
func shardPath(indexPath, shardName string) string {
	return path.Join(indexPath, shardName)
}
   977  
// bucketKeyPropertyLength encodes a property-value length as a
// lexicographically sortable key for the property-length bucket.
func bucketKeyPropertyLength(length int) ([]byte, error) {
	return inverted.LexicographicallySortableInt64(int64(length))
}
   981  
   982  func bucketKeyPropertyNull(isNull bool) ([]byte, error) {
   983  	if isNull {
   984  		return []byte{uint8(filters.InternalNullState)}, nil
   985  	}
   986  	return []byte{uint8(filters.InternalNotNullState)}, nil
   987  }