github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/index.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"io"
    18  	"math"
    19  	"math/rand"
    20  	"strings"
    21  	"sync"
    22  	"sync/atomic"
    23  
    24  	"github.com/pkg/errors"
    25  	"github.com/sirupsen/logrus"
    26  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv"
    27  	"github.com/weaviate/weaviate/adapters/repos/db/priorityqueue"
    28  	"github.com/weaviate/weaviate/adapters/repos/db/vector/cache"
    29  	"github.com/weaviate/weaviate/adapters/repos/db/vector/common"
    30  	"github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers"
    31  	"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer"
    32  	"github.com/weaviate/weaviate/entities/cyclemanager"
    33  	"github.com/weaviate/weaviate/entities/schema"
    34  	"github.com/weaviate/weaviate/entities/storobj"
    35  	ent "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    36  )
    37  
// hnsw is the in-memory HNSW graph index over float32 vectors. It owns the
// graph nodes, the vector cache, the commit log used for persistence, and
// the tombstone bookkeeping required for deletes.
type hnsw struct {
	// global lock to prevent concurrent map read/write, etc.
	sync.RWMutex

	// certain operations related to deleting, such as finding a new entrypoint
	// can only run sequentially, this separate lock helps assuring this without
	// blocking the general usage of the hnsw index
	deleteLock *sync.Mutex

	// guards access to the tombstones map below
	tombstoneLock *sync.RWMutex

	// prevents tombstones cleanup to be performed in parallel with index reset operation
	resetLock *sync.Mutex
	// indicates whether reset operation occurred or not - if so tombstones cleanup method
	// is aborted as it makes no sense anymore
	resetCtx       context.Context
	resetCtxCancel context.CancelFunc

	// indicates the index is shutting down
	shutdownCtx       context.Context
	shutdownCtxCancel context.CancelFunc

	// make sure the very first insert happens just once, otherwise we
	// accidentally overwrite previous entrypoints on parallel imports on an
	// empty graph
	initialInsertOnce *sync.Once

	// Each node should not have more edges than this number
	maximumConnections int

	// Nodes in the lowest level have a separate (usually higher) max connection
	// limit
	maximumConnectionsLayerZero int

	// the current maximum can be smaller than the configured maximum because of
	// the exponentially decaying layer function. The initial entry is started at
	// layer 0, but this has the chance to grow with every subsequent entry
	currentMaximumLayer int

	// this is a point on the highest level, if we insert a new point with a
	// higher level it will become the new entry point. Note that the level of
	// this point is always currentMaximumLayer
	entryPointID uint64

	// ef parameter used in construction phases, should be higher than ef during querying
	efConstruction int

	// ef at search time
	ef int64

	// only used if ef=-1
	efMin    int64
	efMax    int64
	efFactor int64

	// on filtered searches with less than n elements, perform flat search
	flatSearchCutoff int64

	// scaling factor for the random level assignment, derived from the
	// configured max connections in New
	levelNormalizer float64

	// the graph itself; a node's id doubles as its index into this slice
	nodes []*vertex

	vectorForID          common.VectorForID[float32]
	TempVectorForIDThunk common.TempVectorForID
	multiVectorForID     common.MultiVectorForID
	trackDimensionsOnce  sync.Once
	dims                 int32

	// cache of uncompressed vectors; set to nil when BQ compression is
	// enabled at construction time (see New)
	cache cache.Cache[float32]

	// persistence log; every graph mutation is written here so the index can
	// be rebuilt after a restart
	commitLog CommitLogger

	// a lookup of current tombstones (i.e. nodes that have received a tombstone,
	// but have not been cleaned up yet) Cleanup is the process of removal of all
	// outgoing edges to the tombstone as well as deleting the tombstone itself.
	// This process should happen periodically.
	tombstones map[uint64]struct{}

	tombstoneCleanupCallbackCtrl cyclemanager.CycleCallbackCtrl
	shardCompactionCallbacks     cyclemanager.CycleCallbackGroup
	shardFlushCallbacks          cyclemanager.CycleCallbackGroup

	// // for distributed spike, can be used to call a insertExternal on a different graph
	// insertHook func(node, targetLevel int, neighborsAtLevel map[int][]uint32)

	id       string
	rootPath string

	logger            logrus.FieldLogger
	distancerProvider distancer.Provider

	// pooled allocations (e.g. priority queues) reused across operations
	pools *pools

	forbidFlat bool // mostly used in testing scenarios where we want to use the index even in scenarios where we typically wouldn't

	metrics       *Metrics
	insertMetrics *insertMetrics

	randFunc func() float64 // added to temporarily get rid on flakiness in tombstones related tests. to be removed after fixing WEAVIATE-179

	// The deleteVsInsertLock makes sure that there are no concurrent delete and
	// insert operations happening. It uses an RW-Mutex with:
	//
	// RLock -> Insert operations, this means any number of import operations can
	// happen concurrently.
	//
	// Lock -> Delete operation. This means only a single delete operation can
	// occur at a time, no insert operation can occur simultaneously with a
	// delete. Since the delete is cheap (just marking the node as deleted), the
	// single-threadedness of deletes is not a big problem.
	//
	// This lock was introduced as part of
	// https://github.com/weaviate/weaviate/issues/2194
	//
	// See
	// https://github.com/weaviate/weaviate/pull/2191#issuecomment-1242726787
	// where we ran performance tests to make sure introducing this lock has no
	// negative impact on performance.
	deleteVsInsertLock sync.RWMutex

	// true once the index operates on compressed vectors (BQ at construction,
	// or PQ after training)
	compressed   atomic.Bool
	doNotRescore bool

	compressor compressionhelpers.VectorCompressor
	pqConfig   ent.PQConfig

	compressActionLock *sync.RWMutex
	className          string
	shardName          string
	VectorForIDThunk   common.VectorForID[float32]
	shardedNodeLocks   *common.ShardedRWLocks
	store              *lsmkv.Store
}
   171  
// CommitLogger is the persistence interface of the index. Every mutation of
// the graph — nodes, links, tombstones, entrypoint changes, PQ data — is
// recorded through it so the index can be restored from disk after a restart.
type CommitLogger interface {
	ID() string
	AddNode(node *vertex) error
	SetEntryPointWithMaxLayer(id uint64, level int) error
	AddLinkAtLevel(nodeid uint64, level int, target uint64) error
	ReplaceLinksAtLevel(nodeid uint64, level int, targets []uint64) error
	AddTombstone(nodeid uint64) error
	RemoveTombstone(nodeid uint64) error
	DeleteNode(nodeid uint64) error
	ClearLinks(nodeid uint64) error
	ClearLinksAtLevel(nodeid uint64, level uint16) error
	Reset() error
	Drop(ctx context.Context) error
	Flush() error
	Shutdown(ctx context.Context) error
	RootPath() string
	SwitchCommitLogs(bool) error
	AddPQ(compressionhelpers.PQData) error
}
   191  
// BufferedLinksLogger is a buffering wrapper around link-related commit-log
// writes. Callers must call Close when done.
type BufferedLinksLogger interface {
	AddLinkAtLevel(nodeid uint64, level int, target uint64) error
	ReplaceLinksAtLevel(nodeid uint64, level int, targets []uint64) error
	Close() error // Close should Flush and Close
}
   197  
   198  type MakeCommitLogger func() (CommitLogger, error)
   199  
// New creates a new HNSW index, the commit logger is provided through a thunk
// (a function which can be deferred). This is because creating a commit logger
// opens files for writing. However, checking whether a file is present, is a
// criterion for the index to see if it has to recover from disk or if it's a
// truly new index. So instead the index is initialized, with un-biased disk
// checks first and only then is the commit logger created
func New(cfg Config, uc ent.UserConfig, tombstoneCallbacks, shardCompactionCallbacks,
	shardFlushCallbacks cyclemanager.CycleCallbackGroup, store *lsmkv.Store,
) (*hnsw, error) {
	if err := cfg.Validate(); err != nil {
		return nil, errors.Wrap(err, "invalid config")
	}

	// fall back to a no-op logger when none was provided
	if cfg.Logger == nil {
		logger := logrus.New()
		logger.Out = io.Discard
		cfg.Logger = logger
	}

	// cosine-dot requires normalized vectors, so have the cache normalize
	// vectors as they are read (see also normalizeVec)
	normalizeOnRead := false
	if cfg.DistanceProvider.Type() == "cosine-dot" {
		normalizeOnRead = true
	}

	vectorCache := cache.NewShardedFloat32LockCache(cfg.VectorForIDThunk, uc.VectorCacheMaxObjects,
		cfg.Logger, normalizeOnRead, cache.DefaultDeletionInterval)

	resetCtx, resetCtxCancel := context.WithCancel(context.Background())
	shutdownCtx, shutdownCtxCancel := context.WithCancel(context.Background())
	index := &hnsw{
		maximumConnections: uc.MaxConnections,

		// inspired by original paper and other implementations
		maximumConnectionsLayerZero: 2 * uc.MaxConnections,

		// inspired by c++ implementation
		levelNormalizer:   1 / math.Log(float64(uc.MaxConnections)),
		efConstruction:    uc.EFConstruction,
		flatSearchCutoff:  int64(uc.FlatSearchCutoff),
		nodes:             make([]*vertex, cache.InitialSize),
		cache:             vectorCache,
		vectorForID:       vectorCache.Get,
		multiVectorForID:  vectorCache.MultiGet,
		id:                cfg.ID,
		rootPath:          cfg.RootPath,
		tombstones:        map[uint64]struct{}{},
		logger:            cfg.Logger,
		distancerProvider: cfg.DistanceProvider,
		deleteLock:        &sync.Mutex{},
		tombstoneLock:     &sync.RWMutex{},
		resetLock:         &sync.Mutex{},
		resetCtx:          resetCtx,
		resetCtxCancel:    resetCtxCancel,
		shutdownCtx:       shutdownCtx,
		shutdownCtxCancel: shutdownCtxCancel,
		initialInsertOnce: &sync.Once{},

		ef:       int64(uc.EF),
		efMin:    int64(uc.DynamicEFMin),
		efMax:    int64(uc.DynamicEFMax),
		efFactor: int64(uc.DynamicEFFactor),

		metrics:   NewMetrics(cfg.PrometheusMetrics, cfg.ClassName, cfg.ShardName),
		shardName: cfg.ShardName,

		randFunc:             rand.Float64,
		compressActionLock:   &sync.RWMutex{},
		className:            cfg.ClassName,
		VectorForIDThunk:     cfg.VectorForIDThunk,
		TempVectorForIDThunk: cfg.TempVectorForIDThunk,
		pqConfig:             uc.PQ,
		shardedNodeLocks:     common.NewDefaultShardedRWLocks(),

		shardCompactionCallbacks: shardCompactionCallbacks,
		shardFlushCallbacks:      shardFlushCallbacks,
		store:                    store,
	}

	// with BQ enabled, the compressor takes over vector storage and the
	// uncompressed vector cache is dropped right away
	if uc.BQ.Enabled {
		var err error
		index.compressor, err = compressionhelpers.NewBQCompressor(index.distancerProvider, uc.VectorCacheMaxObjects, cfg.Logger, store)
		if err != nil {
			return nil, err
		}
		index.compressed.Store(true)
		index.cache.Drop()
		index.cache = nil
	}

	if err := index.init(cfg); err != nil {
		return nil, errors.Wrapf(err, "init index %q", index.id)
	}

	// TODO common_cycle_manager move to poststartup?
	id := strings.Join([]string{
		"hnsw", "tombstone_cleanup",
		index.className, index.shardName, index.id,
	}, "/")
	index.tombstoneCleanupCallbackCtrl = tombstoneCallbacks.Register(id, index.tombstoneCleanup)
	index.insertMetrics = newInsertMetrics(index.metrics)

	return index, nil
}
   303  
   304  // TODO: use this for incoming replication
   305  // func (h *hnsw) insertFromExternal(nodeId, targetLevel int, neighborsAtLevel map[int][]uint32) {
   306  // 	defer m.addBuildingReplication(time.Now())
   307  
   308  // 	// randomly introduce up to 50ms delay to account for network slowness
   309  // 	time.Sleep(time.Duration(rand.Intn(500)) * time.Millisecond)
   310  
   311  // 	var node *hnswVertex
   312  // 	h.RLock()
   313  // 	total := len(h.nodes)
   314  // 	if total > nodeId {
   315  // 		node = h.nodes[nodeId] // it could be that we implicitly added this node already because it was referenced
   316  // 	}
   317  // 	h.RUnlock()
   318  
   319  // 	if node == nil {
   320  // 		node = &hnswVertex{
   321  // 			id:          nodeId,
   322  // 			connections: make(map[int][]uint32),
   323  // 			level:       targetLevel,
   324  // 		}
   325  // 	} else {
   326  // 		node.level = targetLevel
   327  // 	}
   328  
   329  // 	if total == 0 {
   330  // 		h.Lock()
   331  // 		h.commitLog.SetEntryPointWithMaxLayer(node.id, 0)
   332  // 		h.entryPointID = node.id
   333  // 		h.currentMaximumLayer = 0
   334  // 		node.connections = map[int][]uint32{}
   335  // 		node.level = 0
   336  // 		// h.nodes = make([]*hnswVertex, 100000)
   337  // 		h.commitLog.AddNode(node)
   338  // 		h.nodes[node.id] = node
   339  // 		h.Unlock()
   340  // 		return
   341  // 	}
   342  
   343  // 	currentMaximumLayer := h.currentMaximumLayer
   344  // 	h.Lock()
   345  // 	h.nodes[nodeId] = node
   346  // 	h.commitLog.AddNode(node)
   347  // 	h.Unlock()
   348  
   349  // 	for level := min(targetLevel, currentMaximumLayer); level >= 0; level-- {
   350  // 		neighbors := neighborsAtLevel[level]
   351  
   352  // 		for _, neighborID := range neighbors {
   353  // 			h.RLock()
   354  // 			neighbor := h.nodes[neighborID]
   355  // 			if neighbor == nil {
   356  // 				// due to everything being parallel it could be that the linked neighbor
   357  // 				// doesn't exist yet
   358  // 				h.nodes[neighborID] = &hnswVertex{
   359  // 					id:          int(neighborID),
   360  // 					connections: make(map[int][]uint32),
   361  // 				}
   362  // 				neighbor = h.nodes[neighborID]
   363  // 			}
   364  // 			h.RUnlock()
   365  
   366  // 			neighbor.linkAtLevel(level, uint32(nodeId), h.commitLog)
   367  // 			node.linkAtLevel(level, uint32(neighbor.id), h.commitLog)
   368  
   369  // 			neighbor.RLock()
   370  // 			currentConnections := neighbor.connections[level]
   371  // 			neighbor.RUnlock()
   372  
   373  // 			maximumConnections := h.maximumConnections
   374  // 			if level == 0 {
   375  // 				maximumConnections = h.maximumConnectionsLayerZero
   376  // 			}
   377  
   378  // 			if len(currentConnections) <= maximumConnections {
   379  // 				// nothing to do, skip
   380  // 				continue
   381  // 			}
   382  
   383  // 			// TODO: support both neighbor selection algos
   384  // 			updatedConnections := h.selectNeighborsSimpleFromId(nodeId, currentConnections, maximumConnections)
   385  
   386  // 			neighbor.Lock()
   387  // 			h.commitLog.ReplaceLinksAtLevel(neighbor.id, level, updatedConnections)
   388  // 			neighbor.connections[level] = updatedConnections
   389  // 			neighbor.Unlock()
   390  // 		}
   391  // 	}
   392  
   393  // 	if targetLevel > h.currentMaximumLayer {
   394  // 		h.Lock()
   395  // 		h.commitLog.SetEntryPointWithMaxLayer(nodeId, targetLevel)
   396  // 		h.entryPointID = nodeId
   397  // 		h.currentMaximumLayer = targetLevel
   398  // 		h.Unlock()
   399  // 	}
   400  
   401  // }
   402  
// findBestEntrypointForNode searches the layers above the new node's target
// level for a candidate closer to nodeVec and returns it as the entrypoint to
// use for the insertion at targetLevel and below. If no better candidate is
// found (e.g. everything discovered is tombstoned or under maintenance), the
// passed-in entryPointID is returned unchanged.
func (h *hnsw) findBestEntrypointForNode(currentMaxLevel, targetLevel int,
	entryPointID uint64, nodeVec []float32, distancer compressionhelpers.CompressorDistancer,
) (uint64, error) {
	// in case the new target is lower than the current max, we need to search
	// each layer for a better candidate and update the candidate
	for level := currentMaxLevel; level > targetLevel; level-- {
		eps := priorityqueue.NewMin[any](1)
		var dist float32
		var ok bool
		var err error
		if h.compressed.Load() {
			dist, ok, err = distancer.DistanceToNode(entryPointID)
			var e storobj.ErrNotFound
			if errors.As(err, &e) {
				// the entrypoint's underlying object no longer exists in the
				// store, let the index react to the deleted node
				h.handleDeletedNode(e.DocID)
			}
		} else {
			dist, ok, err = h.distBetweenNodeAndVec(entryPointID, nodeVec)
		}
		if err != nil {
			return 0, errors.Wrapf(err,
				"calculate distance between insert node and entry point at level %d", level)
		}
		if !ok {
			// distance could not be computed (e.g. deleted node); try the next
			// layer with the unchanged entrypoint
			continue
		}

		eps.Insert(entryPointID, dist)
		res, err := h.searchLayerByVectorWithDistancer(nodeVec, eps, 1, level, nil, distancer)
		if err != nil {
			return 0,
				errors.Wrapf(err, "update candidate: search layer at level %d", level)
		}
		if res.Len() > 0 {
			// if we could find a new entrypoint, use it
			// in case everything was tombstoned, stick with the existing one
			elem := res.Pop()
			n := h.nodeByID(elem.ID)
			if n != nil && !n.isUnderMaintenance() {
				// but not if the entrypoint is under maintenance
				entryPointID = elem.ID
			}
		}

		// return the result queue to the pool for reuse
		h.pools.pqResults.Put(res)
	}

	return entryPointID, nil
}
   452  
   453  func min(a, b int) int {
   454  	if a < b {
   455  		return a
   456  	}
   457  	return b
   458  }
   459  
   460  func (h *hnsw) distBetweenNodes(a, b uint64) (float32, bool, error) {
   461  	if h.compressed.Load() {
   462  		dist, err := h.compressor.DistanceBetweenCompressedVectorsFromIDs(context.Background(), a, b)
   463  		if err != nil {
   464  			var e storobj.ErrNotFound
   465  			if errors.As(err, &e) {
   466  				h.handleDeletedNode(e.DocID)
   467  				return 0, false, nil
   468  			} else {
   469  				return 0, false, err
   470  			}
   471  		}
   472  
   473  		return dist, true, nil
   474  	}
   475  
   476  	// TODO: introduce single search/transaction context instead of spawning new
   477  	// ones
   478  	vecA, err := h.vectorForID(context.Background(), a)
   479  	if err != nil {
   480  		var e storobj.ErrNotFound
   481  		if errors.As(err, &e) {
   482  			h.handleDeletedNode(e.DocID)
   483  			return 0, false, nil
   484  		} else {
   485  			// not a typed error, we can recover from, return with err
   486  			return 0, false, errors.Wrapf(err,
   487  				"could not get vector of object at docID %d", a)
   488  		}
   489  	}
   490  
   491  	if len(vecA) == 0 {
   492  		return 0, false, fmt.Errorf("got a nil or zero-length vector at docID %d", a)
   493  	}
   494  
   495  	vecB, err := h.vectorForID(context.Background(), b)
   496  	if err != nil {
   497  		var e storobj.ErrNotFound
   498  		if errors.As(err, &e) {
   499  			h.handleDeletedNode(e.DocID)
   500  			return 0, false, nil
   501  		} else {
   502  			// not a typed error, we can recover from, return with err
   503  			return 0, false, errors.Wrapf(err,
   504  				"could not get vector of object at docID %d", b)
   505  		}
   506  	}
   507  
   508  	if len(vecB) == 0 {
   509  		return 0, false, fmt.Errorf("got a nil or zero-length vector at docID %d", b)
   510  	}
   511  
   512  	return h.distancerProvider.SingleDist(vecA, vecB)
   513  }
   514  
   515  func (h *hnsw) distBetweenNodeAndVec(node uint64, vecB []float32) (float32, bool, error) {
   516  	if h.compressed.Load() {
   517  		dist, err := h.compressor.DistanceBetweenCompressedAndUncompressedVectorsFromID(context.Background(), node, vecB)
   518  		if err != nil {
   519  			var e storobj.ErrNotFound
   520  			if errors.As(err, &e) {
   521  				h.handleDeletedNode(e.DocID)
   522  				return 0, false, nil
   523  			} else {
   524  				return 0, false, err
   525  			}
   526  		}
   527  
   528  		return dist, true, nil
   529  	}
   530  
   531  	// TODO: introduce single search/transaction context instead of spawning new
   532  	// ones
   533  	vecA, err := h.vectorForID(context.Background(), node)
   534  	if err != nil {
   535  		var e storobj.ErrNotFound
   536  		if errors.As(err, &e) {
   537  			h.handleDeletedNode(e.DocID)
   538  			return 0, false, nil
   539  		} else {
   540  			// not a typed error, we can recover from, return with err
   541  			return 0, false, errors.Wrapf(err,
   542  				"could not get vector of object at docID %d", node)
   543  		}
   544  	}
   545  
   546  	if len(vecA) == 0 {
   547  		return 0, false, fmt.Errorf(
   548  			"got a nil or zero-length vector at docID %d", node)
   549  	}
   550  
   551  	if len(vecB) == 0 {
   552  		return 0, false, fmt.Errorf(
   553  			"got a nil or zero-length vector as search vector")
   554  	}
   555  
   556  	return h.distancerProvider.SingleDist(vecA, vecB)
   557  }
   558  
   559  func (h *hnsw) Stats() {
   560  	fmt.Printf("levels: %d\n", h.currentMaximumLayer)
   561  
   562  	perLevelCount := map[int]uint{}
   563  
   564  	for _, node := range h.nodes {
   565  		if node == nil {
   566  			continue
   567  		}
   568  		l := node.level
   569  		if l == 0 && len(node.connections) == 0 {
   570  			// filter out allocated space without nodes
   571  			continue
   572  		}
   573  		c, ok := perLevelCount[l]
   574  		if !ok {
   575  			perLevelCount[l] = 0
   576  		}
   577  
   578  		perLevelCount[l] = c + 1
   579  	}
   580  
   581  	for level, count := range perLevelCount {
   582  		fmt.Printf("unique count on level %d: %d\n", level, count)
   583  	}
   584  }
   585  
// isEmpty reports whether the index has no node at the entrypoint slot yet.
// It acquires the global read lock plus the sharded node lock covering the
// entrypoint id before delegating to isEmptyUnlocked.
func (h *hnsw) isEmpty() bool {
	h.RLock()
	defer h.RUnlock()
	h.shardedNodeLocks.RLock(h.entryPointID)
	defer h.shardedNodeLocks.RUnlock(h.entryPointID)

	return h.isEmptyUnlocked()
}
   594  
// isEmptyUnlocked is the lock-free variant of isEmpty. Callers must hold the
// global read lock and the sharded node lock for the entrypoint id.
func (h *hnsw) isEmptyUnlocked() bool {
	return h.nodes[h.entryPointID] == nil
}
   598  
// nodeByID returns the vertex with the given id, or nil when the id is out of
// range or no node exists at that slot. It takes the global read lock and the
// sharded node lock for the id.
func (h *hnsw) nodeByID(id uint64) *vertex {
	h.RLock()
	defer h.RUnlock()

	if id >= uint64(len(h.nodes)) {
		// See https://github.com/weaviate/weaviate/issues/1838 for details.
		// This could be after a crash recovery when the object store is "further
		// ahead" than the hnsw index and we receive a delete request
		return nil
	}

	h.shardedNodeLocks.RLock(id)
	defer h.shardedNodeLocks.RUnlock(id)

	return h.nodes[id]
}
   615  
   616  func (h *hnsw) Drop(ctx context.Context) error {
   617  	// cancel tombstone cleanup goroutine
   618  	if err := h.tombstoneCleanupCallbackCtrl.Unregister(ctx); err != nil {
   619  		return errors.Wrap(err, "hnsw drop")
   620  	}
   621  
   622  	if h.compressed.Load() {
   623  		err := h.compressor.Drop()
   624  		if err != nil {
   625  			return fmt.Errorf("failed to shutdown compressed store")
   626  		}
   627  	} else {
   628  		// cancel vector cache goroutine
   629  		h.cache.Drop()
   630  	}
   631  
   632  	// cancel commit logger last, as the tombstone cleanup cycle might still
   633  	// write while it's still running
   634  	err := h.commitLog.Drop(ctx)
   635  	if err != nil {
   636  		return errors.Wrap(err, "commit log drop")
   637  	}
   638  
   639  	return nil
   640  }
   641  
   642  func (h *hnsw) Shutdown(ctx context.Context) error {
   643  	h.shutdownCtxCancel()
   644  
   645  	if err := h.commitLog.Shutdown(ctx); err != nil {
   646  		return errors.Wrap(err, "hnsw shutdown")
   647  	}
   648  
   649  	if err := h.tombstoneCleanupCallbackCtrl.Unregister(ctx); err != nil {
   650  		return errors.Wrap(err, "hnsw shutdown")
   651  	}
   652  
   653  	if h.compressed.Load() {
   654  		err := h.compressor.Drop()
   655  		if err != nil {
   656  			return errors.Wrap(err, "hnsw shutdown")
   657  		}
   658  	} else {
   659  		h.cache.Drop()
   660  	}
   661  
   662  	return nil
   663  }
   664  
// Flush flushes the commit log to disk.
func (h *hnsw) Flush() error {
	return h.commitLog.Flush()
}
   668  
// Entrypoint returns the id of the current entrypoint node under the global
// read lock.
func (h *hnsw) Entrypoint() uint64 {
	h.RLock()
	defer h.RUnlock()

	return h.entryPointID
}
   675  
// DistanceBetweenVectors computes the distance between two raw vectors using
// the index's configured distance provider.
func (h *hnsw) DistanceBetweenVectors(x, y []float32) (float32, bool, error) {
	return h.distancerProvider.SingleDist(x, y)
}
   679  
   680  func (h *hnsw) ContainsNode(id uint64) bool {
   681  	h.RLock()
   682  	defer h.RUnlock()
   683  	h.shardedNodeLocks.RLock(id)
   684  	defer h.shardedNodeLocks.RUnlock(id)
   685  
   686  	return len(h.nodes) > int(id) && h.nodes[id] != nil
   687  }
   688  
// DistancerProvider returns the distance provider the index was configured
// with.
func (h *hnsw) DistancerProvider() distancer.Provider {
	return h.distancerProvider
}
   692  
// ShouldCompress reports whether PQ compression is enabled for this index and
// the configured training limit (number of vectors to collect before training
// the quantizer).
func (h *hnsw) ShouldCompress() (bool, int) {
	return h.pqConfig.Enabled, h.pqConfig.TrainingLimit
}
   696  
// ShouldCompressFromConfig is like ShouldCompress, but reads the PQ settings
// from the given config rather than the index's own configuration.
// NOTE(review): the unchecked type assertion panics if config is not an
// ent.UserConfig — presumably guaranteed by the caller; verify.
func (h *hnsw) ShouldCompressFromConfig(config schema.VectorIndexConfig) (bool, int) {
	hnswConfig := config.(ent.UserConfig)
	return hnswConfig.PQ.Enabled, hnswConfig.PQ.TrainingLimit
}
   701  
// Compressed reports whether the index currently operates on compressed
// vectors.
func (h *hnsw) Compressed() bool {
	return h.compressed.Load()
}
   705  
// AlreadyIndexed returns the number of vectors currently held in the vector
// cache.
// NOTE(review): h.cache is set to nil in New when BQ compression is enabled,
// so calling this on such an index would dereference nil — confirm callers
// avoid this path for compressed indexes.
func (h *hnsw) AlreadyIndexed() uint64 {
	return uint64(h.cache.CountVectors())
}
   709  
   710  func (h *hnsw) normalizeVec(vec []float32) []float32 {
   711  	if h.distancerProvider.Type() == "cosine-dot" {
   712  		// cosine-dot requires normalized vectors, as the dot product and cosine
   713  		// similarity are only identical if the vector is normalized
   714  		return distancer.Normalize(vec)
   715  	}
   716  	return vec
   717  }