github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/maintenance.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"time"
    16  
    17  	"github.com/sirupsen/logrus"
    18  	"github.com/weaviate/weaviate/adapters/repos/db/vector/cache"
    19  	"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/visited"
    20  )
    21  
const (
	// indexGrowthRate is the multiplicative factor applied to the current
	// nodes-slice length when growing geometrically. It only takes effect once
	// 25% of the current size exceeds cache.MinimumIndexGrowthDelta; below
	// that threshold the index grows by the fixed delta instead (see
	// growIndexToAccomodateNode).
	indexGrowthRate = 1.25
)
    25  
    26  // growIndexToAccomodateNode is a wrapper around the growIndexToAccomodateNode
    27  // function growing the index of the hnsw struct. It does not do any locking on
    28  // its own, make sure that this function is called from a single-thread or
    29  // locked situation
    30  func (h *hnsw) growIndexToAccomodateNode(id uint64, logger logrus.FieldLogger) error {
    31  	defer func() {
    32  		h.metrics.SetSize(len(h.nodes))
    33  	}()
    34  
    35  	before := time.Now()
    36  
    37  	// check whether h.nodes slice needs growing
    38  	// not to unnecessarily lock h.shardedNodeLocks
    39  	if id < uint64(len(h.nodes)) {
    40  		return nil
    41  	}
    42  
    43  	// lock h.nodes' individual elements to avoid race between writing to elements
    44  	// and copying entire slice in growIndexToAccomodateNode method
    45  	newIndex, err := func() ([]*vertex, error) {
    46  		h.shardedNodeLocks.RLockAll()
    47  		defer h.shardedNodeLocks.RUnlockAll()
    48  
    49  		newIndex, _, err := growIndexToAccomodateNode(h.nodes, id, logger)
    50  		return newIndex, err
    51  	}()
    52  	if err != nil {
    53  		return err
    54  	}
    55  
    56  	defer h.metrics.GrowDuration(before)
    57  
    58  	if h.compressed.Load() {
    59  		h.compressor.GrowCache(uint64(len(newIndex)))
    60  	} else {
    61  		h.cache.Grow(uint64(len(newIndex)))
    62  	}
    63  
    64  	h.pools.visitedListsLock.Lock()
    65  	h.pools.visitedLists.Destroy()
    66  	h.pools.visitedLists = nil
    67  	h.pools.visitedLists = visited.NewPool(1, len(newIndex)+512)
    68  	h.pools.visitedListsLock.Unlock()
    69  
    70  	h.shardedNodeLocks.LockAll()
    71  	h.nodes = newIndex
    72  	h.shardedNodeLocks.UnlockAll()
    73  
    74  	return nil
    75  }
    76  
    77  // growIndexToAccomodateNode does not lock the graph for writes as the
    78  // assumption is that it is called as part of an operation that is already
    79  // wrapped inside a lock, such as inserting a node into the graph. If
    80  // growIndexToAccomodateNode is ever called outside of such an operation, the
    81  // caller must make sure to lock the graph as concurrent reads/write would
    82  // otherwise be possible
    83  func growIndexToAccomodateNode(index []*vertex, id uint64,
    84  	logger logrus.FieldLogger,
    85  ) ([]*vertex, bool, error) {
    86  	previousSize := uint64(len(index))
    87  	if id < previousSize {
    88  		// node will fit, nothing to do
    89  		return nil, false, nil
    90  	}
    91  	before := time.Now()
    92  
    93  	var newSize uint64
    94  
    95  	if (indexGrowthRate-1)*float64(previousSize) < float64(cache.MinimumIndexGrowthDelta) {
    96  		// typically grow the index by the delta
    97  		newSize = previousSize + cache.MinimumIndexGrowthDelta
    98  	} else {
    99  		newSize = uint64(float64(previousSize) * indexGrowthRate)
   100  	}
   101  
   102  	if newSize <= id {
   103  		// There are situations were docIDs are not in order. For example, if  the
   104  		// default size is 10k and the default delta is 10k. Imagine the user
   105  		// imports 21 objects, then deletes the first 20,500. When rebuilding the
   106  		// index from disk the first id to be imported would be 20,501, however the
   107  		// index default size and default delta would only reach up to 20,000.
   108  		newSize = id + cache.MinimumIndexGrowthDelta
   109  	}
   110  
   111  	newIndex := make([]*vertex, newSize)
   112  	copy(newIndex, index)
   113  
   114  	took := time.Since(before)
   115  	logger.WithField("action", "hnsw_grow_index").
   116  		WithField("took", took).
   117  		WithField("previous_size", previousSize).
   118  		WithField("new_size", newSize).
   119  		Debugf("index grown from %d to %d, took %s\n", previousSize, newSize, took)
   120  	return newIndex, true, nil
   121  }