github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/maintenance.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package hnsw 13 14 import ( 15 "time" 16 17 "github.com/sirupsen/logrus" 18 "github.com/weaviate/weaviate/adapters/repos/db/vector/cache" 19 "github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/visited" 20 ) 21 22 const ( 23 indexGrowthRate = 1.25 24 ) 25 26 // growIndexToAccomodateNode is a wrapper around the growIndexToAccomodateNode 27 // function growing the index of the hnsw struct. It does not do any locking on 28 // its own, make sure that this function is called from a single-thread or 29 // locked situation 30 func (h *hnsw) growIndexToAccomodateNode(id uint64, logger logrus.FieldLogger) error { 31 defer func() { 32 h.metrics.SetSize(len(h.nodes)) 33 }() 34 35 before := time.Now() 36 37 // check whether h.nodes slice needs growing 38 // not to unnecessarily lock h.shardedNodeLocks 39 if id < uint64(len(h.nodes)) { 40 return nil 41 } 42 43 // lock h.nodes' individual elements to avoid race between writing to elements 44 // and copying entire slice in growIndexToAccomodateNode method 45 newIndex, err := func() ([]*vertex, error) { 46 h.shardedNodeLocks.RLockAll() 47 defer h.shardedNodeLocks.RUnlockAll() 48 49 newIndex, _, err := growIndexToAccomodateNode(h.nodes, id, logger) 50 return newIndex, err 51 }() 52 if err != nil { 53 return err 54 } 55 56 defer h.metrics.GrowDuration(before) 57 58 if h.compressed.Load() { 59 h.compressor.GrowCache(uint64(len(newIndex))) 60 } else { 61 h.cache.Grow(uint64(len(newIndex))) 62 } 63 64 h.pools.visitedListsLock.Lock() 65 h.pools.visitedLists.Destroy() 66 h.pools.visitedLists = nil 67 h.pools.visitedLists = visited.NewPool(1, len(newIndex)+512) 68 h.pools.visitedListsLock.Unlock() 69 70 h.shardedNodeLocks.LockAll() 71 h.nodes = newIndex 72 h.shardedNodeLocks.UnlockAll() 73 74 return nil 75 } 76 77 // growIndexToAccomodateNode does not lock the graph for writes as the 78 // assumption is that it is called as part of an operation that is already 79 // wrapped inside a lock, such as inserting a node into the graph. If 80 // growIndexToAccomodateNode is ever called outside of such an operation, the 81 // caller must make sure to lock the graph as concurrent reads/write would 82 // otherwise be possible 83 func growIndexToAccomodateNode(index []*vertex, id uint64, 84 logger logrus.FieldLogger, 85 ) ([]*vertex, bool, error) { 86 previousSize := uint64(len(index)) 87 if id < previousSize { 88 // node will fit, nothing to do 89 return nil, false, nil 90 } 91 before := time.Now() 92 93 var newSize uint64 94 95 if (indexGrowthRate-1)*float64(previousSize) < float64(cache.MinimumIndexGrowthDelta) { 96 // typically grow the index by the delta 97 newSize = previousSize + cache.MinimumIndexGrowthDelta 98 } else { 99 newSize = uint64(float64(previousSize) * indexGrowthRate) 100 } 101 102 if newSize <= id { 103 // There are situations were docIDs are not in order. For example, if the 104 // default size is 10k and the default delta is 10k. Imagine the user 105 // imports 21 objects, then deletes the first 20,500. When rebuilding the 106 // index from disk the first id to be imported would be 20,501, however the 107 // index default size and default delta would only reach up to 20,000. 108 newSize = id + cache.MinimumIndexGrowthDelta 109 } 110 111 newIndex := make([]*vertex, newSize) 112 copy(newIndex, index) 113 114 took := time.Since(before) 115 logger.WithField("action", "hnsw_grow_index"). 116 WithField("took", took). 117 WithField("previous_size", previousSize). 118 WithField("new_size", newSize). 119 Debugf("index grown from %d to %d, took %s\n", previousSize, newSize, took) 120 return newIndex, true, nil 121 }