github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/startup.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package hnsw 13 14 import ( 15 "bufio" 16 "context" 17 "io" 18 "os" 19 "time" 20 21 enterrors "github.com/weaviate/weaviate/entities/errors" 22 23 "github.com/pkg/errors" 24 "github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers" 25 "github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/visited" 26 "github.com/weaviate/weaviate/entities/cyclemanager" 27 "github.com/weaviate/weaviate/entities/diskio" 28 ) 29 30 func (h *hnsw) init(cfg Config) error { 31 h.pools = newPools(h.maximumConnectionsLayerZero) 32 33 if err := h.restoreFromDisk(); err != nil { 34 return errors.Wrapf(err, "restore hnsw index %q", cfg.ID) 35 } 36 37 // init commit logger for future writes 38 cl, err := cfg.MakeCommitLoggerThunk() 39 if err != nil { 40 return errors.Wrap(err, "create commit logger") 41 } 42 43 h.commitLog = cl 44 45 // report the vector_index_size at server startup. 46 // otherwise on server restart, prometheus reports 47 // a vector_index_size of 0 until more vectors are 48 // added. 49 h.metrics.SetSize(len(h.nodes)) 50 51 return nil 52 } 53 54 // if a commit log is already present it will be read into memory, if not we 55 // start with an empty model 56 func (h *hnsw) restoreFromDisk() error { 57 beforeAll := time.Now() 58 defer h.metrics.TrackStartupTotal(beforeAll) 59 60 fileNames, err := getCommitFileNames(h.rootPath, h.id) 61 if err != nil { 62 return err 63 } 64 65 if len(fileNames) == 0 { 66 // nothing to do 67 return nil 68 } 69 70 fileNames, err = NewCorruptedCommitLogFixer(h.logger).Do(fileNames) 71 if err != nil { 72 return errors.Wrap(err, "corrupted commit log fixer") 73 } 74 75 var state *DeserializationResult 76 for i, fileName := range fileNames { 77 beforeIndividual := time.Now() 78 79 fd, err := os.Open(fileName) 80 if err != nil { 81 return errors.Wrapf(err, "open commit log %q for reading", fileName) 82 } 83 84 defer fd.Close() 85 86 metered := diskio.NewMeteredReader(fd, 87 h.metrics.TrackStartupReadCommitlogDiskIO) 88 fdBuf := bufio.NewReaderSize(metered, 256*1024) 89 90 var valid int 91 state, valid, err = NewDeserializer(h.logger).Do(fdBuf, state, false) 92 if err != nil { 93 if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { 94 // we need to check for both EOF or UnexpectedEOF, as we don't know where 95 // the commit log got corrupted, a field ending that weset a longer 96 // encoding for would return EOF, whereas a field read with binary.Read 97 // with a fixed size would return UnexpectedEOF. From our perspective both 98 // are unexpected. 99 100 h.logger.WithField("action", "hnsw_load_commit_log_corruption"). 101 WithField("path", fileName). 102 Error("write-ahead-log ended abruptly, some elements may not have been recovered") 103 104 // we need to truncate the file to its valid length! 105 if err := os.Truncate(fileName, int64(valid)); err != nil { 106 return errors.Wrapf(err, "truncate corrupt commit log %q", fileName) 107 } 108 } else { 109 // only return an actual error on non-EOF errors, otherwise we'll end 110 // up in a startup crashloop 111 return errors.Wrapf(err, "deserialize commit log %q", fileName) 112 } 113 } 114 115 h.metrics.StartupProgress(float64(i+1) / float64(len(fileNames))) 116 h.metrics.TrackStartupIndividual(beforeIndividual) 117 } 118 119 h.Lock() 120 h.shardedNodeLocks.LockAll() 121 h.nodes = state.Nodes 122 h.shardedNodeLocks.UnlockAll() 123 124 h.currentMaximumLayer = int(state.Level) 125 h.entryPointID = state.Entrypoint 126 h.Unlock() 127 128 h.tombstoneLock.Lock() 129 h.tombstones = state.Tombstones 130 h.tombstoneLock.Unlock() 131 132 if state.Compressed { 133 h.compressed.Store(state.Compressed) 134 h.dims = int32(state.PQData.Dimensions) 135 h.cache.Drop() 136 137 if len(state.PQData.Encoders) > 0 { 138 // 0 means it was created using the default value. The user did not set the value, we calculated for him/her 139 if h.pqConfig.Segments == 0 { 140 h.pqConfig.Segments = int(state.PQData.Dimensions) 141 } 142 h.compressor, err = compressionhelpers.RestoreHNSWPQCompressor( 143 h.pqConfig, 144 h.distancerProvider, 145 int(state.PQData.Dimensions), 146 // ToDo: we need to read this value from somewhere 147 1e12, 148 h.logger, 149 state.PQData.Encoders, 150 h.store, 151 ) 152 if err != nil { 153 return errors.Wrap(err, "Restoring compressed data.") 154 } 155 } 156 // make sure the compressed cache fits the current size 157 h.compressor.GrowCache(uint64(len(h.nodes))) 158 } else if !h.compressed.Load() { 159 // make sure the cache fits the current size 160 h.cache.Grow(uint64(len(h.nodes))) 161 162 if len(h.nodes) > 0 { 163 if vec, err := h.vectorForID(context.Background(), h.entryPointID); err == nil { 164 h.dims = int32(len(vec)) 165 } 166 } 167 } 168 169 // make sure the visited list pool fits the current size 170 h.pools.visitedLists.Destroy() 171 h.pools.visitedLists = nil 172 h.pools.visitedLists = visited.NewPool(1, len(h.nodes)+512) 173 174 return nil 175 } 176 177 func (h *hnsw) tombstoneCleanup(shouldAbort cyclemanager.ShouldAbortCallback) bool { 178 executed, err := h.cleanUpTombstonedNodes(shouldAbort) 179 if err != nil { 180 h.logger.WithField("action", "hnsw_tombstone_cleanup"). 181 WithError(err).Error("tombstone cleanup errord") 182 } 183 return executed 184 } 185 186 // PostStartup triggers routines that should happen after startup. The startup 187 // process is triggered during the creation which in turn happens as part of 188 // the shard creation. Some post-startup routines, such as prefilling the 189 // vector cache, however, depend on the shard being ready as they will call 190 // getVectorForID. 191 func (h *hnsw) PostStartup() { 192 h.prefillCache() 193 } 194 195 func (h *hnsw) prefillCache() { 196 limit := 0 197 if h.compressed.Load() { 198 limit = int(h.compressor.GetCacheMaxSize()) 199 } else { 200 limit = int(h.cache.CopyMaxSize()) 201 } 202 203 f := func() { 204 ctx, cancel := context.WithTimeout(context.Background(), 60*time.Minute) 205 defer cancel() 206 207 var err error 208 if h.compressed.Load() { 209 h.compressor.PrefillCache() 210 } else { 211 err = newVectorCachePrefiller(h.cache, h, h.logger).Prefill(ctx, limit) 212 } 213 214 if err != nil { 215 h.logger.WithError(err).Error("prefill vector cache") 216 } 217 } 218 enterrors.GoWrapper(f, h.logger) 219 }