github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/startup.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"bufio"
    16  	"context"
    17  	"io"
    18  	"os"
    19  	"time"
    20  
    21  	enterrors "github.com/weaviate/weaviate/entities/errors"
    22  
    23  	"github.com/pkg/errors"
    24  	"github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers"
    25  	"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/visited"
    26  	"github.com/weaviate/weaviate/entities/cyclemanager"
    27  	"github.com/weaviate/weaviate/entities/diskio"
    28  )
    29  
    30  func (h *hnsw) init(cfg Config) error {
    31  	h.pools = newPools(h.maximumConnectionsLayerZero)
    32  
    33  	if err := h.restoreFromDisk(); err != nil {
    34  		return errors.Wrapf(err, "restore hnsw index %q", cfg.ID)
    35  	}
    36  
    37  	// init commit logger for future writes
    38  	cl, err := cfg.MakeCommitLoggerThunk()
    39  	if err != nil {
    40  		return errors.Wrap(err, "create commit logger")
    41  	}
    42  
    43  	h.commitLog = cl
    44  
    45  	// report the vector_index_size at server startup.
    46  	// otherwise on server restart, prometheus reports
    47  	// a vector_index_size of 0 until more vectors are
    48  	// added.
    49  	h.metrics.SetSize(len(h.nodes))
    50  
    51  	return nil
    52  }
    53  
    54  // if a commit log is already present it will be read into memory, if not we
    55  // start with an empty model
    56  func (h *hnsw) restoreFromDisk() error {
    57  	beforeAll := time.Now()
    58  	defer h.metrics.TrackStartupTotal(beforeAll)
    59  
    60  	fileNames, err := getCommitFileNames(h.rootPath, h.id)
    61  	if err != nil {
    62  		return err
    63  	}
    64  
    65  	if len(fileNames) == 0 {
    66  		// nothing to do
    67  		return nil
    68  	}
    69  
    70  	fileNames, err = NewCorruptedCommitLogFixer(h.logger).Do(fileNames)
    71  	if err != nil {
    72  		return errors.Wrap(err, "corrupted commit log fixer")
    73  	}
    74  
    75  	var state *DeserializationResult
    76  	for i, fileName := range fileNames {
    77  		beforeIndividual := time.Now()
    78  
    79  		fd, err := os.Open(fileName)
    80  		if err != nil {
    81  			return errors.Wrapf(err, "open commit log %q for reading", fileName)
    82  		}
    83  
    84  		defer fd.Close()
    85  
    86  		metered := diskio.NewMeteredReader(fd,
    87  			h.metrics.TrackStartupReadCommitlogDiskIO)
    88  		fdBuf := bufio.NewReaderSize(metered, 256*1024)
    89  
    90  		var valid int
    91  		state, valid, err = NewDeserializer(h.logger).Do(fdBuf, state, false)
    92  		if err != nil {
    93  			if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
    94  				// we need to check for both EOF or UnexpectedEOF, as we don't know where
    95  				// the commit log got corrupted, a field ending that weset a longer
    96  				// encoding for would return EOF, whereas a field read with binary.Read
    97  				// with a fixed size would return UnexpectedEOF. From our perspective both
    98  				// are unexpected.
    99  
   100  				h.logger.WithField("action", "hnsw_load_commit_log_corruption").
   101  					WithField("path", fileName).
   102  					Error("write-ahead-log ended abruptly, some elements may not have been recovered")
   103  
   104  				// we need to truncate the file to its valid length!
   105  				if err := os.Truncate(fileName, int64(valid)); err != nil {
   106  					return errors.Wrapf(err, "truncate corrupt commit log %q", fileName)
   107  				}
   108  			} else {
   109  				// only return an actual error on non-EOF errors, otherwise we'll end
   110  				// up in a startup crashloop
   111  				return errors.Wrapf(err, "deserialize commit log %q", fileName)
   112  			}
   113  		}
   114  
   115  		h.metrics.StartupProgress(float64(i+1) / float64(len(fileNames)))
   116  		h.metrics.TrackStartupIndividual(beforeIndividual)
   117  	}
   118  
   119  	h.Lock()
   120  	h.shardedNodeLocks.LockAll()
   121  	h.nodes = state.Nodes
   122  	h.shardedNodeLocks.UnlockAll()
   123  
   124  	h.currentMaximumLayer = int(state.Level)
   125  	h.entryPointID = state.Entrypoint
   126  	h.Unlock()
   127  
   128  	h.tombstoneLock.Lock()
   129  	h.tombstones = state.Tombstones
   130  	h.tombstoneLock.Unlock()
   131  
   132  	if state.Compressed {
   133  		h.compressed.Store(state.Compressed)
   134  		h.dims = int32(state.PQData.Dimensions)
   135  		h.cache.Drop()
   136  
   137  		if len(state.PQData.Encoders) > 0 {
   138  			// 0 means it was created using the default value. The user did not set the value, we calculated for him/her
   139  			if h.pqConfig.Segments == 0 {
   140  				h.pqConfig.Segments = int(state.PQData.Dimensions)
   141  			}
   142  			h.compressor, err = compressionhelpers.RestoreHNSWPQCompressor(
   143  				h.pqConfig,
   144  				h.distancerProvider,
   145  				int(state.PQData.Dimensions),
   146  				// ToDo: we need to read this value from somewhere
   147  				1e12,
   148  				h.logger,
   149  				state.PQData.Encoders,
   150  				h.store,
   151  			)
   152  			if err != nil {
   153  				return errors.Wrap(err, "Restoring compressed data.")
   154  			}
   155  		}
   156  		// make sure the compressed cache fits the current size
   157  		h.compressor.GrowCache(uint64(len(h.nodes)))
   158  	} else if !h.compressed.Load() {
   159  		// make sure the cache fits the current size
   160  		h.cache.Grow(uint64(len(h.nodes)))
   161  
   162  		if len(h.nodes) > 0 {
   163  			if vec, err := h.vectorForID(context.Background(), h.entryPointID); err == nil {
   164  				h.dims = int32(len(vec))
   165  			}
   166  		}
   167  	}
   168  
   169  	// make sure the visited list pool fits the current size
   170  	h.pools.visitedLists.Destroy()
   171  	h.pools.visitedLists = nil
   172  	h.pools.visitedLists = visited.NewPool(1, len(h.nodes)+512)
   173  
   174  	return nil
   175  }
   176  
   177  func (h *hnsw) tombstoneCleanup(shouldAbort cyclemanager.ShouldAbortCallback) bool {
   178  	executed, err := h.cleanUpTombstonedNodes(shouldAbort)
   179  	if err != nil {
   180  		h.logger.WithField("action", "hnsw_tombstone_cleanup").
   181  			WithError(err).Error("tombstone cleanup errord")
   182  	}
   183  	return executed
   184  }
   185  
// PostStartup triggers routines that should happen after startup. The startup
// process is triggered during the creation which in turn happens as part of
// the shard creation. Some post-startup routines, such as prefilling the
// vector cache, however, depend on the shard being ready as they will call
// getVectorForID.
func (h *hnsw) PostStartup() {
	// prefillCache spawns its own goroutine, so this returns immediately.
	h.prefillCache()
}
   194  
   195  func (h *hnsw) prefillCache() {
   196  	limit := 0
   197  	if h.compressed.Load() {
   198  		limit = int(h.compressor.GetCacheMaxSize())
   199  	} else {
   200  		limit = int(h.cache.CopyMaxSize())
   201  	}
   202  
   203  	f := func() {
   204  		ctx, cancel := context.WithTimeout(context.Background(), 60*time.Minute)
   205  		defer cancel()
   206  
   207  		var err error
   208  		if h.compressed.Load() {
   209  			h.compressor.PrefillCache()
   210  		} else {
   211  			err = newVectorCachePrefiller(h.cache, h, h.logger).Prefill(ctx, limit)
   212  		}
   213  
   214  		if err != nil {
   215  			h.logger.WithError(err).Error("prefill vector cache")
   216  		}
   217  	}
   218  	enterrors.GoWrapper(f, h.logger)
   219  }