github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/commit_logger.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"os"
    18  	"path/filepath"
    19  	"sort"
    20  	"strconv"
    21  	"strings"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/pkg/errors"
    26  	"github.com/sirupsen/logrus"
    27  	"github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers"
    28  	"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/commitlog"
    29  	"github.com/weaviate/weaviate/entities/cyclemanager"
    30  	"github.com/weaviate/weaviate/entities/errorcompounder"
    31  )
    32  
    33  const defaultCommitLogSize = 500 * 1024 * 1024
    34  
    35  func commitLogFileName(rootPath, indexName, fileName string) string {
    36  	return fmt.Sprintf("%s/%s", commitLogDirectory(rootPath, indexName), fileName)
    37  }
    38  
    39  func commitLogDirectory(rootPath, name string) string {
    40  	return fmt.Sprintf("%s/%s.hnsw.commitlog.d", rootPath, name)
    41  }
    42  
    43  func NewCommitLogger(rootPath, name string, logger logrus.FieldLogger,
    44  	maintenanceCallbacks cyclemanager.CycleCallbackGroup, opts ...CommitlogOption,
    45  ) (*hnswCommitLogger, error) {
    46  	l := &hnswCommitLogger{
    47  		rootPath:  rootPath,
    48  		id:        name,
    49  		condensor: NewMemoryCondensor(logger),
    50  		logger:    logger,
    51  
    52  		// both can be overwritten using functional options
    53  		maxSizeIndividual: defaultCommitLogSize / 5,
    54  		maxSizeCombining:  defaultCommitLogSize,
    55  	}
    56  
    57  	for _, o := range opts {
    58  		if err := o(l); err != nil {
    59  			return nil, err
    60  		}
    61  	}
    62  
    63  	fd, err := getLatestCommitFileOrCreate(rootPath, name)
    64  	if err != nil {
    65  		return nil, err
    66  	}
    67  
    68  	id := func(elems ...string) string {
    69  		elems = append([]string{"commit_logger"}, elems...)
    70  		elems = append(elems, l.id)
    71  		return strings.Join(elems, "/")
    72  	}
    73  	l.commitLogger = commitlog.NewLoggerWithFile(fd)
    74  	l.switchLogsCallbackCtrl = maintenanceCallbacks.Register(id("switch_logs"), l.startSwitchLogs)
    75  	l.condenseLogsCallbackCtrl = maintenanceCallbacks.Register(id("condense_logs"), l.startCombineAndCondenseLogs)
    76  
    77  	return l, nil
    78  }
    79  
    80  func getLatestCommitFileOrCreate(rootPath, name string) (*os.File, error) {
    81  	dir := commitLogDirectory(rootPath, name)
    82  	err := os.MkdirAll(dir, os.ModePerm)
    83  	if err != nil {
    84  		return nil, errors.Wrap(err, "create commit logger directory")
    85  	}
    86  
    87  	fileName, ok, err := getCurrentCommitLogFileName(dir)
    88  	if err != nil {
    89  		return nil, errors.Wrap(err, "find commit logger file in directory")
    90  	}
    91  
    92  	if !ok {
    93  		// this is a new commit log, initialize with the current time stamp
    94  		fileName = fmt.Sprintf("%d", time.Now().Unix())
    95  	}
    96  
    97  	fd, err := os.OpenFile(commitLogFileName(rootPath, name, fileName),
    98  		os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0o666)
    99  	if err != nil {
   100  		return nil, errors.Wrap(err, "create commit log file")
   101  	}
   102  
   103  	return fd, nil
   104  }
   105  
   106  // getCommitFileNames in order, from old to new
   107  func getCommitFileNames(rootPath, name string) ([]string, error) {
   108  	dir := commitLogDirectory(rootPath, name)
   109  	err := os.MkdirAll(dir, os.ModePerm)
   110  	if err != nil {
   111  		return nil, errors.Wrap(err, "create commit logger directory")
   112  	}
   113  
   114  	files, err := os.ReadDir(dir)
   115  	if err != nil {
   116  		return nil, errors.Wrap(err, "browse commit logger directory")
   117  	}
   118  
   119  	files = removeTmpScratchOrHiddenFiles(files)
   120  	files, err = removeTmpCombiningFiles(dir, files)
   121  	if err != nil {
   122  		return nil, errors.Wrap(err, "remove temporary files")
   123  	}
   124  
   125  	if len(files) == 0 {
   126  		return nil, nil
   127  	}
   128  
   129  	ec := &errorcompounder.ErrorCompounder{}
   130  	sort.Slice(files, func(a, b int) bool {
   131  		ts1, err := asTimeStamp(files[a].Name())
   132  		if err != nil {
   133  			ec.Add(err)
   134  		}
   135  
   136  		ts2, err := asTimeStamp(files[b].Name())
   137  		if err != nil {
   138  			ec.Add(err)
   139  		}
   140  		return ts1 < ts2
   141  	})
   142  	if err := ec.ToError(); err != nil {
   143  		return nil, err
   144  	}
   145  
   146  	out := make([]string, len(files))
   147  	for i, file := range files {
   148  		out[i] = commitLogFileName(rootPath, name, file.Name())
   149  	}
   150  
   151  	return out, nil
   152  }
   153  
   154  // getCurrentCommitLogFileName returns the fileName and true if a file was
   155  // present. If no file was present, the second arg is false.
   156  func getCurrentCommitLogFileName(dirPath string) (string, bool, error) {
   157  	files, err := os.ReadDir(dirPath)
   158  	if err != nil {
   159  		return "", false, errors.Wrap(err, "browse commit logger directory")
   160  	}
   161  
   162  	if len(files) == 0 {
   163  		return "", false, nil
   164  	}
   165  
   166  	files = removeTmpScratchOrHiddenFiles(files)
   167  	files, err = removeTmpCombiningFiles(dirPath, files)
   168  	if err != nil {
   169  		return "", false, errors.Wrap(err, "clean up tmp combining files")
   170  	}
   171  
   172  	ec := &errorcompounder.ErrorCompounder{}
   173  	sort.Slice(files, func(a, b int) bool {
   174  		ts1, err := asTimeStamp(files[a].Name())
   175  		if err != nil {
   176  			ec.Add(err)
   177  		}
   178  
   179  		ts2, err := asTimeStamp(files[b].Name())
   180  		if err != nil {
   181  			ec.Add(err)
   182  		}
   183  		return ts1 > ts2
   184  	})
   185  	if err := ec.ToError(); err != nil {
   186  		return "", false, err
   187  	}
   188  
   189  	return files[0].Name(), true, nil
   190  }
   191  
   192  func removeTmpScratchOrHiddenFiles(in []os.DirEntry) []os.DirEntry {
   193  	out := make([]os.DirEntry, len(in))
   194  	i := 0
   195  	for _, info := range in {
   196  		if strings.HasSuffix(info.Name(), ".scratch.tmp") {
   197  			continue
   198  		}
   199  
   200  		if strings.HasPrefix(info.Name(), ".") {
   201  			continue
   202  		}
   203  
   204  		out[i] = info
   205  		i++
   206  	}
   207  
   208  	return out[:i]
   209  }
   210  
   211  func removeTmpCombiningFiles(dirPath string,
   212  	in []os.DirEntry,
   213  ) ([]os.DirEntry, error) {
   214  	out := make([]os.DirEntry, len(in))
   215  	i := 0
   216  	for _, info := range in {
   217  		if strings.HasSuffix(info.Name(), ".combined.tmp") {
   218  			// a temporary combining file was found which means that the combining
   219  			// process never completed, this file is thus considered corrupt (too
   220  			// short) and must be deleted. The original sources still exist (because
   221  			// the only get deleted after the .tmp file is removed), so it's safe to
   222  			// delete this without data loss.
   223  
   224  			if err := os.Remove(filepath.Join(dirPath, info.Name())); err != nil {
   225  				return out, errors.Wrap(err, "remove tmp combining file")
   226  			}
   227  			continue
   228  		}
   229  
   230  		out[i] = info
   231  		i++
   232  	}
   233  
   234  	return out[:i], nil
   235  }
   236  
   237  func asTimeStamp(in string) (int64, error) {
   238  	return strconv.ParseInt(strings.TrimSuffix(in, ".condensed"), 10, 64)
   239  }
   240  
// condensor abstracts the component that condenseOldLogs hands a commit log
// file to. Do receives the full path of a log file that does not yet carry
// the ".condensed" suffix and reports whether processing it failed.
type condensor interface {
	Do(filename string) error
}
   244  
// hnswCommitLogger forwards hnsw index operations to the commit log file that
// is currently active and maintains the set of log files on disk (switching
// to a new file, combining and condensing old ones) through callbacks that
// NewCommitLogger registers with a maintenance cycle.
type hnswCommitLogger struct {
	// protect against concurrent attempts to write in the underlying file or
	// buffer
	sync.Mutex

	// rootPath and id together determine the commit log directory (see
	// commitLogDirectory). maxSizeIndividual is the file size above which
	// switchCommitLogs starts a new file, maxSizeCombining is the base of the
	// threshold used by combineLogs (both presumably in bytes — confirm
	// against commitlog.Logger.FileSize). commitLogger wraps the file that is
	// currently written to and is replaced on every switch.
	rootPath          string
	id                string
	condensor         condensor
	logger            logrus.FieldLogger
	maxSizeIndividual int64
	maxSizeCombining  int64
	commitLogger      *commitlog.Logger

	// handles of the two maintenance callbacks registered in NewCommitLogger;
	// Shutdown uses them to unregister the callbacks again
	switchLogsCallbackCtrl   cyclemanager.CycleCallbackCtrl
	condenseLogsCallbackCtrl cyclemanager.CycleCallbackCtrl
}
   261  
   262  type HnswCommitType uint8 // 256 options, plenty of room for future extensions
   263  
   264  const (
   265  	AddNode HnswCommitType = iota
   266  	SetEntryPointMaxLevel
   267  	AddLinkAtLevel
   268  	ReplaceLinksAtLevel
   269  	AddTombstone
   270  	RemoveTombstone
   271  	ClearLinks
   272  	DeleteNode
   273  	ResetIndex
   274  	ClearLinksAtLevel // added in v1.8.0-rc.1, see https://github.com/weaviate/weaviate/issues/1701
   275  	AddLinksAtLevel   // added in v1.8.0-rc.1, see https://github.com/weaviate/weaviate/issues/1705
   276  	AddPQ
   277  )
   278  
   279  func (t HnswCommitType) String() string {
   280  	switch t {
   281  	case AddNode:
   282  		return "AddNode"
   283  	case SetEntryPointMaxLevel:
   284  		return "SetEntryPointWithMaxLayer"
   285  	case AddLinkAtLevel:
   286  		return "AddLinkAtLevel"
   287  	case AddLinksAtLevel:
   288  		return "AddLinksAtLevel"
   289  	case ReplaceLinksAtLevel:
   290  		return "ReplaceLinksAtLevel"
   291  	case AddTombstone:
   292  		return "AddTombstone"
   293  	case RemoveTombstone:
   294  		return "RemoveTombstone"
   295  	case ClearLinks:
   296  		return "ClearLinks"
   297  	case DeleteNode:
   298  		return "DeleteNode"
   299  	case ResetIndex:
   300  		return "ResetIndex"
   301  	case ClearLinksAtLevel:
   302  		return "ClearLinksAtLevel"
   303  	case AddPQ:
   304  		return "AddProductQuantizer"
   305  	}
   306  	return "unknown commit type"
   307  }
   308  
// ID returns the identifier the logger was created with, i.e. the name that
// was passed to NewCommitLogger.
func (l *hnswCommitLogger) ID() string {
	return l.id
}
   312  
// AddPQ passes the given product quantization data on to the active commit
// log. The logger's mutex is held for the duration of the call.
func (l *hnswCommitLogger) AddPQ(data compressionhelpers.PQData) error {
	l.Lock()
	defer l.Unlock()

	return l.commitLogger.AddPQ(data)
}
   319  
// AddNode adds an empty node: only the id and level of node are passed on to
// the active commit log, none of its other state. The logger's mutex is held
// for the duration of the call.
func (l *hnswCommitLogger) AddNode(node *vertex) error {
	l.Lock()
	defer l.Unlock()

	return l.commitLogger.AddNode(node.id, node.level)
}
   327  
// SetEntryPointWithMaxLayer passes the entry point id and level on to the
// active commit log while holding the logger's mutex.
func (l *hnswCommitLogger) SetEntryPointWithMaxLayer(id uint64, level int) error {
	l.Lock()
	defer l.Unlock()

	return l.commitLogger.SetEntryPointWithMaxLayer(id, level)
}
   334  
// ReplaceLinksAtLevel passes the node id, the level and the full list of
// target ids on to the active commit log while holding the logger's mutex.
func (l *hnswCommitLogger) ReplaceLinksAtLevel(nodeid uint64, level int, targets []uint64) error {
	l.Lock()
	defer l.Unlock()

	return l.commitLogger.ReplaceLinksAtLevel(nodeid, level, targets)
}
   341  
// AddLinkAtLevel passes a single link (from nodeid to target on the given
// level) on to the active commit log while holding the logger's mutex.
func (l *hnswCommitLogger) AddLinkAtLevel(nodeid uint64, level int,
	target uint64,
) error {
	l.Lock()
	defer l.Unlock()

	return l.commitLogger.AddLinkAtLevel(nodeid, level, target)
}
   350  
// AddTombstone passes the id of the node to be tombstoned on to the active
// commit log while holding the logger's mutex.
func (l *hnswCommitLogger) AddTombstone(nodeid uint64) error {
	l.Lock()
	defer l.Unlock()

	return l.commitLogger.AddTombstone(nodeid)
}
   357  
// RemoveTombstone passes the id of the node whose tombstone is removed on to
// the active commit log while holding the logger's mutex.
func (l *hnswCommitLogger) RemoveTombstone(nodeid uint64) error {
	l.Lock()
	defer l.Unlock()

	return l.commitLogger.RemoveTombstone(nodeid)
}
   364  
// ClearLinks passes the id of the node whose links are cleared on to the
// active commit log while holding the logger's mutex.
func (l *hnswCommitLogger) ClearLinks(nodeid uint64) error {
	l.Lock()
	defer l.Unlock()

	return l.commitLogger.ClearLinks(nodeid)
}
   371  
// ClearLinksAtLevel passes the node id and level on to the active commit log
// while holding the logger's mutex. Unlike the other level-based methods of
// this type, level is a uint16 here.
func (l *hnswCommitLogger) ClearLinksAtLevel(nodeid uint64, level uint16) error {
	l.Lock()
	defer l.Unlock()

	return l.commitLogger.ClearLinksAtLevel(nodeid, level)
}
   378  
// DeleteNode passes the id of the deleted node on to the active commit log
// while holding the logger's mutex.
func (l *hnswCommitLogger) DeleteNode(nodeid uint64) error {
	l.Lock()
	defer l.Unlock()

	return l.commitLogger.DeleteNode(nodeid)
}
   385  
// Reset calls Reset on the active commit log while holding the logger's
// mutex.
// NOTE(review): presumably this records a reset entry rather than truncating
// the file — confirm in commitlog.Logger.
func (l *hnswCommitLogger) Reset() error {
	l.Lock()
	defer l.Unlock()

	return l.commitLogger.Reset()
}
   392  
   393  // Shutdown waits for ongoing maintenance processes to stop, then cancels their
   394  // scheduling. The caller can be sure that state on disk is immutable after
   395  // calling Shutdown().
   396  func (l *hnswCommitLogger) Shutdown(ctx context.Context) error {
   397  	if err := l.switchLogsCallbackCtrl.Unregister(ctx); err != nil {
   398  		return errors.Wrap(err, "failed to unregister commitlog switch from maintenance cycle")
   399  	}
   400  	if err := l.condenseLogsCallbackCtrl.Unregister(ctx); err != nil {
   401  		return errors.Wrap(err, "failed to unregister commitlog condense from maintenance cycle")
   402  	}
   403  	return nil
   404  }
   405  
// RootPath returns the root path the logger was created with; the commit log
// directory is located below it.
func (l *hnswCommitLogger) RootPath() string {
	return l.rootPath
}
   409  
   410  func (l *hnswCommitLogger) startSwitchLogs(shouldAbort cyclemanager.ShouldAbortCallback) bool {
   411  	executed, err := l.switchCommitLogs(false)
   412  	if err != nil {
   413  		l.logger.WithError(err).
   414  			WithField("action", "hnsw_commit_log_maintenance").
   415  			Error("hnsw commit log maintenance failed")
   416  	}
   417  	return executed
   418  }
   419  
   420  func (l *hnswCommitLogger) startCombineAndCondenseLogs(shouldAbort cyclemanager.ShouldAbortCallback) bool {
   421  	executed1, err := l.combineLogs()
   422  	if err != nil {
   423  		l.logger.WithError(err).
   424  			WithField("action", "hnsw_commit_log_combining").
   425  			Error("hnsw commit log maintenance (combining) failed")
   426  	}
   427  
   428  	executed2, err := l.condenseOldLogs()
   429  	if err != nil {
   430  		l.logger.WithError(err).
   431  			WithField("action", "hnsw_commit_log_condensing").
   432  			Error("hnsw commit log maintenance (condensing) failed")
   433  	}
   434  	return executed1 || executed2
   435  }
   436  
// SwitchCommitLogs is the exported entry point for switching to a new commit
// log file. With force set, the switch happens regardless of the current file
// size; otherwise only if the size exceeds the individual size limit. Only
// the error is returned, not whether a switch actually took place.
func (l *hnswCommitLogger) SwitchCommitLogs(force bool) error {
	_, err := l.switchCommitLogs(force)
	return err
}
   441  
   442  func (l *hnswCommitLogger) switchCommitLogs(force bool) (bool, error) {
   443  	l.Lock()
   444  	defer l.Unlock()
   445  
   446  	size, err := l.commitLogger.FileSize()
   447  	if err != nil {
   448  		return false, err
   449  	}
   450  
   451  	if size <= l.maxSizeIndividual && !force {
   452  		return false, nil
   453  	}
   454  
   455  	oldFileName, err := l.commitLogger.FileName()
   456  	if err != nil {
   457  		return false, err
   458  	}
   459  
   460  	if err := l.commitLogger.Close(); err != nil {
   461  		return true, err
   462  	}
   463  
   464  	// this is a new commit log, initialize with the current time stamp
   465  	fileName := fmt.Sprintf("%d", time.Now().Unix())
   466  
   467  	if force {
   468  		l.logger.WithField("action", "commit_log_file_switched").
   469  			WithField("id", l.id).
   470  			WithField("old_file_name", oldFileName).
   471  			WithField("old_file_size", size).
   472  			WithField("new_file_name", fileName).
   473  			Debug("commit log switched forced")
   474  	} else {
   475  		l.logger.WithField("action", "commit_log_file_switched").
   476  			WithField("id", l.id).
   477  			WithField("old_file_name", oldFileName).
   478  			WithField("old_file_size", size).
   479  			WithField("new_file_name", fileName).
   480  			Info("commit log size crossed threshold, switching to new file")
   481  	}
   482  
   483  	fd, err := os.OpenFile(commitLogFileName(l.rootPath, l.id, fileName),
   484  		os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0o666)
   485  	if err != nil {
   486  		return true, errors.Wrap(err, "create commit log file")
   487  	}
   488  
   489  	l.commitLogger = commitlog.NewLoggerWithFile(fd)
   490  
   491  	return true, nil
   492  }
   493  
   494  func (l *hnswCommitLogger) condenseOldLogs() (bool, error) {
   495  	files, err := getCommitFileNames(l.rootPath, l.id)
   496  	if err != nil {
   497  		return false, err
   498  	}
   499  
   500  	if len(files) <= 1 {
   501  		// if there are no files there is nothing to do
   502  		// if there is only a single file, it must still be in use, we can't do
   503  		// anything yet
   504  		return false, nil
   505  	}
   506  
   507  	// cut off last element, as that's never a candidate
   508  	candidates := files[:len(files)-1]
   509  
   510  	for _, candidate := range candidates {
   511  		if strings.HasSuffix(candidate, ".condensed") {
   512  			// don't attempt to condense logs which are already condensed
   513  			continue
   514  		}
   515  
   516  		return true, l.condensor.Do(candidate)
   517  	}
   518  
   519  	return false, nil
   520  }
   521  
   522  func (l *hnswCommitLogger) combineLogs() (bool, error) {
   523  	// maxSize is the desired final size, since we assume a lot of redundancy we
   524  	// can set the combining threshold higher than the final threshold under the
   525  	// assumption that the combined file will be considerably smaller than the
   526  	// sum of both input files
   527  	threshold := int64(float64(l.maxSizeCombining) * 1.75)
   528  	return NewCommitLogCombiner(l.rootPath, l.id, threshold, l.logger).Do()
   529  }
   530  
   531  func (l *hnswCommitLogger) Drop(ctx context.Context) error {
   532  	if err := l.commitLogger.Close(); err != nil {
   533  		return errors.Wrap(err, "close hnsw commit logger prior to delete")
   534  	}
   535  
   536  	// stop all goroutines
   537  	if err := l.Shutdown(ctx); err != nil {
   538  		return errors.Wrap(err, "drop commitlog")
   539  	}
   540  
   541  	// remove commit log directory if exists
   542  	dir := commitLogDirectory(l.rootPath, l.id)
   543  	if _, err := os.Stat(dir); err == nil {
   544  		err := os.RemoveAll(dir)
   545  		if err != nil {
   546  			return errors.Wrap(err, "delete commit files directory")
   547  		}
   548  	}
   549  	return nil
   550  }
   551  
// Flush calls Flush on the active commit log while holding the logger's
// mutex.
// NOTE(review): presumably this writes buffered entries to the underlying
// file — confirm in commitlog.Logger.
func (l *hnswCommitLogger) Flush() error {
	l.Lock()
	defer l.Unlock()

	return l.commitLogger.Flush()
}