github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/kbfs/libkbfs/disk_md_cache.go (about)

     1  // Copyright 2018 Keybase Inc. All rights reserved.
     2  // Use of this source code is governed by a BSD
     3  // license that can be found in the LICENSE file.
     4  
     5  package libkbfs
     6  
     7  import (
     8  	"context"
     9  	"io"
    10  	"path/filepath"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/keybase/client/go/kbfs/kbfsmd"
    15  	"github.com/keybase/client/go/kbfs/ldbutils"
    16  	"github.com/keybase/client/go/kbfs/tlf"
    17  	"github.com/keybase/client/go/logger"
    18  	"github.com/pkg/errors"
    19  	ldberrors "github.com/syndtr/goleveldb/leveldb/errors"
    20  	"github.com/syndtr/goleveldb/leveldb/filter"
    21  	"github.com/syndtr/goleveldb/leveldb/opt"
    22  	"github.com/syndtr/goleveldb/leveldb/storage"
    23  )
    24  
    25  const (
    26  	headsDbFilename           string = "diskCacheMDHeads.leveldb"
    27  	initialDiskMDCacheVersion uint64 = 1
    28  	currentDiskMDCacheVersion uint64 = initialDiskMDCacheVersion
    29  	defaultMDCacheTableSize   int    = 50 * opt.MiB
    30  	mdCacheFolderName         string = "kbfs_md_cache"
    31  )
    32  
    33  // diskMDCacheConfig specifies the interfaces that a DiskMDCacheStandard
    34  // needs to perform its functions. This adheres to the standard libkbfs Config
    35  // API.
    36  type diskMDCacheConfig interface {
    37  	codecGetter
    38  	logMaker
    39  }
    40  
    41  type diskMDBlock struct {
    42  	// Exported only for serialization.
    43  	Buf      []byte
    44  	Ver      kbfsmd.MetadataVer
    45  	Time     time.Time
    46  	Revision kbfsmd.Revision
    47  }
    48  
    49  // DiskMDCacheLocal is the standard implementation for DiskMDCache.
    50  type DiskMDCacheLocal struct {
    51  	config diskMDCacheConfig
    52  	log    logger.Logger
    53  
    54  	// Track the cache hit rate and eviction rate
    55  	hitMeter  *ldbutils.CountMeter
    56  	missMeter *ldbutils.CountMeter
    57  	putMeter  *ldbutils.CountMeter
    58  	// Protect the disk caches from being shutdown while they're being
    59  	// accessed, and mutable data.
    60  	lock       sync.RWMutex
    61  	headsDb    *ldbutils.LevelDb // tlfID -> metadata block
    62  	tlfsCached map[tlf.ID]kbfsmd.Revision
    63  	tlfsStaged map[tlf.ID][]diskMDBlock
    64  
    65  	startedCh  chan struct{}
    66  	startErrCh chan struct{}
    67  	shutdownCh chan struct{}
    68  
    69  	closer func()
    70  }
    71  
    72  var _ DiskMDCache = (*DiskMDCacheLocal)(nil)
    73  
    74  // DiskMDCacheStartState represents whether this disk MD cache has
    75  // started or failed.
    76  type DiskMDCacheStartState int
    77  
    78  // String allows DiskMDCacheStartState to be output as a string.
    79  func (s DiskMDCacheStartState) String() string {
    80  	switch s {
    81  	case DiskMDCacheStartStateStarting:
    82  		return "starting"
    83  	case DiskMDCacheStartStateStarted:
    84  		return "started"
    85  	case DiskMDCacheStartStateFailed:
    86  		return "failed"
    87  	default:
    88  		return "unknown"
    89  	}
    90  }
    91  
    92  const (
    93  	// DiskMDCacheStartStateStarting represents when the cache is starting.
    94  	DiskMDCacheStartStateStarting DiskMDCacheStartState = iota
    95  	// DiskMDCacheStartStateStarted represents when the cache has started.
    96  	DiskMDCacheStartStateStarted
    97  	// DiskMDCacheStartStateFailed represents when the cache has failed to
    98  	// start.
    99  	DiskMDCacheStartStateFailed
   100  )
   101  
   102  // DiskMDCacheStatus represents the status of the MD cache.
   103  type DiskMDCacheStatus struct {
   104  	StartState DiskMDCacheStartState
   105  	NumMDs     uint64
   106  	NumStaged  uint64
   107  	Hits       ldbutils.MeterStatus
   108  	Misses     ldbutils.MeterStatus
   109  	Puts       ldbutils.MeterStatus
   110  	DBStats    []string `json:",omitempty"`
   111  }
   112  
   113  // newDiskMDCacheLocalFromStorage creates a new *DiskMDCacheLocal
   114  // with the passed-in storage.Storage interfaces as storage layers for each
   115  // cache.
   116  func newDiskMDCacheLocalFromStorage(
   117  	config diskMDCacheConfig, headsStorage storage.Storage, mode InitMode) (
   118  	cache *DiskMDCacheLocal, err error) {
   119  	log := config.MakeLogger("DMC")
   120  	closers := make([]io.Closer, 0, 1)
   121  	closer := func() {
   122  		for _, c := range closers {
   123  			closeErr := c.Close()
   124  			if closeErr != nil {
   125  				log.Warning("Error closing leveldb or storage: %+v", closeErr)
   126  			}
   127  		}
   128  	}
   129  	defer func() {
   130  		if err != nil {
   131  			err = errors.WithStack(err)
   132  			closer()
   133  		}
   134  	}()
   135  	mdDbOptions := ldbutils.LeveldbOptions(mode)
   136  	mdDbOptions.CompactionTableSize = defaultMDCacheTableSize
   137  	mdDbOptions.Filter = filter.NewBloomFilter(16)
   138  	headsDb, err := ldbutils.OpenLevelDbWithOptions(headsStorage, mdDbOptions)
   139  	if err != nil {
   140  		return nil, err
   141  	}
   142  	closers = append(closers, headsDb)
   143  
   144  	startedCh := make(chan struct{})
   145  	startErrCh := make(chan struct{})
   146  	cache = &DiskMDCacheLocal{
   147  		config:     config,
   148  		hitMeter:   ldbutils.NewCountMeter(),
   149  		missMeter:  ldbutils.NewCountMeter(),
   150  		putMeter:   ldbutils.NewCountMeter(),
   151  		log:        log,
   152  		headsDb:    headsDb,
   153  		tlfsStaged: make(map[tlf.ID][]diskMDBlock),
   154  		startedCh:  startedCh,
   155  		startErrCh: startErrCh,
   156  		shutdownCh: make(chan struct{}),
   157  		closer:     closer,
   158  	}
   159  	// Sync the MD counts asynchronously so syncing doesn't block init.
   160  	// Since this method blocks, any Get or Put requests to the disk MD
   161  	// cache will block until this is done. The log will contain the beginning
   162  	// and end of this sync.
   163  	go func() {
   164  		err := cache.syncMDCountsFromDb()
   165  		if err != nil {
   166  			close(startErrCh)
   167  			closer()
   168  			log.Warning("Disabling disk MD cache due to error syncing the "+
   169  				"MD counts from DB: %+v", err)
   170  			return
   171  		}
   172  		close(startedCh)
   173  	}()
   174  	return cache, nil
   175  }
   176  
   177  // newDiskMDCacheLocal creates a new *DiskMDCacheLocal with a
   178  // specified directory on the filesystem as storage.
   179  func newDiskMDCacheLocal(
   180  	config diskBlockCacheConfig, dirPath string, mode InitMode) (
   181  	cache *DiskMDCacheLocal, err error) {
   182  	log := config.MakeLogger("DMC")
   183  	defer func() {
   184  		if err != nil {
   185  			log.Error("Error initializing MD cache: %+v", err)
   186  		}
   187  	}()
   188  	cachePath := filepath.Join(dirPath, mdCacheFolderName)
   189  	versionPath, err := ldbutils.GetVersionedPathForDb(
   190  		log, cachePath, "disk md cache", currentDiskMDCacheVersion)
   191  	if err != nil {
   192  		return nil, err
   193  	}
   194  	headsDbPath := filepath.Join(versionPath, headsDbFilename)
   195  	headsStorage, err := storage.OpenFile(headsDbPath, false)
   196  	if err != nil {
   197  		return nil, err
   198  	}
   199  	defer func() {
   200  		if err != nil {
   201  			headsStorage.Close()
   202  		}
   203  	}()
   204  	return newDiskMDCacheLocalFromStorage(config, headsStorage, mode)
   205  }
   206  
   207  // WaitUntilStarted waits until this cache has started.
   208  func (cache *DiskMDCacheLocal) WaitUntilStarted() error {
   209  	select {
   210  	case <-cache.startedCh:
   211  		return nil
   212  	case <-cache.startErrCh:
   213  		return DiskMDCacheError{"error starting channel"}
   214  	}
   215  }
   216  
   217  func (cache *DiskMDCacheLocal) syncMDCountsFromDb() error {
   218  	cache.log.Debug("+ syncMDCountsFromDb begin")
   219  	defer cache.log.Debug("- syncMDCountsFromDb end")
   220  	// We take a write lock for this to prevent any reads from happening while
   221  	// we're syncing the MD counts.
   222  	cache.lock.Lock()
   223  	defer cache.lock.Unlock()
   224  
   225  	tlfsCached := make(map[tlf.ID]kbfsmd.Revision)
   226  	iter := cache.headsDb.NewIterator(nil, nil)
   227  	defer iter.Release()
   228  	for iter.Next() {
   229  		var tlfID tlf.ID
   230  		err := tlfID.UnmarshalBinary(iter.Key())
   231  		if err != nil {
   232  			return err
   233  		}
   234  
   235  		var md diskMDBlock
   236  		err = cache.config.Codec().Decode(iter.Value(), &md)
   237  		if err != nil {
   238  			return err
   239  		}
   240  
   241  		tlfsCached[tlfID] = md.Revision
   242  	}
   243  	cache.tlfsCached = tlfsCached
   244  	return nil
   245  }
   246  
   247  // getMetadataLocked retrieves the metadata for a block in the cache, or
   248  // returns leveldb.ErrNotFound and a zero-valued metadata otherwise.
   249  func (cache *DiskMDCacheLocal) getMetadataLocked(
   250  	tlfID tlf.ID, metered bool) (metadata diskMDBlock, err error) {
   251  	var hitMeter, missMeter *ldbutils.CountMeter
   252  	if ldbutils.Metered {
   253  		hitMeter = cache.hitMeter
   254  		missMeter = cache.missMeter
   255  	}
   256  
   257  	metadataBytes, err := cache.headsDb.GetWithMeter(
   258  		tlfID.Bytes(), hitMeter, missMeter)
   259  	if err != nil {
   260  		return diskMDBlock{}, err
   261  	}
   262  	err = cache.config.Codec().Decode(metadataBytes, &metadata)
   263  	if err != nil {
   264  		return diskMDBlock{}, err
   265  	}
   266  	return metadata, nil
   267  }
   268  
   269  // checkAndLockCache checks whether the cache is started.
   270  func (cache *DiskMDCacheLocal) checkCacheLocked(
   271  	ctx context.Context, method string) error {
   272  	// First see if the context has expired since we began.
   273  	select {
   274  	case <-ctx.Done():
   275  		return ctx.Err()
   276  	default:
   277  	}
   278  
   279  	select {
   280  	case <-cache.startedCh:
   281  	case <-cache.startErrCh:
   282  		// The cache will never be started. No need for a stack here since this
   283  		// could happen anywhere.
   284  		return DiskCacheStartingError{method}
   285  	default:
   286  		// If the cache hasn't started yet, return an error.  No need for a
   287  		// stack here since this could happen anywhere.
   288  		return DiskCacheStartingError{method}
   289  	}
   290  	// shutdownCh has to be checked under lock, otherwise we can race.
   291  	select {
   292  	case <-cache.shutdownCh:
   293  		return errors.WithStack(DiskCacheClosedError{method})
   294  	default:
   295  	}
   296  	if cache.headsDb == nil {
   297  		return errors.WithStack(DiskCacheClosedError{method})
   298  	}
   299  	return nil
   300  }
   301  
   302  // Get implements the DiskMDCache interface for DiskMDCacheLocal.
   303  func (cache *DiskMDCacheLocal) Get(
   304  	ctx context.Context, tlfID tlf.ID) (
   305  	buf []byte, ver kbfsmd.MetadataVer, timestamp time.Time, err error) {
   306  	cache.lock.RLock()
   307  	defer cache.lock.RUnlock()
   308  	err = cache.checkCacheLocked(ctx, "MD(Get)")
   309  	if err != nil {
   310  		return nil, -1, time.Time{}, err
   311  	}
   312  
   313  	if _, ok := cache.tlfsCached[tlfID]; !ok {
   314  		cache.missMeter.Mark(1)
   315  		return nil, -1, time.Time{}, errors.WithStack(ldberrors.ErrNotFound)
   316  	}
   317  
   318  	md, err := cache.getMetadataLocked(tlfID, ldbutils.Metered)
   319  	if err != nil {
   320  		return nil, -1, time.Time{}, err
   321  	}
   322  	return md.Buf, md.Ver, md.Time, nil
   323  }
   324  
   325  // Stage implements the DiskMDCache interface for DiskMDCacheLocal.
   326  func (cache *DiskMDCacheLocal) Stage(
   327  	ctx context.Context, tlfID tlf.ID, rev kbfsmd.Revision, buf []byte,
   328  	ver kbfsmd.MetadataVer, timestamp time.Time) error {
   329  	cache.lock.Lock()
   330  	defer cache.lock.Unlock()
   331  	err := cache.checkCacheLocked(ctx, "MD(Stage)")
   332  	if err != nil {
   333  		return err
   334  	}
   335  
   336  	if cachedRev, ok := cache.tlfsCached[tlfID]; ok && cachedRev >= rev {
   337  		// Ignore stages for older revisions
   338  		return nil
   339  	}
   340  
   341  	md := diskMDBlock{
   342  		Buf:      buf,
   343  		Ver:      ver,
   344  		Time:     timestamp,
   345  		Revision: rev,
   346  	}
   347  
   348  	cache.tlfsStaged[tlfID] = append(cache.tlfsStaged[tlfID], md)
   349  	return nil
   350  }
   351  
   352  // Commit implements the DiskMDCache interface for DiskMDCacheLocal.
   353  func (cache *DiskMDCacheLocal) Commit(
   354  	ctx context.Context, tlfID tlf.ID, rev kbfsmd.Revision) error {
   355  	cache.lock.Lock()
   356  	defer cache.lock.Unlock()
   357  	err := cache.checkCacheLocked(ctx, "MD(Commit)")
   358  	if err != nil {
   359  		return err
   360  	}
   361  
   362  	stagedMDs := cache.tlfsStaged[tlfID]
   363  	if len(stagedMDs) == 0 {
   364  		// Nothing to do.
   365  		return nil
   366  	}
   367  	newStagedMDs := make([]diskMDBlock, 0, len(stagedMDs)-1)
   368  	foundMD := false
   369  	// The staged MDs list is unordered, so iterate through the whole
   370  	// thing to find what should remain after commiting `rev`.
   371  	for _, md := range stagedMDs {
   372  		switch {
   373  		case md.Revision > rev:
   374  			newStagedMDs = append(newStagedMDs, md)
   375  			continue
   376  		case md.Revision < rev:
   377  			continue
   378  		case foundMD:
   379  			// Duplicate.
   380  			continue
   381  		}
   382  		foundMD = true
   383  
   384  		encodedMetadata, err := cache.config.Codec().Encode(&md)
   385  		if err != nil {
   386  			return err
   387  		}
   388  
   389  		err = cache.headsDb.PutWithMeter(
   390  			tlfID.Bytes(), encodedMetadata, cache.putMeter)
   391  		if err != nil {
   392  			return err
   393  		}
   394  	}
   395  
   396  	if !foundMD {
   397  		// Nothing to do.
   398  		return nil
   399  	}
   400  
   401  	cache.tlfsCached[tlfID] = rev
   402  	if len(newStagedMDs) == 0 {
   403  		delete(cache.tlfsStaged, tlfID)
   404  	} else {
   405  		cache.tlfsStaged[tlfID] = newStagedMDs
   406  	}
   407  	return nil
   408  }
   409  
   410  // Unstage implements the DiskMDCache interface for DiskMDCacheLocal.
   411  func (cache *DiskMDCacheLocal) Unstage(
   412  	ctx context.Context, tlfID tlf.ID, rev kbfsmd.Revision) error {
   413  	cache.lock.Lock()
   414  	defer cache.lock.Unlock()
   415  	err := cache.checkCacheLocked(ctx, "MD(Unstage)")
   416  	if err != nil {
   417  		return err
   418  	}
   419  
   420  	// Just remove the first one matching `rev`.
   421  	stagedMDs := cache.tlfsStaged[tlfID]
   422  	for i, md := range stagedMDs {
   423  		if md.Revision == rev {
   424  			if len(stagedMDs) == 1 {
   425  				delete(cache.tlfsStaged, tlfID)
   426  			} else {
   427  				cache.tlfsStaged[tlfID] = append(
   428  					stagedMDs[:i], stagedMDs[i+1:]...)
   429  			}
   430  			return nil
   431  		}
   432  	}
   433  
   434  	return nil
   435  }
   436  
   437  // Status implements the DiskMDCache interface for DiskMDCacheLocal.
   438  func (cache *DiskMDCacheLocal) Status(ctx context.Context) DiskMDCacheStatus {
   439  	select {
   440  	case <-cache.startedCh:
   441  	case <-cache.startErrCh:
   442  		return DiskMDCacheStatus{StartState: DiskMDCacheStartStateFailed}
   443  	default:
   444  		return DiskMDCacheStatus{StartState: DiskMDCacheStartStateStarting}
   445  	}
   446  
   447  	cache.lock.RLock()
   448  	defer cache.lock.RUnlock()
   449  	numStaged := uint64(0)
   450  	for _, mds := range cache.tlfsStaged {
   451  		numStaged += uint64(len(mds))
   452  	}
   453  
   454  	var dbStats []string
   455  	if err := cache.checkCacheLocked(ctx, "MD(Status)"); err == nil {
   456  		dbStats, err = cache.headsDb.StatStrings()
   457  		if err != nil {
   458  			cache.log.CDebugf(ctx, "Couldn't get db stats: %+v", err)
   459  		}
   460  	}
   461  
   462  	return DiskMDCacheStatus{
   463  		StartState: DiskMDCacheStartStateStarted,
   464  		NumMDs:     uint64(len(cache.tlfsCached)),
   465  		NumStaged:  numStaged,
   466  		Hits:       ldbutils.RateMeterToStatus(cache.hitMeter),
   467  		Misses:     ldbutils.RateMeterToStatus(cache.missMeter),
   468  		Puts:       ldbutils.RateMeterToStatus(cache.putMeter),
   469  		DBStats:    dbStats,
   470  	}
   471  }
   472  
   473  // Shutdown implements the DiskMDCache interface for DiskMDCacheLocal.
   474  func (cache *DiskMDCacheLocal) Shutdown(ctx context.Context) {
   475  	// Wait for the cache to either finish starting or error.
   476  	select {
   477  	case <-cache.startedCh:
   478  	case <-cache.startErrCh:
   479  		return
   480  	}
   481  	cache.lock.Lock()
   482  	defer cache.lock.Unlock()
   483  	// shutdownCh has to be checked under lock, otherwise we can race.
   484  	select {
   485  	case <-cache.shutdownCh:
   486  		cache.log.CWarningf(ctx, "Shutdown called more than once")
   487  		return
   488  	default:
   489  	}
   490  	close(cache.shutdownCh)
   491  	if cache.headsDb == nil {
   492  		return
   493  	}
   494  	cache.closer()
   495  	cache.headsDb = nil
   496  	cache.hitMeter.Shutdown()
   497  	cache.missMeter.Shutdown()
   498  	cache.putMeter.Shutdown()
   499  }