github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/kbfs/libkbfs/unflushed_path_cache.go (about)

     1  // Copyright 2016 Keybase Inc. All rights reserved.
     2  // Use of this source code is governed by a BSD
     3  // license that can be found in the LICENSE file.
     4  
     5  package libkbfs
     6  
     7  import (
     8  	"fmt"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/keybase/client/go/kbfs/idutil"
    13  	"github.com/keybase/client/go/kbfs/kbfscodec"
    14  	"github.com/keybase/client/go/kbfs/kbfscrypto"
    15  	"github.com/keybase/client/go/kbfs/kbfsmd"
    16  	"github.com/keybase/client/go/logger"
    17  	"github.com/keybase/client/go/protocol/keybase1"
    18  	"golang.org/x/net/context"
    19  )
    20  
// unflushedPathCacheState tracks the lifecycle of an
// unflushedPathCache: it starts uninitialized, moves to initializing
// while one caller builds the cache, and ends up initialized.
type unflushedPathCacheState int

const (
	// upcUninitialized: no cache exists yet, and no one is building one.
	upcUninitialized unflushedPathCacheState = iota
	// upcInitializing: some caller is currently building the cache;
	// concurrent operations are queued until it finishes.
	upcInitializing
	// upcInitialized: the cache is ready for direct reads and updates.
	upcInitialized
)
    28  
// unflushedPathsPerRevMap is the set of paths (canonical plaintext
// strings) modified by a single MD revision.
type unflushedPathsPerRevMap map[string]bool

// unflushedPathsMap maps each unflushed MD revision to the set of
// paths it modified.
type unflushedPathsMap map[kbfsmd.Revision]unflushedPathsPerRevMap
    31  
// upcQueuedOpType identifies the kind of operation queued against the
// cache while it is still initializing.
type upcQueuedOpType int

const (
	// upcOpAppend adds the paths for a newly-put revision.
	upcOpAppend upcQueuedOpType = iota
	// upcOpRemove drops the paths for a flushed revision.
	upcOpRemove
	// upcOpReinit resets the cache around a resolution/squash revision.
	upcOpReinit
)
    39  
// upcQueuedOp is an operation recorded while the cache is
// initializing, to be replayed once initialization completes.
type upcQueuedOp struct {
	// op is the kind of queued operation.
	op upcQueuedOpType

	// Remove ops don't need an info.
	info unflushedPathMDInfo

	// All op types should set this.
	rev kbfsmd.Revision

	// Only reinit ops need to set this explicitly.
	isLocalSquash bool
}
    52  
// unflushedPathCache tracks the paths that have been modified by MD
// updates that haven't yet been flushed from the journal.
type unflushedPathCache struct {
	lock sync.RWMutex
	// state is the current initialization state; guards which of the
	// fields below are meaningful.
	state unflushedPathCacheState
	// unflushedPaths is the cache proper; nil until initialized.
	unflushedPaths unflushedPathsMap
	// ready is non-nil only while initializing; it is closed (and
	// nilled out) when initialization completes or aborts, waking
	// waiters in startInitializeOrWait.
	ready chan struct{}
	// chainsPopulator is used to compute paths for new revisions; nil
	// until initialized.
	chainsPopulator chainsPathPopulator
	// queue holds operations that arrived during initialization, to
	// be replayed by the initializer.
	queue []upcQueuedOp
}
    63  
    64  // getUnflushedPaths returns a copy of the unflushed path cache if it
    65  // has been initialized, otherwise nil.  It must be called under the
    66  // same lock as the callers of appendToCache/removeFromCache.  The
    67  // caller must not modify the inner per-revision maps of the return
    68  // value.
    69  func (upc *unflushedPathCache) getUnflushedPaths() unflushedPathsMap {
    70  	upc.lock.RLock()
    71  	defer upc.lock.RUnlock()
    72  	if upc.unflushedPaths == nil {
    73  		return nil
    74  	}
    75  	cache := make(unflushedPathsMap)
    76  	// Only need to deep-copy the outer level of the map; the inner
    77  	// level (per-revision) shouldn't be modified us once it's set,
    78  	// and the caller isn't supposed to modify it.
    79  	for k, v := range upc.unflushedPaths {
    80  		cache[k] = v
    81  	}
    82  	return cache
    83  }
    84  
    85  func (upc *unflushedPathCache) doStartInitialization() (
    86  	bool, <-chan struct{}) {
    87  	upc.lock.Lock()
    88  	defer upc.lock.Unlock()
    89  	switch upc.state {
    90  	case upcUninitialized:
    91  		upc.state = upcInitializing
    92  		if upc.ready != nil {
    93  			panic("Unflushed path cache should not have a non-nil channel " +
    94  				"when uninitialized")
    95  		}
    96  		upc.ready = make(chan struct{})
    97  		return true, upc.ready
    98  	case upcInitializing:
    99  		return false, upc.ready
   100  	case upcInitialized:
   101  		return false, nil
   102  	default:
   103  		panic(fmt.Sprintf("Unknown unflushedPathsCache state: %v", upc.state))
   104  	}
   105  }
   106  
   107  // startInitializeOrWait returns true if the caller should start
   108  // initialization, otherwise false (which means the cache is already
   109  // initialized when it returns).  It may block for an extended period
   110  // of time during while another caller is initializing.
   111  func (upc *unflushedPathCache) startInitializeOrWait(ctx context.Context) (
   112  	bool, error) {
   113  	// Retry in case the original initializer has to abort due to
   114  	// error; limit the number of retries by the lifetime of `ctx`.
   115  	for {
   116  		doInit, readyCh := upc.doStartInitialization()
   117  		if doInit {
   118  			return true, nil
   119  		} else if readyCh == nil {
   120  			// Already initialized.
   121  			return false, nil
   122  		}
   123  		select {
   124  		case <-readyCh:
   125  			continue
   126  		case <-ctx.Done():
   127  			return false, ctx.Err()
   128  		}
   129  	}
   130  }
   131  
   132  func (upc *unflushedPathCache) abortInitialization() {
   133  	upc.lock.Lock()
   134  	defer upc.lock.Unlock()
   135  	upc.state = upcUninitialized
   136  	upc.queue = nil
   137  	if upc.ready != nil {
   138  		close(upc.ready)
   139  		upc.ready = nil
   140  	}
   141  }
   142  
// unflushedPathMDInfo is the subset of metadata info needed by
// unflushedPathCache.
type unflushedPathMDInfo struct {
	// revision is the MD revision this info describes.
	revision kbfsmd.Revision
	// kmd carries the key metadata and root dir entry for the revision.
	kmd KeyMetadataWithRootDirEntry
	// pmd holds the private metadata (ops) for the revision.
	pmd PrivateMetadata
	// localTimestamp is the local time the revision was created.
	localTimestamp time.Time
}
   151  
// addUnflushedPaths populates the given unflushed paths object with
// the paths touched by each revision in `mdInfos`.  The caller should
// NOT be holding any locks, as it's possible that blocks will need to
// be fetched.  Revisions already present in `unflushedPaths` are
// skipped, but only if they appear before any processed revision.
func addUnflushedPaths(ctx context.Context,
	uid keybase1.UID, key kbfscrypto.VerifyingKey, codec kbfscodec.Codec,
	log logger.Logger, osg idutil.OfflineStatusGetter,
	mdInfos []unflushedPathMDInfo, cpp chainsPathPopulator,
	unflushedPaths unflushedPathsMap) error {
	// Make chains over the entire range to get the unflushed files.
	chains := newCRChainsEmpty(cpp.obfuscatorMaker())
	if len(mdInfos) > 0 {
		// Base the chains on the most recent metadata in the range.
		mostRecentMDInfo := mdInfos[len(mdInfos)-1]
		chains.mostRecentChainMDInfo = mostRecentMDInfo.kmd
	}
	processedOne := false
	for _, mdInfo := range mdInfos {
		offline := keybase1.OfflineAvailability_NONE
		if osg != nil {
			offline = osg.OfflineAvailabilityForID(mdInfo.kmd.TlfID())
		}
		winfo := newWriterInfo(uid, key, mdInfo.revision, offline)
		if _, ok := unflushedPaths[mdInfo.revision]; ok {
			// Cached revisions may only be skipped at the start of the
			// range; a gap after a processed revision would corrupt the
			// chains.
			if processedOne {
				return fmt.Errorf("Couldn't skip revision %d after "+
					"already processing one", mdInfo.revision)
			}

			log.CDebugf(ctx, "Skipping unflushed paths for revision %d "+
				"since it's already in the cache", mdInfo.revision)
			continue
		}
		// Create the per-revision map now, so the path-filling loop
		// below can find it.
		unflushedPaths[mdInfo.revision] = make(map[string]bool)

		processedOne = true
		err := chains.addOps(codec, mdInfo.pmd, winfo, mdInfo.localTimestamp)
		if err != nil {
			return err
		}
	}
	if !processedOne {
		// Everything was already cached; nothing to compute.
		return nil
	}

	// Does the last op already have a valid path in each chain?  If
	// so, we don't need to bother populating the paths, which can
	// take a fair amount of CPU since the node cache isn't already
	// up-to-date with the current set of pointers (because the MDs
	// haven't been committed yet).
	populatePaths := false
	for _, chain := range chains.byOriginal {
		if len(chain.ops) > 0 &&
			!chain.ops[len(chain.ops)-1].getFinalPath().
				IsValidForNotification() {
			populatePaths = true
			break
		}
	}

	if populatePaths {
		err := cpp.populateChainPaths(ctx, log, chains, true)
		if err != nil {
			return err
		}
	}

	for _, chain := range chains.byOriginal {
		if len(chain.ops) > 0 {
			// Use the same final path from the chain for all ops.
			// Use the plaintext here, since this will be included
			// directly in the `.kbfs_status` output for unflushed
			// paths.
			finalPath := chain.ops[len(chain.ops)-1].getFinalPath().
				CanonicalPathPlaintext()
			for _, op := range chain.ops {
				// The per-revision map was created in the loop above,
				// so a miss here is a programming error.
				revPaths, ok := unflushedPaths[op.getWriterInfo().revision]
				if !ok {
					panic(fmt.Sprintf("No rev map for revision %d",
						op.getWriterInfo().revision))
				}
				revPaths[finalPath] = true
			}
		}
	}
	return nil
}
   237  
   238  // prepUnflushedPaths returns a set of paths that were updated in the
   239  // given revision.
   240  func (upc *unflushedPathCache) prepUnflushedPaths(ctx context.Context,
   241  	uid keybase1.UID, key kbfscrypto.VerifyingKey, codec kbfscodec.Codec,
   242  	log logger.Logger, osg idutil.OfflineStatusGetter, mdInfo unflushedPathMDInfo) (
   243  	unflushedPathsPerRevMap, error) {
   244  	cpp := func() chainsPathPopulator {
   245  		upc.lock.Lock()
   246  		defer upc.lock.Unlock()
   247  		return upc.chainsPopulator
   248  	}()
   249  
   250  	// The unflushed paths haven't been initialized yet.
   251  	if cpp == nil {
   252  		return nil, nil
   253  	}
   254  
   255  	newUnflushedPaths := make(unflushedPathsMap)
   256  	mdInfos := []unflushedPathMDInfo{mdInfo}
   257  
   258  	err := addUnflushedPaths(
   259  		ctx, uid, key, codec, log, osg, mdInfos, cpp, newUnflushedPaths)
   260  	if err != nil {
   261  		return nil, err
   262  	}
   263  	if len(newUnflushedPaths) > 1 {
   264  		return nil, fmt.Errorf("%d unflushed revisions on a single put",
   265  			len(newUnflushedPaths))
   266  	}
   267  
   268  	perRevMap, ok := newUnflushedPaths[mdInfo.revision]
   269  	if !ok {
   270  		panic(fmt.Errorf("Cannot find per-revision map for revision %d",
   271  			mdInfo.revision))
   272  	}
   273  
   274  	return perRevMap, nil
   275  }
   276  
   277  // appendToCache returns true when successful, and false if it needs
   278  // to be retried after the per-revision map is recomputed.
   279  func (upc *unflushedPathCache) appendToCache(mdInfo unflushedPathMDInfo,
   280  	perRevMap unflushedPathsPerRevMap) bool {
   281  	upc.lock.Lock()
   282  	defer upc.lock.Unlock()
   283  	switch upc.state {
   284  	case upcUninitialized:
   285  		// Nothing to do.
   286  	case upcInitializing:
   287  		// Append to queue for processing at the end of initialization.
   288  		upc.queue = append(upc.queue, upcQueuedOp{
   289  			op:   upcOpAppend,
   290  			info: mdInfo,
   291  			rev:  mdInfo.revision,
   292  		})
   293  	case upcInitialized:
   294  		if perRevMap == nil {
   295  			// This was prepared before `upc.chainsPopulator` was set,
   296  			// and needs to be done again.
   297  			return false
   298  		}
   299  		// Update the cache with the prepared paths.
   300  		upc.unflushedPaths[mdInfo.revision] = perRevMap
   301  	default:
   302  		panic(fmt.Sprintf("Unknown unflushedPathsCache state: %v", upc.state))
   303  	}
   304  	return true
   305  }
   306  
   307  func (upc *unflushedPathCache) removeFromCache(rev kbfsmd.Revision) {
   308  	upc.lock.Lock()
   309  	defer upc.lock.Unlock()
   310  	switch upc.state {
   311  	case upcUninitialized:
   312  		// Nothing to do.
   313  	case upcInitializing:
   314  		// Append to queue for processing at the end of initialization.
   315  		upc.queue = append(upc.queue, upcQueuedOp{
   316  			op:  upcOpRemove,
   317  			rev: rev,
   318  		})
   319  	case upcInitialized:
   320  		delete(upc.unflushedPaths, rev)
   321  	default:
   322  		panic(fmt.Sprintf("Unknown unflushedPathsCache state: %v", upc.state))
   323  	}
   324  }
   325  
   326  func (upc *unflushedPathCache) setCacheIfPossible(cache unflushedPathsMap,
   327  	cpp chainsPathPopulator) []upcQueuedOp {
   328  	upc.lock.Lock()
   329  	defer upc.lock.Unlock()
   330  	if len(upc.queue) > 0 {
   331  		// We need to process more appends!
   332  		queue := upc.queue
   333  		upc.queue = nil
   334  		return queue
   335  	}
   336  
   337  	upc.unflushedPaths = cache
   338  	upc.chainsPopulator = cpp
   339  	if upc.ready != nil {
   340  		close(upc.ready)
   341  		upc.ready = nil
   342  	}
   343  	upc.state = upcInitialized
   344  	return nil
   345  }
   346  
   347  func reinitUpcCache(revision kbfsmd.Revision,
   348  	unflushedPaths unflushedPathsMap, perRevMap unflushedPathsPerRevMap,
   349  	isLocalSquash bool) {
   350  	// Remove all entries equal or bigger to this revision.  Keep
   351  	// earlier revisions (likely preserved local squashes).
   352  	for rev := range unflushedPaths {
   353  		// Keep the revision if this is a local squash and it's
   354  		// smaller than the squash revision.
   355  		if isLocalSquash && rev < revision {
   356  			continue
   357  		}
   358  		delete(unflushedPaths, rev)
   359  	}
   360  	unflushedPaths[revision] = perRevMap
   361  }
   362  
// initialize should only be called when the caller saw a `true` value
// from `startInitializeOrWait()`.  It returns the unflushed paths
// associated with `irmds`.  If it returns a `false` boolean, the
// caller must abort the initialization (although as long as `err` is
// nil, the returned unflushed paths may be used).  The caller should
// not modify any of the per-revision inner maps of the returned
// unflushed path map.
func (upc *unflushedPathCache) initialize(ctx context.Context,
	uid keybase1.UID, key kbfscrypto.VerifyingKey, codec kbfscodec.Codec,
	log logger.Logger, osg idutil.OfflineStatusGetter, cpp chainsPathPopulator,
	mdInfos []unflushedPathMDInfo) (unflushedPathsMap, bool, error) {
	// First get all the paths for the given range of revisions.
	unflushedPaths := make(unflushedPathsMap)
	log.CDebugf(ctx, "Initializing unflushed path cache with %d revisions",
		len(mdInfos))
	err := addUnflushedPaths(
		ctx, uid, key, codec, log, osg, mdInfos, cpp, unflushedPaths)
	if err != nil {
		return nil, false, err
	}

	initialUnflushedPaths := make(unflushedPathsMap)
	// Only need to deep-copy the outer level of the map; the inner
	// level (per-revision) shouldn't be modified once it's set, and
	// the caller isn't supposed to modify it.
	for k, v := range unflushedPaths {
		initialUnflushedPaths[k] = v
	}

	// Try to drain the queue a few times.  We may be unable to if we
	// are continuously racing with MD puts.
	for i := 0; i < 10; i++ {
		queue := upc.setCacheIfPossible(unflushedPaths, cpp)
		if len(queue) == 0 {
			// Return the paths corresponding only to the original set
			// of RMDs, not to anything from the queue.
			return initialUnflushedPaths, true, nil
		}

		// Bail out early if the caller gave up.
		select {
		case <-ctx.Done():
			return nil, false, ctx.Err()
		default:
		}

		// Do the queued appends up to the first reinitialization.
		for len(queue) > 0 {
			// Collect the appends that precede the first reinit op
			// (if any); removes are handled separately below.
			var appends []unflushedPathMDInfo
			for _, op := range queue {
				if op.op == upcOpAppend {
					appends = append(appends, op.info)
				} else if op.op == upcOpReinit {
					break
				}
			}

			log.CDebugf(ctx, "Processing unflushed paths for %d items in "+
				"the append queue", len(appends))
			err := addUnflushedPaths(
				ctx, uid, key, codec, log, osg, appends, cpp, unflushedPaths)
			if err != nil {
				return nil, false, err
			}

			// Do the queued removes up to the first reinitialization.
			// Then to do the reinitialization and repeat using the
			// remainder of the queue.
			reinit := false
			// NOTE: this `i` shadows the outer retry counter on purpose.
			for i, op := range queue {
				if op.op == upcOpRemove {
					delete(unflushedPaths, op.rev)
				} else if op.op == upcOpReinit {
					perRevMap, err := upc.prepUnflushedPaths(
						ctx, uid, key, codec, log, osg, op.info)
					if err != nil {
						return nil, false, err
					}
					reinitUpcCache(
						op.rev, unflushedPaths, perRevMap, op.isLocalSquash)
					// Loop again over whatever follows the reinit.
					queue = queue[i+1:]
					reinit = true
					break
				}
			}
			if !reinit {
				queue = nil
			}
		}

	}
	// If we can't catch up to the queue, then instruct the caller to
	// abort the initialization.
	return initialUnflushedPaths, false, nil
}
   457  
   458  // reinitializeWithResolution returns true when successful, and false
   459  // if it needs to be retried after the per-revision map is recomputed.
   460  func (upc *unflushedPathCache) reinitializeWithResolution(
   461  	mdInfo unflushedPathMDInfo, perRevMap unflushedPathsPerRevMap,
   462  	isLocalSquash bool) bool {
   463  	upc.lock.Lock()
   464  	defer upc.lock.Unlock()
   465  
   466  	if perRevMap == nil {
   467  		switch upc.state {
   468  		case upcInitialized:
   469  			// Initialization started since the perRevMap was created,
   470  			// so try again.
   471  			return false
   472  		case upcInitializing:
   473  			// Save this reinit for later.
   474  			upc.queue = append(upc.queue, upcQueuedOp{
   475  				op:            upcOpReinit,
   476  				info:          mdInfo,
   477  				rev:           mdInfo.revision,
   478  				isLocalSquash: isLocalSquash,
   479  			})
   480  			return true
   481  		default:
   482  			// We can't initialize with a nil revision map.
   483  			return true
   484  		}
   485  	}
   486  
   487  	if upc.unflushedPaths != nil {
   488  		reinitUpcCache(
   489  			mdInfo.revision, upc.unflushedPaths, perRevMap, isLocalSquash)
   490  	} else {
   491  		upc.unflushedPaths = unflushedPathsMap{mdInfo.revision: perRevMap}
   492  	}
   493  	upc.queue = nil
   494  	if upc.ready != nil {
   495  		close(upc.ready)
   496  		upc.ready = nil
   497  	}
   498  	upc.state = upcInitialized
   499  	return true
   500  }