github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/kbfs/libkbfs/folder_block_manager.go

     1  // Copyright 2016 Keybase Inc. All rights reserved.
     2  // Use of this source code is governed by a BSD
     3  // license that can be found in the LICENSE file.
     4  
     5  package libkbfs
     6  
     7  import (
     8  	"fmt"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/keybase/backoff"
    13  	"github.com/keybase/client/go/kbfs/data"
    14  	"github.com/keybase/client/go/kbfs/env"
    15  	"github.com/keybase/client/go/kbfs/kbfsblock"
    16  	"github.com/keybase/client/go/kbfs/kbfsmd"
    17  	"github.com/keybase/client/go/kbfs/kbfssync"
    18  	"github.com/keybase/client/go/kbfs/tlf"
    19  	"github.com/keybase/client/go/kbfs/tlfhandle"
    20  	"github.com/keybase/client/go/logger"
    21  	"github.com/keybase/client/go/protocol/keybase1"
    22  	"github.com/pkg/errors"
    23  	"golang.org/x/net/context"
    24  	"golang.org/x/sync/errgroup"
    25  )
    26  
    27  type fbmHelper interface {
    28  	getMostRecentFullyMergedMD(ctx context.Context) (
    29  		ImmutableRootMetadata, error)
    30  	finalizeGCOp(ctx context.Context, gco *GCOp) error
    31  	getLatestMergedRevision(lState *kbfssync.LockState) kbfsmd.Revision
    32  }
    33  
    34  const (
    35  	// How many pointers to downgrade in a single Archive/Delete call.
    36  	numPointersToDowngradePerChunk = 20
    37  	// Once the number of pointers being deleted in a single gc op
    38  	// passes this threshold, we'll stop garbage collection at the
    39  	// current revision.
    40  	numPointersPerGCThresholdDefault = 100
    41  	// The most revisions to consider for each QR run.
    42  	numMaxRevisionsPerQR = 100
    43  )
    44  
    45  type blockDeleteType int
    46  
    47  const (
    48  	// Delete the blocks only if the given MD failed to make it to the
    49  	// servers.
    50  	blockDeleteOnMDFail blockDeleteType = iota
    51  
    52  	// Always delete the blocks, without first checking if the given
    53  	// revision was successful.  This is just an optimization to avoid
    54  	// fetching the MD from the server when we know for sure it had
    55  	// failed.
    56  	blockDeleteAlways
    57  )
    58  
    59  type blocksToDelete struct {
    60  	md      ReadOnlyRootMetadata
    61  	blocks  []data.BlockPointer
    62  	bdType  blockDeleteType
    63  	backoff backoff.BackOff
    64  }
    65  
    66  // folderBlockManager is a helper class for managing the blocks in a
    67  // particular TLF.  It archives historical blocks and reclaims quota
    68  // usage, all in the background.
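//
// A minimal sketch of the intended lifecycle, assuming the enclosing
// folderBranchOps wiring that normally constructs and drives it (not
// shown here):
//
//	fbm := newFolderBlockManager(appStateUpdater, config, fb, standard, helper)
//	fbm.archiveUnrefBlocks(md.ReadOnly())    // after a successful merged MD write
//	fbm.cleanUpBlockState(md.ReadOnly(), bps, blockDeleteOnMDFail) // after a failed put
//	fbm.shutdown()                           // on teardown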
    69  type folderBlockManager struct {
    70  	appStateUpdater env.AppStateUpdater
    71  	config          Config
    72  	log             logger.Logger
    73  	shutdownChan    chan struct{}
    74  	id              tlf.ID
    75  
    76  	numPointersPerGCThreshold int
    77  
    78  	// A queue of MD updates for this folder that need to have their
    79  	// unref's blocks archived
    80  	archiveChan chan ReadOnlyRootMetadata
    81  
    82  	archivePauseChan chan (<-chan struct{})
    83  
    84  	// archiveGroup tracks the outstanding archives.
    85  	archiveGroup kbfssync.RepeatedWaitGroup
    86  
    87  	archiveCancelLock sync.Mutex
    88  	archiveCancel     context.CancelFunc
    89  
    90  	// blocksToDeleteChan is a list of blocks, for a given
    91  	// metadata revision, that may have been Put as part of a failed
    92  	// MD write. These blocks should be deleted as soon as we know
    93  	// for sure that the MD write isn't visible to others.
    94  	// TODO: Persist these to disk?
    95  	blocksToDeleteChan      chan blocksToDelete
    96  	blocksToDeletePauseChan chan (<-chan struct{})
    97  	blocksToDeleteWaitGroup kbfssync.RepeatedWaitGroup
    98  
    99  	blocksToDeleteCancelLock sync.Mutex
   100  	blocksToDeleteCancel     context.CancelFunc
   101  
   102  	// forceReclamation forces the manager to start a reclamation
   103  	// process.
   104  	forceReclamationChan chan struct{}
   105  
   106  	// reclamationGroup tracks the outstanding quota reclamations.
   107  	reclamationGroup kbfssync.RepeatedWaitGroup
   108  
   109  	reclamationCancelLock sync.Mutex
   110  	reclamationCancel     context.CancelFunc
   111  
   112  	// latestMergedChan signals when we learn about a newer latest
   113  	// merged revision for this TLF.
   114  	latestMergedChan chan struct{}
   115  
   116  	// cleanDiskCachesGroup tracks the outstanding disk-cache cleanings.
   117  	cleanDiskCachesGroup kbfssync.RepeatedWaitGroup
   118  
   119  	cleanDiskCacheCancelLock sync.Mutex
   120  	cleanDiskCacheCancel     context.CancelFunc
   121  
   122  	helper fbmHelper
   123  
   124  	// Remembers what happened last time during quota reclamation.
   125  	lastQRLock          sync.Mutex
   126  	lastQRHeadRev       kbfsmd.Revision
   127  	wasLastQRComplete   bool
   128  	lastReclamationTime time.Time
   129  	lastReclaimedRev    kbfsmd.Revision
   130  }
   131  
   132  func newFolderBlockManager(
   133  	appStateUpdater env.AppStateUpdater, config Config, fb data.FolderBranch,
   134  	bType branchType, helper fbmHelper) *folderBlockManager {
   135  	tlfStringFull := fb.Tlf.String()
   136  	log := config.MakeLogger(fmt.Sprintf("FBM %s", tlfStringFull[:8]))
   137  
   138  	var latestMergedChan chan struct{}
   139  	qrEnabled :=
   140  		fb.Branch == data.MasterBranch && config.Mode().QuotaReclamationEnabled()
   141  	if qrEnabled {
   142  		latestMergedChan = make(chan struct{}, 1)
   143  	}
   144  
   145  	fbm := &folderBlockManager{
   146  		appStateUpdater:           appStateUpdater,
   147  		config:                    config,
   148  		log:                       log,
   149  		shutdownChan:              make(chan struct{}),
   150  		id:                        fb.Tlf,
   151  		numPointersPerGCThreshold: numPointersPerGCThresholdDefault,
   152  		archiveChan:               make(chan ReadOnlyRootMetadata, 500),
   153  		archivePauseChan:          make(chan (<-chan struct{})),
   154  		blocksToDeleteChan:        make(chan blocksToDelete, 25),
   155  		blocksToDeletePauseChan:   make(chan (<-chan struct{})),
   156  		forceReclamationChan:      make(chan struct{}, 1),
   157  		latestMergedChan:          latestMergedChan,
   158  		helper:                    helper,
   159  	}
   160  
   161  	if bType != standard || !config.Mode().BlockManagementEnabled() {
   162  		return fbm
   163  	}
   164  
   165  	go fbm.archiveBlocksInBackground()
   166  	go fbm.deleteBlocksInBackground()
   167  	if qrEnabled {
   168  		go fbm.reclaimQuotaInBackground()
   169  		go fbm.cleanDiskCachesInBackground()
   170  	}
   171  	return fbm
   172  }
   173  
   174  func (fbm *folderBlockManager) setBlocksToDeleteCancel(cancel context.CancelFunc) {
   175  	fbm.blocksToDeleteCancelLock.Lock()
   176  	defer fbm.blocksToDeleteCancelLock.Unlock()
   177  	fbm.blocksToDeleteCancel = cancel
   178  }
   179  
   180  func (fbm *folderBlockManager) cancelBlocksToDelete() {
   181  	blocksToDeleteCancel := func() context.CancelFunc {
   182  		fbm.blocksToDeleteCancelLock.Lock()
   183  		defer fbm.blocksToDeleteCancelLock.Unlock()
   184  		blocksToDeleteCancel := fbm.blocksToDeleteCancel
   185  		fbm.blocksToDeleteCancel = nil
   186  		return blocksToDeleteCancel
   187  	}()
   188  	if blocksToDeleteCancel != nil {
   189  		blocksToDeleteCancel()
   190  	}
   191  }
   192  
   193  func (fbm *folderBlockManager) setArchiveCancel(cancel context.CancelFunc) {
   194  	fbm.archiveCancelLock.Lock()
   195  	defer fbm.archiveCancelLock.Unlock()
   196  	fbm.archiveCancel = cancel
   197  }
   198  
   199  func (fbm *folderBlockManager) cancelArchive() {
   200  	archiveCancel := func() context.CancelFunc {
   201  		fbm.archiveCancelLock.Lock()
   202  		defer fbm.archiveCancelLock.Unlock()
   203  		archiveCancel := fbm.archiveCancel
   204  		fbm.archiveCancel = nil
   205  		return archiveCancel
   206  	}()
   207  	if archiveCancel != nil {
   208  		archiveCancel()
   209  	}
   210  }
   211  
   212  func (fbm *folderBlockManager) setReclamationCancel(cancel context.CancelFunc) {
   213  	fbm.reclamationCancelLock.Lock()
   214  	defer fbm.reclamationCancelLock.Unlock()
   215  	fbm.reclamationCancel = cancel
   216  }
   217  
   218  func (fbm *folderBlockManager) cancelReclamation() {
   219  	reclamationCancel := func() context.CancelFunc {
   220  		fbm.reclamationCancelLock.Lock()
   221  		defer fbm.reclamationCancelLock.Unlock()
   222  		reclamationCancel := fbm.reclamationCancel
   223  		fbm.reclamationCancel = nil
   224  		return reclamationCancel
   225  	}()
   226  	if reclamationCancel != nil {
   227  		reclamationCancel()
   228  	}
   229  }
   230  
   231  func (fbm *folderBlockManager) setCleanDiskCacheCancel(
   232  	cancel context.CancelFunc) {
   233  	fbm.cleanDiskCacheCancelLock.Lock()
   234  	defer fbm.cleanDiskCacheCancelLock.Unlock()
   235  	fbm.cleanDiskCacheCancel = cancel
   236  }
   237  
   238  func (fbm *folderBlockManager) cancelCleanDiskCache() {
   239  	cleanDiskCacheCancel := func() context.CancelFunc {
   240  		fbm.cleanDiskCacheCancelLock.Lock()
   241  		defer fbm.cleanDiskCacheCancelLock.Unlock()
   242  		cleanDiskCacheCancel := fbm.cleanDiskCacheCancel
   243  		fbm.cleanDiskCacheCancel = nil
   244  		return cleanDiskCacheCancel
   245  	}()
   246  	if cleanDiskCacheCancel != nil {
   247  		cleanDiskCacheCancel()
   248  	}
   249  }
   250  
   251  func (fbm *folderBlockManager) shutdown() {
   252  	close(fbm.shutdownChan)
   253  	fbm.cancelArchive()
   254  	fbm.cancelBlocksToDelete()
   255  	fbm.cancelReclamation()
   256  	fbm.cancelCleanDiskCache()
   257  }
   258  
   259  // cleanUpBlockState cleans up any blocks that may have been orphaned
   260  // by a failure during or after blocks have been sent to the
   261  // server. This is usually used in a defer right before a call to
   262  // fbo.doBlockPuts like so:
   263  //
   264  //	defer func() {
   265  //	  if err != nil {
   266  //	    ...cleanUpBlockState(md.ReadOnly(), bps)
   267  //	  }
   268  //	}()
   269  //
   270  //	... = ...doBlockPuts(ctx, md.ReadOnly(), *bps)
   271  //
   272  // The exception is for when blocks might get reused across multiple
   273  // attempts at the same operation (like for a Sync).  In that case,
   274  // failed blocks should be built up in a separate data structure, and
   275  // this should be called when the operation finally succeeds.
   276  func (fbm *folderBlockManager) cleanUpBlockState(
   277  	md ReadOnlyRootMetadata, bps blockPutState, bdType blockDeleteType) {
   278  	fbm.log.CDebugf(
   279  		context.TODO(), "Clean up md %d %s, bdType=%d", md.Revision(),
   280  		md.MergedStatus(), bdType)
   281  	expBackoff := backoff.NewExponentialBackOff()
   282  	// Never give up when trying to delete blocks; it might just take
   283  	// a long time to confirm with the server whether a revision
   284  	// succeeded or not.
   285  	expBackoff.MaxElapsedTime = 0
   286  	toDelete := blocksToDelete{
   287  		md:      md,
   288  		bdType:  bdType,
   289  		backoff: expBackoff,
   290  	}
   291  	toDelete.blocks = append(toDelete.blocks, bps.Ptrs()...)
   292  	fbm.enqueueBlocksToDelete(toDelete)
   293  }
   294  
   295  func (fbm *folderBlockManager) enqueueBlocksToDelete(toDelete blocksToDelete) {
   296  	fbm.blocksToDeleteWaitGroup.Add(1)
   297  	fbm.blocksToDeleteChan <- toDelete
   298  }
   299  
   300  func (fbm *folderBlockManager) enqueueBlocksToDeleteAfterShortDelay(
   301  	ctx context.Context, toDelete blocksToDelete) {
   302  	fbm.blocksToDeleteWaitGroup.Add(1)
   303  	duration := toDelete.backoff.NextBackOff()
   304  	if duration == backoff.Stop {
   305  		panic(fmt.Sprintf("Backoff stopped while checking whether we "+
   306  			"should delete revision %d", toDelete.md.Revision()))
   307  	}
   308  	time.AfterFunc(duration,
   309  		func() {
   310  			select {
   311  			case fbm.blocksToDeleteChan <- toDelete:
   312  			case <-fbm.shutdownChan:
   313  				fbm.blocksToDeleteWaitGroup.Done()
   314  			}
   315  		})
   316  }
   317  
   318  // enqueueBlocksToDeleteNoWait enqueues blocks to be deleted just like
   319  // enqueueBlocksToDelete, except that when fbm.blocksToDeleteChan is full, it
   320  // doesn't block, but instead spawns a goroutine to handle the sending.
   321  //
   322  // This is necessary to prevent a situation like the following:
   323  //  1. A delete fails when fbm.blocksToDeleteChan is full
   324  //  2. The goroutine tries to put the failed toDelete back to
   325  //     fbm.blocksToDeleteChan
   326  //  3. Step 2 blocks (the send becomes synchronous) because
   327  //     fbm.blocksToDeleteChan is already full
   328  //  4. fbm.blocksToDeleteChan never gets drained, because the goroutine
   329  //     that drains it is itself blocked sending on the same channel.
   330  //  5. Deadlock!
   331  func (fbm *folderBlockManager) enqueueBlocksToDeleteNoWait(toDelete blocksToDelete) {
   332  	fbm.blocksToDeleteWaitGroup.Add(1)
   333  
   334  	select {
   335  	case fbm.blocksToDeleteChan <- toDelete:
   336  		return
   337  	default:
   338  		go func() { fbm.blocksToDeleteChan <- toDelete }()
   339  	}
   340  }
   341  
   342  func isArchivableOp(op op) bool {
   343  	switch op.(type) {
   344  	case *createOp:
   345  		return true
   346  	case *rmOp:
   347  		return true
   348  	case *renameOp:
   349  		return true
   350  	case *syncOp:
   351  		return true
   352  	case *setAttrOp:
   353  		return true
   354  	case *resolutionOp:
   355  		return true
   356  	default:
   357  		// rekey ops don't have anything to archive, and gc
   358  		// ops only have deleted blocks.
   359  		return false
   360  	}
   361  }
   362  
   363  func isArchivableMDOrError(md ReadOnlyRootMetadata) error {
   364  	if md.MergedStatus() != kbfsmd.Merged {
   365  		return fmt.Errorf("md rev=%d is not merged", md.Revision())
   366  	}
   367  
   368  	for _, op := range md.data.Changes.Ops {
   369  		if !isArchivableOp(op) {
   370  			return fmt.Errorf(
   371  				"md rev=%d has unarchivable op %s",
   372  				md.Revision(), op)
   373  		}
   374  	}
   375  	return nil
   376  }
   377  
   378  func (fbm *folderBlockManager) archiveUnrefBlocks(md ReadOnlyRootMetadata) {
   379  	// Don't archive for unmerged revisions, because conflict
   380  	// resolution might undo some of the unreferences.
   381  	if md.MergedStatus() != kbfsmd.Merged {
   382  		return
   383  	}
   384  
   385  	if err := isArchivableMDOrError(md); err != nil {
   386  		panic(err)
   387  	}
   388  
   389  	fbm.archiveGroup.Add(1)
   390  	fbm.archiveChan <- md
   391  }
   392  
   393  // archiveUnrefBlocksNoWait enqueues the MD for archiving without
   394  // blocking.  By the time it returns, the archive group has been
   395  // incremented so future waits will block on this archive.  This
   396  // method is for internal use within folderBlockManager only.
   397  func (fbm *folderBlockManager) archiveUnrefBlocksNoWait(md ReadOnlyRootMetadata) {
   398  	// Don't archive for unmerged revisions, because conflict
   399  	// resolution might undo some of the unreferences.
   400  	if md.MergedStatus() != kbfsmd.Merged {
   401  		return
   402  	}
   403  
   404  	if err := isArchivableMDOrError(md); err != nil {
   405  		panic(err)
   406  	}
   407  
   408  	fbm.archiveGroup.Add(1)
   409  
   410  	// Don't block if the channel is full; instead do the send in a
   411  	// background goroutine.  We've already done the Add above, so the
   412  	// wait calls should all work just fine.
   413  	select {
   414  	case fbm.archiveChan <- md:
   415  		return
   416  	default:
   417  		go func() { fbm.archiveChan <- md }()
   418  	}
   419  }
   420  
   421  func (fbm *folderBlockManager) waitForArchives(ctx context.Context) error {
   422  	return fbm.archiveGroup.Wait(ctx)
   423  }
   424  
   425  func (fbm *folderBlockManager) waitForDeletingBlocks(ctx context.Context) error {
   426  	return fbm.blocksToDeleteWaitGroup.Wait(ctx)
   427  }
   428  
   429  func (fbm *folderBlockManager) waitForQuotaReclamations(
   430  	ctx context.Context) error {
   431  	return fbm.reclamationGroup.Wait(ctx)
   432  }
   433  
   434  func (fbm *folderBlockManager) waitForDiskCacheCleans(
   435  	ctx context.Context) error {
   436  	return fbm.cleanDiskCachesGroup.Wait(ctx)
   437  }
   438  
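// forceQuotaReclamation signals the background reclamation loop to run
// immediately.  If a forced reclamation is already pending, the extra
// signal is dropped and the group count is decremented again.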
   439  func (fbm *folderBlockManager) forceQuotaReclamation() {
   440  	fbm.reclamationGroup.Add(1)
   441  	select {
   442  	case fbm.forceReclamationChan <- struct{}{}:
   443  	default:
   444  		fbm.reclamationGroup.Done()
   445  	}
   446  }
   447  
   448  // doChunkedDowngrades sends batched archive or delete messages to the
   449  // block server for the given block pointers.  For deletes, it returns
   450  // a list of block IDs that no longer have any references.
   451  func (fbm *folderBlockManager) doChunkedDowngrades(ctx context.Context,
   452  	tlfID tlf.ID, ptrs []data.BlockPointer, archive bool) (
   453  	[]kbfsblock.ID, error) {
   454  	fbm.log.CDebugf(ctx, "Downgrading %d pointers (archive=%t)",
   455  		len(ptrs), archive)
   456  	bops := fbm.config.BlockOps()
   457  
   458  	// Round up to find the number of chunks.
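	// E.g., 45 pointers with a chunk size of 20 yield (45+19)/20 = 3 chunks.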
   459  	numChunks := (len(ptrs) + numPointersToDowngradePerChunk - 1) /
   460  		numPointersToDowngradePerChunk
   461  	numWorkers := numChunks
   462  	if numWorkers > maxParallelBlockPuts {
   463  		numWorkers = maxParallelBlockPuts
   464  	}
   465  	chunks := make(chan []data.BlockPointer, numChunks)
   466  
   467  	var wg sync.WaitGroup
   468  	defer wg.Wait()
   469  
   470  	ctx, cancel := context.WithCancel(ctx)
   471  	defer cancel()
   472  
   473  	type workerResult struct {
   474  		zeroRefCounts []kbfsblock.ID
   475  		err           error
   476  	}
   477  
   478  	chunkResults := make(chan workerResult, numChunks)
   479  	worker := func() {
   480  		defer wg.Done()
   481  		for chunk := range chunks {
   482  			var res workerResult
   483  			fbm.log.CDebugf(ctx, "Downgrading chunk of %d pointers", len(chunk))
   484  			if archive {
   485  				res.err = bops.Archive(ctx, tlfID, chunk)
   486  			} else {
   487  				var liveCounts map[kbfsblock.ID]int
   488  				liveCounts, res.err = bops.Delete(ctx, tlfID, chunk)
   489  				if res.err == nil {
   490  					for id, count := range liveCounts {
   491  						if count == 0 {
   492  							res.zeroRefCounts = append(res.zeroRefCounts, id)
   493  						}
   494  					}
   495  				}
   496  			}
   497  			chunkResults <- res
   498  			select {
   499  			// return early if the context has been canceled
   500  			case <-ctx.Done():
   501  				return
   502  			default:
   503  			}
   504  		}
   505  	}
   506  	for i := 0; i < numWorkers; i++ {
   507  		wg.Add(1)
   508  		go worker()
   509  	}
   510  
   511  	for start := 0; start < len(ptrs); start += numPointersToDowngradePerChunk {
   512  		end := start + numPointersToDowngradePerChunk
   513  		if end > len(ptrs) {
   514  			end = len(ptrs)
   515  		}
   516  		chunks <- ptrs[start:end]
   517  	}
   518  	close(chunks)
   519  
   520  	var zeroRefCounts []kbfsblock.ID
   521  	for i := 0; i < numChunks; i++ {
   522  		result := <-chunkResults
   523  		if result.err != nil {
   524  			// deferred cancel will stop the other workers.
   525  			return nil, result.err
   526  		}
   527  		zeroRefCounts = append(zeroRefCounts, result.zeroRefCounts...)
   528  	}
   529  	return zeroRefCounts, nil
   530  }
   531  
   532  // deleteBlockRefs sends batched delete messages to the block server
   533  // for the given block pointers.  It returns a list of block IDs that
   534  // no longer have any references.
   535  func (fbm *folderBlockManager) deleteBlockRefs(ctx context.Context,
   536  	tlfID tlf.ID, ptrs []data.BlockPointer) ([]kbfsblock.ID, error) {
   537  	return fbm.doChunkedDowngrades(ctx, tlfID, ptrs, false)
   538  }
   539  
   540  func (fbm *folderBlockManager) processBlocksToDelete(ctx context.Context, toDelete blocksToDelete) error {
   541  	// also attempt to delete any error references
   542  
   543  	defer fbm.blocksToDeleteWaitGroup.Done()
   544  
   545  	// Make sure all blocks in the journal (if journaling is enabled)
   546  	// are flushed before attempting to delete any of them.
   547  	if jManager, err := GetJournalManager(fbm.config); err == nil {
   548  		fbm.log.CDebugf(ctx, "Waiting for journal to flush")
   549  		if err := jManager.WaitForCompleteFlush(ctx, fbm.id); err != nil {
   550  			return err
   551  		}
   552  	}
   553  
   554  	fbm.log.CDebugf(ctx, "Checking deleted blocks for revision %d",
   555  		toDelete.md.Revision())
   556  	// Make sure that the MD didn't actually become part of the folder
   557  	// history.  (This could happen if the Sync was canceled while the
   558  	// MD put was outstanding.)  If the private MD is not set, there's
   559  	// no way the revision made it to the server, so we are free to
   560  	// clean it up without checking with the server.
   561  	if toDelete.bdType == blockDeleteOnMDFail &&
   562  		toDelete.md.bareMd.GetSerializedPrivateMetadata() != nil {
   563  		// Don't use `getSingleMD` here, since it returns an error if
   564  		// the revision isn't found, and that's useful information for
   565  		// us here.
   566  		rmds, err := getMDRange(
   567  			ctx, fbm.config, fbm.id, toDelete.md.BID(), toDelete.md.Revision(),
   568  			toDelete.md.Revision(), toDelete.md.MergedStatus(), nil)
   569  		if err != nil {
   570  			fbm.log.CDebugf(ctx,
   571  				"Error trying to get MD %d; retrying after a delay",
   572  				toDelete.md.Revision())
   573  			// We don't know whether or not the revision made it to
   574  			// the server, so try again.  But don't re-enqueue
   575  			// immediately to avoid fast infinite loops.
   576  			fbm.enqueueBlocksToDeleteAfterShortDelay(ctx, toDelete)
   577  			return nil
   578  		}
   579  
   580  		var rmd ImmutableRootMetadata
   581  		if len(rmds) == 0 {
   582  			// The rmd.mdID check below will fail intentionally since
   583  			// rmd is empty.  Note that this assumes that the MD
   584  			// servers don't cache negative lookups, or if they do,
   585  			// they use synchronous cache invalidations for that case.
   586  			// If we ever allow MD servers to cache negative lookups,
   587  			// we'll have to retry here for at least the amount of the
   588  			// maximum allowable cache timeout.
   589  			fbm.log.CDebugf(ctx, "No revision %d found on MD server, so we "+
   590  				"can safely archive", toDelete.md.Revision())
   591  		} else {
   592  			rmd = rmds[0]
   593  		}
   594  
   595  		mdID, err := kbfsmd.MakeID(fbm.config.Codec(), toDelete.md.bareMd)
   596  		if err != nil {
   597  			fbm.log.CErrorf(ctx, "Error computing MD ID for comparison: %v", err)
   598  		} else if mdID == rmd.mdID {
   599  			if err := isArchivableMDOrError(rmd.ReadOnly()); err != nil {
   600  				fbm.log.CDebugf(ctx, "Skipping archiving for non-deleted, "+
   601  					"unarchivable revision %d: %v", rmd.Revision(), err)
   602  				return nil
   603  			}
   604  
   605  			// This md is part of the history of the folder, so we
   606  			// shouldn't delete the blocks.  But, since this MD put
   607  			// seems to have succeeded, we should archive it.
   608  			fbm.log.CDebugf(ctx, "Not deleting blocks from revision %d; "+
   609  				"archiving it", rmd.Revision())
   610  			// Don't block on archiving the MD, because that could
   611  			// lead to deadlock.
   612  			fbm.archiveUnrefBlocksNoWait(rmd.ReadOnly())
   613  			return nil
   614  		}
   615  
   616  		// Otherwise something else has been written over
   617  		// this MD, so get rid of the blocks.
   618  		fbm.log.CDebugf(ctx, "Cleaning up blocks for failed revision %d",
   619  			toDelete.md.Revision())
   620  	} else {
   621  		fbm.log.CDebugf(ctx, "Cleaning up blocks for revision %d",
   622  			toDelete.md.Revision())
   623  	}
   624  
   625  	_, err := fbm.deleteBlockRefs(ctx, toDelete.md.TlfID(), toDelete.blocks)
   626  	// Ignore permanent errors
   627  	_, isPermErr := err.(kbfsblock.ServerError)
   628  	_, isNonceNonExistentErr := err.(kbfsblock.ServerErrorNonceNonExistent)
   629  	_, isBadRequestErr := err.(kbfsblock.ServerErrorBadRequest)
   630  	if err != nil {
   631  		fbm.log.CWarningf(ctx, "Couldn't delete some ref in batch %v: %v",
   632  			toDelete.blocks, err)
   633  		if !isPermErr && !isNonceNonExistentErr && !isBadRequestErr {
   634  			fbm.enqueueBlocksToDeleteNoWait(toDelete)
   635  			return nil
   636  		}
   637  	}
   638  
   639  	return nil
   640  }
   641  
   642  // CtxFBMTagKey is the type used for unique context tags within
   643  // folderBlockManager
   644  type CtxFBMTagKey int
   645  
   646  const (
   647  	// CtxFBMIDKey is the type of the tag for unique operation IDs
   648  	// within folderBlockManager.
   649  	CtxFBMIDKey CtxFBMTagKey = iota
   650  )
   651  
   652  // CtxFBMOpID is the display name for the unique operation
   653  // folderBlockManager ID tag.
   654  const CtxFBMOpID = "FBMID"
   655  
   656  func (fbm *folderBlockManager) ctxWithFBMID(
   657  	ctx context.Context) context.Context {
   658  	return CtxWithRandomIDReplayable(ctx, CtxFBMIDKey, CtxFBMOpID, fbm.log)
   659  }
   660  
   661  // Run the passed function with a context that's canceled on shutdown.
   662  func (fbm *folderBlockManager) runUnlessShutdownWithCtx(
   663  	ctx context.Context, fn func(ctx context.Context) error) error {
   664  	ctx, cancelFunc := context.WithCancel(ctx)
   665  	defer cancelFunc()
   666  	errChan := make(chan error, 1)
   667  	go func() {
   668  		errChan <- fn(ctx)
   669  	}()
   670  
   671  	select {
   672  	case err := <-errChan:
   673  		return err
   674  	case <-fbm.shutdownChan:
   675  		return errors.New("shutdown received")
   676  	}
   677  }
   678  
   679  // Run the passed function with a context that's canceled on shutdown.
   680  func (fbm *folderBlockManager) runUnlessShutdown(
   681  	fn func(ctx context.Context) error) error {
   682  	ctx := fbm.ctxWithFBMID(context.Background())
   683  	return fbm.runUnlessShutdownWithCtx(ctx, fn)
   684  }
   685  
   686  func (fbm *folderBlockManager) archiveBlockRefs(ctx context.Context,
   687  	tlfID tlf.ID, ptrs []data.BlockPointer) error {
   688  	_, err := fbm.doChunkedDowngrades(ctx, tlfID, ptrs, true)
   689  	return err
   690  }
   691  
   692  type unrefIterator struct {
   693  	nextPtr int
   694  }
   695  
   696  // getUnrefPointersFromMD returns a slice of BlockPointers that were
   697  // unreferenced by the given `rmd`.  If there are too many pointers to
   698  // process, given the current mode, then it will return a partial
   699  // list, plus a non-nil `iter` parameter that can be passed into a
   700  // subsequent call to get the next set of unreferenced BlockPointers
   701  // from the same MD.  If a nil `iter` is given, pointers are returned
   702  // from the beginning of the list.
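//
// A minimal usage sketch (mirroring archiveAllBlocksInMD below):
//
//	iter := &unrefIterator{0}
//	for iter != nil {
//		var ptrs []data.BlockPointer
//		ptrs, iter = fbm.getUnrefPointersFromMD(rmd, true, iter)
//		// ... archive, delete, or otherwise process ptrs ...
//	}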
   703  func (fbm *folderBlockManager) getUnrefPointersFromMD(
   704  	rmd ReadOnlyRootMetadata, includeGC bool, iter *unrefIterator) (
   705  	ptrs []data.BlockPointer, nextIter *unrefIterator) {
   706  	currPtr := 0
   707  	complete := true
   708  	nextPtr := 0
   709  	if iter != nil {
   710  		nextPtr = iter.nextPtr
   711  	}
   712  	ptrMap := make(map[data.BlockPointer]bool)
   713  	max := fbm.config.Mode().MaxBlockPtrsToManageAtOnce()
   714  opLoop:
   715  	for _, op := range rmd.data.Changes.Ops {
   716  		if _, ok := op.(*GCOp); !includeGC && ok {
   717  			continue
   718  		}
   719  		for _, ptr := range op.Unrefs() {
   720  			currPtr++
   721  			// Skip past any ptrs we've already processed.
   722  			if currPtr <= nextPtr {
   723  				continue
   724  			}
   725  
   726  			// Can be zeroPtr in weird failed sync scenarios.
   727  			// See syncInfo.replaceRemovedBlock for an example
   728  			// of how this can happen.
   729  			if ptr != data.ZeroPtr && !ptrMap[ptr] {
   730  				ptrMap[ptr] = true
   731  			}
   732  			nextPtr++
   733  			if max >= 0 && len(ptrMap) >= max {
   734  				complete = false
   735  				break opLoop
   736  			}
   737  		}
   738  		for _, update := range op.allUpdates() {
   739  			currPtr++
   740  			// Skip past any ptrs we've already processed.
   741  			if currPtr <= nextPtr {
   742  				continue
   743  			}
   744  
   745  			// It's legal for there to be an "update" between
   746  			// two identical pointers (usually because of
   747  			// conflict resolution), so ignore that for quota
   748  			// reclamation purposes.
   749  			if update.Ref != update.Unref && !ptrMap[update.Unref] {
   750  				ptrMap[update.Unref] = true
   751  			}
   752  			nextPtr++
   753  			if max >= 0 && len(ptrMap) >= max {
   754  				complete = false
   755  				break opLoop
   756  			}
   757  		}
   758  	}
   759  	ptrs = make([]data.BlockPointer, 0, len(ptrMap))
   760  	for ptr := range ptrMap {
   761  		ptrs = append(ptrs, ptr)
   762  	}
   763  	if !complete {
   764  		nextIter = &unrefIterator{nextPtr}
   765  	}
   766  	return ptrs, nextIter
   767  }
   768  
   769  func (fbm *folderBlockManager) archiveAllBlocksInMD(md ReadOnlyRootMetadata) {
   770  	// This func doesn't take any locks, though it can
   771  	// block md writes due to the buffered channel.
   772  	// So use the long timeout to make sure things get
   773  	// unblocked eventually, but no need for a short
   774  	// timeout.
   775  	ctx := fbm.ctxWithFBMID(context.Background())
   776  	ctx, cancel := context.WithTimeout(ctx, data.BackgroundTaskTimeout)
   777  	fbm.setArchiveCancel(cancel)
   778  	defer fbm.cancelArchive()
   779  
   780  	iter := &unrefIterator{0}
   781  	defer fbm.archiveGroup.Done()
   782  	for iter != nil {
   783  		var ptrs []data.BlockPointer
   784  		ptrs, iter = fbm.getUnrefPointersFromMD(md, true, iter)
   785  		_ = fbm.runUnlessShutdownWithCtx(
   786  			ctx, func(ctx context.Context) (err error) {
   787  				fbm.log.CDebugf(
   788  					ctx, "Archiving %d block pointers as a result "+
   789  						"of revision %d", len(ptrs), md.Revision())
   790  				err = fbm.archiveBlockRefs(ctx, md.TlfID(), ptrs)
   791  				if err != nil {
   792  					fbm.log.CWarningf(
   793  						ctx, "Couldn't archive blocks: %v", err)
   794  					return err
   795  				}
   796  
   797  				return nil
   798  			})
   799  		if iter != nil {
   800  			fbm.log.CDebugf(
   801  				ctx, "Archived %d pointers for revision %d, "+
   802  					"now looking for more", len(ptrs), md.Revision())
   803  		}
   804  	}
   805  }
   806  
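// archiveBlocksInBackground services archiveChan until shutdown,
// archiving the unreferenced blocks of each MD it receives; it can be
// paused and resumed via archivePauseChan.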
   807  func (fbm *folderBlockManager) archiveBlocksInBackground() {
   808  	for {
   809  		select {
   810  		case md := <-fbm.archiveChan:
   811  			fbm.archiveAllBlocksInMD(md)
   812  		case unpause := <-fbm.archivePauseChan:
   813  			_ = fbm.runUnlessShutdown(func(ctx context.Context) (err error) {
   814  				fbm.log.CInfof(ctx, "Archives paused")
   815  				// wait to be unpaused
   816  				select {
   817  				case <-unpause:
   818  					fbm.log.CInfof(ctx, "Archives unpaused")
   819  				case <-ctx.Done():
   820  					return ctx.Err()
   821  				}
   822  				return nil
   823  			})
   824  		case <-fbm.shutdownChan:
   825  			return
   826  		}
   827  	}
   828  }
   829  
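// deleteBlocksInBackground services blocksToDeleteChan until shutdown,
// deleting (or re-enqueuing) the blocks from failed MD writes; it can
// be paused and resumed via blocksToDeletePauseChan.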
   830  func (fbm *folderBlockManager) deleteBlocksInBackground() {
   831  	for {
   832  		select {
   833  		case toDelete := <-fbm.blocksToDeleteChan:
   834  			_ = fbm.runUnlessShutdown(func(ctx context.Context) (err error) {
   835  				ctx, cancel := context.WithTimeout(
   836  					ctx, data.BackgroundTaskTimeout)
   837  				fbm.setBlocksToDeleteCancel(cancel)
   838  				defer fbm.cancelBlocksToDelete()
   839  
   840  				if err := fbm.processBlocksToDelete(ctx, toDelete); err != nil {
   841  					fbm.log.CDebugf(ctx, "Error deleting blocks: %v", err)
   842  					return err
   843  				}
   844  
   845  				return nil
   846  			})
   847  		case unpause := <-fbm.blocksToDeletePauseChan:
   848  			_ = fbm.runUnlessShutdown(func(ctx context.Context) (err error) {
   849  				fbm.log.CInfof(ctx, "deleteBlocks paused")
   850  				select {
   851  				case <-unpause:
   852  					fbm.log.CInfof(ctx, "deleteBlocks unpaused")
   853  				case <-ctx.Done():
   854  					return ctx.Err()
   855  				}
   856  				return nil
   857  			})
   858  		case <-fbm.shutdownChan:
   859  			return
   860  		}
   861  	}
   862  }
   863  
   864  func (fbm *folderBlockManager) isOldEnough(rmd ImmutableRootMetadata) bool {
   865  	// Trust the server's timestamp on this MD.
   866  	mtime := rmd.localTimestamp
   867  	unrefAge := fbm.config.Mode().QuotaReclamationMinUnrefAge()
   868  	return mtime.Add(unrefAge).Before(fbm.config.Clock().Now())
   869  }
   870  
   871  // getMostRecentGCRevision returns the latest revision that was
   872  // scrubbed by the previous gc op.
   873  func (fbm *folderBlockManager) getMostRecentGCRevision(
   874  	ctx context.Context, head ReadOnlyRootMetadata) (
   875  	lastGCRev kbfsmd.Revision, err error) {
   876  	if head.data.LastGCRevision >= kbfsmd.RevisionInitial {
   877  		fbm.log.CDebugf(ctx, "Found last gc revision %d in "+
   878  			"head MD revision %d", head.data.LastGCRevision,
   879  			head.Revision())
   880  		return head.data.LastGCRevision, nil
   881  	}
   882  
   883  	// Very old TLFs might not have a filled-in `LastGCRevision`, so
   884  	// we need to walk backwards to find the latest gcOp.
   885  	endRev := head.Revision()
   886  	for {
   887  		startRev := endRev - maxMDsAtATime + 1 // (kbfsmd.Revision is signed)
   888  		if startRev < kbfsmd.RevisionInitial {
   889  			startRev = kbfsmd.RevisionInitial
   890  		}
   891  
   892  		rmds, err := getMDRange(
   893  			ctx, fbm.config, fbm.id, kbfsmd.NullBranchID, startRev,
   894  			endRev, kbfsmd.Merged, nil)
   895  		if err != nil {
   896  			return kbfsmd.RevisionUninitialized, err
   897  		}
   898  
   899  		numNew := len(rmds)
   900  		for i := len(rmds) - 1; i >= 0; i-- {
   901  			rmd := rmds[i]
   902  			if rmd.data.LastGCRevision >= kbfsmd.RevisionInitial {
   903  				fbm.log.CDebugf(ctx, "Found last gc revision %d in "+
   904  					"MD revision %d", rmd.data.LastGCRevision,
   905  					rmd.Revision())
   906  				return rmd.data.LastGCRevision, nil
   907  			}
   908  			for j := len(rmd.data.Changes.Ops) - 1; j >= 0; j-- {
   909  				gcOp, ok := rmd.data.Changes.Ops[j].(*GCOp)
   910  				if !ok || gcOp.LatestRev == kbfsmd.RevisionUninitialized {
   911  					continue
   912  				}
   913  				fbm.log.CDebugf(ctx, "Found last gc op: %s", gcOp)
   914  				return gcOp.LatestRev, nil
   915  			}
   916  		}
   917  
   918  		if numNew > 0 {
   919  			endRev = rmds[0].Revision() - 1
   920  		}
   921  
   922  		if numNew < maxMDsAtATime || endRev < kbfsmd.RevisionInitial {
   923  			// Never been GC'd.
   924  			return kbfsmd.RevisionUninitialized, nil
   925  		}
   926  	}
   927  }
   928  
   929  // getUnreferencedBlocks returns a slice containing all the block
   930  // pointers that were unreferenced after earliestRev, up to and
   931  // including those in mostRecentRev.  If the number of pointers is too
   932  // large, it shortens the range of revisions being reclaimed and
   933  // returns the latest revision represented in the returned slice.
   934  func (fbm *folderBlockManager) getUnreferencedBlocks(
   935  	ctx context.Context, earliestRev, mostRecentRev kbfsmd.Revision) (
   936  	ptrs []data.BlockPointer, lastRev kbfsmd.Revision,
   937  	complete bool, err error) {
   938  	fbm.log.CDebugf(ctx, "Getting unreferenced blocks between revisions "+
   939  		"%d and %d", earliestRev, mostRecentRev)
   940  	defer func() {
   941  		if err == nil {
   942  			fbm.log.CDebugf(ctx, "Found %d pointers to clean between "+
   943  				"revisions %d and %d", len(ptrs), earliestRev, lastRev)
   944  		}
   945  	}()
   946  
   947  	// Walk forward, starting from just after earliestRev, until we
   948  	// get enough pointers or until we reach the head or a revision
   949  	// that's not old enough, gathering pointers to GC.
   950  	startRev := earliestRev + 1
   951  outer:
   952  	for {
   953  		endRev := startRev + maxMDsAtATime
   954  		if endRev > mostRecentRev {
   955  			endRev = mostRecentRev
   956  		}
   957  
   958  		rmds, err := getMDRange(
   959  			ctx, fbm.config, fbm.id, kbfsmd.NullBranchID, startRev,
   960  			endRev, kbfsmd.Merged, nil)
   961  		if err != nil {
   962  			return nil, kbfsmd.RevisionUninitialized, false, err
   963  		}
   964  
   965  		numNew := len(rmds)
   966  		for _, rmd := range rmds {
   967  			if !fbm.isOldEnough(rmd) {
   968  				fbm.log.CDebugf(ctx, "Revision %d is too recent; stopping QR",
   969  					rmd.Revision())
   970  				complete = true
   971  				break outer
   972  			}
   973  			lastRev = rmd.Revision()
   974  			// A garbage-collection op *must* contain all of the
   975  			// unreferenced pointers for the revisions it covers.  If this
   976  			// device can't handle that many, return an error and let
   977  			// another device take care of it.
   978  			newPtrs, iter := fbm.getUnrefPointersFromMD(
   979  				rmd.ReadOnlyRootMetadata, false, &unrefIterator{0})
   980  			if iter != nil {
   981  				return nil, kbfsmd.RevisionUninitialized, false,
   982  					errors.Errorf(
   983  						"Can't handle the unref'd pointers of revision %d",
   984  						lastRev)
   985  			}
   986  			ptrs = append(ptrs, newPtrs...)
   987  			// TODO: when can we clean up the MD's unembedded block
   988  			// changes pointer?  It's not safe until we know for sure
   989  			// that all existing clients have received the latest
   990  			// update (and also that there are no outstanding staged
   991  			// branches).  Let's do that as part of the bigger issue
   992  			// KBFS-793 -- for now we have to leak those blocks.
   993  			if len(ptrs) > fbm.numPointersPerGCThreshold {
   994  				fbm.log.CDebugf(ctx, "Shortening GC range to [%d:%d]",
   995  					earliestRev, rmd.Revision())
   996  				break outer
   997  			}
   998  		}
   999  
  1000  		if numNew > 0 {
  1001  			startRev = rmds[len(rmds)-1].Revision() + 1
  1002  		}
  1003  
  1004  		if numNew < maxMDsAtATime || startRev > mostRecentRev {
  1005  			complete = true
  1006  			break
  1007  		}
  1008  	}
  1009  
  1010  	return ptrs, lastRev, complete, nil
  1011  }
  1012  
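// finalizeReclamation records the result of a reclamation pass by
// writing a gcOp that unrefs the blocks whose server reference counts
// dropped to zero and that notes the latest revision covered.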
  1013  func (fbm *folderBlockManager) finalizeReclamation(ctx context.Context,
  1014  	ptrs []data.BlockPointer, zeroRefCounts []kbfsblock.ID,
  1015  	latestRev kbfsmd.Revision) error {
  1016  	gco := newGCOp(latestRev)
  1017  	for _, id := range zeroRefCounts {
  1018  		gco.AddUnrefBlock(data.BlockPointer{ID: id})
  1019  	}
  1020  
  1021  	ctx, err := tlfhandle.MakeExtendedIdentify(
  1022  		// TLFIdentifyBehavior_KBFS_QR makes the service suppress the tracker popup.
  1023  		ctx, keybase1.TLFIdentifyBehavior_KBFS_QR)
  1024  	if err != nil {
  1025  		return err
  1026  	}
  1027  
  1028  	fbm.log.CDebugf(ctx, "Finalizing reclamation %s with %d ptrs", gco,
  1029  		len(ptrs))
  1030  	// finalizeGCOp could wait indefinitely on locks, so run it via
  1031  	// runUnlessCanceled so we can bail out early if ctx is canceled.
  1032  	return runUnlessCanceled(ctx,
  1033  		func() error { return fbm.helper.finalizeGCOp(ctx, gco) })
  1034  }
  1035  
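// isQRNecessary decides whether a quota reclamation pass is worth
// running against the given head.  It skips QR when nothing has
// changed since the last complete pass, when the head is too fresh and
// was written by another device, or when the TLF's root block can't be
// fetched (since older versions might still be needed for recovery).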
  1036  func (fbm *folderBlockManager) isQRNecessary(
  1037  	ctx context.Context, head ImmutableRootMetadata) bool {
  1038  	fbm.lastQRLock.Lock()
  1039  	defer fbm.lastQRLock.Unlock()
  1040  	if head == (ImmutableRootMetadata{}) {
  1041  		return false
  1042  	}
  1043  
  1044  	session, err := fbm.config.KBPKI().GetCurrentSession(ctx)
  1045  	if err != nil {
  1046  		fbm.log.CWarningf(ctx, "Couldn't get the current session: %+v", err)
  1047  		return false
  1048  	}
  1049  	// It's ok to treat MDs written by this process on this device
  1050  	// and MDs written by other processes (e.g., kbgit) the same
  1051  	// way.  Other processes are likely to be short-lived, and
  1052  	// probably won't do their own QR, so a conflict is unlikely here.
  1053  	selfWroteHead := session.VerifyingKey == head.LastModifyingWriterVerifyingKey()
  1054  
  1055  	// Don't do reclamation if the head isn't old enough and it wasn't
  1056  	// written by this device.  We want to avoid fighting with other
  1057  	// active writers whenever possible.
  1058  	if !selfWroteHead {
  1059  		minHeadAge := fbm.config.Mode().QuotaReclamationMinHeadAge()
  1060  		if minHeadAge <= 0 {
  1061  			return false
  1062  		}
  1063  		headAge := fbm.config.Clock().Now().Sub(head.localTimestamp)
  1064  		if headAge < minHeadAge {
  1065  			return false
  1066  		}
  1067  	}
  1068  
  1069  	// If the head includes a single gcOp that covers everything up to
  1070  	// the previous head, we can skip QR.
  1071  	if len(head.data.Changes.Ops) == 1 {
  1072  		gcOp, isGCOp := head.data.Changes.Ops[0].(*GCOp)
  1073  		if isGCOp && gcOp.LatestRev == head.Revision()-1 {
  1074  			return false
  1075  		}
  1076  	}
  1077  
  1078  	// Do QR if:
  1079  	//   * The head has changed since last time, OR
  1080  	//   * The last QR did not completely clean every available thing, OR
  1081  	//   * The head is now old enough for QR
  1082  	isNecessary := head.Revision() != fbm.lastQRHeadRev ||
  1083  		!fbm.wasLastQRComplete || fbm.isOldEnough(head)
  1084  	if !isNecessary {
  1085  		return false
  1086  	}
  1087  
  1088  	// Make sure the root block of the TLF is readable.  If not, we
  1089  	// don't want to garbage collect, since we might need to
  1090  	// recover to those older versions of the TLF.
  1091  	headRootPtr := head.data.Dir.BlockPointer
  1092  	ch := fbm.config.BlockOps().BlockRetriever().Request(
  1093  		ctx, defaultOnDemandRequestPriority, head, headRootPtr,
  1094  		data.NewDirBlock(), data.TransientEntry, BlockRequestSolo)
  1095  	select {
  1096  	case err := <-ch:
  1097  		if err != nil {
  1098  			fbm.log.CWarningf(
  1099  				ctx, "Couldn't fetch root block %v for TLF %s: %+v",
  1100  				headRootPtr, head.TlfID(), err)
  1101  			return false
  1102  		}
  1103  	case <-ctx.Done():
  1104  		fbm.log.CDebugf(
  1105  			ctx, "Couldn't fetch root block %v for TLF %s: %+v",
  1106  			headRootPtr, head.TlfID(), ctx.Err())
  1107  		return false
  1108  	}
  1109  
  1110  	return true
  1111  }
  1112  
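// doReclamation performs a single quota reclamation pass: it fetches
// the most recent fully-merged MD, verifies this device is a writer
// and that QR is actually needed, takes the folder's truncate lock,
// collects pointers unreferenced since the last gc revision, deletes
// them from the block server, and finalizes a gcOp recording how far
// it got.  The timer is reset for the next QR period on the way out.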
  1113  func (fbm *folderBlockManager) doReclamation(timer *time.Timer) (err error) {
  1114  	ctx, cancel := context.WithCancel(fbm.ctxWithFBMID(context.Background()))
  1115  	fbm.setReclamationCancel(cancel)
  1116  	defer fbm.cancelReclamation()
  1117  	nextPeriod := fbm.config.Mode().QuotaReclamationPeriod()
  1118  	defer func() {
  1119  		// `nextPeriod` may be changed by later code in this function,
  1120  		// to speed up the next QR cycle when we couldn't reclaim a
  1121  		// complete set of blocks during this run.
  1122  		timer.Reset(nextPeriod)
  1123  	}()
  1124  	defer fbm.reclamationGroup.Done()
  1125  
  1126  	// Don't set a context deadline.  For users that have written a
  1127  	// lot of updates since their last QR, this might involve fetching
  1128  	// a lot of MD updates in small chunks.  It doesn't hold locks for
  1129  	// any considerable amount of time, so it should be safe to let it
  1130  	// run indefinitely.
  1131  
  1132  	// First get the most recent fully merged MD (might be different
  1133  	// from the local head if journaling is enabled), and see if we're
  1134  	// staged or not.
  1135  	head, err := fbm.helper.getMostRecentFullyMergedMD(ctx)
  1136  	if err != nil {
  1137  		return err
  1138  	}
  1139  	if err := isReadableOrError(
  1140  		ctx, fbm.config.KBPKI(), fbm.config, head.ReadOnly()); err != nil {
  1141  		return err
  1142  	}
  1143  	switch {
  1144  	case head.MergedStatus() != kbfsmd.Merged:
  1145  		return errors.New("Supposedly fully-merged MD is unexpectedly unmerged")
  1146  	case head.IsFinal():
  1147  		return kbfsmd.MetadataIsFinalError{}
  1148  	}
  1149  
  1150  	// Make sure we're a writer
  1151  	session, err := fbm.config.KBPKI().GetCurrentSession(ctx)
  1152  	if err != nil {
  1153  		return err
  1154  	}
  1155  	isWriter, err := head.IsWriter(
  1156  		ctx, fbm.config.KBPKI(), fbm.config, session.UID, session.VerifyingKey)
  1157  	if err != nil {
  1158  		return err
  1159  	}
  1160  	if !isWriter {
  1161  		return tlfhandle.NewWriteAccessError(head.GetTlfHandle(), session.Name,
  1162  			head.GetTlfHandle().GetCanonicalPath())
  1163  	}
  1164  
  1165  	if !fbm.isQRNecessary(ctx, head) {
  1166  		// Nothing has changed since last time, or the current head is
  1167  		// too new, so no need to do any QR.
  1168  		return nil
  1169  	}
  1170  	var complete bool
  1171  	var reclamationTime time.Time
  1172  	var lastRev kbfsmd.Revision
  1173  	defer func() {
  1174  		fbm.lastQRLock.Lock()
  1175  		defer fbm.lastQRLock.Unlock()
  1176  		// Remember the QR we just performed.
  1177  		if err == nil && head != (ImmutableRootMetadata{}) {
  1178  			fbm.lastQRHeadRev = head.Revision()
  1179  			fbm.wasLastQRComplete = complete
  1180  		}
  1181  		if !reclamationTime.IsZero() {
  1182  			fbm.lastReclamationTime = reclamationTime
  1183  		}
  1184  		if lastRev > kbfsmd.RevisionUninitialized {
  1185  			fbm.lastReclaimedRev = lastRev
  1186  		}
  1187  		if !complete {
  1188  			// If there's more data to reclaim, only wait a short
  1189  			// while before the next QR attempt.
  1190  			nextPeriod = 1 * time.Minute
  1191  		}
  1192  	}()
  1193  
  1194  	// Then grab the lock for this folder, so we're the only one doing
  1195  	// garbage collection for a while.
  1196  	locked, err := fbm.config.MDServer().TruncateLock(ctx, fbm.id)
  1197  	if err != nil {
  1198  		return err
  1199  	}
  1200  	if !locked {
  1201  		fbm.log.CDebugf(ctx, "Couldn't get the truncate lock")
  1202  		return fmt.Errorf("Couldn't get the truncate lock for folder %s",
  1203  			fbm.id)
  1204  	}
  1205  	defer func() {
  1206  		unlocked, unlockErr := fbm.config.MDServer().TruncateUnlock(ctx, fbm.id)
  1207  		if unlockErr != nil {
  1208  			fbm.log.CDebugf(ctx, "Couldn't release the truncate lock: %v",
  1209  				unlockErr)
  1210  		}
  1211  		if !unlocked {
  1212  			fbm.log.CDebugf(ctx, "Couldn't unlock the truncate lock")
  1213  		}
  1214  	}()
  1215  
  1216  	lastGCRev, err := fbm.getMostRecentGCRevision(ctx, head.ReadOnly())
  1217  	if err != nil {
  1218  		return err
  1219  	}
  1220  	if head.Revision() <= lastGCRev {
  1221  		// TODO: need a log level more fine-grained than Debug to
  1222  		// print out that we're not doing reclamation.
  1223  		complete = true
  1224  		return nil
  1225  	}
  1226  
  1227  	// Don't try to do too many at a time.
  1228  	shortened := false
  1229  	mostRecentRev := head.Revision()
  1230  	if mostRecentRev-lastGCRev > numMaxRevisionsPerQR {
  1231  		mostRecentRev = lastGCRev + numMaxRevisionsPerQR
  1232  		shortened = true
  1233  	}
  1234  
  1235  	// Don't print these until we know for sure that we'll be
  1236  	// reclaiming some quota, to avoid log pollution.
  1237  	fbm.log.CDebugf(ctx, "Starting quota reclamation process")
  1238  	defer func() {
  1239  		fbm.log.CDebugf(ctx, "Ending quota reclamation process: %v", err)
  1240  		reclamationTime = fbm.config.Clock().Now()
  1241  	}()
  1242  
  1243  	ptrs, lastRev, complete, err := fbm.getUnreferencedBlocks(
  1244  		ctx, lastGCRev, mostRecentRev)
  1245  	if err != nil {
  1246  		return err
  1247  	}
  1248  	if lastRev == kbfsmd.RevisionUninitialized {
  1249  		fbm.log.CDebugf(ctx, "No recent revisions to GC")
  1250  		complete = true
  1251  		return nil
  1252  	}
  1253  	if len(ptrs) == 0 && !shortened {
  1254  		complete = true
  1255  
  1256  		// Add a new gcOp to show other clients that they don't need
  1257  		// to explore this range again.
  1258  		return fbm.finalizeReclamation(ctx, nil, nil, lastRev)
  1259  	} else if shortened {
  1260  		complete = false
  1261  	}
  1262  
  1263  	zeroRefCounts, err := fbm.deleteBlockRefs(ctx, head.TlfID(), ptrs)
  1264  	if err != nil {
  1265  		return err
  1266  	}
  1267  
  1268  	return fbm.finalizeReclamation(ctx, ptrs, zeroRefCounts, lastRev)
  1269  }
  1270  
  1271  func isPermanentQRError(err error) bool {
  1272  	switch errors.Cause(err).(type) {
  1273  	case tlfhandle.WriteAccessError, kbfsmd.MetadataIsFinalError,
  1274  		RevokedDeviceVerificationError:
  1275  		return true
  1276  	default:
  1277  		return false
  1278  	}
  1279  }
  1280  
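// reclaimQuotaInBackground runs quota reclamation on a timer, and on
// explicit forceQuotaReclamation signals, until shutdown.  It pauses
// while a mobile app is backgrounded and permanently stops the
// automatic timer after a permanent QR error.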
  1281  func (fbm *folderBlockManager) reclaimQuotaInBackground() {
  1282  	autoQR := true
  1283  	timer := time.NewTimer(fbm.config.Mode().QuotaReclamationPeriod())
  1284  
  1285  	if fbm.config.Mode().QuotaReclamationPeriod().Seconds() != 0 {
  1286  		// Run QR once immediately at the start of the period.
  1287  		fbm.reclamationGroup.Add(1)
  1288  		err := fbm.doReclamation(timer)
  1289  		if isPermanentQRError(err) {
  1290  			autoQR = false
  1291  			fbm.log.CDebugf(context.Background(),
  1292  				"Permanently stopping QR due to initial error: %+v", err)
  1293  		}
  1294  	}
  1295  
  1296  	timerChan := timer.C
  1297  	for {
  1298  		// Don't let the timer fire if auto-reclamation is turned off.
  1299  		if !autoQR ||
  1300  			fbm.config.Mode().QuotaReclamationPeriod().Seconds() == 0 {
  1301  			timer.Stop()
  1302  			// Use a channel that will never fire instead.
  1303  			timerChan = make(chan time.Time)
  1304  		}
  1305  
  1306  		state := keybase1.MobileAppState_FOREGROUND
  1307  		select {
  1308  		case <-fbm.shutdownChan:
  1309  			return
  1310  		case state = <-fbm.appStateUpdater.NextAppStateUpdate(&state):
  1311  			for state != keybase1.MobileAppState_FOREGROUND {
  1312  				fbm.log.CDebugf(context.Background(),
  1313  					"Pausing QR while not foregrounded: state=%s", state)
  1314  				state = <-fbm.appStateUpdater.NextAppStateUpdate(&state)
  1315  			}
  1316  			fbm.log.CDebugf(
  1317  				context.Background(), "Resuming QR while foregrounded")
  1318  			continue
  1319  		case <-timerChan:
  1320  			fbm.reclamationGroup.Add(1)
  1321  		case <-fbm.forceReclamationChan:
  1322  		}
  1323  
  1324  		err := fbm.doReclamation(timer)
  1325  		if isPermanentQRError(err) {
  1326  			// If we can't write the MD, don't bother with the timer
  1327  			// anymore. Don't completely shut down, since we don't
  1328  			// want forced reclamations to hang.
  1329  			timer.Stop()
  1330  			timerChan = make(chan time.Time)
  1331  			autoQR = false
  1332  			fbm.log.CDebugf(context.Background(),
  1333  				"Permanently stopping QR due to error: %+v", err)
  1334  		}
  1335  	}
  1336  }
  1337  
  1338  func (fbm *folderBlockManager) getLastQRData() (time.Time, kbfsmd.Revision) {
  1339  	fbm.lastQRLock.Lock()
  1340  	defer fbm.lastQRLock.Unlock()
  1341  	return fbm.lastReclamationTime, fbm.lastReclaimedRev
  1342  }
  1343  
  1344  func (fbm *folderBlockManager) clearLastQRData() {
  1345  	fbm.lastQRLock.Lock()
  1346  	defer fbm.lastQRLock.Unlock()
  1347  	fbm.lastQRHeadRev = kbfsmd.RevisionUninitialized
  1348  	fbm.wasLastQRComplete = false
  1349  	fbm.lastReclamationTime = time.Time{}
  1350  	fbm.lastReclaimedRev = kbfsmd.RevisionUninitialized
  1351  }
  1352  
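// doChunkedGetNonLiveBlocks asks the block server for the live
// reference counts of the given pointers, in parallel chunks, and
// returns the IDs of the blocks that no longer have any references.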
  1353  func (fbm *folderBlockManager) doChunkedGetNonLiveBlocks(
  1354  	ctx context.Context, ptrs []data.BlockPointer) (
  1355  	nonLiveBlocks []kbfsblock.ID, err error) {
  1356  	fbm.log.CDebugf(ctx, "Get live count for %d pointers", len(ptrs))
  1357  	bops := fbm.config.BlockOps()
  1358  
  1359  	// Round up to find the number of chunks.
  1360  	numChunks := (len(ptrs) + numPointersToDowngradePerChunk - 1) /
  1361  		numPointersToDowngradePerChunk
  1362  	numWorkers := numChunks
  1363  	if numWorkers > maxParallelBlockPuts {
  1364  		numWorkers = maxParallelBlockPuts
  1365  	}
  1366  	chunks := make(chan []data.BlockPointer, numChunks)
  1367  
  1368  	eg, groupCtx := errgroup.WithContext(ctx)
  1369  	chunkResults := make(chan []kbfsblock.ID, numChunks)
  1370  	for i := 0; i < numWorkers; i++ {
  1371  		eg.Go(func() error {
  1372  			for chunk := range chunks {
  1373  				fbm.log.CDebugf(groupCtx,
  1374  					"Getting live count for chunk of %d pointers", len(chunk))
  1375  				liveCounts, err := bops.GetLiveCount(groupCtx, fbm.id, chunk)
  1376  				if err != nil {
  1377  					return err
  1378  				}
  1379  				ids := make([]kbfsblock.ID, 0, len(liveCounts))
  1380  				for id, count := range liveCounts {
  1381  					if count == 0 {
  1382  						ids = append(ids, id)
  1383  					} else {
  1384  						fbm.log.CDebugf(groupCtx,
  1385  							"Ignoring live block %s with %d refs", id, count)
  1386  					}
  1387  				}
  1388  				chunkResults <- ids
  1389  				select {
  1390  				// return early if the context has been canceled
  1391  				case <-groupCtx.Done():
  1392  					return groupCtx.Err()
  1393  				default:
  1394  				}
  1395  			}
  1396  			return nil
  1397  		})
  1398  	}
  1399  
  1400  	for start := 0; start < len(ptrs); start += numPointersToDowngradePerChunk {
  1401  		end := start + numPointersToDowngradePerChunk
  1402  		if end > len(ptrs) {
  1403  			end = len(ptrs)
  1404  		}
  1405  		chunks <- ptrs[start:end]
  1406  	}
  1407  	close(chunks)
  1408  
  1409  	err = eg.Wait()
  1410  	if err != nil {
  1411  		return nil, err
  1412  	}
  1413  	close(chunkResults)
  1414  
  1415  	for result := range chunkResults {
  1416  		nonLiveBlocks = append(nonLiveBlocks, result...)
  1417  	}
  1418  	return nonLiveBlocks, nil
  1419  }
  1420  
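// doCleanDiskCache deletes from the given disk cache the blocks that
// were unreferenced in revisions after the cache's last-cleaned
// revision, up to the latest merged revision, recording the new
// last-cleaned revision as it goes.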
  1421  func (fbm *folderBlockManager) doCleanDiskCache(cacheType DiskBlockCacheType) (
  1422  	err error) {
  1423  	dbc := fbm.config.DiskBlockCache()
  1424  	if dbc == nil {
  1425  		return nil
  1426  	}
  1427  
  1428  	ctx, cancel := context.WithCancel(fbm.ctxWithFBMID(context.Background()))
  1429  	fbm.setCleanDiskCacheCancel(cancel)
  1430  	defer fbm.cancelCleanDiskCache()
  1431  
  1432  	lState := makeFBOLockState()
  1433  	recentRev := fbm.helper.getLatestMergedRevision(lState)
  1434  
  1435  	lastRev, err := dbc.GetLastUnrefRev(ctx, fbm.id, cacheType)
  1436  	if err != nil {
  1437  		return err
  1438  	}
  1439  
  1440  	if lastRev < kbfsmd.RevisionInitial {
  1441  		if recentRev > kbfsmd.RevisionInitial {
  1442  			// This can happen if the sync cache was created and
  1443  			// populated before we started keeping track of the last
  1444  			// unref'd revision. In that case, we just let the blocks
  1445  			// from the old revision stay in the cache until they are
  1446  			// manually cleaned up.
  1447  			//
  1448  			// It can also happen if the device just started
  1449  			// monitoring the TLF for syncing, in which case it
  1450  			// shouldn't have any cached blocks that were unref'd in
  1451  			// earlier revisions.
  1452  			fbm.log.CDebugf(ctx, "Starting to clean %s at revision %d",
  1453  				cacheType, recentRev)
  1454  			lastRev = recentRev - 1
  1455  		} else {
  1456  			// No revisions to clean yet.
  1457  			return dbc.PutLastUnrefRev(
  1458  				ctx, fbm.id, recentRev, cacheType)
  1459  		}
  1460  	}
  1461  
  1462  	if lastRev >= recentRev {
  1463  		// Nothing to do.
  1464  		return nil
  1465  	}
  1466  
  1467  	fbm.log.CDebugf(ctx, "Cleaning %s revisions after %d, "+
  1468  		"up to %d", cacheType, lastRev, recentRev)
  1469  	defer func() {
  1470  		fbm.log.CDebugf(ctx, "Done cleaning %s: %+v", cacheType, err)
  1471  	}()
  1472  	for nextRev := lastRev + 1; nextRev <= recentRev; nextRev++ {
  1473  		rmd, err := GetSingleMD(
  1474  			ctx, fbm.config, fbm.id, kbfsmd.NullBranchID, nextRev,
  1475  			kbfsmd.Merged, nil)
  1476  		if err != nil {
  1477  			return err
  1478  		}
  1479  
  1480  		iter := &unrefIterator{0}
  1481  		for iter != nil {
  1482  			// Include unrefs from `gcOp`s here, as a double-check
  1483  			// against archive races (see comment below).
  1484  			var ptrs []data.BlockPointer
  1485  			ptrs, iter = fbm.getUnrefPointersFromMD(
  1486  				rmd.ReadOnlyRootMetadata, true, iter)
  1487  
  1488  			// Cancel any prefetches for these blocks that might be in
  1489  			// flight, to make sure they don't get put into the cache
  1490  			// after we're done cleaning it.  Ideally we would cancel
  1491  			// them in a particular order (the lowest level ones
  1492  			// first, up to the root), but since we already do one
  1493  			// round of prefetch-canceling as part of applying the MD
  1494  			// and updating the pointers, doing a second round here
  1495  			// should be good enough to catch any weird relationships
  1496  			// between the pointers where one non-yet-canceled
  1497  			// prefetch can revive the prefetch of an already-canceled
  1498  			// child block.
  1499  			for _, ptr := range ptrs {
  1500  				fbm.config.BlockOps().Prefetcher().CancelPrefetch(ptr)
  1501  				c, err := fbm.config.BlockOps().Prefetcher().
  1502  					WaitChannelForBlockPrefetch(ctx, ptr)
  1503  				if err != nil {
  1504  					return err
  1505  				}
  1506  				select {
  1507  				case <-c:
  1508  				case <-ctx.Done():
  1509  					return ctx.Err()
  1510  				}
  1511  			}
  1512  
  1513  			var ids []kbfsblock.ID
  1514  			if cacheType == DiskBlockSyncCache {
  1515  				// Wait for our own archives to complete, to make sure
  1516  				// the bserver already knows this block isn't live yet
  1517  				// when we make the call below.  However, when dealing
  1518  				// with MDs written by other clients, there could be a
  1519  				// race here where we see the ID is live before the
  1520  				// other client gets to archive the block, leading to
  1521  				// a leak.  Once the revision is GC'd though, we
  1522  				// should run through this code again with the `gcOp`,
  1523  				// and we'll delete the block then.  (Note there's
  1524  				// always a chance for a race here, since the client
  1525  				// could crash before archiving the blocks.  But the
  1526  				// GC should always catch it eventually.)
  1527  				err := fbm.waitForArchives(ctx)
  1528  				if err != nil {
  1529  					return err
  1530  				}
  1531  
  1532  				ids, err = fbm.doChunkedGetNonLiveBlocks(ctx, ptrs)
  1533  				if err != nil {
  1534  					return err
  1535  				}
  1536  			} else {
  1537  				ids = make([]kbfsblock.ID, 0, len(ptrs))
  1538  				for _, ptr := range ptrs {
  1539  					ids = append(ids, ptr.ID)
  1540  				}
  1541  			}
  1542  			fbm.log.CDebugf(ctx, "Deleting %d blocks from cache", len(ids))
  1543  			_, _, err = dbc.Delete(ctx, ids, cacheType)
  1544  			if err != nil {
  1545  				return err
  1546  			}
  1547  
  1548  			if iter != nil {
  1549  				fbm.log.CDebugf(
  1550  					ctx, "Cleaned %d pointers for revision %d, "+
  1551  						"now looking for more", len(ptrs), rmd.Revision())
  1552  			}
  1553  		}
  1554  
  1555  		err = dbc.PutLastUnrefRev(ctx, fbm.id, nextRev, cacheType)
  1556  		if err != nil {
  1557  			return err
  1558  		}
  1559  
  1560  	}
  1561  	return nil
  1562  }
  1563  
  1564  func (fbm *folderBlockManager) doCleanDiskCaches() (err error) {
  1565  	defer fbm.cleanDiskCachesGroup.Done()
  1566  
  1567  	// Clean out sync cache only if it is enabled
  1568  	syncConfig := fbm.config.GetTlfSyncState(fbm.id)
  1569  	if syncConfig.Mode != keybase1.FolderSyncMode_DISABLED {
  1570  		err = fbm.doCleanDiskCache(DiskBlockSyncCache)
  1571  		if err != nil {
  1572  			return err
  1573  		}
  1574  	}
  1575  	return fbm.doCleanDiskCache(DiskBlockWorkingSetCache)
  1576  }
  1577  
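// cleanDiskCachesInBackground cleans the disk block caches each time a
// newer latest merged revision is signaled, until shutdown, pausing
// while a mobile app is backgrounded.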
  1578  func (fbm *folderBlockManager) cleanDiskCachesInBackground() {
  1579  	// While in the foreground, clean the disk caches every time we learn about
  1580  	// a newer latest merged revision for this TLF.
  1581  	for {
  1582  		state := keybase1.MobileAppState_FOREGROUND
  1583  		select {
  1584  		case <-fbm.latestMergedChan:
  1585  		case <-fbm.shutdownChan:
  1586  			return
  1587  		case state = <-fbm.appStateUpdater.NextAppStateUpdate(&state):
  1588  			for state != keybase1.MobileAppState_FOREGROUND {
  1589  				fbm.log.CDebugf(context.Background(),
  1590  					"Pausing sync-cache cleaning while not foregrounded: "+
  1591  						"state=%s", state)
  1592  				state = <-fbm.appStateUpdater.NextAppStateUpdate(&state)
  1593  			}
  1594  			fbm.log.CDebugf(context.Background(),
  1595  				"Resuming sync-cache cleaning while foregrounded")
  1596  			continue
  1597  		}
  1598  
  1599  		_ = fbm.doCleanDiskCaches()
  1600  	}
  1601  }
  1602  
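// signalLatestMergedRevision wakes up the disk-cache cleaning loop
// after a newer latest merged revision is learned for this TLF.  If a
// signal is already pending, the extra one is dropped.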
  1603  func (fbm *folderBlockManager) signalLatestMergedRevision() {
  1604  	if fbm.latestMergedChan == nil {
  1605  		return
  1606  	}
  1607  
  1608  	fbm.cleanDiskCachesGroup.Add(1)
  1609  	select {
  1610  	case fbm.latestMergedChan <- struct{}{}:
  1611  	default:
  1612  		fbm.cleanDiskCachesGroup.Done()
  1613  	}
  1614  }