github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/kbfs/libkbfs/folder_block_manager.go (about) 1 // Copyright 2016 Keybase Inc. All rights reserved. 2 // Use of this source code is governed by a BSD 3 // license that can be found in the LICENSE file. 4 5 package libkbfs 6 7 import ( 8 "fmt" 9 "sync" 10 "time" 11 12 "github.com/keybase/backoff" 13 "github.com/keybase/client/go/kbfs/data" 14 "github.com/keybase/client/go/kbfs/env" 15 "github.com/keybase/client/go/kbfs/kbfsblock" 16 "github.com/keybase/client/go/kbfs/kbfsmd" 17 "github.com/keybase/client/go/kbfs/kbfssync" 18 "github.com/keybase/client/go/kbfs/tlf" 19 "github.com/keybase/client/go/kbfs/tlfhandle" 20 "github.com/keybase/client/go/logger" 21 "github.com/keybase/client/go/protocol/keybase1" 22 "github.com/pkg/errors" 23 "golang.org/x/net/context" 24 "golang.org/x/sync/errgroup" 25 ) 26 27 type fbmHelper interface { 28 getMostRecentFullyMergedMD(ctx context.Context) ( 29 ImmutableRootMetadata, error) 30 finalizeGCOp(ctx context.Context, gco *GCOp) error 31 getLatestMergedRevision(lState *kbfssync.LockState) kbfsmd.Revision 32 } 33 34 const ( 35 // How many pointers to downgrade in a single Archive/Delete call. 36 numPointersToDowngradePerChunk = 20 37 // Once the number of pointers being deleted in a single gc op 38 // passes this threshold, we'll stop garbage collection at the 39 // current revision. 40 numPointersPerGCThresholdDefault = 100 41 // The most revisions to consider for each QR run. 42 numMaxRevisionsPerQR = 100 43 ) 44 45 type blockDeleteType int 46 47 const ( 48 // Delete the blocks only if the given MD failed to make it to the 49 // servers. 50 blockDeleteOnMDFail blockDeleteType = iota 51 52 // Always delete the blocks, without first checking if the given 53 // revision was successful. This is just an optimization to avoid 54 // fetching the MD from the server when we know for sure it had 55 // failed. 56 blockDeleteAlways 57 ) 58 59 type blocksToDelete struct { 60 md ReadOnlyRootMetadata 61 blocks []data.BlockPointer 62 bdType blockDeleteType 63 backoff backoff.BackOff 64 } 65 66 // folderBlockManager is a helper class for managing the blocks in a 67 // particular TLF. It archives historical blocks and reclaims quota 68 // usage, all in the background. 69 type folderBlockManager struct { 70 appStateUpdater env.AppStateUpdater 71 config Config 72 log logger.Logger 73 shutdownChan chan struct{} 74 id tlf.ID 75 76 numPointersPerGCThreshold int 77 78 // A queue of MD updates for this folder that need to have their 79 // unref's blocks archived 80 archiveChan chan ReadOnlyRootMetadata 81 82 archivePauseChan chan (<-chan struct{}) 83 84 // archiveGroup tracks the outstanding archives. 85 archiveGroup kbfssync.RepeatedWaitGroup 86 87 archiveCancelLock sync.Mutex 88 archiveCancel context.CancelFunc 89 90 // blocksToDeleteChan is a list of blocks, for a given 91 // metadata revision, that may have been Put as part of a failed 92 // MD write. These blocks should be deleted as soon as we know 93 // for sure that the MD write isn't visible to others. 94 // TODO: Persist these to disk? 95 blocksToDeleteChan chan blocksToDelete 96 blocksToDeletePauseChan chan (<-chan struct{}) 97 blocksToDeleteWaitGroup kbfssync.RepeatedWaitGroup 98 99 blocksToDeleteCancelLock sync.Mutex 100 blocksToDeleteCancel context.CancelFunc 101 102 // forceReclamation forces the manager to start a reclamation 103 // process. 104 forceReclamationChan chan struct{} 105 106 // reclamationGroup tracks the outstanding quota reclamations. 107 reclamationGroup kbfssync.RepeatedWaitGroup 108 109 reclamationCancelLock sync.Mutex 110 reclamationCancel context.CancelFunc 111 112 // latestMergedChan signals when we learn about a newer latest 113 // merged revision for this TLF. 114 latestMergedChan chan struct{} 115 116 // cleanDiskCachesGroup tracks the outstanding disk-cache cleanings. 117 cleanDiskCachesGroup kbfssync.RepeatedWaitGroup 118 119 cleanDiskCacheCancelLock sync.Mutex 120 cleanDiskCacheCancel context.CancelFunc 121 122 helper fbmHelper 123 124 // Remembers what happened last time during quota reclamation. 125 lastQRLock sync.Mutex 126 lastQRHeadRev kbfsmd.Revision 127 wasLastQRComplete bool 128 lastReclamationTime time.Time 129 lastReclaimedRev kbfsmd.Revision 130 } 131 132 func newFolderBlockManager( 133 appStateUpdater env.AppStateUpdater, config Config, fb data.FolderBranch, 134 bType branchType, helper fbmHelper) *folderBlockManager { 135 tlfStringFull := fb.Tlf.String() 136 log := config.MakeLogger(fmt.Sprintf("FBM %s", tlfStringFull[:8])) 137 138 var latestMergedChan chan struct{} 139 qrEnabled := 140 fb.Branch == data.MasterBranch && config.Mode().QuotaReclamationEnabled() 141 if qrEnabled { 142 latestMergedChan = make(chan struct{}, 1) 143 } 144 145 fbm := &folderBlockManager{ 146 appStateUpdater: appStateUpdater, 147 config: config, 148 log: log, 149 shutdownChan: make(chan struct{}), 150 id: fb.Tlf, 151 numPointersPerGCThreshold: numPointersPerGCThresholdDefault, 152 archiveChan: make(chan ReadOnlyRootMetadata, 500), 153 archivePauseChan: make(chan (<-chan struct{})), 154 blocksToDeleteChan: make(chan blocksToDelete, 25), 155 blocksToDeletePauseChan: make(chan (<-chan struct{})), 156 forceReclamationChan: make(chan struct{}, 1), 157 latestMergedChan: latestMergedChan, 158 helper: helper, 159 } 160 161 if bType != standard || !config.Mode().BlockManagementEnabled() { 162 return fbm 163 } 164 165 go fbm.archiveBlocksInBackground() 166 go fbm.deleteBlocksInBackground() 167 if qrEnabled { 168 go fbm.reclaimQuotaInBackground() 169 go fbm.cleanDiskCachesInBackground() 170 } 171 return fbm 172 } 173 174 func (fbm *folderBlockManager) setBlocksToDeleteCancel(cancel context.CancelFunc) { 175 fbm.blocksToDeleteCancelLock.Lock() 176 defer fbm.blocksToDeleteCancelLock.Unlock() 177 fbm.blocksToDeleteCancel = cancel 178 } 179 180 func (fbm *folderBlockManager) cancelBlocksToDelete() { 181 blocksToDeleteCancel := func() context.CancelFunc { 182 fbm.blocksToDeleteCancelLock.Lock() 183 defer fbm.blocksToDeleteCancelLock.Unlock() 184 blocksToDeleteCancel := fbm.blocksToDeleteCancel 185 fbm.blocksToDeleteCancel = nil 186 return blocksToDeleteCancel 187 }() 188 if blocksToDeleteCancel != nil { 189 blocksToDeleteCancel() 190 } 191 } 192 193 func (fbm *folderBlockManager) setArchiveCancel(cancel context.CancelFunc) { 194 fbm.archiveCancelLock.Lock() 195 defer fbm.archiveCancelLock.Unlock() 196 fbm.archiveCancel = cancel 197 } 198 199 func (fbm *folderBlockManager) cancelArchive() { 200 archiveCancel := func() context.CancelFunc { 201 fbm.archiveCancelLock.Lock() 202 defer fbm.archiveCancelLock.Unlock() 203 archiveCancel := fbm.archiveCancel 204 fbm.archiveCancel = nil 205 return archiveCancel 206 }() 207 if archiveCancel != nil { 208 archiveCancel() 209 } 210 } 211 212 func (fbm *folderBlockManager) setReclamationCancel(cancel context.CancelFunc) { 213 fbm.reclamationCancelLock.Lock() 214 defer fbm.reclamationCancelLock.Unlock() 215 fbm.reclamationCancel = cancel 216 } 217 218 func (fbm *folderBlockManager) cancelReclamation() { 219 reclamationCancel := func() context.CancelFunc { 220 fbm.reclamationCancelLock.Lock() 221 defer fbm.reclamationCancelLock.Unlock() 222 reclamationCancel := fbm.reclamationCancel 223 fbm.reclamationCancel = nil 224 return reclamationCancel 225 }() 226 if reclamationCancel != nil { 227 reclamationCancel() 228 } 229 } 230 231 func (fbm *folderBlockManager) setCleanDiskCacheCancel( 232 cancel context.CancelFunc) { 233 fbm.cleanDiskCacheCancelLock.Lock() 234 defer fbm.cleanDiskCacheCancelLock.Unlock() 235 fbm.cleanDiskCacheCancel = cancel 236 } 237 238 func (fbm *folderBlockManager) cancelCleanDiskCache() { 239 cleanDiskCacheCancel := func() context.CancelFunc { 240 fbm.cleanDiskCacheCancelLock.Lock() 241 defer fbm.cleanDiskCacheCancelLock.Unlock() 242 cleanDiskCacheCancel := fbm.cleanDiskCacheCancel 243 fbm.cleanDiskCacheCancel = nil 244 return cleanDiskCacheCancel 245 }() 246 if cleanDiskCacheCancel != nil { 247 cleanDiskCacheCancel() 248 } 249 } 250 251 func (fbm *folderBlockManager) shutdown() { 252 close(fbm.shutdownChan) 253 fbm.cancelArchive() 254 fbm.cancelBlocksToDelete() 255 fbm.cancelReclamation() 256 fbm.cancelCleanDiskCache() 257 } 258 259 // cleanUpBlockState cleans up any blocks that may have been orphaned 260 // by a failure during or after blocks have been sent to the 261 // server. This is usually used in a defer right before a call to 262 // fbo.doBlockPuts like so: 263 // 264 // defer func() { 265 // if err != nil { 266 // ...cleanUpBlockState(md.ReadOnly(), bps) 267 // } 268 // }() 269 // 270 // ... = ...doBlockPuts(ctx, md.ReadOnly(), *bps) 271 // 272 // The exception is for when blocks might get reused across multiple 273 // attempts at the same operation (like for a Sync). In that case, 274 // failed blocks should be built up in a separate data structure, and 275 // this should be called when the operation finally succeeds. 276 func (fbm *folderBlockManager) cleanUpBlockState( 277 md ReadOnlyRootMetadata, bps blockPutState, bdType blockDeleteType) { 278 fbm.log.CDebugf( 279 context.TODO(), "Clean up md %d %s, bdType=%d", md.Revision(), 280 md.MergedStatus(), bdType) 281 expBackoff := backoff.NewExponentialBackOff() 282 // Never give up when trying to delete blocks; it might just take 283 // a long time to confirm with the server whether a revision 284 // succeeded or not. 285 expBackoff.MaxElapsedTime = 0 286 toDelete := blocksToDelete{ 287 md: md, 288 bdType: bdType, 289 backoff: expBackoff, 290 } 291 toDelete.blocks = append(toDelete.blocks, bps.Ptrs()...) 292 fbm.enqueueBlocksToDelete(toDelete) 293 } 294 295 func (fbm *folderBlockManager) enqueueBlocksToDelete(toDelete blocksToDelete) { 296 fbm.blocksToDeleteWaitGroup.Add(1) 297 fbm.blocksToDeleteChan <- toDelete 298 } 299 300 func (fbm *folderBlockManager) enqueueBlocksToDeleteAfterShortDelay( 301 ctx context.Context, toDelete blocksToDelete) { 302 fbm.blocksToDeleteWaitGroup.Add(1) 303 duration := toDelete.backoff.NextBackOff() 304 if duration == backoff.Stop { 305 panic(fmt.Sprintf("Backoff stopped while checking whether we "+ 306 "should delete revision %d", toDelete.md.Revision())) 307 } 308 time.AfterFunc(duration, 309 func() { 310 select { 311 case fbm.blocksToDeleteChan <- toDelete: 312 case <-fbm.shutdownChan: 313 fbm.blocksToDeleteWaitGroup.Done() 314 } 315 }) 316 } 317 318 // enqueueBlocksToDeleteNoWait enqueues blocks to be deleted just like 319 // enqueueBlocksToDelete, except that when fbm.blocksToDeleteChan is full, it 320 // doesn't block, but instead spawns a goroutine to handle the sending. 321 // 322 // This is necessary to prevent a situation like following: 323 // 1. A delete fails when fbm.blocksToDeleteChan is full 324 // 2. The goroutine tries to put the failed toDelete back to 325 // fbm.blocksToDeleteChan 326 // 3. Step 2 becomes synchronous and is blocked because 327 // fbm.blocksToDeleteChan is already full 328 // 4. fbm.blocksToDeleteChan never gets drained because the goroutine that 329 // drains it is waiting for sending on the same channel. 330 // 5. Deadlock! 331 func (fbm *folderBlockManager) enqueueBlocksToDeleteNoWait(toDelete blocksToDelete) { 332 fbm.blocksToDeleteWaitGroup.Add(1) 333 334 select { 335 case fbm.blocksToDeleteChan <- toDelete: 336 return 337 default: 338 go func() { fbm.blocksToDeleteChan <- toDelete }() 339 } 340 } 341 342 func isArchivableOp(op op) bool { 343 switch op.(type) { 344 case *createOp: 345 return true 346 case *rmOp: 347 return true 348 case *renameOp: 349 return true 350 case *syncOp: 351 return true 352 case *setAttrOp: 353 return true 354 case *resolutionOp: 355 return true 356 default: 357 // rekey ops don't have anything to archive, and gc 358 // ops only have deleted blocks. 359 return false 360 } 361 } 362 363 func isArchivableMDOrError(md ReadOnlyRootMetadata) error { 364 if md.MergedStatus() != kbfsmd.Merged { 365 return fmt.Errorf("md rev=%d is not merged", md.Revision()) 366 } 367 368 for _, op := range md.data.Changes.Ops { 369 if !isArchivableOp(op) { 370 return fmt.Errorf( 371 "md rev=%d has unarchivable op %s", 372 md.Revision(), op) 373 } 374 } 375 return nil 376 } 377 378 func (fbm *folderBlockManager) archiveUnrefBlocks(md ReadOnlyRootMetadata) { 379 // Don't archive for unmerged revisions, because conflict 380 // resolution might undo some of the unreferences. 381 if md.MergedStatus() != kbfsmd.Merged { 382 return 383 } 384 385 if err := isArchivableMDOrError(md); err != nil { 386 panic(err) 387 } 388 389 fbm.archiveGroup.Add(1) 390 fbm.archiveChan <- md 391 } 392 393 // archiveUnrefBlocksNoWait enqueues the MD for archiving without 394 // blocking. By the time it returns, the archive group has been 395 // incremented so future waits will block on this archive. This 396 // method is for internal use within folderBlockManager only. 397 func (fbm *folderBlockManager) archiveUnrefBlocksNoWait(md ReadOnlyRootMetadata) { 398 // Don't archive for unmerged revisions, because conflict 399 // resolution might undo some of the unreferences. 400 if md.MergedStatus() != kbfsmd.Merged { 401 return 402 } 403 404 if err := isArchivableMDOrError(md); err != nil { 405 panic(err) 406 } 407 408 fbm.archiveGroup.Add(1) 409 410 // Don't block if the channel is full; instead do the send in a 411 // background goroutine. We've already done the Add above, so the 412 // wait calls should all work just fine. 413 select { 414 case fbm.archiveChan <- md: 415 return 416 default: 417 go func() { fbm.archiveChan <- md }() 418 } 419 } 420 421 func (fbm *folderBlockManager) waitForArchives(ctx context.Context) error { 422 return fbm.archiveGroup.Wait(ctx) 423 } 424 425 func (fbm *folderBlockManager) waitForDeletingBlocks(ctx context.Context) error { 426 return fbm.blocksToDeleteWaitGroup.Wait(ctx) 427 } 428 429 func (fbm *folderBlockManager) waitForQuotaReclamations( 430 ctx context.Context) error { 431 return fbm.reclamationGroup.Wait(ctx) 432 } 433 434 func (fbm *folderBlockManager) waitForDiskCacheCleans( 435 ctx context.Context) error { 436 return fbm.cleanDiskCachesGroup.Wait(ctx) 437 } 438 439 func (fbm *folderBlockManager) forceQuotaReclamation() { 440 fbm.reclamationGroup.Add(1) 441 select { 442 case fbm.forceReclamationChan <- struct{}{}: 443 default: 444 fbm.reclamationGroup.Done() 445 } 446 } 447 448 // doChunkedDowngrades sends batched archive or delete messages to the 449 // block server for the given block pointers. For deletes, it returns 450 // a list of block IDs that no longer have any references. 451 func (fbm *folderBlockManager) doChunkedDowngrades(ctx context.Context, 452 tlfID tlf.ID, ptrs []data.BlockPointer, archive bool) ( 453 []kbfsblock.ID, error) { 454 fbm.log.CDebugf(ctx, "Downgrading %d pointers (archive=%t)", 455 len(ptrs), archive) 456 bops := fbm.config.BlockOps() 457 458 // Round up to find the number of chunks. 459 numChunks := (len(ptrs) + numPointersToDowngradePerChunk - 1) / 460 numPointersToDowngradePerChunk 461 numWorkers := numChunks 462 if numWorkers > maxParallelBlockPuts { 463 numWorkers = maxParallelBlockPuts 464 } 465 chunks := make(chan []data.BlockPointer, numChunks) 466 467 var wg sync.WaitGroup 468 defer wg.Wait() 469 470 ctx, cancel := context.WithCancel(ctx) 471 defer cancel() 472 473 type workerResult struct { 474 zeroRefCounts []kbfsblock.ID 475 err error 476 } 477 478 chunkResults := make(chan workerResult, numChunks) 479 worker := func() { 480 defer wg.Done() 481 for chunk := range chunks { 482 var res workerResult 483 fbm.log.CDebugf(ctx, "Downgrading chunk of %d pointers", len(chunk)) 484 if archive { 485 res.err = bops.Archive(ctx, tlfID, chunk) 486 } else { 487 var liveCounts map[kbfsblock.ID]int 488 liveCounts, res.err = bops.Delete(ctx, tlfID, chunk) 489 if res.err == nil { 490 for id, count := range liveCounts { 491 if count == 0 { 492 res.zeroRefCounts = append(res.zeroRefCounts, id) 493 } 494 } 495 } 496 } 497 chunkResults <- res 498 select { 499 // return early if the context has been canceled 500 case <-ctx.Done(): 501 return 502 default: 503 } 504 } 505 } 506 for i := 0; i < numWorkers; i++ { 507 wg.Add(1) 508 go worker() 509 } 510 511 for start := 0; start < len(ptrs); start += numPointersToDowngradePerChunk { 512 end := start + numPointersToDowngradePerChunk 513 if end > len(ptrs) { 514 end = len(ptrs) 515 } 516 chunks <- ptrs[start:end] 517 } 518 close(chunks) 519 520 var zeroRefCounts []kbfsblock.ID 521 for i := 0; i < numChunks; i++ { 522 result := <-chunkResults 523 if result.err != nil { 524 // deferred cancel will stop the other workers. 525 return nil, result.err 526 } 527 zeroRefCounts = append(zeroRefCounts, result.zeroRefCounts...) 528 } 529 return zeroRefCounts, nil 530 } 531 532 // deleteBlockRefs sends batched delete messages to the block server 533 // for the given block pointers. It returns a list of block IDs that 534 // no longer have any references. 535 func (fbm *folderBlockManager) deleteBlockRefs(ctx context.Context, 536 tlfID tlf.ID, ptrs []data.BlockPointer) ([]kbfsblock.ID, error) { 537 return fbm.doChunkedDowngrades(ctx, tlfID, ptrs, false) 538 } 539 540 func (fbm *folderBlockManager) processBlocksToDelete(ctx context.Context, toDelete blocksToDelete) error { 541 // also attempt to delete any error references 542 543 defer fbm.blocksToDeleteWaitGroup.Done() 544 545 // Make sure all blocks in the journal (if journaling is enabled) 546 // are flushed before attempting to delete any of them. 547 if jManager, err := GetJournalManager(fbm.config); err == nil { 548 fbm.log.CDebugf(ctx, "Waiting for journal to flush") 549 if err := jManager.WaitForCompleteFlush(ctx, fbm.id); err != nil { 550 return err 551 } 552 } 553 554 fbm.log.CDebugf(ctx, "Checking deleted blocks for revision %d", 555 toDelete.md.Revision()) 556 // Make sure that the MD didn't actually become part of the folder 557 // history. (This could happen if the Sync was canceled while the 558 // MD put was outstanding.) If the private MD is not set, there's 559 // no way the revision made it to the server, so we are free to 560 // clean it up without checking with the server. 561 if toDelete.bdType == blockDeleteOnMDFail && 562 toDelete.md.bareMd.GetSerializedPrivateMetadata() != nil { 563 // Don't use `getSingleMD` here, since it returns an error if 564 // the revision isn't found, and that's useful information for 565 // us here. 566 rmds, err := getMDRange( 567 ctx, fbm.config, fbm.id, toDelete.md.BID(), toDelete.md.Revision(), 568 toDelete.md.Revision(), toDelete.md.MergedStatus(), nil) 569 if err != nil { 570 fbm.log.CDebugf(ctx, 571 "Error trying to get MD %d; retrying after a delay", 572 toDelete.md.Revision()) 573 // We don't know whether or not the revision made it to 574 // the server, so try again. But don't re-enqueue 575 // immediately to avoid fast infinite loops. 576 fbm.enqueueBlocksToDeleteAfterShortDelay(ctx, toDelete) 577 return nil 578 } 579 580 var rmd ImmutableRootMetadata 581 if len(rmds) == 0 { 582 // The rmd.mdID check below will fail intentionally since 583 // rmd is empty. Note that this assumes that the MD 584 // servers don't cache negative lookups, or if they do, 585 // they use synchronous cache invalidations for that case. 586 // If we ever allow MD servers to cache negative lookups, 587 // we'll have to retry here for at least the amount of the 588 // maximum allowable cache timeout. 589 fbm.log.CDebugf(ctx, "No revision %d found on MD server, so we "+ 590 "can safely archive", toDelete.md.Revision()) 591 } else { 592 rmd = rmds[0] 593 } 594 595 mdID, err := kbfsmd.MakeID(fbm.config.Codec(), toDelete.md.bareMd) 596 if err != nil { 597 fbm.log.CErrorf(ctx, "Error when comparing dirs: %v", err) 598 } else if mdID == rmd.mdID { 599 if err := isArchivableMDOrError(rmd.ReadOnly()); err != nil { 600 fbm.log.CDebugf(ctx, "Skipping archiving for non-deleted, "+ 601 "unarchivable revision %d: %v", rmd.Revision(), err) 602 return nil 603 } 604 605 // This md is part of the history of the folder, so we 606 // shouldn't delete the blocks. But, since this MD put 607 // seems to have succeeded, we should archive it. 608 fbm.log.CDebugf(ctx, "Not deleting blocks from revision %d; "+ 609 "archiving it", rmd.Revision()) 610 // Don't block on archiving the MD, because that could 611 // lead to deadlock. 612 fbm.archiveUnrefBlocksNoWait(rmd.ReadOnly()) 613 return nil 614 } 615 616 // Otherwise something else has been written over 617 // this MD, so get rid of the blocks. 618 fbm.log.CDebugf(ctx, "Cleaning up blocks for failed revision %d", 619 toDelete.md.Revision()) 620 } else { 621 fbm.log.CDebugf(ctx, "Cleaning up blocks for revision %d", 622 toDelete.md.Revision()) 623 } 624 625 _, err := fbm.deleteBlockRefs(ctx, toDelete.md.TlfID(), toDelete.blocks) 626 // Ignore permanent errors 627 _, isPermErr := err.(kbfsblock.ServerError) 628 _, isNonceNonExistentErr := err.(kbfsblock.ServerErrorNonceNonExistent) 629 _, isBadRequestErr := err.(kbfsblock.ServerErrorBadRequest) 630 if err != nil { 631 fbm.log.CWarningf(ctx, "Couldn't delete some ref in batch %v: %v", 632 toDelete.blocks, err) 633 if !isPermErr && !isNonceNonExistentErr && !isBadRequestErr { 634 fbm.enqueueBlocksToDeleteNoWait(toDelete) 635 return nil 636 } 637 } 638 639 return nil 640 } 641 642 // CtxFBMTagKey is the type used for unique context tags within 643 // folderBlockManager 644 type CtxFBMTagKey int 645 646 const ( 647 // CtxFBMIDKey is the type of the tag for unique operation IDs 648 // within folderBlockManager. 649 CtxFBMIDKey CtxFBMTagKey = iota 650 ) 651 652 // CtxFBMOpID is the display name for the unique operation 653 // folderBlockManager ID tag. 654 const CtxFBMOpID = "FBMID" 655 656 func (fbm *folderBlockManager) ctxWithFBMID( 657 ctx context.Context) context.Context { 658 return CtxWithRandomIDReplayable(ctx, CtxFBMIDKey, CtxFBMOpID, fbm.log) 659 } 660 661 // Run the passed function with a context that's canceled on shutdown. 662 func (fbm *folderBlockManager) runUnlessShutdownWithCtx( 663 ctx context.Context, fn func(ctx context.Context) error) error { 664 ctx, cancelFunc := context.WithCancel(ctx) 665 defer cancelFunc() 666 errChan := make(chan error, 1) 667 go func() { 668 errChan <- fn(ctx) 669 }() 670 671 select { 672 case err := <-errChan: 673 return err 674 case <-fbm.shutdownChan: 675 return errors.New("shutdown received") 676 } 677 } 678 679 // Run the passed function with a context that's canceled on shutdown. 680 func (fbm *folderBlockManager) runUnlessShutdown( 681 fn func(ctx context.Context) error) error { 682 ctx := fbm.ctxWithFBMID(context.Background()) 683 return fbm.runUnlessShutdownWithCtx(ctx, fn) 684 } 685 686 func (fbm *folderBlockManager) archiveBlockRefs(ctx context.Context, 687 tlfID tlf.ID, ptrs []data.BlockPointer) error { 688 _, err := fbm.doChunkedDowngrades(ctx, tlfID, ptrs, true) 689 return err 690 } 691 692 type unrefIterator struct { 693 nextPtr int 694 } 695 696 // getUnrefPointersFromMD returns a slice of BlockPointers that were 697 // unreferenced by the given `rmd`. If there are too many pointers to 698 // process, given the current mode, then it will return a partial 699 // list, plus a non-nil `iter` parameter that can be passed into a 700 // subsequent call to get the next set of unreferenced BlockPointers 701 // from the same MD. If a nil `iter` is given, pointers are returned 702 // from the beginning of the list. 703 func (fbm *folderBlockManager) getUnrefPointersFromMD( 704 rmd ReadOnlyRootMetadata, includeGC bool, iter *unrefIterator) ( 705 ptrs []data.BlockPointer, nextIter *unrefIterator) { 706 currPtr := 0 707 complete := true 708 nextPtr := 0 709 if iter != nil { 710 nextPtr = iter.nextPtr 711 } 712 ptrMap := make(map[data.BlockPointer]bool) 713 max := fbm.config.Mode().MaxBlockPtrsToManageAtOnce() 714 opLoop: 715 for _, op := range rmd.data.Changes.Ops { 716 if _, ok := op.(*GCOp); !includeGC && ok { 717 continue 718 } 719 for _, ptr := range op.Unrefs() { 720 currPtr++ 721 // Skip past any ptrs we've already processed. 722 if currPtr <= nextPtr { 723 continue 724 } 725 726 // Can be zeroPtr in weird failed sync scenarios. 727 // See syncInfo.replaceRemovedBlock for an example 728 // of how this can happen. 729 if ptr != data.ZeroPtr && !ptrMap[ptr] { 730 ptrMap[ptr] = true 731 } 732 nextPtr++ 733 if max >= 0 && len(ptrMap) >= max { 734 complete = false 735 break opLoop 736 } 737 } 738 for _, update := range op.allUpdates() { 739 currPtr++ 740 // Skip past any ptrs we've already processed. 741 if currPtr <= nextPtr { 742 continue 743 } 744 745 // It's legal for there to be an "update" between 746 // two identical pointers (usually because of 747 // conflict resolution), so ignore that for quota 748 // reclamation purposes. 749 if update.Ref != update.Unref && !ptrMap[update.Unref] { 750 ptrMap[update.Unref] = true 751 } 752 nextPtr++ 753 if max >= 0 && len(ptrMap) >= max { 754 complete = false 755 break opLoop 756 } 757 } 758 } 759 ptrs = make([]data.BlockPointer, 0, len(ptrMap)) 760 for ptr := range ptrMap { 761 ptrs = append(ptrs, ptr) 762 } 763 if !complete { 764 nextIter = &unrefIterator{nextPtr} 765 } 766 return ptrs, nextIter 767 } 768 769 func (fbm *folderBlockManager) archiveAllBlocksInMD(md ReadOnlyRootMetadata) { 770 // This func doesn't take any locks, though it can 771 // block md writes due to the buffered channel. 772 // So use the long timeout to make sure things get 773 // unblocked eventually, but no need for a short 774 // timeout. 775 ctx := fbm.ctxWithFBMID(context.Background()) 776 ctx, cancel := context.WithTimeout(ctx, data.BackgroundTaskTimeout) 777 fbm.setArchiveCancel(cancel) 778 defer fbm.cancelArchive() 779 780 iter := &unrefIterator{0} 781 defer fbm.archiveGroup.Done() 782 for iter != nil { 783 var ptrs []data.BlockPointer 784 ptrs, iter = fbm.getUnrefPointersFromMD(md, true, iter) 785 _ = fbm.runUnlessShutdownWithCtx( 786 ctx, func(ctx context.Context) (err error) { 787 fbm.log.CDebugf( 788 ctx, "Archiving %d block pointers as a result "+ 789 "of revision %d", len(ptrs), md.Revision()) 790 err = fbm.archiveBlockRefs(ctx, md.TlfID(), ptrs) 791 if err != nil { 792 fbm.log.CWarningf( 793 ctx, "Couldn't archive blocks: %v", err) 794 return err 795 } 796 797 return nil 798 }) 799 if iter != nil { 800 fbm.log.CDebugf( 801 ctx, "Archived %d pointers for revision %d, "+ 802 "now looking for more", len(ptrs), md.Revision()) 803 } 804 } 805 } 806 807 func (fbm *folderBlockManager) archiveBlocksInBackground() { 808 for { 809 select { 810 case md := <-fbm.archiveChan: 811 fbm.archiveAllBlocksInMD(md) 812 case unpause := <-fbm.archivePauseChan: 813 _ = fbm.runUnlessShutdown(func(ctx context.Context) (err error) { 814 fbm.log.CInfof(ctx, "Archives paused") 815 // wait to be unpaused 816 select { 817 case <-unpause: 818 fbm.log.CInfof(ctx, "Archives unpaused") 819 case <-ctx.Done(): 820 return ctx.Err() 821 } 822 return nil 823 }) 824 case <-fbm.shutdownChan: 825 return 826 } 827 } 828 } 829 830 func (fbm *folderBlockManager) deleteBlocksInBackground() { 831 for { 832 select { 833 case toDelete := <-fbm.blocksToDeleteChan: 834 _ = fbm.runUnlessShutdown(func(ctx context.Context) (err error) { 835 ctx, cancel := context.WithTimeout( 836 ctx, data.BackgroundTaskTimeout) 837 fbm.setBlocksToDeleteCancel(cancel) 838 defer fbm.cancelBlocksToDelete() 839 840 if err := fbm.processBlocksToDelete(ctx, toDelete); err != nil { 841 fbm.log.CDebugf(ctx, "Error deleting blocks: %v", err) 842 return err 843 } 844 845 return nil 846 }) 847 case unpause := <-fbm.blocksToDeletePauseChan: 848 _ = fbm.runUnlessShutdown(func(ctx context.Context) (err error) { 849 fbm.log.CInfof(ctx, "deleteBlocks paused") 850 select { 851 case <-unpause: 852 fbm.log.CInfof(ctx, "deleteBlocks unpaused") 853 case <-ctx.Done(): 854 return ctx.Err() 855 } 856 return nil 857 }) 858 case <-fbm.shutdownChan: 859 return 860 } 861 } 862 } 863 864 func (fbm *folderBlockManager) isOldEnough(rmd ImmutableRootMetadata) bool { 865 // Trust the server's timestamp on this MD. 866 mtime := rmd.localTimestamp 867 unrefAge := fbm.config.Mode().QuotaReclamationMinUnrefAge() 868 return mtime.Add(unrefAge).Before(fbm.config.Clock().Now()) 869 } 870 871 // getMostRecentGCRevision returns the latest revision that was 872 // scrubbed by the previous gc op. 873 func (fbm *folderBlockManager) getMostRecentGCRevision( 874 ctx context.Context, head ReadOnlyRootMetadata) ( 875 lastGCRev kbfsmd.Revision, err error) { 876 if head.data.LastGCRevision >= kbfsmd.RevisionInitial { 877 fbm.log.CDebugf(ctx, "Found last gc revision %d in "+ 878 "head MD revision %d", head.data.LastGCRevision, 879 head.Revision()) 880 return head.data.LastGCRevision, nil 881 } 882 883 // Very old TLFs might not have a filled-in `LastGCRevision`, so 884 // we need to walk backwards to find the latest gcOp. 885 endRev := head.Revision() 886 for { 887 startRev := endRev - maxMDsAtATime + 1 // (kbfsmd.Revision is signed) 888 if startRev < kbfsmd.RevisionInitial { 889 startRev = kbfsmd.RevisionInitial 890 } 891 892 rmds, err := getMDRange( 893 ctx, fbm.config, fbm.id, kbfsmd.NullBranchID, startRev, 894 endRev, kbfsmd.Merged, nil) 895 if err != nil { 896 return kbfsmd.RevisionUninitialized, err 897 } 898 899 numNew := len(rmds) 900 for i := len(rmds) - 1; i >= 0; i-- { 901 rmd := rmds[i] 902 if rmd.data.LastGCRevision >= kbfsmd.RevisionInitial { 903 fbm.log.CDebugf(ctx, "Found last gc revision %d in "+ 904 "MD revision %d", rmd.data.LastGCRevision, 905 rmd.Revision()) 906 return rmd.data.LastGCRevision, nil 907 } 908 for j := len(rmd.data.Changes.Ops) - 1; j >= 0; j-- { 909 GCOp, ok := rmd.data.Changes.Ops[j].(*GCOp) 910 if !ok || GCOp.LatestRev == kbfsmd.RevisionUninitialized { 911 continue 912 } 913 fbm.log.CDebugf(ctx, "Found last gc op: %s", GCOp) 914 return GCOp.LatestRev, nil 915 } 916 } 917 918 if numNew > 0 { 919 endRev = rmds[0].Revision() - 1 920 } 921 922 if numNew < maxMDsAtATime || endRev < kbfsmd.RevisionInitial { 923 // Never been GC'd. 924 return kbfsmd.RevisionUninitialized, nil 925 } 926 } 927 } 928 929 // getUnrefBlocks returns a slice containing all the block pointers 930 // that were unreferenced after the earliestRev, up to and including 931 // those in latestRev. If the number of pointers is too large, it 932 // will shorten the range of the revisions being reclaimed, and return 933 // the latest revision represented in the returned slice of pointers. 934 func (fbm *folderBlockManager) getUnreferencedBlocks( 935 ctx context.Context, earliestRev, mostRecentRev kbfsmd.Revision) ( 936 ptrs []data.BlockPointer, lastRev kbfsmd.Revision, 937 complete bool, err error) { 938 fbm.log.CDebugf(ctx, "Getting unreferenced blocks between revisions "+ 939 "%d and %d", earliestRev, mostRecentRev) 940 defer func() { 941 if err == nil { 942 fbm.log.CDebugf(ctx, "Found %d pointers to clean between "+ 943 "revisions %d and %d", len(ptrs), earliestRev, lastRev) 944 } 945 }() 946 947 // Walk forward, starting from just after earliestRev, until we 948 // get enough pointers or until we reach the head or a revision 949 // that's not old enough, gathering pointers to GC. 950 startRev := earliestRev + 1 951 outer: 952 for { 953 endRev := startRev + maxMDsAtATime 954 if endRev > mostRecentRev { 955 endRev = mostRecentRev 956 } 957 958 rmds, err := getMDRange( 959 ctx, fbm.config, fbm.id, kbfsmd.NullBranchID, startRev, 960 endRev, kbfsmd.Merged, nil) 961 if err != nil { 962 return nil, kbfsmd.RevisionUninitialized, false, err 963 } 964 965 numNew := len(rmds) 966 for _, rmd := range rmds { 967 if !fbm.isOldEnough(rmd) { 968 fbm.log.CDebugf(ctx, "Revision %d is too recent; stopping QR", 969 rmd.Revision()) 970 complete = true 971 break outer 972 } 973 lastRev = rmd.Revision() 974 // A garbage-collection op *must* contain all pointers in 975 // its respective op. If this device can't handle it, 976 // error the process and let another device take care of 977 // it. 978 newPtrs, iter := fbm.getUnrefPointersFromMD( 979 rmd.ReadOnlyRootMetadata, false, &unrefIterator{0}) 980 if iter != nil { 981 return nil, kbfsmd.RevisionUninitialized, false, errors.New( 982 fmt.Sprintf( 983 "Can't handle the unref'd pointers of revision %d", 984 lastRev)) 985 } 986 ptrs = append(ptrs, newPtrs...) 987 // TODO: when can we clean up the MD's unembedded block 988 // changes pointer? It's not safe until we know for sure 989 // that all existing clients have received the latest 990 // update (and also that there are no outstanding staged 991 // branches). Let's do that as part of the bigger issue 992 // KBFS-793 -- for now we have to leak those blocks. 993 if len(ptrs) > fbm.numPointersPerGCThreshold { 994 fbm.log.CDebugf(ctx, "Shortening GC range to [%d:%d]", 995 earliestRev, rmd.Revision()) 996 break outer 997 } 998 } 999 1000 if numNew > 0 { 1001 startRev = rmds[len(rmds)-1].Revision() + 1 1002 } 1003 1004 if numNew < maxMDsAtATime || startRev > mostRecentRev { 1005 complete = true 1006 break 1007 } 1008 } 1009 1010 return ptrs, lastRev, complete, nil 1011 } 1012 1013 func (fbm *folderBlockManager) finalizeReclamation(ctx context.Context, 1014 ptrs []data.BlockPointer, zeroRefCounts []kbfsblock.ID, 1015 latestRev kbfsmd.Revision) error { 1016 gco := newGCOp(latestRev) 1017 for _, id := range zeroRefCounts { 1018 gco.AddUnrefBlock(data.BlockPointer{ID: id}) 1019 } 1020 1021 ctx, err := tlfhandle.MakeExtendedIdentify( 1022 // TLFIdentifyBehavior_KBFS_QR makes service suppress the tracker popup. 1023 ctx, keybase1.TLFIdentifyBehavior_KBFS_QR) 1024 if err != nil { 1025 return err 1026 } 1027 1028 fbm.log.CDebugf(ctx, "Finalizing reclamation %s with %d ptrs", gco, 1029 len(ptrs)) 1030 // finalizeGCOp could wait indefinitely on locks, so run it in a 1031 // goroutine. 1032 return runUnlessCanceled(ctx, 1033 func() error { return fbm.helper.finalizeGCOp(ctx, gco) }) 1034 } 1035 1036 func (fbm *folderBlockManager) isQRNecessary( 1037 ctx context.Context, head ImmutableRootMetadata) bool { 1038 fbm.lastQRLock.Lock() 1039 defer fbm.lastQRLock.Unlock() 1040 if head == (ImmutableRootMetadata{}) { 1041 return false 1042 } 1043 1044 session, err := fbm.config.KBPKI().GetCurrentSession(ctx) 1045 if err != nil { 1046 fbm.log.CWarningf(ctx, "Couldn't get the current session: %+v", err) 1047 return false 1048 } 1049 // It's ok to treat both MDs written by this process on this 1050 // device, and MDs written by other processes (e.g., kbgit) in the 1051 // same way. Other processes are likely to be short-lived, and 1052 // probably won't do their own QR, so a conflict is unlikely here. 1053 selfWroteHead := session.VerifyingKey == head.LastModifyingWriterVerifyingKey() 1054 1055 // Don't do reclamation if the head isn't old enough and it wasn't 1056 // written by this device. We want to avoid fighting with other 1057 // active writers whenever possible. 1058 if !selfWroteHead { 1059 minHeadAge := fbm.config.Mode().QuotaReclamationMinHeadAge() 1060 if minHeadAge <= 0 { 1061 return false 1062 } 1063 headAge := fbm.config.Clock().Now().Sub(head.localTimestamp) 1064 if headAge < minHeadAge { 1065 return false 1066 } 1067 } 1068 1069 // If the head includes a single gcOp that covers everything up to 1070 // the previous head, we can skip QR. 1071 if len(head.data.Changes.Ops) == 1 { 1072 gcOp, isGCOp := head.data.Changes.Ops[0].(*GCOp) 1073 if isGCOp && gcOp.LatestRev == head.Revision()-1 { 1074 return false 1075 } 1076 } 1077 1078 // Do QR if: 1079 // * The head has changed since last time, OR 1080 // * The last QR did not completely clean every available thing, OR 1081 // * The head is now old enough for QR 1082 isNecessary := head.Revision() != fbm.lastQRHeadRev || 1083 !fbm.wasLastQRComplete || fbm.isOldEnough(head) 1084 if !isNecessary { 1085 return false 1086 } 1087 1088 // Make sure the root block of the TLF is readable. If not, we 1089 // don't want to to garbage collect, since we might need to 1090 // recover to those older versions of the TLF. 1091 headRootPtr := head.data.Dir.BlockPointer 1092 ch := fbm.config.BlockOps().BlockRetriever().Request( 1093 ctx, defaultOnDemandRequestPriority, head, headRootPtr, 1094 data.NewDirBlock(), data.TransientEntry, BlockRequestSolo) 1095 select { 1096 case err := <-ch: 1097 if err != nil { 1098 fbm.log.CWarningf( 1099 ctx, "Couldn't fetch root block %v for TLF %s: %+v", 1100 headRootPtr, head.TlfID(), err) 1101 return false 1102 } 1103 case <-ctx.Done(): 1104 fbm.log.CDebugf( 1105 ctx, "Couldn't fetch root block %v for TLF %s: %+v", 1106 headRootPtr, head.TlfID(), ctx.Err()) 1107 return false 1108 } 1109 1110 return true 1111 } 1112 1113 func (fbm *folderBlockManager) doReclamation(timer *time.Timer) (err error) { 1114 ctx, cancel := context.WithCancel(fbm.ctxWithFBMID(context.Background())) 1115 fbm.setReclamationCancel(cancel) 1116 defer fbm.cancelReclamation() 1117 nextPeriod := fbm.config.Mode().QuotaReclamationPeriod() 1118 defer func() { 1119 // `nextPeriod` may be changed by later code in this function, 1120 // to speed up the next QR cycle when we couldn't reclaim a 1121 // complete set of blocks during this run. 1122 timer.Reset(nextPeriod) 1123 }() 1124 defer fbm.reclamationGroup.Done() 1125 1126 // Don't set a context deadline. For users that have written a 1127 // lot of updates since their last QR, this might involve fetching 1128 // a lot of MD updates in small chunks. It doesn't hold locks for 1129 // any considerable amount of time, so it should be safe to let it 1130 // run indefinitely. 1131 1132 // First get the most recent fully merged MD (might be different 1133 // from the local head if journaling is enabled), and see if we're 1134 // staged or not. 1135 head, err := fbm.helper.getMostRecentFullyMergedMD(ctx) 1136 if err != nil { 1137 return err 1138 } 1139 if err := isReadableOrError( 1140 ctx, fbm.config.KBPKI(), fbm.config, head.ReadOnly()); err != nil { 1141 return err 1142 } 1143 switch { 1144 case head.MergedStatus() != kbfsmd.Merged: 1145 return errors.New("Supposedly fully-merged MD is unexpectedly unmerged") 1146 case head.IsFinal(): 1147 return kbfsmd.MetadataIsFinalError{} 1148 } 1149 1150 // Make sure we're a writer 1151 session, err := fbm.config.KBPKI().GetCurrentSession(ctx) 1152 if err != nil { 1153 return err 1154 } 1155 isWriter, err := head.IsWriter( 1156 ctx, fbm.config.KBPKI(), fbm.config, session.UID, session.VerifyingKey) 1157 if err != nil { 1158 return err 1159 } 1160 if !isWriter { 1161 return tlfhandle.NewWriteAccessError(head.GetTlfHandle(), session.Name, 1162 head.GetTlfHandle().GetCanonicalPath()) 1163 } 1164 1165 if !fbm.isQRNecessary(ctx, head) { 1166 // Nothing has changed since last time, or the current head is 1167 // too new, so no need to do any QR. 1168 return nil 1169 } 1170 var complete bool 1171 var reclamationTime time.Time 1172 var lastRev kbfsmd.Revision 1173 defer func() { 1174 fbm.lastQRLock.Lock() 1175 defer fbm.lastQRLock.Unlock() 1176 // Remember the QR we just performed. 1177 if err == nil && head != (ImmutableRootMetadata{}) { 1178 fbm.lastQRHeadRev = head.Revision() 1179 fbm.wasLastQRComplete = complete 1180 } 1181 if !reclamationTime.IsZero() { 1182 fbm.lastReclamationTime = reclamationTime 1183 } 1184 if lastRev > kbfsmd.RevisionUninitialized { 1185 fbm.lastReclaimedRev = lastRev 1186 } 1187 if !complete { 1188 // If there's more data to reclaim, only wait a short 1189 // while before the next QR attempt. 1190 nextPeriod = 1 * time.Minute 1191 } 1192 }() 1193 1194 // Then grab the lock for this folder, so we're the only one doing 1195 // garbage collection for a while. 1196 locked, err := fbm.config.MDServer().TruncateLock(ctx, fbm.id) 1197 if err != nil { 1198 return err 1199 } 1200 if !locked { 1201 fbm.log.CDebugf(ctx, "Couldn't get the truncate lock") 1202 return fmt.Errorf("Couldn't get the truncate lock for folder %d", 1203 fbm.id) 1204 } 1205 defer func() { 1206 unlocked, unlockErr := fbm.config.MDServer().TruncateUnlock(ctx, fbm.id) 1207 if unlockErr != nil { 1208 fbm.log.CDebugf(ctx, "Couldn't release the truncate lock: %v", 1209 unlockErr) 1210 } 1211 if !unlocked { 1212 fbm.log.CDebugf(ctx, "Couldn't unlock the truncate lock") 1213 } 1214 }() 1215 1216 lastGCRev, err := fbm.getMostRecentGCRevision(ctx, head.ReadOnly()) 1217 if err != nil { 1218 return err 1219 } 1220 if head.Revision() <= lastGCRev { 1221 // TODO: need a log level more fine-grained than Debug to 1222 // print out that we're not doing reclamation. 1223 complete = true 1224 return nil 1225 } 1226 1227 // Don't try to do too many at a time. 1228 shortened := false 1229 mostRecentRev := head.Revision() 1230 if mostRecentRev-lastGCRev > numMaxRevisionsPerQR { 1231 mostRecentRev = lastGCRev + numMaxRevisionsPerQR 1232 shortened = true 1233 } 1234 1235 // Don't print these until we know for sure that we'll be 1236 // reclaiming some quota, to avoid log pollution. 1237 fbm.log.CDebugf(ctx, "Starting quota reclamation process") 1238 defer func() { 1239 fbm.log.CDebugf(ctx, "Ending quota reclamation process: %v", err) 1240 reclamationTime = fbm.config.Clock().Now() 1241 }() 1242 1243 ptrs, lastRev, complete, err := fbm.getUnreferencedBlocks( 1244 ctx, lastGCRev, mostRecentRev) 1245 if err != nil { 1246 return err 1247 } 1248 if lastRev == kbfsmd.RevisionUninitialized { 1249 fbm.log.CDebugf(ctx, "No recent revisions to GC") 1250 complete = true 1251 return nil 1252 } 1253 if len(ptrs) == 0 && !shortened { 1254 complete = true 1255 1256 // Add a new gcOp to show other clients that they don't need 1257 // to explore this range again. 1258 return fbm.finalizeReclamation(ctx, nil, nil, lastRev) 1259 } else if shortened { 1260 complete = false 1261 } 1262 1263 zeroRefCounts, err := fbm.deleteBlockRefs(ctx, head.TlfID(), ptrs) 1264 if err != nil { 1265 return err 1266 } 1267 1268 return fbm.finalizeReclamation(ctx, ptrs, zeroRefCounts, lastRev) 1269 } 1270 1271 func isPermanentQRError(err error) bool { 1272 switch errors.Cause(err).(type) { 1273 case tlfhandle.WriteAccessError, kbfsmd.MetadataIsFinalError, 1274 RevokedDeviceVerificationError: 1275 return true 1276 default: 1277 return false 1278 } 1279 } 1280 1281 func (fbm *folderBlockManager) reclaimQuotaInBackground() { 1282 autoQR := true 1283 timer := time.NewTimer(fbm.config.Mode().QuotaReclamationPeriod()) 1284 1285 if fbm.config.Mode().QuotaReclamationPeriod().Seconds() != 0 { 1286 // Run QR once immediately at the start of the period. 1287 fbm.reclamationGroup.Add(1) 1288 err := fbm.doReclamation(timer) 1289 if isPermanentQRError(err) { 1290 autoQR = false 1291 fbm.log.CDebugf(context.Background(), 1292 "Permanently stopping QR due to initial error: %+v", err) 1293 } 1294 } 1295 1296 timerChan := timer.C 1297 for { 1298 // Don't let the timer fire if auto-reclamation is turned off. 1299 if !autoQR || 1300 fbm.config.Mode().QuotaReclamationPeriod().Seconds() == 0 { 1301 timer.Stop() 1302 // Use a channel that will never fire instead. 1303 timerChan = make(chan time.Time) 1304 } 1305 1306 state := keybase1.MobileAppState_FOREGROUND 1307 select { 1308 case <-fbm.shutdownChan: 1309 return 1310 case state = <-fbm.appStateUpdater.NextAppStateUpdate(&state): 1311 for state != keybase1.MobileAppState_FOREGROUND { 1312 fbm.log.CDebugf(context.Background(), 1313 "Pausing QR while not foregrounded: state=%s", state) 1314 state = <-fbm.appStateUpdater.NextAppStateUpdate(&state) 1315 } 1316 fbm.log.CDebugf( 1317 context.Background(), "Resuming QR while foregrounded") 1318 continue 1319 case <-timerChan: 1320 fbm.reclamationGroup.Add(1) 1321 case <-fbm.forceReclamationChan: 1322 } 1323 1324 err := fbm.doReclamation(timer) 1325 if isPermanentQRError(err) { 1326 // If we can't write the MD, don't bother with the timer 1327 // anymore. Don't completely shut down, since we don't 1328 // want forced reclamations to hang. 1329 timer.Stop() 1330 timerChan = make(chan time.Time) 1331 autoQR = false 1332 fbm.log.CDebugf(context.Background(), 1333 "Permanently stopping QR due to error: %+v", err) 1334 } 1335 } 1336 } 1337 1338 func (fbm *folderBlockManager) getLastQRData() (time.Time, kbfsmd.Revision) { 1339 fbm.lastQRLock.Lock() 1340 defer fbm.lastQRLock.Unlock() 1341 return fbm.lastReclamationTime, fbm.lastReclaimedRev 1342 } 1343 1344 func (fbm *folderBlockManager) clearLastQRData() { 1345 fbm.lastQRLock.Lock() 1346 defer fbm.lastQRLock.Unlock() 1347 fbm.lastQRHeadRev = kbfsmd.RevisionUninitialized 1348 fbm.wasLastQRComplete = false 1349 fbm.lastReclamationTime = time.Time{} 1350 fbm.lastReclaimedRev = kbfsmd.RevisionUninitialized 1351 } 1352 1353 func (fbm *folderBlockManager) doChunkedGetNonLiveBlocks( 1354 ctx context.Context, ptrs []data.BlockPointer) ( 1355 nonLiveBlocks []kbfsblock.ID, err error) { 1356 fbm.log.CDebugf(ctx, "Get live count for %d pointers", len(ptrs)) 1357 bops := fbm.config.BlockOps() 1358 1359 // Round up to find the number of chunks. 1360 numChunks := (len(ptrs) + numPointersToDowngradePerChunk - 1) / 1361 numPointersToDowngradePerChunk 1362 numWorkers := numChunks 1363 if numWorkers > maxParallelBlockPuts { 1364 numWorkers = maxParallelBlockPuts 1365 } 1366 chunks := make(chan []data.BlockPointer, numChunks) 1367 1368 eg, groupCtx := errgroup.WithContext(ctx) 1369 chunkResults := make(chan []kbfsblock.ID, numChunks) 1370 for i := 0; i < numWorkers; i++ { 1371 eg.Go(func() error { 1372 for chunk := range chunks { 1373 fbm.log.CDebugf(groupCtx, 1374 "Getting live count for chunk of %d pointers", len(chunk)) 1375 liveCounts, err := bops.GetLiveCount(ctx, fbm.id, chunk) 1376 if err != nil { 1377 return err 1378 } 1379 ids := make([]kbfsblock.ID, 0, len(liveCounts)) 1380 for id, count := range liveCounts { 1381 if count == 0 { 1382 ids = append(ids, id) 1383 } else { 1384 fbm.log.CDebugf(groupCtx, 1385 "Ignoring live block %s with %d refs", id, count) 1386 } 1387 } 1388 chunkResults <- ids 1389 select { 1390 // return early if the context has been canceled 1391 case <-groupCtx.Done(): 1392 return groupCtx.Err() 1393 default: 1394 } 1395 } 1396 return nil 1397 }) 1398 } 1399 1400 for start := 0; start < len(ptrs); start += numPointersToDowngradePerChunk { 1401 end := start + numPointersToDowngradePerChunk 1402 if end > len(ptrs) { 1403 end = len(ptrs) 1404 } 1405 chunks <- ptrs[start:end] 1406 } 1407 close(chunks) 1408 1409 err = eg.Wait() 1410 if err != nil { 1411 return nil, err 1412 } 1413 close(chunkResults) 1414 1415 for result := range chunkResults { 1416 nonLiveBlocks = append(nonLiveBlocks, result...) 1417 } 1418 return nonLiveBlocks, nil 1419 } 1420 1421 func (fbm *folderBlockManager) doCleanDiskCache(cacheType DiskBlockCacheType) ( 1422 err error) { 1423 dbc := fbm.config.DiskBlockCache() 1424 if dbc == nil { 1425 return nil 1426 } 1427 1428 ctx, cancel := context.WithCancel(fbm.ctxWithFBMID(context.Background())) 1429 fbm.setCleanDiskCacheCancel(cancel) 1430 defer fbm.cancelCleanDiskCache() 1431 1432 lState := makeFBOLockState() 1433 recentRev := fbm.helper.getLatestMergedRevision(lState) 1434 1435 lastRev, err := dbc.GetLastUnrefRev(ctx, fbm.id, cacheType) 1436 if err != nil { 1437 return err 1438 } 1439 1440 if lastRev < kbfsmd.RevisionInitial { 1441 if recentRev > kbfsmd.RevisionInitial { 1442 // This can happen if the sync cache was created and 1443 // populated before we started keeping track of the last 1444 // unref'd revision. In that case, we just let the blocks 1445 // from the old revision stay in the cache until they are 1446 // manually cleaned up. 1447 // 1448 // It can also happen if the device just started 1449 // monitoring the TLF for syncing, in which case it 1450 // shouldn't have any cached blocks that were unref'd in 1451 // earlier revisions. 1452 fbm.log.CDebugf(ctx, "Starting to clean %s at revision %d", 1453 cacheType, recentRev) 1454 lastRev = recentRev - 1 1455 } else { 1456 // No revisions to clean yet. 1457 return dbc.PutLastUnrefRev( 1458 ctx, fbm.id, recentRev, cacheType) 1459 } 1460 } 1461 1462 if lastRev >= recentRev { 1463 // Nothing to do. 1464 return nil 1465 } 1466 1467 fbm.log.CDebugf(ctx, "Cleaning %s revisions after %d, "+ 1468 "up to %d", cacheType, lastRev, recentRev) 1469 defer func() { 1470 fbm.log.CDebugf(ctx, "Done cleaning %s: %+v", cacheType, err) 1471 }() 1472 for nextRev := lastRev + 1; nextRev <= recentRev; nextRev++ { 1473 rmd, err := GetSingleMD( 1474 ctx, fbm.config, fbm.id, kbfsmd.NullBranchID, nextRev, 1475 kbfsmd.Merged, nil) 1476 if err != nil { 1477 return err 1478 } 1479 1480 iter := &unrefIterator{0} 1481 for iter != nil { 1482 // Include unrefs from `gcOp`s here, as a double-check 1483 // against archive races (see comment below). 1484 var ptrs []data.BlockPointer 1485 ptrs, iter = fbm.getUnrefPointersFromMD( 1486 rmd.ReadOnlyRootMetadata, true, iter) 1487 1488 // Cancel any prefetches for these blocks that might be in 1489 // flight, to make sure they don't get put into the cache 1490 // after we're done cleaning it. Ideally we would cancel 1491 // them in a particular order (the lowest level ones 1492 // first, up to the root), but since we already do one 1493 // round of prefetch-canceling as part of applying the MD 1494 // and updating the pointers, doing a second round here 1495 // should be good enough to catch any weird relationships 1496 // between the pointers where one non-yet-canceled 1497 // prefetch can revive the prefetch of an already-canceled 1498 // child block. 1499 for _, ptr := range ptrs { 1500 fbm.config.BlockOps().Prefetcher().CancelPrefetch(ptr) 1501 c, err := fbm.config.BlockOps().Prefetcher(). 1502 WaitChannelForBlockPrefetch(ctx, ptr) 1503 if err != nil { 1504 return err 1505 } 1506 select { 1507 case <-c: 1508 case <-ctx.Done(): 1509 return ctx.Err() 1510 } 1511 } 1512 1513 var ids []kbfsblock.ID 1514 if cacheType == DiskBlockSyncCache { 1515 // Wait for our own archives to complete, to make sure 1516 // the bserver already knows this block isn't live yet 1517 // when we make the call below. However, when dealing 1518 // with MDs written by other clients, there could be a 1519 // race here where we see the ID is live before the 1520 // other client gets to archive the block, leading to 1521 // a leak. Once the revision is GC'd though, we 1522 // should run through this code again with the `gcOp`, 1523 // and we'll delete the block then. (Note there's 1524 // always a chance for a race here, since the client 1525 // could crash before archiving the blocks. But the 1526 // GC should always catch it eventually.) 1527 err := fbm.waitForArchives(ctx) 1528 if err != nil { 1529 return err 1530 } 1531 1532 ids, err = fbm.doChunkedGetNonLiveBlocks(ctx, ptrs) 1533 if err != nil { 1534 return err 1535 } 1536 } else { 1537 ids = make([]kbfsblock.ID, 0, len(ptrs)) 1538 for _, ptr := range ptrs { 1539 ids = append(ids, ptr.ID) 1540 } 1541 } 1542 fbm.log.CDebugf(ctx, "Deleting %d blocks from cache", len(ids)) 1543 _, _, err = dbc.Delete(ctx, ids, cacheType) 1544 if err != nil { 1545 return err 1546 } 1547 1548 if iter != nil { 1549 fbm.log.CDebugf( 1550 ctx, "Cleaned %d pointers for revision %d, "+ 1551 "now looking for more", len(ptrs), rmd.Revision()) 1552 } 1553 } 1554 1555 err = dbc.PutLastUnrefRev(ctx, fbm.id, nextRev, cacheType) 1556 if err != nil { 1557 return err 1558 } 1559 1560 } 1561 return nil 1562 } 1563 1564 func (fbm *folderBlockManager) doCleanDiskCaches() (err error) { 1565 defer fbm.cleanDiskCachesGroup.Done() 1566 1567 // Clean out sync cache only if it is enabled 1568 syncConfig := fbm.config.GetTlfSyncState(fbm.id) 1569 if syncConfig.Mode != keybase1.FolderSyncMode_DISABLED { 1570 err = fbm.doCleanDiskCache(DiskBlockSyncCache) 1571 if err != nil { 1572 return err 1573 } 1574 } 1575 return fbm.doCleanDiskCache(DiskBlockWorkingSetCache) 1576 } 1577 1578 func (fbm *folderBlockManager) cleanDiskCachesInBackground() { 1579 // While in the foreground, clean the disk caches every time we learn about 1580 // a newer latest merged revision for this TLF. 1581 for { 1582 state := keybase1.MobileAppState_FOREGROUND 1583 select { 1584 case <-fbm.latestMergedChan: 1585 case <-fbm.shutdownChan: 1586 return 1587 case state = <-fbm.appStateUpdater.NextAppStateUpdate(&state): 1588 for state != keybase1.MobileAppState_FOREGROUND { 1589 fbm.log.CDebugf(context.Background(), 1590 "Pausing sync-cache cleaning while not foregrounded: "+ 1591 "state=%s", state) 1592 state = <-fbm.appStateUpdater.NextAppStateUpdate(&state) 1593 } 1594 fbm.log.CDebugf(context.Background(), 1595 "Resuming sync-cache cleaning while foregrounded") 1596 continue 1597 } 1598 1599 _ = fbm.doCleanDiskCaches() 1600 } 1601 } 1602 1603 func (fbm *folderBlockManager) signalLatestMergedRevision() { 1604 if fbm.latestMergedChan == nil { 1605 return 1606 } 1607 1608 fbm.cleanDiskCachesGroup.Add(1) 1609 select { 1610 case fbm.latestMergedChan <- struct{}{}: 1611 default: 1612 fbm.cleanDiskCachesGroup.Done() 1613 } 1614 }