github.com/keybase/client/go@v0.0.0-20240309051027-028f7c731f8b/kbfs/libkbfs/disk_block_cache.go

// Copyright 2017 Keybase Inc. All rights reserved.
// Use of this source code is governed by a BSD
// license that can be found in the LICENSE file.

package libkbfs

import (
	"fmt"
	"io"
	"math"
	"math/rand"
	"path/filepath"
	"sort"
	"sync"
	"time"

	"github.com/keybase/client/go/kbfs/data"
	"github.com/keybase/client/go/kbfs/kbfsblock"
	"github.com/keybase/client/go/kbfs/kbfscrypto"
	"github.com/keybase/client/go/kbfs/kbfshash"
	"github.com/keybase/client/go/kbfs/kbfsmd"
	"github.com/keybase/client/go/kbfs/ldbutils"
	"github.com/keybase/client/go/kbfs/tlf"
	"github.com/keybase/client/go/logger"
	"github.com/keybase/client/go/protocol/keybase1"
	"github.com/pkg/errors"
	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/filter"
	"github.com/syndtr/goleveldb/leveldb/opt"
	"github.com/syndtr/goleveldb/leveldb/storage"
	"github.com/syndtr/goleveldb/leveldb/util"
	"golang.org/x/net/context"
)

const (
	defaultBlockCacheTableSize      int    = 50 * opt.MiB
	defaultBlockCacheBlockSize      int    = 4 * opt.MiB
	defaultBlockCacheCapacity       int    = 8 * opt.MiB
	evictionConsiderationFactor     int    = 3
	minNumBlocksToEvictInBatch      int    = 10
	maxNumBlocksToEvictInBatch      int    = 500
	defaultNumBlocksToEvictOnClear  int    = 100
	defaultNumUnmarkedBlocksToCheck int    = 100
	defaultClearTickerDuration             = 1 * time.Second
	maxEvictionsPerPut              int    = 100
	blockDbFilename                 string = "diskCacheBlocks.leveldb"
	metaDbFilename                  string = "diskCacheMetadata.leveldb"
	tlfDbFilename                   string = "diskCacheTLF.leveldb"
	lastUnrefDbFilename             string = "diskCacheLastUnref.leveldb"
	initialDiskBlockCacheVersion    uint64 = 1
	currentDiskBlockCacheVersion    uint64 = initialDiskBlockCacheVersion
	syncCacheName                   string = "SyncBlockCache"
	workingSetCacheName             string = "WorkingSetBlockCache"
	crDirtyBlockCacheName           string = "DirtyBlockCache"
	minDiskBlockWriteBufferSize            = 3 * data.MaxBlockSizeBytesDefault // ~ 1 MB
	deleteCompactThreshold          int    = 25
	compactTimer                           = time.Minute * 5
)

var errTeamOrUnknownTLFAddedAsHome = errors.New(
	"Team or Unknown TLF added to disk block cache as home TLF")

type evictionPriority int

const (
	priorityNotHome evictionPriority = iota
	priorityPublicHome
	priorityPrivateHome
)

// DiskBlockCacheLocal is the standard implementation for DiskBlockCache.
type DiskBlockCacheLocal struct {
	config     diskBlockCacheConfig
	log        logger.Logger
	maxBlockID []byte
	dirPath    string

	clearTickerDuration      time.Duration
	numBlocksToEvictOnClear  int
	numUnmarkedBlocksToCheck int

	// Track the cache hit rate and eviction rate
	hitMeter         *ldbutils.CountMeter
	missMeter        *ldbutils.CountMeter
	putMeter         *ldbutils.CountMeter
	updateMeter      *ldbutils.CountMeter
	evictCountMeter  *ldbutils.CountMeter
	evictSizeMeter   *ldbutils.CountMeter
	deleteCountMeter *ldbutils.CountMeter
	deleteSizeMeter  *ldbutils.CountMeter

	// Protect the disk caches from being shut down while they're being
	// accessed, and protect the mutable data below.
	lock        sync.RWMutex
	blockDb     *ldbutils.LevelDb
	metaDb      *ldbutils.LevelDb
	tlfDb       *ldbutils.LevelDb
	lastUnrefDb *ldbutils.LevelDb
	cacheType   diskLimitTrackerType
	// Track the number of blocks in the cache per TLF and overall.
	tlfCounts map[tlf.ID]int
	numBlocks int
	// Track the number of blocks in the cache per eviction priority,
	// for easy eviction counting.
	priorityBlockCounts map[evictionPriority]int
	priorityTlfMap      map[evictionPriority]map[tlf.ID]int
	// Track the aggregate size of blocks in the cache per TLF and overall.
	tlfSizes map[tlf.ID]uint64
	// Track the last unref'd revisions for each TLF.
	tlfLastUnrefs map[tlf.ID]kbfsmd.Revision
	// Don't evict files from the user's private or public home directory.
	// Higher numbers are more important not to evict.
	homeDirs map[tlf.ID]evictionPriority

	// currBytes gets its own lock, since tests need to access it
	// directly and taking the full lock causes deadlocks under some
	// situations.
	currBytesLock sync.RWMutex
	currBytes     uint64

	compactCh  chan struct{}
	useCh      chan struct{}
	startedCh  chan struct{}
	startErrCh chan struct{}
	shutdownCh chan struct{}
	doneCh     chan struct{}

	closer func()
}

// DiskBlockCacheStartState represents whether this disk block cache has
// started or failed.
type DiskBlockCacheStartState int

// String allows DiskBlockCacheStartState to be output as a string.
func (s DiskBlockCacheStartState) String() string {
	switch s {
	case DiskBlockCacheStartStateStarting:
		return "starting"
	case DiskBlockCacheStartStateStarted:
		return "started"
	case DiskBlockCacheStartStateFailed:
		return "failed"
	default:
		return "unknown"
	}
}

const (
	// DiskBlockCacheStartStateStarting represents when the cache is starting.
	DiskBlockCacheStartStateStarting DiskBlockCacheStartState = iota
	// DiskBlockCacheStartStateStarted represents when the cache has started.
	DiskBlockCacheStartStateStarted
	// DiskBlockCacheStartStateFailed represents when the cache has failed to
	// start.
	DiskBlockCacheStartStateFailed
)

// DiskBlockCacheStatus represents the status of the disk cache.
type DiskBlockCacheStatus struct {
	StartState      DiskBlockCacheStartState
	NumBlocks       uint64
	BlockBytes      uint64
	CurrByteLimit   uint64
	LastUnrefCount  uint64
	Hits            ldbutils.MeterStatus
	Misses          ldbutils.MeterStatus
	Puts            ldbutils.MeterStatus
	MetadataUpdates ldbutils.MeterStatus
	NumEvicted      ldbutils.MeterStatus
	SizeEvicted     ldbutils.MeterStatus
	NumDeleted      ldbutils.MeterStatus
	SizeDeleted     ldbutils.MeterStatus

	LocalDiskBytesAvailable uint64
	LocalDiskBytesTotal     uint64

	BlockDBStats        []string `json:",omitempty"`
	MetaDBStats         []string `json:",omitempty"`
	TLFDBStats          []string `json:",omitempty"`
	LastUnrefStats      []string `json:",omitempty"`
	MemCompActive       bool     `json:",omitempty"`
	TableCompActive     bool     `json:",omitempty"`
	MetaMemCompActive   bool     `json:",omitempty"`
	MetaTableCompActive bool     `json:",omitempty"`
}

type lastUnrefEntry struct {
	Rev   kbfsmd.Revision
	Ctime time.Time // Not used yet, but save it in case we ever need it.
}

// newDiskBlockCacheLocalFromStorage creates a new *DiskBlockCacheLocal
// with the passed-in storage.Storage interfaces as storage layers for each
// cache.
func newDiskBlockCacheLocalFromStorage(
	config diskBlockCacheConfig, cacheType diskLimitTrackerType,
	blockStorage, metadataStorage, tlfStorage,
	lastUnrefStorage storage.Storage, mode InitMode) (
	cache *DiskBlockCacheLocal, err error) {
	log := config.MakeLogger("KBC")
	closers := make([]io.Closer, 0, 3)
	closer := func() {
		for _, c := range closers {
			closeErr := c.Close()
			if closeErr != nil {
				log.Warning("Error closing leveldb or storage: %+v", closeErr)
			}
		}
	}
	defer func() {
		if err != nil {
			err = errors.WithStack(err)
			closer()
		}
	}()
	blockDbOptions := ldbutils.LeveldbOptions(mode)
	blockDbOptions.CompactionTableSize = defaultBlockCacheTableSize
	blockDbOptions.BlockSize = defaultBlockCacheBlockSize
	blockDbOptions.BlockCacheCapacity = defaultBlockCacheCapacity
	blockDbOptions.Filter = filter.NewBloomFilter(16)
	if blockDbOptions.WriteBuffer < minDiskBlockWriteBufferSize {
		blockDbOptions.WriteBuffer = minDiskBlockWriteBufferSize
	}
	blockDb, err := ldbutils.OpenLevelDbWithOptions(blockStorage, blockDbOptions)
	if err != nil {
		return nil, err
	}
	closers = append(closers, blockDb)

	metaDb, err := ldbutils.OpenLevelDb(metadataStorage, mode)
	if err != nil {
		return nil, err
	}
	closers = append(closers, metaDb)

	tlfDb, err := ldbutils.OpenLevelDb(tlfStorage, mode)
	if err != nil {
		return nil, err
	}
	closers = append(closers, tlfDb)

	lastUnrefDb, err := ldbutils.OpenLevelDb(lastUnrefStorage, mode)
	if err != nil {
		return nil, err
	}
	closers = append(closers, lastUnrefDb)

	maxBlockID, err := kbfshash.HashFromRaw(
		kbfshash.MaxHashType, kbfshash.MaxDefaultHash[:])
	if err != nil {
		return nil, err
	}
	startedCh := make(chan struct{})
	startErrCh := make(chan struct{})
	cache = &DiskBlockCacheLocal{
		config:                   config,
		maxBlockID:               maxBlockID.Bytes(),
		clearTickerDuration:      defaultClearTickerDuration,
		numBlocksToEvictOnClear:  defaultNumBlocksToEvictOnClear,
		numUnmarkedBlocksToCheck: defaultNumUnmarkedBlocksToCheck,
		cacheType:                cacheType,
		hitMeter:                 ldbutils.NewCountMeter(),
		missMeter:                ldbutils.NewCountMeter(),
		putMeter:                 ldbutils.NewCountMeter(),
		updateMeter:              ldbutils.NewCountMeter(),
		evictCountMeter:          ldbutils.NewCountMeter(),
		evictSizeMeter:           ldbutils.NewCountMeter(),
		deleteCountMeter:         ldbutils.NewCountMeter(),
		deleteSizeMeter:          ldbutils.NewCountMeter(),
		homeDirs:                 map[tlf.ID]evictionPriority{},
		log:                      log,
		blockDb:                  blockDb,
		metaDb:                   metaDb,
		tlfDb:                    tlfDb,
		lastUnrefDb:              lastUnrefDb,
		tlfCounts:                map[tlf.ID]int{},
		priorityBlockCounts:      map[evictionPriority]int{},
		priorityTlfMap: map[evictionPriority]map[tlf.ID]int{
			priorityPublicHome:  {},
			priorityPrivateHome: {},
			priorityNotHome:     {},
		},
		tlfSizes:      map[tlf.ID]uint64{},
		tlfLastUnrefs: map[tlf.ID]kbfsmd.Revision{},
		compactCh:     make(chan struct{}, 1),
		useCh:         make(chan struct{}, 1),
		startedCh:     startedCh,
		startErrCh:    startErrCh,
		shutdownCh:    make(chan struct{}),
		doneCh:        make(chan struct{}),
		closer:        closer,
	}
	// Sync the block counts asynchronously so syncing doesn't block init.
	// Since this method blocks, any Get or Put requests to the disk block
	// cache will block until this is done. The log will contain the beginning
	// and end of this sync.
	go func() {
		err := cache.syncBlockCountsAndUnrefsFromDb()
		if err != nil {
			close(startErrCh)
			closer()
			log.Warning("Disabling disk block cache due to error syncing the "+
				"block counts from DB: %+v", err)
			return
		}
		diskLimiter := cache.config.DiskLimiter()
		if diskLimiter != nil && cache.useLimiter() {
			// Notify the disk limiter of the disk cache's size once we've
			// determined it.
			ctx := context.Background()
			cache.config.DiskLimiter().onSimpleByteTrackerEnable(ctx,
				cache.cacheType, int64(cache.getCurrBytes()))
		}
		close(startedCh)
	}()

	// Only do background compaction on desktop for now, because on
	// mobile we'd probably cause issues if we try to do it while
	// backgrounded.
	if mode.DiskCacheCompactionEnabled() {
		go cache.compactLoop()
	}

	return cache, nil
}

// newDiskBlockCacheLocal creates a new *DiskBlockCacheLocal with a
// specified directory on the filesystem as storage.
func newDiskBlockCacheLocal(config diskBlockCacheConfig,
	cacheType diskLimitTrackerType, dirPath string, mode InitMode) (
	cache *DiskBlockCacheLocal, err error) {
	log := config.MakeLogger("DBC")
	defer func() {
		if err != nil {
			log.Error("Error initializing disk cache: %+v", err)
			config.GetPerfLog().CDebugf(
				context.TODO(),
				"KBFS couldn't initialize disk cache of type %s: %v",
				cacheType, err)
		}
	}()
	versionPath, err := ldbutils.GetVersionedPathForDb(
		log, dirPath, "disk block cache", currentDiskBlockCacheVersion)
	if err != nil {
		return nil, err
	}
	blockDbPath := filepath.Join(versionPath, blockDbFilename)
	blockStorage, err := storage.OpenFile(blockDbPath, false)
	if err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			blockStorage.Close()
		}
	}()
	metaDbPath := filepath.Join(versionPath, metaDbFilename)
	metadataStorage, err := storage.OpenFile(metaDbPath, false)
	if err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			metadataStorage.Close()
		}
	}()
	tlfDbPath := filepath.Join(versionPath, tlfDbFilename)
	tlfStorage, err := storage.OpenFile(tlfDbPath, false)
	if err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			tlfStorage.Close()
		}
	}()
	lastUnrefDbPath := filepath.Join(versionPath, lastUnrefDbFilename)
	lastUnrefStorage, err := storage.OpenFile(lastUnrefDbPath, false)
	if err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			lastUnrefStorage.Close()
		}
	}()
	cache, err = newDiskBlockCacheLocalFromStorage(config, cacheType,
		blockStorage, metadataStorage, tlfStorage, lastUnrefStorage, mode)
	if err != nil {
		return nil, err
	}
	cache.dirPath = dirPath
	return cache, nil
}

func newDiskBlockCacheLocalForTest(config diskBlockCacheConfig,
	cacheType diskLimitTrackerType) (*DiskBlockCacheLocal, error) {
	return newDiskBlockCacheLocalFromStorage(
		config, cacheType, storage.NewMemStorage(),
		storage.NewMemStorage(), storage.NewMemStorage(),
		storage.NewMemStorage(), &modeTest{modeDefault{}})
}
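
// newTestCacheAndWaitSketch is an illustrative sketch (not part of the
// original file) of the startup sequence described above: construct a cache
// and then block on WaitUntilStarted, so that the asynchronous
// syncBlockCountsAndUnrefsFromDb pass has finished before any Get or Put is
// issued. The helper name and the choice of cache type are assumptions made
// for the example only.
func newTestCacheAndWaitSketch(
	config diskBlockCacheConfig) (*DiskBlockCacheLocal, error) {
	cache, err := newDiskBlockCacheLocalForTest(
		config, workingSetCacheLimitTrackerType)
	if err != nil {
		return nil, err
	}
	// WaitUntilStarted returns once the async block-count sync completes,
	// or returns an error if startup failed.
	if err := cache.WaitUntilStarted(); err != nil {
		return nil, err
	}
	return cache, nil
}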
func (cache *DiskBlockCacheLocal) useLimiter() bool {
	return cache.cacheType != crDirtyBlockCacheLimitTrackerType
}

func (cache *DiskBlockCacheLocal) getCurrBytes() uint64 {
	cache.currBytesLock.RLock()
	defer cache.currBytesLock.RUnlock()
	return cache.currBytes
}

func (cache *DiskBlockCacheLocal) setCurrBytes(b uint64) {
	cache.currBytesLock.Lock()
	defer cache.currBytesLock.Unlock()
	cache.currBytes = b
}

func (cache *DiskBlockCacheLocal) addCurrBytes(b uint64) {
	cache.currBytesLock.Lock()
	defer cache.currBytesLock.Unlock()
	cache.currBytes += b
}

func (cache *DiskBlockCacheLocal) subCurrBytes(b uint64) {
	cache.currBytesLock.Lock()
	defer cache.currBytesLock.Unlock()
	if b <= cache.currBytes {
		cache.currBytes -= b
	}
}

// WaitUntilStarted waits until this cache has started.
func (cache *DiskBlockCacheLocal) WaitUntilStarted() error {
	select {
	case <-cache.startedCh:
		return nil
	case <-cache.startErrCh:
		return DiskBlockCacheError{"error starting channel"}
	}
}

func (cache *DiskBlockCacheLocal) decodeLastUnref(buf []byte) (
	rev kbfsmd.Revision, err error) {
	var entry lastUnrefEntry
	err = cache.config.Codec().Decode(buf, &entry)
	if err != nil {
		return kbfsmd.RevisionUninitialized, err
	}
	return entry.Rev, nil
}

func (cache *DiskBlockCacheLocal) encodeLastUnref(rev kbfsmd.Revision) (
	[]byte, error) {
	entry := lastUnrefEntry{
		Rev:   rev,
		Ctime: cache.config.Clock().Now(),
	}
	return cache.config.Codec().Encode(&entry)
}

func (cache *DiskBlockCacheLocal) syncBlockCountsAndUnrefsFromDb() error {
	cache.log.Debug("+ syncBlockCountsAndUnrefsFromDb begin")
	defer cache.log.Debug("- syncBlockCountsAndUnrefsFromDb end")
	// We take a write lock for this to prevent any reads from happening while
	// we're syncing the block counts.
	cache.lock.Lock()
	defer cache.lock.Unlock()

	tlfCounts := make(map[tlf.ID]int)
	tlfSizes := make(map[tlf.ID]uint64)
	priorityBlockCounts := make(map[evictionPriority]int)
	priorityTlfMap := map[evictionPriority]map[tlf.ID]int{
		priorityNotHome:     {},
		priorityPublicHome:  {},
		priorityPrivateHome: {},
	}
	numBlocks := 0
	totalSize := uint64(0)
	iter := cache.metaDb.NewIterator(nil, nil)
	defer iter.Release()
	for iter.Next() {
		metadata := DiskBlockCacheMetadata{}
		err := cache.config.Codec().Decode(iter.Value(), &metadata)
		if err != nil {
			return err
		}
		size := uint64(metadata.BlockSize)
		tlfCounts[metadata.TlfID]++
		tlfSizes[metadata.TlfID] += size
		priorityBlockCounts[cache.homeDirs[metadata.TlfID]]++
		priorityTlfMap[cache.homeDirs[metadata.TlfID]][metadata.TlfID]++
		numBlocks++
		totalSize += size
	}
	cache.tlfCounts = tlfCounts
	cache.numBlocks = numBlocks
	cache.tlfSizes = tlfSizes
	cache.setCurrBytes(totalSize)
	cache.priorityTlfMap = priorityTlfMap
	cache.priorityBlockCounts = priorityBlockCounts

	cache.log.Debug("| syncBlockCountsAndUnrefsFromDb block counts done")

	tlfLastUnrefs := make(map[tlf.ID]kbfsmd.Revision)
	lastUnrefIter := cache.lastUnrefDb.NewIterator(nil, nil)
	defer lastUnrefIter.Release()
	for lastUnrefIter.Next() {
		var tlfID tlf.ID
		err := tlfID.UnmarshalBinary(lastUnrefIter.Key())
		if err != nil {
			return err
		}

		rev, err := cache.decodeLastUnref(lastUnrefIter.Value())
		if err != nil {
			return err
		}
		tlfLastUnrefs[tlfID] = rev
	}
	cache.tlfLastUnrefs = tlfLastUnrefs

	return nil
}

// tlfKey generates a TLF cache key from a tlf.ID and a binary-encoded block
// ID.
func (*DiskBlockCacheLocal) tlfKey(tlfID tlf.ID, blockKey []byte) []byte {
	return append(tlfID.Bytes(), blockKey...)
}

// updateMetadataLocked updates the LRU time of a block in the LRU cache to
// the current time.
func (cache *DiskBlockCacheLocal) updateMetadataLocked(ctx context.Context,
	blockKey []byte, metadata DiskBlockCacheMetadata, metered bool) error {
	metadata.LRUTime.Time = cache.config.Clock().Now()
	encodedMetadata, err := cache.config.Codec().Encode(&metadata)
	if err != nil {
		return err
	}
	var putMeter *ldbutils.CountMeter
	if ldbutils.Metered {
		putMeter = cache.updateMeter
	}
	err = cache.metaDb.PutWithMeter(blockKey, encodedMetadata, putMeter)
	if err != nil {
		cache.log.CWarningf(ctx, "Error writing to disk cache meta "+
			"database: %+v", err)
	}
	return err
}

// getMetadataLocked retrieves the metadata for a block in the cache, or
// returns leveldb.ErrNotFound and a zero-valued metadata otherwise.
func (cache *DiskBlockCacheLocal) getMetadataLocked(
	blockID kbfsblock.ID, metered bool) (
	metadata DiskBlockCacheMetadata, err error) {
	var hitMeter, missMeter *ldbutils.CountMeter
	if ldbutils.Metered {
		hitMeter = cache.hitMeter
		missMeter = cache.missMeter
	}

	metadataBytes, err := cache.metaDb.GetWithMeter(
		blockID.Bytes(), hitMeter, missMeter)
	if err != nil {
		return DiskBlockCacheMetadata{}, err
	}
	err = cache.config.Codec().Decode(metadataBytes, &metadata)
	return metadata, err
}

// getLRULocked retrieves the LRU time for a block in the cache, or returns
// leveldb.ErrNotFound and a zero-valued time.Time otherwise.
func (cache *DiskBlockCacheLocal) getLRULocked(blockID kbfsblock.ID) (
	time.Time, error) {
	metadata, err := cache.getMetadataLocked(blockID, false)
	if err != nil {
		return time.Time{}, err
	}
	return metadata.LRUTime.Time, nil
}

// decodeBlockCacheEntry decodes a disk block cache entry buffer into an
// encoded block and server half.
func (cache *DiskBlockCacheLocal) decodeBlockCacheEntry(buf []byte) ([]byte,
	kbfscrypto.BlockCryptKeyServerHalf, error) {
	entry := diskBlockCacheEntry{}
	err := cache.config.Codec().Decode(buf, &entry)
	if err != nil {
		return nil, kbfscrypto.BlockCryptKeyServerHalf{}, err
	}
	return entry.Buf, entry.ServerHalf, nil
}

// encodeBlockCacheEntry encodes an encoded block and serverHalf into a single
// buffer.
func (cache *DiskBlockCacheLocal) encodeBlockCacheEntry(buf []byte,
	serverHalf kbfscrypto.BlockCryptKeyServerHalf) ([]byte, error) {
	entry := diskBlockCacheEntry{
		Buf:        buf,
		ServerHalf: serverHalf,
	}
	return cache.config.Codec().Encode(&entry)
}

func (cache *DiskBlockCacheLocal) used() {
	select {
	case cache.useCh <- struct{}{}:
	default:
	}
}

// checkCacheLocked checks whether the cache is started.
func (cache *DiskBlockCacheLocal) checkCacheLocked(method string) (err error) {
	defer func() {
		if err == nil {
			cache.used()
		}
	}()

	select {
	case <-cache.startedCh:
	case <-cache.startErrCh:
		// The cache will never be started. No need for a stack here since this
		// could happen anywhere.
		return DiskCacheStartingError{method}
	default:
		// If the cache hasn't started yet, return an error. No need for a
		// stack here since this could happen anywhere.
		return DiskCacheStartingError{method}
	}
	// shutdownCh has to be checked under lock, otherwise we can race.
	select {
	case <-cache.shutdownCh:
		return errors.WithStack(DiskCacheClosedError{method})
	default:
	}
	if cache.blockDb == nil {
		return errors.WithStack(DiskCacheClosedError{method})
	}
	return nil
}

// Get implements the DiskBlockCache interface for DiskBlockCacheLocal.
func (cache *DiskBlockCacheLocal) Get(
	ctx context.Context, tlfID tlf.ID, blockID kbfsblock.ID) (buf []byte,
	serverHalf kbfscrypto.BlockCryptKeyServerHalf,
	prefetchStatus PrefetchStatus, err error) {
	cache.lock.RLock()
	defer cache.lock.RUnlock()
	err = cache.checkCacheLocked("Block(Get)")
	if err != nil {
		return nil, kbfscrypto.BlockCryptKeyServerHalf{}, NoPrefetch, err
	}

	blockKey := blockID.Bytes()
	entry, err := cache.blockDb.Get(blockKey, nil)
	if err != nil {
		return nil, kbfscrypto.BlockCryptKeyServerHalf{}, NoPrefetch,
			data.NoSuchBlockError{ID: blockID}
	}
	md, err := cache.getMetadataLocked(blockID, true)
	if err != nil {
		return nil, kbfscrypto.BlockCryptKeyServerHalf{}, NoPrefetch, err
	}
	err = cache.updateMetadataLocked(ctx, blockKey, md, ldbutils.Unmetered)
	if err != nil {
		return nil, kbfscrypto.BlockCryptKeyServerHalf{}, NoPrefetch, err
	}
	buf, serverHalf, err = cache.decodeBlockCacheEntry(entry)
	return buf, serverHalf, md.PrefetchStatus(), err
}

// numBlocksToEvictLocked estimates the number of blocks to evict to make
// enough room for new blocks, based on the average block size in the
// cache.
func (cache *DiskBlockCacheLocal) numBlocksToEvictLocked(
	bytesAvailable int64) int {
	if cache.numBlocks <= 0 || bytesAvailable > 0 {
		return minNumBlocksToEvictInBatch
	}

	bytesPerBlock := int(cache.getCurrBytes()) / cache.numBlocks
	toEvict := -int(bytesAvailable) / bytesPerBlock
	if toEvict < minNumBlocksToEvictInBatch {
		return minNumBlocksToEvictInBatch
	} else if toEvict > maxNumBlocksToEvictInBatch {
		return maxNumBlocksToEvictInBatch
	}
	return toEvict
}

func (cache *DiskBlockCacheLocal) evictUntilBytesAvailableLocked(
	ctx context.Context, encodedLen int64) (hasEnoughSpace bool, err error) {
	if !cache.useLimiter() {
		return true, nil
	}
	for i := 0; i < maxEvictionsPerPut; i++ {
		select {
		// Ensure we don't loop infinitely
		case <-ctx.Done():
			return false, ctx.Err()
		default:
		}
		bytesAvailable, err := cache.config.DiskLimiter().reserveBytes(
			ctx, cache.cacheType, encodedLen)
		if err != nil {
			cache.log.CWarningf(ctx, "Error obtaining space for the disk "+
				"block cache: %+v", err)
			return false, err
		}
		if bytesAvailable >= 0 {
			return true, nil
		}
		cache.log.CDebugf(ctx, "Need more bytes. Available: %d", bytesAvailable)
		numRemoved, _, err := cache.evictLocked(
			ctx, cache.numBlocksToEvictLocked(bytesAvailable))
		if err != nil {
			return false, err
		}
		if numRemoved == 0 {
			return false, errors.New("couldn't evict any more blocks from " +
				"the disk block cache")
		}
	}
	return false, nil
}

// Put implements the DiskBlockCache interface for DiskBlockCacheLocal.
func (cache *DiskBlockCacheLocal) Put(
	ctx context.Context, tlfID tlf.ID, blockID kbfsblock.ID, buf []byte,
	serverHalf kbfscrypto.BlockCryptKeyServerHalf) (err error) {
	cache.lock.Lock()
	defer cache.lock.Unlock()
	err = cache.checkCacheLocked("Block(Put)")
	if err != nil {
		return err
	}

	blockLen := len(buf)
	entry, err := cache.encodeBlockCacheEntry(buf, serverHalf)
	if err != nil {
		return err
	}
	encodedLen := int64(len(entry))
	defer func() {
		cache.log.CDebugf(ctx, "Cache Put id=%s tlf=%s bSize=%d entrySize=%d "+
			"cacheType=%s err=%+v", blockID, tlfID, blockLen, encodedLen,
			cache.cacheType, err)
	}()
	blockKey := blockID.Bytes()
	hasKey, err := cache.blockDb.Has(blockKey, nil)
	if err != nil {
		cache.log.CDebugf(ctx, "Cache Put failed due to error from "+
			"blockDb.Has: %+v", err)
		return err
	}
	if !hasKey {
		if cache.cacheType == syncCacheLimitTrackerType {
			bytesAvailable, err := cache.config.DiskLimiter().reserveBytes(
				ctx, cache.cacheType, encodedLen)
			if err != nil {
				cache.log.CWarningf(ctx, "Error obtaining space for the disk "+
					"block cache: %+v", err)
				return err
			}
			if bytesAvailable < 0 {
				return data.CachePutCacheFullError{BlockID: blockID}
			}
		} else {
			hasEnoughSpace, err := cache.evictUntilBytesAvailableLocked(
				ctx, encodedLen)
			if err != nil {
				return err
			}
			if !hasEnoughSpace {
				return data.CachePutCacheFullError{BlockID: blockID}
			}
		}
		err = cache.blockDb.PutWithMeter(blockKey, entry, cache.putMeter)
		if err != nil {
			if cache.useLimiter() {
				cache.config.DiskLimiter().commitOrRollback(ctx,
					cache.cacheType, encodedLen, 0, false, "")
			}
			return err
		}
		if cache.useLimiter() {
			cache.config.DiskLimiter().commitOrRollback(ctx, cache.cacheType,
				encodedLen, 0, true, "")
		}
		cache.tlfCounts[tlfID]++
		cache.priorityBlockCounts[cache.homeDirs[tlfID]]++
		cache.priorityTlfMap[cache.homeDirs[tlfID]][tlfID]++
		cache.numBlocks++
		encodedLenUint := uint64(encodedLen)
		cache.tlfSizes[tlfID] += encodedLenUint
		cache.addCurrBytes(encodedLenUint)
	}
	tlfKey := cache.tlfKey(tlfID, blockKey)
	hasKey, err = cache.tlfDb.Has(tlfKey, nil)
	if err != nil {
		cache.log.CWarningf(ctx, "Error reading from TLF cache database: %+v",
			err)
	}
	if !hasKey {
		err = cache.tlfDb.Put(tlfKey, []byte{}, nil)
		if err != nil {
			cache.log.CWarningf(ctx,
				"Error writing to TLF cache database: %+v", err)
		}
	}
	md, err := cache.getMetadataLocked(blockID, false)
	if err != nil {
		// Only set the relevant fields if we had trouble getting the metadata.
		// Initially leave TriggeredPrefetch and FinishedPrefetch as false;
		// rely on the later-called UpdateMetadata to fix it.
		md.TlfID = tlfID
		md.BlockSize = uint32(encodedLen)
		err = nil
	}
	return cache.updateMetadataLocked(ctx, blockKey, md, ldbutils.Unmetered)
}

// GetMetadata implements the DiskBlockCache interface for
// DiskBlockCacheLocal.
func (cache *DiskBlockCacheLocal) GetMetadata(ctx context.Context,
	blockID kbfsblock.ID) (DiskBlockCacheMetadata, error) {
	cache.lock.RLock()
	defer cache.lock.RUnlock()
	err := cache.checkCacheLocked("Block(GetMetadata)")
	if err != nil {
		return DiskBlockCacheMetadata{}, err
	}
	return cache.getMetadataLocked(blockID, false)
}

// UpdateMetadata implements the DiskBlockCache interface for
// DiskBlockCacheLocal.
func (cache *DiskBlockCacheLocal) UpdateMetadata(ctx context.Context,
	blockID kbfsblock.ID, prefetchStatus PrefetchStatus) (err error) {
	cache.lock.Lock()
	defer cache.lock.Unlock()
	err = cache.checkCacheLocked("Block(UpdateMetadata)")
	if err != nil {
		return err
	}

	md, err := cache.getMetadataLocked(blockID, false)
	if err != nil {
		return data.NoSuchBlockError{ID: blockID}
	}
	if md.FinishedPrefetch {
		// Don't update md that's already completed.
		return nil
	}
	md.TriggeredPrefetch = false
	md.FinishedPrefetch = false
	switch prefetchStatus {
	case TriggeredPrefetch:
		md.TriggeredPrefetch = true
	case FinishedPrefetch:
		md.TriggeredPrefetch = true
		md.FinishedPrefetch = true
	}
	return cache.updateMetadataLocked(ctx, blockID.Bytes(), md, ldbutils.Metered)
}

func (cache *DiskBlockCacheLocal) decCacheCountsLocked(
	tlfID tlf.ID, numBlocks int, totalSize uint64) {
	if numBlocks <= cache.tlfCounts[tlfID] {
		cache.tlfCounts[tlfID] -= numBlocks
	}
	if numBlocks <= cache.priorityBlockCounts[cache.homeDirs[tlfID]] {
		cache.priorityBlockCounts[cache.homeDirs[tlfID]] -= numBlocks
	}
	if numBlocks <= cache.priorityTlfMap[cache.homeDirs[tlfID]][tlfID] {
		cache.priorityTlfMap[cache.homeDirs[tlfID]][tlfID] -= numBlocks
	}
	if numBlocks <= cache.numBlocks {
		cache.numBlocks -= numBlocks
	}
	if totalSize <= cache.tlfSizes[tlfID] {
		cache.tlfSizes[tlfID] -= totalSize
	}
	cache.subCurrBytes(totalSize)
}

// deleteLocked deletes a set of blocks from the disk block cache.
func (cache *DiskBlockCacheLocal) deleteLocked(ctx context.Context,
	blockEntries []kbfsblock.ID) (numRemoved int, sizeRemoved int64,
	err error) {
	if len(blockEntries) == 0 {
		return 0, 0, nil
	}
	defer func() {
		if err == nil {
			cache.deleteCountMeter.Mark(int64(numRemoved))
			cache.deleteSizeMeter.Mark(sizeRemoved)
		}
	}()
	blockBatch := new(leveldb.Batch)
	metadataBatch := new(leveldb.Batch)
	tlfBatch := new(leveldb.Batch)
	removalCounts := make(map[tlf.ID]int)
	removalSizes := make(map[tlf.ID]uint64)
	for _, entry := range blockEntries {
		blockKey := entry.Bytes()
		metadataBytes, err := cache.metaDb.Get(blockKey, nil)
		if err != nil {
			// If we can't retrieve the block, don't try to delete it, and
			// don't account for its non-presence.
			continue
		}
		metadata := DiskBlockCacheMetadata{}
		err = cache.config.Codec().Decode(metadataBytes, &metadata)
		if err != nil {
			return 0, 0, err
		}
		blockBatch.Delete(blockKey)
		metadataBatch.Delete(blockKey)
		tlfDbKey := cache.tlfKey(metadata.TlfID, blockKey)
		tlfBatch.Delete(tlfDbKey)
		removalCounts[metadata.TlfID]++
		removalSizes[metadata.TlfID] += uint64(metadata.BlockSize)
		sizeRemoved += int64(metadata.BlockSize)
		numRemoved++
	}
	// TODO: more gracefully handle non-atomic failures here.
	if err := cache.metaDb.Write(metadataBatch, nil); err != nil {
		return 0, 0, err
	}
	if err := cache.tlfDb.Write(tlfBatch, nil); err != nil {
		return 0, 0, err
	}
	if err := cache.blockDb.Write(blockBatch, nil); err != nil {
		return 0, 0, err
	}

	// Update the cache's totals.
	for k, v := range removalCounts {
		cache.decCacheCountsLocked(k, v, removalSizes[k])
	}
	if cache.useLimiter() {
		cache.config.DiskLimiter().release(
			ctx, cache.cacheType, sizeRemoved, 0)
	}

	return numRemoved, sizeRemoved, nil
}

// compactDBs forces the disk cache to compact. It should be called
// after bulk deletion events, since leveldb doesn't run compactions
// after those (only after a certain number of lookups occur).
func (cache *DiskBlockCacheLocal) compactDBs(ctx context.Context) (err error) {
	// We need to lock here to make sure the caches don't get shut
	// down. TODO: make this less restrictive so this cache can still
	// serve writes during compaction; however, I'm not yet sure if
	// this is even allowed on the leveldb side or not.
	cache.lock.RLock()
	defer cache.lock.RUnlock()
	err = cache.checkCacheLocked("compactDBs")
	if err != nil {
		return err
	}

	cache.log.CDebugf(ctx, "Compacting DBs for cacheType=%s", cache.cacheType)
	defer func() {
		cache.log.CDebugf(
			ctx, "Done compacting DBs for cacheType=%s: %+v", cache.cacheType,
			err)
	}()
	err = cache.blockDb.CompactRange(util.Range{})
	if err != nil {
		return err
	}
	err = cache.tlfDb.CompactRange(util.Range{})
	if err != nil {
		return err
	}
	return cache.metaDb.CompactRange(util.Range{})
}

// compactLoop fires compaction, but only after five minutes of
// non-usage has passed.
func (cache *DiskBlockCacheLocal) compactLoop() {
	ctx := context.Background()
	var timer *time.Timer
	var timerCh <-chan time.Time
	for {
		select {
		case <-cache.compactCh:
			// If we explicitly need to compact, start a new timer no
			// matter what (since a compaction request implies a use
			// of the cache).
			if timer != nil {
				timer.Stop()
			} else {
				cache.log.CDebugf(ctx, "Starting initial compaction timer")
			}

			timer = time.NewTimer(compactTimer)
			timerCh = timer.C
		case <-cache.useCh:
			// If we've just been used, interrupt any timer that's
			// already running, but don't start a new one if one isn't
			// already running.
			if timer != nil {
				timer.Stop()
				timer = time.NewTimer(compactTimer)
				timerCh = timer.C
			}
		case <-timerCh:
			err := cache.compactDBs(ctx)
			if err != nil {
				cache.log.CDebugf(ctx, "Error compacting DBs: %+v", err)
			}
			timerCh = nil
			timer = nil
		case <-cache.shutdownCh:
			close(cache.doneCh)
			return
		}
	}
}

func (cache *DiskBlockCacheLocal) doCompact() {
	select {
	case cache.compactCh <- struct{}{}:
	default:
	}
}

// Delete implements the DiskBlockCache interface for DiskBlockCacheLocal.
func (cache *DiskBlockCacheLocal) Delete(ctx context.Context,
	blockIDs []kbfsblock.ID) (numRemoved int, sizeRemoved int64, err error) {
	defer func() {
		if err == nil && numRemoved > deleteCompactThreshold {
			cache.doCompact()
		}
	}()

	cache.lock.Lock()
	defer cache.lock.Unlock()
	err = cache.checkCacheLocked("Block(Delete)")
	if err != nil {
		return 0, 0, err
	}

	cache.log.CDebugf(ctx, "Cache Delete numBlocks=%d", len(blockIDs))
	defer func() {
		cache.log.CDebugf(ctx,
			"Deleted numRequested=%d numRemoved=%d sizeRemoved=%d err=%+v",
			len(blockIDs), numRemoved, sizeRemoved, err)
	}()
	if cache.config.IsTestMode() {
		for _, bID := range blockIDs {
			cache.log.CDebugf(ctx, "Cache type=%d delete block ID %s",
				cache.cacheType, bID)
		}
	}
	return cache.deleteLocked(ctx, blockIDs)
}

// getRandomBlockID gives us a pivot block ID for picking a random range of
// blocks to consider deleting. We pick a point to start our range based on
// the proportion of the TLF space taken up by numElements/totalElements. E.g.
// if we need to consider 100 out of 400 blocks, and we assume that the block
// IDs are uniformly distributed, then our random start point should be in the
// [0,0.75) interval on the [0,1.0) block ID space.
func (cache *DiskBlockCacheLocal) getRandomBlockID(numElements,
	totalElements int) (kbfsblock.ID, error) {
	if totalElements == 0 {
		return kbfsblock.ID{}, errors.New("")
	}
	// Return a 0 block ID pivot if we require more elements than the total
	// number available.
	if numElements >= totalElements {
		return kbfsblock.ID{}, nil
	}
	// Generate a random block ID to start the range.
	pivot := 1.0 - (float64(numElements) / float64(totalElements))
	if cache.config.IsTestMode() {
		return kbfsblock.MakeRandomIDInRange(0, pivot,
			kbfsblock.UseMathRandForTest)
	}
	return kbfsblock.MakeRandomIDInRange(0, pivot, kbfsblock.UseRealRandomness)
}
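
// tlfIterRangeSketch is an illustrative sketch (not part of the original
// file) of how the pivot returned by getRandomBlockID is turned into a
// per-TLF leveldb iterator range in the eviction paths below: tlfDb keys are
// tlfID.Bytes() followed by the block ID bytes (see tlfKey), so iterating
// from tlfID+pivot to tlfID+maxBlockID visits only that TLF's blocks,
// starting at the random pivot. The function name and parameters are
// assumptions made for the example only.
func tlfIterRangeSketch(
	tlfID tlf.ID, pivot kbfsblock.ID, maxBlockID []byte) *util.Range {
	tlfBytes := tlfID.Bytes()
	return &util.Range{
		Start: append(tlfBytes, pivot.Bytes()...),
		Limit: append(tlfBytes, maxBlockID...),
	}
}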
// evictSomeBlocks tries to evict `numBlocks` blocks from the cache. If
// `blockIDs` doesn't have enough blocks, we evict them all and report how many
// we evicted.
func (cache *DiskBlockCacheLocal) evictSomeBlocks(ctx context.Context,
	numBlocks int, blockIDs blockIDsByTime) (numRemoved int, sizeRemoved int64,
	err error) {
	defer func() {
		cache.log.CDebugf(ctx, "Cache evictSomeBlocks numBlocksRequested=%d "+
			"numBlocksEvicted=%d sizeBlocksEvicted=%d err=%+v", numBlocks,
			numRemoved, sizeRemoved, err)
	}()
	if len(blockIDs) <= numBlocks {
		numBlocks = len(blockIDs)
	} else {
		// Only sort if we need to grab a subset of blocks.
		sort.Sort(blockIDs)
	}

	blocksToDelete := blockIDs.ToBlockIDSlice(numBlocks)
	return cache.deleteLocked(ctx, blocksToDelete)
}

func (cache *DiskBlockCacheLocal) removeBrokenBlock(
	ctx context.Context, tlfID tlf.ID, blockID kbfsblock.ID) int64 {
	cache.log.CDebugf(ctx, "Removing broken block %s from the cache", blockID)
	blockKey := blockID.Bytes()
	entry, err := cache.blockDb.Get(blockKey, nil)
	if err != nil {
		cache.log.CDebugf(ctx, "Couldn't get %s: %+v", blockID, err)
		return 0
	}

	err = cache.blockDb.Delete(blockKey, nil)
	if err != nil {
		cache.log.CDebugf(ctx, "Couldn't delete %s from block cache: %+v",
			blockID, err)
		return 0
	}

	tlfKey := cache.tlfKey(tlfID, blockKey)
	err = cache.tlfDb.Delete(tlfKey, nil)
	if err != nil {
		cache.log.CWarningf(ctx,
			"Couldn't delete from TLF cache database: %+v", err)
	}

	size := int64(len(entry))
	// It's tough to know whether the block actually made it into the
	// block stats or not. If the block was added during this run of
	// KBFS, it will be in there; if it was loaded from disk, it
	// probably won't be in there, since the stats are loaded by
	// iterating over the metadata db. So it's very possible that
	// this will make the stats incorrect. ¯\_(ツ)_/¯.
	cache.decCacheCountsLocked(tlfID, 1, uint64(size))
	if cache.useLimiter() {
		cache.config.DiskLimiter().release(ctx, cache.cacheType, size, 0)
	}

	// Attempt to clean up corrupted metadata, if any.
	_ = cache.metaDb.Delete(blockKey, nil)
	return size
}
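
// oldestNSketch is an illustrative sketch (not part of the original file) of
// the candidate-narrowing step evictSomeBlocks performs: sort the candidate
// entries by LRU time (blockIDsByTime's sort order) and keep only the oldest
// n block IDs for deletion. The helper name is an assumption made for the
// example only.
func oldestNSketch(candidates blockIDsByTime, n int) []kbfsblock.ID {
	if len(candidates) > n {
		// Sorting is only needed when we have more candidates than we
		// intend to evict.
		sort.Sort(candidates)
	} else {
		n = len(candidates)
	}
	return candidates.ToBlockIDSlice(n)
}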
// evictFromTLFLocked evicts a number of blocks from the cache for a given TLF.
// We choose a pivot variable b randomly. Then begin an iterator into
// cache.tlfDb.Range(tlfID + b, tlfID + MaxBlockID) and iterate from there to
// get numBlocks * evictionConsiderationFactor block IDs. We sort the
// resulting blocks by value (LRU time) and pick the minimum numBlocks. We then
// call cache.Delete() on that list of block IDs.
func (cache *DiskBlockCacheLocal) evictFromTLFLocked(ctx context.Context,
	tlfID tlf.ID, numBlocks int) (numRemoved int, sizeRemoved int64, err error) {
	tlfBytes := tlfID.Bytes()
	numElements := numBlocks * evictionConsiderationFactor
	blockID, err := cache.getRandomBlockID(numElements, cache.tlfCounts[tlfID])
	if err != nil {
		return 0, 0, err
	}
	rng := &util.Range{
		Start: append(tlfBytes, blockID.Bytes()...),
		Limit: append(tlfBytes, cache.maxBlockID...),
	}
	iter := cache.tlfDb.NewIterator(rng, nil)
	defer iter.Release()

	blockIDs := make(blockIDsByTime, 0, numElements)
	var brokenIDs []kbfsblock.ID

	for i := 0; i < numElements; i++ {
		if !iter.Next() {
			break
		}
		key := iter.Key()

		blockIDBytes := key[len(tlfBytes):]
		blockID, err := kbfsblock.IDFromBytes(blockIDBytes)
		if err != nil {
			cache.log.CWarningf(
				ctx, "Error decoding block ID %x: %+v", blockIDBytes, err)
			brokenIDs = append(brokenIDs, blockID)
			continue
		}
		lru, err := cache.getLRULocked(blockID)
		if err != nil {
			cache.log.CWarningf(
				ctx, "Error decoding LRU time for block %s: %+v", blockID, err)
			brokenIDs = append(brokenIDs, blockID)
			continue
		}
		blockIDs = append(blockIDs, lruEntry{blockID, lru})
	}

	numRemoved, sizeRemoved, err = cache.evictSomeBlocks(
		ctx, numBlocks, blockIDs)
	if err != nil {
		return 0, 0, err
	}

	for _, id := range brokenIDs {
		// Assume that a block that is in `tlfDB`, but for which the
		// metadata is missing or busted, is broken and should be
		// removed entirely.
		size := cache.removeBrokenBlock(ctx, tlfID, id)
		if size > 0 {
			numRemoved++
			sizeRemoved += size
		}
	}
	return numRemoved, sizeRemoved, nil
}

// weightedByCount is used to shuffle TLF IDs, weighting by per-TLF block count.
type weightedByCount struct {
	key   float64
	value tlf.ID
}

// shuffleTLFsAtPriorityWeighted shuffles the TLFs at a given priority,
// weighting by per-TLF block count.
func (cache *DiskBlockCacheLocal) shuffleTLFsAtPriorityWeighted(
	priority evictionPriority) []weightedByCount {
	weightedSlice := make([]weightedByCount, 0,
		len(cache.priorityTlfMap[priority]))
	idx := 0
	// Use an exponential distribution to ensure the weights are
	// correctly used.
	// See http://utopia.duth.gr/~pefraimi/research/data/2007EncOfAlg.pdf
	for tlfID, count := range cache.priorityTlfMap[priority] {
		if count == 0 {
			continue
		}
		weightedSlice = append(weightedSlice, weightedByCount{
			key:   math.Pow(rand.Float64(), 1.0/float64(count)),
			value: tlfID,
		})
		idx++
	}
	sort.Slice(weightedSlice, func(i, j int) bool {
		return weightedSlice[i].key > weightedSlice[j].key
	})
	return weightedSlice
}
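
// weightedKeySketch is an illustrative sketch (not part of the original file)
// of the weighted-sampling key used by shuffleTLFsAtPriorityWeighted
// (the Efraimidis-Spirakis technique referenced above): each TLF with block
// count w gets key = U^(1/w) with U uniform in [0,1). Larger counts push the
// key toward 1, so sorting by key in descending order yields a shuffle in
// which TLFs holding more blocks tend to appear earlier and are therefore
// considered for eviction sooner. The function name is an assumption made for
// the example only.
func weightedKeySketch(blockCount int) float64 {
	return math.Pow(rand.Float64(), 1.0/float64(blockCount))
}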
// evictLocked evicts a number of blocks from the cache. We search the lowest
// eviction priority level for blocks to evict first, then the next highest
// priority and so on until enough blocks have been evicted. Within each
// priority, we first shuffle the TLFs, weighting by how many blocks they
// contain, and then we take the top TLFs from that shuffle and evict the
// least recently used blocks from them.
func (cache *DiskBlockCacheLocal) evictLocked(ctx context.Context,
	numBlocks int) (numRemoved int, sizeRemoved int64, err error) {
	numRemoved = 0
	sizeRemoved = 0
	defer func() {
		cache.evictCountMeter.Mark(int64(numRemoved))
		cache.evictSizeMeter.Mark(sizeRemoved)
	}()
	for priorityToEvict := priorityNotHome; (priorityToEvict <= priorityPrivateHome) && (numRemoved < numBlocks); priorityToEvict++ {
		// Shuffle the TLFs of this priority, weighting by block count.
		shuffledSlice := cache.shuffleTLFsAtPriorityWeighted(priorityToEvict)
		// Select some TLFs to evict from.
		numElements := (numBlocks - numRemoved) * evictionConsiderationFactor

		// blockIDs is a slice of blocks from which evictions will be selected.
		blockIDs := make(blockIDsByTime, 0, numElements)

		// For each TLF until we get enough elements to select among,
		// add its blocks to the eviction slice.
		for _, tlfIDStruct := range shuffledSlice {
			tlfID := tlfIDStruct.value
			if cache.tlfCounts[tlfID] == 0 {
				cache.log.CDebugf(ctx, "No blocks to delete in TLF %s", tlfID)
				continue
			}
			tlfBytes := tlfID.Bytes()

			blockID, err := cache.getRandomBlockID(numElements,
				cache.tlfCounts[tlfID])
			if err != nil {
				return 0, 0, err
			}
			rng := &util.Range{
				Start: append(tlfBytes, blockID.Bytes()...),
				Limit: append(tlfBytes, cache.maxBlockID...),
			}

			// Extra func exists to make defers work.
			func() {
				iter := cache.tlfDb.NewIterator(rng, nil)
				defer iter.Release()

				var brokenIDs []kbfsblock.ID
				for i := 0; i < numElements; i++ {
					if !iter.Next() {
						break
					}
					key := iter.Key()

					blockIDBytes := key[len(tlfBytes):]
					blockID, err := kbfsblock.IDFromBytes(blockIDBytes)
					if err != nil {
						cache.log.CWarningf(
							ctx, "Error decoding block ID %x", blockIDBytes)
						brokenIDs = append(brokenIDs, blockID)
						continue
					}
					lru, err := cache.getLRULocked(blockID)
					if err != nil {
						cache.log.CWarningf(
							ctx, "Error decoding LRU time for block %s",
							blockID)
						brokenIDs = append(brokenIDs, blockID)
						continue
					}
					blockIDs = append(blockIDs, lruEntry{blockID, lru})
				}

				for _, id := range brokenIDs {
					// Assume that a block that is in `tlfDB`, but for which
					// the metadata is missing or busted, is broken and should
					// be removed entirely.
					size := cache.removeBrokenBlock(ctx, tlfID, id)
					if size > 0 {
						numRemoved++
						sizeRemoved += size
					}
				}
			}()
			if len(blockIDs) == numElements {
				break
			}
		}
		// Evict some of the selected blocks.
		currNumRemoved, currSizeRemoved, err := cache.evictSomeBlocks(ctx,
			numBlocks-numRemoved, blockIDs)
		if err != nil {
			return numRemoved, sizeRemoved, err
		}
		// Update the evicted count.
		numRemoved += currNumRemoved
		sizeRemoved += currSizeRemoved
	}

	return numRemoved, sizeRemoved, nil
}

func (cache *DiskBlockCacheLocal) deleteNextBatchFromClearedTlf(
	ctx context.Context, tlfID tlf.ID) (numLeft int, err error) {
	cache.lock.Lock()
	defer cache.lock.Unlock()
	err = cache.checkCacheLocked("Block(deleteNextBatchFromClearedTlf)")
	if err != nil {
		return 0, err
	}

	select {
	case <-ctx.Done():
		return 0, ctx.Err()
	default:
	}

	_, _, err = cache.evictFromTLFLocked(
		ctx, tlfID, cache.numBlocksToEvictOnClear)
	if err != nil {
		return 0, err
	}
	return cache.tlfCounts[tlfID], nil
}

// ClearAllTlfBlocks implements the DiskBlockCache interface for
// DiskBlockCacheLocal.
func (cache *DiskBlockCacheLocal) ClearAllTlfBlocks(
	ctx context.Context, tlfID tlf.ID) (err error) {
	defer func() {
		cache.log.CDebugf(ctx,
			"Finished clearing blocks from %s: %+v", tlfID, err)
		if err == nil {
			cache.doCompact()
		}
	}()

	// Delete the blocks in batches, so we don't keep the lock for too
	// long.
	for {
		cache.log.CDebugf(ctx, "Deleting a batch of blocks from %s", tlfID)
		numLeft, err := cache.deleteNextBatchFromClearedTlf(ctx, tlfID)
		if err != nil {
			return err
		}
		if numLeft == 0 {
			cache.log.CDebugf(ctx, "Deleted all blocks from %s", tlfID)
			return nil
		}
		cache.log.CDebugf(
			ctx, "%d blocks left to delete from %s", numLeft, tlfID)

		c := time.After(cache.clearTickerDuration)
		select {
		case <-c:
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}

// GetLastUnrefRev implements the DiskBlockCache interface for
// DiskBlockCacheLocal.
func (cache *DiskBlockCacheLocal) GetLastUnrefRev(
	ctx context.Context, tlfID tlf.ID) (kbfsmd.Revision, error) {
	cache.lock.RLock()
	defer cache.lock.RUnlock()
	err := cache.checkCacheLocked("Block(GetLastUnrefRev)")
	if err != nil {
		return kbfsmd.RevisionUninitialized, err
	}

	rev, ok := cache.tlfLastUnrefs[tlfID]
	if !ok {
		// No known unref'd revision.
		return kbfsmd.RevisionUninitialized, nil
	}
	return rev, nil
}

// PutLastUnrefRev implements the DiskBlockCache interface for
// DiskBlockCacheLocal.
func (cache *DiskBlockCacheLocal) PutLastUnrefRev(
	ctx context.Context, tlfID tlf.ID, rev kbfsmd.Revision) error {
	cache.lock.Lock()
	defer cache.lock.Unlock()
	err := cache.checkCacheLocked("Block(PutLastUnrefRev)")
	if err != nil {
		return err
	}

	if currRev, ok := cache.tlfLastUnrefs[tlfID]; ok {
		if rev <= currRev {
			// A later revision has already been unref'd, so ignore this.
			return nil
		}
	}

	buf, err := cache.encodeLastUnref(rev)
	if err != nil {
		return err
	}
	err = cache.lastUnrefDb.Put(tlfID.Bytes(), buf, nil)
	if err != nil {
		return err
	}
	cache.tlfLastUnrefs[tlfID] = rev
	return nil
}

// Status implements the DiskBlockCache interface for DiskBlockCacheLocal.
func (cache *DiskBlockCacheLocal) Status(
	ctx context.Context) map[string]DiskBlockCacheStatus {
	var name string
	var maxLimit uint64
	limiterStatus := cache.config.DiskLimiter().getStatus(
		ctx, keybase1.UserOrTeamID("")).(backpressureDiskLimiterStatus)
	switch cache.cacheType {
	case syncCacheLimitTrackerType:
		name = syncCacheName
		maxLimit = uint64(limiterStatus.SyncCacheByteStatus.Max)
	case workingSetCacheLimitTrackerType:
		name = workingSetCacheName
		maxLimit = uint64(limiterStatus.DiskCacheByteStatus.Max)
	case crDirtyBlockCacheLimitTrackerType:
		name = crDirtyBlockCacheName
	}
	select {
	case <-cache.startedCh:
	case <-cache.startErrCh:
		return map[string]DiskBlockCacheStatus{
			name: {StartState: DiskBlockCacheStartStateFailed}}
	default:
		return map[string]DiskBlockCacheStatus{
			name: {StartState: DiskBlockCacheStartStateStarting}}
	}
	availableBytes, totalBytes := uint64(math.MaxInt64), uint64(math.MaxInt64)
	if cache.dirPath != "" {
		var err error
		availableBytes, totalBytes, _, _, err = getDiskLimits(cache.dirPath)
		if err != nil {
			cache.log.CDebugf(ctx, "Couldn't get disk stats: %+v", err)
		}
	}

	cache.lock.RLock()
	defer cache.lock.RUnlock()

	var blockStats, metaStats, tlfStats, lastUnrefStats []string
	var memCompActive, tableCompActive bool
	var metaMemCompActive, metaTableCompActive bool
	if err := cache.checkCacheLocked("Block(Status)"); err == nil {
		blockStats, err = cache.blockDb.StatStrings()
		if err != nil {
			cache.log.CDebugf(ctx, "Couldn't get block db stats: %+v", err)
		}
		metaStats, err = cache.metaDb.StatStrings()
		if err != nil {
			cache.log.CDebugf(ctx, "Couldn't get meta db stats: %+v", err)
		}
		tlfStats, err = cache.tlfDb.StatStrings()
		if err != nil {
			cache.log.CDebugf(ctx, "Couldn't get TLF db stats: %+v", err)
		}
		lastUnrefStats, err = cache.lastUnrefDb.StatStrings()
		if err != nil {
			cache.log.CDebugf(ctx, "Couldn't get last unref db stats: %+v", err)
		}
		var dbStats leveldb.DBStats
		err = cache.blockDb.Stats(&dbStats)
		if err != nil {
			cache.log.CDebugf(
				ctx, "Couldn't get block db compaction stats: %+v", err)
		}
		memCompActive, tableCompActive =
			dbStats.MemCompactionActive, dbStats.TableCompactionActive
		err = cache.metaDb.Stats(&dbStats)
		if err != nil {
			cache.log.CDebugf(
				ctx, "Couldn't get meta db compaction stats: %+v", err)
		}
		metaMemCompActive, metaTableCompActive =
			dbStats.MemCompactionActive, dbStats.TableCompactionActive
	}

	// The disk cache status doesn't depend on the chargedTo ID, and
	// we don't have easy access to the UID here, so pass in a dummy.
	return map[string]DiskBlockCacheStatus{
		name: {
			StartState:              DiskBlockCacheStartStateStarted,
			NumBlocks:               uint64(cache.numBlocks),
			BlockBytes:              cache.getCurrBytes(),
			CurrByteLimit:           maxLimit,
			LastUnrefCount:          uint64(len(cache.tlfLastUnrefs)),
			Hits:                    ldbutils.RateMeterToStatus(cache.hitMeter),
			Misses:                  ldbutils.RateMeterToStatus(cache.missMeter),
			Puts:                    ldbutils.RateMeterToStatus(cache.putMeter),
			MetadataUpdates:         ldbutils.RateMeterToStatus(cache.updateMeter),
			NumEvicted:              ldbutils.RateMeterToStatus(cache.evictCountMeter),
			SizeEvicted:             ldbutils.RateMeterToStatus(cache.evictSizeMeter),
			NumDeleted:              ldbutils.RateMeterToStatus(cache.deleteCountMeter),
			SizeDeleted:             ldbutils.RateMeterToStatus(cache.deleteSizeMeter),
			LocalDiskBytesAvailable: availableBytes,
			LocalDiskBytesTotal:     totalBytes,
			BlockDBStats:            blockStats,
			MetaDBStats:             metaStats,
			MemCompActive:           memCompActive,
			TableCompActive:         tableCompActive,
			MetaMemCompActive:       metaMemCompActive,
			MetaTableCompActive:     metaTableCompActive,
			TLFDBStats:              tlfStats,
			LastUnrefStats:          lastUnrefStats,
		},
	}
}

// DoesCacheHaveSpace returns true if we have more than 1% of space
// left in the cache.
func (cache *DiskBlockCacheLocal) DoesCacheHaveSpace(
	ctx context.Context) (hasSpace bool, howMuch int64, err error) {
	limiterStatus := cache.config.DiskLimiter().getStatus(
		ctx, keybase1.UserOrTeamID("")).(backpressureDiskLimiterStatus)
	switch cache.cacheType {
	case syncCacheLimitTrackerType:
		// The tracker doesn't track sync cache usage because we never
		// want to throttle it, so rely on our local byte usage count
		// instead of the fraction returned by the tracker.
		limit := float64(limiterStatus.SyncCacheByteStatus.Max)
		return float64(cache.getCurrBytes())/limit <= .99,
			limiterStatus.DiskCacheByteStatus.Free, nil
	case workingSetCacheLimitTrackerType:
		return limiterStatus.DiskCacheByteStatus.UsedFrac <= .99,
			limiterStatus.DiskCacheByteStatus.Free, nil
	case crDirtyBlockCacheLimitTrackerType:
		return true, 0, nil
	default:
		panic(fmt.Sprintf("Unknown cache type: %d", cache.cacheType))
	}
}

// Mark updates the metadata of the given block with the tag.
func (cache *DiskBlockCacheLocal) Mark(
	ctx context.Context, blockID kbfsblock.ID, tag string) error {
	cache.lock.Lock()
	defer cache.lock.Unlock()
	err := cache.checkCacheLocked("Block(Mark)")
	if err != nil {
		return err
	}

	md, err := cache.getMetadataLocked(blockID, false)
	if err != nil {
		return data.NoSuchBlockError{ID: blockID}
	}
	md.Tag = tag
	return cache.updateMetadataLocked(ctx, blockID.Bytes(), md, false)
}

func (cache *DiskBlockCacheLocal) deleteNextUnmarkedBatchFromTlf(
	ctx context.Context, tlfID tlf.ID, tag string, startingKey []byte) (
	nextKey []byte, err error) {
	cache.lock.Lock()
	defer cache.lock.Unlock()
	err = cache.checkCacheLocked("Block(deleteNextUnmarkedBatchFromTlf)")
	if err != nil {
		return nil, err
	}

	select {
	case <-ctx.Done():
		return nil, ctx.Err()
	default:
	}

	tlfBytes := tlfID.Bytes()
	rng := &util.Range{
		Start: startingKey,
		Limit: append(tlfBytes, cache.maxBlockID...),
	}
	iter := cache.tlfDb.NewIterator(rng, nil)
	defer iter.Release()

	blockIDs := make([]kbfsblock.ID, 0, cache.numUnmarkedBlocksToCheck)
	for i := 0; i < cache.numUnmarkedBlocksToCheck; i++ {
		if !iter.Next() {
			break
		}
		key := iter.Key()

		blockIDBytes := key[len(tlfBytes):]
		blockID, err := kbfsblock.IDFromBytes(blockIDBytes)
		if err != nil {
			cache.log.CWarningf(ctx, "Error decoding block ID %x", blockIDBytes)
			continue
		}
		md, err := cache.getMetadataLocked(blockID, false)
		if err != nil {
			cache.log.CWarningf(
				ctx, "No metadata for %s while checking mark", blockID)
			continue
		}
		if md.Tag != tag {
			blockIDs = append(blockIDs, blockID)
		}
	}

	if iter.Next() {
		nextKey = iter.Key()
	}

	if len(blockIDs) > 0 {
		cache.log.CDebugf(ctx, "Deleting %d unmarked blocks (tag=%s) from %s",
			len(blockIDs), tag, tlfID)
		_, _, err = cache.deleteLocked(ctx, blockIDs)
		if err != nil {
			return nil, err
		}
	}
	return nextKey, nil
}

// DeleteUnmarked deletes all the blocks without the given tag.
func (cache *DiskBlockCacheLocal) DeleteUnmarked(
	ctx context.Context, tlfID tlf.ID, tag string) (err error) {
	defer func() {
		cache.log.CDebugf(ctx,
			"Finished deleting unmarked blocks (tag=%s) from %s: %+v",
			tag, tlfID, err)
		if err == nil {
			cache.doCompact()
		}
	}()

	// Delete the blocks in batches, so we don't keep the lock for too
	// long.
	startingKey := cache.tlfKey(tlfID, nil)
	for {
		cache.log.CDebugf(
			ctx, "Deleting a batch of unmarked blocks (tag=%s) from %s",
			tag, tlfID)
		startingKey, err = cache.deleteNextUnmarkedBatchFromTlf(
			ctx, tlfID, tag, startingKey)
		if err != nil {
			return err
		}
		if startingKey == nil {
			return nil
		}

		c := time.After(cache.clearTickerDuration)
		select {
		case <-c:
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}
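
// markSweepSketch is an illustrative sketch (not part of the original file)
// of the mark-and-sweep pattern Mark and DeleteUnmarked support: first mark
// every block that should survive with a tag, then delete everything in the
// TLF that does not carry that tag. The helper name and the idea of the
// caller already knowing its live block IDs are assumptions made for the
// example only.
func markSweepSketch(
	ctx context.Context, cache *DiskBlockCacheLocal, tlfID tlf.ID,
	liveBlocks []kbfsblock.ID, tag string) error {
	for _, id := range liveBlocks {
		if err := cache.Mark(ctx, id, tag); err != nil {
			return err
		}
	}
	// Everything not tagged above is swept away in batches.
	return cache.DeleteUnmarked(ctx, tlfID, tag)
}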
// AddHomeTLF implements the DiskBlockCache interface for DiskBlockCacheLocal.
func (cache *DiskBlockCacheLocal) AddHomeTLF(ctx context.Context, tlfID tlf.ID) error {
	cache.lock.Lock()
	defer cache.lock.Unlock()
	cache.priorityBlockCounts[cache.homeDirs[tlfID]] -= cache.tlfCounts[tlfID]
	cache.priorityTlfMap[cache.homeDirs[tlfID]][tlfID] -= cache.tlfCounts[tlfID]

	switch tlfID.Type() {
	case tlf.Private:
		cache.homeDirs[tlfID] = priorityPrivateHome
	case tlf.Public:
		cache.homeDirs[tlfID] = priorityPublicHome
	default:
		return errTeamOrUnknownTLFAddedAsHome
	}
	cache.priorityBlockCounts[cache.homeDirs[tlfID]] += cache.tlfCounts[tlfID]
	cache.priorityTlfMap[cache.homeDirs[tlfID]][tlfID] += cache.tlfCounts[tlfID]

	return nil
}

// ClearHomeTLFs implements the DiskBlockCache interface for
// DiskBlockCacheLocal.
func (cache *DiskBlockCacheLocal) ClearHomeTLFs(ctx context.Context) error {
	cache.lock.Lock()
	defer cache.lock.Unlock()
	for tlfID, priority := range cache.homeDirs {
		cache.priorityBlockCounts[priority] -= cache.tlfCounts[tlfID]
		cache.priorityTlfMap[priority][tlfID] -= cache.tlfCounts[tlfID]
		cache.priorityBlockCounts[priorityNotHome] += cache.tlfCounts[tlfID]
		cache.priorityTlfMap[priorityNotHome][tlfID] += cache.tlfCounts[tlfID]
	}
	cache.homeDirs = make(map[tlf.ID]evictionPriority)
	return nil
}

// GetTlfSize returns the number of bytes stored for the given TLF in
// the cache.
func (cache *DiskBlockCacheLocal) GetTlfSize(
	_ context.Context, tlfID tlf.ID) (uint64, error) {
	cache.lock.RLock()
	defer cache.lock.RUnlock()
	return cache.tlfSizes[tlfID], nil
}

// GetTlfIDs returns the IDs of all the TLFs with blocks stored in
// the cache.
func (cache *DiskBlockCacheLocal) GetTlfIDs(
	_ context.Context) (tlfIDs []tlf.ID, err error) {
	cache.lock.RLock()
	defer cache.lock.RUnlock()
	tlfIDs = make([]tlf.ID, 0, len(cache.tlfSizes))
	for id := range cache.tlfSizes {
		tlfIDs = append(tlfIDs, id)
	}
	return tlfIDs, nil
}

// Shutdown implements the DiskBlockCache interface for DiskBlockCacheLocal.
func (cache *DiskBlockCacheLocal) Shutdown(ctx context.Context) <-chan struct{} {
	// Wait for the cache to either finish starting or error.
	select {
	case <-cache.startedCh:
	case <-cache.startErrCh:
		close(cache.doneCh)
		return cache.doneCh
	}
	cache.lock.Lock()
	defer cache.lock.Unlock()
	// shutdownCh has to be checked under lock, otherwise we can race.
	select {
	case <-cache.shutdownCh:
		cache.log.CWarningf(ctx, "Shutdown called more than once")
		return cache.doneCh
	default:
	}
	close(cache.shutdownCh)
	if cache.blockDb == nil {
		return cache.doneCh
	}
	cache.closer()
	cache.blockDb = nil
	cache.metaDb = nil
	cache.tlfDb = nil
	if cache.useLimiter() {
		cache.config.DiskLimiter().onSimpleByteTrackerDisable(ctx,
			cache.cacheType, int64(cache.getCurrBytes()))
	}
	cache.hitMeter.Shutdown()
	cache.missMeter.Shutdown()
	cache.putMeter.Shutdown()
	cache.updateMeter.Shutdown()
	cache.evictCountMeter.Shutdown()
	cache.evictSizeMeter.Shutdown()
	cache.deleteCountMeter.Shutdown()
	cache.deleteSizeMeter.Shutdown()
	return cache.doneCh
}
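
// cacheRoundTripSketch is an illustrative sketch (not part of the original
// file) of the basic Put/Get round trip against a started cache: Put encodes
// the block together with its server half and updates the per-TLF accounting,
// and Get returns the stored block along with its prefetch status. The helper
// name is an assumption made for the example only.
func cacheRoundTripSketch(
	ctx context.Context, cache *DiskBlockCacheLocal, tlfID tlf.ID,
	blockID kbfsblock.ID, buf []byte,
	serverHalf kbfscrypto.BlockCryptKeyServerHalf) error {
	if err := cache.Put(ctx, tlfID, blockID, buf, serverHalf); err != nil {
		return err
	}
	gotBuf, gotHalf, prefetchStatus, err := cache.Get(ctx, tlfID, blockID)
	if err != nil {
		return err
	}
	// A real caller would use the returned data; this sketch only shows the
	// shape of the API.
	_, _, _ = gotBuf, gotHalf, prefetchStatus
	return nil
}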