github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/internal/cache/clockpro.go

// Copyright 2018. All rights reserved. Use of this source code is governed by
// an MIT-style license that can be found in the LICENSE file.

// Package cache implements the CLOCK-Pro caching algorithm.
//
// CLOCK-Pro is a patent-free alternative to the Adaptive Replacement Cache,
// https://en.wikipedia.org/wiki/Adaptive_replacement_cache.
// It is an approximation of LIRS ( https://en.wikipedia.org/wiki/LIRS_caching_algorithm ),
// much like the CLOCK page replacement algorithm is an approximation of LRU.
//
// This implementation is based on the python code from https://bitbucket.org/SamiLehtinen/pyclockpro .
//
// Slides describing the algorithm: http://fr.slideshare.net/huliang64/clockpro
//
// The original paper: http://static.usenix.org/event/usenix05/tech/general/full_papers/jiang/jiang_html/html.html
//
// It is MIT licensed, like the original.
package cache // import "github.com/cockroachdb/pebble/internal/cache"

import (
	"fmt"
	"os"
	"runtime"
	"runtime/debug"
	"strings"
	"sync"
	"sync/atomic"

	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/invariants"
)

type fileKey struct {
	// id is the namespace for fileNums.
	id      uint64
	fileNum base.DiskFileNum
}

type key struct {
	fileKey
	offset uint64
}

// file returns the "file key" for the receiver. This is the key used for the
// shard.files map.
func (k key) file() key {
	k.offset = 0
	return k
}

func (k key) String() string {
	return fmt.Sprintf("%d/%d/%d", k.id, k.fileNum, k.offset)
}

// Handle provides a strong reference to a value in the cache. The reference
// does not pin the value in the cache, but it does prevent the underlying byte
// slice from being reused.
type Handle struct {
	value *Value
}

// Get returns the value stored in handle.
func (h Handle) Get() []byte {
	if h.value != nil {
		// NB: We don't increment shard.hits in this code path because we only want
		// to record a hit when the handle is retrieved from the cache.
		return h.value.buf
	}
	return nil
}

// Release releases the reference to the cache entry.
func (h Handle) Release() {
	h.value.release()
}

type shard struct {
	hits   atomic.Int64
	misses atomic.Int64

	mu sync.RWMutex

	reservedSize int64
	maxSize      int64
	coldTarget   int64
	blocks       robinHoodMap // fileNum+offset -> block
	files        robinHoodMap // fileNum -> list of blocks

	// The blocks and files maps store values in manually managed memory that is
	// invisible to the Go GC. This is fine for Value and entry objects that are
	// stored in manually managed memory, but when the "invariants" build tag is
	// set, all Value and entry objects are Go allocated and the entries map will
	// contain a reference to every entry.
	entries map[*entry]struct{}

	handHot  *entry
	handCold *entry
	handTest *entry

	sizeHot  int64
	sizeCold int64
	sizeTest int64

	// The count fields are used exclusively for asserting expectations.
	// We've seen infinite looping (cockroachdb/cockroach#70154) that
	// could be explained by a corrupted sizeCold. Through asserting on
	// these fields, we hope to gain more insight from any future
	// reproductions.
	countHot  int64
	countCold int64
	countTest int64
}

func (c *shard) Get(id uint64, fileNum base.DiskFileNum, offset uint64) Handle {
	c.mu.RLock()
	var value *Value
	if e := c.blocks.Get(key{fileKey{id, fileNum}, offset}); e != nil {
		value = e.acquireValue()
		if value != nil {
			e.referenced.Store(true)
		}
	}
	c.mu.RUnlock()
	if value == nil {
		c.misses.Add(1)
		return Handle{}
	}
	c.hits.Add(1)
	return Handle{value: value}
}

func (c *shard) Set(id uint64, fileNum base.DiskFileNum, offset uint64, value *Value) Handle {
	if n := value.refs(); n != 1 {
		panic(fmt.Sprintf("pebble: Value has already been added to the cache: refs=%d", n))
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	k := key{fileKey{id, fileNum}, offset}
	e := c.blocks.Get(k)

	switch {
	case e == nil:
		// no cache entry? add it
		e = newEntry(c, k, int64(len(value.buf)))
		e.setValue(value)
		if c.metaAdd(k, e) {
			value.ref.trace("add-cold")
			c.sizeCold += e.size
			c.countCold++
		} else {
			value.ref.trace("skip-cold")
			e.free()
			e = nil
		}

	case e.peekValue() != nil:
		// cache entry was a hot or cold page
		e.setValue(value)
		e.referenced.Store(true)
		delta := int64(len(value.buf)) - e.size
		e.size = int64(len(value.buf))
		if e.ptype == etHot {
			value.ref.trace("add-hot")
			c.sizeHot += delta
		} else {
			value.ref.trace("add-cold")
			c.sizeCold += delta
		}
		c.evict()

	default:
		// cache entry was a test page
		c.sizeTest -= e.size
		c.countTest--
		c.metaDel(e).release()
		c.metaCheck(e)

		e.size = int64(len(value.buf))
		c.coldTarget += e.size
		if c.coldTarget > c.targetSize() {
			c.coldTarget = c.targetSize()
		}

		e.referenced.Store(false)
		e.setValue(value)
		e.ptype = etHot
		if c.metaAdd(k, e) {
			value.ref.trace("add-hot")
			c.sizeHot += e.size
			c.countHot++
		} else {
			value.ref.trace("skip-hot")
			e.free()
			e = nil
		}
	}

	c.checkConsistency()

	// Values are initialized with a reference count of 1. That reference count
	// is being transferred to the returned Handle.
	return Handle{value: value}
}

func (c *shard) checkConsistency() {
	// See the comment above the count{Hot,Cold,Test} fields.
	switch {
	case c.sizeHot < 0 || c.sizeCold < 0 || c.sizeTest < 0 || c.countHot < 0 || c.countCold < 0 || c.countTest < 0:
		panic(fmt.Sprintf("pebble: unexpected negative: %d (%d bytes) hot, %d (%d bytes) cold, %d (%d bytes) test",
			c.countHot, c.sizeHot, c.countCold, c.sizeCold, c.countTest, c.sizeTest))
	case c.sizeHot > 0 && c.countHot == 0:
		panic(fmt.Sprintf("pebble: mismatch %d hot size, %d hot count", c.sizeHot, c.countHot))
	case c.sizeCold > 0 && c.countCold == 0:
		panic(fmt.Sprintf("pebble: mismatch %d cold size, %d cold count", c.sizeCold, c.countCold))
	case c.sizeTest > 0 && c.countTest == 0:
		panic(fmt.Sprintf("pebble: mismatch %d test size, %d test count", c.sizeTest, c.countTest))
	}
}

// Delete deletes the cached value for the specified file and offset.
func (c *shard) Delete(id uint64, fileNum base.DiskFileNum, offset uint64) {
	// The common case is there is nothing to delete, so do a quick check with a
	// shared lock.
	k := key{fileKey{id, fileNum}, offset}
	c.mu.RLock()
	exists := c.blocks.Get(k) != nil
	c.mu.RUnlock()
	if !exists {
		return
	}

	var deletedValue *Value
	func() {
		c.mu.Lock()
		defer c.mu.Unlock()

		e := c.blocks.Get(k)
		if e == nil {
			return
		}
		deletedValue = c.metaEvict(e)
		c.checkConsistency()
	}()
	// Now that the mutex has been dropped, release the reference which will
	// potentially free the memory associated with the previous cached value.
	deletedValue.release()
}

// EvictFile evicts all of the cache values for the specified file.
func (c *shard) EvictFile(id uint64, fileNum base.DiskFileNum) {
	fkey := key{fileKey{id, fileNum}, 0}
	for c.evictFileRun(fkey) {
		// Sched switch to give another goroutine an opportunity to acquire the
		// shard mutex.
		runtime.Gosched()
	}
}

func (c *shard) evictFileRun(fkey key) (moreRemaining bool) {
	// If most of the file's blocks are held in the block cache, evicting all
	// the blocks may take a while. We don't want to block the entire cache
	// shard, forcing concurrent readers to wait until we're finished. We drop
	// the mutex every [blocksPerMutexAcquisition] blocks to give other
	// goroutines an opportunity to make progress.
	const blocksPerMutexAcquisition = 5
	c.mu.Lock()

	// Releasing a value may result in free-ing it back to the memory allocator.
	// This can have a nontrivial cost that we'd prefer to not pay while holding
	// the shard mutex, so we collect the evicted values in a local slice and
	// only release them in a defer after dropping the cache mutex.
	var obsoleteValuesAlloc [blocksPerMutexAcquisition]*Value
	obsoleteValues := obsoleteValuesAlloc[:0]
	defer func() {
		c.mu.Unlock()
		for _, v := range obsoleteValues {
			v.release()
		}
	}()

	blocks := c.files.Get(fkey)
	if blocks == nil {
		// No blocks for this file.
		return false
	}

	// b is the current head of the doubly linked list, and n is the entry after b.
	for b, n := blocks, (*entry)(nil); len(obsoleteValues) < cap(obsoleteValues); b = n {
		n = b.fileLink.next
		obsoleteValues = append(obsoleteValues, c.metaEvict(b))
		if b == n {
			// b == n represents the case where b was the last entry remaining
			// in the doubly linked list, which is why it pointed at itself. So
			// no more entries left.
			c.checkConsistency()
			return false
		}
	}
	// Exhausted blocksPerMutexAcquisition.
	return true
}

func (c *shard) Free() {
	c.mu.Lock()
	defer c.mu.Unlock()

	// NB: we use metaDel rather than metaEvict in order to avoid the expensive
	// metaCheck call when the "invariants" build tag is specified.
	for c.handHot != nil {
		e := c.handHot
		c.metaDel(c.handHot).release()
		e.free()
	}

	c.blocks.free()
	c.files.free()
}

func (c *shard) Reserve(n int) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.reservedSize += int64(n)

	// Changing c.reservedSize will either increase or decrease
	// the targetSize. But we want coldTarget to be in the range
	// [0, targetSize]. So, if c.targetSize decreases, make sure
	// that the coldTarget fits within the limits.
	targetSize := c.targetSize()
	if c.coldTarget > targetSize {
		c.coldTarget = targetSize
	}

	c.evict()
	c.checkConsistency()
}

// Size returns the current space used by the cache.
func (c *shard) Size() int64 {
	c.mu.RLock()
	size := c.sizeHot + c.sizeCold
	c.mu.RUnlock()
	return size
}

func (c *shard) targetSize() int64 {
	target := c.maxSize - c.reservedSize
	// Always return a positive integer for targetSize. This is so that we don't
	// end up in an infinite loop in evict(), in cases where reservedSize is
	// greater than or equal to maxSize.
	if target < 1 {
		return 1
	}
	return target
}

// Add the entry to the cache, returning true if the entry was added and false
// if it would not fit in the cache.
func (c *shard) metaAdd(key key, e *entry) bool {
	c.evict()
	if e.size > c.targetSize() {
		// The entry is larger than the target cache size.
		return false
	}

	c.blocks.Put(key, e)
	if entriesGoAllocated {
		// Go allocated entries need to be referenced from Go memory. The entries
		// map provides that reference.
		c.entries[e] = struct{}{}
	}

	if c.handHot == nil {
		// first element
		c.handHot = e
		c.handCold = e
		c.handTest = e
	} else {
		c.handHot.link(e)
	}

	if c.handCold == c.handHot {
		c.handCold = c.handCold.prev()
	}

	fkey := key.file()
	if fileBlocks := c.files.Get(fkey); fileBlocks == nil {
		c.files.Put(fkey, e)
	} else {
		fileBlocks.linkFile(e)
	}
	return true
}

// Remove the entry from the cache. This removes the entry from the blocks map,
// the files map, and ensures that hand{Hot,Cold,Test} are not pointing at the
// entry. Returns the deleted value that must be released, if any.
func (c *shard) metaDel(e *entry) (deletedValue *Value) {
	if value := e.peekValue(); value != nil {
		value.ref.trace("metaDel")
	}
	// Remove the pointer to the value.
	deletedValue = e.val
	e.val = nil

	c.blocks.Delete(e.key)
	if entriesGoAllocated {
		// Go allocated entries need to be referenced from Go memory. The entries
		// map provides that reference.
		delete(c.entries, e)
	}

	if e == c.handHot {
		c.handHot = c.handHot.prev()
	}
	if e == c.handCold {
		c.handCold = c.handCold.prev()
	}
	if e == c.handTest {
		c.handTest = c.handTest.prev()
	}

	if e.unlink() == e {
		// This was the last entry in the cache.
		c.handHot = nil
		c.handCold = nil
		c.handTest = nil
	}

	fkey := e.key.file()
	if next := e.unlinkFile(); e == next {
		c.files.Delete(fkey)
	} else {
		c.files.Put(fkey, next)
	}
	return deletedValue
}

// Check that the specified entry is not referenced by the cache.
func (c *shard) metaCheck(e *entry) {
	if invariants.Enabled {
		if _, ok := c.entries[e]; ok {
			fmt.Fprintf(os.Stderr, "%p: %s unexpectedly found in entries map\n%s",
				e, e.key, debug.Stack())
			os.Exit(1)
		}
		if c.blocks.findByValue(e) != nil {
			fmt.Fprintf(os.Stderr, "%p: %s unexpectedly found in blocks map\n%s\n%s",
				e, e.key, &c.blocks, debug.Stack())
			os.Exit(1)
		}
		if c.files.findByValue(e) != nil {
			fmt.Fprintf(os.Stderr, "%p: %s unexpectedly found in files map\n%s\n%s",
				e, e.key, &c.files, debug.Stack())
			os.Exit(1)
		}
		// NB: c.hand{Hot,Cold,Test} are pointers into a single linked list. We
		// only have to traverse one of them to check all of them.
		var countHot, countCold, countTest int64
		var sizeHot, sizeCold, sizeTest int64
		for t := c.handHot.next(); t != nil; t = t.next() {
			// Recompute count{Hot,Cold,Test} and size{Hot,Cold,Test}.
			switch t.ptype {
			case etHot:
				countHot++
				sizeHot += t.size
			case etCold:
				countCold++
				sizeCold += t.size
			case etTest:
				countTest++
				sizeTest += t.size
			}
			if e == t {
				fmt.Fprintf(os.Stderr, "%p: %s unexpectedly found in blocks list\n%s",
					e, e.key, debug.Stack())
				os.Exit(1)
			}
			if t == c.handHot {
				break
			}
		}
		if countHot != c.countHot || countCold != c.countCold || countTest != c.countTest ||
			sizeHot != c.sizeHot || sizeCold != c.sizeCold || sizeTest != c.sizeTest {
			fmt.Fprintf(os.Stderr, `divergence of Hot,Cold,Test statistics
				cache's statistics: hot %d, %d, cold %d, %d, test %d, %d
				recalculated statistics: hot %d, %d, cold %d, %d, test %d, %d\n%s`,
				c.countHot, c.sizeHot, c.countCold, c.sizeCold, c.countTest, c.sizeTest,
				countHot, sizeHot, countCold, sizeCold, countTest, sizeTest,
				debug.Stack())
			os.Exit(1)
		}
	}
}

func (c *shard) metaEvict(e *entry) (evictedValue *Value) {
	switch e.ptype {
	case etHot:
		c.sizeHot -= e.size
		c.countHot--
	case etCold:
		c.sizeCold -= e.size
		c.countCold--
	case etTest:
		c.sizeTest -= e.size
		c.countTest--
	}
	evictedValue = c.metaDel(e)
	c.metaCheck(e)
	e.free()
	return evictedValue
}

func (c *shard) evict() {
	for c.targetSize() <= c.sizeHot+c.sizeCold && c.handCold != nil {
		c.runHandCold(c.countCold, c.sizeCold)
	}
}

func (c *shard) runHandCold(countColdDebug, sizeColdDebug int64) {
	// countColdDebug and sizeColdDebug should equal c.countCold and
	// c.sizeCold. They're parameters only to aid in debugging of
	// cockroachdb/cockroach#70154. Since they're parameters, their
	// arguments will appear within stack traces should we encounter
	// a reproduction.
	if c.countCold != countColdDebug || c.sizeCold != sizeColdDebug {
		panic(fmt.Sprintf("runHandCold: cold count and size are %d, %d, arguments are %d and %d",
			c.countCold, c.sizeCold, countColdDebug, sizeColdDebug))
	}

	e := c.handCold
	if e.ptype == etCold {
		if e.referenced.Load() {
			e.referenced.Store(false)
			e.ptype = etHot
			c.sizeCold -= e.size
			c.countCold--
			c.sizeHot += e.size
			c.countHot++
		} else {
			e.setValue(nil)
			e.ptype = etTest
			c.sizeCold -= e.size
			c.countCold--
			c.sizeTest += e.size
			c.countTest++
			for c.targetSize() < c.sizeTest && c.handTest != nil {
				c.runHandTest()
			}
		}
	}

	c.handCold = c.handCold.next()

	for c.targetSize()-c.coldTarget <= c.sizeHot && c.handHot != nil {
		c.runHandHot()
	}
}

func (c *shard) runHandHot() {
	if c.handHot == c.handTest && c.handTest != nil {
		c.runHandTest()
		if c.handHot == nil {
			return
		}
	}

	e := c.handHot
	if e.ptype == etHot {
		if e.referenced.Load() {
			e.referenced.Store(false)
		} else {
			e.ptype = etCold
			c.sizeHot -= e.size
			c.countHot--
			c.sizeCold += e.size
			c.countCold++
		}
	}

	c.handHot = c.handHot.next()
}

func (c *shard) runHandTest() {
	if c.sizeCold > 0 && c.handTest == c.handCold && c.handCold != nil {
		// sizeCold is > 0, so assert that countCold > 0. See the
		// comment above count{Hot,Cold,Test}.
		if c.countCold == 0 {
			panic(fmt.Sprintf("pebble: mismatch %d cold size, %d cold count", c.sizeCold, c.countCold))
		}

		c.runHandCold(c.countCold, c.sizeCold)
		if c.handTest == nil {
			return
		}
	}

	e := c.handTest
	if e.ptype == etTest {
		c.sizeTest -= e.size
		c.countTest--
		c.coldTarget -= e.size
		if c.coldTarget < 0 {
			c.coldTarget = 0
		}
		c.metaDel(e).release()
		c.metaCheck(e)
		e.free()
	}

	c.handTest = c.handTest.next()
}

// Metrics holds metrics for the cache.
type Metrics struct {
	// The number of bytes in use by the cache.
	Size int64
	// The count of objects (blocks or tables) in the cache.
	Count int64
	// The number of cache hits.
	Hits int64
	// The number of cache misses.
	Misses int64
}

// Cache implements Pebble's sharded block cache. The Clock-PRO algorithm is
// used for page replacement
// (http://static.usenix.org/event/usenix05/tech/general/full_papers/jiang/jiang_html/html.html). In
// order to provide better concurrency, 4 x NumCPUs shards are created, with
// each shard being given 1/n of the target cache size. The Clock-PRO algorithm
// is run independently on each shard.
//
// Blocks are keyed by an (id, fileNum, offset) triple. The ID is a namespace
// for file numbers and allows a single Cache to be shared between multiple
// Pebble instances. The fileNum and offset refer to an sstable file number and
// the offset of the block within the file. Because sstables are immutable and
// file numbers are never reused, (fileNum,offset) are unique for the lifetime
// of a Pebble instance.
//
// In addition to maintaining a map from (fileNum,offset) to data, each shard
// maintains a map of the cached blocks for a particular fileNum. This allows
// efficient eviction of all of the blocks for a file which is used when an
// sstable is deleted from disk.
//
// # Memory Management
//
// In order to reduce pressure on the Go GC, manual memory management is
// performed for the data stored in the cache. Manual memory management is
// performed by calling into C.{malloc,free} to allocate memory. Cache.Values
// are reference counted and the memory backing a manual value is freed when
// the reference count drops to 0.
//
// Manual memory management brings the possibility of memory leaks. It is
// imperative that every Handle returned by Cache.{Get,Set} is eventually
// released. The "invariants" build tag enables a leak detection facility that
// places a GC finalizer on cache.Value. When the cache.Value finalizer is run,
// if the underlying buffer is still present a leak has occurred. The "tracing"
// build tag enables tracing of cache.Value reference count manipulation and
// eases finding where a leak has occurred. These two facilities are usually
// used in combination by specifying `-tags invariants,tracing`. Note that
// "tracing" produces a significant slowdown, while "invariants" does not.
type Cache struct {
	refs    atomic.Int64
	maxSize int64
	idAlloc atomic.Uint64
	shards  []shard

	// Traces recorded by Cache.trace. Used for debugging.
	tr struct {
		sync.Mutex
		msgs []string
	}
}

// New creates a new cache of the specified size. Memory for the cache is
// allocated on demand, not during initialization. The cache is created with a
// reference count of 1.
// Each DB it is associated with adds a reference, so the
// creator of the cache should usually release their reference after the DB is
// created.
//
//	c := cache.New(...)
//	defer c.Unref()
//	d, err := pebble.Open(pebble.Options{Cache: c})
func New(size int64) *Cache {
	// How many cache shards should we create?
	//
	// Note that the probability two processors will try to access the same
	// shard at the same time increases superlinearly with the number of
	// processors (Eg, consider the birthday problem where each CPU is a person,
	// and each shard is a possible birthday).
	//
	// We could consider growing the number of shards superlinearly, but
	// increasing the shard count may reduce the effectiveness of the caching
	// algorithm if frequently-accessed blocks are insufficiently distributed
	// across shards. If a shard's size is smaller than a single frequently
	// scanned sstable, then the shard will be unable to hold the entire
	// frequently-scanned table in memory despite other shards still holding
	// infrequently accessed blocks.
	//
	// Experimentally, we've observed contention contributing to tail latencies
	// at 2 shards per processor. For now we use 4 shards per processor,
	// recognizing this may not be the final word.
	m := 4 * runtime.GOMAXPROCS(0)

	// In tests we can use large CPU machines with small cache sizes and have
	// many caches in existence at a time. If sharding into m shards would
	// produce too small shards, constrain the number of shards to 4.
	const minimumShardSize = 4 << 20 // 4 MiB
	if m > 4 && int(size)/m < minimumShardSize {
		m = 4
	}
	return newShards(size, m)
}

func newShards(size int64, shards int) *Cache {
	c := &Cache{
		maxSize: size,
		shards:  make([]shard, shards),
	}
	c.refs.Store(1)
	c.idAlloc.Store(1)
	c.trace("alloc", c.refs.Load())
	for i := range c.shards {
		c.shards[i] = shard{
			maxSize:    size / int64(len(c.shards)),
			coldTarget: size / int64(len(c.shards)),
		}
		if entriesGoAllocated {
			c.shards[i].entries = make(map[*entry]struct{})
		}
		c.shards[i].blocks.init(16)
		c.shards[i].files.init(16)
	}

	// Note: this is a no-op if invariants are disabled or race is enabled.
	invariants.SetFinalizer(c, func(obj interface{}) {
		c := obj.(*Cache)
		if v := c.refs.Load(); v != 0 {
			c.tr.Lock()
			fmt.Fprintf(os.Stderr,
				"pebble: cache (%p) has non-zero reference count: %d\n", c, v)
			if len(c.tr.msgs) > 0 {
				fmt.Fprintf(os.Stderr, "%s\n", strings.Join(c.tr.msgs, "\n"))
			}
			c.tr.Unlock()
			os.Exit(1)
		}
	})
	return c
}

func (c *Cache) getShard(id uint64, fileNum base.DiskFileNum, offset uint64) *shard {
	if id == 0 {
		panic("pebble: 0 cache ID is invalid")
	}

	// Inlined version of fnv.New64 + Write.
	const offset64 = 14695981039346656037
	const prime64 = 1099511628211

	h := uint64(offset64)
	for i := 0; i < 8; i++ {
		h *= prime64
		h ^= uint64(id & 0xff)
		id >>= 8
	}
	fileNumVal := uint64(fileNum.FileNum())
	for i := 0; i < 8; i++ {
		h *= prime64
		h ^= uint64(fileNumVal) & 0xff
		fileNumVal >>= 8
	}
	for i := 0; i < 8; i++ {
		h *= prime64
		h ^= uint64(offset & 0xff)
		offset >>= 8
	}

	return &c.shards[h%uint64(len(c.shards))]
}

// Ref adds a reference to the cache.
// The cache only remains valid as long as a
// reference is maintained to it.
func (c *Cache) Ref() {
	v := c.refs.Add(1)
	if v <= 1 {
		panic(fmt.Sprintf("pebble: inconsistent reference count: %d", v))
	}
	c.trace("ref", v)
}

// Unref releases a reference on the cache.
func (c *Cache) Unref() {
	v := c.refs.Add(-1)
	c.trace("unref", v)
	switch {
	case v < 0:
		panic(fmt.Sprintf("pebble: inconsistent reference count: %d", v))
	case v == 0:
		for i := range c.shards {
			c.shards[i].Free()
		}
	}
}

// Get retrieves the cache value for the specified file and offset, returning
// nil if no value is present.
func (c *Cache) Get(id uint64, fileNum base.DiskFileNum, offset uint64) Handle {
	return c.getShard(id, fileNum, offset).Get(id, fileNum, offset)
}

// Set sets the cache value for the specified file and offset, overwriting an
// existing value if present. A Handle is returned which provides faster
// retrieval of the cached value than Get (lock-free and avoidance of the map
// lookup). The value must have been allocated by Cache.Alloc.
func (c *Cache) Set(id uint64, fileNum base.DiskFileNum, offset uint64, value *Value) Handle {
	return c.getShard(id, fileNum, offset).Set(id, fileNum, offset, value)
}

// Delete deletes the cached value for the specified file and offset.
func (c *Cache) Delete(id uint64, fileNum base.DiskFileNum, offset uint64) {
	c.getShard(id, fileNum, offset).Delete(id, fileNum, offset)
}

// EvictFile evicts all of the cache values for the specified file.
func (c *Cache) EvictFile(id uint64, fileNum base.DiskFileNum) {
	if id == 0 {
		panic("pebble: 0 cache ID is invalid")
	}
	for i := range c.shards {
		c.shards[i].EvictFile(id, fileNum)
	}
}

// MaxSize returns the max size of the cache.
func (c *Cache) MaxSize() int64 {
	return c.maxSize
}

// Size returns the current space used by the cache.
func (c *Cache) Size() int64 {
	var size int64
	for i := range c.shards {
		size += c.shards[i].Size()
	}
	return size
}

// Alloc allocates a byte slice of the specified size, possibly reusing
// previously allocated but unused memory. The memory backing the value is
// manually managed. The caller MUST either add the value to the cache (via
// Cache.Set), or release the value (via Cache.Free). Failure to do so will
// result in a memory leak.
func Alloc(n int) *Value {
	return newValue(n)
}

// Free frees the specified value. The buffer associated with the value will
// possibly be reused, making it invalid to use the buffer after calling
// Free. Do not call Free on a value that has been added to the cache.
func Free(v *Value) {
	if n := v.refs(); n > 1 {
		panic(fmt.Sprintf("pebble: Value has been added to the cache: refs=%d", n))
	}
	v.release()
}

// Reserve N bytes in the cache. This effectively shrinks the size of the cache
// by N bytes, without actually consuming any memory. The returned closure
// should be invoked to release the reservation.
func (c *Cache) Reserve(n int) func() {
	// Round-up the per-shard reservation. Most reservations should be large, so
	// this probably doesn't matter in practice.
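	// For example (illustrative numbers only): with 4 shards, a reservation of
	// n=10 computes shardN = (10+3)/4 = 3, reserving 12 bytes in total, i.e.
	// the ceiling division slightly over-reserves.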
	shardN := (n + len(c.shards) - 1) / len(c.shards)
	for i := range c.shards {
		c.shards[i].Reserve(shardN)
	}
	return func() {
		if shardN == -1 {
			panic("pebble: cache reservation already released")
		}
		for i := range c.shards {
			c.shards[i].Reserve(-shardN)
		}
		shardN = -1
	}
}

// Metrics returns the metrics for the cache.
func (c *Cache) Metrics() Metrics {
	var m Metrics
	for i := range c.shards {
		s := &c.shards[i]
		s.mu.RLock()
		m.Count += int64(s.blocks.Count())
		m.Size += s.sizeHot + s.sizeCold
		s.mu.RUnlock()
		m.Hits += s.hits.Load()
		m.Misses += s.misses.Load()
	}
	return m
}

// NewID returns a new ID to be used as a namespace for cached file
// blocks.
func (c *Cache) NewID() uint64 {
	return c.idAlloc.Add(1)
}
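
// exampleBlockCaching is a minimal, illustrative sketch of the lifecycle
// described in the Cache comment above: allocate a manually managed Value,
// publish it with Set, retrieve it with Get, and release every Handle so the
// manually managed memory can be reclaimed. It is not part of the production
// API; the cache size, offset, and use of the unexported Value.buf field are
// arbitrary choices made for the example.
func exampleBlockCaching(fileNum base.DiskFileNum, block []byte) {
	c := New(32 << 20) // 32 MiB cache, split across 4*GOMAXPROCS shards.
	defer c.Unref()    // Drop the creator's reference once we're done.

	// NewID returns a namespace for file numbers, e.g. one per Pebble instance
	// sharing this cache.
	id := c.NewID()

	// Values must be allocated via Alloc so their memory is manually managed.
	v := Alloc(len(block))
	copy(v.buf, block)

	// Set transfers the Value's initial reference to the returned Handle, which
	// must be released to avoid leaking the manually managed buffer.
	h := c.Set(id, fileNum, 0 /* offset */, v)
	h.Release()

	// Get returns an empty Handle on a miss; Handle.Get reports nil in that case.
	if h := c.Get(id, fileNum, 0); h.Get() != nil {
		_ = h.Get() // Use the cached block bytes.
		h.Release()
	}

	// Evict all cached blocks for the file, e.g. after its sstable is deleted.
	c.EvictFile(id, fileNum)
}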