github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/cache/clockpro.go

// Copyright 2018. All rights reserved. Use of this source code is governed by
// an MIT-style license that can be found in the LICENSE file.

// Package cache implements the CLOCK-Pro caching algorithm.
//
// CLOCK-Pro is a patent-free alternative to the Adaptive Replacement Cache,
// https://en.wikipedia.org/wiki/Adaptive_replacement_cache.
// It is an approximation of LIRS ( https://en.wikipedia.org/wiki/LIRS_caching_algorithm ),
// much like the CLOCK page replacement algorithm is an approximation of LRU.
//
// This implementation is based on the Python code from https://bitbucket.org/SamiLehtinen/pyclockpro .
//
// Slides describing the algorithm: http://fr.slideshare.net/huliang64/clockpro
//
// The original paper: http://static.usenix.org/event/usenix05/tech/general/full_papers/jiang/jiang_html/html.html
//
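// In brief, CLOCK-Pro tracks three kinds of pages on a single circular list:
// hot pages, cold pages (resident but on probation) and test pages
// (non-resident metadata used to detect re-reference). Three clock hands
// (handHot, handCold and handTest below) sweep the list to drive eviction,
// promotion and adaptation of the cold-page target size.
//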
// It is MIT licensed, like the original.
package cache // import "github.com/zuoyebang/bitalostable/internal/cache"

import (
	"fmt"
	"os"
	"runtime"
	"runtime/debug"
	"strings"
	"sync"
	"sync/atomic"

	"github.com/zuoyebang/bitalostable/internal/base"
	"github.com/zuoyebang/bitalostable/internal/invariants"
)

type fileKey struct {
	// id is the namespace for fileNums.
	id      uint64
	fileNum base.FileNum
}

type key struct {
	fileKey
	offset uint64
}

// file returns the "file key" for the receiver. This is the key used for the
// shard.files map.
func (k key) file() key {
	k.offset = 0
	return k
}

func (k key) String() string {
	return fmt.Sprintf("%d/%d/%d", k.id, k.fileNum, k.offset)
}

// Handle provides a strong reference to a value in the cache. The reference
// does not pin the value in the cache, but it does prevent the underlying byte
// slice from being reused.
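//
// A typical lookup, as an illustrative sketch (c, id, fileNum and offset are
// assumed to be supplied by the caller):
//
//	h := c.Get(id, fileNum, offset)
//	if b := h.Get(); b != nil {
//		// Cache hit: b remains valid until h.Release is called.
//	}
//	h.Release()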
type Handle struct {
	value *Value
}

// Get returns the value stored in handle.
func (h Handle) Get() []byte {
	if h.value != nil {
		// NB: We don't increment shard.hits in this code path because we only want
		// to record a hit when the handle is retrieved from the cache.
		return h.value.buf
	}
	return nil
}

// Release releases the reference to the cache entry.
func (h Handle) Release() {
	if h.value != nil {
		h.value.release()
	}
}

type shard struct {
	hits   int64
	misses int64

	mu sync.RWMutex

	reservedSize int64
	maxSize      int64
	coldTarget   int64
	blocks       robinHoodMap // fileNum+offset -> block
	files        robinHoodMap // fileNum -> list of blocks

	// The blocks and files maps store values in manually managed memory that is
	// invisible to the Go GC. This is fine for Value and entry objects that are
	// stored in manually managed memory, but when the "invariants" build tag is
	// set, all Value and entry objects are Go allocated and the entries map will
	// contain a reference to every entry.
	entries map[*entry]struct{}

	handHot  *entry
	handCold *entry
	handTest *entry

	sizeHot  int64
	sizeCold int64
	sizeTest int64

	// The count fields are used exclusively for asserting expectations.
	// We've seen infinite looping (cockroachdb/cockroach#70154) that
	// could be explained by a corrupted sizeCold. Through asserting on
	// these fields, we hope to gain more insight from any future
	// reproductions.
	countHot  int64
	countCold int64
	countTest int64
}

func (c *shard) Get(id uint64, fileNum base.FileNum, offset uint64) Handle {
	c.mu.RLock()
	var value *Value
	if e := c.blocks.Get(key{fileKey{id, fileNum}, offset}); e != nil {
		value = e.acquireValue()
		if value != nil {
			atomic.StoreInt32(&e.referenced, 1)
		}
	}
	c.mu.RUnlock()
	if value == nil {
		atomic.AddInt64(&c.misses, 1)
		return Handle{}
	}
	atomic.AddInt64(&c.hits, 1)
	return Handle{value: value}
}

func (c *shard) Set(id uint64, fileNum base.FileNum, offset uint64, value *Value) Handle {
	if n := value.refs(); n != 1 {
		panic(fmt.Sprintf("bitalostable: Value has already been added to the cache: refs=%d", n))
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	k := key{fileKey{id, fileNum}, offset}
	e := c.blocks.Get(k)

	switch {
	case e == nil:
		// no cache entry? add it
		e = newEntry(c, k, int64(len(value.buf)))
		e.setValue(value)
		if c.metaAdd(k, e) {
			value.ref.trace("add-cold")
			c.sizeCold += e.size
			c.countCold++
		} else {
			value.ref.trace("skip-cold")
			e.free()
			e = nil
		}

	case e.peekValue() != nil:
		// cache entry was a hot or cold page
		e.setValue(value)
		atomic.StoreInt32(&e.referenced, 1)
		delta := int64(len(value.buf)) - e.size
		e.size = int64(len(value.buf))
		if e.ptype == etHot {
			value.ref.trace("add-hot")
			c.sizeHot += delta
		} else {
			value.ref.trace("add-cold")
			c.sizeCold += delta
		}
		c.evict()

	default:
		// cache entry was a test page
		c.sizeTest -= e.size
		c.countTest--
		c.metaDel(e)
		c.metaCheck(e)

		e.size = int64(len(value.buf))
		c.coldTarget += e.size
		if c.coldTarget > c.targetSize() {
			c.coldTarget = c.targetSize()
		}

		atomic.StoreInt32(&e.referenced, 0)
		e.setValue(value)
		e.ptype = etHot
		if c.metaAdd(k, e) {
			value.ref.trace("add-hot")
			c.sizeHot += e.size
			c.countHot++
		} else {
			value.ref.trace("skip-hot")
			e.free()
			e = nil
		}
	}

	c.checkConsistency()

	// Values are initialized with a reference count of 1. That reference count
	// is being transferred to the returned Handle.
	return Handle{value: value}
}

func (c *shard) checkConsistency() {
	// See the comment above the count{Hot,Cold,Test} fields.
	switch {
	case c.sizeHot < 0 || c.sizeCold < 0 || c.sizeTest < 0 || c.countHot < 0 || c.countCold < 0 || c.countTest < 0:
		panic(fmt.Sprintf("bitalostable: unexpected negative: %d (%d bytes) hot, %d (%d bytes) cold, %d (%d bytes) test",
			c.countHot, c.sizeHot, c.countCold, c.sizeCold, c.countTest, c.sizeTest))
	case c.sizeHot > 0 && c.countHot == 0:
		panic(fmt.Sprintf("bitalostable: mismatch %d hot size, %d hot count", c.sizeHot, c.countHot))
	case c.sizeCold > 0 && c.countCold == 0:
		panic(fmt.Sprintf("bitalostable: mismatch %d cold size, %d cold count", c.sizeCold, c.countCold))
	case c.sizeTest > 0 && c.countTest == 0:
		panic(fmt.Sprintf("bitalostable: mismatch %d test size, %d test count", c.sizeTest, c.countTest))
	}
}

// Delete deletes the cached value for the specified file and offset.
func (c *shard) Delete(id uint64, fileNum base.FileNum, offset uint64) {
	// The common case is there is nothing to delete, so do a quick check with
	// shared lock.
	k := key{fileKey{id, fileNum}, offset}
	c.mu.RLock()
	exists := c.blocks.Get(k) != nil
	c.mu.RUnlock()
	if !exists {
		return
	}

	c.mu.Lock()
	defer c.mu.Unlock()

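	// Recheck under the exclusive lock: the entry may have been deleted or
	// replaced between the two lock acquisitions.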
	e := c.blocks.Get(k)
	if e == nil {
		return
	}
	c.metaEvict(e)

	c.checkConsistency()
}

// EvictFile evicts all of the cache values for the specified file.
func (c *shard) EvictFile(id uint64, fileNum base.FileNum) {
	c.mu.Lock()
	defer c.mu.Unlock()

	fkey := key{fileKey{id, fileNum}, 0}
	blocks := c.files.Get(fkey)
	if blocks == nil {
		return
	}
	for b, n := blocks, (*entry)(nil); ; b = n {
		n = b.fileLink.next
		c.metaEvict(b)
		if b == n {
			break
		}
	}

	c.checkConsistency()
}

func (c *shard) Free() {
	c.mu.Lock()
	defer c.mu.Unlock()

	// NB: we use metaDel rather than metaEvict in order to avoid the expensive
	// metaCheck call when the "invariants" build tag is specified.
	for c.handHot != nil {
		e := c.handHot
		c.metaDel(c.handHot)
		e.free()
	}

	c.blocks.free()
	c.files.free()
}

func (c *shard) Reserve(n int) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.reservedSize += int64(n)

	// Changing c.reservedSize will either increase or decrease
	// the targetSize. But we want coldTarget to be in the range
	// [0, targetSize]. So, if c.targetSize decreases, make sure
	// that the coldTarget fits within the limits.
	targetSize := c.targetSize()
	if c.coldTarget > targetSize {
		c.coldTarget = targetSize
	}

	c.evict()
	c.checkConsistency()
}

// Size returns the current space used by the cache.
func (c *shard) Size() int64 {
	c.mu.RLock()
	size := c.sizeHot + c.sizeCold
	c.mu.RUnlock()
	return size
}

func (c *shard) targetSize() int64 {
	target := c.maxSize - c.reservedSize
	// Always return a positive integer for targetSize. This is so that we don't
	// end up in an infinite loop in evict(), in cases where reservedSize is
	// greater than or equal to maxSize.
	if target < 1 {
		return 1
	}
	return target
}

// Add the entry to the cache, returning true if the entry was added and false
// if it would not fit in the cache.
func (c *shard) metaAdd(key key, e *entry) bool {
	c.evict()
	if e.size > c.targetSize() {
		// The entry is larger than the target cache size.
		return false
	}

	c.blocks.Put(key, e)
	if entriesGoAllocated {
		// Go allocated entries need to be referenced from Go memory. The entries
		// map provides that reference.
		c.entries[e] = struct{}{}
	}

	if c.handHot == nil {
		// first element
		c.handHot = e
		c.handCold = e
		c.handTest = e
	} else {
		c.handHot.link(e)
	}

	if c.handCold == c.handHot {
		c.handCold = c.handCold.prev()
	}

	fkey := key.file()
	if fileBlocks := c.files.Get(fkey); fileBlocks == nil {
		c.files.Put(fkey, e)
	} else {
		fileBlocks.linkFile(e)
	}
	return true
}

// Remove the entry from the cache. This removes the entry from the blocks map,
// the files map, and ensures that hand{Hot,Cold,Test} are not pointing at the
// entry.
func (c *shard) metaDel(e *entry) {
	if value := e.peekValue(); value != nil {
		value.ref.trace("metaDel")
	}
	e.setValue(nil)

	c.blocks.Delete(e.key)
	if entriesGoAllocated {
		// Go allocated entries need to be referenced from Go memory. The entries
		// map provides that reference.
		delete(c.entries, e)
	}

	if e == c.handHot {
		c.handHot = c.handHot.prev()
	}
	if e == c.handCold {
		c.handCold = c.handCold.prev()
	}
	if e == c.handTest {
		c.handTest = c.handTest.prev()
	}

	if e.unlink() == e {
		// This was the last entry in the cache.
		c.handHot = nil
		c.handCold = nil
		c.handTest = nil
	}

	fkey := e.key.file()
	if next := e.unlinkFile(); e == next {
		c.files.Delete(fkey)
	} else {
		c.files.Put(fkey, next)
	}
}

// Check that the specified entry is not referenced by the cache.
func (c *shard) metaCheck(e *entry) {
	if invariants.Enabled {
		if _, ok := c.entries[e]; ok {
			fmt.Fprintf(os.Stderr, "%p: %s unexpectedly found in entries map\n%s",
				e, e.key, debug.Stack())
			os.Exit(1)
		}
		if c.blocks.findByValue(e) != nil {
			fmt.Fprintf(os.Stderr, "%p: %s unexpectedly found in blocks map\n%s\n%s",
				e, e.key, &c.blocks, debug.Stack())
			os.Exit(1)
		}
		if c.files.findByValue(e) != nil {
			fmt.Fprintf(os.Stderr, "%p: %s unexpectedly found in files map\n%s\n%s",
				e, e.key, &c.files, debug.Stack())
			os.Exit(1)
		}
		// NB: c.hand{Hot,Cold,Test} are pointers into a single linked list. We
		// only have to traverse one of them to check all of them.
		var countHot, countCold, countTest int64
		var sizeHot, sizeCold, sizeTest int64
		for t := c.handHot.next(); t != nil; t = t.next() {
			// Recompute count{Hot,Cold,Test} and size{Hot,Cold,Test}.
			switch t.ptype {
			case etHot:
				countHot++
				sizeHot += t.size
			case etCold:
				countCold++
				sizeCold += t.size
			case etTest:
				countTest++
				sizeTest += t.size
			}
			if e == t {
				fmt.Fprintf(os.Stderr, "%p: %s unexpectedly found in blocks list\n%s",
					e, e.key, debug.Stack())
				os.Exit(1)
			}
			if t == c.handHot {
				break
			}
		}
		if countHot != c.countHot || countCold != c.countCold || countTest != c.countTest ||
			sizeHot != c.sizeHot || sizeCold != c.sizeCold || sizeTest != c.sizeTest {
			fmt.Fprintf(os.Stderr, "divergence of Hot,Cold,Test statistics\n"+
				"cache's statistics: hot %d, %d, cold %d, %d, test %d, %d\n"+
				"recalculated statistics: hot %d, %d, cold %d, %d, test %d, %d\n%s",
				c.countHot, c.sizeHot, c.countCold, c.sizeCold, c.countTest, c.sizeTest,
				countHot, sizeHot, countCold, sizeCold, countTest, sizeTest,
				debug.Stack())
			os.Exit(1)
		}
	}
}

func (c *shard) metaEvict(e *entry) {
	switch e.ptype {
	case etHot:
		c.sizeHot -= e.size
		c.countHot--
	case etCold:
		c.sizeCold -= e.size
		c.countCold--
	case etTest:
		c.sizeTest -= e.size
		c.countTest--
	}
	c.metaDel(e)
	c.metaCheck(e)
	e.free()
}

func (c *shard) evict() {
	for c.targetSize() <= c.sizeHot+c.sizeCold && c.handCold != nil {
		c.runHandCold(c.countCold, c.sizeCold)
	}
}

func (c *shard) runHandCold(countColdDebug, sizeColdDebug int64) {
	// countColdDebug and sizeColdDebug should equal c.countCold and
	// c.sizeCold. They're parameters only to aid in debugging of
	// cockroachdb/cockroach#70154. Since they're parameters, their
	// arguments will appear within stack traces should we encounter
	// a reproduction.
	if c.countCold != countColdDebug || c.sizeCold != sizeColdDebug {
		panic(fmt.Sprintf("runHandCold: cold count and size are %d, %d, arguments are %d and %d",
			c.countCold, c.sizeCold, countColdDebug, sizeColdDebug))
	}

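	// A cold page that was referenced while on the clock is promoted to hot.
	// An unreferenced cold page is demoted to a test page: its value is
	// dropped but its metadata is kept to detect a subsequent re-reference.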
	e := c.handCold
	if e.ptype == etCold {
		if atomic.LoadInt32(&e.referenced) == 1 {
			atomic.StoreInt32(&e.referenced, 0)
			e.ptype = etHot
			c.sizeCold -= e.size
			c.countCold--
			c.sizeHot += e.size
			c.countHot++
		} else {
			e.setValue(nil)
			e.ptype = etTest
			c.sizeCold -= e.size
			c.countCold--
			c.sizeTest += e.size
			c.countTest++
			for c.targetSize() < c.sizeTest && c.handTest != nil {
				c.runHandTest()
			}
		}
	}

	c.handCold = c.handCold.next()

	for c.targetSize()-c.coldTarget <= c.sizeHot && c.handHot != nil {
		c.runHandHot()
	}
}

func (c *shard) runHandHot() {
	if c.handHot == c.handTest && c.handTest != nil {
		c.runHandTest()
		if c.handHot == nil {
			return
		}
	}

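	// A referenced hot page gets another round as hot; an unreferenced hot
	// page is demoted to cold.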
	e := c.handHot
	if e.ptype == etHot {
		if atomic.LoadInt32(&e.referenced) == 1 {
			atomic.StoreInt32(&e.referenced, 0)
		} else {
			e.ptype = etCold
			c.sizeHot -= e.size
			c.countHot--
			c.sizeCold += e.size
			c.countCold++
		}
	}

	c.handHot = c.handHot.next()
}

func (c *shard) runHandTest() {
	if c.sizeCold > 0 && c.handTest == c.handCold && c.handCold != nil {
		// sizeCold is > 0, so assert that countCold != 0. See the
		// comment above count{Hot,Cold,Test}.
		if c.countCold == 0 {
			panic(fmt.Sprintf("bitalostable: mismatch %d cold size, %d cold count", c.sizeCold, c.countCold))
		}

		c.runHandCold(c.countCold, c.sizeCold)
		if c.handTest == nil {
			return
		}
	}

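	// A test page reached by the test hand was not re-referenced during its
	// test period: evict its metadata and shrink the cold target.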
	e := c.handTest
	if e.ptype == etTest {
		c.sizeTest -= e.size
		c.countTest--
		c.coldTarget -= e.size
		if c.coldTarget < 0 {
			c.coldTarget = 0
		}
		c.metaDel(e)
		c.metaCheck(e)
		e.free()
	}

	c.handTest = c.handTest.next()
}

// Metrics holds metrics for the cache.
type Metrics struct {
	// The number of bytes in use by the cache.
	Size int64
	// The count of objects (blocks or tables) in the cache.
	Count int64
	// The number of cache hits.
	Hits int64
	// The number of cache misses.
	Misses int64
}

// Cache implements Pebble's sharded block cache. The CLOCK-Pro algorithm is
// used for page replacement
// (http://static.usenix.org/event/usenix05/tech/general/full_papers/jiang/jiang_html/html.html). In
// order to provide better concurrency, 2 x GOMAXPROCS shards are created, with
// each shard being given 1/n of the target cache size. The CLOCK-Pro algorithm
// is run independently on each shard.
//
// Blocks are keyed by an (id, fileNum, offset) triple. The ID is a namespace
// for file numbers and allows a single Cache to be shared between multiple
// Pebble instances. The fileNum and offset refer to an sstable file number and
// the offset of the block within the file. Because sstables are immutable and
// file numbers are never reused, (fileNum,offset) are unique for the lifetime
// of a Pebble instance.
//
// In addition to maintaining a map from (fileNum,offset) to data, each shard
// maintains a map of the cached blocks for a particular fileNum. This allows
// efficient eviction of all of the blocks for a file which is used when an
// sstable is deleted from disk.
//
// # Memory Management
//
// In order to reduce pressure on the Go GC, manual memory management is
// performed for the data stored in the cache. Manual memory management is
// performed by calling into C.{malloc,free} to allocate memory. Cache.Values
// are reference counted and the memory backing a manual value is freed when
// the reference count drops to 0.
//
// Manual memory management brings the possibility of memory leaks. It is
// imperative that every Handle returned by Cache.{Get,Set} is eventually
// released. The "invariants" build tag enables a leak detection facility that
// places a GC finalizer on cache.Value. When the cache.Value finalizer is run,
// if the underlying buffer is still present a leak has occurred. The "tracing"
// build tag enables tracing of cache.Value reference count manipulation and
// eases finding where a leak has occurred. These two facilities are usually
// used in combination by specifying `-tags invariants,tracing`. Note that
// "tracing" produces a significant slowdown, while "invariants" does not.
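//
// For example, a leak-hunting test run might be invoked as:
//
//	go test -tags invariants,tracing ./internal/cache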
type Cache struct {
	refs    int64
	maxSize int64
	idAlloc uint64
	shards  []shard

	// Traces recorded by Cache.trace. Used for debugging.
	tr struct {
		sync.Mutex
		msgs []string
	}
}

// New creates a new cache of the specified size. Memory for the cache is
// allocated on demand, not during initialization. The cache is created with a
// reference count of 1. Each DB it is associated with adds a reference, so the
// creator of the cache should usually release their reference after the DB is
// created.
//
//	c := cache.New(...)
//	defer c.Unref()
//	d, err := bitalostable.Open(bitalostable.Options{Cache: c})
func New(size int64) *Cache {
	return newShards(size, 2*runtime.GOMAXPROCS(0))
}

func newShards(size int64, shards int) *Cache {
	c := &Cache{
		refs:    1,
		maxSize: size,
		idAlloc: 1,
		shards:  make([]shard, shards),
	}
	c.trace("alloc", c.refs)
	for i := range c.shards {
		c.shards[i] = shard{
			maxSize:    size / int64(len(c.shards)),
			coldTarget: size / int64(len(c.shards)),
		}
		if entriesGoAllocated {
			c.shards[i].entries = make(map[*entry]struct{})
		}
		c.shards[i].blocks.init(16)
		c.shards[i].files.init(16)
	}

	// Note: this is a no-op if invariants are disabled or race is enabled.
	invariants.SetFinalizer(c, func(obj interface{}) {
		c := obj.(*Cache)
		if v := atomic.LoadInt64(&c.refs); v != 0 {
			c.tr.Lock()
			fmt.Fprintf(os.Stderr,
				"bitalostable: cache (%p) has non-zero reference count: %d\n", c, v)
			if len(c.tr.msgs) > 0 {
				fmt.Fprintf(os.Stderr, "%s\n", strings.Join(c.tr.msgs, "\n"))
			}
			c.tr.Unlock()
			os.Exit(1)
		}
	})
	return c
}

func (c *Cache) getShard(id uint64, fileNum base.FileNum, offset uint64) *shard {
	if id == 0 {
		panic("bitalostable: 0 cache ID is invalid")
	}

	// Inlined version of fnv.New64 + Write.
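	//
	// For reference, an equivalent (but slower, allocating) form using the
	// standard library would look roughly like the sketch below, assuming
	// hash/fnv and encoding/binary were imported:
	//
	//	var b [24]byte
	//	binary.LittleEndian.PutUint64(b[0:8], id)
	//	binary.LittleEndian.PutUint64(b[8:16], uint64(fileNum))
	//	binary.LittleEndian.PutUint64(b[16:24], offset)
	//	f := fnv.New64()
	//	_, _ = f.Write(b[:])
	//	s := &c.shards[f.Sum64()%uint64(len(c.shards))]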
	const offset64 = 14695981039346656037
	const prime64 = 1099511628211

	h := uint64(offset64)
	for i := 0; i < 8; i++ {
		h *= prime64
		h ^= uint64(id & 0xff)
		id >>= 8
	}
	for i := 0; i < 8; i++ {
		h *= prime64
		h ^= uint64(fileNum & 0xff)
		fileNum >>= 8
	}
	for i := 0; i < 8; i++ {
		h *= prime64
		h ^= uint64(offset & 0xff)
		offset >>= 8
	}

	return &c.shards[h%uint64(len(c.shards))]
}

// Ref adds a reference to the cache. The cache only remains valid as long as
// a reference is maintained to it.
func (c *Cache) Ref() {
	v := atomic.AddInt64(&c.refs, 1)
	if v <= 1 {
		panic(fmt.Sprintf("bitalostable: inconsistent reference count: %d", v))
	}
	c.trace("ref", v)
}

// Unref releases a reference on the cache.
func (c *Cache) Unref() {
	v := atomic.AddInt64(&c.refs, -1)
	c.trace("unref", v)
	switch {
	case v < 0:
		panic(fmt.Sprintf("bitalostable: inconsistent reference count: %d", v))
	case v == 0:
		for i := range c.shards {
			c.shards[i].Free()
		}
	}
}

// Get retrieves the cache value for the specified file and offset, returning
// nil if no value is present.
func (c *Cache) Get(id uint64, fileNum base.FileNum, offset uint64) Handle {
	return c.getShard(id, fileNum, offset).Get(id, fileNum, offset)
}

// Set sets the cache value for the specified file and offset, overwriting an
// existing value if present. A Handle is returned which provides faster
// retrieval of the cached value than Get (lock-free and avoidance of the map
// lookup). The value must have been allocated by Cache.Alloc.
func (c *Cache) Set(id uint64, fileNum base.FileNum, offset uint64, value *Value) Handle {
	return c.getShard(id, fileNum, offset).Set(id, fileNum, offset, value)
}

// Delete deletes the cached value for the specified file and offset.
func (c *Cache) Delete(id uint64, fileNum base.FileNum, offset uint64) {
	c.getShard(id, fileNum, offset).Delete(id, fileNum, offset)
}

// EvictFile evicts all of the cache values for the specified file.
func (c *Cache) EvictFile(id uint64, fileNum base.FileNum) {
	if id == 0 {
		panic("bitalostable: 0 cache ID is invalid")
	}
	for i := range c.shards {
		c.shards[i].EvictFile(id, fileNum)
	}
}

// MaxSize returns the max size of the cache.
func (c *Cache) MaxSize() int64 {
	return c.maxSize
}

// Size returns the current space used by the cache.
func (c *Cache) Size() int64 {
	var size int64
	for i := range c.shards {
		size += c.shards[i].Size()
	}
	return size
}

// Alloc allocates a byte slice of the specified size, possibly reusing
// previously allocated but unused memory. The memory backing the value is
// manually managed. The caller MUST either add the value to the cache (via
// Cache.Set), or release the value (via Cache.Free). Failure to do so will
// result in a memory leak.
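//
// An illustrative fill-and-insert pattern (Buf is an assumption here: it is
// the accessor Pebble's cache.Value uses to expose its backing slice):
//
//	v := c.Alloc(len(data))
//	copy(v.Buf(), data)
//	h := c.Set(id, fileNum, offset, v)
//	defer h.Release()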
func (c *Cache) Alloc(n int) *Value {
	return newValue(n)
}

// Free frees the specified value. The buffer associated with the value will
// possibly be reused, making it invalid to use the buffer after calling
// Free. Do not call Free on a value that has been added to the cache.
func (c *Cache) Free(v *Value) {
	if n := v.refs(); n > 1 {
		panic(fmt.Sprintf("bitalostable: Value has been added to the cache: refs=%d", n))
	}
	v.release()
}

// Reserve N bytes in the cache. This effectively shrinks the size of the cache
// by N bytes, without actually consuming any memory. The returned closure
// should be invoked to release the reservation.
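//
// An illustrative use:
//
//	release := c.Reserve(64 << 20) // shrink the cache by 64 MB
//	defer release()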
func (c *Cache) Reserve(n int) func() {
	// Round-up the per-shard reservation. Most reservations should be large, so
	// this probably doesn't matter in practice.
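	// For example, with 16 shards, Reserve(100) reserves (100+15)/16 = 7 bytes
	// per shard, 112 bytes in total rather than 100.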
	shardN := (n + len(c.shards) - 1) / len(c.shards)
	for i := range c.shards {
		c.shards[i].Reserve(shardN)
	}
	return func() {
		if shardN == -1 {
			panic("bitalostable: cache reservation already released")
		}
		for i := range c.shards {
			c.shards[i].Reserve(-shardN)
		}
		shardN = -1
	}
}

// Metrics returns the metrics for the cache.
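//
// An illustrative read, computing a hit rate (guarding against zero total
// lookups is left to the caller):
//
//	m := c.Metrics()
//	hitRate := float64(m.Hits) / float64(m.Hits+m.Misses)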
func (c *Cache) Metrics() Metrics {
	var m Metrics
	for i := range c.shards {
		s := &c.shards[i]
		s.mu.RLock()
		m.Count += int64(s.blocks.Count())
		m.Size += s.sizeHot + s.sizeCold
		s.mu.RUnlock()
		m.Hits += atomic.LoadInt64(&s.hits)
		m.Misses += atomic.LoadInt64(&s.misses)
	}
	return m
}

// NewID returns a new ID to be used as a namespace for cached file
// blocks.
func (c *Cache) NewID() uint64 {
	return atomic.AddUint64(&c.idAlloc, 1)
}