github.com/angenalZZZ/gofunc@v0.0.0-20210507121333-48ff1be3917b/data/cache/fastcache/fastcache.go

github.com/angenalZZZ/gofunc@v0.0.0-20210507121333-48ff1be3917b/data/cache/fastcache/fastcache.go (about)

     1  // The package has been extracted from https://victoriametrics.com/
     2  package fastcache
     3  
     4  import (
     5  	"fmt"
     6  	"sync"
     7  	"sync/atomic"
     8  
     9  	"github.com/cespare/xxhash/v2"
    10  )
    11  
    12  const bucketsCount = 512
    13  
    14  const chunkSize = 64 * 1024
    15  
    16  const bucketSizeBits = 40
    17  
    18  const genSizeBits = 64 - bucketSizeBits
    19  
    20  const maxGen = 1<<genSizeBits - 1
    21  
    22  const maxBucketSize uint64 = 1 << bucketSizeBits
    23  
// Stats represents cache stats.
//
// Use Cache.UpdateStats for obtaining fresh stats from the cache.
// UpdateStats adds to the fields, so call Reset before re-using
// a Stats value.
type Stats struct {
	// GetCalls is the number of Get calls.
	GetCalls uint64

	// SetCalls is the number of Set calls.
	SetCalls uint64

	// Misses is the number of cache misses.
	Misses uint64

	// Collisions is the number of cache collisions, i.e. lookups that
	// found an entry with the same hash but a different key.
	//
	// Usually the number of collisions must be close to zero.
	// A high number of collisions suggests something is wrong with the cache.
	Collisions uint64

	// Corruptions is the number of detected corruptions of the cache.
	//
	// Corruptions may occur when corrupted cache is loaded from file.
	Corruptions uint64

	// EntriesCount is the current number of entries in the cache.
	EntriesCount uint64

	// BytesSize is the current size of the cache in bytes.
	// It is the summary capacity of the chunks allocated so far.
	BytesSize uint64

	// BigStats contains stats for GetBig/SetBig methods.
	BigStats
}
    57  
    58  // Reset resets s, so it may be re-used again in Cache.UpdateStats.
    59  func (s *Stats) Reset() {
    60  	*s = Stats{}
    61  }
    62  
    63  // BigStats contains stats for GetBig/SetBig methods.
    64  type BigStats struct {
    65  	// GetBigCalls is the number of GetBig calls.
    66  	GetBigCalls uint64
    67  
    68  	// SetBigCalls is the number of SetBig calls.
    69  	SetBigCalls uint64
    70  
    71  	// TooBigKeyErrors is the number of calls to SetBig with too big key.
    72  	TooBigKeyErrors uint64
    73  
    74  	// InvalidMetavalueErrors is the number of calls to GetBig resulting
    75  	// to invalid metavalue.
    76  	InvalidMetavalueErrors uint64
    77  
    78  	// InvalidValueLenErrors is the number of calls to GetBig resulting
    79  	// to a chunk with invalid length.
    80  	InvalidValueLenErrors uint64
    81  
    82  	// InvalidValueHashErrors is the number of calls to GetBig resulting
    83  	// to a chunk with invalid hash value.
    84  	InvalidValueHashErrors uint64
    85  }
    86  
    87  func (bs *BigStats) reset() {
    88  	atomic.StoreUint64(&bs.GetBigCalls, 0)
    89  	atomic.StoreUint64(&bs.SetBigCalls, 0)
    90  	atomic.StoreUint64(&bs.TooBigKeyErrors, 0)
    91  	atomic.StoreUint64(&bs.InvalidMetavalueErrors, 0)
    92  	atomic.StoreUint64(&bs.InvalidValueLenErrors, 0)
    93  	atomic.StoreUint64(&bs.InvalidValueHashErrors, 0)
    94  }
    95  
// Cache is a fast thread-safe inmemory cache optimized for big number
// of entries.
//
// It has much lower impact on GC comparing to a simple `map[string][]byte`.
//
// Use New or LoadFromFile* for creating new cache instance.
// Concurrent goroutines may call any Cache methods on the same cache instance.
//
// Call Reset when the cache is no longer needed. This reclaims the allocated
// memory.
type Cache struct {
	// buckets holds the cache data. A key is always served by the bucket
	// at index hash(key) % bucketsCount; every bucket has its own lock.
	buckets [bucketsCount]bucket

	// bigStats holds the counters reported via the embedded BigStats
	// part of Stats. They are read and reset atomically.
	bigStats BigStats
}
   111  
   112  // New returns new cache with the given maxBytes capacity in bytes.
   113  //
   114  // maxBytes must be smaller than the available RAM size for the app,
   115  // since the cache holds data in memory.
   116  //
   117  // If maxBytes is less than 32MB, then the minimum cache capacity is 32MB.
   118  func New(maxBytes int) *Cache {
   119  	if maxBytes <= 0 {
   120  		panic(fmt.Errorf("maxBytes must be greater than 0; got %d", maxBytes))
   121  	}
   122  	var c Cache
   123  	maxBucketBytes := uint64((maxBytes + bucketsCount - 1) / bucketsCount)
   124  	for i := range c.buckets[:] {
   125  		c.buckets[i].Init(maxBucketBytes)
   126  	}
   127  	return &c
   128  }
   129  
   130  // Set stores (k, v) in the cache.
   131  //
   132  // Get must be used for reading the stored entry.
   133  //
   134  // The stored entry may be evicted at any time either due to cache
   135  // overflow or due to unlikely hash collision.
   136  // Pass higher maxBytes value to New if the added items disappear
   137  // frequently.
   138  //
   139  // (k, v) entries with summary size exceeding 64KB aren't stored in the cache.
   140  // SetBig can be used for storing entries exceeding 64KB.
   141  //
   142  // k and v contents may be modified after returning from Set.
   143  func (c *Cache) Set(k, v []byte) {
   144  	h := xxhash.Sum64(k)
   145  	idx := h % bucketsCount
   146  	c.buckets[idx].Set(k, v, h)
   147  }
   148  
   149  // Get appends value by the key k to dst and returns the result.
   150  //
   151  // Get allocates new byte slice for the returned value if dst is nil.
   152  //
   153  // Get returns only values stored in c via Set.
   154  //
   155  // k contents may be modified after returning from Get.
   156  func (c *Cache) Get(dst, k []byte) []byte {
   157  	h := xxhash.Sum64(k)
   158  	idx := h % bucketsCount
   159  	dst, _ = c.buckets[idx].Get(dst, k, h, true)
   160  	return dst
   161  }
   162  
   163  // HasGet works identically to Get, but also returns whether the given key
   164  // exists in the cache. This method makes it possible to differentiate between a
   165  // stored nil/empty value versus and non-existing value.
   166  func (c *Cache) HasGet(dst, k []byte) ([]byte, bool) {
   167  	h := xxhash.Sum64(k)
   168  	idx := h % bucketsCount
   169  	return c.buckets[idx].Get(dst, k, h, true)
   170  }
   171  
   172  // Has returns true if entry for the given key k exists in the cache.
   173  func (c *Cache) Has(k []byte) bool {
   174  	h := xxhash.Sum64(k)
   175  	idx := h % bucketsCount
   176  	_, ok := c.buckets[idx].Get(nil, k, h, false)
   177  	return ok
   178  }
   179  
   180  // Del deletes value for the given k from the cache.
   181  //
   182  // k contents may be modified after returning from Del.
   183  func (c *Cache) Del(k []byte) {
   184  	h := xxhash.Sum64(k)
   185  	idx := h % bucketsCount
   186  	c.buckets[idx].Del(h)
   187  }
   188  
   189  // Reset removes all the items from the cache.
   190  func (c *Cache) Reset() {
   191  	for i := range c.buckets[:] {
   192  		c.buckets[i].Reset()
   193  	}
   194  	c.bigStats.reset()
   195  }
   196  
   197  // UpdateStats adds cache stats to s.
   198  //
   199  // Call s.Reset before calling UpdateStats if s is re-used.
   200  func (c *Cache) UpdateStats(s *Stats) {
   201  	for i := range c.buckets[:] {
   202  		c.buckets[i].UpdateStats(s)
   203  	}
   204  	s.GetBigCalls += atomic.LoadUint64(&c.bigStats.GetBigCalls)
   205  	s.SetBigCalls += atomic.LoadUint64(&c.bigStats.SetBigCalls)
   206  	s.TooBigKeyErrors += atomic.LoadUint64(&c.bigStats.TooBigKeyErrors)
   207  	s.InvalidMetavalueErrors += atomic.LoadUint64(&c.bigStats.InvalidMetavalueErrors)
   208  	s.InvalidValueLenErrors += atomic.LoadUint64(&c.bigStats.InvalidValueLenErrors)
   209  	s.InvalidValueHashErrors += atomic.LoadUint64(&c.bigStats.InvalidValueHashErrors)
   210  }
   211  
// bucket is a single shard of the cache: a ring buffer of chunks plus
// a map from key hash to the position of the entry in the ring buffer.
type bucket struct {
	// mu protects chunks, m, idx and gen.
	mu sync.RWMutex

	// chunks is a ring buffer with encoded (k, v) pairs.
	// It consists of 64KB chunks.
	// A chunk stays nil until the first entry is written into it.
	chunks [][]byte

	// m maps hash(k) to idx of (k, v) pair in chunks.
	// The low bucketSizeBits bits of the value hold the idx, while the
	// remaining high bits hold the generation the pair was written in.
	m map[uint64]uint64

	// idx points to chunks for writing the next (k, v) pair.
	idx uint64

	// gen is the generation of chunks.
	// It is incremented every time writing wraps around to the first chunk.
	gen uint64

	// Stats counters. They are accessed with sync/atomic functions.
	getCalls    uint64
	setCalls    uint64
	misses      uint64
	collisions  uint64
	corruptions uint64
}
   234  
   235  func (b *bucket) Init(maxBytes uint64) {
   236  	if maxBytes == 0 {
   237  		panic(fmt.Errorf("maxBytes cannot be zero"))
   238  	}
   239  	if maxBytes >= maxBucketSize {
   240  		panic(fmt.Errorf("too big maxBytes=%d; should be smaller than %d", maxBytes, maxBucketSize))
   241  	}
   242  	maxChunks := (maxBytes + chunkSize - 1) / chunkSize
   243  	b.chunks = make([][]byte, maxChunks)
   244  	b.m = make(map[uint64]uint64)
   245  	b.Reset()
   246  }
   247  
   248  func (b *bucket) Reset() {
   249  	b.mu.Lock()
   250  	chunks := b.chunks
   251  	for i := range chunks {
   252  		//putChunk(chunks[i])
   253  		chunks[i] = nil
   254  	}
   255  	bm := b.m
   256  	for k := range bm {
   257  		delete(bm, k)
   258  	}
   259  	b.idx = 0
   260  	b.gen = 1
   261  	atomic.StoreUint64(&b.getCalls, 0)
   262  	atomic.StoreUint64(&b.setCalls, 0)
   263  	atomic.StoreUint64(&b.misses, 0)
   264  	atomic.StoreUint64(&b.collisions, 0)
   265  	atomic.StoreUint64(&b.corruptions, 0)
   266  	b.mu.Unlock()
   267  }
   268  
   269  func (b *bucket) Clean() {
   270  	b.mu.Lock()
   271  	bGen := b.gen & ((1 << genSizeBits) - 1)
   272  	bIdx := b.idx
   273  	bm := b.m
   274  	for k, v := range bm {
   275  		gen := v >> bucketSizeBits
   276  		idx := v & ((1 << bucketSizeBits) - 1)
   277  		if gen == bGen && idx < bIdx || gen+1 == bGen && idx >= bIdx || gen == maxGen && bGen == 1 && idx >= bIdx {
   278  			continue
   279  		}
   280  		delete(bm, k)
   281  	}
   282  	b.mu.Unlock()
   283  }
   284  
   285  func (b *bucket) UpdateStats(s *Stats) {
   286  	s.GetCalls += atomic.LoadUint64(&b.getCalls)
   287  	s.SetCalls += atomic.LoadUint64(&b.setCalls)
   288  	s.Misses += atomic.LoadUint64(&b.misses)
   289  	s.Collisions += atomic.LoadUint64(&b.collisions)
   290  	s.Corruptions += atomic.LoadUint64(&b.corruptions)
   291  
   292  	b.mu.RLock()
   293  	s.EntriesCount += uint64(len(b.m))
   294  	for _, chunk := range b.chunks {
   295  		s.BytesSize += uint64(cap(chunk))
   296  	}
   297  	b.mu.RUnlock()
   298  }
   299  
// Set appends the (k, v) pair to the ring buffer of b and remembers
// its position in the map under the key hash h.
//
// The pair is silently skipped if len(k) >= 256, len(v) >= 65536 or
// the encoded pair doesn't fit a single chunk.
// A map entry already stored under the same h is overwritten.
func (b *bucket) Set(k, v []byte, h uint64) {
	setCalls := atomic.AddUint64(&b.setCalls, 1)
	if setCalls%16384 == 0 {
		// Every 16384 (1<<14) calls remove the map entries
		// that cannot be read anymore.
		b.Clean()
	}
	// Keys of 256 bytes or more and values of 64KB or more are not accepted.
	if len(k) >= 256 || len(v) >= 65536 {
		// The lengths are encoded with 2 bytes each (see below).
		// NOTE(review): the key limit is tighter than the 2-byte encoding
		// requires; it looks like a deliberate restriction - confirm.
		return
	}
	// Header of the encoded pair: big-endian 16-bit key length
	// followed by big-endian 16-bit value length.
	var kvLenBuf [4]byte
	kvLenBuf[0] = byte(uint16(len(k)) >> 8)
	kvLenBuf[1] = byte(len(k))
	kvLenBuf[2] = byte(uint16(len(v)) >> 8)
	kvLenBuf[3] = byte(len(v))
	kvLen := uint64(len(kvLenBuf) + len(k) + len(v))
	if kvLen >= chunkSize {
		// Do not store too big keys and values, since they do not
		// fit a chunk.
		return
	}

	b.mu.Lock()
	idx := b.idx
	idxNew := idx + kvLen
	chunkIdx := idx / chunkSize
	chunkIdxNew := idxNew / chunkSize
	if chunkIdxNew > chunkIdx {
		// The pair doesn't fit the rest of the current chunk,
		// so it is written at the start of another chunk.
		if chunkIdxNew >= uint64(len(b.chunks)) {
			// The last chunk is exhausted: wrap around to the first
			// chunk and start a new generation.
			idx = 0
			idxNew = kvLen
			chunkIdx = 0
			b.gen++
			if b.gen&((1<<genSizeBits)-1) == 0 {
				// Skip the generation whose stored (truncated) value is
				// zero, so a map value for a stored pair is never zero.
				b.gen++
			}
		} else {
			idx = chunkIdxNew * chunkSize
			idxNew = idx + kvLen
			chunkIdx = chunkIdxNew
		}
		// Truncate the chunk to be overwritten, keeping its capacity.
		b.chunks[chunkIdx] = b.chunks[chunkIdx][:0]
	}
	chunk := b.chunks[chunkIdx]
	if chunk == nil {
		// The chunk is allocated on the first write into it.
		chunk = make([]byte, chunkSize)
		chunk = chunk[:0]
	}
	chunk = append(chunk, kvLenBuf[:]...)
	chunk = append(chunk, k...)
	chunk = append(chunk, v...)
	b.chunks[chunkIdx] = chunk
	// Pack the offset of the pair into the low bits and the generation
	// into the high bits of the map value.
	b.m[h] = idx | (b.gen << bucketSizeBits)
	b.idx = idxNew
	b.mu.Unlock()
}
   360  
// Get looks up the entry stored under the hash h and verifies that
// its key equals k.
//
// If the entry is found, then true is returned; additionally the value
// is appended to dst when returnDst is set. Otherwise dst is returned
// unchanged together with false and the misses counter is incremented.
func (b *bucket) Get(dst, k []byte, h uint64, returnDst bool) ([]byte, bool) {
	atomic.AddUint64(&b.getCalls, 1)
	found := false
	b.mu.RLock()
	v := b.m[h]
	bGen := b.gen & ((1 << genSizeBits) - 1)
	// Zero v means there is no map entry for h.
	if v > 0 {
		// Unpack the generation and the offset of the entry.
		gen := v >> bucketSizeBits
		idx := v & ((1 << bucketSizeBits) - 1)
		// The entry is accepted if it was written in the current generation
		// before the write position, or in the previous generation at or
		// after the write position. The last clause covers the generation
		// counter wrapped from maxGen to 1.
		if gen == bGen && idx < b.idx || gen+1 == bGen && idx >= b.idx || gen == maxGen && bGen == 1 && idx >= b.idx {
			chunkIdx := idx / chunkSize
			if chunkIdx >= uint64(len(b.chunks)) {
				// Corrupted data during the load from file. Just skip it.
				atomic.AddUint64(&b.corruptions, 1)
				goto end
			}
			chunk := b.chunks[chunkIdx]
			// From here on idx is the offset inside the chunk.
			idx %= chunkSize
			if idx+4 >= chunkSize {
				// Corrupted data during the load from file. Just skip it.
				atomic.AddUint64(&b.corruptions, 1)
				goto end
			}
			// Decode the big-endian 16-bit key and value lengths.
			kvLenBuf := chunk[idx : idx+4]
			keyLen := (uint64(kvLenBuf[0]) << 8) | uint64(kvLenBuf[1])
			valLen := (uint64(kvLenBuf[2]) << 8) | uint64(kvLenBuf[3])
			idx += 4
			if idx+keyLen+valLen >= chunkSize {
				// Corrupted data during the load from file. Just skip it.
				atomic.AddUint64(&b.corruptions, 1)
				goto end
			}
			if string(k) == string(chunk[idx:idx+keyLen]) {
				idx += keyLen
				if returnDst {
					dst = append(dst, chunk[idx:idx+valLen]...)
				}
				found = true
			} else {
				// Another key with the same hash is stored at this position.
				atomic.AddUint64(&b.collisions, 1)
			}
		}
	}
end:
	b.mu.RUnlock()
	if !found {
		atomic.AddUint64(&b.misses, 1)
	}
	return dst, found
}
   411  
   412  func (b *bucket) Del(h uint64) {
   413  	b.mu.Lock()
   414  	delete(b.m, h)
   415  	b.mu.Unlock()
   416  }