github.com/coocood/badger@v1.5.1-0.20200528065104-c02ac3616d04/cache/sketch.go (about)

     1  /*
     2   * Copyright 2019 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // This package includes multiple probabilistic data structures needed for
    18  // admission/eviction metadata. Most are Counting Bloom Filter variations, but
    19  // a caching-specific feature that is also required is a "freshness" mechanism,
    20  // which basically serves as a "lifetime" process. This freshness mechanism
    21  // was described in the original TinyLFU paper [1], but other mechanisms may
    22  // be better suited for certain data distributions.
    23  //
    24  // [1]: https://arxiv.org/abs/1512.00727
    25  package cache
    26  
    27  import (
    28  	"fmt"
    29  	"math/rand"
    30  	"time"
    31  )
    32  
// cmSketch is a Count-Min sketch implementation with 4-bit counters, heavily
// based on Damian Gryski's CM4 [1].
//
// Every hashed key maps to one counter in each row; Increment bumps all of
// them and Estimate reports the smallest.
//
// [1]: https://github.com/dgryski/go-tinylfu/blob/master/cm4.go
type cmSketch struct {
	// rows holds cmDepth independent rows of packed 4-bit counters.
	rows [cmDepth]cmRow
	// seed holds one random value per row. It is XORed into the key hash
	// before masking, so the rows generally select different counters for
	// the same key.
	seed [cmDepth]uint64
	// mask is the per-row counter count minus one. The count is rounded up
	// to a power of two by newCmSketch, so ANDing with mask yields a valid
	// counter index.
	mask uint64
}
    42  
    43  const (
    44  	// cmDepth is the number of counter copies to store (think of it as rows)
    45  	cmDepth = 4
    46  )
    47  
    48  func newCmSketch(numCounters int64) *cmSketch {
    49  	if numCounters == 0 {
    50  		panic("cmSketch: bad numCounters")
    51  	}
    52  	// get the next power of 2 for better cache performance
    53  	numCounters = next2Power(numCounters)
    54  	sketch := &cmSketch{mask: uint64(numCounters - 1)}
    55  	// initialize rows of counters and seeds
    56  	source := rand.New(rand.NewSource(time.Now().UnixNano()))
    57  	for i := 0; i < cmDepth; i++ {
    58  		sketch.seed[i] = source.Uint64()
    59  		sketch.rows[i] = newCmRow(numCounters)
    60  	}
    61  	return sketch
    62  }
    63  
    64  // Increment increments the count(ers) for the specified key.
    65  func (s *cmSketch) Increment(hashed uint64) {
    66  	for i := range s.rows {
    67  		s.rows[i].increment((hashed ^ s.seed[i]) & s.mask)
    68  	}
    69  }
    70  
    71  // Estimate returns the value of the specified key.
    72  func (s *cmSketch) Estimate(hashed uint64) int64 {
    73  	min := byte(255)
    74  	for i := range s.rows {
    75  		val := s.rows[i].get((hashed ^ s.seed[i]) & s.mask)
    76  		if val < min {
    77  			min = val
    78  		}
    79  	}
    80  	return int64(min)
    81  }
    82  
    83  // Reset halves all counter values.
    84  func (s *cmSketch) Reset() {
    85  	for _, r := range s.rows {
    86  		r.reset()
    87  	}
    88  }
    89  
    90  // Clear zeroes all counters.
    91  func (s *cmSketch) Clear() {
    92  	for _, r := range s.rows {
    93  		r.clear()
    94  	}
    95  }
    96  
    97  // cmRow is a row of bytes, with each byte holding two counters
    98  type cmRow []byte
    99  
   100  func newCmRow(numCounters int64) cmRow {
   101  	return make(cmRow, numCounters/2)
   102  }
   103  
   104  func (r cmRow) get(n uint64) byte {
   105  	return byte(r[n/2]>>((n&1)*4)) & 0x0f
   106  }
   107  
   108  func (r cmRow) increment(n uint64) {
   109  	// index of the counter
   110  	i := n / 2
   111  	// shift distance (even 0, odd 4)
   112  	s := (n & 1) * 4
   113  	// counter value
   114  	v := (r[i] >> s) & 0x0f
   115  	// only increment if not max value (overflow wrap is bad for LFU)
   116  	if v < 15 {
   117  		r[i] += 1 << s
   118  	}
   119  }
   120  
   121  func (r cmRow) reset() {
   122  	// halve each counter
   123  	for i := range r {
   124  		r[i] = (r[i] >> 1) & 0x77
   125  	}
   126  }
   127  
   128  func (r cmRow) clear() {
   129  	// zero each counter
   130  	for i := range r {
   131  		r[i] = 0
   132  	}
   133  }
   134  
   135  func (r cmRow) string() string {
   136  	s := ""
   137  	for i := uint64(0); i < uint64(len(r)*2); i++ {
   138  		s += fmt.Sprintf("%02d ", (r[(i/2)]>>((i&1)*4))&0x0f)
   139  	}
   140  	s = s[:len(s)-1]
   141  	return s
   142  }
   143  
   144  // next2Power rounds x up to the next power of 2, if it's not already one.
   145  func next2Power(x int64) int64 {
   146  	x--
   147  	x |= x >> 1
   148  	x |= x >> 2
   149  	x |= x >> 4
   150  	x |= x >> 8
   151  	x |= x >> 16
   152  	x |= x >> 32
   153  	x++
   154  	return x
   155  }