/*
 * Copyright 2019 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// This package includes multiple probabilistic data structures needed for
// admission/eviction metadata. Most are Counting Bloom Filter variations, but
// a caching-specific feature that is also required is a "freshness" mechanism,
// which basically serves as a "lifetime" process. This freshness mechanism
// was described in the original TinyLFU paper [1], but other mechanisms may
// be better suited for certain data distributions.
//
// [1]: https://arxiv.org/abs/1512.00727
package ristretto

import (
	"fmt"
	"math/rand"
	"strings"
	"time"
)
// cmSketch is a Count-Min sketch implementation with 4-bit counters, heavily
// based on Damian Gryski's CM4 [1].
//
// Each of the cmDepth rows holds one counter per slot; a key is mapped to a
// slot in every row by XOR-ing its hash with that row's seed and masking the
// result down to the row width.
//
// [1]: https://github.com/dgryski/go-tinylfu/blob/master/cm4.go
type cmSketch struct {
	rows [cmDepth]cmRow
	seed [cmDepth]uint64
	mask uint64
}

const (
	// cmDepth is the number of counter copies to store (think of it as rows).
	cmDepth = 4
)

// newCmSketch builds a sketch with room for at least numCounters counters per
// row. The count is rounded up to a power of two so that a slot can be picked
// with a bit mask, and to a minimum of two because every row byte stores two
// counters.
//
// It panics if numCounters is not positive, or is so large that rounding it
// up to a power of two overflows int64.
func newCmSketch(numCounters int64) *cmSketch {
	if numCounters <= 0 {
		panic("cmSketch: bad numCounters")
	}
	// Get the next power of 2 for better cache performance.
	numCounters = next2Power(numCounters)
	if numCounters <= 0 {
		// next2Power wrapped around: the request cannot be represented.
		panic("cmSketch: bad numCounters")
	}
	// A single counter would produce zero-length rows (1/2 == 0) and make the
	// first Increment/Estimate index out of range, so keep at least one byte.
	if numCounters < 2 {
		numCounters = 2
	}
	sketch := &cmSketch{mask: uint64(numCounters - 1)}
	// Initialize rows of counters and seeds.
	source := rand.New(rand.NewSource(time.Now().UnixNano()))
	for i := 0; i < cmDepth; i++ {
		sketch.seed[i] = source.Uint64()
		sketch.rows[i] = newCmRow(numCounters)
	}
	return sketch
}

// Increment increments the count(ers) for the specified key.
func (s *cmSketch) Increment(hashed uint64) {
	for i := range s.rows {
		s.rows[i].increment((hashed ^ s.seed[i]) & s.mask)
	}
}

// Estimate returns the value of the specified key: the smallest of the
// counters the key maps to across all rows.
func (s *cmSketch) Estimate(hashed uint64) int64 {
	lowest := byte(255)
	for i := range s.rows {
		if val := s.rows[i].get((hashed ^ s.seed[i]) & s.mask); val < lowest {
			lowest = val
		}
	}
	return int64(lowest)
}

// Reset halves all counter values.
func (s *cmSketch) Reset() {
	for i := range s.rows {
		s.rows[i].reset()
	}
}

// Clear zeroes all counters.
func (s *cmSketch) Clear() {
	for i := range s.rows {
		s.rows[i].clear()
	}
}

// cmRow is a row of bytes, with each byte holding two counters: counter 2k in
// the low nibble of byte k and counter 2k+1 in the high nibble.
type cmRow []byte

// newCmRow allocates a zeroed row with numCounters/2 bytes.
func newCmRow(numCounters int64) cmRow {
	return make(cmRow, numCounters/2)
}

// get returns the value (0-15) of counter n.
func (r cmRow) get(n uint64) byte {
	return (r[n/2] >> ((n & 1) * 4)) & 0x0f
}

// increment adds one to counter n, saturating at 15.
func (r cmRow) increment(n uint64) {
	// Index of the byte holding the counter.
	i := n / 2
	// Shift distance (even 0, odd 4).
	s := (n & 1) * 4
	// Counter value.
	v := (r[i] >> s) & 0x0f
	// Only increment if not max value (overflow wrap is bad for LFU).
	if v < 15 {
		r[i] += 1 << s
	}
}

// reset halves every counter in the row.
func (r cmRow) reset() {
	for i := range r {
		// Shifting the whole byte halves both nibbles at once; the 0x77 mask
		// drops the bit that leaked from the high nibble into the low one.
		r[i] = (r[i] >> 1) & 0x77
	}
}

// clear zeroes every counter in the row.
func (r cmRow) clear() {
	for i := range r {
		r[i] = 0
	}
}

// string renders every counter as a two-digit decimal, separated by single
// spaces, in counter order. An empty row renders as the empty string.
func (r cmRow) string() string {
	if len(r) == 0 {
		return ""
	}
	var b strings.Builder
	// Two counters per byte, at most three characters ("NN ") per counter.
	b.Grow(len(r) * 2 * 3)
	for i := uint64(0); i < uint64(len(r))*2; i++ {
		if i > 0 {
			b.WriteByte(' ')
		}
		fmt.Fprintf(&b, "%02d", r.get(i))
	}
	return b.String()
}

// next2Power rounds x up to the next power of 2, if it's not already one.
//
// The result is only a positive power of two for 1 <= x <= 1<<62: inputs
// below 1 and inputs above 1<<62 (where the final increment overflows) do
// not produce one, so callers must validate the result for such inputs.
func next2Power(x int64) int64 {
	x--
	// Smear the highest set bit into every lower position...
	x |= x >> 1
	x |= x >> 2
	x |= x >> 4
	x |= x >> 8
	x |= x >> 16
	x |= x >> 32
	// ...so that adding one carries up to the next power of two.
	x++
	return x
}