github.com/dgraph-io/ristretto@v0.1.2-0.20240116140435-c67e07994f91/policy.go (about) 1 /* 2 * Copyright 2020 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package ristretto 18 19 import ( 20 "math" 21 "sync" 22 "sync/atomic" 23 24 "github.com/dgraph-io/ristretto/z" 25 ) 26 27 const ( 28 // lfuSample is the number of items to sample when looking at eviction 29 // candidates. 5 seems to be the most optimal number [citation needed]. 30 lfuSample = 5 31 ) 32 33 // policy is the interface encapsulating eviction/admission behavior. 34 // TODO: remove this interface and just rename defaultPolicy to policy, as we 35 // are probably only going to use/implement/maintain one policy. 36 type policy[V any] interface { 37 ringConsumer 38 // Add attempts to Add the key-cost pair to the Policy. It returns a slice 39 // of evicted keys and a bool denoting whether or not the key-cost pair 40 // was added. If it returns true, the key should be stored in cache. 41 Add(uint64, int64) ([]*Item[V], bool) 42 // Has returns true if the key exists in the Policy. 43 Has(uint64) bool 44 // Del deletes the key from the Policy. 45 Del(uint64) 46 // Cap returns the available capacity. 47 Cap() int64 48 // Close stops all goroutines and closes all channels. 49 Close() 50 // Update updates the cost value for the key. 51 Update(uint64, int64) 52 // Cost returns the cost value of a key or -1 if missing. 53 Cost(uint64) int64 54 // Optionally, set stats object to track how policy is performing. 55 CollectMetrics(*Metrics) 56 // Clear zeroes out all counters and clears hashmaps. 57 Clear() 58 // MaxCost returns the current max cost of the cache policy. 59 MaxCost() int64 60 // UpdateMaxCost updates the max cost of the cache policy. 61 UpdateMaxCost(int64) 62 } 63 64 func newPolicy[V any](numCounters, maxCost int64) policy[V] { 65 return newDefaultPolicy[V](numCounters, maxCost) 66 } 67 68 type defaultPolicy[V any] struct { 69 sync.Mutex 70 admit *tinyLFU 71 evict *sampledLFU 72 itemsCh chan []uint64 73 stop chan struct{} 74 isClosed bool 75 metrics *Metrics 76 } 77 78 func newDefaultPolicy[V any](numCounters, maxCost int64) *defaultPolicy[V] { 79 p := &defaultPolicy[V]{ 80 admit: newTinyLFU(numCounters), 81 evict: newSampledLFU(maxCost), 82 itemsCh: make(chan []uint64, 3), 83 stop: make(chan struct{}), 84 } 85 go p.processItems() 86 return p 87 } 88 89 func (p *defaultPolicy[V]) CollectMetrics(metrics *Metrics) { 90 p.metrics = metrics 91 p.evict.metrics = metrics 92 } 93 94 type policyPair struct { 95 key uint64 96 cost int64 97 } 98 99 func (p *defaultPolicy[V]) processItems() { 100 for { 101 select { 102 case items := <-p.itemsCh: 103 p.Lock() 104 p.admit.Push(items) 105 p.Unlock() 106 case <-p.stop: 107 return 108 } 109 } 110 } 111 112 func (p *defaultPolicy[V]) Push(keys []uint64) bool { 113 if p.isClosed { 114 return false 115 } 116 117 if len(keys) == 0 { 118 return true 119 } 120 121 select { 122 case p.itemsCh <- keys: 123 p.metrics.add(keepGets, keys[0], uint64(len(keys))) 124 return true 125 default: 126 p.metrics.add(dropGets, keys[0], uint64(len(keys))) 127 return false 128 } 129 } 130 131 // Add decides whether the item with the given key and cost should be accepted by 132 // the policy. It returns the list of victims that have been evicted and a boolean 133 // indicating whether the incoming item should be accepted. 134 func (p *defaultPolicy[V]) Add(key uint64, cost int64) ([]*Item[V], bool) { 135 p.Lock() 136 defer p.Unlock() 137 138 // Cannot add an item bigger than entire cache. 139 if cost > p.evict.getMaxCost() { 140 return nil, false 141 } 142 143 // No need to go any further if the item is already in the cache. 144 if has := p.evict.updateIfHas(key, cost); has { 145 // An update does not count as an addition, so return false. 146 return nil, false 147 } 148 149 // If the execution reaches this point, the key doesn't exist in the cache. 150 // Calculate the remaining room in the cache (usually bytes). 151 room := p.evict.roomLeft(cost) 152 if room >= 0 { 153 // There's enough room in the cache to store the new item without 154 // overflowing. Do that now and stop here. 155 p.evict.add(key, cost) 156 p.metrics.add(costAdd, key, uint64(cost)) 157 return nil, true 158 } 159 160 // incHits is the hit count for the incoming item. 161 incHits := p.admit.Estimate(key) 162 // sample is the eviction candidate pool to be filled via random sampling. 163 // TODO: perhaps we should use a min heap here. Right now our time 164 // complexity is N for finding the min. Min heap should bring it down to 165 // O(lg N). 166 sample := make([]*policyPair, 0, lfuSample) 167 // As items are evicted they will be appended to victims. 168 victims := make([]*Item[V], 0) 169 170 // Delete victims until there's enough space or a minKey is found that has 171 // more hits than incoming item. 172 for ; room < 0; room = p.evict.roomLeft(cost) { 173 // Fill up empty slots in sample. 174 sample = p.evict.fillSample(sample) 175 176 // Find minimally used item in sample. 177 minKey, minHits, minId, minCost := uint64(0), int64(math.MaxInt64), 0, int64(0) 178 for i, pair := range sample { 179 // Look up hit count for sample key. 180 if hits := p.admit.Estimate(pair.key); hits < minHits { 181 minKey, minHits, minId, minCost = pair.key, hits, i, pair.cost 182 } 183 } 184 185 // If the incoming item isn't worth keeping in the policy, reject. 186 if incHits < minHits { 187 p.metrics.add(rejectSets, key, 1) 188 return victims, false 189 } 190 191 // Delete the victim from metadata. 192 p.evict.del(minKey) 193 194 // Delete the victim from sample. 195 sample[minId] = sample[len(sample)-1] 196 sample = sample[:len(sample)-1] 197 // Store victim in evicted victims slice. 198 victims = append(victims, &Item[V]{ 199 Key: minKey, 200 Conflict: 0, 201 Cost: minCost, 202 }) 203 } 204 205 p.evict.add(key, cost) 206 p.metrics.add(costAdd, key, uint64(cost)) 207 return victims, true 208 } 209 210 func (p *defaultPolicy[V]) Has(key uint64) bool { 211 p.Lock() 212 _, exists := p.evict.keyCosts[key] 213 p.Unlock() 214 return exists 215 } 216 217 func (p *defaultPolicy[V]) Del(key uint64) { 218 p.Lock() 219 p.evict.del(key) 220 p.Unlock() 221 } 222 223 func (p *defaultPolicy[V]) Cap() int64 { 224 p.Lock() 225 capacity := p.evict.getMaxCost() - p.evict.used 226 p.Unlock() 227 return capacity 228 } 229 230 func (p *defaultPolicy[V]) Update(key uint64, cost int64) { 231 p.Lock() 232 p.evict.updateIfHas(key, cost) 233 p.Unlock() 234 } 235 236 func (p *defaultPolicy[V]) Cost(key uint64) int64 { 237 p.Lock() 238 if cost, found := p.evict.keyCosts[key]; found { 239 p.Unlock() 240 return cost 241 } 242 p.Unlock() 243 return -1 244 } 245 246 func (p *defaultPolicy[V]) Clear() { 247 p.Lock() 248 p.admit.clear() 249 p.evict.clear() 250 p.Unlock() 251 } 252 253 func (p *defaultPolicy[V]) Close() { 254 if p.isClosed { 255 return 256 } 257 258 // Block until the p.processItems goroutine returns. 259 p.stop <- struct{}{} 260 close(p.stop) 261 close(p.itemsCh) 262 p.isClosed = true 263 } 264 265 func (p *defaultPolicy[V]) MaxCost() int64 { 266 if p == nil || p.evict == nil { 267 return 0 268 } 269 return p.evict.getMaxCost() 270 } 271 272 func (p *defaultPolicy[V]) UpdateMaxCost(maxCost int64) { 273 if p == nil || p.evict == nil { 274 return 275 } 276 p.evict.updateMaxCost(maxCost) 277 } 278 279 // sampledLFU is an eviction helper storing key-cost pairs. 280 type sampledLFU struct { 281 // NOTE: align maxCost to 64-bit boundary for use with atomic. 282 // As per https://golang.org/pkg/sync/atomic/: "On ARM, x86-32, 283 // and 32-bit MIPS, it is the caller’s responsibility to arrange 284 // for 64-bit alignment of 64-bit words accessed atomically. 285 // The first word in a variable or in an allocated struct, array, 286 // or slice can be relied upon to be 64-bit aligned." 287 maxCost int64 288 used int64 289 metrics *Metrics 290 keyCosts map[uint64]int64 291 } 292 293 func newSampledLFU(maxCost int64) *sampledLFU { 294 return &sampledLFU{ 295 keyCosts: make(map[uint64]int64), 296 maxCost: maxCost, 297 } 298 } 299 300 func (p *sampledLFU) getMaxCost() int64 { 301 return atomic.LoadInt64(&p.maxCost) 302 } 303 304 func (p *sampledLFU) updateMaxCost(maxCost int64) { 305 atomic.StoreInt64(&p.maxCost, maxCost) 306 } 307 308 func (p *sampledLFU) roomLeft(cost int64) int64 { 309 return p.getMaxCost() - (p.used + cost) 310 } 311 312 func (p *sampledLFU) fillSample(in []*policyPair) []*policyPair { 313 if len(in) >= lfuSample { 314 return in 315 } 316 for key, cost := range p.keyCosts { 317 in = append(in, &policyPair{key, cost}) 318 if len(in) >= lfuSample { 319 return in 320 } 321 } 322 return in 323 } 324 325 func (p *sampledLFU) del(key uint64) { 326 cost, ok := p.keyCosts[key] 327 if !ok { 328 return 329 } 330 p.used -= cost 331 delete(p.keyCosts, key) 332 p.metrics.add(costEvict, key, uint64(cost)) 333 p.metrics.add(keyEvict, key, 1) 334 } 335 336 func (p *sampledLFU) add(key uint64, cost int64) { 337 p.keyCosts[key] = cost 338 p.used += cost 339 } 340 341 func (p *sampledLFU) updateIfHas(key uint64, cost int64) bool { 342 if prev, found := p.keyCosts[key]; found { 343 // Update the cost of an existing key, but don't worry about evicting. 344 // Evictions will be handled the next time a new item is added. 345 p.metrics.add(keyUpdate, key, 1) 346 if prev > cost { 347 diff := prev - cost 348 p.metrics.add(costAdd, key, ^(uint64(diff) - 1)) 349 } else if cost > prev { 350 diff := cost - prev 351 p.metrics.add(costAdd, key, uint64(diff)) 352 } 353 p.used += cost - prev 354 p.keyCosts[key] = cost 355 return true 356 } 357 return false 358 } 359 360 func (p *sampledLFU) clear() { 361 p.used = 0 362 p.keyCosts = make(map[uint64]int64) 363 } 364 365 // tinyLFU is an admission helper that keeps track of access frequency using 366 // tiny (4-bit) counters in the form of a count-min sketch. 367 // tinyLFU is NOT thread safe. 368 type tinyLFU struct { 369 freq *cmSketch 370 door *z.Bloom 371 incrs int64 372 resetAt int64 373 } 374 375 func newTinyLFU(numCounters int64) *tinyLFU { 376 return &tinyLFU{ 377 freq: newCmSketch(numCounters), 378 door: z.NewBloomFilter(float64(numCounters), 0.01), 379 resetAt: numCounters, 380 } 381 } 382 383 func (p *tinyLFU) Push(keys []uint64) { 384 for _, key := range keys { 385 p.Increment(key) 386 } 387 } 388 389 func (p *tinyLFU) Estimate(key uint64) int64 { 390 hits := p.freq.Estimate(key) 391 if p.door.Has(key) { 392 hits++ 393 } 394 return hits 395 } 396 397 func (p *tinyLFU) Increment(key uint64) { 398 // Flip doorkeeper bit if not already done. 399 if added := p.door.AddIfNotHas(key); !added { 400 // Increment count-min counter if doorkeeper bit is already set. 401 p.freq.Increment(key) 402 } 403 p.incrs++ 404 if p.incrs >= p.resetAt { 405 p.reset() 406 } 407 } 408 409 func (p *tinyLFU) reset() { 410 // Zero out incrs. 411 p.incrs = 0 412 // clears doorkeeper bits 413 p.door.Clear() 414 // halves count-min counters 415 p.freq.Reset() 416 } 417 418 func (p *tinyLFU) clear() { 419 p.incrs = 0 420 p.door.Clear() 421 p.freq.Clear() 422 }