/*
 * Copyright 2019 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Ristretto is a fast, fixed size, in-memory cache with a dual focus on
// throughput and hit ratio performance. You can easily add Ristretto to an
// existing system and keep the most valuable data where you need it.
package ristretto

import (
	"bytes"
	"errors"
	"fmt"
	"sync"
	"sync/atomic"
	"time"
	"unsafe"

	"github.com/dgraph-io/ristretto/z"
)

var (
	// setBufSize is the capacity of the setBuf channel; Sets that cannot be
	// buffered are dropped (see Cache.SetWithTTL).
	// TODO: find the optimal value for this or make it configurable
	setBufSize = 32 * 1024
)

// itemSize is the in-memory overhead, in bytes, of one stored entry. It is
// added to an item's cost unless Config.IgnoreInternalCost is set.
const itemSize = int64(unsafe.Sizeof(storeItem[any]{}))

// zeroValue returns the zero value for any type T. Used to produce a valid
// "not found" value for generic return paths.
func zeroValue[T any]() T {
	var zero T
	return zero
}

// Key is the constraint for cache key types, re-exported from package z.
type Key = z.Key

// Cache is a thread-safe implementation of a hashmap with a TinyLFU admission
// policy and a Sampled LFU eviction policy. You can use the same Cache instance
// from as many goroutines as you want.
type Cache[K Key, V any] struct {
	// storedItems is the central concurrent hashmap where key-value items are stored.
	storedItems store[V]
	// cachePolicy determines what gets let in to the cache and what gets kicked out.
	cachePolicy policy[V]
	// getBuf is a custom ring buffer implementation that gets pushed to when
	// keys are read.
	getBuf *ringBuffer
	// setBuf is a buffer allowing us to batch/drop Sets during times of high
	// contention.
	setBuf chan *Item[V]
	// onEvict is called for item evictions.
	onEvict func(*Item[V])
	// onReject is called when an item is rejected via admission policy.
	onReject func(*Item[V])
	// onExit is called whenever a value goes out of scope from the cache.
	onExit (func(V))
	// keyToHash function is used to customize the key hashing algorithm.
	// Each key will be hashed using the provided function. If keyToHash value
	// is not set, the default keyToHash function is used.
	keyToHash func(K) (uint64, uint64)
	// stop is used to stop the processItems goroutine.
	stop chan struct{}
	// isClosed indicates whether the cache is closed.
	// NOTE(review): isClosed is a plain bool read and written from multiple
	// goroutines without synchronization — confirm callers serialize Close
	// with other operations, or this is a data race under -race.
	isClosed bool
	// cost calculates cost from a value.
	cost func(value V) int64
	// ignoreInternalCost dictates whether to ignore the cost of internally storing
	// the item in the cost calculation.
	ignoreInternalCost bool
	// cleanupTicker is used to periodically check for entries whose TTL has passed.
	cleanupTicker *time.Ticker
	// Metrics contains a running log of important statistics like hits, misses,
	// and dropped items. Nil unless Config.Metrics was set.
	Metrics *Metrics
}

// Config is passed to NewCache for creating new Cache instances.
type Config[K Key, V any] struct {
	// NumCounters determines the number of counters (keys) to keep that hold
	// access frequency information. It's generally a good idea to have more
	// counters than the max cache capacity, as this will improve eviction
	// accuracy and subsequent hit ratios.
	//
	// For example, if you expect your cache to hold 1,000,000 items when full,
	// NumCounters should be 10,000,000 (10x). Each counter takes up roughly
	// 3 bytes (4 bits for each counter * 4 copies plus about a byte per
	// counter for the bloom filter). Note that the number of counters is
	// internally rounded up to the nearest power of 2, so the space usage
	// may be a little larger than 3 bytes * NumCounters.
	NumCounters int64
	// MaxCost can be considered as the cache capacity, in whatever units you
	// choose to use.
	//
	// For example, if you want the cache to have a max capacity of 100MB, you
	// would set MaxCost to 100,000,000 and pass an item's number of bytes as
	// the `cost` parameter for calls to Set. If new items are accepted, the
	// eviction process will take care of making room for the new item and not
	// overflowing the MaxCost value.
	MaxCost int64
	// BufferItems determines the size of Get buffers.
	//
	// Unless you have a rare use case, using `64` as the BufferItems value
	// results in good performance.
	BufferItems int64
	// Metrics determines whether cache statistics are kept during the cache's
	// lifetime. There *is* some overhead to keeping statistics, so you should
	// only set this flag to true when testing or throughput performance isn't a
	// major factor.
	Metrics bool
	// OnEvict is called for every eviction and passes the hashed key, value,
	// and cost to the function.
	OnEvict func(item *Item[V])
	// OnReject is called for every rejection done via the policy.
	OnReject func(item *Item[V])
	// OnExit is called whenever a value is removed from cache. This can be
	// used to do manual memory deallocation. Would also be called on eviction
	// and rejection of the value.
	OnExit func(val V)
	// KeyToHash function is used to customize the key hashing algorithm.
	// Each key will be hashed using the provided function. If keyToHash value
	// is not set, the default keyToHash function is used.
	KeyToHash func(key K) (uint64, uint64)
	// Cost evaluates a value and outputs a corresponding cost. This function
	// is run after Set is called for a new item or an item update with a cost
	// param of 0.
	Cost func(value V) int64
	// IgnoreInternalCost set to true indicates to the cache that the cost of
	// internally storing the value should be ignored. This is useful when the
	// cost passed to set is not using bytes as units. Keep in mind that setting
	// this to true will increase the memory usage.
	IgnoreInternalCost bool
	// TtlTickerDurationInSec sets the value of the time ticker used to clean
	// up keys on TTL expiry. Zero selects the default bucket duration.
	TtlTickerDurationInSec int64
}

// itemFlag tells processItems how to apply a buffered Item.
type itemFlag byte

const (
	itemNew itemFlag = iota
	itemDelete
	itemUpdate
)

// Item is passed to setBuf so items can eventually be added to the cache.
type Item[V any] struct {
	flag     itemFlag
	Key      uint64
	Conflict uint64
	Value    V
	Cost     int64
	// Expiration is the absolute time after which the item is considered
	// expired; the zero time means the item never expires.
	Expiration time.Time
	// wg, when non-nil, marks a sentinel item used by Wait to flush setBuf;
	// processItems calls Done and skips all other handling.
	wg *sync.WaitGroup
}

// NewCache returns a new Cache instance and any configuration errors, if any.
167 func NewCache[K Key, V any](config *Config[K, V]) (*Cache[K, V], error) { 168 switch { 169 case config.NumCounters == 0: 170 return nil, errors.New("NumCounters can't be zero") 171 case config.MaxCost == 0: 172 return nil, errors.New("MaxCost can't be zero") 173 case config.BufferItems == 0: 174 return nil, errors.New("BufferItems can't be zero") 175 case config.TtlTickerDurationInSec == 0: 176 config.TtlTickerDurationInSec = bucketDurationSecs 177 } 178 policy := newPolicy[V](config.NumCounters, config.MaxCost) 179 cache := &Cache[K, V]{ 180 storedItems: newStore[V](), 181 cachePolicy: policy, 182 getBuf: newRingBuffer(policy, config.BufferItems), 183 setBuf: make(chan *Item[V], setBufSize), 184 keyToHash: config.KeyToHash, 185 stop: make(chan struct{}), 186 cost: config.Cost, 187 ignoreInternalCost: config.IgnoreInternalCost, 188 cleanupTicker: time.NewTicker(time.Duration(config.TtlTickerDurationInSec) * time.Second / 2), 189 } 190 cache.onExit = func(val V) { 191 if config.OnExit != nil { 192 config.OnExit(val) 193 } 194 } 195 cache.onEvict = func(item *Item[V]) { 196 if config.OnEvict != nil { 197 config.OnEvict(item) 198 } 199 cache.onExit(item.Value) 200 } 201 cache.onReject = func(item *Item[V]) { 202 if config.OnReject != nil { 203 config.OnReject(item) 204 } 205 cache.onExit(item.Value) 206 } 207 if cache.keyToHash == nil { 208 cache.keyToHash = z.KeyToHash[K] 209 } 210 211 if config.Metrics { 212 cache.collectMetrics() 213 } 214 // NOTE: benchmarks seem to show that performance decreases the more 215 // goroutines we have running cache.processItems(), so 1 should 216 // usually be sufficient 217 go cache.processItems() 218 return cache, nil 219 } 220 221 // Wait blocks until all buffered writes have been applied. This ensures a call to Set() 222 // will be visible to future calls to Get(). 
func (c *Cache[K, V]) Wait() {
	// NOTE(review): isClosed is read without synchronization; calling Wait
	// concurrently with Close may race (and a send on a closed setBuf would
	// panic) — confirm callers serialize Close with other operations.
	if c == nil || c.isClosed {
		return
	}
	// Push a sentinel item carrying a WaitGroup; processItems consumes the
	// buffer in order, so once the sentinel is handled every earlier Set has
	// been applied.
	wg := &sync.WaitGroup{}
	wg.Add(1)
	c.setBuf <- &Item[V]{wg: wg}
	wg.Wait()
}

// Get returns the value (if any) and a boolean representing whether the
// value was found or not. The value can be nil and the boolean can be true at
// the same time. Get will not return expired items.
func (c *Cache[K, V]) Get(key K) (V, bool) {
	if c == nil || c.isClosed {
		return zeroValue[V](), false
	}
	keyHash, conflictHash := c.keyToHash(key)

	// Record the access for the admission/eviction policy before reading the
	// store, so frequency counters see the access even on a miss.
	c.getBuf.Push(keyHash)
	value, ok := c.storedItems.Get(keyHash, conflictHash)
	if ok {
		c.Metrics.add(hit, keyHash, 1)
	} else {
		c.Metrics.add(miss, keyHash, 1)
	}
	return value, ok
}

// Set attempts to add the key-value item to the cache. If it returns false,
// then the Set was dropped and the key-value item isn't added to the cache. If
// it returns true, there's still a chance it could be dropped by the policy if
// it's determined that the key-value item isn't worth keeping, but otherwise the
// item will be added and other items will be evicted in order to make room.
//
// To dynamically evaluate the item's cost using the Config.Cost function, set
// the cost parameter to 0 and Cost will be run when needed in order to find
// the item's true cost.
func (c *Cache[K, V]) Set(key K, value V, cost int64) bool {
	// A zero TTL means "never expires"; delegate to SetWithTTL.
	return c.SetWithTTL(key, value, cost, 0*time.Second)
}

// SetWithTTL works like Set but adds a key-value pair to the cache that will expire
// after the specified TTL (time to live) has passed. A zero value means the value never
// expires, which is identical to calling Set. A negative value is a no-op and the value
// is discarded.
func (c *Cache[K, V]) SetWithTTL(key K, value V, cost int64, ttl time.Duration) bool {
	if c == nil || c.isClosed {
		return false
	}

	var expiration time.Time
	switch {
	case ttl == 0:
		// No expiration: leave expiration as the zero time.
		break
	case ttl < 0:
		// Treat this as a no-op.
		return false
	default:
		expiration = time.Now().Add(ttl)
	}

	keyHash, conflictHash := c.keyToHash(key)
	i := &Item[V]{
		flag:       itemNew,
		Key:        keyHash,
		Conflict:   conflictHash,
		Value:      value,
		Cost:       cost,
		Expiration: expiration,
	}
	// cost is eventually updated (by processItems). The expiration must also
	// be immediately updated to prevent items from being prematurely removed
	// from the map.
	if prev, ok := c.storedItems.Update(i); ok {
		c.onExit(prev)
		i.flag = itemUpdate
	}
	// Attempt to send item to cachePolicy; non-blocking so Sets can be
	// dropped under contention rather than stalling the caller.
	select {
	case c.setBuf <- i:
		return true
	default:
		if i.flag == itemUpdate {
			// Return true if this was an update operation since we've already
			// updated the storedItems. For all the other operations (set/delete), we
			// return false which means the item was not inserted.
			return true
		}
		c.Metrics.add(dropSets, keyHash, 1)
		return false
	}
}

// Del deletes the key-value item from the cache if it exists.
func (c *Cache[K, V]) Del(key K) {
	if c == nil || c.isClosed {
		return
	}
	keyHash, conflictHash := c.keyToHash(key)
	// Delete immediately from the store.
	_, prev := c.storedItems.Del(keyHash, conflictHash)
	c.onExit(prev)
	// If we've set an item, it would be applied slightly later.
	// So we must push the same item to `setBuf` with the deletion flag.
	// This ensures that if a set is followed by a delete, it will be
	// applied in the correct order.
	// NOTE(review): this send is blocking (unlike Set's) — presumably to
	// guarantee ordering; confirm that Del is never called concurrently
	// with Close, where setBuf is closed.
	c.setBuf <- &Item[V]{
		flag:     itemDelete,
		Key:      keyHash,
		Conflict: conflictHash,
	}
}

// GetTTL returns the TTL for the specified key and a bool that is true if the
// item was found and is not expired.
func (c *Cache[K, V]) GetTTL(key K) (time.Duration, bool) {
	if c == nil {
		return 0, false
	}

	keyHash, conflictHash := c.keyToHash(key)
	if _, ok := c.storedItems.Get(keyHash, conflictHash); !ok {
		// Not found.
		return 0, false
	}

	expiration := c.storedItems.Expiration(keyHash)
	if expiration.IsZero() {
		// Found but has no expiration.
		return 0, true
	}

	if time.Now().After(expiration) {
		// Found but expired.
		return 0, false
	}

	return time.Until(expiration), true
}

// Close stops all goroutines and closes all channels.
func (c *Cache[K, V]) Close() {
	if c == nil || c.isClosed {
		return
	}
	// Clear drains setBuf, stops the running processItems goroutine, and
	// restarts a fresh one...
	c.Clear()

	// ...which this send then blocks until it has returned.
	c.stop <- struct{}{}
	close(c.stop)
	close(c.setBuf)
	c.cachePolicy.Close()
	c.cleanupTicker.Stop()
	c.isClosed = true
}

// Clear empties the hashmap and zeroes all cachePolicy counters. Note that this is
// not an atomic operation (but that shouldn't be a problem as it's assumed that
// Set/Get calls won't be occurring until after this).
func (c *Cache[K, V]) Clear() {
	if c == nil || c.isClosed {
		return
	}
	// Block until the processItems goroutine has returned, so nothing is
	// concurrently consuming setBuf while we drain it below.
	c.stop <- struct{}{}

	// Clear out the setBuf channel.
loop:
	for {
		select {
		case i := <-c.setBuf:
			if i.wg != nil {
				// Sentinel pushed by Wait: release the waiter.
				i.wg.Done()
				continue
			}
			if i.flag != itemUpdate {
				// In itemUpdate, the value is already set in the storedItems. So, no need to call
				// onEvict here.
				c.onEvict(i)
			}
		default:
			break loop
		}
	}

	// Clear value hashmap and cachePolicy data.
	c.cachePolicy.Clear()
	c.storedItems.Clear(c.onEvict)
	// Only reset metrics if they're enabled.
	if c.Metrics != nil {
		c.Metrics.Clear()
	}
	// Restart processItems goroutine.
	go c.processItems()
}

// MaxCost returns the max cost of the cache.
func (c *Cache[K, V]) MaxCost() int64 {
	if c == nil {
		return 0
	}
	return c.cachePolicy.MaxCost()
}

// UpdateMaxCost updates the maxCost of an existing cache.
func (c *Cache[K, V]) UpdateMaxCost(maxCost int64) {
	if c == nil {
		return
	}
	c.cachePolicy.UpdateMaxCost(maxCost)
}

// processItems is run by goroutines processing the Set buffer.
func (c *Cache[K, V]) processItems() {
	// startTs records when keys were admitted, to measure key lifetimes for
	// the eviction histogram. Bounded to numToKeep entries.
	startTs := make(map[uint64]time.Time)
	numToKeep := 100000 // TODO: Make this configurable via options.

	trackAdmission := func(key uint64) {
		if c.Metrics == nil {
			return
		}
		startTs[key] = time.Now()
		if len(startTs) > numToKeep {
			// Evict arbitrary entries (map iteration order) until back
			// under the cap.
			for k := range startTs {
				if len(startTs) <= numToKeep {
					break
				}
				delete(startTs, k)
			}
		}
	}
	onEvict := func(i *Item[V]) {
		if ts, has := startTs[i.Key]; has {
			c.Metrics.trackEviction(int64(time.Since(ts) / time.Second))
			delete(startTs, i.Key)
		}
		if c.onEvict != nil {
			c.onEvict(i)
		}
	}

	for {
		select {
		case i := <-c.setBuf:
			if i.wg != nil {
				// Sentinel from Wait: everything buffered before it has
				// been applied.
				i.wg.Done()
				continue
			}
			// Calculate item cost value if new or update.
			if i.Cost == 0 && c.cost != nil && i.flag != itemDelete {
				i.Cost = c.cost(i.Value)
			}
			if !c.ignoreInternalCost {
				// Add the cost of internally storing the object.
				i.Cost += itemSize
			}

			switch i.flag {
			case itemNew:
				victims, added := c.cachePolicy.Add(i.Key, i.Cost)
				if added {
					c.storedItems.Set(i)
					c.Metrics.add(keyAdd, i.Key, 1)
					trackAdmission(i.Key)
				} else {
					c.onReject(i)
				}
				for _, victim := range victims {
					// The policy doesn't know conflict hashes; Del with 0
					// returns the stored conflict and value for the callback.
					victim.Conflict, victim.Value = c.storedItems.Del(victim.Key, 0)
					onEvict(victim)
				}

			case itemUpdate:
				// The store was already updated in SetWithTTL; only the
				// policy's cost bookkeeping needs refreshing.
				c.cachePolicy.Update(i.Key, i.Cost)

			case itemDelete:
				c.cachePolicy.Del(i.Key) // Deals with metrics updates.
				_, val := c.storedItems.Del(i.Key, i.Conflict)
				c.onExit(val)
			}
		case <-c.cleanupTicker.C:
			c.storedItems.Cleanup(c.cachePolicy, onEvict)
		case <-c.stop:
			return
		}
	}
}

// collectMetrics just creates a new *Metrics instance and adds the pointers
// to the cache and policy instances.
func (c *Cache[K, V]) collectMetrics() {
	c.Metrics = newMetrics()
	c.cachePolicy.CollectMetrics(c.Metrics)
}

// metricType indexes a counter family within Metrics.all.
type metricType int

const (
	// The following 2 keep track of hits and misses.
	hit = iota
	miss
	// The following 3 keep track of number of keys added, updated and evicted.
	keyAdd
	keyUpdate
	keyEvict
	// The following 2 keep track of cost of keys added and evicted.
	costAdd
	costEvict
	// The following keep track of how many sets were dropped or rejected later.
	dropSets
	rejectSets
	// The following 2 keep track of how many gets were kept and dropped on the
	// floor.
	dropGets
	keepGets
	// This should be the final enum. Other enums should be set before this.
	doNotUse
)

// stringFor returns the human-readable name of a metric for Metrics.String.
func stringFor(t metricType) string {
	switch t {
	case hit:
		return "hit"
	case miss:
		return "miss"
	case keyAdd:
		return "keys-added"
	case keyUpdate:
		return "keys-updated"
	case keyEvict:
		return "keys-evicted"
	case costAdd:
		return "cost-added"
	case costEvict:
		return "cost-evicted"
	case dropSets:
		return "sets-dropped"
	case rejectSets:
		return "sets-rejected" // by policy.
	case dropGets:
		return "gets-dropped"
	case keepGets:
		return "gets-kept"
	default:
		return "unidentified"
	}
}

// Metrics is a snapshot of performance statistics for the lifetime of a cache instance.
type Metrics struct {
	// all holds, per metric type, 256 sharded counters that are summed on read.
	all [doNotUse][]*uint64

	mu   sync.RWMutex
	life *z.HistogramData // Tracks the life expectancy of a key.
}

// newMetrics allocates the sharded counters and the lifetime histogram.
func newMetrics() *Metrics {
	s := &Metrics{
		life: z.NewHistogramData(z.HistogramBounds(1, 16)),
	}
	for i := 0; i < doNotUse; i++ {
		s.all[i] = make([]*uint64, 256)
		slice := s.all[i]
		for j := range slice {
			slice[j] = new(uint64)
		}
	}
	return s
}

// add atomically increments metric t by delta, sharded by hash. Safe to call
// on a nil *Metrics (no-op), which is how the cache avoids nil checks.
func (p *Metrics) add(t metricType, hash, delta uint64) {
	if p == nil {
		return
	}
	valp := p.all[t]
	// Avoid false sharing by padding at least 64 bytes of space between two
	// atomic counters which would be incremented.
	idx := (hash % 25) * 10
	atomic.AddUint64(valp[idx], delta)
}

// get sums all shards of metric t. Returns 0 on a nil receiver.
func (p *Metrics) get(t metricType) uint64 {
	if p == nil {
		return 0
	}
	valp := p.all[t]
	var total uint64
	for i := range valp {
		total += atomic.LoadUint64(valp[i])
	}
	return total
}

// Hits is the number of Get calls where a value was found for the corresponding key.
func (p *Metrics) Hits() uint64 {
	return p.get(hit)
}

// Misses is the number of Get calls where a value was not found for the corresponding key.
623 func (p *Metrics) Misses() uint64 { 624 return p.get(miss) 625 } 626 627 // KeysAdded is the total number of Set calls where a new key-value item was added. 628 func (p *Metrics) KeysAdded() uint64 { 629 return p.get(keyAdd) 630 } 631 632 // KeysUpdated is the total number of Set calls where the value was updated. 633 func (p *Metrics) KeysUpdated() uint64 { 634 return p.get(keyUpdate) 635 } 636 637 // KeysEvicted is the total number of keys evicted. 638 func (p *Metrics) KeysEvicted() uint64 { 639 return p.get(keyEvict) 640 } 641 642 // CostAdded is the sum of costs that have been added (successful Set calls). 643 func (p *Metrics) CostAdded() uint64 { 644 return p.get(costAdd) 645 } 646 647 // CostEvicted is the sum of all costs that have been evicted. 648 func (p *Metrics) CostEvicted() uint64 { 649 return p.get(costEvict) 650 } 651 652 // SetsDropped is the number of Set calls that don't make it into internal 653 // buffers (due to contention or some other reason). 654 func (p *Metrics) SetsDropped() uint64 { 655 return p.get(dropSets) 656 } 657 658 // SetsRejected is the number of Set calls rejected by the policy (TinyLFU). 659 func (p *Metrics) SetsRejected() uint64 { 660 return p.get(rejectSets) 661 } 662 663 // GetsDropped is the number of Get counter increments that are dropped 664 // internally. 665 func (p *Metrics) GetsDropped() uint64 { 666 return p.get(dropGets) 667 } 668 669 // GetsKept is the number of Get counter increments that are kept. 670 func (p *Metrics) GetsKept() uint64 { 671 return p.get(keepGets) 672 } 673 674 // Ratio is the number of Hits over all accesses (Hits + Misses). This is the 675 // percentage of successful Get calls. 
676 func (p *Metrics) Ratio() float64 { 677 if p == nil { 678 return 0.0 679 } 680 hits, misses := p.get(hit), p.get(miss) 681 if hits == 0 && misses == 0 { 682 return 0.0 683 } 684 return float64(hits) / float64(hits+misses) 685 } 686 687 func (p *Metrics) trackEviction(numSeconds int64) { 688 if p == nil { 689 return 690 } 691 p.mu.Lock() 692 defer p.mu.Unlock() 693 p.life.Update(numSeconds) 694 } 695 696 func (p *Metrics) LifeExpectancySeconds() *z.HistogramData { 697 if p == nil { 698 return nil 699 } 700 p.mu.RLock() 701 defer p.mu.RUnlock() 702 return p.life.Copy() 703 } 704 705 // Clear resets all the metrics. 706 func (p *Metrics) Clear() { 707 if p == nil { 708 return 709 } 710 for i := 0; i < doNotUse; i++ { 711 for j := range p.all[i] { 712 atomic.StoreUint64(p.all[i][j], 0) 713 } 714 } 715 p.mu.Lock() 716 p.life = z.NewHistogramData(z.HistogramBounds(1, 16)) 717 p.mu.Unlock() 718 } 719 720 // String returns a string representation of the metrics. 721 func (p *Metrics) String() string { 722 if p == nil { 723 return "" 724 } 725 var buf bytes.Buffer 726 for i := 0; i < doNotUse; i++ { 727 t := metricType(i) 728 fmt.Fprintf(&buf, "%s: %d ", stringFor(t), p.get(t)) 729 } 730 fmt.Fprintf(&buf, "gets-total: %d ", p.get(hit)+p.get(miss)) 731 fmt.Fprintf(&buf, "hit-ratio: %.2f", p.Ratio()) 732 return buf.String() 733 }