github.com/fiatjaf/generic-ristretto@v0.0.1/cache.go (about) 1 /* 2 * Copyright 2019 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Ristretto is a fast, fixed size, in-memory cache with a dual focus on 18 // throughput and hit ratio performance. You can easily add Ristretto to an 19 // existing system and keep the most valuable data where you need it. 20 package ristretto 21 22 import ( 23 "bytes" 24 "errors" 25 "fmt" 26 "sync" 27 "sync/atomic" 28 "time" 29 "unsafe" 30 31 "github.com/fiatjaf/generic-ristretto/z" 32 ) 33 34 var ( 35 // TODO: find the optimal value for this or make it configurable 36 setBufSize = 32 * 1024 37 ) 38 39 const itemSize = int64(unsafe.Sizeof(storeItem[any]{})) 40 41 func zeroValue[T any]() T { 42 var zero T 43 return zero 44 } 45 46 // Cache is a thread-safe implementation of a hashmap with a TinyLFU admission 47 // policy and a Sampled LFU eviction policy. You can use the same Cache instance 48 // from as many goroutines as you want. 49 type Cache[K any, V any] struct { 50 // storedItems is the central concurrent hashmap where key-value items are stored. 51 storedItems store[V] 52 // cachePolicy determines what gets let in to the cache and what gets kicked out. 53 cachePolicy policy[V] 54 // getBuf is a custom ring buffer implementation that gets pushed to when 55 // keys are read. 56 getBuf *ringBuffer 57 // setBuf is a buffer allowing us to batch/drop Sets during times of high 58 // contention. 59 setBuf chan *Item[V] 60 // onEvict is called for item evictions. 61 onEvict func(*Item[V]) 62 // onReject is called when an item is rejected via admission policy. 63 onReject func(*Item[V]) 64 // onExit is called whenever a value goes out of scope from the cache. 65 onExit (func(V)) 66 // KeyToHash function is used to customize the key hashing algorithm. 67 // Each key will be hashed using the provided function. If keyToHash value 68 // is not set, the default keyToHash function is used. 69 keyToHash func(any) (uint64, uint64) 70 // stop is used to stop the processItems goroutine. 71 stop chan struct{} 72 // indicates whether cache is closed. 73 isClosed bool 74 // cost calculates cost from a value. 75 cost func(value V) int64 76 // ignoreInternalCost dictates whether to ignore the cost of internally storing 77 // the item in the cost calculation. 78 ignoreInternalCost bool 79 // cleanupTicker is used to periodically check for entries whose TTL has passed. 80 cleanupTicker *time.Ticker 81 // Metrics contains a running log of important statistics like hits, misses, 82 // and dropped items. 83 Metrics *Metrics 84 } 85 86 // Config is passed to NewCache for creating new Cache instances. 87 type Config[K any, V any] struct { 88 // NumCounters determines the number of counters (keys) to keep that hold 89 // access frequency information. It's generally a good idea to have more 90 // counters than the max cache capacity, as this will improve eviction 91 // accuracy and subsequent hit ratios. 92 // 93 // For example, if you expect your cache to hold 1,000,000 items when full, 94 // NumCounters should be 10,000,000 (10x). Each counter takes up roughly 95 // 3 bytes (4 bits for each counter * 4 copies plus about a byte per 96 // counter for the bloom filter). Note that the number of counters is 97 // internally rounded up to the nearest power of 2, so the space usage 98 // may be a little larger than 3 bytes * NumCounters. 99 NumCounters int64 100 // MaxCost can be considered as the cache capacity, in whatever units you 101 // choose to use. 102 // 103 // For example, if you want the cache to have a max capacity of 100MB, you 104 // would set MaxCost to 100,000,000 and pass an item's number of bytes as 105 // the `cost` parameter for calls to Set. If new items are accepted, the 106 // eviction process will take care of making room for the new item and not 107 // overflowing the MaxCost value. 108 MaxCost int64 109 // BufferItems determines the size of Get buffers. 110 // 111 // Unless you have a rare use case, using `64` as the BufferItems value 112 // results in good performance. 113 BufferItems int64 114 // Metrics determines whether cache statistics are kept during the cache's 115 // lifetime. There *is* some overhead to keeping statistics, so you should 116 // only set this flag to true when testing or throughput performance isn't a 117 // major factor. 118 Metrics bool 119 // OnEvict is called for every eviction and passes the hashed key, value, 120 // and cost to the function. 121 OnEvict func(item *Item[V]) 122 // OnReject is called for every rejection done via the policy. 123 OnReject func(item *Item[V]) 124 // OnExit is called whenever a value is removed from cache. This can be 125 // used to do manual memory deallocation. Would also be called on eviction 126 // and rejection of the value. 127 OnExit func(val V) 128 // KeyToHash function is used to customize the key hashing algorithm. 129 // Each key will be hashed using the provided function. If keyToHash value 130 // is not set, the default keyToHash function is used. 131 KeyToHash func(key K) (uint64, uint64) 132 // Cost evaluates a value and outputs a corresponding cost. This function 133 // is ran after Set is called for a new item or an item update with a cost 134 // param of 0. 135 Cost func(value V) int64 136 // IgnoreInternalCost set to true indicates to the cache that the cost of 137 // internally storing the value should be ignored. This is useful when the 138 // cost passed to set is not using bytes as units. Keep in mind that setting 139 // this to true will increase the memory usage. 140 IgnoreInternalCost bool 141 } 142 143 type itemFlag byte 144 145 const ( 146 itemNew itemFlag = iota 147 itemDelete 148 itemUpdate 149 ) 150 151 // Item is passed to setBuf so items can eventually be added to the cache. 152 type Item[V any] struct { 153 flag itemFlag 154 Key uint64 155 Conflict uint64 156 Value V 157 Cost int64 158 Expiration time.Time 159 wg *sync.WaitGroup 160 } 161 162 // NewCache returns a new Cache instance and any configuration errors, if any. 163 func NewCache[K any, V any](config *Config[K, V]) (*Cache[K, V], error) { 164 switch { 165 case config.NumCounters == 0: 166 return nil, errors.New("NumCounters can't be zero") 167 case config.MaxCost == 0: 168 return nil, errors.New("MaxCost can't be zero") 169 case config.BufferItems == 0: 170 return nil, errors.New("BufferItems can't be zero") 171 } 172 policy := newPolicy[V](config.NumCounters, config.MaxCost) 173 cache := &Cache[K, V]{ 174 storedItems: newStore[V](), 175 cachePolicy: policy, 176 getBuf: newRingBuffer(policy, config.BufferItems), 177 setBuf: make(chan *Item[V], setBufSize), 178 stop: make(chan struct{}), 179 cost: config.Cost, 180 ignoreInternalCost: config.IgnoreInternalCost, 181 cleanupTicker: time.NewTicker(time.Duration(bucketDurationSecs) * time.Second / 2), 182 } 183 cache.onExit = func(val V) { 184 if config.OnExit != nil { 185 config.OnExit(val) 186 } 187 } 188 cache.onEvict = func(item *Item[V]) { 189 if config.OnEvict != nil { 190 config.OnEvict(item) 191 } 192 cache.onExit(item.Value) 193 } 194 cache.onReject = func(item *Item[V]) { 195 if config.OnReject != nil { 196 config.OnReject(item) 197 } 198 cache.onExit(item.Value) 199 } 200 201 // The use must provide a key to hash for non primitive types. 202 if config.KeyToHash == nil { 203 var emptyKey K 204 cache.keyToHash = z.GetKeyToHash(emptyKey) 205 } else { 206 // This forces the user to create a function that takes the key type, and will avoid runtime errors. 207 cache.keyToHash = func(key any) (uint64, uint64) { 208 return config.KeyToHash(key.(K)) 209 } 210 } 211 if config.Metrics { 212 cache.collectMetrics() 213 } 214 // NOTE: benchmarks seem to show that performance decreases the more 215 // goroutines we have running cache.processItems(), so 1 should 216 // usually be sufficient 217 go cache.processItems() 218 219 return cache, nil 220 } 221 222 func (c *Cache[K, V]) Wait() { 223 if c == nil || c.isClosed { 224 return 225 } 226 wg := &sync.WaitGroup{} 227 wg.Add(1) 228 c.setBuf <- &Item[V]{wg: wg} 229 wg.Wait() 230 } 231 232 // Get returns the value (if any) and a boolean representing whether the 233 // value was found or not. The value can be nil and the boolean can be true at 234 // the same time. 235 func (c *Cache[K, V]) Get(key K) (V, bool) { 236 if c == nil || c.isClosed { 237 return zeroValue[V](), false 238 } 239 keyHash, conflictHash := c.keyToHash(key) 240 241 c.getBuf.Push(keyHash) 242 value, ok := c.storedItems.Get(keyHash, conflictHash) 243 if ok { 244 c.Metrics.add(hit, keyHash, 1) 245 } else { 246 c.Metrics.add(miss, keyHash, 1) 247 } 248 return value, ok 249 } 250 251 // Set attempts to add the key-value item to the cache. If it returns false, 252 // then the Set was dropped and the key-value item isn't added to the cache. If 253 // it returns true, there's still a chance it could be dropped by the policy if 254 // its determined that the key-value item isn't worth keeping, but otherwise the 255 // item will be added and other items will be evicted in order to make room. 256 // 257 // To dynamically evaluate the items cost using the Config.Coster function, set 258 // the cost parameter to 0 and Coster will be ran when needed in order to find 259 // the items true cost. 260 func (c *Cache[K, V]) Set(key K, value V, cost int64) bool { 261 return c.SetWithTTL(key, value, cost, 0*time.Second) 262 } 263 264 // SetWithTTL works like Set but adds a key-value pair to the cache that will expire 265 // after the specified TTL (time to live) has passed. A zero value means the value never 266 // expires, which is identical to calling Set. A negative value is a no-op and the value 267 // is discarded. 268 func (c *Cache[K, V]) SetWithTTL(key K, value V, cost int64, ttl time.Duration) bool { 269 if c == nil || c.isClosed || &key == nil { 270 return false 271 } 272 273 var expiration time.Time 274 switch { 275 case ttl == 0: 276 // No expiration. 277 break 278 case ttl < 0: 279 // Treat this a a no-op. 280 return false 281 default: 282 expiration = time.Now().Add(ttl) 283 } 284 285 keyHash, conflictHash := c.keyToHash(key) 286 i := &Item[V]{ 287 flag: itemNew, 288 Key: keyHash, 289 Conflict: conflictHash, 290 Value: value, 291 Cost: cost, 292 Expiration: expiration, 293 } 294 // cost is eventually updated. The expiration must also be immediately updated 295 // to prevent items from being prematurely removed from the map. 296 if prev, ok := c.storedItems.Update(i); ok { 297 c.onExit(prev) 298 i.flag = itemUpdate 299 } 300 // Attempt to send item to cachePolicy. 301 select { 302 case c.setBuf <- i: 303 return true 304 default: 305 if i.flag == itemUpdate { 306 // Return true if this was an update operation since we've already 307 // updated the storedItems. For all the other operations (set/delete), we 308 // return false which means the item was not inserted. 309 return true 310 } 311 c.Metrics.add(dropSets, keyHash, 1) 312 return false 313 } 314 } 315 316 // Del deletes the key-value item from the cache if it exists. 317 func (c *Cache[K, V]) Del(key K) { 318 if c == nil || c.isClosed || &key == nil { 319 return 320 } 321 keyHash, conflictHash := c.keyToHash(key) 322 // Delete immediately. 323 _, prev := c.storedItems.Del(keyHash, conflictHash) 324 c.onExit(prev) 325 // If we've set an item, it would be applied slightly later. 326 // So we must push the same item to `setBuf` with the deletion flag. 327 // This ensures that if a set is followed by a delete, it will be 328 // applied in the correct order. 329 c.setBuf <- &Item[V]{ 330 flag: itemDelete, 331 Key: keyHash, 332 Conflict: conflictHash, 333 } 334 } 335 336 // GetTTL returns the TTL for the specified key and a bool that is true if the 337 // item was found and is not expired. 338 func (c *Cache[K, V]) GetTTL(key K) (time.Duration, bool) { 339 if c == nil || &key == nil { 340 return 0, false 341 } 342 343 keyHash, conflictHash := c.keyToHash(key) 344 if _, ok := c.storedItems.Get(keyHash, conflictHash); !ok { 345 // not found 346 return 0, false 347 } 348 349 expiration := c.storedItems.Expiration(keyHash) 350 if expiration.IsZero() { 351 // found but no expiration 352 return 0, true 353 } 354 355 if time.Now().After(expiration) { 356 // found but expired 357 return 0, false 358 } 359 360 return time.Until(expiration), true 361 } 362 363 // Close stops all goroutines and closes all channels. 364 func (c *Cache[K, V]) Close() { 365 if c == nil || c.isClosed { 366 return 367 } 368 c.Clear() 369 370 // Block until processItems goroutine is returned. 371 c.stop <- struct{}{} 372 close(c.stop) 373 close(c.setBuf) 374 c.cachePolicy.Close() 375 c.isClosed = true 376 } 377 378 // Clear empties the hashmap and zeroes all cachePolicy counters. Note that this is 379 // not an atomic operation (but that shouldn't be a problem as it's assumed that 380 // Set/Get calls won't be occurring until after this). 381 func (c *Cache[K, V]) Clear() { 382 if c == nil || c.isClosed { 383 return 384 } 385 // Block until processItems goroutine is returned. 386 c.stop <- struct{}{} 387 388 // Clear out the setBuf channel. 389 loop: 390 for { 391 select { 392 case i := <-c.setBuf: 393 if i.wg != nil { 394 i.wg.Done() 395 continue 396 } 397 if i.flag != itemUpdate { 398 // In itemUpdate, the value is already set in the storedItems. So, no need to call 399 // onEvict here. 400 c.onEvict(i) 401 } 402 default: 403 break loop 404 } 405 } 406 407 // Clear value hashmap and cachePolicy data. 408 c.cachePolicy.Clear() 409 c.storedItems.Clear(c.onEvict) 410 // Only reset metrics if they're enabled. 411 if c.Metrics != nil { 412 c.Metrics.Clear() 413 } 414 // Restart processItems goroutine. 415 go c.processItems() 416 } 417 418 // MaxCost returns the max cost of the cache. 419 func (c *Cache[K, V]) MaxCost() int64 { 420 if c == nil { 421 return 0 422 } 423 return c.cachePolicy.MaxCost() 424 } 425 426 // UpdateMaxCost updates the maxCost of an existing cache. 427 func (c *Cache[K, V]) UpdateMaxCost(maxCost int64) { 428 if c == nil { 429 return 430 } 431 c.cachePolicy.UpdateMaxCost(maxCost) 432 } 433 434 // processItems is ran by goroutines processing the Set buffer. 435 func (c *Cache[K, V]) processItems() { 436 startTs := make(map[uint64]time.Time) 437 numToKeep := 100000 // TODO: Make this configurable via options. 438 439 trackAdmission := func(key uint64) { 440 if c.Metrics == nil { 441 return 442 } 443 startTs[key] = time.Now() 444 if len(startTs) > numToKeep { 445 for k := range startTs { 446 if len(startTs) <= numToKeep { 447 break 448 } 449 delete(startTs, k) 450 } 451 } 452 } 453 onEvict := func(i *Item[V]) { 454 if ts, has := startTs[i.Key]; has { 455 c.Metrics.trackEviction(int64(time.Since(ts) / time.Second)) 456 delete(startTs, i.Key) 457 } 458 if c.onEvict != nil { 459 c.onEvict(i) 460 } 461 } 462 463 for { 464 select { 465 case i := <-c.setBuf: 466 if i.wg != nil { 467 i.wg.Done() 468 continue 469 } 470 // Calculate item cost value if new or update. 471 if i.Cost == 0 && c.cost != nil && i.flag != itemDelete { 472 i.Cost = c.cost(i.Value) 473 } 474 if !c.ignoreInternalCost { 475 // Add the cost of internally storing the object. 476 i.Cost += itemSize 477 } 478 479 switch i.flag { 480 case itemNew: 481 victims, added := c.cachePolicy.Add(i.Key, i.Cost) 482 if added { 483 c.storedItems.Set(i) 484 c.Metrics.add(keyAdd, i.Key, 1) 485 trackAdmission(i.Key) 486 } else { 487 c.onReject(i) 488 } 489 for _, victim := range victims { 490 victim.Conflict, victim.Value = c.storedItems.Del(victim.Key, 0) 491 onEvict(victim) 492 } 493 494 case itemUpdate: 495 c.cachePolicy.Update(i.Key, i.Cost) 496 497 case itemDelete: 498 c.cachePolicy.Del(i.Key) // Deals with metrics updates. 499 _, val := c.storedItems.Del(i.Key, i.Conflict) 500 c.onExit(val) 501 } 502 case <-c.cleanupTicker.C: 503 c.storedItems.Cleanup(c.cachePolicy, onEvict) 504 case <-c.stop: 505 return 506 } 507 } 508 } 509 510 // collectMetrics just creates a new *Metrics instance and adds the pointers 511 // to the cache and policy instances. 512 func (c *Cache[K, V]) collectMetrics() { 513 c.Metrics = newMetrics() 514 c.cachePolicy.CollectMetrics(c.Metrics) 515 } 516 517 type metricType int 518 519 const ( 520 // The following 2 keep track of hits and misses. 521 hit = iota 522 miss 523 // The following 3 keep track of number of keys added, updated and evicted. 524 keyAdd 525 keyUpdate 526 keyEvict 527 // The following 2 keep track of cost of keys added and evicted. 528 costAdd 529 costEvict 530 // The following keep track of how many sets were dropped or rejected later. 531 dropSets 532 rejectSets 533 // The following 2 keep track of how many gets were kept and dropped on the 534 // floor. 535 dropGets 536 keepGets 537 // This should be the final enum. Other enums should be set before this. 538 doNotUse 539 ) 540 541 func stringFor(t metricType) string { 542 switch t { 543 case hit: 544 return "hit" 545 case miss: 546 return "miss" 547 case keyAdd: 548 return "keys-added" 549 case keyUpdate: 550 return "keys-updated" 551 case keyEvict: 552 return "keys-evicted" 553 case costAdd: 554 return "cost-added" 555 case costEvict: 556 return "cost-evicted" 557 case dropSets: 558 return "sets-dropped" 559 case rejectSets: 560 return "sets-rejected" // by policy. 561 case dropGets: 562 return "gets-dropped" 563 case keepGets: 564 return "gets-kept" 565 default: 566 return "unidentified" 567 } 568 } 569 570 // Metrics is a snapshot of performance statistics for the lifetime of a cache instance. 571 type Metrics struct { 572 all [doNotUse][]*uint64 573 574 mu sync.RWMutex 575 life *z.HistogramData // Tracks the life expectancy of a key. 576 } 577 578 func newMetrics() *Metrics { 579 s := &Metrics{ 580 life: z.NewHistogramData(z.HistogramBounds(1, 16)), 581 } 582 for i := 0; i < doNotUse; i++ { 583 s.all[i] = make([]*uint64, 256) 584 slice := s.all[i] 585 for j := range slice { 586 slice[j] = new(uint64) 587 } 588 } 589 return s 590 } 591 592 func (p *Metrics) add(t metricType, hash, delta uint64) { 593 if p == nil { 594 return 595 } 596 valp := p.all[t] 597 // Avoid false sharing by padding at least 64 bytes of space between two 598 // atomic counters which would be incremented. 599 idx := (hash % 25) * 10 600 atomic.AddUint64(valp[idx], delta) 601 } 602 603 func (p *Metrics) get(t metricType) uint64 { 604 if p == nil { 605 return 0 606 } 607 valp := p.all[t] 608 var total uint64 609 for i := range valp { 610 total += atomic.LoadUint64(valp[i]) 611 } 612 return total 613 } 614 615 // Hits is the number of Get calls where a value was found for the corresponding key. 616 func (p *Metrics) Hits() uint64 { 617 return p.get(hit) 618 } 619 620 // Misses is the number of Get calls where a value was not found for the corresponding key. 621 func (p *Metrics) Misses() uint64 { 622 return p.get(miss) 623 } 624 625 // KeysAdded is the total number of Set calls where a new key-value item was added. 626 func (p *Metrics) KeysAdded() uint64 { 627 return p.get(keyAdd) 628 } 629 630 // KeysUpdated is the total number of Set calls where the value was updated. 631 func (p *Metrics) KeysUpdated() uint64 { 632 return p.get(keyUpdate) 633 } 634 635 // KeysEvicted is the total number of keys evicted. 636 func (p *Metrics) KeysEvicted() uint64 { 637 return p.get(keyEvict) 638 } 639 640 // CostAdded is the sum of costs that have been added (successful Set calls). 641 func (p *Metrics) CostAdded() uint64 { 642 return p.get(costAdd) 643 } 644 645 // CostEvicted is the sum of all costs that have been evicted. 646 func (p *Metrics) CostEvicted() uint64 { 647 return p.get(costEvict) 648 } 649 650 // SetsDropped is the number of Set calls that don't make it into internal 651 // buffers (due to contention or some other reason). 652 func (p *Metrics) SetsDropped() uint64 { 653 return p.get(dropSets) 654 } 655 656 // SetsRejected is the number of Set calls rejected by the policy (TinyLFU). 657 func (p *Metrics) SetsRejected() uint64 { 658 return p.get(rejectSets) 659 } 660 661 // GetsDropped is the number of Get counter increments that are dropped 662 // internally. 663 func (p *Metrics) GetsDropped() uint64 { 664 return p.get(dropGets) 665 } 666 667 // GetsKept is the number of Get counter increments that are kept. 668 func (p *Metrics) GetsKept() uint64 { 669 return p.get(keepGets) 670 } 671 672 // Ratio is the number of Hits over all accesses (Hits + Misses). This is the 673 // percentage of successful Get calls. 674 func (p *Metrics) Ratio() float64 { 675 if p == nil { 676 return 0.0 677 } 678 hits, misses := p.get(hit), p.get(miss) 679 if hits == 0 && misses == 0 { 680 return 0.0 681 } 682 return float64(hits) / float64(hits+misses) 683 } 684 685 func (p *Metrics) trackEviction(numSeconds int64) { 686 if p == nil { 687 return 688 } 689 p.mu.Lock() 690 defer p.mu.Unlock() 691 p.life.Update(numSeconds) 692 } 693 694 func (p *Metrics) LifeExpectancySeconds() *z.HistogramData { 695 if p == nil { 696 return nil 697 } 698 p.mu.RLock() 699 defer p.mu.RUnlock() 700 return p.life.Copy() 701 } 702 703 // Clear resets all the metrics. 704 func (p *Metrics) Clear() { 705 if p == nil { 706 return 707 } 708 for i := 0; i < doNotUse; i++ { 709 for j := range p.all[i] { 710 atomic.StoreUint64(p.all[i][j], 0) 711 } 712 } 713 p.mu.Lock() 714 p.life = z.NewHistogramData(z.HistogramBounds(1, 16)) 715 p.mu.Unlock() 716 } 717 718 // String returns a string representation of the metrics. 719 func (p *Metrics) String() string { 720 if p == nil { 721 return "" 722 } 723 var buf bytes.Buffer 724 for i := 0; i < doNotUse; i++ { 725 t := metricType(i) 726 fmt.Fprintf(&buf, "%s: %d ", stringFor(t), p.get(t)) 727 } 728 fmt.Fprintf(&buf, "gets-total: %d ", p.get(hit)+p.get(miss)) 729 fmt.Fprintf(&buf, "hit-ratio: %.2f", p.Ratio()) 730 return buf.String() 731 }