github.com/bhojpur/cache@v0.0.4/pkg/engine/ristretto/cache.go (about) 1 package ristretto 2 3 // Copyright (c) 2018 Bhojpur Consulting Private Limited, India. All rights reserved. 4 5 // Permission is hereby granted, free of charge, to any person obtaining a copy 6 // of this software and associated documentation files (the "Software"), to deal 7 // in the Software without restriction, including without limitation the rights 8 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 // copies of the Software, and to permit persons to whom the Software is 10 // furnished to do so, subject to the following conditions: 11 12 // The above copyright notice and this permission notice shall be included in 13 // all copies or substantial portions of the Software. 14 15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 // THE SOFTWARE. 22 23 // Package ristretto is a fast, fixed size, in-memory cache with a dual focus on 24 // throughput and hit ratio performance. You can easily add Ristretto to an 25 // existing system and keep the most valuable data where you need it. 26 27 import ( 28 "bytes" 29 "errors" 30 "fmt" 31 "sync" 32 "sync/atomic" 33 "time" 34 "unsafe" 35 36 "github.com/bhojpur/cache/pkg/hack" 37 ) 38 39 var ( 40 // TODO: find the optimal value for this or make it configurable 41 setBufSize = 32 * 1024 42 ) 43 44 func defaultStringHash(key string) (uint64, uint64) { 45 const Seed1 = uint64(0x1122334455667788) 46 const Seed2 = uint64(0x8877665544332211) 47 return hack.RuntimeStrhash(key, Seed1), hack.RuntimeStrhash(key, Seed2) 48 } 49 50 type itemCallback func(*Item) 51 52 // CacheItemSize is the overhead in bytes for every stored cache item 53 var CacheItemSize = hack.RuntimeAllocSize(int64(unsafe.Sizeof(storeItem{}))) 54 55 // Cache is a thread-safe implementation of a hashmap with a TinyLFU admission 56 // policy and a Sampled LFU eviction policy. You can use the same Cache instance 57 // from as many goroutines as you want. 58 type Cache struct { 59 // store is the central concurrent hashmap where key-value items are stored. 60 store store 61 // policy determines what gets let in to the cache and what gets kicked out. 62 policy policy 63 // getBuf is a custom ring buffer implementation that gets pushed to when 64 // keys are read. 65 getBuf *ringBuffer 66 // setBuf is a buffer allowing us to batch/drop Sets during times of high 67 // contention. 68 setBuf chan *Item 69 // onEvict is called for item evictions. 70 onEvict itemCallback 71 // onReject is called when an item is rejected via admission policy. 72 onReject itemCallback 73 // onExit is called whenever a value goes out of scope from the cache. 74 onExit func(interface{}) 75 // KeyToHash function is used to customize the key hashing algorithm. 76 // Each key will be hashed using the provided function. If keyToHash value 77 // is not set, the default keyToHash function is used. 78 keyToHash func(string) (uint64, uint64) 79 // stop is used to stop the processItems goroutine. 80 stop chan struct{} 81 // indicates whether cache is closed. 82 isClosed bool 83 // cost calculates cost from a value. 84 cost func(value interface{}) int64 85 // ignoreInternalCost dictates whether to ignore the cost of internally storing 86 // the item in the cost calculation. 87 ignoreInternalCost bool 88 // Metrics contains a running log of important statistics like hits, misses, 89 // and dropped items. 90 Metrics *Metrics 91 } 92 93 // Config is passed to NewCache for creating new Cache instances. 94 type Config struct { 95 // NumCounters determines the number of counters (keys) to keep that hold 96 // access frequency information. It's generally a good idea to have more 97 // counters than the max cache capacity, as this will improve eviction 98 // accuracy and subsequent hit ratios. 99 // 100 // For example, if you expect your cache to hold 1,000,000 items when full, 101 // NumCounters should be 10,000,000 (10x). Each counter takes up 4 bits, so 102 // keeping 10,000,000 counters would require 5MB of memory. 103 NumCounters int64 104 // MaxCost can be considered as the cache capacity, in whatever units you 105 // choose to use. 106 // 107 // For example, if you want the cache to have a max capacity of 100MB, you 108 // would set MaxCost to 100,000,000 and pass an item's number of bytes as 109 // the `cost` parameter for calls to Set. If new items are accepted, the 110 // eviction process will take care of making room for the new item and not 111 // overflowing the MaxCost value. 112 MaxCost int64 113 // BufferItems determines the size of Get buffers. 114 // 115 // Unless you have a rare use case, using `64` as the BufferItems value 116 // results in good performance. 117 BufferItems int64 118 // Metrics determines whether cache statistics are kept during the cache's 119 // lifetime. There *is* some overhead to keeping statistics, so you should 120 // only set this flag to true when testing or throughput performance isn't a 121 // major factor. 122 Metrics bool 123 // OnEvict is called for every eviction and passes the hashed key, value, 124 // and cost to the function. 125 OnEvict func(item *Item) 126 // OnReject is called for every rejection done via the policy. 127 OnReject func(item *Item) 128 // OnExit is called whenever a value is removed from cache. This can be 129 // used to do manual memory deallocation. Would also be called on eviction 130 // and rejection of the value. 131 OnExit func(val interface{}) 132 // KeyToHash function is used to customize the key hashing algorithm. 133 // Each key will be hashed using the provided function. If keyToHash value 134 // is not set, the default keyToHash function is used. 135 KeyToHash func(string) (uint64, uint64) 136 // Cost evaluates a value and outputs a corresponding cost. This function 137 // is ran after Set is called for a new item or an item update with a cost 138 // param of 0. 139 Cost func(value interface{}) int64 140 // IgnoreInternalCost set to true indicates to the cache that the cost of 141 // internally storing the value should be ignored. This is useful when the 142 // cost passed to set is not using bytes as units. Keep in mind that setting 143 // this to true will increase the memory usage. 144 IgnoreInternalCost bool 145 } 146 147 type itemFlag byte 148 149 const ( 150 itemNew itemFlag = iota 151 itemDelete 152 itemUpdate 153 ) 154 155 // Item is passed to setBuf so items can eventually be added to the cache. 156 type Item struct { 157 flag itemFlag 158 Key uint64 159 Conflict uint64 160 Value interface{} 161 Cost int64 162 wg *sync.WaitGroup 163 } 164 165 // NewCache returns a new Cache instance and any configuration errors, if any. 166 func NewCache(config *Config) (*Cache, error) { 167 switch { 168 case config.NumCounters == 0: 169 return nil, errors.New("NumCounters can't be zero") 170 case config.MaxCost == 0: 171 return nil, errors.New("Capacity can't be zero") 172 case config.BufferItems == 0: 173 return nil, errors.New("BufferItems can't be zero") 174 } 175 policy := newPolicy(config.NumCounters, config.MaxCost) 176 cache := &Cache{ 177 store: newStore(), 178 policy: policy, 179 getBuf: newRingBuffer(policy, config.BufferItems), 180 setBuf: make(chan *Item, setBufSize), 181 keyToHash: config.KeyToHash, 182 stop: make(chan struct{}), 183 cost: config.Cost, 184 ignoreInternalCost: config.IgnoreInternalCost, 185 } 186 cache.onExit = func(val interface{}) { 187 if config.OnExit != nil && val != nil { 188 config.OnExit(val) 189 } 190 } 191 cache.onEvict = func(item *Item) { 192 if config.OnEvict != nil { 193 config.OnEvict(item) 194 } 195 cache.onExit(item.Value) 196 } 197 cache.onReject = func(item *Item) { 198 if config.OnReject != nil { 199 config.OnReject(item) 200 } 201 cache.onExit(item.Value) 202 } 203 if cache.keyToHash == nil { 204 cache.keyToHash = defaultStringHash 205 } 206 if config.Metrics { 207 cache.collectMetrics() 208 } 209 // NOTE: benchmarks seem to show that performance decreases the more 210 // goroutines we have running cache.processItems(), so 1 should 211 // usually be sufficient 212 go cache.processItems() 213 return cache, nil 214 } 215 216 // Wait blocks until all the current cache operations have been processed in the background 217 func (c *Cache) Wait() { 218 if c == nil || c.isClosed { 219 return 220 } 221 wg := &sync.WaitGroup{} 222 wg.Add(1) 223 c.setBuf <- &Item{wg: wg} 224 wg.Wait() 225 } 226 227 // Get returns the value (if any) and a boolean representing whether the 228 // value was found or not. The value can be nil and the boolean can be true at 229 // the same time. 230 func (c *Cache) Get(key string) (interface{}, bool) { 231 if c == nil || c.isClosed { 232 return nil, false 233 } 234 keyHash, conflictHash := c.keyToHash(key) 235 c.getBuf.Push(keyHash) 236 value, ok := c.store.Get(keyHash, conflictHash) 237 if ok { 238 c.Metrics.add(hit, keyHash, 1) 239 } else { 240 c.Metrics.add(miss, keyHash, 1) 241 } 242 return value, ok 243 } 244 245 // Set attempts to add the key-value item to the cache. If it returns false, 246 // then the Set was dropped and the key-value item isn't added to the cache. If 247 // it returns true, there's still a chance it could be dropped by the policy if 248 // its determined that the key-value item isn't worth keeping, but otherwise the 249 // item will be added and other items will be evicted in order to make room. 250 // 251 // The cost of the entry will be evaluated lazily by the cache's Cost function. 252 func (c *Cache) Set(key string, value interface{}) bool { 253 return c.SetWithCost(key, value, 0) 254 } 255 256 // SetWithCost works like Set but adds a key-value pair to the cache with a specific 257 // cost. The built-in Cost function will not be called to evaluate the object's cost 258 // and instead the given value will be used. 259 func (c *Cache) SetWithCost(key string, value interface{}, cost int64) bool { 260 if c == nil || c.isClosed { 261 return false 262 } 263 264 keyHash, conflictHash := c.keyToHash(key) 265 i := &Item{ 266 flag: itemNew, 267 Key: keyHash, 268 Conflict: conflictHash, 269 Value: value, 270 Cost: cost, 271 } 272 // cost is eventually updated. The expiration must also be immediately updated 273 // to prevent items from being prematurely removed from the map. 274 if prev, ok := c.store.Update(i); ok { 275 c.onExit(prev) 276 i.flag = itemUpdate 277 } 278 // Attempt to send item to policy. 279 select { 280 case c.setBuf <- i: 281 return true 282 default: 283 if i.flag == itemUpdate { 284 // Return true if this was an update operation since we've already 285 // updated the store. For all the other operations (set/delete), we 286 // return false which means the item was not inserted. 287 return true 288 } 289 c.Metrics.add(dropSets, keyHash, 1) 290 return false 291 } 292 } 293 294 // Delete deletes the key-value item from the cache if it exists. 295 func (c *Cache) Delete(key string) { 296 if c == nil || c.isClosed { 297 return 298 } 299 keyHash, conflictHash := c.keyToHash(key) 300 // Delete immediately. 301 _, prev := c.store.Del(keyHash, conflictHash) 302 c.onExit(prev) 303 // If we've set an item, it would be applied slightly later. 304 // So we must push the same item to `setBuf` with the deletion flag. 305 // This ensures that if a set is followed by a delete, it will be 306 // applied in the correct order. 307 c.setBuf <- &Item{ 308 flag: itemDelete, 309 Key: keyHash, 310 Conflict: conflictHash, 311 } 312 } 313 314 // Close stops all goroutines and closes all channels. 315 func (c *Cache) Close() { 316 if c == nil || c.isClosed { 317 return 318 } 319 c.Clear() 320 321 // Block until processItems goroutine is returned. 322 c.stop <- struct{}{} 323 close(c.stop) 324 close(c.setBuf) 325 c.policy.Close() 326 c.isClosed = true 327 } 328 329 // Clear empties the hashmap and zeroes all policy counters. Note that this is 330 // not an atomic operation (but that shouldn't be a problem as it's assumed that 331 // Set/Get calls won't be occurring until after this). 332 func (c *Cache) Clear() { 333 if c == nil || c.isClosed { 334 return 335 } 336 // Block until processItems goroutine is returned. 337 c.stop <- struct{}{} 338 339 // Clear out the setBuf channel. 340 loop: 341 for { 342 select { 343 case i := <-c.setBuf: 344 if i.wg != nil { 345 i.wg.Done() 346 continue 347 } 348 if i.flag != itemUpdate { 349 // In itemUpdate, the value is already set in the store. So, no need to call 350 // onEvict here. 351 c.onEvict(i) 352 } 353 default: 354 break loop 355 } 356 } 357 358 // Clear value hashmap and policy data. 359 c.policy.Clear() 360 c.store.Clear(c.onEvict) 361 // Only reset metrics if they're enabled. 362 if c.Metrics != nil { 363 c.Metrics.Clear() 364 } 365 // Restart processItems goroutine. 366 go c.processItems() 367 } 368 369 // Len returns the size of the cache (in entries) 370 func (c *Cache) Len() int { 371 if c == nil { 372 return 0 373 } 374 return c.store.Len() 375 } 376 377 // UsedCapacity returns the size of the cache (in bytes) 378 func (c *Cache) UsedCapacity() int64 { 379 if c == nil { 380 return 0 381 } 382 return c.policy.Used() 383 } 384 385 // MaxCapacity returns the max cost of the cache (in bytes) 386 func (c *Cache) MaxCapacity() int64 { 387 if c == nil { 388 return 0 389 } 390 return c.policy.MaxCost() 391 } 392 393 // SetCapacity updates the maxCost of an existing cache. 394 func (c *Cache) SetCapacity(maxCost int64) { 395 if c == nil { 396 return 397 } 398 c.policy.UpdateMaxCost(maxCost) 399 } 400 401 // Evictions returns the number of evictions 402 func (c *Cache) Evictions() int64 { 403 // TODO 404 if c == nil || c.Metrics == nil { 405 return 0 406 } 407 return int64(c.Metrics.KeysEvicted()) 408 } 409 410 // ForEach yields all the values currently stored in the cache to the given callback. 411 // The callback may return `false` to stop the iteration early. 412 func (c *Cache) ForEach(forEach func(interface{}) bool) { 413 if c == nil { 414 return 415 } 416 c.store.ForEach(forEach) 417 } 418 419 // processItems is ran by goroutines processing the Set buffer. 420 func (c *Cache) processItems() { 421 startTs := make(map[uint64]time.Time) 422 numToKeep := 100000 // TODO: Make this configurable via options. 423 424 trackAdmission := func(key uint64) { 425 if c.Metrics == nil { 426 return 427 } 428 startTs[key] = time.Now() 429 if len(startTs) > numToKeep { 430 for k := range startTs { 431 if len(startTs) <= numToKeep { 432 break 433 } 434 delete(startTs, k) 435 } 436 } 437 } 438 onEvict := func(i *Item) { 439 delete(startTs, i.Key) 440 if c.onEvict != nil { 441 c.onEvict(i) 442 } 443 } 444 445 for { 446 select { 447 case i := <-c.setBuf: 448 if i.wg != nil { 449 i.wg.Done() 450 continue 451 } 452 // Calculate item cost value if new or update. 453 if i.Cost == 0 && c.cost != nil && i.flag != itemDelete { 454 i.Cost = c.cost(i.Value) 455 } 456 if !c.ignoreInternalCost { 457 // Add the cost of internally storing the object. 458 i.Cost += CacheItemSize 459 } 460 461 switch i.flag { 462 case itemNew: 463 victims, added := c.policy.Add(i.Key, i.Cost) 464 if added { 465 c.store.Set(i) 466 c.Metrics.add(keyAdd, i.Key, 1) 467 trackAdmission(i.Key) 468 } else { 469 c.onReject(i) 470 } 471 for _, victim := range victims { 472 victim.Conflict, victim.Value = c.store.Del(victim.Key, 0) 473 onEvict(victim) 474 } 475 476 case itemUpdate: 477 c.policy.Update(i.Key, i.Cost) 478 479 case itemDelete: 480 c.policy.Del(i.Key) // Deals with metrics updates. 481 _, val := c.store.Del(i.Key, i.Conflict) 482 c.onExit(val) 483 } 484 case <-c.stop: 485 return 486 } 487 } 488 } 489 490 // collectMetrics just creates a new *Metrics instance and adds the pointers 491 // to the cache and policy instances. 492 func (c *Cache) collectMetrics() { 493 c.Metrics = newMetrics() 494 c.policy.CollectMetrics(c.Metrics) 495 } 496 497 type metricType int 498 499 const ( 500 // The following 2 keep track of hits and misses. 501 hit = iota 502 miss 503 // The following 3 keep track of number of keys added, updated and evicted. 504 keyAdd 505 keyUpdate 506 keyEvict 507 // The following 2 keep track of cost of keys added and evicted. 508 costAdd 509 costEvict 510 // The following keep track of how many sets were dropped or rejected later. 511 dropSets 512 rejectSets 513 // The following 2 keep track of how many gets were kept and dropped on the 514 // floor. 515 dropGets 516 keepGets 517 // This should be the final enum. Other enums should be set before this. 518 doNotUse 519 ) 520 521 func stringFor(t metricType) string { 522 switch t { 523 case hit: 524 return "hit" 525 case miss: 526 return "miss" 527 case keyAdd: 528 return "keys-added" 529 case keyUpdate: 530 return "keys-updated" 531 case keyEvict: 532 return "keys-evicted" 533 case costAdd: 534 return "cost-added" 535 case costEvict: 536 return "cost-evicted" 537 case dropSets: 538 return "sets-dropped" 539 case rejectSets: 540 return "sets-rejected" // by policy. 541 case dropGets: 542 return "gets-dropped" 543 case keepGets: 544 return "gets-kept" 545 default: 546 return "unidentified" 547 } 548 } 549 550 // Metrics is a snapshot of performance statistics for the lifetime of a cache instance. 551 type Metrics struct { 552 all [doNotUse][]*uint64 553 } 554 555 func newMetrics() *Metrics { 556 s := &Metrics{} 557 for i := 0; i < doNotUse; i++ { 558 s.all[i] = make([]*uint64, 256) 559 slice := s.all[i] 560 for j := range slice { 561 slice[j] = new(uint64) 562 } 563 } 564 return s 565 } 566 567 func (p *Metrics) add(t metricType, hash, delta uint64) { 568 if p == nil { 569 return 570 } 571 valp := p.all[t] 572 // Avoid false sharing by padding at least 64 bytes of space between two 573 // atomic counters which would be incremented. 574 idx := (hash % 25) * 10 575 atomic.AddUint64(valp[idx], delta) 576 } 577 578 func (p *Metrics) get(t metricType) uint64 { 579 if p == nil { 580 return 0 581 } 582 valp := p.all[t] 583 var total uint64 584 for i := range valp { 585 total += atomic.LoadUint64(valp[i]) 586 } 587 return total 588 } 589 590 // Hits is the number of Get calls where a value was found for the corresponding key. 591 func (p *Metrics) Hits() uint64 { 592 return p.get(hit) 593 } 594 595 // Misses is the number of Get calls where a value was not found for the corresponding key. 596 func (p *Metrics) Misses() uint64 { 597 return p.get(miss) 598 } 599 600 // KeysAdded is the total number of Set calls where a new key-value item was added. 601 func (p *Metrics) KeysAdded() uint64 { 602 return p.get(keyAdd) 603 } 604 605 // KeysUpdated is the total number of Set calls where the value was updated. 606 func (p *Metrics) KeysUpdated() uint64 { 607 return p.get(keyUpdate) 608 } 609 610 // KeysEvicted is the total number of keys evicted. 611 func (p *Metrics) KeysEvicted() uint64 { 612 return p.get(keyEvict) 613 } 614 615 // CostAdded is the sum of costs that have been added (successful Set calls). 616 func (p *Metrics) CostAdded() uint64 { 617 return p.get(costAdd) 618 } 619 620 // CostEvicted is the sum of all costs that have been evicted. 621 func (p *Metrics) CostEvicted() uint64 { 622 return p.get(costEvict) 623 } 624 625 // SetsDropped is the number of Set calls that don't make it into internal 626 // buffers (due to contention or some other reason). 627 func (p *Metrics) SetsDropped() uint64 { 628 return p.get(dropSets) 629 } 630 631 // SetsRejected is the number of Set calls rejected by the policy (TinyLFU). 632 func (p *Metrics) SetsRejected() uint64 { 633 return p.get(rejectSets) 634 } 635 636 // GetsDropped is the number of Get counter increments that are dropped 637 // internally. 638 func (p *Metrics) GetsDropped() uint64 { 639 return p.get(dropGets) 640 } 641 642 // GetsKept is the number of Get counter increments that are kept. 643 func (p *Metrics) GetsKept() uint64 { 644 return p.get(keepGets) 645 } 646 647 // Ratio is the number of Hits over all accesses (Hits + Misses). This is the 648 // percentage of successful Get calls. 649 func (p *Metrics) Ratio() float64 { 650 if p == nil { 651 return 0.0 652 } 653 hits, misses := p.get(hit), p.get(miss) 654 if hits == 0 && misses == 0 { 655 return 0.0 656 } 657 return float64(hits) / float64(hits+misses) 658 } 659 660 // Clear resets all the metrics. 661 func (p *Metrics) Clear() { 662 if p == nil { 663 return 664 } 665 for i := 0; i < doNotUse; i++ { 666 for j := range p.all[i] { 667 atomic.StoreUint64(p.all[i][j], 0) 668 } 669 } 670 } 671 672 // String returns a string representation of the metrics. 673 func (p *Metrics) String() string { 674 if p == nil { 675 return "" 676 } 677 var buf bytes.Buffer 678 for i := 0; i < doNotUse; i++ { 679 t := metricType(i) 680 fmt.Fprintf(&buf, "%s: %d ", stringFor(t), p.get(t)) 681 } 682 fmt.Fprintf(&buf, "gets-total: %d ", p.get(hit)+p.get(miss)) 683 fmt.Fprintf(&buf, "hit-ratio: %.2f", p.Ratio()) 684 return buf.String() 685 }