/*
 * Copyright 2019 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Package cache implements Ristretto, a fast, fixed-size, in-memory cache with
// a dual focus on throughput and hit ratio performance. You can easily add
// Ristretto to an existing system and keep the most valuable data where you
// need it.
package cache

import (
	"bytes"
	"errors"
	"fmt"
	"sync/atomic"
)

// Config is passed to NewCache for creating new Cache instances.
type Config struct {
	// NumCounters determines the number of counters (keys) to keep that hold
	// access frequency information. It's generally a good idea to have more
	// counters than the max cache capacity, as this will improve eviction
	// accuracy and subsequent hit ratios.
	//
	// For example, if you expect your cache to hold 1,000,000 items when full,
	// NumCounters should be 10,000,000 (10x). Each counter takes up 4 bits, so
	// keeping 10,000,000 counters would require 5MB of memory.
	NumCounters int64
	// MaxCost can be considered as the cache capacity, in whatever units you
	// choose to use.
	//
	// For example, if you want the cache to have a max capacity of 100MB, you
	// would set MaxCost to 100,000,000 and pass an item's number of bytes as
	// the `cost` parameter for calls to Set. If new items are accepted, the
	// eviction process will take care of making room for the new item and not
	// overflowing the MaxCost value.
	MaxCost int64
	// BufferItems determines the size of Get buffers.
	//
	// Unless you have a rare use case, using `64` as the BufferItems value
	// results in good performance.
	BufferItems int64
	// Metrics determines whether cache statistics are kept during the cache's
	// lifetime. There *is* some overhead to keeping statistics, so you should
	// only set this flag to true when testing or when throughput performance
	// isn't a major factor.
	Metrics bool
	// OnEvict is called for every eviction and passes the hashed key and
	// value to the function.
	OnEvict func(key uint64, value interface{})
	// Cost evaluates a value and outputs a corresponding cost. This function
	// is run when Set is called for a new item, or for an item update, with a
	// cost param of 0.
	Cost func(value interface{}) int64
}

const (
	// TODO: find the optimal value for this or make it configurable
	setBufSize = 32 * 1024
)

type setEvent struct {
	del  bool
	key  uint64
	cost int64
}
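// newExampleCache is a minimal sketch of how the Config fields above are
// typically filled in: NumCounters at roughly 10x the expected number of
// cached items, MaxCost in whatever unit the caller uses for cost (bytes
// here), and BufferItems left at 64. The concrete numbers are illustrative
// assumptions only, not values prescribed by this package.
func newExampleCache() (*Cache, error) {
	return NewCache(&Config{
		NumCounters: 10_000_000,  // ~10x the expected 1,000,000 cached items
		MaxCost:     100_000_000, // capacity budget; interpreted here as bytes
		BufferItems: 64,          // recommended Get-buffer size
		Metrics:     true,        // keep hit/miss statistics (small overhead)
	})
}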
// Cache is a thread-safe implementation of a hashmap with a TinyLFU admission
// policy and a Sampled LFU eviction policy. You can use the same Cache instance
// from as many goroutines as you want.
type Cache struct {
	// store is the central concurrent hashmap where key-value items are stored
	store *store
	// policy determines what gets let into the cache and what gets kicked out
	policy *policy
	// getBuf is a custom ring buffer implementation that gets pushed to when
	// keys are read
	getBuf *ringBuffer
	// setBuf is a buffer allowing us to batch/drop Sets during times of high
	// contention
	setBuf chan setEvent
	// onEvict is called for item evictions
	onEvict func(uint64, interface{})
	// stop is used to stop the processItems goroutine
	stop chan struct{}
	// cost calculates cost from a value
	cost func(value interface{}) int64
	// Metrics contains a running log of important statistics like hits, misses,
	// and dropped items
	Metrics *Metrics
}

// NewCache returns a new Cache instance, or an error if the configuration is
// invalid.
func NewCache(config *Config) (*Cache, error) {
	switch {
	case config.NumCounters == 0:
		return nil, errors.New("NumCounters can't be zero.")
	case config.MaxCost == 0:
		return nil, errors.New("MaxCost can't be zero.")
	case config.BufferItems == 0:
		return nil, errors.New("BufferItems can't be zero.")
	}
	policy := newPolicy(config.NumCounters, config.MaxCost)
	cache := &Cache{
		store:   newStore(),
		policy:  policy,
		getBuf:  newRingBuffer(policy, config.BufferItems),
		setBuf:  make(chan setEvent, setBufSize),
		onEvict: config.OnEvict,
		stop:    make(chan struct{}),
		cost:    config.Cost,
	}
	if config.Metrics {
		cache.collectMetrics()
	}
	// NOTE: benchmarks seem to show that performance decreases the more
	// goroutines we have running cache.processItems(), so 1 should
	// usually be sufficient
	go cache.processItems()
	return cache, nil
}

// Get returns the value (if any) and a boolean representing whether the
// value was found or not. The value can be nil and the boolean can be true at
// the same time.
func (c *Cache) Get(key uint64) (interface{}, bool) {
	if c == nil {
		return nil, false
	}
	c.getBuf.Push(key)
	value, ok := c.store.GetValue(key)
	if ok {
		c.Metrics.add(hit, key, 1)
	} else {
		c.Metrics.add(miss, key, 1)
	}
	return value, ok
}
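// cacheRoundTrip is a minimal usage sketch of Set, Get, and Del together,
// assuming the caller has already hashed its keys to uint64 (this cache works
// on hashed keys only). Note that an accepted Set is applied to the admission
// policy asynchronously, so a recently set key can later be rejected and
// dropped; callers must always handle the miss case.
func cacheRoundTrip(c *Cache, key uint64, value []byte) ([]byte, bool) {
	// Store the value, using its length in bytes as its cost.
	c.Set(key, value, int64(len(value)))

	// Read it back; ok reports whether the key is currently present.
	v, ok := c.Get(key)
	if !ok {
		// The item may have been rejected by the admission policy or evicted.
		return nil, false
	}
	got := v.([]byte)

	// Del removes the key and returns the previous value, if any.
	c.Del(key)
	return got, true
}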
// Set attempts to add the key-value item to the cache. There's still a chance
// it could be dropped by the policy if it's determined that the key-value item
// isn't worth keeping, but otherwise the item will be added and other items
// will be evicted in order to make room.
//
// To dynamically evaluate the item's cost using the Config.Cost function, set
// the cost parameter to 0 and Cost will be run when needed in order to find
// the item's true cost.
func (c *Cache) Set(key uint64, value interface{}, cost int64) {
	if c == nil {
		return
	}
	if cost == 0 && c.cost != nil {
		cost = c.cost(value)
	}
	for {
		i := c.store.GetOrNew(key)
		i.Lock()
		if i.dead {
			i.Unlock()
			continue
		}

		i.value.Store(value)
		// Send the event to the channel while holding the mutex, so that the
		// order of the event log matches the order of mutations on the hash map.
		// If we sent the event after i.Unlock(), we could end up with a dangling
		// item in the hash map:
		//   * the mutations are `A deletes K -> B inserts K`,
		//   * but the event log is `B inserts K -> A deletes K`,
		//   * so after replaying the event log there is a dangling item in the
		//     hash map which can never be evicted.
		// Deleting the item when applying the delete event is not a good idea
		// either, because we might overwrite the following insert. Delaying all
		// hash map mutations until log replay sounds attractive, but it would
		// result in confusing behavior: you could not get the item you just
		// inserted.
		c.setBuf <- setEvent{del: false, key: key, cost: cost}

		i.Unlock()
		return
	}
}

// SetNewMaxCost sets the cache's max cost to newMaxCost.
func (c *Cache) SetNewMaxCost(newMaxCost int64) {
	c.policy.setNewMaxCost(newMaxCost)
}

// GetOrCompute returns the value of key. If there is no such key, it computes
// the value using the factory function `f`. If there are concurrent calls on
// the same key, the factory function will be called only once.
func (c *Cache) GetOrCompute(key uint64, f func() (interface{}, int64, error)) (interface{}, error) {
	if c == nil {
		return nil, nil
	}
	for {
		i := c.store.GetOrNew(key)
		if v := i.value.Load(); v != nil {
			return v, nil
		}
		if v, err, ok := c.compute(i, f); ok {
			return v, err
		}
	}
}

func (c *Cache) compute(i *item, f func() (interface{}, int64, error)) (interface{}, error, bool) {
	i.Lock()
	defer i.Unlock()
	if i.dead {
		return nil, nil, false
	}

	// Double check.
	if v := i.value.Load(); v != nil {
		return v, nil, true
	}

	v, cost, err := f()
	if err != nil {
		return nil, err, true
	}
	i.value.Store(v)

	if cost == 0 && c.cost != nil {
		cost = c.cost(v)
	}
	c.setBuf <- setEvent{del: false, key: i.key, cost: cost}
	return v, nil, true
}
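// loadThroughCache is a minimal sketch of using GetOrCompute as a load-through
// helper: the factory runs only when the key is absent, and concurrent callers
// for the same key share a single factory invocation. The loadFromDisk helper
// and the byte-length cost are illustrative assumptions.
func loadThroughCache(c *Cache, key uint64, loadFromDisk func() ([]byte, error)) ([]byte, error) {
	v, err := c.GetOrCompute(key, func() (interface{}, int64, error) {
		data, err := loadFromDisk()
		if err != nil {
			// On error no value is stored, so the next caller will invoke the
			// factory again.
			return nil, 0, err
		}
		// Report the cost explicitly; 0 would defer to Config.Cost if configured.
		return data, int64(len(data)), nil
	})
	if err != nil {
		return nil, err
	}
	return v.([]byte), nil
}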
// Del deletes the key-value item from the cache if it exists.
func (c *Cache) Del(key uint64) interface{} {
	if c == nil {
		return nil
	}
	i, ok := c.store.Get(key)
	if !ok {
		return nil
	}
	i.Lock()
	if i.del(c.store) {
		c.setBuf <- setEvent{del: true, key: key}
		c.store.Del(key)
	}
	v := i.value.Load()
	i.Unlock()
	return v
}

// Close stops all goroutines and closes all channels.
func (c *Cache) Close() {
	// block until the processItems goroutine returns
	c.stop <- struct{}{}
	close(c.stop)

	// TODO: Close will be called when the DB shuts down, but some tables may
	// still try to evict data from the cache in the epoch manager's background
	// thread; if we closed setBuf here, the runtime would panic.
	//
	// It is safe to leave this channel open, because the DB process is going
	// to terminate.
	//
	// To address this properly, we must wait for the epoch manager to close
	// before closing the cache. For now, just leave this channel open.

	// close(c.setBuf)

	c.policy.Close()
}

// IsEmpty reports whether setBuf is empty; if it is not empty, one buffered
// event is popped as a side effect.
func IsEmpty(ch <-chan setEvent) bool {
	select {
	case <-ch:
		return false
	default:
		return true
	}
}

// Clear empties the hashmap and zeroes all policy counters. Note that this is
// not an atomic operation (but that shouldn't be a problem as it's assumed that
// Set/Get calls won't be occurring until after this).
func (c *Cache) Clear() {
	// block until the processItems goroutine returns
	c.stop <- struct{}{}
	// Empty the setBuf. There must not be any Set or Get calls while
	// Cache.Clear() is running.
	for !IsEmpty(c.setBuf) {
	}
	// clear the value hashmap and policy data
	c.policy.Clear()
	c.store.Clear()
	// only reset metrics if they're enabled
	if c.Metrics != nil {
		c.Metrics.Clear()
	}
	// restart the processItems goroutine
	go c.processItems()
}

// processItems is run by goroutines processing the Set buffer.
func (c *Cache) processItems() {
	for {
		select {
		case e := <-c.setBuf:
			if e.del {
				c.policy.Del(e.key)
				continue
			}
			c.handleNewItem(e.key, e.cost)
		case <-c.stop:
			return
		}
	}
}

func (c *Cache) handleNewItem(key uint64, cost int64) {
	itemInMap, ok := c.store.Get(key)
	if !ok {
		// The item has already been removed from the hash map; ignore this
		// event, or we may end up with a dangling item in the policy.
		return
	}

	// TODO: perform eviction after all events in the current batch have been handled.
	victims, added := c.policy.Add(key, cost)
	if !added {
		// The item was rejected by the admission policy, so delete it from the
		// hash map. Otherwise this dangling item would be kept in the cache forever.
		i, ok := c.store.Get(key)
		if !ok || i != itemInMap {
			return
		}
		i.Lock()
		deleted := i.del(c.store)
		i.Unlock()

		if deleted && c.onEvict != nil {
			c.onEvict(i.key, i.value.Load())
		}
		return
	}

	for _, victim := range victims {
		victim, ok = c.store.Get(victim.key)
		if !ok {
			continue
		}
		victim.Lock()
		deleted := victim.del(c.store)
		victim.Unlock()
		if deleted && c.onEvict != nil {
			c.onEvict(victim.key, victim.value.Load())
		}
	}
}
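// newCacheWithEvictionCounter is a minimal sketch of wiring Config.OnEvict to
// the eviction paths in handleNewItem above: the callback fires both when a new
// item is rejected by the admission policy and when victims are removed to make
// room. The counter and the sizes used here are illustrative assumptions.
func newCacheWithEvictionCounter(evictions *uint64) (*Cache, error) {
	return NewCache(&Config{
		NumCounters: 1_000_000,
		MaxCost:     10_000_000,
		BufferItems: 64,
		OnEvict: func(hashedKey uint64, value interface{}) {
			// Only the hashed key is available here, not the original key.
			atomic.AddUint64(evictions, 1)
		},
	})
}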
// collectMetrics just creates a new *Metrics instance and adds the pointers
// to the cache and policy instances.
func (c *Cache) collectMetrics() {
	c.Metrics = newMetrics()
	c.policy.CollectMetrics(c.Metrics)
}

type metricType int

const (
	// The following 2 keep track of hits and misses.
	hit = iota
	miss
	// The following 3 keep track of number of keys added, updated and evicted.
	keyAdd
	keyUpdate
	keyEvict
	// The following 2 keep track of cost of keys added and evicted.
	costAdd
	costEvict
	// The following keep track of how many sets were dropped or rejected later.
	dropSets
	rejectSets
	// The following 2 keep track of how many gets were kept and dropped on the
	// floor.
	dropGets
	keepGets
	// This should be the final enum. Other enums should be set before this.
	doNotUse
)

func stringFor(t metricType) string {
	switch t {
	case hit:
		return "hit"
	case miss:
		return "miss"
	case keyAdd:
		return "keys-added"
	case keyUpdate:
		return "keys-updated"
	case keyEvict:
		return "keys-evicted"
	case costAdd:
		return "cost-added"
	case costEvict:
		return "cost-evicted"
	case dropSets:
		return "sets-dropped"
	case rejectSets:
		return "sets-rejected" // by policy.
	case dropGets:
		return "gets-dropped"
	case keepGets:
		return "gets-kept"
	default:
		return "unidentified"
	}
}

// Metrics is a snapshot of performance statistics for the lifetime of a cache
// instance.
type Metrics struct {
	all [doNotUse][]*uint64
}

func newMetrics() *Metrics {
	s := &Metrics{}
	for i := 0; i < doNotUse; i++ {
		s.all[i] = make([]*uint64, 256)
		slice := s.all[i]
		for j := range slice {
			slice[j] = new(uint64)
		}
	}
	return s
}

func (p *Metrics) add(t metricType, hash, delta uint64) {
	if p == nil {
		return
	}
	valp := p.all[t]
	// Avoid false sharing by padding at least 64 bytes of space between two
	// atomic counters which would be incremented.
	idx := (hash % 25) * 10
	atomic.AddUint64(valp[idx], delta)
}

func (p *Metrics) get(t metricType) uint64 {
	if p == nil {
		return 0
	}
	valp := p.all[t]
	var total uint64
	for i := range valp {
		total += atomic.LoadUint64(valp[i])
	}
	return total
}

// Hits is the number of Get calls where a value was found for the corresponding
// key.
func (p *Metrics) Hits() uint64 {
	return p.get(hit)
}

// Misses is the number of Get calls where a value was not found for the
// corresponding key.
func (p *Metrics) Misses() uint64 {
	return p.get(miss)
}

// KeysAdded is the total number of Set calls where a new key-value item was
// added.
func (p *Metrics) KeysAdded() uint64 {
	return p.get(keyAdd)
}

// KeysUpdated is the total number of Set calls where the value was updated.
func (p *Metrics) KeysUpdated() uint64 {
	return p.get(keyUpdate)
}

// KeysEvicted is the total number of keys evicted.
func (p *Metrics) KeysEvicted() uint64 {
	return p.get(keyEvict)
}

// CostAdded is the sum of costs that have been added (successful Set calls).
func (p *Metrics) CostAdded() uint64 {
	return p.get(costAdd)
}

// CostEvicted is the sum of all costs that have been evicted.
func (p *Metrics) CostEvicted() uint64 {
	return p.get(costEvict)
}

// SetsDropped is the number of Set calls that don't make it into internal
// buffers (due to contention or some other reason).
func (p *Metrics) SetsDropped() uint64 {
	return p.get(dropSets)
}

// SetsRejected is the number of Set calls rejected by the policy (TinyLFU).
func (p *Metrics) SetsRejected() uint64 {
	return p.get(rejectSets)
}

// GetsDropped is the number of Get counter increments that are dropped
// internally.
func (p *Metrics) GetsDropped() uint64 {
	return p.get(dropGets)
}

// GetsKept is the number of Get counter increments that are kept.
func (p *Metrics) GetsKept() uint64 {
	return p.get(keepGets)
}
// Ratio is the number of Hits over all accesses (Hits + Misses). This is the
// fraction of successful Get calls.
func (p *Metrics) Ratio() float64 {
	if p == nil {
		return 0.0
	}
	hits, misses := p.get(hit), p.get(miss)
	if hits == 0 && misses == 0 {
		return 0.0
	}
	return float64(hits) / float64(hits+misses)
}

// Clear resets all metrics counters to zero.
func (p *Metrics) Clear() {
	if p == nil {
		return
	}
	for i := 0; i < doNotUse; i++ {
		for j := range p.all[i] {
			atomic.StoreUint64(p.all[i][j], 0)
		}
	}
}

// String returns a space-separated, human-readable summary of all metrics.
func (p *Metrics) String() string {
	if p == nil {
		return ""
	}
	var buf bytes.Buffer
	for i := 0; i < doNotUse; i++ {
		t := metricType(i)
		fmt.Fprintf(&buf, "%s: %d ", stringFor(t), p.get(t))
	}
	fmt.Fprintf(&buf, "gets-total: %d ", p.get(hit)+p.get(miss))
	fmt.Fprintf(&buf, "hit-ratio: %.2f", p.Ratio())
	return buf.String()
}
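// logCacheStats is a minimal sketch of reading the metrics above once
// Config.Metrics has been enabled: individual counters are available through
// the accessor methods, Ratio reports the hit fraction, and String produces a
// one-line summary. The function name and output format are illustrative
// assumptions.
func logCacheStats(c *Cache) string {
	m := c.Metrics
	if m == nil {
		// Metrics were not enabled in the Config.
		return ""
	}
	return fmt.Sprintf("hits=%d misses=%d hit-ratio=%.2f summary=%q",
		m.Hits(), m.Misses(), m.Ratio(), m.String())
}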