github.com/coocood/badger@v1.5.1-0.20200528065104-c02ac3616d04/cache/cache.go

/*
 * Copyright 2019 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Ristretto is a fast, fixed-size, in-memory cache with a dual focus on
// throughput and hit ratio performance. You can easily add Ristretto to an
// existing system and keep the most valuable data where you need it.
package cache

import (
	"bytes"
	"errors"
	"fmt"
	"sync/atomic"
)

// Config is passed to NewCache for creating new Cache instances.
type Config struct {
	// NumCounters determines the number of counters (keys) to keep that hold
	// access frequency information. It's generally a good idea to have more
	// counters than the max cache capacity, as this will improve eviction
	// accuracy and subsequent hit ratios.
	//
	// For example, if you expect your cache to hold 1,000,000 items when full,
	// NumCounters should be 10,000,000 (10x). Each counter takes up 4 bits, so
	// keeping 10,000,000 counters would require 5MB of memory.
	NumCounters int64
	// MaxCost can be considered as the cache capacity, in whatever units you
	// choose to use.
	//
	// For example, if you want the cache to have a max capacity of 100MB, you
	// would set MaxCost to 100,000,000 and pass an item's number of bytes as
	// the `cost` parameter for calls to Set. If new items are accepted, the
	// eviction process will take care of making room for the new item and not
	// overflowing the MaxCost value.
	MaxCost int64
	// BufferItems determines the size of Get buffers.
	//
	// Unless you have a rare use case, using `64` as the BufferItems value
	// results in good performance.
	BufferItems int64
	// Metrics determines whether cache statistics are kept during the cache's
	// lifetime. There *is* some overhead to keeping statistics, so you should
	// only set this flag to true when testing or when throughput performance
	// isn't a major factor.
	Metrics bool
	// OnEvict is called for every eviction and passes the hashed key and
	// value to the function.
	OnEvict func(key uint64, value interface{})
	// Cost evaluates a value and outputs a corresponding cost. This function
	// is run after Set is called for a new item or an item update with a cost
	// param of 0.
	Cost func(value interface{}) int64
}
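// The sizing guidance above translates directly into a Config value. The
// sketch below is illustrative only (the 1,000,000-item / 100MB figures are
// assumptions, not requirements): NumCounters is roughly 10x the expected
// number of items, MaxCost is the capacity in bytes, and each item's byte
// size is later passed as the cost argument to Set.
//
//	cfg := &Config{
//		NumCounters: 10_000_000,  // 10x the expected 1,000,000 items
//		MaxCost:     100_000_000, // capacity in bytes (~100MB)
//		BufferItems: 64,          // recommended default for Get buffers
//		Metrics:     false,       // enable only when the overhead is acceptable
//	}
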
const (
	// TODO: find the optimal value for this or make it configurable
	setBufSize = 32 * 1024
)

type setEvent struct {
	del  bool
	key  uint64
	cost int64
}

// Cache is a thread-safe implementation of a hashmap with a TinyLFU admission
// policy and a Sampled LFU eviction policy. You can use the same Cache instance
// from as many goroutines as you want.
type Cache struct {
	// store is the central concurrent hashmap where key-value items are stored
	store *store
	// policy determines what gets let into the cache and what gets kicked out
	policy *policy
	// getBuf is a custom ring buffer implementation that gets pushed to when
	// keys are read
	getBuf *ringBuffer
	// setBuf is a buffer allowing us to batch/drop Sets during times of high
	// contention
	setBuf chan setEvent
	// onEvict is called for item evictions
	onEvict func(uint64, interface{})
	// stop is used to stop the processItems goroutine
	stop chan struct{}
	// cost calculates cost from a value
	cost func(value interface{}) int64
	// Metrics contains a running log of important statistics like hits, misses,
	// and dropped items
	Metrics *Metrics
}

// NewCache returns a new Cache instance or a configuration error, if any.
func NewCache(config *Config) (*Cache, error) {
	switch {
	case config.NumCounters == 0:
		return nil, errors.New("NumCounters can't be zero")
	case config.MaxCost == 0:
		return nil, errors.New("MaxCost can't be zero")
	case config.BufferItems == 0:
		return nil, errors.New("BufferItems can't be zero")
	}
	policy := newPolicy(config.NumCounters, config.MaxCost)
	cache := &Cache{
		store:   newStore(),
		policy:  policy,
		getBuf:  newRingBuffer(policy, config.BufferItems),
		setBuf:  make(chan setEvent, setBufSize),
		onEvict: config.OnEvict,
		stop:    make(chan struct{}),
		cost:    config.Cost,
	}
	if config.Metrics {
		cache.collectMetrics()
	}
	// NOTE: benchmarks seem to show that performance decreases the more
	// goroutines we have running cache.processItems(), so 1 should
	// usually be sufficient
	go cache.processItems()
	return cache, nil
}

// Get returns the value (if any) and a boolean indicating whether the value
// was found. The value can be nil and the boolean can be true at the same time.
func (c *Cache) Get(key uint64) (interface{}, bool) {
	if c == nil {
		return nil, false
	}
	c.getBuf.Push(key)
	value, ok := c.store.GetValue(key)
	if ok {
		c.Metrics.add(hit, key, 1)
	} else {
		c.Metrics.add(miss, key, 1)
	}
	return value, ok
}

// Set attempts to add the key-value item to the cache. There's still a chance
// it could be dropped by the policy if it's determined that the key-value item
// isn't worth keeping, but otherwise the item will be added and other items
// will be evicted in order to make room.
//
// To dynamically evaluate the item's cost using the Config.Cost function, set
// the cost parameter to 0 and the Cost function will be run when needed in
// order to find the item's true cost.
func (c *Cache) Set(key uint64, value interface{}, cost int64) {
	if c == nil {
		return
	}
	if cost == 0 && c.cost != nil {
		cost = c.cost(value)
	}
	for {
		i := c.store.GetOrNew(key)
		i.Lock()
		if i.dead {
			i.Unlock()
			continue
		}

		i.value.Store(value)
		// Send the event to the channel while holding the mutex, so that the
		// order of the event log matches the order of mutations on the hash
		// map. If we sent the event after i.Unlock() we could end up with a
		// dangling item in the hash map:
		//   * the mutations are `A deletes K -> B inserts K`,
		//   * but the event log is `B inserts K -> A deletes K`,
		//   * so after replaying the event log there is a dangling item in the
		//     hash map which can never be evicted.
		// Deleting the item while applying the delete event is not a good idea,
		// because we may overwrite the following insert. Delaying all hash map
		// mutations until log replay seems like a good idea, but it would result
		// in confusing behavior: you could not Get the item you just inserted.
		c.setBuf <- setEvent{del: false, key: key, cost: cost}

		i.Unlock()
		return
	}
}
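// To make the cost handling described above concrete, the sketch below shows
// the two ways an item's cost can be supplied. It is illustrative only: the
// `cache`, `keyHash`, and `value` identifiers are assumptions from a caller's
// code, not part of this package.
//
//	// Explicit cost: pass the item's size in bytes directly.
//	cache.Set(keyHash, value, int64(len(value)))
//
//	// Deferred cost: pass 0 and let Config.Cost compute it, e.g.
//	//   Cost: func(v interface{}) int64 { return int64(len(v.([]byte))) }
//	cache.Set(keyHash, value, 0)
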
// GetOrCompute returns the value for key. If there is no such key, it computes
// the value using the factory function `f`. If there are concurrent calls for
// the same key, the factory function is called only once.
func (c *Cache) GetOrCompute(key uint64, f func() (interface{}, int64, error)) (interface{}, error) {
	if c == nil {
		return nil, nil
	}
	for {
		i := c.store.GetOrNew(key)
		if v := i.value.Load(); v != nil {
			return v, nil
		}
		if v, err, ok := c.compute(i, f); ok {
			return v, err
		}
	}
}

func (c *Cache) compute(i *item, f func() (interface{}, int64, error)) (interface{}, error, bool) {
	i.Lock()
	defer i.Unlock()
	if i.dead {
		return nil, nil, false
	}

	// Double check.
	if v := i.value.Load(); v != nil {
		return v, nil, true
	}

	v, cost, err := f()
	if err != nil {
		return nil, err, true
	}
	i.value.Store(v)

	if cost == 0 && c.cost != nil {
		cost = c.cost(v)
	}
	c.setBuf <- setEvent{del: false, key: i.key, cost: cost}
	return v, nil, true
}

// Del deletes the key-value item from the cache if it exists.
func (c *Cache) Del(key uint64) interface{} {
	if c == nil {
		return nil
	}
	i, ok := c.store.Get(key)
	if !ok {
		return nil
	}
	i.Lock()
	if i.del(c.store) {
		c.setBuf <- setEvent{del: true, key: key}
		c.store.Del(key)
	}
	v := i.value.Load()
	i.Unlock()
	return v
}

// Close stops all goroutines and closes all channels.
func (c *Cache) Close() {
	// block until the processItems goroutine has returned
	c.stop <- struct{}{}
	close(c.stop)

	// TODO: Close is called when the DB shuts down, but some tables may still
	// try to evict data from the cache in the epoch manager's background
	// thread; if we closed setBuf here, the runtime would panic.
	//
	// It is safe to leave this channel open, because the DB process is about
	// to terminate.
	//
	// To address this properly, we must wait for the epoch manager to close
	// before closing the cache. For now, just leave the channel open.

	// close(c.setBuf)

	c.policy.Close()
}

// Clear empties the hashmap and zeroes all policy counters. Note that this is
// not an atomic operation (but that shouldn't be a problem as it's assumed that
// Set/Get calls won't be occurring until after this).
func (c *Cache) Clear() {
	// block until the processItems goroutine has returned
	c.stop <- struct{}{}
	// swap out the setBuf channel
	c.setBuf = make(chan setEvent, setBufSize)
	// clear value hashmap and policy data
	c.policy.Clear()
	c.store.Clear()
	// only reset metrics if they're enabled
	if c.Metrics != nil {
		c.Metrics.Clear()
	}
	// restart the processItems goroutine
	go c.processItems()
}
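// The compute-once behavior of GetOrCompute above is easiest to see with a
// factory closure. The sketch below is illustrative only: `cache`, `keyHash`,
// and `loadFromDisk` are assumed to exist in the caller's code and are not
// part of this package.
//
//	v, err := cache.GetOrCompute(keyHash, func() (interface{}, int64, error) {
//		// Called at most once per key, even with concurrent callers.
//		data, err := loadFromDisk(keyHash)
//		if err != nil {
//			return nil, 0, err
//		}
//		return data, int64(len(data)), nil // cost in bytes
//	})
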
// processItems is run by goroutines processing the Set buffer.
func (c *Cache) processItems() {
	for {
		select {
		case e := <-c.setBuf:
			if e.del {
				c.policy.Del(e.key)
				continue
			}
			c.handleNewItem(e.key, e.cost)
		case <-c.stop:
			return
		}
	}
}

func (c *Cache) handleNewItem(key uint64, cost int64) {
	itemInMap, ok := c.store.Get(key)
	if !ok {
		// This item was already dropped from the hash map (e.g. by a later Del);
		// ignore this event or we may leave a dangling item in the policy.
		return
	}

	// TODO: do the eviction after all events in the current batch have been handled.
	victims, added := c.policy.Add(key, cost)
	if !added {
		// The item was rejected by the admission policy, so delete it from the
		// hash map. Otherwise this dangling item would be kept in the cache forever.
		i, ok := c.store.Get(key)
		if !ok || i != itemInMap {
			return
		}
		i.Lock()
		deleted := i.del(c.store)
		i.Unlock()

		if deleted && c.onEvict != nil {
			c.onEvict(i.key, i.value.Load())
		}
		return
	}

	for _, victim := range victims {
		victim, ok = c.store.Get(victim.key)
		if !ok {
			continue
		}
		victim.Lock()
		deleted := victim.del(c.store)
		victim.Unlock()
		if deleted && c.onEvict != nil {
			c.onEvict(victim.key, victim.value.Load())
		}
	}
}

// collectMetrics just creates a new *Metrics instance and adds the pointers
// to the cache and policy instances.
func (c *Cache) collectMetrics() {
	c.Metrics = newMetrics()
	c.policy.CollectMetrics(c.Metrics)
}

type metricType int

const (
	// The following 2 keep track of hits and misses.
	hit = iota
	miss
	// The following 3 keep track of the number of keys added, updated and evicted.
	keyAdd
	keyUpdate
	keyEvict
	// The following 2 keep track of the cost of keys added and evicted.
	costAdd
	costEvict
	// The following 2 keep track of how many sets were dropped or rejected later.
	dropSets
	rejectSets
	// The following 2 keep track of how many gets were kept and dropped on the
	// floor.
	dropGets
	keepGets
	// This should be the final enum. Other enums should be set before this.
	doNotUse
)

func stringFor(t metricType) string {
	switch t {
	case hit:
		return "hit"
	case miss:
		return "miss"
	case keyAdd:
		return "keys-added"
	case keyUpdate:
		return "keys-updated"
	case keyEvict:
		return "keys-evicted"
	case costAdd:
		return "cost-added"
	case costEvict:
		return "cost-evicted"
	case dropSets:
		return "sets-dropped"
	case rejectSets:
		return "sets-rejected" // by policy.
	case dropGets:
		return "gets-dropped"
	case keepGets:
		return "gets-kept"
	default:
		return "unidentified"
	}
}

// Metrics is a snapshot of performance statistics for the lifetime of a cache
// instance.
type Metrics struct {
	all [doNotUse][]*uint64
}

func newMetrics() *Metrics {
	s := &Metrics{}
	for i := 0; i < doNotUse; i++ {
		s.all[i] = make([]*uint64, 256)
		slice := s.all[i]
		for j := range slice {
			slice[j] = new(uint64)
		}
	}
	return s
}
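// The add method below spreads increments for a single metric across the
// 256-entry counter slice rather than hammering one hot counter. As a purely
// illustrative sketch (not part of this package's API), the index computation
// maps a key hash onto one of 25 slots spaced 10 entries apart:
//
//	for _, h := range []uint64{0, 1, 24, 25} {
//		fmt.Println((h % 25) * 10) // prints 0, 10, 240, 0
//	}
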
func (p *Metrics) add(t metricType, hash, delta uint64) {
	if p == nil {
		return
	}
	valp := p.all[t]
	// Avoid false sharing by padding at least 64 bytes of space between two
	// atomic counters which would be incremented.
	idx := (hash % 25) * 10
	atomic.AddUint64(valp[idx], delta)
}

func (p *Metrics) get(t metricType) uint64 {
	if p == nil {
		return 0
	}
	valp := p.all[t]
	var total uint64
	for i := range valp {
		total += atomic.LoadUint64(valp[i])
	}
	return total
}

// Hits is the number of Get calls where a value was found for the corresponding
// key.
func (p *Metrics) Hits() uint64 {
	return p.get(hit)
}

// Misses is the number of Get calls where a value was not found for the
// corresponding key.
func (p *Metrics) Misses() uint64 {
	return p.get(miss)
}

// KeysAdded is the total number of Set calls where a new key-value item was
// added.
func (p *Metrics) KeysAdded() uint64 {
	return p.get(keyAdd)
}

// KeysUpdated is the total number of Set calls where the value was updated.
func (p *Metrics) KeysUpdated() uint64 {
	return p.get(keyUpdate)
}

// KeysEvicted is the total number of keys evicted.
func (p *Metrics) KeysEvicted() uint64 {
	return p.get(keyEvict)
}

// CostAdded is the sum of costs that have been added (successful Set calls).
func (p *Metrics) CostAdded() uint64 {
	return p.get(costAdd)
}

// CostEvicted is the sum of all costs that have been evicted.
func (p *Metrics) CostEvicted() uint64 {
	return p.get(costEvict)
}

// SetsDropped is the number of Set calls that don't make it into internal
// buffers (due to contention or some other reason).
func (p *Metrics) SetsDropped() uint64 {
	return p.get(dropSets)
}

// SetsRejected is the number of Set calls rejected by the policy (TinyLFU).
func (p *Metrics) SetsRejected() uint64 {
	return p.get(rejectSets)
}

// GetsDropped is the number of Get counter increments that are dropped
// internally.
func (p *Metrics) GetsDropped() uint64 {
	return p.get(dropGets)
}

// GetsKept is the number of Get counter increments that are kept.
func (p *Metrics) GetsKept() uint64 {
	return p.get(keepGets)
}

// Ratio is the number of Hits over all accesses (Hits + Misses). This is the
// fraction of successful Get calls.
func (p *Metrics) Ratio() float64 {
	if p == nil {
		return 0.0
	}
	hits, misses := p.get(hit), p.get(miss)
	if hits == 0 && misses == 0 {
		return 0.0
	}
	return float64(hits) / float64(hits+misses)
}

// Clear resets all metric counters to zero.
func (p *Metrics) Clear() {
	if p == nil {
		return
	}
	for i := 0; i < doNotUse; i++ {
		for j := range p.all[i] {
			atomic.StoreUint64(p.all[i][j], 0)
		}
	}
}

// String returns a space-separated summary of every metric, plus the total
// number of gets and the hit ratio.
func (p *Metrics) String() string {
	if p == nil {
		return ""
	}
	var buf bytes.Buffer
	for i := 0; i < doNotUse; i++ {
		t := metricType(i)
		fmt.Fprintf(&buf, "%s: %d ", stringFor(t), p.get(t))
	}
	fmt.Fprintf(&buf, "gets-total: %d ", p.get(hit)+p.get(miss))
	fmt.Fprintf(&buf, "hit-ratio: %.2f", p.Ratio())
	return buf.String()
}
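
// End to end, the pieces above fit together roughly as follows. This is an
// illustrative sketch rather than part of the package: the key, value, and
// cost are made-up values, and error handling is elided for brevity.
//
//	cache, err := NewCache(&Config{
//		NumCounters: 1e7,  // 10x the expected number of items
//		MaxCost:     1e8,  // total capacity, e.g. in bytes
//		BufferItems: 64,
//		Metrics:     true, // enable hit/miss statistics
//	})
//	if err != nil {
//		panic(err)
//	}
//	defer cache.Close()
//
//	cache.Set(1, []byte("value"), 5) // cost of 5 bytes
//	if v, ok := cache.Get(1); ok {
//		_ = v // the Set may still be rejected by the policy, so ok can be false
//	}
//	fmt.Println(cache.Metrics.Ratio()) // hit ratio so far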