github.com/outcaste-io/ristretto@v0.2.3/cache.go

/*
 * Copyright 2019 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Ristretto is a fast, fixed size, in-memory cache with a dual focus on
// throughput and hit ratio performance. You can easily add Ristretto to an
// existing system and keep the most valuable data where you need it.
package ristretto

import (
	"errors"
	"sync"
	"time"
	"unsafe"

	"github.com/outcaste-io/ristretto/z"
	"go.uber.org/atomic"
)

var (
	// TODO: find the optimal value for this or make it configurable
	setBufSize = 32 * 1024
)

type itemCallback func(*Item)

const itemSize = int64(unsafe.Sizeof(storeItem{}))

// Cache is a thread-safe implementation of a hashmap with a TinyLFU admission
// policy and a Sampled LFU eviction policy. You can use the same Cache instance
// from as many goroutines as you want.
type Cache struct {
	// store is the central concurrent hashmap where key-value items are stored.
	store *shardedMap
	// policy determines what gets let in to the cache and what gets kicked out.
	policy *lfuPolicy
	// getBuf is a custom ring buffer implementation that gets pushed to when
	// keys are read.
	getBuf *ringBuffer
	// setBuf is a buffer allowing us to batch/drop Sets during times of high
	// contention.
	setBuf chan *Item
	// onEvict is called for item evictions.
	onEvict itemCallback
	// onReject is called when an item is rejected via admission policy.
	onReject itemCallback
	// onExit is called whenever a value goes out of scope from the cache.
	onExit (func(interface{}))
	// KeyToHash function is used to customize the key hashing algorithm.
	// Each key will be hashed using the provided function. If keyToHash value
	// is not set, the default keyToHash function is used.
	keyToHash func(interface{}) (uint64, uint64)
	// stop is used to stop the processItems goroutine.
	stop chan struct{}
	// indicates whether cache is closed.
	isClosed atomic.Bool
	// cost calculates cost from a value.
	cost func(value interface{}) int64
	// ignoreInternalCost dictates whether to ignore the cost of internally storing
	// the item in the cost calculation.
	ignoreInternalCost bool
	// cleanupTicker is used to periodically check for entries whose TTL has passed.
	cleanupTicker *time.Ticker
	// Metrics contains a running log of important statistics like hits, misses,
	// and dropped items.
	Metrics *Metrics
}
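
// The function below is an illustrative sketch added for this write-up; it is
// not part of the original cache.go. It demonstrates the guarantee stated in
// the Cache doc comment above: a single Cache instance can be shared freely
// across goroutines without external locking. The function name, key/value
// choices, and the NumCounters/MaxCost/BufferItems values (taken from the
// Config field docs below) are assumptions for the example.
func exampleConcurrentAccess() {
	cache, err := NewCache(&Config{
		NumCounters: 1e7,     // ~10x the number of items expected when full.
		MaxCost:     1 << 30, // Capacity in cost units (here, roughly 1GB of bytes).
		BufferItems: 64,      // The recommended Get-buffer size.
	})
	if err != nil {
		panic(err)
	}
	defer cache.Close()

	var wg sync.WaitGroup
	for n := 0; n < 4; n++ {
		wg.Add(1)
		go func(n int) {
			defer wg.Done()
			// The same instance handles both reads and writes concurrently.
			cache.Set(n, n*n, 1)
			cache.Get(n)
		}(n)
	}
	wg.Wait()
}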

// Config is passed to NewCache for creating new Cache instances.
type Config struct {
	// NumCounters determines the number of counters (keys) to keep that hold
	// access frequency information. It's generally a good idea to have more
	// counters than the max cache capacity, as this will improve eviction
	// accuracy and subsequent hit ratios.
	//
	// For example, if you expect your cache to hold 1,000,000 items when full,
	// NumCounters should be 10,000,000 (10x). Each counter takes up roughly
	// 3 bytes (4 bits for each counter * 4 copies plus about a byte per
	// counter for the bloom filter). Note that the number of counters is
	// internally rounded up to the nearest power of 2, so the space usage
	// may be a little larger than 3 bytes * NumCounters.
	NumCounters int64
	// MaxCost can be considered as the cache capacity, in whatever units you
	// choose to use.
	//
	// For example, if you want the cache to have a max capacity of 100MB, you
	// would set MaxCost to 100,000,000 and pass an item's number of bytes as
	// the `cost` parameter for calls to Set. If new items are accepted, the
	// eviction process will take care of making room for the new item and not
	// overflowing the MaxCost value.
	MaxCost int64
	// BufferItems determines the size of Get buffers.
	//
	// Unless you have a rare use case, using `64` as the BufferItems value
	// results in good performance.
	BufferItems int64
	// Metrics determines whether cache statistics are kept during the cache's
	// lifetime. There *is* some overhead to keeping statistics, so you should
	// only set this flag to true when testing or throughput performance isn't a
	// major factor.
	Metrics bool
	// OnEvict is called for every eviction and passes the hashed key, value,
	// and cost to the function.
	OnEvict func(item *Item)
	// OnReject is called for every rejection done via the policy.
	OnReject func(item *Item)
	// OnExit is called whenever a value is removed from the cache. This can be
	// used to do manual memory deallocation. It is also called on eviction and
	// rejection of the value.
	OnExit func(val interface{})
	// KeyToHash function is used to customize the key hashing algorithm.
	// Each key will be hashed using the provided function. If KeyToHash is
	// not set, the default keyToHash function is used.
	KeyToHash func(key interface{}) (uint64, uint64)
	// ShouldUpdate is called when a value already exists in the cache and is
	// being updated.
	ShouldUpdate func(prev, cur interface{}) bool
	// Cost evaluates a value and outputs a corresponding cost. This function
	// is run after Set is called for a new item or an item update with a cost
	// param of 0.
	Cost func(value interface{}) int64
	// IgnoreInternalCost set to true indicates to the cache that the cost of
	// internally storing the value should be ignored. This is useful when the
	// cost passed to Set is not using bytes as units. Keep in mind that setting
	// this to true will increase the memory usage.
	IgnoreInternalCost bool
}

type itemFlag byte

const (
	itemNew itemFlag = iota
	itemDelete
	itemUpdate
)

// Item is passed to setBuf so items can eventually be added to the cache.
type Item struct {
	flag       itemFlag
	Key        uint64
	Conflict   uint64
	Value      interface{}
	Cost       int64
	Expiration time.Time
	wg         *sync.WaitGroup
}
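
// The helper below is an illustrative sketch added for this write-up, not part
// of the original source. It assembles a Config following the field comments
// above: NumCounters at ~10x the expected item count, MaxCost of 100,000,000
// for a ~100MB cache with costs measured in bytes, and BufferItems of 64. The
// helper name and the []byte-based Cost function are assumptions made for the
// sake of the example.
func exampleConfig() *Config {
	return &Config{
		NumCounters: 1e7, // Expecting up to ~1,000,000 items, so keep 10x counters.
		MaxCost:     1e8, // 100,000,000 cost units; ~100MB when cost is bytes.
		BufferItems: 64,  // Recommended Get-buffer size.
		Metrics:     true,
		Cost: func(value interface{}) int64 {
			// Used whenever Set is called with a cost of 0.
			if b, ok := value.([]byte); ok {
				return int64(len(b))
			}
			return 1
		},
		OnEvict: func(item *Item) {
			// Receives the evicted item's hashed Key, Conflict, Value, and Cost.
			_ = item
		},
	}
}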

// NewCache returns a new Cache instance and an error if the configuration is
// invalid.
func NewCache(config *Config) (*Cache, error) {
	switch {
	case config.NumCounters == 0:
		return nil, errors.New("NumCounters can't be zero")
	case config.MaxCost == 0:
		return nil, errors.New("MaxCost can't be zero")
	case config.BufferItems == 0:
		return nil, errors.New("BufferItems can't be zero")
	}
	policy := newPolicy(config.NumCounters, config.MaxCost)
	cache := &Cache{
		store:              newShardedMap(config.ShouldUpdate),
		policy:             policy,
		getBuf:             newRingBuffer(policy, config.BufferItems),
		setBuf:             make(chan *Item, setBufSize),
		keyToHash:          config.KeyToHash,
		stop:               make(chan struct{}),
		cost:               config.Cost,
		ignoreInternalCost: config.IgnoreInternalCost,
		cleanupTicker:      time.NewTicker(time.Duration(bucketDurationSecs) * time.Second / 2),
	}
	cache.onExit = func(val interface{}) {
		if config.OnExit != nil && val != nil {
			config.OnExit(val)
		}
	}
	cache.onEvict = func(item *Item) {
		if config.OnEvict != nil {
			config.OnEvict(item)
		}
		cache.onExit(item.Value)
	}
	cache.onReject = func(item *Item) {
		if config.OnReject != nil {
			config.OnReject(item)
		}
		cache.onExit(item.Value)
	}
	cache.store.shouldUpdate = func(prev, cur interface{}) bool {
		if config.ShouldUpdate != nil {
			return config.ShouldUpdate(prev, cur)
		}
		return true
	}
	if cache.keyToHash == nil {
		cache.keyToHash = z.KeyToHash
	}
	if config.Metrics {
		cache.collectMetrics()
	}
	// NOTE: benchmarks seem to show that performance decreases the more
	// goroutines we have running cache.processItems(), so 1 should
	// usually be sufficient
	go cache.processItems()
	return cache, nil
}

// Wait blocks until all buffered writes have been applied, so that a prior
// call to Set is visible to subsequent calls to Get.
func (c *Cache) Wait() {
	if c == nil || c.isClosed.Load() {
		return
	}
	wg := &sync.WaitGroup{}
	wg.Add(1)
	c.setBuf <- &Item{wg: wg}
	wg.Wait()
}

// Get returns the value (if any) and a boolean representing whether the
// value was found or not. The value can be nil and the boolean can be true at
// the same time.
func (c *Cache) Get(key interface{}) (interface{}, bool) {
	if c == nil || c.isClosed.Load() || key == nil {
		return nil, false
	}
	keyHash, conflictHash := c.keyToHash(key)
	c.getBuf.Push(keyHash)
	value, ok := c.store.Get(keyHash, conflictHash)
	if ok {
		c.Metrics.add(hit, keyHash, 1)
	} else {
		c.Metrics.add(miss, keyHash, 1)
	}
	return value, ok
}

// Set attempts to add the key-value item to the cache. If it returns false,
// then the Set was dropped and the key-value item isn't added to the cache. If
// it returns true, there's still a chance it could be dropped by the policy if
// it's determined that the key-value item isn't worth keeping, but otherwise the
// item will be added and other items will be evicted in order to make room.
//
// To dynamically evaluate the item's cost using the Config.Cost function, set
// the cost parameter to 0 and Cost will be run when needed in order to find
// the item's true cost.
func (c *Cache) Set(key, value interface{}, cost int64) bool {
	return c.SetWithTTL(key, value, cost, 0*time.Second)
}
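
// The function below is an illustrative sketch added for this write-up, not
// part of the original source. Because new items travel through setBuf and are
// applied asynchronously by processItems, a Get issued immediately after Set
// may miss; Wait flushes the buffer first. The key, value, and cost are
// arbitrary, and even after Wait the admission policy may still have rejected
// the item.
func exampleSetThenGet(cache *Cache) (interface{}, bool) {
	if !cache.Set("key", "value", 1) {
		// The Set was dropped under contention, so there is nothing to read back.
		return nil, false
	}
	// Block until the buffered Set (and everything queued before it) is applied.
	cache.Wait()
	return cache.Get("key")
}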

// SetWithTTL works like Set but adds a key-value pair to the cache that will expire
// after the specified TTL (time to live) has passed. A zero value means the value never
// expires, which is identical to calling Set. A negative value is a no-op and the value
// is discarded.
func (c *Cache) SetWithTTL(key, value interface{}, cost int64, ttl time.Duration) bool {
	return c.setInternal(key, value, cost, ttl, false)
}

// SetIfPresent is like Set, but only updates the value of an existing key. It
// does NOT add the key to the cache if it's absent.
func (c *Cache) SetIfPresent(key, value interface{}, cost int64) bool {
	return c.setInternal(key, value, cost, 0*time.Second, true)
}

func (c *Cache) setInternal(key, value interface{},
	cost int64, ttl time.Duration, onlyUpdate bool) bool {
	if c == nil || c.isClosed.Load() || key == nil {
		return false
	}

	var expiration time.Time
	switch {
	case ttl == 0:
		// No expiration.
		break
	case ttl < 0:
		// Treat this as a no-op.
		return false
	default:
		expiration = time.Now().Add(ttl)
	}

	keyHash, conflictHash := c.keyToHash(key)
	i := &Item{
		flag:       itemNew,
		Key:        keyHash,
		Conflict:   conflictHash,
		Value:      value,
		Cost:       cost,
		Expiration: expiration,
	}
	if onlyUpdate {
		i.flag = itemUpdate
	}
	// cost is eventually updated. The expiration must also be immediately updated
	// to prevent items from being prematurely removed from the map.
	if prev, ok := c.store.Update(i); ok {
		c.onExit(prev)
		i.flag = itemUpdate
	} else if onlyUpdate {
		// The instruction was to update the key, but store.Update failed. So,
		// this is a NOOP.
		return false
	}
	// Attempt to send item to policy.
	select {
	case c.setBuf <- i:
		return true
	default:
		if i.flag == itemUpdate {
			// Return true if this was an update operation since we've already
			// updated the store. For all the other operations (set/delete), we
			// return false which means the item was not inserted.
			return true
		}
		c.Metrics.add(dropSets, keyHash, 1)
		return false
	}
}

// Del deletes the key-value item from the cache if it exists.
func (c *Cache) Del(key interface{}) {
	if c == nil || c.isClosed.Load() || key == nil {
		return
	}
	keyHash, conflictHash := c.keyToHash(key)
	// Delete immediately.
	_, prev := c.store.Del(keyHash, conflictHash)
	c.onExit(prev)
	// If we've set an item, it would be applied slightly later.
	// So we must push the same item to `setBuf` with the deletion flag.
	// This ensures that if a set is followed by a delete, it will be
	// applied in the correct order.
	c.setBuf <- &Item{
		flag:     itemDelete,
		Key:      keyHash,
		Conflict: conflictHash,
	}
}

// GetTTL returns the TTL for the specified key and a bool that is true if the
// item was found and is not expired.
func (c *Cache) GetTTL(key interface{}) (time.Duration, bool) {
	if c == nil || key == nil {
		return 0, false
	}

	keyHash, conflictHash := c.keyToHash(key)
	if _, ok := c.store.Get(keyHash, conflictHash); !ok {
		// not found
		return 0, false
	}

	expiration := c.store.Expiration(keyHash)
	if expiration.IsZero() {
		// found but no expiration
		return 0, true
	}

	if time.Now().After(expiration) {
		// found but expired
		return 0, false
	}

	return time.Until(expiration), true
}
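
// The function below is an illustrative sketch added for this write-up, not
// part of the original source. It stores a value with a five-minute TTL via
// SetWithTTL and then asks GetTTL how long the entry has left. The key, value,
// and duration are arbitrary choices.
func exampleTTL(cache *Cache) time.Duration {
	cache.SetWithTTL("session", "token", 1, 5*time.Minute)
	cache.Wait() // Make sure the buffered Set has been applied.
	if remaining, ok := cache.GetTTL("session"); ok {
		// ok is true only if the key is present and not yet expired.
		return remaining
	}
	return 0
}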

// Close stops all goroutines and closes all channels.
func (c *Cache) Close() {
	if c == nil || c.isClosed.Load() {
		return
	}
	c.Clear()

	// Block until the processItems goroutine returns.
	c.stop <- struct{}{}
	close(c.stop)
	close(c.setBuf)
	c.policy.Close()
	c.isClosed.Store(true)
}

// Clear empties the hashmap and zeroes all policy counters. Note that this is
// not an atomic operation (but that shouldn't be a problem as it's assumed that
// Set/Get calls won't be occurring until after this).
func (c *Cache) Clear() {
	if c == nil || c.isClosed.Load() {
		return
	}
	// Block until the processItems goroutine returns.
	c.stop <- struct{}{}

	// Clear out the setBuf channel.
loop:
	for {
		select {
		case i := <-c.setBuf:
			if i.wg != nil {
				i.wg.Done()
				continue
			}
			if i.flag != itemUpdate {
				// In itemUpdate, the value is already set in the store. So, no need to call
				// onEvict here.
				c.onEvict(i)
			}
		default:
			break loop
		}
	}

	// Clear value hashmap and policy data.
	c.policy.Clear()
	c.store.Clear(c.onEvict)
	// Only reset metrics if they're enabled.
	if c.Metrics != nil {
		c.Metrics.Clear()
	}
	// Restart processItems goroutine.
	go c.processItems()
}

// MaxCost returns the max cost of the cache.
func (c *Cache) MaxCost() int64 {
	if c == nil {
		return 0
	}
	return c.policy.MaxCost()
}

// UpdateMaxCost updates the maxCost of an existing cache.
func (c *Cache) UpdateMaxCost(maxCost int64) {
	if c == nil {
		return
	}
	c.policy.UpdateMaxCost(maxCost)
}

// processItems is run by goroutines processing the Set buffer.
func (c *Cache) processItems() {
	startTs := make(map[uint64]time.Time)
	numToKeep := 100000 // TODO: Make this configurable via options.

	trackAdmission := func(key uint64) {
		if c.Metrics == nil {
			return
		}
		startTs[key] = time.Now()
		if len(startTs) > numToKeep {
			for k := range startTs {
				if len(startTs) <= numToKeep {
					break
				}
				delete(startTs, k)
			}
		}
	}
	onEvict := func(i *Item) {
		if ts, has := startTs[i.Key]; has {
			c.Metrics.trackEviction(int64(time.Since(ts) / time.Second))
			delete(startTs, i.Key)
		}
		if c.onEvict != nil {
			c.onEvict(i)
		}
	}

	for {
		select {
		case i := <-c.setBuf:
			if i.wg != nil {
				i.wg.Done()
				continue
			}
			// Calculate item cost value if new or update.
			if i.Cost == 0 && c.cost != nil && i.flag != itemDelete {
				i.Cost = c.cost(i.Value)
			}
			if !c.ignoreInternalCost {
				// Add the cost of internally storing the object.
				i.Cost += itemSize
			}

			switch i.flag {
			case itemNew:
				victims, added := c.policy.Add(i.Key, i.Cost)
				if added {
					c.store.Set(i)
					c.Metrics.add(keyAdd, i.Key, 1)
					trackAdmission(i.Key)
				} else {
					c.onReject(i)
				}
				for _, victim := range victims {
					victim.Conflict, victim.Value = c.store.Del(victim.Key, 0)
					onEvict(victim)
				}

			case itemUpdate:
				c.policy.Update(i.Key, i.Cost)

			case itemDelete:
				c.policy.Del(i.Key) // Deals with metrics updates.
				_, val := c.store.Del(i.Key, i.Conflict)
				c.onExit(val)
			}
		case <-c.cleanupTicker.C:
			c.store.Cleanup(c.policy, onEvict)
		case <-c.stop:
			return
		}
	}
}
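
// The function below is an illustrative sketch added for this write-up, not
// part of the original source. It shows the capacity and lifecycle helpers
// defined above: MaxCost/UpdateMaxCost to resize a live cache, Clear to drop
// everything while keeping the cache usable, and Close to shut it down.
// Halving the capacity is an arbitrary choice for the example.
func exampleResizeAndShutdown(cache *Cache) {
	// Shrink the cache; the policy evicts as needed on later Sets.
	cache.UpdateMaxCost(cache.MaxCost() / 2)

	// Drop all entries and reset policy counters (and metrics, if enabled).
	cache.Clear()

	// Stop the processItems goroutine and close internal channels. Further
	// calls on the cache become no-ops.
	cache.Close()
}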