github.com/etecs-ru/ristretto@v0.9.1/cache.go (about) 1 /* 2 * Copyright 2019 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Ristretto is a fast, fixed size, in-memory cache with a dual focus on 18 // throughput and hit ratio performance. You can easily add Ristretto to an 19 // existing system and keep the most valuable data where you need it. 20 package ristretto 21 22 import ( 23 "errors" 24 "sync" 25 "time" 26 "unsafe" 27 28 "github.com/etecs-ru/ristretto/z" 29 ) 30 31 // TODO: find the optimal value for this or make it configurable 32 var setBufSize = 32 * 1024 //nolint:gochecknoglobals // adopt fork, do not touch it 33 34 type itemCallback func(*Item) 35 36 const itemSize = int64(unsafe.Sizeof(storeItem{})) 37 38 // CacheInterface exposes the common cache functions for the purpose of mocking 39 // in unit tests. 40 type CacheInterface interface { 41 // Get returns the value (if any) and a boolean representing whether the 42 // value was found or not. The value can be nil and the boolean can be true at 43 // the same time. 44 Get(key interface{}) (interface{}, bool) 45 46 // Set attempts to add the key-value item to the cache. If it returns false, 47 // then the Set was dropped and the key-value item isn't added to the cache. If 48 // it returns true, there's still a chance it could be dropped by the policy if 49 // its determined that the key-value item isn't worth keeping, but otherwise the 50 // item will be added and other items will be evicted in order to make room. 51 // 52 // To dynamically evaluate the items cost using the Config.Coster function, set 53 // the cost parameter to 0 and Coster will be ran when needed in order to find 54 // the items true cost. 55 Set(key, value interface{}, cost int64) bool 56 57 // SetWithTTL works like Set but adds a key-value pair to the cache that will expire 58 // after the specified TTL (time to live) has passed. A zero value means the value never 59 // expires, which is identical to calling Set. A negative value is a no-op and the value 60 // is discarded. 61 SetWithTTL(key, value interface{}, cost int64, ttl time.Duration) bool 62 63 // SetIfPresent is like Set, but only updates the value of an existing key. It 64 // does NOT add the key to cache if it's absent. 65 SetIfPresent(key, value interface{}, cost int64) bool 66 67 // Del deletes the key-value item from the cache if it exists. 68 Del(key interface{}) 69 70 // GetTTL returns the TTL for the specified key and a bool that is true if the 71 // item was found and is not expired. 72 GetTTL(key interface{}) (time.Duration, bool) 73 74 // Close stops all goroutines and closes all channels. 75 Close() 76 77 // Clear empties the hashmap and zeroes all policy counters. Note that this is 78 // not an atomic operation (but that shouldn't be a problem as it's assumed that 79 // Set/Get calls won't be occurring until after this). 80 Clear() 81 82 // MaxCost returns the max cost of the cache. 83 MaxCost() int64 84 85 // UpdateMaxCost updates the maxCost of an existing cache. 86 UpdateMaxCost(maxCost int64) 87 } 88 89 // Cache is a thread-safe implementation of a hashmap with a TinyLFU admission 90 // policy and a Sampled LFU eviction policy. You can use the same Cache instance 91 // from as many goroutines as you want. 92 type Cache struct { 93 store *shardedMap 94 policy *lfuPolicy 95 getBuf *ringBuffer 96 setBuf chan *Item 97 onEvict itemCallback 98 onReject itemCallback 99 onExit func(interface{}) 100 keyToHash func(interface{}) (uint64, uint64) 101 stop chan struct{} 102 cleanupTicker *time.Ticker 103 cost func(value interface{}) int64 104 Metrics *Metrics 105 ignoreInternalCost bool 106 isClosed bool 107 } 108 109 // Verify that Cache implements the CacheInterface. 110 // https://golang.org/doc/faq#guarantee_satisfies_interface 111 var _ CacheInterface = &Cache{} 112 113 // Config is passed to NewCache for creating new Cache instances. 114 type Config struct { 115 // OnExit is called whenever a value is removed from cache. This can be 116 // used to do manual memory deallocation. Would also be called on eviction 117 // and rejection of the value. 118 OnExit func(val interface{}) 119 // KeyToHash function is used to customize the key hashing algorithm. 120 // Each key will be hashed using the provided function. If keyToHash value 121 // is not set, the default keyToHash function is used. 122 KeyToHash func(key interface{}) (uint64, uint64) 123 // ShouldUpdate is called when a value already exists in cache and is being updated. 124 ShouldUpdate func(prev, cur interface{}) bool 125 // Cost evaluates a value and outputs a corresponding cost. This function 126 // is ran after Set is called for a new item or an item update with a cost 127 // param of 0. 128 Cost func(value interface{}) int64 129 // OnEvict is called for every eviction and passes the hashed key, value, 130 // and cost to the function. 131 OnEvict func(item *Item) 132 // OnReject is called for every rejection done via the policy. 133 OnReject func(item *Item) 134 // NumCounters determines the number of counters (keys) to keep that hold 135 // access frequency information. It's generally a good idea to have more 136 // counters than the max cache capacity, as this will improve eviction 137 // accuracy and subsequent hit ratios. 138 // 139 // For example, if you expect your cache to hold 1,000,000 items when full, 140 // NumCounters should be 10,000,000 (10x). Each counter takes up roughly 141 // 3 bytes (4 bits for each counter * 4 copies plus about a byte per 142 // counter for the bloom filter). Note that the number of counters is 143 // internally rounded up to the nearest power of 2, so the space usage 144 // may be a little larger than 3 bytes * NumCounters. 145 NumCounters int64 146 // MaxCost can be considered as the cache capacity, in whatever units you 147 // choose to use. 148 // 149 // For example, if you want the cache to have a max capacity of 100MB, you 150 // would set MaxCost to 100,000,000 and pass an item's number of bytes as 151 // the `cost` parameter for calls to Set. If new items are accepted, the 152 // eviction process will take care of making room for the new item and not 153 // overflowing the MaxCost value. 154 MaxCost int64 155 // BufferItems determines the size of Get buffers. 156 // 157 // Unless you have a rare use case, using `64` as the BufferItems value 158 // results in good performance. 159 BufferItems int64 160 // Metrics determines whether cache statistics are kept during the cache's 161 // lifetime. There *is* some overhead to keeping statistics, so you should 162 // only set this flag to true when testing or throughput performance isn't a 163 // major factor. 164 Metrics bool 165 // IgnoreInternalCost set to true indicates to the cache that the cost of 166 // internally storing the value should be ignored. This is useful when the 167 // cost passed to set is not using bytes as units. Keep in mind that setting 168 // this to true will increase the memory usage. 169 IgnoreInternalCost bool 170 } 171 172 type itemFlag byte 173 174 const ( 175 itemNew itemFlag = iota 176 itemDelete 177 itemUpdate 178 ) 179 180 // Item is passed to setBuf so items can eventually be added to the cache. 181 type Item struct { 182 Expiration time.Time 183 Value interface{} 184 wg *sync.WaitGroup 185 Key uint64 186 Conflict uint64 187 Cost int64 188 flag itemFlag 189 } 190 191 // NewCache returns a new Cache instance and any configuration errors, if any. 192 func NewCache(config *Config) (*Cache, error) { 193 switch { 194 case config.NumCounters == 0: 195 return nil, errors.New("NumCounters can't be zero") 196 case config.MaxCost == 0: 197 return nil, errors.New("MaxCost can't be zero") 198 case config.BufferItems == 0: 199 return nil, errors.New("BufferItems can't be zero") 200 } 201 policy := newPolicy(config.NumCounters, config.MaxCost) 202 cache := &Cache{ 203 store: newShardedMap(config.ShouldUpdate), 204 policy: policy, 205 getBuf: newRingBuffer(policy, config.BufferItems), 206 setBuf: make(chan *Item, setBufSize), 207 keyToHash: config.KeyToHash, 208 stop: make(chan struct{}), 209 cost: config.Cost, 210 ignoreInternalCost: config.IgnoreInternalCost, 211 cleanupTicker: time.NewTicker(time.Duration(bucketDurationSecs) * time.Second / 2), 212 } 213 cache.onExit = func(val interface{}) { 214 if config.OnExit != nil && val != nil { 215 config.OnExit(val) 216 } 217 } 218 cache.onEvict = func(item *Item) { 219 if config.OnEvict != nil { 220 config.OnEvict(item) 221 } 222 cache.onExit(item.Value) 223 } 224 cache.onReject = func(item *Item) { 225 if config.OnReject != nil { 226 config.OnReject(item) 227 } 228 cache.onExit(item.Value) 229 } 230 cache.store.shouldUpdate = func(prev, cur interface{}) bool { 231 if config.ShouldUpdate != nil { 232 return config.ShouldUpdate(prev, cur) 233 } 234 return true 235 } 236 if cache.keyToHash == nil { 237 cache.keyToHash = z.KeyToHash 238 } 239 if config.Metrics { 240 cache.collectMetrics() 241 } 242 // NOTE: benchmarks seem to show that performance decreases the more 243 // goroutines we have running cache.processItems(), so 1 should 244 // usually be sufficient 245 go cache.processItems() 246 return cache, nil 247 } 248 249 func (c *Cache) Wait() { 250 if c == nil || c.isClosed { 251 return 252 } 253 wg := &sync.WaitGroup{} 254 wg.Add(1) 255 c.setBuf <- &Item{wg: wg} 256 wg.Wait() 257 } 258 259 // Get returns the value (if any) and a boolean representing whether the 260 // value was found or not. The value can be nil and the boolean can be true at 261 // the same time. 262 func (c *Cache) Get(key interface{}) (interface{}, bool) { 263 if c == nil || c.isClosed || key == nil { 264 return nil, false 265 } 266 keyHash, conflictHash := c.keyToHash(key) 267 c.getBuf.Push(keyHash) 268 value, ok := c.store.Get(keyHash, conflictHash) 269 if ok { 270 c.Metrics.add(hit, keyHash, 1) 271 } else { 272 c.Metrics.add(miss, keyHash, 1) 273 } 274 return value, ok 275 } 276 277 // Set attempts to add the key-value item to the cache. If it returns false, 278 // then the Set was dropped and the key-value item isn't added to the cache. If 279 // it returns true, there's still a chance it could be dropped by the policy if 280 // its determined that the key-value item isn't worth keeping, but otherwise the 281 // item will be added and other items will be evicted in order to make room. 282 // 283 // To dynamically evaluate the items cost using the Config.Coster function, set 284 // the cost parameter to 0 and Coster will be ran when needed in order to find 285 // the items true cost. 286 func (c *Cache) Set(key, value interface{}, cost int64) bool { 287 return c.SetWithTTL(key, value, cost, 0*time.Second) 288 } 289 290 // SetWithTTL works like Set but adds a key-value pair to the cache that will expire 291 // after the specified TTL (time to live) has passed. A zero value means the value never 292 // expires, which is identical to calling Set. A negative value is a no-op and the value 293 // is discarded. 294 func (c *Cache) SetWithTTL(key, value interface{}, cost int64, ttl time.Duration) bool { 295 return c.setInternal(key, value, cost, ttl, false) 296 } 297 298 // SetIfPresent is like Set, but only updates the value of an existing key. It 299 // does NOT add the key to cache if it's absent. 300 func (c *Cache) SetIfPresent(key, value interface{}, cost int64) bool { 301 return c.setInternal(key, value, cost, 0*time.Second, true) 302 } 303 304 func (c *Cache) setInternal(key, value interface{}, 305 cost int64, ttl time.Duration, onlyUpdate bool) bool { 306 if c == nil || c.isClosed || key == nil { 307 return false 308 } 309 310 var expiration time.Time 311 switch { 312 case ttl == 0: 313 // No expiration. 314 break 315 case ttl < 0: 316 // Treat this a a no-op. 317 return false 318 default: 319 expiration = time.Now().Add(ttl) 320 } 321 322 keyHash, conflictHash := c.keyToHash(key) 323 i := &Item{ 324 flag: itemNew, 325 Key: keyHash, 326 Conflict: conflictHash, 327 Value: value, 328 Cost: cost, 329 Expiration: expiration, 330 } 331 if onlyUpdate { 332 i.flag = itemUpdate 333 } 334 // cost is eventually updated. The expiration must also be immediately updated 335 // to prevent items from being prematurely removed from the map. 336 if prev, ok := c.store.Update(i); ok { 337 c.onExit(prev) 338 i.flag = itemUpdate 339 } else if onlyUpdate { 340 // The instruction was to update the key, but store.Update failed. So, 341 // this is a NOOP. 342 return false 343 } 344 // Attempt to send item to policy. 345 select { 346 case c.setBuf <- i: 347 return true 348 default: 349 if i.flag == itemUpdate { 350 // Return true if this was an update operation since we've already 351 // updated the store. For all the other operations (set/delete), we 352 // return false which means the item was not inserted. 353 return true 354 } 355 c.Metrics.add(dropSets, keyHash, 1) 356 return false 357 } 358 } 359 360 // Del deletes the key-value item from the cache if it exists. 361 func (c *Cache) Del(key interface{}) { 362 if c == nil || c.isClosed || key == nil { 363 return 364 } 365 keyHash, conflictHash := c.keyToHash(key) 366 // Delete immediately. 367 _, prev := c.store.Del(keyHash, conflictHash) 368 c.onExit(prev) 369 // If we've set an item, it would be applied slightly later. 370 // So we must push the same item to `setBuf` with the deletion flag. 371 // This ensures that if a set is followed by a delete, it will be 372 // applied in the correct order. 373 c.setBuf <- &Item{ 374 flag: itemDelete, 375 Key: keyHash, 376 Conflict: conflictHash, 377 } 378 } 379 380 // GetTTL returns the TTL for the specified key and a bool that is true if the 381 // item was found and is not expired. 382 func (c *Cache) GetTTL(key interface{}) (time.Duration, bool) { 383 if c == nil || key == nil { 384 return 0, false 385 } 386 387 keyHash, conflictHash := c.keyToHash(key) 388 if _, ok := c.store.Get(keyHash, conflictHash); !ok { 389 // not found 390 return 0, false 391 } 392 393 expiration := c.store.Expiration(keyHash) 394 if expiration.IsZero() { 395 // found but no expiration 396 return 0, true 397 } 398 399 if time.Now().After(expiration) { 400 // found but expired 401 return 0, false 402 } 403 404 return time.Until(expiration), true 405 } 406 407 // Close stops all goroutines and closes all channels. 408 func (c *Cache) Close() { 409 if c == nil || c.isClosed { 410 return 411 } 412 c.Clear() 413 414 // Block until processItems goroutine is returned. 415 c.stop <- struct{}{} 416 close(c.stop) 417 close(c.setBuf) 418 c.policy.Close() 419 c.isClosed = true 420 } 421 422 // Clear empties the hashmap and zeroes all policy counters. Note that this is 423 // not an atomic operation (but that shouldn't be a problem as it's assumed that 424 // Set/Get calls won't be occurring until after this). 425 func (c *Cache) Clear() { 426 if c == nil || c.isClosed { 427 return 428 } 429 // Block until processItems goroutine is returned. 430 c.stop <- struct{}{} 431 432 // Clear out the setBuf channel. 433 loop: 434 for { 435 select { 436 case i := <-c.setBuf: 437 if i.wg != nil { 438 i.wg.Done() 439 continue 440 } 441 if i.flag != itemUpdate { 442 // In itemUpdate, the value is already set in the store. So, no need to call 443 // onEvict here. 444 c.onEvict(i) 445 } 446 default: 447 break loop 448 } 449 } 450 451 // Clear value hashmap and policy data. 452 c.policy.Clear() 453 c.store.Clear(c.onEvict) 454 // Only reset metrics if they're enabled. 455 if c.Metrics != nil { 456 c.Metrics.Clear() 457 } 458 // Restart processItems goroutine. 459 go c.processItems() 460 } 461 462 // MaxCost returns the max cost of the cache. 463 func (c *Cache) MaxCost() int64 { 464 if c == nil { 465 return 0 466 } 467 return c.policy.MaxCost() 468 } 469 470 // UpdateMaxCost updates the maxCost of an existing cache. 471 func (c *Cache) UpdateMaxCost(maxCost int64) { 472 if c == nil { 473 return 474 } 475 c.policy.UpdateMaxCost(maxCost) 476 } 477 478 // processItems is ran by goroutines processing the Set buffer. 479 func (c *Cache) processItems() { 480 startTs := make(map[uint64]time.Time) 481 numToKeep := 100000 // TODO: Make this configurable via options. 482 483 trackAdmission := func(key uint64) { 484 if c.Metrics == nil { 485 return 486 } 487 startTs[key] = time.Now() 488 if len(startTs) > numToKeep { 489 for k := range startTs { 490 if len(startTs) <= numToKeep { 491 break 492 } 493 delete(startTs, k) 494 } 495 } 496 } 497 onEvict := func(i *Item) { 498 if ts, has := startTs[i.Key]; has { 499 c.Metrics.trackEviction(int64(time.Since(ts) / time.Second)) 500 delete(startTs, i.Key) 501 } 502 if c.onEvict != nil { 503 c.onEvict(i) 504 } 505 } 506 507 for { 508 select { 509 case i := <-c.setBuf: 510 if i.wg != nil { 511 i.wg.Done() 512 continue 513 } 514 // Calculate item cost value if new or update. 515 if i.Cost == 0 && c.cost != nil && i.flag != itemDelete { 516 i.Cost = c.cost(i.Value) 517 } 518 if !c.ignoreInternalCost { 519 // Add the cost of internally storing the object. 520 i.Cost += itemSize 521 } 522 523 switch i.flag { 524 case itemNew: 525 victims, added := c.policy.Add(i.Key, i.Cost) 526 if added { 527 c.store.Set(i) 528 c.Metrics.add(keyAdd, i.Key, 1) 529 trackAdmission(i.Key) 530 } else { 531 c.onReject(i) 532 } 533 for _, victim := range victims { 534 victim.Conflict, victim.Value = c.store.Del(victim.Key, 0) 535 onEvict(victim) 536 } 537 538 case itemUpdate: 539 c.policy.Update(i.Key, i.Cost) 540 541 case itemDelete: 542 c.policy.Del(i.Key) // Deals with metrics updates. 543 _, val := c.store.Del(i.Key, i.Conflict) 544 c.onExit(val) 545 } 546 case <-c.cleanupTicker.C: 547 c.store.Cleanup(c.policy, onEvict) 548 case <-c.stop: 549 return 550 } 551 } 552 }