github.com/outcaste-io/ristretto@v0.2.3/cache.go (about)

     1  /*
     2   * Copyright 2019 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Ristretto is a fast, fixed size, in-memory cache with a dual focus on
    18  // throughput and hit ratio performance. You can easily add Ristretto to an
    19  // existing system and keep the most valuable data where you need it.
    20  package ristretto
    21  
    22  import (
    23  	"errors"
    24  	"sync"
    25  	"time"
    26  	"unsafe"
    27  
    28  	"github.com/outcaste-io/ristretto/z"
    29  	"go.uber.org/atomic"
    30  )
    31  
    32  var (
    33  	// setBufSize is the capacity of the setBuf channel created by NewCache. TODO: find the optimal value for this or make it configurable
    34  	setBufSize = 32 * 1024
    35  )
    36  
    37  type itemCallback func(*Item) // itemCallback is the signature of the Cache's onEvict and onReject hooks.
    38  
    39  const itemSize = int64(unsafe.Sizeof(storeItem{})) // itemSize is the size of a storeItem; processItems adds it to an item's cost unless ignoreInternalCost is set.
    40  
    41  // Cache is a thread-safe implementation of a hashmap with a TinyLFU admission
    42  // policy and a Sampled LFU eviction policy. You can use the same Cache instance
    43  // from as many goroutines as you want.
    44  type Cache struct {
    45  	// store is the central concurrent hashmap where key-value items are stored.
    46  	store *shardedMap
    47  	// policy determines what gets let in to the cache and what gets kicked out.
    48  	policy *lfuPolicy
    49  	// getBuf is a custom ring buffer implementation that gets pushed to when
    50  	// keys are read.
    51  	getBuf *ringBuffer
    52  	// setBuf is a buffer allowing us to batch/drop Sets during times of high
    53  	// contention. It is drained by the processItems goroutine.
    54  	setBuf chan *Item
    55  	// onEvict is called for item evictions; it also invokes onExit on the item's value.
    56  	onEvict itemCallback
    57  	// onReject is called when an item is rejected via admission policy; it also invokes onExit.
    58  	onReject itemCallback
    59  	// onExit is called whenever a value goes out of scope from the cache.
    60  	onExit (func(interface{}))
    61  	// keyToHash is used to customize the key hashing algorithm.
    62  	// Each key will be hashed using this function. If Config.KeyToHash
    63  	// is not set, NewCache installs z.KeyToHash here.
    64  	keyToHash func(interface{}) (uint64, uint64)
    65  	// stop is used to tell the processItems goroutine to return.
    66  	stop chan struct{}
    67  	// isClosed indicates whether the cache has been closed.
    68  	isClosed atomic.Bool
    69  	// cost calculates cost from a value; processItems calls it when an item's cost is 0.
    70  	cost func(value interface{}) int64
    71  	// ignoreInternalCost dictates whether to ignore the cost of internally storing
    72  	// the item in the cost calculation.
    73  	ignoreInternalCost bool
    74  	// cleanupTicker is used to periodically check for entries whose TTL has passed.
    75  	cleanupTicker *time.Ticker
    76  	// Metrics contains a running log of important statistics like hits, misses,
    77  	// and dropped items.
    78  	Metrics *Metrics
    79  }
    80  
    81  // Config is passed to NewCache for creating new Cache instances.
    82  type Config struct {
    83  	// NumCounters determines the number of counters (keys) to keep that hold
    84  	// access frequency information. It's generally a good idea to have more
    85  	// counters than the max cache capacity, as this will improve eviction
    86  	// accuracy and subsequent hit ratios.
    87  	//
    88  	// For example, if you expect your cache to hold 1,000,000 items when full,
    89  	// NumCounters should be 10,000,000 (10x). Each counter takes up roughly
    90  	// 3 bytes (4 bits for each counter * 4 copies plus about a byte per
    91  	// counter for the bloom filter). Note that the number of counters is
    92  	// internally rounded up to the nearest power of 2, so the space usage
    93  	// may be a little larger than 3 bytes * NumCounters.
    94  	NumCounters int64
    95  	// MaxCost can be considered as the cache capacity, in whatever units you
    96  	// choose to use.
    97  	//
    98  	// For example, if you want the cache to have a max capacity of 100MB, you
    99  	// would set MaxCost to 100,000,000 and pass an item's number of bytes as
   100  	// the `cost` parameter for calls to Set. If new items are accepted, the
   101  	// eviction process will take care of making room for the new item and not
   102  	// overflowing the MaxCost value.
   103  	MaxCost int64
   104  	// BufferItems determines the size of Get buffers.
   105  	//
   106  	// Unless you have a rare use case, using `64` as the BufferItems value
   107  	// results in good performance.
   108  	BufferItems int64
   109  	// Metrics determines whether cache statistics are kept during the cache's
   110  	// lifetime. There *is* some overhead to keeping statistics, so you should
   111  	// only set this flag to true when testing or throughput performance isn't a
   112  	// major factor.
   113  	Metrics bool
   114  	// OnEvict is called for every eviction and passes the evicted item (hashed
   115  	// key, value, and cost) to the function.
   116  	OnEvict func(item *Item)
   117  	// OnReject is called for every rejection done via the policy.
   118  	OnReject func(item *Item)
   119  	// OnExit is called whenever a value is removed from cache. This can be
   120  	// used to do manual memory deallocation. Would also be called on eviction
   121  	// and rejection of the value. It is never called with a nil value.
   122  	OnExit func(val interface{})
   123  	// KeyToHash function is used to customize the key hashing algorithm.
   124  	// Each key will be hashed using the provided function. If KeyToHash
   125  	// is not set, the default z.KeyToHash function is used.
   126  	KeyToHash func(key interface{}) (uint64, uint64)
   127  	// ShouldUpdate is called when a value already exists in cache and is being updated. When nil, every update is accepted.
   128  	ShouldUpdate func(prev, cur interface{}) bool
   129  	// Cost evaluates a value and outputs a corresponding cost. This function
   130  	// is run after Set is called for a new item or an item update with a cost
   131  	// param of 0.
   132  	Cost func(value interface{}) int64
   133  	// IgnoreInternalCost set to true indicates to the cache that the cost of
   134  	// internally storing the value should be ignored. This is useful when the
   135  	// cost passed to Set is not using bytes as units. Keep in mind that setting
   136  	// this to true will increase the memory usage.
   137  	IgnoreInternalCost bool
   138  }
   139  
   140  type itemFlag byte // itemFlag tags an Item with the operation it carries through setBuf.
   141  
   142  const (
   143  	itemNew itemFlag = iota // initial flag of every item built by setInternal
   144  	itemDelete // flag of the marker item that Del pushes onto setBuf
   145  	itemUpdate // set when only an update was requested or store.Update replaced an existing entry
   146  )
   147  
   148  // Item is passed to setBuf so items can eventually be added to the cache.
   149  // It is also the argument handed to the OnEvict and OnReject callbacks.
   150  type Item struct {
   151  	flag       itemFlag // operation this item represents (new, delete or update)
   152  	Key        uint64 // first hash returned by the cache's keyToHash function
   153  	Conflict   uint64 // second hash returned by the cache's keyToHash function
   154  	Value      interface{} // value supplied by the caller; left unset on delete markers
   155  	Cost       int64 // cost supplied by the caller; processItems may recompute and adjust it
   156  	Expiration time.Time // zero when the item was set without a TTL
   157  	wg         *sync.WaitGroup // non-nil only on the marker item created by Wait
   158  }
   158  
   159  // NewCache returns a new Cache instance and any configuration errors, if any.
   160  func NewCache(config *Config) (*Cache, error) {
   161  	switch {
   162  	case config.NumCounters == 0:
   163  		return nil, errors.New("NumCounters can't be zero")
   164  	case config.MaxCost == 0:
   165  		return nil, errors.New("MaxCost can't be zero")
   166  	case config.BufferItems == 0:
   167  		return nil, errors.New("BufferItems can't be zero")
   168  	}
   169  	policy := newPolicy(config.NumCounters, config.MaxCost)
   170  	cache := &Cache{
   171  		store:              newShardedMap(config.ShouldUpdate),
   172  		policy:             policy,
   173  		getBuf:             newRingBuffer(policy, config.BufferItems),
   174  		setBuf:             make(chan *Item, setBufSize),
   175  		keyToHash:          config.KeyToHash,
   176  		stop:               make(chan struct{}),
   177  		cost:               config.Cost,
   178  		ignoreInternalCost: config.IgnoreInternalCost,
   179  		cleanupTicker:      time.NewTicker(time.Duration(bucketDurationSecs) * time.Second / 2),
   180  	}
   181  	cache.onExit = func(val interface{}) {
   182  		if config.OnExit != nil && val != nil {
   183  			config.OnExit(val)
   184  		}
   185  	}
   186  	cache.onEvict = func(item *Item) {
   187  		if config.OnEvict != nil {
   188  			config.OnEvict(item)
   189  		}
   190  		cache.onExit(item.Value)
   191  	}
   192  	cache.onReject = func(item *Item) {
   193  		if config.OnReject != nil {
   194  			config.OnReject(item)
   195  		}
   196  		cache.onExit(item.Value)
   197  	}
   198  	cache.store.shouldUpdate = func(prev, cur interface{}) bool {
   199  		if config.ShouldUpdate != nil {
   200  			return config.ShouldUpdate(prev, cur)
   201  		}
   202  		return true
   203  	}
   204  	if cache.keyToHash == nil {
   205  		cache.keyToHash = z.KeyToHash
   206  	}
   207  	if config.Metrics {
   208  		cache.collectMetrics()
   209  	}
   210  	// NOTE: benchmarks seem to show that performance decreases the more
   211  	//       goroutines we have running cache.processItems(), so 1 should
   212  	//       usually be sufficient
   213  	go cache.processItems()
   214  	return cache, nil
   215  }
   216  
   217  func (c *Cache) Wait() {
   218  	if c == nil || c.isClosed.Load() {
   219  		return
   220  	}
   221  	wg := &sync.WaitGroup{}
   222  	wg.Add(1)
   223  	c.setBuf <- &Item{wg: wg}
   224  	wg.Wait()
   225  }
   226  
   227  // Get returns the value (if any) and a boolean representing whether the
   228  // value was found or not. The value can be nil and the boolean can be true at
   229  // the same time.
   230  func (c *Cache) Get(key interface{}) (interface{}, bool) {
   231  	if c == nil || c.isClosed.Load() || key == nil {
   232  		return nil, false
   233  	}
   234  	keyHash, conflictHash := c.keyToHash(key)
   235  	c.getBuf.Push(keyHash)
   236  	value, ok := c.store.Get(keyHash, conflictHash)
   237  	if ok {
   238  		c.Metrics.add(hit, keyHash, 1)
   239  	} else {
   240  		c.Metrics.add(miss, keyHash, 1)
   241  	}
   242  	return value, ok
   243  }
   244  
   245  // Set attempts to add the key-value item to the cache. If it returns false,
   246  // then the Set was dropped and the key-value item isn't added to the cache. If
   247  // it returns true, there's still a chance it could be dropped by the policy if
   248  // its determined that the key-value item isn't worth keeping, but otherwise the
   249  // item will be added and other items will be evicted in order to make room.
   250  //
   251  // To dynamically evaluate the items cost using the Config.Coster function, set
   252  // the cost parameter to 0 and Coster will be ran when needed in order to find
   253  // the items true cost.
   254  func (c *Cache) Set(key, value interface{}, cost int64) bool {
   255  	return c.SetWithTTL(key, value, cost, 0*time.Second)
   256  }
   257  
   258  // SetWithTTL works like Set but adds a key-value pair to the cache that will expire
   259  // after the specified TTL (time to live) has passed. A zero value means the value never
   260  // expires, which is identical to calling Set. A negative value is a no-op and the value
   261  // is discarded.
   262  func (c *Cache) SetWithTTL(key, value interface{}, cost int64, ttl time.Duration) bool {
   263  	return c.setInternal(key, value, cost, ttl, false)
   264  }
   265  
   266  // SetIfPresent is like Set, but only updates the value of an existing key. It
   267  // does NOT add the key to cache if it's absent.
   268  func (c *Cache) SetIfPresent(key, value interface{}, cost int64) bool {
   269  	return c.setInternal(key, value, cost, 0*time.Second, true)
   270  }
   271  
   272  func (c *Cache) setInternal(key, value interface{},
   273  	cost int64, ttl time.Duration, onlyUpdate bool) bool {
   274  	if c == nil || c.isClosed.Load() || key == nil {
   275  		return false
   276  	}
   277  
   278  	var expiration time.Time
   279  	switch {
   280  	case ttl == 0:
   281  		// No expiration.
   282  		break
   283  	case ttl < 0:
   284  		// Treat this a a no-op.
   285  		return false
   286  	default:
   287  		expiration = time.Now().Add(ttl)
   288  	}
   289  
   290  	keyHash, conflictHash := c.keyToHash(key)
   291  	i := &Item{
   292  		flag:       itemNew,
   293  		Key:        keyHash,
   294  		Conflict:   conflictHash,
   295  		Value:      value,
   296  		Cost:       cost,
   297  		Expiration: expiration,
   298  	}
   299  	if onlyUpdate {
   300  		i.flag = itemUpdate
   301  	}
   302  	// cost is eventually updated. The expiration must also be immediately updated
   303  	// to prevent items from being prematurely removed from the map.
   304  	if prev, ok := c.store.Update(i); ok {
   305  		c.onExit(prev)
   306  		i.flag = itemUpdate
   307  	} else if onlyUpdate {
   308  		// The instruction was to update the key, but store.Update failed. So,
   309  		// this is a NOOP.
   310  		return false
   311  	}
   312  	// Attempt to send item to policy.
   313  	select {
   314  	case c.setBuf <- i:
   315  		return true
   316  	default:
   317  		if i.flag == itemUpdate {
   318  			// Return true if this was an update operation since we've already
   319  			// updated the store. For all the other operations (set/delete), we
   320  			// return false which means the item was not inserted.
   321  			return true
   322  		}
   323  		c.Metrics.add(dropSets, keyHash, 1)
   324  		return false
   325  	}
   326  }
   327  
   328  // Del deletes the key-value item from the cache if it exists.
   329  func (c *Cache) Del(key interface{}) {
   330  	if c == nil || c.isClosed.Load() || key == nil {
   331  		return
   332  	}
   333  	keyHash, conflictHash := c.keyToHash(key)
   334  	// Delete immediately.
   335  	_, prev := c.store.Del(keyHash, conflictHash)
   336  	c.onExit(prev)
   337  	// If we've set an item, it would be applied slightly later.
   338  	// So we must push the same item to `setBuf` with the deletion flag.
   339  	// This ensures that if a set is followed by a delete, it will be
   340  	// applied in the correct order.
   341  	c.setBuf <- &Item{
   342  		flag:     itemDelete,
   343  		Key:      keyHash,
   344  		Conflict: conflictHash,
   345  	}
   346  }
   347  
   348  // GetTTL returns the TTL for the specified key and a bool that is true if the
   349  // item was found and is not expired.
   350  func (c *Cache) GetTTL(key interface{}) (time.Duration, bool) {
   351  	if c == nil || key == nil {
   352  		return 0, false
   353  	}
   354  
   355  	keyHash, conflictHash := c.keyToHash(key)
   356  	if _, ok := c.store.Get(keyHash, conflictHash); !ok {
   357  		// not found
   358  		return 0, false
   359  	}
   360  
   361  	expiration := c.store.Expiration(keyHash)
   362  	if expiration.IsZero() {
   363  		// found but no expiration
   364  		return 0, true
   365  	}
   366  
   367  	if time.Now().After(expiration) {
   368  		// found but expired
   369  		return 0, false
   370  	}
   371  
   372  	return time.Until(expiration), true
   373  }
   374  
   375  // Close stops all goroutines and closes all channels.
   376  func (c *Cache) Close() {
   377  	if c == nil || c.isClosed.Load() {
   378  		return
   379  	}
   380  	c.Clear()
   381  
   382  	// Block until processItems goroutine is returned.
   383  	c.stop <- struct{}{}
   384  	close(c.stop)
   385  	close(c.setBuf)
   386  	c.policy.Close()
   387  	c.isClosed.Store(true)
   388  }
   389  
   390  // Clear empties the hashmap and zeroes all policy counters. Note that this is
   391  // not an atomic operation (but that shouldn't be a problem as it's assumed that
   392  // Set/Get calls won't be occurring until after this).
   393  func (c *Cache) Clear() {
   394  	if c == nil || c.isClosed.Load() {
   395  		return
   396  	}
   397  	// Block until processItems goroutine is returned.
   398  	c.stop <- struct{}{}
   399  
   400  	// Clear out the setBuf channel.
   401  loop:
   402  	for {
   403  		select {
   404  		case i := <-c.setBuf:
   405  			if i.wg != nil {
   406  				i.wg.Done()
   407  				continue
   408  			}
   409  			if i.flag != itemUpdate {
   410  				// In itemUpdate, the value is already set in the store.  So, no need to call
   411  				// onEvict here.
   412  				c.onEvict(i)
   413  			}
   414  		default:
   415  			break loop
   416  		}
   417  	}
   418  
   419  	// Clear value hashmap and policy data.
   420  	c.policy.Clear()
   421  	c.store.Clear(c.onEvict)
   422  	// Only reset metrics if they're enabled.
   423  	if c.Metrics != nil {
   424  		c.Metrics.Clear()
   425  	}
   426  	// Restart processItems goroutine.
   427  	go c.processItems()
   428  }
   429  
   430  // MaxCost returns the max cost of the cache.
   431  func (c *Cache) MaxCost() int64 {
   432  	if c == nil {
   433  		return 0
   434  	}
   435  	return c.policy.MaxCost()
   436  }
   437  
   438  // UpdateMaxCost updates the maxCost of an existing cache.
   439  func (c *Cache) UpdateMaxCost(maxCost int64) {
   440  	if c == nil {
   441  		return
   442  	}
   443  	c.policy.UpdateMaxCost(maxCost)
   444  }
   445  
   446  // processItems is ran by goroutines processing the Set buffer.
   447  func (c *Cache) processItems() {
   448  	startTs := make(map[uint64]time.Time)
   449  	numToKeep := 100000 // TODO: Make this configurable via options.
   450  
   451  	trackAdmission := func(key uint64) {
   452  		if c.Metrics == nil {
   453  			return
   454  		}
   455  		startTs[key] = time.Now()
   456  		if len(startTs) > numToKeep {
   457  			for k := range startTs {
   458  				if len(startTs) <= numToKeep {
   459  					break
   460  				}
   461  				delete(startTs, k)
   462  			}
   463  		}
   464  	}
   465  	onEvict := func(i *Item) {
   466  		if ts, has := startTs[i.Key]; has {
   467  			c.Metrics.trackEviction(int64(time.Since(ts) / time.Second))
   468  			delete(startTs, i.Key)
   469  		}
   470  		if c.onEvict != nil {
   471  			c.onEvict(i)
   472  		}
   473  	}
   474  
   475  	for {
   476  		select {
   477  		case i := <-c.setBuf:
   478  			if i.wg != nil {
   479  				i.wg.Done()
   480  				continue
   481  			}
   482  			// Calculate item cost value if new or update.
   483  			if i.Cost == 0 && c.cost != nil && i.flag != itemDelete {
   484  				i.Cost = c.cost(i.Value)
   485  			}
   486  			if !c.ignoreInternalCost {
   487  				// Add the cost of internally storing the object.
   488  				i.Cost += itemSize
   489  			}
   490  
   491  			switch i.flag {
   492  			case itemNew:
   493  				victims, added := c.policy.Add(i.Key, i.Cost)
   494  				if added {
   495  					c.store.Set(i)
   496  					c.Metrics.add(keyAdd, i.Key, 1)
   497  					trackAdmission(i.Key)
   498  				} else {
   499  					c.onReject(i)
   500  				}
   501  				for _, victim := range victims {
   502  					victim.Conflict, victim.Value = c.store.Del(victim.Key, 0)
   503  					onEvict(victim)
   504  				}
   505  
   506  			case itemUpdate:
   507  				c.policy.Update(i.Key, i.Cost)
   508  
   509  			case itemDelete:
   510  				c.policy.Del(i.Key) // Deals with metrics updates.
   511  				_, val := c.store.Del(i.Key, i.Conflict)
   512  				c.onExit(val)
   513  			}
   514  		case <-c.cleanupTicker.C:
   515  			c.store.Cleanup(c.policy, onEvict)
   516  		case <-c.stop:
   517  			return
   518  		}
   519  	}
   520  }