github.com/etecs-ru/ristretto@v0.9.1/cache.go

/*
 * Copyright 2019 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Ristretto is a fast, fixed-size, in-memory cache with a dual focus on
// throughput and hit ratio performance. You can easily add Ristretto to an
// existing system and keep the most valuable data where you need it.
package ristretto

import (
	"errors"
	"sync"
	"time"
	"unsafe"

	"github.com/etecs-ru/ristretto/z"
)

// TODO: find the optimal value for this or make it configurable
var setBufSize = 32 * 1024 //nolint:gochecknoglobals // adopt fork, do not touch it

type itemCallback func(*Item)

const itemSize = int64(unsafe.Sizeof(storeItem{}))

// CacheInterface exposes the common cache functions for the purpose of mocking
// in unit tests.
type CacheInterface interface {
	// Get returns the value (if any) and a boolean representing whether the
	// value was found. The value can be nil and the boolean can be true at
	// the same time.
	Get(key interface{}) (interface{}, bool)

	// Set attempts to add the key-value item to the cache. If it returns false,
	// then the Set was dropped and the key-value item isn't added to the cache. If
	// it returns true, there's still a chance it could be dropped by the policy if
	// it's determined that the key-value item isn't worth keeping, but otherwise the
	// item will be added and other items will be evicted in order to make room.
	//
	// To dynamically evaluate an item's cost using the Config.Cost function, set
	// the cost parameter to 0 and Cost will be run when needed in order to find
	// the item's true cost.
	Set(key, value interface{}, cost int64) bool

	// SetWithTTL works like Set but adds a key-value pair to the cache that will expire
	// after the specified TTL (time to live) has passed. A zero value means the value never
	// expires, which is identical to calling Set. A negative value is a no-op and the value
	// is discarded.
	SetWithTTL(key, value interface{}, cost int64, ttl time.Duration) bool

	// SetIfPresent is like Set, but only updates the value of an existing key. It
	// does NOT add the key to the cache if it's absent.
	SetIfPresent(key, value interface{}, cost int64) bool

	// Del deletes the key-value item from the cache if it exists.
	Del(key interface{})

	// GetTTL returns the TTL for the specified key and a bool that is true if the
	// item was found and is not expired.
	GetTTL(key interface{}) (time.Duration, bool)

	// Close stops all goroutines and closes all channels.
	Close()

	// Clear empties the hashmap and zeroes all policy counters. Note that this is
	// not an atomic operation (but that shouldn't be a problem as it's assumed that
	// Set/Get calls won't be occurring until after this).
	Clear()

	// MaxCost returns the max cost of the cache.
	MaxCost() int64

	// UpdateMaxCost updates the maxCost of an existing cache.
	UpdateMaxCost(maxCost int64)
}
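
// Illustrative sketch (not part of the original file): CacheInterface exists so
// callers can depend on the interface and substitute a stub in unit tests. The
// names below (sessionStore, stubCache) are hypothetical.
//
//	// Production code depends on the interface, not on *ristretto.Cache.
//	type sessionStore struct {
//		cache ristretto.CacheInterface
//	}
//
//	func (s *sessionStore) Lookup(id string) (interface{}, bool) {
//		return s.cache.Get(id)
//	}
//
//	// A test stub: embed the interface so only the methods under test need to
//	// be implemented, and override Get with a plain map lookup.
//	type stubCache struct {
//		ristretto.CacheInterface
//		data map[interface{}]interface{}
//	}
//
//	func (s *stubCache) Get(key interface{}) (interface{}, bool) {
//		v, ok := s.data[key]
//		return v, ok
//	}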

// Cache is a thread-safe implementation of a hashmap with a TinyLFU admission
// policy and a Sampled LFU eviction policy. You can use the same Cache instance
// from as many goroutines as you want.
type Cache struct {
	store              *shardedMap
	policy             *lfuPolicy
	getBuf             *ringBuffer
	setBuf             chan *Item
	onEvict            itemCallback
	onReject           itemCallback
	onExit             func(interface{})
	keyToHash          func(interface{}) (uint64, uint64)
	stop               chan struct{}
	cleanupTicker      *time.Ticker
	cost               func(value interface{}) int64
	Metrics            *Metrics
	ignoreInternalCost bool
	isClosed           bool
}

// Verify that Cache implements the CacheInterface.
// https://golang.org/doc/faq#guarantee_satisfies_interface
var _ CacheInterface = &Cache{}

// Config is passed to NewCache for creating new Cache instances.
type Config struct {
	// OnExit is called whenever a value is removed from the cache. This can be
	// used to do manual memory deallocation. It is also called on eviction
	// and rejection of the value.
	OnExit func(val interface{})
	// KeyToHash is used to customize the key hashing algorithm. Each key will
	// be hashed using the provided function. If KeyToHash is not set, the
	// default hashing function (z.KeyToHash) is used.
	KeyToHash func(key interface{}) (uint64, uint64)
	// ShouldUpdate is called when a value already exists in the cache and is being updated.
	ShouldUpdate func(prev, cur interface{}) bool
	// Cost evaluates a value and outputs a corresponding cost. This function
	// is run after Set is called for a new item or an item update with a cost
	// param of 0.
	Cost func(value interface{}) int64
	// OnEvict is called for every eviction and passes the hashed key, value,
	// and cost to the function.
	OnEvict func(item *Item)
	// OnReject is called for every rejection done via the policy.
	OnReject func(item *Item)
	// NumCounters determines the number of counters (keys) to keep that hold
	// access frequency information. It's generally a good idea to have more
	// counters than the max cache capacity, as this will improve eviction
	// accuracy and subsequent hit ratios.
	//
	// For example, if you expect your cache to hold 1,000,000 items when full,
	// NumCounters should be 10,000,000 (10x). Each counter takes up roughly
	// 3 bytes (4 bits for each counter * 4 copies plus about a byte per
	// counter for the bloom filter). Note that the number of counters is
	// internally rounded up to the nearest power of 2, so the space usage
	// may be a little larger than 3 bytes * NumCounters.
	NumCounters int64
	// MaxCost can be considered as the cache capacity, in whatever units you
	// choose to use.
	//
	// For example, if you want the cache to have a max capacity of 100MB, you
	// would set MaxCost to 100,000,000 and pass an item's number of bytes as
	// the `cost` parameter for calls to Set. If new items are accepted, the
	// eviction process will take care of making room for the new item and not
	// overflowing the MaxCost value.
	MaxCost int64
	// BufferItems determines the size of Get buffers.
	//
	// Unless you have a rare use case, using `64` as the BufferItems value
	// results in good performance.
	BufferItems int64
	// Metrics determines whether cache statistics are kept during the cache's
	// lifetime. There *is* some overhead to keeping statistics, so you should
	// only set this flag to true when testing or throughput performance isn't a
	// major factor.
	Metrics bool
	// IgnoreInternalCost, when set to true, tells the cache to ignore the cost
	// of internally storing the value. This is useful when the cost passed to
	// Set is not in bytes. Keep in mind that setting this to true will increase
	// memory usage.
	IgnoreInternalCost bool
}
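
// Illustrative sketch (not part of the original file): a Config for a cache
// expected to hold roughly one million byte-counted entries with a 100MB
// capacity, following the guidance in the field comments above. The concrete
// numbers are examples, not requirements.
//
//	cfg := &ristretto.Config{
//		NumCounters: 1e7,         // 10x the expected number of items when full
//		MaxCost:     100_000_000, // 100MB capacity when costs are in bytes
//		BufferItems: 64,          // recommended Get-buffer size
//		Metrics:     true,        // enable hit/miss statistics (small overhead)
//	}
//	cache, err := ristretto.NewCache(cfg)
//	if err != nil {
//		// NumCounters, MaxCost and BufferItems must all be non-zero.
//		panic(err)
//	}
//	defer cache.Close()
//
//	value := []byte("some payload")
//	cache.Set("key", value, int64(len(value))) // cost is the payload size in bytes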

type itemFlag byte

const (
	itemNew itemFlag = iota
	itemDelete
	itemUpdate
)

// Item is passed to setBuf so items can eventually be added to the cache.
type Item struct {
	Expiration time.Time
	Value      interface{}
	wg         *sync.WaitGroup
	Key        uint64
	Conflict   uint64
	Cost       int64
	flag       itemFlag
}

// NewCache returns a new Cache instance along with a configuration error, if any.
func NewCache(config *Config) (*Cache, error) {
	switch {
	case config.NumCounters == 0:
		return nil, errors.New("NumCounters can't be zero")
	case config.MaxCost == 0:
		return nil, errors.New("MaxCost can't be zero")
	case config.BufferItems == 0:
		return nil, errors.New("BufferItems can't be zero")
	}
	policy := newPolicy(config.NumCounters, config.MaxCost)
	cache := &Cache{
		store:              newShardedMap(config.ShouldUpdate),
		policy:             policy,
		getBuf:             newRingBuffer(policy, config.BufferItems),
		setBuf:             make(chan *Item, setBufSize),
		keyToHash:          config.KeyToHash,
		stop:               make(chan struct{}),
		cost:               config.Cost,
		ignoreInternalCost: config.IgnoreInternalCost,
		cleanupTicker:      time.NewTicker(time.Duration(bucketDurationSecs) * time.Second / 2),
	}
	cache.onExit = func(val interface{}) {
		if config.OnExit != nil && val != nil {
			config.OnExit(val)
		}
	}
	cache.onEvict = func(item *Item) {
		if config.OnEvict != nil {
			config.OnEvict(item)
		}
		cache.onExit(item.Value)
	}
	cache.onReject = func(item *Item) {
		if config.OnReject != nil {
			config.OnReject(item)
		}
		cache.onExit(item.Value)
	}
	cache.store.shouldUpdate = func(prev, cur interface{}) bool {
		if config.ShouldUpdate != nil {
			return config.ShouldUpdate(prev, cur)
		}
		return true
	}
	if cache.keyToHash == nil {
		cache.keyToHash = z.KeyToHash
	}
	if config.Metrics {
		cache.collectMetrics()
	}
	// NOTE: benchmarks seem to show that performance decreases the more
	//       goroutines we have running cache.processItems(), so 1 should
	//       usually be sufficient
	go cache.processItems()
	return cache, nil
}

// Wait blocks until all buffered writes have been applied. This ensures a call
// to Set() will be visible to future calls to Get().
func (c *Cache) Wait() {
	if c == nil || c.isClosed {
		return
	}
	wg := &sync.WaitGroup{}
	wg.Add(1)
	c.setBuf <- &Item{wg: wg}
	wg.Wait()
}
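
// Illustrative sketch (not part of the original file): Set is buffered and
// applied asynchronously, so a Get immediately after Set may miss. Calling
// Wait first guarantees the buffered write has been applied. Assumes a cache
// built as in the Config example above.
//
//	if ok := cache.Set("user:42", "alice", 1); !ok {
//		// The set buffer was full and the write was dropped.
//	}
//	cache.Wait() // flush buffered writes before reading
//	if v, found := cache.Get("user:42"); found {
//		fmt.Println(v) // "alice"
//	}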

// Get returns the value (if any) and a boolean representing whether the
// value was found. The value can be nil and the boolean can be true at
// the same time.
func (c *Cache) Get(key interface{}) (interface{}, bool) {
	if c == nil || c.isClosed || key == nil {
		return nil, false
	}
	keyHash, conflictHash := c.keyToHash(key)
	c.getBuf.Push(keyHash)
	value, ok := c.store.Get(keyHash, conflictHash)
	if ok {
		c.Metrics.add(hit, keyHash, 1)
	} else {
		c.Metrics.add(miss, keyHash, 1)
	}
	return value, ok
}

// Set attempts to add the key-value item to the cache. If it returns false,
// then the Set was dropped and the key-value item isn't added to the cache. If
// it returns true, there's still a chance it could be dropped by the policy if
// it's determined that the key-value item isn't worth keeping, but otherwise the
// item will be added and other items will be evicted in order to make room.
//
// To dynamically evaluate an item's cost using the Config.Cost function, set
// the cost parameter to 0 and Cost will be run when needed in order to find
// the item's true cost.
func (c *Cache) Set(key, value interface{}, cost int64) bool {
	return c.SetWithTTL(key, value, cost, 0*time.Second)
}

// SetWithTTL works like Set but adds a key-value pair to the cache that will expire
// after the specified TTL (time to live) has passed. A zero value means the value never
// expires, which is identical to calling Set. A negative value is a no-op and the value
// is discarded.
func (c *Cache) SetWithTTL(key, value interface{}, cost int64, ttl time.Duration) bool {
	return c.setInternal(key, value, cost, ttl, false)
}

// SetIfPresent is like Set, but only updates the value of an existing key. It
// does NOT add the key to the cache if it's absent.
func (c *Cache) SetIfPresent(key, value interface{}, cost int64) bool {
	return c.setInternal(key, value, cost, 0*time.Second, true)
}
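
// Illustrative sketch (not part of the original file): SetIfPresent only
// updates keys that already exist, which is useful for refreshing a value
// without admitting new keys. Assumes a cache built as in the Config example
// above.
//
//	cache.Set("session", "v1", 1)
//	cache.Wait()
//
//	updated := cache.SetIfPresent("session", "v2", 1) // true: key exists, value updated
//	missed := cache.SetIfPresent("unknown", "v1", 1)  // false: absent keys are not added
//	_ = updated
//	_ = missed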

func (c *Cache) setInternal(key, value interface{},
	cost int64, ttl time.Duration, onlyUpdate bool) bool {
	if c == nil || c.isClosed || key == nil {
		return false
	}

	var expiration time.Time
	switch {
	case ttl == 0:
		// No expiration.
		break
	case ttl < 0:
		// Treat this as a no-op.
		return false
	default:
		expiration = time.Now().Add(ttl)
	}

	keyHash, conflictHash := c.keyToHash(key)
	i := &Item{
		flag:       itemNew,
		Key:        keyHash,
		Conflict:   conflictHash,
		Value:      value,
		Cost:       cost,
		Expiration: expiration,
	}
	if onlyUpdate {
		i.flag = itemUpdate
	}
	// cost is eventually updated. The expiration must also be immediately updated
	// to prevent items from being prematurely removed from the map.
	if prev, ok := c.store.Update(i); ok {
		c.onExit(prev)
		i.flag = itemUpdate
	} else if onlyUpdate {
		// The instruction was to update the key, but store.Update failed. So,
		// this is a no-op.
		return false
	}
	// Attempt to send item to policy.
	select {
	case c.setBuf <- i:
		return true
	default:
		if i.flag == itemUpdate {
			// Return true if this was an update operation since we've already
			// updated the store. For all the other operations (set/delete), we
			// return false which means the item was not inserted.
			return true
		}
		c.Metrics.add(dropSets, keyHash, 1)
		return false
	}
}

// Del deletes the key-value item from the cache if it exists.
func (c *Cache) Del(key interface{}) {
	if c == nil || c.isClosed || key == nil {
		return
	}
	keyHash, conflictHash := c.keyToHash(key)
	// Delete immediately.
	_, prev := c.store.Del(keyHash, conflictHash)
	c.onExit(prev)
	// If we've set an item, it will be applied slightly later, so we must also
	// push the same item to `setBuf` with the deletion flag. This ensures that
	// a Set followed by a Del is applied in the correct order.
	c.setBuf <- &Item{
		flag:     itemDelete,
		Key:      keyHash,
		Conflict: conflictHash,
	}
}

// GetTTL returns the TTL for the specified key and a bool that is true if the
// item was found and is not expired.
func (c *Cache) GetTTL(key interface{}) (time.Duration, bool) {
	if c == nil || key == nil {
		return 0, false
	}

	keyHash, conflictHash := c.keyToHash(key)
	if _, ok := c.store.Get(keyHash, conflictHash); !ok {
		// not found
		return 0, false
	}

	expiration := c.store.Expiration(keyHash)
	if expiration.IsZero() {
		// found but no expiration
		return 0, true
	}

	if time.Now().After(expiration) {
		// found but expired
		return 0, false
	}

	return time.Until(expiration), true
}
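
// Illustrative sketch (not part of the original file): pairing SetWithTTL with
// GetTTL to store an entry that expires and to inspect its remaining lifetime.
// Assumes a cache built as in the Config example above.
//
//	cache.SetWithTTL("token", "abc123", 1, 5*time.Minute)
//	cache.Wait()
//
//	if remaining, ok := cache.GetTTL("token"); ok {
//		fmt.Printf("token expires in %s\n", remaining)
//	}
//
//	// A zero TTL behaves exactly like Set (no expiration); a negative TTL is
//	// a no-op and the value is discarded.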

// Close stops all goroutines and closes all channels.
func (c *Cache) Close() {
	if c == nil || c.isClosed {
		return
	}
	c.Clear()

	// Block until the processItems goroutine has returned.
	c.stop <- struct{}{}
	close(c.stop)
	close(c.setBuf)
	c.policy.Close()
	c.isClosed = true
}

// Clear empties the hashmap and zeroes all policy counters. Note that this is
// not an atomic operation (but that shouldn't be a problem as it's assumed that
// Set/Get calls won't be occurring until after this).
func (c *Cache) Clear() {
	if c == nil || c.isClosed {
		return
	}
	// Block until the processItems goroutine has returned.
	c.stop <- struct{}{}

	// Clear out the setBuf channel.
loop:
	for {
		select {
		case i := <-c.setBuf:
			if i.wg != nil {
				i.wg.Done()
				continue
			}
			if i.flag != itemUpdate {
				// In itemUpdate, the value is already set in the store. So, no need to call
				// onEvict here.
				c.onEvict(i)
			}
		default:
			break loop
		}
	}

	// Clear value hashmap and policy data.
	c.policy.Clear()
	c.store.Clear(c.onEvict)
	// Only reset metrics if they're enabled.
	if c.Metrics != nil {
		c.Metrics.Clear()
	}
	// Restart processItems goroutine.
	go c.processItems()
}

// MaxCost returns the max cost of the cache.
func (c *Cache) MaxCost() int64 {
	if c == nil {
		return 0
	}
	return c.policy.MaxCost()
}

// UpdateMaxCost updates the maxCost of an existing cache.
func (c *Cache) UpdateMaxCost(maxCost int64) {
	if c == nil {
		return
	}
	c.policy.UpdateMaxCost(maxCost)
}
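
// Illustrative sketch (not part of the original file): the cost budget of a
// running cache can be resized, for example after a configuration reload. The
// new limit is enforced by the policy on subsequent admissions. Assumes a
// cache built as in the Config example above.
//
//	fmt.Println(cache.MaxCost())    // e.g. 100000000
//	cache.UpdateMaxCost(50_000_000) // halve the budget at runtime
//	fmt.Println(cache.MaxCost())    // 50000000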

// processItems is run by goroutines processing the Set buffer.
func (c *Cache) processItems() {
	startTs := make(map[uint64]time.Time)
	numToKeep := 100000 // TODO: Make this configurable via options.

	trackAdmission := func(key uint64) {
		if c.Metrics == nil {
			return
		}
		startTs[key] = time.Now()
		if len(startTs) > numToKeep {
			for k := range startTs {
				if len(startTs) <= numToKeep {
					break
				}
				delete(startTs, k)
			}
		}
	}
	onEvict := func(i *Item) {
		if ts, has := startTs[i.Key]; has {
			c.Metrics.trackEviction(int64(time.Since(ts) / time.Second))
			delete(startTs, i.Key)
		}
		if c.onEvict != nil {
			c.onEvict(i)
		}
	}

	for {
		select {
		case i := <-c.setBuf:
			if i.wg != nil {
				i.wg.Done()
				continue
			}
			// Calculate item cost value if new or update.
			if i.Cost == 0 && c.cost != nil && i.flag != itemDelete {
				i.Cost = c.cost(i.Value)
			}
			if !c.ignoreInternalCost {
				// Add the cost of internally storing the object.
				i.Cost += itemSize
			}

			switch i.flag {
			case itemNew:
				victims, added := c.policy.Add(i.Key, i.Cost)
				if added {
					c.store.Set(i)
					c.Metrics.add(keyAdd, i.Key, 1)
					trackAdmission(i.Key)
				} else {
					c.onReject(i)
				}
				for _, victim := range victims {
					victim.Conflict, victim.Value = c.store.Del(victim.Key, 0)
					onEvict(victim)
				}

			case itemUpdate:
				c.policy.Update(i.Key, i.Cost)

			case itemDelete:
				c.policy.Del(i.Key) // Deals with metrics updates.
				_, val := c.store.Del(i.Key, i.Conflict)
				c.onExit(val)
			}
		case <-c.cleanupTicker.C:
			c.store.Cleanup(c.policy, onEvict)
		case <-c.stop:
			return
		}
	}
}