github.com/dgraph-io/ristretto@v0.1.2-0.20240116140435-c67e07994f91/policy.go (about)

     1  /*
     2   * Copyright 2020 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package ristretto
    18  
    19  import (
    20  	"math"
    21  	"sync"
    22  	"sync/atomic"
    23  
    24  	"github.com/dgraph-io/ristretto/z"
    25  )
    26  
const (
	// lfuSample is the number of items to sample when looking at eviction
	// candidates. 5 seems to be the most optimal number [citation needed].
	// A larger sample approximates true LFU more closely at the price of
	// more Estimate calls per eviction.
	lfuSample = 5
)
    32  
// policy is the interface encapsulating eviction/admission behavior.
// TODO: remove this interface and just rename defaultPolicy to policy, as we
// are probably only going to use/implement/maintain one policy.
type policy[V any] interface {
	ringConsumer
	// Add attempts to add the key-cost pair to the policy. It returns a slice
	// of evicted items and a bool denoting whether or not the key-cost pair
	// was added. If it returns true, the key should be stored in cache.
	Add(uint64, int64) ([]*Item[V], bool)
	// Has returns true if the key exists in the policy.
	Has(uint64) bool
	// Del deletes the key from the policy.
	Del(uint64)
	// Cap returns the available capacity (max cost minus used cost).
	Cap() int64
	// Close stops all goroutines and closes all channels.
	Close()
	// Update updates the cost value for the key.
	Update(uint64, int64)
	// Cost returns the cost value of a key or -1 if missing.
	Cost(uint64) int64
	// CollectMetrics optionally sets a stats object to track how the policy
	// is performing.
	CollectMetrics(*Metrics)
	// Clear zeroes out all counters and clears hashmaps.
	Clear()
	// MaxCost returns the current max cost of the cache policy.
	MaxCost() int64
	// UpdateMaxCost updates the max cost of the cache policy.
	UpdateMaxCost(int64)
}
    63  
    64  func newPolicy[V any](numCounters, maxCost int64) policy[V] {
    65  	return newDefaultPolicy[V](numCounters, maxCost)
    66  }
    67  
// defaultPolicy combines a TinyLFU admission filter with sampled-LFU
// eviction. The embedded mutex guards admit and evict.
type defaultPolicy[V any] struct {
	sync.Mutex
	admit   *tinyLFU      // admission filter: frequency sketch over pushed keys
	evict   *sampledLFU   // eviction bookkeeping: key -> cost, used total
	itemsCh chan []uint64 // buffered batches of accessed keys consumed by processItems
	stop    chan struct{} // unbuffered; Close sends here to halt processItems
	// NOTE(review): isClosed is written by Close and read by Push without any
	// synchronization — confirm callers never run Close concurrently with Push.
	isClosed bool
	metrics  *Metrics // optional stats sink; set via CollectMetrics, may be nil
}
    77  
    78  func newDefaultPolicy[V any](numCounters, maxCost int64) *defaultPolicy[V] {
    79  	p := &defaultPolicy[V]{
    80  		admit:   newTinyLFU(numCounters),
    81  		evict:   newSampledLFU(maxCost),
    82  		itemsCh: make(chan []uint64, 3),
    83  		stop:    make(chan struct{}),
    84  	}
    85  	go p.processItems()
    86  	return p
    87  }
    88  
    89  func (p *defaultPolicy[V]) CollectMetrics(metrics *Metrics) {
    90  	p.metrics = metrics
    91  	p.evict.metrics = metrics
    92  }
    93  
// policyPair is a key together with its stored cost, used as an eviction
// candidate when sampling from sampledLFU.keyCosts.
type policyPair struct {
	key  uint64
	cost int64
}
    98  
    99  func (p *defaultPolicy[V]) processItems() {
   100  	for {
   101  		select {
   102  		case items := <-p.itemsCh:
   103  			p.Lock()
   104  			p.admit.Push(items)
   105  			p.Unlock()
   106  		case <-p.stop:
   107  			return
   108  		}
   109  	}
   110  }
   111  
   112  func (p *defaultPolicy[V]) Push(keys []uint64) bool {
   113  	if p.isClosed {
   114  		return false
   115  	}
   116  
   117  	if len(keys) == 0 {
   118  		return true
   119  	}
   120  
   121  	select {
   122  	case p.itemsCh <- keys:
   123  		p.metrics.add(keepGets, keys[0], uint64(len(keys)))
   124  		return true
   125  	default:
   126  		p.metrics.add(dropGets, keys[0], uint64(len(keys)))
   127  		return false
   128  	}
   129  }
   130  
// Add decides whether the item with the given key and cost should be accepted by
// the policy. It returns the list of victims that have been evicted and a boolean
// indicating whether the incoming item should be accepted. Victims are returned
// even when the incoming item is ultimately rejected (those evictions already
// happened), and each victim's Conflict field is zero because the policy does
// not track conflict hashes.
func (p *defaultPolicy[V]) Add(key uint64, cost int64) ([]*Item[V], bool) {
	p.Lock()
	defer p.Unlock()

	// Cannot add an item bigger than entire cache.
	if cost > p.evict.getMaxCost() {
		return nil, false
	}

	// No need to go any further if the item is already in the cache.
	if has := p.evict.updateIfHas(key, cost); has {
		// An update does not count as an addition, so return false.
		return nil, false
	}

	// If the execution reaches this point, the key doesn't exist in the cache.
	// Calculate the remaining room in the cache (usually bytes).
	room := p.evict.roomLeft(cost)
	if room >= 0 {
		// There's enough room in the cache to store the new item without
		// overflowing. Do that now and stop here.
		p.evict.add(key, cost)
		p.metrics.add(costAdd, key, uint64(cost))
		return nil, true
	}

	// incHits is the hit count for the incoming item.
	incHits := p.admit.Estimate(key)
	// sample is the eviction candidate pool to be filled via random sampling.
	// TODO: perhaps we should use a min heap here. Right now our time
	// complexity is N for finding the min. Min heap should bring it down to
	// O(lg N).
	sample := make([]*policyPair, 0, lfuSample)
	// As items are evicted they will be appended to victims.
	victims := make([]*Item[V], 0)

	// Delete victims until there's enough space or a minKey is found that has
	// more hits than incoming item.
	for ; room < 0; room = p.evict.roomLeft(cost) {
		// Fill up empty slots in sample. Candidates surviving from earlier
		// iterations are kept and re-estimated below.
		sample = p.evict.fillSample(sample)

		// Find minimally used item in sample.
		minKey, minHits, minId, minCost := uint64(0), int64(math.MaxInt64), 0, int64(0)
		for i, pair := range sample {
			// Look up hit count for sample key.
			if hits := p.admit.Estimate(pair.key); hits < minHits {
				minKey, minHits, minId, minCost = pair.key, hits, i, pair.cost
			}
		}

		// If the incoming item isn't worth keeping in the policy, reject.
		// Any victims evicted by earlier iterations are still returned.
		if incHits < minHits {
			p.metrics.add(rejectSets, key, 1)
			return victims, false
		}

		// Delete the victim from metadata.
		p.evict.del(minKey)

		// Delete the victim from sample (swap with last, then truncate).
		sample[minId] = sample[len(sample)-1]
		sample = sample[:len(sample)-1]
		// Store victim in evicted victims slice.
		victims = append(victims, &Item[V]{
			Key:      minKey,
			Conflict: 0,
			Cost:     minCost,
		})
	}

	p.evict.add(key, cost)
	p.metrics.add(costAdd, key, uint64(cost))
	return victims, true
}
   209  
   210  func (p *defaultPolicy[V]) Has(key uint64) bool {
   211  	p.Lock()
   212  	_, exists := p.evict.keyCosts[key]
   213  	p.Unlock()
   214  	return exists
   215  }
   216  
   217  func (p *defaultPolicy[V]) Del(key uint64) {
   218  	p.Lock()
   219  	p.evict.del(key)
   220  	p.Unlock()
   221  }
   222  
   223  func (p *defaultPolicy[V]) Cap() int64 {
   224  	p.Lock()
   225  	capacity := p.evict.getMaxCost() - p.evict.used
   226  	p.Unlock()
   227  	return capacity
   228  }
   229  
   230  func (p *defaultPolicy[V]) Update(key uint64, cost int64) {
   231  	p.Lock()
   232  	p.evict.updateIfHas(key, cost)
   233  	p.Unlock()
   234  }
   235  
   236  func (p *defaultPolicy[V]) Cost(key uint64) int64 {
   237  	p.Lock()
   238  	if cost, found := p.evict.keyCosts[key]; found {
   239  		p.Unlock()
   240  		return cost
   241  	}
   242  	p.Unlock()
   243  	return -1
   244  }
   245  
   246  func (p *defaultPolicy[V]) Clear() {
   247  	p.Lock()
   248  	p.admit.clear()
   249  	p.evict.clear()
   250  	p.Unlock()
   251  }
   252  
// Close stops the processItems goroutine and closes the policy's channels.
// The order below matters: the send on the unbuffered stop channel completes
// only once processItems has taken the stop case and returned, after which it
// is safe to close both channels.
// NOTE(review): isClosed is read and written without synchronization here and
// in Push — confirm callers never invoke Close concurrently with Push/Close.
func (p *defaultPolicy[V]) Close() {
	if p.isClosed {
		return
	}

	// Block until the p.processItems goroutine returns.
	p.stop <- struct{}{}
	close(p.stop)
	close(p.itemsCh)
	p.isClosed = true
}
   264  
   265  func (p *defaultPolicy[V]) MaxCost() int64 {
   266  	if p == nil || p.evict == nil {
   267  		return 0
   268  	}
   269  	return p.evict.getMaxCost()
   270  }
   271  
   272  func (p *defaultPolicy[V]) UpdateMaxCost(maxCost int64) {
   273  	if p == nil || p.evict == nil {
   274  		return
   275  	}
   276  	p.evict.updateMaxCost(maxCost)
   277  }
   278  
// sampledLFU is an eviction helper storing key-cost pairs. All fields except
// maxCost are guarded by the owning defaultPolicy's mutex; maxCost is read
// and written atomically so MaxCost/UpdateMaxCost can bypass the lock.
type sampledLFU struct {
	// NOTE: align maxCost to 64-bit boundary for use with atomic.
	// As per https://golang.org/pkg/sync/atomic/: "On ARM, x86-32,
	// and 32-bit MIPS, it is the caller’s responsibility to arrange
	// for 64-bit alignment of 64-bit words accessed atomically.
	// The first word in a variable or in an allocated struct, array,
	// or slice can be relied upon to be 64-bit aligned."
	maxCost  int64
	used     int64            // sum of costs of all tracked keys
	metrics  *Metrics         // optional stats sink; may be nil
	keyCosts map[uint64]int64 // key -> cost
}
   292  
   293  func newSampledLFU(maxCost int64) *sampledLFU {
   294  	return &sampledLFU{
   295  		keyCosts: make(map[uint64]int64),
   296  		maxCost:  maxCost,
   297  	}
   298  }
   299  
   300  func (p *sampledLFU) getMaxCost() int64 {
   301  	return atomic.LoadInt64(&p.maxCost)
   302  }
   303  
   304  func (p *sampledLFU) updateMaxCost(maxCost int64) {
   305  	atomic.StoreInt64(&p.maxCost, maxCost)
   306  }
   307  
   308  func (p *sampledLFU) roomLeft(cost int64) int64 {
   309  	return p.getMaxCost() - (p.used + cost)
   310  }
   311  
   312  func (p *sampledLFU) fillSample(in []*policyPair) []*policyPair {
   313  	if len(in) >= lfuSample {
   314  		return in
   315  	}
   316  	for key, cost := range p.keyCosts {
   317  		in = append(in, &policyPair{key, cost})
   318  		if len(in) >= lfuSample {
   319  			return in
   320  		}
   321  	}
   322  	return in
   323  }
   324  
   325  func (p *sampledLFU) del(key uint64) {
   326  	cost, ok := p.keyCosts[key]
   327  	if !ok {
   328  		return
   329  	}
   330  	p.used -= cost
   331  	delete(p.keyCosts, key)
   332  	p.metrics.add(costEvict, key, uint64(cost))
   333  	p.metrics.add(keyEvict, key, 1)
   334  }
   335  
   336  func (p *sampledLFU) add(key uint64, cost int64) {
   337  	p.keyCosts[key] = cost
   338  	p.used += cost
   339  }
   340  
   341  func (p *sampledLFU) updateIfHas(key uint64, cost int64) bool {
   342  	if prev, found := p.keyCosts[key]; found {
   343  		// Update the cost of an existing key, but don't worry about evicting.
   344  		// Evictions will be handled the next time a new item is added.
   345  		p.metrics.add(keyUpdate, key, 1)
   346  		if prev > cost {
   347  			diff := prev - cost
   348  			p.metrics.add(costAdd, key, ^(uint64(diff) - 1))
   349  		} else if cost > prev {
   350  			diff := cost - prev
   351  			p.metrics.add(costAdd, key, uint64(diff))
   352  		}
   353  		p.used += cost - prev
   354  		p.keyCosts[key] = cost
   355  		return true
   356  	}
   357  	return false
   358  }
   359  
   360  func (p *sampledLFU) clear() {
   361  	p.used = 0
   362  	p.keyCosts = make(map[uint64]int64)
   363  }
   364  
// tinyLFU is an admission helper that keeps track of access frequency using
// tiny (4-bit) counters in the form of a count-min sketch.
// tinyLFU is NOT thread safe.
type tinyLFU struct {
	freq    *cmSketch // count-min sketch of access frequencies
	door    *z.Bloom  // doorkeeper bloom filter absorbing first-time accesses
	incrs   int64     // increments since the last reset
	resetAt int64     // increment threshold that triggers reset (aging)
}
   374  
   375  func newTinyLFU(numCounters int64) *tinyLFU {
   376  	return &tinyLFU{
   377  		freq:    newCmSketch(numCounters),
   378  		door:    z.NewBloomFilter(float64(numCounters), 0.01),
   379  		resetAt: numCounters,
   380  	}
   381  }
   382  
   383  func (p *tinyLFU) Push(keys []uint64) {
   384  	for _, key := range keys {
   385  		p.Increment(key)
   386  	}
   387  }
   388  
   389  func (p *tinyLFU) Estimate(key uint64) int64 {
   390  	hits := p.freq.Estimate(key)
   391  	if p.door.Has(key) {
   392  		hits++
   393  	}
   394  	return hits
   395  }
   396  
   397  func (p *tinyLFU) Increment(key uint64) {
   398  	// Flip doorkeeper bit if not already done.
   399  	if added := p.door.AddIfNotHas(key); !added {
   400  		// Increment count-min counter if doorkeeper bit is already set.
   401  		p.freq.Increment(key)
   402  	}
   403  	p.incrs++
   404  	if p.incrs >= p.resetAt {
   405  		p.reset()
   406  	}
   407  }
   408  
   409  func (p *tinyLFU) reset() {
   410  	// Zero out incrs.
   411  	p.incrs = 0
   412  	// clears doorkeeper bits
   413  	p.door.Clear()
   414  	// halves count-min counters
   415  	p.freq.Reset()
   416  }
   417  
   418  func (p *tinyLFU) clear() {
   419  	p.incrs = 0
   420  	p.door.Clear()
   421  	p.freq.Clear()
   422  }