github.com/fiatjaf/generic-ristretto@v0.0.1/policy.go (about)

     1  /*
     2   * Copyright 2020 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package ristretto
    18  
    19  import (
    20  	"math"
    21  	"sync"
    22  	"sync/atomic"
    23  
    24  	"github.com/fiatjaf/generic-ristretto/z"
    25  )
    26  
const (
	// lfuSample is the size of the candidate pool that fillSample builds and
	// Add scans each time it has to pick an eviction victim. The value 5 is
	// an empirical choice; no measurement backing it is recorded here.
	lfuSample = 5
)
    32  
// policy is the interface encapsulating eviction/admission behavior.
//
// TODO: remove this interface and just rename defaultPolicy to policy, as we
// are probably only going to use/implement/maintain one policy.
type policy[V any] interface {
	// ringConsumer is declared elsewhere in the package; defaultPolicy
	// provides a Push([]uint64) bool method alongside the methods below.
	ringConsumer
	// Add attempts to add the key-cost pair to the policy. It returns a slice
	// of evicted items and a bool denoting whether or not the key-cost pair
	// was added. If it returns true, the key should be stored in cache.
	Add(uint64, int64) ([]*Item[V], bool)
	// Has returns true if the key exists in the policy.
	Has(uint64) bool
	// Del deletes the key from the policy.
	Del(uint64)
	// Cap returns the available capacity, i.e. the max cost minus the cost
	// currently in use.
	Cap() int64
	// Close stops all goroutines and closes all channels.
	Close()
	// Update updates the cost value for the key.
	Update(uint64, int64)
	// Cost returns the cost value of a key or -1 if missing.
	Cost(uint64) int64
	// CollectMetrics optionally sets the stats object used to track how the
	// policy is performing.
	CollectMetrics(*Metrics)
	// Clear zeroes out all counters and clears hashmaps.
	Clear()
	// MaxCost returns the current max cost of the cache policy.
	MaxCost() int64
	// UpdateMaxCost updates the max cost of the cache policy.
	UpdateMaxCost(int64)
}
    65  
    66  func newPolicy[V any](numCounters, maxCost int64) policy[V] {
    67  	return newDefaultPolicy[V](numCounters, maxCost)
    68  }
    69  
// defaultPolicy is the policy implementation returned by newPolicy. It pairs
// a tinyLFU admission helper with a sampledLFU eviction helper.
//
// Fields:
//   - Mutex:    held by the methods that read or modify admit and evict.
//   - admit:    access-frequency estimates, fed by processItems and read by Add.
//   - evict:    the cost of every key currently tracked by the policy.
//   - itemsCh:  carries batches of keys from Push to processItems.
//   - stop:     signals processItems to exit (see Close).
//   - isClosed: set by Close and checked by Push; both access it without
//     taking the mutex.
//   - metrics:  optional stats object installed by CollectMetrics.
type defaultPolicy[V any] struct {
	sync.Mutex
	admit    *tinyLFU
	evict    *sampledLFU
	itemsCh  chan []uint64
	stop     chan struct{}
	isClosed bool
	metrics  *Metrics
}
    79  
    80  func newDefaultPolicy[V any](numCounters, maxCost int64) *defaultPolicy[V] {
    81  	p := &defaultPolicy[V]{
    82  		admit:   newTinyLFU(numCounters),
    83  		evict:   newSampledLFU(maxCost),
    84  		itemsCh: make(chan []uint64, 3),
    85  		stop:    make(chan struct{}),
    86  	}
    87  	go p.processItems()
    88  	return p
    89  }
    90  
    91  func (p *defaultPolicy[V]) CollectMetrics(metrics *Metrics) {
    92  	p.metrics = metrics
    93  	p.evict.metrics = metrics
    94  }
    95  
// policyPair is a key together with the cost recorded for it. fillSample
// produces these as eviction candidates for Add.
type policyPair struct {
	key  uint64
	cost int64
}
   100  
   101  func (p *defaultPolicy[V]) processItems() {
   102  	for {
   103  		select {
   104  		case items := <-p.itemsCh:
   105  			p.Lock()
   106  			p.admit.Push(items)
   107  			p.Unlock()
   108  		case <-p.stop:
   109  			return
   110  		}
   111  	}
   112  }
   113  
   114  func (p *defaultPolicy[V]) Push(keys []uint64) bool {
   115  	if p.isClosed {
   116  		return false
   117  	}
   118  
   119  	if len(keys) == 0 {
   120  		return true
   121  	}
   122  
   123  	select {
   124  	case p.itemsCh <- keys:
   125  		p.metrics.add(keepGets, keys[0], uint64(len(keys)))
   126  		return true
   127  	default:
   128  		p.metrics.add(dropGets, keys[0], uint64(len(keys)))
   129  		return false
   130  	}
   131  }
   132  
   133  // Add decides whether the item with the given key and cost should be accepted by
   134  // the policy. It returns the list of victims that have been evicted and a boolean
   135  // indicating whether the incoming item should be accepted.
   136  func (p *defaultPolicy[V]) Add(key uint64, cost int64) ([]*Item[V], bool) {
   137  	p.Lock()
   138  	defer p.Unlock()
   139  
   140  	// Cannot add an item bigger than entire cache.
   141  	if cost > p.evict.getMaxCost() {
   142  		return nil, false
   143  	}
   144  
   145  	// No need to go any further if the item is already in the cache.
   146  	if has := p.evict.updateIfHas(key, cost); has {
   147  		// An update does not count as an addition, so return false.
   148  		return nil, false
   149  	}
   150  
   151  	// If the execution reaches this point, the key doesn't exist in the cache.
   152  	// Calculate the remaining room in the cache (usually bytes).
   153  	room := p.evict.roomLeft(cost)
   154  	if room >= 0 {
   155  		// There's enough room in the cache to store the new item without
   156  		// overflowing. Do that now and stop here.
   157  		p.evict.add(key, cost)
   158  		p.metrics.add(costAdd, key, uint64(cost))
   159  		return nil, true
   160  	}
   161  
   162  	// incHits is the hit count for the incoming item.
   163  	incHits := p.admit.Estimate(key)
   164  	// sample is the eviction candidate pool to be filled via random sampling.
   165  	// TODO: perhaps we should use a min heap here. Right now our time
   166  	// complexity is N for finding the min. Min heap should bring it down to
   167  	// O(lg N).
   168  	sample := make([]*policyPair, 0, lfuSample)
   169  	// As items are evicted they will be appended to victims.
   170  	victims := make([]*Item[V], 0)
   171  
   172  	// Delete victims until there's enough space or a minKey is found that has
   173  	// more hits than incoming item.
   174  	for ; room < 0; room = p.evict.roomLeft(cost) {
   175  		// Fill up empty slots in sample.
   176  		sample = p.evict.fillSample(sample)
   177  
   178  		// Find minimally used item in sample.
   179  		minKey, minHits, minId, minCost := uint64(0), int64(math.MaxInt64), 0, int64(0)
   180  		for i, pair := range sample {
   181  			// Look up hit count for sample key.
   182  			if hits := p.admit.Estimate(pair.key); hits < minHits {
   183  				minKey, minHits, minId, minCost = pair.key, hits, i, pair.cost
   184  			}
   185  		}
   186  
   187  		// If the incoming item isn't worth keeping in the policy, reject.
   188  		if incHits < minHits {
   189  			p.metrics.add(rejectSets, key, 1)
   190  			return victims, false
   191  		}
   192  
   193  		// Delete the victim from metadata.
   194  		p.evict.del(minKey)
   195  
   196  		// Delete the victim from sample.
   197  		sample[minId] = sample[len(sample)-1]
   198  		sample = sample[:len(sample)-1]
   199  		// Store victim in evicted victims slice.
   200  		victims = append(victims, &Item[V]{
   201  			Key:      minKey,
   202  			Conflict: 0,
   203  			Cost:     minCost,
   204  		})
   205  	}
   206  
   207  	p.evict.add(key, cost)
   208  	p.metrics.add(costAdd, key, uint64(cost))
   209  	return victims, true
   210  }
   211  
   212  func (p *defaultPolicy[V]) Has(key uint64) bool {
   213  	p.Lock()
   214  	_, exists := p.evict.keyCosts[key]
   215  	p.Unlock()
   216  	return exists
   217  }
   218  
   219  func (p *defaultPolicy[V]) Del(key uint64) {
   220  	p.Lock()
   221  	p.evict.del(key)
   222  	p.Unlock()
   223  }
   224  
   225  func (p *defaultPolicy[V]) Cap() int64 {
   226  	p.Lock()
   227  	capacity := int64(p.evict.getMaxCost() - p.evict.used)
   228  	p.Unlock()
   229  	return capacity
   230  }
   231  
   232  func (p *defaultPolicy[V]) Update(key uint64, cost int64) {
   233  	p.Lock()
   234  	p.evict.updateIfHas(key, cost)
   235  	p.Unlock()
   236  }
   237  
   238  func (p *defaultPolicy[V]) Cost(key uint64) int64 {
   239  	p.Lock()
   240  	if cost, found := p.evict.keyCosts[key]; found {
   241  		p.Unlock()
   242  		return cost
   243  	}
   244  	p.Unlock()
   245  	return -1
   246  }
   247  
   248  func (p *defaultPolicy[V]) Clear() {
   249  	p.Lock()
   250  	p.admit.clear()
   251  	p.evict.clear()
   252  	p.Unlock()
   253  }
   254  
// Close shuts the policy down: it signals the processItems goroutine to
// exit, closes the stop and itemsCh channels and marks the policy closed.
// Calling Close again after it has completed is a no-op.
//
// NOTE(review): isClosed is read and written here, and read in Push, without
// holding the mutex, so Close does not look safe to run concurrently with
// itself or with Push — confirm that callers serialize these.
func (p *defaultPolicy[V]) Close() {
	if p.isClosed {
		return
	}

	// stop is created unbuffered in newDefaultPolicy, so this send blocks
	// until processItems has received the signal and is leaving its loop.
	p.stop <- struct{}{}
	close(p.stop)
	close(p.itemsCh)
	p.isClosed = true
}
   266  
   267  func (p *defaultPolicy[V]) MaxCost() int64 {
   268  	if p == nil || p.evict == nil {
   269  		return 0
   270  	}
   271  	return p.evict.getMaxCost()
   272  }
   273  
   274  func (p *defaultPolicy[V]) UpdateMaxCost(maxCost int64) {
   275  	if p == nil || p.evict == nil {
   276  		return
   277  	}
   278  	p.evict.updateMaxCost(maxCost)
   279  }
   280  
// sampledLFU is an eviction helper storing key-cost pairs.
//
// maxCost is read and written through sync/atomic (getMaxCost and
// updateMaxCost). used is the running total maintained by add, del and
// updateIfHas, and keyCosts maps each tracked key to its recorded cost; both
// are accessed with plain loads and stores. metrics is the stats object set
// by defaultPolicy.CollectMetrics.
type sampledLFU struct {
	// NOTE: align maxCost to 64-bit boundary for use with atomic.
	// As per https://golang.org/pkg/sync/atomic/: "On ARM, x86-32,
	// and 32-bit MIPS, it is the caller's responsibility to arrange
	// for 64-bit alignment of 64-bit words accessed atomically.
	// The first word in a variable or in an allocated struct, array,
	// or slice can be relied upon to be 64-bit aligned."
	// Keep maxCost as the first field for that reason.
	maxCost  int64
	used     int64
	metrics  *Metrics
	keyCosts map[uint64]int64
}
   294  
   295  func newSampledLFU(maxCost int64) *sampledLFU {
   296  	return &sampledLFU{
   297  		keyCosts: make(map[uint64]int64),
   298  		maxCost:  maxCost,
   299  	}
   300  }
   301  
   302  func (p *sampledLFU) getMaxCost() int64 {
   303  	return atomic.LoadInt64(&p.maxCost)
   304  }
   305  
   306  func (p *sampledLFU) updateMaxCost(maxCost int64) {
   307  	atomic.StoreInt64(&p.maxCost, maxCost)
   308  }
   309  
   310  func (p *sampledLFU) roomLeft(cost int64) int64 {
   311  	return p.getMaxCost() - (p.used + cost)
   312  }
   313  
   314  func (p *sampledLFU) fillSample(in []*policyPair) []*policyPair {
   315  	if len(in) >= lfuSample {
   316  		return in
   317  	}
   318  	for key, cost := range p.keyCosts {
   319  		in = append(in, &policyPair{key, cost})
   320  		if len(in) >= lfuSample {
   321  			return in
   322  		}
   323  	}
   324  	return in
   325  }
   326  
   327  func (p *sampledLFU) del(key uint64) {
   328  	cost, ok := p.keyCosts[key]
   329  	if !ok {
   330  		return
   331  	}
   332  	p.used -= cost
   333  	delete(p.keyCosts, key)
   334  	p.metrics.add(costEvict, key, uint64(cost))
   335  	p.metrics.add(keyEvict, key, 1)
   336  }
   337  
   338  func (p *sampledLFU) add(key uint64, cost int64) {
   339  	p.keyCosts[key] = cost
   340  	p.used += cost
   341  }
   342  
   343  func (p *sampledLFU) updateIfHas(key uint64, cost int64) bool {
   344  	if prev, found := p.keyCosts[key]; found {
   345  		// Update the cost of an existing key, but don't worry about evicting.
   346  		// Evictions will be handled the next time a new item is added.
   347  		p.metrics.add(keyUpdate, key, 1)
   348  		if prev > cost {
   349  			diff := prev - cost
   350  			p.metrics.add(costAdd, key, ^uint64(uint64(diff)-1))
   351  		} else if cost > prev {
   352  			diff := cost - prev
   353  			p.metrics.add(costAdd, key, uint64(diff))
   354  		}
   355  		p.used += cost - prev
   356  		p.keyCosts[key] = cost
   357  		return true
   358  	}
   359  	return false
   360  }
   361  
   362  func (p *sampledLFU) clear() {
   363  	p.used = 0
   364  	p.keyCosts = make(map[uint64]int64)
   365  }
   366  
// tinyLFU is an admission helper that keeps track of access frequency using
// tiny (4-bit) counters in the form of a count-min sketch.
// tinyLFU is NOT thread safe.
//
// freq is the count-min sketch and door is a bloom filter used as a
// doorkeeper in front of it (see Increment). incrs counts Increment calls
// since the last reset; once it reaches resetAt, reset is invoked. resetAt is
// set to numCounters by newTinyLFU.
type tinyLFU struct {
	freq    *cmSketch
	door    *z.Bloom
	incrs   int64
	resetAt int64
}
   376  
   377  func newTinyLFU(numCounters int64) *tinyLFU {
   378  	return &tinyLFU{
   379  		freq:    newCmSketch(numCounters),
   380  		door:    z.NewBloomFilter(float64(numCounters), 0.01),
   381  		resetAt: numCounters,
   382  	}
   383  }
   384  
   385  func (p *tinyLFU) Push(keys []uint64) {
   386  	for _, key := range keys {
   387  		p.Increment(key)
   388  	}
   389  }
   390  
   391  func (p *tinyLFU) Estimate(key uint64) int64 {
   392  	hits := p.freq.Estimate(key)
   393  	if p.door.Has(key) {
   394  		hits++
   395  	}
   396  	return hits
   397  }
   398  
   399  func (p *tinyLFU) Increment(key uint64) {
   400  	// Flip doorkeeper bit if not already done.
   401  	if added := p.door.AddIfNotHas(key); !added {
   402  		// Increment count-min counter if doorkeeper bit is already set.
   403  		p.freq.Increment(key)
   404  	}
   405  	p.incrs++
   406  	if p.incrs >= p.resetAt {
   407  		p.reset()
   408  	}
   409  }
   410  
   411  func (p *tinyLFU) reset() {
   412  	// Zero out incrs.
   413  	p.incrs = 0
   414  	// clears doorkeeper bits
   415  	p.door.Clear()
   416  	// halves count-min counters
   417  	p.freq.Reset()
   418  }
   419  
   420  func (p *tinyLFU) clear() {
   421  	p.incrs = 0
   422  	p.door.Clear()
   423  	p.freq.Clear()
   424  }