github.com/outcaste-io/ristretto@v0.2.3/policy.go (about)

     1  /*
     2   * Copyright 2020 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package ristretto
    18  
    19  import (
    20  	"math"
    21  	"sync"
    22  	"sync/atomic"
    23  
    24  	"github.com/outcaste-io/ristretto/z"
    25  )
    26  
const (
	// lfuSample is the number of items to sample when looking at eviction
	// candidates. 5 seems to be the optimal number [citation needed].
	lfuSample = 5
)
    32  
// lfuPolicy encapsulates eviction/admission behavior. The embedded mutex is
// taken by the methods that touch admit or the cost bookkeeping in costs.
type lfuPolicy struct {
	sync.Mutex
	// admit estimates how often keys have been accessed.
	admit    *tinyLFU
	// costs tracks the cost of every admitted key and the total in use.
	costs    *keyCosts
	// itemsCh carries batches of accessed keys from Push to processItems.
	itemsCh  chan []uint64
	// stop tells the processItems goroutine to return.
	stop     chan struct{}
	// isClosed is set by Close and consulted by Push and Close.
	isClosed bool
	// metrics is nil until CollectMetrics is called.
	metrics  *Metrics
}
    43  
    44  func newPolicy(numCounters, maxCost int64) *lfuPolicy {
    45  	p := &lfuPolicy{
    46  		admit:   newTinyLFU(numCounters),
    47  		costs:   newSampledLFU(maxCost),
    48  		itemsCh: make(chan []uint64, 3),
    49  		stop:    make(chan struct{}),
    50  	}
    51  	go p.processItems()
    52  	return p
    53  }
    54  
    55  func (p *lfuPolicy) CollectMetrics(metrics *Metrics) {
    56  	p.metrics = metrics
    57  	p.costs.metrics = metrics
    58  }
    59  
// policyPair couples a key with its cost. Add uses it for the entries of the
// eviction sample.
type policyPair struct {
	key  uint64
	cost int64
}
    64  
    65  func (p *lfuPolicy) processItems() {
    66  	for {
    67  		select {
    68  		case items := <-p.itemsCh:
    69  			p.Lock()
    70  			p.admit.Push(items)
    71  			p.Unlock()
    72  		case <-p.stop:
    73  			return
    74  		}
    75  	}
    76  }
    77  
    78  func (p *lfuPolicy) Push(keys []uint64) bool {
    79  	if p.isClosed {
    80  		return false
    81  	}
    82  
    83  	if len(keys) == 0 {
    84  		return true
    85  	}
    86  
    87  	select {
    88  	case p.itemsCh <- keys:
    89  		p.metrics.add(keepGets, keys[0], uint64(len(keys)))
    90  		return true
    91  	default:
    92  		p.metrics.add(dropGets, keys[0], uint64(len(keys)))
    93  		return false
    94  	}
    95  }
    96  
    97  // Add decides whether the item with the given key and cost should be accepted by
    98  // the policy. It returns the list of victims that have been evicted and a boolean
    99  // indicating whether the incoming item should be accepted.
   100  func (p *lfuPolicy) Add(key uint64, cost int64) ([]*Item, bool) {
   101  	p.Lock()
   102  	defer p.Unlock()
   103  
   104  	// Cannot add an item bigger than entire cache.
   105  	if cost > p.costs.getMaxCost() {
   106  		return nil, false
   107  	}
   108  
   109  	// No need to go any further if the item is already in the cache.
   110  	if has := p.costs.updateIfHas(key, cost); has {
   111  		// An update does not count as an addition, so return false.
   112  		return nil, false
   113  	}
   114  
   115  	// If the execution reaches this point, the key doesn't exist in the cache.
   116  	// Calculate the remaining room in the cache (usually bytes).
   117  	room := p.costs.roomLeft(cost)
   118  	if room >= 0 {
   119  		// There's enough room in the cache to store the new item without
   120  		// overflowing. Do that now and stop here.
   121  		p.costs.add(key, cost)
   122  		p.metrics.add(costAdd, key, uint64(cost))
   123  		return nil, true
   124  	}
   125  
   126  	// incHits is the hit count for the incoming item.
   127  	incHits := p.admit.Estimate(key)
   128  	// sample is the eviction candidate pool to be filled via random sampling.
   129  	// TODO: perhaps we should use a min heap here. Right now our time
   130  	// complexity is N for finding the min. Min heap should bring it down to
   131  	// O(lg N).
   132  	sample := make([]*policyPair, 0, lfuSample)
   133  	// As items are evicted they will be appended to victims.
   134  	victims := make([]*Item, 0)
   135  
   136  	// Delete victims until there's enough space or a minKey is found that has
   137  	// more hits than incoming item.
   138  	for ; room < 0; room = p.costs.roomLeft(cost) {
   139  		// Fill up empty slots in sample.
   140  		sample = p.costs.fillSample(sample)
   141  
   142  		// Find minimally used item in sample.
   143  		minKey, minHits, minId, minCost := uint64(0), int64(math.MaxInt64), 0, int64(0)
   144  		for i, pair := range sample {
   145  			// Look up hit count for sample key.
   146  			if hits := p.admit.Estimate(pair.key); hits < minHits {
   147  				minKey, minHits, minId, minCost = pair.key, hits, i, pair.cost
   148  			}
   149  		}
   150  
   151  		// If the incoming item isn't worth keeping in the policy, reject.
   152  		if incHits < minHits {
   153  			p.metrics.add(rejectSets, key, 1)
   154  			return victims, false
   155  		}
   156  
   157  		// Delete the victim from metadata.
   158  		p.costs.del(minKey)
   159  
   160  		// Delete the victim from sample.
   161  		sample[minId] = sample[len(sample)-1]
   162  		sample = sample[:len(sample)-1]
   163  		// Store victim in evicted victims slice.
   164  		victims = append(victims, &Item{
   165  			Key:      minKey,
   166  			Conflict: 0,
   167  			Cost:     minCost,
   168  		})
   169  	}
   170  
   171  	p.costs.add(key, cost)
   172  	p.metrics.add(costAdd, key, uint64(cost))
   173  	return victims, true
   174  }
   175  
   176  func (p *lfuPolicy) Has(key uint64) bool {
   177  	p.Lock()
   178  	_, exists := p.costs.keyCosts[key]
   179  	p.Unlock()
   180  	return exists
   181  }
   182  
   183  func (p *lfuPolicy) Del(key uint64) {
   184  	p.Lock()
   185  	p.costs.del(key)
   186  	p.Unlock()
   187  }
   188  
   189  func (p *lfuPolicy) Cap() int64 {
   190  	p.Lock()
   191  	capacity := int64(p.costs.getMaxCost() - p.costs.used)
   192  	p.Unlock()
   193  	return capacity
   194  }
   195  
   196  func (p *lfuPolicy) Update(key uint64, cost int64) {
   197  	p.Lock()
   198  	p.costs.updateIfHas(key, cost)
   199  	p.Unlock()
   200  }
   201  
   202  func (p *lfuPolicy) Cost(key uint64) int64 {
   203  	p.Lock()
   204  	if cost, found := p.costs.keyCosts[key]; found {
   205  		p.Unlock()
   206  		return cost
   207  	}
   208  	p.Unlock()
   209  	return -1
   210  }
   211  
   212  func (p *lfuPolicy) Clear() {
   213  	p.Lock()
   214  	p.admit.clear()
   215  	p.costs.clear()
   216  	p.Unlock()
   217  }
   218  
// Close stops the processItems goroutine and closes the policy's channels.
// Calling it again after it has completed is a no-op.
//
// NOTE(review): isClosed is read and written here without holding the mutex,
// and itemsCh is closed before isClosed is set, so a Push running at the same
// time could attempt to send on the closed channel. Presumably callers
// serialize Close against Push and other Close calls; confirm at the call
// sites.
func (p *lfuPolicy) Close() {
	if p.isClosed {
		return
	}

	// Block until the p.processItems goroutine takes the stop signal; it
	// returns right after receiving it.
	p.stop <- struct{}{}
	close(p.stop)
	close(p.itemsCh)
	p.isClosed = true
}
   230  
   231  func (p *lfuPolicy) MaxCost() int64 {
   232  	if p == nil || p.costs == nil {
   233  		return 0
   234  	}
   235  	return p.costs.getMaxCost()
   236  }
   237  
   238  func (p *lfuPolicy) UpdateMaxCost(maxCost int64) {
   239  	if p == nil || p.costs == nil {
   240  		return
   241  	}
   242  	p.costs.updateMaxCost(maxCost)
   243  }
   244  
// keyCosts stores key-cost pairs together with the running total of those
// costs and the maximum total allowed.
type keyCosts struct {
	// NOTE: align maxCost to 64-bit boundary for use with atomic.
	// As per https://golang.org/pkg/sync/atomic/: "On ARM, x86-32,
	// and 32-bit MIPS, it is the caller’s responsibility to arrange
	// for 64-bit alignment of 64-bit words accessed atomically.
	// The first word in a variable or in an allocated struct, array,
	// or slice can be relied upon to be 64-bit aligned."
	//
	// maxCost is read and written through getMaxCost/updateMaxCost only.
	maxCost  int64
	// used is the sum of the costs currently stored in keyCosts.
	used     int64
	// metrics is nil unless assigned by lfuPolicy.CollectMetrics.
	metrics  *Metrics
	// keyCosts maps each tracked key to its cost.
	keyCosts map[uint64]int64
}
   258  
   259  func newSampledLFU(maxCost int64) *keyCosts {
   260  	return &keyCosts{
   261  		keyCosts: make(map[uint64]int64),
   262  		maxCost:  maxCost,
   263  	}
   264  }
   265  
// getMaxCost atomically loads the maximum cost, so it can be called without
// holding the policy lock.
func (p *keyCosts) getMaxCost() int64 {
	return atomic.LoadInt64(&p.maxCost)
}
   269  
// updateMaxCost atomically stores a new maximum cost. Entries already stored
// are not touched by this call.
func (p *keyCosts) updateMaxCost(maxCost int64) {
	atomic.StoreInt64(&p.maxCost, maxCost)
}
   273  
   274  func (p *keyCosts) roomLeft(cost int64) int64 {
   275  	return p.getMaxCost() - (p.used + cost)
   276  }
   277  
   278  func (p *keyCosts) fillSample(in []*policyPair) []*policyPair {
   279  	if len(in) >= lfuSample {
   280  		return in
   281  	}
   282  	for key, cost := range p.keyCosts {
   283  		in = append(in, &policyPair{key, cost})
   284  		if len(in) >= lfuSample {
   285  			return in
   286  		}
   287  	}
   288  	return in
   289  }
   290  
   291  func (p *keyCosts) del(key uint64) {
   292  	cost, ok := p.keyCosts[key]
   293  	if !ok {
   294  		return
   295  	}
   296  	p.used -= cost
   297  	delete(p.keyCosts, key)
   298  	p.metrics.add(costEvict, key, uint64(cost))
   299  	p.metrics.add(keyEvict, key, 1)
   300  }
   301  
   302  func (p *keyCosts) add(key uint64, cost int64) {
   303  	p.keyCosts[key] = cost
   304  	p.used += cost
   305  }
   306  
   307  func (p *keyCosts) updateIfHas(key uint64, cost int64) bool {
   308  	if prev, found := p.keyCosts[key]; found {
   309  		// Update the cost of an existing key, but don't worry about evicting.
   310  		// Evictions will be handled the next time a new item is added.
   311  		p.metrics.add(keyUpdate, key, 1)
   312  		if prev > cost {
   313  			diff := prev - cost
   314  			p.metrics.add(costAdd, key, ^uint64(uint64(diff)-1))
   315  		} else if cost > prev {
   316  			diff := cost - prev
   317  			p.metrics.add(costAdd, key, uint64(diff))
   318  		}
   319  		p.used += cost - prev
   320  		p.keyCosts[key] = cost
   321  		return true
   322  	}
   323  	return false
   324  }
   325  
   326  func (p *keyCosts) clear() {
   327  	p.used = 0
   328  	p.keyCosts = make(map[uint64]int64)
   329  }
   330  
// tinyLFU is an admission helper that keeps track of access frequency using
// tiny (4-bit) counters in the form of a count-min sketch.
// tinyLFU is NOT thread safe.
type tinyLFU struct {
	// freq is the count-min sketch holding the frequency counters.
	freq    *cmSketch
	// door is the doorkeeper bloom filter; Increment only touches freq for
	// keys that are already in it.
	door    *z.Bloom
	// incrs counts Increment calls since the last reset or clear.
	incrs   int64
	// resetAt is the incrs value at which Increment triggers reset.
	resetAt int64
}
   340  
   341  func newTinyLFU(numCounters int64) *tinyLFU {
   342  	return &tinyLFU{
   343  		freq:    newCmSketch(numCounters),
   344  		door:    z.NewBloomFilter(float64(numCounters), 0.01),
   345  		resetAt: numCounters,
   346  	}
   347  }
   348  
   349  func (p *tinyLFU) Push(keys []uint64) {
   350  	for _, key := range keys {
   351  		p.Increment(key)
   352  	}
   353  }
   354  
   355  func (p *tinyLFU) Estimate(key uint64) int64 {
   356  	hits := p.freq.Estimate(key)
   357  	if p.door.Has(key) {
   358  		hits++
   359  	}
   360  	return hits
   361  }
   362  
   363  func (p *tinyLFU) Increment(key uint64) {
   364  	// Flip doorkeeper bit if not already done.
   365  	if added := p.door.AddIfNotHas(key); !added {
   366  		// Increment count-min counter if doorkeeper bit is already set.
   367  		p.freq.Increment(key)
   368  	}
   369  	p.incrs++
   370  	if p.incrs >= p.resetAt {
   371  		p.reset()
   372  	}
   373  }
   374  
   375  func (p *tinyLFU) reset() {
   376  	// Zero out incrs.
   377  	p.incrs = 0
   378  	// clears doorkeeper bits
   379  	p.door.Clear()
   380  	// halves count-min counters
   381  	p.freq.Reset()
   382  }
   383  
   384  func (p *tinyLFU) clear() {
   385  	p.incrs = 0
   386  	p.door.Clear()
   387  	p.freq.Clear()
   388  }