github.com/pingcap/badger@v1.5.1-0.20230103063557-828f39b09b6d/cache/policy.go (about)

     1  /*
     2   * Copyright 2019 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package cache
    18  
    19  import (
    20  	"math"
    21  	"sync"
    22  
    23  	"github.com/pingcap/badger/cache/z"
    24  )
    25  
    26  const (
    27  	// lfuSample is the number of items to sample when looking at eviction
    28  	// candidates. 5 seems to be the most optimal number [citation needed].
    29  	lfuSample = 5
    30  )
    31  
    32  type policy struct {
    33  	sync.Mutex
    34  	admit   *tinyLFU
    35  	evict   *sampledLFU
    36  	itemsCh chan []uint64
    37  	stop    chan struct{}
    38  	metrics *Metrics
    39  }
    40  
    41  func newPolicy(numCounters, maxCost int64) *policy {
    42  	p := &policy{
    43  		admit:   newTinyLFU(numCounters),
    44  		evict:   newSampledLFU(maxCost),
    45  		itemsCh: make(chan []uint64, 3),
    46  		stop:    make(chan struct{}),
    47  	}
    48  	go p.processItems()
    49  	return p
    50  }
    51  
    52  func (p *policy) CollectMetrics(metrics *Metrics) {
    53  	p.metrics = metrics
    54  	p.evict.metrics = metrics
    55  }
    56  
    57  type policyPair struct {
    58  	key  uint64
    59  	cost int64
    60  }
    61  
    62  func (p *policy) processItems() {
    63  	for {
    64  		select {
    65  		case items := <-p.itemsCh:
    66  			p.Lock()
    67  			p.admit.Push(items)
    68  			p.Unlock()
    69  		case <-p.stop:
    70  			return
    71  		}
    72  	}
    73  }
    74  
    75  func (p *policy) setNewMaxCost(newMaxCost int64) {
    76  	p.Lock()
    77  	p.evict.maxCost = newMaxCost
    78  	p.Unlock()
    79  }
    80  
    81  func (p *policy) Push(keys []uint64) bool {
    82  	if len(keys) == 0 {
    83  		return true
    84  	}
    85  	select {
    86  	case p.itemsCh <- keys:
    87  		p.metrics.add(keepGets, keys[0], uint64(len(keys)))
    88  		return true
    89  	default:
    90  		p.metrics.add(dropGets, keys[0], uint64(len(keys)))
    91  		return false
    92  	}
    93  }
    94  
    95  func (p *policy) Add(key uint64, cost int64) ([]*item, bool) {
    96  	p.Lock()
    97  	defer p.Unlock()
    98  	// can't add an item bigger than entire cache
    99  	if cost > p.evict.maxCost {
   100  		return nil, false
   101  	}
   102  	// we don't need to go any further if the item is already in the cache
   103  	if has := p.evict.updateIfHas(key, cost); has {
   104  		return nil, true
   105  	}
   106  	// if we got this far, this key doesn't exist in the cache
   107  	//
   108  	// calculate the remaining room in the cache (usually bytes)
   109  	room := p.evict.roomLeft(cost)
   110  	if room >= 0 {
   111  		// there's enough room in the cache to store the new item without
   112  		// overflowing, so we can do that now and stop here
   113  		p.evict.add(key, cost)
   114  		p.metrics.add(costAdd, key, uint64(cost))
   115  		p.metrics.add(keyAdd, key, 1)
   116  		return nil, true
   117  	}
   118  	// incHits is the hit count for the incoming item
   119  	incHits := p.admit.Estimate(key)
   120  	// sample is the eviction candidate pool to be filled via random sampling
   121  	//
   122  	// TODO: perhaps we should use a min heap here. Right now our time
   123  	// complexity is N for finding the min. Min heap should bring it down to
   124  	// O(lg N).
   125  	sample := make([]*policyPair, 0, lfuSample)
   126  	// as items are evicted they will be appended to victims
   127  	victims := make([]*item, 0)
   128  	// delete victims until there's enough space or a minKey is found that has
   129  	// more hits than incoming item.
   130  	for ; room < 0; room = p.evict.roomLeft(cost) {
   131  		// fill up empty slots in sample
   132  		sample = p.evict.fillSample(sample)
   133  		// find minimally used item in sample
   134  		minKey, minHits, minId := uint64(0), int64(math.MaxInt64), 0
   135  		for i, pair := range sample {
   136  			// look up hit count for sample key
   137  			if hits := p.admit.Estimate(pair.key); hits < minHits {
   138  				minKey, minHits, minId = pair.key, hits, i
   139  			}
   140  		}
   141  		// if the incoming item isn't worth keeping in the policy, reject.
   142  		if incHits < minHits {
   143  			p.metrics.add(rejectSets, key, 1)
   144  			return victims, false
   145  		}
   146  		// delete the victim from metadata
   147  		p.evict.del(minKey)
   148  
   149  		// delete the victim from sample
   150  		sample[minId] = sample[len(sample)-1]
   151  		sample = sample[:len(sample)-1]
   152  		// store victim in evicted victims slice
   153  		victims = append(victims, &item{
   154  			key: minKey,
   155  		})
   156  	}
   157  	p.evict.add(key, cost)
   158  	p.metrics.add(costAdd, key, uint64(cost))
   159  	p.metrics.add(keyAdd, key, 1)
   160  	return victims, true
   161  }
   162  
   163  func (p *policy) Has(key uint64) bool {
   164  	p.Lock()
   165  	_, exists := p.evict.keyCosts[key]
   166  	p.Unlock()
   167  	return exists
   168  }
   169  
   170  func (p *policy) Del(key uint64) {
   171  	p.Lock()
   172  	p.evict.del(key)
   173  	p.Unlock()
   174  }
   175  
   176  func (p *policy) Cap() int64 {
   177  	p.Lock()
   178  	capacity := int64(p.evict.maxCost - p.evict.used)
   179  	p.Unlock()
   180  	return capacity
   181  }
   182  
   183  func (p *policy) Update(key uint64, cost int64) {
   184  	p.Lock()
   185  	p.evict.updateIfHas(key, cost)
   186  	p.Unlock()
   187  }
   188  
   189  func (p *policy) Cost(key uint64) int64 {
   190  	p.Lock()
   191  	if cost, found := p.evict.keyCosts[key]; found {
   192  		p.Unlock()
   193  		return cost
   194  	}
   195  	p.Unlock()
   196  	return -1
   197  }
   198  
   199  func (p *policy) Clear() {
   200  	p.Lock()
   201  	p.admit.clear()
   202  	p.evict.clear()
   203  	p.Unlock()
   204  }
   205  
   206  func (p *policy) Close() {
   207  	// block until p.processItems goroutine is returned
   208  	p.stop <- struct{}{}
   209  	close(p.stop)
   210  	close(p.itemsCh)
   211  }
   212  
   213  // sampledLFU is an eviction helper storing key-cost pairs.
   214  type sampledLFU struct {
   215  	keyCosts map[uint64]int64
   216  	maxCost  int64
   217  	used     int64
   218  	metrics  *Metrics
   219  }
   220  
   221  func newSampledLFU(maxCost int64) *sampledLFU {
   222  	return &sampledLFU{
   223  		keyCosts: make(map[uint64]int64),
   224  		maxCost:  maxCost,
   225  	}
   226  }
   227  
   228  func (p *sampledLFU) roomLeft(cost int64) int64 {
   229  	return p.maxCost - (p.used + cost)
   230  }
   231  
   232  func (p *sampledLFU) fillSample(in []*policyPair) []*policyPair {
   233  	if len(in) >= lfuSample {
   234  		return in
   235  	}
   236  	for key, cost := range p.keyCosts {
   237  		in = append(in, &policyPair{key, cost})
   238  		if len(in) >= lfuSample {
   239  			return in
   240  		}
   241  	}
   242  	return in
   243  }
   244  
   245  func (p *sampledLFU) del(key uint64) {
   246  	cost, ok := p.keyCosts[key]
   247  	if !ok {
   248  		return
   249  	}
   250  	p.used -= cost
   251  	delete(p.keyCosts, key)
   252  	p.metrics.add(costEvict, key, uint64(cost))
   253  	p.metrics.add(keyEvict, key, 1)
   254  }
   255  
   256  func (p *sampledLFU) add(key uint64, cost int64) {
   257  	p.keyCosts[key] = cost
   258  	p.used += cost
   259  }
   260  
   261  func (p *sampledLFU) updateIfHas(key uint64, cost int64) bool {
   262  	if prev, found := p.keyCosts[key]; found {
   263  		// update the cost of an existing key, but don't worry about evicting,
   264  		// evictions will be handled the next time a new item is added
   265  		p.metrics.add(keyUpdate, key, 1)
   266  		if prev > cost {
   267  			diff := prev - cost
   268  			p.metrics.add(costAdd, key, ^uint64(uint64(diff)-1))
   269  		} else if cost > prev {
   270  			diff := cost - prev
   271  			p.metrics.add(costAdd, key, uint64(diff))
   272  		}
   273  		p.used += cost - prev
   274  		p.keyCosts[key] = cost
   275  		return true
   276  	}
   277  	return false
   278  }
   279  
   280  func (p *sampledLFU) clear() {
   281  	p.used = 0
   282  	p.keyCosts = make(map[uint64]int64)
   283  }
   284  
   285  // tinyLFU is an admission helper that keeps track of access frequency using
   286  // tiny (4-bit) counters in the form of a count-min sketch.
   287  // tinyLFU is NOT thread safe.
   288  type tinyLFU struct {
   289  	freq    *cmSketch
   290  	door    *z.Bloom
   291  	incrs   int64
   292  	resetAt int64
   293  }
   294  
   295  func newTinyLFU(numCounters int64) *tinyLFU {
   296  	return &tinyLFU{
   297  		freq:    newCmSketch(numCounters),
   298  		door:    z.NewBloomFilter(float64(numCounters), 0.01),
   299  		resetAt: numCounters,
   300  	}
   301  }
   302  
   303  func (p *tinyLFU) Push(keys []uint64) {
   304  	for _, key := range keys {
   305  		p.Increment(key)
   306  	}
   307  }
   308  
   309  func (p *tinyLFU) Estimate(key uint64) int64 {
   310  	hits := p.freq.Estimate(key)
   311  	if p.door.Has(key) {
   312  		hits += 1
   313  	}
   314  	return hits
   315  }
   316  
   317  func (p *tinyLFU) Increment(key uint64) {
   318  	// flip doorkeeper bit if not already
   319  	if added := p.door.AddIfNotHas(key); !added {
   320  		// increment count-min counter if doorkeeper bit is already set.
   321  		p.freq.Increment(key)
   322  	}
   323  	p.incrs++
   324  	if p.incrs >= p.resetAt {
   325  		p.reset()
   326  	}
   327  }
   328  
   329  func (p *tinyLFU) reset() {
   330  	// Zero out incrs.
   331  	p.incrs = 0
   332  	// clears doorkeeper bits
   333  	p.door.Clear()
   334  	// halves count-min counters
   335  	p.freq.Reset()
   336  }
   337  
   338  func (p *tinyLFU) clear() {
   339  	p.incrs = 0
   340  	p.door.Clear()
   341  	p.freq.Clear()
   342  }