github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/storage/cache/cache.go

package cache

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"sync"
	"time"

	"github.com/dgraph-io/badger/v2"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/valyala/bytebufferpool"

	"github.com/pyroscope-io/pyroscope/pkg/storage/cache/lfu"
)

type Cache struct {
	db      *badger.DB
	lfu     *lfu.Cache
	metrics *Metrics
	codec   Codec

	prefix string
	ttl    time.Duration

	evictionsDone chan struct{}
	writeBackDone chan struct{}
	flushOnce     sync.Once
}

type Config struct {
	*badger.DB
	*Metrics
	Codec

	// Prefix for badger DB keys.
	Prefix string
    39  	// TTL specifies number of seconds an item can reside in cache after
    40  	// the last access. An obsolete item is evicted. Setting TTL to less
    41  	// than a second disables time-based eviction.
    42  	TTL time.Duration
    43  }
    44  
// Codec is shorthand for coder-decoder. A Codec implementation
// is responsible for type conversions and binary representation.
type Codec interface {
	Serialize(w io.Writer, key string, value interface{}) error
	Deserialize(r io.Reader, key string) (interface{}, error)
	// New returns a new instance of the type. The method is
	// called by GetOrCreate when an item cannot be found by
	// the given key.
	New(key string) interface{}
}
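
// A minimal sketch (not part of this package) of a Codec for plain string
// values; the stringCodec type below is hypothetical and shown only to
// illustrate the contract:
//
//	type stringCodec struct{}
//
//	func (stringCodec) New(_ string) interface{} { return "" }
//
//	func (stringCodec) Serialize(w io.Writer, _ string, v interface{}) error {
//		_, err := w.Write([]byte(v.(string)))
//		return err
//	}
//
//	func (stringCodec) Deserialize(r io.Reader, _ string) (interface{}, error) {
//		b, err := io.ReadAll(r)
//		return string(b), err
//	}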

type Metrics struct {
	MissesCounter     prometheus.Counter
	ReadsCounter      prometheus.Counter
	DBWrites          prometheus.Observer
	DBReads           prometheus.Observer
	WriteBackDuration prometheus.Observer
	EvictionsDuration prometheus.Observer
}

func New(c Config) *Cache {
	cache := &Cache{
		lfu:           lfu.New(),
		db:            c.DB,
		codec:         c.Codec,
		metrics:       c.Metrics,
		prefix:        c.Prefix,
		ttl:           c.TTL,
		evictionsDone: make(chan struct{}),
		writeBackDone: make(chan struct{}),
	}

	evictionChannel := make(chan lfu.Eviction)
	writeBackChannel := make(chan lfu.Eviction)

	// channels used for saving evicted and written-back cache items to disk
	cache.lfu.EvictionChannel = evictionChannel
	cache.lfu.WriteBackChannel = writeBackChannel
	cache.lfu.TTL = int64(c.TTL.Seconds())

	// start a goroutine for saving the evicted cache items to disk
	go func() {
		for e := range evictionChannel {
			// TODO(kolesnikovae): these errors should be at least logged.
			//  Perhaps it would be better to move this outside of the cache.
			//  Taking into account that writes almost always happen in batches,
			//  we should definitely take advantage of the BadgerDB write batch API.
			//  Also, WriteBack and Evict could be combined. We could also
			//  consider moving caching to storage/db.
			cache.saveToDisk(e.Key, e.Value)
		}
		close(cache.evictionsDone)
	}()

	// start a goroutine for writing back cache items to disk
	go func() {
		for e := range writeBackChannel {
			cache.saveToDisk(e.Key, e.Value)
		}
		close(cache.writeBackDone)
	}()

	return cache
}
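
// A hypothetical wiring example for New; the badger path, metric names,
// and the someCodec value are illustrative only:
//
//	db, err := badger.Open(badger.DefaultOptions("/tmp/pyroscope-cache"))
//	if err != nil {
//		// handle error
//	}
//	c := New(Config{
//		DB:    db,
//		Codec: someCodec,
//		Metrics: &Metrics{
//			MissesCounter:     prometheus.NewCounter(prometheus.CounterOpts{Name: "cache_misses_total"}),
//			ReadsCounter:      prometheus.NewCounter(prometheus.CounterOpts{Name: "cache_reads_total"}),
//			DBWrites:          prometheus.NewHistogram(prometheus.HistogramOpts{Name: "cache_db_write_bytes"}),
//			DBReads:           prometheus.NewHistogram(prometheus.HistogramOpts{Name: "cache_db_read_bytes"}),
//			WriteBackDuration: prometheus.NewHistogram(prometheus.HistogramOpts{Name: "cache_write_back_seconds"}),
//			EvictionsDuration: prometheus.NewHistogram(prometheus.HistogramOpts{Name: "cache_evictions_seconds"}),
//		},
//		Prefix: "segments:",
//		TTL:    2 * time.Minute,
//	})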

func (cache *Cache) Put(key string, val interface{}) {
	cache.lfu.Set(key, val)
}

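// saveToDisk serializes the value with the codec and writes it to badger
// under the configured key prefix.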
func (cache *Cache) saveToDisk(key string, val interface{}) error {
	b := bytebufferpool.Get()
	defer bytebufferpool.Put(b)
	if err := cache.codec.Serialize(b, key, val); err != nil {
		return fmt.Errorf("serialization: %w", err)
	}
	cache.metrics.DBWrites.Observe(float64(b.Len()))
	return cache.db.Update(func(txn *badger.Txn) error {
		return txn.Set([]byte(cache.prefix+key), b.Bytes())
	})
}

func (cache *Cache) Sync() error {
	return cache.lfu.Iterate(func(k string, v interface{}) error {
		return cache.saveToDisk(k, v)
	})
}

func (cache *Cache) Flush() {
	cache.flushOnce.Do(func() {
		// Make sure there are no pending write-back items.
		close(cache.lfu.WriteBackChannel)
		<-cache.writeBackDone
		// Evict all items remaining in the cache.
		cache.lfu.Evict(cache.lfu.Len())
		close(cache.lfu.EvictionChannel)
		// Wait until all evictions are done.
		<-cache.evictionsDone
	})
}
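
// A typical (hypothetical) shutdown sequence would flush the cache before
// closing the underlying badger DB so that in-memory items are not lost:
//
//	c.Flush()
//	if err := db.Close(); err != nil {
//		// handle error
//	}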

// Evict performs cache evictions. The difference between Evict and WriteBack is that evictions
// happen when the cache grows above the allowed threshold, whereas write-back runs continuously,
// making pyroscope more crash-resilient.
// See https://github.com/pyroscope-io/pyroscope/issues/210 for more context.
func (cache *Cache) Evict(percent float64) {
	timer := prometheus.NewTimer(prometheus.ObserverFunc(cache.metrics.EvictionsDuration.Observe))
	cache.lfu.Evict(int(float64(cache.lfu.Len()) * percent))
	timer.ObserveDuration()
}

func (cache *Cache) WriteBack() {
	timer := prometheus.NewTimer(prometheus.ObserverFunc(cache.metrics.WriteBackDuration.Observe))
	cache.lfu.WriteBack()
	timer.ObserveDuration()
}
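
// A sketch of how a caller might drive Evict and WriteBack periodically;
// the interval and the 0.25 eviction fraction are illustrative, not values
// used by pyroscope itself:
//
//	go func() {
//		t := time.NewTicker(10 * time.Second)
//		defer t.Stop()
//		for range t.C {
//			c.Evict(0.25) // evict the 25% least frequently used items
//			c.WriteBack()
//		}
//	}()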

func (cache *Cache) Delete(key string) error {
	cache.lfu.Delete(key)
	return cache.db.Update(func(txn *badger.Txn) error {
		return txn.Delete([]byte(cache.prefix + key))
	})
}

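// Discard removes the item from the in-memory cache only; unlike Delete,
// it does not remove the on-disk copy.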
func (cache *Cache) Discard(key string) {
	cache.lfu.Delete(key)
}

// DiscardPrefix deletes all data that matches the given prefix,
// in both the cache and the database.
func (cache *Cache) DiscardPrefix(prefix string) error {
	cache.lfu.DeletePrefix(prefix)
	return dropPrefix(cache.db, []byte(cache.prefix+prefix))
}
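
// For example (hypothetical key layout), dropping everything stored under
// one application name:
//
//	if err := c.DiscardPrefix("myapp."); err != nil {
//		// handle error
//	}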

const defaultBatchSize = 1 << 10 // 1K items

func dropPrefix(db *badger.DB, p []byte) error {
	var err error
	for more := true; more; {
		if more, err = dropPrefixBatch(db, p, defaultBatchSize); err != nil {
			return err
		}
	}
	return nil
}

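// dropPrefixBatch collects up to n keys matching the prefix in a read
// transaction and deletes them in a single write batch. It reports whether
// any keys were deleted, i.e. whether another pass may be needed.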
func dropPrefixBatch(db *badger.DB, p []byte, n int) (bool, error) {
	keys := make([][]byte, 0, n)
	err := db.View(func(txn *badger.Txn) error {
		it := txn.NewIterator(badger.IteratorOptions{Prefix: p})
		defer it.Close()
		for it.Rewind(); it.Valid(); it.Next() {
			if len(keys) == cap(keys) {
				return nil
			}
			keys = append(keys, it.Item().KeyCopy(nil))
		}
		return nil
	})
	if err != nil || len(keys) == 0 {
		return false, err
	}
	batch := db.NewWriteBatch()
	defer batch.Cancel()
	for i := range keys {
		if err = batch.Delete(keys[i]); err != nil {
			return false, err
		}
	}
	return true, batch.Flush()
}

func (cache *Cache) GetOrCreate(key string) (interface{}, error) {
	return cache.get(key, true)
}

func (cache *Cache) Lookup(key string) (interface{}, bool) {
	v, err := cache.get(key, false)
	return v, v != nil && err == nil
}
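
// A brief usage sketch of the two read paths; the key and the surrounding
// variables are illustrative:
//
//	v, err := c.GetOrCreate("myapp.cpu")
//	if err != nil {
//		// handle error
//	}
//	// v is the cached value, the value loaded from badger, or a fresh
//	// codec.New result if the key was not found anywhere.
//
//	if v, ok := c.Lookup("myapp.cpu"); ok {
//		_ = v // present in the cache or on disk; never freshly created
//	}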

func (cache *Cache) get(key string, createNotFound bool) (interface{}, error) {
	cache.metrics.ReadsCounter.Inc()
	return cache.lfu.GetOrSet(key, func() (interface{}, error) {
		cache.metrics.MissesCounter.Inc()
		var buf []byte
		err := cache.db.View(func(txn *badger.Txn) error {
			item, err := txn.Get([]byte(cache.prefix + key))
			if err != nil {
				return err
			}
			buf, err = item.ValueCopy(buf)
			return err
		})

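		// Unexpected errors are returned as is; a nil error falls through to
		// deserialization below; ErrKeyNotFound either creates a new item
		// (GetOrCreate) or reports a cache miss (Lookup).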
		switch {
		default:
			return nil, err
		case err == nil:
		case errors.Is(err, badger.ErrKeyNotFound):
			if createNotFound {
				return cache.codec.New(key), nil
			}
			return nil, nil
		}

		cache.metrics.DBReads.Observe(float64(len(buf)))
		return cache.codec.Deserialize(bytes.NewReader(buf), key)
	})
}

func (cache *Cache) Size() uint64 {
	return uint64(cache.lfu.Len())
}