github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/storage/cache/cache.go (about) 1 package cache 2 3 import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "io" 8 "sync" 9 "time" 10 11 "github.com/dgraph-io/badger/v2" 12 "github.com/prometheus/client_golang/prometheus" 13 "github.com/valyala/bytebufferpool" 14 15 "github.com/pyroscope-io/pyroscope/pkg/storage/cache/lfu" 16 ) 17 18 type Cache struct { 19 db *badger.DB 20 lfu *lfu.Cache 21 metrics *Metrics 22 codec Codec 23 24 prefix string 25 ttl time.Duration 26 27 evictionsDone chan struct{} 28 writeBackDone chan struct{} 29 flushOnce sync.Once 30 } 31 32 type Config struct { 33 *badger.DB 34 *Metrics 35 Codec 36 37 // Prefix for badger DB keys. 38 Prefix string 39 // TTL specifies number of seconds an item can reside in cache after 40 // the last access. An obsolete item is evicted. Setting TTL to less 41 // than a second disables time-based eviction. 42 TTL time.Duration 43 } 44 45 // Codec is a shorthand of coder-decoder. A Codec implementation 46 // is responsible for type conversions and binary representation. 47 type Codec interface { 48 Serialize(w io.Writer, key string, value interface{}) error 49 Deserialize(r io.Reader, key string) (interface{}, error) 50 // New returns a new instance of the type. The method is 51 // called by GetOrCreate when an item can not be found by 52 // the given key. 53 New(key string) interface{} 54 } 55 56 type Metrics struct { 57 MissesCounter prometheus.Counter 58 ReadsCounter prometheus.Counter 59 DBWrites prometheus.Observer 60 DBReads prometheus.Observer 61 WriteBackDuration prometheus.Observer 62 EvictionsDuration prometheus.Observer 63 } 64 65 func New(c Config) *Cache { 66 cache := &Cache{ 67 lfu: lfu.New(), 68 db: c.DB, 69 codec: c.Codec, 70 metrics: c.Metrics, 71 prefix: c.Prefix, 72 ttl: c.TTL, 73 evictionsDone: make(chan struct{}), 74 writeBackDone: make(chan struct{}), 75 } 76 77 evictionChannel := make(chan lfu.Eviction) 78 writeBackChannel := make(chan lfu.Eviction) 79 80 // eviction channel for saving cache items to disk 81 cache.lfu.EvictionChannel = evictionChannel 82 cache.lfu.WriteBackChannel = writeBackChannel 83 cache.lfu.TTL = int64(c.TTL.Seconds()) 84 85 // start a goroutine for saving the evicted cache items to disk 86 go func() { 87 for e := range evictionChannel { 88 // TODO(kolesnikovae): these errors should be at least logged. 89 // Perhaps, it will be better if we move it outside of the cache. 90 // Taking into account that writes almost always happen in batches, 91 // We should definitely take advantage of BadgerDB write batch API. 92 // Also, WriteBack and Evict could be combined. We also could 93 // consider moving caching to storage/db. 94 cache.saveToDisk(e.Key, e.Value) 95 } 96 close(cache.evictionsDone) 97 }() 98 99 // start a goroutine for saving the evicted cache items to disk 100 go func() { 101 for e := range writeBackChannel { 102 cache.saveToDisk(e.Key, e.Value) 103 } 104 close(cache.writeBackDone) 105 }() 106 107 return cache 108 } 109 110 func (cache *Cache) Put(key string, val interface{}) { 111 cache.lfu.Set(key, val) 112 } 113 114 func (cache *Cache) saveToDisk(key string, val interface{}) error { 115 b := bytebufferpool.Get() 116 defer bytebufferpool.Put(b) 117 if err := cache.codec.Serialize(b, key, val); err != nil { 118 return fmt.Errorf("serialization: %w", err) 119 } 120 cache.metrics.DBWrites.Observe(float64(b.Len())) 121 return cache.db.Update(func(txn *badger.Txn) error { 122 return txn.Set([]byte(cache.prefix+key), b.Bytes()) 123 }) 124 } 125 126 func (cache *Cache) Sync() error { 127 return cache.lfu.Iterate(func(k string, v interface{}) error { 128 return cache.saveToDisk(k, v) 129 }) 130 } 131 132 func (cache *Cache) Flush() { 133 cache.flushOnce.Do(func() { 134 // Make sure there is no pending items. 135 close(cache.lfu.WriteBackChannel) 136 <-cache.writeBackDone 137 // evict all the items in cache 138 cache.lfu.Evict(cache.lfu.Len()) 139 close(cache.lfu.EvictionChannel) 140 // wait until all evictions are done 141 <-cache.evictionsDone 142 }) 143 } 144 145 // Evict performs cache evictions. The difference between Evict and WriteBack is that evictions happen when cache grows 146 // above allowed threshold and write-back calls happen constantly, making pyroscope more crash-resilient. 147 // See https://github.com/pyroscope-io/pyroscope/issues/210 for more context 148 func (cache *Cache) Evict(percent float64) { 149 timer := prometheus.NewTimer(prometheus.ObserverFunc(cache.metrics.EvictionsDuration.Observe)) 150 cache.lfu.Evict(int(float64(cache.lfu.Len()) * percent)) 151 timer.ObserveDuration() 152 } 153 154 func (cache *Cache) WriteBack() { 155 timer := prometheus.NewTimer(prometheus.ObserverFunc(cache.metrics.WriteBackDuration.Observe)) 156 cache.lfu.WriteBack() 157 timer.ObserveDuration() 158 } 159 160 func (cache *Cache) Delete(key string) error { 161 cache.lfu.Delete(key) 162 return cache.db.Update(func(txn *badger.Txn) error { 163 return txn.Delete([]byte(cache.prefix + key)) 164 }) 165 } 166 167 func (cache *Cache) Discard(key string) { 168 cache.lfu.Delete(key) 169 } 170 171 // DiscardPrefix deletes all data that matches a certain prefix 172 // In both cache and database 173 func (cache *Cache) DiscardPrefix(prefix string) error { 174 cache.lfu.DeletePrefix(prefix) 175 return dropPrefix(cache.db, []byte(cache.prefix+prefix)) 176 } 177 178 const defaultBatchSize = 1 << 10 // 1K items 179 180 func dropPrefix(db *badger.DB, p []byte) error { 181 var err error 182 for more := true; more; { 183 if more, err = dropPrefixBatch(db, p, defaultBatchSize); err != nil { 184 return err 185 } 186 } 187 return nil 188 } 189 190 func dropPrefixBatch(db *badger.DB, p []byte, n int) (bool, error) { 191 keys := make([][]byte, 0, n) 192 err := db.View(func(txn *badger.Txn) error { 193 it := txn.NewIterator(badger.IteratorOptions{Prefix: p}) 194 defer it.Close() 195 for it.Rewind(); it.Valid(); it.Next() { 196 if len(keys) == cap(keys) { 197 return nil 198 } 199 keys = append(keys, it.Item().KeyCopy(nil)) 200 } 201 return nil 202 }) 203 if err != nil || len(keys) == 0 { 204 return false, err 205 } 206 batch := db.NewWriteBatch() 207 defer batch.Cancel() 208 for i := range keys { 209 if err = batch.Delete(keys[i]); err != nil { 210 return false, err 211 } 212 } 213 return true, batch.Flush() 214 } 215 216 func (cache *Cache) GetOrCreate(key string) (interface{}, error) { 217 return cache.get(key, true) 218 } 219 220 func (cache *Cache) Lookup(key string) (interface{}, bool) { 221 v, err := cache.get(key, false) 222 return v, v != nil && err == nil 223 } 224 225 func (cache *Cache) get(key string, createNotFound bool) (interface{}, error) { 226 cache.metrics.ReadsCounter.Inc() 227 return cache.lfu.GetOrSet(key, func() (interface{}, error) { 228 cache.metrics.MissesCounter.Inc() 229 var buf []byte 230 err := cache.db.View(func(txn *badger.Txn) error { 231 item, err := txn.Get([]byte(cache.prefix + key)) 232 if err != nil { 233 return err 234 } 235 buf, err = item.ValueCopy(buf) 236 return err 237 }) 238 239 switch { 240 default: 241 return nil, err 242 case err == nil: 243 case errors.Is(err, badger.ErrKeyNotFound): 244 if createNotFound { 245 return cache.codec.New(key), nil 246 } 247 return nil, nil 248 } 249 250 cache.metrics.DBReads.Observe(float64(len(buf))) 251 return cache.codec.Deserialize(bytes.NewReader(buf), key) 252 }) 253 } 254 255 func (cache *Cache) Size() uint64 { 256 return uint64(cache.lfu.Len()) 257 }