github.com/grafana/pyroscope@v1.18.0/pkg/metastore/index/index_cache.go (about)

     1  package index
     2  
     3  import (
     4  	"sync"
     5  
     6  	lru "github.com/hashicorp/golang-lru/v2"
     7  	"go.etcd.io/bbolt"
     8  
     9  	metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
    10  	indexstore "github.com/grafana/pyroscope/pkg/metastore/index/store"
    11  	kvstore "github.com/grafana/pyroscope/pkg/metastore/store"
    12  )
    13  
    14  // Shard cache.
    15  //
    16  // The cache helps us to avoid repeatedly reading the string table from
    17  // the persistent store. Cached shards have a flag that indicates whether
    18  // the shard was loaded for reads. Any write operation should invalidate
    19  // the cached entry and reload it as it may violate transaction isolation.
    20  //
    21  // Writes are always sequential and never concurrent. Therefore, it's
    22  // guaranteed that every write operation observes the latest state of
    23  // the shard on disk. The cache introduces a possibility to observe
    24  // a stale state in the cache, because of the concurrent reads that
    25  // share the same cache.
    26  //
    27  // Reads are concurrent and may run in transactions that began before
    28  // the ongoing write transaction. If a read transaction reads the shard
    29  // state from the disk, its state is obsolete from the writer perspective,
    30  // since it corresponds to an older transaction; if such state is cached,
    31  // all participants may observe it. Therefore, we mark such shards as
    32  // read-only to let the writer know about it.
    33  //
    34  // Reads may observe a state modified "in the future", by a write
    35  // transaction that has started after the read transaction. This is fine,
    36  // as "stale reads" are resolved at the raft level. It is not fine,
    37  // however, if the write transaction uses the cached shard state, loaded
    38  // by read transaction.
type shardCache struct {
	// mu guards the check-then-load-then-add sequences in the methods
	// below; the underlying 2Q cache is itself safe for concurrent use.
	mu    sync.RWMutex
	cache *lru.TwoQueueCache[shardCacheKey, *indexShardCached]
	// store is the persistent backing store shards are loaded from on
	// a cache miss or when a read-only entry must be reloaded for a write.
	store Store
}
    44  
// shardCacheKey uniquely identifies a shard within a partition and tenant.
type shardCacheKey struct {
	partition indexstore.Partition
	tenant    string
	shard     uint32
}
    50  
// indexShardCached wraps a shard together with a flag recording how it
// was loaded. Entries loaded by a read transaction are marked readOnly
// so that a subsequent writer reloads the shard from disk instead of
// reusing a potentially stale cached state (see the comment above).
type indexShardCached struct {
	*indexstore.Shard
	// readOnly is set when the shard was loaded by a read transaction.
	readOnly bool
}
    55  
    56  func newShardCache(size int, s Store) *shardCache {
    57  	if size <= 0 {
    58  		size = 1
    59  	}
    60  	c, _ := lru.New2Q[shardCacheKey, *indexShardCached](size)
    61  	return &shardCache{cache: c, store: s}
    62  }
    63  
    64  func (c *shardCache) update(tx *bbolt.Tx, p indexstore.Partition, tenant string, shard uint32, fn func(*indexstore.Shard) error) error {
    65  	c.mu.Lock()
    66  	defer c.mu.Unlock()
    67  	s, err := c.getForWriteUnsafe(tx, p, tenant, shard)
    68  	if err != nil {
    69  		return err
    70  	}
    71  	return fn(s)
    72  }
    73  
    74  func (c *shardCache) getForWrite(tx *bbolt.Tx, p indexstore.Partition, tenant string, shard uint32) (*indexstore.Shard, error) {
    75  	c.mu.Lock()
    76  	defer c.mu.Unlock()
    77  	return c.getForWriteUnsafe(tx, p, tenant, shard)
    78  }
    79  
    80  func (c *shardCache) getForWriteUnsafe(tx *bbolt.Tx, p indexstore.Partition, tenant string, shard uint32) (*indexstore.Shard, error) {
    81  	k := shardCacheKey{
    82  		partition: p,
    83  		tenant:    tenant,
    84  		shard:     shard,
    85  	}
    86  	x, found := c.cache.Get(k)
    87  	if found && x != nil && !x.readOnly {
    88  		return x.Shard, nil
    89  	}
    90  	// If the shard is not found, or it is loaded for reads,
    91  	// reload it and invalidate the cached version.
    92  	s, err := c.store.LoadShard(tx, p, tenant, shard)
    93  	if err != nil {
    94  		return nil, err
    95  	}
    96  	if s == nil {
    97  		s = indexstore.NewShard(p, tenant, shard)
    98  	}
    99  	c.cache.Add(k, &indexShardCached{
   100  		Shard:    s,
   101  		readOnly: false,
   102  	})
   103  	return s, nil
   104  }
   105  
   106  func (c *shardCache) getForRead(tx *bbolt.Tx, p indexstore.Partition, tenant string, shard uint32) (*indexstore.Shard, error) {
   107  	c.mu.Lock()
   108  	defer c.mu.Unlock()
   109  	k := shardCacheKey{
   110  		partition: p,
   111  		tenant:    tenant,
   112  		shard:     shard,
   113  	}
   114  	x, found := c.cache.Get(k)
   115  	if found && x != nil {
   116  		return x.ShallowCopy(), nil
   117  	}
   118  	s, err := c.store.LoadShard(tx, p, tenant, shard)
   119  	if err != nil {
   120  		return nil, err
   121  	}
   122  	if s == nil {
   123  		// Returning an empty shard is fine, as this
   124  		// is a read operation.
   125  		return indexstore.NewShard(p, tenant, shard), nil
   126  	}
   127  	c.cache.Add(k, &indexShardCached{
   128  		Shard:    s,
   129  		readOnly: true,
   130  	})
   131  	return s, nil
   132  }
   133  
   134  func (c *shardCache) delete(p indexstore.Partition, tenant string, shard uint32) {
   135  	c.mu.Lock()
   136  	defer c.mu.Unlock()
   137  	k := shardCacheKey{partition: p, tenant: tenant, shard: shard}
   138  	c.cache.Remove(k)
   139  }
   140  
   141  // Block cache.
   142  //
   143  // Metadata entries might be large, tens of kilobytes, depending on the number
   144  // of datasets, labels, and other metadata. Therefore, we use block cache
   145  // to avoid repeatedly decoding the serialized raw bytes. The cache does not
   146  // require any special coordination, as it is accessed by keys, which are
   147  // loaded from the disk in the current transaction.
   148  //
   149  // The cache is split into two parts: read and write. This is done to prevent
   150  // cache pollution in case of compaction delays.
   151  //
   152  // The read cache is populated with blocks that are fully compacted and with
   153  // blocks queried by the user. We use 2Q cache replacement strategy to ensure
   154  // that the most recently read blocks are kept in memory, while frequently
   155  // accessed older blocks are not evicted prematurely.
   156  //
   157  // The write cache is used to store blocks that are being written to the index.
   158  // It is important because it's guaranteed that the block will be read soon for
   159  // compaction. The write cache is accessed for reads if the read cache does not
   160  // contain the block queried.
type blockCache struct {
	// mu guards the check-then-decode-then-add sequence in getOrCreate;
	// the LRU caches themselves are safe for concurrent use.
	mu    sync.RWMutex
	// read holds fully compacted and user-queried blocks (2Q policy).
	read  *lru.TwoQueueCache[blockCacheKey, *metastorev1.BlockMeta]
	// write holds freshly written blocks awaiting compaction.
	write *lru.Cache[blockCacheKey, *metastorev1.BlockMeta]
}
   166  
// blockCacheKey uniquely identifies a block within a tenant's shard.
type blockCacheKey struct {
	tenant string
	shard  uint32
	block  string
}
   172  
   173  func newBlockCache(rcs, wcs int) *blockCache {
   174  	var c blockCache
   175  	if rcs <= 0 {
   176  		rcs = 1
   177  	}
   178  	if wcs <= 0 {
   179  		wcs = 1
   180  	}
   181  	c.read, _ = lru.New2Q[blockCacheKey, *metastorev1.BlockMeta](rcs)
   182  	c.write, _ = lru.New[blockCacheKey, *metastorev1.BlockMeta](wcs)
   183  	return &c
   184  }
   185  
   186  func (c *blockCache) getOrCreate(shard *indexstore.Shard, block kvstore.KV) *metastorev1.BlockMeta {
   187  	k := blockCacheKey{
   188  		tenant: shard.Tenant,
   189  		shard:  shard.Shard,
   190  		block:  string(block.Key),
   191  	}
   192  	c.mu.RLock()
   193  	v, ok := c.read.Get(k)
   194  	if ok {
   195  		c.mu.RUnlock()
   196  		return v
   197  	}
   198  	v, ok = c.write.Get(k)
   199  	if ok {
   200  		c.mu.RUnlock()
   201  		return v
   202  	}
   203  	c.mu.RUnlock()
   204  	c.mu.Lock()
   205  	defer c.mu.Unlock()
   206  	v, ok = c.read.Get(k)
   207  	if ok {
   208  		return v
   209  	}
   210  	v, ok = c.write.Get(k)
   211  	if ok {
   212  		return v
   213  	}
   214  	var md metastorev1.BlockMeta
   215  	if err := md.UnmarshalVT(block.Value); err != nil {
   216  		return &md
   217  	}
   218  	c.read.Add(k, &md)
   219  	return &md
   220  }
   221  
   222  func (c *blockCache) put(shard *indexstore.Shard, md *metastorev1.BlockMeta) {
   223  	k := blockCacheKey{
   224  		tenant: shard.Tenant,
   225  		shard:  shard.Shard,
   226  		block:  md.Id,
   227  	}
   228  	c.mu.Lock()
   229  	defer c.mu.Unlock()
   230  	if md.CompactionLevel >= 2 {
   231  		c.read.Add(k, md)
   232  		return
   233  	}
   234  	c.write.Add(k, md)
   235  }
   236  
   237  func (c *blockCache) delete(shard *indexstore.Shard, block string) {
   238  	k := blockCacheKey{
   239  		tenant: shard.Tenant,
   240  		shard:  shard.Shard,
   241  		block:  block,
   242  	}
   243  	c.write.Remove(k)
   244  	c.read.Remove(k)
   245  }