github.com/grafana/pyroscope@v1.18.0/pkg/metastore/index/index.go (about)

     1  package index
     2  
     3  import (
     4  	"context"
     5  	"flag"
     6  	"fmt"
     7  	"iter"
     8  	"math"
     9  	"slices"
    10  	"time"
    11  
    12  	"github.com/go-kit/log"
    13  	"github.com/go-kit/log/level"
    14  	"github.com/oklog/ulid/v2"
    15  	"go.etcd.io/bbolt"
    16  
    17  	metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
    18  	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
    19  	"github.com/grafana/pyroscope/pkg/block/metadata"
    20  	"github.com/grafana/pyroscope/pkg/metastore/index/cleaner"
    21  	"github.com/grafana/pyroscope/pkg/metastore/index/dlq"
    22  	indexstore "github.com/grafana/pyroscope/pkg/metastore/index/store"
    23  	"github.com/grafana/pyroscope/pkg/model"
    24  )
    25  
// ErrBlockExists is the sentinel error indicating that a block with the
// same identity is already present in the index.
var ErrBlockExists = fmt.Errorf("block already exists")
    27  
// Config holds the index configuration: cache capacities, the embedded
// cleaner and DLQ-recovery configs, and two internal parameters that are
// fixed to their defaults (see DefaultConfig).
type Config struct {
	// ShardCacheSize caps the number of shards kept in memory.
	ShardCacheSize int `yaml:"shard_cache_size"`
	// BlockWriteCacheSize caps the number of recently written blocks kept in memory.
	BlockWriteCacheSize int `yaml:"block_write_cache_size"`
	// BlockReadCacheSize caps the number of recently read blocks kept in memory.
	BlockReadCacheSize int `yaml:"block_read_cache_size"`

	Cleaner  cleaner.Config `yaml:",inline"`
	Recovery dlq.Config     `yaml:",inline"`

	// Not user-configurable; always set from DefaultConfig.
	// See the comments there for why they must not be modified.
	partitionDuration     time.Duration
	queryLookaroundPeriod time.Duration
}
    39  
// DefaultConfig provides the default index configuration. The unexported
// fields below are always copied from here (see RegisterFlagsWithPrefix).
var DefaultConfig = Config{
	ShardCacheSize:      2000,   // 128KB * 2000 = 256MB
	BlockReadCacheSize:  100000, // 8KB blocks = 800MB
	BlockWriteCacheSize: 10000,

	// FIXME(kolesnikovae): Do not modify, it will break the index.
	//
	// This parameter is not supported; used only for testing.
	// Partition key MUST be an input parameter.
	partitionDuration: 6 * time.Hour,

	// FIXME(kolesnikovae): Remove: build an interval tree.
	//
	// Currently, we do not use information about the time range of data each
	// partition refers to. For example, it's possible – though very unlikely
	// – for data from the past hour to be stored in a partition created a day
	// ago. We need to be cautious: when querying, we must identify all
	// partitions that may include the query time range. To ensure we catch
	// such "misplaced" data, we extend the query time range using this period.
	queryLookaroundPeriod: 24 * time.Hour,
}
    61  
    62  func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
    63  	cfg.Recovery.RegisterFlagsWithPrefix(prefix, f)
    64  	cfg.Cleaner.RegisterFlagsWithPrefix(prefix, f)
    65  	f.IntVar(&cfg.ShardCacheSize, prefix+"shard-cache-size", DefaultConfig.ShardCacheSize, "Maximum number of shards to keep in memory")
    66  	f.IntVar(&cfg.BlockWriteCacheSize, prefix+"block-write-cache-size", DefaultConfig.BlockWriteCacheSize, "Maximum number of written blocks to keep in memory")
    67  	f.IntVar(&cfg.BlockReadCacheSize, prefix+"block-read-cache-size", DefaultConfig.BlockReadCacheSize, "Maximum number of read blocks to keep in memory")
    68  	cfg.partitionDuration = DefaultConfig.partitionDuration
    69  	cfg.queryLookaroundPeriod = DefaultConfig.queryLookaroundPeriod
    70  }
    71  
// Store is the persistence layer of the index: it manages the bbolt
// buckets holding partitions and their per-tenant shards.
type Store interface {
	// CreateBuckets initializes the index buckets within the transaction.
	CreateBuckets(*bbolt.Tx) error
	// Partitions iterates over all partitions present in the store.
	Partitions(tx *bbolt.Tx) iter.Seq[indexstore.Partition]
	// LoadShard loads the given tenant shard of partition p.
	LoadShard(tx *bbolt.Tx, p indexstore.Partition, tenant string, shard uint32) (*indexstore.Shard, error)
	// DeleteShard removes the given tenant shard of partition p.
	DeleteShard(tx *bbolt.Tx, p indexstore.Partition, tenant string, shard uint32) error
}
    78  
// Index is the block metadata index. It fronts the persistent Store with
// an in-memory shard cache and separate read/write block caches.
type Index struct {
	logger log.Logger
	config Config
	store  Store
	shards *shardCache // caches loaded partition shards
	blocks *blockCache // caches block metadata entries
}
    86  
    87  func NewIndex(logger log.Logger, s Store, cfg Config) *Index {
    88  	return &Index{
    89  		logger: logger,
    90  		config: cfg,
    91  		store:  s,
    92  		shards: newShardCache(cfg.ShardCacheSize, s),
    93  		blocks: newBlockCache(cfg.BlockReadCacheSize, cfg.BlockWriteCacheSize),
    94  	}
    95  }
    96  
    97  func NewStore() *indexstore.IndexStore { return indexstore.NewIndexStore() }
    98  
    99  func (i *Index) Init(tx *bbolt.Tx) error { return i.store.CreateBuckets(tx) }
   100  
   101  func (i *Index) Restore(tx *bbolt.Tx) error {
   102  	// See comment in DefaultConfig.queryLookaroundPeriod.
   103  	now := time.Now()
   104  	start := now.Add(-i.config.queryLookaroundPeriod)
   105  	end := now.Add(i.config.queryLookaroundPeriod)
   106  	for p := range i.store.Partitions(tx) {
   107  		if !p.Overlaps(start, end) {
   108  			continue
   109  		}
   110  		level.Info(i.logger).Log("msg", "loading partition in memory")
   111  		q := p.Query(tx)
   112  		if q == nil {
   113  			continue
   114  		}
   115  		for tenant := range q.Tenants() {
   116  			for shard := range q.Shards(tenant) {
   117  				if _, err := i.shards.getForWrite(tx, p, tenant, shard.Shard); err != nil {
   118  					level.Error(i.logger).Log(
   119  						"msg", "failed to load tenant partition shard",
   120  						"partition", p,
   121  						"tenant", tenant,
   122  						"shard", shard,
   123  						"err", err,
   124  					)
   125  					return err
   126  				}
   127  			}
   128  		}
   129  	}
   130  	return nil
   131  }
   132  
   133  func (i *Index) InsertBlock(tx *bbolt.Tx, b *metastorev1.BlockMeta) error {
   134  	p := i.partitionKeyForBlock(b.Id)
   135  	return i.shards.update(tx, p, metadata.Tenant(b), b.Shard, func(s *indexstore.Shard) error {
   136  		if err := s.Store(tx, b); err != nil {
   137  			return err
   138  		}
   139  		i.blocks.put(s, b)
   140  		return nil
   141  	})
   142  }
   143  
   144  func (i *Index) ReplaceBlocks(tx *bbolt.Tx, compacted *metastorev1.CompactedBlocks) error {
   145  	for _, b := range compacted.NewBlocks {
   146  		if err := i.InsertBlock(tx, b); err != nil {
   147  			return err
   148  		}
   149  	}
   150  	for p, list := range i.partitionedList(compacted.SourceBlocks) {
   151  		err := i.shards.update(tx, p, list.Tenant, list.Shard, func(s *indexstore.Shard) error {
   152  			if err := s.Delete(tx, list.Blocks...); err != nil {
   153  				return err
   154  			}
   155  			for _, b := range list.Blocks {
   156  				i.blocks.delete(s, b)
   157  			}
   158  			return nil
   159  		})
   160  		if err != nil {
   161  			return err
   162  		}
   163  	}
   164  	return nil
   165  }
   166  
   167  func (i *Index) GetBlocks(tx *bbolt.Tx, list *metastorev1.BlockList) ([]*metastorev1.BlockMeta, error) {
   168  	metas := make([]*metastorev1.BlockMeta, 0, len(list.Blocks))
   169  	for k, partitioned := range i.partitionedList(list) {
   170  		s, err := i.shards.getForRead(tx, k, partitioned.Tenant, partitioned.Shard)
   171  		if err != nil {
   172  			return nil, err
   173  		}
   174  		for _, kv := range s.Find(tx, partitioned.Blocks...) {
   175  			b := i.blocks.getOrCreate(s, kv).CloneVT()
   176  			s.StringTable.Export(b)
   177  			metas = append(metas, b)
   178  		}
   179  	}
   180  	return metas, nil
   181  }
   182  
// Partitions iterates over all partitions in the underlying store.
func (i *Index) Partitions(tx *bbolt.Tx) iter.Seq[indexstore.Partition] {
	return i.store.Partitions(tx)
}
   186  
   187  func (i *Index) DeleteShard(tx *bbolt.Tx, key indexstore.Partition, tenant string, shard uint32) error {
   188  	if err := i.store.DeleteShard(tx, key, tenant, shard); err != nil {
   189  		return err
   190  	}
   191  	i.shards.delete(key, tenant, shard)
   192  	return nil
   193  }
   194  
   195  func (i *Index) GetTenants(tx *bbolt.Tx) []string {
   196  	uniqueTenants := make(map[string]struct{})
   197  	for p := range i.store.Partitions(tx) {
   198  		q := p.Query(tx)
   199  		if q == nil {
   200  			// Partition not found.
   201  			continue
   202  		}
   203  		for t := range q.Tenants() {
   204  			if t == "" {
   205  				continue
   206  			}
   207  			uniqueTenants[t] = struct{}{}
   208  		}
   209  	}
   210  	tenants := make([]string, 0, len(uniqueTenants))
   211  	for t := range uniqueTenants {
   212  		tenants = append(tenants, t)
   213  	}
   214  	return tenants
   215  }
   216  
   217  func (i *Index) GetTenantStats(tx *bbolt.Tx, tenant string) *metastorev1.TenantStats {
   218  	stats := &metastorev1.TenantStats{
   219  		DataIngested:      false,
   220  		OldestProfileTime: math.MaxInt64,
   221  		NewestProfileTime: math.MinInt64,
   222  	}
   223  	for p := range i.store.Partitions(tx) {
   224  		q := p.Query(tx)
   225  		if q == nil {
   226  			// Partition not found.
   227  			continue
   228  		}
   229  		for shard := range q.Shards(tenant) {
   230  			stats.DataIngested = true
   231  			oldest := shard.ShardIndex.MinTime
   232  			newest := shard.ShardIndex.MaxTime
   233  			if oldest < stats.OldestProfileTime {
   234  				stats.OldestProfileTime = oldest
   235  			}
   236  			if newest > stats.NewestProfileTime {
   237  				stats.NewestProfileTime = newest
   238  			}
   239  		}
   240  	}
   241  	if !stats.DataIngested {
   242  		return new(metastorev1.TenantStats)
   243  	}
   244  	return stats
   245  }
   246  
   247  func (i *Index) QueryMetadata(tx *bbolt.Tx, ctx context.Context, query MetadataQuery) ([]*metastorev1.BlockMeta, error) {
   248  	q, err := newMetadataQuery(i, query)
   249  	if err != nil {
   250  		return nil, err
   251  	}
   252  	r, err := newBlockMetadataQuerier(tx, q).queryBlocks(ctx)
   253  	if err != nil {
   254  		return nil, err
   255  	}
   256  	return r, nil
   257  }
   258  
   259  func (i *Index) QueryMetadataLabels(tx *bbolt.Tx, ctx context.Context, query MetadataQuery) ([]*typesv1.Labels, error) {
   260  	q, err := newMetadataQuery(i, query)
   261  	if err != nil {
   262  		return nil, err
   263  	}
   264  	c, err := newMetadataLabelQuerier(tx, q).queryLabels(ctx)
   265  	if err != nil {
   266  		return nil, err
   267  	}
   268  	l := slices.Collect(c.Unique())
   269  	slices.SortFunc(l, model.CompareLabels)
   270  	return l, nil
   271  }
   272  
   273  func (i *Index) partitionedList(list *metastorev1.BlockList) map[indexstore.Partition]*metastorev1.BlockList {
   274  	partitions := make(map[indexstore.Partition]*metastorev1.BlockList)
   275  	for _, b := range list.Blocks {
   276  		k := i.partitionKeyForBlock(b)
   277  		v := partitions[k]
   278  		if v == nil {
   279  			v = &metastorev1.BlockList{
   280  				Shard:  list.Shard,
   281  				Tenant: list.Tenant,
   282  				Blocks: make([]string, 0, len(list.Blocks)),
   283  			}
   284  			partitions[k] = v
   285  		}
   286  		v.Blocks = append(v.Blocks, b)
   287  	}
   288  	return partitions
   289  }
   290  
   291  func (i *Index) partitionKeyForBlock(b string) indexstore.Partition {
   292  	return indexstore.NewPartition(ulid.Time(ulid.MustParse(b).Time()), i.config.partitionDuration)
   293  }