// Package index implements the metastore block metadata index: a
// time-partitioned, per-tenant, per-shard catalog of block metadata
// persisted in BoltDB and fronted by in-memory shard and block caches.
package index

import (
	"context"
	"flag"
	"fmt"
	"iter"
	"math"
	"slices"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/oklog/ulid/v2"
	"go.etcd.io/bbolt"

	metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
	"github.com/grafana/pyroscope/pkg/block/metadata"
	"github.com/grafana/pyroscope/pkg/metastore/index/cleaner"
	"github.com/grafana/pyroscope/pkg/metastore/index/dlq"
	indexstore "github.com/grafana/pyroscope/pkg/metastore/index/store"
	"github.com/grafana/pyroscope/pkg/model"
)

// ErrBlockExists indicates that a block with the same ID is already present
// in the index.
var ErrBlockExists = fmt.Errorf("block already exists")

// Config controls the sizes of the in-memory caches and embeds the
// configuration of the index cleaner and DLQ recovery components.
type Config struct {
	ShardCacheSize      int `yaml:"shard_cache_size"`
	BlockWriteCacheSize int `yaml:"block_write_cache_size"`
	BlockReadCacheSize  int `yaml:"block_read_cache_size"`

	Cleaner  cleaner.Config `yaml:",inline"`
	Recovery dlq.Config     `yaml:",inline"`

	// Not exposed as flags: RegisterFlagsWithPrefix always copies these
	// from DefaultConfig. See the comments on DefaultConfig for why they
	// must not be changed.
	partitionDuration     time.Duration
	queryLookaroundPeriod time.Duration
}

// DefaultConfig holds the default cache sizes and the fixed partitioning
// parameters of the index.
var DefaultConfig = Config{
	ShardCacheSize:      2000,   // 128KB * 2000 = 256MB
	BlockReadCacheSize:  100000, // 8KB blocks = 800MB
	BlockWriteCacheSize: 10000,

	// FIXME(kolesnikovae): Do not modify, it will break the index.
	//
	// This parameter is not supported; used only for testing.
	// Partition key MUST be an input parameter.
	partitionDuration: 6 * time.Hour,

	// FIXME(kolesnikovae): Remove: build an interval tree.
	//
	// Currently, we do not use information about the time range of data each
	// partition refers to. For example, it's possible – though very unlikely
	// – for data from the past hour to be stored in a partition created a day
	// ago. We need to be cautious: when querying, we must identify all
	// partitions that may include the query time range. To ensure we catch
	// such "misplaced" data, we extend the query time range using this period.
	queryLookaroundPeriod: 24 * time.Hour,
}

// RegisterFlagsWithPrefix registers the cache-size flags (and the flags of
// the nested recovery and cleaner configs) under the given prefix, and pins
// the unsupported partitioning parameters to their defaults.
func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
	cfg.Recovery.RegisterFlagsWithPrefix(prefix, f)
	cfg.Cleaner.RegisterFlagsWithPrefix(prefix, f)
	f.IntVar(&cfg.ShardCacheSize, prefix+"shard-cache-size", DefaultConfig.ShardCacheSize, "Maximum number of shards to keep in memory")
	f.IntVar(&cfg.BlockWriteCacheSize, prefix+"block-write-cache-size", DefaultConfig.BlockWriteCacheSize, "Maximum number of written blocks to keep in memory")
	f.IntVar(&cfg.BlockReadCacheSize, prefix+"block-read-cache-size", DefaultConfig.BlockReadCacheSize, "Maximum number of read blocks to keep in memory")
	cfg.partitionDuration = DefaultConfig.partitionDuration
	cfg.queryLookaroundPeriod = DefaultConfig.queryLookaroundPeriod
}

// Store is the persistence backend of the index: it creates the underlying
// buckets and loads, enumerates, and deletes per-partition tenant shards
// within a BoltDB transaction.
type Store interface {
	CreateBuckets(*bbolt.Tx) error
	Partitions(tx *bbolt.Tx) iter.Seq[indexstore.Partition]
	LoadShard(tx *bbolt.Tx, p indexstore.Partition, tenant string, shard uint32) (*indexstore.Shard, error)
	DeleteShard(tx *bbolt.Tx, p indexstore.Partition, tenant string, shard uint32) error
}

// Index is the block metadata index. Every accessor takes the caller's
// bbolt transaction; mutating methods must be called within a writable one.
type Index struct {
	logger log.Logger
	config Config
	store  Store
	shards *shardCache // cache of loaded partition shards
	blocks *blockCache // cache of block metadata entries
}

// NewIndex creates an index backed by the given store, with cache sizes
// taken from cfg.
func NewIndex(logger log.Logger, s Store, cfg Config) *Index {
	return &Index{
		logger: logger,
		config: cfg,
		store:  s,
		shards: newShardCache(cfg.ShardCacheSize, s),
		blocks: newBlockCache(cfg.BlockReadCacheSize, cfg.BlockWriteCacheSize),
	}
}

// NewStore returns the default persistent store implementation.
func NewStore() *indexstore.IndexStore { return indexstore.NewIndexStore() }

// Init creates the index buckets. It must run in a writable transaction.
func (i *Index) Init(tx *bbolt.Tx) error { return i.store.CreateBuckets(tx) }

// Restore pre-loads into memory the shards of every partition that overlaps
// [now-queryLookaroundPeriod, now+queryLookaroundPeriod]. It fails on the
// first shard that cannot be loaded.
func (i *Index) Restore(tx *bbolt.Tx) error {
	// See comment in DefaultConfig.queryLookaroundPeriod.
	now := time.Now()
	start := now.Add(-i.config.queryLookaroundPeriod)
	end := now.Add(i.config.queryLookaroundPeriod)
	for p := range i.store.Partitions(tx) {
		if !p.Overlaps(start, end) {
			continue
		}
		level.Info(i.logger).Log("msg", "loading partition in memory")
		q := p.Query(tx)
		if q == nil {
			continue
		}
		for tenant := range q.Tenants() {
			for shard := range q.Shards(tenant) {
				if _, err := i.shards.getForWrite(tx, p, tenant, shard.Shard); err != nil {
					level.Error(i.logger).Log(
						"msg", "failed to load tenant partition shard",
						"partition", p,
						"tenant", tenant,
						"shard", shard,
						"err", err,
					)
					return err
				}
			}
		}
	}
	return nil
}

// InsertBlock stores the block metadata in the shard of the partition
// derived from the block's ULID, and adds the entry to the block cache.
func (i *Index) InsertBlock(tx *bbolt.Tx, b *metastorev1.BlockMeta) error {
	p := i.partitionKeyForBlock(b.Id)
	return i.shards.update(tx, p, metadata.Tenant(b), b.Shard, func(s *indexstore.Shard) error {
		if err := s.Store(tx, b); err != nil {
			return err
		}
		i.blocks.put(s, b)
		return nil
	})
}

// ReplaceBlocks inserts the new compacted blocks and deletes the source
// blocks they replace, all within the caller's transaction. Source blocks
// are removed from both the store and the block cache.
func (i *Index) ReplaceBlocks(tx *bbolt.Tx, compacted *metastorev1.CompactedBlocks) error {
	for _, b := range compacted.NewBlocks {
		if err := i.InsertBlock(tx, b); err != nil {
			return err
		}
	}
	for p, list := range i.partitionedList(compacted.SourceBlocks) {
		err := i.shards.update(tx, p, list.Tenant, list.Shard, func(s *indexstore.Shard) error {
			if err := s.Delete(tx, list.Blocks...); err != nil {
				return err
			}
			for _, b := range list.Blocks {
				i.blocks.delete(s, b)
			}
			return nil
		})
		if err != nil {
			return err
		}
	}
	return nil
}

// GetBlocks returns metadata for the listed blocks. Each entry is cloned
// and its string table exported, so callers may freely modify the result.
// Blocks that are not found are silently omitted.
func (i *Index) GetBlocks(tx *bbolt.Tx, list *metastorev1.BlockList) ([]*metastorev1.BlockMeta, error) {
	metas := make([]*metastorev1.BlockMeta, 0, len(list.Blocks))
	for k, partitioned := range i.partitionedList(list) {
		s, err := i.shards.getForRead(tx, k, partitioned.Tenant, partitioned.Shard)
		if err != nil {
			return nil, err
		}
		for _, kv := range s.Find(tx, partitioned.Blocks...) {
			b := i.blocks.getOrCreate(s, kv).CloneVT()
			s.StringTable.Export(b)
			metas = append(metas, b)
		}
	}
	return metas, nil
}

// Partitions enumerates all partitions present in the store.
func (i *Index) Partitions(tx *bbolt.Tx) iter.Seq[indexstore.Partition] {
	return i.store.Partitions(tx)
}

// DeleteShard removes the tenant shard of the given partition from the
// store and evicts it from the shard cache.
func (i *Index) DeleteShard(tx *bbolt.Tx, key indexstore.Partition, tenant string, shard uint32) error {
	if err := i.store.DeleteShard(tx, key, tenant, shard); err != nil {
		return err
	}
	i.shards.delete(key, tenant, shard)
	return nil
}

// GetTenants returns the distinct non-empty tenant IDs found across all
// partitions. The order of the result is unspecified (map iteration).
func (i *Index) GetTenants(tx *bbolt.Tx) []string {
	uniqueTenants := make(map[string]struct{})
	for p := range i.store.Partitions(tx) {
		q := p.Query(tx)
		if q == nil {
			// Partition not found.
			continue
		}
		for t := range q.Tenants() {
			if t == "" {
				continue
			}
			uniqueTenants[t] = struct{}{}
		}
	}
	tenants := make([]string, 0, len(uniqueTenants))
	for t := range uniqueTenants {
		tenants = append(tenants, t)
	}
	return tenants
}

// GetTenantStats reports whether the tenant has any data and, if so, the
// oldest and newest profile times observed across all of its shards.
// If no data was found, a zero-value TenantStats is returned instead of
// the MaxInt64/MinInt64 seed values.
func (i *Index) GetTenantStats(tx *bbolt.Tx, tenant string) *metastorev1.TenantStats {
	stats := &metastorev1.TenantStats{
		DataIngested:      false,
		OldestProfileTime: math.MaxInt64,
		NewestProfileTime: math.MinInt64,
	}
	for p := range i.store.Partitions(tx) {
		q := p.Query(tx)
		if q == nil {
			// Partition not found.
			continue
		}
		for shard := range q.Shards(tenant) {
			stats.DataIngested = true
			oldest := shard.ShardIndex.MinTime
			newest := shard.ShardIndex.MaxTime
			if oldest < stats.OldestProfileTime {
				stats.OldestProfileTime = oldest
			}
			if newest > stats.NewestProfileTime {
				stats.NewestProfileTime = newest
			}
		}
	}
	if !stats.DataIngested {
		return new(metastorev1.TenantStats)
	}
	return stats
}

// QueryMetadata returns the block metadata matching the query.
func (i *Index) QueryMetadata(tx *bbolt.Tx, ctx context.Context, query MetadataQuery) ([]*metastorev1.BlockMeta, error) {
	q, err := newMetadataQuery(i, query)
	if err != nil {
		return nil, err
	}
	r, err := newBlockMetadataQuerier(tx, q).queryBlocks(ctx)
	if err != nil {
		return nil, err
	}
	return r, nil
}

// QueryMetadataLabels returns the unique label sets matching the query,
// sorted with model.CompareLabels.
func (i *Index) QueryMetadataLabels(tx *bbolt.Tx, ctx context.Context, query MetadataQuery) ([]*typesv1.Labels, error) {
	q, err := newMetadataQuery(i, query)
	if err != nil {
		return nil, err
	}
	c, err := newMetadataLabelQuerier(tx, q).queryLabels(ctx)
	if err != nil {
		return nil, err
	}
	l := slices.Collect(c.Unique())
	slices.SortFunc(l, model.CompareLabels)
	return l, nil
}

// partitionedList groups the blocks of the list by the partition their
// ULIDs map to, preserving the list's tenant and shard in each group.
func (i *Index) partitionedList(list *metastorev1.BlockList) map[indexstore.Partition]*metastorev1.BlockList {
	partitions := make(map[indexstore.Partition]*metastorev1.BlockList)
	for _, b := range list.Blocks {
		k := i.partitionKeyForBlock(b)
		v := partitions[k]
		if v == nil {
			v = &metastorev1.BlockList{
				Shard:  list.Shard,
				Tenant: list.Tenant,
				Blocks: make([]string, 0, len(list.Blocks)),
			}
			partitions[k] = v
		}
		v.Blocks = append(v.Blocks, b)
	}
	return partitions
}

// partitionKeyForBlock derives the partition key from the timestamp encoded
// in the block's ULID and the configured partition duration. Panics if b is
// not a valid ULID (ulid.MustParse).
func (i *Index) partitionKeyForBlock(b string) indexstore.Partition {
	return indexstore.NewPartition(ulid.Time(ulid.MustParse(b).Time()), i.config.partitionDuration)
}