package storegateway

import (
	"context"
	"flag"
	"os"
	"path/filepath"
	"sync"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/backoff"
	"github.com/grafana/dskit/multierror"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"

	phlareobj "github.com/grafana/pyroscope/pkg/objstore"
	"github.com/grafana/pyroscope/pkg/phlaredb/block"
	"github.com/grafana/pyroscope/pkg/phlaredb/bucket"
	"github.com/grafana/pyroscope/pkg/util"
)

// errBucketStoreNotFound is returned by closeBucketStore when no bucket store
// exists for the requested tenant.
var errBucketStoreNotFound = errors.New("bucket store not found")

// BucketStoreConfig configures how per-tenant bucket stores discover and
// synchronize blocks from object storage.
type BucketStoreConfig struct {
	SyncDir                  string        `yaml:"sync_dir"`
	SyncInterval             time.Duration `yaml:"sync_interval" category:"advanced"`
	TenantSyncConcurrency    int           `yaml:"tenant_sync_concurrency" category:"advanced"`
	IgnoreBlocksWithin       time.Duration `yaml:"ignore_blocks_within" category:"advanced"`
	MetaSyncConcurrency      int           `yaml:"meta_sync_concurrency" category:"advanced"`
	IgnoreDeletionMarksDelay time.Duration `yaml:"ignore_deletion_mark_delay" category:"advanced"`
}

// RegisterFlags registers the BucketStore flags.
// NOTE(review): the logger parameter is currently unused — presumably kept for
// signature parity with the upstream (Mimir) config this was ported from; confirm
// before removing.
func (cfg *BucketStoreConfig) RegisterFlags(f *flag.FlagSet, logger log.Logger) {
	// The commented-out registrations below are inherited from the upstream
	// config and are not (yet) wired up in Pyroscope.
	// cfg.IndexCache.RegisterFlagsWithPrefix(f, "blocks-storage.bucket-store.index-cache.")
	// cfg.ChunksCache.RegisterFlagsWithPrefix(f, "blocks-storage.bucket-store.chunks-cache.", logger)
	// cfg.MetadataCache.RegisterFlagsWithPrefix(f, "blocks-storage.bucket-store.metadata-cache.")
	// cfg.BucketIndex.RegisterFlagsWithPrefix(f, "blocks-storage.bucket-store.bucket-index.")
	// cfg.IndexHeader.RegisterFlagsWithPrefix(f, "blocks-storage.bucket-store.index-header.")

	f.StringVar(&cfg.SyncDir, "blocks-storage.bucket-store.sync-dir", "./data/pyroscope-sync/", "Directory to store synchronized pyroscope block headers. This directory is not required to be persisted between restarts, but it's highly recommended in order to improve the store-gateway startup time.")
	f.DurationVar(&cfg.SyncInterval, "blocks-storage.bucket-store.sync-interval", 15*time.Minute, "How frequently to scan the bucket, or to refresh the bucket index (if enabled), in order to look for changes (new blocks shipped by ingesters and blocks deleted by retention or compaction).")
	f.IntVar(&cfg.TenantSyncConcurrency, "blocks-storage.bucket-store.tenant-sync-concurrency", 10, "Maximum number of concurrent tenants synching blocks.")
	f.DurationVar(&cfg.IgnoreBlocksWithin, "blocks-storage.bucket-store.ignore-blocks-within", 3*time.Hour, "Blocks with minimum time within this duration are ignored, and not loaded by store-gateway. Useful when used together with -querier.query-store-after to prevent loading young blocks, because there are usually many of them (depending on number of ingesters) and they are not yet compacted. Negative values or 0 disable the filter.")

	// f.Uint64Var(&cfg.MaxChunkPoolBytes, "blocks-storage.bucket-store.max-chunk-pool-bytes", uint64(2*units.Gibibyte), "Max size - in bytes - of a chunks pool, used to reduce memory allocations. The pool is shared across all tenants. 0 to disable the limit.")
	// f.IntVar(&cfg.ChunkPoolMinBucketSizeBytes, "blocks-storage.bucket-store.chunk-pool-min-bucket-size-bytes", ChunkPoolDefaultMinBucketSize, "Size - in bytes - of the smallest chunks pool bucket.")
	// f.IntVar(&cfg.ChunkPoolMaxBucketSizeBytes, "blocks-storage.bucket-store.chunk-pool-max-bucket-size-bytes", ChunkPoolDefaultMaxBucketSize, "Size - in bytes - of the largest chunks pool bucket.")
	// f.Uint64Var(&cfg.SeriesHashCacheMaxBytes, "blocks-storage.bucket-store.series-hash-cache-max-size-bytes", uint64(1*units.Gibibyte), "Max size - in bytes - of the in-memory series hash cache. The cache is shared across all tenants and it's used only when query sharding is enabled.")
	// f.IntVar(&cfg.MaxConcurrent, "blocks-storage.bucket-store.max-concurrent", 100, "Max number of concurrent queries to execute against the long-term storage. The limit is shared across all tenants.")
	// f.IntVar(&cfg.BlockSyncConcurrency, "blocks-storage.bucket-store.block-sync-concurrency", 20, "Maximum number of concurrent blocks synching per tenant.")
	f.IntVar(&cfg.MetaSyncConcurrency, "blocks-storage.bucket-store.meta-sync-concurrency", 20, "Number of Go routines to use when syncing block meta files from object storage per tenant.")
	// f.DurationVar(&cfg.DeprecatedConsistencyDelay, consistencyDelayFlag, 0, "Minimum age of a block before it's being read. Set it to safe value (e.g 30m) if your object storage is eventually consistent. GCS and S3 are (roughly) strongly consistent.")
	f.DurationVar(&cfg.IgnoreDeletionMarksDelay, "blocks-storage.bucket-store.ignore-deletion-marks-delay", 30*time.Minute, "Duration after which the blocks marked for deletion will be filtered out while fetching blocks. "+
		"The idea of ignore-deletion-marks-delay is to ignore blocks that are marked for deletion with some delay. This ensures store can still serve blocks that are meant to be deleted but do not have a replacement yet.")
	// f.IntVar(&cfg.PostingOffsetsInMemSampling, "blocks-storage.bucket-store.posting-offsets-in-mem-sampling", DefaultPostingOffsetInMemorySampling, "Controls what is the ratio of postings offsets that the store will hold in memory.")
	// f.BoolVar(&cfg.IndexHeaderLazyLoadingEnabled, "blocks-storage.bucket-store.index-header-lazy-loading-enabled", true, "If enabled, store-gateway will lazy load an index-header only once required by a query.")
	// f.DurationVar(&cfg.IndexHeaderLazyLoadingIdleTimeout, "blocks-storage.bucket-store.index-header-lazy-loading-idle-timeout", 60*time.Minute, "If index-header lazy loading is enabled and this setting is > 0, the store-gateway will offload unused index-headers after 'idle timeout' inactivity.")
	// f.Uint64Var(&cfg.PartitionerMaxGapBytes, "blocks-storage.bucket-store.partitioner-max-gap-bytes", DefaultPartitionerMaxGapSize, "Max size - in bytes - of a gap for which the partitioner aggregates together two bucket GET object requests.")
	// f.IntVar(&cfg.StreamingBatchSize, "blocks-storage.bucket-store.batch-series-size", 5000, "This option controls how many series to fetch per batch. The batch size must be greater than 0.")
	// f.IntVar(&cfg.ChunkRangesPerSeries, "blocks-storage.bucket-store.fine-grained-chunks-caching-ranges-per-series", 1, "This option controls into how many ranges the chunks of each series from each block are split. This value is effectively the number of chunks cache items per series per block when -blocks-storage.bucket-store.chunks-cache.fine-grained-chunks-caching-enabled is enabled.")
	// f.StringVar(&cfg.SeriesSelectionStrategyName, "blocks-storage.bucket-store.series-selection-strategy", AllPostingsStrategy, "This option controls the strategy to selection of series and deferring application of matchers. A more aggressive strategy will fetch less posting lists at the cost of more series. This is useful when querying large blocks in which many series share the same label name and value. Supported values (most aggressive to least aggressive): "+strings.Join(validSeriesSelectionStrategies, ", ")+".")
}

// Validate the config.
// Currently a no-op: all checks below are inherited from the upstream config
// and reference fields that are not (yet) part of BucketStoreConfig.
func (cfg *BucketStoreConfig) Validate(logger log.Logger) error {
	// if cfg.StreamingBatchSize <= 0 {
	// 	return errInvalidStreamingBatchSize
	// }
	// if err := cfg.IndexCache.Validate(); err != nil {
	// 	return errors.Wrap(err, "index-cache configuration")
	// }
	// if err := cfg.ChunksCache.Validate(); err != nil {
	// 	return errors.Wrap(err, "chunks-cache configuration")
	// }
	// if err := cfg.MetadataCache.Validate(); err != nil {
	// 	return errors.Wrap(err, "metadata-cache configuration")
	// }
	// if cfg.DeprecatedConsistencyDelay > 0 {
	// 	util.WarnDeprecatedConfig(consistencyDelayFlag, logger)
	// }
	// if !util.StringsContain(validSeriesSelectionStrategies, cfg.SeriesSelectionStrategyName) {
	// 	return errors.New("invalid series-selection-strategy, set one of " + strings.Join(validSeriesSelectionStrategies, ", "))
	// }
	return nil
}

// BucketIndexConfig configures the behavior of the bucket index, which is
// read by queriers and updated by the compactor.
type BucketIndexConfig struct {
	UpdateOnErrorInterval time.Duration `yaml:"update_on_error_interval" category:"advanced"`
	IdleTimeout           time.Duration `yaml:"idle_timeout" category:"advanced"`
	MaxStalePeriod        time.Duration `yaml:"max_stale_period" category:"advanced"`
}

// RegisterFlagsWithPrefix registers the BucketIndexConfig flags, prepending
// the given prefix to each flag name.
func (cfg *BucketIndexConfig) RegisterFlagsWithPrefix(f *flag.FlagSet, prefix string) {
	f.DurationVar(&cfg.UpdateOnErrorInterval, prefix+"update-on-error-interval", time.Minute, "How frequently a bucket index, which previously failed to load, should be tried to load again. This option is used only by querier.")
	f.DurationVar(&cfg.IdleTimeout, prefix+"idle-timeout", time.Hour, "How long a unused bucket index should be cached. Once this timeout expires, the unused bucket index is removed from the in-memory cache. This option is used only by querier.")
	f.DurationVar(&cfg.MaxStalePeriod, prefix+"max-stale-period", time.Hour, "The maximum allowed age of a bucket index (last updated) before queries start failing because the bucket index is too old. The bucket index is periodically updated by the compactor, and this check is enforced in the querier (at query time).")
}

// BucketStores manages one BucketStore per tenant, lazily created, and keeps
// them in sync with the blocks stored in the object storage bucket.
type BucketStores struct {
	storageBucket     phlareobj.Bucket
	cfg               BucketStoreConfig
	logger            log.Logger
	syncBackoffConfig backoff.Config
	shardingStrategy  ShardingStrategy
	limits            Limits
	reg               prometheus.Registerer

	// Keeps a bucket store for each tenant.
	storesMu sync.RWMutex
	stores   map[string]*BucketStore

	// Metrics.
	syncTimes         prometheus.Histogram
	syncLastSuccess   prometheus.Gauge
	tenantsDiscovered prometheus.Gauge
	tenantsSynced     prometheus.Gauge
	blocksLoaded      prometheus.GaugeFunc
}

// NewBucketStores builds a BucketStores and registers its metrics on reg.
// Per-tenant stores are created lazily during sync, not here.
func NewBucketStores(cfg BucketStoreConfig, shardingStrategy ShardingStrategy, storageBucket phlareobj.Bucket, limits Limits, logger log.Logger, reg prometheus.Registerer) (*BucketStores, error) {
	bs := &BucketStores{
		storageBucket: storageBucket,
		logger:        logger,
		cfg:           cfg,
		syncBackoffConfig: backoff.Config{
			MinBackoff: 1 * time.Second,
			MaxBackoff: 10 * time.Second,
			MaxRetries: 3,
		},
		stores:           map[string]*BucketStore{},
		shardingStrategy: shardingStrategy,
		reg:              reg,
		limits:           limits,
	}

	// Register metrics.
	bs.syncTimes = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
		Name:    "pyroscope_bucket_stores_blocks_sync_seconds",
		Help:    "The total time it takes to perform a sync stores",
		Buckets: []float64{0.1, 1, 10, 30, 60, 120, 300, 600, 900},
	})
	bs.syncLastSuccess = promauto.With(reg).NewGauge(prometheus.GaugeOpts{
		Name: "pyroscope_bucket_stores_blocks_last_successful_sync_timestamp_seconds",
		Help: "Unix timestamp of the last successful blocks sync.",
	})
	bs.tenantsDiscovered = promauto.With(reg).NewGauge(prometheus.GaugeOpts{
		Name: "pyroscope_bucket_stores_tenants_discovered",
		Help: "Number of tenants discovered in the bucket.",
	})
	bs.tenantsSynced = promauto.With(reg).NewGauge(prometheus.GaugeOpts{
		Name: "pyroscope_bucket_stores_tenants_synced",
		Help: "Number of tenants synced.",
	})
	bs.blocksLoaded = promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{
		Name: "pyroscope_bucket_store_blocks_loaded",
		Help: "Number of currently loaded blocks.",
	}, bs.getBlocksLoadedMetric)

	return bs, nil
}

// SyncBlocks synchronizes the stores state with the Bucket store for every user.
164 func (bs *BucketStores) SyncBlocks(ctx context.Context) error { 165 return bs.syncUsersBlocksWithRetries(ctx, func(ctx context.Context, s *BucketStore) error { 166 return s.SyncBlocks(ctx) 167 }) 168 } 169 170 func (bs *BucketStores) InitialSync(ctx context.Context) error { 171 level.Info(bs.logger).Log("msg", "synchronizing Pyroscope blocks for all users") 172 173 if err := bs.syncUsersBlocksWithRetries(ctx, func(ctx context.Context, s *BucketStore) error { 174 return s.InitialSync(ctx) 175 }); err != nil { 176 level.Warn(bs.logger).Log("msg", "failed to synchronize Pyroscope blocks", "err", err) 177 return err 178 } 179 180 level.Info(bs.logger).Log("msg", "successfully synchronized Pyroscope blocks for all users") 181 return nil 182 } 183 184 func (bs *BucketStores) syncUsersBlocksWithRetries(ctx context.Context, f func(context.Context, *BucketStore) error) error { 185 retries := backoff.New(ctx, bs.syncBackoffConfig) 186 187 var lastErr error 188 for retries.Ongoing() { 189 lastErr = bs.syncUsersBlocks(ctx, f) 190 if lastErr == nil { 191 return nil 192 } 193 194 retries.Wait() 195 } 196 197 if lastErr == nil { 198 return retries.Err() 199 } 200 201 return lastErr 202 } 203 204 func (bs *BucketStores) syncUsersBlocks(ctx context.Context, f func(context.Context, *BucketStore) error) (returnErr error) { 205 defer func(start time.Time) { 206 bs.syncTimes.Observe(time.Since(start).Seconds()) 207 if returnErr == nil { 208 bs.syncLastSuccess.SetToCurrentTime() 209 } 210 }(time.Now()) 211 212 type job struct { 213 userID string 214 store *BucketStore 215 } 216 217 wg := &sync.WaitGroup{} 218 jobs := make(chan job) 219 errs := multierror.New() 220 errsMx := sync.Mutex{} 221 222 // Scan users in the bucket. In case of error, it may return a subset of users. If we sync a subset of users 223 // during a periodic sync, we may end up unloading blocks for users that still belong to this store-gateway 224 // so we do prefer to not run the sync at all. 
225 userIDs, err := bs.scanUsers(ctx) 226 if err != nil { 227 return err 228 } 229 230 ownedUserIDs, err := bs.shardingStrategy.FilterUsers(ctx, userIDs) 231 if err != nil { 232 return errors.Wrap(err, "unable to check tenants owned by this store-gateway instance") 233 } 234 235 includeUserIDs := make(map[string]struct{}, len(ownedUserIDs)) 236 for _, userID := range ownedUserIDs { 237 includeUserIDs[userID] = struct{}{} 238 } 239 240 bs.tenantsDiscovered.Set(float64(len(userIDs))) 241 bs.tenantsSynced.Set(float64(len(includeUserIDs))) 242 243 // Create a pool of workers which will synchronize blocks. The pool size 244 // is limited in order to avoid to concurrently sync a lot of tenants in 245 // a large cluster. 246 for i := 0; i < bs.cfg.TenantSyncConcurrency; i++ { 247 wg.Add(1) 248 go func() { 249 defer wg.Done() 250 251 for job := range jobs { 252 if err := f(ctx, job.store); err != nil { 253 errsMx.Lock() 254 errs.Add(errors.Wrapf(err, "failed to synchronize Pyroscope blocks for user %s", job.userID)) 255 errsMx.Unlock() 256 } 257 } 258 }() 259 } 260 261 // Lazily create a bucket store for each new user found 262 // and submit a sync job for each user. 263 for userID := range includeUserIDs { 264 bs, err := bs.getOrCreateStore(userID) 265 if err != nil { 266 errsMx.Lock() 267 errs.Add(err) 268 errsMx.Unlock() 269 270 continue 271 } 272 273 select { 274 case jobs <- job{userID: userID, store: bs}: 275 // Nothing to do. Will loop to push more jobs. 276 case <-ctx.Done(): 277 // Wait until all workers have done, so the goroutines leak detector doesn't 278 // report any issue. This is expected to be quick, considering the done ctx 279 // is used by the worker callback function too. 280 close(jobs) 281 wg.Wait() 282 283 return ctx.Err() 284 } 285 } 286 287 // Wait until all workers completed. 
288 close(jobs) 289 wg.Wait() 290 291 bs.closeBucketStoreAndDeleteLocalFilesForExcludedTenants(includeUserIDs) 292 293 return errs.Err() 294 } 295 296 func (bs *BucketStores) getStore(userID string) *BucketStore { 297 bs.storesMu.RLock() 298 defer bs.storesMu.RUnlock() 299 return bs.stores[userID] 300 } 301 302 func (bs *BucketStores) getOrCreateStore(userID string) (*BucketStore, error) { 303 // Check if the store already exists. 304 s := bs.getStore(userID) 305 if s != nil { 306 return s, nil 307 } 308 309 bs.storesMu.Lock() 310 defer bs.storesMu.Unlock() 311 312 // Check again for the store in the event it was created in-between locks. 313 s = bs.stores[userID] 314 if s != nil { 315 return s, nil 316 } 317 318 userLogger := util.LoggerWithUserID(userID, bs.logger) 319 320 level.Info(userLogger).Log("msg", "creating user bucket store") 321 322 // The sharding strategy filter MUST be before the ones we create here (order matters). 323 filters := []block.MetadataFilter{ 324 NewShardingMetadataFilterAdapter(userID, bs.shardingStrategy), 325 newMinTimeMetaFilter(bs.cfg.IgnoreBlocksWithin), 326 NewIgnoreDeletionMarkFilter(userLogger, bs.storageBucket, bs.cfg.IgnoreDeletionMarksDelay, bs.cfg.MetaSyncConcurrency), 327 } 328 fetcherReg := prometheus.NewRegistry() 329 330 fetcher := NewBucketIndexMetadataFetcher( 331 userID, 332 bs.storageBucket, 333 bs.limits, 334 bs.logger, 335 fetcherReg, 336 filters, 337 ) 338 339 s, err := NewBucketStore( 340 bs.storageBucket, 341 fetcher, 342 userID, 343 bs.syncDirForUser(userID), 344 userLogger, 345 bs.reg, 346 ) 347 if err != nil { 348 return nil, err 349 } 350 351 bs.stores[userID] = s 352 353 return s, nil 354 } 355 356 // closeBucketStoreAndDeleteLocalFilesForExcludedTenants closes bucket store and removes local "sync" directories 357 // for tenants that are not included in the current shard. 
358 func (bs *BucketStores) closeBucketStoreAndDeleteLocalFilesForExcludedTenants(includeUserIDs map[string]struct{}) { 359 files, err := os.ReadDir(bs.cfg.SyncDir) 360 if err != nil { 361 return 362 } 363 364 for _, f := range files { 365 if !f.IsDir() { 366 continue 367 } 368 369 userID := f.Name() 370 if _, included := includeUserIDs[userID]; included { 371 // Preserve directory for users owned by this shard. 372 continue 373 } 374 375 err := bs.closeBucketStore(userID) 376 switch { 377 case errors.Is(err, errBucketStoreNotFound): 378 // This is OK, nothing was closed. 379 case err == nil: 380 level.Info(bs.logger).Log("msg", "closed bucket store for user", "tenant", userID) 381 default: 382 level.Warn(bs.logger).Log("msg", "failed to close bucket store for user", "tenant", userID, "err", err) 383 } 384 385 userSyncDir := bs.syncDirForUser(userID) 386 err = os.RemoveAll(userSyncDir) 387 if err == nil { 388 level.Info(bs.logger).Log("msg", "deleted user sync directory", "dir", userSyncDir) 389 } else { 390 level.Warn(bs.logger).Log("msg", "failed to delete user sync directory", "dir", userSyncDir, "err", err) 391 } 392 } 393 } 394 395 func (u *BucketStores) syncDirForUser(userID string) string { 396 return filepath.Join(u.cfg.SyncDir, userID) 397 } 398 399 // closeBucketStore closes bucket store for given user 400 // and removes it from bucket stores map and metrics. 401 // If bucket store doesn't exist, returns errBucketStoreNotFound. 402 // Otherwise returns error from closing the bucket store. 
403 func (bs *BucketStores) closeBucketStore(userID string) error { 404 bs.storesMu.Lock() 405 unlockInDefer := true 406 defer func() { 407 if unlockInDefer { 408 bs.storesMu.Unlock() 409 } 410 }() 411 412 s := bs.stores[userID] 413 if s == nil { 414 return errBucketStoreNotFound 415 } 416 417 delete(bs.stores, userID) 418 s.metrics.Unregister() 419 unlockInDefer = false 420 bs.storesMu.Unlock() 421 422 return s.RemoveBlocksAndClose() 423 } 424 425 // getBlocksLoadedMetric returns the number of blocks currently loaded across all bucket stores. 426 func (u *BucketStores) getBlocksLoadedMetric() float64 { 427 count := 0 428 429 u.storesMu.RLock() 430 for _, store := range u.stores { 431 count += store.Stats().BlocksLoaded 432 } 433 u.storesMu.RUnlock() 434 435 return float64(count) 436 } 437 438 func (bs *BucketStores) scanUsers(ctx context.Context) ([]string, error) { 439 return bucket.ListUsers(ctx, bs.storageBucket) 440 }