github.com/grafana/pyroscope@v1.18.0/pkg/storegateway/bucket_stores.go (about)

     1  package storegateway
     2  
     3  import (
     4  	"context"
     5  	"flag"
     6  	"os"
     7  	"path/filepath"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/go-kit/log"
    12  	"github.com/go-kit/log/level"
    13  	"github.com/grafana/dskit/backoff"
    14  	"github.com/grafana/dskit/multierror"
    15  	"github.com/pkg/errors"
    16  	"github.com/prometheus/client_golang/prometheus"
    17  	"github.com/prometheus/client_golang/prometheus/promauto"
    18  
    19  	phlareobj "github.com/grafana/pyroscope/pkg/objstore"
    20  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    21  	"github.com/grafana/pyroscope/pkg/phlaredb/bucket"
    22  	"github.com/grafana/pyroscope/pkg/util"
    23  )
    24  
    25  var errBucketStoreNotFound = errors.New("bucket store not found")
    26  
// BucketStoreConfig holds the store-gateway configuration for synchronizing
// tenant blocks from object storage: the local sync directory, sync cadence
// and concurrency, and filters applied while fetching block metas.
type BucketStoreConfig struct {
	// SyncDir is the local directory where synchronized block headers are kept.
	// It is not required to persist across restarts, but doing so speeds up startup.
	SyncDir string `yaml:"sync_dir"`
	// SyncInterval is how frequently the bucket is scanned for new/deleted blocks.
	SyncInterval time.Duration `yaml:"sync_interval" category:"advanced"`
	// TenantSyncConcurrency is the maximum number of tenants whose blocks are synced concurrently.
	TenantSyncConcurrency int `yaml:"tenant_sync_concurrency" category:"advanced"`
	// IgnoreBlocksWithin filters out blocks whose min time falls within this
	// duration from now; 0 or negative disables the filter.
	IgnoreBlocksWithin time.Duration `yaml:"ignore_blocks_within" category:"advanced"`
	// MetaSyncConcurrency is the number of goroutines used to sync block meta files per tenant.
	MetaSyncConcurrency int `yaml:"meta_sync_concurrency" category:"advanced"`
	// IgnoreDeletionMarksDelay is the delay after which blocks marked for
	// deletion are filtered out while fetching blocks.
	IgnoreDeletionMarksDelay time.Duration `yaml:"ignore_deletion_mark_delay" category:"advanced"`
}
    35  
// RegisterFlags registers the BucketStore flags.
//
// The commented-out registrations below belong to options that are currently
// not active in this codebase and are kept as reference only.
// NOTE(review): logger is unused by the active code — it is referenced only by
// a commented-out cache registration; kept to preserve the method signature.
func (cfg *BucketStoreConfig) RegisterFlags(f *flag.FlagSet, logger log.Logger) {
	// cfg.IndexCache.RegisterFlagsWithPrefix(f, "blocks-storage.bucket-store.index-cache.")
	// cfg.ChunksCache.RegisterFlagsWithPrefix(f, "blocks-storage.bucket-store.chunks-cache.", logger)
	// cfg.MetadataCache.RegisterFlagsWithPrefix(f, "blocks-storage.bucket-store.metadata-cache.")
	// cfg.BucketIndex.RegisterFlagsWithPrefix(f, "blocks-storage.bucket-store.bucket-index.")
	// cfg.IndexHeader.RegisterFlagsWithPrefix(f, "blocks-storage.bucket-store.index-header.")

	f.StringVar(&cfg.SyncDir, "blocks-storage.bucket-store.sync-dir", "./data/pyroscope-sync/", "Directory to store synchronized pyroscope block headers. This directory is not required to be persisted between restarts, but it's highly recommended in order to improve the store-gateway startup time.")
	f.DurationVar(&cfg.SyncInterval, "blocks-storage.bucket-store.sync-interval", 15*time.Minute, "How frequently to scan the bucket, or to refresh the bucket index (if enabled), in order to look for changes (new blocks shipped by ingesters and blocks deleted by retention or compaction).")
	f.IntVar(&cfg.TenantSyncConcurrency, "blocks-storage.bucket-store.tenant-sync-concurrency", 10, "Maximum number of concurrent tenants synching blocks.")
	f.DurationVar(&cfg.IgnoreBlocksWithin, "blocks-storage.bucket-store.ignore-blocks-within", 3*time.Hour, "Blocks with minimum time within this duration are ignored, and not loaded by store-gateway. Useful when used together with -querier.query-store-after to prevent loading young blocks, because there are usually many of them (depending on number of ingesters) and they are not yet compacted. Negative values or 0 disable the filter.")

	// f.Uint64Var(&cfg.MaxChunkPoolBytes, "blocks-storage.bucket-store.max-chunk-pool-bytes", uint64(2*units.Gibibyte), "Max size - in bytes - of a chunks pool, used to reduce memory allocations. The pool is shared across all tenants. 0 to disable the limit.")
	// f.IntVar(&cfg.ChunkPoolMinBucketSizeBytes, "blocks-storage.bucket-store.chunk-pool-min-bucket-size-bytes", ChunkPoolDefaultMinBucketSize, "Size - in bytes - of the smallest chunks pool bucket.")
	// f.IntVar(&cfg.ChunkPoolMaxBucketSizeBytes, "blocks-storage.bucket-store.chunk-pool-max-bucket-size-bytes", ChunkPoolDefaultMaxBucketSize, "Size - in bytes - of the largest chunks pool bucket.")
	// f.Uint64Var(&cfg.SeriesHashCacheMaxBytes, "blocks-storage.bucket-store.series-hash-cache-max-size-bytes", uint64(1*units.Gibibyte), "Max size - in bytes - of the in-memory series hash cache. The cache is shared across all tenants and it's used only when query sharding is enabled.")
	// f.IntVar(&cfg.MaxConcurrent, "blocks-storage.bucket-store.max-concurrent", 100, "Max number of concurrent queries to execute against the long-term storage. The limit is shared across all tenants.")
	// f.IntVar(&cfg.BlockSyncConcurrency, "blocks-storage.bucket-store.block-sync-concurrency", 20, "Maximum number of concurrent blocks synching per tenant.")
	f.IntVar(&cfg.MetaSyncConcurrency, "blocks-storage.bucket-store.meta-sync-concurrency", 20, "Number of Go routines to use when syncing block meta files from object storage per tenant.")
	// f.DurationVar(&cfg.DeprecatedConsistencyDelay, consistencyDelayFlag, 0, "Minimum age of a block before it's being read. Set it to safe value (e.g 30m) if your object storage is eventually consistent. GCS and S3 are (roughly) strongly consistent.")
	f.DurationVar(&cfg.IgnoreDeletionMarksDelay, "blocks-storage.bucket-store.ignore-deletion-marks-delay", 30*time.Minute, "Duration after which the blocks marked for deletion will be filtered out while fetching blocks. "+
		"The idea of ignore-deletion-marks-delay is to ignore blocks that are marked for deletion with some delay. This ensures store can still serve blocks that are meant to be deleted but do not have a replacement yet.")
	// f.IntVar(&cfg.PostingOffsetsInMemSampling, "blocks-storage.bucket-store.posting-offsets-in-mem-sampling", DefaultPostingOffsetInMemorySampling, "Controls what is the ratio of postings offsets that the store will hold in memory.")
	// f.BoolVar(&cfg.IndexHeaderLazyLoadingEnabled, "blocks-storage.bucket-store.index-header-lazy-loading-enabled", true, "If enabled, store-gateway will lazy load an index-header only once required by a query.")
	// f.DurationVar(&cfg.IndexHeaderLazyLoadingIdleTimeout, "blocks-storage.bucket-store.index-header-lazy-loading-idle-timeout", 60*time.Minute, "If index-header lazy loading is enabled and this setting is > 0, the store-gateway will offload unused index-headers after 'idle timeout' inactivity.")
	// f.Uint64Var(&cfg.PartitionerMaxGapBytes, "blocks-storage.bucket-store.partitioner-max-gap-bytes", DefaultPartitionerMaxGapSize, "Max size - in bytes - of a gap for which the partitioner aggregates together two bucket GET object requests.")
	// f.IntVar(&cfg.StreamingBatchSize, "blocks-storage.bucket-store.batch-series-size", 5000, "This option controls how many series to fetch per batch. The batch size must be greater than 0.")
	// f.IntVar(&cfg.ChunkRangesPerSeries, "blocks-storage.bucket-store.fine-grained-chunks-caching-ranges-per-series", 1, "This option controls into how many ranges the chunks of each series from each block are split. This value is effectively the number of chunks cache items per series per block when -blocks-storage.bucket-store.chunks-cache.fine-grained-chunks-caching-enabled is enabled.")
	// f.StringVar(&cfg.SeriesSelectionStrategyName, "blocks-storage.bucket-store.series-selection-strategy", AllPostingsStrategy, "This option controls the strategy to selection of series and deferring application of matchers. A more aggressive strategy will fetch less posting lists at the cost of more series. This is useful when querying large blocks in which many series share the same label name and value. Supported values (most aggressive to least aggressive): "+strings.Join(validSeriesSelectionStrategies, ", ")+".")
}
    67  
// Validate the config.
//
// All validation checks are currently disabled (kept below as comments for
// reference), so this always returns nil. The method is retained so callers
// can keep invoking Validate and checks can be re-enabled without an API change.
func (cfg *BucketStoreConfig) Validate(logger log.Logger) error {
	// if cfg.StreamingBatchSize <= 0 {
	// 	return errInvalidStreamingBatchSize
	// }
	// if err := cfg.IndexCache.Validate(); err != nil {
	// 	return errors.Wrap(err, "index-cache configuration")
	// }
	// if err := cfg.ChunksCache.Validate(); err != nil {
	// 	return errors.Wrap(err, "chunks-cache configuration")
	// }
	// if err := cfg.MetadataCache.Validate(); err != nil {
	// 	return errors.Wrap(err, "metadata-cache configuration")
	// }
	// if cfg.DeprecatedConsistencyDelay > 0 {
	// 	util.WarnDeprecatedConfig(consistencyDelayFlag, logger)
	// }
	// if !util.StringsContain(validSeriesSelectionStrategies, cfg.SeriesSelectionStrategyName) {
	// 	return errors.New("invalid series-selection-strategy, set one of " + strings.Join(validSeriesSelectionStrategies, ", "))
	// }
	return nil
}
    90  
// BucketIndexConfig configures how the querier loads and trusts the
// bucket index (see RegisterFlagsWithPrefix for the flag descriptions).
type BucketIndexConfig struct {
	// UpdateOnErrorInterval is how frequently a bucket index that failed to
	// load is retried. Used only by the querier.
	UpdateOnErrorInterval time.Duration `yaml:"update_on_error_interval" category:"advanced"`
	// IdleTimeout is how long an unused bucket index stays cached before
	// being removed from the in-memory cache. Used only by the querier.
	IdleTimeout time.Duration `yaml:"idle_timeout" category:"advanced"`
	// MaxStalePeriod is the maximum allowed age of a bucket index before
	// queries start failing because it is too old.
	MaxStalePeriod time.Duration `yaml:"max_stale_period" category:"advanced"`
}
    96  
    97  func (cfg *BucketIndexConfig) RegisterFlagsWithPrefix(f *flag.FlagSet, prefix string) {
    98  	f.DurationVar(&cfg.UpdateOnErrorInterval, prefix+"update-on-error-interval", time.Minute, "How frequently a bucket index, which previously failed to load, should be tried to load again. This option is used only by querier.")
    99  	f.DurationVar(&cfg.IdleTimeout, prefix+"idle-timeout", time.Hour, "How long a unused bucket index should be cached. Once this timeout expires, the unused bucket index is removed from the in-memory cache. This option is used only by querier.")
   100  	f.DurationVar(&cfg.MaxStalePeriod, prefix+"max-stale-period", time.Hour, "The maximum allowed age of a bucket index (last updated) before queries start failing because the bucket index is too old. The bucket index is periodically updated by the compactor, and this check is enforced in the querier (at query time).")
   101  }
   102  
// BucketStores manages the per-tenant BucketStore instances of a
// store-gateway: it discovers tenants in the storage bucket, filters them
// through the sharding strategy, and keeps one BucketStore per owned tenant.
type BucketStores struct {
	storageBucket     phlareobj.Bucket // object storage bucket holding the blocks
	cfg               BucketStoreConfig
	logger            log.Logger
	syncBackoffConfig backoff.Config   // retry policy used by syncUsersBlocksWithRetries
	shardingStrategy  ShardingStrategy // decides which tenants this instance owns
	limits            Limits
	reg               prometheus.Registerer
	// Keeps a bucket store for each tenant.
	storesMu sync.RWMutex // guards stores
	stores   map[string]*BucketStore

	// Metrics.
	syncTimes         prometheus.Histogram // duration of each syncUsersBlocks run
	syncLastSuccess   prometheus.Gauge     // unix timestamp of the last fully successful sync
	tenantsDiscovered prometheus.Gauge     // tenants found in the bucket during the last sync
	tenantsSynced     prometheus.Gauge     // tenants owned (and therefore synced) during the last sync
	blocksLoaded      prometheus.GaugeFunc // backed by getBlocksLoadedMetric
}
   122  
   123  func NewBucketStores(cfg BucketStoreConfig, shardingStrategy ShardingStrategy, storageBucket phlareobj.Bucket, limits Limits, logger log.Logger, reg prometheus.Registerer) (*BucketStores, error) {
   124  	bs := &BucketStores{
   125  		storageBucket: storageBucket,
   126  		logger:        logger,
   127  		cfg:           cfg,
   128  		syncBackoffConfig: backoff.Config{
   129  			MinBackoff: 1 * time.Second,
   130  			MaxBackoff: 10 * time.Second,
   131  			MaxRetries: 3,
   132  		},
   133  		stores:           map[string]*BucketStore{},
   134  		shardingStrategy: shardingStrategy,
   135  		reg:              reg,
   136  		limits:           limits,
   137  	}
   138  	// Register metrics.
   139  	bs.syncTimes = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
   140  		Name:    "pyroscope_bucket_stores_blocks_sync_seconds",
   141  		Help:    "The total time it takes to perform a sync stores",
   142  		Buckets: []float64{0.1, 1, 10, 30, 60, 120, 300, 600, 900},
   143  	})
   144  	bs.syncLastSuccess = promauto.With(reg).NewGauge(prometheus.GaugeOpts{
   145  		Name: "pyroscope_bucket_stores_blocks_last_successful_sync_timestamp_seconds",
   146  		Help: "Unix timestamp of the last successful blocks sync.",
   147  	})
   148  	bs.tenantsDiscovered = promauto.With(reg).NewGauge(prometheus.GaugeOpts{
   149  		Name: "pyroscope_bucket_stores_tenants_discovered",
   150  		Help: "Number of tenants discovered in the bucket.",
   151  	})
   152  	bs.tenantsSynced = promauto.With(reg).NewGauge(prometheus.GaugeOpts{
   153  		Name: "pyroscope_bucket_stores_tenants_synced",
   154  		Help: "Number of tenants synced.",
   155  	})
   156  	bs.blocksLoaded = promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{
   157  		Name: "pyroscope_bucket_store_blocks_loaded",
   158  		Help: "Number of currently loaded blocks.",
   159  	}, bs.getBlocksLoadedMetric)
   160  	return bs, nil
   161  }
   162  
   163  // SyncBlocks synchronizes the stores state with the Bucket store for every user.
   164  func (bs *BucketStores) SyncBlocks(ctx context.Context) error {
   165  	return bs.syncUsersBlocksWithRetries(ctx, func(ctx context.Context, s *BucketStore) error {
   166  		return s.SyncBlocks(ctx)
   167  	})
   168  }
   169  
   170  func (bs *BucketStores) InitialSync(ctx context.Context) error {
   171  	level.Info(bs.logger).Log("msg", "synchronizing Pyroscope blocks for all users")
   172  
   173  	if err := bs.syncUsersBlocksWithRetries(ctx, func(ctx context.Context, s *BucketStore) error {
   174  		return s.InitialSync(ctx)
   175  	}); err != nil {
   176  		level.Warn(bs.logger).Log("msg", "failed to synchronize Pyroscope blocks", "err", err)
   177  		return err
   178  	}
   179  
   180  	level.Info(bs.logger).Log("msg", "successfully synchronized Pyroscope blocks for all users")
   181  	return nil
   182  }
   183  
   184  func (bs *BucketStores) syncUsersBlocksWithRetries(ctx context.Context, f func(context.Context, *BucketStore) error) error {
   185  	retries := backoff.New(ctx, bs.syncBackoffConfig)
   186  
   187  	var lastErr error
   188  	for retries.Ongoing() {
   189  		lastErr = bs.syncUsersBlocks(ctx, f)
   190  		if lastErr == nil {
   191  			return nil
   192  		}
   193  
   194  		retries.Wait()
   195  	}
   196  
   197  	if lastErr == nil {
   198  		return retries.Err()
   199  	}
   200  
   201  	return lastErr
   202  }
   203  
// syncUsersBlocks lists the tenants in the bucket, filters them through the
// sharding strategy, and runs f against each owned tenant's BucketStore
// using a worker pool of cfg.TenantSyncConcurrency goroutines. Afterwards,
// stores and local sync directories of tenants no longer owned by this
// instance are closed and removed.
//
// The total duration is observed in syncTimes; syncLastSuccess is updated
// only when the whole sync returns no error.
func (bs *BucketStores) syncUsersBlocks(ctx context.Context, f func(context.Context, *BucketStore) error) (returnErr error) {
	defer func(start time.Time) {
		bs.syncTimes.Observe(time.Since(start).Seconds())
		if returnErr == nil {
			bs.syncLastSuccess.SetToCurrentTime()
		}
	}(time.Now())

	type job struct {
		userID string
		store  *BucketStore
	}

	wg := &sync.WaitGroup{}
	jobs := make(chan job)
	errs := multierror.New()
	errsMx := sync.Mutex{} // guards errs, which workers append to concurrently

	// Scan users in the bucket. In case of error, it may return a subset of users. If we sync a subset of users
	// during a periodic sync, we may end up unloading blocks for users that still belong to this store-gateway
	// so we do prefer to not run the sync at all.
	userIDs, err := bs.scanUsers(ctx)
	if err != nil {
		return err
	}

	ownedUserIDs, err := bs.shardingStrategy.FilterUsers(ctx, userIDs)
	if err != nil {
		return errors.Wrap(err, "unable to check tenants owned by this store-gateway instance")
	}

	// Build a set of owned tenants for O(1) membership checks.
	includeUserIDs := make(map[string]struct{}, len(ownedUserIDs))
	for _, userID := range ownedUserIDs {
		includeUserIDs[userID] = struct{}{}
	}

	bs.tenantsDiscovered.Set(float64(len(userIDs)))
	bs.tenantsSynced.Set(float64(len(includeUserIDs)))

	// Create a pool of workers which will synchronize blocks. The pool size
	// is limited in order to avoid to concurrently sync a lot of tenants in
	// a large cluster.
	for i := 0; i < bs.cfg.TenantSyncConcurrency; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()

			for job := range jobs {
				if err := f(ctx, job.store); err != nil {
					errsMx.Lock()
					errs.Add(errors.Wrapf(err, "failed to synchronize Pyroscope blocks for user %s", job.userID))
					errsMx.Unlock()
				}
			}
		}()
	}

	// Lazily create a bucket store for each new user found
	// and submit a sync job for each user.
	for userID := range includeUserIDs {
		// NOTE: this shadows the receiver "bs" with the tenant's *BucketStore
		// for the remainder of the loop iteration.
		bs, err := bs.getOrCreateStore(userID)
		if err != nil {
			errsMx.Lock()
			errs.Add(err)
			errsMx.Unlock()

			continue
		}

		select {
		case jobs <- job{userID: userID, store: bs}:
			// Nothing to do. Will loop to push more jobs.
		case <-ctx.Done():
			// Wait until all workers have done, so the goroutines leak detector doesn't
			// report any issue. This is expected to be quick, considering the done ctx
			// is used by the worker callback function too.
			close(jobs)
			wg.Wait()

			return ctx.Err()
		}
	}

	// Wait until all workers completed.
	close(jobs)
	wg.Wait()

	bs.closeBucketStoreAndDeleteLocalFilesForExcludedTenants(includeUserIDs)

	return errs.Err()
}
   295  
   296  func (bs *BucketStores) getStore(userID string) *BucketStore {
   297  	bs.storesMu.RLock()
   298  	defer bs.storesMu.RUnlock()
   299  	return bs.stores[userID]
   300  }
   301  
   302  func (bs *BucketStores) getOrCreateStore(userID string) (*BucketStore, error) {
   303  	// Check if the store already exists.
   304  	s := bs.getStore(userID)
   305  	if s != nil {
   306  		return s, nil
   307  	}
   308  
   309  	bs.storesMu.Lock()
   310  	defer bs.storesMu.Unlock()
   311  
   312  	// Check again for the store in the event it was created in-between locks.
   313  	s = bs.stores[userID]
   314  	if s != nil {
   315  		return s, nil
   316  	}
   317  
   318  	userLogger := util.LoggerWithUserID(userID, bs.logger)
   319  
   320  	level.Info(userLogger).Log("msg", "creating user bucket store")
   321  
   322  	// The sharding strategy filter MUST be before the ones we create here (order matters).
   323  	filters := []block.MetadataFilter{
   324  		NewShardingMetadataFilterAdapter(userID, bs.shardingStrategy),
   325  		newMinTimeMetaFilter(bs.cfg.IgnoreBlocksWithin),
   326  		NewIgnoreDeletionMarkFilter(userLogger, bs.storageBucket, bs.cfg.IgnoreDeletionMarksDelay, bs.cfg.MetaSyncConcurrency),
   327  	}
   328  	fetcherReg := prometheus.NewRegistry()
   329  
   330  	fetcher := NewBucketIndexMetadataFetcher(
   331  		userID,
   332  		bs.storageBucket,
   333  		bs.limits,
   334  		bs.logger,
   335  		fetcherReg,
   336  		filters,
   337  	)
   338  
   339  	s, err := NewBucketStore(
   340  		bs.storageBucket,
   341  		fetcher,
   342  		userID,
   343  		bs.syncDirForUser(userID),
   344  		userLogger,
   345  		bs.reg,
   346  	)
   347  	if err != nil {
   348  		return nil, err
   349  	}
   350  
   351  	bs.stores[userID] = s
   352  
   353  	return s, nil
   354  }
   355  
   356  // closeBucketStoreAndDeleteLocalFilesForExcludedTenants closes bucket store and removes local "sync" directories
   357  // for tenants that are not included in the current shard.
   358  func (bs *BucketStores) closeBucketStoreAndDeleteLocalFilesForExcludedTenants(includeUserIDs map[string]struct{}) {
   359  	files, err := os.ReadDir(bs.cfg.SyncDir)
   360  	if err != nil {
   361  		return
   362  	}
   363  
   364  	for _, f := range files {
   365  		if !f.IsDir() {
   366  			continue
   367  		}
   368  
   369  		userID := f.Name()
   370  		if _, included := includeUserIDs[userID]; included {
   371  			// Preserve directory for users owned by this shard.
   372  			continue
   373  		}
   374  
   375  		err := bs.closeBucketStore(userID)
   376  		switch {
   377  		case errors.Is(err, errBucketStoreNotFound):
   378  			// This is OK, nothing was closed.
   379  		case err == nil:
   380  			level.Info(bs.logger).Log("msg", "closed bucket store for user", "tenant", userID)
   381  		default:
   382  			level.Warn(bs.logger).Log("msg", "failed to close bucket store for user", "tenant", userID, "err", err)
   383  		}
   384  
   385  		userSyncDir := bs.syncDirForUser(userID)
   386  		err = os.RemoveAll(userSyncDir)
   387  		if err == nil {
   388  			level.Info(bs.logger).Log("msg", "deleted user sync directory", "dir", userSyncDir)
   389  		} else {
   390  			level.Warn(bs.logger).Log("msg", "failed to delete user sync directory", "dir", userSyncDir, "err", err)
   391  		}
   392  	}
   393  }
   394  
   395  func (u *BucketStores) syncDirForUser(userID string) string {
   396  	return filepath.Join(u.cfg.SyncDir, userID)
   397  }
   398  
   399  // closeBucketStore closes bucket store for given user
   400  // and removes it from bucket stores map and metrics.
   401  // If bucket store doesn't exist, returns errBucketStoreNotFound.
   402  // Otherwise returns error from closing the bucket store.
   403  func (bs *BucketStores) closeBucketStore(userID string) error {
   404  	bs.storesMu.Lock()
   405  	unlockInDefer := true
   406  	defer func() {
   407  		if unlockInDefer {
   408  			bs.storesMu.Unlock()
   409  		}
   410  	}()
   411  
   412  	s := bs.stores[userID]
   413  	if s == nil {
   414  		return errBucketStoreNotFound
   415  	}
   416  
   417  	delete(bs.stores, userID)
   418  	s.metrics.Unregister()
   419  	unlockInDefer = false
   420  	bs.storesMu.Unlock()
   421  
   422  	return s.RemoveBlocksAndClose()
   423  }
   424  
   425  // getBlocksLoadedMetric returns the number of blocks currently loaded across all bucket stores.
   426  func (u *BucketStores) getBlocksLoadedMetric() float64 {
   427  	count := 0
   428  
   429  	u.storesMu.RLock()
   430  	for _, store := range u.stores {
   431  		count += store.Stats().BlocksLoaded
   432  	}
   433  	u.storesMu.RUnlock()
   434  
   435  	return float64(count)
   436  }
   437  
// scanUsers lists the tenant IDs present in the storage bucket.
// On error it may return only a subset of tenants (see the caller comment in
// syncUsersBlocks), which is why callers abort the whole sync on failure.
func (bs *BucketStores) scanUsers(ctx context.Context) ([]string, error) {
	return bucket.ListUsers(ctx, bs.storageBucket)
}