github.com/grafana/pyroscope@v1.18.0/pkg/compactor/compactor.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/compactor.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: The Cortex Authors.
     5  package compactor
     6  
     7  import (
     8  	"context"
     9  	"flag"
    10  	"fmt"
    11  	"hash/fnv"
    12  	"math/rand"
    13  	"os"
    14  	"path"
    15  	"path/filepath"
    16  	"strings"
    17  	"time"
    18  
    19  	"github.com/go-kit/log"
    20  	"github.com/go-kit/log/level"
    21  	"github.com/grafana/dskit/backoff"
    22  	"github.com/grafana/dskit/flagext"
    23  	"github.com/grafana/dskit/kv"
    24  	"github.com/grafana/dskit/ring"
    25  	"github.com/grafana/dskit/services"
    26  	"github.com/opentracing/opentracing-go"
    27  	"github.com/opentracing/opentracing-go/ext"
    28  	"github.com/pkg/errors"
    29  	"github.com/prometheus/client_golang/prometheus"
    30  	"github.com/prometheus/client_golang/prometheus/promauto"
    31  	"go.uber.org/atomic"
    32  
    33  	"github.com/grafana/pyroscope/pkg/objstore"
    34  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    35  	"github.com/grafana/pyroscope/pkg/phlaredb/bucket"
    36  	"github.com/grafana/pyroscope/pkg/tenant"
    37  	"github.com/grafana/pyroscope/pkg/util"
    38  )
    39  
// Ring-related constants. The compactor uses a hash ring to shard tenants
// and compaction jobs across replicas.
const (
	// ringKey is the key under which we store the compactors ring in the KVStore.
	ringKey = "compactor"

	// ringAutoForgetUnhealthyPeriods is how many consecutive timeout periods an unhealthy instance
	// in the ring will be automatically removed after.
	ringAutoForgetUnhealthyPeriods = 10
)
    48  
const (
	// Name and help text of the blocks-marked-for-deletion counter, declared as
	// constants so the same series can be registered with different const labels
	// (e.g. reason="compaction" in newMultitenantCompactor).
	blocksMarkedForDeletionName = "pyroscope_compactor_blocks_marked_for_deletion_total"
	blocksMarkedForDeletionHelp = "Total number of blocks marked for deletion in compactor."
)
    53  
var (
	// NOTE: despite the err* prefix, errInvalidBlockRanges and
	// errInvalidBlockDuration are printf-style format strings (consumed via
	// errors.Errorf in Config.Validate), not error values.
	errInvalidBlockRanges                 = "compactor block range periods should be divisible by the previous one, but %s is not divisible by %s"
	errInvalidBlockDuration               = "compactor block range periods should be divisible by the max block duration, but %s is not divisible by %s"
	errInvalidCompactionOrder             = fmt.Errorf("unsupported compaction order (supported values: %s)", strings.Join(CompactionOrders, ", "))
	errInvalidCompactionSplitBy           = fmt.Errorf("unsupported compaction split by (supported values: %s)", strings.Join(CompactionSplitBys, ", "))
	errInvalidMaxOpeningBlocksConcurrency = fmt.Errorf("invalid max-opening-blocks-concurrency value, must be positive")

	// RingOp is the ring operation used for compactor sharding; only ACTIVE
	// instances are eligible to own tokens.
	RingOp                                = ring.NewOp([]ring.InstanceState{ring.ACTIVE}, nil)
)
    62  
// BlocksGrouperFactory builds and returns the grouper to use to compact a tenant's blocks.
// It is exposed as a Config field so downstream projects and tests can inject
// their own implementation.
type BlocksGrouperFactory func(
	ctx context.Context,
	cfg Config,
	cfgProvider ConfigProvider,
	userID string,
	logger log.Logger,
	reg prometheus.Registerer,
) Grouper
    72  
// BlocksCompactorFactory builds and returns the compactor to use to compact a tenant's blocks.
// It is exposed as a Config field so downstream projects and tests can inject
// their own implementation.
type BlocksCompactorFactory func(
	ctx context.Context,
	cfg Config,
	cfgProvider ConfigProvider,
	userID string,
	logger log.Logger,
	metrics *CompactorMetrics,
) (Compactor, error)
    82  
// BlocksPlannerFactory builds and returns the planner to use to plan a tenant's
// block compactions. (The previous comment was a copy-paste of the compactor
// factory's; this factory returns only a Planner.)
type BlocksPlannerFactory func(
	cfg Config,
) Planner
    87  
// Config holds the MultitenantCompactor config. Most fields are populated via
// RegisterFlags; the factory fields at the bottom are programmatic-only
// (yaml:"-") hooks for downstream projects and tests.
type Config struct {
	// Compaction time ranges; each range must be divisible by the previous one
	// (enforced in Validate).
	BlockRanges                DurationList  `yaml:"block_ranges" category:"advanced"`
	BlockSyncConcurrency       int           `yaml:"block_sync_concurrency" category:"advanced"`
	MetaSyncConcurrency        int           `yaml:"meta_sync_concurrency" category:"advanced"`
	DataDir                    string        `yaml:"data_dir"`
	CompactionInterval         time.Duration `yaml:"compaction_interval" category:"advanced"`
	CompactionRetries          int           `yaml:"compaction_retries" category:"advanced"`
	CompactionConcurrency      int           `yaml:"compaction_concurrency" category:"advanced"`
	CompactionWaitPeriod       time.Duration `yaml:"first_level_compaction_wait_period"`
	CleanupInterval            time.Duration `yaml:"cleanup_interval" category:"advanced"`
	CleanupConcurrency         int           `yaml:"cleanup_concurrency" category:"advanced"`
	DeletionDelay              time.Duration `yaml:"deletion_delay" category:"advanced"`
	// NOTE(review): the CLI flag for TenantCleanupDelay is commented out in
	// RegisterFlags, so this is effectively YAML-only and defaults to 0.
	TenantCleanupDelay         time.Duration `yaml:"tenant_cleanup_delay" category:"advanced"`
	MaxCompactionTime          time.Duration `yaml:"max_compaction_time" category:"advanced"`
	NoBlocksFileCleanupEnabled bool          `yaml:"no_blocks_file_cleanup_enabled" category:"experimental"`
	DownsamplerEnabled         bool          `yaml:"downsampler_enabled" category:"advanced"`

	// Compactor concurrency options
	MaxOpeningBlocksConcurrency int `yaml:"max_opening_blocks_concurrency" category:"advanced"` // Number of goroutines opening blocks before compaction.
	// MaxClosingBlocksConcurrency int `yaml:"max_closing_blocks_concurrency" category:"advanced"` // Max number of blocks that can be closed concurrently during split compaction. Note that closing of newly compacted block uses a lot of memory for writing index.

	// Tenant allow/deny lists. An empty EnabledTenants means all tenants are
	// eligible; DisabledTenants always wins over EnabledTenants.
	EnabledTenants  flagext.StringSliceCSV `yaml:"enabled_tenants" category:"advanced"`
	DisabledTenants flagext.StringSliceCSV `yaml:"disabled_tenants" category:"advanced"`

	// Compactors sharding.
	ShardingRing RingConfig `yaml:"sharding_ring"`

	CompactionJobsOrder string `yaml:"compaction_jobs_order" category:"advanced"`
	CompactionSplitBy   string `yaml:"compaction_split_by" category:"advanced"`

	// No need to add options to customize the retry backoff,
	// given the defaults should be fine, but allow to override
	// it in tests.
	retryMinBackoff time.Duration `yaml:"-"`
	retryMaxBackoff time.Duration `yaml:"-"`

	// Allow downstream projects to customise the blocks compactor.
	BlocksGrouperFactory   BlocksGrouperFactory   `yaml:"-"`
	BlocksCompactorFactory BlocksCompactorFactory `yaml:"-"`
	BlocksPlannerFactory   BlocksPlannerFactory   `yaml:"-"`
}
   130  
// RegisterFlags registers the MultitenantCompactor flags. Defaults that cannot
// be expressed through the flag package (BlockRanges via flag.Var, and the
// test-only retry backoffs) are assigned directly before registration.
func (cfg *Config) RegisterFlags(f *flag.FlagSet, logger log.Logger) {
	cfg.ShardingRing.RegisterFlags(f, logger)

	// flag.Var carries no default value, so the BlockRanges default is set here.
	cfg.BlockRanges = DurationList{1 * time.Hour, 2 * time.Hour, 8 * time.Hour}
	cfg.retryMinBackoff = 10 * time.Second
	cfg.retryMaxBackoff = time.Minute

	f.Var(&cfg.BlockRanges, "compactor.block-ranges", "List of compaction time ranges.")
	f.IntVar(&cfg.BlockSyncConcurrency, "compactor.block-sync-concurrency", 8, "Number of Go routines to use when downloading blocks for compaction and uploading resulting blocks.")
	f.IntVar(&cfg.MetaSyncConcurrency, "compactor.meta-sync-concurrency", 20, "Number of Go routines to use when syncing block meta files from the long term storage.")
	f.StringVar(&cfg.DataDir, "compactor.data-dir", "./data-compactor", "Directory to temporarily store blocks during compaction. This directory is not required to be persisted between restarts.")
	f.DurationVar(&cfg.CompactionInterval, "compactor.compaction-interval", 30*time.Minute, "The frequency at which the compaction runs")
	f.DurationVar(&cfg.MaxCompactionTime, "compactor.max-compaction-time", time.Hour, "Max time for starting compactions for a single tenant. After this time no new compactions for the tenant are started before next compaction cycle. This can help in multi-tenant environments to avoid single tenant using all compaction time, but also in single-tenant environments to force new discovery of blocks more often. 0 = disabled.")
	f.IntVar(&cfg.CompactionRetries, "compactor.compaction-retries", 3, "How many times to retry a failed compaction within a single compaction run.")
	f.IntVar(&cfg.CompactionConcurrency, "compactor.compaction-concurrency", 1, "Max number of concurrent compactions running.")
	f.DurationVar(&cfg.CompactionWaitPeriod, "compactor.first-level-compaction-wait-period", 25*time.Minute, "How long the compactor waits before compacting first-level blocks that are uploaded by the ingesters. This configuration option allows for the reduction of cases where the compactor begins to compact blocks before all ingesters have uploaded their blocks to the storage.")
	f.DurationVar(&cfg.CleanupInterval, "compactor.cleanup-interval", 15*time.Minute, "How frequently compactor should run blocks cleanup and maintenance, as well as update the bucket index.")
	f.IntVar(&cfg.CleanupConcurrency, "compactor.cleanup-concurrency", 20, "Max number of tenants for which blocks cleanup and maintenance should run concurrently.")
	f.StringVar(&cfg.CompactionJobsOrder, "compactor.compaction-jobs-order", CompactionOrderOldestFirst, fmt.Sprintf("The sorting to use when deciding which compaction jobs should run first for a given tenant. Supported values are: %s.", strings.Join(CompactionOrders, ", ")))
	f.StringVar(&cfg.CompactionSplitBy, "compactor.compaction-split-by", CompactionSplitByFingerprint, fmt.Sprintf("Experimental: The strategy to use when splitting blocks during compaction. Supported values are: %s.", strings.Join(CompactionSplitBys, ", ")))
	f.DurationVar(&cfg.DeletionDelay, "compactor.deletion-delay", 12*time.Hour, "Time before a block marked for deletion is deleted from bucket. "+
		"If not 0, blocks will be marked for deletion and compactor component will permanently delete blocks marked for deletion from the bucket. "+
		"If 0, blocks will be deleted straight away. Note that deleting blocks immediately can cause query failures.")
	// NOTE(review): the TenantCleanupDelay flag is deliberately not registered,
	// leaving the field at its zero value unless set through YAML.
	// f.DurationVar(&cfg.TenantCleanupDelay, "compactor.tenant-cleanup-delay", 6*time.Hour, "For tenants marked for deletion, this is time between deleting of last block, and doing final cleanup (marker files, debug files) of the tenant.")
	f.BoolVar(&cfg.NoBlocksFileCleanupEnabled, "compactor.no-blocks-file-cleanup-enabled", false, "If enabled, will delete the bucket-index, markers and debug files in the tenant bucket when there are no blocks left in the index.")
	f.BoolVar(&cfg.DownsamplerEnabled, "compactor.downsampler-enabled", false, "If enabled, the compactor will downsample profiles in blocks at compaction level 3 and above. The original profiles are also kept.")
	// compactor concurrency options
	f.IntVar(&cfg.MaxOpeningBlocksConcurrency, "compactor.max-opening-blocks-concurrency", 16, "Number of goroutines opening blocks before compaction.")

	f.Var(&cfg.EnabledTenants, "compactor.enabled-tenants", "Comma separated list of tenants that can be compacted. If specified, only these tenants will be compacted by compactor, otherwise all tenants can be compacted. Subject to sharding.")
	f.Var(&cfg.DisabledTenants, "compactor.disabled-tenants", "Comma separated list of tenants that cannot be compacted by this compactor. If specified, and compactor would normally pick given tenant for compaction (via -compactor.enabled-tenants or sharding), it will be ignored instead.")
}
   164  
   165  func (cfg *Config) Validate(maxBlockDuration time.Duration) error {
   166  	if len(cfg.BlockRanges) > 0 && cfg.BlockRanges[0]%maxBlockDuration != 0 {
   167  		return errors.Errorf(errInvalidBlockDuration, cfg.BlockRanges[0].String(), maxBlockDuration.String())
   168  	}
   169  	// Each block range period should be divisible by the previous one.
   170  	for i := 1; i < len(cfg.BlockRanges); i++ {
   171  		if cfg.BlockRanges[i]%cfg.BlockRanges[i-1] != 0 {
   172  			return errors.Errorf(errInvalidBlockRanges, cfg.BlockRanges[i].String(), cfg.BlockRanges[i-1].String())
   173  		}
   174  	}
   175  
   176  	if cfg.MaxOpeningBlocksConcurrency < 1 {
   177  		return errInvalidMaxOpeningBlocksConcurrency
   178  	}
   179  
   180  	if !util.StringsContain(CompactionOrders, cfg.CompactionJobsOrder) {
   181  		return errInvalidCompactionOrder
   182  	}
   183  
   184  	if !util.StringsContain(CompactionSplitBys, cfg.CompactionSplitBy) {
   185  		return errInvalidCompactionSplitBy
   186  	}
   187  
   188  	return nil
   189  }
   190  
   191  // ConfigProvider defines the per-tenant config provider for the MultitenantCompactor.
   192  type ConfigProvider interface {
   193  	objstore.TenantConfigProvider
   194  
   195  	// CompactorBlocksRetentionPeriod returns the retention period for a given user.
   196  	CompactorBlocksRetentionPeriod(user string) time.Duration
   197  
   198  	// CompactorSplitAndMergeShards returns the number of shards to use when splitting blocks.
   199  	CompactorSplitAndMergeShards(userID string) int
   200  
   201  	// CompactorSplitAndMergeStageSize returns the number of stages split shards will be written to.
   202  	CompactorSplitAndMergeStageSize(userID string) int
   203  
   204  	// CompactorSplitGroups returns the number of groups that blocks used for splitting should
   205  	// be grouped into. Different groups are then split by different jobs.
   206  	CompactorSplitGroups(userID string) int
   207  
   208  	// CompactorTenantShardSize returns number of compactors that this user can use. 0 = all compactors.
   209  	CompactorTenantShardSize(userID string) int
   210  
   211  	// CompactorPartialBlockDeletionDelay returns the partial block delay time period for a given user,
   212  	// and whether the configured value was valid. If the value wasn't valid, the returned delay is the default one
   213  	// and the caller is responsible to warn the Mimir operator about it.
   214  	CompactorPartialBlockDeletionDelay(userID string) (delay time.Duration, valid bool)
   215  
   216  	// CompactorDownsamplerEnabled returns true if the downsampler is enabled for a given user.
   217  	CompactorDownsamplerEnabled(userId string) bool
   218  }
   219  
// MultitenantCompactor is a multi-tenant TSDB blocks compactor based on Thanos.
// It runs as a dskit service: see starting, running and stopping for the
// lifecycle, and compactUsers for the per-cycle work.
type MultitenantCompactor struct {
	services.Service

	compactorCfg Config
	cfgProvider  ConfigProvider
	logger       log.Logger // compactorCfg-scoped logger with component=compactor
	parentLogger log.Logger // original logger, passed to sub-components (e.g. blocks cleaner)
	registerer   prometheus.Registerer

	// Functions that creates bucket client, grouper, planner and compactor using the context.
	// Useful for injecting mock objects from tests.
	blocksGrouperFactory   BlocksGrouperFactory
	blocksCompactorFactory BlocksCompactorFactory
	blocksPlannerFactory   BlocksPlannerFactory

	// Blocks cleaner is responsible to hard delete blocks marked for deletion.
	blocksCleaner *BlocksCleaner

	// Planner used to decide which blocks to compact together.
	// (Built from blocksPlannerFactory in starting.)
	blocksPlanner Planner

	// Client used to run operations on the bucket storing blocks.
	bucketClient objstore.Bucket

	// Ring used for sharding compactions.
	ringLifecycler         *ring.BasicLifecycler
	ring                   *ring.Ring
	ringSubservices        *services.Manager
	ringSubservicesWatcher *services.FailureWatcher

	shardingStrategy shardingStrategy
	jobsOrder        JobsOrderFunc

	// Metrics.
	compactionRunsStarted          prometheus.Counter
	compactionRunsCompleted        prometheus.Counter
	compactionRunsErred            prometheus.Counter
	compactionRunsShutdown         prometheus.Counter
	compactionRunsLastSuccess      prometheus.Gauge
	compactionRunDiscoveredTenants prometheus.Gauge
	compactionRunSkippedTenants    prometheus.Gauge
	compactionRunSucceededTenants  prometheus.Gauge
	compactionRunFailedTenants     prometheus.Gauge
	compactionRunInterval          prometheus.Gauge
	blocksMarkedForDeletion        prometheus.Counter

	// Metrics shared across all BucketCompactor instances.
	bucketCompactorMetrics *BucketCompactorMetrics

	// TSDB syncer metrics
	syncerMetrics *aggregatedSyncerMetrics

	// Block upload metrics
	blockUploadBlocks      *prometheus.GaugeVec
	blockUploadBytes       *prometheus.GaugeVec
	blockUploadFiles       *prometheus.GaugeVec
	blockUploadValidations atomic.Int64 // exported via a GaugeFunc registered in newMultitenantCompactor

	// Compactor metrics
	compactorMetrics *CompactorMetrics
}
   282  
   283  // NewMultitenantCompactor makes a new MultitenantCompactor.
   284  func NewMultitenantCompactor(compactorCfg Config, bucketClient objstore.Bucket, cfgProvider ConfigProvider, logger log.Logger, registerer prometheus.Registerer) (*MultitenantCompactor, error) {
   285  	// Configure the compactor and grouper factories only if they weren't already set by a downstream project.
   286  	if compactorCfg.BlocksGrouperFactory == nil || compactorCfg.BlocksCompactorFactory == nil {
   287  		configureSplitAndMergeCompactor(&compactorCfg)
   288  	}
   289  
   290  	blocksGrouperFactory := compactorCfg.BlocksGrouperFactory
   291  	blocksCompactorFactory := compactorCfg.BlocksCompactorFactory
   292  	blocksPlannerFactory := compactorCfg.BlocksPlannerFactory
   293  
   294  	c, err := newMultitenantCompactor(compactorCfg, bucketClient, cfgProvider, logger, registerer, blocksGrouperFactory, blocksCompactorFactory, blocksPlannerFactory)
   295  	if err != nil {
   296  		return nil, errors.Wrap(err, "failed to create blocks compactor")
   297  	}
   298  
   299  	return c, nil
   300  }
   301  
// newMultitenantCompactor builds the MultitenantCompactor, registers all its
// metrics against the given registerer, and wires up the service lifecycle
// (starting/running/stopping). It does not touch the bucket or the ring; that
// happens in starting.
func newMultitenantCompactor(
	compactorCfg Config,
	bucketClient objstore.Bucket,
	cfgProvider ConfigProvider,
	logger log.Logger,
	registerer prometheus.Registerer,
	blocksGrouperFactory BlocksGrouperFactory,
	blocksCompactorFactory BlocksCompactorFactory,
	blocksPlannerFactory BlocksPlannerFactory,
) (*MultitenantCompactor, error) {
	c := &MultitenantCompactor{
		compactorCfg:           compactorCfg,
		cfgProvider:            cfgProvider,
		parentLogger:           logger,
		logger:                 log.With(logger, "component", "compactor"),
		registerer:             registerer,
		syncerMetrics:          newAggregatedSyncerMetrics(registerer),
		bucketClient:           bucketClient,
		blocksGrouperFactory:   blocksGrouperFactory,
		blocksCompactorFactory: blocksCompactorFactory,
		blocksPlannerFactory:   blocksPlannerFactory,
		compactionRunsStarted: promauto.With(registerer).NewCounter(prometheus.CounterOpts{
			Name: "pyroscope_compactor_runs_started_total",
			Help: "Total number of compaction runs started.",
		}),
		compactionRunsCompleted: promauto.With(registerer).NewCounter(prometheus.CounterOpts{
			Name: "pyroscope_compactor_runs_completed_total",
			Help: "Total number of compaction runs successfully completed.",
		}),
		// compactionRunsErred and compactionRunsShutdown register the same
		// metric name with different "reason" const labels, so they are two
		// series of one metric family.
		compactionRunsErred: promauto.With(registerer).NewCounter(prometheus.CounterOpts{
			Name:        "pyroscope_compactor_runs_failed_total",
			Help:        "Total number of compaction runs failed.",
			ConstLabels: map[string]string{"reason": "error"},
		}),
		compactionRunsShutdown: promauto.With(registerer).NewCounter(prometheus.CounterOpts{
			Name:        "pyroscope_compactor_runs_failed_total",
			Help:        "Total number of compaction runs failed.",
			ConstLabels: map[string]string{"reason": "shutdown"},
		}),
		compactionRunsLastSuccess: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{
			Name: "pyroscope_compactor_last_successful_run_timestamp_seconds",
			Help: "Unix timestamp of the last successful compaction run.",
		}),
		compactionRunDiscoveredTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{
			Name: "pyroscope_compactor_tenants_discovered",
			Help: "Number of tenants discovered during the current compaction run. Reset to 0 when compactor is idle.",
		}),
		compactionRunSkippedTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{
			Name: "pyroscope_compactor_tenants_skipped",
			Help: "Number of tenants skipped during the current compaction run. Reset to 0 when compactor is idle.",
		}),
		compactionRunSucceededTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{
			Name: "pyroscope_compactor_tenants_processing_succeeded",
			Help: "Number of tenants successfully processed during the current compaction run. Reset to 0 when compactor is idle.",
		}),
		compactionRunFailedTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{
			Name: "pyroscope_compactor_tenants_processing_failed",
			Help: "Number of tenants failed processing during the current compaction run. Reset to 0 when compactor is idle.",
		}),
		compactionRunInterval: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{
			Name: "pyroscope_compactor_compaction_interval_seconds",
			Help: "The configured interval on which compaction is run in seconds. Useful when compared to the last successful run metric to accurately detect multiple failed compaction runs.",
		}),
		blocksMarkedForDeletion: promauto.With(registerer).NewCounter(prometheus.CounterOpts{
			Name:        blocksMarkedForDeletionName,
			Help:        blocksMarkedForDeletionHelp,
			ConstLabels: prometheus.Labels{"reason": "compaction"},
		}),
		blockUploadBlocks: promauto.With(registerer).NewGaugeVec(prometheus.GaugeOpts{
			Name: "pyroscope_block_upload_api_blocks_total",
			Help: "Total number of blocks successfully uploaded and validated using the block upload API.",
		}, []string{"user"}),
		blockUploadBytes: promauto.With(registerer).NewGaugeVec(prometheus.GaugeOpts{
			Name: "pyroscope_block_upload_api_bytes_total",
			Help: "Total number of bytes from successfully uploaded and validated blocks using block upload API.",
		}, []string{"user"}),
		blockUploadFiles: promauto.With(registerer).NewGaugeVec(prometheus.GaugeOpts{
			Name: "pyroscope_block_upload_api_files_total",
			Help: "Total number of files from successfully uploaded and validated blocks using block upload API.",
		}, []string{"user"}),
		compactorMetrics: newCompactorMetrics(registerer),
	}

	// Expose the in-flight block upload validations counter as a gauge; the
	// closure reads the atomic counter on every scrape.
	promauto.With(registerer).NewGaugeFunc(prometheus.GaugeOpts{
		Name: "pyroscope_block_upload_validations_in_progress",
		Help: "Number of block upload validations currently running.",
	}, func() float64 {
		return float64(c.blockUploadValidations.Load())
	})

	c.bucketCompactorMetrics = NewBucketCompactorMetrics(c.blocksMarkedForDeletion, registerer)

	if len(compactorCfg.EnabledTenants) > 0 {
		level.Info(c.logger).Log("msg", "compactor using enabled users", "enabled", strings.Join(compactorCfg.EnabledTenants, ", "))
	}
	if len(compactorCfg.DisabledTenants) > 0 {
		level.Info(c.logger).Log("msg", "compactor using disabled users", "disabled", strings.Join(compactorCfg.DisabledTenants, ", "))
	}

	c.jobsOrder = GetJobsOrderFunction(compactorCfg.CompactionJobsOrder)
	if c.jobsOrder == nil {
		return nil, errInvalidCompactionOrder
	}

	c.Service = services.NewBasicService(c.starting, c.running, c.stopping)

	// The last successful compaction run metric is exposed as seconds since epoch, so we need to use seconds for this metric.
	c.compactionRunInterval.Set(c.compactorCfg.CompactionInterval.Seconds())

	return c, nil
}
   413  
// starting is the services.StartingFn for the compactor (wired in
// newMultitenantCompactor). It builds the planner, wraps the bucket client,
// brings up the ring (lifecycler + client), waits for this instance to become
// ACTIVE and for ring stability, and finally starts the blocks cleaner.
// The ordering of these steps matters; see the inline comments.
func (c *MultitenantCompactor) starting(ctx context.Context) error {
	var err error

	c.blocksPlanner = c.blocksPlannerFactory(c.compactorCfg)

	// Wrap the bucket client to write block deletion marks in the global location too.
	c.bucketClient = block.BucketWithGlobalMarkers(c.bucketClient)

	// Initialize the compactors ring if sharding is enabled.
	c.ring, c.ringLifecycler, err = newRingAndLifecycler(c.compactorCfg.ShardingRing, c.logger, c.registerer)
	if err != nil {
		return err
	}

	c.ringSubservices, err = services.NewManager(c.ringLifecycler, c.ring)
	if err != nil {
		return errors.Wrap(err, "unable to create compactor ring dependencies")
	}

	c.ringSubservicesWatcher = services.NewFailureWatcher()
	c.ringSubservicesWatcher.WatchManager(c.ringSubservices)
	if err = c.ringSubservices.StartAsync(ctx); err != nil {
		return errors.Wrap(err, "unable to start compactor ring dependencies")
	}

	// The same timeout bounds both the subservices health wait and the
	// ACTIVE-state wait below.
	ctxTimeout, cancel := context.WithTimeout(ctx, c.compactorCfg.ShardingRing.WaitActiveInstanceTimeout)
	defer cancel()
	if err = c.ringSubservices.AwaitHealthy(ctxTimeout); err != nil {
		return errors.Wrap(err, "unable to start compactor ring dependencies")
	}

	// If sharding is enabled we should wait until this instance is ACTIVE within the ring. This
	// MUST be done before starting any other component depending on the users scanner, because
	// the users scanner depends on the ring (to check whether a user belongs to this shard or not).
	level.Info(c.logger).Log("msg", "waiting until compactor is ACTIVE in the ring")
	if err = ring.WaitInstanceState(ctxTimeout, c.ring, c.ringLifecycler.GetInstanceID(), ring.ACTIVE); err != nil {
		return errors.Wrap(err, "compactor failed to become ACTIVE in the ring")
	}

	level.Info(c.logger).Log("msg", "compactor is ACTIVE in the ring")

	// In the event of a cluster cold start or scale up of 2+ compactor instances at the same
	// time, we may end up in a situation where each new compactor instance starts at a slightly
	// different time and thus each one starts with a different state of the ring. It's better
	// to just wait a short time for ring stability.
	if c.compactorCfg.ShardingRing.WaitStabilityMinDuration > 0 {
		minWaiting := c.compactorCfg.ShardingRing.WaitStabilityMinDuration
		maxWaiting := c.compactorCfg.ShardingRing.WaitStabilityMaxDuration

		level.Info(c.logger).Log("msg", "waiting until compactor ring topology is stable", "min_waiting", minWaiting.String(), "max_waiting", maxWaiting.String())
		// Instability past maxWaiting is logged but deliberately not fatal.
		if err := ring.WaitRingStability(ctx, c.ring, RingOp, minWaiting, maxWaiting); err != nil {
			level.Warn(c.logger).Log("msg", "compactor ring topology is not stable after the max waiting time, proceeding anyway")
		} else {
			level.Info(c.logger).Log("msg", "compactor ring topology is stable")
		}
	}

	allowedTenants := tenant.NewAllowedTenants(c.compactorCfg.EnabledTenants, c.compactorCfg.DisabledTenants)
	c.shardingStrategy = newSplitAndMergeShardingStrategy(allowedTenants, c.ring, c.ringLifecycler, c.cfgProvider)

	// Create the blocks cleaner (service).
	c.blocksCleaner = NewBlocksCleaner(BlocksCleanerConfig{
		DeletionDelay:              c.compactorCfg.DeletionDelay,
		CleanupInterval:            util.DurationWithJitter(c.compactorCfg.CleanupInterval, 0.1),
		CleanupConcurrency:         c.compactorCfg.CleanupConcurrency,
		TenantCleanupDelay:         c.compactorCfg.TenantCleanupDelay,
		DeleteBlocksConcurrency:    defaultDeleteBlocksConcurrency,
		NoBlocksFileCleanupEnabled: c.compactorCfg.NoBlocksFileCleanupEnabled,
	}, c.bucketClient, c.shardingStrategy.blocksCleanerOwnUser, c.cfgProvider, c.parentLogger, c.registerer)

	// Start blocks cleaner asynchronously, don't wait until initial cleanup is finished.
	if err := c.blocksCleaner.StartAsync(ctx); err != nil {
		// Tear the ring back down on failure so we don't leave it registered.
		c.ringSubservices.StopAsync()
		return errors.Wrap(err, "failed to start the blocks cleaner")
	}

	return nil
}
   493  
   494  func newRingAndLifecycler(cfg RingConfig, logger log.Logger, reg prometheus.Registerer) (*ring.Ring, *ring.BasicLifecycler, error) {
   495  	reg = prometheus.WrapRegistererWithPrefix("pyroscope_", reg)
   496  	kvStore, err := kv.NewClient(cfg.Common.KVStore, ring.GetCodec(), kv.RegistererWithKVName(reg, "compactor-lifecycler"), logger)
   497  	if err != nil {
   498  		return nil, nil, errors.Wrap(err, "failed to initialize compactors' KV store")
   499  	}
   500  
   501  	lifecyclerCfg, err := cfg.ToBasicLifecyclerConfig(logger)
   502  	if err != nil {
   503  		return nil, nil, errors.Wrap(err, "failed to build compactors' lifecycler config")
   504  	}
   505  
   506  	var delegate ring.BasicLifecyclerDelegate
   507  	delegate = ring.NewInstanceRegisterDelegate(ring.ACTIVE, lifecyclerCfg.NumTokens)
   508  	delegate = ring.NewLeaveOnStoppingDelegate(delegate, logger)
   509  	delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*lifecyclerCfg.HeartbeatTimeout, delegate, logger)
   510  
   511  	compactorsLifecycler, err := ring.NewBasicLifecycler(lifecyclerCfg, "compactor", ringKey, kvStore, delegate, logger, reg)
   512  	if err != nil {
   513  		return nil, nil, errors.Wrap(err, "failed to initialize compactors' lifecycler")
   514  	}
   515  
   516  	compactorsRing, err := ring.New(cfg.toRingConfig(), "compactor", ringKey, logger, reg)
   517  	if err != nil {
   518  		return nil, nil, errors.Wrap(err, "failed to initialize compactors' ring client")
   519  	}
   520  
   521  	return compactorsRing, compactorsLifecycler, nil
   522  }
   523  
   524  func (c *MultitenantCompactor) stopping(_ error) error {
   525  	ctx := context.Background()
   526  
   527  	services.StopAndAwaitTerminated(ctx, c.blocksCleaner) //nolint:errcheck
   528  	if c.ringSubservices != nil {
   529  		return services.StopManagerAndAwaitStopped(ctx, c.ringSubservices)
   530  	}
   531  	return nil
   532  }
   533  
   534  func (c *MultitenantCompactor) running(ctx context.Context) error {
   535  	// Run an initial compaction before starting the interval.
   536  	c.compactUsers(ctx)
   537  
   538  	ticker := time.NewTicker(util.DurationWithJitter(c.compactorCfg.CompactionInterval, 0.05))
   539  	defer ticker.Stop()
   540  
   541  	for {
   542  		select {
   543  		case <-ticker.C:
   544  			c.compactUsers(ctx)
   545  		case <-ctx.Done():
   546  			return nil
   547  		case err := <-c.ringSubservicesWatcher.Chan():
   548  			return errors.Wrap(err, "compactor subservice failed")
   549  		}
   550  	}
   551  }
   552  
   553  func (c *MultitenantCompactor) compactUsers(ctx context.Context) {
   554  	sp, ctx := opentracing.StartSpanFromContext(ctx, "CompactUsers")
   555  	defer sp.Finish()
   556  
   557  	succeeded := false
   558  	compactionErrorCount := 0
   559  
   560  	c.compactionRunsStarted.Inc()
   561  
   562  	defer func() {
   563  		if succeeded && compactionErrorCount == 0 {
   564  			c.compactionRunsCompleted.Inc()
   565  			c.compactionRunsLastSuccess.SetToCurrentTime()
   566  		} else if compactionErrorCount == 0 {
   567  			c.compactionRunsShutdown.Inc()
   568  		} else {
   569  			c.compactionRunsErred.Inc()
   570  		}
   571  		sp.LogKV("error_count", compactionErrorCount)
   572  
   573  		// Reset progress metrics once done.
   574  		c.compactionRunDiscoveredTenants.Set(0)
   575  		c.compactionRunSkippedTenants.Set(0)
   576  		c.compactionRunSucceededTenants.Set(0)
   577  		c.compactionRunFailedTenants.Set(0)
   578  	}()
   579  
   580  	level.Info(c.logger).Log("msg", "discovering users from bucket")
   581  	users, err := c.discoverUsersWithRetries(ctx)
   582  	if err != nil {
   583  		if !errors.Is(err, context.Canceled) {
   584  			compactionErrorCount++
   585  			level.Error(c.logger).Log("msg", "failed to discover users from bucket", "err", err)
   586  		}
   587  		return
   588  	}
   589  	sp.LogKV("discovered_user_count", len(users))
   590  	level.Info(c.logger).Log("msg", "discovered users from bucket", "users", len(users))
   591  	c.compactionRunDiscoveredTenants.Set(float64(len(users)))
   592  
   593  	// When starting multiple compactor replicas nearly at the same time, running in a cluster with
   594  	// a large number of tenants, we may end up in a situation where the 1st user is compacted by
   595  	// multiple replicas at the same time. Shuffling users helps reduce the likelihood this will happen.
   596  	rand.Shuffle(len(users), func(i, j int) {
   597  		users[i], users[j] = users[j], users[i]
   598  	})
   599  
   600  	// Keep track of users owned by this shard, so that we can delete the local files for all other users.
   601  	ownedUsers := map[string]struct{}{}
   602  	defer func() {
   603  		sp.LogKV("owned_user_count", len(ownedUsers))
   604  	}()
   605  	for _, userID := range users {
   606  		// Ensure the context has not been canceled (ie. compactor shutdown has been triggered).
   607  		if ctx.Err() != nil {
   608  			level.Info(c.logger).Log("msg", "interrupting compaction of user blocks", "err", err)
   609  			return
   610  		}
   611  
   612  		// Ensure the user ID belongs to our shard.
   613  		if owned, err := c.shardingStrategy.compactorOwnUser(userID); err != nil {
   614  			c.compactionRunSkippedTenants.Inc()
   615  			level.Warn(c.logger).Log("msg", "unable to check if user is owned by this shard", "tenant", userID, "err", err)
   616  			continue
   617  		} else if !owned {
   618  			c.compactionRunSkippedTenants.Inc()
   619  			level.Debug(c.logger).Log("msg", "skipping user because it is not owned by this shard", "tenant", userID)
   620  			continue
   621  		}
   622  
   623  		ownedUsers[userID] = struct{}{}
   624  
   625  		if markedForDeletion, err := bucket.TenantDeletionMarkExists(ctx, c.bucketClient, userID); err != nil {
   626  			c.compactionRunSkippedTenants.Inc()
   627  			level.Warn(c.logger).Log("msg", "unable to check if user is marked for deletion", "tenant", userID, "err", err)
   628  			continue
   629  		} else if markedForDeletion {
   630  			c.compactionRunSkippedTenants.Inc()
   631  			level.Debug(c.logger).Log("msg", "skipping user because it is marked for deletion", "tenant", userID)
   632  			continue
   633  		}
   634  
   635  		level.Info(c.logger).Log("msg", "starting compaction of user blocks", "tenant", userID)
   636  
   637  		if err = c.compactUserWithRetries(ctx, userID); err != nil {
   638  			switch {
   639  			case errors.Is(err, context.Canceled):
   640  				// We don't want to count shutdowns as failed compactions because we will pick up with the rest of the compaction after the restart.
   641  				level.Info(c.logger).Log("msg", "compaction for user was interrupted by a shutdown", "tenant", userID)
   642  				return
   643  			default:
   644  				c.compactionRunFailedTenants.Inc()
   645  				compactionErrorCount++
   646  				level.Error(c.logger).Log("msg", "failed to compact user blocks", "tenant", userID, "err", err)
   647  			}
   648  			continue
   649  		}
   650  
   651  		c.compactionRunSucceededTenants.Inc()
   652  		level.Info(c.logger).Log("msg", "successfully compacted user blocks", "tenant", userID)
   653  	}
   654  
   655  	// Delete local files for unowned tenants, if there are any. This cleans up
   656  	// leftover local files for tenants that belong to different compactors now,
   657  	// or have been deleted completely.
   658  	for userID := range c.listTenantsWithMetaSyncDirectories() {
   659  		if _, owned := ownedUsers[userID]; owned {
   660  			continue
   661  		}
   662  
   663  		dir := c.metaSyncDirForUser(userID)
   664  		s, err := os.Stat(dir)
   665  		if err != nil {
   666  			if !os.IsNotExist(err) {
   667  				level.Warn(c.logger).Log("msg", "failed to stat local directory with user data", "dir", dir, "err", err)
   668  			}
   669  			continue
   670  		}
   671  
   672  		if s.IsDir() {
   673  			err := os.RemoveAll(dir)
   674  			if err == nil {
   675  				level.Info(c.logger).Log("msg", "deleted directory for user not owned by this shard", "dir", dir)
   676  			} else {
   677  				level.Warn(c.logger).Log("msg", "failed to delete directory for user not owned by this shard", "dir", dir, "err", err)
   678  			}
   679  		}
   680  	}
   681  
   682  	succeeded = true
   683  }
   684  
   685  func (c *MultitenantCompactor) compactUserWithRetries(ctx context.Context, userID string) error {
   686  	var lastErr error
   687  
   688  	retries := backoff.New(ctx, backoff.Config{
   689  		MinBackoff: c.compactorCfg.retryMinBackoff,
   690  		MaxBackoff: c.compactorCfg.retryMaxBackoff,
   691  		MaxRetries: c.compactorCfg.CompactionRetries,
   692  	})
   693  
   694  	for retries.Ongoing() {
   695  		sp, ctx := opentracing.StartSpanFromContext(ctx, "CompactUser", opentracing.Tag{Key: "tenantID", Value: userID})
   696  		lastErr = c.compactUser(ctx, userID)
   697  		if lastErr == nil {
   698  			sp.Finish()
   699  			return nil
   700  		}
   701  		ext.LogError(sp, lastErr)
   702  		sp.Finish()
   703  		retries.Wait()
   704  	}
   705  
   706  	return lastErr
   707  }
   708  
// compactUser performs one compaction cycle for a single tenant: it wires up
// the meta fetcher (with dedup and no-compact filters), the syncer, and the
// bucket compactor, then runs the compaction. The per-run registry is gathered
// into the shared syncer metrics on return (deferred below).
func (c *MultitenantCompactor) compactUser(ctx context.Context, userID string) error {
	userBucket := objstore.NewTenantBucketClient(userID, c.bucketClient, c.cfgProvider)
	// Use a throwaway registry so per-run metrics from the fetcher/syncer can
	// be collected once at the end instead of accumulating registrations.
	reg := prometheus.NewRegistry()
	defer c.syncerMetrics.gatherThanosSyncerMetrics(reg)

	userLogger := util.LoggerWithUserID(userID, c.logger)

	// Filters out duplicate blocks that can be formed from two or more overlapping
	// blocks that fully submatches the source blocks of the older blocks.
	deduplicateBlocksFilter := NewShardAwareDeduplicateFilter()

	// List of filters to apply (order matters).
	fetcherFilters := []block.MetadataFilter{
		deduplicateBlocksFilter,
		// removes blocks that should not be compacted due to being marked so.
		NewNoCompactionMarkFilter(userBucket, true),
	}

	fetcher, err := block.NewMetaFetcher(
		userLogger,
		c.compactorCfg.MetaSyncConcurrency,
		userBucket,
		c.metaSyncDirForUser(userID),
		reg,
		fetcherFilters,
	)
	if err != nil {
		return err
	}

	// The syncer keeps local block metadata in sync with the bucket and marks
	// duplicate source blocks for deletion.
	syncer, err := NewMetaSyncer(
		userLogger,
		reg,
		userBucket,
		fetcher,
		deduplicateBlocksFilter,
		c.blocksMarkedForDeletion,
	)
	if err != nil {
		return errors.Wrap(err, "failed to create syncer")
	}

	// Create blocks compactor dependencies.
	blocksCompactor, err := c.blocksCompactorFactory(ctx, c.compactorCfg, c.cfgProvider, userID, c.logger, c.compactorMetrics)
	if err != nil {
		return errors.Wrap(err, "failed to initialize compactor dependencies")
	}

	compactor, err := NewBucketCompactor(
		userLogger,
		syncer,
		c.blocksGrouperFactory(ctx, c.compactorCfg, c.cfgProvider, userID, userLogger, reg),
		c.blocksPlanner,
		blocksCompactor,
		path.Join(c.compactorCfg.DataDir, "compact"),
		userBucket,
		c.compactorCfg.CompactionConcurrency,
		c.shardingStrategy.ownJob,
		c.jobsOrder,
		c.compactorCfg.CompactionWaitPeriod,
		c.compactorCfg.BlockSyncConcurrency,
		c.bucketCompactorMetrics,
	)
	if err != nil {
		return errors.Wrap(err, "failed to create bucket compactor")
	}

	// Run the actual compaction, bounded by the configured max compaction time.
	if err := compactor.Compact(ctx, c.compactorCfg.MaxCompactionTime); err != nil {
		return errors.Wrap(err, "compaction")
	}

	return nil
}
   782  
   783  func (c *MultitenantCompactor) discoverUsersWithRetries(ctx context.Context) ([]string, error) {
   784  	sp, ctx := opentracing.StartSpanFromContext(ctx, "DiscoverUsers")
   785  	defer sp.Finish()
   786  
   787  	var lastErr error
   788  
   789  	retries := backoff.New(ctx, backoff.Config{
   790  		MinBackoff: c.compactorCfg.retryMinBackoff,
   791  		MaxBackoff: c.compactorCfg.retryMaxBackoff,
   792  		MaxRetries: c.compactorCfg.CompactionRetries,
   793  	})
   794  
   795  	for retries.Ongoing() {
   796  		var users []string
   797  
   798  		users, lastErr = c.discoverUsers(ctx)
   799  		if lastErr == nil {
   800  			return users, nil
   801  		}
   802  
   803  		retries.Wait()
   804  	}
   805  
   806  	return nil, lastErr
   807  }
   808  
// discoverUsers returns the tenant IDs currently present in the bucket.
func (c *MultitenantCompactor) discoverUsers(ctx context.Context) ([]string, error) {
	return bucket.ListUsers(ctx, c.bucketClient)
}
   812  
// shardingStrategy describes whether compactor "owns" given user or job.
// Ownership decides which replica compacts a tenant's blocks, which replica
// cleans them up, and which replica executes a given compaction job.
type shardingStrategy interface {
	// compactorOwnUser reports whether this compactor should plan/compact
	// blocks for the given tenant.
	compactorOwnUser(userID string) (bool, error)
	// blocksCleanerOwnUser must be concurrency-safe
	blocksCleanerOwnUser(userID string) (bool, error)
	// ownJob reports whether this compactor should execute the given job.
	ownJob(job *Job) (bool, error)
}
   820  
// splitAndMergeShardingStrategy is used by split-and-merge compactor when configured with sharding.
// All compactors from user's shard own the user for compaction purposes, and plan jobs.
// Each job is only owned and executed by single compactor.
// Only one of compactors from user's shard will do cleanup.
type splitAndMergeShardingStrategy struct {
	allowedTenants *tenant.AllowedTenants   // tenant allow/deny list; disallowed tenants are never owned
	ring           *ring.Ring               // compactors ring used to build per-tenant shuffle shards
	ringLifecycler *ring.BasicLifecycler    // this instance's ring identity (ID and address)
	configProvider ConfigProvider           // source of per-tenant shard size configuration
}
   831  
   832  func newSplitAndMergeShardingStrategy(allowedTenants *tenant.AllowedTenants, ring *ring.Ring, ringLifecycler *ring.BasicLifecycler, configProvider ConfigProvider) *splitAndMergeShardingStrategy {
   833  	return &splitAndMergeShardingStrategy{
   834  		allowedTenants: allowedTenants,
   835  		ring:           ring,
   836  		ringLifecycler: ringLifecycler,
   837  		configProvider: configProvider,
   838  	}
   839  }
   840  
   841  // Only single instance in the subring can run blocks cleaner for given user. blocksCleanerOwnUser is concurrency-safe.
   842  func (s *splitAndMergeShardingStrategy) blocksCleanerOwnUser(userID string) (bool, error) {
   843  	if !s.allowedTenants.IsAllowed(userID) {
   844  		return false, nil
   845  	}
   846  
   847  	r := s.ring.ShuffleShard(userID, s.configProvider.CompactorTenantShardSize(userID))
   848  
   849  	return instanceOwnsTokenInRing(r, s.ringLifecycler.GetInstanceAddr(), userID)
   850  }
   851  
   852  // ALL compactors should plan jobs for all users.
   853  func (s *splitAndMergeShardingStrategy) compactorOwnUser(userID string) (bool, error) {
   854  	if !s.allowedTenants.IsAllowed(userID) {
   855  		return false, nil
   856  	}
   857  
   858  	r := s.ring.ShuffleShard(userID, s.configProvider.CompactorTenantShardSize(userID))
   859  
   860  	return r.HasInstance(s.ringLifecycler.GetInstanceID()), nil
   861  }
   862  
   863  // Only single compactor should execute the job.
   864  func (s *splitAndMergeShardingStrategy) ownJob(job *Job) (bool, error) {
   865  	ok, err := s.compactorOwnUser(job.UserID())
   866  	if err != nil || !ok {
   867  		return ok, err
   868  	}
   869  
   870  	r := s.ring.ShuffleShard(job.UserID(), s.configProvider.CompactorTenantShardSize(job.UserID()))
   871  
   872  	return instanceOwnsTokenInRing(r, s.ringLifecycler.GetInstanceAddr(), job.ShardingKey())
   873  }
   874  
   875  func instanceOwnsTokenInRing(r ring.ReadRing, instanceAddr string, key string) (bool, error) {
   876  	// Hash the key.
   877  	hasher := fnv.New32a()
   878  	_, _ = hasher.Write([]byte(key))
   879  	hash := hasher.Sum32()
   880  
   881  	// Check whether this compactor instance owns the token.
   882  	rs, err := r.Get(hash, RingOp, nil, nil, nil)
   883  	if err != nil {
   884  		return false, err
   885  	}
   886  
   887  	if len(rs.Instances) != 1 {
   888  		return false, fmt.Errorf("unexpected number of compactors in the shard (expected 1, got %d)", len(rs.Instances))
   889  	}
   890  
   891  	return rs.Instances[0].Addr == instanceAddr, nil
   892  }
   893  
   894  const compactorMetaPrefix = "compactor-meta-"
   895  
   896  // metaSyncDirForUser returns directory to store cached meta files.
   897  // The fetcher stores cached metas in the "meta-syncer/" sub directory,
   898  // but we prefix it with "compactor-meta-" in order to guarantee no clashing with
   899  // the directory used by the Thanos Syncer, whatever is the user ID.
   900  func (c *MultitenantCompactor) metaSyncDirForUser(userID string) string {
   901  	return filepath.Join(c.compactorCfg.DataDir, compactorMetaPrefix+userID)
   902  }
   903  
   904  // This function returns tenants with meta sync directories found on local disk. On error, it returns nil map.
   905  func (c *MultitenantCompactor) listTenantsWithMetaSyncDirectories() map[string]struct{} {
   906  	result := map[string]struct{}{}
   907  
   908  	files, err := os.ReadDir(c.compactorCfg.DataDir)
   909  	if err != nil {
   910  		return nil
   911  	}
   912  
   913  	for _, f := range files {
   914  		if !f.IsDir() {
   915  			continue
   916  		}
   917  
   918  		if !strings.HasPrefix(f.Name(), compactorMetaPrefix) {
   919  			continue
   920  		}
   921  
   922  		result[f.Name()[len(compactorMetaPrefix):]] = struct{}{}
   923  	}
   924  
   925  	return result
   926  }