github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/compactor/compactor.go

package compactor

import (
	"context"
	"flag"
	"fmt"
	"hash/fnv"
	"io/ioutil"
	"math/rand"
	"os"
	"path"
	"path/filepath"
	"strings"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/backoff"
	"github.com/grafana/dskit/flagext"
	"github.com/grafana/dskit/ring"
	"github.com/grafana/dskit/services"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/prometheus/tsdb"
	"github.com/thanos-io/thanos/pkg/block"
	"github.com/thanos-io/thanos/pkg/block/metadata"
	"github.com/thanos-io/thanos/pkg/compact"
	"github.com/thanos-io/thanos/pkg/compact/downsample"
	"github.com/thanos-io/thanos/pkg/objstore"

	"github.com/cortexproject/cortex/pkg/storage/bucket"
	cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb"
	"github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex"
	"github.com/cortexproject/cortex/pkg/util"
	util_log "github.com/cortexproject/cortex/pkg/util/log"
)

const (
	blocksMarkedForDeletionName = "cortex_compactor_blocks_marked_for_deletion_total"
	blocksMarkedForDeletionHelp = "Total number of blocks marked for deletion in compactor."
)

var (
	errInvalidBlockRanges = "compactor block range periods should be divisible by the previous one, but %s is not divisible by %s"
	RingOp                = ring.NewOp([]ring.InstanceState{ring.ACTIVE}, nil)

	DefaultBlocksGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.Bucket, logger log.Logger, reg prometheus.Registerer, blocksMarkedForDeletion prometheus.Counter, garbageCollectedBlocks prometheus.Counter) compact.Grouper {
		return compact.NewDefaultGrouper(
			logger,
			bkt,
			false, // Do not accept malformed indexes
			true,  // Enable vertical compaction
			reg,
			blocksMarkedForDeletion,
			garbageCollectedBlocks,
			prometheus.NewCounter(prometheus.CounterOpts{}), // Unregistered placeholder counter, required by the Thanos grouper but not tracked by Cortex.
			metadata.NoneFunc)
	}

	DefaultBlocksCompactorFactory = func(ctx context.Context, cfg Config, logger log.Logger, reg prometheus.Registerer) (compact.Compactor, compact.Planner, error) {
		compactor, err := tsdb.NewLeveledCompactor(ctx, reg, logger, cfg.BlockRanges.ToMilliseconds(), downsample.NewPool(), nil)
		if err != nil {
			return nil, nil, err
		}

		planner := compact.NewTSDBBasedPlanner(logger, cfg.BlockRanges.ToMilliseconds())
		return compactor, planner, nil
	}
)
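
// Illustrative sketch (an editorial addition, not part of the upstream source):
// downstream projects can swap the compaction strategy by setting the factory
// hooks on Config before calling NewCompactor. The factory below simply
// delegates to the default to show the expected shape.
//
//	cfg := Config{}
//	cfg.BlocksCompactorFactory = func(ctx context.Context, cfg Config, logger log.Logger, reg prometheus.Registerer) (compact.Compactor, compact.Planner, error) {
//		// Custom planner/compactor wiring would go here.
//		return DefaultBlocksCompactorFactory(ctx, cfg, logger, reg)
//	}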

// BlocksGrouperFactory builds and returns the grouper to use to compact a tenant's blocks.
type BlocksGrouperFactory func(
	ctx context.Context,
	cfg Config,
	bkt objstore.Bucket,
	logger log.Logger,
	reg prometheus.Registerer,
	blocksMarkedForDeletion prometheus.Counter,
	garbageCollectedBlocks prometheus.Counter,
) compact.Grouper

// BlocksCompactorFactory builds and returns the compactor and planner to use to compact a tenant's blocks.
type BlocksCompactorFactory func(
	ctx context.Context,
	cfg Config,
	logger log.Logger,
	reg prometheus.Registerer,
) (compact.Compactor, compact.Planner, error)

// Config holds the Compactor config.
type Config struct {
	BlockRanges           cortex_tsdb.DurationList `yaml:"block_ranges"`
	BlockSyncConcurrency  int                      `yaml:"block_sync_concurrency"`
	MetaSyncConcurrency   int                      `yaml:"meta_sync_concurrency"`
	ConsistencyDelay      time.Duration            `yaml:"consistency_delay"`
	DataDir               string                   `yaml:"data_dir"`
	CompactionInterval    time.Duration            `yaml:"compaction_interval"`
	CompactionRetries     int                      `yaml:"compaction_retries"`
	CompactionConcurrency int                      `yaml:"compaction_concurrency"`
	CleanupInterval       time.Duration            `yaml:"cleanup_interval"`
	CleanupConcurrency    int                      `yaml:"cleanup_concurrency"`
	DeletionDelay         time.Duration            `yaml:"deletion_delay"`
	TenantCleanupDelay    time.Duration            `yaml:"tenant_cleanup_delay"`

	// Whether the migration of block deletion marks to the global markers location is enabled.
	BlockDeletionMarksMigrationEnabled bool `yaml:"block_deletion_marks_migration_enabled"`

	EnabledTenants  flagext.StringSliceCSV `yaml:"enabled_tenants"`
	DisabledTenants flagext.StringSliceCSV `yaml:"disabled_tenants"`

	// Compactors sharding.
	ShardingEnabled bool       `yaml:"sharding_enabled"`
	ShardingRing    RingConfig `yaml:"sharding_ring"`

	// No need to add options to customize the retry backoff, given the
	// defaults should be fine, but allow overriding them in tests.
	retryMinBackoff time.Duration `yaml:"-"`
	retryMaxBackoff time.Duration `yaml:"-"`

	// Allow downstream projects to customise the blocks compactor.
	BlocksGrouperFactory   BlocksGrouperFactory   `yaml:"-"`
	BlocksCompactorFactory BlocksCompactorFactory `yaml:"-"`
}
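
// Example YAML (an illustrative sketch derived from the struct tags above;
// the values shown are the defaults registered in RegisterFlags below, and
// the exact duration-list rendering may differ):
//
//	block_ranges: [2h, 12h, 24h]
//	data_dir: ./data
//	compaction_interval: 1h
//	compaction_retries: 3
//	deletion_delay: 12h
//	sharding_enabled: false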

// RegisterFlags registers the Compactor flags.
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
	cfg.ShardingRing.RegisterFlags(f)

	cfg.BlockRanges = cortex_tsdb.DurationList{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}
	cfg.retryMinBackoff = 10 * time.Second
	cfg.retryMaxBackoff = time.Minute

	f.Var(&cfg.BlockRanges, "compactor.block-ranges", "List of compaction time ranges.")
	f.DurationVar(&cfg.ConsistencyDelay, "compactor.consistency-delay", 0, fmt.Sprintf("Minimum age of fresh (non-compacted) blocks before they are being processed. Malformed blocks older than the maximum of consistency-delay and %s will be removed.", compact.PartialUploadThresholdAge))
	f.IntVar(&cfg.BlockSyncConcurrency, "compactor.block-sync-concurrency", 20, "Number of goroutines to use when syncing block index and chunks files from the long term storage.")
	f.IntVar(&cfg.MetaSyncConcurrency, "compactor.meta-sync-concurrency", 20, "Number of goroutines to use when syncing block meta files from the long term storage.")
	f.StringVar(&cfg.DataDir, "compactor.data-dir", "./data", "Data directory in which to cache blocks and process compactions.")
	f.DurationVar(&cfg.CompactionInterval, "compactor.compaction-interval", time.Hour, "The frequency at which the compaction runs.")
	f.IntVar(&cfg.CompactionRetries, "compactor.compaction-retries", 3, "How many times to retry a failed compaction within a single compaction run.")
	f.IntVar(&cfg.CompactionConcurrency, "compactor.compaction-concurrency", 1, "Max number of concurrent compactions running.")
	f.DurationVar(&cfg.CleanupInterval, "compactor.cleanup-interval", 15*time.Minute, "How frequently compactor should run blocks cleanup and maintenance, as well as update the bucket index.")
	f.IntVar(&cfg.CleanupConcurrency, "compactor.cleanup-concurrency", 20, "Max number of tenants for which blocks cleanup and maintenance should run concurrently.")
	f.BoolVar(&cfg.ShardingEnabled, "compactor.sharding-enabled", false, "Shard tenants across multiple compactor instances. Sharding is required if you run multiple compactor instances, in order to coordinate compactions and avoid race conditions leading to the same tenant blocks simultaneously compacted by different instances.")
	f.DurationVar(&cfg.DeletionDelay, "compactor.deletion-delay", 12*time.Hour, "Time before a block marked for deletion is deleted from bucket. "+
		"If not 0, blocks will be marked for deletion and the compactor component will permanently delete blocks marked for deletion from the bucket. "+
		"If 0, blocks will be deleted straight away. Note that deleting blocks immediately can cause query failures.")
	f.DurationVar(&cfg.TenantCleanupDelay, "compactor.tenant-cleanup-delay", 6*time.Hour, "For tenants marked for deletion, this is the time between deleting the last block and doing the final cleanup (marker files, debug files) of the tenant.")
	f.BoolVar(&cfg.BlockDeletionMarksMigrationEnabled, "compactor.block-deletion-marks-migration-enabled", true, "When enabled, at compactor startup the bucket will be scanned and all found deletion marks inside the block location will be copied to the markers global location too. This option can (and should) be safely disabled as soon as the compactor has successfully run at least once.")

	f.Var(&cfg.EnabledTenants, "compactor.enabled-tenants", "Comma separated list of tenants that can be compacted. If specified, only these tenants will be compacted by compactor, otherwise all tenants can be compacted. Subject to sharding.")
	f.Var(&cfg.DisabledTenants, "compactor.disabled-tenants", "Comma separated list of tenants that cannot be compacted by this compactor. If specified, and the compactor would normally pick a given tenant for compaction (via -compactor.enabled-tenants or sharding), it will be ignored instead.")
}
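
// Example invocation (an illustrative sketch; the compactor flag names are the
// ones registered above, while -target is Cortex's usual module selector):
//
//	cortex -target=compactor \
//		-compactor.block-ranges=2h,12h,24h \
//		-compactor.data-dir=./data \
//		-compactor.sharding-enabled=true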

func (cfg *Config) Validate() error {
	// Each block range period should be divisible by the previous one.
	for i := 1; i < len(cfg.BlockRanges); i++ {
		if cfg.BlockRanges[i]%cfg.BlockRanges[i-1] != 0 {
			return errors.Errorf(errInvalidBlockRanges, cfg.BlockRanges[i].String(), cfg.BlockRanges[i-1].String())
		}
	}

	return nil
}
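
// For example, block ranges of 2h,12h,24h pass validation (12h is divisible
// by 2h and 24h by 12h), while 2h,12h,18h fail because 18h is not divisible
// by 12h, yielding the errInvalidBlockRanges error above.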

// ConfigProvider defines the per-tenant config provider for the Compactor.
type ConfigProvider interface {
	bucket.TenantConfigProvider
	CompactorBlocksRetentionPeriod(user string) time.Duration
}

// Compactor is a multi-tenant TSDB blocks compactor based on Thanos.
type Compactor struct {
	services.Service

	compactorCfg   Config
	storageCfg     cortex_tsdb.BlocksStorageConfig
	cfgProvider    ConfigProvider
	logger         log.Logger
	parentLogger   log.Logger
	registerer     prometheus.Registerer
	allowedTenants *util.AllowedTenants

	// Functions that create the bucket client, grouper, planner and compactor using the context.
	// Useful for injecting mock objects from tests.
	bucketClientFactory    func(ctx context.Context) (objstore.Bucket, error)
	blocksGrouperFactory   BlocksGrouperFactory
	blocksCompactorFactory BlocksCompactorFactory

	// Users scanner, used to discover users from the bucket.
	usersScanner *cortex_tsdb.UsersScanner

	// Blocks cleaner is responsible for hard-deleting blocks marked for deletion.
	blocksCleaner *BlocksCleaner

	// Underlying compactor and planner used to compact TSDB blocks.
	blocksCompactor compact.Compactor
	blocksPlanner   compact.Planner

	// Client used to run operations on the bucket storing blocks.
	bucketClient objstore.Bucket

	// Ring used for sharding compactions.
	ringLifecycler         *ring.Lifecycler
	ring                   *ring.Ring
	ringSubservices        *services.Manager
	ringSubservicesWatcher *services.FailureWatcher

	// Metrics.
	compactionRunsStarted          prometheus.Counter
	compactionRunsCompleted        prometheus.Counter
	compactionRunsFailed           prometheus.Counter
	compactionRunsLastSuccess      prometheus.Gauge
	compactionRunDiscoveredTenants prometheus.Gauge
	compactionRunSkippedTenants    prometheus.Gauge
	compactionRunSucceededTenants  prometheus.Gauge
	compactionRunFailedTenants     prometheus.Gauge
	compactionRunInterval          prometheus.Gauge
	blocksMarkedForDeletion        prometheus.Counter
	garbageCollectedBlocks         prometheus.Counter

	// TSDB syncer metrics
	syncerMetrics *syncerMetrics
}

// NewCompactor makes a new Compactor.
func NewCompactor(compactorCfg Config, storageCfg cortex_tsdb.BlocksStorageConfig, cfgProvider ConfigProvider, logger log.Logger, registerer prometheus.Registerer) (*Compactor, error) {
	bucketClientFactory := func(ctx context.Context) (objstore.Bucket, error) {
		return bucket.NewClient(ctx, storageCfg.Bucket, "compactor", logger, registerer)
	}

	blocksGrouperFactory := compactorCfg.BlocksGrouperFactory
	if blocksGrouperFactory == nil {
		blocksGrouperFactory = DefaultBlocksGrouperFactory
	}

	blocksCompactorFactory := compactorCfg.BlocksCompactorFactory
	if blocksCompactorFactory == nil {
		blocksCompactorFactory = DefaultBlocksCompactorFactory
	}

	cortexCompactor, err := newCompactor(compactorCfg, storageCfg, cfgProvider, logger, registerer, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create Cortex blocks compactor")
	}

	return cortexCompactor, nil
}
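
// Usage sketch (illustrative only; error handling elided): the returned
// Compactor is a dskit service, so callers typically start and stop it via
// the services package, which is how Cortex's module system runs it.
//
//	c, err := NewCompactor(compactorCfg, storageCfg, cfgProvider, logger, prometheus.DefaultRegisterer)
//	if err != nil {
//		return err
//	}
//	if err := services.StartAndAwaitRunning(ctx, c); err != nil {
//		return err
//	}
//	defer services.StopAndAwaitTerminated(context.Background(), c) //nolint:errcheck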

func newCompactor(
	compactorCfg Config,
	storageCfg cortex_tsdb.BlocksStorageConfig,
	cfgProvider ConfigProvider,
	logger log.Logger,
	registerer prometheus.Registerer,
	bucketClientFactory func(ctx context.Context) (objstore.Bucket, error),
	blocksGrouperFactory BlocksGrouperFactory,
	blocksCompactorFactory BlocksCompactorFactory,
) (*Compactor, error) {
	c := &Compactor{
		compactorCfg:           compactorCfg,
		storageCfg:             storageCfg,
		cfgProvider:            cfgProvider,
		parentLogger:           logger,
		logger:                 log.With(logger, "component", "compactor"),
		registerer:             registerer,
		syncerMetrics:          newSyncerMetrics(registerer),
		bucketClientFactory:    bucketClientFactory,
		blocksGrouperFactory:   blocksGrouperFactory,
		blocksCompactorFactory: blocksCompactorFactory,
		allowedTenants:         util.NewAllowedTenants(compactorCfg.EnabledTenants, compactorCfg.DisabledTenants),

		compactionRunsStarted: promauto.With(registerer).NewCounter(prometheus.CounterOpts{
			Name: "cortex_compactor_runs_started_total",
			Help: "Total number of compaction runs started.",
		}),
		compactionRunsCompleted: promauto.With(registerer).NewCounter(prometheus.CounterOpts{
			Name: "cortex_compactor_runs_completed_total",
			Help: "Total number of compaction runs successfully completed.",
		}),
		compactionRunsFailed: promauto.With(registerer).NewCounter(prometheus.CounterOpts{
			Name: "cortex_compactor_runs_failed_total",
			Help: "Total number of compaction runs failed.",
		}),
		compactionRunsLastSuccess: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{
			Name: "cortex_compactor_last_successful_run_timestamp_seconds",
			Help: "Unix timestamp of the last successful compaction run.",
		}),
		compactionRunDiscoveredTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{
			Name: "cortex_compactor_tenants_discovered",
			Help: "Number of tenants discovered during the current compaction run. Reset to 0 when compactor is idle.",
		}),
		compactionRunSkippedTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{
			Name: "cortex_compactor_tenants_skipped",
			Help: "Number of tenants skipped during the current compaction run. Reset to 0 when compactor is idle.",
		}),
		compactionRunSucceededTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{
			Name: "cortex_compactor_tenants_processing_succeeded",
			Help: "Number of tenants successfully processed during the current compaction run. Reset to 0 when compactor is idle.",
		}),
		compactionRunFailedTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{
			Name: "cortex_compactor_tenants_processing_failed",
			Help: "Number of tenants failed processing during the current compaction run. Reset to 0 when compactor is idle.",
		}),
		compactionRunInterval: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{
			Name: "cortex_compactor_compaction_interval_seconds",
			Help: "The configured interval on which compaction is run in seconds. Useful when compared to the last successful run metric to accurately detect multiple failed compaction runs.",
		}),
		blocksMarkedForDeletion: promauto.With(registerer).NewCounter(prometheus.CounterOpts{
			Name:        blocksMarkedForDeletionName,
			Help:        blocksMarkedForDeletionHelp,
			ConstLabels: prometheus.Labels{"reason": "compaction"},
		}),
		garbageCollectedBlocks: promauto.With(registerer).NewCounter(prometheus.CounterOpts{
			Name: "cortex_compactor_garbage_collected_blocks_total",
			Help: "Total number of blocks marked for deletion by compactor.",
		}),
	}

	if len(compactorCfg.EnabledTenants) > 0 {
		level.Info(c.logger).Log("msg", "compactor using enabled users", "enabled", strings.Join(compactorCfg.EnabledTenants, ", "))
	}
	if len(compactorCfg.DisabledTenants) > 0 {
		level.Info(c.logger).Log("msg", "compactor using disabled users", "disabled", strings.Join(compactorCfg.DisabledTenants, ", "))
	}

	c.Service = services.NewBasicService(c.starting, c.running, c.stopping)

	// The last successful compaction run metric is exposed as seconds since epoch, so we need to use seconds for this metric.
	c.compactionRunInterval.Set(c.compactorCfg.CompactionInterval.Seconds())

	return c, nil
}

// Start the compactor.
func (c *Compactor) starting(ctx context.Context) error {
	var err error

	// Create bucket client.
	c.bucketClient, err = c.bucketClientFactory(ctx)
	if err != nil {
		return errors.Wrap(err, "failed to create bucket client")
	}

	// Create blocks compactor dependencies.
	c.blocksCompactor, c.blocksPlanner, err = c.blocksCompactorFactory(ctx, c.compactorCfg, c.logger, c.registerer)
	if err != nil {
		return errors.Wrap(err, "failed to initialize compactor dependencies")
	}

	// Wrap the bucket client to write block deletion marks in the global location too.
	c.bucketClient = bucketindex.BucketWithGlobalMarkers(c.bucketClient)

	// Create the users scanner.
	c.usersScanner = cortex_tsdb.NewUsersScanner(c.bucketClient, c.ownUser, c.parentLogger)

	// Create the blocks cleaner (service).
	c.blocksCleaner = NewBlocksCleaner(BlocksCleanerConfig{
		DeletionDelay:                      c.compactorCfg.DeletionDelay,
		CleanupInterval:                    util.DurationWithJitter(c.compactorCfg.CleanupInterval, 0.1),
		CleanupConcurrency:                 c.compactorCfg.CleanupConcurrency,
		BlockDeletionMarksMigrationEnabled: c.compactorCfg.BlockDeletionMarksMigrationEnabled,
		TenantCleanupDelay:                 c.compactorCfg.TenantCleanupDelay,
	}, c.bucketClient, c.usersScanner, c.cfgProvider, c.parentLogger, c.registerer)

	// Initialize the compactors ring if sharding is enabled.
	if c.compactorCfg.ShardingEnabled {
		lifecyclerCfg := c.compactorCfg.ShardingRing.ToLifecyclerConfig()
		c.ringLifecycler, err = ring.NewLifecycler(lifecyclerCfg, ring.NewNoopFlushTransferer(), "compactor", ring.CompactorRingKey, false, c.logger, prometheus.WrapRegistererWithPrefix("cortex_", c.registerer))
		if err != nil {
			return errors.Wrap(err, "unable to initialize compactor ring lifecycler")
		}

		c.ring, err = ring.New(lifecyclerCfg.RingConfig, "compactor", ring.CompactorRingKey, c.logger, prometheus.WrapRegistererWithPrefix("cortex_", c.registerer))
		if err != nil {
			return errors.Wrap(err, "unable to initialize compactor ring")
		}

		c.ringSubservices, err = services.NewManager(c.ringLifecycler, c.ring)
		if err == nil {
			c.ringSubservicesWatcher = services.NewFailureWatcher()
			c.ringSubservicesWatcher.WatchManager(c.ringSubservices)

			err = services.StartManagerAndAwaitHealthy(ctx, c.ringSubservices)
		}

		if err != nil {
			return errors.Wrap(err, "unable to start compactor ring dependencies")
		}

		// If sharding is enabled we should wait until this instance is
		// ACTIVE within the ring. This MUST be done before starting any
		// other component depending on the users scanner, because the
		// users scanner depends on the ring (to check whether a user belongs
		// to this shard or not).
		level.Info(c.logger).Log("msg", "waiting until compactor is ACTIVE in the ring")

		ctxWithTimeout, cancel := context.WithTimeout(ctx, c.compactorCfg.ShardingRing.WaitActiveInstanceTimeout)
		defer cancel()
		if err := ring.WaitInstanceState(ctxWithTimeout, c.ring, c.ringLifecycler.ID, ring.ACTIVE); err != nil {
			level.Error(c.logger).Log("msg", "compactor failed to become ACTIVE in the ring", "err", err)
			return err
		}
		level.Info(c.logger).Log("msg", "compactor is ACTIVE in the ring")

		// In the event of a cluster cold start or scale up of 2+ compactor instances at the same
		// time, we may end up in a situation where each new compactor instance starts at a slightly
		// different time and thus each one starts with a different state of the ring. It's better
		// to just wait for ring stability for a short time.
		if c.compactorCfg.ShardingRing.WaitStabilityMinDuration > 0 {
			minWaiting := c.compactorCfg.ShardingRing.WaitStabilityMinDuration
			maxWaiting := c.compactorCfg.ShardingRing.WaitStabilityMaxDuration

			level.Info(c.logger).Log("msg", "waiting until compactor ring topology is stable", "min_waiting", minWaiting.String(), "max_waiting", maxWaiting.String())
			if err := ring.WaitRingStability(ctx, c.ring, RingOp, minWaiting, maxWaiting); err != nil {
				level.Warn(c.logger).Log("msg", "compactor ring topology is not stable after the max waiting time, proceeding anyway")
			} else {
				level.Info(c.logger).Log("msg", "compactor ring topology is stable")
			}
		}
	}

	// Ensure an initial cleanup occurred before starting the compactor.
	if err := services.StartAndAwaitRunning(ctx, c.blocksCleaner); err != nil {
		// Guard against a nil manager: the ring subservices are only created when sharding is enabled.
		if c.ringSubservices != nil {
			c.ringSubservices.StopAsync()
		}
		return errors.Wrap(err, "failed to start the blocks cleaner")
	}

	return nil
}

func (c *Compactor) stopping(_ error) error {
	ctx := context.Background()

	services.StopAndAwaitTerminated(ctx, c.blocksCleaner) //nolint:errcheck
	if c.ringSubservices != nil {
		return services.StopManagerAndAwaitStopped(ctx, c.ringSubservices)
	}
	return nil
}

func (c *Compactor) running(ctx context.Context) error {
	// Run an initial compaction before starting the interval.
	c.compactUsers(ctx)

	ticker := time.NewTicker(util.DurationWithJitter(c.compactorCfg.CompactionInterval, 0.05))
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			c.compactUsers(ctx)
		case <-ctx.Done():
			return nil
		// When sharding is disabled the watcher is nil: its Chan() then yields
		// a nil channel, so this case simply never fires.
		case err := <-c.ringSubservicesWatcher.Chan():
			return errors.Wrap(err, "compactor subservice failed")
		}
	}
}

func (c *Compactor) compactUsers(ctx context.Context) {
	succeeded := false
	compactionErrorCount := 0

	c.compactionRunsStarted.Inc()

	defer func() {
		if succeeded && compactionErrorCount == 0 {
			c.compactionRunsCompleted.Inc()
			c.compactionRunsLastSuccess.SetToCurrentTime()
		} else {
			c.compactionRunsFailed.Inc()
		}

		// Reset progress metrics once done.
		c.compactionRunDiscoveredTenants.Set(0)
		c.compactionRunSkippedTenants.Set(0)
		c.compactionRunSucceededTenants.Set(0)
		c.compactionRunFailedTenants.Set(0)
	}()

	level.Info(c.logger).Log("msg", "discovering users from bucket")
	users, err := c.discoverUsersWithRetries(ctx)
	if err != nil {
		level.Error(c.logger).Log("msg", "failed to discover users from bucket", "err", err)
		return
	}

	level.Info(c.logger).Log("msg", "discovered users from bucket", "users", len(users))
	c.compactionRunDiscoveredTenants.Set(float64(len(users)))

	// When starting multiple compactor replicas nearly at the same time, running in a cluster with
	// a large number of tenants, we may end up in a situation where the 1st user is compacted by
	// multiple replicas at the same time. Shuffling users helps reduce the likelihood this will happen.
	rand.Shuffle(len(users), func(i, j int) {
		users[i], users[j] = users[j], users[i]
	})

	// Keep track of users owned by this shard, so that we can delete the local files for all other users.
	ownedUsers := map[string]struct{}{}
	for _, userID := range users {
		// Ensure the context has not been canceled (i.e. compactor shutdown has been triggered).
		if ctx.Err() != nil {
			level.Info(c.logger).Log("msg", "interrupting compaction of user blocks", "err", ctx.Err())
			return
		}

		// Ensure the user ID belongs to our shard.
		if owned, err := c.ownUser(userID); err != nil {
			c.compactionRunSkippedTenants.Inc()
			level.Warn(c.logger).Log("msg", "unable to check if user is owned by this shard", "user", userID, "err", err)
			continue
		} else if !owned {
			c.compactionRunSkippedTenants.Inc()
			level.Debug(c.logger).Log("msg", "skipping user because it is not owned by this shard", "user", userID)
			continue
		}

		ownedUsers[userID] = struct{}{}

		if markedForDeletion, err := cortex_tsdb.TenantDeletionMarkExists(ctx, c.bucketClient, userID); err != nil {
			c.compactionRunSkippedTenants.Inc()
			level.Warn(c.logger).Log("msg", "unable to check if user is marked for deletion", "user", userID, "err", err)
			continue
		} else if markedForDeletion {
			c.compactionRunSkippedTenants.Inc()
			level.Debug(c.logger).Log("msg", "skipping user because it is marked for deletion", "user", userID)
			continue
		}

		level.Info(c.logger).Log("msg", "starting compaction of user blocks", "user", userID)

		if err = c.compactUserWithRetries(ctx, userID); err != nil {
			c.compactionRunFailedTenants.Inc()
			compactionErrorCount++
			level.Error(c.logger).Log("msg", "failed to compact user blocks", "user", userID, "err", err)
			continue
		}

		c.compactionRunSucceededTenants.Inc()
		level.Info(c.logger).Log("msg", "successfully compacted user blocks", "user", userID)
	}

	// Delete local files for unowned tenants, if there are any. This cleans up
	// leftover local files for tenants that belong to different compactors now,
	// or have been deleted completely.
	for userID := range c.listTenantsWithMetaSyncDirectories() {
		if _, owned := ownedUsers[userID]; owned {
			continue
		}

		dir := c.metaSyncDirForUser(userID)
		s, err := os.Stat(dir)
		if err != nil {
			if !os.IsNotExist(err) {
				level.Warn(c.logger).Log("msg", "failed to stat local directory with user data", "dir", dir, "err", err)
			}
			continue
		}

		if s.IsDir() {
			err := os.RemoveAll(dir)
			if err == nil {
				level.Info(c.logger).Log("msg", "deleted directory for user not owned by this shard", "dir", dir)
			} else {
				level.Warn(c.logger).Log("msg", "failed to delete directory for user not owned by this shard", "dir", dir, "err", err)
			}
		}
	}

	succeeded = true
}

func (c *Compactor) compactUserWithRetries(ctx context.Context, userID string) error {
	var lastErr error

	retries := backoff.New(ctx, backoff.Config{
		MinBackoff: c.compactorCfg.retryMinBackoff,
		MaxBackoff: c.compactorCfg.retryMaxBackoff,
		MaxRetries: c.compactorCfg.CompactionRetries,
	})

	for retries.Ongoing() {
		lastErr = c.compactUser(ctx, userID)
		if lastErr == nil {
			return nil
		}

		retries.Wait()
	}

	return lastErr
}
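
// With the defaults set in RegisterFlags (10s min backoff, 1m max backoff,
// 3 retries), a persistently failing tenant is attempted roughly at t=0,
// t≈10s and t≈30s (dskit doubles the delay between attempts, with jitter)
// before the last error is surfaced to compactUsers.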

func (c *Compactor) compactUser(ctx context.Context, userID string) error {
	bucket := bucket.NewUserBucketClient(userID, c.bucketClient, c.cfgProvider)
	reg := prometheus.NewRegistry()
	defer c.syncerMetrics.gatherThanosSyncerMetrics(reg)

	ulogger := util_log.WithUserID(userID, c.logger)

	// Filters out duplicate blocks that can be formed from two or more overlapping
	// blocks that fully submatch the source blocks of the older blocks.
	deduplicateBlocksFilter := block.NewDeduplicateFilter()

	// While fetching blocks, we filter out blocks that were marked for deletion by using IgnoreDeletionMarkFilter.
	// No delay is used -- all blocks with a deletion marker are ignored, and not considered for compaction.
	ignoreDeletionMarkFilter := block.NewIgnoreDeletionMarkFilter(
		ulogger,
		bucket,
		0,
		c.compactorCfg.MetaSyncConcurrency)

	fetcher, err := block.NewMetaFetcher(
		ulogger,
		c.compactorCfg.MetaSyncConcurrency,
		bucket,
		c.metaSyncDirForUser(userID),
		reg,
		// List of filters to apply (order matters).
		[]block.MetadataFilter{
			// Remove the ingester ID because we don't shard blocks anymore, while still
			// honoring the shard ID if sharding was done in the past.
			NewLabelRemoverFilter([]string{cortex_tsdb.IngesterIDExternalLabel}),
			block.NewConsistencyDelayMetaFilter(ulogger, c.compactorCfg.ConsistencyDelay, reg),
			ignoreDeletionMarkFilter,
			deduplicateBlocksFilter,
		},
		nil,
	)
	if err != nil {
		return err
	}

	syncer, err := compact.NewMetaSyncer(
		ulogger,
		reg,
		bucket,
		fetcher,
		deduplicateBlocksFilter,
		ignoreDeletionMarkFilter,
		c.blocksMarkedForDeletion,
		c.garbageCollectedBlocks,
		c.compactorCfg.BlockSyncConcurrency,
	)
	if err != nil {
		return errors.Wrap(err, "failed to create syncer")
	}

	compactor, err := compact.NewBucketCompactor(
		ulogger,
		syncer,
		c.blocksGrouperFactory(ctx, c.compactorCfg, bucket, ulogger, reg, c.blocksMarkedForDeletion, c.garbageCollectedBlocks),
		c.blocksPlanner,
		c.blocksCompactor,
		path.Join(c.compactorCfg.DataDir, "compact"),
		bucket,
		c.compactorCfg.CompactionConcurrency,
		false,
	)
	if err != nil {
		return errors.Wrap(err, "failed to create bucket compactor")
	}

	if err := compactor.Compact(ctx); err != nil {
		return errors.Wrap(err, "compaction")
	}

	return nil
}

func (c *Compactor) discoverUsersWithRetries(ctx context.Context) ([]string, error) {
	var lastErr error

	retries := backoff.New(ctx, backoff.Config{
		MinBackoff: c.compactorCfg.retryMinBackoff,
		MaxBackoff: c.compactorCfg.retryMaxBackoff,
		MaxRetries: c.compactorCfg.CompactionRetries,
	})

	for retries.Ongoing() {
		var users []string

		users, lastErr = c.discoverUsers(ctx)
		if lastErr == nil {
			return users, nil
		}

		retries.Wait()
	}

	return nil, lastErr
}

func (c *Compactor) discoverUsers(ctx context.Context) ([]string, error) {
	var users []string

	err := c.bucketClient.Iter(ctx, "", func(entry string) error {
		users = append(users, strings.TrimSuffix(entry, "/"))
		return nil
	})

	return users, err
}
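
// For example, a bucket laid out as:
//
//	user-1/<block ULID>/meta.json
//	user-2/<block ULID>/meta.json
//
// yields ["user-1", "user-2"]: Iter with an empty prefix only returns the
// top-level entries ("user-1/", "user-2/"), whose trailing slash is trimmed.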

func (c *Compactor) ownUser(userID string) (bool, error) {
	if !c.allowedTenants.IsAllowed(userID) {
		return false, nil
	}

	// Always owned if sharding is disabled.
	if !c.compactorCfg.ShardingEnabled {
		return true, nil
	}

	// Hash the user ID.
	hasher := fnv.New32a()
	_, _ = hasher.Write([]byte(userID))
	userHash := hasher.Sum32()

	// Check whether this compactor instance owns the user.
	rs, err := c.ring.Get(userHash, RingOp, nil, nil, nil)
	if err != nil {
		return false, err
	}

	if len(rs.Instances) != 1 {
		return false, fmt.Errorf("unexpected number of compactors in the shard (expected 1, got %d)", len(rs.Instances))
	}

	return rs.Instances[0].Addr == c.ringLifecycler.Addr, nil
}
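
// Ownership sketch: every user ID deterministically hashes to a single ring
// token, so at most one ACTIVE compactor instance owns it. The hashing step in
// isolation (illustrative; the ring lookup itself is as above):
//
//	hasher := fnv.New32a()
//	_, _ = hasher.Write([]byte("user-1"))
//	token := hasher.Sum32() // the same input yields the same token on every replica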

const compactorMetaPrefix = "compactor-meta-"

// metaSyncDirForUser returns the directory used to store cached meta files.
// The fetcher stores cached metas in the "meta-syncer/" sub directory,
// but we prefix it with "compactor-meta-" in order to guarantee no clashing with
// the directory used by the Thanos Syncer, whatever the user ID is.
func (c *Compactor) metaSyncDirForUser(userID string) string {
	return filepath.Join(c.compactorCfg.DataDir, compactorMetaPrefix+userID)
}
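
// For example, with DataDir "./data" and user ID "user-1" this returns
// "./data/compactor-meta-user-1".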

// This function returns tenants with meta sync directories found on local disk. On error, it returns a nil map.
func (c *Compactor) listTenantsWithMetaSyncDirectories() map[string]struct{} {
	result := map[string]struct{}{}

	files, err := ioutil.ReadDir(c.compactorCfg.DataDir)
	if err != nil {
		return nil
	}

	for _, f := range files {
		if !f.IsDir() {
			continue
		}

		if !strings.HasPrefix(f.Name(), compactorMetaPrefix) {
			continue
		}

		result[f.Name()[len(compactorMetaPrefix):]] = struct{}{}
	}

	return result
}
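
// For example, a data directory containing the entries
//
//	compact/
//	compactor-meta-user-1/
//	compactor-meta-user-2/
//
// yields {"user-1": {}, "user-2": {}}; the "compact" working directory is
// skipped because it lacks the compactorMetaPrefix.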