github.com/thanos-io/thanos@v0.32.5/cmd/thanos/compact.go

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package main
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"os"
    10  	"path"
    11  	"strconv"
    12  	"strings"
    13  	"sync"
    14  	"time"
    15  
    16  	"github.com/alecthomas/units"
    17  	extflag "github.com/efficientgo/tools/extkingpin"
    18  	"github.com/go-kit/log"
    19  	"github.com/go-kit/log/level"
    20  	"github.com/oklog/run"
    21  	"github.com/opentracing/opentracing-go"
    22  	"github.com/pkg/errors"
    23  	"github.com/prometheus/client_golang/prometheus"
    24  	"github.com/prometheus/client_golang/prometheus/promauto"
    25  	"github.com/prometheus/common/model"
    26  	"github.com/prometheus/common/route"
    27  	"github.com/prometheus/prometheus/storage"
    28  	"github.com/prometheus/prometheus/tsdb"
    29  
    30  	"github.com/thanos-io/objstore"
    31  	"github.com/thanos-io/objstore/client"
    32  	objstoretracing "github.com/thanos-io/objstore/tracing/opentracing"
    33  
    34  	blocksAPI "github.com/thanos-io/thanos/pkg/api/blocks"
    35  	"github.com/thanos-io/thanos/pkg/block"
    36  	"github.com/thanos-io/thanos/pkg/block/metadata"
    37  	"github.com/thanos-io/thanos/pkg/compact"
    38  	"github.com/thanos-io/thanos/pkg/compact/downsample"
    39  	"github.com/thanos-io/thanos/pkg/component"
    40  	"github.com/thanos-io/thanos/pkg/dedup"
    41  	"github.com/thanos-io/thanos/pkg/extkingpin"
    42  	"github.com/thanos-io/thanos/pkg/extprom"
    43  	extpromhttp "github.com/thanos-io/thanos/pkg/extprom/http"
    44  	"github.com/thanos-io/thanos/pkg/logging"
    45  	"github.com/thanos-io/thanos/pkg/prober"
    46  	"github.com/thanos-io/thanos/pkg/runutil"
    47  	httpserver "github.com/thanos-io/thanos/pkg/server/http"
    48  	"github.com/thanos-io/thanos/pkg/store"
    49  	"github.com/thanos-io/thanos/pkg/tracing"
    50  	"github.com/thanos-io/thanos/pkg/ui"
    51  )
    52  
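         // compactions is the fixed set of target block time ranges handed to the TSDB leveled
         // compactor: 1h, 2h, 8h, 2 days and 14 days. --debug.max-compaction-level selects a
         // prefix of this set (see levels below).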
    53  var (
    54  	compactions = compactionSet{
    55  		1 * time.Hour,
    56  		2 * time.Hour,
    57  		8 * time.Hour,
    58  		2 * 24 * time.Hour,
    59  		14 * 24 * time.Hour,
    60  	}
    61  )
    62  
    63  type compactionSet []time.Duration
    64  
    65  func (cs compactionSet) String() string {
    66  	result := make([]string, len(cs))
    67  	for i, c := range cs {
    68  		result[i] = fmt.Sprintf("%d=%dh", i, int(c.Hours()))
    69  	}
    70  	return strings.Join(result, ", ")
    71  }
    72  
     73  // levels returns the set of compaction time ranges not higher than the specified max compaction level, in milliseconds.
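         // For example, levels(2) returns []int64{3600000, 7200000, 28800000}, i.e. 1h, 2h and 8h expressed in milliseconds.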
    74  func (cs compactionSet) levels(maxLevel int) ([]int64, error) {
    75  	if maxLevel >= len(cs) {
     76  		return nil, errors.Errorf("level is bigger than the default set of %d", len(cs))
    77  	}
    78  
    79  	levels := make([]int64, maxLevel+1)
    80  	for i, c := range cs[:maxLevel+1] {
    81  		levels[i] = int64(c / time.Millisecond)
    82  	}
    83  	return levels, nil
    84  }
    85  
    86  // maxLevel returns max available compaction level.
    87  func (cs compactionSet) maxLevel() int {
    88  	return len(cs) - 1
    89  }
    90  
    91  func registerCompact(app *extkingpin.App) {
    92  	cmd := app.Command(component.Compact.String(), "Continuously compacts blocks in an object store bucket.")
    93  	conf := &compactConfig{}
    94  	conf.registerFlag(cmd)
    95  
    96  	cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ <-chan struct{}, _ bool) error {
    97  		return runCompact(g, logger, tracer, reg, component.Compact, *conf, getFlagsMap(cmd.Flags()))
    98  	})
    99  }
   100  
   101  type compactMetrics struct {
   102  	halted                      prometheus.Gauge
   103  	retried                     prometheus.Counter
   104  	iterations                  prometheus.Counter
   105  	cleanups                    prometheus.Counter
   106  	partialUploadDeleteAttempts prometheus.Counter
   107  	blocksCleaned               prometheus.Counter
   108  	blockCleanupFailures        prometheus.Counter
   109  	blocksMarked                *prometheus.CounterVec
   110  	garbageCollectedBlocks      prometheus.Counter
   111  }
   112  
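         // newCompactMetrics registers the compactor's own metrics (halted state, retries, iterations,
         // cleanup loops, partial-upload deletions, block cleanups and markings) together with a gauge
         // exposing the configured delete delay. The blocks-marked counter children are pre-created
         // below so they are exported as 0 before their first increment.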
   113  func newCompactMetrics(reg *prometheus.Registry, deleteDelay time.Duration) *compactMetrics {
   114  	_ = promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{
   115  		Name: "thanos_delete_delay_seconds",
   116  		Help: "Configured delete delay in seconds.",
   117  	}, func() float64 {
   118  		return deleteDelay.Seconds()
   119  	})
   120  
   121  	m := &compactMetrics{}
   122  
   123  	m.halted = promauto.With(reg).NewGauge(prometheus.GaugeOpts{
   124  		Name: "thanos_compact_halted",
   125  		Help: "Set to 1 if the compactor halted due to an unexpected error.",
   126  	})
   127  	m.halted.Set(0)
   128  	m.retried = promauto.With(reg).NewCounter(prometheus.CounterOpts{
   129  		Name: "thanos_compact_retries_total",
   130  		Help: "Total number of retries after retriable compactor error.",
   131  	})
   132  	m.iterations = promauto.With(reg).NewCounter(prometheus.CounterOpts{
   133  		Name: "thanos_compact_iterations_total",
   134  		Help: "Total number of iterations that were executed successfully.",
   135  	})
   136  	m.cleanups = promauto.With(reg).NewCounter(prometheus.CounterOpts{
   137  		Name: "thanos_compact_block_cleanup_loops_total",
   138  		Help: "Total number of concurrent cleanup loops of partially uploaded blocks and marked blocks that were executed successfully.",
   139  	})
   140  	m.partialUploadDeleteAttempts = promauto.With(reg).NewCounter(prometheus.CounterOpts{
   141  		Name: "thanos_compact_aborted_partial_uploads_deletion_attempts_total",
   142  		Help: "Total number of started deletions of blocks that are assumed aborted and only partially uploaded.",
   143  	})
   144  	m.blocksCleaned = promauto.With(reg).NewCounter(prometheus.CounterOpts{
   145  		Name: "thanos_compact_blocks_cleaned_total",
   146  		Help: "Total number of blocks deleted in compactor.",
   147  	})
   148  	m.blockCleanupFailures = promauto.With(reg).NewCounter(prometheus.CounterOpts{
   149  		Name: "thanos_compact_block_cleanup_failures_total",
   150  		Help: "Failures encountered while deleting blocks in compactor.",
   151  	})
   152  	m.blocksMarked = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
   153  		Name: "thanos_compact_blocks_marked_total",
   154  		Help: "Total number of blocks marked in compactor.",
   155  	}, []string{"marker", "reason"})
   156  	m.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.OutOfOrderChunksNoCompactReason)
   157  	m.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.IndexSizeExceedingNoCompactReason)
   158  	m.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, "")
   159  
   160  	m.garbageCollectedBlocks = promauto.With(reg).NewCounter(prometheus.CounterOpts{
   161  		Name: "thanos_compact_garbage_collected_blocks_total",
   162  		Help: "Total number of blocks marked for deletion by compactor.",
   163  	})
   164  	return m
   165  }
   166  
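         // runCompact wires the compactor together: the object storage client, metadata fetchers and
         // filters, the grouper/planner/bucket compactor, optional downsampling and per-resolution
         // retention, background cleanup and progress calculation, and the HTTP server with the
         // optional Block Viewer UI, all registered on the run.Group.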
   167  func runCompact(
   168  	g *run.Group,
   169  	logger log.Logger,
   170  	tracer opentracing.Tracer,
   171  	reg *prometheus.Registry,
   172  	component component.Component,
   173  	conf compactConfig,
   174  	flagsMap map[string]string,
   175  ) (rerr error) {
   176  	deleteDelay := time.Duration(conf.deleteDelay)
   177  	compactMetrics := newCompactMetrics(reg, deleteDelay)
   178  	downsampleMetrics := newDownsampleMetrics(reg)
   179  
   180  	httpProbe := prober.NewHTTP()
   181  	statusProber := prober.Combine(
   182  		httpProbe,
   183  		prober.NewInstrumentation(component, logger, extprom.WrapRegistererWithPrefix("thanos_", reg)),
   184  	)
   185  
   186  	srv := httpserver.New(logger, reg, component, httpProbe,
   187  		httpserver.WithListen(conf.http.bindAddress),
   188  		httpserver.WithGracePeriod(time.Duration(conf.http.gracePeriod)),
   189  		httpserver.WithTLSConfig(conf.http.tlsConfig),
   190  	)
   191  
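         	// Serve HTTP: readiness/liveness probes and metrics now; the Block Viewer UI and blocks API
         	// are attached further down, only when --wait is set and the web UI is not disabled.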
   192  	g.Add(func() error {
   193  		statusProber.Healthy()
   194  
   195  		return srv.ListenAndServe()
   196  	}, func(err error) {
   197  		statusProber.NotReady(err)
   198  		defer statusProber.NotHealthy(err)
   199  
   200  		srv.Shutdown(err)
   201  	})
   202  
   203  	confContentYaml, err := conf.objStore.Content()
   204  	if err != nil {
   205  		return err
   206  	}
   207  
   208  	bkt, err := client.NewBucket(logger, confContentYaml, component.String())
   209  	if err != nil {
   210  		return err
   211  	}
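         	// Wrap the bucket client so that every object storage operation is instrumented with
         	// Prometheus metrics and tracing spans.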
   212  	insBkt := objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name()))
   213  
   214  	relabelContentYaml, err := conf.selectorRelabelConf.Content()
   215  	if err != nil {
   216  		return errors.Wrap(err, "get content of relabel configuration")
   217  	}
   218  
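         	// The selector relabel config decides which blocks this compactor instance owns, based on
         	// the blocks' external labels. A hypothetical example that keeps only one cluster:
         	//
         	//   - action: keep
         	//     source_labels: [cluster]
         	//     regex: eu1
         	//
         	// Only the actions listed in block.SelectorSupportedRelabelActions are accepted.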
   219  	relabelConfig, err := block.ParseRelabelConfig(relabelContentYaml, block.SelectorSupportedRelabelActions)
   220  	if err != nil {
   221  		return err
   222  	}
   223  
   224  	// Ensure we close up everything properly.
   225  	defer func() {
   226  		if err != nil {
   227  			runutil.CloseWithLogOnErr(logger, insBkt, "bucket client")
   228  		}
   229  	}()
   230  
   231  	// While fetching blocks, we filter out blocks that were marked for deletion by using IgnoreDeletionMarkFilter.
   232  	// The delay of deleteDelay/2 is added to ensure we fetch blocks that are meant to be deleted but do not have a replacement yet.
    233  	// This is to make sure the compactor will not accidentally perform compactions with a gap instead.
   234  	ignoreDeletionMarkFilter := block.NewIgnoreDeletionMarkFilter(logger, insBkt, deleteDelay/2, conf.blockMetaFetchConcurrency)
   235  	duplicateBlocksFilter := block.NewDeduplicateFilter(conf.blockMetaFetchConcurrency)
   236  	noCompactMarkerFilter := compact.NewGatherNoCompactionMarkFilter(logger, insBkt, conf.blockMetaFetchConcurrency)
   237  	labelShardedMetaFilter := block.NewLabelShardedMetaFilter(relabelConfig)
   238  	consistencyDelayMetaFilter := block.NewConsistencyDelayMetaFilter(logger, conf.consistencyDelay, extprom.WrapRegistererWithPrefix("thanos_", reg))
   239  	timePartitionMetaFilter := block.NewTimePartitionMetaFilter(conf.filterConf.MinTime, conf.filterConf.MaxTime)
   240  
   241  	baseMetaFetcher, err := block.NewBaseFetcher(logger, conf.blockMetaFetchConcurrency, insBkt, conf.dataDir, extprom.WrapRegistererWithPrefix("thanos_", reg))
   242  	if err != nil {
   243  		return errors.Wrap(err, "create meta fetcher")
   244  	}
   245  
   246  	enableVerticalCompaction := conf.enableVerticalCompaction
   247  	if len(conf.dedupReplicaLabels) > 0 {
   248  		enableVerticalCompaction = true
   249  		level.Info(logger).Log(
   250  			"msg", "deduplication.replica-label specified, enabling vertical compaction", "dedupReplicaLabels", strings.Join(conf.dedupReplicaLabels, ","),
   251  		)
   252  	}
   253  	if enableVerticalCompaction {
   254  		level.Info(logger).Log(
   255  			"msg", "vertical compaction is enabled", "compact.enable-vertical-compaction", fmt.Sprintf("%v", conf.enableVerticalCompaction),
   256  		)
   257  	}
   258  	var (
   259  		api = blocksAPI.NewBlocksAPI(logger, conf.webConf.disableCORS, conf.label, flagsMap, insBkt)
   260  		sy  *compact.Syncer
   261  	)
   262  	{
   263  		// Make sure all compactor meta syncs are done through Syncer.SyncMeta for readability.
   264  		cf := baseMetaFetcher.NewMetaFetcher(
   265  			extprom.WrapRegistererWithPrefix("thanos_", reg), []block.MetadataFilter{
   266  				timePartitionMetaFilter,
   267  				labelShardedMetaFilter,
   268  				consistencyDelayMetaFilter,
   269  				ignoreDeletionMarkFilter,
   270  				block.NewReplicaLabelRemover(logger, conf.dedupReplicaLabels),
   271  				duplicateBlocksFilter,
   272  				noCompactMarkerFilter,
   273  			},
   274  		)
   275  		cf.UpdateOnChange(func(blocks []metadata.Meta, err error) {
   276  			api.SetLoaded(blocks, err)
   277  		})
   278  		sy, err = compact.NewMetaSyncer(
   279  			logger,
   280  			reg,
   281  			insBkt,
   282  			cf,
   283  			duplicateBlocksFilter,
   284  			ignoreDeletionMarkFilter,
   285  			compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, ""),
   286  			compactMetrics.garbageCollectedBlocks,
   287  		)
   288  		if err != nil {
   289  			return errors.Wrap(err, "create syncer")
   290  		}
   291  	}
   292  
   293  	levels, err := compactions.levels(conf.maxCompactionLevel)
   294  	if err != nil {
   295  		return errors.Wrap(err, "get compaction levels")
   296  	}
   297  
   298  	if conf.maxCompactionLevel < compactions.maxLevel() {
    299  		level.Warn(logger).Log("msg", "Max compaction level is lower than the default", "current", conf.maxCompactionLevel, "default", compactions.maxLevel())
   300  	}
   301  
   302  	ctx, cancel := context.WithCancel(context.Background())
   303  	ctx = tracing.ContextWithTracer(ctx, tracer)
   304  
   305  	defer func() {
   306  		if rerr != nil {
   307  			cancel()
   308  		}
   309  	}()
   310  
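         	// Pick the vertical series merger used when overlapping blocks are compacted: "penalty"
         	// enables penalty-based deduplication and requires at least one --deduplication.replica-label,
         	// while the empty default keeps the plain compacting chained merger.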
   311  	var mergeFunc storage.VerticalChunkSeriesMergeFunc
   312  	switch conf.dedupFunc {
   313  	case compact.DedupAlgorithmPenalty:
   314  		mergeFunc = dedup.NewChunkSeriesMerger()
   315  
   316  		if len(conf.dedupReplicaLabels) == 0 {
   317  			return errors.New("penalty based deduplication needs at least one replica label specified")
   318  		}
   319  	case "":
   320  		mergeFunc = storage.NewCompactingChunkSeriesMerger(storage.ChainedSeriesMerge)
   321  
   322  	default:
   323  		return errors.Errorf("unsupported deduplication func, got %s", conf.dedupFunc)
   324  	}
   325  
   326  	// Instantiate the compactor with different time slices. Timestamps in TSDB
   327  	// are in milliseconds.
   328  	comp, err := tsdb.NewLeveledCompactor(ctx, reg, logger, levels, downsample.NewPool(), mergeFunc)
   329  	if err != nil {
   330  		return errors.Wrap(err, "create compactor")
   331  	}
   332  
   333  	var (
   334  		compactDir      = path.Join(conf.dataDir, "compact")
   335  		downsamplingDir = path.Join(conf.dataDir, "downsample")
   336  	)
   337  
   338  	if err := os.MkdirAll(compactDir, os.ModePerm); err != nil {
   339  		return errors.Wrap(err, "create working compact directory")
   340  	}
   341  
   342  	if err := os.MkdirAll(downsamplingDir, os.ModePerm); err != nil {
   343  		return errors.Wrap(err, "create working downsample directory")
   344  	}
   345  
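         	// The grouper buckets blocks by resolution and external labels; the planner wraps the default
         	// TSDB planner and additionally marks the biggest source block as no-compact when the estimated
         	// index of the resulting block would exceed --compact.block-max-index-size.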
   346  	grouper := compact.NewDefaultGrouper(
   347  		logger,
   348  		insBkt,
   349  		conf.acceptMalformedIndex,
   350  		enableVerticalCompaction,
   351  		reg,
   352  		compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, ""),
   353  		compactMetrics.garbageCollectedBlocks,
   354  		compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.OutOfOrderChunksNoCompactReason),
   355  		metadata.HashFunc(conf.hashFunc),
   356  		conf.blockFilesConcurrency,
   357  		conf.compactBlocksFetchConcurrency,
   358  	)
   359  	tsdbPlanner := compact.NewPlanner(logger, levels, noCompactMarkerFilter)
   360  	planner := compact.WithLargeTotalIndexSizeFilter(
   361  		tsdbPlanner,
   362  		insBkt,
   363  		int64(conf.maxBlockIndexSize),
   364  		compactMetrics.blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename, metadata.IndexSizeExceedingNoCompactReason),
   365  	)
   366  	blocksCleaner := compact.NewBlocksCleaner(logger, insBkt, ignoreDeletionMarkFilter, deleteDelay, compactMetrics.blocksCleaned, compactMetrics.blockCleanupFailures)
   367  	compactor, err := compact.NewBucketCompactor(
   368  		logger,
   369  		sy,
   370  		grouper,
   371  		planner,
   372  		comp,
   373  		compactDir,
   374  		insBkt,
   375  		conf.compactionConcurrency,
   376  		conf.skipBlockWithOutOfOrderChunks,
   377  	)
   378  	if err != nil {
   379  		return errors.Wrap(err, "create bucket compactor")
   380  	}
   381  
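         	// Retention is configured per resolution; a zero duration means blocks of that resolution are
         	// retained forever. A hypothetical example: --retention.resolution-raw=30d
         	// --retention.resolution-5m=120d --retention.resolution-1h=1y.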
   382  	retentionByResolution := map[compact.ResolutionLevel]time.Duration{
   383  		compact.ResolutionLevelRaw: time.Duration(conf.retentionRaw),
   384  		compact.ResolutionLevel5m:  time.Duration(conf.retentionFiveMin),
   385  		compact.ResolutionLevel1h:  time.Duration(conf.retentionOneHr),
   386  	}
   387  
   388  	if retentionByResolution[compact.ResolutionLevelRaw].Milliseconds() != 0 {
   389  		// If downsampling is enabled, error if raw retention is not sufficient for downsampling to occur (upper bound 10 days for 1h resolution)
   390  		if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevelRaw].Milliseconds() < downsample.ResLevel1DownsampleRange {
    391  			return errors.New("retention of raw samples must be higher than the minimum block size after which 5m resolution downsampling will occur (40 hours)")
   392  		}
   393  		level.Info(logger).Log("msg", "retention policy of raw samples is enabled", "duration", retentionByResolution[compact.ResolutionLevelRaw])
   394  	}
   395  	if retentionByResolution[compact.ResolutionLevel5m].Milliseconds() != 0 {
    396  		// If retention is lower than the minimum downsample range, then no downsampling at this resolution will be persisted.
   397  		if !conf.disableDownsampling && retentionByResolution[compact.ResolutionLevel5m].Milliseconds() < downsample.ResLevel2DownsampleRange {
   398  			return errors.New("5m resolution retention must be higher than the minimum block size after which 1h resolution downsampling will occur (10 days)")
   399  		}
   400  		level.Info(logger).Log("msg", "retention policy of 5 min aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel5m])
   401  	}
   402  	if retentionByResolution[compact.ResolutionLevel1h].Milliseconds() != 0 {
   403  		level.Info(logger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h])
   404  	}
   405  
   406  	var cleanMtx sync.Mutex
   407  	// TODO(GiedriusS): we could also apply retention policies here but the logic would be a bit more complex.
   408  	cleanPartialMarked := func() error {
   409  		cleanMtx.Lock()
   410  		defer cleanMtx.Unlock()
   411  
   412  		if err := sy.SyncMetas(ctx); err != nil {
   413  			return errors.Wrap(err, "syncing metas")
   414  		}
   415  
   416  		compact.BestEffortCleanAbortedPartialUploads(ctx, logger, sy.Partial(), insBkt, compactMetrics.partialUploadDeleteAttempts, compactMetrics.blocksCleaned, compactMetrics.blockCleanupFailures)
   417  		if err := blocksCleaner.DeleteMarkedBlocks(ctx); err != nil {
   418  			return errors.Wrap(err, "cleaning marked blocks")
   419  		}
   420  		compactMetrics.cleanups.Inc()
   421  
   422  		return nil
   423  	}
   424  
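         	// compactMainFn is one full iteration: compaction, two downsampling passes (so 1h blocks can be
         	// built from 5m blocks produced in the same iteration), retention by resolution, and finally a
         	// cleanup of partially uploaded and marked blocks.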
   425  	compactMainFn := func() error {
   426  		if err := compactor.Compact(ctx); err != nil {
   427  			return errors.Wrap(err, "compaction")
   428  		}
   429  
   430  		if !conf.disableDownsampling {
   431  			// After all compactions are done, work down the downsampling backlog.
   432  			// We run two passes of this to ensure that the 1h downsampling is generated
   433  			// for 5m downsamplings created in the first run.
   434  			level.Info(logger).Log("msg", "start first pass of downsampling")
   435  			if err := sy.SyncMetas(ctx); err != nil {
   436  				return errors.Wrap(err, "sync before first pass of downsampling")
   437  			}
   438  
   439  			for _, meta := range sy.Metas() {
   440  				groupKey := meta.Thanos.GroupKey()
   441  				downsampleMetrics.downsamples.WithLabelValues(groupKey)
   442  				downsampleMetrics.downsampleFailures.WithLabelValues(groupKey)
   443  			}
   444  			if err := downsampleBucket(ctx, logger, downsampleMetrics, insBkt, sy.Metas(), downsamplingDir, conf.downsampleConcurrency, conf.blockFilesConcurrency, metadata.HashFunc(conf.hashFunc), conf.acceptMalformedIndex); err != nil {
   445  				return errors.Wrap(err, "first pass of downsampling failed")
   446  			}
   447  
   448  			level.Info(logger).Log("msg", "start second pass of downsampling")
   449  			if err := sy.SyncMetas(ctx); err != nil {
   450  				return errors.Wrap(err, "sync before second pass of downsampling")
   451  			}
   452  			if err := downsampleBucket(ctx, logger, downsampleMetrics, insBkt, sy.Metas(), downsamplingDir, conf.downsampleConcurrency, conf.blockFilesConcurrency, metadata.HashFunc(conf.hashFunc), conf.acceptMalformedIndex); err != nil {
   453  				return errors.Wrap(err, "second pass of downsampling failed")
   454  			}
   455  			level.Info(logger).Log("msg", "downsampling iterations done")
   456  		} else {
   457  			level.Info(logger).Log("msg", "downsampling was explicitly disabled")
   458  		}
   459  
   460  		// TODO(bwplotka): Find a way to avoid syncing if no op was done.
   461  		if err := sy.SyncMetas(ctx); err != nil {
   462  			return errors.Wrap(err, "sync before retention")
   463  		}
   464  
   465  		if err := compact.ApplyRetentionPolicyByResolution(ctx, logger, insBkt, sy.Metas(), retentionByResolution, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, "")); err != nil {
   466  			return errors.Wrap(err, "retention failed")
   467  		}
   468  
   469  		return cleanPartialMarked()
   470  	}
   471  
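         	// Main compaction actor: run once and exit when --wait is not set, otherwise repeat every
         	// --wait-interval. Halt errors either block the process for investigation (--debug.halt-on-error,
         	// the default) or abort it, while retriable errors only increment a counter.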
   472  	g.Add(func() error {
   473  		defer runutil.CloseWithLogOnErr(logger, insBkt, "bucket client")
   474  
   475  		if !conf.wait {
   476  			return compactMainFn()
   477  		}
   478  
   479  		// --wait=true is specified.
   480  		return runutil.Repeat(conf.waitInterval, ctx.Done(), func() error {
   481  			err := compactMainFn()
   482  			if err == nil {
   483  				compactMetrics.iterations.Inc()
   484  				return nil
   485  			}
   486  
    487  			// The HaltError type signals that we hit a critical bug and should block
    488  			// for investigation. You should alert when the compactor reports itself as halted.
   489  			if compact.IsHaltError(err) {
   490  				if conf.haltOnError {
   491  					level.Error(logger).Log("msg", "critical error detected; halting", "err", err)
   492  					compactMetrics.halted.Set(1)
   493  					select {}
   494  				} else {
   495  					return errors.Wrap(err, "critical error detected")
   496  				}
   497  			}
   498  
    499  			// The RetryError signals that we hit a retriable error (transient error, no connection).
   500  			// You should alert on this being triggered too frequently.
   501  			if compact.IsRetryError(err) {
   502  				level.Error(logger).Log("msg", "retriable error", "err", err)
   503  				compactMetrics.retried.Inc()
   504  				// TODO(bplotka): use actual "retry()" here instead of waiting 5 minutes?
   505  				return nil
   506  			}
   507  
   508  			return errors.Wrap(err, "error executing compaction")
   509  		})
   510  	}, func(error) {
   511  		cancel()
   512  	})
   513  
   514  	if conf.wait {
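         		// With --wait the process keeps running, so optionally expose the Bucket UI and the blocks
         		// API, plus a separate meta fetcher that periodically refreshes the /global view.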
   515  		if !conf.disableWeb {
   516  			r := route.New()
   517  
   518  			ins := extpromhttp.NewInstrumentationMiddleware(reg, nil)
   519  
   520  			global := ui.NewBucketUI(logger, conf.webConf.externalPrefix, conf.webConf.prefixHeaderName, component)
   521  			global.Register(r, ins)
   522  
   523  			// Configure Request Logging for HTTP calls.
   524  			opts := []logging.Option{logging.WithDecider(func(_ string, _ error) logging.Decision {
   525  				return logging.NoLogCall
   526  			})}
   527  			logMiddleware := logging.NewHTTPServerMiddleware(logger, opts...)
   528  			api.Register(r.WithPrefix("/api/v1"), tracer, logger, ins, logMiddleware)
   529  
   530  			// Separate fetcher for global view.
   531  			// TODO(bwplotka): Allow Bucket UI to visualize the state of the block as well.
   532  			f := baseMetaFetcher.NewMetaFetcher(extprom.WrapRegistererWithPrefix("thanos_bucket_ui", reg), nil, "component", "globalBucketUI")
   533  			f.UpdateOnChange(func(blocks []metadata.Meta, err error) {
   534  				api.SetGlobal(blocks, err)
   535  			})
   536  
   537  			srv.Handle("/", r)
   538  
   539  			g.Add(func() error {
   540  				iterCtx, iterCancel := context.WithTimeout(ctx, conf.blockViewerSyncBlockTimeout)
   541  				_, _, _ = f.Fetch(iterCtx)
   542  				iterCancel()
   543  
   544  				// For /global state make sure to fetch periodically.
   545  				return runutil.Repeat(conf.blockViewerSyncBlockInterval, ctx.Done(), func() error {
   546  					return runutil.RetryWithLog(logger, time.Minute, ctx.Done(), func() error {
   547  						iterCtx, iterCancel := context.WithTimeout(ctx, conf.blockViewerSyncBlockTimeout)
   548  						defer iterCancel()
   549  
   550  						_, _, err := f.Fetch(iterCtx)
   551  						return err
   552  					})
   553  				})
   554  			}, func(error) {
   555  				cancel()
   556  			})
   557  		}
   558  
   559  		// Periodically remove partial blocks and blocks marked for deletion
   560  		// since one iteration potentially could take a long time.
   561  		if conf.cleanupBlocksInterval > 0 {
   562  			g.Add(func() error {
   563  				return runutil.Repeat(conf.cleanupBlocksInterval, ctx.Done(), func() error {
   564  					err := cleanPartialMarked()
   565  					if err != nil && compact.IsRetryError(err) {
    566  						// The RetryError signals that we hit a retriable error (transient error, no connection).
   567  						// You should alert on this being triggered too frequently.
   568  						level.Error(logger).Log("msg", "retriable error", "err", err)
   569  						compactMetrics.retried.Inc()
   570  
   571  						return nil
   572  					}
   573  
   574  					return err
   575  				})
   576  			}, func(error) {
   577  				cancel()
   578  			})
   579  		}
   580  
   581  		// Periodically calculate the progress of compaction, downsampling and retention.
   582  		if conf.progressCalculateInterval > 0 {
   583  			g.Add(func() error {
   584  				ps := compact.NewCompactionProgressCalculator(reg, tsdbPlanner)
   585  				rs := compact.NewRetentionProgressCalculator(reg, retentionByResolution)
   586  				var ds *compact.DownsampleProgressCalculator
   587  				if !conf.disableDownsampling {
   588  					ds = compact.NewDownsampleProgressCalculator(reg)
   589  				}
   590  
   591  				return runutil.Repeat(conf.progressCalculateInterval, ctx.Done(), func() error {
   592  
   593  					if err := sy.SyncMetas(ctx); err != nil {
    594  						// The RetryError signals that we hit a retriable error (transient error, no connection).
   595  						// You should alert on this being triggered too frequently.
   596  						if compact.IsRetryError(err) {
   597  							level.Error(logger).Log("msg", "retriable error", "err", err)
   598  							compactMetrics.retried.Inc()
   599  
   600  							return nil
   601  						}
   602  
   603  						return errors.Wrapf(err, "could not sync metas")
   604  					}
   605  
   606  					metas := sy.Metas()
   607  					groups, err := grouper.Groups(metas)
   608  					if err != nil {
   609  						return errors.Wrapf(err, "could not group metadata for compaction")
   610  					}
   611  
   612  					if err = ps.ProgressCalculate(ctx, groups); err != nil {
   613  						return errors.Wrapf(err, "could not calculate compaction progress")
   614  					}
   615  
   616  					retGroups, err := grouper.Groups(metas)
   617  					if err != nil {
   618  						return errors.Wrapf(err, "could not group metadata for retention")
   619  					}
   620  
   621  					if err = rs.ProgressCalculate(ctx, retGroups); err != nil {
   622  						return errors.Wrapf(err, "could not calculate retention progress")
   623  					}
   624  
   625  					if !conf.disableDownsampling {
   626  						groups, err = grouper.Groups(metas)
   627  						if err != nil {
   628  							return errors.Wrapf(err, "could not group metadata into downsample groups")
   629  						}
   630  						if err := ds.ProgressCalculate(ctx, groups); err != nil {
   631  							return errors.Wrapf(err, "could not calculate downsampling progress")
   632  						}
   633  					}
   634  
   635  					return nil
   636  				})
   637  			}, func(err error) {
   638  				cancel()
   639  			})
   640  		}
   641  	}
   642  
   643  	level.Info(logger).Log("msg", "starting compact node")
   644  	statusProber.Ready()
   645  	return nil
   646  }
   647  
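         // compactConfig holds every command line flag of the compact command; registerFlag binds them
         // to the kingpin flag clause.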
   648  type compactConfig struct {
   649  	haltOnError                                    bool
   650  	acceptMalformedIndex                           bool
   651  	maxCompactionLevel                             int
   652  	http                                           httpConfig
   653  	dataDir                                        string
   654  	objStore                                       extflag.PathOrContent
   655  	consistencyDelay                               time.Duration
   656  	retentionRaw, retentionFiveMin, retentionOneHr model.Duration
   657  	wait                                           bool
   658  	waitInterval                                   time.Duration
   659  	disableDownsampling                            bool
   660  	blockMetaFetchConcurrency                      int
   661  	blockFilesConcurrency                          int
   662  	blockViewerSyncBlockInterval                   time.Duration
   663  	blockViewerSyncBlockTimeout                    time.Duration
   664  	cleanupBlocksInterval                          time.Duration
   665  	compactionConcurrency                          int
   666  	downsampleConcurrency                          int
   667  	compactBlocksFetchConcurrency                  int
   668  	deleteDelay                                    model.Duration
   669  	dedupReplicaLabels                             []string
   670  	selectorRelabelConf                            extflag.PathOrContent
   671  	disableWeb                                     bool
   672  	webConf                                        webConfig
   673  	label                                          string
   674  	maxBlockIndexSize                              units.Base2Bytes
   675  	hashFunc                                       string
   676  	enableVerticalCompaction                       bool
   677  	dedupFunc                                      string
   678  	skipBlockWithOutOfOrderChunks                  bool
   679  	progressCalculateInterval                      time.Duration
   680  	filterConf                                     *store.FilterConfig
   681  }
   682  
   683  func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) {
   684  	cmd.Flag("debug.halt-on-error", "Halt the process if a critical compaction error is detected.").
   685  		Hidden().Default("true").BoolVar(&cc.haltOnError)
   686  	cmd.Flag("debug.accept-malformed-index",
   687  		"Compaction and downsampling index verification will ignore out of order label names.").
   688  		Hidden().Default("false").BoolVar(&cc.acceptMalformedIndex)
   689  	cmd.Flag("debug.max-compaction-level", fmt.Sprintf("Maximum compaction level, default is %d: %s", compactions.maxLevel(), compactions.String())).
   690  		Hidden().Default(strconv.Itoa(compactions.maxLevel())).IntVar(&cc.maxCompactionLevel)
   691  
   692  	cc.http.registerFlag(cmd)
   693  
   694  	cmd.Flag("data-dir", "Data directory in which to cache blocks and process compactions.").
   695  		Default("./data").StringVar(&cc.dataDir)
   696  
   697  	cc.objStore = *extkingpin.RegisterCommonObjStoreFlags(cmd, "", false)
   698  
    699  	cmd.Flag("consistency-delay", fmt.Sprintf("Minimum age of fresh (non-compacted) blocks before they are processed. Malformed blocks older than the maximum of consistency-delay and %v will be removed.", compact.PartialUploadThresholdAge)).
   700  		Default("30m").DurationVar(&cc.consistencyDelay)
   701  
   702  	cmd.Flag("retention.resolution-raw",
   703  		"How long to retain raw samples in bucket. Setting this to 0d will retain samples of this resolution forever").
   704  		Default("0d").SetValue(&cc.retentionRaw)
   705  	cmd.Flag("retention.resolution-5m", "How long to retain samples of resolution 1 (5 minutes) in bucket. Setting this to 0d will retain samples of this resolution forever").
   706  		Default("0d").SetValue(&cc.retentionFiveMin)
   707  	cmd.Flag("retention.resolution-1h", "How long to retain samples of resolution 2 (1 hour) in bucket. Setting this to 0d will retain samples of this resolution forever").
   708  		Default("0d").SetValue(&cc.retentionOneHr)
   709  
   710  	// TODO(kakkoyun, pgough): https://github.com/thanos-io/thanos/issues/2266.
   711  	cmd.Flag("wait", "Do not exit after all compactions have been processed and wait for new work.").
   712  		Short('w').BoolVar(&cc.wait)
    713  	cmd.Flag("wait-interval", "Wait interval between consecutive compaction runs and bucket refreshes. Only works when the --wait flag is specified.").
   714  		Default("5m").DurationVar(&cc.waitInterval)
   715  
    716  	cmd.Flag("downsampling.disable", "Disables downsampling. This is not recommended "+
    717  		"as querying long time ranges without downsampled data is neither efficient nor useful, e.g. it is not possible to render all samples for a human eye anyway.").
   718  		Default("false").BoolVar(&cc.disableDownsampling)
   719  
   720  	cmd.Flag("block-meta-fetch-concurrency", "Number of goroutines to use when fetching block metadata from object storage.").
   721  		Default("32").IntVar(&cc.blockMetaFetchConcurrency)
   722  	cmd.Flag("block-files-concurrency", "Number of goroutines to use when fetching/uploading block files from object storage.").
   723  		Default("1").IntVar(&cc.blockFilesConcurrency)
   724  	cmd.Flag("block-viewer.global.sync-block-interval", "Repeat interval for syncing the blocks between local and remote view for /global Block Viewer UI.").
   725  		Default("1m").DurationVar(&cc.blockViewerSyncBlockInterval)
   726  	cmd.Flag("block-viewer.global.sync-block-timeout", "Maximum time for syncing the blocks between local and remote view for /global Block Viewer UI.").
   727  		Default("5m").DurationVar(&cc.blockViewerSyncBlockTimeout)
    728  	cmd.Flag("compact.cleanup-interval", "How often we should clean up partially uploaded blocks and blocks marked for deletion in the background when --wait has been enabled. Setting it to \"0s\" disables it - the cleaning will only happen at the end of an iteration.").
   729  		Default("5m").DurationVar(&cc.cleanupBlocksInterval)
    730  	cmd.Flag("compact.progress-interval", "Frequency of calculating the compaction progress in the background when --wait has been enabled. Setting it to \"0s\" disables it. Currently, compaction, downsampling and retention progress are supported.").
   731  		Default("5m").DurationVar(&cc.progressCalculateInterval)
   732  
   733  	cmd.Flag("compact.concurrency", "Number of goroutines to use when compacting groups.").
   734  		Default("1").IntVar(&cc.compactionConcurrency)
    735  	cmd.Flag("compact.blocks-fetch-concurrency", "Number of goroutines to use when downloading blocks during compaction.").
   736  		Default("1").IntVar(&cc.compactBlocksFetchConcurrency)
   737  	cmd.Flag("downsample.concurrency", "Number of goroutines to use when downsampling blocks.").
   738  		Default("1").IntVar(&cc.downsampleConcurrency)
   739  
    740  	cmd.Flag("delete-delay", "Time before a block marked for deletion is deleted from bucket. "+
    741  		"If delete-delay is non-zero, blocks will be marked for deletion and the compactor component will delete blocks marked for deletion from the bucket. "+
    742  		"If delete-delay is 0, blocks will be deleted straight away. "+
    743  		"Note that deleting blocks immediately can cause query failures if the store gateway still has the block loaded, "+
    744  		"or the compactor ignores the deletion because it's compacting the block at the same time.").
   745  		Default("48h").SetValue(&cc.deleteDelay)
   746  
    747  	cmd.Flag("compact.enable-vertical-compaction", "Experimental. When set to true, compactor will allow overlaps and perform **irreversible** vertical compaction. See https://thanos.io/tip/components/compact.md/#vertical-compactions to read more. "+
    748  		"Please note that by default this uses a NAIVE algorithm for merging. If you need a different deduplication algorithm (e.g. one that works well with Prometheus replicas), please set it via --deduplication.func. "+
    749  		"NOTE: This flag is ignored and implicitly enabled when the --deduplication.replica-label flag is set.").
   750  		Hidden().Default("false").BoolVar(&cc.enableVerticalCompaction)
   751  
   752  	cmd.Flag("deduplication.func", "Experimental. Deduplication algorithm for merging overlapping blocks. "+
   753  		"Possible values are: \"\", \"penalty\". If no value is specified, the default compact deduplication merger is used, which performs 1:1 deduplication for samples. "+
    754  		"When set to penalty, the penalty-based deduplication algorithm will be used. At least one replica label has to be set via the --deduplication.replica-label flag.").
   755  		Default("").EnumVar(&cc.dedupFunc, compact.DedupAlgorithmPenalty, "")
   756  
    757  	cmd.Flag("deduplication.replica-label", "Label to treat as a replica indicator of blocks that can be deduplicated (repeated flag). This will merge multiple replica blocks into one. This process is irreversible. "+
    758  		"Experimental. When one or more labels are set, compactor will ignore the given labels so that vertical compaction can merge the blocks. "+
    759  		"Please note that by default this uses a NAIVE algorithm for merging which works well for deduplication of blocks with **precisely the same samples** like produced by Receiver replication. "+
    760  		"If you need a different deduplication algorithm (e.g. one that works well with Prometheus replicas), please set it via --deduplication.func.").
   761  		StringsVar(&cc.dedupReplicaLabels)
   762  
   763  	// TODO(bwplotka): This is short term fix for https://github.com/thanos-io/thanos/issues/1424, replace with vertical block sharding https://github.com/thanos-io/thanos/pull/3390.
    764  	cmd.Flag("compact.block-max-index-size", "Maximum index size for the resulting block during any compaction. Note that "+
    765  		"the total size is approximated in the worst case. If the block that would result from compaction is estimated to exceed this number, the biggest source "+
    766  		"block is marked for no compaction (no-compact-mark.json is uploaded) which causes this block to be excluded from any compaction. "+
    767  		"Default is due to https://github.com/thanos-io/thanos/issues/1424, but it's overall recommended to keep blocks at a reasonable size.").
   768  		Hidden().Default("64GB").BytesVar(&cc.maxBlockIndexSize)
   769  
    770  	cmd.Flag("compact.skip-block-with-out-of-order-chunks", "When set to true, mark blocks containing an index with out-of-order chunks for no compaction instead of halting the compaction.").
   771  		Hidden().Default("false").BoolVar(&cc.skipBlockWithOutOfOrderChunks)
   772  
    773  	cmd.Flag("hash-func", "Specify which hash function to use when calculating the hashes of produced files. If no function is specified, no hashes are calculated. This permits avoiding downloading some files twice albeit at some performance cost. Possible values are: \"\", \"SHA256\".").
   774  		Default("").EnumVar(&cc.hashFunc, "SHA256", "")
   775  
   776  	cc.filterConf = &store.FilterConfig{}
    777  	cmd.Flag("min-time", "Start of time range limit to compact. Thanos Compactor will compact only blocks that happened later than this value. Option can be a constant time in RFC3339 format or time duration relative to current time, such as -1d or 2h45m. Valid duration units are ms, s, m, h, d, w, y.").
   778  		Default("0000-01-01T00:00:00Z").SetValue(&cc.filterConf.MinTime)
    779  	cmd.Flag("max-time", "End of time range limit to compact. Thanos Compactor will compact only blocks that happened earlier than this value. Option can be a constant time in RFC3339 format or time duration relative to current time, such as -1d or 2h45m. Valid duration units are ms, s, m, h, d, w, y.").
   780  		Default("9999-12-31T23:59:59Z").SetValue(&cc.filterConf.MaxTime)
   781  
   782  	cmd.Flag("web.disable", "Disable Block Viewer UI.").Default("false").BoolVar(&cc.disableWeb)
   783  
   784  	cc.selectorRelabelConf = *extkingpin.RegisterSelectorRelabelFlags(cmd)
   785  
   786  	cc.webConf.registerFlag(cmd)
   787  
   788  	cmd.Flag("bucket-web-label", "External block label to use as group title in the bucket web UI").StringVar(&cc.label)
   789  }