github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/indexshipper/compactor/compactor.go

package compactor

import (
	"context"
	"flag"
	"fmt"
	"net/http"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/grafana/loki/pkg/validation"

	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/kv"
	"github.com/grafana/dskit/ring"
	"github.com/grafana/dskit/services"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"

	"github.com/grafana/loki/pkg/storage/chunk/client"
	"github.com/grafana/loki/pkg/storage/chunk/client/local"
	chunk_util "github.com/grafana/loki/pkg/storage/chunk/client/util"
	"github.com/grafana/loki/pkg/storage/config"
	"github.com/grafana/loki/pkg/storage/stores/indexshipper/compactor/deletion"
	"github.com/grafana/loki/pkg/storage/stores/indexshipper/compactor/retention"
	shipper_storage "github.com/grafana/loki/pkg/storage/stores/indexshipper/storage"
	"github.com/grafana/loki/pkg/usagestats"
	"github.com/grafana/loki/pkg/util"
	util_log "github.com/grafana/loki/pkg/util/log"
)

// Here is how the generic compactor works:
// 1. Find the index type from the table name using schemaPeriodForTable.
// 2. Find the registered IndexCompactor for the index type.
// 3. Build an instance of TableCompactor using IndexCompactor.NewTableCompactor, with all the required information to do a compaction.
// 4. Run the compaction using TableCompactor.CompactTable, which would set the new/updated CompactedIndex for each IndexSet.
// 5. If retention is enabled, run retention on the CompactedIndex using its retention.IndexProcessor implementation.
// 6. Convert the CompactedIndex to a file using IndexCompactor.ToIndexFile for uploading.
// 7. If we uploaded successfully, delete the old index files.
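//
// A condensed sketch of that flow, as wired up in CompactTable below (error
// handling and upload/cleanup elided; see the table implementation for the
// full logic):
//
//	schemaCfg, _ := schemaPeriodForTable(c.schemaConfig, tableName)
//	indexCompactor := c.indexCompactors[schemaCfg.IndexType]
//	table, _ := newTable(ctx, filepath.Join(c.cfg.WorkingDirectory, tableName),
//		c.indexStorageClient, indexCompactor, schemaCfg, c.tableMarker, c.expirationChecker)
//	_ = table.compact(intervalMayHaveExpiredChunks)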

const (
	// ringAutoForgetUnhealthyPeriods is how many consecutive heartbeat timeout periods an unhealthy
	// instance in the ring is tolerated before being automatically removed.
	ringAutoForgetUnhealthyPeriods = 10

	// ringKey is the key under which we store the compactor ring in the KVStore.
	ringKey = "compactor"

	// ringNameForServer is the name of the ring used by the compactor server.
	ringNameForServer = "compactor"

	// ringKeyOfLeader is a somewhat arbitrary ID to pull from the ring to see who will be elected the leader.
	ringKeyOfLeader = 0

	// ringReplicationFactor should be 1 because we only want to pull back one node from the Ring.
	ringReplicationFactor = 1

	// ringNumTokens sets our single token in the ring,
	// we only need to insert 1 token to be used for leader election purposes.
	ringNumTokens = 1
)

var (
	retentionEnabledStats = usagestats.NewString("compactor_retention_enabled")
	defaultRetentionStats = usagestats.NewString("compactor_default_retention")
)

type Config struct {
	WorkingDirectory          string          `yaml:"working_directory"`
	SharedStoreType           string          `yaml:"shared_store"`
	SharedStoreKeyPrefix      string          `yaml:"shared_store_key_prefix"`
	CompactionInterval        time.Duration   `yaml:"compaction_interval"`
	ApplyRetentionInterval    time.Duration   `yaml:"apply_retention_interval"`
	RetentionEnabled          bool            `yaml:"retention_enabled"`
	RetentionDeleteDelay      time.Duration   `yaml:"retention_delete_delay"`
	RetentionDeleteWorkCount  int             `yaml:"retention_delete_worker_count"`
	RetentionTableTimeout     time.Duration   `yaml:"retention_table_timeout"`
	DeleteBatchSize           int             `yaml:"delete_batch_size"`
	DeleteRequestCancelPeriod time.Duration   `yaml:"delete_request_cancel_period"`
	MaxCompactionParallelism  int             `yaml:"max_compaction_parallelism"`
	CompactorRing             util.RingConfig `yaml:"compactor_ring,omitempty"`
	RunOnce                   bool            `yaml:"-"`
}

// RegisterFlags registers flags.
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
	f.StringVar(&cfg.WorkingDirectory, "boltdb.shipper.compactor.working-directory", "", "Directory where files can be downloaded for compaction.")
	f.StringVar(&cfg.SharedStoreType, "boltdb.shipper.compactor.shared-store", "", "Shared store used for storing boltdb files. Supported types: gcs, s3, azure, swift, filesystem")
	f.StringVar(&cfg.SharedStoreKeyPrefix, "boltdb.shipper.compactor.shared-store.key-prefix", "index/", "Prefix to add to object keys in the shared store. The path separator (if any) should always be '/'. The prefix should never start with a separator but should always end with one.")
	f.DurationVar(&cfg.CompactionInterval, "boltdb.shipper.compactor.compaction-interval", 10*time.Minute, "Interval at which to re-run the compaction operation.")
	f.DurationVar(&cfg.ApplyRetentionInterval, "boltdb.shipper.compactor.apply-retention-interval", 0, "Interval at which to apply/enforce retention. 0 means run at the same interval as compaction. If non-zero, it should always be a multiple of the compaction interval.")
	f.DurationVar(&cfg.RetentionDeleteDelay, "boltdb.shipper.compactor.retention-delete-delay", 2*time.Hour, "Delay after which chunks will be fully deleted during retention.")
	f.BoolVar(&cfg.RetentionEnabled, "boltdb.shipper.compactor.retention-enabled", false, "(Experimental) Activate custom (per-stream, per-tenant) retention.")
	f.IntVar(&cfg.RetentionDeleteWorkCount, "boltdb.shipper.compactor.retention-delete-worker-count", 150, "The total number of workers to use to delete chunks.")
	f.IntVar(&cfg.DeleteBatchSize, "boltdb.shipper.compactor.delete-batch-size", 70, "The max number of delete requests to run per compaction cycle.")
	f.DurationVar(&cfg.DeleteRequestCancelPeriod, "boltdb.shipper.compactor.delete-request-cancel-period", 24*time.Hour, "Allow cancellation of delete requests until this duration after they are created. Data is deleted only after delete requests are older than this duration. Ideally this should be set to at least 24h.")
	f.DurationVar(&cfg.RetentionTableTimeout, "boltdb.shipper.compactor.retention-table-timeout", 0, "The maximum amount of time to spend running retention and deletion on any given table in the index.")
	f.IntVar(&cfg.MaxCompactionParallelism, "boltdb.shipper.compactor.max-compaction-parallelism", 1, "Maximum number of tables to compact in parallel. When increasing this value, make sure the compactor has enough disk space allocated to store and compact that many tables.")
	f.BoolVar(&cfg.RunOnce, "boltdb.shipper.compactor.run-once", false, "Run the compactor one time to clean up and compact index files only (no retention applied).")
	cfg.CompactorRing.RegisterFlagsWithPrefix("boltdb.shipper.compactor.", "collectors/", f)
}
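
// For reference, a minimal sketch of the corresponding YAML configuration
// (values are illustrative; defaults come from the flag definitions above):
//
//	compactor:
//	  working_directory: /loki/compactor
//	  shared_store: gcs
//	  shared_store_key_prefix: index/
//	  compaction_interval: 10m
//	  retention_enabled: true
//	  retention_delete_delay: 2h
//	  max_compaction_parallelism: 1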

// Validate verifies the config does not contain inappropriate values.
func (cfg *Config) Validate() error {
	if cfg.MaxCompactionParallelism < 1 {
		return errors.New("max compaction parallelism must be >= 1")
	}
	if cfg.RetentionEnabled && cfg.ApplyRetentionInterval != 0 && cfg.ApplyRetentionInterval%cfg.CompactionInterval != 0 {
		return errors.New("interval for applying retention should either be set to 0 or a multiple of the compaction interval")
	}

	return shipper_storage.ValidateSharedStoreKeyPrefix(cfg.SharedStoreKeyPrefix)
}

type Compactor struct {
	services.Service

	cfg                   Config
	indexStorageClient    shipper_storage.Client
	tableMarker           retention.TableMarker
	sweeper               *retention.Sweeper
	deleteRequestsStore   deletion.DeleteRequestsStore
	DeleteRequestsHandler *deletion.DeleteRequestHandler
	deleteRequestsManager *deletion.DeleteRequestsManager
	expirationChecker     retention.ExpirationChecker
	metrics               *metrics
	running               bool
	wg                    sync.WaitGroup
	indexCompactors       map[string]IndexCompactor
	schemaConfig          config.SchemaConfig

	// Ring used for running a single compactor
	ringLifecycler *ring.BasicLifecycler
	ring           *ring.Ring
	ringPollPeriod time.Duration

	// Subservices manager.
	subservices        *services.Manager
	subservicesWatcher *services.FailureWatcher
}

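// NewCompactor creates a Compactor service that joins the compactor ring and,
// once elected leader, periodically compacts index tables and optionally
// applies retention.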
func NewCompactor(cfg Config, objectClient client.ObjectClient, schemaConfig config.SchemaConfig, limits *validation.Overrides, r prometheus.Registerer) (*Compactor, error) {
	retentionEnabledStats.Set("false")
	if cfg.RetentionEnabled {
		retentionEnabledStats.Set("true")
	}
	if limits != nil {
		defaultRetentionStats.Set(limits.DefaultLimits().RetentionPeriod.String())
	}
	if cfg.SharedStoreType == "" {
		return nil, errors.New("compactor shared_store_type must be specified")
	}

	compactor := &Compactor{
		cfg:             cfg,
		ringPollPeriod:  5 * time.Second,
		indexCompactors: map[string]IndexCompactor{},
		schemaConfig:    schemaConfig,
	}

	ringStore, err := kv.NewClient(
		cfg.CompactorRing.KVStore,
		ring.GetCodec(),
		kv.RegistererWithKVName(prometheus.WrapRegistererWithPrefix("loki_", r), "compactor"),
		util_log.Logger,
	)
	if err != nil {
		return nil, errors.Wrap(err, "create KV store client")
	}
	lifecyclerCfg, err := cfg.CompactorRing.ToLifecyclerConfig(ringNumTokens, util_log.Logger)
	if err != nil {
		return nil, errors.Wrap(err, "invalid ring lifecycler config")
	}

	// Define lifecycler delegates in reverse order (last to be called defined first because they're
	// chained via "next delegate").
	delegate := ring.BasicLifecyclerDelegate(compactor)
	delegate = ring.NewLeaveOnStoppingDelegate(delegate, util_log.Logger)
	delegate = ring.NewTokensPersistencyDelegate(cfg.CompactorRing.TokensFilePath, ring.JOINING, delegate, util_log.Logger)
	delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*cfg.CompactorRing.HeartbeatTimeout, delegate, util_log.Logger)
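	// At runtime the delegates therefore run outermost-first: auto-forget, then
	// tokens persistency, then leave-on-stopping, then the compactor itself.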

	compactor.ringLifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, ringNameForServer, ringKey, ringStore, delegate, util_log.Logger, r)
	if err != nil {
		return nil, errors.Wrap(err, "create ring lifecycler")
	}

	ringCfg := cfg.CompactorRing.ToRingConfig(ringReplicationFactor)
	compactor.ring, err = ring.NewWithStoreClientAndStrategy(ringCfg, ringNameForServer, ringKey, ringStore, ring.NewIgnoreUnhealthyInstancesReplicationStrategy(), prometheus.WrapRegistererWithPrefix("cortex_", r), util_log.Logger)
	if err != nil {
		return nil, errors.Wrap(err, "create ring client")
	}

	compactor.subservices, err = services.NewManager(compactor.ringLifecycler, compactor.ring)
	if err != nil {
		return nil, err
	}
	compactor.subservicesWatcher = services.NewFailureWatcher()
	compactor.subservicesWatcher.WatchManager(compactor.subservices)

	if err := compactor.init(objectClient, schemaConfig, limits, r); err != nil {
		return nil, err
	}

	compactor.Service = services.NewBasicService(compactor.starting, compactor.loop, compactor.stopping)
	return compactor, nil
}

func (c *Compactor) init(objectClient client.ObjectClient, schemaConfig config.SchemaConfig, limits *validation.Overrides, r prometheus.Registerer) error {
	err := chunk_util.EnsureDirectory(c.cfg.WorkingDirectory)
	if err != nil {
		return err
	}
	c.indexStorageClient = shipper_storage.NewIndexStorageClient(objectClient, c.cfg.SharedStoreKeyPrefix)
	c.metrics = newMetrics(r)

	if c.cfg.RetentionEnabled {
		var encoder client.KeyEncoder
		if _, ok := objectClient.(*local.FSObjectClient); ok {
			encoder = client.FSEncoder
		}

		chunkClient := client.NewClient(objectClient, encoder, schemaConfig)

		retentionWorkDir := filepath.Join(c.cfg.WorkingDirectory, "retention")
		c.sweeper, err = retention.NewSweeper(retentionWorkDir, chunkClient, c.cfg.RetentionDeleteWorkCount, c.cfg.RetentionDeleteDelay, r)
		if err != nil {
			return err
		}

		if err := c.initDeletes(r, limits); err != nil {
			return err
		}

		c.tableMarker, err = retention.NewMarker(retentionWorkDir, c.expirationChecker, c.cfg.RetentionTableTimeout, chunkClient, r)
		if err != nil {
			return err
		}
	}

	return nil
}

func (c *Compactor) initDeletes(r prometheus.Registerer, limits *validation.Overrides) error {
	deletionWorkDir := filepath.Join(c.cfg.WorkingDirectory, "deletion")

	store, err := deletion.NewDeleteStore(deletionWorkDir, c.indexStorageClient)
	if err != nil {
		return err
	}
	c.deleteRequestsStore = store

	c.DeleteRequestsHandler = deletion.NewDeleteRequestHandler(
		c.deleteRequestsStore,
		c.cfg.DeleteRequestCancelPeriod,
		r,
	)

	c.deleteRequestsManager = deletion.NewDeleteRequestsManager(
		c.deleteRequestsStore,
		c.cfg.DeleteRequestCancelPeriod,
		c.cfg.DeleteBatchSize,
		limits,
		r,
	)

	c.expirationChecker = newExpirationChecker(retention.NewExpirationChecker(limits), c.deleteRequestsManager)
	return nil
}

func (c *Compactor) starting(ctx context.Context) (err error) {
	// In case this function returns an error we want to unregister the instance
	// from the ring. We do it while ensuring dependencies are gracefully stopped if
	// they were already started.
	defer func() {
		if err == nil || c.subservices == nil {
			return
		}

		if stopErr := services.StopManagerAndAwaitStopped(context.Background(), c.subservices); stopErr != nil {
			level.Error(util_log.Logger).Log("msg", "failed to gracefully stop compactor dependencies", "err", stopErr)
		}
	}()

	if err := services.StartManagerAndAwaitHealthy(ctx, c.subservices); err != nil {
		return errors.Wrap(err, "unable to start compactor subservices")
	}

	// The BasicLifecycler does not automatically move state to ACTIVE, so that any additional work
	// can be done before becoming ACTIVE. For the compactor we don't currently have any
	// additional work, so we can become ACTIVE right away.

	// Wait until the ring client has detected this instance in the JOINING state to
	// make sure that when we run the initial sync we already know the tokens
	// assigned to this instance.
	level.Info(util_log.Logger).Log("msg", "waiting until compactor is JOINING in the ring")
	if err := ring.WaitInstanceState(ctx, c.ring, c.ringLifecycler.GetInstanceID(), ring.JOINING); err != nil {
		return err
	}
	level.Info(util_log.Logger).Log("msg", "compactor is JOINING in the ring")

	// Change ring state to ACTIVE
	if err = c.ringLifecycler.ChangeState(ctx, ring.ACTIVE); err != nil {
		return errors.Wrapf(err, "switch instance to %s in the ring", ring.ACTIVE)
	}

	// Wait until the ring client has detected this instance in the ACTIVE state to
	// make sure that when we run the loop it won't be detected as a ring
	// topology change.
	level.Info(util_log.Logger).Log("msg", "waiting until compactor is ACTIVE in the ring")
	if err := ring.WaitInstanceState(ctx, c.ring, c.ringLifecycler.GetInstanceID(), ring.ACTIVE); err != nil {
		return err
	}
	level.Info(util_log.Logger).Log("msg", "compactor is ACTIVE in the ring")

	return nil
}

func (c *Compactor) loop(ctx context.Context) error {
	if c.cfg.RunOnce {
		level.Info(util_log.Logger).Log("msg", "running single compaction")
		err := c.RunCompaction(ctx, false)
		if err != nil {
			level.Error(util_log.Logger).Log("msg", "compaction encountered an error", "err", err)
		}
		level.Info(util_log.Logger).Log("msg", "single compaction finished")
		level.Info(util_log.Logger).Log("msg", "interrupt or terminate the process to finish")

		// Wait for Loki to shutdown.
		<-ctx.Done()
		level.Info(util_log.Logger).Log("msg", "compactor exiting")
		return nil
	}

	if c.cfg.RetentionEnabled {
		if c.deleteRequestsStore != nil {
			defer c.deleteRequestsStore.Stop()
		}
		if c.deleteRequestsManager != nil {
			defer c.deleteRequestsManager.Stop()
		}
	}

	syncTicker := time.NewTicker(c.ringPollPeriod)
	defer syncTicker.Stop()

	var runningCtx context.Context
	var runningCancel context.CancelFunc

	for {
		select {
		case <-ctx.Done():
			if runningCancel != nil {
				runningCancel()
			}
			c.wg.Wait()
			level.Info(util_log.Logger).Log("msg", "compactor exiting")
			return nil
		case <-syncTicker.C:
			bufDescs, bufHosts, bufZones := ring.MakeBuffersForGet()
			rs, err := c.ring.Get(ringKeyOfLeader, ring.Write, bufDescs, bufHosts, bufZones)
			if err != nil {
				level.Error(util_log.Logger).Log("msg", "error asking ring for who should run the compactor, will check again", "err", err)
				continue
			}

			addrs := rs.GetAddresses()
			if len(addrs) != 1 {
				level.Error(util_log.Logger).Log("msg", "too many addresses (more than one) returned when asking the ring who should run the compactor, will check again")
				continue
			}
			if c.ringLifecycler.GetInstanceAddr() == addrs[0] {
				// If not running, start
				if !c.running {
					level.Info(util_log.Logger).Log("msg", "this instance has been chosen to run the compactor, starting compactor")
					runningCtx, runningCancel = context.WithCancel(ctx)
					go c.runCompactions(runningCtx)
					c.running = true
					c.metrics.compactorRunning.Set(1)
				}
			} else {
				// If running, shutdown
				if c.running {
					level.Info(util_log.Logger).Log("msg", "this instance should no longer run the compactor, stopping compactor")
					runningCancel()
					c.wg.Wait()
					c.running = false
					c.metrics.compactorRunning.Set(0)
					level.Info(util_log.Logger).Log("msg", "compactor stopped")
				}
			}
		}
	}
}

func (c *Compactor) runCompactions(ctx context.Context) {
	// To avoid races, wait one compaction interval before actually starting the compactor.
	// This allows the ring to settle if there are a lot of ring changes and gives
	// existing compactors time to shut down before this one starts, so that
	// multiple compactors don't run at the same time.
	func() {
		t := time.NewTimer(c.cfg.CompactionInterval)
		defer t.Stop()
		level.Info(util_log.Logger).Log("msg", fmt.Sprintf("waiting %v for ring to stay stable and previous compactions to finish before starting compactor", c.cfg.CompactionInterval))
		select {
		case <-ctx.Done():
			return
		case <-t.C:
			level.Info(util_log.Logger).Log("msg", "compactor startup delay completed")
		}
	}()

	lastRetentionRunAt := time.Unix(0, 0)
	runCompaction := func() {
		applyRetention := false
		if c.cfg.RetentionEnabled && time.Since(lastRetentionRunAt) >= c.cfg.ApplyRetentionInterval {
			level.Info(util_log.Logger).Log("msg", "applying retention with compaction")
			applyRetention = true
		}

		err := c.RunCompaction(ctx, applyRetention)
		if err != nil {
			level.Error(util_log.Logger).Log("msg", "failed to run compaction", "err", err)
		}

		if applyRetention {
			lastRetentionRunAt = time.Now()
		}
	}

	c.wg.Add(1)
	go func() {
		defer c.wg.Done()
		runCompaction()

		ticker := time.NewTicker(c.cfg.CompactionInterval)
		defer ticker.Stop()

		for {
			select {
			case <-ticker.C:
				runCompaction()
			case <-ctx.Done():
				return
			}
		}
	}()
	if c.cfg.RetentionEnabled {
		c.wg.Add(1)
		go func() {
			// starts the chunk sweeper
			defer func() {
				c.sweeper.Stop()
				c.wg.Done()
			}()
			c.sweeper.Start()
			<-ctx.Done()
		}()
	}
	level.Info(util_log.Logger).Log("msg", "compactor started")
}

func (c *Compactor) stopping(_ error) error {
	return services.StopManagerAndAwaitStopped(context.Background(), c.subservices)
}

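// CompactTable compacts a single table and, when applyRetention is set and the
// table's interval may contain expired chunks, applies retention as part of
// the same pass.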
func (c *Compactor) CompactTable(ctx context.Context, tableName string, applyRetention bool) error {
	schemaCfg, ok := schemaPeriodForTable(c.schemaConfig, tableName)
	if !ok {
		level.Error(util_log.Logger).Log("msg", "skipping compaction since we can't find schema for table", "table", tableName)
		return nil
	}

	indexCompactor, ok := c.indexCompactors[schemaCfg.IndexType]
	if !ok {
		return fmt.Errorf("index processor not found for index type %s", schemaCfg.IndexType)
	}

	table, err := newTable(ctx, filepath.Join(c.cfg.WorkingDirectory, tableName), c.indexStorageClient, indexCompactor,
		schemaCfg, c.tableMarker, c.expirationChecker)
	if err != nil {
		level.Error(util_log.Logger).Log("msg", "failed to initialize table for compaction", "table", tableName, "err", err)
		return err
	}

	interval := retention.ExtractIntervalFromTableName(tableName)
	intervalMayHaveExpiredChunks := false
	if applyRetention {
		intervalMayHaveExpiredChunks = c.expirationChecker.IntervalMayHaveExpiredChunks(interval, "")
	}

	err = table.compact(intervalMayHaveExpiredChunks)
	if err != nil {
		level.Error(util_log.Logger).Log("msg", "failed to compact files", "table", tableName, "err", err)
		return err
	}
	return nil
}

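// RegisterIndexCompactor registers an IndexCompactor to handle tables whose
// schema period uses the given index type. For example (the constructor name
// here is illustrative):
//
//	c.RegisterIndexCompactor("boltdb-shipper", newBoltDBIndexCompactor())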
func (c *Compactor) RegisterIndexCompactor(indexType string, indexCompactor IndexCompactor) {
	c.indexCompactors[indexType] = indexCompactor
}

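// RunCompaction lists all index tables and compacts up to
// MaxCompactionParallelism of them concurrently, skipping the delete requests
// table. The first error encountered, if any, is returned once all workers finish.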
func (c *Compactor) RunCompaction(ctx context.Context, applyRetention bool) error {
	status := statusSuccess
	start := time.Now()

	if applyRetention {
		c.expirationChecker.MarkPhaseStarted()
	}

	defer func() {
		c.metrics.compactTablesOperationTotal.WithLabelValues(status).Inc()
		runtime := time.Since(start)
		if status == statusSuccess {
			c.metrics.compactTablesOperationDurationSeconds.Set(runtime.Seconds())
			c.metrics.compactTablesOperationLastSuccess.SetToCurrentTime()
			if applyRetention {
				c.metrics.applyRetentionLastSuccess.SetToCurrentTime()
			}
		}

		if applyRetention {
			if status == statusSuccess {
				c.expirationChecker.MarkPhaseFinished()
			} else {
				c.expirationChecker.MarkPhaseFailed()
			}
		}
		if runtime > c.cfg.CompactionInterval {
			level.Warn(util_log.Logger).Log("msg", fmt.Sprintf("last compaction took %s, which is longer than the compaction interval of %s; this can lead to duplicate compactors running if not running a standalone compactor instance.", runtime, c.cfg.CompactionInterval))
		}
	}()

	// Refresh the index list cache since the previous compaction run would have changed the index files in the object store.
	c.indexStorageClient.RefreshIndexListCache(ctx)

	tables, err := c.indexStorageClient.ListTables(ctx)
	if err != nil {
		status = statusFailure
		return err
	}

	compactTablesChan := make(chan string)
	errChan := make(chan error)

	for i := 0; i < c.cfg.MaxCompactionParallelism; i++ {
		go func() {
			var err error
			defer func() {
				errChan <- err
			}()

			for {
				select {
				case tableName, ok := <-compactTablesChan:
					if !ok {
						return
					}

					level.Info(util_log.Logger).Log("msg", "compacting table", "table-name", tableName)
					err = c.CompactTable(ctx, tableName, applyRetention)
					if err != nil {
						return
					}
					level.Info(util_log.Logger).Log("msg", "finished compacting table", "table-name", tableName)
				case <-ctx.Done():
					return
				}
			}
		}()
	}

	go func() {
		for _, tableName := range tables {
			if tableName == deletion.DeleteRequestsTableName {
				// we do not want to compact or apply retention on the delete requests table
				continue
			}

			select {
			case compactTablesChan <- tableName:
			case <-ctx.Done():
				return
			}
		}

		close(compactTablesChan)
	}()

	var firstErr error
	// read all the errors
	for i := 0; i < c.cfg.MaxCompactionParallelism; i++ {
		err := <-errChan
		if err != nil && firstErr == nil {
			status = statusFailure
			firstErr = err
		}
	}

	return firstErr
}

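// expirationChecker combines the retention-based and deletion-based expiry
// checks: a chunk is considered expired if either checker says so, with the
// retention check consulted first (see Expired below).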
type expirationChecker struct {
	retentionExpiryChecker retention.ExpirationChecker
	deletionExpiryChecker  retention.ExpirationChecker
}

func newExpirationChecker(retentionExpiryChecker, deletionExpiryChecker retention.ExpirationChecker) retention.ExpirationChecker {
	return &expirationChecker{retentionExpiryChecker, deletionExpiryChecker}
}

func (e *expirationChecker) Expired(ref retention.ChunkEntry, now model.Time) (bool, []retention.IntervalFilter) {
	if expired, nonDeletedIntervals := e.retentionExpiryChecker.Expired(ref, now); expired {
		return expired, nonDeletedIntervals
	}

	return e.deletionExpiryChecker.Expired(ref, now)
}

func (e *expirationChecker) MarkPhaseStarted() {
	e.retentionExpiryChecker.MarkPhaseStarted()
	e.deletionExpiryChecker.MarkPhaseStarted()
}

func (e *expirationChecker) MarkPhaseFailed() {
	e.retentionExpiryChecker.MarkPhaseFailed()
	e.deletionExpiryChecker.MarkPhaseFailed()
}

func (e *expirationChecker) MarkPhaseFinished() {
	e.retentionExpiryChecker.MarkPhaseFinished()
	e.deletionExpiryChecker.MarkPhaseFinished()
}

func (e *expirationChecker) MarkPhaseTimedOut() {
	e.retentionExpiryChecker.MarkPhaseTimedOut()
	e.deletionExpiryChecker.MarkPhaseTimedOut()
}

func (e *expirationChecker) IntervalMayHaveExpiredChunks(interval model.Interval, userID string) bool {
	return e.retentionExpiryChecker.IntervalMayHaveExpiredChunks(interval, userID) || e.deletionExpiryChecker.IntervalMayHaveExpiredChunks(interval, userID)
}

func (e *expirationChecker) DropFromIndex(ref retention.ChunkEntry, tableEndTime model.Time, now model.Time) bool {
	return e.retentionExpiryChecker.DropFromIndex(ref, tableEndTime, now) || e.deletionExpiryChecker.DropFromIndex(ref, tableEndTime, now)
}

func (c *Compactor) OnRingInstanceRegister(_ *ring.BasicLifecycler, ringDesc ring.Desc, instanceExists bool, instanceID string, instanceDesc ring.InstanceDesc) (ring.InstanceState, ring.Tokens) {
	// When we initialize the compactor instance in the ring we want to start from
	// a clean situation, so regardless of the current state we set it to JOINING, while
	// keeping the existing tokens (if any) or the ones loaded from file.
	var tokens []uint32
	if instanceExists {
		tokens = instanceDesc.GetTokens()
	}

	takenTokens := ringDesc.GetTokens()
	newTokens := ring.GenerateTokens(ringNumTokens-len(tokens), takenTokens)

	// Tokens sorting will be enforced by the parent caller.
	tokens = append(tokens, newTokens...)

	return ring.JOINING, tokens
}

func (c *Compactor) OnRingInstanceTokens(_ *ring.BasicLifecycler, _ ring.Tokens) {}
func (c *Compactor) OnRingInstanceStopping(_ *ring.BasicLifecycler)              {}
func (c *Compactor) OnRingInstanceHeartbeat(_ *ring.BasicLifecycler, _ *ring.Desc, _ *ring.InstanceDesc) {
}

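// ServeHTTP exposes the compactor ring status page.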
func (c *Compactor) ServeHTTP(w http.ResponseWriter, req *http.Request) {
	c.ring.ServeHTTP(w, req)
}

func schemaPeriodForTable(cfg config.SchemaConfig, tableName string) (config.PeriodConfig, bool) {
	// The first round removes configs that do not have the prefix.
	candidates := []config.PeriodConfig{}
	for _, schema := range cfg.Configs {
		if strings.HasPrefix(tableName, schema.IndexTables.Prefix) {
			candidates = append(candidates, schema)
		}
	}
	// WARN: we assume the period is always daily. This is only true for boltdb-shipper.
	var (
		matched config.PeriodConfig
		found   bool
	)
	for _, schema := range candidates {
		periodIndex, err := strconv.ParseInt(strings.TrimPrefix(tableName, schema.IndexTables.Prefix), 10, 64)
		if err != nil {
			continue
		}
		periodSec := int64(schema.IndexTables.Period / time.Second)
		tableTs := model.TimeFromUnix(periodIndex * periodSec)
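		// For example, with prefix "index_" and a 24h period, table "index_19000"
		// yields periodIndex 19000 and tableTs 19000*86400s, i.e. 2022-01-08T00:00:00Z.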
		if tableTs.After(schema.From.Time) || tableTs == schema.From.Time {
			matched = schema
			found = true
		}
	}

	return matched, found
}