github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/compactor/blocks_cleaner.go (about)

     1  package compactor
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"time"
     7  
     8  	"github.com/go-kit/log"
     9  	"github.com/go-kit/log/level"
    10  	"github.com/grafana/dskit/concurrency"
    11  	"github.com/grafana/dskit/services"
    12  	"github.com/oklog/ulid"
    13  	"github.com/pkg/errors"
    14  	"github.com/prometheus/client_golang/prometheus"
    15  	"github.com/prometheus/client_golang/prometheus/promauto"
    16  	"github.com/thanos-io/thanos/pkg/block"
    17  	"github.com/thanos-io/thanos/pkg/block/metadata"
    18  	"github.com/thanos-io/thanos/pkg/objstore"
    19  
    20  	"github.com/cortexproject/cortex/pkg/storage/bucket"
    21  	cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb"
    22  	"github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex"
    23  	"github.com/cortexproject/cortex/pkg/util"
    24  	util_log "github.com/cortexproject/cortex/pkg/util/log"
    25  )
    26  
// BlocksCleanerConfig holds the configuration for the BlocksCleaner service.
type BlocksCleanerConfig struct {
	DeletionDelay                      time.Duration // Grace period between a block being marked for deletion and its hard deletion.
	CleanupInterval                    time.Duration // How often the cleanup (and bucket index update) runs.
	CleanupConcurrency                 int           // Max number of tenants cleaned up concurrently.
	BlockDeletionMarksMigrationEnabled bool          // TODO Discuss whether we should remove it in Cortex 1.8.0 and document that upgrading to 1.7.0 before 1.8.0 is required.
	TenantCleanupDelay                 time.Duration // Delay before removing tenant deletion mark and "debug".
}
    34  
// BlocksCleaner is a service that periodically hard-deletes blocks marked for
// deletion, cleans up tenants marked for deletion, applies per-tenant retention
// and keeps per-tenant bucket indexes up to date.
type BlocksCleaner struct {
	services.Service

	cfg          BlocksCleanerConfig
	cfgProvider  ConfigProvider
	logger       log.Logger
	bucketClient objstore.Bucket
	usersScanner *cortex_tsdb.UsersScanner

	// Keep track of the last owned users.
	// Used to delete per-tenant metrics for tenants no longer owned by this shard.
	lastOwnedUsers []string

	// Metrics.
	runsStarted                 prometheus.Counter
	runsCompleted               prometheus.Counter
	runsFailed                  prometheus.Counter
	runsLastSuccess             prometheus.Gauge
	blocksCleanedTotal          prometheus.Counter
	blocksFailedTotal           prometheus.Counter
	blocksMarkedForDeletion     prometheus.Counter
	tenantBlocks                *prometheus.GaugeVec
	tenantMarkedBlocks          *prometheus.GaugeVec
	tenantPartialBlocks         *prometheus.GaugeVec
	tenantBucketIndexLastUpdate *prometheus.GaugeVec
}
    60  
    61  func NewBlocksCleaner(cfg BlocksCleanerConfig, bucketClient objstore.Bucket, usersScanner *cortex_tsdb.UsersScanner, cfgProvider ConfigProvider, logger log.Logger, reg prometheus.Registerer) *BlocksCleaner {
    62  	c := &BlocksCleaner{
    63  		cfg:          cfg,
    64  		bucketClient: bucketClient,
    65  		usersScanner: usersScanner,
    66  		cfgProvider:  cfgProvider,
    67  		logger:       log.With(logger, "component", "cleaner"),
    68  		runsStarted: promauto.With(reg).NewCounter(prometheus.CounterOpts{
    69  			Name: "cortex_compactor_block_cleanup_started_total",
    70  			Help: "Total number of blocks cleanup runs started.",
    71  		}),
    72  		runsCompleted: promauto.With(reg).NewCounter(prometheus.CounterOpts{
    73  			Name: "cortex_compactor_block_cleanup_completed_total",
    74  			Help: "Total number of blocks cleanup runs successfully completed.",
    75  		}),
    76  		runsFailed: promauto.With(reg).NewCounter(prometheus.CounterOpts{
    77  			Name: "cortex_compactor_block_cleanup_failed_total",
    78  			Help: "Total number of blocks cleanup runs failed.",
    79  		}),
    80  		runsLastSuccess: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
    81  			Name: "cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds",
    82  			Help: "Unix timestamp of the last successful blocks cleanup run.",
    83  		}),
    84  		blocksCleanedTotal: promauto.With(reg).NewCounter(prometheus.CounterOpts{
    85  			Name: "cortex_compactor_blocks_cleaned_total",
    86  			Help: "Total number of blocks deleted.",
    87  		}),
    88  		blocksFailedTotal: promauto.With(reg).NewCounter(prometheus.CounterOpts{
    89  			Name: "cortex_compactor_block_cleanup_failures_total",
    90  			Help: "Total number of blocks failed to be deleted.",
    91  		}),
    92  		blocksMarkedForDeletion: promauto.With(reg).NewCounter(prometheus.CounterOpts{
    93  			Name:        blocksMarkedForDeletionName,
    94  			Help:        blocksMarkedForDeletionHelp,
    95  			ConstLabels: prometheus.Labels{"reason": "retention"},
    96  		}),
    97  
    98  		// The following metrics don't have the "cortex_compactor" prefix because not strictly related to
    99  		// the compactor. They're just tracked by the compactor because it's the most logical place where these
   100  		// metrics can be tracked.
   101  		tenantBlocks: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
   102  			Name: "cortex_bucket_blocks_count",
   103  			Help: "Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.",
   104  		}, []string{"user"}),
   105  		tenantMarkedBlocks: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
   106  			Name: "cortex_bucket_blocks_marked_for_deletion_count",
   107  			Help: "Total number of blocks marked for deletion in the bucket.",
   108  		}, []string{"user"}),
   109  		tenantPartialBlocks: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
   110  			Name: "cortex_bucket_blocks_partials_count",
   111  			Help: "Total number of partial blocks.",
   112  		}, []string{"user"}),
   113  		tenantBucketIndexLastUpdate: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
   114  			Name: "cortex_bucket_index_last_successful_update_timestamp_seconds",
   115  			Help: "Timestamp of the last successful update of a tenant's bucket index.",
   116  		}, []string{"user"}),
   117  	}
   118  
   119  	c.Service = services.NewTimerService(cfg.CleanupInterval, c.starting, c.ticker, nil)
   120  
   121  	return c
   122  }
   123  
// starting implements the services.TimerService starting hook. It performs a
// synchronous first cleanup (firstRun=true enables the one-off deletion marks
// migration) before the service transitions to Running.
func (c *BlocksCleaner) starting(ctx context.Context) error {
	// Run a cleanup so that any other service depending on this service
	// is guaranteed to start once the initial cleanup has been done.
	c.runCleanup(ctx, true)

	return nil
}
   131  
// ticker runs a periodic cleanup on each CleanupInterval tick. It always
// returns nil so that a failed cleanup doesn't stop the timer service.
func (c *BlocksCleaner) ticker(ctx context.Context) error {
	c.runCleanup(ctx, false)

	return nil
}
   137  
   138  func (c *BlocksCleaner) runCleanup(ctx context.Context, firstRun bool) {
   139  	level.Info(c.logger).Log("msg", "started blocks cleanup and maintenance")
   140  	c.runsStarted.Inc()
   141  
   142  	if err := c.cleanUsers(ctx, firstRun); err == nil {
   143  		level.Info(c.logger).Log("msg", "successfully completed blocks cleanup and maintenance")
   144  		c.runsCompleted.Inc()
   145  		c.runsLastSuccess.SetToCurrentTime()
   146  	} else if errors.Is(err, context.Canceled) {
   147  		level.Info(c.logger).Log("msg", "canceled blocks cleanup and maintenance", "err", err)
   148  		return
   149  	} else {
   150  		level.Error(c.logger).Log("msg", "failed to run blocks cleanup and maintenance", "err", err.Error())
   151  		c.runsFailed.Inc()
   152  	}
   153  }
   154  
   155  func (c *BlocksCleaner) cleanUsers(ctx context.Context, firstRun bool) error {
   156  	users, deleted, err := c.usersScanner.ScanUsers(ctx)
   157  	if err != nil {
   158  		return errors.Wrap(err, "failed to discover users from bucket")
   159  	}
   160  
   161  	isActive := util.StringsMap(users)
   162  	isDeleted := util.StringsMap(deleted)
   163  	allUsers := append(users, deleted...)
   164  
   165  	// Delete per-tenant metrics for all tenants not belonging anymore to this shard.
   166  	// Such tenants have been moved to a different shard, so their updated metrics will
   167  	// be exported by the new shard.
   168  	for _, userID := range c.lastOwnedUsers {
   169  		if !isActive[userID] && !isDeleted[userID] {
   170  			c.tenantBlocks.DeleteLabelValues(userID)
   171  			c.tenantMarkedBlocks.DeleteLabelValues(userID)
   172  			c.tenantPartialBlocks.DeleteLabelValues(userID)
   173  			c.tenantBucketIndexLastUpdate.DeleteLabelValues(userID)
   174  		}
   175  	}
   176  	c.lastOwnedUsers = allUsers
   177  
   178  	return concurrency.ForEachUser(ctx, allUsers, c.cfg.CleanupConcurrency, func(ctx context.Context, userID string) error {
   179  		if isDeleted[userID] {
   180  			return errors.Wrapf(c.deleteUserMarkedForDeletion(ctx, userID), "failed to delete user marked for deletion: %s", userID)
   181  		}
   182  		return errors.Wrapf(c.cleanUser(ctx, userID, firstRun), "failed to delete blocks for user: %s", userID)
   183  	})
   184  }
   185  
   186  // Remove blocks and remaining data for tenant marked for deletion.
   187  func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userID string) error {
   188  	userLogger := util_log.WithUserID(userID, c.logger)
   189  	userBucket := bucket.NewUserBucketClient(userID, c.bucketClient, c.cfgProvider)
   190  
   191  	level.Info(userLogger).Log("msg", "deleting blocks for tenant marked for deletion")
   192  
   193  	// We immediately delete the bucket index, to signal to its consumers that
   194  	// the tenant has "no blocks" in the storage.
   195  	if err := bucketindex.DeleteIndex(ctx, c.bucketClient, userID, c.cfgProvider); err != nil {
   196  		return err
   197  	}
   198  	c.tenantBucketIndexLastUpdate.DeleteLabelValues(userID)
   199  
   200  	var deletedBlocks, failed int
   201  	err := userBucket.Iter(ctx, "", func(name string) error {
   202  		if err := ctx.Err(); err != nil {
   203  			return err
   204  		}
   205  
   206  		id, ok := block.IsBlockDir(name)
   207  		if !ok {
   208  			return nil
   209  		}
   210  
   211  		err := block.Delete(ctx, userLogger, userBucket, id)
   212  		if err != nil {
   213  			failed++
   214  			c.blocksFailedTotal.Inc()
   215  			level.Warn(userLogger).Log("msg", "failed to delete block", "block", id, "err", err)
   216  			return nil // Continue with other blocks.
   217  		}
   218  
   219  		deletedBlocks++
   220  		c.blocksCleanedTotal.Inc()
   221  		level.Info(userLogger).Log("msg", "deleted block", "block", id)
   222  		return nil
   223  	})
   224  
   225  	if err != nil {
   226  		return err
   227  	}
   228  
   229  	if failed > 0 {
   230  		// The number of blocks left in the storage is equal to the number of blocks we failed
   231  		// to delete. We also consider them all marked for deletion given the next run will try
   232  		// to delete them again.
   233  		c.tenantBlocks.WithLabelValues(userID).Set(float64(failed))
   234  		c.tenantMarkedBlocks.WithLabelValues(userID).Set(float64(failed))
   235  		c.tenantPartialBlocks.WithLabelValues(userID).Set(0)
   236  
   237  		return errors.Errorf("failed to delete %d blocks", failed)
   238  	}
   239  
   240  	// Given all blocks have been deleted, we can also remove the metrics.
   241  	c.tenantBlocks.DeleteLabelValues(userID)
   242  	c.tenantMarkedBlocks.DeleteLabelValues(userID)
   243  	c.tenantPartialBlocks.DeleteLabelValues(userID)
   244  
   245  	if deletedBlocks > 0 {
   246  		level.Info(userLogger).Log("msg", "deleted blocks for tenant marked for deletion", "deletedBlocks", deletedBlocks)
   247  	}
   248  
   249  	mark, err := cortex_tsdb.ReadTenantDeletionMark(ctx, c.bucketClient, userID)
   250  	if err != nil {
   251  		return errors.Wrap(err, "failed to read tenant deletion mark")
   252  	}
   253  	if mark == nil {
   254  		return errors.Wrap(err, "cannot find tenant deletion mark anymore")
   255  	}
   256  
   257  	// If we have just deleted some blocks, update "finished" time. Also update "finished" time if it wasn't set yet, but there are no blocks.
   258  	// Note: this UPDATES the tenant deletion mark. Components that use caching bucket will NOT SEE this update,
   259  	// but that is fine -- they only check whether tenant deletion marker exists or not.
   260  	if deletedBlocks > 0 || mark.FinishedTime == 0 {
   261  		level.Info(userLogger).Log("msg", "updating finished time in tenant deletion mark")
   262  		mark.FinishedTime = time.Now().Unix()
   263  		return errors.Wrap(cortex_tsdb.WriteTenantDeletionMark(ctx, c.bucketClient, userID, c.cfgProvider, mark), "failed to update tenant deletion mark")
   264  	}
   265  
   266  	if time.Since(time.Unix(mark.FinishedTime, 0)) < c.cfg.TenantCleanupDelay {
   267  		return nil
   268  	}
   269  
   270  	level.Info(userLogger).Log("msg", "cleaning up remaining blocks data for tenant marked for deletion")
   271  
   272  	// Let's do final cleanup of tenant.
   273  	if deleted, err := bucket.DeletePrefix(ctx, userBucket, block.DebugMetas, userLogger); err != nil {
   274  		return errors.Wrap(err, "failed to delete "+block.DebugMetas)
   275  	} else if deleted > 0 {
   276  		level.Info(userLogger).Log("msg", "deleted files under "+block.DebugMetas+" for tenant marked for deletion", "count", deleted)
   277  	}
   278  
   279  	// Tenant deletion mark file is inside Markers as well.
   280  	if deleted, err := bucket.DeletePrefix(ctx, userBucket, bucketindex.MarkersPathname, userLogger); err != nil {
   281  		return errors.Wrap(err, "failed to delete marker files")
   282  	} else if deleted > 0 {
   283  		level.Info(userLogger).Log("msg", "deleted marker files for tenant marked for deletion", "count", deleted)
   284  	}
   285  
   286  	return nil
   287  }
   288  
// cleanUser runs the regular per-tenant maintenance:
//
//  1. (first run only, if enabled) migrates legacy block deletion marks to the
//     global markers location.
//  2. Reads the existing bucket index (tolerating a missing or corrupted one).
//  3. Applies the tenant's retention period by marking expired blocks for
//     deletion (best-effort).
//  4. Rebuilds/updates the bucket index in memory.
//  5. Hard-deletes blocks whose deletion mark is older than DeletionDelay,
//     removing them from the in-memory index too.
//  6. Hard-deletes partial blocks that carry a deletion mark (best-effort).
//  7. Writes the updated index back to storage and refreshes per-tenant metrics.
//
// The ordering matters: retention is applied before UpdateIndex so the new
// deletion marks are picked up by the index in the same cycle.
func (c *BlocksCleaner) cleanUser(ctx context.Context, userID string, firstRun bool) (returnErr error) {
	userLogger := util_log.WithUserID(userID, c.logger)
	userBucket := bucket.NewUserBucketClient(userID, c.bucketClient, c.cfgProvider)
	startTime := time.Now()

	level.Info(userLogger).Log("msg", "started blocks cleanup and maintenance")
	defer func() {
		if returnErr != nil {
			level.Warn(userLogger).Log("msg", "failed blocks cleanup and maintenance", "err", returnErr)
		} else {
			level.Info(userLogger).Log("msg", "completed blocks cleanup and maintenance", "duration", time.Since(startTime))
		}
	}()

	// Migrate block deletion marks to the global markers location. This operation is a best-effort.
	if firstRun && c.cfg.BlockDeletionMarksMigrationEnabled {
		if err := bucketindex.MigrateBlockDeletionMarksToGlobalLocation(ctx, c.bucketClient, userID, c.cfgProvider); err != nil {
			level.Warn(userLogger).Log("msg", "failed to migrate block deletion marks to the global markers location", "err", err)
		} else {
			level.Info(userLogger).Log("msg", "migrated block deletion marks to the global markers location")
		}
	}

	// Read the bucket index. A corrupted index is recreated from scratch below
	// (idx stays nil), while any other error except "not found" aborts the run.
	idx, err := bucketindex.ReadIndex(ctx, c.bucketClient, userID, c.cfgProvider, c.logger)
	if errors.Is(err, bucketindex.ErrIndexCorrupted) {
		level.Warn(userLogger).Log("msg", "found a corrupted bucket index, recreating it")
	} else if err != nil && !errors.Is(err, bucketindex.ErrIndexNotFound) {
		return err
	}

	// Mark blocks for future deletion based on the retention period for the user.
	// Note doing this before UpdateIndex, so it reads in the deletion marks.
	// The trade-off being that retention is not applied if the index has to be
	// built, but this is rare.
	if idx != nil {
		// We do not want to stop the remaining work in the cleaner if an
		// error occurs here. Errors are logged in the function.
		retention := c.cfgProvider.CompactorBlocksRetentionPeriod(userID)
		c.applyUserRetentionPeriod(ctx, idx, retention, userBucket, userLogger)
	}

	// Generate an updated in-memory version of the bucket index.
	w := bucketindex.NewUpdater(c.bucketClient, userID, c.cfgProvider, c.logger)
	idx, partials, err := w.UpdateIndex(ctx, idx)
	if err != nil {
		return err
	}

	// Delete blocks marked for deletion. We iterate over a copy of deletion marks because
	// we'll need to manipulate the index (removing blocks which get deleted).
	for _, mark := range idx.BlockDeletionMarks.Clone() {
		// Honor the deletion delay: queriers may still be reading this block.
		if time.Since(mark.GetDeletionTime()).Seconds() <= c.cfg.DeletionDelay.Seconds() {
			continue
		}

		if err := block.Delete(ctx, userLogger, userBucket, mark.ID); err != nil {
			c.blocksFailedTotal.Inc()
			level.Warn(userLogger).Log("msg", "failed to delete block marked for deletion", "block", mark.ID, "err", err)
			continue
		}

		// Remove the block from the bucket index too.
		idx.RemoveBlock(mark.ID)

		c.blocksCleanedTotal.Inc()
		level.Info(userLogger).Log("msg", "deleted block marked for deletion", "block", mark.ID)
	}

	// Partial blocks with a deletion mark can be cleaned up. This is a best effort, so we don't return
	// error if the cleanup of partial blocks fail.
	if len(partials) > 0 {
		c.cleanUserPartialBlocks(ctx, partials, idx, userBucket, userLogger)
	}

	// Upload the updated index to the storage.
	if err := bucketindex.WriteIndex(ctx, c.bucketClient, userID, c.cfgProvider, idx); err != nil {
		return err
	}

	c.tenantBlocks.WithLabelValues(userID).Set(float64(len(idx.Blocks)))
	c.tenantMarkedBlocks.WithLabelValues(userID).Set(float64(len(idx.BlockDeletionMarks)))
	c.tenantPartialBlocks.WithLabelValues(userID).Set(float64(len(partials)))
	c.tenantBucketIndexLastUpdate.WithLabelValues(userID).SetToCurrentTime()

	return nil
}
   376  
   377  // cleanUserPartialBlocks delete partial blocks which are safe to be deleted. The provided partials map
   378  // is updated accordingly.
   379  func (c *BlocksCleaner) cleanUserPartialBlocks(ctx context.Context, partials map[ulid.ULID]error, idx *bucketindex.Index, userBucket objstore.InstrumentedBucket, userLogger log.Logger) {
   380  	for blockID, blockErr := range partials {
   381  		// We can safely delete only blocks which are partial because the meta.json is missing.
   382  		if !errors.Is(blockErr, bucketindex.ErrBlockMetaNotFound) {
   383  			continue
   384  		}
   385  
   386  		// We can safely delete only partial blocks with a deletion mark.
   387  		err := metadata.ReadMarker(ctx, userLogger, userBucket, blockID.String(), &metadata.DeletionMark{})
   388  		if errors.Is(err, metadata.ErrorMarkerNotFound) {
   389  			continue
   390  		}
   391  		if err != nil {
   392  			level.Warn(userLogger).Log("msg", "error reading partial block deletion mark", "block", blockID, "err", err)
   393  			continue
   394  		}
   395  
   396  		// Hard-delete partial blocks having a deletion mark, even if the deletion threshold has not
   397  		// been reached yet.
   398  		if err := block.Delete(ctx, userLogger, userBucket, blockID); err != nil {
   399  			c.blocksFailedTotal.Inc()
   400  			level.Warn(userLogger).Log("msg", "error deleting partial block marked for deletion", "block", blockID, "err", err)
   401  			continue
   402  		}
   403  
   404  		// Remove the block from the bucket index too.
   405  		idx.RemoveBlock(blockID)
   406  		delete(partials, blockID)
   407  
   408  		c.blocksCleanedTotal.Inc()
   409  		level.Info(userLogger).Log("msg", "deleted partial block marked for deletion", "block", blockID)
   410  	}
   411  }
   412  
   413  // applyUserRetentionPeriod marks blocks for deletion which have aged past the retention period.
   414  func (c *BlocksCleaner) applyUserRetentionPeriod(ctx context.Context, idx *bucketindex.Index, retention time.Duration, userBucket objstore.Bucket, userLogger log.Logger) {
   415  	// The retention period of zero is a special value indicating to never delete.
   416  	if retention <= 0 {
   417  		return
   418  	}
   419  
   420  	level.Debug(userLogger).Log("msg", "applying retention", "retention", retention.String())
   421  	blocks := listBlocksOutsideRetentionPeriod(idx, time.Now().Add(-retention))
   422  
   423  	// Attempt to mark all blocks. It is not critical if a marking fails, as
   424  	// the cleaner will retry applying the retention in its next cycle.
   425  	for _, b := range blocks {
   426  		level.Info(userLogger).Log("msg", "applied retention: marking block for deletion", "block", b.ID, "maxTime", b.MaxTime)
   427  		if err := block.MarkForDeletion(ctx, userLogger, userBucket, b.ID, fmt.Sprintf("block exceeding retention of %v", retention), c.blocksMarkedForDeletion); err != nil {
   428  			level.Warn(userLogger).Log("msg", "failed to mark block for deletion", "block", b.ID, "err", err)
   429  		}
   430  	}
   431  }
   432  
   433  // listBlocksOutsideRetentionPeriod determines the blocks which have aged past
   434  // the specified retention period, and are not already marked for deletion.
   435  func listBlocksOutsideRetentionPeriod(idx *bucketindex.Index, threshold time.Time) (result bucketindex.Blocks) {
   436  	// Whilst re-marking a block is not harmful, it is wasteful and generates
   437  	// a warning log message. Use the block deletion marks already in-memory
   438  	// to prevent marking blocks already marked for deletion.
   439  	marked := make(map[ulid.ULID]struct{}, len(idx.BlockDeletionMarks))
   440  	for _, d := range idx.BlockDeletionMarks {
   441  		marked[d.ID] = struct{}{}
   442  	}
   443  
   444  	for _, b := range idx.Blocks {
   445  		maxTime := time.Unix(b.MaxTime/1000, 0)
   446  		if maxTime.Before(threshold) {
   447  			if _, isMarked := marked[b.ID]; !isMarked {
   448  				result = append(result, b)
   449  			}
   450  		}
   451  	}
   452  
   453  	return
   454  }