github.com/grafana/pyroscope@v1.18.0/pkg/compactor/blocks_cleaner.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/blocks_cleaner.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: The Cortex Authors.
     5  
     6  package compactor
     7  
     8  import (
     9  	"context"
    10  	"fmt"
    11  	"strconv"
    12  	"strings"
    13  	"sync"
    14  	"time"
    15  
    16  	"github.com/go-kit/log"
    17  	"github.com/go-kit/log/level"
    18  	"github.com/grafana/dskit/concurrency"
    19  	"github.com/grafana/dskit/services"
    20  	"github.com/oklog/ulid/v2"
    21  	"github.com/pkg/errors"
    22  	"github.com/prometheus/client_golang/prometheus"
    23  	"github.com/prometheus/client_golang/prometheus/promauto"
    24  	thanos_objstore "github.com/thanos-io/objstore"
    25  
    26  	"github.com/grafana/pyroscope/pkg/objstore"
    27  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    28  	"github.com/grafana/pyroscope/pkg/phlaredb/bucket"
    29  	"github.com/grafana/pyroscope/pkg/phlaredb/bucketindex"
    30  	"github.com/grafana/pyroscope/pkg/util"
    31  	"github.com/grafana/pyroscope/pkg/validation"
    32  )
    33  
const (
	// defaultDeleteBlocksConcurrency is the fallback number of goroutines used
	// to delete blocks concurrently within a single tenant.
	defaultDeleteBlocksConcurrency = 16
)
    37  
// BlocksCleanerConfig holds the settings driving the BlocksCleaner behavior.
type BlocksCleanerConfig struct {
	DeletionDelay              time.Duration // How long a block must be marked for deletion before it is hard-deleted.
	CleanupInterval            time.Duration // Interval between periodic cleanup runs.
	CleanupConcurrency         int           // Max number of tenants cleaned up concurrently.
	TenantCleanupDelay         time.Duration // Delay before removing tenant deletion mark and "debug".
	DeleteBlocksConcurrency    int           // Max number of blocks deleted concurrently within a single tenant.
	NoBlocksFileCleanupEnabled bool          // If true, delete the bucket index and markers once a tenant has no blocks left.
}
    46  
// BlocksCleaner is a timer service that periodically hard-deletes blocks
// marked for deletion, applies per-tenant retention, cleans up partial blocks
// and tenants marked for deletion, and keeps tenant bucket indexes up to date.
type BlocksCleaner struct {
	services.Service

	cfg            BlocksCleanerConfig
	cfgProvider    ConfigProvider // Per-tenant configuration (retention, partial block deletion delay, ...).
	logger         log.Logger
	bucketClient   objstore.Bucket
	tenantsScanner *bucket.TenantsScanner
	ownUser        func(userID string) (bool, error)           // Reports whether this instance owns the given tenant.
	singleFlight   *concurrency.LimitedConcurrencySingleFlight // Bounds tenant cleanup concurrency and deduplicates overlapping runs.

	// Keep track of the last owned users.
	lastOwnedUsers []string

	// Metrics.
	runsStarted                    prometheus.Counter
	runsCompleted                  prometheus.Counter
	runsFailed                     prometheus.Counter
	runsLastSuccess                prometheus.Gauge
	blocksCleanedTotal             prometheus.Counter
	blocksFailedTotal              prometheus.Counter
	blocksMarkedForDeletion        prometheus.Counter
	partialBlocksMarkedForDeletion prometheus.Counter
	tenantBlocks                   *prometheus.GaugeVec // Labels: user, compaction_level.
	tenantMarkedBlocks             *prometheus.GaugeVec // Labels: user.
	tenantPartialBlocks            *prometheus.GaugeVec // Labels: user.
	tenantBucketIndexLastUpdate    *prometheus.GaugeVec // Labels: user.
}
    75  
// NewBlocksCleaner creates a BlocksCleaner and registers its metrics with reg.
// The returned service runs an initial synchronous cleanup when started and
// then one cleanup per cfg.CleanupInterval tick; ownUser decides which tenants
// this instance is responsible for.
func NewBlocksCleaner(cfg BlocksCleanerConfig, bucketClient objstore.Bucket, ownUser func(userID string) (bool, error), cfgProvider ConfigProvider, logger log.Logger, reg prometheus.Registerer) *BlocksCleaner {
	c := &BlocksCleaner{
		cfg:            cfg,
		bucketClient:   bucketClient,
		tenantsScanner: bucket.NewTenantsScanner(bucketClient, ownUser, logger),
		ownUser:        ownUser,
		cfgProvider:    cfgProvider,
		singleFlight:   concurrency.NewLimitedConcurrencySingleFlight(cfg.CleanupConcurrency),
		logger:         log.With(logger, "component", "cleaner"),
		runsStarted: promauto.With(reg).NewCounter(prometheus.CounterOpts{
			Name: "pyroscope_compactor_block_cleanup_started_total",
			Help: "Total number of blocks cleanup runs started.",
		}),
		runsCompleted: promauto.With(reg).NewCounter(prometheus.CounterOpts{
			Name: "pyroscope_compactor_block_cleanup_completed_total",
			Help: "Total number of blocks cleanup runs successfully completed.",
		}),
		runsFailed: promauto.With(reg).NewCounter(prometheus.CounterOpts{
			Name: "pyroscope_compactor_block_cleanup_failed_total",
			Help: "Total number of blocks cleanup runs failed.",
		}),
		runsLastSuccess: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
			Name: "pyroscope_compactor_block_cleanup_last_successful_run_timestamp_seconds",
			Help: "Unix timestamp of the last successful blocks cleanup run.",
		}),
		blocksCleanedTotal: promauto.With(reg).NewCounter(prometheus.CounterOpts{
			Name: "pyroscope_compactor_blocks_cleaned_total",
			Help: "Total number of blocks deleted.",
		}),
		blocksFailedTotal: promauto.With(reg).NewCounter(prometheus.CounterOpts{
			Name: "pyroscope_compactor_block_cleanup_failures_total",
			Help: "Total number of blocks failed to be deleted.",
		}),
		// The two counters below share the same metric name and are
		// distinguished only by their const "reason" label.
		blocksMarkedForDeletion: promauto.With(reg).NewCounter(prometheus.CounterOpts{
			Name:        blocksMarkedForDeletionName,
			Help:        blocksMarkedForDeletionHelp,
			ConstLabels: prometheus.Labels{"reason": "retention"},
		}),
		partialBlocksMarkedForDeletion: promauto.With(reg).NewCounter(prometheus.CounterOpts{
			Name:        blocksMarkedForDeletionName,
			Help:        blocksMarkedForDeletionHelp,
			ConstLabels: prometheus.Labels{"reason": "partial"},
		}),

		// The following metrics don't have the "pyroscope_compactor" prefix because not strictly related to
		// the compactor. They're just tracked by the compactor because it's the most logical place where these
		// metrics can be tracked.
		tenantBlocks: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
			Name: "pyroscope_bucket_blocks_count",
			Help: "Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.",
		}, []string{"user", "compaction_level"}),
		tenantMarkedBlocks: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
			Name: "pyroscope_bucket_blocks_marked_for_deletion_count",
			Help: "Total number of blocks marked for deletion in the bucket.",
		}, []string{"user"}),
		tenantPartialBlocks: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
			Name: "pyroscope_bucket_blocks_partials_count",
			Help: "Total number of partial blocks.",
		}, []string{"user"}),
		tenantBucketIndexLastUpdate: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
			Name: "pyroscope_bucket_index_last_successful_update_timestamp_seconds",
			Help: "Timestamp of the last successful update of a tenant's bucket index.",
		}, []string{"user"}),
	}

	c.Service = services.NewTimerService(cfg.CleanupInterval, c.starting, c.ticker, c.stopping)

	return c
}
   145  
   146  func (c *BlocksCleaner) stopping(error) error {
   147  	c.singleFlight.Wait()
   148  	return nil
   149  }
   150  
   151  func (c *BlocksCleaner) starting(ctx context.Context) error {
   152  	// Run an initial cleanup in starting state. (Note that compactor no longer waits
   153  	// for blocks cleaner to finish starting before it starts compactions.)
   154  	c.runCleanup(ctx, false)
   155  
   156  	return nil
   157  }
   158  
   159  func (c *BlocksCleaner) ticker(ctx context.Context) error {
   160  	c.runCleanup(ctx, true)
   161  
   162  	return nil
   163  }
   164  
   165  func (c *BlocksCleaner) runCleanup(ctx context.Context, async bool) {
   166  	// Wrap logger with some unique ID so if runCleanUp does run in parallel with itself, we can
   167  	// at least differentiate the logs in this function for each run.
   168  	logger := log.With(c.logger,
   169  		"run_id", strconv.FormatInt(time.Now().Unix(), 10),
   170  		"task", "clean_up_users",
   171  	)
   172  
   173  	c.instrumentStartedCleanupRun(logger)
   174  
   175  	allUsers, isDeleted, err := c.refreshOwnedUsers(ctx)
   176  	if err != nil {
   177  		c.instrumentFinishedCleanupRun(err, logger)
   178  		return
   179  	}
   180  
   181  	doCleanup := func() {
   182  		err := c.cleanUsers(ctx, allUsers, isDeleted, logger)
   183  		c.instrumentFinishedCleanupRun(err, logger)
   184  	}
   185  
   186  	if async {
   187  		go doCleanup()
   188  	} else {
   189  		doCleanup()
   190  	}
   191  }
   192  
   193  func (c *BlocksCleaner) instrumentStartedCleanupRun(logger log.Logger) {
   194  	level.Info(logger).Log("msg", "started blocks cleanup and maintenance")
   195  	c.runsStarted.Inc()
   196  }
   197  
   198  func (c *BlocksCleaner) instrumentFinishedCleanupRun(err error, logger log.Logger) {
   199  	if err == nil {
   200  		level.Info(logger).Log("msg", "successfully completed blocks cleanup and maintenance")
   201  		c.runsCompleted.Inc()
   202  		c.runsLastSuccess.SetToCurrentTime()
   203  	} else if errors.Is(err, context.Canceled) {
   204  		level.Info(logger).Log("msg", "canceled blocks cleanup and maintenance", "err", err)
   205  		return
   206  	} else {
   207  		level.Error(logger).Log("msg", "failed to run blocks cleanup and maintenance", "err", err.Error())
   208  		c.runsFailed.Inc()
   209  	}
   210  }
   211  
   212  // refreshOwnedUsers is not required to be concurrency safe, but a single instance of this function
   213  // could run concurrently with the cleanup job for any tenant.
   214  func (c *BlocksCleaner) refreshOwnedUsers(ctx context.Context) ([]string, map[string]bool, error) {
   215  	users, deleted, err := c.tenantsScanner.ScanTenants(ctx)
   216  	if err != nil {
   217  		return nil, nil, errors.Wrap(err, "failed to discover users from bucket")
   218  	}
   219  
   220  	isActive := util.StringsMap(users)
   221  	isDeleted := util.StringsMap(deleted)
   222  	allUsers := append(users, deleted...)
   223  
   224  	// Delete per-tenant metrics for all tenants not belonging anymore to this shard.
   225  	// Such tenants have been moved to a different shard, so their updated metrics will
   226  	// be exported by the new shard.
   227  	for _, userID := range c.lastOwnedUsers {
   228  		if !isActive[userID] && !isDeleted[userID] {
   229  			c.tenantBlocks.DeleteLabelValues(userID)
   230  			c.tenantMarkedBlocks.DeleteLabelValues(userID)
   231  			c.tenantPartialBlocks.DeleteLabelValues(userID)
   232  			c.tenantBucketIndexLastUpdate.DeleteLabelValues(userID)
   233  		}
   234  	}
   235  	c.lastOwnedUsers = allUsers
   236  	return allUsers, isDeleted, nil
   237  }
   238  
   239  // cleanUsers must be concurrency-safe because some invocations may take longer and overlap with the next periodic invocation.
   240  func (c *BlocksCleaner) cleanUsers(ctx context.Context, allUsers []string, isDeleted map[string]bool, logger log.Logger) error {
   241  	return c.singleFlight.ForEachNotInFlight(ctx, allUsers, func(ctx context.Context, userID string) error {
   242  		own, err := c.ownUser(userID)
   243  		if err != nil || !own {
   244  			// This returns error only if err != nil. ForEachUser keeps working for other users.
   245  			return errors.Wrap(err, "check own user")
   246  		}
   247  
   248  		userLogger := util.LoggerWithUserID(userID, logger)
   249  		if isDeleted[userID] {
   250  			return errors.Wrapf(c.deleteUserMarkedForDeletion(ctx, userID, userLogger), "failed to delete user marked for deletion: %s", userID)
   251  		}
   252  		return errors.Wrapf(c.cleanUser(ctx, userID, userLogger), "failed to delete blocks for user: %s", userID)
   253  	})
   254  }
   255  
   256  // deleteRemainingData removes any additional files that may remain when a user has no blocks. Should only
   257  // be called when there no more blocks remaining.
   258  func (c *BlocksCleaner) deleteRemainingData(ctx context.Context, userBucket objstore.Bucket, userID string, userLogger log.Logger) error {
   259  	// Delete bucket index
   260  	if err := bucketindex.DeleteIndex(ctx, c.bucketClient, userID, c.cfgProvider); err != nil {
   261  		return errors.Wrap(err, "failed to delete bucket index file")
   262  	}
   263  	level.Info(userLogger).Log("msg", "deleted bucket index for tenant with no blocks remaining")
   264  
   265  	// Delete markers folder
   266  	if deleted, err := objstore.DeletePrefix(ctx, userBucket, block.MarkersPathname, userLogger); err != nil {
   267  		return errors.Wrap(err, "failed to delete marker files")
   268  	} else if deleted > 0 {
   269  		level.Info(userLogger).Log("msg", "deleted marker files for tenant with no blocks remaining", "count", deleted)
   270  	}
   271  
   272  	return nil
   273  }
   274  
   275  // deleteUserMarkedForDeletion removes blocks and remaining data for tenant marked for deletion.
   276  func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userID string, userLogger log.Logger) error {
   277  	userBucket := objstore.NewTenantBucketClient(userID, c.bucketClient, c.cfgProvider)
   278  
   279  	level.Info(userLogger).Log("msg", "deleting blocks for tenant marked for deletion")
   280  
   281  	// We immediately delete the bucket index, to signal to its consumers that
   282  	// the tenant has "no blocks" in the storage.
   283  	if err := bucketindex.DeleteIndex(ctx, c.bucketClient, userID, c.cfgProvider); err != nil {
   284  		return err
   285  	}
   286  	c.tenantBucketIndexLastUpdate.DeleteLabelValues(userID)
   287  
   288  	var deletedBlocks, failed int
   289  	err := userBucket.Iter(ctx, "", func(name string) error {
   290  		if err := ctx.Err(); err != nil {
   291  			return err
   292  		}
   293  
   294  		id, ok := block.IsBlockDir(name)
   295  		if !ok {
   296  			return nil
   297  		}
   298  
   299  		err := block.Delete(ctx, userLogger, userBucket, id)
   300  		if err != nil {
   301  			failed++
   302  			c.blocksFailedTotal.Inc()
   303  			level.Warn(userLogger).Log("msg", "failed to delete block", "block", id, "err", err)
   304  			return nil // Continue with other blocks.
   305  		}
   306  
   307  		deletedBlocks++
   308  		c.blocksCleanedTotal.Inc()
   309  		level.Info(userLogger).Log("msg", "deleted block", "block", id)
   310  		return nil
   311  	})
   312  	if err != nil {
   313  		return err
   314  	}
   315  
   316  	if failed > 0 {
   317  		// The number of blocks left in the storage is equal to the number of blocks we failed
   318  		// to delete. We also consider them all marked for deletion given the next run will try
   319  		// to delete them again.
   320  		c.tenantBlocks.WithLabelValues(userID).Set(float64(failed))
   321  		c.tenantMarkedBlocks.WithLabelValues(userID).Set(float64(failed))
   322  		c.tenantPartialBlocks.WithLabelValues(userID).Set(0)
   323  
   324  		return errors.Errorf("failed to delete %d blocks", failed)
   325  	}
   326  
   327  	// Given all blocks have been deleted, we can also remove the metrics.
   328  	c.tenantBlocks.DeleteLabelValues(userID)
   329  	c.tenantMarkedBlocks.DeleteLabelValues(userID)
   330  	c.tenantPartialBlocks.DeleteLabelValues(userID)
   331  
   332  	if deletedBlocks > 0 {
   333  		level.Info(userLogger).Log("msg", "deleted blocks for tenant marked for deletion", "deletedBlocks", deletedBlocks)
   334  	}
   335  
   336  	mark, err := bucket.ReadTenantDeletionMark(ctx, c.bucketClient, userID)
   337  	if err != nil {
   338  		return errors.Wrap(err, "failed to read tenant deletion mark")
   339  	}
   340  	if mark == nil {
   341  		return fmt.Errorf("cannot find tenant deletion mark anymore")
   342  	}
   343  
   344  	// If we have just deleted some blocks, update "finished" time. Also update "finished" time if it wasn't set yet, but there are no blocks.
   345  	// Note: this UPDATES the tenant deletion mark. Components that use caching bucket will NOT SEE this update,
   346  	// but that is fine -- they only check whether tenant deletion marker exists or not.
   347  	if deletedBlocks > 0 || mark.FinishedTime == 0 {
   348  		level.Info(userLogger).Log("msg", "updating finished time in tenant deletion mark")
   349  		mark.FinishedTime = time.Now().Unix()
   350  		return errors.Wrap(bucket.WriteTenantDeletionMark(ctx, c.bucketClient, userID, c.cfgProvider, mark), "failed to update tenant deletion mark")
   351  	}
   352  
   353  	if time.Since(time.Unix(mark.FinishedTime, 0)) < c.cfg.TenantCleanupDelay {
   354  		return nil
   355  	}
   356  
   357  	level.Info(userLogger).Log("msg", "cleaning up remaining blocks data for tenant marked for deletion")
   358  
   359  	// Let's do final cleanup of markers.
   360  	if deleted, err := objstore.DeletePrefix(ctx, userBucket, block.MarkersPathname, userLogger); err != nil {
   361  		return errors.Wrap(err, "failed to delete marker files")
   362  	} else if deleted > 0 {
   363  		level.Info(userLogger).Log("msg", "deleted marker files for tenant marked for deletion", "count", deleted)
   364  	}
   365  
   366  	return nil
   367  }
   368  
// cleanUser runs a full cleanup and maintenance pass for a single active
// tenant: it applies retention, deletes blocks marked for deletion, cleans up
// partial blocks, and rewrites (or removes) the tenant's bucket index.
func (c *BlocksCleaner) cleanUser(ctx context.Context, userID string, userLogger log.Logger) (returnErr error) {
	userBucket := objstore.NewTenantBucketClient(userID, c.bucketClient, c.cfgProvider)
	startTime := time.Now()

	level.Info(userLogger).Log("msg", "started blocks cleanup and maintenance")
	defer func() {
		if returnErr != nil {
			level.Warn(userLogger).Log("msg", "failed blocks cleanup and maintenance", "err", returnErr, "duration", time.Since(startTime))
		} else {
			level.Info(userLogger).Log("msg", "completed blocks cleanup and maintenance", "duration", time.Since(startTime))
		}
	}()

	// Read the bucket index. A corrupted index is recoverable (it gets
	// rebuilt from the bucket by UpdateIndex below); a missing one is fine
	// for a new tenant. Any other read error aborts the pass.
	idx, err := bucketindex.ReadIndex(ctx, c.bucketClient, userID, c.cfgProvider, userLogger)
	if errors.Is(err, bucketindex.ErrIndexCorrupted) {
		level.Warn(userLogger).Log("msg", "found a corrupted bucket index, recreating it")
	} else if err != nil && !errors.Is(err, bucketindex.ErrIndexNotFound) {
		return err
	}

	level.Info(userLogger).Log("msg", "fetched existing bucket index")

	// Mark blocks for future deletion based on the retention period for the user.
	// Note doing this before UpdateIndex, so it reads in the deletion marks.
	// The trade-off being that retention is not applied if the index has to be
	// built, but this is rare.
	if idx != nil {
		// We do not want to stop the remaining work in the cleaner if an
		// error occurs here. Errors are logged in the function.
		retention := c.cfgProvider.CompactorBlocksRetentionPeriod(userID)
		c.applyUserRetentionPeriod(ctx, idx, retention, userBucket, userLogger)
	}

	// Generate an updated in-memory version of the bucket index.
	w := bucketindex.NewUpdater(c.bucketClient, userID, c.cfgProvider, userLogger)
	idx, partials, err := w.UpdateIndex(ctx, idx)
	if err != nil {
		return err
	}

	// Hard-delete blocks whose deletion delay has elapsed. Best effort: the
	// in-memory index is updated for each block actually removed.
	c.deleteBlocksMarkedForDeletion(ctx, idx, userBucket, userLogger)

	// Partial blocks with a deletion mark can be cleaned up. This is a best effort, so we don't return
	// error if the cleanup of partial blocks fail.
	if len(partials) > 0 {
		var partialDeletionCutoffTime time.Time // zero value, disabled.
		if delay, valid := c.cfgProvider.CompactorPartialBlockDeletionDelay(userID); delay > 0 {
			// enable cleanup of partial blocks without deletion marker
			partialDeletionCutoffTime = time.Now().Add(-delay)
		} else if !valid {
			level.Warn(userLogger).Log("msg", "partial blocks deletion has been disabled for tenant because the delay has been set lower than the minimum value allowed", "minimum", validation.MinCompactorPartialBlockDeletionDelay)
		}

		c.cleanUserPartialBlocks(ctx, partials, idx, partialDeletionCutoffTime, userBucket, userLogger)
		level.Info(userLogger).Log("msg", "cleaned up partial blocks", "partials", len(partials))
	}

	// If there are no more blocks, clean up any remaining files
	// Otherwise upload the updated index to the storage.
	if c.cfg.NoBlocksFileCleanupEnabled && len(idx.Blocks) == 0 {
		if err := c.deleteRemainingData(ctx, userBucket, userID, userLogger); err != nil {
			return err
		}
	} else {
		if err := bucketindex.WriteIndex(ctx, c.bucketClient, userID, c.cfgProvider, idx); err != nil {
			return err
		}
	}

	// Export the per-tenant metrics from the freshly updated index.
	c.updateBlockCountMetrics(userID, idx)
	c.tenantMarkedBlocks.WithLabelValues(userID).Set(float64(len(idx.BlockDeletionMarks)))
	c.tenantPartialBlocks.WithLabelValues(userID).Set(float64(len(partials)))
	c.tenantBucketIndexLastUpdate.WithLabelValues(userID).SetToCurrentTime()

	return nil
}
   446  
   447  func (c *BlocksCleaner) updateBlockCountMetrics(userID string, idx *bucketindex.Index) {
   448  	blocksPerCompactionLevel := make(map[int]int)
   449  	for _, blk := range idx.Blocks {
   450  		blocksPerCompactionLevel[blk.CompactionLevel]++
   451  	}
   452  	c.tenantBlocks.DeletePartialMatch(map[string]string{"user": userID})
   453  	for compactionLevel, count := range blocksPerCompactionLevel {
   454  		c.tenantBlocks.WithLabelValues(userID, strconv.Itoa(compactionLevel)).Set(float64(count))
   455  	}
   456  }
   457  
// deleteBlocksMarkedForDeletion concurrently deletes blocks marked for
// deletion whose deletion delay has elapsed, removing each successfully
// deleted block from the in-memory index. Per-block failures are logged and
// counted, never returned.
func (c *BlocksCleaner) deleteBlocksMarkedForDeletion(ctx context.Context, idx *bucketindex.Index, userBucket objstore.Bucket, userLogger log.Logger) {
	blocksToDelete := make([]ulid.ULID, 0, len(idx.BlockDeletionMarks))

	// Collect the blocks marked for deletion whose DeletionDelay has elapsed.
	for _, mark := range idx.BlockDeletionMarks {
		if time.Since(mark.GetDeletionTime()).Seconds() <= c.cfg.DeletionDelay.Seconds() {
			continue
		}
		blocksToDelete = append(blocksToDelete, mark.ID)
	}

	// Guards idx, which is mutated from concurrent deletion goroutines.
	var mu sync.Mutex

	// We don't want to return errors from our function, as that would stop ForEach loop early.
	_ = concurrency.ForEachJob(ctx, len(blocksToDelete), c.cfg.DeleteBlocksConcurrency, func(ctx context.Context, jobIdx int) error {
		blockID := blocksToDelete[jobIdx]

		if err := block.Delete(ctx, userLogger, userBucket, blockID); err != nil {
			c.blocksFailedTotal.Inc()
			level.Warn(userLogger).Log("msg", "failed to delete block marked for deletion", "block", blockID, "err", err)
			return nil // Continue with the other blocks.
		}

		// Remove the block from the bucket index too.
		mu.Lock()
		idx.RemoveBlock(blockID)
		mu.Unlock()

		c.blocksCleanedTotal.Inc()
		level.Info(userLogger).Log("msg", "deleted block marked for deletion", "block", blockID)
		return nil
	})
}
   492  
// cleanUserPartialBlocks deletes partial blocks which are safe to be deleted. The provided index is updated accordingly.
// partialDeletionCutoffTime, if not zero, is used to find blocks without deletion marker that were last modified before this time. Such blocks will be marked for deletion.
func (c *BlocksCleaner) cleanUserPartialBlocks(ctx context.Context, partials map[ulid.ULID]error, idx *bucketindex.Index, partialDeletionCutoffTime time.Time, userBucket objstore.InstrumentedBucket, userLogger log.Logger) {
	// Collect the candidate blocks: only those partial because meta.json is missing.
	blocks := make([]ulid.ULID, 0, len(partials))

	for blockID, blockErr := range partials {
		// We can safely delete only blocks which are partial because the meta.json is missing.
		if !errors.Is(blockErr, bucketindex.ErrBlockMetaNotFound) {
			continue
		}
		blocks = append(blocks, blockID)
	}

	// mu guards idx, partials and partialBlocksWithoutDeletionMarker, all
	// mutated from the concurrent jobs below.
	var mu sync.Mutex
	var partialBlocksWithoutDeletionMarker []ulid.ULID

	// We don't want to return errors from our function, as that would stop ForEach loop early.
	_ = concurrency.ForEachJob(ctx, len(blocks), c.cfg.DeleteBlocksConcurrency, func(ctx context.Context, jobIdx int) error {
		blockID := blocks[jobIdx]

		// We can safely delete only partial blocks with a deletion mark.
		err := block.ReadMarker(ctx, userLogger, userBucket, blockID.String(), &block.DeletionMark{})
		if errors.Is(err, block.ErrorMarkerNotFound) {
			// No deletion mark: remember the block for the staleness check below.
			mu.Lock()
			partialBlocksWithoutDeletionMarker = append(partialBlocksWithoutDeletionMarker, blockID)
			mu.Unlock()
			return nil
		}
		if err != nil {
			level.Warn(userLogger).Log("msg", "error reading partial block deletion mark", "block", blockID, "err", err)
			return nil
		}

		// Hard-delete partial blocks having a deletion mark, even if the deletion threshold has not
		// been reached yet.
		if err := block.Delete(ctx, userLogger, userBucket, blockID); err != nil {
			c.blocksFailedTotal.Inc()
			level.Warn(userLogger).Log("msg", "error deleting partial block marked for deletion", "block", blockID, "err", err)
			return nil
		}

		// Remove the block from the bucket index too.
		mu.Lock()
		idx.RemoveBlock(blockID)
		delete(partials, blockID)
		mu.Unlock()

		c.blocksCleanedTotal.Inc()
		level.Info(userLogger).Log("msg", "deleted partial block marked for deletion", "block", blockID)
		return nil
	})

	// Check if partial blocks are older than delay period, and mark for deletion.
	// They are only deleted (above) on a later pass, once the mark ages past DeletionDelay.
	if !partialDeletionCutoffTime.IsZero() {
		for _, blockID := range partialBlocksWithoutDeletionMarker {
			lastModified, err := stalePartialBlockLastModifiedTime(ctx, blockID, userBucket, partialDeletionCutoffTime)
			if err != nil {
				level.Warn(userLogger).Log("msg", "failed while determining if partial block should be marked for deletion", "block", blockID, "err", err)
				continue
			}
			// A non-zero time means every file in the block is older than the cutoff.
			if !lastModified.IsZero() {
				level.Info(userLogger).Log("msg", "stale partial block found: marking block for deletion", "block", blockID, "last modified", lastModified)
				if err := block.MarkForDeletion(ctx, userLogger, userBucket, blockID, "stale partial block", false, c.partialBlocksMarkedForDeletion); err != nil {
					level.Warn(userLogger).Log("msg", "failed to mark partial block for deletion", "block", blockID, "err", err)
				}
			}
		}
	}
}
   563  
   564  // applyUserRetentionPeriod marks blocks for deletion which have aged past the retention period.
   565  func (c *BlocksCleaner) applyUserRetentionPeriod(ctx context.Context, idx *bucketindex.Index, retention time.Duration, userBucket objstore.Bucket, userLogger log.Logger) {
   566  	// The retention period of zero is a special value indicating to never delete.
   567  	if retention <= 0 {
   568  		return
   569  	}
   570  
   571  	blocks := listBlocksOutsideRetentionPeriod(idx, time.Now().Add(-retention))
   572  
   573  	// Attempt to mark all blocks. It is not critical if a marking fails, as
   574  	// the cleaner will retry applying the retention in its next cycle.
   575  	for _, b := range blocks {
   576  		level.Info(userLogger).Log("msg", "applied retention: marking block for deletion", "block", b.ID, "maxTime", b.MaxTime)
   577  		if err := block.MarkForDeletion(ctx, userLogger, userBucket, b.ID, fmt.Sprintf("block exceeding retention of %v", retention), false, c.blocksMarkedForDeletion); err != nil {
   578  			level.Warn(userLogger).Log("msg", "failed to mark block for deletion", "block", b.ID, "err", err)
   579  		}
   580  	}
   581  	level.Info(userLogger).Log("msg", "marked blocks for deletion", "num_blocks", len(blocks), "retention", retention.String())
   582  }
   583  
   584  // listBlocksOutsideRetentionPeriod determines the blocks which have aged past
   585  // the specified retention period, and are not already marked for deletion.
   586  func listBlocksOutsideRetentionPeriod(idx *bucketindex.Index, threshold time.Time) (result bucketindex.Blocks) {
   587  	// Whilst re-marking a block is not harmful, it is wasteful and generates
   588  	// a warning log message. Use the block deletion marks already in-memory
   589  	// to prevent marking blocks already marked for deletion.
   590  	marked := make(map[ulid.ULID]struct{}, len(idx.BlockDeletionMarks))
   591  	for _, d := range idx.BlockDeletionMarks {
   592  		marked[d.ID] = struct{}{}
   593  	}
   594  
   595  	for _, b := range idx.Blocks {
   596  		maxTime := time.Unix(int64(b.MaxTime)/1000, 0)
   597  		if maxTime.Before(threshold) {
   598  			if _, isMarked := marked[b.ID]; !isMarked {
   599  				result = append(result, b)
   600  			}
   601  		}
   602  	}
   603  
   604  	return
   605  }
   606  
   607  var errStopIter = errors.New("stop iteration")
   608  
   609  // stalePartialBlockLastModifiedTime returns the most recent last modified time of a stale partial block, or the zero value of time.Time if the provided block wasn't a stale partial block
   610  func stalePartialBlockLastModifiedTime(ctx context.Context, blockID ulid.ULID, userBucket objstore.InstrumentedBucket, partialDeletionCutoffTime time.Time) (time.Time, error) {
   611  	var lastModified time.Time
   612  	err := userBucket.WithExpectedErrs(func(err error) bool {
   613  		return errors.Is(err, errStopIter) // sentinel error
   614  	}).Iter(ctx, blockID.String(), func(name string) error {
   615  		if strings.HasSuffix(name, thanos_objstore.DirDelim) {
   616  			return nil
   617  		}
   618  		attrib, err := userBucket.Attributes(ctx, name)
   619  		if err != nil {
   620  			return errors.Wrapf(err, "failed to get attributes for %s", name)
   621  		}
   622  		if attrib.LastModified.After(partialDeletionCutoffTime) {
   623  			return errStopIter
   624  		}
   625  		if attrib.LastModified.After(lastModified) {
   626  			lastModified = attrib.LastModified
   627  		}
   628  		return nil
   629  	}, thanos_objstore.WithRecursiveIter())
   630  
   631  	if errors.Is(err, errStopIter) {
   632  		return time.Time{}, nil
   633  	}
   634  	return lastModified, err
   635  }