github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/compactor/blocks_cleaner_test.go (about)

     1  package compactor
     2  
     3  import (
     4  	"context"
     5  	"crypto/rand"
     6  	"errors"
     7  	"fmt"
     8  	"path"
     9  	"strings"
    10  	"testing"
    11  	"time"
    12  
    13  	"github.com/go-kit/log"
    14  	"github.com/grafana/dskit/services"
    15  	"github.com/oklog/ulid"
    16  	"github.com/prometheus/client_golang/prometheus"
    17  	"github.com/prometheus/client_golang/prometheus/testutil"
    18  	prom_testutil "github.com/prometheus/client_golang/prometheus/testutil"
    19  	"github.com/stretchr/testify/assert"
    20  	"github.com/stretchr/testify/require"
    21  	"github.com/thanos-io/thanos/pkg/block"
    22  	"github.com/thanos-io/thanos/pkg/block/metadata"
    23  	"github.com/thanos-io/thanos/pkg/objstore"
    24  
    25  	"github.com/cortexproject/cortex/pkg/storage/tsdb"
    26  	"github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex"
    27  	cortex_testutil "github.com/cortexproject/cortex/pkg/storage/tsdb/testutil"
    28  	"github.com/cortexproject/cortex/pkg/util"
    29  )
    30  
    31  type testBlocksCleanerOptions struct {
    32  	concurrency             int
    33  	markersMigrationEnabled bool
    34  	tenantDeletionDelay     time.Duration
    35  	user4FilesExist         bool // User 4 has "FinishedTime" in tenant deletion marker set to "1h" ago.
    36  }
    37  
    38  func (o testBlocksCleanerOptions) String() string {
    39  	return fmt.Sprintf("concurrency=%d, markers migration enabled=%v, tenant deletion delay=%v",
    40  		o.concurrency, o.markersMigrationEnabled, o.tenantDeletionDelay)
    41  }
    42  
    43  func TestBlocksCleaner(t *testing.T) {
    44  	for _, options := range []testBlocksCleanerOptions{
    45  		{concurrency: 1, tenantDeletionDelay: 0, user4FilesExist: false},
    46  		{concurrency: 1, tenantDeletionDelay: 2 * time.Hour, user4FilesExist: true},
    47  		{concurrency: 1, markersMigrationEnabled: true},
    48  		{concurrency: 2},
    49  		{concurrency: 10},
    50  	} {
    51  		options := options
    52  
    53  		t.Run(options.String(), func(t *testing.T) {
    54  			t.Parallel()
    55  			testBlocksCleanerWithOptions(t, options)
    56  		})
    57  	}
    58  }
    59  
    60  func testBlocksCleanerWithOptions(t *testing.T, options testBlocksCleanerOptions) {
    61  	bucketClient, _ := cortex_testutil.PrepareFilesystemBucket(t)
    62  
    63  	// If the markers migration is enabled, then we create the fixture blocks without
    64  	// writing the deletion marks in the global location, because they will be migrated
    65  	// at statup.
    66  	if !options.markersMigrationEnabled {
    67  		bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient)
    68  	}
    69  
    70  	// Create blocks.
    71  	ctx := context.Background()
    72  	now := time.Now()
    73  	deletionDelay := 12 * time.Hour
    74  	block1 := createTSDBBlock(t, bucketClient, "user-1", 10, 20, nil)
    75  	block2 := createTSDBBlock(t, bucketClient, "user-1", 20, 30, nil)
    76  	block3 := createTSDBBlock(t, bucketClient, "user-1", 30, 40, nil)
    77  	block4 := ulid.MustNew(4, rand.Reader)
    78  	block5 := ulid.MustNew(5, rand.Reader)
    79  	block6 := createTSDBBlock(t, bucketClient, "user-1", 40, 50, nil)
    80  	block7 := createTSDBBlock(t, bucketClient, "user-2", 10, 20, nil)
    81  	block8 := createTSDBBlock(t, bucketClient, "user-2", 40, 50, nil)
    82  	createDeletionMark(t, bucketClient, "user-1", block2, now.Add(-deletionDelay).Add(time.Hour))             // Block hasn't reached the deletion threshold yet.
    83  	createDeletionMark(t, bucketClient, "user-1", block3, now.Add(-deletionDelay).Add(-time.Hour))            // Block reached the deletion threshold.
    84  	createDeletionMark(t, bucketClient, "user-1", block4, now.Add(-deletionDelay).Add(time.Hour))             // Partial block hasn't reached the deletion threshold yet.
    85  	createDeletionMark(t, bucketClient, "user-1", block5, now.Add(-deletionDelay).Add(-time.Hour))            // Partial block reached the deletion threshold.
    86  	require.NoError(t, bucketClient.Delete(ctx, path.Join("user-1", block6.String(), metadata.MetaFilename))) // Partial block without deletion mark.
    87  	createDeletionMark(t, bucketClient, "user-2", block7, now.Add(-deletionDelay).Add(-time.Hour))            // Block reached the deletion threshold.
    88  
    89  	// Blocks for user-3, marked for deletion.
    90  	require.NoError(t, tsdb.WriteTenantDeletionMark(context.Background(), bucketClient, "user-3", nil, tsdb.NewTenantDeletionMark(time.Now())))
    91  	block9 := createTSDBBlock(t, bucketClient, "user-3", 10, 30, nil)
    92  	block10 := createTSDBBlock(t, bucketClient, "user-3", 30, 50, nil)
    93  
    94  	// User-4 with no more blocks, but couple of mark and debug files. Should be fully deleted.
    95  	user4Mark := tsdb.NewTenantDeletionMark(time.Now())
    96  	user4Mark.FinishedTime = time.Now().Unix() - 60 // Set to check final user cleanup.
    97  	require.NoError(t, tsdb.WriteTenantDeletionMark(context.Background(), bucketClient, "user-4", nil, user4Mark))
    98  	user4DebugMetaFile := path.Join("user-4", block.DebugMetas, "meta.json")
    99  	require.NoError(t, bucketClient.Upload(context.Background(), user4DebugMetaFile, strings.NewReader("some random content here")))
   100  
   101  	// The fixtures have been created. If the bucket client wasn't wrapped to write
   102  	// deletion marks to the global location too, then this is the right time to do it.
   103  	if options.markersMigrationEnabled {
   104  		bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient)
   105  	}
   106  
   107  	cfg := BlocksCleanerConfig{
   108  		DeletionDelay:                      deletionDelay,
   109  		CleanupInterval:                    time.Minute,
   110  		CleanupConcurrency:                 options.concurrency,
   111  		BlockDeletionMarksMigrationEnabled: options.markersMigrationEnabled,
   112  		TenantCleanupDelay:                 options.tenantDeletionDelay,
   113  	}
   114  
   115  	reg := prometheus.NewPedanticRegistry()
   116  	logger := log.NewNopLogger()
   117  	scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger)
   118  	cfgProvider := newMockConfigProvider()
   119  
   120  	cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, reg)
   121  	require.NoError(t, services.StartAndAwaitRunning(ctx, cleaner))
   122  	defer services.StopAndAwaitTerminated(ctx, cleaner) //nolint:errcheck
   123  
   124  	for _, tc := range []struct {
   125  		path           string
   126  		expectedExists bool
   127  	}{
   128  		// Check the storage to ensure only the block which has reached the deletion threshold
   129  		// has been effectively deleted.
   130  		{path: path.Join("user-1", block1.String(), metadata.MetaFilename), expectedExists: true},
   131  		{path: path.Join("user-1", block3.String(), metadata.MetaFilename), expectedExists: false},
   132  		{path: path.Join("user-2", block7.String(), metadata.MetaFilename), expectedExists: false},
   133  		{path: path.Join("user-2", block8.String(), metadata.MetaFilename), expectedExists: true},
   134  		// Should not delete a block with deletion mark who hasn't reached the deletion threshold yet.
   135  		{path: path.Join("user-1", block2.String(), metadata.MetaFilename), expectedExists: true},
   136  		{path: path.Join("user-1", bucketindex.BlockDeletionMarkFilepath(block2)), expectedExists: true},
   137  		// Should delete a partial block with deletion mark who hasn't reached the deletion threshold yet.
   138  		{path: path.Join("user-1", block4.String(), metadata.DeletionMarkFilename), expectedExists: false},
   139  		{path: path.Join("user-1", bucketindex.BlockDeletionMarkFilepath(block4)), expectedExists: false},
   140  		// Should delete a partial block with deletion mark who has reached the deletion threshold.
   141  		{path: path.Join("user-1", block5.String(), metadata.DeletionMarkFilename), expectedExists: false},
   142  		{path: path.Join("user-1", bucketindex.BlockDeletionMarkFilepath(block5)), expectedExists: false},
   143  		// Should not delete a partial block without deletion mark.
   144  		{path: path.Join("user-1", block6.String(), "index"), expectedExists: true},
   145  		// Should completely delete blocks for user-3, marked for deletion
   146  		{path: path.Join("user-3", block9.String(), metadata.MetaFilename), expectedExists: false},
   147  		{path: path.Join("user-3", block9.String(), "index"), expectedExists: false},
   148  		{path: path.Join("user-3", block10.String(), metadata.MetaFilename), expectedExists: false},
   149  		{path: path.Join("user-3", block10.String(), "index"), expectedExists: false},
   150  		// Tenant deletion mark is not removed.
   151  		{path: path.Join("user-3", tsdb.TenantDeletionMarkPath), expectedExists: true},
   152  		// User-4 is removed fully.
   153  		{path: path.Join("user-4", tsdb.TenantDeletionMarkPath), expectedExists: options.user4FilesExist},
   154  		{path: path.Join("user-4", block.DebugMetas, "meta.json"), expectedExists: options.user4FilesExist},
   155  	} {
   156  		exists, err := bucketClient.Exists(ctx, tc.path)
   157  		require.NoError(t, err)
   158  		assert.Equal(t, tc.expectedExists, exists, tc.path)
   159  	}
   160  
   161  	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsStarted))
   162  	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsCompleted))
   163  	assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.runsFailed))
   164  	assert.Equal(t, float64(6), testutil.ToFloat64(cleaner.blocksCleanedTotal))
   165  	assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.blocksFailedTotal))
   166  
   167  	// Check the updated bucket index.
   168  	for _, tc := range []struct {
   169  		userID         string
   170  		expectedIndex  bool
   171  		expectedBlocks []ulid.ULID
   172  		expectedMarks  []ulid.ULID
   173  	}{
   174  		{
   175  			userID:         "user-1",
   176  			expectedIndex:  true,
   177  			expectedBlocks: []ulid.ULID{block1, block2 /* deleted: block3, block4, block5, partial: block6 */},
   178  			expectedMarks:  []ulid.ULID{block2},
   179  		}, {
   180  			userID:         "user-2",
   181  			expectedIndex:  true,
   182  			expectedBlocks: []ulid.ULID{block8},
   183  			expectedMarks:  []ulid.ULID{},
   184  		}, {
   185  			userID:        "user-3",
   186  			expectedIndex: false,
   187  		},
   188  	} {
   189  		idx, err := bucketindex.ReadIndex(ctx, bucketClient, tc.userID, nil, logger)
   190  		if !tc.expectedIndex {
   191  			assert.Equal(t, bucketindex.ErrIndexNotFound, err)
   192  			continue
   193  		}
   194  
   195  		require.NoError(t, err)
   196  		assert.ElementsMatch(t, tc.expectedBlocks, idx.Blocks.GetULIDs())
   197  		assert.ElementsMatch(t, tc.expectedMarks, idx.BlockDeletionMarks.GetULIDs())
   198  	}
   199  
   200  	assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(`
   201  		# HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
   202  		# TYPE cortex_bucket_blocks_count gauge
   203  		cortex_bucket_blocks_count{user="user-1"} 2
   204  		cortex_bucket_blocks_count{user="user-2"} 1
   205  		# HELP cortex_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   206  		# TYPE cortex_bucket_blocks_marked_for_deletion_count gauge
   207  		cortex_bucket_blocks_marked_for_deletion_count{user="user-1"} 1
   208  		cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0
   209  		# HELP cortex_bucket_blocks_partials_count Total number of partial blocks.
   210  		# TYPE cortex_bucket_blocks_partials_count gauge
   211  		cortex_bucket_blocks_partials_count{user="user-1"} 2
   212  		cortex_bucket_blocks_partials_count{user="user-2"} 0
   213  	`),
   214  		"cortex_bucket_blocks_count",
   215  		"cortex_bucket_blocks_marked_for_deletion_count",
   216  		"cortex_bucket_blocks_partials_count",
   217  	))
   218  }
   219  
   220  func TestBlocksCleaner_ShouldContinueOnBlockDeletionFailure(t *testing.T) {
   221  	const userID = "user-1"
   222  
   223  	bucketClient, _ := cortex_testutil.PrepareFilesystemBucket(t)
   224  	bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient)
   225  
   226  	// Create blocks.
   227  	ctx := context.Background()
   228  	now := time.Now()
   229  	deletionDelay := 12 * time.Hour
   230  	block1 := createTSDBBlock(t, bucketClient, userID, 10, 20, nil)
   231  	block2 := createTSDBBlock(t, bucketClient, userID, 20, 30, nil)
   232  	block3 := createTSDBBlock(t, bucketClient, userID, 30, 40, nil)
   233  	block4 := createTSDBBlock(t, bucketClient, userID, 40, 50, nil)
   234  	createDeletionMark(t, bucketClient, userID, block2, now.Add(-deletionDelay).Add(-time.Hour))
   235  	createDeletionMark(t, bucketClient, userID, block3, now.Add(-deletionDelay).Add(-time.Hour))
   236  	createDeletionMark(t, bucketClient, userID, block4, now.Add(-deletionDelay).Add(-time.Hour))
   237  
   238  	// To emulate a failure deleting a block, we wrap the bucket client in a mocked one.
   239  	bucketClient = &mockBucketFailure{
   240  		Bucket:         bucketClient,
   241  		DeleteFailures: []string{path.Join(userID, block3.String(), metadata.MetaFilename)},
   242  	}
   243  
   244  	cfg := BlocksCleanerConfig{
   245  		DeletionDelay:      deletionDelay,
   246  		CleanupInterval:    time.Minute,
   247  		CleanupConcurrency: 1,
   248  	}
   249  
   250  	logger := log.NewNopLogger()
   251  	scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger)
   252  	cfgProvider := newMockConfigProvider()
   253  
   254  	cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, nil)
   255  	require.NoError(t, services.StartAndAwaitRunning(ctx, cleaner))
   256  	defer services.StopAndAwaitTerminated(ctx, cleaner) //nolint:errcheck
   257  
   258  	for _, tc := range []struct {
   259  		path           string
   260  		expectedExists bool
   261  	}{
   262  		{path: path.Join(userID, block1.String(), metadata.MetaFilename), expectedExists: true},
   263  		{path: path.Join(userID, block2.String(), metadata.MetaFilename), expectedExists: false},
   264  		{path: path.Join(userID, block3.String(), metadata.MetaFilename), expectedExists: true},
   265  		{path: path.Join(userID, block4.String(), metadata.MetaFilename), expectedExists: false},
   266  	} {
   267  		exists, err := bucketClient.Exists(ctx, tc.path)
   268  		require.NoError(t, err)
   269  		assert.Equal(t, tc.expectedExists, exists, tc.path)
   270  	}
   271  
   272  	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsStarted))
   273  	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsCompleted))
   274  	assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.runsFailed))
   275  	assert.Equal(t, float64(2), testutil.ToFloat64(cleaner.blocksCleanedTotal))
   276  	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.blocksFailedTotal))
   277  
   278  	// Check the updated bucket index.
   279  	idx, err := bucketindex.ReadIndex(ctx, bucketClient, userID, nil, logger)
   280  	require.NoError(t, err)
   281  	assert.ElementsMatch(t, []ulid.ULID{block1, block3}, idx.Blocks.GetULIDs())
   282  	assert.ElementsMatch(t, []ulid.ULID{block3}, idx.BlockDeletionMarks.GetULIDs())
   283  }
   284  
   285  func TestBlocksCleaner_ShouldRebuildBucketIndexOnCorruptedOne(t *testing.T) {
   286  	const userID = "user-1"
   287  
   288  	bucketClient, _ := cortex_testutil.PrepareFilesystemBucket(t)
   289  	bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient)
   290  
   291  	// Create blocks.
   292  	ctx := context.Background()
   293  	now := time.Now()
   294  	deletionDelay := 12 * time.Hour
   295  	block1 := createTSDBBlock(t, bucketClient, userID, 10, 20, nil)
   296  	block2 := createTSDBBlock(t, bucketClient, userID, 20, 30, nil)
   297  	block3 := createTSDBBlock(t, bucketClient, userID, 30, 40, nil)
   298  	createDeletionMark(t, bucketClient, userID, block2, now.Add(-deletionDelay).Add(-time.Hour))
   299  	createDeletionMark(t, bucketClient, userID, block3, now.Add(-deletionDelay).Add(time.Hour))
   300  
   301  	// Write a corrupted bucket index.
   302  	require.NoError(t, bucketClient.Upload(ctx, path.Join(userID, bucketindex.IndexCompressedFilename), strings.NewReader("invalid!}")))
   303  
   304  	cfg := BlocksCleanerConfig{
   305  		DeletionDelay:      deletionDelay,
   306  		CleanupInterval:    time.Minute,
   307  		CleanupConcurrency: 1,
   308  	}
   309  
   310  	logger := log.NewNopLogger()
   311  	scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger)
   312  	cfgProvider := newMockConfigProvider()
   313  
   314  	cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, nil)
   315  	require.NoError(t, services.StartAndAwaitRunning(ctx, cleaner))
   316  	defer services.StopAndAwaitTerminated(ctx, cleaner) //nolint:errcheck
   317  
   318  	for _, tc := range []struct {
   319  		path           string
   320  		expectedExists bool
   321  	}{
   322  		{path: path.Join(userID, block1.String(), metadata.MetaFilename), expectedExists: true},
   323  		{path: path.Join(userID, block2.String(), metadata.MetaFilename), expectedExists: false},
   324  		{path: path.Join(userID, block3.String(), metadata.MetaFilename), expectedExists: true},
   325  	} {
   326  		exists, err := bucketClient.Exists(ctx, tc.path)
   327  		require.NoError(t, err)
   328  		assert.Equal(t, tc.expectedExists, exists, tc.path)
   329  	}
   330  
   331  	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsStarted))
   332  	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsCompleted))
   333  	assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.runsFailed))
   334  	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.blocksCleanedTotal))
   335  	assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.blocksFailedTotal))
   336  
   337  	// Check the updated bucket index.
   338  	idx, err := bucketindex.ReadIndex(ctx, bucketClient, userID, nil, logger)
   339  	require.NoError(t, err)
   340  	assert.ElementsMatch(t, []ulid.ULID{block1, block3}, idx.Blocks.GetULIDs())
   341  	assert.ElementsMatch(t, []ulid.ULID{block3}, idx.BlockDeletionMarks.GetULIDs())
   342  }
   343  
   344  func TestBlocksCleaner_ShouldRemoveMetricsForTenantsNotBelongingAnymoreToTheShard(t *testing.T) {
   345  	bucketClient, _ := cortex_testutil.PrepareFilesystemBucket(t)
   346  	bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient)
   347  
   348  	// Create blocks.
   349  	createTSDBBlock(t, bucketClient, "user-1", 10, 20, nil)
   350  	createTSDBBlock(t, bucketClient, "user-1", 20, 30, nil)
   351  	createTSDBBlock(t, bucketClient, "user-2", 30, 40, nil)
   352  
   353  	cfg := BlocksCleanerConfig{
   354  		DeletionDelay:      time.Hour,
   355  		CleanupInterval:    time.Minute,
   356  		CleanupConcurrency: 1,
   357  	}
   358  
   359  	ctx := context.Background()
   360  	logger := log.NewNopLogger()
   361  	reg := prometheus.NewPedanticRegistry()
   362  	scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger)
   363  	cfgProvider := newMockConfigProvider()
   364  
   365  	cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, reg)
   366  	require.NoError(t, cleaner.cleanUsers(ctx, true))
   367  
   368  	assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(`
   369  		# HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
   370  		# TYPE cortex_bucket_blocks_count gauge
   371  		cortex_bucket_blocks_count{user="user-1"} 2
   372  		cortex_bucket_blocks_count{user="user-2"} 1
   373  		# HELP cortex_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   374  		# TYPE cortex_bucket_blocks_marked_for_deletion_count gauge
   375  		cortex_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
   376  		cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0
   377  		# HELP cortex_bucket_blocks_partials_count Total number of partial blocks.
   378  		# TYPE cortex_bucket_blocks_partials_count gauge
   379  		cortex_bucket_blocks_partials_count{user="user-1"} 0
   380  		cortex_bucket_blocks_partials_count{user="user-2"} 0
   381  	`),
   382  		"cortex_bucket_blocks_count",
   383  		"cortex_bucket_blocks_marked_for_deletion_count",
   384  		"cortex_bucket_blocks_partials_count",
   385  	))
   386  
   387  	// Override the users scanner to reconfigure it to only return a subset of users.
   388  	cleaner.usersScanner = tsdb.NewUsersScanner(bucketClient, func(userID string) (bool, error) { return userID == "user-1", nil }, logger)
   389  
   390  	// Create new blocks, to double check expected metrics have changed.
   391  	createTSDBBlock(t, bucketClient, "user-1", 40, 50, nil)
   392  	createTSDBBlock(t, bucketClient, "user-2", 50, 60, nil)
   393  
   394  	require.NoError(t, cleaner.cleanUsers(ctx, false))
   395  
   396  	assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(`
   397  		# HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
   398  		# TYPE cortex_bucket_blocks_count gauge
   399  		cortex_bucket_blocks_count{user="user-1"} 3
   400  		# HELP cortex_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   401  		# TYPE cortex_bucket_blocks_marked_for_deletion_count gauge
   402  		cortex_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
   403  		# HELP cortex_bucket_blocks_partials_count Total number of partial blocks.
   404  		# TYPE cortex_bucket_blocks_partials_count gauge
   405  		cortex_bucket_blocks_partials_count{user="user-1"} 0
   406  	`),
   407  		"cortex_bucket_blocks_count",
   408  		"cortex_bucket_blocks_marked_for_deletion_count",
   409  		"cortex_bucket_blocks_partials_count",
   410  	))
   411  }
   412  
   413  func TestBlocksCleaner_ListBlocksOutsideRetentionPeriod(t *testing.T) {
   414  	bucketClient, _ := cortex_testutil.PrepareFilesystemBucket(t)
   415  	bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient)
   416  	ctx := context.Background()
   417  	logger := log.NewNopLogger()
   418  
   419  	id1 := createTSDBBlock(t, bucketClient, "user-1", 5000, 6000, nil)
   420  	id2 := createTSDBBlock(t, bucketClient, "user-1", 6000, 7000, nil)
   421  	id3 := createTSDBBlock(t, bucketClient, "user-1", 7000, 8000, nil)
   422  
   423  	w := bucketindex.NewUpdater(bucketClient, "user-1", nil, logger)
   424  	idx, _, err := w.UpdateIndex(ctx, nil)
   425  	require.NoError(t, err)
   426  
   427  	assert.ElementsMatch(t, []ulid.ULID{id1, id2, id3}, idx.Blocks.GetULIDs())
   428  
   429  	// Excessive retention period (wrapping epoch)
   430  	result := listBlocksOutsideRetentionPeriod(idx, time.Unix(10, 0).Add(-time.Hour))
   431  	assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs())
   432  
   433  	// Normal operation - varying retention period.
   434  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(6, 0))
   435  	assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs())
   436  
   437  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(7, 0))
   438  	assert.ElementsMatch(t, []ulid.ULID{id1}, result.GetULIDs())
   439  
   440  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(8, 0))
   441  	assert.ElementsMatch(t, []ulid.ULID{id1, id2}, result.GetULIDs())
   442  
   443  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(9, 0))
   444  	assert.ElementsMatch(t, []ulid.ULID{id1, id2, id3}, result.GetULIDs())
   445  
   446  	// Avoiding redundant marking - blocks already marked for deletion.
   447  
   448  	mark1 := &bucketindex.BlockDeletionMark{ID: id1}
   449  	mark2 := &bucketindex.BlockDeletionMark{ID: id2}
   450  
   451  	idx.BlockDeletionMarks = bucketindex.BlockDeletionMarks{mark1}
   452  
   453  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(7, 0))
   454  	assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs())
   455  
   456  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(8, 0))
   457  	assert.ElementsMatch(t, []ulid.ULID{id2}, result.GetULIDs())
   458  
   459  	idx.BlockDeletionMarks = bucketindex.BlockDeletionMarks{mark1, mark2}
   460  
   461  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(7, 0))
   462  	assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs())
   463  
   464  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(8, 0))
   465  	assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs())
   466  
   467  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(9, 0))
   468  	assert.ElementsMatch(t, []ulid.ULID{id3}, result.GetULIDs())
   469  }
   470  
   471  func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) {
   472  	bucketClient, _ := cortex_testutil.PrepareFilesystemBucket(t)
   473  	bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient)
   474  
   475  	ts := func(hours int) int64 {
   476  		return time.Now().Add(time.Duration(hours)*time.Hour).Unix() * 1000
   477  	}
   478  
   479  	block1 := createTSDBBlock(t, bucketClient, "user-1", ts(-10), ts(-8), nil)
   480  	block2 := createTSDBBlock(t, bucketClient, "user-1", ts(-8), ts(-6), nil)
   481  	block3 := createTSDBBlock(t, bucketClient, "user-2", ts(-10), ts(-8), nil)
   482  	block4 := createTSDBBlock(t, bucketClient, "user-2", ts(-8), ts(-6), nil)
   483  
   484  	cfg := BlocksCleanerConfig{
   485  		DeletionDelay:      time.Hour,
   486  		CleanupInterval:    time.Minute,
   487  		CleanupConcurrency: 1,
   488  	}
   489  
   490  	ctx := context.Background()
   491  	logger := log.NewNopLogger()
   492  	reg := prometheus.NewPedanticRegistry()
   493  	scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger)
   494  	cfgProvider := newMockConfigProvider()
   495  
   496  	cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, reg)
   497  
   498  	assertBlockExists := func(user string, block ulid.ULID, expectExists bool) {
   499  		exists, err := bucketClient.Exists(ctx, path.Join(user, block.String(), metadata.MetaFilename))
   500  		require.NoError(t, err)
   501  		assert.Equal(t, expectExists, exists)
   502  	}
   503  
   504  	// Existing behaviour - retention period disabled.
   505  	{
   506  		cfgProvider.userRetentionPeriods["user-1"] = 0
   507  		cfgProvider.userRetentionPeriods["user-2"] = 0
   508  
   509  		require.NoError(t, cleaner.cleanUsers(ctx, true))
   510  		assertBlockExists("user-1", block1, true)
   511  		assertBlockExists("user-1", block2, true)
   512  		assertBlockExists("user-2", block3, true)
   513  		assertBlockExists("user-2", block4, true)
   514  
   515  		assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(`
   516  			# HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
   517  			# TYPE cortex_bucket_blocks_count gauge
   518  			cortex_bucket_blocks_count{user="user-1"} 2
   519  			cortex_bucket_blocks_count{user="user-2"} 2
   520  			# HELP cortex_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   521  			# TYPE cortex_bucket_blocks_marked_for_deletion_count gauge
   522  			cortex_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
   523  			cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0
   524  			# HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor.
   525  			# TYPE cortex_compactor_blocks_marked_for_deletion_total counter
   526  			cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 0
   527  			`),
   528  			"cortex_bucket_blocks_count",
   529  			"cortex_bucket_blocks_marked_for_deletion_count",
   530  			"cortex_compactor_blocks_marked_for_deletion_total",
   531  		))
   532  	}
   533  
   534  	// Retention enabled only for a single user, but does nothing.
   535  	{
   536  		cfgProvider.userRetentionPeriods["user-1"] = 9 * time.Hour
   537  
   538  		require.NoError(t, cleaner.cleanUsers(ctx, false))
   539  		assertBlockExists("user-1", block1, true)
   540  		assertBlockExists("user-1", block2, true)
   541  		assertBlockExists("user-2", block3, true)
   542  		assertBlockExists("user-2", block4, true)
   543  	}
   544  
   545  	// Retention enabled only for a single user, marking a single block.
   546  	// Note the block won't be deleted yet due to deletion delay.
   547  	{
   548  		cfgProvider.userRetentionPeriods["user-1"] = 7 * time.Hour
   549  
   550  		require.NoError(t, cleaner.cleanUsers(ctx, false))
   551  		assertBlockExists("user-1", block1, true)
   552  		assertBlockExists("user-1", block2, true)
   553  		assertBlockExists("user-2", block3, true)
   554  		assertBlockExists("user-2", block4, true)
   555  
   556  		assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(`
   557  			# HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
   558  			# TYPE cortex_bucket_blocks_count gauge
   559  			cortex_bucket_blocks_count{user="user-1"} 2
   560  			cortex_bucket_blocks_count{user="user-2"} 2
   561  			# HELP cortex_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   562  			# TYPE cortex_bucket_blocks_marked_for_deletion_count gauge
   563  			cortex_bucket_blocks_marked_for_deletion_count{user="user-1"} 1
   564  			cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0
   565  			# HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor.
   566  			# TYPE cortex_compactor_blocks_marked_for_deletion_total counter
   567  			cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 1
   568  			`),
   569  			"cortex_bucket_blocks_count",
   570  			"cortex_bucket_blocks_marked_for_deletion_count",
   571  			"cortex_compactor_blocks_marked_for_deletion_total",
   572  		))
   573  	}
   574  
   575  	// Marking the block again, before the deletion occurs, should not cause an error.
   576  	{
   577  		require.NoError(t, cleaner.cleanUsers(ctx, false))
   578  		assertBlockExists("user-1", block1, true)
   579  		assertBlockExists("user-1", block2, true)
   580  		assertBlockExists("user-2", block3, true)
   581  		assertBlockExists("user-2", block4, true)
   582  	}
   583  
   584  	// Reduce the deletion delay. Now the block will be deleted.
   585  	{
   586  		cleaner.cfg.DeletionDelay = 0
   587  
   588  		require.NoError(t, cleaner.cleanUsers(ctx, false))
   589  		assertBlockExists("user-1", block1, false)
   590  		assertBlockExists("user-1", block2, true)
   591  		assertBlockExists("user-2", block3, true)
   592  		assertBlockExists("user-2", block4, true)
   593  
   594  		assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(`
   595  			# HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
   596  			# TYPE cortex_bucket_blocks_count gauge
   597  			cortex_bucket_blocks_count{user="user-1"} 1
   598  			cortex_bucket_blocks_count{user="user-2"} 2
   599  			# HELP cortex_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   600  			# TYPE cortex_bucket_blocks_marked_for_deletion_count gauge
   601  			cortex_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
   602  			cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0
   603  			# HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor.
   604  			# TYPE cortex_compactor_blocks_marked_for_deletion_total counter
   605  			cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 1
   606  			`),
   607  			"cortex_bucket_blocks_count",
   608  			"cortex_bucket_blocks_marked_for_deletion_count",
   609  			"cortex_compactor_blocks_marked_for_deletion_total",
   610  		))
   611  	}
   612  
   613  	// Retention enabled for other user; test deleting multiple blocks.
   614  	{
   615  		cfgProvider.userRetentionPeriods["user-2"] = 5 * time.Hour
   616  
   617  		require.NoError(t, cleaner.cleanUsers(ctx, false))
   618  		assertBlockExists("user-1", block1, false)
   619  		assertBlockExists("user-1", block2, true)
   620  		assertBlockExists("user-2", block3, false)
   621  		assertBlockExists("user-2", block4, false)
   622  
   623  		assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(`
   624  			# HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
   625  			# TYPE cortex_bucket_blocks_count gauge
   626  			cortex_bucket_blocks_count{user="user-1"} 1
   627  			cortex_bucket_blocks_count{user="user-2"} 0
   628  			# HELP cortex_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   629  			# TYPE cortex_bucket_blocks_marked_for_deletion_count gauge
   630  			cortex_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
   631  			cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0
   632  			# HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor.
   633  			# TYPE cortex_compactor_blocks_marked_for_deletion_total counter
   634  			cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 3
   635  			`),
   636  			"cortex_bucket_blocks_count",
   637  			"cortex_bucket_blocks_marked_for_deletion_count",
   638  			"cortex_compactor_blocks_marked_for_deletion_total",
   639  		))
   640  	}
   641  }
   642  
   643  type mockBucketFailure struct {
   644  	objstore.Bucket
   645  
   646  	DeleteFailures []string
   647  }
   648  
   649  func (m *mockBucketFailure) Delete(ctx context.Context, name string) error {
   650  	if util.StringsContain(m.DeleteFailures, name) {
   651  		return errors.New("mocked delete failure")
   652  	}
   653  	return m.Bucket.Delete(ctx, name)
   654  }
   655  
   656  type mockConfigProvider struct {
   657  	userRetentionPeriods map[string]time.Duration
   658  }
   659  
   660  func newMockConfigProvider() *mockConfigProvider {
   661  	return &mockConfigProvider{
   662  		userRetentionPeriods: make(map[string]time.Duration),
   663  	}
   664  }
   665  
   666  func (m *mockConfigProvider) CompactorBlocksRetentionPeriod(user string) time.Duration {
   667  	if result, ok := m.userRetentionPeriods[user]; ok {
   668  		return result
   669  	}
   670  	return 0
   671  }
   672  
   673  func (m *mockConfigProvider) S3SSEType(user string) string {
   674  	return ""
   675  }
   676  
   677  func (m *mockConfigProvider) S3SSEKMSKeyID(userID string) string {
   678  	return ""
   679  }
   680  
   681  func (m *mockConfigProvider) S3SSEKMSEncryptionContext(userID string) string {
   682  	return ""
   683  }