github.com/grafana/pyroscope@v1.18.0/pkg/compactor/blocks_cleaner_test.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/blocks_cleaner_test.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: The Cortex Authors.
     5  
     6  package compactor
     7  
     8  import (
     9  	"context"
    10  	"crypto/rand"
    11  	"errors"
    12  	"fmt"
    13  	"os"
    14  	"path"
    15  	"path/filepath"
    16  	"strings"
    17  	"testing"
    18  	"time"
    19  
    20  	"github.com/go-kit/log"
    21  	"github.com/grafana/dskit/concurrency"
    22  	"github.com/grafana/dskit/services"
    23  	"github.com/oklog/ulid/v2"
    24  	"github.com/prometheus/client_golang/prometheus"
    25  	"github.com/prometheus/client_golang/prometheus/testutil"
    26  	"github.com/stretchr/testify/assert"
    27  	"github.com/stretchr/testify/require"
    28  
    29  	"github.com/grafana/pyroscope/pkg/objstore"
    30  	objstore_testutil "github.com/grafana/pyroscope/pkg/objstore/testutil"
    31  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    32  	"github.com/grafana/pyroscope/pkg/phlaredb/bucket"
    33  	"github.com/grafana/pyroscope/pkg/phlaredb/bucketindex"
    34  	"github.com/grafana/pyroscope/pkg/test"
    35  	"github.com/grafana/pyroscope/pkg/util"
    36  )
    37  
    38  type testBlocksCleanerOptions struct {
    39  	concurrency         int
    40  	tenantDeletionDelay time.Duration
    41  	user4FilesExist     bool // User 4 has "FinishedTime" in tenant deletion marker set to "1h" ago.
    42  }
    43  
    44  func (o testBlocksCleanerOptions) String() string {
    45  	return fmt.Sprintf("concurrency=%d, tenant deletion delay=%v",
    46  		o.concurrency, o.tenantDeletionDelay)
    47  }
    48  
    49  func TestBlocksCleaner(t *testing.T) {
    50  	for _, options := range []testBlocksCleanerOptions{
    51  		{concurrency: 1, tenantDeletionDelay: 0, user4FilesExist: false},
    52  		{concurrency: 1, tenantDeletionDelay: 2 * time.Hour, user4FilesExist: true},
    53  		{concurrency: 2},
    54  		{concurrency: 10},
    55  	} {
    56  		options := options
    57  
    58  		t.Run(options.String(), func(t *testing.T) {
    59  			t.Parallel()
    60  			testBlocksCleanerWithOptions(t, options)
    61  		})
    62  	}
    63  }
    64  
// testBlocksCleanerWithOptions fills a filesystem-backed bucket with blocks,
// block deletion marks and tenant deletion marks for several tenants, starts a
// BlocksCleaner configured from options, and then checks which objects are left
// in the bucket, the cleaner's run/block counters, the bucket indexes and the
// per-tenant gauges exposed through the registry.
func testBlocksCleanerWithOptions(t *testing.T, options testBlocksCleanerOptions) {
	bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir())
	bucketClient = block.BucketWithGlobalMarkers(bucketClient)

	// Create blocks.
	ctx := context.Background()
	now := time.Now()
	deletionDelay := 12 * time.Hour
	block1 := createDBBlock(t, bucketClient, "user-1", 10, 20, 2, nil)
	block2 := createDBBlock(t, bucketClient, "user-1", 20, 30, 2, nil)
	block3 := createDBBlock(t, bucketClient, "user-1", 30, 40, 2, nil)
	// block4 and block5 are bare ULIDs: createDBBlock is never called for them, only createDeletionMark below.
	block4 := ulid.MustNew(4, rand.Reader)
	block5 := ulid.MustNew(5, rand.Reader)
	block6 := createDBBlock(t, bucketClient, "user-1", 40, 50, 2, nil)
	block7 := createDBBlock(t, bucketClient, "user-2", 10, 20, 2, nil)
	block8 := createDBBlock(t, bucketClient, "user-2", 40, 50, 2, nil)
	createDeletionMark(t, bucketClient, "user-1", block2, now.Add(-deletionDelay).Add(time.Hour))                      // Block hasn't reached the deletion threshold yet.
	createDeletionMark(t, bucketClient, "user-1", block3, now.Add(-deletionDelay).Add(-time.Hour))                     // Block reached the deletion threshold.
	createDeletionMark(t, bucketClient, "user-1", block4, now.Add(-deletionDelay).Add(time.Hour))                      // Partial block hasn't reached the deletion threshold yet.
	createDeletionMark(t, bucketClient, "user-1", block5, now.Add(-deletionDelay).Add(-time.Hour))                     // Partial block reached the deletion threshold.
	require.NoError(t, bucketClient.Delete(ctx, path.Join("user-1", "phlaredb", block6.String(), block.MetaFilename))) // Partial block without deletion mark.
	createDeletionMark(t, bucketClient, "user-2", block7, now.Add(-deletionDelay).Add(-time.Hour))                     // Block reached the deletion threshold.

	// Blocks for user-3, marked for deletion.
	require.NoError(t, bucket.WriteTenantDeletionMark(context.Background(), bucketClient, "user-3", nil, bucket.NewTenantDeletionMark(time.Now())))
	block9 := createDBBlock(t, bucketClient, "user-3", 10, 30, 2, nil)
	block10 := createDBBlock(t, bucketClient, "user-3", 30, 50, 2, nil)

	// User-4 has no blocks, only a tenant deletion mark whose FinishedTime is 60 seconds in the past.
	// Whether that mark is expected to survive the cleanup is given by options.user4FilesExist.
	user4Mark := bucket.NewTenantDeletionMark(time.Now())
	user4Mark.FinishedTime = time.Now().Unix() - 60 // Set to check final user cleanup.
	require.NoError(t, bucket.WriteTenantDeletionMark(context.Background(), bucketClient, "user-4", nil, user4Mark))

	cfg := BlocksCleanerConfig{
		DeletionDelay:           deletionDelay,
		CleanupInterval:         time.Minute,
		CleanupConcurrency:      options.concurrency,
		TenantCleanupDelay:      options.tenantDeletionDelay,
		DeleteBlocksConcurrency: 1,
	}

	reg := prometheus.NewPedanticRegistry()
	logger := log.NewNopLogger()
	cfgProvider := newMockConfigProvider()

	// NOTE(review): the assertions below run right after the service reports running, so the
	// first cleanup presumably completes during start-up — confirm against NewBlocksCleaner.
	cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, reg)
	require.NoError(t, services.StartAndAwaitRunning(ctx, cleaner))
	defer services.StopAndAwaitTerminated(ctx, cleaner) //nolint:errcheck

	for _, tc := range []struct {
		path           string
		expectedExists bool
	}{
		// Check the storage to ensure only the block which has reached the deletion threshold
		// has been effectively deleted.
		{path: path.Join("user-1", "phlaredb/", block1.String(), block.MetaFilename), expectedExists: true},
		{path: path.Join("user-1", "phlaredb/", block3.String(), block.MetaFilename), expectedExists: false},
		{path: path.Join("user-2", "phlaredb/", block7.String(), block.MetaFilename), expectedExists: false},
		{path: path.Join("user-2", "phlaredb/", block8.String(), block.MetaFilename), expectedExists: true},
		// Should not delete a block with deletion mark who hasn't reached the deletion threshold yet.
		{path: path.Join("user-1", "phlaredb/", block2.String(), block.MetaFilename), expectedExists: true},
		{path: path.Join("user-1", "phlaredb/", block.DeletionMarkFilepath(block2)), expectedExists: true},
		// Should delete a partial block with deletion mark who hasn't reached the deletion threshold yet.
		{path: path.Join("user-1", "phlaredb/", block4.String(), block.DeletionMarkFilename), expectedExists: false},
		{path: path.Join("user-1", "phlaredb/", block.DeletionMarkFilepath(block4)), expectedExists: false},
		// Should delete a partial block with deletion mark who has reached the deletion threshold.
		{path: path.Join("user-1", "phlaredb/", block5.String(), block.DeletionMarkFilename), expectedExists: false},
		{path: path.Join("user-1", "phlaredb/", block.DeletionMarkFilepath(block5)), expectedExists: false},
		// Should not delete a partial block without deletion mark.
		{path: path.Join("user-1", "phlaredb/", block6.String(), block.IndexFilename), expectedExists: true},
		// Should completely delete blocks for user-3, marked for deletion
		{path: path.Join("user-3", "phlaredb/", block9.String(), block.MetaFilename), expectedExists: false},
		{path: path.Join("user-3", "phlaredb/", block9.String(), block.IndexFilename), expectedExists: false},
		{path: path.Join("user-3", "phlaredb/", block10.String(), block.MetaFilename), expectedExists: false},
		{path: path.Join("user-3", "phlaredb/", block10.String(), block.IndexFilename), expectedExists: false},
		// Tenant deletion mark is not removed.
		{path: path.Join("user-3", "phlaredb/", bucket.TenantDeletionMarkPath), expectedExists: true},
		// User-4's tenant deletion mark survives only when options.user4FilesExist is set.
		{path: path.Join("user-4", "phlaredb/", bucket.TenantDeletionMarkPath), expectedExists: options.user4FilesExist},
	} {
		exists, err := bucketClient.Exists(ctx, tc.path)
		require.NoError(t, err)
		assert.Equal(t, tc.expectedExists, exists, tc.path)
	}

	// Exactly one run is expected, with no failures. The count of six cleaned blocks is consistent
	// with the six blocks whose files are asserted absent above (block3, block4, block5, block7,
	// block9, block10) — presumably these are the ones counted; verify against the cleaner.
	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsStarted))
	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsCompleted))
	assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.runsFailed))
	assert.Equal(t, float64(6), testutil.ToFloat64(cleaner.blocksCleanedTotal))
	assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.blocksFailedTotal))

	// Check the updated bucket index.
	for _, tc := range []struct {
		userID         string
		expectedIndex  bool
		expectedBlocks []ulid.ULID
		expectedMarks  []ulid.ULID
	}{
		{
			userID:         "user-1",
			expectedIndex:  true,
			expectedBlocks: []ulid.ULID{block1, block2 /* deleted: block3, block4, block5, partial: block6 */},
			expectedMarks:  []ulid.ULID{block2},
		}, {
			userID:         "user-2",
			expectedIndex:  true,
			expectedBlocks: []ulid.ULID{block8},
			expectedMarks:  []ulid.ULID{},
		}, {
			// user-3 carries a tenant deletion mark: reading its index must report ErrIndexNotFound.
			userID:        "user-3",
			expectedIndex: false,
		},
	} {
		idx, err := bucketindex.ReadIndex(ctx, bucketClient, tc.userID, nil, logger)
		if !tc.expectedIndex {
			assert.Equal(t, bucketindex.ErrIndexNotFound, err)
			continue
		}

		require.NoError(t, err)
		assert.ElementsMatch(t, tc.expectedBlocks, idx.Blocks.GetULIDs())
		assert.ElementsMatch(t, tc.expectedMarks, idx.BlockDeletionMarks.GetULIDs())
	}

	// Only the three gauges named after the expected text are compared; series are expected
	// for user-1 and user-2 only.
	assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
		# HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
		# TYPE pyroscope_bucket_blocks_count gauge
		pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 2
		pyroscope_bucket_blocks_count{compaction_level="1", user="user-2"} 1
		# HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
		# TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge
		pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 1
		pyroscope_bucket_blocks_marked_for_deletion_count{user="user-2"} 0
		# HELP pyroscope_bucket_blocks_partials_count Total number of partial blocks.
		# TYPE pyroscope_bucket_blocks_partials_count gauge
		pyroscope_bucket_blocks_partials_count{user="user-1"} 2
		pyroscope_bucket_blocks_partials_count{user="user-2"} 0
	`),
		"pyroscope_bucket_blocks_count",
		"pyroscope_bucket_blocks_marked_for_deletion_count",
		"pyroscope_bucket_blocks_partials_count",
	))
}
   208  
   209  func TestBlocksCleaner_ShouldContinueOnBlockDeletionFailure(t *testing.T) {
   210  	const userID = "user-1"
   211  
   212  	bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir())
   213  	bucketClient = block.BucketWithGlobalMarkers(bucketClient)
   214  
   215  	// Create blocks.
   216  	ctx := context.Background()
   217  	now := time.Now()
   218  	deletionDelay := 12 * time.Hour
   219  	block1 := createDBBlock(t, bucketClient, userID, 10, 20, 2, nil)
   220  	block2 := createDBBlock(t, bucketClient, userID, 20, 30, 2, nil)
   221  	block3 := createDBBlock(t, bucketClient, userID, 30, 40, 2, nil)
   222  	block4 := createDBBlock(t, bucketClient, userID, 40, 50, 2, nil)
   223  	createDeletionMark(t, bucketClient, userID, block2, now.Add(-deletionDelay).Add(-time.Hour))
   224  	createDeletionMark(t, bucketClient, userID, block3, now.Add(-deletionDelay).Add(-time.Hour))
   225  	createDeletionMark(t, bucketClient, userID, block4, now.Add(-deletionDelay).Add(-time.Hour))
   226  
   227  	// To emulate a failure deleting a block, we wrap the bucket client in a mocked one.
   228  	bucketClient = &mockBucketFailure{
   229  		Bucket:         bucketClient,
   230  		DeleteFailures: []string{path.Join(userID, "phlaredb/", block3.String(), block.MetaFilename)},
   231  	}
   232  
   233  	cfg := BlocksCleanerConfig{
   234  		DeletionDelay:           deletionDelay,
   235  		CleanupInterval:         time.Minute,
   236  		CleanupConcurrency:      1,
   237  		DeleteBlocksConcurrency: 1,
   238  	}
   239  
   240  	logger := log.NewNopLogger()
   241  	cfgProvider := newMockConfigProvider()
   242  
   243  	cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, nil)
   244  	require.NoError(t, services.StartAndAwaitRunning(ctx, cleaner))
   245  	defer services.StopAndAwaitTerminated(ctx, cleaner) //nolint:errcheck
   246  
   247  	for _, tc := range []struct {
   248  		path           string
   249  		expectedExists bool
   250  	}{
   251  		{path: path.Join(userID, "phlaredb/", block1.String(), block.MetaFilename), expectedExists: true},
   252  		{path: path.Join(userID, "phlaredb/", block2.String(), block.MetaFilename), expectedExists: false},
   253  		{path: path.Join(userID, "phlaredb/", block3.String(), block.MetaFilename), expectedExists: true},
   254  		{path: path.Join(userID, "phlaredb/", block4.String(), block.MetaFilename), expectedExists: false},
   255  	} {
   256  		exists, err := bucketClient.Exists(ctx, tc.path)
   257  		require.NoError(t, err)
   258  		assert.Equal(t, tc.expectedExists, exists, tc.path)
   259  	}
   260  
   261  	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsStarted))
   262  	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsCompleted))
   263  	assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.runsFailed))
   264  	assert.Equal(t, float64(2), testutil.ToFloat64(cleaner.blocksCleanedTotal))
   265  	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.blocksFailedTotal))
   266  
   267  	// Check the updated bucket index.
   268  	idx, err := bucketindex.ReadIndex(ctx, bucketClient, userID, nil, logger)
   269  	require.NoError(t, err)
   270  	assert.ElementsMatch(t, []ulid.ULID{block1, block3}, idx.Blocks.GetULIDs())
   271  	assert.ElementsMatch(t, []ulid.ULID{block3}, idx.BlockDeletionMarks.GetULIDs())
   272  }
   273  
   274  func TestBlocksCleaner_ShouldRebuildBucketIndexOnCorruptedOne(t *testing.T) {
   275  	const userID = "user-1"
   276  
   277  	bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir())
   278  	bucketClient = block.BucketWithGlobalMarkers(bucketClient)
   279  
   280  	// Create blocks.
   281  	ctx := context.Background()
   282  	now := time.Now()
   283  	deletionDelay := 12 * time.Hour
   284  	block1 := createDBBlock(t, bucketClient, userID, 10, 20, 2, nil)
   285  	block2 := createDBBlock(t, bucketClient, userID, 20, 30, 2, nil)
   286  	block3 := createDBBlock(t, bucketClient, userID, 30, 40, 2, nil)
   287  	createDeletionMark(t, bucketClient, userID, block2, now.Add(-deletionDelay).Add(-time.Hour))
   288  	createDeletionMark(t, bucketClient, userID, block3, now.Add(-deletionDelay).Add(time.Hour))
   289  
   290  	// Write a corrupted bucket index.
   291  	require.NoError(t, bucketClient.Upload(ctx, path.Join(userID, "phlaredb/", bucketindex.IndexCompressedFilename), strings.NewReader("invalid!}")))
   292  
   293  	cfg := BlocksCleanerConfig{
   294  		DeletionDelay:           deletionDelay,
   295  		CleanupInterval:         time.Minute,
   296  		CleanupConcurrency:      1,
   297  		DeleteBlocksConcurrency: 1,
   298  	}
   299  
   300  	logger := log.NewNopLogger()
   301  	cfgProvider := newMockConfigProvider()
   302  
   303  	cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, nil)
   304  	require.NoError(t, services.StartAndAwaitRunning(ctx, cleaner))
   305  	defer services.StopAndAwaitTerminated(ctx, cleaner) //nolint:errcheck
   306  
   307  	for _, tc := range []struct {
   308  		path           string
   309  		expectedExists bool
   310  	}{
   311  		{path: path.Join(userID, "phlaredb/", block1.String(), block.MetaFilename), expectedExists: true},
   312  		{path: path.Join(userID, "phlaredb/", block2.String(), block.MetaFilename), expectedExists: false},
   313  		{path: path.Join(userID, "phlaredb/", block3.String(), block.MetaFilename), expectedExists: true},
   314  	} {
   315  		exists, err := bucketClient.Exists(ctx, tc.path)
   316  		require.NoError(t, err)
   317  		assert.Equal(t, tc.expectedExists, exists, tc.path)
   318  	}
   319  
   320  	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsStarted))
   321  	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsCompleted))
   322  	assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.runsFailed))
   323  	assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.blocksCleanedTotal))
   324  	assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.blocksFailedTotal))
   325  
   326  	// Check the updated bucket index.
   327  	idx, err := bucketindex.ReadIndex(ctx, bucketClient, userID, nil, logger)
   328  	require.NoError(t, err)
   329  	assert.ElementsMatch(t, []ulid.ULID{block1, block3}, idx.Blocks.GetULIDs())
   330  	assert.ElementsMatch(t, []ulid.ULID{block3}, idx.BlockDeletionMarks.GetULIDs())
   331  }
   332  
   333  func TestBlocksCleaner_ShouldRemoveMetricsForTenantsNotBelongingAnymoreToTheShard(t *testing.T) {
   334  	bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir())
   335  	bucketClient = block.BucketWithGlobalMarkers(bucketClient)
   336  
   337  	// Create blocks.
   338  	createDBBlock(t, bucketClient, "user-1", 10, 20, 2, nil)
   339  	createDBBlock(t, bucketClient, "user-1", 20, 30, 2, nil)
   340  	createDBBlock(t, bucketClient, "user-2", 30, 40, 2, nil)
   341  
   342  	cfg := BlocksCleanerConfig{
   343  		DeletionDelay:           time.Hour,
   344  		CleanupInterval:         time.Minute,
   345  		CleanupConcurrency:      1,
   346  		DeleteBlocksConcurrency: 1,
   347  	}
   348  
   349  	ctx := context.Background()
   350  	logger := log.NewNopLogger()
   351  	reg := prometheus.NewPedanticRegistry()
   352  	cfgProvider := newMockConfigProvider()
   353  
   354  	cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, reg)
   355  	require.NoError(t, cleaner.runCleanupWithErr(ctx))
   356  
   357  	assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
   358  		# HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
   359  		# TYPE pyroscope_bucket_blocks_count gauge
   360  		pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 2
   361  		pyroscope_bucket_blocks_count{compaction_level="1", user="user-2"} 1
   362  		# HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   363  		# TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge
   364  		pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
   365  		pyroscope_bucket_blocks_marked_for_deletion_count{user="user-2"} 0
   366  		# HELP pyroscope_bucket_blocks_partials_count Total number of partial blocks.
   367  		# TYPE pyroscope_bucket_blocks_partials_count gauge
   368  		pyroscope_bucket_blocks_partials_count{user="user-1"} 0
   369  		pyroscope_bucket_blocks_partials_count{user="user-2"} 0
   370  	`),
   371  		"pyroscope_bucket_blocks_count",
   372  		"pyroscope_bucket_blocks_marked_for_deletion_count",
   373  		"pyroscope_bucket_blocks_partials_count",
   374  	))
   375  
   376  	// Override the users scanner to reconfigure it to only return a subset of users.
   377  	cleaner.tenantsScanner = bucket.NewTenantsScanner(bucketClient, func(userID string) (bool, error) { return userID == "user-1", nil }, logger)
   378  
   379  	// Create new blocks, to double check expected metrics have changed.
   380  	createDBBlock(t, bucketClient, "user-1", 40, 50, 2, nil)
   381  	createDBBlock(t, bucketClient, "user-2", 50, 60, 2, nil)
   382  
   383  	require.NoError(t, cleaner.runCleanupWithErr(ctx))
   384  
   385  	assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
   386  		# HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
   387  		# TYPE pyroscope_bucket_blocks_count gauge
   388  		pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 3
   389  		pyroscope_bucket_blocks_count{compaction_level="1", user="user-2"} 1
   390  		# HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   391  		# TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge
   392  		pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
   393  		# HELP pyroscope_bucket_blocks_partials_count Total number of partial blocks.
   394  		# TYPE pyroscope_bucket_blocks_partials_count gauge
   395  		pyroscope_bucket_blocks_partials_count{user="user-1"} 0
   396  	`),
   397  		"pyroscope_bucket_blocks_count",
   398  		"pyroscope_bucket_blocks_marked_for_deletion_count",
   399  		"pyroscope_bucket_blocks_partials_count",
   400  	))
   401  }
   402  
   403  func TestBlocksCleaner_ShouldNotCleanupUserThatDoesntBelongToShardAnymore(t *testing.T) {
   404  	bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir())
   405  	bucketClient = block.BucketWithGlobalMarkers(bucketClient)
   406  
   407  	// Create blocks.
   408  	createDBBlock(t, bucketClient, "user-1", 10, 20, 2, nil)
   409  	createDBBlock(t, bucketClient, "user-2", 20, 30, 2, nil)
   410  
   411  	cfg := BlocksCleanerConfig{
   412  		DeletionDelay:           time.Hour,
   413  		CleanupInterval:         time.Minute,
   414  		CleanupConcurrency:      1,
   415  		DeleteBlocksConcurrency: 1,
   416  	}
   417  
   418  	ctx := context.Background()
   419  	logger := log.NewNopLogger()
   420  	reg := prometheus.NewPedanticRegistry()
   421  	cfgProvider := newMockConfigProvider()
   422  
   423  	// We will simulate change of "ownUser" by counting number of replies per user. First reply will be "true",
   424  	// all subsequent replies will be false.
   425  
   426  	userSeen := map[string]bool{}
   427  	ownUser := func(user string) (bool, error) {
   428  		if userSeen[user] {
   429  			return false, nil
   430  		}
   431  		userSeen[user] = true
   432  		return true, nil
   433  	}
   434  
   435  	cleaner := NewBlocksCleaner(cfg, bucketClient, ownUser, cfgProvider, logger, reg)
   436  	require.NoError(t, cleaner.runCleanupWithErr(ctx))
   437  
   438  	// Verify that we have seen the users
   439  	require.ElementsMatch(t, []string{"user-1", "user-2"}, cleaner.lastOwnedUsers)
   440  
   441  	// But there are no metrics for any user, because we did not in fact clean them.
   442  	assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
   443  		# HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
   444  		# TYPE pyroscope_bucket_blocks_count gauge
   445  	`),
   446  		"pyroscope_bucket_blocks_count",
   447  	))
   448  
   449  	// Running cleanUsers again will see that users are no longer owned.
   450  	require.NoError(t, cleaner.runCleanupWithErr(ctx))
   451  	require.ElementsMatch(t, []string{}, cleaner.lastOwnedUsers)
   452  }
   453  
   454  func TestBlocksCleaner_ListBlocksOutsideRetentionPeriod(t *testing.T) {
   455  	bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir())
   456  	bucketClient = block.BucketWithGlobalMarkers(bucketClient)
   457  	ctx := context.Background()
   458  	logger := log.NewNopLogger()
   459  
   460  	id1 := createDBBlock(t, bucketClient, "user-1", 5000, 6000, 2, nil)
   461  	id2 := createDBBlock(t, bucketClient, "user-1", 6000, 7000, 2, nil)
   462  	id3 := createDBBlock(t, bucketClient, "user-1", 7000, 8000, 2, nil)
   463  
   464  	w := bucketindex.NewUpdater(bucketClient, "user-1", nil, logger)
   465  	idx, _, err := w.UpdateIndex(ctx, nil)
   466  	require.NoError(t, err)
   467  
   468  	assert.ElementsMatch(t, []ulid.ULID{id1, id2, id3}, idx.Blocks.GetULIDs())
   469  
   470  	// Excessive retention period (wrapping epoch)
   471  	result := listBlocksOutsideRetentionPeriod(idx, time.Unix(10, 0).Add(-time.Hour))
   472  	assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs())
   473  
   474  	// Normal operation - varying retention period.
   475  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(6, 0))
   476  	assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs())
   477  
   478  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(7, 0))
   479  	assert.ElementsMatch(t, []ulid.ULID{id1}, result.GetULIDs())
   480  
   481  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(8, 0))
   482  	assert.ElementsMatch(t, []ulid.ULID{id1, id2}, result.GetULIDs())
   483  
   484  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(9, 0))
   485  	assert.ElementsMatch(t, []ulid.ULID{id1, id2, id3}, result.GetULIDs())
   486  
   487  	// Avoiding redundant marking - blocks already marked for deletion.
   488  
   489  	mark1 := &bucketindex.BlockDeletionMark{ID: id1}
   490  	mark2 := &bucketindex.BlockDeletionMark{ID: id2}
   491  
   492  	idx.BlockDeletionMarks = bucketindex.BlockDeletionMarks{mark1}
   493  
   494  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(7, 0))
   495  	assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs())
   496  
   497  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(8, 0))
   498  	assert.ElementsMatch(t, []ulid.ULID{id2}, result.GetULIDs())
   499  
   500  	idx.BlockDeletionMarks = bucketindex.BlockDeletionMarks{mark1, mark2}
   501  
   502  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(7, 0))
   503  	assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs())
   504  
   505  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(8, 0))
   506  	assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs())
   507  
   508  	result = listBlocksOutsideRetentionPeriod(idx, time.Unix(9, 0))
   509  	assert.ElementsMatch(t, []ulid.ULID{id3}, result.GetULIDs())
   510  }
   511  
   512  func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) {
   513  	bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir())
   514  	bucketClient = block.BucketWithGlobalMarkers(bucketClient)
   515  
   516  	ts := func(hours int) int64 {
   517  		return time.Now().Add(time.Duration(hours)*time.Hour).Unix() * 1000
   518  	}
   519  
   520  	block1 := createDBBlock(t, bucketClient, "user-1", ts(-10), ts(-8), 2, nil)
   521  	block2 := createDBBlock(t, bucketClient, "user-1", ts(-8), ts(-6), 2, nil)
   522  	block3 := createDBBlock(t, bucketClient, "user-2", ts(-10), ts(-8), 2, nil)
   523  	block4 := createDBBlock(t, bucketClient, "user-2", ts(-8), ts(-6), 2, nil)
   524  
   525  	cfg := BlocksCleanerConfig{
   526  		DeletionDelay:           time.Hour,
   527  		CleanupInterval:         time.Minute,
   528  		CleanupConcurrency:      1,
   529  		DeleteBlocksConcurrency: 1,
   530  	}
   531  
   532  	ctx := context.Background()
   533  	logger := test.NewTestingLogger(t)
   534  	reg := prometheus.NewPedanticRegistry()
   535  	cfgProvider := newMockConfigProvider()
   536  
   537  	cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, reg)
   538  
   539  	assertBlockExists := func(user string, blockID ulid.ULID, expectExists bool) {
   540  		exists, err := bucketClient.Exists(ctx, path.Join(user, "phlaredb/", blockID.String(), block.MetaFilename))
   541  		require.NoError(t, err)
   542  		assert.Equal(t, expectExists, exists)
   543  	}
   544  
   545  	// Existing behaviour - retention period disabled.
   546  	{
   547  		cfgProvider.userRetentionPeriods["user-1"] = 0
   548  		cfgProvider.userRetentionPeriods["user-2"] = 0
   549  
   550  		require.NoError(t, cleaner.runCleanupWithErr(ctx))
   551  		assertBlockExists("user-1", block1, true)
   552  		assertBlockExists("user-1", block2, true)
   553  		assertBlockExists("user-2", block3, true)
   554  		assertBlockExists("user-2", block4, true)
   555  
   556  		assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
   557  			# HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
   558  			# TYPE pyroscope_bucket_blocks_count gauge
   559  			pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 2
   560  			pyroscope_bucket_blocks_count{compaction_level="1", user="user-2"} 2
   561  			# HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   562  			# TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge
   563  			pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
   564  			pyroscope_bucket_blocks_marked_for_deletion_count{user="user-2"} 0
   565  			# HELP pyroscope_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor.
   566  			# TYPE pyroscope_compactor_blocks_marked_for_deletion_total counter
   567  			pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} 0
   568  			pyroscope_compactor_blocks_marked_for_deletion_total{reason="retention"} 0
   569  			`),
   570  			"pyroscope_bucket_blocks_count",
   571  			"pyroscope_bucket_blocks_marked_for_deletion_count",
   572  			"pyroscope_compactor_blocks_marked_for_deletion_total",
   573  		))
   574  	}
   575  
   576  	// Retention enabled only for a single user, but does nothing.
   577  	{
   578  		cfgProvider.userRetentionPeriods["user-1"] = 9 * time.Hour
   579  
   580  		require.NoError(t, cleaner.runCleanupWithErr(ctx))
   581  		assertBlockExists("user-1", block1, true)
   582  		assertBlockExists("user-1", block2, true)
   583  		assertBlockExists("user-2", block3, true)
   584  		assertBlockExists("user-2", block4, true)
   585  	}
   586  
   587  	// Retention enabled only for a single user, marking a single block.
   588  	// Note the block won't be deleted yet due to deletion delay.
   589  	{
   590  		cfgProvider.userRetentionPeriods["user-1"] = 7 * time.Hour
   591  
   592  		require.NoError(t, cleaner.runCleanupWithErr(ctx))
   593  		assertBlockExists("user-1", block1, true)
   594  		assertBlockExists("user-1", block2, true)
   595  		assertBlockExists("user-2", block3, true)
   596  		assertBlockExists("user-2", block4, true)
   597  
   598  		assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
   599  			# HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
   600  			# TYPE pyroscope_bucket_blocks_count gauge
   601  			pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 2
   602  			pyroscope_bucket_blocks_count{compaction_level="1", user="user-2"} 2
   603  			# HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   604  			# TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge
   605  			pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 1
   606  			pyroscope_bucket_blocks_marked_for_deletion_count{user="user-2"} 0
   607  			# HELP pyroscope_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor.
   608  			# TYPE pyroscope_compactor_blocks_marked_for_deletion_total counter
   609  			pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} 0
   610  			pyroscope_compactor_blocks_marked_for_deletion_total{reason="retention"} 1
   611  			`),
   612  			"pyroscope_bucket_blocks_count",
   613  			"pyroscope_bucket_blocks_marked_for_deletion_count",
   614  			"pyroscope_compactor_blocks_marked_for_deletion_total",
   615  		))
   616  	}
   617  
   618  	// Marking the block again, before the deletion occurs, should not cause an error.
   619  	{
   620  		require.NoError(t, cleaner.runCleanupWithErr(ctx))
   621  		assertBlockExists("user-1", block1, true)
   622  		assertBlockExists("user-1", block2, true)
   623  		assertBlockExists("user-2", block3, true)
   624  		assertBlockExists("user-2", block4, true)
   625  	}
   626  
   627  	// Reduce the deletion delay. Now the block will be deleted.
   628  	{
   629  		cleaner.cfg.DeletionDelay = 0
   630  
   631  		require.NoError(t, cleaner.runCleanupWithErr(ctx))
   632  		assertBlockExists("user-1", block1, false)
   633  		assertBlockExists("user-1", block2, true)
   634  		assertBlockExists("user-2", block3, true)
   635  		assertBlockExists("user-2", block4, true)
   636  
   637  		assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
   638  			# HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
   639  			# TYPE pyroscope_bucket_blocks_count gauge
   640  			pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 1
   641  			pyroscope_bucket_blocks_count{compaction_level="1", user="user-2"} 2
   642  			# HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   643  			# TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge
   644  			pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
   645  			pyroscope_bucket_blocks_marked_for_deletion_count{user="user-2"} 0
   646  			# HELP pyroscope_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor.
   647  			# TYPE pyroscope_compactor_blocks_marked_for_deletion_total counter
   648  			pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} 0
   649  			pyroscope_compactor_blocks_marked_for_deletion_total{reason="retention"} 1
   650  			`),
   651  			"pyroscope_bucket_blocks_count",
   652  			"pyroscope_bucket_blocks_marked_for_deletion_count",
   653  			"pyroscope_compactor_blocks_marked_for_deletion_total",
   654  		))
   655  	}
   656  
   657  	// Retention enabled for other user; test deleting multiple blocks.
   658  	{
   659  		cfgProvider.userRetentionPeriods["user-2"] = 5 * time.Hour
   660  
   661  		require.NoError(t, cleaner.runCleanupWithErr(ctx))
   662  		assertBlockExists("user-1", block1, false)
   663  		assertBlockExists("user-1", block2, true)
   664  		assertBlockExists("user-2", block3, false)
   665  		assertBlockExists("user-2", block4, false)
   666  
   667  		assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
   668  			# HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
   669  			# TYPE pyroscope_bucket_blocks_count gauge
   670  			pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 1
   671  			# HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   672  			# TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge
   673  			pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
   674  			pyroscope_bucket_blocks_marked_for_deletion_count{user="user-2"} 0
   675  			# HELP pyroscope_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor.
   676  			# TYPE pyroscope_compactor_blocks_marked_for_deletion_total counter
   677  			pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} 0
   678  			pyroscope_compactor_blocks_marked_for_deletion_total{reason="retention"} 3
   679  			`),
   680  			"pyroscope_bucket_blocks_count",
   681  			"pyroscope_bucket_blocks_marked_for_deletion_count",
   682  			"pyroscope_compactor_blocks_marked_for_deletion_total",
   683  		))
   684  	}
   685  }
   686  
   687  func checkBlock(t *testing.T, user string, bucketClient objstore.Bucket, blockID ulid.ULID, metaJSONExists bool, markedForDeletion bool) {
   688  	exists, err := bucketClient.Exists(context.Background(), path.Join(user, "phlaredb/", blockID.String(), block.MetaFilename))
   689  	require.NoError(t, err)
   690  	require.Equal(t, metaJSONExists, exists)
   691  
   692  	exists, err = bucketClient.Exists(context.Background(), path.Join(user, "phlaredb/", blockID.String(), block.DeletionMarkFilename))
   693  	require.NoError(t, err)
   694  	require.Equal(t, markedForDeletion, exists)
   695  }
   696  
   697  func TestBlocksCleaner_ShouldCleanUpFilesWhenNoMoreBlocksRemain(t *testing.T) {
   698  	bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir())
   699  	bucketClient = block.BucketWithGlobalMarkers(bucketClient)
   700  
   701  	const userID = "user-1"
   702  	ctx := context.Background()
   703  	now := time.Now()
   704  	deletionDelay := 12 * time.Hour
   705  
   706  	// Create two blocks and mark them for deletion at a time before the deletionDelay
   707  	block1 := createDBBlock(t, bucketClient, userID, 10, 20, 2, nil)
   708  	block2 := createDBBlock(t, bucketClient, userID, 20, 30, 2, nil)
   709  
   710  	createDeletionMark(t, bucketClient, userID, block1, now.Add(-deletionDelay).Add(-time.Hour))
   711  	createDeletionMark(t, bucketClient, userID, block2, now.Add(-deletionDelay).Add(-time.Hour))
   712  
   713  	checkBlock(t, "user-1", bucketClient, block1, true, true)
   714  	checkBlock(t, "user-1", bucketClient, block2, true, true)
   715  
   716  	// Create a deletion mark within the deletionDelay period that won't correspond to any block
   717  	randomULID := ulid.MustNew(ulid.Now(), rand.Reader)
   718  	createDeletionMark(t, bucketClient, userID, randomULID, now.Add(-deletionDelay).Add(time.Hour))
   719  	blockDeletionMarkFile := path.Join(userID, "phlaredb/", block.DeletionMarkFilepath(randomULID))
   720  	exists, err := bucketClient.Exists(ctx, blockDeletionMarkFile)
   721  	require.NoError(t, err)
   722  	assert.True(t, exists)
   723  
   724  	cfg := BlocksCleanerConfig{
   725  		DeletionDelay:              deletionDelay,
   726  		CleanupInterval:            time.Minute,
   727  		CleanupConcurrency:         1,
   728  		DeleteBlocksConcurrency:    1,
   729  		NoBlocksFileCleanupEnabled: true,
   730  	}
   731  
   732  	logger := test.NewTestingLogger(t)
   733  	reg := prometheus.NewPedanticRegistry()
   734  	cfgProvider := newMockConfigProvider()
   735  
   736  	cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, reg)
   737  	require.NoError(t, cleaner.runCleanupWithErr(ctx))
   738  
   739  	// Check bucket index, markers and debug files have been deleted.
   740  	exists, err = bucketClient.Exists(ctx, blockDeletionMarkFile)
   741  	require.NoError(t, err)
   742  	assert.False(t, exists)
   743  
   744  	_, err = bucketindex.ReadIndex(ctx, bucketClient, userID, nil, logger)
   745  	require.ErrorIs(t, err, bucketindex.ErrIndexNotFound)
   746  }
   747  
   748  func TestBlocksCleaner_ShouldRemovePartialBlocksOutsideDelayPeriod(t *testing.T) {
   749  	bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir())
   750  	bucketClient = block.BucketWithGlobalMarkers(bucketClient)
   751  
   752  	ts := func(hours int) int64 {
   753  		return time.Now().Add(time.Duration(hours)*time.Hour).Unix() * 1000
   754  	}
   755  
   756  	block1 := createDBBlock(t, bucketClient, "user-1", ts(-10), ts(-8), 2, nil)
   757  	block2 := createDBBlock(t, bucketClient, "user-1", ts(-8), ts(-6), 2, nil)
   758  
   759  	cfg := BlocksCleanerConfig{
   760  		DeletionDelay:           time.Hour,
   761  		CleanupInterval:         time.Minute,
   762  		CleanupConcurrency:      1,
   763  		DeleteBlocksConcurrency: 1,
   764  	}
   765  
   766  	ctx := context.Background()
   767  	logger := test.NewTestingLogger(t)
   768  	reg := prometheus.NewPedanticRegistry()
   769  	cfgProvider := newMockConfigProvider()
   770  
   771  	cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, reg)
   772  
   773  	makeBlockPartial := func(user string, blockID ulid.ULID) {
   774  		err := bucketClient.Delete(ctx, path.Join(user, "phlaredb/", blockID.String(), block.MetaFilename))
   775  		require.NoError(t, err)
   776  	}
   777  
   778  	checkBlock(t, "user-1", bucketClient, block1, true, false)
   779  	checkBlock(t, "user-1", bucketClient, block2, true, false)
   780  	makeBlockPartial("user-1", block1)
   781  	checkBlock(t, "user-1", bucketClient, block1, false, false)
   782  	checkBlock(t, "user-1", bucketClient, block2, true, false)
   783  
   784  	require.NoError(t, cleaner.cleanUser(ctx, "user-1", logger))
   785  
   786  	// check that no blocks were marked for deletion, because deletion delay is set to 0.
   787  	checkBlock(t, "user-1", bucketClient, block1, false, false)
   788  	checkBlock(t, "user-1", bucketClient, block2, true, false)
   789  
   790  	// Test that partial block does get marked for deletion
   791  	// The delay time must be very short since these temporary files were just created
   792  	cfgProvider.userPartialBlockDelay["user-1"] = 1 * time.Nanosecond
   793  
   794  	require.NoError(t, cleaner.cleanUser(ctx, "user-1", logger))
   795  
   796  	// check that first block was marked for deletion (partial block updated far in the past), but not the second one, because it's not partial.
   797  	checkBlock(t, "user-1", bucketClient, block1, false, true)
   798  	checkBlock(t, "user-1", bucketClient, block2, true, false)
   799  
   800  	require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
   801  			# HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
   802  			# TYPE pyroscope_bucket_blocks_count gauge
   803  			pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 1
   804  			# HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   805  			# TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge
   806  			pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
   807  			# HELP pyroscope_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor.
   808  			# TYPE pyroscope_compactor_blocks_marked_for_deletion_total counter
   809  			pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} 1
   810  			pyroscope_compactor_blocks_marked_for_deletion_total{reason="retention"} 0
   811  			`),
   812  		"pyroscope_bucket_blocks_count",
   813  		"pyroscope_bucket_blocks_marked_for_deletion_count",
   814  		"pyroscope_compactor_blocks_marked_for_deletion_total",
   815  	))
   816  }
   817  
   818  func TestBlocksCleaner_ShouldNotRemovePartialBlocksInsideDelayPeriod(t *testing.T) {
   819  	bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir())
   820  	bucketClient = block.BucketWithGlobalMarkers(bucketClient)
   821  
   822  	ts := func(hours int) int64 {
   823  		return time.Now().Add(time.Duration(hours)*time.Hour).Unix() * 1000
   824  	}
   825  
   826  	block1 := createDBBlock(t, bucketClient, "user-1", ts(-10), ts(-8), 2, nil)
   827  	block2 := createDBBlock(t, bucketClient, "user-2", ts(-8), ts(-6), 2, nil)
   828  
   829  	cfg := BlocksCleanerConfig{
   830  		DeletionDelay:           time.Hour,
   831  		CleanupInterval:         time.Minute,
   832  		CleanupConcurrency:      1,
   833  		DeleteBlocksConcurrency: 1,
   834  	}
   835  
   836  	ctx := context.Background()
   837  	logger := test.NewTestingLogger(t)
   838  	reg := prometheus.NewPedanticRegistry()
   839  	cfgProvider := newMockConfigProvider()
   840  
   841  	cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, reg)
   842  
   843  	makeBlockPartial := func(user string, blockID ulid.ULID) {
   844  		err := bucketClient.Delete(ctx, path.Join(user, "phlaredb/", blockID.String(), block.MetaFilename))
   845  		require.NoError(t, err)
   846  	}
   847  
   848  	corruptMeta := func(user string, blockID ulid.ULID) {
   849  		err := bucketClient.Upload(ctx, path.Join(user, "phlaredb/", blockID.String(), block.MetaFilename), strings.NewReader("corrupted file contents"))
   850  		require.NoError(t, err)
   851  	}
   852  
   853  	checkBlock(t, "user-1", bucketClient, block1, true, false)
   854  	checkBlock(t, "user-2", bucketClient, block2, true, false)
   855  
   856  	makeBlockPartial("user-1", block1)
   857  	corruptMeta("user-2", block2)
   858  
   859  	checkBlock(t, "user-1", bucketClient, block1, false, false)
   860  	checkBlock(t, "user-2", bucketClient, block2, true, false)
   861  
   862  	// Set partial block delay such that block will not be marked for deletion
   863  	// The comparison is based on inode modification time, so anything more than very recent (< 1 second) won't be
   864  	// out of range
   865  	cfgProvider.userPartialBlockDelay["user-1"] = 1 * time.Hour
   866  	cfgProvider.userPartialBlockDelay["user-2"] = 1 * time.Nanosecond
   867  
   868  	require.NoError(t, cleaner.cleanUser(ctx, "user-1", logger))
   869  	checkBlock(t, "user-1", bucketClient, block1, false, false) // This block was updated too recently, so we don't mark it for deletion just yet.
   870  	checkBlock(t, "user-2", bucketClient, block2, true, false)  // No change for user-2.
   871  
   872  	require.NoError(t, cleaner.cleanUser(ctx, "user-2", logger))
   873  	checkBlock(t, "user-1", bucketClient, block1, false, false) // No change for user-1
   874  	checkBlock(t, "user-2", bucketClient, block2, true, false)  // Block with corrupted meta is NOT marked for deletion.
   875  
   876  	// The pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} counter should be zero since for user-1
   877  	// the time since modification is shorter than the delay, and for user-2, the metadata is corrupted but the file
   878  	// is still present in the bucket so the block is not partial
   879  	require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
   880  			# HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   881  			# TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge
   882  			pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
   883  			pyroscope_bucket_blocks_marked_for_deletion_count{user="user-2"} 0
   884  			# HELP pyroscope_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor.
   885  			# TYPE pyroscope_compactor_blocks_marked_for_deletion_total counter
   886  			pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} 0
   887  			pyroscope_compactor_blocks_marked_for_deletion_total{reason="retention"} 0
   888  			`),
   889  		"pyroscope_bucket_blocks_count",
   890  		"pyroscope_bucket_blocks_marked_for_deletion_count",
   891  		"pyroscope_compactor_blocks_marked_for_deletion_total",
   892  	))
   893  }
   894  
   895  func TestBlocksCleaner_ShouldNotRemovePartialBlocksIfConfiguredDelayIsInvalid(t *testing.T) {
   896  	ctx := context.Background()
   897  	reg := prometheus.NewPedanticRegistry()
   898  	logs := &concurrency.SyncBuffer{}
   899  	logger := log.NewLogfmtLogger(logs)
   900  
   901  	bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir())
   902  	bucketClient = block.BucketWithGlobalMarkers(bucketClient)
   903  
   904  	ts := func(hours int) int64 {
   905  		return time.Now().Add(time.Duration(hours)*time.Hour).Unix() * 1000
   906  	}
   907  
   908  	// Create a partial block.
   909  	block1 := createDBBlock(t, bucketClient, "user-1", ts(-10), ts(-8), 2, nil)
   910  	err := bucketClient.Delete(ctx, path.Join("user-1", "phlaredb/", block1.String(), block.MetaFilename))
   911  	require.NoError(t, err)
   912  
   913  	cfg := BlocksCleanerConfig{
   914  		DeletionDelay:           time.Hour,
   915  		CleanupInterval:         time.Minute,
   916  		CleanupConcurrency:      1,
   917  		DeleteBlocksConcurrency: 1,
   918  	}
   919  
   920  	// Configure an invalid delay.
   921  	cfgProvider := newMockConfigProvider()
   922  	cfgProvider.userPartialBlockDelay["user-1"] = 0
   923  	cfgProvider.userPartialBlockDelayInvalid["user-1"] = true
   924  
   925  	// Pre-condition check: block should be partial and not being marked for deletion.
   926  	checkBlock(t, "user-1", bucketClient, block1, false, false)
   927  
   928  	// Run the cleanup.
   929  	cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, reg)
   930  	require.NoError(t, cleaner.cleanUser(ctx, "user-1", logger))
   931  
   932  	// Ensure the block has NOT been marked for deletion.
   933  	checkBlock(t, "user-1", bucketClient, block1, false, false)
   934  	assert.Contains(t, logs.String(), "partial blocks deletion has been disabled for tenant because the delay has been set lower than the minimum value allowed")
   935  
   936  	require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
   937  			# HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
   938  			# TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge
   939  			pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
   940  			# HELP pyroscope_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor.
   941  			# TYPE pyroscope_compactor_blocks_marked_for_deletion_total counter
   942  			pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} 0
   943  			pyroscope_compactor_blocks_marked_for_deletion_total{reason="retention"} 0
   944  			`),
   945  		"pyroscope_bucket_blocks_count",
   946  		"pyroscope_bucket_blocks_marked_for_deletion_count",
   947  		"pyroscope_compactor_blocks_marked_for_deletion_total",
   948  	))
   949  }
   950  
   951  func TestStalePartialBlockLastModifiedTime(t *testing.T) {
   952  	dir := t.TempDir()
   953  	b, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), dir)
   954  
   955  	const tenantId = "user"
   956  
   957  	objectTime := time.Now().Add(-1 * time.Hour).Truncate(time.Second) // ignore milliseconds, as not all filesystems store them.
   958  	blockID := createDBBlock(t, b, tenantId, objectTime.UnixMilli(), time.Now().UnixMilli(), 2, nil)
   959  	err := filepath.Walk(filepath.Join(dir, tenantId, "phlaredb/", blockID.String()), func(path string, info os.FileInfo, err error) error {
   960  		require.NoError(t, err)
   961  		require.NoError(t, os.Chtimes(path, objectTime, objectTime))
   962  		return nil
   963  	})
   964  	require.NoError(t, err)
   965  
   966  	userBucket := objstore.NewTenantBucketClient(tenantId, b, nil)
   967  
   968  	emptyBlockID := ulid.ULID{}
   969  	require.NotEqual(t, blockID, emptyBlockID)
   970  	empty := true
   971  	err = userBucket.Iter(context.Background(), emptyBlockID.String(), func(_ string) error {
   972  		empty = false
   973  		return nil
   974  	})
   975  	require.NoError(t, err)
   976  	require.True(t, empty)
   977  
   978  	testCases := []struct {
   979  		name                 string
   980  		blockID              ulid.ULID
   981  		cutoff               time.Time
   982  		expectedLastModified time.Time
   983  	}{
   984  		{name: "no objects", blockID: emptyBlockID, cutoff: objectTime, expectedLastModified: time.Time{}},
   985  		{name: "objects newer than delay cutoff", blockID: blockID, cutoff: objectTime.Add(-1 * time.Second), expectedLastModified: time.Time{}},
   986  		{name: "objects equal to delay cutoff", blockID: blockID, cutoff: objectTime, expectedLastModified: objectTime},
   987  		{name: "objects older than delay cutoff", blockID: blockID, cutoff: objectTime.Add(1 * time.Second), expectedLastModified: objectTime},
   988  	}
   989  
   990  	for _, tc := range testCases {
   991  		t.Run(tc.name, func(t *testing.T) {
   992  			lastModified, err := stalePartialBlockLastModifiedTime(context.Background(), tc.blockID, userBucket, tc.cutoff)
   993  			require.NoError(t, err)
   994  			require.Equal(t, tc.expectedLastModified, lastModified)
   995  		})
   996  	}
   997  }
   998  
// mockBucketFailure wraps an objstore.Bucket and lets tests force Delete to
// fail for selected object names. All other operations are served by the
// embedded bucket.
type mockBucketFailure struct {
	objstore.Bucket

	// DeleteFailures lists the object names for which Delete returns a mocked error.
	DeleteFailures []string
}
  1004  
  1005  func (m *mockBucketFailure) Delete(ctx context.Context, name string) error {
  1006  	if util.StringsContain(m.DeleteFailures, name) {
  1007  		return errors.New("mocked delete failure")
  1008  	}
  1009  	return m.Bucket.Delete(ctx, name)
  1010  }
  1011  
  1012  type mockConfigProvider struct {
  1013  	userRetentionPeriods         map[string]time.Duration
  1014  	splitAndMergeShards          map[string]int
  1015  	instancesShardSize           map[string]int
  1016  	splitGroups                  map[string]int
  1017  	splitAndMergeStageSize       map[string]int
  1018  	blockUploadEnabled           map[string]bool
  1019  	blockUploadValidationEnabled map[string]bool
  1020  	blockUploadMaxBlockSizeBytes map[string]int64
  1021  	userPartialBlockDelay        map[string]time.Duration
  1022  	userPartialBlockDelayInvalid map[string]bool
  1023  	verifyChunks                 map[string]bool
  1024  	downsamplerEnabled           map[string]bool
  1025  }
  1026  
  1027  func newMockConfigProvider() *mockConfigProvider {
  1028  	return &mockConfigProvider{
  1029  		userRetentionPeriods:         make(map[string]time.Duration),
  1030  		splitAndMergeShards:          make(map[string]int),
  1031  		splitGroups:                  make(map[string]int),
  1032  		splitAndMergeStageSize:       make(map[string]int),
  1033  		blockUploadEnabled:           make(map[string]bool),
  1034  		blockUploadValidationEnabled: make(map[string]bool),
  1035  		blockUploadMaxBlockSizeBytes: make(map[string]int64),
  1036  		userPartialBlockDelay:        make(map[string]time.Duration),
  1037  		userPartialBlockDelayInvalid: make(map[string]bool),
  1038  		verifyChunks:                 make(map[string]bool),
  1039  		downsamplerEnabled:           make(map[string]bool),
  1040  	}
  1041  }
  1042  
  1043  func (m *mockConfigProvider) CompactorBlocksRetentionPeriod(user string) time.Duration {
  1044  	if result, ok := m.userRetentionPeriods[user]; ok {
  1045  		return result
  1046  	}
  1047  	return 0
  1048  }
  1049  
  1050  func (m *mockConfigProvider) CompactorSplitAndMergeShards(user string) int {
  1051  	if result, ok := m.splitAndMergeShards[user]; ok {
  1052  		return result
  1053  	}
  1054  	return 0
  1055  }
  1056  
  1057  func (m *mockConfigProvider) CompactorSplitAndMergeStageSize(user string) int {
  1058  	if result, ok := m.splitAndMergeStageSize[user]; ok {
  1059  		return result
  1060  	}
  1061  	return 0
  1062  }
  1063  
  1064  func (m *mockConfigProvider) CompactorSplitGroups(user string) int {
  1065  	if result, ok := m.splitGroups[user]; ok {
  1066  		return result
  1067  	}
  1068  	return 0
  1069  }
  1070  
  1071  func (m *mockConfigProvider) CompactorTenantShardSize(user string) int {
  1072  	if result, ok := m.instancesShardSize[user]; ok {
  1073  		return result
  1074  	}
  1075  	return 0
  1076  }
  1077  
  1078  func (m *mockConfigProvider) CompactorBlockUploadEnabled(tenantID string) bool {
  1079  	return m.blockUploadEnabled[tenantID]
  1080  }
  1081  
  1082  func (m *mockConfigProvider) CompactorBlockUploadValidationEnabled(tenantID string) bool {
  1083  	return m.blockUploadValidationEnabled[tenantID]
  1084  }
  1085  
  1086  func (m *mockConfigProvider) CompactorPartialBlockDeletionDelay(user string) (time.Duration, bool) {
  1087  	return m.userPartialBlockDelay[user], !m.userPartialBlockDelayInvalid[user]
  1088  }
  1089  
  1090  func (m *mockConfigProvider) CompactorBlockUploadVerifyChunks(tenantID string) bool {
  1091  	return m.verifyChunks[tenantID]
  1092  }
  1093  
  1094  func (m *mockConfigProvider) CompactorBlockUploadMaxBlockSizeBytes(user string) int64 {
  1095  	return m.blockUploadMaxBlockSizeBytes[user]
  1096  }
  1097  
  1098  func (m *mockConfigProvider) CompactorDownsamplerEnabled(user string) bool {
  1099  	return m.downsamplerEnabled[user]
  1100  }
  1101  
  1102  func (m *mockConfigProvider) S3SSEType(string) string {
  1103  	return ""
  1104  }
  1105  
  1106  func (m *mockConfigProvider) S3SSEKMSKeyID(string) string {
  1107  	return ""
  1108  }
  1109  
  1110  func (m *mockConfigProvider) S3SSEKMSEncryptionContext(string) string {
  1111  	return ""
  1112  }
  1113  
  1114  func (c *BlocksCleaner) runCleanupWithErr(ctx context.Context) error {
  1115  	allUsers, isDeleted, err := c.refreshOwnedUsers(ctx)
  1116  	if err != nil {
  1117  		return err
  1118  	}
  1119  
  1120  	return c.cleanUsers(ctx, allUsers, isDeleted, log.NewNopLogger())
  1121  }