github.com/grafana/pyroscope@v1.18.0/pkg/compactor/blocks_cleaner_test.go (about) 1 // SPDX-License-Identifier: AGPL-3.0-only 2 // Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/blocks_cleaner_test.go 3 // Provenance-includes-license: Apache-2.0 4 // Provenance-includes-copyright: The Cortex Authors. 5 6 package compactor 7 8 import ( 9 "context" 10 "crypto/rand" 11 "errors" 12 "fmt" 13 "os" 14 "path" 15 "path/filepath" 16 "strings" 17 "testing" 18 "time" 19 20 "github.com/go-kit/log" 21 "github.com/grafana/dskit/concurrency" 22 "github.com/grafana/dskit/services" 23 "github.com/oklog/ulid/v2" 24 "github.com/prometheus/client_golang/prometheus" 25 "github.com/prometheus/client_golang/prometheus/testutil" 26 "github.com/stretchr/testify/assert" 27 "github.com/stretchr/testify/require" 28 29 "github.com/grafana/pyroscope/pkg/objstore" 30 objstore_testutil "github.com/grafana/pyroscope/pkg/objstore/testutil" 31 "github.com/grafana/pyroscope/pkg/phlaredb/block" 32 "github.com/grafana/pyroscope/pkg/phlaredb/bucket" 33 "github.com/grafana/pyroscope/pkg/phlaredb/bucketindex" 34 "github.com/grafana/pyroscope/pkg/test" 35 "github.com/grafana/pyroscope/pkg/util" 36 ) 37 38 type testBlocksCleanerOptions struct { 39 concurrency int 40 tenantDeletionDelay time.Duration 41 user4FilesExist bool // User 4 has "FinishedTime" in tenant deletion marker set to "1h" ago. 
42 } 43 44 func (o testBlocksCleanerOptions) String() string { 45 return fmt.Sprintf("concurrency=%d, tenant deletion delay=%v", 46 o.concurrency, o.tenantDeletionDelay) 47 } 48 49 func TestBlocksCleaner(t *testing.T) { 50 for _, options := range []testBlocksCleanerOptions{ 51 {concurrency: 1, tenantDeletionDelay: 0, user4FilesExist: false}, 52 {concurrency: 1, tenantDeletionDelay: 2 * time.Hour, user4FilesExist: true}, 53 {concurrency: 2}, 54 {concurrency: 10}, 55 } { 56 options := options 57 58 t.Run(options.String(), func(t *testing.T) { 59 t.Parallel() 60 testBlocksCleanerWithOptions(t, options) 61 }) 62 } 63 } 64 65 func testBlocksCleanerWithOptions(t *testing.T, options testBlocksCleanerOptions) { 66 bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir()) 67 bucketClient = block.BucketWithGlobalMarkers(bucketClient) 68 69 // Create blocks. 70 ctx := context.Background() 71 now := time.Now() 72 deletionDelay := 12 * time.Hour 73 block1 := createDBBlock(t, bucketClient, "user-1", 10, 20, 2, nil) 74 block2 := createDBBlock(t, bucketClient, "user-1", 20, 30, 2, nil) 75 block3 := createDBBlock(t, bucketClient, "user-1", 30, 40, 2, nil) 76 block4 := ulid.MustNew(4, rand.Reader) 77 block5 := ulid.MustNew(5, rand.Reader) 78 block6 := createDBBlock(t, bucketClient, "user-1", 40, 50, 2, nil) 79 block7 := createDBBlock(t, bucketClient, "user-2", 10, 20, 2, nil) 80 block8 := createDBBlock(t, bucketClient, "user-2", 40, 50, 2, nil) 81 createDeletionMark(t, bucketClient, "user-1", block2, now.Add(-deletionDelay).Add(time.Hour)) // Block hasn't reached the deletion threshold yet. 82 createDeletionMark(t, bucketClient, "user-1", block3, now.Add(-deletionDelay).Add(-time.Hour)) // Block reached the deletion threshold. 83 createDeletionMark(t, bucketClient, "user-1", block4, now.Add(-deletionDelay).Add(time.Hour)) // Partial block hasn't reached the deletion threshold yet. 
84 createDeletionMark(t, bucketClient, "user-1", block5, now.Add(-deletionDelay).Add(-time.Hour)) // Partial block reached the deletion threshold. 85 require.NoError(t, bucketClient.Delete(ctx, path.Join("user-1", "phlaredb", block6.String(), block.MetaFilename))) // Partial block without deletion mark. 86 createDeletionMark(t, bucketClient, "user-2", block7, now.Add(-deletionDelay).Add(-time.Hour)) // Block reached the deletion threshold. 87 88 // Blocks for user-3, marked for deletion. 89 require.NoError(t, bucket.WriteTenantDeletionMark(context.Background(), bucketClient, "user-3", nil, bucket.NewTenantDeletionMark(time.Now()))) 90 block9 := createDBBlock(t, bucketClient, "user-3", 10, 30, 2, nil) 91 block10 := createDBBlock(t, bucketClient, "user-3", 30, 50, 2, nil) 92 93 // User-4 with no more blocks, but couple of mark and debug files. Should be fully deleted. 94 user4Mark := bucket.NewTenantDeletionMark(time.Now()) 95 user4Mark.FinishedTime = time.Now().Unix() - 60 // Set to check final user cleanup. 96 require.NoError(t, bucket.WriteTenantDeletionMark(context.Background(), bucketClient, "user-4", nil, user4Mark)) 97 98 cfg := BlocksCleanerConfig{ 99 DeletionDelay: deletionDelay, 100 CleanupInterval: time.Minute, 101 CleanupConcurrency: options.concurrency, 102 TenantCleanupDelay: options.tenantDeletionDelay, 103 DeleteBlocksConcurrency: 1, 104 } 105 106 reg := prometheus.NewPedanticRegistry() 107 logger := log.NewNopLogger() 108 cfgProvider := newMockConfigProvider() 109 110 cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, reg) 111 require.NoError(t, services.StartAndAwaitRunning(ctx, cleaner)) 112 defer services.StopAndAwaitTerminated(ctx, cleaner) //nolint:errcheck 113 114 for _, tc := range []struct { 115 path string 116 expectedExists bool 117 }{ 118 // Check the storage to ensure only the block which has reached the deletion threshold 119 // has been effectively deleted. 
120 {path: path.Join("user-1", "phlaredb/", block1.String(), block.MetaFilename), expectedExists: true}, 121 {path: path.Join("user-1", "phlaredb/", block3.String(), block.MetaFilename), expectedExists: false}, 122 {path: path.Join("user-2", "phlaredb/", block7.String(), block.MetaFilename), expectedExists: false}, 123 {path: path.Join("user-2", "phlaredb/", block8.String(), block.MetaFilename), expectedExists: true}, 124 // Should not delete a block with deletion mark who hasn't reached the deletion threshold yet. 125 {path: path.Join("user-1", "phlaredb/", block2.String(), block.MetaFilename), expectedExists: true}, 126 {path: path.Join("user-1", "phlaredb/", block.DeletionMarkFilepath(block2)), expectedExists: true}, 127 // Should delete a partial block with deletion mark who hasn't reached the deletion threshold yet. 128 {path: path.Join("user-1", "phlaredb/", block4.String(), block.DeletionMarkFilename), expectedExists: false}, 129 {path: path.Join("user-1", "phlaredb/", block.DeletionMarkFilepath(block4)), expectedExists: false}, 130 // Should delete a partial block with deletion mark who has reached the deletion threshold. 131 {path: path.Join("user-1", "phlaredb/", block5.String(), block.DeletionMarkFilename), expectedExists: false}, 132 {path: path.Join("user-1", "phlaredb/", block.DeletionMarkFilepath(block5)), expectedExists: false}, 133 // Should not delete a partial block without deletion mark. 
134 {path: path.Join("user-1", "phlaredb/", block6.String(), block.IndexFilename), expectedExists: true}, 135 // Should completely delete blocks for user-3, marked for deletion 136 {path: path.Join("user-3", "phlaredb/", block9.String(), block.MetaFilename), expectedExists: false}, 137 {path: path.Join("user-3", "phlaredb/", block9.String(), block.IndexFilename), expectedExists: false}, 138 {path: path.Join("user-3", "phlaredb/", block10.String(), block.MetaFilename), expectedExists: false}, 139 {path: path.Join("user-3", "phlaredb/", block10.String(), block.IndexFilename), expectedExists: false}, 140 // Tenant deletion mark is not removed. 141 {path: path.Join("user-3", "phlaredb/", bucket.TenantDeletionMarkPath), expectedExists: true}, 142 // User-4 is removed fully. 143 {path: path.Join("user-4", "phlaredb/", bucket.TenantDeletionMarkPath), expectedExists: options.user4FilesExist}, 144 } { 145 exists, err := bucketClient.Exists(ctx, tc.path) 146 require.NoError(t, err) 147 assert.Equal(t, tc.expectedExists, exists, tc.path) 148 } 149 150 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsStarted)) 151 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsCompleted)) 152 assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.runsFailed)) 153 assert.Equal(t, float64(6), testutil.ToFloat64(cleaner.blocksCleanedTotal)) 154 assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.blocksFailedTotal)) 155 156 // Check the updated bucket index. 
157 for _, tc := range []struct { 158 userID string 159 expectedIndex bool 160 expectedBlocks []ulid.ULID 161 expectedMarks []ulid.ULID 162 }{ 163 { 164 userID: "user-1", 165 expectedIndex: true, 166 expectedBlocks: []ulid.ULID{block1, block2 /* deleted: block3, block4, block5, partial: block6 */}, 167 expectedMarks: []ulid.ULID{block2}, 168 }, { 169 userID: "user-2", 170 expectedIndex: true, 171 expectedBlocks: []ulid.ULID{block8}, 172 expectedMarks: []ulid.ULID{}, 173 }, { 174 userID: "user-3", 175 expectedIndex: false, 176 }, 177 } { 178 idx, err := bucketindex.ReadIndex(ctx, bucketClient, tc.userID, nil, logger) 179 if !tc.expectedIndex { 180 assert.Equal(t, bucketindex.ErrIndexNotFound, err) 181 continue 182 } 183 184 require.NoError(t, err) 185 assert.ElementsMatch(t, tc.expectedBlocks, idx.Blocks.GetULIDs()) 186 assert.ElementsMatch(t, tc.expectedMarks, idx.BlockDeletionMarks.GetULIDs()) 187 } 188 189 assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` 190 # HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. 191 # TYPE pyroscope_bucket_blocks_count gauge 192 pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 2 193 pyroscope_bucket_blocks_count{compaction_level="1", user="user-2"} 1 194 # HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket. 195 # TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge 196 pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 1 197 pyroscope_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 198 # HELP pyroscope_bucket_blocks_partials_count Total number of partial blocks. 
199 # TYPE pyroscope_bucket_blocks_partials_count gauge 200 pyroscope_bucket_blocks_partials_count{user="user-1"} 2 201 pyroscope_bucket_blocks_partials_count{user="user-2"} 0 202 `), 203 "pyroscope_bucket_blocks_count", 204 "pyroscope_bucket_blocks_marked_for_deletion_count", 205 "pyroscope_bucket_blocks_partials_count", 206 )) 207 } 208 209 func TestBlocksCleaner_ShouldContinueOnBlockDeletionFailure(t *testing.T) { 210 const userID = "user-1" 211 212 bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir()) 213 bucketClient = block.BucketWithGlobalMarkers(bucketClient) 214 215 // Create blocks. 216 ctx := context.Background() 217 now := time.Now() 218 deletionDelay := 12 * time.Hour 219 block1 := createDBBlock(t, bucketClient, userID, 10, 20, 2, nil) 220 block2 := createDBBlock(t, bucketClient, userID, 20, 30, 2, nil) 221 block3 := createDBBlock(t, bucketClient, userID, 30, 40, 2, nil) 222 block4 := createDBBlock(t, bucketClient, userID, 40, 50, 2, nil) 223 createDeletionMark(t, bucketClient, userID, block2, now.Add(-deletionDelay).Add(-time.Hour)) 224 createDeletionMark(t, bucketClient, userID, block3, now.Add(-deletionDelay).Add(-time.Hour)) 225 createDeletionMark(t, bucketClient, userID, block4, now.Add(-deletionDelay).Add(-time.Hour)) 226 227 // To emulate a failure deleting a block, we wrap the bucket client in a mocked one. 
228 bucketClient = &mockBucketFailure{ 229 Bucket: bucketClient, 230 DeleteFailures: []string{path.Join(userID, "phlaredb/", block3.String(), block.MetaFilename)}, 231 } 232 233 cfg := BlocksCleanerConfig{ 234 DeletionDelay: deletionDelay, 235 CleanupInterval: time.Minute, 236 CleanupConcurrency: 1, 237 DeleteBlocksConcurrency: 1, 238 } 239 240 logger := log.NewNopLogger() 241 cfgProvider := newMockConfigProvider() 242 243 cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, nil) 244 require.NoError(t, services.StartAndAwaitRunning(ctx, cleaner)) 245 defer services.StopAndAwaitTerminated(ctx, cleaner) //nolint:errcheck 246 247 for _, tc := range []struct { 248 path string 249 expectedExists bool 250 }{ 251 {path: path.Join(userID, "phlaredb/", block1.String(), block.MetaFilename), expectedExists: true}, 252 {path: path.Join(userID, "phlaredb/", block2.String(), block.MetaFilename), expectedExists: false}, 253 {path: path.Join(userID, "phlaredb/", block3.String(), block.MetaFilename), expectedExists: true}, 254 {path: path.Join(userID, "phlaredb/", block4.String(), block.MetaFilename), expectedExists: false}, 255 } { 256 exists, err := bucketClient.Exists(ctx, tc.path) 257 require.NoError(t, err) 258 assert.Equal(t, tc.expectedExists, exists, tc.path) 259 } 260 261 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsStarted)) 262 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsCompleted)) 263 assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.runsFailed)) 264 assert.Equal(t, float64(2), testutil.ToFloat64(cleaner.blocksCleanedTotal)) 265 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.blocksFailedTotal)) 266 267 // Check the updated bucket index. 
268 idx, err := bucketindex.ReadIndex(ctx, bucketClient, userID, nil, logger) 269 require.NoError(t, err) 270 assert.ElementsMatch(t, []ulid.ULID{block1, block3}, idx.Blocks.GetULIDs()) 271 assert.ElementsMatch(t, []ulid.ULID{block3}, idx.BlockDeletionMarks.GetULIDs()) 272 } 273 274 func TestBlocksCleaner_ShouldRebuildBucketIndexOnCorruptedOne(t *testing.T) { 275 const userID = "user-1" 276 277 bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir()) 278 bucketClient = block.BucketWithGlobalMarkers(bucketClient) 279 280 // Create blocks. 281 ctx := context.Background() 282 now := time.Now() 283 deletionDelay := 12 * time.Hour 284 block1 := createDBBlock(t, bucketClient, userID, 10, 20, 2, nil) 285 block2 := createDBBlock(t, bucketClient, userID, 20, 30, 2, nil) 286 block3 := createDBBlock(t, bucketClient, userID, 30, 40, 2, nil) 287 createDeletionMark(t, bucketClient, userID, block2, now.Add(-deletionDelay).Add(-time.Hour)) 288 createDeletionMark(t, bucketClient, userID, block3, now.Add(-deletionDelay).Add(time.Hour)) 289 290 // Write a corrupted bucket index. 
291 require.NoError(t, bucketClient.Upload(ctx, path.Join(userID, "phlaredb/", bucketindex.IndexCompressedFilename), strings.NewReader("invalid!}"))) 292 293 cfg := BlocksCleanerConfig{ 294 DeletionDelay: deletionDelay, 295 CleanupInterval: time.Minute, 296 CleanupConcurrency: 1, 297 DeleteBlocksConcurrency: 1, 298 } 299 300 logger := log.NewNopLogger() 301 cfgProvider := newMockConfigProvider() 302 303 cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, nil) 304 require.NoError(t, services.StartAndAwaitRunning(ctx, cleaner)) 305 defer services.StopAndAwaitTerminated(ctx, cleaner) //nolint:errcheck 306 307 for _, tc := range []struct { 308 path string 309 expectedExists bool 310 }{ 311 {path: path.Join(userID, "phlaredb/", block1.String(), block.MetaFilename), expectedExists: true}, 312 {path: path.Join(userID, "phlaredb/", block2.String(), block.MetaFilename), expectedExists: false}, 313 {path: path.Join(userID, "phlaredb/", block3.String(), block.MetaFilename), expectedExists: true}, 314 } { 315 exists, err := bucketClient.Exists(ctx, tc.path) 316 require.NoError(t, err) 317 assert.Equal(t, tc.expectedExists, exists, tc.path) 318 } 319 320 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsStarted)) 321 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsCompleted)) 322 assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.runsFailed)) 323 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.blocksCleanedTotal)) 324 assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.blocksFailedTotal)) 325 326 // Check the updated bucket index. 
327 idx, err := bucketindex.ReadIndex(ctx, bucketClient, userID, nil, logger) 328 require.NoError(t, err) 329 assert.ElementsMatch(t, []ulid.ULID{block1, block3}, idx.Blocks.GetULIDs()) 330 assert.ElementsMatch(t, []ulid.ULID{block3}, idx.BlockDeletionMarks.GetULIDs()) 331 } 332 333 func TestBlocksCleaner_ShouldRemoveMetricsForTenantsNotBelongingAnymoreToTheShard(t *testing.T) { 334 bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir()) 335 bucketClient = block.BucketWithGlobalMarkers(bucketClient) 336 337 // Create blocks. 338 createDBBlock(t, bucketClient, "user-1", 10, 20, 2, nil) 339 createDBBlock(t, bucketClient, "user-1", 20, 30, 2, nil) 340 createDBBlock(t, bucketClient, "user-2", 30, 40, 2, nil) 341 342 cfg := BlocksCleanerConfig{ 343 DeletionDelay: time.Hour, 344 CleanupInterval: time.Minute, 345 CleanupConcurrency: 1, 346 DeleteBlocksConcurrency: 1, 347 } 348 349 ctx := context.Background() 350 logger := log.NewNopLogger() 351 reg := prometheus.NewPedanticRegistry() 352 cfgProvider := newMockConfigProvider() 353 354 cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, reg) 355 require.NoError(t, cleaner.runCleanupWithErr(ctx)) 356 357 assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` 358 # HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. 359 # TYPE pyroscope_bucket_blocks_count gauge 360 pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 2 361 pyroscope_bucket_blocks_count{compaction_level="1", user="user-2"} 1 362 # HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket. 
363 # TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge 364 pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0 365 pyroscope_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 366 # HELP pyroscope_bucket_blocks_partials_count Total number of partial blocks. 367 # TYPE pyroscope_bucket_blocks_partials_count gauge 368 pyroscope_bucket_blocks_partials_count{user="user-1"} 0 369 pyroscope_bucket_blocks_partials_count{user="user-2"} 0 370 `), 371 "pyroscope_bucket_blocks_count", 372 "pyroscope_bucket_blocks_marked_for_deletion_count", 373 "pyroscope_bucket_blocks_partials_count", 374 )) 375 376 // Override the users scanner to reconfigure it to only return a subset of users. 377 cleaner.tenantsScanner = bucket.NewTenantsScanner(bucketClient, func(userID string) (bool, error) { return userID == "user-1", nil }, logger) 378 379 // Create new blocks, to double check expected metrics have changed. 380 createDBBlock(t, bucketClient, "user-1", 40, 50, 2, nil) 381 createDBBlock(t, bucketClient, "user-2", 50, 60, 2, nil) 382 383 require.NoError(t, cleaner.runCleanupWithErr(ctx)) 384 385 assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` 386 # HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. 387 # TYPE pyroscope_bucket_blocks_count gauge 388 pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 3 389 pyroscope_bucket_blocks_count{compaction_level="1", user="user-2"} 1 390 # HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket. 391 # TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge 392 pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0 393 # HELP pyroscope_bucket_blocks_partials_count Total number of partial blocks. 
394 # TYPE pyroscope_bucket_blocks_partials_count gauge 395 pyroscope_bucket_blocks_partials_count{user="user-1"} 0 396 `), 397 "pyroscope_bucket_blocks_count", 398 "pyroscope_bucket_blocks_marked_for_deletion_count", 399 "pyroscope_bucket_blocks_partials_count", 400 )) 401 } 402 403 func TestBlocksCleaner_ShouldNotCleanupUserThatDoesntBelongToShardAnymore(t *testing.T) { 404 bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir()) 405 bucketClient = block.BucketWithGlobalMarkers(bucketClient) 406 407 // Create blocks. 408 createDBBlock(t, bucketClient, "user-1", 10, 20, 2, nil) 409 createDBBlock(t, bucketClient, "user-2", 20, 30, 2, nil) 410 411 cfg := BlocksCleanerConfig{ 412 DeletionDelay: time.Hour, 413 CleanupInterval: time.Minute, 414 CleanupConcurrency: 1, 415 DeleteBlocksConcurrency: 1, 416 } 417 418 ctx := context.Background() 419 logger := log.NewNopLogger() 420 reg := prometheus.NewPedanticRegistry() 421 cfgProvider := newMockConfigProvider() 422 423 // We will simulate change of "ownUser" by counting number of replies per user. First reply will be "true", 424 // all subsequent replies will be false. 425 426 userSeen := map[string]bool{} 427 ownUser := func(user string) (bool, error) { 428 if userSeen[user] { 429 return false, nil 430 } 431 userSeen[user] = true 432 return true, nil 433 } 434 435 cleaner := NewBlocksCleaner(cfg, bucketClient, ownUser, cfgProvider, logger, reg) 436 require.NoError(t, cleaner.runCleanupWithErr(ctx)) 437 438 // Verify that we have seen the users 439 require.ElementsMatch(t, []string{"user-1", "user-2"}, cleaner.lastOwnedUsers) 440 441 // But there are no metrics for any user, because we did not in fact clean them. 442 assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` 443 # HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. 
444 # TYPE pyroscope_bucket_blocks_count gauge 445 `), 446 "pyroscope_bucket_blocks_count", 447 )) 448 449 // Running cleanUsers again will see that users are no longer owned. 450 require.NoError(t, cleaner.runCleanupWithErr(ctx)) 451 require.ElementsMatch(t, []string{}, cleaner.lastOwnedUsers) 452 } 453 454 func TestBlocksCleaner_ListBlocksOutsideRetentionPeriod(t *testing.T) { 455 bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir()) 456 bucketClient = block.BucketWithGlobalMarkers(bucketClient) 457 ctx := context.Background() 458 logger := log.NewNopLogger() 459 460 id1 := createDBBlock(t, bucketClient, "user-1", 5000, 6000, 2, nil) 461 id2 := createDBBlock(t, bucketClient, "user-1", 6000, 7000, 2, nil) 462 id3 := createDBBlock(t, bucketClient, "user-1", 7000, 8000, 2, nil) 463 464 w := bucketindex.NewUpdater(bucketClient, "user-1", nil, logger) 465 idx, _, err := w.UpdateIndex(ctx, nil) 466 require.NoError(t, err) 467 468 assert.ElementsMatch(t, []ulid.ULID{id1, id2, id3}, idx.Blocks.GetULIDs()) 469 470 // Excessive retention period (wrapping epoch) 471 result := listBlocksOutsideRetentionPeriod(idx, time.Unix(10, 0).Add(-time.Hour)) 472 assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs()) 473 474 // Normal operation - varying retention period. 475 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(6, 0)) 476 assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs()) 477 478 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(7, 0)) 479 assert.ElementsMatch(t, []ulid.ULID{id1}, result.GetULIDs()) 480 481 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(8, 0)) 482 assert.ElementsMatch(t, []ulid.ULID{id1, id2}, result.GetULIDs()) 483 484 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(9, 0)) 485 assert.ElementsMatch(t, []ulid.ULID{id1, id2, id3}, result.GetULIDs()) 486 487 // Avoiding redundant marking - blocks already marked for deletion. 
488 489 mark1 := &bucketindex.BlockDeletionMark{ID: id1} 490 mark2 := &bucketindex.BlockDeletionMark{ID: id2} 491 492 idx.BlockDeletionMarks = bucketindex.BlockDeletionMarks{mark1} 493 494 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(7, 0)) 495 assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs()) 496 497 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(8, 0)) 498 assert.ElementsMatch(t, []ulid.ULID{id2}, result.GetULIDs()) 499 500 idx.BlockDeletionMarks = bucketindex.BlockDeletionMarks{mark1, mark2} 501 502 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(7, 0)) 503 assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs()) 504 505 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(8, 0)) 506 assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs()) 507 508 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(9, 0)) 509 assert.ElementsMatch(t, []ulid.ULID{id3}, result.GetULIDs()) 510 } 511 512 func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) { 513 bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir()) 514 bucketClient = block.BucketWithGlobalMarkers(bucketClient) 515 516 ts := func(hours int) int64 { 517 return time.Now().Add(time.Duration(hours)*time.Hour).Unix() * 1000 518 } 519 520 block1 := createDBBlock(t, bucketClient, "user-1", ts(-10), ts(-8), 2, nil) 521 block2 := createDBBlock(t, bucketClient, "user-1", ts(-8), ts(-6), 2, nil) 522 block3 := createDBBlock(t, bucketClient, "user-2", ts(-10), ts(-8), 2, nil) 523 block4 := createDBBlock(t, bucketClient, "user-2", ts(-8), ts(-6), 2, nil) 524 525 cfg := BlocksCleanerConfig{ 526 DeletionDelay: time.Hour, 527 CleanupInterval: time.Minute, 528 CleanupConcurrency: 1, 529 DeleteBlocksConcurrency: 1, 530 } 531 532 ctx := context.Background() 533 logger := test.NewTestingLogger(t) 534 reg := prometheus.NewPedanticRegistry() 535 cfgProvider := newMockConfigProvider() 536 537 cleaner := NewBlocksCleaner(cfg, 
bucketClient, bucket.AllTenants, cfgProvider, logger, reg) 538 539 assertBlockExists := func(user string, blockID ulid.ULID, expectExists bool) { 540 exists, err := bucketClient.Exists(ctx, path.Join(user, "phlaredb/", blockID.String(), block.MetaFilename)) 541 require.NoError(t, err) 542 assert.Equal(t, expectExists, exists) 543 } 544 545 // Existing behaviour - retention period disabled. 546 { 547 cfgProvider.userRetentionPeriods["user-1"] = 0 548 cfgProvider.userRetentionPeriods["user-2"] = 0 549 550 require.NoError(t, cleaner.runCleanupWithErr(ctx)) 551 assertBlockExists("user-1", block1, true) 552 assertBlockExists("user-1", block2, true) 553 assertBlockExists("user-2", block3, true) 554 assertBlockExists("user-2", block4, true) 555 556 assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` 557 # HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. 558 # TYPE pyroscope_bucket_blocks_count gauge 559 pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 2 560 pyroscope_bucket_blocks_count{compaction_level="1", user="user-2"} 2 561 # HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket. 562 # TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge 563 pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0 564 pyroscope_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 565 # HELP pyroscope_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. 
566 # TYPE pyroscope_compactor_blocks_marked_for_deletion_total counter 567 pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} 0 568 pyroscope_compactor_blocks_marked_for_deletion_total{reason="retention"} 0 569 `), 570 "pyroscope_bucket_blocks_count", 571 "pyroscope_bucket_blocks_marked_for_deletion_count", 572 "pyroscope_compactor_blocks_marked_for_deletion_total", 573 )) 574 } 575 576 // Retention enabled only for a single user, but does nothing. 577 { 578 cfgProvider.userRetentionPeriods["user-1"] = 9 * time.Hour 579 580 require.NoError(t, cleaner.runCleanupWithErr(ctx)) 581 assertBlockExists("user-1", block1, true) 582 assertBlockExists("user-1", block2, true) 583 assertBlockExists("user-2", block3, true) 584 assertBlockExists("user-2", block4, true) 585 } 586 587 // Retention enabled only for a single user, marking a single block. 588 // Note the block won't be deleted yet due to deletion delay. 589 { 590 cfgProvider.userRetentionPeriods["user-1"] = 7 * time.Hour 591 592 require.NoError(t, cleaner.runCleanupWithErr(ctx)) 593 assertBlockExists("user-1", block1, true) 594 assertBlockExists("user-1", block2, true) 595 assertBlockExists("user-2", block3, true) 596 assertBlockExists("user-2", block4, true) 597 598 assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` 599 # HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. 600 # TYPE pyroscope_bucket_blocks_count gauge 601 pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 2 602 pyroscope_bucket_blocks_count{compaction_level="1", user="user-2"} 2 603 # HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket. 
604 # TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge 605 pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 1 606 pyroscope_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 607 # HELP pyroscope_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. 608 # TYPE pyroscope_compactor_blocks_marked_for_deletion_total counter 609 pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} 0 610 pyroscope_compactor_blocks_marked_for_deletion_total{reason="retention"} 1 611 `), 612 "pyroscope_bucket_blocks_count", 613 "pyroscope_bucket_blocks_marked_for_deletion_count", 614 "pyroscope_compactor_blocks_marked_for_deletion_total", 615 )) 616 } 617 618 // Marking the block again, before the deletion occurs, should not cause an error. 619 { 620 require.NoError(t, cleaner.runCleanupWithErr(ctx)) 621 assertBlockExists("user-1", block1, true) 622 assertBlockExists("user-1", block2, true) 623 assertBlockExists("user-2", block3, true) 624 assertBlockExists("user-2", block4, true) 625 } 626 627 // Reduce the deletion delay. Now the block will be deleted. 628 { 629 cleaner.cfg.DeletionDelay = 0 630 631 require.NoError(t, cleaner.runCleanupWithErr(ctx)) 632 assertBlockExists("user-1", block1, false) 633 assertBlockExists("user-1", block2, true) 634 assertBlockExists("user-2", block3, true) 635 assertBlockExists("user-2", block4, true) 636 637 assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` 638 # HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. 639 # TYPE pyroscope_bucket_blocks_count gauge 640 pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 1 641 pyroscope_bucket_blocks_count{compaction_level="1", user="user-2"} 2 642 # HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket. 
643 # TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge 644 pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0 645 pyroscope_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 646 # HELP pyroscope_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. 647 # TYPE pyroscope_compactor_blocks_marked_for_deletion_total counter 648 pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} 0 649 pyroscope_compactor_blocks_marked_for_deletion_total{reason="retention"} 1 650 `), 651 "pyroscope_bucket_blocks_count", 652 "pyroscope_bucket_blocks_marked_for_deletion_count", 653 "pyroscope_compactor_blocks_marked_for_deletion_total", 654 )) 655 } 656 657 // Retention enabled for other user; test deleting multiple blocks. 658 { 659 cfgProvider.userRetentionPeriods["user-2"] = 5 * time.Hour 660 661 require.NoError(t, cleaner.runCleanupWithErr(ctx)) 662 assertBlockExists("user-1", block1, false) 663 assertBlockExists("user-1", block2, true) 664 assertBlockExists("user-2", block3, false) 665 assertBlockExists("user-2", block4, false) 666 667 assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` 668 # HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. 669 # TYPE pyroscope_bucket_blocks_count gauge 670 pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 1 671 # HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket. 672 # TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge 673 pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0 674 pyroscope_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 675 # HELP pyroscope_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. 
676 # TYPE pyroscope_compactor_blocks_marked_for_deletion_total counter 677 pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} 0 678 pyroscope_compactor_blocks_marked_for_deletion_total{reason="retention"} 3 679 `), 680 "pyroscope_bucket_blocks_count", 681 "pyroscope_bucket_blocks_marked_for_deletion_count", 682 "pyroscope_compactor_blocks_marked_for_deletion_total", 683 )) 684 } 685 }

// checkBlock asserts whether the meta file and the deletion mark of the given
// tenant's block exist in the bucket.
//
// metaJSONExists is the expected presence of the block's meta file and
// markedForDeletion is the expected presence of its deletion mark. The test is
// failed immediately on any mismatch or bucket error.
func checkBlock(t *testing.T, user string, bucketClient objstore.Bucket, blockID ulid.ULID, metaJSONExists bool, markedForDeletion bool) {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	blockDir := path.Join(user, "phlaredb/", blockID.String())

	exists, err := bucketClient.Exists(context.Background(), path.Join(blockDir, block.MetaFilename))
	require.NoError(t, err)
	require.Equal(t, metaJSONExists, exists)

	exists, err = bucketClient.Exists(context.Background(), path.Join(blockDir, block.DeletionMarkFilename))
	require.NoError(t, err)
	require.Equal(t, markedForDeletion, exists)
}

// TestBlocksCleaner_ShouldCleanUpFilesWhenNoMoreBlocksRemain verifies that, with
// NoBlocksFileCleanupEnabled set, a cleanup run that removes the tenant's last
// blocks also removes an orphan deletion mark and the tenant's bucket index.
func TestBlocksCleaner_ShouldCleanUpFilesWhenNoMoreBlocksRemain(t *testing.T) {
	bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir())
	bucketClient = block.BucketWithGlobalMarkers(bucketClient)

	const userID = "user-1"
	ctx := context.Background()
	now := time.Now()
	deletionDelay := 12 * time.Hour

	// Create two blocks and mark them for deletion at a time before the deletionDelay.
	block1 := createDBBlock(t, bucketClient, userID, 10, 20, 2, nil)
	block2 := createDBBlock(t, bucketClient, userID, 20, 30, 2, nil)

	createDeletionMark(t, bucketClient, userID, block1, now.Add(-deletionDelay).Add(-time.Hour))
	createDeletionMark(t, bucketClient, userID, block2, now.Add(-deletionDelay).Add(-time.Hour))

	checkBlock(t, userID, bucketClient, block1, true, true)
	checkBlock(t, userID, bucketClient, block2, true, true)

	// Create a deletion mark within the deletionDelay period that won't correspond to any block.
	randomULID := ulid.MustNew(ulid.Now(), rand.Reader)
	createDeletionMark(t, bucketClient, userID, randomULID, now.Add(-deletionDelay).Add(time.Hour))
	blockDeletionMarkFile := path.Join(userID, "phlaredb/", block.DeletionMarkFilepath(randomULID))
	exists, err := bucketClient.Exists(ctx, blockDeletionMarkFile)
	require.NoError(t, err)
	assert.True(t, exists)

	cfg := BlocksCleanerConfig{
		DeletionDelay:              deletionDelay,
		CleanupInterval:            time.Minute,
		CleanupConcurrency:         1,
		DeleteBlocksConcurrency:    1,
		NoBlocksFileCleanupEnabled: true,
	}

	logger := test.NewTestingLogger(t)
	reg := prometheus.NewPedanticRegistry()
	cfgProvider := newMockConfigProvider()

	cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, reg)
	require.NoError(t, cleaner.runCleanupWithErr(ctx))

	// Check that the orphan deletion mark and the bucket index have been deleted.
	exists, err = bucketClient.Exists(ctx, blockDeletionMarkFile)
	require.NoError(t, err)
	assert.False(t, exists)

	_, err = bucketindex.ReadIndex(ctx, bucketClient, userID, nil, logger)
	require.ErrorIs(t, err, bucketindex.ErrIndexNotFound)
}

// TestBlocksCleaner_ShouldRemovePartialBlocksOutsideDelayPeriod verifies that a
// partial block (one whose meta file is missing) is marked for deletion once a
// very short per-tenant partial block delay is configured, while a complete
// block is left untouched.
func TestBlocksCleaner_ShouldRemovePartialBlocksOutsideDelayPeriod(t *testing.T) {
	bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir())
	bucketClient = block.BucketWithGlobalMarkers(bucketClient)

	// ts returns the Unix time in milliseconds at the given offset, in hours, from now.
	ts := func(hours int) int64 {
		return time.Now().Add(time.Duration(hours)*time.Hour).Unix() * 1000
	}

	block1 := createDBBlock(t, bucketClient, "user-1", ts(-10), ts(-8), 2, nil)
	block2 := createDBBlock(t, bucketClient, "user-1", ts(-8), ts(-6), 2, nil)

	cfg := BlocksCleanerConfig{
		DeletionDelay:           time.Hour,
		CleanupInterval:         time.Minute,
		CleanupConcurrency:      1,
		DeleteBlocksConcurrency: 1,
	}

	ctx := context.Background()
	logger := test.NewTestingLogger(t)
	reg := prometheus.NewPedanticRegistry()
	cfgProvider := newMockConfigProvider()

	cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, reg)

	// makeBlockPartial turns a block into a partial one by deleting its meta file.
	makeBlockPartial := func(user string, blockID ulid.ULID) {
		err := bucketClient.Delete(ctx, path.Join(user, "phlaredb/", blockID.String(), block.MetaFilename))
		require.NoError(t, err)
	}

	checkBlock(t, "user-1", bucketClient, block1, true, false)
	checkBlock(t, "user-1", bucketClient, block2, true, false)
	makeBlockPartial("user-1", block1)
	checkBlock(t, "user-1", bucketClient, block1, false, false)
	checkBlock(t, "user-1", bucketClient, block2, true, false)

	require.NoError(t, cleaner.cleanUser(ctx, "user-1", logger))

	// Check that no blocks were marked for deletion: no partial block deletion
	// delay has been configured for the tenant yet, so the mock reports 0.
	checkBlock(t, "user-1", bucketClient, block1, false, false)
	checkBlock(t, "user-1", bucketClient, block2, true, false)

	// Test that partial block does get marked for deletion.
	// The delay time must be very short since these temporary files were just created.
	cfgProvider.userPartialBlockDelay["user-1"] = 1 * time.Nanosecond

	require.NoError(t, cleaner.cleanUser(ctx, "user-1", logger))

	// Check that first block was marked for deletion (partial block updated far in the past),
	// but not the second one, because it's not partial.
	checkBlock(t, "user-1", bucketClient, block1, false, true)
	checkBlock(t, "user-1", bucketClient, block2, true, false)

	require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
		# HELP pyroscope_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks.
		# TYPE pyroscope_bucket_blocks_count gauge
		pyroscope_bucket_blocks_count{compaction_level="1", user="user-1"} 1
		# HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
		# TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge
		pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
		# HELP pyroscope_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor.
		# TYPE pyroscope_compactor_blocks_marked_for_deletion_total counter
		pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} 1
		pyroscope_compactor_blocks_marked_for_deletion_total{reason="retention"} 0
	`),
		"pyroscope_bucket_blocks_count",
		"pyroscope_bucket_blocks_marked_for_deletion_count",
		"pyroscope_compactor_blocks_marked_for_deletion_total",
	))
}

// TestBlocksCleaner_ShouldNotRemovePartialBlocksInsideDelayPeriod verifies that
// a partial block modified more recently than the tenant's partial block delay
// is not marked for deletion, and that a block whose meta file is present but
// corrupted is not treated as partial.
func TestBlocksCleaner_ShouldNotRemovePartialBlocksInsideDelayPeriod(t *testing.T) {
	bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir())
	bucketClient = block.BucketWithGlobalMarkers(bucketClient)

	// ts returns the Unix time in milliseconds at the given offset, in hours, from now.
	ts := func(hours int) int64 {
		return time.Now().Add(time.Duration(hours)*time.Hour).Unix() * 1000
	}

	block1 := createDBBlock(t, bucketClient, "user-1", ts(-10), ts(-8), 2, nil)
	block2 := createDBBlock(t, bucketClient, "user-2", ts(-8), ts(-6), 2, nil)

	cfg := BlocksCleanerConfig{
		DeletionDelay:           time.Hour,
		CleanupInterval:         time.Minute,
		CleanupConcurrency:      1,
		DeleteBlocksConcurrency: 1,
	}

	ctx := context.Background()
	logger := test.NewTestingLogger(t)
	reg := prometheus.NewPedanticRegistry()
	cfgProvider := newMockConfigProvider()

	cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, reg)

	// makeBlockPartial turns a block into a partial one by deleting its meta file.
	makeBlockPartial := func(user string, blockID ulid.ULID) {
		err := bucketClient.Delete(ctx, path.Join(user, "phlaredb/", blockID.String(), block.MetaFilename))
		require.NoError(t, err)
	}

	// corruptMeta overwrites the block's meta file with content that is not valid metadata.
	corruptMeta := func(user string, blockID ulid.ULID) {
		err := bucketClient.Upload(ctx, path.Join(user, "phlaredb/", blockID.String(), block.MetaFilename), strings.NewReader("corrupted file contents"))
		require.NoError(t, err)
	}

	checkBlock(t, "user-1", bucketClient, block1, true, false)
	checkBlock(t, "user-2", bucketClient, block2, true, false)

	makeBlockPartial("user-1", block1)
	corruptMeta("user-2", block2)

	checkBlock(t, "user-1", bucketClient, block1, false, false)
	checkBlock(t, "user-2", bucketClient, block2, true, false)

	// Set partial block delay such that block will not be marked for deletion.
	// The comparison is based on inode modification time, so anything more than very recent (< 1 second) won't be
	// out of range.
	cfgProvider.userPartialBlockDelay["user-1"] = 1 * time.Hour
	cfgProvider.userPartialBlockDelay["user-2"] = 1 * time.Nanosecond

	require.NoError(t, cleaner.cleanUser(ctx, "user-1", logger))
	checkBlock(t, "user-1", bucketClient, block1, false, false) // This block was updated too recently, so we don't mark it for deletion just yet.
	checkBlock(t, "user-2", bucketClient, block2, true, false)  // No change for user-2.

	require.NoError(t, cleaner.cleanUser(ctx, "user-2", logger))
	checkBlock(t, "user-1", bucketClient, block1, false, false) // No change for user-1.
	checkBlock(t, "user-2", bucketClient, block2, true, false)  // Block with corrupted meta is NOT marked for deletion.

	// The pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} counter should be zero since for user-1
	// the time since modification is shorter than the delay, and for user-2, the metadata is corrupted but the file
	// is still present in the bucket so the block is not partial.
	require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
		# HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
		# TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge
		pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
		pyroscope_bucket_blocks_marked_for_deletion_count{user="user-2"} 0
		# HELP pyroscope_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor.
		# TYPE pyroscope_compactor_blocks_marked_for_deletion_total counter
		pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} 0
		pyroscope_compactor_blocks_marked_for_deletion_total{reason="retention"} 0
	`),
		"pyroscope_bucket_blocks_count",
		"pyroscope_bucket_blocks_marked_for_deletion_count",
		"pyroscope_compactor_blocks_marked_for_deletion_total",
	))
}

// TestBlocksCleaner_ShouldNotRemovePartialBlocksIfConfiguredDelayIsInvalid
// verifies that when the config provider reports the tenant's partial block
// delay as invalid, the cleaner leaves the partial block unmarked and logs that
// partial blocks deletion has been disabled for the tenant.
func TestBlocksCleaner_ShouldNotRemovePartialBlocksIfConfiguredDelayIsInvalid(t *testing.T) {
	ctx := context.Background()
	reg := prometheus.NewPedanticRegistry()
	// Capture the log output so the warning can be asserted on below.
	logs := &concurrency.SyncBuffer{}
	logger := log.NewLogfmtLogger(logs)

	bucketClient, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), t.TempDir())
	bucketClient = block.BucketWithGlobalMarkers(bucketClient)

	// ts returns the Unix time in milliseconds at the given offset, in hours, from now.
	ts := func(hours int) int64 {
		return time.Now().Add(time.Duration(hours)*time.Hour).Unix() * 1000
	}

	// Create a partial block.
	block1 := createDBBlock(t, bucketClient, "user-1", ts(-10), ts(-8), 2, nil)
	err := bucketClient.Delete(ctx, path.Join("user-1", "phlaredb/", block1.String(), block.MetaFilename))
	require.NoError(t, err)

	cfg := BlocksCleanerConfig{
		DeletionDelay:           time.Hour,
		CleanupInterval:         time.Minute,
		CleanupConcurrency:      1,
		DeleteBlocksConcurrency: 1,
	}

	// Configure an invalid delay.
	cfgProvider := newMockConfigProvider()
	cfgProvider.userPartialBlockDelay["user-1"] = 0
	cfgProvider.userPartialBlockDelayInvalid["user-1"] = true

	// Pre-condition check: block should be partial and not being marked for deletion.
	checkBlock(t, "user-1", bucketClient, block1, false, false)

	// Run the cleanup.
	cleaner := NewBlocksCleaner(cfg, bucketClient, bucket.AllTenants, cfgProvider, logger, reg)
	require.NoError(t, cleaner.cleanUser(ctx, "user-1", logger))

	// Ensure the block has NOT been marked for deletion.
	checkBlock(t, "user-1", bucketClient, block1, false, false)
	assert.Contains(t, logs.String(), "partial blocks deletion has been disabled for tenant because the delay has been set lower than the minimum value allowed")

	require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
		# HELP pyroscope_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket.
		# TYPE pyroscope_bucket_blocks_marked_for_deletion_count gauge
		pyroscope_bucket_blocks_marked_for_deletion_count{user="user-1"} 0
		# HELP pyroscope_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor.
		# TYPE pyroscope_compactor_blocks_marked_for_deletion_total counter
		pyroscope_compactor_blocks_marked_for_deletion_total{reason="partial"} 0
		pyroscope_compactor_blocks_marked_for_deletion_total{reason="retention"} 0
	`),
		"pyroscope_bucket_blocks_count",
		"pyroscope_bucket_blocks_marked_for_deletion_count",
		"pyroscope_compactor_blocks_marked_for_deletion_total",
	))
}

// TestStalePartialBlockLastModifiedTime exercises stalePartialBlockLastModifiedTime
// against a block whose files all carry the same, known modification time, using
// cutoffs before, equal to and after that time, plus a block ID with no objects.
func TestStalePartialBlockLastModifiedTime(t *testing.T) {
	dir := t.TempDir()
	b, _ := objstore_testutil.NewFilesystemBucket(t, context.Background(), dir)

	const tenantID = "user"

	objectTime := time.Now().Add(-1 * time.Hour).Truncate(time.Second) // ignore milliseconds, as not all filesystems store them.
	blockID := createDBBlock(t, b, tenantID, objectTime.UnixMilli(), time.Now().UnixMilli(), 2, nil)

	// Force every file and directory of the block to the same modification time.
	err := filepath.Walk(filepath.Join(dir, tenantID, "phlaredb/", blockID.String()), func(name string, _ os.FileInfo, err error) error {
		require.NoError(t, err)
		require.NoError(t, os.Chtimes(name, objectTime, objectTime))
		return nil
	})
	require.NoError(t, err)

	userBucket := objstore.NewTenantBucketClient(tenantID, b, nil)

	// Pre-condition check: the zero-value block ID differs from the created
	// block and has no objects in the tenant bucket.
	emptyBlockID := ulid.ULID{}
	require.NotEqual(t, blockID, emptyBlockID)
	empty := true
	err = userBucket.Iter(context.Background(), emptyBlockID.String(), func(_ string) error {
		empty = false
		return nil
	})
	require.NoError(t, err)
	require.True(t, empty)

	testCases := []struct {
		name                 string
		blockID              ulid.ULID
		cutoff               time.Time
		expectedLastModified time.Time
	}{
		{name: "no objects", blockID: emptyBlockID, cutoff: objectTime, expectedLastModified: time.Time{}},
		{name: "objects newer than delay cutoff", blockID: blockID, cutoff: objectTime.Add(-1 * time.Second), expectedLastModified: time.Time{}},
		{name: "objects equal to delay cutoff", blockID: blockID, cutoff: objectTime, expectedLastModified: objectTime},
		{name: "objects older than delay cutoff", blockID: blockID, cutoff: objectTime.Add(1 * time.Second), expectedLastModified: objectTime},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			lastModified, err := stalePartialBlockLastModifiedTime(context.Background(), tc.blockID, userBucket, tc.cutoff)
			require.NoError(t, err)
			require.Equal(t, tc.expectedLastModified, lastModified)
		})
	}
}

// mockBucketFailure wraps a bucket and injects failures for selected objects.
type mockBucketFailure struct {
	objstore.Bucket

	// DeleteFailures lists the object names for which Delete returns an error.
	DeleteFailures []string
}

// Delete returns a mocked error when name is listed in DeleteFailures and
// otherwise delegates to the wrapped bucket.
func (m *mockBucketFailure) Delete(ctx context.Context, name string) error {
	if util.StringsContain(m.DeleteFailures, name) {
		return errors.New("mocked delete failure")
	}
	return m.Bucket.Delete(ctx, name)
}

// mockConfigProvider is an in-memory, per-tenant configuration source for
// tests. Every map is keyed by tenant ID; a tenant without an entry gets the
// zero value of the corresponding setting.
type mockConfigProvider struct {
	userRetentionPeriods         map[string]time.Duration
	splitAndMergeShards          map[string]int
	instancesShardSize           map[string]int
	splitGroups                  map[string]int
	splitAndMergeStageSize       map[string]int
	blockUploadEnabled           map[string]bool
	blockUploadValidationEnabled map[string]bool
	blockUploadMaxBlockSizeBytes map[string]int64
	userPartialBlockDelay        map[string]time.Duration
	userPartialBlockDelayInvalid map[string]bool
	verifyChunks                 map[string]bool
	downsamplerEnabled           map[string]bool
}

// newMockConfigProvider returns a mockConfigProvider with every map allocated,
// so tests can assign per-tenant overrides to any field without hitting a
// nil-map write panic.
func newMockConfigProvider() *mockConfigProvider {
	return &mockConfigProvider{
		userRetentionPeriods:         make(map[string]time.Duration),
		splitAndMergeShards:          make(map[string]int),
		instancesShardSize:           make(map[string]int),
		splitGroups:                  make(map[string]int),
		splitAndMergeStageSize:       make(map[string]int),
		blockUploadEnabled:           make(map[string]bool),
		blockUploadValidationEnabled: make(map[string]bool),
		blockUploadMaxBlockSizeBytes: make(map[string]int64),
		userPartialBlockDelay:        make(map[string]time.Duration),
		userPartialBlockDelayInvalid: make(map[string]bool),
		verifyChunks:                 make(map[string]bool),
		downsamplerEnabled:           make(map[string]bool),
	}
}

// CompactorBlocksRetentionPeriod returns the retention period configured for
// user, or 0 when none is set.
func (m *mockConfigProvider) CompactorBlocksRetentionPeriod(user string) time.Duration {
	// A missing key (or a nil map) yields the zero value.
	return m.userRetentionPeriods[user]
}

// CompactorSplitAndMergeShards returns the split-and-merge shard count
// configured for user, or 0 when none is set.
func (m *mockConfigProvider) CompactorSplitAndMergeShards(user string) int {
	return m.splitAndMergeShards[user]
}

// CompactorSplitAndMergeStageSize returns the split-and-merge stage size
// configured for user, or 0 when none is set.
func (m *mockConfigProvider) CompactorSplitAndMergeStageSize(user string) int {
	return m.splitAndMergeStageSize[user]
}

// CompactorSplitGroups returns the number of split groups configured for user,
// or 0 when none is set.
func (m *mockConfigProvider) CompactorSplitGroups(user string) int {
	return m.splitGroups[user]
}

// CompactorTenantShardSize returns the shard size configured for user, or 0
// when none is set.
func (m *mockConfigProvider) CompactorTenantShardSize(user string) int {
	return m.instancesShardSize[user]
}

// CompactorBlockUploadEnabled reports the block upload flag stored for tenantID
// (false when unset).
func (m *mockConfigProvider) CompactorBlockUploadEnabled(tenantID string) bool {
	return m.blockUploadEnabled[tenantID]
}

// CompactorBlockUploadValidationEnabled reports the block upload validation
// flag stored for tenantID (false when unset).
func (m *mockConfigProvider) CompactorBlockUploadValidationEnabled(tenantID string) bool {
	return m.blockUploadValidationEnabled[tenantID]
}

// CompactorPartialBlockDeletionDelay returns the partial block deletion delay
// stored for user, together with a flag that is true unless the delay has been
// marked invalid for that user.
func (m *mockConfigProvider) CompactorPartialBlockDeletionDelay(user string) (time.Duration, bool) {
	return m.userPartialBlockDelay[user], !m.userPartialBlockDelayInvalid[user]
}

// CompactorBlockUploadVerifyChunks reports the verify-chunks flag stored for
// tenantID (false when unset).
func (m *mockConfigProvider) CompactorBlockUploadVerifyChunks(tenantID string) bool {
	return m.verifyChunks[tenantID]
}

// CompactorBlockUploadMaxBlockSizeBytes returns the maximum uploaded block size
// stored for user, or 0 when none is set.
func (m *mockConfigProvider) CompactorBlockUploadMaxBlockSizeBytes(user string) int64 {
	return m.blockUploadMaxBlockSizeBytes[user]
}

// CompactorDownsamplerEnabled reports the downsampler flag stored for user
// (false when unset).
func (m *mockConfigProvider) CompactorDownsamplerEnabled(user string) bool {
	return m.downsamplerEnabled[user]
}

// S3SSEType always returns an empty string in this mock.
func (m *mockConfigProvider) S3SSEType(string) string {
	return ""
}

// S3SSEKMSKeyID always returns an empty string in this mock.
func (m *mockConfigProvider) S3SSEKMSKeyID(string) string {
	return ""
}

// S3SSEKMSEncryptionContext always returns an empty string in this mock.
func (m *mockConfigProvider) S3SSEKMSEncryptionContext(string) string {
	return ""
}

// runCleanupWithErr is a test-only entry point that refreshes the set of owned
// users and then cleans them, returning the first error from either step.
// The cleanup itself logs to a no-op logger.
func (c *BlocksCleaner) runCleanupWithErr(ctx context.Context) error {
	allUsers, isDeleted, err := c.refreshOwnedUsers(ctx)
	if err != nil {
		return err
	}

	return c.cleanUsers(ctx, allUsers, isDeleted, log.NewNopLogger())
}