github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/compactor/blocks_cleaner_test.go (about) 1 package compactor 2 3 import ( 4 "context" 5 "crypto/rand" 6 "errors" 7 "fmt" 8 "path" 9 "strings" 10 "testing" 11 "time" 12 13 "github.com/go-kit/log" 14 "github.com/grafana/dskit/services" 15 "github.com/oklog/ulid" 16 "github.com/prometheus/client_golang/prometheus" 17 "github.com/prometheus/client_golang/prometheus/testutil" 18 prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" 19 "github.com/stretchr/testify/assert" 20 "github.com/stretchr/testify/require" 21 "github.com/thanos-io/thanos/pkg/block" 22 "github.com/thanos-io/thanos/pkg/block/metadata" 23 "github.com/thanos-io/thanos/pkg/objstore" 24 25 "github.com/cortexproject/cortex/pkg/storage/tsdb" 26 "github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex" 27 cortex_testutil "github.com/cortexproject/cortex/pkg/storage/tsdb/testutil" 28 "github.com/cortexproject/cortex/pkg/util" 29 ) 30 31 type testBlocksCleanerOptions struct { 32 concurrency int 33 markersMigrationEnabled bool 34 tenantDeletionDelay time.Duration 35 user4FilesExist bool // User 4 has "FinishedTime" in tenant deletion marker set to "1h" ago. 36 } 37 38 func (o testBlocksCleanerOptions) String() string { 39 return fmt.Sprintf("concurrency=%d, markers migration enabled=%v, tenant deletion delay=%v", 40 o.concurrency, o.markersMigrationEnabled, o.tenantDeletionDelay) 41 } 42 43 func TestBlocksCleaner(t *testing.T) { 44 for _, options := range []testBlocksCleanerOptions{ 45 {concurrency: 1, tenantDeletionDelay: 0, user4FilesExist: false}, 46 {concurrency: 1, tenantDeletionDelay: 2 * time.Hour, user4FilesExist: true}, 47 {concurrency: 1, markersMigrationEnabled: true}, 48 {concurrency: 2}, 49 {concurrency: 10}, 50 } { 51 options := options 52 53 t.Run(options.String(), func(t *testing.T) { 54 t.Parallel() 55 testBlocksCleanerWithOptions(t, options) 56 }) 57 } 58 } 59 60 func testBlocksCleanerWithOptions(t *testing.T, options testBlocksCleanerOptions) { 61 bucketClient, _ := cortex_testutil.PrepareFilesystemBucket(t) 62 63 // If the markers migration is enabled, then we create the fixture blocks without 64 // writing the deletion marks in the global location, because they will be migrated 65 // at statup. 66 if !options.markersMigrationEnabled { 67 bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient) 68 } 69 70 // Create blocks. 71 ctx := context.Background() 72 now := time.Now() 73 deletionDelay := 12 * time.Hour 74 block1 := createTSDBBlock(t, bucketClient, "user-1", 10, 20, nil) 75 block2 := createTSDBBlock(t, bucketClient, "user-1", 20, 30, nil) 76 block3 := createTSDBBlock(t, bucketClient, "user-1", 30, 40, nil) 77 block4 := ulid.MustNew(4, rand.Reader) 78 block5 := ulid.MustNew(5, rand.Reader) 79 block6 := createTSDBBlock(t, bucketClient, "user-1", 40, 50, nil) 80 block7 := createTSDBBlock(t, bucketClient, "user-2", 10, 20, nil) 81 block8 := createTSDBBlock(t, bucketClient, "user-2", 40, 50, nil) 82 createDeletionMark(t, bucketClient, "user-1", block2, now.Add(-deletionDelay).Add(time.Hour)) // Block hasn't reached the deletion threshold yet. 83 createDeletionMark(t, bucketClient, "user-1", block3, now.Add(-deletionDelay).Add(-time.Hour)) // Block reached the deletion threshold. 84 createDeletionMark(t, bucketClient, "user-1", block4, now.Add(-deletionDelay).Add(time.Hour)) // Partial block hasn't reached the deletion threshold yet. 85 createDeletionMark(t, bucketClient, "user-1", block5, now.Add(-deletionDelay).Add(-time.Hour)) // Partial block reached the deletion threshold. 86 require.NoError(t, bucketClient.Delete(ctx, path.Join("user-1", block6.String(), metadata.MetaFilename))) // Partial block without deletion mark. 87 createDeletionMark(t, bucketClient, "user-2", block7, now.Add(-deletionDelay).Add(-time.Hour)) // Block reached the deletion threshold. 88 89 // Blocks for user-3, marked for deletion. 90 require.NoError(t, tsdb.WriteTenantDeletionMark(context.Background(), bucketClient, "user-3", nil, tsdb.NewTenantDeletionMark(time.Now()))) 91 block9 := createTSDBBlock(t, bucketClient, "user-3", 10, 30, nil) 92 block10 := createTSDBBlock(t, bucketClient, "user-3", 30, 50, nil) 93 94 // User-4 with no more blocks, but couple of mark and debug files. Should be fully deleted. 95 user4Mark := tsdb.NewTenantDeletionMark(time.Now()) 96 user4Mark.FinishedTime = time.Now().Unix() - 60 // Set to check final user cleanup. 97 require.NoError(t, tsdb.WriteTenantDeletionMark(context.Background(), bucketClient, "user-4", nil, user4Mark)) 98 user4DebugMetaFile := path.Join("user-4", block.DebugMetas, "meta.json") 99 require.NoError(t, bucketClient.Upload(context.Background(), user4DebugMetaFile, strings.NewReader("some random content here"))) 100 101 // The fixtures have been created. If the bucket client wasn't wrapped to write 102 // deletion marks to the global location too, then this is the right time to do it. 103 if options.markersMigrationEnabled { 104 bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient) 105 } 106 107 cfg := BlocksCleanerConfig{ 108 DeletionDelay: deletionDelay, 109 CleanupInterval: time.Minute, 110 CleanupConcurrency: options.concurrency, 111 BlockDeletionMarksMigrationEnabled: options.markersMigrationEnabled, 112 TenantCleanupDelay: options.tenantDeletionDelay, 113 } 114 115 reg := prometheus.NewPedanticRegistry() 116 logger := log.NewNopLogger() 117 scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger) 118 cfgProvider := newMockConfigProvider() 119 120 cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, reg) 121 require.NoError(t, services.StartAndAwaitRunning(ctx, cleaner)) 122 defer services.StopAndAwaitTerminated(ctx, cleaner) //nolint:errcheck 123 124 for _, tc := range []struct { 125 path string 126 expectedExists bool 127 }{ 128 // Check the storage to ensure only the block which has reached the deletion threshold 129 // has been effectively deleted. 130 {path: path.Join("user-1", block1.String(), metadata.MetaFilename), expectedExists: true}, 131 {path: path.Join("user-1", block3.String(), metadata.MetaFilename), expectedExists: false}, 132 {path: path.Join("user-2", block7.String(), metadata.MetaFilename), expectedExists: false}, 133 {path: path.Join("user-2", block8.String(), metadata.MetaFilename), expectedExists: true}, 134 // Should not delete a block with deletion mark who hasn't reached the deletion threshold yet. 135 {path: path.Join("user-1", block2.String(), metadata.MetaFilename), expectedExists: true}, 136 {path: path.Join("user-1", bucketindex.BlockDeletionMarkFilepath(block2)), expectedExists: true}, 137 // Should delete a partial block with deletion mark who hasn't reached the deletion threshold yet. 138 {path: path.Join("user-1", block4.String(), metadata.DeletionMarkFilename), expectedExists: false}, 139 {path: path.Join("user-1", bucketindex.BlockDeletionMarkFilepath(block4)), expectedExists: false}, 140 // Should delete a partial block with deletion mark who has reached the deletion threshold. 141 {path: path.Join("user-1", block5.String(), metadata.DeletionMarkFilename), expectedExists: false}, 142 {path: path.Join("user-1", bucketindex.BlockDeletionMarkFilepath(block5)), expectedExists: false}, 143 // Should not delete a partial block without deletion mark. 144 {path: path.Join("user-1", block6.String(), "index"), expectedExists: true}, 145 // Should completely delete blocks for user-3, marked for deletion 146 {path: path.Join("user-3", block9.String(), metadata.MetaFilename), expectedExists: false}, 147 {path: path.Join("user-3", block9.String(), "index"), expectedExists: false}, 148 {path: path.Join("user-3", block10.String(), metadata.MetaFilename), expectedExists: false}, 149 {path: path.Join("user-3", block10.String(), "index"), expectedExists: false}, 150 // Tenant deletion mark is not removed. 151 {path: path.Join("user-3", tsdb.TenantDeletionMarkPath), expectedExists: true}, 152 // User-4 is removed fully. 153 {path: path.Join("user-4", tsdb.TenantDeletionMarkPath), expectedExists: options.user4FilesExist}, 154 {path: path.Join("user-4", block.DebugMetas, "meta.json"), expectedExists: options.user4FilesExist}, 155 } { 156 exists, err := bucketClient.Exists(ctx, tc.path) 157 require.NoError(t, err) 158 assert.Equal(t, tc.expectedExists, exists, tc.path) 159 } 160 161 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsStarted)) 162 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsCompleted)) 163 assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.runsFailed)) 164 assert.Equal(t, float64(6), testutil.ToFloat64(cleaner.blocksCleanedTotal)) 165 assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.blocksFailedTotal)) 166 167 // Check the updated bucket index. 168 for _, tc := range []struct { 169 userID string 170 expectedIndex bool 171 expectedBlocks []ulid.ULID 172 expectedMarks []ulid.ULID 173 }{ 174 { 175 userID: "user-1", 176 expectedIndex: true, 177 expectedBlocks: []ulid.ULID{block1, block2 /* deleted: block3, block4, block5, partial: block6 */}, 178 expectedMarks: []ulid.ULID{block2}, 179 }, { 180 userID: "user-2", 181 expectedIndex: true, 182 expectedBlocks: []ulid.ULID{block8}, 183 expectedMarks: []ulid.ULID{}, 184 }, { 185 userID: "user-3", 186 expectedIndex: false, 187 }, 188 } { 189 idx, err := bucketindex.ReadIndex(ctx, bucketClient, tc.userID, nil, logger) 190 if !tc.expectedIndex { 191 assert.Equal(t, bucketindex.ErrIndexNotFound, err) 192 continue 193 } 194 195 require.NoError(t, err) 196 assert.ElementsMatch(t, tc.expectedBlocks, idx.Blocks.GetULIDs()) 197 assert.ElementsMatch(t, tc.expectedMarks, idx.BlockDeletionMarks.GetULIDs()) 198 } 199 200 assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(` 201 # HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. 202 # TYPE cortex_bucket_blocks_count gauge 203 cortex_bucket_blocks_count{user="user-1"} 2 204 cortex_bucket_blocks_count{user="user-2"} 1 205 # HELP cortex_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket. 206 # TYPE cortex_bucket_blocks_marked_for_deletion_count gauge 207 cortex_bucket_blocks_marked_for_deletion_count{user="user-1"} 1 208 cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 209 # HELP cortex_bucket_blocks_partials_count Total number of partial blocks. 210 # TYPE cortex_bucket_blocks_partials_count gauge 211 cortex_bucket_blocks_partials_count{user="user-1"} 2 212 cortex_bucket_blocks_partials_count{user="user-2"} 0 213 `), 214 "cortex_bucket_blocks_count", 215 "cortex_bucket_blocks_marked_for_deletion_count", 216 "cortex_bucket_blocks_partials_count", 217 )) 218 } 219 220 func TestBlocksCleaner_ShouldContinueOnBlockDeletionFailure(t *testing.T) { 221 const userID = "user-1" 222 223 bucketClient, _ := cortex_testutil.PrepareFilesystemBucket(t) 224 bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient) 225 226 // Create blocks. 227 ctx := context.Background() 228 now := time.Now() 229 deletionDelay := 12 * time.Hour 230 block1 := createTSDBBlock(t, bucketClient, userID, 10, 20, nil) 231 block2 := createTSDBBlock(t, bucketClient, userID, 20, 30, nil) 232 block3 := createTSDBBlock(t, bucketClient, userID, 30, 40, nil) 233 block4 := createTSDBBlock(t, bucketClient, userID, 40, 50, nil) 234 createDeletionMark(t, bucketClient, userID, block2, now.Add(-deletionDelay).Add(-time.Hour)) 235 createDeletionMark(t, bucketClient, userID, block3, now.Add(-deletionDelay).Add(-time.Hour)) 236 createDeletionMark(t, bucketClient, userID, block4, now.Add(-deletionDelay).Add(-time.Hour)) 237 238 // To emulate a failure deleting a block, we wrap the bucket client in a mocked one. 239 bucketClient = &mockBucketFailure{ 240 Bucket: bucketClient, 241 DeleteFailures: []string{path.Join(userID, block3.String(), metadata.MetaFilename)}, 242 } 243 244 cfg := BlocksCleanerConfig{ 245 DeletionDelay: deletionDelay, 246 CleanupInterval: time.Minute, 247 CleanupConcurrency: 1, 248 } 249 250 logger := log.NewNopLogger() 251 scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger) 252 cfgProvider := newMockConfigProvider() 253 254 cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, nil) 255 require.NoError(t, services.StartAndAwaitRunning(ctx, cleaner)) 256 defer services.StopAndAwaitTerminated(ctx, cleaner) //nolint:errcheck 257 258 for _, tc := range []struct { 259 path string 260 expectedExists bool 261 }{ 262 {path: path.Join(userID, block1.String(), metadata.MetaFilename), expectedExists: true}, 263 {path: path.Join(userID, block2.String(), metadata.MetaFilename), expectedExists: false}, 264 {path: path.Join(userID, block3.String(), metadata.MetaFilename), expectedExists: true}, 265 {path: path.Join(userID, block4.String(), metadata.MetaFilename), expectedExists: false}, 266 } { 267 exists, err := bucketClient.Exists(ctx, tc.path) 268 require.NoError(t, err) 269 assert.Equal(t, tc.expectedExists, exists, tc.path) 270 } 271 272 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsStarted)) 273 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsCompleted)) 274 assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.runsFailed)) 275 assert.Equal(t, float64(2), testutil.ToFloat64(cleaner.blocksCleanedTotal)) 276 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.blocksFailedTotal)) 277 278 // Check the updated bucket index. 279 idx, err := bucketindex.ReadIndex(ctx, bucketClient, userID, nil, logger) 280 require.NoError(t, err) 281 assert.ElementsMatch(t, []ulid.ULID{block1, block3}, idx.Blocks.GetULIDs()) 282 assert.ElementsMatch(t, []ulid.ULID{block3}, idx.BlockDeletionMarks.GetULIDs()) 283 } 284 285 func TestBlocksCleaner_ShouldRebuildBucketIndexOnCorruptedOne(t *testing.T) { 286 const userID = "user-1" 287 288 bucketClient, _ := cortex_testutil.PrepareFilesystemBucket(t) 289 bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient) 290 291 // Create blocks. 292 ctx := context.Background() 293 now := time.Now() 294 deletionDelay := 12 * time.Hour 295 block1 := createTSDBBlock(t, bucketClient, userID, 10, 20, nil) 296 block2 := createTSDBBlock(t, bucketClient, userID, 20, 30, nil) 297 block3 := createTSDBBlock(t, bucketClient, userID, 30, 40, nil) 298 createDeletionMark(t, bucketClient, userID, block2, now.Add(-deletionDelay).Add(-time.Hour)) 299 createDeletionMark(t, bucketClient, userID, block3, now.Add(-deletionDelay).Add(time.Hour)) 300 301 // Write a corrupted bucket index. 302 require.NoError(t, bucketClient.Upload(ctx, path.Join(userID, bucketindex.IndexCompressedFilename), strings.NewReader("invalid!}"))) 303 304 cfg := BlocksCleanerConfig{ 305 DeletionDelay: deletionDelay, 306 CleanupInterval: time.Minute, 307 CleanupConcurrency: 1, 308 } 309 310 logger := log.NewNopLogger() 311 scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger) 312 cfgProvider := newMockConfigProvider() 313 314 cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, nil) 315 require.NoError(t, services.StartAndAwaitRunning(ctx, cleaner)) 316 defer services.StopAndAwaitTerminated(ctx, cleaner) //nolint:errcheck 317 318 for _, tc := range []struct { 319 path string 320 expectedExists bool 321 }{ 322 {path: path.Join(userID, block1.String(), metadata.MetaFilename), expectedExists: true}, 323 {path: path.Join(userID, block2.String(), metadata.MetaFilename), expectedExists: false}, 324 {path: path.Join(userID, block3.String(), metadata.MetaFilename), expectedExists: true}, 325 } { 326 exists, err := bucketClient.Exists(ctx, tc.path) 327 require.NoError(t, err) 328 assert.Equal(t, tc.expectedExists, exists, tc.path) 329 } 330 331 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsStarted)) 332 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.runsCompleted)) 333 assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.runsFailed)) 334 assert.Equal(t, float64(1), testutil.ToFloat64(cleaner.blocksCleanedTotal)) 335 assert.Equal(t, float64(0), testutil.ToFloat64(cleaner.blocksFailedTotal)) 336 337 // Check the updated bucket index. 338 idx, err := bucketindex.ReadIndex(ctx, bucketClient, userID, nil, logger) 339 require.NoError(t, err) 340 assert.ElementsMatch(t, []ulid.ULID{block1, block3}, idx.Blocks.GetULIDs()) 341 assert.ElementsMatch(t, []ulid.ULID{block3}, idx.BlockDeletionMarks.GetULIDs()) 342 } 343 344 func TestBlocksCleaner_ShouldRemoveMetricsForTenantsNotBelongingAnymoreToTheShard(t *testing.T) { 345 bucketClient, _ := cortex_testutil.PrepareFilesystemBucket(t) 346 bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient) 347 348 // Create blocks. 349 createTSDBBlock(t, bucketClient, "user-1", 10, 20, nil) 350 createTSDBBlock(t, bucketClient, "user-1", 20, 30, nil) 351 createTSDBBlock(t, bucketClient, "user-2", 30, 40, nil) 352 353 cfg := BlocksCleanerConfig{ 354 DeletionDelay: time.Hour, 355 CleanupInterval: time.Minute, 356 CleanupConcurrency: 1, 357 } 358 359 ctx := context.Background() 360 logger := log.NewNopLogger() 361 reg := prometheus.NewPedanticRegistry() 362 scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger) 363 cfgProvider := newMockConfigProvider() 364 365 cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, reg) 366 require.NoError(t, cleaner.cleanUsers(ctx, true)) 367 368 assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(` 369 # HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. 370 # TYPE cortex_bucket_blocks_count gauge 371 cortex_bucket_blocks_count{user="user-1"} 2 372 cortex_bucket_blocks_count{user="user-2"} 1 373 # HELP cortex_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket. 374 # TYPE cortex_bucket_blocks_marked_for_deletion_count gauge 375 cortex_bucket_blocks_marked_for_deletion_count{user="user-1"} 0 376 cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 377 # HELP cortex_bucket_blocks_partials_count Total number of partial blocks. 378 # TYPE cortex_bucket_blocks_partials_count gauge 379 cortex_bucket_blocks_partials_count{user="user-1"} 0 380 cortex_bucket_blocks_partials_count{user="user-2"} 0 381 `), 382 "cortex_bucket_blocks_count", 383 "cortex_bucket_blocks_marked_for_deletion_count", 384 "cortex_bucket_blocks_partials_count", 385 )) 386 387 // Override the users scanner to reconfigure it to only return a subset of users. 388 cleaner.usersScanner = tsdb.NewUsersScanner(bucketClient, func(userID string) (bool, error) { return userID == "user-1", nil }, logger) 389 390 // Create new blocks, to double check expected metrics have changed. 391 createTSDBBlock(t, bucketClient, "user-1", 40, 50, nil) 392 createTSDBBlock(t, bucketClient, "user-2", 50, 60, nil) 393 394 require.NoError(t, cleaner.cleanUsers(ctx, false)) 395 396 assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(` 397 # HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. 398 # TYPE cortex_bucket_blocks_count gauge 399 cortex_bucket_blocks_count{user="user-1"} 3 400 # HELP cortex_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket. 401 # TYPE cortex_bucket_blocks_marked_for_deletion_count gauge 402 cortex_bucket_blocks_marked_for_deletion_count{user="user-1"} 0 403 # HELP cortex_bucket_blocks_partials_count Total number of partial blocks. 404 # TYPE cortex_bucket_blocks_partials_count gauge 405 cortex_bucket_blocks_partials_count{user="user-1"} 0 406 `), 407 "cortex_bucket_blocks_count", 408 "cortex_bucket_blocks_marked_for_deletion_count", 409 "cortex_bucket_blocks_partials_count", 410 )) 411 } 412 413 func TestBlocksCleaner_ListBlocksOutsideRetentionPeriod(t *testing.T) { 414 bucketClient, _ := cortex_testutil.PrepareFilesystemBucket(t) 415 bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient) 416 ctx := context.Background() 417 logger := log.NewNopLogger() 418 419 id1 := createTSDBBlock(t, bucketClient, "user-1", 5000, 6000, nil) 420 id2 := createTSDBBlock(t, bucketClient, "user-1", 6000, 7000, nil) 421 id3 := createTSDBBlock(t, bucketClient, "user-1", 7000, 8000, nil) 422 423 w := bucketindex.NewUpdater(bucketClient, "user-1", nil, logger) 424 idx, _, err := w.UpdateIndex(ctx, nil) 425 require.NoError(t, err) 426 427 assert.ElementsMatch(t, []ulid.ULID{id1, id2, id3}, idx.Blocks.GetULIDs()) 428 429 // Excessive retention period (wrapping epoch) 430 result := listBlocksOutsideRetentionPeriod(idx, time.Unix(10, 0).Add(-time.Hour)) 431 assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs()) 432 433 // Normal operation - varying retention period. 434 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(6, 0)) 435 assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs()) 436 437 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(7, 0)) 438 assert.ElementsMatch(t, []ulid.ULID{id1}, result.GetULIDs()) 439 440 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(8, 0)) 441 assert.ElementsMatch(t, []ulid.ULID{id1, id2}, result.GetULIDs()) 442 443 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(9, 0)) 444 assert.ElementsMatch(t, []ulid.ULID{id1, id2, id3}, result.GetULIDs()) 445 446 // Avoiding redundant marking - blocks already marked for deletion. 447 448 mark1 := &bucketindex.BlockDeletionMark{ID: id1} 449 mark2 := &bucketindex.BlockDeletionMark{ID: id2} 450 451 idx.BlockDeletionMarks = bucketindex.BlockDeletionMarks{mark1} 452 453 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(7, 0)) 454 assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs()) 455 456 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(8, 0)) 457 assert.ElementsMatch(t, []ulid.ULID{id2}, result.GetULIDs()) 458 459 idx.BlockDeletionMarks = bucketindex.BlockDeletionMarks{mark1, mark2} 460 461 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(7, 0)) 462 assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs()) 463 464 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(8, 0)) 465 assert.ElementsMatch(t, []ulid.ULID{}, result.GetULIDs()) 466 467 result = listBlocksOutsideRetentionPeriod(idx, time.Unix(9, 0)) 468 assert.ElementsMatch(t, []ulid.ULID{id3}, result.GetULIDs()) 469 } 470 471 func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) { 472 bucketClient, _ := cortex_testutil.PrepareFilesystemBucket(t) 473 bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient) 474 475 ts := func(hours int) int64 { 476 return time.Now().Add(time.Duration(hours)*time.Hour).Unix() * 1000 477 } 478 479 block1 := createTSDBBlock(t, bucketClient, "user-1", ts(-10), ts(-8), nil) 480 block2 := createTSDBBlock(t, bucketClient, "user-1", ts(-8), ts(-6), nil) 481 block3 := createTSDBBlock(t, bucketClient, "user-2", ts(-10), ts(-8), nil) 482 block4 := createTSDBBlock(t, bucketClient, "user-2", ts(-8), ts(-6), nil) 483 484 cfg := BlocksCleanerConfig{ 485 DeletionDelay: time.Hour, 486 CleanupInterval: time.Minute, 487 CleanupConcurrency: 1, 488 } 489 490 ctx := context.Background() 491 logger := log.NewNopLogger() 492 reg := prometheus.NewPedanticRegistry() 493 scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger) 494 cfgProvider := newMockConfigProvider() 495 496 cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, reg) 497 498 assertBlockExists := func(user string, block ulid.ULID, expectExists bool) { 499 exists, err := bucketClient.Exists(ctx, path.Join(user, block.String(), metadata.MetaFilename)) 500 require.NoError(t, err) 501 assert.Equal(t, expectExists, exists) 502 } 503 504 // Existing behaviour - retention period disabled. 505 { 506 cfgProvider.userRetentionPeriods["user-1"] = 0 507 cfgProvider.userRetentionPeriods["user-2"] = 0 508 509 require.NoError(t, cleaner.cleanUsers(ctx, true)) 510 assertBlockExists("user-1", block1, true) 511 assertBlockExists("user-1", block2, true) 512 assertBlockExists("user-2", block3, true) 513 assertBlockExists("user-2", block4, true) 514 515 assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(` 516 # HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. 517 # TYPE cortex_bucket_blocks_count gauge 518 cortex_bucket_blocks_count{user="user-1"} 2 519 cortex_bucket_blocks_count{user="user-2"} 2 520 # HELP cortex_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket. 521 # TYPE cortex_bucket_blocks_marked_for_deletion_count gauge 522 cortex_bucket_blocks_marked_for_deletion_count{user="user-1"} 0 523 cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 524 # HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. 525 # TYPE cortex_compactor_blocks_marked_for_deletion_total counter 526 cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 0 527 `), 528 "cortex_bucket_blocks_count", 529 "cortex_bucket_blocks_marked_for_deletion_count", 530 "cortex_compactor_blocks_marked_for_deletion_total", 531 )) 532 } 533 534 // Retention enabled only for a single user, but does nothing. 535 { 536 cfgProvider.userRetentionPeriods["user-1"] = 9 * time.Hour 537 538 require.NoError(t, cleaner.cleanUsers(ctx, false)) 539 assertBlockExists("user-1", block1, true) 540 assertBlockExists("user-1", block2, true) 541 assertBlockExists("user-2", block3, true) 542 assertBlockExists("user-2", block4, true) 543 } 544 545 // Retention enabled only for a single user, marking a single block. 546 // Note the block won't be deleted yet due to deletion delay. 547 { 548 cfgProvider.userRetentionPeriods["user-1"] = 7 * time.Hour 549 550 require.NoError(t, cleaner.cleanUsers(ctx, false)) 551 assertBlockExists("user-1", block1, true) 552 assertBlockExists("user-1", block2, true) 553 assertBlockExists("user-2", block3, true) 554 assertBlockExists("user-2", block4, true) 555 556 assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(` 557 # HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. 558 # TYPE cortex_bucket_blocks_count gauge 559 cortex_bucket_blocks_count{user="user-1"} 2 560 cortex_bucket_blocks_count{user="user-2"} 2 561 # HELP cortex_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket. 562 # TYPE cortex_bucket_blocks_marked_for_deletion_count gauge 563 cortex_bucket_blocks_marked_for_deletion_count{user="user-1"} 1 564 cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 565 # HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. 566 # TYPE cortex_compactor_blocks_marked_for_deletion_total counter 567 cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 1 568 `), 569 "cortex_bucket_blocks_count", 570 "cortex_bucket_blocks_marked_for_deletion_count", 571 "cortex_compactor_blocks_marked_for_deletion_total", 572 )) 573 } 574 575 // Marking the block again, before the deletion occurs, should not cause an error. 576 { 577 require.NoError(t, cleaner.cleanUsers(ctx, false)) 578 assertBlockExists("user-1", block1, true) 579 assertBlockExists("user-1", block2, true) 580 assertBlockExists("user-2", block3, true) 581 assertBlockExists("user-2", block4, true) 582 } 583 584 // Reduce the deletion delay. Now the block will be deleted. 585 { 586 cleaner.cfg.DeletionDelay = 0 587 588 require.NoError(t, cleaner.cleanUsers(ctx, false)) 589 assertBlockExists("user-1", block1, false) 590 assertBlockExists("user-1", block2, true) 591 assertBlockExists("user-2", block3, true) 592 assertBlockExists("user-2", block4, true) 593 594 assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(` 595 # HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. 596 # TYPE cortex_bucket_blocks_count gauge 597 cortex_bucket_blocks_count{user="user-1"} 1 598 cortex_bucket_blocks_count{user="user-2"} 2 599 # HELP cortex_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket. 600 # TYPE cortex_bucket_blocks_marked_for_deletion_count gauge 601 cortex_bucket_blocks_marked_for_deletion_count{user="user-1"} 0 602 cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 603 # HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. 604 # TYPE cortex_compactor_blocks_marked_for_deletion_total counter 605 cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 1 606 `), 607 "cortex_bucket_blocks_count", 608 "cortex_bucket_blocks_marked_for_deletion_count", 609 "cortex_compactor_blocks_marked_for_deletion_total", 610 )) 611 } 612 613 // Retention enabled for other user; test deleting multiple blocks. 614 { 615 cfgProvider.userRetentionPeriods["user-2"] = 5 * time.Hour 616 617 require.NoError(t, cleaner.cleanUsers(ctx, false)) 618 assertBlockExists("user-1", block1, false) 619 assertBlockExists("user-1", block2, true) 620 assertBlockExists("user-2", block3, false) 621 assertBlockExists("user-2", block4, false) 622 623 assert.NoError(t, prom_testutil.GatherAndCompare(reg, strings.NewReader(` 624 # HELP cortex_bucket_blocks_count Total number of blocks in the bucket. Includes blocks marked for deletion, but not partial blocks. 625 # TYPE cortex_bucket_blocks_count gauge 626 cortex_bucket_blocks_count{user="user-1"} 1 627 cortex_bucket_blocks_count{user="user-2"} 0 628 # HELP cortex_bucket_blocks_marked_for_deletion_count Total number of blocks marked for deletion in the bucket. 629 # TYPE cortex_bucket_blocks_marked_for_deletion_count gauge 630 cortex_bucket_blocks_marked_for_deletion_count{user="user-1"} 0 631 cortex_bucket_blocks_marked_for_deletion_count{user="user-2"} 0 632 # HELP cortex_compactor_blocks_marked_for_deletion_total Total number of blocks marked for deletion in compactor. 633 # TYPE cortex_compactor_blocks_marked_for_deletion_total counter 634 cortex_compactor_blocks_marked_for_deletion_total{reason="retention"} 3 635 `), 636 "cortex_bucket_blocks_count", 637 "cortex_bucket_blocks_marked_for_deletion_count", 638 "cortex_compactor_blocks_marked_for_deletion_total", 639 )) 640 } 641 } 642 643 type mockBucketFailure struct { 644 objstore.Bucket 645 646 DeleteFailures []string 647 } 648 649 func (m *mockBucketFailure) Delete(ctx context.Context, name string) error { 650 if util.StringsContain(m.DeleteFailures, name) { 651 return errors.New("mocked delete failure") 652 } 653 return m.Bucket.Delete(ctx, name) 654 } 655 656 type mockConfigProvider struct { 657 userRetentionPeriods map[string]time.Duration 658 } 659 660 func newMockConfigProvider() *mockConfigProvider { 661 return &mockConfigProvider{ 662 userRetentionPeriods: make(map[string]time.Duration), 663 } 664 } 665 666 func (m *mockConfigProvider) CompactorBlocksRetentionPeriod(user string) time.Duration { 667 if result, ok := m.userRetentionPeriods[user]; ok { 668 return result 669 } 670 return 0 671 } 672 673 func (m *mockConfigProvider) S3SSEType(user string) string { 674 return "" 675 } 676 677 func (m *mockConfigProvider) S3SSEKMSKeyID(userID string) string { 678 return "" 679 } 680 681 func (m *mockConfigProvider) S3SSEKMSEncryptionContext(userID string) string { 682 return "" 683 }