// SPDX-License-Identifier: AGPL-3.0-only
// Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/split_merge_compactor_test.go
// Provenance-includes-license: Apache-2.0
// Provenance-includes-copyright: The Cortex Authors.

package compactor

import (
	"context"
	"os"
	"strconv"
	"strings"
	"testing"
	"time"

	"github.com/go-kit/log"
	"github.com/grafana/dskit/services"
	"github.com/grafana/dskit/test"
	"github.com/oklog/ulid/v2"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
	"github.com/prometheus/common/model"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	phlaremodel "github.com/grafana/pyroscope/pkg/model"
	"github.com/grafana/pyroscope/pkg/objstore"
	"github.com/grafana/pyroscope/pkg/objstore/client"
	"github.com/grafana/pyroscope/pkg/objstore/providers/filesystem"
	"github.com/grafana/pyroscope/pkg/phlaredb"
	"github.com/grafana/pyroscope/pkg/phlaredb/block"
	"github.com/grafana/pyroscope/pkg/phlaredb/sharding"
)

// TestMultitenantCompactor_ShouldSupportSplitAndMergeCompactor runs the compactor end-to-end
// against a filesystem bucket: each case seeds source blocks, waits for one compaction run,
// then compares the surviving (non-deleted) block metas against the expected split/merged set.
func TestMultitenantCompactor_ShouldSupportSplitAndMergeCompactor(t *testing.T) {
	const (
		userID     = "user-1"
		numSeries  = 100
		blockRange = 2 * time.Hour
	)

	var (
		blockRangeMillis = blockRange.Milliseconds()
		compactionRanges = DurationList{blockRange, 2 * blockRange, 4 * blockRange}
	)

	// externalLabels builds the external labels for a source block; an empty shardID
	// means the block has not been split yet (no compactor shard ID label).
	externalLabels := func(shardID string) map[string]string {
		labels := map[string]string{}

		if shardID != "" {
			labels[sharding.CompactorShardIDLabel] = shardID
		}
		return labels
	}

	tests := map[string]struct {
		numShards int
		setup     func(t *testing.T, bkt objstore.Bucket) []block.Meta
	}{
		"overlapping blocks matching the 1st compaction range should be merged and split": {
			numShards: 2,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				block1 := createDBBlock(t, bkt, userID, blockRangeMillis, 2*blockRangeMillis, numSeries, externalLabels(""))
				block2 := createDBBlock(t, bkt, userID, blockRangeMillis, 2*blockRangeMillis, numSeries, externalLabels(""))

				return []block.Meta{
					{
						MinTime: model.Time(1 * blockRangeMillis),
						MaxTime: model.Time(2 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					}, {
						MinTime: model.Time(1 * blockRangeMillis),
						MaxTime: model.Time(2 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					},
				}
			},
		},
		"overlapping blocks matching the beginning of the 1st compaction range should be merged and split": {
			numShards: 2,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				block1 := createDBBlock(t, bkt, userID, 0, (5 * time.Minute).Milliseconds(), numSeries, externalLabels(""))
				block2 := createDBBlock(t, bkt, userID, time.Minute.Milliseconds(), (7 * time.Minute).Milliseconds(), numSeries, externalLabels(""))

				// Add another block as "most recent one" otherwise the previous blocks are not compacted
				// because the most recent blocks must cover the full range to be compacted.
				block3 := createDBBlock(t, bkt, userID, blockRangeMillis, blockRangeMillis+time.Minute.Milliseconds(), numSeries, externalLabels(""))

				return []block.Meta{
					{
						MinTime: 0,
						MaxTime: model.Time((7 * time.Minute).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					}, {
						MinTime: 0,
						MaxTime: model.Time((7 * time.Minute).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					}, {
						// Not compacted.
						MinTime: model.Time(blockRangeMillis),
						MaxTime: model.Time(blockRangeMillis + time.Minute.Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block3},
						},

						Labels: map[string]string{},
					},
				}
			},
		},
		"non-overlapping blocks matching the beginning of the 1st compaction range (without gaps) should be merged and split": {
			numShards: 2,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				block1 := createDBBlock(t, bkt, userID, 0, (5 * time.Minute).Milliseconds(), numSeries, externalLabels(""))
				block2 := createDBBlock(t, bkt, userID, (5 * time.Minute).Milliseconds(), (10 * time.Minute).Milliseconds(), numSeries, externalLabels(""))

				// Add another block as "most recent one" otherwise the previous blocks are not compacted
				// because the most recent blocks must cover the full range to be compacted.
				block3 := createDBBlock(t, bkt, userID, blockRangeMillis, blockRangeMillis+time.Minute.Milliseconds(), numSeries, externalLabels(""))

				return []block.Meta{
					{
						MinTime: 0,
						MaxTime: model.Time((10 * time.Minute).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					}, {
						MinTime: 0,
						MaxTime: model.Time((10 * time.Minute).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					}, {
						// Not compacted.
						MinTime: model.Time(blockRangeMillis),
						MaxTime: model.Time(blockRangeMillis + time.Minute.Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block3},
						},

						Labels: map[string]string{},
					},
				}
			},
		},
		"non-overlapping blocks matching the beginning of the 1st compaction range (with gaps) should be merged and split": {
			numShards: 2,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				block1 := createDBBlock(t, bkt, userID, 0, (5 * time.Minute).Milliseconds(), numSeries, externalLabels(""))
				block2 := createDBBlock(t, bkt, userID, (7 * time.Minute).Milliseconds(), (10 * time.Minute).Milliseconds(), numSeries, externalLabels(""))

				// Add another block as "most recent one" otherwise the previous blocks are not compacted
				// because the most recent blocks must cover the full range to be compacted.
				block3 := createDBBlock(t, bkt, userID, blockRangeMillis, blockRangeMillis+time.Minute.Milliseconds(), numSeries, externalLabels(""))

				return []block.Meta{
					{
						MinTime: 0,
						MaxTime: model.Time((10 * time.Minute).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					}, {
						MinTime: 0,
						MaxTime: model.Time((10 * time.Minute).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					}, {
						// Not compacted.
						MinTime: model.Time(blockRangeMillis),
						MaxTime: model.Time(blockRangeMillis + time.Minute.Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block3},
						},

						Labels: map[string]string{},
					},
				}
			},
		},
		"smaller compaction ranges should take precedence over larger ones, and then re-iterate in subsequent compactions of increasing ranges": {
			numShards: 2,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				// Two split blocks in the 1st compaction range.
				block1a := createDBBlock(t, bkt, userID, 1, blockRangeMillis, numSeries, externalLabels("1_of_2"))
				block1b := createDBBlock(t, bkt, userID, 1, blockRangeMillis, numSeries, externalLabels("2_of_2"))

				// Two non-split overlapping blocks in the 1st compaction range.
				block2 := createDBBlock(t, bkt, userID, blockRangeMillis, 2*blockRangeMillis, numSeries, externalLabels(""))
				block3 := createDBBlock(t, bkt, userID, blockRangeMillis, 2*blockRangeMillis, numSeries, externalLabels(""))

				// Two split adjacent blocks in the 2nd compaction range.
				block4a := createDBBlock(t, bkt, userID, 2*blockRangeMillis, 3*blockRangeMillis, numSeries, externalLabels("1_of_2"))
				block4b := createDBBlock(t, bkt, userID, 2*blockRangeMillis, 3*blockRangeMillis, numSeries, externalLabels("2_of_2"))
				block5a := createDBBlock(t, bkt, userID, 3*blockRangeMillis, 4*blockRangeMillis, numSeries, externalLabels("1_of_2"))
				block5b := createDBBlock(t, bkt, userID, 3*blockRangeMillis, 4*blockRangeMillis, numSeries, externalLabels("2_of_2"))

				// Two non-adjacent non-split blocks in the 1st compaction range.
				block6 := createDBBlock(t, bkt, userID, 4*blockRangeMillis+1, 5*blockRangeMillis, numSeries, externalLabels(""))
				block7 := createDBBlock(t, bkt, userID, 7*blockRangeMillis, 8*blockRangeMillis, numSeries, externalLabels(""))

				return []block.Meta{
					// The two overlapping blocks (block2, block3) have been merged and split in the 1st range,
					// and then compacted with block1 in 2nd range. Finally, they've been compacted with
					// block4 and block5 in the 3rd range compaction (total levels: 4).
					{
						MinTime: 1,
						MaxTime: model.Time(4 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1a, block2, block3, block4a, block5a},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					},
					{
						MinTime: 1,
						MaxTime: model.Time(4 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1b, block2, block3, block4b, block5b},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					},
					// The two non-adjacent blocks block6 and block7 are split individually first and then merged
					// together in the 3rd range.
					{
						MinTime: model.Time(4*blockRangeMillis + 1),
						MaxTime: model.Time(8 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block6, block7},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					},
					{
						MinTime: model.Time(4*blockRangeMillis + 1),
						MaxTime: model.Time(8 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block6, block7},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					},
				}
			},
		},
		"overlapping and non-overlapping blocks within the same range should be split and compacted together": {
			numShards: 2,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				// Overlapping.
				block1 := createDBBlock(t, bkt, userID, 0, (5 * time.Minute).Milliseconds(), numSeries, externalLabels(""))
				block2 := createDBBlock(t, bkt, userID, time.Minute.Milliseconds(), (7 * time.Minute).Milliseconds(), numSeries, externalLabels(""))

				// Not overlapping.
				block3 := createDBBlock(t, bkt, userID, time.Hour.Milliseconds(), (2 * time.Hour).Milliseconds(), numSeries, externalLabels(""))

				return []block.Meta{
					{
						MinTime: 0,
						MaxTime: model.Time((2 * time.Hour).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2, block3},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					}, {
						MinTime: 0,
						MaxTime: model.Time((2 * time.Hour).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2, block3},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					},
				}
			},
		},
		"should correctly handle empty blocks generated in the splitting stage": {
			numShards: 2,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				// Generate a block with only 1 series. This block will be split into 1 split block only,
				// because the source block only has 1 series.
				block1 := createDBBlock(t, bkt, userID, blockRangeMillis, 2*blockRangeMillis, 1, externalLabels(""))

				return []block.Meta{
					{
						MinTime: model.Time(blockRangeMillis), // Because there's only 1 sample with timestamp=maxT-1
						MaxTime: model.Time(2 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					},
				}
			},
		},
		"splitting should be disabled if configured shards = 0": {
			numShards: 0,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				block1 := createDBBlock(t, bkt, userID, 0, (5 * time.Minute).Milliseconds(), numSeries, externalLabels(""))
				block2 := createDBBlock(t, bkt, userID, (5 * time.Minute).Milliseconds(), (10 * time.Minute).Milliseconds(), numSeries, externalLabels(""))

				// Add another block as "most recent one" otherwise the previous blocks are not compacted
				// because the most recent blocks must cover the full range to be compacted.
				block3 := createDBBlock(t, bkt, userID, blockRangeMillis, blockRangeMillis+time.Minute.Milliseconds(), numSeries, externalLabels(""))

				return []block.Meta{
					// Compacted but not split.
					{
						MinTime: 0,
						MaxTime: model.Time((10 * time.Minute).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{},
					}, {
						// Not compacted.
						MinTime: model.Time(blockRangeMillis),
						MaxTime: model.Time(blockRangeMillis + time.Minute.Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block3},
						},

						Labels: map[string]string{},
					},
				}
			},
		},
		"splitting should be disabled but already split blocks should be merged correctly (respecting the shard) if configured shards = 0": {
			numShards: 0,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				// Two split blocks in the 1st compaction range.
				block1a := createDBBlock(t, bkt, userID, 1, blockRangeMillis, numSeries, externalLabels("1_of_2"))
				block1b := createDBBlock(t, bkt, userID, 1, blockRangeMillis, numSeries, externalLabels("2_of_2"))

				// Two non-split overlapping blocks in the 1st compaction range.
				block2 := createDBBlock(t, bkt, userID, blockRangeMillis, 2*blockRangeMillis, numSeries, externalLabels(""))
				block3 := createDBBlock(t, bkt, userID, blockRangeMillis, 2*blockRangeMillis, numSeries, externalLabels(""))

				// Two split adjacent blocks in the 2nd compaction range.
				block4a := createDBBlock(t, bkt, userID, 2*blockRangeMillis, 3*blockRangeMillis, numSeries, externalLabels("1_of_2"))
				block4b := createDBBlock(t, bkt, userID, 2*blockRangeMillis, 3*blockRangeMillis, numSeries, externalLabels("2_of_2"))
				block5a := createDBBlock(t, bkt, userID, 3*blockRangeMillis, 4*blockRangeMillis, numSeries, externalLabels("1_of_2"))
				block5b := createDBBlock(t, bkt, userID, 3*blockRangeMillis, 4*blockRangeMillis, numSeries, externalLabels("2_of_2"))

				// Two non-adjacent non-split blocks in the 1st compaction range.
				block6 := createDBBlock(t, bkt, userID, 4*blockRangeMillis+1, 5*blockRangeMillis, numSeries, externalLabels(""))
				block7 := createDBBlock(t, bkt, userID, 7*blockRangeMillis, 8*blockRangeMillis, numSeries, externalLabels(""))

				return []block.Meta{
					// Block1 have been compacted with block4 and block5 in the 3rd range compaction.
					{
						MinTime: 1,
						MaxTime: model.Time(4 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1a, block4a, block5a},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					},
					{
						MinTime: 1,
						MaxTime: model.Time(4 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1b, block4b, block5b},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					},
					// The two overlapping blocks (block2, block3) have been merged in the 1st range.
					{
						MinTime: model.Time(blockRangeMillis),
						MaxTime: model.Time(2 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block2, block3},
						},

						Labels: map[string]string{},
					},
					// The two non-adjacent blocks block6 and block7 are merged together in the 3rd range.
					{
						MinTime: model.Time(4*blockRangeMillis) + 1,
						MaxTime: model.Time(8 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block6, block7},
						},

						Labels: map[string]string{},
					},
				}
			},
		},
	}

	for testName, testData := range tests {
		t.Run(testName, func(t *testing.T) {
			workDir := t.TempDir()
			storageDir := t.TempDir()
			fetcherDir := t.TempDir()

			storageCfg := client.Config{
				StorageBackendConfig: client.StorageBackendConfig{
					Backend: client.Filesystem,
					Filesystem: filesystem.Config{
						Directory: storageDir,
					},
				},
			}

			compactorCfg := prepareConfig(t)
			compactorCfg.DataDir = workDir
			compactorCfg.BlockRanges = compactionRanges

			cfgProvider := newMockConfigProvider()
			cfgProvider.splitAndMergeShards[userID] = testData.numShards

			logger := log.NewLogfmtLogger(os.Stdout)
			reg := prometheus.NewPedanticRegistry()
			ctx := context.Background()

			// Create TSDB blocks in the storage and get the expected blocks.
			bkt, err := client.NewBucket(context.Background(), storageCfg, "test")
			require.NoError(t, err)

			defer bkt.Close()
			expected := testData.setup(t, bkt)

			c, err := NewMultitenantCompactor(compactorCfg, bkt, cfgProvider, logger, reg)
			require.NoError(t, err)
			require.NoError(t, services.StartAndAwaitRunning(context.Background(), c))
			t.Cleanup(func() {
				require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c))
			})

			// Wait until the first compaction run completed.
			test.Poll(t, 15*time.Second, nil, func() interface{} {
				return testutil.GatherAndCompare(reg, strings.NewReader(`
					# HELP pyroscope_compactor_runs_completed_total Total number of compaction runs successfully completed.
					# TYPE pyroscope_compactor_runs_completed_total counter
					pyroscope_compactor_runs_completed_total 1
				`), "pyroscope_compactor_runs_completed_total")
			})

			// List back any (non deleted) block from the storage.
			userBucket := objstore.NewTenantBucketClient(userID, bkt, nil)
			fetcher, err := block.NewMetaFetcher(logger,
				1,
				userBucket,
				fetcherDir,
				reg,
				nil,
			)
			require.NoError(t, err)
			metas, partials, err := fetcher.FetchWithoutMarkedForDeletion(ctx)
			require.NoError(t, err)
			require.Empty(t, partials)

			// Sort blocks by MinTime and labels so that we get a stable comparison.
			actual := sortMetasByMinTime(convertMetasMapToSlice(metas))

			// Compare actual blocks with the expected ones.
			require.Len(t, actual, len(expected))
			for i, e := range expected {
				delete(actual[i].Labels, block.HostnameLabel)
				assert.Equal(t, e.MinTime, actual[i].MinTime)
				assert.Equal(t, e.MaxTime, actual[i].MaxTime)
				assert.Equal(t, e.Compaction.Sources, actual[i].Compaction.Sources)
				assert.Equal(t, e.Labels, actual[i].Labels)
			}
		})
	}
}

// TestMultitenantCompactor_ShouldGuaranteeSeriesShardingConsistencyOverTheTime verifies that the
// series-to-shard assignment produced by the split stage is stable across releases: a fixed input
// block must always split the same series IDs into the same shards.
func TestMultitenantCompactor_ShouldGuaranteeSeriesShardingConsistencyOverTheTime(t *testing.T) {
	const (
		userID     = "user-1"
		numSeries  = 100
		blockRange = 2 * time.Hour
		numShards  = 2
	)

	var (
		blockRangeMillis = blockRange.Milliseconds()
		compactionRanges = DurationList{blockRange}

		// You should NEVER CHANGE the expected series here, otherwise it means you're introducing
		// a backward incompatible change.
		expectedSeriesIDByShard = map[string][]int{
			"1_of_2": {0, 1, 3, 4, 5, 6, 7, 11, 12, 15, 16, 17, 18, 19, 20, 21, 24, 25, 27, 31, 36, 37, 38, 40, 42, 45, 47, 50, 51, 52, 53, 54, 55, 57, 59, 60, 61, 63, 68, 70, 71, 72, 74, 77, 79, 80, 81, 82, 83, 84, 85, 86, 88, 89, 90, 91, 92, 94, 98, 100},
			"2_of_2": {2, 8, 9, 10, 13, 14, 22, 23, 26, 28, 29, 30, 32, 33, 34, 35, 39, 41, 43, 44, 46, 48, 49, 56, 58, 62, 64, 65, 66, 67, 69, 73, 75, 76, 78, 87, 93, 95, 96, 97, 99},
		}
	)

	workDir := t.TempDir()
	storageDir := t.TempDir()
	fetcherDir := t.TempDir()

	storageCfg := client.Config{
		StorageBackendConfig: client.StorageBackendConfig{
			Backend: client.Filesystem,
			Filesystem: filesystem.Config{
				Directory: storageDir,
			},
		},
	}

	compactorCfg := prepareConfig(t)
	compactorCfg.DataDir = workDir
	compactorCfg.BlockRanges = compactionRanges

	cfgProvider := newMockConfigProvider()
	cfgProvider.splitAndMergeShards[userID] = numShards

	logger := log.NewLogfmtLogger(os.Stdout)
	reg := prometheus.NewPedanticRegistry()
	ctx := context.Background()

	bucketClient, err := client.NewBucket(ctx, storageCfg, "test")
	require.NoError(t, err)

	// Create a TSDB block in the storage.
	blockID := createDBBlock(t, bucketClient, userID, blockRangeMillis, 2*blockRangeMillis, numSeries, nil)

	c, err := NewMultitenantCompactor(compactorCfg, bucketClient, cfgProvider, logger, reg)
	require.NoError(t, err)
	require.NoError(t, services.StartAndAwaitRunning(context.Background(), c))
	t.Cleanup(func() {
		require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c))
	})

	// Wait until the first compaction run completed.
	test.Poll(t, 15*time.Second, nil, func() interface{} {
		return testutil.GatherAndCompare(reg, strings.NewReader(`
			# HELP pyroscope_compactor_runs_completed_total Total number of compaction runs successfully completed.
			# TYPE pyroscope_compactor_runs_completed_total counter
			pyroscope_compactor_runs_completed_total 1
		`), "pyroscope_compactor_runs_completed_total")
	})

	// List back any (non deleted) block from the storage.
	userBucket := objstore.NewTenantBucketClient(userID, bucketClient, nil)
	fetcher, err := block.NewMetaFetcher(logger,
		1,
		userBucket,
		fetcherDir,
		reg,
		nil,
	)
	require.NoError(t, err)
	metas, partials, err := fetcher.FetchWithoutMarkedForDeletion(ctx)
	require.NoError(t, err)
	require.Empty(t, partials)

	// Sort blocks by MinTime and labels so that we get a stable comparison.
	actualMetas := sortMetasByMinTime(convertMetasMapToSlice(metas))

	// Ensure the input block has been split.
	require.Len(t, actualMetas, numShards)
	for idx, actualMeta := range actualMetas {
		assert.Equal(t, model.Time(blockRangeMillis), actualMeta.MinTime)
		assert.Equal(t, model.Time(2*blockRangeMillis), actualMeta.MaxTime)
		assert.Equal(t, []ulid.ULID{blockID}, actualMeta.Compaction.Sources)
		assert.Equal(t, sharding.FormatShardIDLabelValue(uint64(idx), numShards), actualMeta.Labels[sharding.CompactorShardIDLabel])
	}

	// Ensure each split block contains the right series, based on a series labels
	// hashing function which doesn't change over time.
	for _, actualMeta := range actualMetas {
		expectedSeriesIDs := expectedSeriesIDByShard[actualMeta.Labels[sharding.CompactorShardIDLabel]]

		b := phlaredb.NewSingleBlockQuerierFromMeta(ctx, userBucket, actualMeta)
		require.NoError(t, b.Open(ctx))
		indexReader := b.Index()

		// Find all series in the block.
		postings, err := indexReader.Postings("series_id", nil)
		require.NoError(t, err)

		lbls := make(phlaremodel.Labels, 0, 6)

		for postings.Next() {
			_, err := indexReader.Series(postings.At(), &lbls, nil)
			// Symbolize the series labels.
			require.NoError(t, err)

			// Ensure the series belong to the right shard.
			seriesLabels := lbls.Clone()
			seriesID, err := strconv.Atoi(seriesLabels.Get("series_id"))
			require.NoError(t, err)
			assert.Contains(t, expectedSeriesIDs, seriesID, "series:", seriesLabels.ToPrometheusLabels().String())
		}

		require.NoError(t, postings.Err())
	}
}

// convertMetasMapToSlice flattens a ULID-keyed meta map into a slice; the order is
// unspecified (map iteration), so callers must sort before comparing.
func convertMetasMapToSlice(metas map[ulid.ULID]*block.Meta) []*block.Meta {
	var out []*block.Meta
	for _, m := range metas {
		out = append(out, m)
	}
	return out
}