github.com/grafana/pyroscope@v1.18.0/pkg/compactor/split_merge_compactor_test.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/split_merge_compactor_test.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: The Cortex Authors.
     5  
     6  package compactor
     7  
     8  import (
     9  	"context"
    10  	"os"
    11  	"strconv"
    12  	"strings"
    13  	"testing"
    14  	"time"
    15  
    16  	"github.com/go-kit/log"
    17  	"github.com/grafana/dskit/services"
    18  	"github.com/grafana/dskit/test"
    19  	"github.com/oklog/ulid/v2"
    20  	"github.com/prometheus/client_golang/prometheus"
    21  	"github.com/prometheus/client_golang/prometheus/testutil"
    22  	"github.com/prometheus/common/model"
    23  	"github.com/stretchr/testify/assert"
    24  	"github.com/stretchr/testify/require"
    25  
    26  	phlaremodel "github.com/grafana/pyroscope/pkg/model"
    27  	"github.com/grafana/pyroscope/pkg/objstore"
    28  	"github.com/grafana/pyroscope/pkg/objstore/client"
    29  	"github.com/grafana/pyroscope/pkg/objstore/providers/filesystem"
    30  	"github.com/grafana/pyroscope/pkg/phlaredb"
    31  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    32  	"github.com/grafana/pyroscope/pkg/phlaredb/sharding"
    33  )
    34  
// TestMultitenantCompactor_ShouldSupportSplitAndMergeCompactor runs the
// multitenant compactor end-to-end against a filesystem-backed bucket and
// verifies the split-and-merge strategy: source blocks are split into the
// configured number of shards (identified by sharding.CompactorShardIDLabel)
// and then merged across the configured compaction ranges.
func TestMultitenantCompactor_ShouldSupportSplitAndMergeCompactor(t *testing.T) {
	const (
		userID     = "user-1"
		numSeries  = 100
		blockRange = 2 * time.Hour
	)

	var (
		blockRangeMillis = blockRange.Milliseconds()
		// Three increasing ranges: 2h, 4h, 8h.
		compactionRanges = DurationList{blockRange, 2 * blockRange, 4 * blockRange}
	)

	// externalLabels returns the external labels for a source block. An empty
	// shardID means the block has not been split yet (no shard ID label).
	externalLabels := func(shardID string) map[string]string {
		labels := map[string]string{}

		if shardID != "" {
			labels[sharding.CompactorShardIDLabel] = shardID
		}
		return labels
	}

	// Each test case uploads source blocks through setup() and returns the
	// block metas (min/max time, compaction sources, shard labels) expected
	// to be found in the bucket after one full compaction run, sorted by
	// MinTime and labels.
	tests := map[string]struct {
		numShards int
		setup     func(t *testing.T, bkt objstore.Bucket) []block.Meta
	}{
		"overlapping blocks matching the 1st compaction range should be merged and split": {
			numShards: 2,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				block1 := createDBBlock(t, bkt, userID, blockRangeMillis, 2*blockRangeMillis, numSeries, externalLabels(""))
				block2 := createDBBlock(t, bkt, userID, blockRangeMillis, 2*blockRangeMillis, numSeries, externalLabels(""))

				return []block.Meta{
					{
						MinTime: model.Time(1 * blockRangeMillis),
						MaxTime: model.Time(2 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					}, {
						MinTime: model.Time(1 * blockRangeMillis),
						MaxTime: model.Time(2 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					},
				}
			},
		},
		"overlapping blocks matching the beginning of the 1st compaction range should be merged and split": {
			numShards: 2,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				block1 := createDBBlock(t, bkt, userID, 0, (5 * time.Minute).Milliseconds(), numSeries, externalLabels(""))
				block2 := createDBBlock(t, bkt, userID, time.Minute.Milliseconds(), (7 * time.Minute).Milliseconds(), numSeries, externalLabels(""))

				// Add another block as "most recent one" otherwise the previous blocks are not compacted
				// because the most recent blocks must cover the full range to be compacted.
				block3 := createDBBlock(t, bkt, userID, blockRangeMillis, blockRangeMillis+time.Minute.Milliseconds(), numSeries, externalLabels(""))

				return []block.Meta{
					{
						MinTime: 0,
						MaxTime: model.Time((7 * time.Minute).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					}, {
						MinTime: 0,
						MaxTime: model.Time((7 * time.Minute).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					}, {
						// Not compacted.
						MinTime: model.Time(blockRangeMillis),
						MaxTime: model.Time(blockRangeMillis + time.Minute.Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block3},
						},

						Labels: map[string]string{},
					},
				}
			},
		},
		"non-overlapping blocks matching the beginning of the 1st compaction range (without gaps) should be merged and split": {
			numShards: 2,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				block1 := createDBBlock(t, bkt, userID, 0, (5 * time.Minute).Milliseconds(), numSeries, externalLabels(""))
				block2 := createDBBlock(t, bkt, userID, (5 * time.Minute).Milliseconds(), (10 * time.Minute).Milliseconds(), numSeries, externalLabels(""))

				// Add another block as "most recent one" otherwise the previous blocks are not compacted
				// because the most recent blocks must cover the full range to be compacted.
				block3 := createDBBlock(t, bkt, userID, blockRangeMillis, blockRangeMillis+time.Minute.Milliseconds(), numSeries, externalLabels(""))

				return []block.Meta{
					{
						MinTime: 0,
						MaxTime: model.Time((10 * time.Minute).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					}, {
						MinTime: 0,
						MaxTime: model.Time((10 * time.Minute).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					}, {
						// Not compacted.
						MinTime: model.Time(blockRangeMillis),
						MaxTime: model.Time(blockRangeMillis + time.Minute.Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block3},
						},

						Labels: map[string]string{},
					},
				}
			},
		},
		"non-overlapping blocks matching the beginning of the 1st compaction range (with gaps) should be merged and split": {
			numShards: 2,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				block1 := createDBBlock(t, bkt, userID, 0, (5 * time.Minute).Milliseconds(), numSeries, externalLabels(""))
				block2 := createDBBlock(t, bkt, userID, (7 * time.Minute).Milliseconds(), (10 * time.Minute).Milliseconds(), numSeries, externalLabels(""))

				// Add another block as "most recent one" otherwise the previous blocks are not compacted
				// because the most recent blocks must cover the full range to be compacted.
				block3 := createDBBlock(t, bkt, userID, blockRangeMillis, blockRangeMillis+time.Minute.Milliseconds(), numSeries, externalLabels(""))

				return []block.Meta{
					{
						MinTime: 0,
						MaxTime: model.Time((10 * time.Minute).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					}, {
						MinTime: 0,
						MaxTime: model.Time((10 * time.Minute).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					}, {
						// Not compacted.
						MinTime: model.Time(blockRangeMillis),
						MaxTime: model.Time(blockRangeMillis + time.Minute.Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block3},
						},

						Labels: map[string]string{},
					},
				}
			},
		},
		"smaller compaction ranges should take precedence over larger ones, and then re-iterate in subsequent compactions of increasing ranges": {
			numShards: 2,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				// Two split blocks in the 1st compaction range.
				block1a := createDBBlock(t, bkt, userID, 1, blockRangeMillis, numSeries, externalLabels("1_of_2"))
				block1b := createDBBlock(t, bkt, userID, 1, blockRangeMillis, numSeries, externalLabels("2_of_2"))

				// Two non-split overlapping blocks in the 1st compaction range.
				block2 := createDBBlock(t, bkt, userID, blockRangeMillis, 2*blockRangeMillis, numSeries, externalLabels(""))
				block3 := createDBBlock(t, bkt, userID, blockRangeMillis, 2*blockRangeMillis, numSeries, externalLabels(""))

				// Two split adjacent blocks in the 2nd compaction range.
				block4a := createDBBlock(t, bkt, userID, 2*blockRangeMillis, 3*blockRangeMillis, numSeries, externalLabels("1_of_2"))
				block4b := createDBBlock(t, bkt, userID, 2*blockRangeMillis, 3*blockRangeMillis, numSeries, externalLabels("2_of_2"))
				block5a := createDBBlock(t, bkt, userID, 3*blockRangeMillis, 4*blockRangeMillis, numSeries, externalLabels("1_of_2"))
				block5b := createDBBlock(t, bkt, userID, 3*blockRangeMillis, 4*blockRangeMillis, numSeries, externalLabels("2_of_2"))

				// Two non-adjacent non-split blocks, each within a single 1st-range period.
				block6 := createDBBlock(t, bkt, userID, 4*blockRangeMillis+1, 5*blockRangeMillis, numSeries, externalLabels(""))
				block7 := createDBBlock(t, bkt, userID, 7*blockRangeMillis, 8*blockRangeMillis, numSeries, externalLabels(""))

				return []block.Meta{
					// The two overlapping blocks (block2, block3) have been merged and split in the 1st range,
					// and then compacted with block1 in 2nd range. Finally, they've been compacted with
					// block4 and block5 in the 3rd range compaction (total levels: 4).
					{
						MinTime: 1,
						MaxTime: model.Time(4 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1a, block2, block3, block4a, block5a},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					},
					{
						MinTime: 1,
						MaxTime: model.Time(4 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1b, block2, block3, block4b, block5b},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					},
					// The two non-adjacent blocks block6 and block7 are split individually first and then merged
					// together in the 3rd range.
					{
						MinTime: model.Time(4*blockRangeMillis + 1),
						MaxTime: model.Time(8 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block6, block7},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					},
					{
						MinTime: model.Time(4*blockRangeMillis + 1),
						MaxTime: model.Time(8 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block6, block7},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					},
				}
			},
		},
		"overlapping and non-overlapping blocks within the same range should be split and compacted together": {
			numShards: 2,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				// Overlapping.
				block1 := createDBBlock(t, bkt, userID, 0, (5 * time.Minute).Milliseconds(), numSeries, externalLabels(""))
				block2 := createDBBlock(t, bkt, userID, time.Minute.Milliseconds(), (7 * time.Minute).Milliseconds(), numSeries, externalLabels(""))

				// Not overlapping.
				block3 := createDBBlock(t, bkt, userID, time.Hour.Milliseconds(), (2 * time.Hour).Milliseconds(), numSeries, externalLabels(""))

				return []block.Meta{
					{
						MinTime: 0,
						MaxTime: model.Time((2 * time.Hour).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2, block3},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					}, {
						MinTime: 0,
						MaxTime: model.Time((2 * time.Hour).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2, block3},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					},
				}
			},
		},
		"should correctly handle empty blocks generated in the splitting stage": {
			numShards: 2,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				// Generate a block with only 1 series. This block will be split into 1 split block only,
				// because the source block only has 1 series.
				block1 := createDBBlock(t, bkt, userID, blockRangeMillis, 2*blockRangeMillis, 1, externalLabels(""))

				return []block.Meta{
					{
						MinTime: model.Time(blockRangeMillis), // Because there's only 1 sample with timestamp=maxT-1
						MaxTime: model.Time(2 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					},
				}
			},
		},
		"splitting should be disabled if configured shards = 0": {
			numShards: 0,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				block1 := createDBBlock(t, bkt, userID, 0, (5 * time.Minute).Milliseconds(), numSeries, externalLabels(""))
				block2 := createDBBlock(t, bkt, userID, (5 * time.Minute).Milliseconds(), (10 * time.Minute).Milliseconds(), numSeries, externalLabels(""))

				// Add another block as "most recent one" otherwise the previous blocks are not compacted
				// because the most recent blocks must cover the full range to be compacted.
				block3 := createDBBlock(t, bkt, userID, blockRangeMillis, blockRangeMillis+time.Minute.Milliseconds(), numSeries, externalLabels(""))

				return []block.Meta{
					// Compacted but not split.
					{
						MinTime: 0,
						MaxTime: model.Time((10 * time.Minute).Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1, block2},
						},

						Labels: map[string]string{},
					}, {
						// Not compacted.
						MinTime: model.Time(blockRangeMillis),
						MaxTime: model.Time(blockRangeMillis + time.Minute.Milliseconds()),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block3},
						},

						Labels: map[string]string{},
					},
				}
			},
		},
		"splitting should be disabled but already split blocks should be merged correctly (respecting the shard) if configured shards = 0": {
			numShards: 0,
			setup: func(t *testing.T, bkt objstore.Bucket) []block.Meta {
				// Two split blocks in the 1st compaction range.
				block1a := createDBBlock(t, bkt, userID, 1, blockRangeMillis, numSeries, externalLabels("1_of_2"))
				block1b := createDBBlock(t, bkt, userID, 1, blockRangeMillis, numSeries, externalLabels("2_of_2"))

				// Two non-split overlapping blocks in the 1st compaction range.
				block2 := createDBBlock(t, bkt, userID, blockRangeMillis, 2*blockRangeMillis, numSeries, externalLabels(""))
				block3 := createDBBlock(t, bkt, userID, blockRangeMillis, 2*blockRangeMillis, numSeries, externalLabels(""))

				// Two split adjacent blocks in the 2nd compaction range.
				block4a := createDBBlock(t, bkt, userID, 2*blockRangeMillis, 3*blockRangeMillis, numSeries, externalLabels("1_of_2"))
				block4b := createDBBlock(t, bkt, userID, 2*blockRangeMillis, 3*blockRangeMillis, numSeries, externalLabels("2_of_2"))
				block5a := createDBBlock(t, bkt, userID, 3*blockRangeMillis, 4*blockRangeMillis, numSeries, externalLabels("1_of_2"))
				block5b := createDBBlock(t, bkt, userID, 3*blockRangeMillis, 4*blockRangeMillis, numSeries, externalLabels("2_of_2"))

				// Two non-adjacent non-split blocks, each within a single 1st-range period.
				block6 := createDBBlock(t, bkt, userID, 4*blockRangeMillis+1, 5*blockRangeMillis, numSeries, externalLabels(""))
				block7 := createDBBlock(t, bkt, userID, 7*blockRangeMillis, 8*blockRangeMillis, numSeries, externalLabels(""))

				return []block.Meta{
					// Block1 have been compacted with block4 and block5 in the 3rd range compaction.
					{
						MinTime: 1,
						MaxTime: model.Time(4 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1a, block4a, block5a},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "1_of_2",
						},
					},
					{
						MinTime: 1,
						MaxTime: model.Time(4 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block1b, block4b, block5b},
						},

						Labels: map[string]string{
							sharding.CompactorShardIDLabel: "2_of_2",
						},
					},
					// The two overlapping blocks (block2, block3) have been merged in the 1st range.
					{
						MinTime: model.Time(blockRangeMillis),
						MaxTime: model.Time(2 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block2, block3},
						},

						Labels: map[string]string{},
					},
					// The two non-adjacent blocks block6 and block7 are merged together in the 3rd range.
					{
						MinTime: model.Time(4*blockRangeMillis) + 1,
						MaxTime: model.Time(8 * blockRangeMillis),
						Compaction: block.BlockMetaCompaction{
							Sources: []ulid.ULID{block6, block7},
						},

						Labels: map[string]string{},
					},
				}
			},
		},
	}

	for testName, testData := range tests {
		t.Run(testName, func(t *testing.T) {
			workDir := t.TempDir()
			storageDir := t.TempDir()
			fetcherDir := t.TempDir()

			// Use a local filesystem bucket so the test needs no external storage.
			storageCfg := client.Config{
				StorageBackendConfig: client.StorageBackendConfig{
					Backend: client.Filesystem,
					Filesystem: filesystem.Config{
						Directory: storageDir,
					},
				},
			}

			compactorCfg := prepareConfig(t)
			compactorCfg.DataDir = workDir
			compactorCfg.BlockRanges = compactionRanges

			// Per-tenant shard count drives whether splitting happens at all.
			cfgProvider := newMockConfigProvider()
			cfgProvider.splitAndMergeShards[userID] = testData.numShards

			logger := log.NewLogfmtLogger(os.Stdout)
			reg := prometheus.NewPedanticRegistry()
			ctx := context.Background()

			// Create TSDB blocks in the storage and get the expected blocks.
			bkt, err := client.NewBucket(context.Background(), storageCfg, "test")
			require.NoError(t, err)

			defer bkt.Close()
			expected := testData.setup(t, bkt)

			c, err := NewMultitenantCompactor(compactorCfg, bkt, cfgProvider, logger, reg)
			require.NoError(t, err)
			require.NoError(t, services.StartAndAwaitRunning(context.Background(), c))
			t.Cleanup(func() {
				require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c))
			})

			// Wait until the first compaction run completed.
			test.Poll(t, 15*time.Second, nil, func() interface{} {
				return testutil.GatherAndCompare(reg, strings.NewReader(`
					# HELP pyroscope_compactor_runs_completed_total Total number of compaction runs successfully completed.
					# TYPE pyroscope_compactor_runs_completed_total counter
					pyroscope_compactor_runs_completed_total 1
				`), "pyroscope_compactor_runs_completed_total")
			})

			// List back any (non deleted) block from the storage.
			userBucket := objstore.NewTenantBucketClient(userID, bkt, nil)
			fetcher, err := block.NewMetaFetcher(logger,
				1,
				userBucket,
				fetcherDir,
				reg,
				nil,
			)
			require.NoError(t, err)
			metas, partials, err := fetcher.FetchWithoutMarkedForDeletion(ctx)
			require.NoError(t, err)
			require.Empty(t, partials)

			// Sort blocks by MinTime and labels so that we get a stable comparison.
			actual := sortMetasByMinTime(convertMetasMapToSlice(metas))

			// Compare actual blocks with the expected ones.
			require.Len(t, actual, len(expected))
			for i, e := range expected {
				// The hostname label is machine-dependent, so drop it before comparing.
				delete(actual[i].Labels, block.HostnameLabel)
				assert.Equal(t, e.MinTime, actual[i].MinTime)
				assert.Equal(t, e.MaxTime, actual[i].MaxTime)
				assert.Equal(t, e.Compaction.Sources, actual[i].Compaction.Sources)
				assert.Equal(t, e.Labels, actual[i].Labels)
			}
		})
	}
}
   535  
// TestMultitenantCompactor_ShouldGuaranteeSeriesShardingConsistencyOverTheTime
// splits a single block into shards and asserts that every series lands in a
// fixed, hard-coded shard. The expected assignment acts as a regression guard:
// if the series hashing used for sharding ever changed, series would move
// between shards — a backward incompatible change.
func TestMultitenantCompactor_ShouldGuaranteeSeriesShardingConsistencyOverTheTime(t *testing.T) {
	const (
		userID     = "user-1"
		numSeries  = 100
		blockRange = 2 * time.Hour
		numShards  = 2
	)

	var (
		blockRangeMillis = blockRange.Milliseconds()
		compactionRanges = DurationList{blockRange}

		// You should NEVER CHANGE the expected series here, otherwise it means you're introducing
		// a backward incompatible change.
		expectedSeriesIDByShard = map[string][]int{
			"1_of_2": {0, 1, 3, 4, 5, 6, 7, 11, 12, 15, 16, 17, 18, 19, 20, 21, 24, 25, 27, 31, 36, 37, 38, 40, 42, 45, 47, 50, 51, 52, 53, 54, 55, 57, 59, 60, 61, 63, 68, 70, 71, 72, 74, 77, 79, 80, 81, 82, 83, 84, 85, 86, 88, 89, 90, 91, 92, 94, 98, 100},
			"2_of_2": {2, 8, 9, 10, 13, 14, 22, 23, 26, 28, 29, 30, 32, 33, 34, 35, 39, 41, 43, 44, 46, 48, 49, 56, 58, 62, 64, 65, 66, 67, 69, 73, 75, 76, 78, 87, 93, 95, 96, 97, 99},
		}
	)

	workDir := t.TempDir()
	storageDir := t.TempDir()
	fetcherDir := t.TempDir()

	// Use a local filesystem bucket so the test needs no external storage.
	storageCfg := client.Config{
		StorageBackendConfig: client.StorageBackendConfig{
			Backend: client.Filesystem,
			Filesystem: filesystem.Config{
				Directory: storageDir,
			},
		},
	}

	compactorCfg := prepareConfig(t)
	compactorCfg.DataDir = workDir
	compactorCfg.BlockRanges = compactionRanges

	cfgProvider := newMockConfigProvider()
	cfgProvider.splitAndMergeShards[userID] = numShards

	logger := log.NewLogfmtLogger(os.Stdout)
	reg := prometheus.NewPedanticRegistry()
	ctx := context.Background()

	bucketClient, err := client.NewBucket(ctx, storageCfg, "test")
	require.NoError(t, err)

	// Create a TSDB block in the storage.
	blockID := createDBBlock(t, bucketClient, userID, blockRangeMillis, 2*blockRangeMillis, numSeries, nil)

	c, err := NewMultitenantCompactor(compactorCfg, bucketClient, cfgProvider, logger, reg)
	require.NoError(t, err)
	require.NoError(t, services.StartAndAwaitRunning(context.Background(), c))
	t.Cleanup(func() {
		require.NoError(t, services.StopAndAwaitTerminated(context.Background(), c))
	})

	// Wait until the first compaction run completed.
	test.Poll(t, 15*time.Second, nil, func() interface{} {
		return testutil.GatherAndCompare(reg, strings.NewReader(`
					# HELP pyroscope_compactor_runs_completed_total Total number of compaction runs successfully completed.
					# TYPE pyroscope_compactor_runs_completed_total counter
					pyroscope_compactor_runs_completed_total 1
				`), "pyroscope_compactor_runs_completed_total")
	})

	// List back any (non deleted) block from the storage.
	userBucket := objstore.NewTenantBucketClient(userID, bucketClient, nil)
	fetcher, err := block.NewMetaFetcher(logger,
		1,
		userBucket,
		fetcherDir,
		reg,
		nil,
	)
	require.NoError(t, err)
	metas, partials, err := fetcher.FetchWithoutMarkedForDeletion(ctx)
	require.NoError(t, err)
	require.Empty(t, partials)

	// Sort blocks by MinTime and labels so that we get a stable comparison.
	actualMetas := sortMetasByMinTime(convertMetasMapToSlice(metas))

	// Ensure the input block has been split: one output block per shard, each
	// covering the source block's time range and labelled with its shard ID.
	require.Len(t, actualMetas, numShards)
	for idx, actualMeta := range actualMetas {
		assert.Equal(t, model.Time(blockRangeMillis), actualMeta.MinTime)
		assert.Equal(t, model.Time(2*blockRangeMillis), actualMeta.MaxTime)
		assert.Equal(t, []ulid.ULID{blockID}, actualMeta.Compaction.Sources)
		assert.Equal(t, sharding.FormatShardIDLabelValue(uint64(idx), numShards), actualMeta.Labels[sharding.CompactorShardIDLabel])
	}

	// Ensure each split block contains the right series, based on a series labels
	// hashing function which doesn't change over time.
	for _, actualMeta := range actualMetas {
		expectedSeriesIDs := expectedSeriesIDByShard[actualMeta.Labels[sharding.CompactorShardIDLabel]]

		b := phlaredb.NewSingleBlockQuerierFromMeta(ctx, userBucket, actualMeta)
		require.NoError(t, b.Open(ctx))
		indexReader := b.Index()

		// Find all series in the block.
		postings, err := indexReader.Postings("series_id", nil)
		require.NoError(t, err)

		lbls := make(phlaremodel.Labels, 0, 6)

		for postings.Next() {
			// Symbolize the series labels.
			_, err := indexReader.Series(postings.At(), &lbls, nil)
			require.NoError(t, err)

			// Ensure the series belongs to the right shard.
			seriesLabels := lbls.Clone()
			seriesID, err := strconv.Atoi(seriesLabels.Get("series_id"))
			require.NoError(t, err)
			assert.Contains(t, expectedSeriesIDs, seriesID, "series:", seriesLabels.ToPrometheusLabels().String())
		}

		require.NoError(t, postings.Err())
	}
}
   658  
   659  func convertMetasMapToSlice(metas map[ulid.ULID]*block.Meta) []*block.Meta {
   660  	var out []*block.Meta
   661  	for _, m := range metas {
   662  		out = append(out, m)
   663  	}
   664  	return out
   665  }