github.com/grafana/pyroscope@v1.18.0/pkg/compactor/split_merge_grouper_test.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/grafana/mimir/blob/main/pkg/compactor/split_merge_grouper_test.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: The Cortex Authors.
     5  
     6  package compactor
     7  
     8  import (
     9  	"testing"
    10  	"time"
    11  
    12  	"github.com/oklog/ulid/v2"
    13  	"github.com/prometheus/common/model"
    14  	"github.com/stretchr/testify/assert"
    15  
    16  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    17  	"github.com/grafana/pyroscope/pkg/phlaredb/sharding"
    18  )
    19  
    20  func TestPlanCompaction(t *testing.T) {
    21  	const userID = "user-1"
    22  
    23  	block1 := ulid.MustNew(1, nil)   // Hash: 283204220
    24  	block2 := ulid.MustNew(2, nil)   // Hash: 444110359
    25  	block3 := ulid.MustNew(3, nil)   // Hash: 3253786510
    26  	block4 := ulid.MustNew(4, nil)   // Hash: 122298081
    27  	block5 := ulid.MustNew(5, nil)   // Hash: 2931974232
    28  	block6 := ulid.MustNew(6, nil)   // Hash: 3092880371
    29  	block7 := ulid.MustNew(7, nil)   // Hash: 1607589226
    30  	block8 := ulid.MustNew(8, nil)   // Hash: 2771068093
    31  	block9 := ulid.MustNew(9, nil)   // Hash: 1285776948
    32  	block10 := ulid.MustNew(10, nil) // Hash: 1446683087
    33  
    34  	tests := map[string]struct {
    35  		ranges      []int64
    36  		shardCount  uint32
    37  		splitGroups uint32
    38  		blocks      []*block.Meta
    39  		expected    []*job
    40  	}{
    41  		"no input blocks": {
    42  			ranges:   []int64{20},
    43  			blocks:   nil,
    44  			expected: nil,
    45  		},
    46  		"should split a single block if == smallest compaction range": {
    47  			ranges:     []int64{20, 40},
    48  			shardCount: 1,
    49  			blocks: []*block.Meta{
    50  				{ULID: block1, MinTime: 0, MaxTime: 20},
    51  			},
    52  			expected: []*job{
    53  				{userID: userID, stage: stageSplit, shardID: "1_of_1", blocksGroup: blocksGroup{
    54  					rangeStart: 0,
    55  					rangeEnd:   20,
    56  					blocks: []*block.Meta{
    57  						{ULID: block1, MinTime: 0, MaxTime: 20},
    58  					},
    59  				}},
    60  			},
    61  		},
    62  		"should split a single block if < smallest compaction range": {
    63  			ranges:     []int64{20, 40},
    64  			shardCount: 1,
    65  			blocks: []*block.Meta{
    66  				{ULID: block1, MinTime: 10, MaxTime: 20},
    67  			},
    68  			expected: []*job{
    69  				{userID: userID, stage: stageSplit, shardID: "1_of_1", blocksGroup: blocksGroup{
    70  					rangeStart: 0,
    71  					rangeEnd:   20,
    72  					blocks: []*block.Meta{
    73  						{ULID: block1, MinTime: 10, MaxTime: 20},
    74  					},
    75  				}},
    76  			},
    77  		},
    78  		"should NOT split a single block if == smallest compaction range but configured shards = 0": {
    79  			ranges:     []int64{20, 40},
    80  			shardCount: 0,
    81  			blocks: []*block.Meta{
    82  				{ULID: block1, MinTime: 0, MaxTime: 20},
    83  			},
    84  			expected: []*job{},
    85  		},
    86  		"should merge and split multiple 1st level blocks within the same time range": {
    87  			ranges:     []int64{10, 20},
    88  			shardCount: 1,
    89  			blocks: []*block.Meta{
    90  				{ULID: block1, MinTime: 10, MaxTime: 20},
    91  				{ULID: block2, MinTime: 10, MaxTime: 20},
    92  			},
    93  			expected: []*job{
    94  				{userID: userID, stage: stageSplit, shardID: "1_of_1", blocksGroup: blocksGroup{
    95  					rangeStart: 10,
    96  					rangeEnd:   20,
    97  					blocks: []*block.Meta{
    98  						{ULID: block1, MinTime: 10, MaxTime: 20},
    99  						{ULID: block2, MinTime: 10, MaxTime: 20},
   100  					},
   101  				}},
   102  			},
   103  		},
   104  		"should merge and split multiple 1st level blocks in different time ranges": {
   105  			ranges:     []int64{10, 20},
   106  			shardCount: 1,
   107  			blocks: []*block.Meta{
   108  				// 1st level range [0, 10]
   109  				{ULID: block1, MinTime: 0, MaxTime: 10},
   110  				{ULID: block2, MinTime: 0, MaxTime: 10},
   111  				// 1st level range [10, 20]
   112  				{ULID: block3, MinTime: 11, MaxTime: 20},
   113  				{ULID: block4, MinTime: 11, MaxTime: 20},
   114  			},
   115  			expected: []*job{
   116  				{userID: userID, stage: stageSplit, shardID: "1_of_1", blocksGroup: blocksGroup{
   117  					rangeStart: 0,
   118  					rangeEnd:   10,
   119  					blocks: []*block.Meta{
   120  						{ULID: block1, MinTime: 0, MaxTime: 10},
   121  						{ULID: block2, MinTime: 0, MaxTime: 10},
   122  					},
   123  				}},
   124  				{userID: userID, stage: stageSplit, shardID: "1_of_1", blocksGroup: blocksGroup{
   125  					rangeStart: 10,
   126  					rangeEnd:   20,
   127  					blocks: []*block.Meta{
   128  						{ULID: block3, MinTime: 11, MaxTime: 20},
   129  						{ULID: block4, MinTime: 11, MaxTime: 20},
   130  					},
   131  				}},
   132  			},
   133  		},
   134  		"should merge and split multiple 1st level blocks in different time ranges, single split group": {
   135  			ranges:      []int64{10, 20},
   136  			shardCount:  2,
   137  			splitGroups: 1,
   138  			blocks: []*block.Meta{
   139  				// 1st level range [0, 10]
   140  				{ULID: block1, MinTime: 0, MaxTime: 10},
   141  				{ULID: block2, MinTime: 0, MaxTime: 10},
   142  				// 1st level range [10, 20]
   143  				{ULID: block3, MinTime: 11, MaxTime: 20},
   144  				{ULID: block4, MinTime: 11, MaxTime: 20},
   145  			},
   146  			expected: []*job{
   147  				{userID: userID, stage: stageSplit, shardID: "1_of_1", blocksGroup: blocksGroup{
   148  					rangeStart: 0,
   149  					rangeEnd:   10,
   150  					blocks: []*block.Meta{
   151  						{ULID: block1, MinTime: 0, MaxTime: 10},
   152  						{ULID: block2, MinTime: 0, MaxTime: 10},
   153  					},
   154  				}},
   155  				{userID: userID, stage: stageSplit, shardID: "1_of_1", blocksGroup: blocksGroup{
   156  					rangeStart: 10,
   157  					rangeEnd:   20,
   158  					blocks: []*block.Meta{
   159  						{ULID: block3, MinTime: 11, MaxTime: 20},
   160  						{ULID: block4, MinTime: 11, MaxTime: 20},
   161  					},
   162  				}},
   163  			},
   164  		},
   165  		"should merge and split multiple 1st level blocks in different time ranges, two split groups": {
   166  			ranges:      []int64{10, 20},
   167  			shardCount:  2,
   168  			splitGroups: 2,
   169  			blocks: []*block.Meta{
   170  				// 1st level range [0, 10]
   171  				{ULID: block1, MinTime: 0, MaxTime: 10},
   172  				{ULID: block2, MinTime: 0, MaxTime: 10},
   173  				// 1st level range [10, 20]
   174  				{ULID: block3, MinTime: 11, MaxTime: 20},
   175  				{ULID: block4, MinTime: 11, MaxTime: 20},
   176  			},
   177  			expected: []*job{
   178  				{userID: userID, stage: stageSplit, shardID: "1_of_2", blocksGroup: blocksGroup{
   179  					rangeStart: 0,
   180  					rangeEnd:   10,
   181  					blocks: []*block.Meta{
   182  						{ULID: block1, MinTime: 0, MaxTime: 10},
   183  					},
   184  				}},
   185  				{userID: userID, stage: stageSplit, shardID: "2_of_2", blocksGroup: blocksGroup{
   186  					rangeStart: 0,
   187  					rangeEnd:   10,
   188  					blocks: []*block.Meta{
   189  						{ULID: block2, MinTime: 0, MaxTime: 10},
   190  					},
   191  				}},
   192  				{userID: userID, stage: stageSplit, shardID: "1_of_2", blocksGroup: blocksGroup{
   193  					rangeStart: 10,
   194  					rangeEnd:   20,
   195  					blocks: []*block.Meta{
   196  						{ULID: block3, MinTime: 11, MaxTime: 20},
   197  					},
   198  				}},
   199  				{userID: userID, stage: stageSplit, shardID: "2_of_2", blocksGroup: blocksGroup{
   200  					rangeStart: 10,
   201  					rangeEnd:   20,
   202  					blocks: []*block.Meta{
   203  						{ULID: block4, MinTime: 11, MaxTime: 20},
   204  					},
   205  				}},
   206  			},
   207  		},
   208  		"should merge but NOT split multiple 1st level blocks in different time ranges if configured shards = 0": {
   209  			ranges:     []int64{10, 20},
   210  			shardCount: 0,
   211  			blocks: []*block.Meta{
   212  				// 1st level range [0, 10]
   213  				{ULID: block1, MinTime: 0, MaxTime: 10},
   214  				{ULID: block2, MinTime: 0, MaxTime: 10},
   215  				// 1st level range [10, 20]
   216  				{ULID: block3, MinTime: 11, MaxTime: 20},
   217  				{ULID: block4, MinTime: 11, MaxTime: 20},
   218  			},
   219  			expected: []*job{
   220  				{userID: userID, stage: stageMerge, blocksGroup: blocksGroup{
   221  					rangeStart: 0,
   222  					rangeEnd:   10,
   223  					blocks: []*block.Meta{
   224  						{ULID: block1, MinTime: 0, MaxTime: 10},
   225  						{ULID: block2, MinTime: 0, MaxTime: 10},
   226  					},
   227  				}},
   228  				{userID: userID, stage: stageMerge, blocksGroup: blocksGroup{
   229  					rangeStart: 10,
   230  					rangeEnd:   20,
   231  					blocks: []*block.Meta{
   232  						{ULID: block3, MinTime: 11, MaxTime: 20},
   233  						{ULID: block4, MinTime: 11, MaxTime: 20},
   234  					},
   235  				}},
   236  			},
   237  		},
   238  		"should merge split blocks that can be compacted on the 2nd range only": {
   239  			ranges:     []int64{10, 20},
   240  			shardCount: 2,
   241  			blocks: []*block.Meta{
   242  				// 2nd level range [0, 20]
   243  				{ULID: block1, MinTime: 0, MaxTime: 10, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   244  				{ULID: block2, MinTime: 10, MaxTime: 20, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   245  				{ULID: block3, MinTime: 0, MaxTime: 10, Labels: map[string]string{sharding.CompactorShardIDLabel: "2_of_2"}},
   246  				{ULID: block4, MinTime: 10, MaxTime: 20, Labels: map[string]string{sharding.CompactorShardIDLabel: "2_of_2"}},
   247  				// 2nd level range [20, 40]
   248  				{ULID: block5, MinTime: 21, MaxTime: 30, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   249  				{ULID: block6, MinTime: 30, MaxTime: 40, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   250  			},
   251  			expected: []*job{
   252  				{userID: userID, stage: stageMerge, shardID: "1_of_2", blocksGroup: blocksGroup{
   253  					rangeStart: 0,
   254  					rangeEnd:   20,
   255  					blocks: []*block.Meta{
   256  						{ULID: block1, MinTime: 0, MaxTime: 10, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   257  						{ULID: block2, MinTime: 10, MaxTime: 20, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   258  					},
   259  				}},
   260  				{userID: userID, stage: stageMerge, shardID: "2_of_2", blocksGroup: blocksGroup{
   261  					rangeStart: 0,
   262  					rangeEnd:   20,
   263  					blocks: []*block.Meta{
   264  						{ULID: block3, MinTime: 0, MaxTime: 10, Labels: map[string]string{sharding.CompactorShardIDLabel: "2_of_2"}},
   265  						{ULID: block4, MinTime: 10, MaxTime: 20, Labels: map[string]string{sharding.CompactorShardIDLabel: "2_of_2"}},
   266  					},
   267  				}},
   268  				{userID: userID, stage: stageMerge, shardID: "1_of_2", blocksGroup: blocksGroup{
   269  					rangeStart: 20,
   270  					rangeEnd:   40,
   271  					blocks: []*block.Meta{
   272  						{ULID: block5, MinTime: 21, MaxTime: 30, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   273  						{ULID: block6, MinTime: 30, MaxTime: 40, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   274  					},
   275  				}},
   276  			},
   277  		},
   278  		"should not split non-split blocks if they're > smallest compaction range (do not split historical blocks after enabling splitting)": {
   279  			ranges:     []int64{10, 20},
   280  			shardCount: 2,
   281  			blocks: []*block.Meta{
   282  				// 2nd level range [0, 20]
   283  				{ULID: block1, MinTime: 0, MaxTime: 10, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   284  				{ULID: block2, MinTime: 10, MaxTime: 20, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   285  				{ULID: block3, MinTime: 0, MaxTime: 10, Labels: map[string]string{sharding.CompactorShardIDLabel: "2_of_2"}},
   286  				{ULID: block4, MinTime: 10, MaxTime: 20, Labels: map[string]string{sharding.CompactorShardIDLabel: "2_of_2"}},
   287  				// 2nd level range [20, 40]
   288  				{ULID: block5, MinTime: 21, MaxTime: 40},
   289  				{ULID: block6, MinTime: 21, MaxTime: 40, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   290  			},
   291  			expected: []*job{
   292  				{userID: userID, stage: stageMerge, shardID: "1_of_2", blocksGroup: blocksGroup{
   293  					rangeStart: 0,
   294  					rangeEnd:   20,
   295  					blocks: []*block.Meta{
   296  						{ULID: block1, MinTime: 0, MaxTime: 10, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   297  						{ULID: block2, MinTime: 10, MaxTime: 20, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   298  					},
   299  				}},
   300  				{userID: userID, stage: stageMerge, shardID: "2_of_2", blocksGroup: blocksGroup{
   301  					rangeStart: 0,
   302  					rangeEnd:   20,
   303  					blocks: []*block.Meta{
   304  						{ULID: block3, MinTime: 0, MaxTime: 10, Labels: map[string]string{sharding.CompactorShardIDLabel: "2_of_2"}},
   305  						{ULID: block4, MinTime: 10, MaxTime: 20, Labels: map[string]string{sharding.CompactorShardIDLabel: "2_of_2"}},
   306  					},
   307  				}},
   308  			},
   309  		},
   310  		"input blocks can be compacted on a mix of 1st and 2nd ranges, guaranteeing no overlaps and giving preference to smaller ranges": {
   311  			ranges:     []int64{10, 20},
   312  			shardCount: 1,
   313  			blocks: []*block.Meta{
   314  				// To be split on 1st level range [0, 10]
   315  				{ULID: block1, MinTime: 0, MaxTime: 10},
   316  				{ULID: block2, MinTime: 7, MaxTime: 10},
   317  				// Not compacted because on 2nd level because the range [0, 20]
   318  				// has other 1st level range groups to be split first
   319  				{ULID: block10, MinTime: 0, MaxTime: 10, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   320  				{ULID: block3, MinTime: 10, MaxTime: 20, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   321  				// To be compacted on 2nd level range [20, 40]
   322  				{ULID: block4, MinTime: 21, MaxTime: 30, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   323  				{ULID: block5, MinTime: 30, MaxTime: 40, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   324  				// Already compacted on 2nd level range [40, 60]
   325  				{ULID: block6, MinTime: 41, MaxTime: 60, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   326  				// Not compacted on 2nd level because the range [60, 80]
   327  				// has other 1st level range groups to be compacted first
   328  				{ULID: block7, MinTime: 61, MaxTime: 70, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   329  				// To be compacted on 1st level range [70, 80]
   330  				{ULID: block8, MinTime: 71, MaxTime: 80, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   331  				{ULID: block9, MinTime: 75, MaxTime: 80, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   332  			},
   333  			expected: []*job{
   334  				{userID: userID, stage: stageSplit, shardID: "1_of_1", blocksGroup: blocksGroup{
   335  					rangeStart: 0,
   336  					rangeEnd:   10,
   337  					blocks: []*block.Meta{
   338  						{ULID: block1, MinTime: 0, MaxTime: 10},
   339  						{ULID: block2, MinTime: 7, MaxTime: 10},
   340  					},
   341  				}},
   342  				{userID: userID, stage: stageMerge, shardID: "1_of_1", blocksGroup: blocksGroup{
   343  					rangeStart: 70,
   344  					rangeEnd:   80,
   345  					blocks: []*block.Meta{
   346  						{ULID: block8, MinTime: 71, MaxTime: 80, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   347  						{ULID: block9, MinTime: 75, MaxTime: 80, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   348  					},
   349  				}},
   350  				{userID: userID, stage: stageMerge, shardID: "1_of_1", blocksGroup: blocksGroup{
   351  					rangeStart: 20,
   352  					rangeEnd:   40,
   353  					blocks: []*block.Meta{
   354  						{ULID: block4, MinTime: 21, MaxTime: 30, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   355  						{ULID: block5, MinTime: 30, MaxTime: 40, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   356  					},
   357  				}},
   358  			},
   359  		},
   360  		"input blocks have already been compacted with the largest range": {
   361  			ranges:     []int64{10, 20, 40},
   362  			shardCount: 1,
   363  			blocks: []*block.Meta{
   364  				{ULID: block1, MinTime: 0, MaxTime: 40, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   365  				{ULID: block2, MinTime: 40, MaxTime: 70, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   366  				{ULID: block3, MinTime: 80, MaxTime: 120, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   367  			},
   368  			expected: nil,
   369  		},
   370  		"input blocks match the largest range but can be compacted because overlapping": {
   371  			ranges:     []int64{10, 20, 40},
   372  			shardCount: 1,
   373  			blocks: []*block.Meta{
   374  				{ULID: block1, MinTime: 0, MaxTime: 40, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   375  				{ULID: block2, MinTime: 40, MaxTime: 70, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   376  				{ULID: block3, MinTime: 81, MaxTime: 120, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   377  				{ULID: block4, MinTime: 81, MaxTime: 120, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   378  			},
   379  			expected: []*job{
   380  				{userID: userID, stage: stageMerge, shardID: "1_of_1", blocksGroup: blocksGroup{
   381  					rangeStart: 80,
   382  					rangeEnd:   120,
   383  					blocks: []*block.Meta{
   384  						{ULID: block3, MinTime: 81, MaxTime: 120, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   385  						{ULID: block4, MinTime: 81, MaxTime: 120, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   386  					},
   387  				}},
   388  			},
   389  		},
   390  		"a block with time range crossing two 1st level ranges should be NOT considered for 1st level splitting": {
   391  			ranges:     []int64{20, 40},
   392  			shardCount: 1,
   393  			blocks: []*block.Meta{
   394  				{ULID: block1, MinTime: 10, MaxTime: 20},
   395  				{ULID: block2, MinTime: 10, MaxTime: 30}, // This block spans across two 1st level ranges.
   396  				{ULID: block3, MinTime: 21, MaxTime: 30},
   397  				{ULID: block4, MinTime: 30, MaxTime: 40},
   398  			},
   399  			expected: []*job{
   400  				{userID: userID, stage: stageSplit, shardID: "1_of_1", blocksGroup: blocksGroup{
   401  					rangeStart: 0,
   402  					rangeEnd:   20,
   403  					blocks: []*block.Meta{
   404  						{ULID: block1, MinTime: 10, MaxTime: 20},
   405  					},
   406  				}},
   407  				{userID: userID, stage: stageSplit, shardID: "1_of_1", blocksGroup: blocksGroup{
   408  					rangeStart: 20,
   409  					rangeEnd:   40,
   410  					blocks: []*block.Meta{
   411  						{ULID: block3, MinTime: 21, MaxTime: 30},
   412  						{ULID: block4, MinTime: 30, MaxTime: 40},
   413  					},
   414  				}},
   415  			},
   416  		},
   417  		"a block with time range crossing two 1st level ranges should BE considered for 2nd level compaction": {
   418  			ranges:     []int64{20, 40},
   419  			shardCount: 1,
   420  			blocks: []*block.Meta{
   421  				{ULID: block1, MinTime: 0, MaxTime: 20, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   422  				{ULID: block2, MinTime: 10, MaxTime: 30, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}}, // This block spans across two 1st level ranges.
   423  				{ULID: block3, MinTime: 20, MaxTime: 40, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   424  			},
   425  			expected: []*job{
   426  				{userID: userID, stage: stageMerge, shardID: "1_of_1", blocksGroup: blocksGroup{
   427  					rangeStart: 0,
   428  					rangeEnd:   40,
   429  					blocks: []*block.Meta{
   430  						{ULID: block1, MinTime: 0, MaxTime: 20, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   431  						{ULID: block2, MinTime: 10, MaxTime: 30, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   432  						{ULID: block3, MinTime: 20, MaxTime: 40, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   433  					},
   434  				}},
   435  			},
   436  		},
   437  		"a block with time range larger then the largest compaction range should NOT be considered for compaction": {
   438  			ranges:     []int64{10, 20, 40},
   439  			shardCount: 1,
   440  			blocks: []*block.Meta{
   441  				{ULID: block1, MinTime: 0, MaxTime: 40, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   442  				{ULID: block2, MinTime: 30, MaxTime: 150, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}}, // This block is larger then the largest compaction range.
   443  				{ULID: block3, MinTime: 40, MaxTime: 70, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   444  				{ULID: block4, MinTime: 81, MaxTime: 120, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   445  				{ULID: block5, MinTime: 81, MaxTime: 120, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   446  			},
   447  			expected: []*job{
   448  				{userID: userID, stage: stageMerge, shardID: "1_of_1", blocksGroup: blocksGroup{
   449  					rangeStart: 80,
   450  					rangeEnd:   120,
   451  					blocks: []*block.Meta{
   452  						{ULID: block4, MinTime: 81, MaxTime: 120, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   453  						{ULID: block5, MinTime: 81, MaxTime: 120, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_1"}},
   454  					},
   455  				}},
   456  			},
   457  		},
   458  		"a range containing the most recent block shouldn't be prematurely compacted if doesn't cover the full range": {
   459  			ranges:     []int64{10, 20, 40},
   460  			shardCount: 1,
   461  			blocks: []*block.Meta{
   462  				{MinTime: 5, MaxTime: 8},
   463  				{MinTime: 7, MaxTime: 9},
   464  				{MinTime: 10, MaxTime: 12},
   465  				{MinTime: 13, MaxTime: 15},
   466  			},
   467  			expected: []*job{
   468  				{userID: userID, stage: stageSplit, shardID: "1_of_1", blocksGroup: blocksGroup{
   469  					rangeStart: 0,
   470  					rangeEnd:   10,
   471  					blocks: []*block.Meta{
   472  						{MinTime: 5, MaxTime: 8},
   473  						{MinTime: 7, MaxTime: 9},
   474  					},
   475  				}},
   476  			},
   477  		},
   478  		"should not merge blocks within the same time range but with different external labels": {
   479  			ranges:     []int64{10, 20},
   480  			shardCount: 1,
   481  			blocks: []*block.Meta{
   482  				{ULID: block1, MinTime: 10, MaxTime: 20},
   483  				{ULID: block2, MinTime: 10, MaxTime: 20, Labels: map[string]string{"another_group": "a"}},
   484  				{ULID: block3, MinTime: 10, MaxTime: 20, Labels: map[string]string{"another_group": "a"}},
   485  				{ULID: block4, MinTime: 10, MaxTime: 20, Labels: map[string]string{"another_group": "b"}},
   486  			},
   487  			expected: []*job{
   488  				{userID: userID, stage: stageSplit, shardID: "1_of_1", blocksGroup: blocksGroup{
   489  					rangeStart: 10,
   490  					rangeEnd:   20,
   491  					blocks: []*block.Meta{
   492  						{ULID: block1, MinTime: 10, MaxTime: 20},
   493  					},
   494  				}},
   495  				{userID: userID, stage: stageSplit, shardID: "1_of_1", blocksGroup: blocksGroup{
   496  					rangeStart: 10,
   497  					rangeEnd:   20,
   498  					blocks: []*block.Meta{
   499  						{ULID: block2, MinTime: 10, MaxTime: 20, Labels: map[string]string{"another_group": "a"}},
   500  						{ULID: block3, MinTime: 10, MaxTime: 20, Labels: map[string]string{"another_group": "a"}},
   501  					},
   502  				}},
   503  				{userID: userID, stage: stageSplit, shardID: "1_of_1", blocksGroup: blocksGroup{
   504  					rangeStart: 10,
   505  					rangeEnd:   20,
   506  					blocks: []*block.Meta{
   507  						{ULID: block4, MinTime: 10, MaxTime: 20, Labels: map[string]string{"another_group": "b"}},
   508  					},
   509  				}},
   510  			},
   511  		},
   512  	}
   513  
   514  	for testName, testData := range tests {
   515  		t.Run(testName, func(t *testing.T) {
   516  			actual := planCompaction(userID, testData.blocks, testData.ranges, testData.shardCount, testData.splitGroups)
   517  
   518  			// Print the actual jobs (useful for debugging if tests fail).
   519  			t.Logf("got %d jobs:", len(actual))
   520  			for _, job := range actual {
   521  				t.Logf("- %s", job.String())
   522  			}
   523  
   524  			assert.ElementsMatch(t, testData.expected, actual)
   525  		})
   526  	}
   527  }
   528  
   529  func TestPlanSplitting(t *testing.T) {
   530  	const userID = "user-1"
   531  
   532  	block1 := ulid.MustNew(1, nil) // Hash: 283204220
   533  	block2 := ulid.MustNew(2, nil) // Hash: 444110359
   534  	block3 := ulid.MustNew(3, nil) // Hash: 3253786510
   535  	block4 := ulid.MustNew(4, nil) // Hash: 122298081
   536  	block5 := ulid.MustNew(5, nil) // Hash: 2931974232
   537  
   538  	tests := map[string]struct {
   539  		blocks      blocksGroup
   540  		splitGroups uint32
   541  		expected    []*job
   542  	}{
   543  		"should return nil if the input group is empty": {
   544  			blocks:      blocksGroup{},
   545  			splitGroups: 2,
   546  			expected:    nil,
   547  		},
   548  		"should return nil if the input group contains no non-sharded blocks": {
   549  			blocks: blocksGroup{
   550  				rangeStart: 10,
   551  				rangeEnd:   20,
   552  				blocks: []*block.Meta{
   553  					{ULID: block1, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   554  					{ULID: block2, Labels: map[string]string{sharding.CompactorShardIDLabel: "2_of_2"}},
   555  				},
   556  			},
   557  			splitGroups: 2,
   558  			expected:    nil,
   559  		},
   560  		"should return a split job if the input group contains 1 non-sharded block": {
   561  			blocks: blocksGroup{
   562  				rangeStart: 10,
   563  				rangeEnd:   20,
   564  				blocks: []*block.Meta{
   565  					{ULID: block1, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   566  					{ULID: block2},
   567  				},
   568  			},
   569  			splitGroups: 2,
   570  			expected: []*job{
   571  				{
   572  					blocksGroup: blocksGroup{
   573  						rangeStart: 10,
   574  						rangeEnd:   20,
   575  						blocks: []*block.Meta{
   576  							{ULID: block2},
   577  						},
   578  					},
   579  					userID:  userID,
   580  					stage:   stageSplit,
   581  					shardID: "2_of_2",
   582  				},
   583  			},
   584  		},
   585  		"should splitGroups split jobs if the input group contains multiple non-sharded blocks": {
   586  			blocks: blocksGroup{
   587  				rangeStart: 10,
   588  				rangeEnd:   20,
   589  				blocks: []*block.Meta{
   590  					{ULID: block1, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   591  					{ULID: block2},
   592  					{ULID: block3},
   593  					{ULID: block4},
   594  					{ULID: block5, Labels: map[string]string{sharding.CompactorShardIDLabel: "1_of_2"}},
   595  				},
   596  			},
   597  			splitGroups: 2,
   598  			expected: []*job{
   599  				{
   600  					blocksGroup: blocksGroup{
   601  						rangeStart: 10,
   602  						rangeEnd:   20,
   603  						blocks: []*block.Meta{
   604  							{ULID: block3},
   605  						},
   606  					},
   607  					userID:  userID,
   608  					stage:   stageSplit,
   609  					shardID: "1_of_2",
   610  				}, {
   611  					blocksGroup: blocksGroup{
   612  						rangeStart: 10,
   613  						rangeEnd:   20,
   614  						blocks: []*block.Meta{
   615  							{ULID: block2},
   616  							{ULID: block4},
   617  						},
   618  					},
   619  					userID:  userID,
   620  					stage:   stageSplit,
   621  					shardID: "2_of_2",
   622  				},
   623  			},
   624  		},
   625  	}
   626  
   627  	for testName, testData := range tests {
   628  		t.Run(testName, func(t *testing.T) {
   629  			assert.ElementsMatch(t, testData.expected, planSplitting(userID, testData.blocks, testData.splitGroups))
   630  		})
   631  	}
   632  }
   633  
   634  func TestGroupBlocksByShardID(t *testing.T) {
   635  	block1 := ulid.MustNew(1, nil)
   636  	block2 := ulid.MustNew(2, nil)
   637  	block3 := ulid.MustNew(3, nil)
   638  	block4 := ulid.MustNew(4, nil)
   639  
   640  	tests := map[string]struct {
   641  		blocks   []*block.Meta
   642  		expected map[string][]*block.Meta
   643  	}{
   644  		"no input blocks": {
   645  			blocks:   nil,
   646  			expected: map[string][]*block.Meta{},
   647  		},
   648  		"only 1 block in input with shard ID": {
   649  			blocks: []*block.Meta{
   650  				{ULID: block1, Labels: map[string]string{sharding.CompactorShardIDLabel: "1"}},
   651  			},
   652  			expected: map[string][]*block.Meta{
   653  				"1": {
   654  					{ULID: block1, Labels: map[string]string{sharding.CompactorShardIDLabel: "1"}},
   655  				},
   656  			},
   657  		},
   658  		"only 1 block in input without shard ID": {
   659  			blocks: []*block.Meta{
   660  				{ULID: block1},
   661  			},
   662  			expected: map[string][]*block.Meta{
   663  				"": {
   664  					{ULID: block1},
   665  				},
   666  			},
   667  		},
   668  		"multiple blocks per shard ID": {
   669  			blocks: []*block.Meta{
   670  				{ULID: block1, Labels: map[string]string{sharding.CompactorShardIDLabel: "1"}},
   671  				{ULID: block2, Labels: map[string]string{sharding.CompactorShardIDLabel: "2"}},
   672  				{ULID: block3, Labels: map[string]string{sharding.CompactorShardIDLabel: "1"}},
   673  				{ULID: block4},
   674  			},
   675  			expected: map[string][]*block.Meta{
   676  				"": {
   677  					{ULID: block4},
   678  				},
   679  				"1": {
   680  					{ULID: block1, Labels: map[string]string{sharding.CompactorShardIDLabel: "1"}},
   681  					{ULID: block3, Labels: map[string]string{sharding.CompactorShardIDLabel: "1"}},
   682  				},
   683  				"2": {
   684  					{ULID: block2, Labels: map[string]string{sharding.CompactorShardIDLabel: "2"}},
   685  				},
   686  			},
   687  		},
   688  	}
   689  
   690  	for testName, testData := range tests {
   691  		t.Run(testName, func(t *testing.T) {
   692  			assert.Equal(t, testData.expected, groupBlocksByShardID(testData.blocks))
   693  		})
   694  	}
   695  }
   696  
   697  func TestGroupBlocksByRange(t *testing.T) {
   698  	blockRange := 2 * time.Hour.Milliseconds()
   699  	tests := map[string]struct {
   700  		timeRange int64
   701  		blocks    []*block.Meta
   702  		expected  []blocksGroup
   703  	}{
   704  		"no input blocks": {
   705  			timeRange: 20,
   706  			blocks:    nil,
   707  			expected:  nil,
   708  		},
   709  		"only 1 block in input": {
   710  			timeRange: 20,
   711  			blocks: []*block.Meta{
   712  				{MinTime: 10, MaxTime: 20},
   713  			},
   714  			expected: []blocksGroup{
   715  				{rangeStart: 0, rangeEnd: 20, blocks: []*block.Meta{
   716  					{MinTime: 10, MaxTime: 20},
   717  				}},
   718  			},
   719  		},
   720  		"block start at the end of the range": {
   721  			timeRange: 20,
   722  			blocks: []*block.Meta{
   723  				{MinTime: 10, MaxTime: 20},
   724  				{MinTime: 20, MaxTime: 40},
   725  			},
   726  			expected: []blocksGroup{
   727  				{rangeStart: 0, rangeEnd: 20, blocks: []*block.Meta{
   728  					{MinTime: 10, MaxTime: 20},
   729  				}},
   730  				{rangeStart: 20, rangeEnd: 40, blocks: []*block.Meta{
   731  					{MinTime: 20, MaxTime: 40},
   732  				}},
   733  			},
   734  		},
   735  		"only 1 block per range": {
   736  			timeRange: 20,
   737  			blocks: []*block.Meta{
   738  				{MinTime: 10, MaxTime: 15},
   739  				{MinTime: 21, MaxTime: 40},
   740  				{MinTime: 41, MaxTime: 60},
   741  			},
   742  			expected: []blocksGroup{
   743  				{rangeStart: 0, rangeEnd: 20, blocks: []*block.Meta{
   744  					{MinTime: 10, MaxTime: 15},
   745  				}},
   746  				{rangeStart: 20, rangeEnd: 40, blocks: []*block.Meta{
   747  					{MinTime: 21, MaxTime: 40},
   748  				}},
   749  				{rangeStart: 40, rangeEnd: 60, blocks: []*block.Meta{
   750  					{MinTime: 41, MaxTime: 60},
   751  				}},
   752  			},
   753  		},
   754  		"multiple blocks per range": {
   755  			timeRange: 20,
   756  			blocks: []*block.Meta{
   757  				{MinTime: 10, MaxTime: 15},
   758  				{MinTime: 10, MaxTime: 20},
   759  				{MinTime: 40, MaxTime: 60},
   760  				{MinTime: 50, MaxTime: 55},
   761  			},
   762  			expected: []blocksGroup{
   763  				{rangeStart: 0, rangeEnd: 20, blocks: []*block.Meta{
   764  					{MinTime: 10, MaxTime: 15},
   765  					{MinTime: 10, MaxTime: 20},
   766  				}},
   767  				{rangeStart: 40, rangeEnd: 60, blocks: []*block.Meta{
   768  					{MinTime: 40, MaxTime: 60},
   769  					{MinTime: 50, MaxTime: 55},
   770  				}},
   771  			},
   772  		},
   773  		"a block with time range larger then the range should be excluded": {
   774  			timeRange: 20,
   775  			blocks: []*block.Meta{
   776  				{MinTime: 0, MaxTime: 20},
   777  				{MinTime: 0, MaxTime: 40}, // This block is larger then the range.
   778  				{MinTime: 10, MaxTime: 20},
   779  				{MinTime: 21, MaxTime: 30},
   780  			},
   781  			expected: []blocksGroup{
   782  				{rangeStart: 0, rangeEnd: 20, blocks: []*block.Meta{
   783  					{MinTime: 0, MaxTime: 20},
   784  					{MinTime: 10, MaxTime: 20},
   785  				}},
   786  				{rangeStart: 20, rangeEnd: 40, blocks: []*block.Meta{
   787  					{MinTime: 21, MaxTime: 30},
   788  				}},
   789  			},
   790  		},
   791  		"blocks with different time ranges but all fitting within the input range": {
   792  			timeRange: 40,
   793  			blocks: []*block.Meta{
   794  				{MinTime: 0, MaxTime: 20},
   795  				{MinTime: 0, MaxTime: 40},
   796  				{MinTime: 10, MaxTime: 20},
   797  				{MinTime: 20, MaxTime: 30},
   798  			},
   799  			expected: []blocksGroup{
   800  				{rangeStart: 0, rangeEnd: 40, blocks: []*block.Meta{
   801  					{MinTime: 0, MaxTime: 20},
   802  					{MinTime: 0, MaxTime: 40},
   803  					{MinTime: 10, MaxTime: 20},
   804  					{MinTime: 20, MaxTime: 30},
   805  				}},
   806  			},
   807  		},
   808  		"2 different range": {
   809  			timeRange: 4 * blockRange,
   810  			blocks: []*block.Meta{
   811  				{MinTime: model.Time(blockRange), MaxTime: model.Time(2 * blockRange)},
   812  				{MinTime: model.Time(blockRange), MaxTime: model.Time(2 * blockRange)},
   813  				{MinTime: model.Time(4*blockRange) + 1, MaxTime: model.Time(5 * blockRange)},
   814  				{MinTime: model.Time(7 * blockRange), MaxTime: model.Time(8 * blockRange)},
   815  			},
   816  			expected: []blocksGroup{
   817  				{
   818  					rangeStart: 0, rangeEnd: 4 * blockRange,
   819  					blocks: []*block.Meta{
   820  						{MinTime: model.Time(blockRange), MaxTime: model.Time(2 * blockRange)},
   821  						{MinTime: model.Time(blockRange), MaxTime: model.Time(2 * blockRange)},
   822  					},
   823  				},
   824  				{
   825  					rangeStart: 4 * blockRange, rangeEnd: 8 * blockRange,
   826  					blocks: []*block.Meta{
   827  						{MinTime: model.Time(4*blockRange) + 1, MaxTime: model.Time(5 * blockRange)},
   828  						{MinTime: model.Time(7 * blockRange), MaxTime: model.Time(8 * blockRange)},
   829  					},
   830  				},
   831  			},
   832  		},
   833  	}
   834  
   835  	for testName, testData := range tests {
   836  		t.Run(testName, func(t *testing.T) {
   837  			actual := groupBlocksByRange(testData.blocks, testData.timeRange)
   838  			assert.Equal(t, testData.expected, actual)
   839  		})
   840  	}
   841  }