github.com/thanos-io/thanos@v0.32.5/pkg/dedup/chunk_iter_test.go (about)

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package dedup
     5  
     6  import (
     7  	"testing"
     8  
     9  	"github.com/prometheus/prometheus/model/labels"
    10  	"github.com/prometheus/prometheus/storage"
    11  	"github.com/prometheus/prometheus/tsdb/chunkenc"
    12  	"github.com/prometheus/prometheus/tsdb/chunks"
    13  	"github.com/prometheus/prometheus/tsdb/tsdbutil"
    14  
    15  	"github.com/efficientgo/core/testutil"
    16  
    17  	"github.com/thanos-io/thanos/pkg/compact/downsample"
    18  )
    19  
    20  func TestDedupChunkSeriesMerger(t *testing.T) {
    21  	m := NewChunkSeriesMerger()
    22  
    23  	for _, tc := range []struct {
    24  		name     string
    25  		input    []storage.ChunkSeries
    26  		expected storage.ChunkSeries
    27  	}{
    28  		{
    29  			name: "single empty series",
    30  			input: []storage.ChunkSeries{
    31  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), nil),
    32  			},
    33  			expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), nil),
    34  		},
    35  		{
    36  			name: "single series",
    37  			input: []storage.ChunkSeries{
    38  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}}),
    39  			},
    40  			expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}}),
    41  		},
    42  		{
    43  			name: "two empty series",
    44  			input: []storage.ChunkSeries{
    45  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), nil),
    46  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), nil),
    47  			},
    48  			expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), nil),
    49  		},
    50  		{
    51  			name: "two non overlapping",
    52  			input: []storage.ChunkSeries{
    53  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}, sample{5, 5}}),
    54  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{7, 7}, sample{9, 9}}, []tsdbutil.Sample{sample{10, 10}}),
    55  			},
    56  			expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}, sample{5, 5}}, []tsdbutil.Sample{sample{7, 7}, sample{9, 9}}, []tsdbutil.Sample{sample{10, 10}}),
    57  		},
    58  		{
    59  			name: "two overlapping",
    60  			input: []storage.ChunkSeries{
    61  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}, sample{8, 8}}),
    62  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{7, 7}, sample{9, 9}}, []tsdbutil.Sample{sample{10, 10}}),
    63  			},
    64  			expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}, sample{8, 8}}, []tsdbutil.Sample{sample{10, 10}}),
    65  		},
    66  		{
    67  			name: "two overlapping with large time diff",
    68  			input: []storage.ChunkSeries{
    69  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{2, 2}, sample{5008, 5008}}),
    70  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{7, 7}, sample{9, 9}}, []tsdbutil.Sample{sample{10, 10}}),
    71  			},
    72  			// sample{5008, 5008} is added to the result due to its large timestamp.
    73  			expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{5008, 5008}}),
    74  		},
    75  		{
    76  			name: "two duplicated",
    77  			input: []storage.ChunkSeries{
    78  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}),
    79  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{2, 2}, sample{3, 3}, sample{5, 5}}),
    80  			},
    81  			expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}),
    82  		},
    83  		{
    84  			name: "three overlapping",
    85  			input: []storage.ChunkSeries{
    86  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}),
    87  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{2, 2}, sample{3, 3}, sample{6, 6}}),
    88  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{0, 0}, sample{4, 4}}),
    89  			},
    90  			// only samples from the last series are retained due to high penalty.
    91  			expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{0, 0}, sample{4, 4}}),
    92  		},
    93  		{
    94  			name: "three in chained overlap",
    95  			input: []storage.ChunkSeries{
    96  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}),
    97  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{4, 4}, sample{6, 66}}),
    98  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{6, 6}, sample{10, 10}}),
    99  			},
   100  			// only samples from the last series are retained due to high penalty.
   101  			expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}),
   102  		},
   103  		{
   104  			name: "three in chained overlap complex",
   105  			input: []storage.ChunkSeries{
   106  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{0, 0}, sample{5, 5}}, []tsdbutil.Sample{sample{10, 10}, sample{15, 15}}),
   107  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{2, 2}, sample{20, 20}}, []tsdbutil.Sample{sample{25, 25}, sample{30, 30}}),
   108  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{18, 18}, sample{26, 26}}, []tsdbutil.Sample{sample{31, 31}, sample{35, 35}}),
   109  			},
   110  			expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"),
   111  				[]tsdbutil.Sample{sample{0, 0}, sample{5, 5}},
   112  				[]tsdbutil.Sample{sample{31, 31}, sample{35, 35}},
   113  			),
   114  		},
   115  		{
   116  			name: "110 overlapping samples",
   117  			input: []storage.ChunkSeries{
   118  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), tsdbutil.GenerateSamples(0, 110)), // [0 - 110)
   119  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), tsdbutil.GenerateSamples(60, 50)), // [60 - 110)
   120  			},
   121  			expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"),
   122  				tsdbutil.GenerateSamples(0, 110),
   123  			),
   124  		},
   125  		{
   126  			name: "150 overlapping samples, no chunk splitting due to penalty deduplication",
   127  			input: []storage.ChunkSeries{
   128  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), tsdbutil.GenerateSamples(0, 90)),  // [0 - 90)
   129  				storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), tsdbutil.GenerateSamples(60, 90)), // [90 - 150)
   130  			},
   131  			expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"),
   132  				tsdbutil.GenerateSamples(0, 90),
   133  			),
   134  		},
   135  	} {
   136  		t.Run(tc.name, func(t *testing.T) {
   137  			merged := m(tc.input...)
   138  			testutil.Equals(t, tc.expected.Labels(), merged.Labels())
   139  			actChks, actErr := storage.ExpandChunks(merged.Iterator(nil))
   140  			expChks, expErr := storage.ExpandChunks(tc.expected.Iterator(nil))
   141  
   142  			testutil.Equals(t, expErr, actErr)
   143  			testutil.Equals(t, expChks, actChks)
   144  		})
   145  	}
   146  }
   147  
   148  func TestDedupChunkSeriesMergerDownsampledChunks(t *testing.T) {
   149  	m := NewChunkSeriesMerger()
   150  
   151  	defaultLabels := labels.FromStrings("bar", "baz")
   152  	emptySamples := downsample.SamplesFromTSDBSamples([]tsdbutil.Sample{})
   153  	// Samples are created with step 1m. So the 5m downsampled chunk has 2 samples.
   154  	samples1 := downsample.SamplesFromTSDBSamples(createSamplesWithStep(0, 10, 60*1000))
   155  	// Non overlapping samples with samples1. 5m downsampled chunk has 2 samples.
   156  	samples2 := downsample.SamplesFromTSDBSamples(createSamplesWithStep(600000, 10, 60*1000))
   157  	// Overlapped with samples1.
   158  	samples3 := downsample.SamplesFromTSDBSamples(createSamplesWithStep(120000, 10, 60*1000))
   159  
   160  	for _, tc := range []struct {
   161  		name     string
   162  		input    []storage.ChunkSeries
   163  		expected storage.ChunkSeries
   164  	}{
   165  		{
   166  			name: "single empty series",
   167  			input: []storage.ChunkSeries{
   168  				&storage.ChunkSeriesEntry{
   169  					Lset: defaultLabels,
   170  					ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   171  						return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(emptySamples, downsample.ResLevel1)...)
   172  					},
   173  				},
   174  			},
   175  			expected: &storage.ChunkSeriesEntry{
   176  				Lset: defaultLabels,
   177  				ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   178  					return storage.NewListChunkSeriesIterator()
   179  				},
   180  			},
   181  		},
   182  		{
   183  			name: "single series",
   184  			input: []storage.ChunkSeries{
   185  				&storage.ChunkSeriesEntry{
   186  					Lset: defaultLabels,
   187  					ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   188  						return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples1, downsample.ResLevel1)...)
   189  					},
   190  				},
   191  			},
   192  			expected: &storage.ChunkSeriesEntry{
   193  				Lset: defaultLabels,
   194  				ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   195  					return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples1, downsample.ResLevel1)...)
   196  				},
   197  			},
   198  		},
   199  		{
   200  			name: "two empty series",
   201  			input: []storage.ChunkSeries{
   202  				&storage.ChunkSeriesEntry{
   203  					Lset: defaultLabels,
   204  					ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   205  						return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(emptySamples, downsample.ResLevel1)...)
   206  					},
   207  				},
   208  				&storage.ChunkSeriesEntry{
   209  					Lset: defaultLabels,
   210  					ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   211  						return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(emptySamples, downsample.ResLevel1)...)
   212  					},
   213  				},
   214  			},
   215  			expected: &storage.ChunkSeriesEntry{
   216  				Lset: defaultLabels,
   217  				ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   218  					return storage.NewListChunkSeriesIterator()
   219  				},
   220  			},
   221  		},
   222  		{
   223  			name: "two non overlapping series",
   224  			input: []storage.ChunkSeries{
   225  				&storage.ChunkSeriesEntry{
   226  					Lset: defaultLabels,
   227  					ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   228  						return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples1, downsample.ResLevel1)...)
   229  					},
   230  				},
   231  				&storage.ChunkSeriesEntry{
   232  					Lset: defaultLabels,
   233  					ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   234  						return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples2, downsample.ResLevel1)...)
   235  					},
   236  				},
   237  			},
   238  			expected: &storage.ChunkSeriesEntry{
   239  				Lset: defaultLabels,
   240  				ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   241  					return storage.NewListChunkSeriesIterator(
   242  						append(downsample.DownsampleRaw(samples1, downsample.ResLevel1),
   243  							downsample.DownsampleRaw(samples2, downsample.ResLevel1)...)...)
   244  				},
   245  			},
   246  		},
   247  		{
   248  			// 1:1 duplicated chunks are deduplicated.
   249  			name: "two same series",
   250  			input: []storage.ChunkSeries{
   251  				&storage.ChunkSeriesEntry{
   252  					Lset: defaultLabels,
   253  					ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   254  						return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples1, downsample.ResLevel1)...)
   255  					},
   256  				},
   257  				&storage.ChunkSeriesEntry{
   258  					Lset: defaultLabels,
   259  					ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   260  						return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples1, downsample.ResLevel1)...)
   261  					},
   262  				},
   263  			},
   264  			expected: &storage.ChunkSeriesEntry{
   265  				Lset: defaultLabels,
   266  				ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   267  					return storage.NewListChunkSeriesIterator(
   268  						downsample.DownsampleRaw(samples1, downsample.ResLevel1)...)
   269  				},
   270  			},
   271  		},
   272  		{
   273  			name: "two overlapping series",
   274  			input: []storage.ChunkSeries{
   275  				&storage.ChunkSeriesEntry{
   276  					Lset: defaultLabels,
   277  					ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   278  						return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples1, downsample.ResLevel1)...)
   279  					},
   280  				},
   281  				&storage.ChunkSeriesEntry{
   282  					Lset: defaultLabels,
   283  					ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   284  						return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples3, downsample.ResLevel1)...)
   285  					},
   286  				},
   287  			},
   288  			expected: &storage.ChunkSeriesEntry{
   289  				Lset: defaultLabels,
   290  				ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator {
   291  					samples := [][]tsdbutil.Sample{
   292  						{sample{299999, 3}, sample{540000, 5}},
   293  						{sample{299999, 540000}, sample{540000, 2100000}},
   294  						{sample{299999, 120000}, sample{540000, 300000}},
   295  						{sample{299999, 240000}, sample{540000, 540000}},
   296  						{sample{299999, 240000}, sample{299999, 240000}},
   297  					}
   298  					var chks [5]chunkenc.Chunk
   299  					for i, s := range samples {
   300  						chk, err := tsdbutil.ChunkFromSamples(s)
   301  						testutil.Ok(t, err)
   302  						chks[i] = chk.Chunk
   303  					}
   304  					return storage.NewListChunkSeriesIterator(chunks.Meta{
   305  						MinTime: 299999,
   306  						MaxTime: 540000,
   307  						Chunk:   downsample.EncodeAggrChunk(chks),
   308  					})
   309  				},
   310  			},
   311  		},
   312  	} {
   313  		t.Run(tc.name, func(t *testing.T) {
   314  			merged := m(tc.input...)
   315  			testutil.Equals(t, tc.expected.Labels(), merged.Labels())
   316  			actChks, actErr := storage.ExpandChunks(merged.Iterator(nil))
   317  			expChks, expErr := storage.ExpandChunks(tc.expected.Iterator(nil))
   318  
   319  			testutil.Equals(t, expErr, actErr)
   320  			testutil.Equals(t, expChks, actChks)
   321  		})
   322  	}
   323  }
   324  
   325  func createSamplesWithStep(start, numOfSamples, step int) []tsdbutil.Sample {
   326  	res := make([]tsdbutil.Sample, numOfSamples)
   327  	cur := start
   328  	for i := 0; i < numOfSamples; i++ {
   329  		res[i] = sample{t: int64(cur), f: float64(cur)}
   330  		cur += step
   331  	}
   332  
   333  	return res
   334  }