github.com/thanos-io/thanos@v0.32.5/pkg/dedup/chunk_iter_test.go (about) 1 // Copyright (c) The Thanos Authors. 2 // Licensed under the Apache License 2.0. 3 4 package dedup 5 6 import ( 7 "testing" 8 9 "github.com/prometheus/prometheus/model/labels" 10 "github.com/prometheus/prometheus/storage" 11 "github.com/prometheus/prometheus/tsdb/chunkenc" 12 "github.com/prometheus/prometheus/tsdb/chunks" 13 "github.com/prometheus/prometheus/tsdb/tsdbutil" 14 15 "github.com/efficientgo/core/testutil" 16 17 "github.com/thanos-io/thanos/pkg/compact/downsample" 18 ) 19 20 func TestDedupChunkSeriesMerger(t *testing.T) { 21 m := NewChunkSeriesMerger() 22 23 for _, tc := range []struct { 24 name string 25 input []storage.ChunkSeries 26 expected storage.ChunkSeries 27 }{ 28 { 29 name: "single empty series", 30 input: []storage.ChunkSeries{ 31 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), nil), 32 }, 33 expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), nil), 34 }, 35 { 36 name: "single series", 37 input: []storage.ChunkSeries{ 38 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}}), 39 }, 40 expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}}), 41 }, 42 { 43 name: "two empty series", 44 input: []storage.ChunkSeries{ 45 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), nil), 46 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), nil), 47 }, 48 expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), nil), 49 }, 50 { 51 name: "two non overlapping", 52 input: []storage.ChunkSeries{ 53 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}, sample{5, 5}}), 54 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{7, 7}, sample{9, 9}}, []tsdbutil.Sample{sample{10, 10}}), 55 }, 56 expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}, sample{5, 5}}, []tsdbutil.Sample{sample{7, 7}, sample{9, 9}}, []tsdbutil.Sample{sample{10, 10}}), 57 }, 58 { 59 name: "two overlapping", 60 input: []storage.ChunkSeries{ 61 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}, sample{8, 8}}), 62 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{7, 7}, sample{9, 9}}, []tsdbutil.Sample{sample{10, 10}}), 63 }, 64 expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}, sample{8, 8}}, []tsdbutil.Sample{sample{10, 10}}), 65 }, 66 { 67 name: "two overlapping with large time diff", 68 input: []storage.ChunkSeries{ 69 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{2, 2}, sample{5008, 5008}}), 70 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{7, 7}, sample{9, 9}}, []tsdbutil.Sample{sample{10, 10}}), 71 }, 72 // sample{5008, 5008} is added to the result due to its large timestamp. 73 expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{5008, 5008}}), 74 }, 75 { 76 name: "two duplicated", 77 input: []storage.ChunkSeries{ 78 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}), 79 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{2, 2}, sample{3, 3}, sample{5, 5}}), 80 }, 81 expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}), 82 }, 83 { 84 name: "three overlapping", 85 input: []storage.ChunkSeries{ 86 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}), 87 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{2, 2}, sample{3, 3}, sample{6, 6}}), 88 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{0, 0}, sample{4, 4}}), 89 }, 90 // only samples from the last series are retained due to high penalty. 91 expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{0, 0}, sample{4, 4}}), 92 }, 93 { 94 name: "three in chained overlap", 95 input: []storage.ChunkSeries{ 96 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}), 97 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{4, 4}, sample{6, 66}}), 98 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{6, 6}, sample{10, 10}}), 99 }, 100 // only samples from the last series are retained due to high penalty. 101 expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}), 102 }, 103 { 104 name: "three in chained overlap complex", 105 input: []storage.ChunkSeries{ 106 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{0, 0}, sample{5, 5}}, []tsdbutil.Sample{sample{10, 10}, sample{15, 15}}), 107 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{2, 2}, sample{20, 20}}, []tsdbutil.Sample{sample{25, 25}, sample{30, 30}}), 108 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{18, 18}, sample{26, 26}}, []tsdbutil.Sample{sample{31, 31}, sample{35, 35}}), 109 }, 110 expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), 111 []tsdbutil.Sample{sample{0, 0}, sample{5, 5}}, 112 []tsdbutil.Sample{sample{31, 31}, sample{35, 35}}, 113 ), 114 }, 115 { 116 name: "110 overlapping samples", 117 input: []storage.ChunkSeries{ 118 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), tsdbutil.GenerateSamples(0, 110)), // [0 - 110) 119 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), tsdbutil.GenerateSamples(60, 50)), // [60 - 110) 120 }, 121 expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), 122 tsdbutil.GenerateSamples(0, 110), 123 ), 124 }, 125 { 126 name: "150 overlapping samples, no chunk splitting due to penalty deduplication", 127 input: []storage.ChunkSeries{ 128 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), tsdbutil.GenerateSamples(0, 90)), // [0 - 90) 129 storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), tsdbutil.GenerateSamples(60, 90)), // [90 - 150) 130 }, 131 expected: storage.NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), 132 tsdbutil.GenerateSamples(0, 90), 133 ), 134 }, 135 } { 136 t.Run(tc.name, func(t *testing.T) { 137 merged := m(tc.input...) 138 testutil.Equals(t, tc.expected.Labels(), merged.Labels()) 139 actChks, actErr := storage.ExpandChunks(merged.Iterator(nil)) 140 expChks, expErr := storage.ExpandChunks(tc.expected.Iterator(nil)) 141 142 testutil.Equals(t, expErr, actErr) 143 testutil.Equals(t, expChks, actChks) 144 }) 145 } 146 } 147 148 func TestDedupChunkSeriesMergerDownsampledChunks(t *testing.T) { 149 m := NewChunkSeriesMerger() 150 151 defaultLabels := labels.FromStrings("bar", "baz") 152 emptySamples := downsample.SamplesFromTSDBSamples([]tsdbutil.Sample{}) 153 // Samples are created with step 1m. So the 5m downsampled chunk has 2 samples. 154 samples1 := downsample.SamplesFromTSDBSamples(createSamplesWithStep(0, 10, 60*1000)) 155 // Non overlapping samples with samples1. 5m downsampled chunk has 2 samples. 156 samples2 := downsample.SamplesFromTSDBSamples(createSamplesWithStep(600000, 10, 60*1000)) 157 // Overlapped with samples1. 158 samples3 := downsample.SamplesFromTSDBSamples(createSamplesWithStep(120000, 10, 60*1000)) 159 160 for _, tc := range []struct { 161 name string 162 input []storage.ChunkSeries 163 expected storage.ChunkSeries 164 }{ 165 { 166 name: "single empty series", 167 input: []storage.ChunkSeries{ 168 &storage.ChunkSeriesEntry{ 169 Lset: defaultLabels, 170 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 171 return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(emptySamples, downsample.ResLevel1)...) 172 }, 173 }, 174 }, 175 expected: &storage.ChunkSeriesEntry{ 176 Lset: defaultLabels, 177 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 178 return storage.NewListChunkSeriesIterator() 179 }, 180 }, 181 }, 182 { 183 name: "single series", 184 input: []storage.ChunkSeries{ 185 &storage.ChunkSeriesEntry{ 186 Lset: defaultLabels, 187 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 188 return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples1, downsample.ResLevel1)...) 189 }, 190 }, 191 }, 192 expected: &storage.ChunkSeriesEntry{ 193 Lset: defaultLabels, 194 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 195 return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples1, downsample.ResLevel1)...) 196 }, 197 }, 198 }, 199 { 200 name: "two empty series", 201 input: []storage.ChunkSeries{ 202 &storage.ChunkSeriesEntry{ 203 Lset: defaultLabels, 204 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 205 return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(emptySamples, downsample.ResLevel1)...) 206 }, 207 }, 208 &storage.ChunkSeriesEntry{ 209 Lset: defaultLabels, 210 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 211 return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(emptySamples, downsample.ResLevel1)...) 212 }, 213 }, 214 }, 215 expected: &storage.ChunkSeriesEntry{ 216 Lset: defaultLabels, 217 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 218 return storage.NewListChunkSeriesIterator() 219 }, 220 }, 221 }, 222 { 223 name: "two non overlapping series", 224 input: []storage.ChunkSeries{ 225 &storage.ChunkSeriesEntry{ 226 Lset: defaultLabels, 227 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 228 return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples1, downsample.ResLevel1)...) 229 }, 230 }, 231 &storage.ChunkSeriesEntry{ 232 Lset: defaultLabels, 233 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 234 return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples2, downsample.ResLevel1)...) 235 }, 236 }, 237 }, 238 expected: &storage.ChunkSeriesEntry{ 239 Lset: defaultLabels, 240 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 241 return storage.NewListChunkSeriesIterator( 242 append(downsample.DownsampleRaw(samples1, downsample.ResLevel1), 243 downsample.DownsampleRaw(samples2, downsample.ResLevel1)...)...) 244 }, 245 }, 246 }, 247 { 248 // 1:1 duplicated chunks are deduplicated. 249 name: "two same series", 250 input: []storage.ChunkSeries{ 251 &storage.ChunkSeriesEntry{ 252 Lset: defaultLabels, 253 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 254 return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples1, downsample.ResLevel1)...) 255 }, 256 }, 257 &storage.ChunkSeriesEntry{ 258 Lset: defaultLabels, 259 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 260 return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples1, downsample.ResLevel1)...) 261 }, 262 }, 263 }, 264 expected: &storage.ChunkSeriesEntry{ 265 Lset: defaultLabels, 266 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 267 return storage.NewListChunkSeriesIterator( 268 downsample.DownsampleRaw(samples1, downsample.ResLevel1)...) 269 }, 270 }, 271 }, 272 { 273 name: "two overlapping series", 274 input: []storage.ChunkSeries{ 275 &storage.ChunkSeriesEntry{ 276 Lset: defaultLabels, 277 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 278 return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples1, downsample.ResLevel1)...) 279 }, 280 }, 281 &storage.ChunkSeriesEntry{ 282 Lset: defaultLabels, 283 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 284 return storage.NewListChunkSeriesIterator(downsample.DownsampleRaw(samples3, downsample.ResLevel1)...) 285 }, 286 }, 287 }, 288 expected: &storage.ChunkSeriesEntry{ 289 Lset: defaultLabels, 290 ChunkIteratorFn: func(chunks.Iterator) chunks.Iterator { 291 samples := [][]tsdbutil.Sample{ 292 {sample{299999, 3}, sample{540000, 5}}, 293 {sample{299999, 540000}, sample{540000, 2100000}}, 294 {sample{299999, 120000}, sample{540000, 300000}}, 295 {sample{299999, 240000}, sample{540000, 540000}}, 296 {sample{299999, 240000}, sample{299999, 240000}}, 297 } 298 var chks [5]chunkenc.Chunk 299 for i, s := range samples { 300 chk, err := tsdbutil.ChunkFromSamples(s) 301 testutil.Ok(t, err) 302 chks[i] = chk.Chunk 303 } 304 return storage.NewListChunkSeriesIterator(chunks.Meta{ 305 MinTime: 299999, 306 MaxTime: 540000, 307 Chunk: downsample.EncodeAggrChunk(chks), 308 }) 309 }, 310 }, 311 }, 312 } { 313 t.Run(tc.name, func(t *testing.T) { 314 merged := m(tc.input...) 315 testutil.Equals(t, tc.expected.Labels(), merged.Labels()) 316 actChks, actErr := storage.ExpandChunks(merged.Iterator(nil)) 317 expChks, expErr := storage.ExpandChunks(tc.expected.Iterator(nil)) 318 319 testutil.Equals(t, expErr, actErr) 320 testutil.Equals(t, expChks, actChks) 321 }) 322 } 323 } 324 325 func createSamplesWithStep(start, numOfSamples, step int) []tsdbutil.Sample { 326 res := make([]tsdbutil.Sample, numOfSamples) 327 cur := start 328 for i := 0; i < numOfSamples; i++ { 329 res[i] = sample{t: int64(cur), f: float64(cur)} 330 cur += step 331 } 332 333 return res 334 }