github.com/thanos-io/thanos@v0.32.5/pkg/dedup/chunk_iter.go (about) 1 // Copyright (c) The Thanos Authors. 2 // Licensed under the Apache License 2.0. 3 4 package dedup 5 6 import ( 7 "bytes" 8 "container/heap" 9 10 "github.com/prometheus/prometheus/storage" 11 "github.com/prometheus/prometheus/tsdb/chunkenc" 12 "github.com/prometheus/prometheus/tsdb/chunks" 13 14 "github.com/thanos-io/thanos/pkg/compact/downsample" 15 ) 16 17 // NewChunkSeriesMerger merges several chunk series into one. 18 // Deduplication is based on penalty based deduplication algorithm without handling counter reset. 19 func NewChunkSeriesMerger() storage.VerticalChunkSeriesMergeFunc { 20 return func(series ...storage.ChunkSeries) storage.ChunkSeries { 21 if len(series) == 0 { 22 return nil 23 } 24 return &storage.ChunkSeriesEntry{ 25 Lset: series[0].Labels(), 26 ChunkIteratorFn: func(iterator chunks.Iterator) chunks.Iterator { 27 iterators := make([]chunks.Iterator, 0, len(series)) 28 for _, s := range series { 29 iterators = append(iterators, s.Iterator(nil)) 30 } 31 return &dedupChunksIterator{ 32 iterators: iterators, 33 } 34 }, 35 } 36 } 37 } 38 39 type dedupChunksIterator struct { 40 iterators []chunks.Iterator 41 h chunkIteratorHeap 42 43 err error 44 curr chunks.Meta 45 } 46 47 func (d *dedupChunksIterator) At() chunks.Meta { 48 return d.curr 49 } 50 51 // Next method is almost the same as https://github.com/prometheus/prometheus/blob/v2.27.1/storage/merge.go#L615. 52 // The difference is that it handles both XOR and Aggr chunk Encoding. 53 func (d *dedupChunksIterator) Next() bool { 54 if d.h == nil { 55 for _, iter := range d.iterators { 56 if iter.Next() { 57 heap.Push(&d.h, iter) 58 } 59 } 60 } 61 if len(d.h) == 0 { 62 return false 63 } 64 65 iter := heap.Pop(&d.h).(chunks.Iterator) 66 d.curr = iter.At() 67 if iter.Next() { 68 heap.Push(&d.h, iter) 69 } 70 71 var ( 72 om = newOverlappingMerger() 73 oMaxTime = d.curr.MaxTime 74 prev = d.curr 75 ) 76 77 // Detect overlaps to compact. 78 for len(d.h) > 0 { 79 // Get the next oldest chunk by min, then max time. 80 next := d.h[0].At() 81 if next.MinTime > oMaxTime { 82 // No overlap with current one. 83 break 84 } 85 86 if next.MinTime == prev.MinTime && 87 next.MaxTime == prev.MaxTime && 88 bytes.Equal(next.Chunk.Bytes(), prev.Chunk.Bytes()) { 89 // 1:1 duplicates, skip it. 90 } else { 91 // We operate on same series, so labels does not matter here. 92 om.addChunk(next) 93 94 if next.MaxTime > oMaxTime { 95 oMaxTime = next.MaxTime 96 } 97 prev = next 98 } 99 100 iter := heap.Pop(&d.h).(chunks.Iterator) 101 if iter.Next() { 102 heap.Push(&d.h, iter) 103 } 104 } 105 if om.empty() { 106 return true 107 } 108 109 iter = om.iterator(d.curr) 110 if !iter.Next() { 111 if d.err = iter.Err(); d.err != nil { 112 return false 113 } 114 panic("unexpected seriesToChunkEncoder lack of iterations") 115 } 116 d.curr = iter.At() 117 if iter.Next() { 118 heap.Push(&d.h, iter) 119 } 120 return true 121 } 122 123 func (d *dedupChunksIterator) Err() error { 124 return d.err 125 } 126 127 type chunkIteratorHeap []chunks.Iterator 128 129 func (h chunkIteratorHeap) Len() int { return len(h) } 130 func (h chunkIteratorHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } 131 132 func (h chunkIteratorHeap) Less(i, j int) bool { 133 at := h[i].At() 134 bt := h[j].At() 135 if at.MinTime == bt.MinTime { 136 return at.MaxTime < bt.MaxTime 137 } 138 return at.MinTime < bt.MinTime 139 } 140 141 func (h *chunkIteratorHeap) Push(x interface{}) { 142 *h = append(*h, x.(chunks.Iterator)) 143 } 144 145 func (h *chunkIteratorHeap) Pop() interface{} { 146 old := *h 147 n := len(old) 148 x := old[n-1] 149 *h = old[0 : n-1] 150 return x 151 } 152 153 type overlappingMerger struct { 154 xorIterators []chunkenc.Iterator 155 aggrIterators [5][]chunkenc.Iterator 156 157 samplesMergeFunc func(a, b chunkenc.Iterator) chunkenc.Iterator 158 } 159 160 func newOverlappingMerger() *overlappingMerger { 161 return &overlappingMerger{ 162 samplesMergeFunc: func(a, b chunkenc.Iterator) chunkenc.Iterator { 163 it := noopAdjustableSeriesIterator{a} 164 return newDedupSeriesIterator(it, noopAdjustableSeriesIterator{b}) 165 }, 166 } 167 } 168 169 func (o *overlappingMerger) addChunk(chk chunks.Meta) { 170 switch chk.Chunk.Encoding() { 171 case chunkenc.EncXOR: 172 o.xorIterators = append(o.xorIterators, chk.Chunk.Iterator(nil)) 173 case downsample.ChunkEncAggr: 174 aggrChk := chk.Chunk.(*downsample.AggrChunk) 175 for i := downsample.AggrCount; i <= downsample.AggrCounter; i++ { 176 if c, err := aggrChk.Get(i); err == nil { 177 o.aggrIterators[i] = append(o.aggrIterators[i], c.Iterator(nil)) 178 } 179 } 180 } 181 } 182 183 func (o *overlappingMerger) empty() bool { 184 // OverlappingMerger only contains either xor chunk or aggr chunk. 185 // If xor chunks are present then we don't need to check aggr chunks. 186 if len(o.xorIterators) > 0 { 187 return false 188 } 189 return len(o.aggrIterators[downsample.AggrCount]) == 0 190 } 191 192 // Return a chunk iterator based on the encoding of base chunk. 193 func (o *overlappingMerger) iterator(baseChk chunks.Meta) chunks.Iterator { 194 var it chunkenc.Iterator 195 switch baseChk.Chunk.Encoding() { 196 case chunkenc.EncXOR: 197 // If XOR encoding, we need to deduplicate the samples and re-encode them to chunks. 198 return storage.NewSeriesToChunkEncoder(&storage.SeriesEntry{ 199 SampleIteratorFn: func(_ chunkenc.Iterator) chunkenc.Iterator { 200 it = baseChk.Chunk.Iterator(nil) 201 for _, i := range o.xorIterators { 202 it = o.samplesMergeFunc(it, i) 203 } 204 return it 205 }}).Iterator(nil) 206 207 case downsample.ChunkEncAggr: 208 // If Aggr encoding, each aggregated chunks need to be expanded and deduplicated, 209 // then re-encoded into Aggr chunks. 210 aggrChk := baseChk.Chunk.(*downsample.AggrChunk) 211 samplesIter := [5]chunkenc.Iterator{} 212 for i := downsample.AggrCount; i <= downsample.AggrCounter; i++ { 213 if c, err := aggrChk.Get(i); err == nil { 214 o.aggrIterators[i] = append(o.aggrIterators[i], c.Iterator(nil)) 215 } 216 217 if len(o.aggrIterators[i]) > 0 { 218 for _, j := range o.aggrIterators[i][1:] { 219 o.aggrIterators[i][0] = o.samplesMergeFunc(o.aggrIterators[i][0], j) 220 } 221 samplesIter[i] = o.aggrIterators[i][0] 222 } else { 223 samplesIter[i] = nil 224 } 225 } 226 227 return newAggrChunkIterator(samplesIter) 228 } 229 230 // Impossible for now. 231 return nil 232 } 233 234 type aggrChunkIterator struct { 235 iters [5]chunkenc.Iterator 236 curr chunks.Meta 237 countChkIter chunks.Iterator 238 239 err error 240 } 241 242 func newAggrChunkIterator(iters [5]chunkenc.Iterator) chunks.Iterator { 243 return &aggrChunkIterator{ 244 iters: iters, 245 countChkIter: storage.NewSeriesToChunkEncoder(&storage.SeriesEntry{ 246 SampleIteratorFn: func(_ chunkenc.Iterator) chunkenc.Iterator { 247 return iters[downsample.AggrCount] 248 }, 249 }).Iterator(nil), 250 } 251 } 252 253 func (a *aggrChunkIterator) Next() bool { 254 if !a.countChkIter.Next() { 255 if err := a.countChkIter.Err(); err != nil { 256 a.err = err 257 } 258 return false 259 } 260 261 countChk := a.countChkIter.At() 262 mint := countChk.MinTime 263 maxt := countChk.MaxTime 264 265 var ( 266 chks [5]chunkenc.Chunk 267 chk *chunks.Meta 268 err error 269 ) 270 271 chks[downsample.AggrCount] = countChk.Chunk 272 for i := downsample.AggrSum; i <= downsample.AggrCounter; i++ { 273 chk, err = a.toChunk(i, mint, maxt) 274 if err != nil { 275 a.err = err 276 return false 277 } 278 if chk != nil { 279 chks[i] = chk.Chunk 280 } 281 } 282 283 a.curr = chunks.Meta{ 284 MinTime: mint, 285 MaxTime: maxt, 286 Chunk: downsample.EncodeAggrChunk(chks), 287 } 288 return true 289 } 290 291 func (a *aggrChunkIterator) At() chunks.Meta { 292 return a.curr 293 } 294 295 func (a *aggrChunkIterator) Err() error { 296 return a.err 297 } 298 299 func (a *aggrChunkIterator) toChunk(at downsample.AggrType, minTime, maxTime int64) (*chunks.Meta, error) { 300 if a.iters[at] == nil { 301 return nil, nil 302 } 303 c := chunkenc.NewXORChunk() 304 appender, err := c.Appender() 305 if err != nil { 306 return nil, err 307 } 308 309 it := NewBoundedSeriesIterator(a.iters[at], minTime, maxTime) 310 311 var ( 312 lastT int64 313 lastV float64 314 ) 315 for it.Next() != chunkenc.ValNone { 316 lastT, lastV = it.At() 317 appender.Append(lastT, lastV) 318 } 319 if err := it.Err(); err != nil { 320 return nil, err 321 } 322 323 // No sample in the required time range. 324 if lastT == 0 && lastV == 0 { 325 return nil, nil 326 } 327 328 // Encode last sample for AggrCounter. 329 if at == downsample.AggrCounter { 330 appender.Append(lastT, lastV) 331 } 332 333 return &chunks.Meta{ 334 MinTime: minTime, 335 MaxTime: maxTime, 336 Chunk: c, 337 }, nil 338 }