github.com/thanos-io/thanos@v0.32.5/pkg/dedup/chunk_iter.go (about)

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package dedup
     5  
     6  import (
     7  	"bytes"
     8  	"container/heap"
     9  
    10  	"github.com/prometheus/prometheus/storage"
    11  	"github.com/prometheus/prometheus/tsdb/chunkenc"
    12  	"github.com/prometheus/prometheus/tsdb/chunks"
    13  
    14  	"github.com/thanos-io/thanos/pkg/compact/downsample"
    15  )
    16  
    17  // NewChunkSeriesMerger merges several chunk series into one.
    18  // Deduplication is based on penalty based deduplication algorithm without handling counter reset.
    19  func NewChunkSeriesMerger() storage.VerticalChunkSeriesMergeFunc {
    20  	return func(series ...storage.ChunkSeries) storage.ChunkSeries {
    21  		if len(series) == 0 {
    22  			return nil
    23  		}
    24  		return &storage.ChunkSeriesEntry{
    25  			Lset: series[0].Labels(),
    26  			ChunkIteratorFn: func(iterator chunks.Iterator) chunks.Iterator {
    27  				iterators := make([]chunks.Iterator, 0, len(series))
    28  				for _, s := range series {
    29  					iterators = append(iterators, s.Iterator(nil))
    30  				}
    31  				return &dedupChunksIterator{
    32  					iterators: iterators,
    33  				}
    34  			},
    35  		}
    36  	}
    37  }
    38  
// dedupChunksIterator yields deduplicated chunks merged from multiple source
// chunk iterators over the same series. Sources are ordered through a min-heap
// keyed on (MinTime, MaxTime); overlapping chunks are merged on the fly.
type dedupChunksIterator struct {
	iterators []chunks.Iterator
	// h is lazily populated on the first call to Next.
	h chunkIteratorHeap

	err  error
	curr chunks.Meta
}
    46  
// At returns the chunk the iterator currently points at.
func (d *dedupChunksIterator) At() chunks.Meta {
	return d.curr
}
    50  
// Next method is almost the same as https://github.com/prometheus/prometheus/blob/v2.27.1/storage/merge.go#L615.
// The difference is that it handles both XOR and Aggr chunk Encoding.
func (d *dedupChunksIterator) Next() bool {
	// Lazily seed the heap with every source iterator that has at least
	// one chunk.
	if d.h == nil {
		for _, iter := range d.iterators {
			if iter.Next() {
				heap.Push(&d.h, iter)
			}
		}
	}
	if len(d.h) == 0 {
		return false
	}

	// The earliest chunk becomes the current candidate; re-push its
	// iterator if it has more chunks to offer.
	iter := heap.Pop(&d.h).(chunks.Iterator)
	d.curr = iter.At()
	if iter.Next() {
		heap.Push(&d.h, iter)
	}

	var (
		om       = newOverlappingMerger()
		oMaxTime = d.curr.MaxTime
		prev     = d.curr
	)

	// Detect overlaps to compact.
	for len(d.h) > 0 {
		// Get the next oldest chunk by min, then max time.
		next := d.h[0].At()
		if next.MinTime > oMaxTime {
			// No overlap with current one.
			break
		}

		if next.MinTime == prev.MinTime &&
			next.MaxTime == prev.MaxTime &&
			bytes.Equal(next.Chunk.Bytes(), prev.Chunk.Bytes()) {
			// 1:1 duplicates, skip it.
		} else {
			// We operate on same series, so labels does not matter here.
			om.addChunk(next)

			// Extend the overlap window so transitively-overlapping
			// chunks are picked up too.
			if next.MaxTime > oMaxTime {
				oMaxTime = next.MaxTime
			}
			prev = next
		}

		iter := heap.Pop(&d.h).(chunks.Iterator)
		if iter.Next() {
			heap.Push(&d.h, iter)
		}
	}
	if om.empty() {
		return true
	}

	// Merge the current chunk with all overlapping ones and re-encode.
	iter = om.iterator(d.curr)
	if !iter.Next() {
		if d.err = iter.Err(); d.err != nil {
			return false
		}
		panic("unexpected seriesToChunkEncoder lack of iterations")
	}
	d.curr = iter.At()
	// Re-encoding may yield more than one chunk; push the remainder back
	// onto the heap so subsequent Next calls emit them in order.
	if iter.Next() {
		heap.Push(&d.h, iter)
	}
	return true
}
   122  
// Err returns the first error encountered during iteration, if any.
func (d *dedupChunksIterator) Err() error {
	return d.err
}
   126  
   127  type chunkIteratorHeap []chunks.Iterator
   128  
   129  func (h chunkIteratorHeap) Len() int      { return len(h) }
   130  func (h chunkIteratorHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
   131  
   132  func (h chunkIteratorHeap) Less(i, j int) bool {
   133  	at := h[i].At()
   134  	bt := h[j].At()
   135  	if at.MinTime == bt.MinTime {
   136  		return at.MaxTime < bt.MaxTime
   137  	}
   138  	return at.MinTime < bt.MinTime
   139  }
   140  
   141  func (h *chunkIteratorHeap) Push(x interface{}) {
   142  	*h = append(*h, x.(chunks.Iterator))
   143  }
   144  
   145  func (h *chunkIteratorHeap) Pop() interface{} {
   146  	old := *h
   147  	n := len(old)
   148  	x := old[n-1]
   149  	*h = old[0 : n-1]
   150  	return x
   151  }
   152  
// overlappingMerger accumulates iterators for chunks that overlap a base
// chunk, keyed by encoding: xorIterators for raw XOR chunks, aggrIterators
// for each of the five downsampled aggregates (count..counter).
type overlappingMerger struct {
	xorIterators  []chunkenc.Iterator
	aggrIterators [5][]chunkenc.Iterator

	// samplesMergeFunc merges two sample iterators of the same series into one.
	samplesMergeFunc func(a, b chunkenc.Iterator) chunkenc.Iterator
}
   159  
   160  func newOverlappingMerger() *overlappingMerger {
   161  	return &overlappingMerger{
   162  		samplesMergeFunc: func(a, b chunkenc.Iterator) chunkenc.Iterator {
   163  			it := noopAdjustableSeriesIterator{a}
   164  			return newDedupSeriesIterator(it, noopAdjustableSeriesIterator{b})
   165  		},
   166  	}
   167  }
   168  
   169  func (o *overlappingMerger) addChunk(chk chunks.Meta) {
   170  	switch chk.Chunk.Encoding() {
   171  	case chunkenc.EncXOR:
   172  		o.xorIterators = append(o.xorIterators, chk.Chunk.Iterator(nil))
   173  	case downsample.ChunkEncAggr:
   174  		aggrChk := chk.Chunk.(*downsample.AggrChunk)
   175  		for i := downsample.AggrCount; i <= downsample.AggrCounter; i++ {
   176  			if c, err := aggrChk.Get(i); err == nil {
   177  				o.aggrIterators[i] = append(o.aggrIterators[i], c.Iterator(nil))
   178  			}
   179  		}
   180  	}
   181  }
   182  
   183  func (o *overlappingMerger) empty() bool {
   184  	// OverlappingMerger only contains either xor chunk or aggr chunk.
   185  	// If xor chunks are present then we don't need to check aggr chunks.
   186  	if len(o.xorIterators) > 0 {
   187  		return false
   188  	}
   189  	return len(o.aggrIterators[downsample.AggrCount]) == 0
   190  }
   191  
// Return a chunk iterator based on the encoding of base chunk.
func (o *overlappingMerger) iterator(baseChk chunks.Meta) chunks.Iterator {
	var it chunkenc.Iterator
	switch baseChk.Chunk.Encoding() {
	case chunkenc.EncXOR:
		// If XOR encoding, we need to deduplicate the samples and re-encode them to chunks.
		return storage.NewSeriesToChunkEncoder(&storage.SeriesEntry{
			SampleIteratorFn: func(_ chunkenc.Iterator) chunkenc.Iterator {
				// Fold every overlapping iterator into the base chunk's samples.
				it = baseChk.Chunk.Iterator(nil)
				for _, i := range o.xorIterators {
					it = o.samplesMergeFunc(it, i)
				}
				return it
			}}).Iterator(nil)

	case downsample.ChunkEncAggr:
		// If Aggr encoding, each aggregated chunks need to be expanded and deduplicated,
		// then re-encoded into Aggr chunks.
		aggrChk := baseChk.Chunk.(*downsample.AggrChunk)
		samplesIter := [5]chunkenc.Iterator{}
		for i := downsample.AggrCount; i <= downsample.AggrCounter; i++ {
			// Include the base chunk's own sub-chunk for this aggregate
			// before merging the overlapping ones.
			if c, err := aggrChk.Get(i); err == nil {
				o.aggrIterators[i] = append(o.aggrIterators[i], c.Iterator(nil))
			}

			if len(o.aggrIterators[i]) > 0 {
				// Merge all iterators for this aggregate pairwise into slot 0.
				for _, j := range o.aggrIterators[i][1:] {
					o.aggrIterators[i][0] = o.samplesMergeFunc(o.aggrIterators[i][0], j)
				}
				samplesIter[i] = o.aggrIterators[i][0]
			} else {
				samplesIter[i] = nil
			}
		}

		return newAggrChunkIterator(samplesIter)
	}

	// Impossible for now.
	return nil
}
   233  
// aggrChunkIterator re-encodes merged per-aggregate sample iterators back
// into downsampled Aggr chunks.
type aggrChunkIterator struct {
	iters [5]chunkenc.Iterator
	curr  chunks.Meta
	// countChkIter re-encodes the count aggregate; the time range of each
	// chunk it produces bounds the corresponding emitted Aggr chunk.
	countChkIter chunks.Iterator

	err error
}
   241  
   242  func newAggrChunkIterator(iters [5]chunkenc.Iterator) chunks.Iterator {
   243  	return &aggrChunkIterator{
   244  		iters: iters,
   245  		countChkIter: storage.NewSeriesToChunkEncoder(&storage.SeriesEntry{
   246  			SampleIteratorFn: func(_ chunkenc.Iterator) chunkenc.Iterator {
   247  				return iters[downsample.AggrCount]
   248  			},
   249  		}).Iterator(nil),
   250  	}
   251  }
   252  
   253  func (a *aggrChunkIterator) Next() bool {
   254  	if !a.countChkIter.Next() {
   255  		if err := a.countChkIter.Err(); err != nil {
   256  			a.err = err
   257  		}
   258  		return false
   259  	}
   260  
   261  	countChk := a.countChkIter.At()
   262  	mint := countChk.MinTime
   263  	maxt := countChk.MaxTime
   264  
   265  	var (
   266  		chks [5]chunkenc.Chunk
   267  		chk  *chunks.Meta
   268  		err  error
   269  	)
   270  
   271  	chks[downsample.AggrCount] = countChk.Chunk
   272  	for i := downsample.AggrSum; i <= downsample.AggrCounter; i++ {
   273  		chk, err = a.toChunk(i, mint, maxt)
   274  		if err != nil {
   275  			a.err = err
   276  			return false
   277  		}
   278  		if chk != nil {
   279  			chks[i] = chk.Chunk
   280  		}
   281  	}
   282  
   283  	a.curr = chunks.Meta{
   284  		MinTime: mint,
   285  		MaxTime: maxt,
   286  		Chunk:   downsample.EncodeAggrChunk(chks),
   287  	}
   288  	return true
   289  }
   290  
// At returns the Aggr chunk the iterator currently points at.
func (a *aggrChunkIterator) At() chunks.Meta {
	return a.curr
}
   294  
// Err returns the first error encountered during iteration, if any.
func (a *aggrChunkIterator) Err() error {
	return a.err
}
   298  
   299  func (a *aggrChunkIterator) toChunk(at downsample.AggrType, minTime, maxTime int64) (*chunks.Meta, error) {
   300  	if a.iters[at] == nil {
   301  		return nil, nil
   302  	}
   303  	c := chunkenc.NewXORChunk()
   304  	appender, err := c.Appender()
   305  	if err != nil {
   306  		return nil, err
   307  	}
   308  
   309  	it := NewBoundedSeriesIterator(a.iters[at], minTime, maxTime)
   310  
   311  	var (
   312  		lastT int64
   313  		lastV float64
   314  	)
   315  	for it.Next() != chunkenc.ValNone {
   316  		lastT, lastV = it.At()
   317  		appender.Append(lastT, lastV)
   318  	}
   319  	if err := it.Err(); err != nil {
   320  		return nil, err
   321  	}
   322  
   323  	// No sample in the required time range.
   324  	if lastT == 0 && lastV == 0 {
   325  		return nil, nil
   326  	}
   327  
   328  	// Encode last sample for AggrCounter.
   329  	if at == downsample.AggrCounter {
   330  		appender.Append(lastT, lastV)
   331  	}
   332  
   333  	return &chunks.Meta{
   334  		MinTime: minTime,
   335  		MaxTime: maxTime,
   336  		Chunk:   c,
   337  	}, nil
   338  }