github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/querier/batch/merge.go (about)

     1  package batch
     2  
     3  import (
     4  	"container/heap"
     5  	"sort"
     6  
     7  	promchunk "github.com/cortexproject/cortex/pkg/chunk/encoding"
     8  )
     9  
// mergeIterator merges samples from several non-overlapping chunk
// iterators into a single, globally time-sorted stream of batches.
type mergeIterator struct {
	// One sub-iterator per partition of non-overlapping chunks.
	its []*nonOverlappingIterator
	// Min-heap of the still-active sub-iterators, ordered by the
	// timestamp of each iterator's current sample.
	h iteratorHeap

	// Store the current sorted batchStream
	batches batchStream

	// Buffers to merge in.
	batchesBuf   batchStream
	nextBatchBuf [1]promchunk.Batch

	// First error reported by any sub-iterator; surfaced via Err().
	currErr error
}
    23  
    24  func newMergeIterator(cs []GenericChunk) *mergeIterator {
    25  	css := partitionChunks(cs)
    26  	its := make([]*nonOverlappingIterator, 0, len(css))
    27  	for _, cs := range css {
    28  		its = append(its, newNonOverlappingIterator(cs))
    29  	}
    30  
    31  	c := &mergeIterator{
    32  		its:        its,
    33  		h:          make(iteratorHeap, 0, len(its)),
    34  		batches:    make(batchStream, 0, len(its)),
    35  		batchesBuf: make(batchStream, len(its)),
    36  	}
    37  
    38  	for _, iter := range c.its {
    39  		if iter.Next(1) {
    40  			c.h = append(c.h, iter)
    41  			continue
    42  		}
    43  
    44  		if err := iter.Err(); err != nil {
    45  			c.currErr = err
    46  		}
    47  	}
    48  
    49  	heap.Init(&c.h)
    50  	return c
    51  }
    52  
    53  func (c *mergeIterator) Seek(t int64, size int) bool {
    54  
    55  	// Optimisation to see if the seek is within our current caches batches.
    56  found:
    57  	for len(c.batches) > 0 {
    58  		batch := &c.batches[0]
    59  		if t >= batch.Timestamps[0] && t <= batch.Timestamps[batch.Length-1] {
    60  			batch.Index = 0
    61  			for batch.Index < batch.Length && t > batch.Timestamps[batch.Index] {
    62  				batch.Index++
    63  			}
    64  			break found
    65  		}
    66  		copy(c.batches, c.batches[1:])
    67  		c.batches = c.batches[:len(c.batches)-1]
    68  	}
    69  
    70  	// If we didn't find anything in the current set of batches, reset the heap
    71  	// and seek.
    72  	if len(c.batches) == 0 {
    73  		c.h = c.h[:0]
    74  		c.batches = c.batches[:0]
    75  
    76  		for _, iter := range c.its {
    77  			if iter.Seek(t, size) {
    78  				c.h = append(c.h, iter)
    79  				continue
    80  			}
    81  
    82  			if err := iter.Err(); err != nil {
    83  				c.currErr = err
    84  				return false
    85  			}
    86  		}
    87  
    88  		heap.Init(&c.h)
    89  	}
    90  
    91  	return c.buildNextBatch(size)
    92  }
    93  
    94  func (c *mergeIterator) Next(size int) bool {
    95  	// Pop the last built batch in a way that doesn't extend the slice.
    96  	if len(c.batches) > 0 {
    97  		copy(c.batches, c.batches[1:])
    98  		c.batches = c.batches[:len(c.batches)-1]
    99  	}
   100  
   101  	return c.buildNextBatch(size)
   102  }
   103  
   104  func (c *mergeIterator) nextBatchEndTime() int64 {
   105  	batch := &c.batches[0]
   106  	return batch.Timestamps[batch.Length-1]
   107  }
   108  
   109  func (c *mergeIterator) buildNextBatch(size int) bool {
   110  	// All we need to do is get enough batches that our first batch's last entry
   111  	// is before all iterators next entry.
   112  	for len(c.h) > 0 && (len(c.batches) == 0 || c.nextBatchEndTime() >= c.h[0].AtTime()) {
   113  		c.nextBatchBuf[0] = c.h[0].Batch()
   114  		c.batchesBuf = mergeStreams(c.batches, c.nextBatchBuf[:], c.batchesBuf, size)
   115  		c.batches = append(c.batches[:0], c.batchesBuf...)
   116  
   117  		if c.h[0].Next(size) {
   118  			heap.Fix(&c.h, 0)
   119  		} else {
   120  			heap.Pop(&c.h)
   121  		}
   122  	}
   123  
   124  	return len(c.batches) > 0
   125  }
   126  
   127  func (c *mergeIterator) AtTime() int64 {
   128  	return c.batches[0].Timestamps[0]
   129  }
   130  
   131  func (c *mergeIterator) Batch() promchunk.Batch {
   132  	return c.batches[0]
   133  }
   134  
   135  func (c *mergeIterator) Err() error {
   136  	return c.currErr
   137  }
   138  
   139  type iteratorHeap []iterator
   140  
   141  func (h *iteratorHeap) Len() int      { return len(*h) }
   142  func (h *iteratorHeap) Swap(i, j int) { (*h)[i], (*h)[j] = (*h)[j], (*h)[i] }
   143  
   144  func (h *iteratorHeap) Less(i, j int) bool {
   145  	iT := (*h)[i].AtTime()
   146  	jT := (*h)[j].AtTime()
   147  	return iT < jT
   148  }
   149  
   150  func (h *iteratorHeap) Push(x interface{}) {
   151  	*h = append(*h, x.(iterator))
   152  }
   153  
   154  func (h *iteratorHeap) Pop() interface{} {
   155  	old := *h
   156  	n := len(old)
   157  	x := old[n-1]
   158  	*h = old[0 : n-1]
   159  	return x
   160  }
   161  
   162  // Build a list of lists of non-overlapping chunks.
   163  func partitionChunks(cs []GenericChunk) [][]GenericChunk {
   164  	sort.Sort(byMinTime(cs))
   165  
   166  	css := [][]GenericChunk{}
   167  outer:
   168  	for _, c := range cs {
   169  		for i, cs := range css {
   170  			if cs[len(cs)-1].MaxTime < c.MinTime {
   171  				css[i] = append(css[i], c)
   172  				continue outer
   173  			}
   174  		}
   175  		cs := make([]GenericChunk, 0, len(cs)/(len(css)+1))
   176  		cs = append(cs, c)
   177  		css = append(css, cs)
   178  	}
   179  
   180  	return css
   181  }
   182  
   183  type byMinTime []GenericChunk
   184  
   185  func (b byMinTime) Len() int           { return len(b) }
   186  func (b byMinTime) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
   187  func (b byMinTime) Less(i, j int) bool { return b[i].MinTime < b[j].MinTime }