github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/querier/batch/merge.go (about) 1 package batch 2 3 import ( 4 "container/heap" 5 "sort" 6 7 promchunk "github.com/cortexproject/cortex/pkg/chunk/encoding" 8 ) 9 10 type mergeIterator struct { 11 its []*nonOverlappingIterator 12 h iteratorHeap 13 14 // Store the current sorted batchStream 15 batches batchStream 16 17 // Buffers to merge in. 18 batchesBuf batchStream 19 nextBatchBuf [1]promchunk.Batch 20 21 currErr error 22 } 23 24 func newMergeIterator(cs []GenericChunk) *mergeIterator { 25 css := partitionChunks(cs) 26 its := make([]*nonOverlappingIterator, 0, len(css)) 27 for _, cs := range css { 28 its = append(its, newNonOverlappingIterator(cs)) 29 } 30 31 c := &mergeIterator{ 32 its: its, 33 h: make(iteratorHeap, 0, len(its)), 34 batches: make(batchStream, 0, len(its)), 35 batchesBuf: make(batchStream, len(its)), 36 } 37 38 for _, iter := range c.its { 39 if iter.Next(1) { 40 c.h = append(c.h, iter) 41 continue 42 } 43 44 if err := iter.Err(); err != nil { 45 c.currErr = err 46 } 47 } 48 49 heap.Init(&c.h) 50 return c 51 } 52 53 func (c *mergeIterator) Seek(t int64, size int) bool { 54 55 // Optimisation to see if the seek is within our current caches batches. 56 found: 57 for len(c.batches) > 0 { 58 batch := &c.batches[0] 59 if t >= batch.Timestamps[0] && t <= batch.Timestamps[batch.Length-1] { 60 batch.Index = 0 61 for batch.Index < batch.Length && t > batch.Timestamps[batch.Index] { 62 batch.Index++ 63 } 64 break found 65 } 66 copy(c.batches, c.batches[1:]) 67 c.batches = c.batches[:len(c.batches)-1] 68 } 69 70 // If we didn't find anything in the current set of batches, reset the heap 71 // and seek. 72 if len(c.batches) == 0 { 73 c.h = c.h[:0] 74 c.batches = c.batches[:0] 75 76 for _, iter := range c.its { 77 if iter.Seek(t, size) { 78 c.h = append(c.h, iter) 79 continue 80 } 81 82 if err := iter.Err(); err != nil { 83 c.currErr = err 84 return false 85 } 86 } 87 88 heap.Init(&c.h) 89 } 90 91 return c.buildNextBatch(size) 92 } 93 94 func (c *mergeIterator) Next(size int) bool { 95 // Pop the last built batch in a way that doesn't extend the slice. 96 if len(c.batches) > 0 { 97 copy(c.batches, c.batches[1:]) 98 c.batches = c.batches[:len(c.batches)-1] 99 } 100 101 return c.buildNextBatch(size) 102 } 103 104 func (c *mergeIterator) nextBatchEndTime() int64 { 105 batch := &c.batches[0] 106 return batch.Timestamps[batch.Length-1] 107 } 108 109 func (c *mergeIterator) buildNextBatch(size int) bool { 110 // All we need to do is get enough batches that our first batch's last entry 111 // is before all iterators next entry. 112 for len(c.h) > 0 && (len(c.batches) == 0 || c.nextBatchEndTime() >= c.h[0].AtTime()) { 113 c.nextBatchBuf[0] = c.h[0].Batch() 114 c.batchesBuf = mergeStreams(c.batches, c.nextBatchBuf[:], c.batchesBuf, size) 115 c.batches = append(c.batches[:0], c.batchesBuf...) 116 117 if c.h[0].Next(size) { 118 heap.Fix(&c.h, 0) 119 } else { 120 heap.Pop(&c.h) 121 } 122 } 123 124 return len(c.batches) > 0 125 } 126 127 func (c *mergeIterator) AtTime() int64 { 128 return c.batches[0].Timestamps[0] 129 } 130 131 func (c *mergeIterator) Batch() promchunk.Batch { 132 return c.batches[0] 133 } 134 135 func (c *mergeIterator) Err() error { 136 return c.currErr 137 } 138 139 type iteratorHeap []iterator 140 141 func (h *iteratorHeap) Len() int { return len(*h) } 142 func (h *iteratorHeap) Swap(i, j int) { (*h)[i], (*h)[j] = (*h)[j], (*h)[i] } 143 144 func (h *iteratorHeap) Less(i, j int) bool { 145 iT := (*h)[i].AtTime() 146 jT := (*h)[j].AtTime() 147 return iT < jT 148 } 149 150 func (h *iteratorHeap) Push(x interface{}) { 151 *h = append(*h, x.(iterator)) 152 } 153 154 func (h *iteratorHeap) Pop() interface{} { 155 old := *h 156 n := len(old) 157 x := old[n-1] 158 *h = old[0 : n-1] 159 return x 160 } 161 162 // Build a list of lists of non-overlapping chunks. 163 func partitionChunks(cs []GenericChunk) [][]GenericChunk { 164 sort.Sort(byMinTime(cs)) 165 166 css := [][]GenericChunk{} 167 outer: 168 for _, c := range cs { 169 for i, cs := range css { 170 if cs[len(cs)-1].MaxTime < c.MinTime { 171 css[i] = append(css[i], c) 172 continue outer 173 } 174 } 175 cs := make([]GenericChunk, 0, len(cs)/(len(css)+1)) 176 cs = append(cs, c) 177 css = append(css, cs) 178 } 179 180 return css 181 } 182 183 type byMinTime []GenericChunk 184 185 func (b byMinTime) Len() int { return len(b) } 186 func (b byMinTime) Swap(i, j int) { b[i], b[j] = b[j], b[i] } 187 func (b byMinTime) Less(i, j int) bool { return b[i].MinTime < b[j].MinTime }