github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/sorttopk.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package colexec 12 13 import ( 14 "container/heap" 15 "context" 16 "fmt" 17 18 "github.com/cockroachdb/cockroach/pkg/col/coldata" 19 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase" 20 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror" 21 "github.com/cockroachdb/cockroach/pkg/sql/colmem" 22 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 23 "github.com/cockroachdb/cockroach/pkg/sql/types" 24 ) 25 26 const ( 27 topKVecIdx = 0 28 inputVecIdx = 1 29 ) 30 31 // NewTopKSorter returns a new sort operator, which sorts its input on the 32 // columns given in orderingCols and returns the first K rows. The inputTypes 33 // must correspond 1-1 with the columns in the input operator. 34 func NewTopKSorter( 35 allocator *colmem.Allocator, 36 input colexecbase.Operator, 37 inputTypes []*types.T, 38 orderingCols []execinfrapb.Ordering_Column, 39 k int, 40 ) colexecbase.Operator { 41 return &topKSorter{ 42 allocator: allocator, 43 OneInputNode: NewOneInputNode(input), 44 inputTypes: inputTypes, 45 orderingCols: orderingCols, 46 k: k, 47 } 48 } 49 50 var _ bufferingInMemoryOperator = &topKSorter{} 51 52 // topKSortState represents the state of the sort operator. 53 type topKSortState int 54 55 const ( 56 // sortSpooling is the initial state of the operator, where it spools its 57 // input. 58 topKSortSpooling topKSortState = iota 59 // sortEmitting is the second state of the operator, indicating that each call 60 // to Next will return another batch of the sorted data. 61 topKSortEmitting 62 ) 63 64 type topKSorter struct { 65 OneInputNode 66 67 allocator *colmem.Allocator 68 orderingCols []execinfrapb.Ordering_Column 69 inputTypes []*types.T 70 k int 71 72 // state is the current state of the sort. 73 state topKSortState 74 // inputBatch is the last read batch from the input. 75 inputBatch coldata.Batch 76 // firstUnprocessedTupleIdx indicates the index of the first tuple in 77 // inputBatch that hasn't been processed yet. 78 firstUnprocessedTupleIdx int 79 // comparators stores one comparator per ordering column. 80 comparators []vecComparator 81 // topK stores the top K rows. It is not sorted internally. 82 topK *appendOnlyBufferedBatch 83 // heap is a max heap which stores indices into topK. 84 heap []int 85 // sel is a selection vector which specifies an ordering on topK. 86 sel []int 87 // emitted is the count of rows which have been emitted so far. 88 emitted int 89 output coldata.Batch 90 91 exportedFromTopK int 92 exportedFromBatch int 93 windowedBatch coldata.Batch 94 } 95 96 func (t *topKSorter) Init() { 97 t.input.Init() 98 t.topK = newAppendOnlyBufferedBatch( 99 t.allocator, t.inputTypes, 0, /* initialSize */ 100 ) 101 t.comparators = make([]vecComparator, len(t.inputTypes)) 102 for i, typ := range t.inputTypes { 103 t.comparators[i] = GetVecComparator(typ, 2) 104 } 105 // TODO(yuzefovich): switch to calling this method on allocator. This will 106 // require plumbing unlimited allocator to work correctly in tests with 107 // memory limit of 1. 108 t.windowedBatch = coldata.NewMemBatchNoCols(t.inputTypes, coldata.BatchSize()) 109 } 110 111 func (t *topKSorter) Next(ctx context.Context) coldata.Batch { 112 switch t.state { 113 case topKSortSpooling: 114 t.spool(ctx) 115 t.state = topKSortEmitting 116 fallthrough 117 case topKSortEmitting: 118 return t.emit() 119 } 120 colexecerror.InternalError(fmt.Sprintf("invalid sort state %v", t.state)) 121 // This code is unreachable, but the compiler cannot infer that. 122 return nil 123 } 124 125 // spool reads in the entire input, always storing the top K rows it has seen so 126 // far in o.topK. This is done by maintaining a max heap of indices into o.topK. 127 // Whenever we encounter a row which is smaller than the max row in the heap, 128 // we replace the max with that row. 129 // 130 // After all the input has been read, we pop everything off the heap to 131 // determine the final output ordering. This is used in emit() to output the rows 132 // in sorted order. 133 func (t *topKSorter) spool(ctx context.Context) { 134 // Fill up t.topK by spooling up to K rows from the input. 135 t.inputBatch = t.input.Next(ctx) 136 remainingRows := t.k 137 for remainingRows > 0 && t.inputBatch.Length() > 0 { 138 fromLength := t.inputBatch.Length() 139 if remainingRows < t.inputBatch.Length() { 140 // t.topK will be full after this batch. 141 fromLength = remainingRows 142 } 143 t.firstUnprocessedTupleIdx = fromLength 144 t.allocator.PerformOperation(t.topK.ColVecs(), func() { 145 t.topK.append(t.inputBatch, 0 /* startIdx */, fromLength) 146 }) 147 remainingRows -= fromLength 148 if fromLength == t.inputBatch.Length() { 149 t.inputBatch = t.input.Next(ctx) 150 t.firstUnprocessedTupleIdx = 0 151 } 152 } 153 t.updateComparators(topKVecIdx, t.topK) 154 155 // Initialize the heap. 156 t.heap = make([]int, t.topK.Length()) 157 for i := range t.heap { 158 t.heap[i] = i 159 } 160 heap.Init(t) 161 162 // Read the remainder of the input. Whenever a row is less than the heap max, 163 // swap it in. 164 for t.inputBatch.Length() > 0 { 165 t.updateComparators(inputVecIdx, t.inputBatch) 166 sel := t.inputBatch.Selection() 167 t.allocator.PerformOperation( 168 t.topK.ColVecs(), 169 func() { 170 for i := t.firstUnprocessedTupleIdx; i < t.inputBatch.Length(); i++ { 171 idx := i 172 if sel != nil { 173 idx = sel[i] 174 } 175 maxIdx := t.heap[0] 176 if t.compareRow(inputVecIdx, topKVecIdx, idx, maxIdx) < 0 { 177 for j := range t.inputTypes { 178 t.comparators[j].set(inputVecIdx, topKVecIdx, idx, maxIdx) 179 } 180 heap.Fix(t, 0) 181 } 182 } 183 t.firstUnprocessedTupleIdx = t.inputBatch.Length() 184 }, 185 ) 186 t.inputBatch = t.input.Next(ctx) 187 t.firstUnprocessedTupleIdx = 0 188 } 189 190 // t.topK now contains the top K rows unsorted. Create a selection vector 191 // which specifies the rows in sorted order by popping everything off the 192 // heap. Note that it's a max heap so we need to fill the selection vector in 193 // reverse. 194 t.sel = make([]int, t.topK.Length()) 195 for i := 0; i < t.topK.Length(); i++ { 196 t.sel[len(t.sel)-i-1] = heap.Pop(t).(int) 197 } 198 } 199 200 func (t *topKSorter) resetOutput() { 201 if t.output == nil { 202 t.output = t.allocator.NewMemBatchWithSize(t.inputTypes, coldata.BatchSize()) 203 } else { 204 t.output.ResetInternalBatch() 205 } 206 } 207 208 func (t *topKSorter) emit() coldata.Batch { 209 t.resetOutput() 210 toEmit := t.topK.Length() - t.emitted 211 if toEmit == 0 { 212 // We're done. 213 return coldata.ZeroBatch 214 } 215 if toEmit > coldata.BatchSize() { 216 toEmit = coldata.BatchSize() 217 } 218 for i := range t.inputTypes { 219 vec := t.output.ColVec(i) 220 // At this point, we have already fully sorted the input. It is ok to do 221 // this Copy outside of the allocator - the work has been done, but 222 // theoretically it is possible to hit the limit here (mainly with 223 // variable-sized types like Bytes). Nonetheless, for performance reasons 224 // it would be sad to fallback to disk at this point. 225 vec.Copy( 226 coldata.CopySliceArgs{ 227 SliceArgs: coldata.SliceArgs{ 228 Src: t.topK.ColVec(i), 229 Sel: t.sel, 230 SrcStartIdx: t.emitted, 231 SrcEndIdx: t.emitted + toEmit, 232 }, 233 }, 234 ) 235 } 236 t.output.SetLength(toEmit) 237 t.emitted += toEmit 238 return t.output 239 } 240 241 func (t *topKSorter) compareRow(vecIdx1, vecIdx2 int, rowIdx1, rowIdx2 int) int { 242 for i := range t.orderingCols { 243 info := t.orderingCols[i] 244 res := t.comparators[info.ColIdx].compare(vecIdx1, vecIdx2, rowIdx1, rowIdx2) 245 if res != 0 { 246 switch d := info.Direction; d { 247 case execinfrapb.Ordering_Column_ASC: 248 return res 249 case execinfrapb.Ordering_Column_DESC: 250 return -res 251 default: 252 colexecerror.InternalError(fmt.Sprintf("unexpected direction value %d", d)) 253 } 254 } 255 } 256 return 0 257 } 258 259 func (t *topKSorter) updateComparators(vecIdx int, batch coldata.Batch) { 260 for i := range t.inputTypes { 261 t.comparators[i].setVec(vecIdx, batch.ColVec(i)) 262 } 263 } 264 265 func (t *topKSorter) ExportBuffered(colexecbase.Operator) coldata.Batch { 266 topKLen := t.topK.Length() 267 // First, we check whether we have exported all tuples from the topK vector. 268 if t.exportedFromTopK < topKLen { 269 newExportedFromTopK := t.exportedFromTopK + coldata.BatchSize() 270 if newExportedFromTopK > topKLen { 271 newExportedFromTopK = topKLen 272 } 273 for i := range t.inputTypes { 274 window := t.topK.ColVec(i).Window(t.exportedFromTopK, newExportedFromTopK) 275 t.windowedBatch.ReplaceCol(window, i) 276 } 277 t.windowedBatch.SetSelection(false) 278 t.windowedBatch.SetLength(newExportedFromTopK - t.exportedFromTopK) 279 t.exportedFromTopK = newExportedFromTopK 280 return t.windowedBatch 281 } 282 // Next, we check whether we have exported all tuples from the last read 283 // batch. 284 if t.inputBatch != nil && t.firstUnprocessedTupleIdx+t.exportedFromBatch < t.inputBatch.Length() { 285 makeWindowIntoBatch(t.windowedBatch, t.inputBatch, t.firstUnprocessedTupleIdx, t.inputTypes) 286 t.exportedFromBatch = t.windowedBatch.Length() 287 return t.windowedBatch 288 } 289 return coldata.ZeroBatch 290 } 291 292 // Len is part of heap.Interface and is only meant to be used internally. 293 func (t *topKSorter) Len() int { 294 return len(t.heap) 295 } 296 297 // Less is part of heap.Interface and is only meant to be used internally. 298 func (t *topKSorter) Less(i, j int) bool { 299 return t.compareRow(topKVecIdx, topKVecIdx, t.heap[i], t.heap[j]) > 0 300 } 301 302 // Swap is part of heap.Interface and is only meant to be used internally. 303 func (t *topKSorter) Swap(i, j int) { 304 t.heap[i], t.heap[j] = t.heap[j], t.heap[i] 305 } 306 307 // Push is part of heap.Interface and is only meant to be used internally. 308 func (t *topKSorter) Push(x interface{}) { 309 t.heap = append(t.heap, x.(int)) 310 } 311 312 // Pop is part of heap.Interface and is only meant to be used internally. 313 func (t *topKSorter) Pop() interface{} { 314 x := t.heap[len(t.heap)-1] 315 t.heap = t.heap[:len(t.heap)-1] 316 return x 317 }