github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/merge_util.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package colexec 16 17 import ( 18 "github.com/matrixorigin/matrixone/pkg/container/nulls" 19 "github.com/matrixorigin/matrixone/pkg/sort" 20 ) 21 22 type MergeInterface interface { 23 getNextPos() (int, int, int) 24 } 25 26 type heapElem[T any] struct { 27 data *T 28 isNull bool 29 batIndex int 30 rowIndex int 31 } 32 33 // we will sort by primary key or 34 // clusterby key, so we just need one 35 // vector of every batch. 36 type Merge[T any] struct { 37 // the number of bacthes 38 size uint64 39 // convert the vecotrs which need to sort 40 // into cols data 41 cols [][]T 42 // pointer is used to specify 43 // which postion we have gotten. 44 // for example, pointers[i] means 45 // we are now at the i-th row for 46 // cols[i] 47 pointers []int 48 49 nulls []*nulls.Nulls 50 51 heaps *mergeHeap[T] 52 } 53 54 func newMerge[T any](size int, compLess sort.LessFunc[T], cols [][]T, nulls []*nulls.Nulls) (merge *Merge[T]) { 55 merge = &Merge[T]{ 56 size: uint64(size), 57 cols: cols, 58 pointers: make([]int, size), 59 nulls: nulls, 60 } 61 merge.heaps = newMergeHeap(uint64(size), compLess) 62 merge.initHeap() 63 return 64 } 65 66 func (merge *Merge[T]) initHeap() { 67 for i := 0; i < int(merge.size); i++ { 68 if len(merge.cols[i]) == 0 { 69 merge.pointers[i] = -1 70 merge.size-- 71 continue 72 } 73 merge.heaps.push(&heapElem[T]{ 74 data: &merge.cols[i][merge.pointers[i]], 75 isNull: merge.nulls[i].Contains(uint64(merge.pointers[i])), 76 batIndex: i, 77 rowIndex: merge.pointers[i], 78 }) 79 if merge.pointers[i] >= len(merge.cols[i]) { 80 merge.pointers[i] = -1 81 merge.size-- 82 } 83 } 84 } 85 86 func (merge *Merge[T]) getNextPos() (batchIndex, rowIndex, size int) { 87 data := merge.pushNext() 88 if data == nil { 89 // now, merge.size is 0 90 return -1, -1, int(merge.size) 91 } 92 return data.batIndex, data.rowIndex, int(merge.size) 93 } 94 95 func (merge *Merge[T]) pushNext() *heapElem[T] { 96 if merge.size == 0 { 97 return nil 98 } 99 data := merge.heaps.pop() 100 batchIndex := data.batIndex 101 merge.pointers[batchIndex]++ 102 if merge.pointers[batchIndex] >= len(merge.cols[batchIndex]) { 103 merge.pointers[batchIndex] = -1 104 merge.size-- 105 } 106 if merge.pointers[batchIndex] != -1 { 107 merge.heaps.push(&heapElem[T]{ 108 data: &merge.cols[batchIndex][merge.pointers[batchIndex]], 109 isNull: merge.nulls[batchIndex].Contains(uint64(merge.pointers[batchIndex])), 110 batIndex: batchIndex, 111 rowIndex: merge.pointers[batchIndex], 112 }) 113 } 114 return data 115 } 116 117 // mergeHeap will take null first rule 118 type mergeHeap[T any] struct { 119 cmpLess sort.LessFunc[T] 120 datas []*heapElem[T] 121 size uint64 122 } 123 124 func newMergeHeap[T any](cap_size uint64, cmp sort.LessFunc[T]) *mergeHeap[T] { 125 return &mergeHeap[T]{ 126 cmpLess: cmp, 127 datas: make([]*heapElem[T], cap_size+1), 128 size: 0, 129 } 130 } 131 132 func (heap *mergeHeap[T]) push(data *heapElem[T]) { 133 heap.datas[heap.size+1] = data 134 heap.size++ 135 heap.up(int(heap.size)) 136 } 137 138 func (heap *mergeHeap[T]) pop() (data *heapElem[T]) { 139 if heap.size < 1 { 140 return nil 141 } 142 data = heap.datas[1] 143 heap.datas[1], heap.datas[heap.size] = heap.datas[heap.size], heap.datas[1] 144 heap.size-- 145 heap.down(1) 146 return 147 } 148 149 func (heap *mergeHeap[T]) compLess(i, j int) bool { 150 if heap.datas[i].isNull { 151 return true 152 } 153 if heap.datas[j].isNull { 154 return false 155 } 156 return heap.cmpLess(*heap.datas[i].data, *heap.datas[j].data) 157 } 158 159 func (heap *mergeHeap[T]) down(i int) { 160 t := i 161 if i*2 <= int(heap.size) && heap.compLess(i*2, t) { 162 t = i * 2 163 } 164 if i*2+1 <= int(heap.size) && heap.compLess(i*2+1, t) { 165 t = i*2 + 1 166 } 167 if t != i { 168 heap.datas[t], heap.datas[i] = heap.datas[i], heap.datas[t] 169 heap.down(t) 170 } 171 } 172 173 func (heap *mergeHeap[T]) up(i int) { 174 t := i 175 if i/2 >= 1 && heap.compLess(t, i/2) { 176 t = i / 2 177 } 178 if t != i { 179 heap.datas[t], heap.datas[i] = heap.datas[i], heap.datas[t] 180 heap.up(t) 181 } 182 }