github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/mergetop/top.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package mergetop 16 17 import ( 18 "bytes" 19 "container/heap" 20 "fmt" 21 22 "github.com/matrixorigin/matrixone/pkg/compare" 23 "github.com/matrixorigin/matrixone/pkg/container/batch" 24 "github.com/matrixorigin/matrixone/pkg/pb/plan" 25 "github.com/matrixorigin/matrixone/pkg/sql/colexec" 26 "github.com/matrixorigin/matrixone/pkg/vm" 27 "github.com/matrixorigin/matrixone/pkg/vm/process" 28 ) 29 30 const argName = "merge_top" 31 32 func (arg *Argument) String(buf *bytes.Buffer) { 33 buf.WriteString(argName) 34 ap := arg 35 buf.WriteString(": mergetop([") 36 for i, f := range ap.Fs { 37 if i > 0 { 38 buf.WriteString(", ") 39 } 40 buf.WriteString(f.String()) 41 } 42 buf.WriteString(fmt.Sprintf("], %v)", ap.Limit)) 43 } 44 45 func (arg *Argument) Prepare(proc *process.Process) (err error) { 46 ap := arg 47 ap.ctr = new(container) 48 ap.ctr.InitReceiver(proc, true) 49 if ap.Limit > 1024 { 50 ap.ctr.sels = make([]int64, 0, 1024) 51 } else { 52 ap.ctr.sels = make([]int64, 0, ap.Limit) 53 } 54 ap.ctr.poses = make([]int32, 0, len(ap.Fs)) 55 56 ctr := ap.ctr 57 ctr.executorsForOrderList = make([]colexec.ExpressionExecutor, len(ap.Fs)) 58 for i := range ctr.executorsForOrderList { 59 ctr.executorsForOrderList[i], err = colexec.NewExpressionExecutor(proc, ap.Fs[i].Expr) 60 if err != nil { 61 return err 62 } 63 } 64 return nil 65 } 66 67 func (arg *Argument) Call(proc *process.Process) (vm.CallResult, error) { 68 if err, isCancel := vm.CancelCheck(proc); isCancel { 69 return vm.CancelResult, err 70 } 71 72 anal := proc.GetAnalyze(arg.GetIdx(), arg.GetParallelIdx(), arg.GetParallelMajor()) 73 anal.Start() 74 defer anal.Stop() 75 ap := arg 76 ctr := ap.ctr 77 result := vm.NewCallResult() 78 if ap.Limit == 0 { 79 result.Batch = nil 80 result.Status = vm.ExecStop 81 return result, nil 82 } 83 84 if end, err := ctr.build(ap, proc, anal, arg.GetIsFirst()); err != nil { 85 return result, err 86 } else if end { 87 result.Status = vm.ExecStop 88 return result, nil 89 } 90 91 if ctr.bat == nil { 92 result.Batch = nil 93 result.Status = vm.ExecStop 94 return result, nil 95 } 96 err := ctr.eval(ap.Limit, proc, anal, arg.GetIsLast(), &result) 97 if err == nil { 98 result.Status = vm.ExecStop 99 return result, nil 100 } 101 return result, err 102 } 103 104 func (ctr *container) build(ap *Argument, proc *process.Process, anal process.Analyze, isFirst bool) (bool, error) { 105 for { 106 bat, end, err := ctr.ReceiveFromAllRegs(anal) 107 if err != nil { 108 return true, nil 109 } 110 if end { 111 return false, nil 112 } 113 114 anal.Input(bat, isFirst) 115 116 ctr.n = len(bat.Vecs) 117 ctr.poses = ctr.poses[:0] 118 for i := range ctr.executorsForOrderList { 119 if ctr.executorsForOrderList[i].IsColumnExpr() { 120 colIndex := ctr.executorsForOrderList[i].(*colexec.ColumnExpressionExecutor).GetColIndex() 121 ctr.poses = append(ctr.poses, int32(colIndex)) 122 } else { 123 vec, err := ctr.executorsForOrderList[i].EvalWithoutResultReusing(proc, []*batch.Batch{bat}) 124 if err != nil { 125 return false, err 126 } 127 ctr.poses = append(ctr.poses, int32(len(bat.Vecs))) 128 bat.Vecs = append(bat.Vecs, vec) 129 anal.Alloc(int64(vec.Size())) 130 } 131 } 132 133 if ctr.bat == nil { 134 mp := make(map[int]int, len(ctr.poses)) 135 for i, pos := range ctr.poses { 136 mp[int(pos)] = i 137 } 138 ctr.bat = batch.NewWithSize(len(bat.Vecs)) 139 for i, vec := range bat.Vecs { 140 ctr.bat.Vecs[i] = proc.GetVector(*vec.GetType()) 141 } 142 ctr.cmps = make([]compare.Compare, len(bat.Vecs)) 143 for i := range ctr.cmps { 144 var desc, nullsLast bool 145 if pos, ok := mp[i]; ok { 146 desc = ap.Fs[pos].Flag&plan.OrderBySpec_DESC != 0 147 if ap.Fs[pos].Flag&plan.OrderBySpec_NULLS_FIRST != 0 { 148 nullsLast = false 149 } else if ap.Fs[pos].Flag&plan.OrderBySpec_NULLS_LAST != 0 { 150 nullsLast = true 151 } else { 152 nullsLast = desc 153 } 154 } 155 ctr.cmps[i] = compare.New(*bat.Vecs[i].GetType(), desc, nullsLast) 156 } 157 } 158 159 if err := ctr.processBatch(ap.Limit, bat, proc); err != nil { 160 bat.Clean(proc.Mp()) 161 return false, err 162 } 163 proc.PutBatch(bat) 164 } 165 } 166 167 func (ctr *container) processBatch(limit int64, bat *batch.Batch, proc *process.Process) error { 168 var start int64 169 170 length := int64(bat.RowCount()) 171 if n := int64(len(ctr.sels)); n < limit { 172 start = limit - n 173 if start > length { 174 start = length 175 } 176 for i := int64(0); i < start; i++ { 177 for j, vec := range ctr.bat.Vecs { 178 if err := vec.UnionOne(bat.Vecs[j], i, proc.Mp()); err != nil { 179 return err 180 } 181 } 182 ctr.sels = append(ctr.sels, n) 183 n++ 184 } 185 ctr.bat.AddRowCount(bat.RowCount()) 186 if n == limit { 187 ctr.sort() 188 } 189 } 190 if start == length { 191 return nil 192 } 193 194 // bat is still have items 195 for i, cmp := range ctr.cmps { 196 cmp.Set(1, bat.Vecs[i]) 197 } 198 for i, j := start, length; i < j; i++ { 199 if ctr.compare(1, 0, i, ctr.sels[0]) < 0 { 200 for _, cmp := range ctr.cmps { 201 if err := cmp.Copy(1, 0, i, ctr.sels[0], proc); err != nil { 202 return err 203 } 204 } 205 heap.Fix(ctr, 0) 206 } 207 } 208 return nil 209 } 210 211 func (ctr *container) eval(limit int64, proc *process.Process, anal process.Analyze, isLast bool, result *vm.CallResult) error { 212 if int64(len(ctr.sels)) < limit { 213 ctr.sort() 214 } 215 for i, cmp := range ctr.cmps { 216 ctr.bat.Vecs[i] = cmp.Vector() 217 } 218 sels := make([]int64, len(ctr.sels)) 219 for i, j := 0, len(ctr.sels); i < j; i++ { 220 sels[len(sels)-1-i] = heap.Pop(ctr).(int64) 221 } 222 if err := ctr.bat.Shuffle(sels, proc.Mp()); err != nil { 223 return err 224 } 225 for i := ctr.n; i < len(ctr.bat.Vecs); i++ { 226 ctr.bat.Vecs[i].Free(proc.Mp()) 227 } 228 ctr.bat.Vecs = ctr.bat.Vecs[:ctr.n] 229 anal.Output(ctr.bat, isLast) 230 result.Batch = ctr.bat 231 return nil 232 } 233 234 // do sort work for heap, and result order will be set in container.sels 235 func (ctr *container) sort() { 236 for i, cmp := range ctr.cmps { 237 cmp.Set(0, ctr.bat.Vecs[i]) 238 } 239 heap.Init(ctr) 240 }