github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/mergetop/top.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package mergetop 16 17 import ( 18 "bytes" 19 "container/heap" 20 "fmt" 21 "reflect" 22 "time" 23 24 "github.com/matrixorigin/matrixone/pkg/common/moerr" 25 "github.com/matrixorigin/matrixone/pkg/compare" 26 "github.com/matrixorigin/matrixone/pkg/container/batch" 27 "github.com/matrixorigin/matrixone/pkg/container/vector" 28 "github.com/matrixorigin/matrixone/pkg/pb/plan" 29 "github.com/matrixorigin/matrixone/pkg/sql/colexec" 30 "github.com/matrixorigin/matrixone/pkg/vm/process" 31 ) 32 33 func String(arg any, buf *bytes.Buffer) { 34 ap := arg.(*Argument) 35 buf.WriteString("mergetop([") 36 for i, f := range ap.Fs { 37 if i > 0 { 38 buf.WriteString(", ") 39 } 40 buf.WriteString(f.String()) 41 } 42 buf.WriteString(fmt.Sprintf("], %v)", ap.Limit)) 43 } 44 45 func Prepare(proc *process.Process, arg any) error { 46 ap := arg.(*Argument) 47 ap.ctr = new(container) 48 if ap.Limit > 1024 { 49 ap.ctr.sels = make([]int64, 0, 1024) 50 } else { 51 ap.ctr.sels = make([]int64, 0, ap.Limit) 52 } 53 ap.ctr.poses = make([]int32, 0, len(ap.Fs)) 54 55 ap.ctr.receiverListener = make([]reflect.SelectCase, len(proc.Reg.MergeReceivers)) 56 for i, mr := range proc.Reg.MergeReceivers { 57 ap.ctr.receiverListener[i] = reflect.SelectCase{ 58 Dir: reflect.SelectRecv, 59 Chan: reflect.ValueOf(mr.Ch), 60 } 61 } 62 ap.ctr.aliveMergeReceiver = len(proc.Reg.MergeReceivers) 63 return nil 64 } 65 66 func Call(idx int, proc *process.Process, arg any, isFirst bool, isLast bool) (bool, error) { 67 anal := proc.GetAnalyze(idx) 68 anal.Start() 69 defer anal.Stop() 70 ap := arg.(*Argument) 71 ctr := ap.ctr 72 73 if ap.Limit == 0 { 74 ap.Free(proc, false) 75 proc.SetInputBatch(nil) 76 return true, nil 77 } 78 79 if err := ctr.build(ap, proc, anal, isFirst); err != nil { 80 ap.Free(proc, true) 81 return false, err 82 } 83 84 if ctr.bat == nil { 85 ap.Free(proc, false) 86 proc.SetInputBatch(nil) 87 return true, nil 88 } 89 err := ctr.eval(ap.Limit, proc, anal, isLast) 90 ap.Free(proc, err != nil) 91 return err == nil, err 92 } 93 94 func (ctr *container) build(ap *Argument, proc *process.Process, anal process.Analyze, isFirst bool) error { 95 for { 96 if ctr.aliveMergeReceiver == 0 { 97 return nil 98 } 99 100 start := time.Now() 101 chosen, value, ok := reflect.Select(ctr.receiverListener) 102 if !ok { 103 return moerr.NewInternalError(proc.Ctx, "pipeline closed unexpectedly") 104 } 105 anal.WaitStop(start) 106 107 pointer := value.UnsafePointer() 108 bat := (*batch.Batch)(pointer) 109 if bat == nil { 110 ctr.receiverListener = append(ctr.receiverListener[:chosen], ctr.receiverListener[chosen+1:]...) 111 ctr.aliveMergeReceiver-- 112 continue 113 } 114 115 if bat.Length() == 0 { 116 continue 117 } 118 119 anal.Input(bat, isFirst) 120 121 ctr.n = len(bat.Vecs) 122 ctr.poses = ctr.poses[:0] 123 for _, f := range ap.Fs { 124 vec, err := colexec.EvalExpr(bat, proc, f.Expr) 125 if err != nil { 126 return err 127 } 128 flg := true 129 for i := range bat.Vecs { 130 if bat.Vecs[i] == vec { 131 flg = false 132 ctr.poses = append(ctr.poses, int32(i)) 133 break 134 } 135 } 136 if flg { 137 ctr.poses = append(ctr.poses, int32(len(bat.Vecs))) 138 bat.Vecs = append(bat.Vecs, vec) 139 } else { 140 if vec != nil { 141 anal.Alloc(int64(vec.Size())) 142 } 143 } 144 } 145 if ctr.bat == nil { 146 mp := make(map[int]int) 147 for i, pos := range ctr.poses { 148 mp[int(pos)] = i 149 } 150 ctr.bat = batch.NewWithSize(len(bat.Vecs)) 151 for i, vec := range bat.Vecs { 152 ctr.bat.Vecs[i] = vector.New(vec.Typ) 153 } 154 ctr.cmps = make([]compare.Compare, len(bat.Vecs)) 155 for i := range ctr.cmps { 156 var desc, nullsLast bool 157 if pos, ok := mp[i]; ok { 158 desc = ap.Fs[pos].Flag&plan.OrderBySpec_DESC != 0 159 if ap.Fs[pos].Flag&plan.OrderBySpec_NULLS_FIRST != 0 { 160 nullsLast = false 161 } else if ap.Fs[pos].Flag&plan.OrderBySpec_NULLS_LAST != 0 { 162 nullsLast = true 163 } else { 164 nullsLast = desc 165 } 166 } 167 ctr.cmps[i] = compare.New(bat.Vecs[i].Typ, desc, nullsLast) 168 } 169 } 170 if err := ctr.processBatch(ap.Limit, bat, proc); err != nil { 171 bat.Clean(proc.Mp()) 172 return err 173 } 174 bat.Clean(proc.Mp()) 175 } 176 } 177 178 func (ctr *container) processBatch(limit int64, bat *batch.Batch, proc *process.Process) error { 179 var start int64 180 181 length := int64(len(bat.Zs)) 182 if n := int64(len(ctr.sels)); n < limit { 183 start = limit - n 184 if start > length { 185 start = length 186 } 187 for i := int64(0); i < start; i++ { 188 for j, vec := range ctr.bat.Vecs { 189 if err := vector.UnionOne(vec, bat.Vecs[j], i, proc.Mp()); err != nil { 190 return err 191 } 192 } 193 ctr.sels = append(ctr.sels, n) 194 ctr.bat.Zs = append(ctr.bat.Zs, bat.Zs[i]) 195 n++ 196 } 197 if n == limit { 198 ctr.sort() 199 } 200 } 201 if start == length { 202 return nil 203 } 204 205 // bat is still have items 206 for i, cmp := range ctr.cmps { 207 cmp.Set(1, bat.Vecs[i]) 208 } 209 for i, j := start, length; i < j; i++ { 210 if ctr.compare(1, 0, i, ctr.sels[0]) < 0 { 211 for _, cmp := range ctr.cmps { 212 if err := cmp.Copy(1, 0, i, ctr.sels[0], proc); err != nil { 213 return err 214 } 215 ctr.bat.Zs[0] = bat.Zs[i] 216 } 217 heap.Fix(ctr, 0) 218 } 219 } 220 return nil 221 } 222 223 func (ctr *container) eval(limit int64, proc *process.Process, anal process.Analyze, isLast bool) error { 224 if int64(len(ctr.sels)) < limit { 225 ctr.sort() 226 } 227 for i, cmp := range ctr.cmps { 228 ctr.bat.Vecs[i] = cmp.Vector() 229 } 230 sels := make([]int64, len(ctr.sels)) 231 for i, j := 0, len(ctr.sels); i < j; i++ { 232 sels[len(sels)-1-i] = heap.Pop(ctr).(int64) 233 } 234 if err := ctr.bat.Shuffle(sels, proc.Mp()); err != nil { 235 return err 236 } 237 for i := ctr.n; i < len(ctr.bat.Vecs); i++ { 238 vector.Clean(ctr.bat.Vecs[i], proc.Mp()) 239 } 240 ctr.bat.Vecs = ctr.bat.Vecs[:ctr.n] 241 ctr.bat.ExpandNulls() 242 anal.Output(ctr.bat, isLast) 243 proc.SetInputBatch(ctr.bat) 244 ctr.bat = nil 245 return nil 246 } 247 248 // do sort work for heap, and result order will be set in container.sels 249 func (ctr *container) sort() { 250 for i, cmp := range ctr.cmps { 251 cmp.Set(0, ctr.bat.Vecs[i]) 252 } 253 heap.Init(ctr) 254 }