github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/mergeorder/order.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package mergeorder 16 17 import ( 18 "bytes" 19 "reflect" 20 "time" 21 22 "github.com/matrixorigin/matrixone/pkg/common/moerr" 23 "github.com/matrixorigin/matrixone/pkg/compare" 24 "github.com/matrixorigin/matrixone/pkg/container/batch" 25 "github.com/matrixorigin/matrixone/pkg/container/vector" 26 "github.com/matrixorigin/matrixone/pkg/pb/plan" 27 "github.com/matrixorigin/matrixone/pkg/sql/colexec" 28 "github.com/matrixorigin/matrixone/pkg/vm/process" 29 ) 30 31 func String(arg any, buf *bytes.Buffer) { 32 ap := arg.(*Argument) 33 buf.WriteString("mergeorder([") 34 for i, f := range ap.Fs { 35 if i > 0 { 36 buf.WriteString(", ") 37 } 38 buf.WriteString(f.String()) 39 } 40 buf.WriteString("])") 41 } 42 43 func Prepare(proc *process.Process, arg any) error { 44 ap := arg.(*Argument) 45 ap.ctr = new(container) 46 ap.ctr.poses = make([]int32, 0, len(ap.Fs)) 47 48 ap.ctr.receiverListener = make([]reflect.SelectCase, len(proc.Reg.MergeReceivers)) 49 for i, mr := range proc.Reg.MergeReceivers { 50 ap.ctr.receiverListener[i] = reflect.SelectCase{ 51 Dir: reflect.SelectRecv, 52 Chan: reflect.ValueOf(mr.Ch), 53 } 54 } 55 ap.ctr.aliveMergeReceiver = len(proc.Reg.MergeReceivers) 56 ap.ctr.compare0Index = make([]int32, len(ap.Fs)) 57 ap.ctr.compare1Index = make([]int32, len(ap.Fs)) 58 return nil 59 } 60 61 func Call(idx int, proc *process.Process, arg any, isFirst bool, isLast bool) (bool, error) { 62 var bat *batch.Batch 63 var end bool 64 var err error 65 66 ap := arg.(*Argument) 67 ctr := ap.ctr 68 anal := proc.GetAnalyze(idx) 69 anal.Start() 70 defer anal.Stop() 71 72 // get batch from merge receivers and do merge sort. 73 // save the unordered result in ctr.bat. 74 // save the ordered index list in ctr.finalSelectList 75 for { 76 start := time.Now() 77 bat, end, err = receiveBatch(proc, ctr) 78 if err != nil { 79 break 80 } 81 anal.WaitStop(start) 82 if end { 83 break 84 } 85 86 if bat == nil || bat.Length() == 0 { 87 continue 88 } 89 anal.Input(bat, isFirst) 90 bat.ExpandNulls() 91 92 if err = mergeSort(proc, bat, ap, ctr, anal); err != nil { 93 break 94 } 95 } 96 if err != nil { 97 ap.Free(proc, true) 98 return false, err 99 } 100 101 // remove and clean unnecessary vector 102 // shuffle the ctr.bat 103 if ctr.bat != nil { 104 for i := ctr.n; i < len(ctr.bat.Vecs); i++ { 105 vector.Clean(ctr.bat.Vecs[i], proc.Mp()) 106 } 107 ctr.bat.Vecs = ctr.bat.Vecs[:ctr.n] 108 ctr.bat.ExpandNulls() 109 } 110 if err = ctr.bat.Shuffle(ctr.finalSelectList, proc.Mp()); err != nil { 111 ap.Free(proc, true) 112 return false, err 113 } 114 115 // output the sort result. 116 anal.Output(ctr.bat, isLast) 117 proc.SetInputBatch(ctr.bat) 118 ctr.bat = nil 119 120 // free and return 121 ap.Free(proc, false) 122 return true, nil 123 } 124 125 // receiveBatch get a batch from receiver, return true if all batches have been got. 126 func receiveBatch(proc *process.Process, ctr *container) (*batch.Batch, bool, error) { 127 if ctr.aliveMergeReceiver == 0 { 128 return nil, true, nil 129 } 130 chosen, value, ok := reflect.Select(ctr.receiverListener) 131 if !ok { 132 return nil, false, moerr.NewInternalError(proc.Ctx, "pipeline closed unexpectedly") 133 } 134 pointer := value.UnsafePointer() 135 bat := (*batch.Batch)(pointer) 136 if bat == nil { 137 ctr.receiverListener = append(ctr.receiverListener[:chosen], ctr.receiverListener[chosen+1:]...) 138 ctr.aliveMergeReceiver-- 139 } 140 return bat, false, nil 141 } 142 143 func mergeSort(proc *process.Process, bat2 *batch.Batch, 144 ap *Argument, ctr *container, anal process.Analyze) error { 145 ctr.n = len(bat2.Vecs) 146 ctr.poses = ctr.poses[:0] 147 148 // evaluate the order column. 149 for _, f := range ap.Fs { 150 vec, err := colexec.EvalExpr(bat2, proc, f.Expr) 151 if err != nil { 152 return err 153 } 154 newColumn := true 155 for i := range bat2.Vecs { 156 if bat2.Vecs[i] == vec { 157 newColumn = false 158 ctr.poses = append(ctr.poses, int32(i)) 159 break 160 } 161 } 162 if newColumn { 163 ctr.poses = append(ctr.poses, int32(len(bat2.Vecs))) 164 bat2.Vecs = append(bat2.Vecs, vec) 165 anal.Alloc(int64(vec.Size())) 166 } 167 } 168 copy(ctr.compare1Index, ctr.poses) 169 170 // init the compare structure if first time. 171 if len(ctr.cmps) == 0 { 172 var desc, nullsLast bool 173 ctr.cmps = make([]compare.Compare, len(ap.Fs)) 174 for i := range ctr.cmps { 175 desc = ap.Fs[i].Flag&plan.OrderBySpec_DESC != 0 176 if ap.Fs[i].Flag&plan.OrderBySpec_NULLS_FIRST != 0 { 177 nullsLast = false 178 } else if ap.Fs[i].Flag&plan.OrderBySpec_NULLS_LAST != 0 { 179 nullsLast = true 180 } else { 181 nullsLast = desc 182 } 183 ctr.cmps[i] = compare.New(bat2.Vecs[ctr.poses[i]].Typ, desc, nullsLast) 184 } 185 } 186 187 return ctr.mergeSort2(bat2, proc) 188 } 189 190 func (ctr *container) mergeSort2(bat2 *batch.Batch, proc *process.Process) error { 191 if ctr.bat == nil { 192 ctr.bat = bat2 193 ctr.finalSelectList = generateSelectList(int64(ctr.bat.Length())) 194 copy(ctr.compare0Index, ctr.poses) 195 return nil 196 } 197 bat1 := ctr.bat 198 // union bat1 and bat2 199 // do merge sort, get order index list. 200 s1, s2 := int64(0), int64(bat1.Vecs[0].Length()) // startIndexOfBat1, startIndexOfBat2 201 202 for i := range bat1.Vecs { 203 n := bat2.Vecs[i].Length() 204 if cap(ctr.unionFlag) >= n { 205 ctr.unionFlag = ctr.unionFlag[:n:cap(ctr.unionFlag)] 206 } else { 207 ctr.unionFlag = makeFlagsOne(n) 208 } 209 err := vector.UnionBatch(bat1.Vecs[i], bat2.Vecs[i], 0, n, ctr.unionFlag, proc.Mp()) 210 if err != nil { 211 return err 212 } 213 } 214 bat1.Zs = append(bat1.Zs, bat2.Zs...) 215 216 // set cmp should after union work to avoid memory re-alloc while union. 217 for i, cmp := range ctr.cmps { 218 cmp.Set(0, bat1.GetVector(ctr.compare0Index[i])) 219 cmp.Set(1, bat2.GetVector(ctr.compare1Index[i])) 220 } 221 222 end1, end2 := s2, int64(bat1.Vecs[0].Length()) 223 sels := make([]int64, 0, end2) 224 225 for s1 < end1 && s2 < end2 { 226 i := s1 227 j := s2 - end1 228 compareResult := 0 229 for k := range ctr.cmps { 230 compareResult = ctr.cmps[k].Compare(0, 1, ctr.finalSelectList[i], j) 231 if compareResult != 0 { 232 break 233 } 234 } 235 if compareResult <= 0 { 236 // weight of item1 is less or equal to item2 237 sels = append(sels, ctr.finalSelectList[s1]) 238 s1++ 239 } else { 240 sels = append(sels, s2) 241 s2++ 242 } 243 } 244 for s1 < end1 { 245 sels = append(sels, ctr.finalSelectList[s1]) 246 s1++ 247 } 248 for s2 < end2 { 249 sels = append(sels, s2) 250 s2++ 251 } 252 ctr.finalSelectList = sels 253 ctr.bat = bat1 254 bat2.Clean(proc.Mp()) 255 return nil 256 } 257 258 func generateSelectList(j int64) []int64 { 259 list := make([]int64, j) 260 var i int64 261 for i = 0; i < j; i++ { 262 list[i] = i 263 } 264 return list 265 } 266 267 func makeFlagsOne(n int) []uint8 { 268 t := make([]uint8, n) 269 for i := range t { 270 t[i]++ 271 } 272 return t 273 }