github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/left/join.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package left 16 17 import ( 18 "bytes" 19 "time" 20 21 "github.com/matrixorigin/matrixone/pkg/common/hashmap" 22 "github.com/matrixorigin/matrixone/pkg/container/batch" 23 "github.com/matrixorigin/matrixone/pkg/container/vector" 24 "github.com/matrixorigin/matrixone/pkg/sql/colexec" 25 "github.com/matrixorigin/matrixone/pkg/sql/plan" 26 "github.com/matrixorigin/matrixone/pkg/vm/process" 27 ) 28 29 func String(_ any, buf *bytes.Buffer) { 30 buf.WriteString(" left join ") 31 } 32 33 func Prepare(proc *process.Process, arg any) error { 34 ap := arg.(*Argument) 35 ap.ctr = new(container) 36 ap.ctr.inBuckets = make([]uint8, hashmap.UnitLimit) 37 ap.ctr.evecs = make([]evalVector, len(ap.Conditions[0])) 38 ap.ctr.vecs = make([]*vector.Vector, len(ap.Conditions[0])) 39 ap.ctr.bat = batch.NewWithSize(len(ap.Typs)) 40 ap.ctr.bat.Zs = proc.Mp().GetSels() 41 for i, typ := range ap.Typs { 42 ap.ctr.bat.Vecs[i] = vector.New(typ) 43 } 44 return nil 45 } 46 47 func Call(idx int, proc *process.Process, arg any, isFirst bool, isLast bool) (bool, error) { 48 anal := proc.GetAnalyze(idx) 49 anal.Start() 50 defer anal.Stop() 51 ap := arg.(*Argument) 52 ctr := ap.ctr 53 for { 54 switch ctr.state { 55 case Build: 56 if err := ctr.build(ap, proc, anal); err != nil { 57 ap.Free(proc, true) 58 return false, err 59 } 60 ctr.state = Probe 61 62 case Probe: 63 start := time.Now() 64 bat := <-proc.Reg.MergeReceivers[0].Ch 65 anal.WaitStop(start) 66 67 if bat == nil { 68 ctr.state = End 69 continue 70 } 71 if bat.Length() == 0 { 72 continue 73 } 74 if ctr.bat.Length() == 0 { 75 if err := ctr.emptyProbe(bat, ap, proc, anal, isFirst, isLast); err != nil { 76 ap.Free(proc, true) 77 return false, err 78 } 79 } else { 80 if err := ctr.probe(bat, ap, proc, anal, isFirst, isLast); err != nil { 81 ap.Free(proc, true) 82 return false, err 83 } 84 } 85 return false, nil 86 87 default: 88 ap.Free(proc, false) 89 proc.SetInputBatch(nil) 90 return true, nil 91 } 92 } 93 } 94 95 func (ctr *container) build(ap *Argument, proc *process.Process, anal process.Analyze) error { 96 start := time.Now() 97 bat := <-proc.Reg.MergeReceivers[1].Ch 98 anal.WaitStop(start) 99 100 if bat != nil { 101 ctr.bat = bat 102 ctr.mp = bat.Ht.(*hashmap.JoinMap).Dup() 103 anal.Alloc(ctr.mp.Map().Size()) 104 } 105 return nil 106 } 107 108 func (ctr *container) emptyProbe(bat *batch.Batch, ap *Argument, proc *process.Process, anal process.Analyze, isFirst bool, isLast bool) error { 109 defer bat.Clean(proc.Mp()) 110 anal.Input(bat, isFirst) 111 rbat := batch.NewWithSize(len(ap.Result)) 112 count := bat.Length() 113 for i, rp := range ap.Result { 114 if rp.Rel == 0 { 115 rbat.Vecs[i] = bat.Vecs[rp.Pos] 116 bat.Vecs[rp.Pos] = nil 117 } else { 118 rbat.Vecs[i] = vector.NewConstNull(ctr.bat.Vecs[rp.Pos].Typ, count) 119 } 120 } 121 rbat.Zs = bat.Zs 122 bat.Zs = nil 123 anal.Output(rbat, isLast) 124 proc.SetInputBatch(rbat) 125 return nil 126 } 127 128 func (ctr *container) probe(bat *batch.Batch, ap *Argument, proc *process.Process, anal process.Analyze, isFirst bool, isLast bool) error { 129 defer bat.Clean(proc.Mp()) 130 anal.Input(bat, isFirst) 131 rbat := batch.NewWithSize(len(ap.Result)) 132 rbat.Zs = proc.Mp().GetSels() 133 for i, rp := range ap.Result { 134 if rp.Rel == 0 { 135 rbat.Vecs[i] = vector.New(bat.Vecs[rp.Pos].Typ) 136 } else { 137 rbat.Vecs[i] = vector.New(ctr.bat.Vecs[rp.Pos].Typ) 138 } 139 } 140 141 ctr.cleanEvalVectors(proc.Mp()) 142 if err := ctr.evalJoinCondition(bat, ap.Conditions[0], proc, anal); err != nil { 143 return err 144 } 145 146 count := bat.Length() 147 mSels := ctr.mp.Sels() 148 itr := ctr.mp.Map().NewIterator() 149 for i := 0; i < count; i += hashmap.UnitLimit { 150 n := count - i 151 if n > hashmap.UnitLimit { 152 n = hashmap.UnitLimit 153 } 154 copy(ctr.inBuckets, hashmap.OneUInt8s) 155 vals, zvals := itr.Find(i, n, ctr.vecs, ctr.inBuckets) 156 for k := 0; k < n; k++ { 157 if ctr.inBuckets[k] == 0 { 158 continue 159 } 160 if zvals[k] == 0 || vals[k] == 0 { 161 for j, rp := range ap.Result { 162 if rp.Rel == 0 { 163 if err := vector.UnionOne(rbat.Vecs[j], bat.Vecs[rp.Pos], int64(i+k), proc.Mp()); err != nil { 164 rbat.Clean(proc.Mp()) 165 return err 166 } 167 } else { 168 if err := vector.UnionNull(rbat.Vecs[j], ctr.bat.Vecs[rp.Pos], proc.Mp()); err != nil { 169 rbat.Clean(proc.Mp()) 170 return err 171 } 172 } 173 } 174 rbat.Zs = append(rbat.Zs, bat.Zs[i+k]) 175 continue 176 } 177 sels := mSels[vals[k]-1] 178 matched := false 179 for _, sel := range sels { 180 if ap.Cond != nil { 181 vec, err := colexec.JoinFilterEvalExprInBucket(bat, ctr.bat, i+k, int(sel), proc, ap.Cond) 182 if err != nil { 183 return err 184 } 185 bs := vec.Col.([]bool) 186 if !bs[0] { 187 vec.Free(proc.Mp()) 188 continue 189 } 190 vec.Free(proc.Mp()) 191 } 192 matched = true 193 for j, rp := range ap.Result { 194 if rp.Rel == 0 { 195 if err := vector.UnionOne(rbat.Vecs[j], bat.Vecs[rp.Pos], int64(i+k), proc.Mp()); err != nil { 196 rbat.Clean(proc.Mp()) 197 return err 198 } 199 } else { 200 if err := vector.UnionOne(rbat.Vecs[j], ctr.bat.Vecs[rp.Pos], int64(sel), proc.Mp()); err != nil { 201 rbat.Clean(proc.Mp()) 202 return err 203 } 204 } 205 } 206 rbat.Zs = append(rbat.Zs, ctr.bat.Zs[sel]) 207 } 208 if !matched { 209 for j, rp := range ap.Result { 210 if rp.Rel == 0 { 211 if err := vector.UnionOne(rbat.Vecs[j], bat.Vecs[rp.Pos], int64(i+k), proc.Mp()); err != nil { 212 rbat.Clean(proc.Mp()) 213 return err 214 } 215 } else { 216 if err := vector.UnionNull(rbat.Vecs[j], ctr.bat.Vecs[rp.Pos], proc.Mp()); err != nil { 217 rbat.Clean(proc.Mp()) 218 return err 219 } 220 } 221 } 222 rbat.Zs = append(rbat.Zs, bat.Zs[i+k]) 223 continue 224 } 225 } 226 } 227 rbat.ExpandNulls() 228 anal.Output(rbat, isLast) 229 proc.SetInputBatch(rbat) 230 return nil 231 } 232 233 func (ctr *container) evalJoinCondition(bat *batch.Batch, conds []*plan.Expr, proc *process.Process, analyze process.Analyze) error { 234 for i, cond := range conds { 235 vec, err := colexec.EvalExpr(bat, proc, cond) 236 if err != nil || vec.ConstExpand(false, proc.Mp()) == nil { 237 ctr.cleanEvalVectors(proc.Mp()) 238 return err 239 } 240 ctr.vecs[i] = vec 241 ctr.evecs[i].vec = vec 242 ctr.evecs[i].needFree = true 243 for j := range bat.Vecs { 244 if bat.Vecs[j] == vec { 245 ctr.evecs[i].needFree = false 246 break 247 } 248 } 249 if ctr.evecs[i].needFree && vec != nil { 250 analyze.Alloc(int64(vec.Size())) 251 } 252 } 253 return nil 254 }