github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/anti/join.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package anti 16 17 import ( 18 "bytes" 19 "time" 20 21 "github.com/matrixorigin/matrixone/pkg/common/hashmap" 22 "github.com/matrixorigin/matrixone/pkg/container/batch" 23 "github.com/matrixorigin/matrixone/pkg/container/vector" 24 "github.com/matrixorigin/matrixone/pkg/sql/colexec" 25 "github.com/matrixorigin/matrixone/pkg/sql/plan" 26 "github.com/matrixorigin/matrixone/pkg/vm/process" 27 ) 28 29 func String(_ any, buf *bytes.Buffer) { 30 buf.WriteString(" anti join ") 31 } 32 33 func Prepare(proc *process.Process, arg any) error { 34 ap := arg.(*Argument) 35 ap.ctr = new(container) 36 ap.ctr.inBuckets = make([]uint8, hashmap.UnitLimit) 37 ap.ctr.evecs = make([]evalVector, len(ap.Conditions[0])) 38 ap.ctr.vecs = make([]*vector.Vector, len(ap.Conditions[0])) 39 return nil 40 } 41 42 func Call(idx int, proc *process.Process, arg any, isFirst bool, isLast bool) (bool, error) { 43 var err error 44 anal := proc.GetAnalyze(idx) 45 anal.Start() 46 defer anal.Stop() 47 ap := arg.(*Argument) 48 ctr := ap.ctr 49 for { 50 switch ctr.state { 51 case Build: 52 if err := ctr.build(ap, proc, anal); err != nil { 53 ap.Free(proc, true) 54 return false, err 55 } 56 ctr.state = Probe 57 58 case Probe: 59 start := time.Now() 60 bat := <-proc.Reg.MergeReceivers[0].Ch 61 anal.WaitStop(start) 62 63 if bat == nil { 64 ctr.state = End 65 continue 66 } 67 if bat.Length() == 0 { 68 continue 69 } 70 if ctr.bat == nil || ctr.bat.Length() == 0 { 71 err = ctr.emptyProbe(bat, ap, proc, anal, isFirst, isLast) 72 } else { 73 err = ctr.probe(bat, ap, proc, anal, isFirst, isLast) 74 } 75 if err != nil { 76 ap.Free(proc, true) 77 } 78 return false, err 79 80 default: 81 ap.Free(proc, false) 82 proc.SetInputBatch(nil) 83 return true, nil 84 } 85 } 86 } 87 88 func (ctr *container) build(ap *Argument, proc *process.Process, anal process.Analyze) error { 89 start := time.Now() 90 bat := <-proc.Reg.MergeReceivers[1].Ch 91 anal.WaitStop(start) 92 if bat != nil { 93 ctr.bat = bat 94 ctr.mp = bat.Ht.(*hashmap.JoinMap).Dup() 95 ctr.hasNull = ctr.mp.HasNull() 96 anal.Alloc(ctr.mp.Map().Size()) 97 } 98 return nil 99 } 100 101 func (ctr *container) emptyProbe(bat *batch.Batch, ap *Argument, proc *process.Process, anal process.Analyze, isFirst bool, isLast bool) error { 102 defer bat.Clean(proc.Mp()) 103 anal.Input(bat, isFirst) 104 rbat := batch.NewWithSize(len(ap.Result)) 105 rbat.Zs = proc.Mp().GetSels() 106 for i, pos := range ap.Result { 107 rbat.Vecs[i] = vector.New(bat.Vecs[pos].Typ) 108 } 109 count := bat.Length() 110 for i := 0; i < count; i += hashmap.UnitLimit { 111 n := count - i 112 if n > hashmap.UnitLimit { 113 n = hashmap.UnitLimit 114 } 115 for k := 0; k < n; k++ { 116 for j, pos := range ap.Result { 117 if err := vector.UnionOne(rbat.Vecs[j], bat.Vecs[pos], int64(i+k), proc.Mp()); err != nil { 118 rbat.Clean(proc.Mp()) 119 return err 120 } 121 } 122 rbat.Zs = append(rbat.Zs, bat.Zs[i+k]) 123 } 124 } 125 rbat.ExpandNulls() 126 anal.Output(rbat, isLast) 127 proc.SetInputBatch(rbat) 128 return nil 129 } 130 131 func (ctr *container) probe(bat *batch.Batch, ap *Argument, proc *process.Process, anal process.Analyze, isFirst bool, isLast bool) error { 132 defer bat.Clean(proc.Mp()) 133 anal.Input(bat, isFirst) 134 rbat := batch.NewWithSize(len(ap.Result)) 135 rbat.Zs = proc.Mp().GetSels() 136 for i, pos := range ap.Result { 137 rbat.Vecs[i] = vector.New(bat.Vecs[pos].Typ) 138 } 139 if (ctr.bat.Length() == 1 && ctr.hasNull) || ctr.bat.Length() == 0 { 140 anal.Output(rbat, isLast) 141 proc.SetInputBatch(rbat) 142 return nil 143 } 144 145 ap.ctr.cleanEvalVectors(proc.Mp()) 146 if err := ctr.evalJoinCondition(bat, ap.Conditions[0], proc); err != nil { 147 return err 148 } 149 150 count := bat.Length() 151 mSels := ctr.mp.Sels() 152 itr := ctr.mp.Map().NewIterator() 153 eligible := make([]int64, 0, hashmap.UnitLimit) 154 for i := 0; i < count; i += hashmap.UnitLimit { 155 n := count - i 156 if n > hashmap.UnitLimit { 157 n = hashmap.UnitLimit 158 } 159 copy(ctr.inBuckets, hashmap.OneUInt8s) 160 vals, zvals := itr.Find(i, n, ctr.vecs, ctr.inBuckets) 161 for k := 0; k < n; k++ { 162 if ctr.inBuckets[k] == 0 || zvals[k] == 0 { 163 continue 164 } 165 if vals[k] == 0 { 166 eligible = append(eligible, int64(i+k)) 167 rbat.Zs = append(rbat.Zs, bat.Zs[i+k]) 168 continue 169 } 170 if ap.Cond != nil { 171 matched := false // mark if any tuple satisfies the condition 172 sels := mSels[vals[k]-1] 173 for _, sel := range sels { 174 vec, err := colexec.JoinFilterEvalExprInBucket(bat, ctr.bat, i+k, int(sel), proc, ap.Cond) 175 if err != nil { 176 return err 177 } 178 bs := vec.Col.([]bool) 179 if bs[0] { 180 matched = true 181 vec.Free(proc.Mp()) 182 break 183 } 184 vec.Free(proc.Mp()) 185 } 186 if matched { 187 continue 188 } 189 eligible = append(eligible, int64(i+k)) 190 rbat.Zs = append(rbat.Zs, bat.Zs[i+k]) 191 } 192 } 193 for j, pos := range ap.Result { 194 if err := vector.Union(rbat.Vecs[j], bat.Vecs[pos], eligible, true, proc.Mp()); err != nil { 195 rbat.Clean(proc.Mp()) 196 return err 197 } 198 } 199 eligible = eligible[:0] 200 } 201 rbat.ExpandNulls() 202 anal.Output(rbat, isLast) 203 proc.SetInputBatch(rbat) 204 return nil 205 } 206 207 func (ctr *container) evalJoinCondition(bat *batch.Batch, conds []*plan.Expr, proc *process.Process) error { 208 for i, cond := range conds { 209 vec, err := colexec.EvalExpr(bat, proc, cond) 210 if err != nil || vec.ConstExpand(false, proc.Mp()) == nil { 211 ctr.cleanEvalVectors(proc.Mp()) 212 return err 213 } 214 ctr.vecs[i] = vec 215 ctr.evecs[i].vec = vec 216 ctr.evecs[i].needFree = true 217 for j := range bat.Vecs { 218 if bat.Vecs[j] == vec { 219 ctr.evecs[i].needFree = false 220 break 221 } 222 } 223 } 224 return nil 225 }