github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/left/join.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package left
    16  
    17  import (
    18  	"bytes"
    19  	"time"
    20  
    21  	"github.com/matrixorigin/matrixone/pkg/common/hashmap"
    22  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    23  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    24  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    25  	"github.com/matrixorigin/matrixone/pkg/sql/plan"
    26  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    27  )
    28  
    29  func String(_ any, buf *bytes.Buffer) {
    30  	buf.WriteString(" left join ")
    31  }
    32  
    33  func Prepare(proc *process.Process, arg any) error {
    34  	ap := arg.(*Argument)
    35  	ap.ctr = new(container)
    36  	ap.ctr.inBuckets = make([]uint8, hashmap.UnitLimit)
    37  	ap.ctr.evecs = make([]evalVector, len(ap.Conditions[0]))
    38  	ap.ctr.vecs = make([]*vector.Vector, len(ap.Conditions[0]))
    39  	ap.ctr.bat = batch.NewWithSize(len(ap.Typs))
    40  	ap.ctr.bat.Zs = proc.Mp().GetSels()
    41  	for i, typ := range ap.Typs {
    42  		ap.ctr.bat.Vecs[i] = vector.New(typ)
    43  	}
    44  	return nil
    45  }
    46  
    47  func Call(idx int, proc *process.Process, arg any, isFirst bool, isLast bool) (bool, error) {
    48  	anal := proc.GetAnalyze(idx)
    49  	anal.Start()
    50  	defer anal.Stop()
    51  	ap := arg.(*Argument)
    52  	ctr := ap.ctr
    53  	for {
    54  		switch ctr.state {
    55  		case Build:
    56  			if err := ctr.build(ap, proc, anal); err != nil {
    57  				ap.Free(proc, true)
    58  				return false, err
    59  			}
    60  			ctr.state = Probe
    61  
    62  		case Probe:
    63  			start := time.Now()
    64  			bat := <-proc.Reg.MergeReceivers[0].Ch
    65  			anal.WaitStop(start)
    66  
    67  			if bat == nil {
    68  				ctr.state = End
    69  				continue
    70  			}
    71  			if bat.Length() == 0 {
    72  				continue
    73  			}
    74  			if ctr.bat.Length() == 0 {
    75  				if err := ctr.emptyProbe(bat, ap, proc, anal, isFirst, isLast); err != nil {
    76  					ap.Free(proc, true)
    77  					return false, err
    78  				}
    79  			} else {
    80  				if err := ctr.probe(bat, ap, proc, anal, isFirst, isLast); err != nil {
    81  					ap.Free(proc, true)
    82  					return false, err
    83  				}
    84  			}
    85  			return false, nil
    86  
    87  		default:
    88  			ap.Free(proc, false)
    89  			proc.SetInputBatch(nil)
    90  			return true, nil
    91  		}
    92  	}
    93  }
    94  
    95  func (ctr *container) build(ap *Argument, proc *process.Process, anal process.Analyze) error {
    96  	start := time.Now()
    97  	bat := <-proc.Reg.MergeReceivers[1].Ch
    98  	anal.WaitStop(start)
    99  
   100  	if bat != nil {
   101  		ctr.bat = bat
   102  		ctr.mp = bat.Ht.(*hashmap.JoinMap).Dup()
   103  		anal.Alloc(ctr.mp.Map().Size())
   104  	}
   105  	return nil
   106  }
   107  
   108  func (ctr *container) emptyProbe(bat *batch.Batch, ap *Argument, proc *process.Process, anal process.Analyze, isFirst bool, isLast bool) error {
   109  	defer bat.Clean(proc.Mp())
   110  	anal.Input(bat, isFirst)
   111  	rbat := batch.NewWithSize(len(ap.Result))
   112  	count := bat.Length()
   113  	for i, rp := range ap.Result {
   114  		if rp.Rel == 0 {
   115  			rbat.Vecs[i] = bat.Vecs[rp.Pos]
   116  			bat.Vecs[rp.Pos] = nil
   117  		} else {
   118  			rbat.Vecs[i] = vector.NewConstNull(ctr.bat.Vecs[rp.Pos].Typ, count)
   119  		}
   120  	}
   121  	rbat.Zs = bat.Zs
   122  	bat.Zs = nil
   123  	anal.Output(rbat, isLast)
   124  	proc.SetInputBatch(rbat)
   125  	return nil
   126  }
   127  
   128  func (ctr *container) probe(bat *batch.Batch, ap *Argument, proc *process.Process, anal process.Analyze, isFirst bool, isLast bool) error {
   129  	defer bat.Clean(proc.Mp())
   130  	anal.Input(bat, isFirst)
   131  	rbat := batch.NewWithSize(len(ap.Result))
   132  	rbat.Zs = proc.Mp().GetSels()
   133  	for i, rp := range ap.Result {
   134  		if rp.Rel == 0 {
   135  			rbat.Vecs[i] = vector.New(bat.Vecs[rp.Pos].Typ)
   136  		} else {
   137  			rbat.Vecs[i] = vector.New(ctr.bat.Vecs[rp.Pos].Typ)
   138  		}
   139  	}
   140  
   141  	ctr.cleanEvalVectors(proc.Mp())
   142  	if err := ctr.evalJoinCondition(bat, ap.Conditions[0], proc, anal); err != nil {
   143  		return err
   144  	}
   145  
   146  	count := bat.Length()
   147  	mSels := ctr.mp.Sels()
   148  	itr := ctr.mp.Map().NewIterator()
   149  	for i := 0; i < count; i += hashmap.UnitLimit {
   150  		n := count - i
   151  		if n > hashmap.UnitLimit {
   152  			n = hashmap.UnitLimit
   153  		}
   154  		copy(ctr.inBuckets, hashmap.OneUInt8s)
   155  		vals, zvals := itr.Find(i, n, ctr.vecs, ctr.inBuckets)
   156  		for k := 0; k < n; k++ {
   157  			if ctr.inBuckets[k] == 0 {
   158  				continue
   159  			}
   160  			if zvals[k] == 0 || vals[k] == 0 {
   161  				for j, rp := range ap.Result {
   162  					if rp.Rel == 0 {
   163  						if err := vector.UnionOne(rbat.Vecs[j], bat.Vecs[rp.Pos], int64(i+k), proc.Mp()); err != nil {
   164  							rbat.Clean(proc.Mp())
   165  							return err
   166  						}
   167  					} else {
   168  						if err := vector.UnionNull(rbat.Vecs[j], ctr.bat.Vecs[rp.Pos], proc.Mp()); err != nil {
   169  							rbat.Clean(proc.Mp())
   170  							return err
   171  						}
   172  					}
   173  				}
   174  				rbat.Zs = append(rbat.Zs, bat.Zs[i+k])
   175  				continue
   176  			}
   177  			sels := mSels[vals[k]-1]
   178  			matched := false
   179  			for _, sel := range sels {
   180  				if ap.Cond != nil {
   181  					vec, err := colexec.JoinFilterEvalExprInBucket(bat, ctr.bat, i+k, int(sel), proc, ap.Cond)
   182  					if err != nil {
   183  						return err
   184  					}
   185  					bs := vec.Col.([]bool)
   186  					if !bs[0] {
   187  						vec.Free(proc.Mp())
   188  						continue
   189  					}
   190  					vec.Free(proc.Mp())
   191  				}
   192  				matched = true
   193  				for j, rp := range ap.Result {
   194  					if rp.Rel == 0 {
   195  						if err := vector.UnionOne(rbat.Vecs[j], bat.Vecs[rp.Pos], int64(i+k), proc.Mp()); err != nil {
   196  							rbat.Clean(proc.Mp())
   197  							return err
   198  						}
   199  					} else {
   200  						if err := vector.UnionOne(rbat.Vecs[j], ctr.bat.Vecs[rp.Pos], int64(sel), proc.Mp()); err != nil {
   201  							rbat.Clean(proc.Mp())
   202  							return err
   203  						}
   204  					}
   205  				}
   206  				rbat.Zs = append(rbat.Zs, ctr.bat.Zs[sel])
   207  			}
   208  			if !matched {
   209  				for j, rp := range ap.Result {
   210  					if rp.Rel == 0 {
   211  						if err := vector.UnionOne(rbat.Vecs[j], bat.Vecs[rp.Pos], int64(i+k), proc.Mp()); err != nil {
   212  							rbat.Clean(proc.Mp())
   213  							return err
   214  						}
   215  					} else {
   216  						if err := vector.UnionNull(rbat.Vecs[j], ctr.bat.Vecs[rp.Pos], proc.Mp()); err != nil {
   217  							rbat.Clean(proc.Mp())
   218  							return err
   219  						}
   220  					}
   221  				}
   222  				rbat.Zs = append(rbat.Zs, bat.Zs[i+k])
   223  				continue
   224  			}
   225  		}
   226  	}
   227  	rbat.ExpandNulls()
   228  	anal.Output(rbat, isLast)
   229  	proc.SetInputBatch(rbat)
   230  	return nil
   231  }
   232  
   233  func (ctr *container) evalJoinCondition(bat *batch.Batch, conds []*plan.Expr, proc *process.Process, analyze process.Analyze) error {
   234  	for i, cond := range conds {
   235  		vec, err := colexec.EvalExpr(bat, proc, cond)
   236  		if err != nil || vec.ConstExpand(false, proc.Mp()) == nil {
   237  			ctr.cleanEvalVectors(proc.Mp())
   238  			return err
   239  		}
   240  		ctr.vecs[i] = vec
   241  		ctr.evecs[i].vec = vec
   242  		ctr.evecs[i].needFree = true
   243  		for j := range bat.Vecs {
   244  			if bat.Vecs[j] == vec {
   245  				ctr.evecs[i].needFree = false
   246  				break
   247  			}
   248  		}
   249  		if ctr.evecs[i].needFree && vec != nil {
   250  			analyze.Alloc(int64(vec.Size()))
   251  		}
   252  	}
   253  	return nil
   254  }