github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/anti/join.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package anti
    16  
    17  import (
    18  	"bytes"
    19  	"time"
    20  
    21  	"github.com/matrixorigin/matrixone/pkg/common/hashmap"
    22  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    23  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    24  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    25  	"github.com/matrixorigin/matrixone/pkg/sql/plan"
    26  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    27  )
    28  
    29  func String(_ any, buf *bytes.Buffer) {
    30  	buf.WriteString(" anti join ")
    31  }
    32  
    33  func Prepare(proc *process.Process, arg any) error {
    34  	ap := arg.(*Argument)
    35  	ap.ctr = new(container)
    36  	ap.ctr.inBuckets = make([]uint8, hashmap.UnitLimit)
    37  	ap.ctr.evecs = make([]evalVector, len(ap.Conditions[0]))
    38  	ap.ctr.vecs = make([]*vector.Vector, len(ap.Conditions[0]))
    39  	return nil
    40  }
    41  
    42  func Call(idx int, proc *process.Process, arg any, isFirst bool, isLast bool) (bool, error) {
    43  	var err error
    44  	anal := proc.GetAnalyze(idx)
    45  	anal.Start()
    46  	defer anal.Stop()
    47  	ap := arg.(*Argument)
    48  	ctr := ap.ctr
    49  	for {
    50  		switch ctr.state {
    51  		case Build:
    52  			if err := ctr.build(ap, proc, anal); err != nil {
    53  				ap.Free(proc, true)
    54  				return false, err
    55  			}
    56  			ctr.state = Probe
    57  
    58  		case Probe:
    59  			start := time.Now()
    60  			bat := <-proc.Reg.MergeReceivers[0].Ch
    61  			anal.WaitStop(start)
    62  
    63  			if bat == nil {
    64  				ctr.state = End
    65  				continue
    66  			}
    67  			if bat.Length() == 0 {
    68  				continue
    69  			}
    70  			if ctr.bat == nil || ctr.bat.Length() == 0 {
    71  				err = ctr.emptyProbe(bat, ap, proc, anal, isFirst, isLast)
    72  			} else {
    73  				err = ctr.probe(bat, ap, proc, anal, isFirst, isLast)
    74  			}
    75  			if err != nil {
    76  				ap.Free(proc, true)
    77  			}
    78  			return false, err
    79  
    80  		default:
    81  			ap.Free(proc, false)
    82  			proc.SetInputBatch(nil)
    83  			return true, nil
    84  		}
    85  	}
    86  }
    87  
    88  func (ctr *container) build(ap *Argument, proc *process.Process, anal process.Analyze) error {
    89  	start := time.Now()
    90  	bat := <-proc.Reg.MergeReceivers[1].Ch
    91  	anal.WaitStop(start)
    92  	if bat != nil {
    93  		ctr.bat = bat
    94  		ctr.mp = bat.Ht.(*hashmap.JoinMap).Dup()
    95  		ctr.hasNull = ctr.mp.HasNull()
    96  		anal.Alloc(ctr.mp.Map().Size())
    97  	}
    98  	return nil
    99  }
   100  
   101  func (ctr *container) emptyProbe(bat *batch.Batch, ap *Argument, proc *process.Process, anal process.Analyze, isFirst bool, isLast bool) error {
   102  	defer bat.Clean(proc.Mp())
   103  	anal.Input(bat, isFirst)
   104  	rbat := batch.NewWithSize(len(ap.Result))
   105  	rbat.Zs = proc.Mp().GetSels()
   106  	for i, pos := range ap.Result {
   107  		rbat.Vecs[i] = vector.New(bat.Vecs[pos].Typ)
   108  	}
   109  	count := bat.Length()
   110  	for i := 0; i < count; i += hashmap.UnitLimit {
   111  		n := count - i
   112  		if n > hashmap.UnitLimit {
   113  			n = hashmap.UnitLimit
   114  		}
   115  		for k := 0; k < n; k++ {
   116  			for j, pos := range ap.Result {
   117  				if err := vector.UnionOne(rbat.Vecs[j], bat.Vecs[pos], int64(i+k), proc.Mp()); err != nil {
   118  					rbat.Clean(proc.Mp())
   119  					return err
   120  				}
   121  			}
   122  			rbat.Zs = append(rbat.Zs, bat.Zs[i+k])
   123  		}
   124  	}
   125  	rbat.ExpandNulls()
   126  	anal.Output(rbat, isLast)
   127  	proc.SetInputBatch(rbat)
   128  	return nil
   129  }
   130  
   131  func (ctr *container) probe(bat *batch.Batch, ap *Argument, proc *process.Process, anal process.Analyze, isFirst bool, isLast bool) error {
   132  	defer bat.Clean(proc.Mp())
   133  	anal.Input(bat, isFirst)
   134  	rbat := batch.NewWithSize(len(ap.Result))
   135  	rbat.Zs = proc.Mp().GetSels()
   136  	for i, pos := range ap.Result {
   137  		rbat.Vecs[i] = vector.New(bat.Vecs[pos].Typ)
   138  	}
   139  	if (ctr.bat.Length() == 1 && ctr.hasNull) || ctr.bat.Length() == 0 {
   140  		anal.Output(rbat, isLast)
   141  		proc.SetInputBatch(rbat)
   142  		return nil
   143  	}
   144  
   145  	ap.ctr.cleanEvalVectors(proc.Mp())
   146  	if err := ctr.evalJoinCondition(bat, ap.Conditions[0], proc); err != nil {
   147  		return err
   148  	}
   149  
   150  	count := bat.Length()
   151  	mSels := ctr.mp.Sels()
   152  	itr := ctr.mp.Map().NewIterator()
   153  	eligible := make([]int64, 0, hashmap.UnitLimit)
   154  	for i := 0; i < count; i += hashmap.UnitLimit {
   155  		n := count - i
   156  		if n > hashmap.UnitLimit {
   157  			n = hashmap.UnitLimit
   158  		}
   159  		copy(ctr.inBuckets, hashmap.OneUInt8s)
   160  		vals, zvals := itr.Find(i, n, ctr.vecs, ctr.inBuckets)
   161  		for k := 0; k < n; k++ {
   162  			if ctr.inBuckets[k] == 0 || zvals[k] == 0 {
   163  				continue
   164  			}
   165  			if vals[k] == 0 {
   166  				eligible = append(eligible, int64(i+k))
   167  				rbat.Zs = append(rbat.Zs, bat.Zs[i+k])
   168  				continue
   169  			}
   170  			if ap.Cond != nil {
   171  				matched := false // mark if any tuple satisfies the condition
   172  				sels := mSels[vals[k]-1]
   173  				for _, sel := range sels {
   174  					vec, err := colexec.JoinFilterEvalExprInBucket(bat, ctr.bat, i+k, int(sel), proc, ap.Cond)
   175  					if err != nil {
   176  						return err
   177  					}
   178  					bs := vec.Col.([]bool)
   179  					if bs[0] {
   180  						matched = true
   181  						vec.Free(proc.Mp())
   182  						break
   183  					}
   184  					vec.Free(proc.Mp())
   185  				}
   186  				if matched {
   187  					continue
   188  				}
   189  				eligible = append(eligible, int64(i+k))
   190  				rbat.Zs = append(rbat.Zs, bat.Zs[i+k])
   191  			}
   192  		}
   193  		for j, pos := range ap.Result {
   194  			if err := vector.Union(rbat.Vecs[j], bat.Vecs[pos], eligible, true, proc.Mp()); err != nil {
   195  				rbat.Clean(proc.Mp())
   196  				return err
   197  			}
   198  		}
   199  		eligible = eligible[:0]
   200  	}
   201  	rbat.ExpandNulls()
   202  	anal.Output(rbat, isLast)
   203  	proc.SetInputBatch(rbat)
   204  	return nil
   205  }
   206  
   207  func (ctr *container) evalJoinCondition(bat *batch.Batch, conds []*plan.Expr, proc *process.Process) error {
   208  	for i, cond := range conds {
   209  		vec, err := colexec.EvalExpr(bat, proc, cond)
   210  		if err != nil || vec.ConstExpand(false, proc.Mp()) == nil {
   211  			ctr.cleanEvalVectors(proc.Mp())
   212  			return err
   213  		}
   214  		ctr.vecs[i] = vec
   215  		ctr.evecs[i].vec = vec
   216  		ctr.evecs[i].needFree = true
   217  		for j := range bat.Vecs {
   218  			if bat.Vecs[j] == vec {
   219  				ctr.evecs[i].needFree = false
   220  				break
   221  			}
   222  		}
   223  	}
   224  	return nil
   225  }