github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/minus/minus.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package minus
    16  
    17  import (
    18  	"bytes"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/common/hashmap"
    21  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    22  	"github.com/matrixorigin/matrixone/pkg/vm"
    23  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    24  )
    25  
    26  const argName = "minus"
    27  
    28  func (arg *Argument) String(buf *bytes.Buffer) {
    29  	buf.WriteString(argName)
    30  	buf.WriteString(": minus ")
    31  }
    32  
    33  func (arg *Argument) Prepare(proc *process.Process) error {
    34  	var err error
    35  	{
    36  		arg.ctr = new(container)
    37  		arg.ctr.InitReceiver(proc, false)
    38  		arg.ctr.bat = nil
    39  		arg.ctr.hashTable, err = hashmap.NewStrMap(true, arg.IBucket, arg.NBucket, proc.Mp())
    40  		if err != nil {
    41  			return err
    42  		}
    43  	}
    44  	return nil
    45  }
    46  
    47  // Call is the execute method of minus operator
    48  // it built a hash table for right relation first.
    49  // use values from left relation to probe and update the hash table.
    50  // and preserve values that do not exist in the hash table.
    51  func (arg *Argument) Call(proc *process.Process) (vm.CallResult, error) {
    52  	if err, isCancel := vm.CancelCheck(proc); isCancel {
    53  		return vm.CancelResult, err
    54  	}
    55  
    56  	var err error
    57  	// prepare the analysis work.
    58  	analyze := proc.GetAnalyze(arg.GetIdx(), arg.GetParallelIdx(), arg.GetParallelMajor())
    59  	analyze.Start()
    60  	defer analyze.Stop()
    61  	result := vm.NewCallResult()
    62  
    63  	for {
    64  		switch arg.ctr.state {
    65  		case buildingHashMap:
    66  			// step 1: build the hash table by all right batches.
    67  			if err = arg.ctr.buildHashTable(proc, analyze, 1, arg.GetIsFirst()); err != nil {
    68  				return result, err
    69  			}
    70  			if arg.ctr.hashTable != nil {
    71  				analyze.Alloc(arg.ctr.hashTable.Size())
    72  			}
    73  			arg.ctr.state = probingHashMap
    74  
    75  		case probingHashMap:
    76  			// step 2: use left batches to probe and update the hash table.
    77  			//
    78  			// only one batch is processed during each loop, and the batch will be sent to
    79  			// next operator immediately after successful processing.
    80  			last := false
    81  			last, err = arg.ctr.probeHashTable(proc, analyze, 0, arg.GetIsFirst(), arg.GetIsLast(), &result)
    82  			if err != nil {
    83  				return result, err
    84  			}
    85  			if last {
    86  				arg.ctr.state = operatorEnd
    87  				continue
    88  			}
    89  			return result, nil
    90  
    91  		case operatorEnd:
    92  			// operator over.
    93  			result.Batch = nil
    94  			result.Status = vm.ExecStop
    95  			return result, nil
    96  		}
    97  	}
    98  }
    99  
   100  // buildHashTable use all batches from proc.Reg.MergeReceiver[index] to build the hash map.
   101  func (ctr *container) buildHashTable(proc *process.Process, ana process.Analyze, index int, isFirst bool) error {
   102  	for {
   103  		bat, _, err := ctr.ReceiveFromSingleReg(index, ana)
   104  		if err != nil {
   105  			return err
   106  		}
   107  
   108  		// the last batch of pipeline.
   109  		if bat == nil {
   110  			break
   111  		}
   112  
   113  		// just an empty batch.
   114  		if bat.IsEmpty() {
   115  			proc.PutBatch(bat)
   116  			continue
   117  		}
   118  		ana.Input(bat, isFirst)
   119  
   120  		itr := ctr.hashTable.NewIterator()
   121  		count := bat.Vecs[0].Length()
   122  		for i := 0; i < count; i += hashmap.UnitLimit {
   123  			n := count - i
   124  			if n > hashmap.UnitLimit {
   125  				n = hashmap.UnitLimit
   126  			}
   127  			_, _, err := itr.Insert(i, n, bat.Vecs)
   128  			if err != nil {
   129  				bat.Clean(proc.Mp())
   130  				return err
   131  			}
   132  		}
   133  		proc.PutBatch(bat)
   134  	}
   135  	return nil
   136  }
   137  
   138  // probeHashTable use a batch from proc.Reg.MergeReceivers[index] to probe and update the hash map.
   139  // If a row of data never appears in the hash table, add it into hath table and send it to the next operator.
   140  // if batch is the last one, return true, else return false.
   141  func (ctr *container) probeHashTable(proc *process.Process, ana process.Analyze, index int, isFirst bool, isLast bool, result *vm.CallResult) (bool, error) {
   142  	inserted := make([]uint8, hashmap.UnitLimit)
   143  	restoreInserted := make([]uint8, hashmap.UnitLimit)
   144  
   145  	for {
   146  		bat, _, err := ctr.ReceiveFromSingleReg(index, ana)
   147  		if err != nil {
   148  			return false, err
   149  		}
   150  
   151  		// the last batch of block.
   152  		if bat == nil {
   153  			return true, nil
   154  		}
   155  		if bat.Last() {
   156  			ctr.bat = bat
   157  			result.Batch = ctr.bat
   158  			return false, nil
   159  		}
   160  		// just an empty batch.
   161  		if bat.IsEmpty() {
   162  			proc.PutBatch(bat)
   163  			continue
   164  		}
   165  		ana.Input(bat, isFirst)
   166  
   167  		if ctr.bat != nil {
   168  			proc.PutBatch(ctr.bat)
   169  			ctr.bat = nil
   170  		}
   171  		ctr.bat = batch.NewWithSize(len(bat.Vecs))
   172  		for i := range bat.Vecs {
   173  			ctr.bat.Vecs[i] = proc.GetVector(*bat.Vecs[i].GetType())
   174  		}
   175  
   176  		count := bat.Vecs[0].Length()
   177  		itr := ctr.hashTable.NewIterator()
   178  		for i := 0; i < count; i += hashmap.UnitLimit {
   179  			oldHashGroup := ctr.hashTable.GroupCount()
   180  
   181  			n := count - i
   182  			if n > hashmap.UnitLimit {
   183  				n = hashmap.UnitLimit
   184  			}
   185  			vs, _, err := itr.Insert(i, n, bat.Vecs)
   186  			if err != nil {
   187  				bat.Clean(proc.Mp())
   188  				return false, err
   189  			}
   190  			copy(inserted[:n], restoreInserted[:n])
   191  			rows := oldHashGroup
   192  			for j, v := range vs {
   193  				if v > rows {
   194  					// ensure that the same value will only be inserted once.
   195  					rows++
   196  					inserted[j] = 1
   197  				}
   198  			}
   199  			ctr.bat.AddRowCount(int(rows - oldHashGroup))
   200  
   201  			newHashGroup := ctr.hashTable.GroupCount()
   202  			insertCount := int(newHashGroup - oldHashGroup)
   203  			if insertCount > 0 {
   204  				for pos := range bat.Vecs {
   205  					if err := ctr.bat.Vecs[pos].UnionBatch(bat.Vecs[pos], int64(i), insertCount, inserted[:n], proc.Mp()); err != nil {
   206  						bat.Clean(proc.Mp())
   207  						return false, err
   208  					}
   209  				}
   210  			}
   211  		}
   212  		ana.Output(ctr.bat, isLast)
   213  		result.Batch = ctr.bat
   214  		proc.PutBatch(bat)
   215  		return false, nil
   216  	}
   217  }