github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/minus/minus.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package minus
    16  
    17  import (
    18  	"bytes"
    19  	"time"
    20  
    21  	"github.com/matrixorigin/matrixone/pkg/common/hashmap"
    22  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    23  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    24  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    25  )
    26  
    27  func String(_ any, buf *bytes.Buffer) {
    28  	buf.WriteString(" minus ")
    29  }
    30  
    31  func Prepare(proc *process.Process, argument any) error {
    32  	var err error
    33  	arg := argument.(*Argument)
    34  	{
    35  		arg.ctr.bat = nil
    36  		arg.ctr.hashTable, err = hashmap.NewStrMap(true, arg.IBucket, arg.NBucket, proc.Mp())
    37  		if err != nil {
    38  			return err
    39  		}
    40  	}
    41  	return nil
    42  }
    43  
    44  // Call is the execute method of minus operator
    45  // it built a hash table for right relation first.
    46  // use values from left relation to probe and update the hash table.
    47  // and preserve values that do not exist in the hash table.
    48  func Call(idx int, proc *process.Process, argument any, isFirst bool, isLast bool) (bool, error) {
    49  	var err error
    50  	arg := argument.(*Argument)
    51  
    52  	// prepare the analysis work.
    53  	analyze := proc.GetAnalyze(idx)
    54  	analyze.Start()
    55  	defer analyze.Stop()
    56  
    57  	for {
    58  		switch arg.ctr.state {
    59  		case buildingHashMap:
    60  			// step 1: build the hash table by all right batches.
    61  			if err = arg.ctr.buildHashTable(proc, analyze, 1, isFirst); err != nil {
    62  				arg.Free(proc, true)
    63  				return false, err
    64  			}
    65  			if arg.ctr.hashTable != nil {
    66  				analyze.Alloc(arg.ctr.hashTable.Size())
    67  			}
    68  			arg.ctr.state = probingHashMap
    69  
    70  		case probingHashMap:
    71  			// step 2: use left batches to probe and update the hash table.
    72  			//
    73  			// only one batch is processed during each loop, and the batch will be sent to
    74  			// next operator immediately after successful processing.
    75  			last := false
    76  			last, err = arg.ctr.probeHashTable(proc, analyze, 0, isFirst, isLast)
    77  			if err != nil {
    78  				arg.Free(proc, true)
    79  				return false, err
    80  			}
    81  			if last {
    82  				arg.ctr.state = operatorEnd
    83  				continue
    84  			}
    85  			return false, nil
    86  
    87  		case operatorEnd:
    88  			// operator over.
    89  			arg.Free(proc, false)
    90  			proc.SetInputBatch(nil)
    91  			return true, nil
    92  		}
    93  	}
    94  }
    95  
    96  // buildHashTable use all batches from proc.Reg.MergeReceiver[index] to build the hash map.
    97  func (ctr *container) buildHashTable(proc *process.Process, ana process.Analyze, index int, isFirst bool) error {
    98  	for {
    99  		start := time.Now()
   100  		bat := <-proc.Reg.MergeReceivers[index].Ch
   101  		ana.WaitStop(start)
   102  		// the last batch of pipeline.
   103  		if bat == nil {
   104  			break
   105  		}
   106  
   107  		// just an empty batch.
   108  		if len(bat.Zs) == 0 {
   109  			continue
   110  		}
   111  		ana.Input(bat, isFirst)
   112  
   113  		itr := ctr.hashTable.NewIterator()
   114  		count := vector.Length(bat.Vecs[0])
   115  		for i := 0; i < count; i += hashmap.UnitLimit {
   116  			n := count - i
   117  			if n > hashmap.UnitLimit {
   118  				n = hashmap.UnitLimit
   119  			}
   120  			_, _, err := itr.Insert(i, n, bat.Vecs)
   121  			if err != nil {
   122  				bat.Clean(proc.Mp())
   123  				return err
   124  			}
   125  		}
   126  		bat.Clean(proc.Mp())
   127  	}
   128  	return nil
   129  }
   130  
   131  // probeHashTable use a batch from proc.Reg.MergeReceivers[index] to probe and update the hash map.
   132  // If a row of data never appears in the hash table, add it into hath table and send it to the next operator.
   133  // if batch is the last one, return true, else return false.
   134  func (ctr *container) probeHashTable(proc *process.Process, ana process.Analyze, index int, isFirst bool, isLast bool) (bool, error) {
   135  	inserted := make([]uint8, hashmap.UnitLimit)
   136  	restoreInserted := make([]uint8, hashmap.UnitLimit)
   137  
   138  	for {
   139  		start := time.Now()
   140  		bat := <-proc.Reg.MergeReceivers[index].Ch
   141  		ana.WaitStop(start)
   142  
   143  		// the last batch of block.
   144  		if bat == nil {
   145  			return true, nil
   146  		}
   147  		// just an empty batch.
   148  		if len(bat.Zs) == 0 {
   149  			continue
   150  		}
   151  		ana.Input(bat, isFirst)
   152  
   153  		ctr.bat = batch.NewWithSize(len(bat.Vecs))
   154  		for i := range bat.Vecs {
   155  			ctr.bat.Vecs[i] = vector.New(bat.Vecs[i].Typ)
   156  		}
   157  
   158  		count := vector.Length(bat.Vecs[0])
   159  		itr := ctr.hashTable.NewIterator()
   160  		for i := 0; i < count; i += hashmap.UnitLimit {
   161  			oldHashGroup := ctr.hashTable.GroupCount()
   162  
   163  			n := count - i
   164  			if n > hashmap.UnitLimit {
   165  				n = hashmap.UnitLimit
   166  			}
   167  			vs, _, err := itr.Insert(i, n, bat.Vecs)
   168  			if err != nil {
   169  				bat.Clean(proc.Mp())
   170  				return false, err
   171  			}
   172  			copy(inserted[:n], restoreInserted[:n])
   173  			rows := oldHashGroup
   174  			for j, v := range vs {
   175  				if v > rows {
   176  					// ensure that the same value will only be inserted once.
   177  					rows++
   178  					inserted[j] = 1
   179  					ctr.bat.Zs = append(ctr.bat.Zs, 1)
   180  				}
   181  			}
   182  
   183  			newHashGroup := ctr.hashTable.GroupCount()
   184  			insertCount := int(newHashGroup - oldHashGroup)
   185  			if insertCount > 0 {
   186  				for pos := range bat.Vecs {
   187  					if err := vector.UnionBatch(ctr.bat.Vecs[pos], bat.Vecs[pos], int64(i), insertCount, inserted[:n], proc.Mp()); err != nil {
   188  						bat.Clean(proc.Mp())
   189  						return false, err
   190  					}
   191  				}
   192  			}
   193  		}
   194  		ana.Output(ctr.bat, isLast)
   195  		proc.SetInputBatch(ctr.bat)
   196  		ctr.bat = nil
   197  		bat.Clean(proc.Mp())
   198  		return false, nil
   199  	}
   200  }