github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/minus/minus.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package minus 16 17 import ( 18 "bytes" 19 20 "github.com/matrixorigin/matrixone/pkg/common/hashmap" 21 "github.com/matrixorigin/matrixone/pkg/container/batch" 22 "github.com/matrixorigin/matrixone/pkg/vm" 23 "github.com/matrixorigin/matrixone/pkg/vm/process" 24 ) 25 26 const argName = "minus" 27 28 func (arg *Argument) String(buf *bytes.Buffer) { 29 buf.WriteString(argName) 30 buf.WriteString(": minus ") 31 } 32 33 func (arg *Argument) Prepare(proc *process.Process) error { 34 var err error 35 { 36 arg.ctr = new(container) 37 arg.ctr.InitReceiver(proc, false) 38 arg.ctr.bat = nil 39 arg.ctr.hashTable, err = hashmap.NewStrMap(true, arg.IBucket, arg.NBucket, proc.Mp()) 40 if err != nil { 41 return err 42 } 43 } 44 return nil 45 } 46 47 // Call is the execute method of minus operator 48 // it built a hash table for right relation first. 49 // use values from left relation to probe and update the hash table. 50 // and preserve values that do not exist in the hash table. 51 func (arg *Argument) Call(proc *process.Process) (vm.CallResult, error) { 52 if err, isCancel := vm.CancelCheck(proc); isCancel { 53 return vm.CancelResult, err 54 } 55 56 var err error 57 // prepare the analysis work. 58 analyze := proc.GetAnalyze(arg.GetIdx(), arg.GetParallelIdx(), arg.GetParallelMajor()) 59 analyze.Start() 60 defer analyze.Stop() 61 result := vm.NewCallResult() 62 63 for { 64 switch arg.ctr.state { 65 case buildingHashMap: 66 // step 1: build the hash table by all right batches. 67 if err = arg.ctr.buildHashTable(proc, analyze, 1, arg.GetIsFirst()); err != nil { 68 return result, err 69 } 70 if arg.ctr.hashTable != nil { 71 analyze.Alloc(arg.ctr.hashTable.Size()) 72 } 73 arg.ctr.state = probingHashMap 74 75 case probingHashMap: 76 // step 2: use left batches to probe and update the hash table. 77 // 78 // only one batch is processed during each loop, and the batch will be sent to 79 // next operator immediately after successful processing. 80 last := false 81 last, err = arg.ctr.probeHashTable(proc, analyze, 0, arg.GetIsFirst(), arg.GetIsLast(), &result) 82 if err != nil { 83 return result, err 84 } 85 if last { 86 arg.ctr.state = operatorEnd 87 continue 88 } 89 return result, nil 90 91 case operatorEnd: 92 // operator over. 93 result.Batch = nil 94 result.Status = vm.ExecStop 95 return result, nil 96 } 97 } 98 } 99 100 // buildHashTable use all batches from proc.Reg.MergeReceiver[index] to build the hash map. 101 func (ctr *container) buildHashTable(proc *process.Process, ana process.Analyze, index int, isFirst bool) error { 102 for { 103 bat, _, err := ctr.ReceiveFromSingleReg(index, ana) 104 if err != nil { 105 return err 106 } 107 108 // the last batch of pipeline. 109 if bat == nil { 110 break 111 } 112 113 // just an empty batch. 114 if bat.IsEmpty() { 115 proc.PutBatch(bat) 116 continue 117 } 118 ana.Input(bat, isFirst) 119 120 itr := ctr.hashTable.NewIterator() 121 count := bat.Vecs[0].Length() 122 for i := 0; i < count; i += hashmap.UnitLimit { 123 n := count - i 124 if n > hashmap.UnitLimit { 125 n = hashmap.UnitLimit 126 } 127 _, _, err := itr.Insert(i, n, bat.Vecs) 128 if err != nil { 129 bat.Clean(proc.Mp()) 130 return err 131 } 132 } 133 proc.PutBatch(bat) 134 } 135 return nil 136 } 137 138 // probeHashTable use a batch from proc.Reg.MergeReceivers[index] to probe and update the hash map. 139 // If a row of data never appears in the hash table, add it into hath table and send it to the next operator. 140 // if batch is the last one, return true, else return false. 141 func (ctr *container) probeHashTable(proc *process.Process, ana process.Analyze, index int, isFirst bool, isLast bool, result *vm.CallResult) (bool, error) { 142 inserted := make([]uint8, hashmap.UnitLimit) 143 restoreInserted := make([]uint8, hashmap.UnitLimit) 144 145 for { 146 bat, _, err := ctr.ReceiveFromSingleReg(index, ana) 147 if err != nil { 148 return false, err 149 } 150 151 // the last batch of block. 152 if bat == nil { 153 return true, nil 154 } 155 if bat.Last() { 156 ctr.bat = bat 157 result.Batch = ctr.bat 158 return false, nil 159 } 160 // just an empty batch. 161 if bat.IsEmpty() { 162 proc.PutBatch(bat) 163 continue 164 } 165 ana.Input(bat, isFirst) 166 167 if ctr.bat != nil { 168 proc.PutBatch(ctr.bat) 169 ctr.bat = nil 170 } 171 ctr.bat = batch.NewWithSize(len(bat.Vecs)) 172 for i := range bat.Vecs { 173 ctr.bat.Vecs[i] = proc.GetVector(*bat.Vecs[i].GetType()) 174 } 175 176 count := bat.Vecs[0].Length() 177 itr := ctr.hashTable.NewIterator() 178 for i := 0; i < count; i += hashmap.UnitLimit { 179 oldHashGroup := ctr.hashTable.GroupCount() 180 181 n := count - i 182 if n > hashmap.UnitLimit { 183 n = hashmap.UnitLimit 184 } 185 vs, _, err := itr.Insert(i, n, bat.Vecs) 186 if err != nil { 187 bat.Clean(proc.Mp()) 188 return false, err 189 } 190 copy(inserted[:n], restoreInserted[:n]) 191 rows := oldHashGroup 192 for j, v := range vs { 193 if v > rows { 194 // ensure that the same value will only be inserted once. 195 rows++ 196 inserted[j] = 1 197 } 198 } 199 ctr.bat.AddRowCount(int(rows - oldHashGroup)) 200 201 newHashGroup := ctr.hashTable.GroupCount() 202 insertCount := int(newHashGroup - oldHashGroup) 203 if insertCount > 0 { 204 for pos := range bat.Vecs { 205 if err := ctr.bat.Vecs[pos].UnionBatch(bat.Vecs[pos], int64(i), insertCount, inserted[:n], proc.Mp()); err != nil { 206 bat.Clean(proc.Mp()) 207 return false, err 208 } 209 } 210 } 211 } 212 ana.Output(ctr.bat, isLast) 213 result.Batch = ctr.bat 214 proc.PutBatch(bat) 215 return false, nil 216 } 217 }