github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/minus/minus.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package minus 16 17 import ( 18 "bytes" 19 "time" 20 21 "github.com/matrixorigin/matrixone/pkg/common/hashmap" 22 "github.com/matrixorigin/matrixone/pkg/container/batch" 23 "github.com/matrixorigin/matrixone/pkg/container/vector" 24 "github.com/matrixorigin/matrixone/pkg/vm/process" 25 ) 26 27 func String(_ any, buf *bytes.Buffer) { 28 buf.WriteString(" minus ") 29 } 30 31 func Prepare(proc *process.Process, argument any) error { 32 var err error 33 arg := argument.(*Argument) 34 { 35 arg.ctr.bat = nil 36 arg.ctr.hashTable, err = hashmap.NewStrMap(true, arg.IBucket, arg.NBucket, proc.Mp()) 37 if err != nil { 38 return err 39 } 40 } 41 return nil 42 } 43 44 // Call is the execute method of minus operator 45 // it built a hash table for right relation first. 46 // use values from left relation to probe and update the hash table. 47 // and preserve values that do not exist in the hash table. 48 func Call(idx int, proc *process.Process, argument any, isFirst bool, isLast bool) (bool, error) { 49 var err error 50 arg := argument.(*Argument) 51 52 // prepare the analysis work. 53 analyze := proc.GetAnalyze(idx) 54 analyze.Start() 55 defer analyze.Stop() 56 57 for { 58 switch arg.ctr.state { 59 case buildingHashMap: 60 // step 1: build the hash table by all right batches. 61 if err = arg.ctr.buildHashTable(proc, analyze, 1, isFirst); err != nil { 62 arg.Free(proc, true) 63 return false, err 64 } 65 if arg.ctr.hashTable != nil { 66 analyze.Alloc(arg.ctr.hashTable.Size()) 67 } 68 arg.ctr.state = probingHashMap 69 70 case probingHashMap: 71 // step 2: use left batches to probe and update the hash table. 72 // 73 // only one batch is processed during each loop, and the batch will be sent to 74 // next operator immediately after successful processing. 75 last := false 76 last, err = arg.ctr.probeHashTable(proc, analyze, 0, isFirst, isLast) 77 if err != nil { 78 arg.Free(proc, true) 79 return false, err 80 } 81 if last { 82 arg.ctr.state = operatorEnd 83 continue 84 } 85 return false, nil 86 87 case operatorEnd: 88 // operator over. 89 arg.Free(proc, false) 90 proc.SetInputBatch(nil) 91 return true, nil 92 } 93 } 94 } 95 96 // buildHashTable use all batches from proc.Reg.MergeReceiver[index] to build the hash map. 97 func (ctr *container) buildHashTable(proc *process.Process, ana process.Analyze, index int, isFirst bool) error { 98 for { 99 start := time.Now() 100 bat := <-proc.Reg.MergeReceivers[index].Ch 101 ana.WaitStop(start) 102 // the last batch of pipeline. 103 if bat == nil { 104 break 105 } 106 107 // just an empty batch. 108 if len(bat.Zs) == 0 { 109 continue 110 } 111 ana.Input(bat, isFirst) 112 113 itr := ctr.hashTable.NewIterator() 114 count := vector.Length(bat.Vecs[0]) 115 for i := 0; i < count; i += hashmap.UnitLimit { 116 n := count - i 117 if n > hashmap.UnitLimit { 118 n = hashmap.UnitLimit 119 } 120 _, _, err := itr.Insert(i, n, bat.Vecs) 121 if err != nil { 122 bat.Clean(proc.Mp()) 123 return err 124 } 125 } 126 bat.Clean(proc.Mp()) 127 } 128 return nil 129 } 130 131 // probeHashTable use a batch from proc.Reg.MergeReceivers[index] to probe and update the hash map. 132 // If a row of data never appears in the hash table, add it into hath table and send it to the next operator. 133 // if batch is the last one, return true, else return false. 134 func (ctr *container) probeHashTable(proc *process.Process, ana process.Analyze, index int, isFirst bool, isLast bool) (bool, error) { 135 inserted := make([]uint8, hashmap.UnitLimit) 136 restoreInserted := make([]uint8, hashmap.UnitLimit) 137 138 for { 139 start := time.Now() 140 bat := <-proc.Reg.MergeReceivers[index].Ch 141 ana.WaitStop(start) 142 143 // the last batch of block. 144 if bat == nil { 145 return true, nil 146 } 147 // just an empty batch. 148 if len(bat.Zs) == 0 { 149 continue 150 } 151 ana.Input(bat, isFirst) 152 153 ctr.bat = batch.NewWithSize(len(bat.Vecs)) 154 for i := range bat.Vecs { 155 ctr.bat.Vecs[i] = vector.New(bat.Vecs[i].Typ) 156 } 157 158 count := vector.Length(bat.Vecs[0]) 159 itr := ctr.hashTable.NewIterator() 160 for i := 0; i < count; i += hashmap.UnitLimit { 161 oldHashGroup := ctr.hashTable.GroupCount() 162 163 n := count - i 164 if n > hashmap.UnitLimit { 165 n = hashmap.UnitLimit 166 } 167 vs, _, err := itr.Insert(i, n, bat.Vecs) 168 if err != nil { 169 bat.Clean(proc.Mp()) 170 return false, err 171 } 172 copy(inserted[:n], restoreInserted[:n]) 173 rows := oldHashGroup 174 for j, v := range vs { 175 if v > rows { 176 // ensure that the same value will only be inserted once. 177 rows++ 178 inserted[j] = 1 179 ctr.bat.Zs = append(ctr.bat.Zs, 1) 180 } 181 } 182 183 newHashGroup := ctr.hashTable.GroupCount() 184 insertCount := int(newHashGroup - oldHashGroup) 185 if insertCount > 0 { 186 for pos := range bat.Vecs { 187 if err := vector.UnionBatch(ctr.bat.Vecs[pos], bat.Vecs[pos], int64(i), insertCount, inserted[:n], proc.Mp()); err != nil { 188 bat.Clean(proc.Mp()) 189 return false, err 190 } 191 } 192 } 193 } 194 ana.Output(ctr.bat, isLast) 195 proc.SetInputBatch(ctr.bat) 196 ctr.bat = nil 197 bat.Clean(proc.Mp()) 198 return false, nil 199 } 200 }