// Source: github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/hashjoiner.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package colexec

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/col/coldata"
	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/errors"
)

// hashJoinerState represents the state of the hash join columnar operator.
type hashJoinerState int

// The states below are declared as untyped integer constants (only iota is
// used); they are stored in the hashJoinerState-typed hashJoiner.state field.
const (
	// hjBuilding represents the state the hashJoiner is in when it is in the
	// build phase. Output columns from the build table are stored and a hash
	// map is constructed from its equality columns.
	hjBuilding = iota

	// hjProbing represents the state the hashJoiner is in when it is in the
	// probe phase. Probing is done in batches against the stored hash map.
	hjProbing

	// hjEmittingUnmatched represents the state the hashJoiner is in when it is
	// emitting unmatched rows from its build table after having consumed the
	// probe table. This happens in the case of an outer join on the build side.
	hjEmittingUnmatched

	// hjDone represents the state the hashJoiner is in when it has finished
	// emitting all output rows. Note that the build side will have been fully
	// consumed in this state, but the probe side *might* have not been fully
	// consumed.
	hjDone
)

// hashJoinerSpec is the specification for a hash join operator. The hash
// joiner performs a join on the left and right's equal columns and returns
// combined left and right output columns.
type hashJoinerSpec struct {
	// joinType is the type of join to perform.
	joinType sqlbase.JoinType
	// left and right are the specifications of the two input table sources to
	// the hash joiner.
	left  hashJoinerSourceSpec
	right hashJoinerSourceSpec

	// rightDistinct indicates whether or not the build table equality column
	// tuples are distinct. If they are distinct, performance can be optimized.
	rightDistinct bool
}

// hashJoinerSourceSpec describes one of the two inputs of the hash joiner.
type hashJoinerSourceSpec struct {
	// eqCols specifies the indices of the source table's equality columns used
	// during the hash join.
	eqCols []uint32

	// sourceTypes specifies the types of the input columns of the source table
	// for the hash joiner.
	sourceTypes []*types.T

	// outer specifies whether an outer join is required over the input.
	outer bool
}

// hashJoiner performs a hash join on the input tables' equality columns.
// It requires that the output for every input batch in the probe phase fits
// within coldata.BatchSize(), otherwise the behavior is undefined. A join is
// performed and there is no guarantee on the ordering of the output columns.
// The hash table will be built on the right side source, and the left side
// source will be used for probing.
//
// Before the build phase, all equality and output columns from the build table
// are collected and stored.
//
// In the vectorized implementation of the build phase, the following tasks are
// performed:
// 1. The bucket number (hash value) of each key tuple is computed and stored
//    into a buckets array.
// 2. The values in the buckets array are normalized to fit within the hash
//    table numBuckets.
// 3. The bucket-chaining hash table organization is prepared with the computed
//    buckets.
//
// Depending on the value of the spec.rightDistinct flag, there are two
// variations of the probe phase. The planner will set rightDistinct to true if
// and only if the right equality columns make a distinct key.
//
// In the columnarized implementation of the distinct build table probe phase,
// the following tasks are performed by the fastProbe function:
//
// 1. Compute the bucket number for each probe row's key tuple and store the
//    results into the buckets array.
// 2. In order to find the position of these key tuples in the hash table:
// - First find the first element in the bucket's linked list for each key tuple
//   and store it in the groupID array. Initialize the toCheck array with the
//   full sequence of input indices (0...batchSize - 1).
// - While toCheck is not empty, each element in toCheck represents a position
//   of the key tuples for which the key has not yet been found in the hash
//   table. Perform a multi-column equality check to see if the key columns
//   match that of the build table's key columns at groupID.
// - Update the differs array to store whether or not the probe's key tuple
//   matched the corresponding build's key tuple.
// - Select the indices that differed and store them into toCheck since they
//   need to be further processed.
// - For the differing tuples, find the next ID in that bucket of the hash table
//   and put it into the groupID array.
// 3. Now, groupID for every probe's key tuple contains the index of the
//    matching build's key tuple in the hash table. Use it to project output
//    columns from the hash table to build the resulting batch.
//
// In the columnarized implementation of the non-distinct build table probe
// phase, the following tasks are performed by the probe function:
//
// 1. Compute the bucket number for each probe row's key tuple and store the
//    results into the buckets array.
// 2. In order to find the position of these key tuples in the hash table:
// - First find the first element in the bucket's linked list for each key tuple
//   and store it in the groupID array. Initialize the toCheck array with the
//   full sequence of input indices (0...batchSize - 1).
// - While toCheck is not empty, each element in toCheck represents a position
//   of the key tuples for which the key has not yet been visited by any prior
//   probe. Perform a multi-column equality check to see if the key columns
//   match that of the build table's key columns at groupID.
// - Update the differs array to store whether or not the probe's key tuple
//   matched the corresponding build's key tuple.
// - For the indices that did not differ, we can lazily update the hashTable's
//   same linked list to store a list of all identical keys starting at head.
//   Once a key has been added to ht.same, ht.visited is set to true. For the
//   indices that have never been visited, we want to continue checking this
//   bucket for identical values by adding this key to toCheck.
// - Select the indices that differed and store them into toCheck since they
//   need to be further processed.
// - For the differing tuples, find the next ID in that bucket of the hash table
//   and put it into the groupID array.
// 3. Now, head stores the keyID of the first match in the build table for every
//    probe table key. ht.same is used to select all build key matches for each
//    probe key, which are added to the resulting batch. Output batching is done
//    to ensure that each batch is at most coldata.BatchSize().
//
// In the case that an outer join on the probe table side is performed, every
// single probe row is kept even if its groupID is 0.
If a groupID of 0 is 155 // found, this means that the matching build table row should be all NULL. This 156 // is done by setting probeRowUnmatched at that row to true. 157 // 158 // In the case that an outer join on the build table side is performed, an 159 // emitUnmatched is performed after the probing ends. This is done by gathering 160 // all build table rows that have never been matched and stitching it together 161 // with NULL values on the probe side. 162 type hashJoiner struct { 163 twoInputNode 164 165 allocator *colmem.Allocator 166 // spec holds the specification for the current hash join process. 167 spec hashJoinerSpec 168 // state stores the current state of the hash joiner. 169 state hashJoinerState 170 // ht holds the hashTable that is populated during the build phase and used 171 // during the probe phase. 172 ht *hashTable 173 // output stores the resulting output batch that is constructed and returned 174 // for every input batch during the probe phase. 175 output coldata.Batch 176 // outputBatchSize specifies the desired length of the output batch which by 177 // default is coldata.BatchSize() but can be varied in tests. 178 outputBatchSize int 179 180 // probeState is used in hjProbing state. 181 probeState struct { 182 // buildIdx and probeIdx represents the matching row indices that are used to 183 // stitch together the join results. 184 buildIdx []int 185 probeIdx []int 186 187 // probeRowUnmatched is used in the case that the prober.spec.outer is true. 188 // This means that an outer join is performed on the probe side and we use 189 // probeRowUnmatched to represent that the resulting columns should be NULL on 190 // the build table. This indicates that the probe table row did not match any 191 // build table rows. 192 probeRowUnmatched []bool 193 // buildRowMatched is used in the case that prober.buildOuter is true. 
This 194 // means that an outer join is performed on the build side and buildRowMatched 195 // marks all the build table rows that have been matched already. The rows 196 // that were unmatched are emitted during the emitUnmatched phase. 197 buildRowMatched []bool 198 199 // prevBatch, if not nil, indicates that the previous probe input batch has 200 // not been fully processed. 201 prevBatch coldata.Batch 202 // prevBatchResumeIdx indicates the index of the probe row to resume the 203 // collection from. It is used only in case of non-distinct build source 204 // (every probe row can have multiple matching build rows). 205 prevBatchResumeIdx int 206 } 207 208 // emittingUnmatchedState is used in hjEmittingUnmatched state. 209 emittingUnmatchedState struct { 210 rowIdx int 211 } 212 213 exportBufferedState struct { 214 rightExported int 215 rightWindowedBatch coldata.Batch 216 } 217 } 218 219 var _ bufferingInMemoryOperator = &hashJoiner{} 220 var _ resetter = &hashJoiner{} 221 222 func (hj *hashJoiner) Init() { 223 hj.inputOne.Init() 224 hj.inputTwo.Init() 225 226 allowNullEquality, probeMode := false, hashTableDefaultProbeMode 227 if hj.spec.joinType.IsSetOpJoin() { 228 allowNullEquality = true 229 probeMode = hashTableDeletingProbeMode 230 } 231 hj.ht = newHashTable( 232 hj.allocator, 233 hashTableNumBuckets, 234 hj.spec.right.sourceTypes, 235 hj.spec.right.eqCols, 236 allowNullEquality, 237 hashTableFullBuildMode, 238 probeMode, 239 ) 240 241 hj.exportBufferedState.rightWindowedBatch = hj.allocator.NewMemBatchWithSize(hj.spec.right.sourceTypes, 0 /* size */) 242 hj.state = hjBuilding 243 } 244 245 func (hj *hashJoiner) Next(ctx context.Context) coldata.Batch { 246 hj.resetOutput() 247 for { 248 switch hj.state { 249 case hjBuilding: 250 hj.build(ctx) 251 if hj.ht.vals.Length() == 0 { 252 // The build side is empty, so we can short-circuit probing 253 // phase altogether for INNER, RIGHT OUTER, LEFT SEMI, and 254 // INTERSECT ALL joins. 
255 if hj.spec.joinType == sqlbase.InnerJoin || 256 hj.spec.joinType == sqlbase.RightOuterJoin || 257 hj.spec.joinType == sqlbase.LeftSemiJoin || 258 hj.spec.joinType == sqlbase.IntersectAllJoin { 259 // The short-circuiting behavior is temporarily disabled 260 // because it causes flakiness of some tests due to #48785 261 // (concurrent calls to DrainMeta and Next). 262 // TODO(asubiotto): remove this once the issue is resolved. 263 // hj.state = hjDone 264 continue 265 } 266 } 267 continue 268 case hjProbing: 269 hj.exec(ctx) 270 271 if hj.output.Length() == 0 { 272 if hj.spec.right.outer { 273 hj.state = hjEmittingUnmatched 274 } else { 275 hj.state = hjDone 276 } 277 continue 278 } 279 return hj.output 280 case hjEmittingUnmatched: 281 if hj.emittingUnmatchedState.rowIdx == hj.ht.vals.Length() { 282 hj.state = hjDone 283 continue 284 } 285 hj.emitUnmatched() 286 return hj.output 287 case hjDone: 288 return coldata.ZeroBatch 289 default: 290 colexecerror.InternalError("hash joiner in unhandled state") 291 // This code is unreachable, but the compiler cannot infer that. 292 return nil 293 } 294 } 295 } 296 297 func (hj *hashJoiner) build(ctx context.Context) { 298 hj.ht.build(ctx, hj.inputTwo) 299 300 if !hj.spec.rightDistinct { 301 hj.ht.maybeAllocateSameAndVisited() 302 } 303 304 if hj.spec.right.outer { 305 if cap(hj.probeState.buildRowMatched) < hj.ht.vals.Length() { 306 hj.probeState.buildRowMatched = make([]bool, hj.ht.vals.Length()) 307 } else { 308 hj.probeState.buildRowMatched = hj.probeState.buildRowMatched[:hj.ht.vals.Length()] 309 for n := 0; n < hj.ht.vals.Length(); n += copy(hj.probeState.buildRowMatched[n:], zeroBoolColumn) { 310 } 311 } 312 } 313 314 hj.state = hjProbing 315 } 316 317 // emitUnmatched populates the output batch to emit tuples from the build side 318 // that didn't get a match. This will be called only for RIGHT OUTER and FULL 319 // OUTER joins. 
320 func (hj *hashJoiner) emitUnmatched() { 321 // Set all elements in the probe columns of the output batch to null. 322 for i := range hj.spec.left.sourceTypes { 323 outCol := hj.output.ColVec(i) 324 outCol.Nulls().SetNulls() 325 } 326 327 nResults := 0 328 329 for nResults < hj.outputBatchSize && hj.emittingUnmatchedState.rowIdx < hj.ht.vals.Length() { 330 if !hj.probeState.buildRowMatched[hj.emittingUnmatchedState.rowIdx] { 331 hj.probeState.buildIdx[nResults] = hj.emittingUnmatchedState.rowIdx 332 nResults++ 333 } 334 hj.emittingUnmatchedState.rowIdx++ 335 } 336 337 outCols := hj.output.ColVecs()[len(hj.spec.left.sourceTypes) : len(hj.spec.left.sourceTypes)+len(hj.spec.right.sourceTypes)] 338 for i := range hj.spec.right.sourceTypes { 339 outCol := outCols[i] 340 valCol := hj.ht.vals.ColVec(i) 341 // NOTE: this Copy is not accounted for because we don't want for memory 342 // limit error to occur at this point - we have already built the hash 343 // table and now are only consuming the left source one batch at a time, 344 // so such behavior should be a minor deviation from the limit. If we were 345 // to hit the limit here, it would have been very hard to fall back to disk 346 // backed hash joiner because we might have already emitted partial output. 347 // This behavior is acceptable - we allocated hj.output batch already, so 348 // the concern here is only for the variable-sized types that exceed our 349 // estimations. 350 outCol.Copy( 351 coldata.CopySliceArgs{ 352 SliceArgs: coldata.SliceArgs{ 353 Src: valCol, 354 SrcEndIdx: nResults, 355 Sel: hj.probeState.buildIdx, 356 }, 357 }, 358 ) 359 } 360 361 hj.output.SetLength(nResults) 362 } 363 364 // exec is a general prober that works with non-distinct build table equality 365 // columns. It returns a Batch with N + M columns where N is the number of 366 // left source columns and M is the number of right source columns. 
The first N 367 // columns correspond to the respective left source columns, followed by the 368 // right source columns as the last M elements. Even though all the columns are 369 // present in the result, only the specified output columns store relevant 370 // information. The remaining columns are there as dummy columns and their 371 // states are undefined. 372 // 373 // rightDistinct is true if the build table equality columns are distinct. It 374 // performs the same operation as the exec() function normally would while 375 // taking a shortcut to improve speed. 376 func (hj *hashJoiner) exec(ctx context.Context) { 377 hj.output.SetLength(0) 378 379 if batch := hj.probeState.prevBatch; batch != nil { 380 // The previous result was bigger than the maximum batch size, so we didn't 381 // finish outputting it in the last call to probe. Continue outputting the 382 // result from the previous batch. 383 hj.probeState.prevBatch = nil 384 batchSize := batch.Length() 385 sel := batch.Selection() 386 387 nResults := hj.collect(batch, batchSize, sel) 388 hj.congregate(nResults, batch, batchSize) 389 } else { 390 for { 391 batch := hj.inputOne.Next(ctx) 392 batchSize := batch.Length() 393 394 if batchSize == 0 { 395 break 396 } 397 398 for i, colIdx := range hj.spec.left.eqCols { 399 hj.ht.probeScratch.keys[i] = batch.ColVec(int(colIdx)) 400 } 401 402 sel := batch.Selection() 403 404 var nToCheck uint64 405 switch hj.spec.joinType { 406 case sqlbase.LeftAntiJoin, sqlbase.ExceptAllJoin: 407 // The setup of probing for LEFT ANTI and EXCEPT ALL joins 408 // needs a special treatment in order to reuse the same "check" 409 // functions below. 410 // 411 // First, we compute the hash values for all tuples in the batch. 412 hj.ht.computeBuckets( 413 ctx, hj.ht.probeScratch.buckets, hj.ht.probeScratch.keys, batchSize, sel, 414 ) 415 // Then, we iterate over all tuples to see whether there is at least 416 // one tuple in the hash table that has the same hash value. 
417 for i := 0; i < batchSize; i++ { 418 if hj.ht.buildScratch.first[hj.ht.probeScratch.buckets[i]] != 0 { 419 // Non-zero "first" key indicates that there is a match of hashes 420 // and we need to include the current tuple to check whether it is 421 // an actual match. 422 hj.ht.probeScratch.groupID[i] = hj.ht.buildScratch.first[hj.ht.probeScratch.buckets[i]] 423 hj.ht.probeScratch.toCheck[nToCheck] = uint64(i) 424 nToCheck++ 425 } 426 } 427 // We need to reset headID for all tuples in the batch to remove any 428 // leftover garbage from the previous iteration. For tuples that need 429 // to be checked, headID will be updated accordingly; for tuples that 430 // definitely don't have a match, the zero value will remain until the 431 // "collecting" and "congregation" step in which such tuple will be 432 // included into the output. 433 copy(hj.ht.probeScratch.headID[:batchSize], zeroUint64Column) 434 default: 435 // Initialize groupID with the initial hash buckets and toCheck with all 436 // applicable indices. 437 hj.ht.lookupInitial(ctx, batchSize, sel) 438 nToCheck = uint64(batchSize) 439 } 440 441 var nResults int 442 443 if hj.spec.rightDistinct { 444 for nToCheck > 0 { 445 // Continue searching along the hash table next chains for the corresponding 446 // buckets. If the key is found or end of next chain is reached, the key is 447 // removed from the toCheck array. 448 nToCheck = hj.ht.distinctCheck(nToCheck, sel) 449 hj.ht.findNext(hj.ht.buildScratch.next, nToCheck) 450 } 451 452 nResults = hj.distinctCollect(batch, batchSize, sel) 453 } else { 454 for nToCheck > 0 { 455 // Continue searching for the build table matching keys while the toCheck 456 // array is non-empty. 457 nToCheck = hj.ht.check(hj.ht.probeScratch.keys, hj.ht.keyCols, nToCheck, sel) 458 hj.ht.findNext(hj.ht.buildScratch.next, nToCheck) 459 } 460 461 // We're processing a new batch, so we'll reset the index to start 462 // collecting from. 
463 hj.probeState.prevBatchResumeIdx = 0 464 nResults = hj.collect(batch, batchSize, sel) 465 } 466 467 hj.congregate(nResults, batch, batchSize) 468 469 if hj.output.Length() > 0 { 470 break 471 } 472 } 473 } 474 } 475 476 // congregate uses the probeIdx and buildIdx pairs to stitch together the 477 // resulting join rows and add them to the output batch with the left table 478 // columns preceding the right table columns. 479 func (hj *hashJoiner) congregate(nResults int, batch coldata.Batch, batchSize int) { 480 // NOTE: Copy() calls are not accounted for because we don't want for memory 481 // limit error to occur at this point - we have already built the hash 482 // table and now are only consuming the left source one batch at a time, 483 // so such behavior should be a minor deviation from the limit. If we were 484 // to hit the limit here, it would have been very hard to fall back to disk 485 // backed hash joiner because we might have already emitted partial output. 486 // This behavior is acceptable - we allocated hj.output batch already, so the 487 // concern here is only for the variable-sized types that exceed our 488 // estimations. 489 490 if hj.spec.joinType.ShouldIncludeRightColsInOutput() { 491 rightColOffset := len(hj.spec.left.sourceTypes) 492 // If the hash table is empty, then there is nothing to copy. The nulls 493 // will be set below. 494 if hj.ht.vals.Length() > 0 { 495 outCols := hj.output.ColVecs()[rightColOffset : rightColOffset+len(hj.spec.right.sourceTypes)] 496 for i := range hj.spec.right.sourceTypes { 497 outCol := outCols[i] 498 valCol := hj.ht.vals.ColVec(i) 499 // Note that if for some index i, probeRowUnmatched[i] is true, then 500 // hj.buildIdx[i] == 0 which will copy the garbage zeroth row of the 501 // hash table, but we will set the NULL value below. 
502 outCol.Copy( 503 coldata.CopySliceArgs{ 504 SliceArgs: coldata.SliceArgs{ 505 Src: valCol, 506 SrcEndIdx: nResults, 507 Sel: hj.probeState.buildIdx, 508 }, 509 }, 510 ) 511 } 512 } 513 if hj.spec.left.outer { 514 // Add in the nulls we needed to set for the outer join. 515 for i := range hj.spec.right.sourceTypes { 516 outCol := hj.output.ColVec(i + rightColOffset) 517 nulls := outCol.Nulls() 518 for i, isNull := range hj.probeState.probeRowUnmatched { 519 if isNull { 520 nulls.SetNull(i) 521 } 522 } 523 } 524 } 525 } 526 527 outCols := hj.output.ColVecs()[:len(hj.spec.left.sourceTypes)] 528 for i := range hj.spec.left.sourceTypes { 529 outCol := outCols[i] 530 valCol := batch.ColVec(i) 531 outCol.Copy( 532 coldata.CopySliceArgs{ 533 SliceArgs: coldata.SliceArgs{ 534 Src: valCol, 535 Sel: hj.probeState.probeIdx, 536 SrcEndIdx: nResults, 537 }, 538 }, 539 ) 540 } 541 542 if hj.spec.right.outer { 543 // In order to determine which rows to emit for the outer join on the build 544 // table in the end, we need to mark the matched build table rows. 545 if hj.spec.left.outer { 546 for i := 0; i < nResults; i++ { 547 if !hj.probeState.probeRowUnmatched[i] { 548 hj.probeState.buildRowMatched[hj.probeState.buildIdx[i]] = true 549 } 550 } 551 } else { 552 for i := 0; i < nResults; i++ { 553 hj.probeState.buildRowMatched[hj.probeState.buildIdx[i]] = true 554 } 555 } 556 } 557 558 hj.output.SetLength(nResults) 559 } 560 561 func (hj *hashJoiner) ExportBuffered(input colexecbase.Operator) coldata.Batch { 562 if hj.inputOne == input { 563 // We do not buffer anything from the left source. Furthermore, the memory 564 // limit can only hit during the building of the hash table step at which 565 // point we haven't requested a single batch from the left. 
566 return coldata.ZeroBatch 567 } else if hj.inputTwo == input { 568 if hj.exportBufferedState.rightExported == hj.ht.vals.Length() { 569 return coldata.ZeroBatch 570 } 571 newRightExported := hj.exportBufferedState.rightExported + coldata.BatchSize() 572 if newRightExported > hj.ht.vals.Length() { 573 newRightExported = hj.ht.vals.Length() 574 } 575 startIdx, endIdx := hj.exportBufferedState.rightExported, newRightExported 576 b := hj.exportBufferedState.rightWindowedBatch 577 // We don't need to worry about selection vectors on hj.ht.vals because the 578 // tuples have been already selected during building of the hash table. 579 for i := range hj.spec.right.sourceTypes { 580 window := hj.ht.vals.ColVec(i).Window(startIdx, endIdx) 581 b.ReplaceCol(window, i) 582 } 583 b.SetLength(endIdx - startIdx) 584 hj.exportBufferedState.rightExported = newRightExported 585 return b 586 } else { 587 colexecerror.InternalError(errors.New( 588 "unexpectedly ExportBuffered is called with neither left nor right inputs to hash join", 589 )) 590 // This code is unreachable, but the compiler cannot infer that. 591 return nil 592 } 593 } 594 595 func (hj *hashJoiner) resetOutput() { 596 if hj.output == nil { 597 outputTypes := append([]*types.T{}, hj.spec.left.sourceTypes...) 598 if hj.spec.joinType.ShouldIncludeRightColsInOutput() { 599 outputTypes = append(outputTypes, hj.spec.right.sourceTypes...) 
600 } 601 hj.output = hj.allocator.NewMemBatch(outputTypes) 602 } else { 603 hj.output.ResetInternalBatch() 604 } 605 } 606 607 func (hj *hashJoiner) reset(ctx context.Context) { 608 for _, input := range []colexecbase.Operator{hj.inputOne, hj.inputTwo} { 609 if r, ok := input.(resetter); ok { 610 r.reset(ctx) 611 } 612 } 613 hj.state = hjBuilding 614 hj.ht.reset(ctx) 615 copy(hj.probeState.buildIdx[:coldata.BatchSize()], zeroIntColumn) 616 copy(hj.probeState.probeIdx[:coldata.BatchSize()], zeroIntColumn) 617 if hj.spec.left.outer { 618 copy(hj.probeState.probeRowUnmatched[:coldata.BatchSize()], zeroBoolColumn) 619 } 620 // hj.probeState.buildRowMatched is reset after building the hash table is 621 // complete in build() method. 622 hj.emittingUnmatchedState.rowIdx = 0 623 hj.exportBufferedState.rightExported = 0 624 } 625 626 // makeHashJoinerSpec creates a specification for columnar hash join operator. 627 // leftEqCols and rightEqCols specify the equality columns while leftOutCols 628 // and rightOutCols specifies the output columns. leftTypes and rightTypes 629 // specify the input column types of the two sources. rightDistinct indicates 630 // whether the equality columns of the right source form a key. 631 func makeHashJoinerSpec( 632 joinType sqlbase.JoinType, 633 leftEqCols []uint32, 634 rightEqCols []uint32, 635 leftTypes []*types.T, 636 rightTypes []*types.T, 637 rightDistinct bool, 638 ) (hashJoinerSpec, error) { 639 var ( 640 spec hashJoinerSpec 641 leftOuter, rightOuter bool 642 ) 643 switch joinType { 644 case sqlbase.InnerJoin: 645 case sqlbase.RightOuterJoin: 646 rightOuter = true 647 case sqlbase.LeftOuterJoin: 648 leftOuter = true 649 case sqlbase.FullOuterJoin: 650 rightOuter = true 651 leftOuter = true 652 case sqlbase.LeftSemiJoin: 653 // In a semi-join, we don't need to store anything but a single row per 654 // build row, since all we care about is whether a row on the left matches 655 // any row on the right. 
656 // Note that this is *not* the case if we have an ON condition, since we'll 657 // also need to make sure that a row on the left passes the ON condition 658 // with the row on the right to emit it. However, we don't support ON 659 // conditions just yet. When we do, we'll have a separate case for that. 660 rightDistinct = true 661 case sqlbase.LeftAntiJoin: 662 case sqlbase.IntersectAllJoin: 663 case sqlbase.ExceptAllJoin: 664 default: 665 return spec, errors.AssertionFailedf("hash join of type %s not supported", joinType) 666 } 667 668 left := hashJoinerSourceSpec{ 669 eqCols: leftEqCols, 670 sourceTypes: leftTypes, 671 outer: leftOuter, 672 } 673 right := hashJoinerSourceSpec{ 674 eqCols: rightEqCols, 675 sourceTypes: rightTypes, 676 outer: rightOuter, 677 } 678 spec = hashJoinerSpec{ 679 joinType: joinType, 680 left: left, 681 right: right, 682 rightDistinct: rightDistinct, 683 } 684 return spec, nil 685 } 686 687 // newHashJoiner creates a new equality hash join operator on the left and 688 // right input tables. 689 func newHashJoiner( 690 allocator *colmem.Allocator, spec hashJoinerSpec, leftSource, rightSource colexecbase.Operator, 691 ) colexecbase.Operator { 692 hj := &hashJoiner{ 693 twoInputNode: newTwoInputNode(leftSource, rightSource), 694 allocator: allocator, 695 spec: spec, 696 outputBatchSize: coldata.BatchSize(), 697 } 698 hj.probeState.buildIdx = make([]int, coldata.BatchSize()) 699 hj.probeState.probeIdx = make([]int, coldata.BatchSize()) 700 if spec.left.outer { 701 hj.probeState.probeRowUnmatched = make([]bool, coldata.BatchSize()) 702 } 703 return hj 704 }