github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/mergejoiner_tmpl.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 // {{/* 12 // +build execgen_template 13 // 14 // This file is the execgen template for mergejoiner.eg.go. It's formatted in a 15 // special way, so it's both valid Go and a valid text/template input. This 16 // permits editing this file with editor support. 17 // 18 // */}} 19 20 package colexec 21 22 import ( 23 "context" 24 "fmt" 25 26 "github.com/cockroachdb/cockroach/pkg/col/coldata" 27 "github.com/cockroachdb/cockroach/pkg/sql/colexec/execgen" 28 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror" 29 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 30 "github.com/cockroachdb/cockroach/pkg/sql/types" 31 ) 32 33 // Remove unused warning. 34 var _ = execgen.UNSAFEGET 35 36 // {{/* 37 // Declarations to make the template compile properly. 38 39 // _GOTYPE is the template variable. 40 type _GOTYPE interface{} 41 42 // _CANONICAL_TYPE_FAMILY is the template variable. 43 const _CANONICAL_TYPE_FAMILY = types.UnknownFamily 44 45 // _TYPE_WIDTH is the template variable. 46 const _TYPE_WIDTH = 0 47 48 // _ASSIGN_EQ is the template equality function for assigning the first input 49 // to the result of the second input == the third input. 50 func _ASSIGN_EQ(_, _, _, _, _, _ interface{}) int { 51 colexecerror.InternalError("") 52 } 53 54 // _ASSIGN_CMP is the template equality function for assigning the first input 55 // to the result of comparing the second input to the third input which returns 56 // an integer. 
That integer is: 57 // - negative if left < right 58 // - zero if left == right 59 // - positive if left > right. 60 func _ASSIGN_CMP(_, _, _, _, _ interface{}) int { 61 colexecerror.VectorizedInternalPanic("") 62 } 63 64 // _L_SEL_IND is the template type variable for the loop variable that 65 // is either curLIdx or lSel[curLIdx] depending on whether we're in a 66 // selection or not. 67 const _L_SEL_IND = 0 68 69 // _R_SEL_IND is the template type variable for the loop variable that 70 // is either curRIdx or rSel[curRIdx] depending on whether we're in a 71 // selection or not. 72 const _R_SEL_IND = 0 73 74 // _SEL_ARG is used in place of the string "$sel", since that isn't valid go 75 // code. 76 const _SEL_ARG = 0 77 78 // _JOIN_TYPE is used in place of the string "$.JoinType", since that isn't 79 // valid go code. 80 const _JOIN_TYPE = 0 81 82 // */}} 83 84 type mergeJoin_JOIN_TYPE_STRINGOp struct { 85 *mergeJoinBase 86 } 87 88 var _ InternalMemoryOperator = &mergeJoin_JOIN_TYPE_STRINGOp{} 89 90 // {{/* 91 // This code snippet is the "meat" of the probing phase. 
// The snippet is expanded inline at its use site, so besides the template
// arguments it relies on names that are in scope there: o, ctx, lVec, rVec,
// colType, lSel, rSel, eqColIdx and the EqLoop label.
//
// For every pair of groups handed out by o.groups.nextGroupInCol it walks the
// left and right rows of the current equality column side by side. On a match
// it measures how far the run of equal values extends on each side; on a
// mismatch it advances one side based on the comparison result and the
// ordering direction of the column.
func _PROBE_SWITCH(
	_JOIN_TYPE joinTypeInfo, _SEL_PERMUTATION selPermutation, _L_HAS_NULLS bool, _R_HAS_NULLS bool,
) { // */}}
	// {{define "probeSwitch"}}
	// {{$sel := $.SelPermutation}}
	switch lVec.CanonicalTypeFamily() {
	// {{range $overload := $.Global.Overloads}}
	case _CANONICAL_TYPE_FAMILY:
		switch colType.Width() {
		// {{range .WidthOverloads}}
		case _TYPE_WIDTH:
			lKeys := lVec.TemplateType()
			rKeys := rVec.TemplateType()
			var (
				lGroup, rGroup   group
				cmp              int
				match            bool
				lVal, rVal       _GOTYPE
				lSelIdx, rSelIdx int
			)

			for o.groups.nextGroupInCol(&lGroup, &rGroup) {
				curLIdx := lGroup.rowStartIdx
				curRIdx := rGroup.rowStartIdx
				curLEndIdx := lGroup.rowEndIdx
				curREndIdx := rGroup.rowEndIdx
				areGroupsProcessed := false
				_LEFT_UNMATCHED_GROUP_SWITCH(_JOIN_TYPE)
				_RIGHT_UNMATCHED_GROUP_SWITCH(_JOIN_TYPE)
				// Expand or filter each group based on the current equality column.
				for curLIdx < curLEndIdx && curRIdx < curREndIdx && !areGroupsProcessed {
					cmp = 0
					// {{if _L_HAS_NULLS}}
					lNull := lVec.Nulls().NullAt(_L_SEL_IND)
					// {{end}}
					// {{if _R_HAS_NULLS}}
					rNull := rVec.Nulls().NullAt(_R_SEL_IND)
					// {{end}}

					// {{if _JOIN_TYPE.IsSetOp}}
					// {{/*
					//     Set operations allow null equality, so we handle
					//     NULLs first.
					// */}}
					// {{if _L_HAS_NULLS}}
					if lNull {
						// {{/* If we have NULL on the left, then it is smaller than the right value. */}}
						cmp = -1
					}
					// {{end}}
					// {{if _R_HAS_NULLS}}
					if rNull {
						// {{/* If we have NULL on the right, then it is smaller than the left value. */}}
						cmp = 1
					}
					// {{end}}
					// {{if _L_HAS_NULLS}}
					var nullMatch bool
					// {{/* Remove unused warning for some code paths of INTERSECT ALL join. */}}
					_ = nullMatch
					// {{if _R_HAS_NULLS}}
					// {{/* Both vectors might have nulls. */}}
					// If we have a NULL match, it will take precedence over
					// cmp value set above.
					nullMatch = lNull && rNull
					// {{end}}
					// {{end}}
					// {{else}}
					// {{/*
					//     Non-set operation joins do not allow null equality,
					//     so if either value is NULL, the tuples are not
					//     matches.
					// */}}
					// TODO(yuzefovich): we can advance both sides if both are
					// NULL.
					// {{if _L_HAS_NULLS}}
					if lNull {
						_NULL_FROM_LEFT_SWITCH(_JOIN_TYPE)
						curLIdx++
						continue
					}
					// {{end}}
					// {{if _R_HAS_NULLS}}
					if rNull {
						_NULL_FROM_RIGHT_SWITCH(_JOIN_TYPE)
						curRIdx++
						continue
					}
					// {{end}}
					// {{end}}

					// {{if _JOIN_TYPE.IsSetOp}}
					// {{if and _L_HAS_NULLS _R_HAS_NULLS}}
					if nullMatch {
						// We have a null match, so two values are equal.
						cmp = 0
					} else
					// {{end}}
					// {{end}}
					{
						// {{if _JOIN_TYPE.IsSetOp}}
						// {{if or _L_HAS_NULLS _R_HAS_NULLS}}
						// {{/*
						//     For set operation joins we might have set 'cmp'
						//     to non-zero value above if we had a null mismatch.
						//     In such scenario we already know that the values
						//     are different and we know which side to advance.
						// */}}
						if cmp == 0 {
							// {{end}}
							// {{end}}

							lSelIdx = _L_SEL_IND
							lVal = execgen.UNSAFEGET(lKeys, lSelIdx)
							rSelIdx = _R_SEL_IND
							rVal = execgen.UNSAFEGET(rKeys, rSelIdx)
							_ASSIGN_CMP(cmp, lVal, rVal, lKeys, rKeys)

							// {{if _JOIN_TYPE.IsSetOp}}
							// {{if or _L_HAS_NULLS _R_HAS_NULLS}}
						}
						// {{end}}
						// {{end}}
					}

					if cmp == 0 {
						// Find the length of the groups on each side.
						lGroupLength, rGroupLength := 1, 1
						// If a group ends before the end of the probing batch,
						// then we know it is complete.
						lComplete := curLEndIdx < o.proberState.lLength
						rComplete := curREndIdx < o.proberState.rLength
						// {{/* Remember where the matching run starts before advancing past its first row. */}}
						beginLIdx, beginRIdx := curLIdx, curRIdx
						curLIdx++
						curRIdx++

						// Find the length of the group on the left.
						for curLIdx < curLEndIdx {
							// {{if _JOIN_TYPE.IsSetOp}}
							// {{if and _L_HAS_NULLS _R_HAS_NULLS}}
							if nullMatch {
								// {{/*
								//     We have a NULL match, so we only
								//     extend the left group if we have a
								//     NULL element.
								// */}}
								if !lVec.Nulls().NullAt(_L_SEL_IND) {
									lComplete = true
									break
								}
							} else
							// {{end}}
							// {{end}}
							{
								// {{if _L_HAS_NULLS}}
								if lVec.Nulls().NullAt(_L_SEL_IND) {
									lComplete = true
									break
								}
								// {{end}}
								lSelIdx = _L_SEL_IND
								newLVal := execgen.UNSAFEGET(lKeys, lSelIdx)
								_ASSIGN_EQ(match, newLVal, lVal, _, lKeys, lKeys)
								if !match {
									lComplete = true
									break
								}
							}
							lGroupLength++
							curLIdx++
						}

						// Find the length of the group on the right.
						for curRIdx < curREndIdx {
							// {{if _JOIN_TYPE.IsSetOp}}
							// {{if and _L_HAS_NULLS _R_HAS_NULLS}}
							if nullMatch {
								// {{/*
								//     We have a NULL match, so we only
								//     extend the right group if we have a
								//     NULL element.
								// */}}
								if !rVec.Nulls().NullAt(_R_SEL_IND) {
									rComplete = true
									break
								}
							} else
							// {{end}}
							// {{end}}
							{
								// {{if _R_HAS_NULLS}}
								if rVec.Nulls().NullAt(_R_SEL_IND) {
									rComplete = true
									break
								}
								// {{end}}
								rSelIdx = _R_SEL_IND
								newRVal := execgen.UNSAFEGET(rKeys, rSelIdx)
								_ASSIGN_EQ(match, newRVal, rVal, _, rKeys, rKeys)
								if !match {
									rComplete = true
									break
								}
							}
							rGroupLength++
							curRIdx++
						}

						// Last equality column and either group is incomplete. Save state
						// and have it handled in the next iteration.
						if eqColIdx == len(o.left.eqCols)-1 && (!lComplete || !rComplete) {
							o.appendToBufferedGroup(ctx, &o.left, o.proberState.lBatch, lSel, beginLIdx, lGroupLength)
							o.proberState.lIdx = lGroupLength + beginLIdx
							o.appendToBufferedGroup(ctx, &o.right, o.proberState.rBatch, rSel, beginRIdx, rGroupLength)
							o.proberState.rIdx = rGroupLength + beginRIdx

							o.groups.finishedCol()
							break EqLoop
						}

						if eqColIdx < len(o.left.eqCols)-1 {
							o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
						} else {
							// {{if _JOIN_TYPE.IsLeftSemi}}
							leftSemiGroupLength := lGroupLength
							// {{if _JOIN_TYPE.IsSetOp}}
							// For INTERSECT ALL join we add a left semi group
							// of length min(lGroupLength, rGroupLength).
							if rGroupLength < lGroupLength {
								leftSemiGroupLength = rGroupLength
							}
							// {{end}}
							o.groups.addLeftSemiGroup(beginLIdx, leftSemiGroupLength)
							// {{else if _JOIN_TYPE.IsLeftAnti}}
							// {{if _JOIN_TYPE.IsSetOp}}
							// For EXCEPT ALL join we add (lGroupLength - rGroupLength) number
							// (if positive) of unmatched left groups.
							for leftUnmatchedTupleIdx := beginLIdx + rGroupLength; leftUnmatchedTupleIdx < beginLIdx+lGroupLength; leftUnmatchedTupleIdx++ {
								// Right index here doesn't matter.
								o.groups.addLeftUnmatchedGroup(leftUnmatchedTupleIdx, beginRIdx)
							}
							// {{else}}
							// With LEFT ANTI join, we are only interested in unmatched tuples
							// from the left, and all tuples in the current group have a match.
							// {{end}}
							// {{else}}
							// Neither group ends with the batch, so add the group to the
							// circular buffer.
							o.groups.addGroupsToNextCol(beginLIdx, lGroupLength, beginRIdx, rGroupLength)
							// {{end}}
						}
					} else { // mismatch
						// The line below is a compact form of the following:
						//   incrementLeft :=
						//    (cmp < 0 && o.left.directions[eqColIdx] == execinfrapb.Ordering_Column_ASC) ||
						//    (cmp > 0 && o.left.directions[eqColIdx] == execinfrapb.Ordering_Column_DESC).
						incrementLeft := cmp < 0 == (o.left.directions[eqColIdx] == execinfrapb.Ordering_Column_ASC)
						if incrementLeft {
							curLIdx++
							// {{if _L_HAS_NULLS}}
							_INCREMENT_LEFT_SWITCH(_JOIN_TYPE, _SEL_ARG, true)
							// {{else}}
							_INCREMENT_LEFT_SWITCH(_JOIN_TYPE, _SEL_ARG, false)
							// {{end}}
						} else {
							curRIdx++
							// {{if _R_HAS_NULLS}}
							_INCREMENT_RIGHT_SWITCH(_JOIN_TYPE, _SEL_ARG, true)
							// {{else}}
							_INCREMENT_RIGHT_SWITCH(_JOIN_TYPE, _SEL_ARG, false)
							// {{end}}
						}
					}
				}
				_PROCESS_NOT_LAST_GROUP_IN_COLUMN_SWITCH(_JOIN_TYPE)
				// Both o.proberState.lIdx and o.proberState.rIdx should point to the
				// last elements processed in their respective batches.
				o.proberState.lIdx = curLIdx
				o.proberState.rIdx = curRIdx
			}
			// {{end}}
		}
	// {{end}}
	default:
		colexecerror.InternalError(fmt.Sprintf("unhandled type %s", colType))
	}
	// {{end}}
	// {{/*
}

// */}}

// {{/*
// This code snippet processes an unmatched group from the left.
//
// It is expanded inline and reads lGroup, curLIdx, curLEndIdx and curRIdx
// from the expansion site. When the group is flagged as unmatched it advances
// curLIdx and sets areGroupsProcessed, which keeps the probing loop that
// follows from running for this group.
func _LEFT_UNMATCHED_GROUP_SWITCH(_JOIN_TYPE joinTypeInfo) { // */}}
	// {{define "leftUnmatchedGroupSwitch"}}
	// {{if or $.JoinType.IsInner $.JoinType.IsLeftSemi}}
	// {{/*
	//     Unmatched groups are not possible with INNER, LEFT SEMI, and INTERSECT
	//     ALL joins (the latter has IsLeftSemi == true), so there is nothing to do
	//     here.
	// */}}
	// {{end}}
	// {{if or $.JoinType.IsLeftOuter $.JoinType.IsLeftAnti}}
	if lGroup.unmatched {
		if curLIdx+1 != curLEndIdx {
			colexecerror.InternalError(fmt.Sprintf("unexpectedly length %d of the left unmatched group is not 1", curLEndIdx-curLIdx))
		}
		// The row already does not have a match, so we don't need to do any
		// additional processing.
		o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
		curLIdx++
		areGroupsProcessed = true
	}
	// {{end}}
	// {{if $.JoinType.IsRightOuter}}
	// {{/*
	//     Unmatched groups from the left are not possible with RIGHT OUTER join, so
	//     there is nothing to do here.
	// */}}
	// {{end}}
	// {{end}}
	// {{/*
}

// */}}

// {{/*
// This code snippet processes an unmatched group from the right.
//
// It mirrors the left-side snippet: it reads rGroup, curRIdx, curREndIdx and
// curLIdx from the expansion site, and on an unmatched group it advances
// curRIdx and sets areGroupsProcessed.
func _RIGHT_UNMATCHED_GROUP_SWITCH(_JOIN_TYPE joinTypeInfo) { // */}}
	// {{define "rightUnmatchedGroupSwitch"}}
	// {{if or $.JoinType.IsInner $.JoinType.IsLeftSemi}}
	// {{/*
	//     Unmatched groups are not possible with INNER, LEFT SEMI, and INTERSECT
	//     ALL joins (the latter has IsLeftSemi == true), so there is nothing to do
	//     here.
	// */}}
	// {{end}}
	// {{if or $.JoinType.IsLeftOuter $.JoinType.IsLeftAnti}}
	// {{/*
	//     Unmatched groups from the right are not possible with LEFT OUTER, LEFT
	//     ANTI, and EXCEPT ALL joins (the latter has IsLeftAnti == true), so there
	//     is nothing to do here.
	// */}}
	// {{end}}
	// {{if $.JoinType.IsRightOuter}}
	if rGroup.unmatched {
		if curRIdx+1 != curREndIdx {
			colexecerror.InternalError(fmt.Sprintf("unexpectedly length %d of the right unmatched group is not 1", curREndIdx-curRIdx))
		}
		// The row already does not have a match, so we don't need to do any
		// additional processing.
		o.groups.addRightOuterGroup(curLIdx, curRIdx)
		curRIdx++
		areGroupsProcessed = true
	}
	// {{end}}
	// {{end}}
	// {{/*
}

// */}}

// {{/*
// This code snippet decides what to do if we encounter null in the equality
// column from the left input. Note that the case of Null equality *must* be
// checked separately.
//
// The snippet only records the group; advancing curLIdx is left to the code
// at the expansion site.
func _NULL_FROM_LEFT_SWITCH(_JOIN_TYPE joinTypeInfo) { // */}}
	// {{define "nullFromLeftSwitch"}}
	// {{if or $.JoinType.IsInner $.JoinType.IsLeftSemi}}
	// {{/*
	//     Nulls coming from the left input are ignored in INNER and LEFT SEMI
	//     joins.
	// */}}
	// {{end}}
	// {{if or $.JoinType.IsLeftOuter $.JoinType.IsLeftAnti}}
	o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
	// {{end}}
	// {{if $.JoinType.IsRightOuter}}
	// {{/*
	//     Nulls coming from the left input are ignored in RIGHT OUTER join.
	// */}}
	// {{end}}
	// {{end}}
	// {{/*
}

// */}}

// {{/*
// This code snippet decides what to do if we encounter null in the equality
// column from the right input. Note that the case of Null equality *must* be
// checked separately.
//
// The snippet only records the group; advancing curRIdx is left to the code
// at the expansion site.
func _NULL_FROM_RIGHT_SWITCH(_JOIN_TYPE joinTypeInfo) { // */}}
	// {{define "nullFromRightSwitch"}}
	// {{if or $.JoinType.IsInner $.JoinType.IsLeftSemi}}
	// {{/*
	//     Nulls coming from the right input are ignored in INNER and LEFT SEMI
	//     joins.
	// */}}
	// {{end}}
	// {{if or $.JoinType.IsLeftOuter $.JoinType.IsLeftAnti}}
	// {{/*
	//     Nulls coming from the right input are ignored in LEFT OUTER and LEFT
	//     ANTI joins.
	// */}}
	// {{end}}
	// {{if $.JoinType.IsRightOuter}}
	o.groups.addRightOuterGroup(curLIdx, curRIdx)
	// {{end}}
	// {{end}}
	// {{/*
}

// */}}

// {{/*
// This code snippet decides what to do when - while looking for a match
// between two inputs - we need to advance the left side, i.e.
// it decides how
// to handle an unmatched tuple from the left.
//
// At the expansion site curLIdx has already been incremented past the
// mismatching row, which is why the first unmatched group is registered at
// curLIdx-1. The loop then keeps consuming left rows that are equal to lVal,
// registering each one as its own unmatched group.
func _INCREMENT_LEFT_SWITCH(
	_JOIN_TYPE joinTypeInfo, _SEL_PERMUTATION selPermutation, _L_HAS_NULLS bool,
) { // */}}
	// {{define "incrementLeftSwitch"}}
	// {{$sel := $.SelPermutation}}
	// {{if or $.JoinType.IsInner $.JoinType.IsLeftSemi}}
	// {{/*
	//     Unmatched tuple from the left source is not outputted in INNER, LEFT
	//     SEMI, and INTERSECT ALL joins (the latter has IsLeftSemi == true).
	// */}}
	// {{end}}
	// {{if or $.JoinType.IsLeftOuter $.JoinType.IsLeftAnti}}
	// All the rows on the left within the current group will not get a match on
	// the right, so we're adding each of them as a left unmatched group.
	o.groups.addLeftUnmatchedGroup(curLIdx-1, curRIdx)
	for curLIdx < curLEndIdx {
		// {{/*
		//     EXCEPT ALL join allows NULL equality, so we have special
		//     treatment of NULLs.
		// */}}
		// {{if _L_HAS_NULLS}}
		// {{if _JOIN_TYPE.IsSetOp}}
		newLValNull := lVec.Nulls().NullAt(_L_SEL_IND)
		if lNull != newLValNull {
			// We have a null mismatch, so we've reached the end of the current
			// group on the left.
			break
		} else if newLValNull && lNull {
			nullMatch = true
		} else {
			nullMatch = false
		}
		// {{else}}
		if lVec.Nulls().NullAt(_L_SEL_IND) {
			break
		}
		// {{end}}
		// {{end}}

		// {{if and _JOIN_TYPE.IsSetOp _L_HAS_NULLS}}
		// {{/*
		//     We have checked for null equality above and set nullMatch to the
		//     correct value. If it is true, then both the old and the new
		//     values are NULL, so there is no further comparison needed.
		// */}}
		if !nullMatch {
			// {{end}}
			lSelIdx = _L_SEL_IND
			// {{with .Global}}
			newLVal := execgen.UNSAFEGET(lKeys, lSelIdx)
			_ASSIGN_EQ(match, newLVal, lVal, _, lKeys, lKeys)
			// {{end}}
			if !match {
				break
			}
			// {{if and _JOIN_TYPE.IsSetOp _L_HAS_NULLS}}
		}
		// {{end}}
		o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
		curLIdx++
	}
	// {{end}}
	// {{if $.JoinType.IsRightOuter}}
	// {{/*
	//     Unmatched tuple from the left source is not outputted in RIGHT OUTER join.
	// */}}
	// {{end}}
	// {{end}}
	// {{/*
}

// */}}

// {{/*
// This code snippet decides what to do when - while looking for a match
// between two inputs - we need to advance the right side, i.e. it decides how
// to handle an unmatched tuple from the right.
//
// At the expansion site curRIdx has already been incremented past the
// mismatching row, which is why the first group is registered at curRIdx-1.
func _INCREMENT_RIGHT_SWITCH(
	_JOIN_TYPE joinTypeInfo, _SEL_PERMUTATION selPermutation, _R_HAS_NULLS bool,
) { // */}}
	// {{define "incrementRightSwitch"}}
	// {{$sel := $.SelPermutation}}
	// {{if or $.JoinType.IsInner $.JoinType.IsLeftSemi}}
	// {{/*
	//     Unmatched tuple from the right source is not outputted in INNER, LEFT
	//     SEMI, and INTERSECT ALL joins (the latter has IsLeftSemi == true).
	// */}}
	// {{end}}
	// {{if or $.JoinType.IsLeftOuter $.JoinType.IsLeftAnti}}
	// {{/*
	//     Unmatched tuple from the right source is not outputted in LEFT OUTER,
	//     LEFT ANTI, and EXCEPT ALL joins (the latter has IsLeftAnti == true).
	// */}}
	// {{end}}
	// {{if $.JoinType.IsRightOuter}}
	// All the rows on the right within the current group will not get a match on
	// the left, so we're adding each of them as a right outer group.
	o.groups.addRightOuterGroup(curLIdx, curRIdx-1)
	for curRIdx < curREndIdx {
		// {{if _R_HAS_NULLS}}
		if rVec.Nulls().NullAt(_R_SEL_IND) {
			break
		}
		// {{end}}
		rSelIdx = _R_SEL_IND
		// {{with .Global}}
		newRVal := execgen.UNSAFEGET(rKeys, rSelIdx)
		_ASSIGN_EQ(match, newRVal, rVal, _, rKeys, rKeys)
		// {{end}}
		if !match {
			break
		}
		o.groups.addRightOuterGroup(curLIdx, curRIdx)
		curRIdx++
	}
	// {{end}}
	// {{end}}
	// {{/*
}

// */}}

// {{/*
// This code snippet processes all but last groups in a column after we have
// reached the end of either the left or right group.
//
// It is skipped when areGroupsProcessed was set earlier for the current
// group.
func _PROCESS_NOT_LAST_GROUP_IN_COLUMN_SWITCH(_JOIN_TYPE joinTypeInfo) { // */}}
	// {{define "processNotLastGroupInColumnSwitch"}}
	// {{if or $.JoinType.IsInner $.JoinType.IsLeftSemi}}
	// {{/*
	//     Nothing to do here since an unmatched tuple is omitted.
	// */}}
	// {{end}}
	// {{if or $.JoinType.IsLeftOuter $.JoinType.IsLeftAnti}}
	if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
		// The current group is not the last one within the column, so it cannot be
		// extended into the next batch, and we need to process it right now. Any
		// unprocessed row in the left group will not get a match, so each one of
		// them becomes a new unmatched group with a corresponding null group.
		for curLIdx < curLEndIdx {
			o.groups.addLeftUnmatchedGroup(curLIdx, curRIdx)
			curLIdx++
		}
	}
	// {{end}}
	// {{if $.JoinType.IsRightOuter}}
	if !o.groups.isLastGroupInCol() && !areGroupsProcessed {
		// The current group is not the last one within the column, so it cannot be
		// extended into the next batch, and we need to process it right now. Any
		// unprocessed row in the right group will not get a match, so each one of
		// them becomes a new unmatched group with a corresponding null group.
		for curRIdx < curREndIdx {
			o.groups.addRightOuterGroup(curLIdx, curRIdx)
			curRIdx++
		}
	}
	// {{end}}
	// {{end}}
	// {{/*
}

// */}}

// {{range $sel := $.SelPermutations}}
// {{/*
// The method below is stamped out once per selection-vector permutation. It
// probes the current left and right batches one equality column at a time:
// for each column it optionally casts one side so that both vectors share a
// type, then expands the probing snippet specialized on whether either vector
// may contain NULLs. The locals lSel, rSel, lVec, rVec, colType and eqColIdx,
// as well as the EqLoop label, are referenced by the expanded snippet.
// */}}
func (o *mergeJoin_JOIN_TYPE_STRINGOp) probeBodyLSel_IS_L_SELRSel_IS_R_SEL(ctx context.Context) {
	lSel := o.proberState.lBatch.Selection()
	rSel := o.proberState.rBatch.Selection()
EqLoop:
	for eqColIdx := 0; eqColIdx < len(o.left.eqCols); eqColIdx++ {
		leftColIdx := o.left.eqCols[eqColIdx]
		rightColIdx := o.right.eqCols[eqColIdx]
		lVec := o.proberState.lBatch.ColVec(int(leftColIdx))
		rVec := o.proberState.rBatch.ColVec(int(rightColIdx))
		leftType := o.left.sourceTypes[leftColIdx]
		rightType := o.right.sourceTypes[rightColIdx]
		colType := leftType
		// Merge joiner only supports the case when the physical types in the
		// equality columns in both inputs are the same. If that is not the case,
		// we need to cast one of the vectors to another's physical type putting
		// the result of the cast into a temporary vector that is used instead of
		// the original.
		leftCanonicalTypeFamily := o.left.canonicalTypeFamilies[leftColIdx]
		isNumeric := leftCanonicalTypeFamily == types.IntFamily ||
			leftCanonicalTypeFamily == types.FloatFamily ||
			leftCanonicalTypeFamily == types.DecimalFamily
		if isNumeric && !leftType.Identical(rightType) {
			castLeftToRight := false
			// There is a hierarchy of valid casts:
			//   Int16 -> Int32 -> Int64 -> Float64 -> Decimal
			// and the cast is valid if 'fromType' is mentioned before 'toType'
			// in this chain.
			switch leftCanonicalTypeFamily {
			case types.IntFamily:
				switch leftType.Width() {
				case 16:
					castLeftToRight = true
				case 32:
					castLeftToRight = !rightType.Identical(types.Int2)
				case 64:
					castLeftToRight = !rightType.Identical(types.Int2) && !rightType.Identical(types.Int4)
				}
			case types.FloatFamily:
				castLeftToRight = o.right.canonicalTypeFamilies[rightColIdx] == types.DecimalFamily
			}

			toType := leftType
			if castLeftToRight {
				toType = rightType
			}
			// {{/* Reuse a scratch vector of the target type if one was allocated earlier. */}}
			var tempVec coldata.Vec
			for _, vec := range o.scratch.tempVecs {
				if vec.Type().Identical(toType) {
					tempVec = vec
					break
				}
			}
			if tempVec == nil {
				tempVec = o.unlimitedAllocator.NewMemColumn(toType, coldata.BatchSize())
				o.scratch.tempVecs = append(o.scratch.tempVecs, tempVec)
			} else {
				// {{/* A reused vector still carries state from its previous use, so clear it. */}}
				tempVec.Nulls().UnsetNulls()
				if tempVec.CanonicalTypeFamily() == types.BytesFamily {
					tempVec.Bytes().Reset()
				}
			}
			if castLeftToRight {
				cast(lVec, tempVec, o.proberState.lBatch.Length(), lSel)
				lVec = tempVec
				colType = rightType
			} else {
				cast(rVec, tempVec, o.proberState.rBatch.Length(), rSel)
				rVec = tempVec
			}
		}
		if lVec.MaybeHasNulls() {
			if rVec.MaybeHasNulls() {
				_PROBE_SWITCH(_JOIN_TYPE, _SEL_ARG, true, true)
			} else {
				_PROBE_SWITCH(_JOIN_TYPE, _SEL_ARG, true, false)
			}
		} else {
			if rVec.MaybeHasNulls() {
				_PROBE_SWITCH(_JOIN_TYPE, _SEL_ARG, false, true)
			} else {
				_PROBE_SWITCH(_JOIN_TYPE, _SEL_ARG, false, false)
			}
		}
		// Look at the groups associated with the next equality column by moving
		// the circular buffer pointer up.
		o.groups.finishedCol()
	}
}

// {{end}}

// {{/*
// This code snippet builds the output corresponding to the left side (i.e. is
// the main body of buildLeftGroupsFromBatch()).
//
// The snippet is expanded inline, so it relies on names from the expansion
// site: o, input, colIdx, src, out, sel, leftGroups, outStartIdx,
// outputBatchSize, initialBuilderState and the LeftColLoop label. Progress is
// tracked in o.builderState.left (groupsIdx, curSrcStartIdx, numRepeatsIdx)
// so that building can stop when the output batch is full and pick up from
// the same position later.
func _LEFT_SWITCH(_JOIN_TYPE joinTypeInfo, _HAS_SELECTION bool, _HAS_NULLS bool) { // */}}
	// {{define "leftSwitch"}}
	switch input.canonicalTypeFamilies[colIdx] {
	// {{range $.Global.Overloads}}
	case _CANONICAL_TYPE_FAMILY:
		switch input.sourceTypes[colIdx].Width() {
		// {{range .WidthOverloads}}
		case _TYPE_WIDTH:
			var srcCol _GOTYPESLICE
			if src != nil {
				srcCol = src.TemplateType()
			}
			outCol := out.TemplateType()
			var val _GOTYPE
			var srcStartIdx int

			// Loop over every group.
			for ; o.builderState.left.groupsIdx < len(leftGroups); o.builderState.left.groupsIdx++ {
				leftGroup := &leftGroups[o.builderState.left.groupsIdx]
				// {{if _JOIN_TYPE.IsLeftAnti}}
				// {{/*
				//     With LEFT ANTI and EXCEPT ALL joins (the latter has
				//     IsLeftAnti == true) we want to emit output corresponding only to
				//     unmatched tuples, so we're skipping all "matched" groups.
				// */}}
				if !leftGroup.unmatched {
					continue
				}
				// {{end}}
				// If curSrcStartIdx is uninitialized, start it at the group's start idx.
				// Otherwise continue where we left off.
				if o.builderState.left.curSrcStartIdx == zeroMJCPCurSrcStartIdx {
					o.builderState.left.curSrcStartIdx = leftGroup.rowStartIdx
				}
				// Loop over every row in the group.
				for ; o.builderState.left.curSrcStartIdx < leftGroup.rowEndIdx; o.builderState.left.curSrcStartIdx++ {
					// Repeat each row numRepeats times.
					srcStartIdx = o.builderState.left.curSrcStartIdx
					// {{if _HAS_SELECTION}}
					srcStartIdx = sel[srcStartIdx]
					// {{end}}

					// {{/* Clamp the number of copies so that we never write past the output batch size. */}}
					repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
					toAppend := repeatsLeft
					if outStartIdx+toAppend > outputBatchSize {
						toAppend = outputBatchSize - outStartIdx
					}

					// {{if _JOIN_TYPE.IsRightOuter}}
					// {{/*
					//     Null groups on the left can only occur with RIGHT OUTER and FULL
					//     OUTER joins for both of which IsRightOuter is true. For other joins,
					//     we're omitting this check.
					// */}}
					if leftGroup.nullGroup {
						out.Nulls().SetNullRange(outStartIdx, outStartIdx+toAppend)
						outStartIdx += toAppend
					} else
					// {{end}}
					{
						// {{if _HAS_NULLS}}
						if src.Nulls().NullAt(srcStartIdx) {
							out.Nulls().SetNullRange(outStartIdx, outStartIdx+toAppend)
							outStartIdx += toAppend
						} else
						// {{end}}
						{
							val = execgen.UNSAFEGET(srcCol, srcStartIdx)
							for i := 0; i < toAppend; i++ {
								execgen.SET(outCol, outStartIdx, val)
								outStartIdx++
							}
						}
					}

					if toAppend < repeatsLeft {
						// We didn't materialize all the rows in the group so save state and
						// move to the next column.
						o.builderState.left.numRepeatsIdx += toAppend
						if colIdx == len(input.sourceTypes)-1 {
							return
						}
						o.builderState.left.setBuilderColumnState(initialBuilderState)
						continue LeftColLoop
					}

					o.builderState.left.numRepeatsIdx = zeroMJCPNumRepeatsIdx
				}
				o.builderState.left.curSrcStartIdx = zeroMJCPCurSrcStartIdx
			}
			o.builderState.left.groupsIdx = zeroMJCPGroupsIdx
			// {{end}}
		}
	// {{end}}
	default:
		colexecerror.InternalError(fmt.Sprintf("unhandled type %s", input.sourceTypes[colIdx].String()))
	}
	// {{end}}
	// {{/*
}

// */}}

// buildLeftGroupsFromBatch takes a []group and expands each group into the
// output by repeating each row in the group numRepeats times. For example,
// given an input table:
//  L1 |  L2
//  --------
//  1  |  a
//  1  |  b
// and leftGroups = [{startIdx: 0, endIdx: 2, numRepeats: 3}]
// then buildLeftGroupsFromBatch expands this to
//  L1 |  L2
//  --------
//  1  |  a
//  1  |  a
//  1  |  a
//  1  |  b
//  1  |  b
//  1  |  b
// Note: this is different from buildRightGroupsFromBatch in that each row of
// group is repeated numRepeats times, instead of a simple copy of the group as
// a whole.
892 // SIDE EFFECTS: writes into o.output. 893 func (o *mergeJoin_JOIN_TYPE_STRINGOp) buildLeftGroupsFromBatch( 894 leftGroups []group, input *mergeJoinInput, batch coldata.Batch, destStartIdx int, 895 ) { 896 sel := batch.Selection() 897 initialBuilderState := o.builderState.left 898 outputBatchSize := o.outputBatchSize 899 o.unlimitedAllocator.PerformOperation( 900 o.output.ColVecs()[:len(input.sourceTypes)], 901 func() { 902 // Loop over every column. 903 LeftColLoop: 904 for colIdx := range input.sourceTypes { 905 outStartIdx := destStartIdx 906 out := o.output.ColVec(colIdx) 907 var src coldata.Vec 908 if batch.Length() > 0 { 909 src = batch.ColVec(colIdx) 910 } 911 912 if sel != nil { 913 if src != nil && src.MaybeHasNulls() { 914 _LEFT_SWITCH(_JOIN_TYPE, true, true) 915 } else { 916 _LEFT_SWITCH(_JOIN_TYPE, true, false) 917 } 918 } else { 919 if src != nil && src.MaybeHasNulls() { 920 _LEFT_SWITCH(_JOIN_TYPE, false, true) 921 } else { 922 _LEFT_SWITCH(_JOIN_TYPE, false, false) 923 } 924 } 925 o.builderState.left.setBuilderColumnState(initialBuilderState) 926 } 927 o.builderState.left.reset() 928 }, 929 ) 930 } 931 932 // buildLeftBufferedGroup is similar to buildLeftGroupsFromBatch, but it 933 // builds the output columns corresponding to the left input based on the 934 // buffered group. The goal is to repeat each row from the left buffered group 935 // leftGroup.numRepeats times. 936 // Note that for non-set operation joins all other fields of leftGroup are 937 // ignored because, by definition, all rows in the buffered group are part of 938 // leftGroup (i.e. we don't need to look at rowStartIdx and rowEndIdx). Also, 939 // all rows in the buffered group do have a match, so the group can neither be 940 // "nullGroup" nor "unmatched". 941 // This function does pay attention to rowEndIdx field for set operation joins: 942 // only the first rowEndIdx will be output. 
// For performance reasons we choose
// to output the first rows (for both INTERSECT ALL and EXCEPT ALL joins we
// need to output exactly rowEndIdx different rows from the left, but the
// choice of rows can be arbitrary).
// Batches are pulled from bufferedGroup via dequeue(ctx), and a dequeue error
// is reported through colexecerror.InternalError. Writing into the output
// starts at position destStartIdx.
func (o *mergeJoin_JOIN_TYPE_STRINGOp) buildLeftBufferedGroup(
	ctx context.Context,
	leftGroup group,
	input *mergeJoinInput,
	bufferedGroup mjBufferedGroup,
	destStartIdx int,
) {
	var err error
	// A non-nil batch stored in the builder state is picked up as is (together
	// with the stored positions); otherwise, we start with a freshly dequeued
	// batch and zeroed positions.
	currentBatch := o.builderState.lBufferedGroupBatch
	if currentBatch == nil {
		currentBatch, err = bufferedGroup.dequeue(ctx)
		if err != nil {
			colexecerror.InternalError(err)
		}
		o.builderState.lBufferedGroupBatch = currentBatch
		o.builderState.left.curSrcStartIdx = 0
		o.builderState.left.numRepeatsIdx = 0
	}
	// Remember the builder state from which every column of the current batch
	// has to start.
	initialBuilderState := o.builderState.left
	o.unlimitedAllocator.PerformOperation(
		o.output.ColVecs()[:len(input.sourceTypes)],
		func() {
			batchLength := currentBatch.Length()
			// A zero-length batch returned by dequeue terminates this loop.
			for batchLength > 0 {
				// Loop over every column.
			LeftColLoop:
				for colIdx := range input.sourceTypes {
					outStartIdx := destStartIdx
					src := currentBatch.ColVec(colIdx)
					out := o.output.ColVec(colIdx)
					switch input.canonicalTypeFamilies[colIdx] {
					// {{range $.Overloads}}
					case _CANONICAL_TYPE_FAMILY:
						switch input.sourceTypes[colIdx].Width() {
						// {{range .WidthOverloads}}
						case _TYPE_WIDTH:
							srcCol := src.TemplateType()
							outCol := out.TemplateType()
							var val _GOTYPE
							// {{if _JOIN_TYPE.IsSetOp}}
							// Loop over every row in the group until we hit
							// rowEndIdx number of rows.
							// {{else}}
							// Loop over every row in the group.
							// {{end}}
							for ; o.builderState.left.curSrcStartIdx < batchLength; o.builderState.left.curSrcStartIdx++ {
								// Repeat each row numRepeats times.
								// {{/*
								// TODO(yuzefovich): we can optimize this code for
								// LEFT SEMI, INTERSECT ALL, and EXCEPT ALL joins
								// because in that case numRepeats is always 1.
								// */}}
								srcStartIdx := o.builderState.left.curSrcStartIdx
								// numRepeatsIdx tracks how many repeats of the
								// current row have already been accounted for.
								repeatsLeft := leftGroup.numRepeats - o.builderState.left.numRepeatsIdx
								toAppend := repeatsLeft
								// Never write past the end of the output batch.
								if outStartIdx+toAppend > o.outputBatchSize {
									toAppend = o.outputBatchSize - outStartIdx
								}

								// {{if _JOIN_TYPE.IsSetOp}}
								if o.builderState.left.setOpLeftSrcIdx == leftGroup.rowEndIdx {
									// We have fully materialized first rowEndIdx
									// rows in the current column, so we need to
									// either transition to the next column or exit.
									// We can accomplish this by setting toAppend
									// to 0.
									toAppend = 0
								}
								o.builderState.left.setOpLeftSrcIdx += toAppend
								// {{end}}

								// {{/*
								// TODO(yuzefovich): check whether it is beneficial
								// to have 'if toAppend > 0' check here.
								// */}}
								if src.Nulls().NullAt(srcStartIdx) {
									out.Nulls().SetNullRange(outStartIdx, outStartIdx+toAppend)
									outStartIdx += toAppend
								} else {
									// Read the value once and write it toAppend
									// times.
									val = execgen.UNSAFEGET(srcCol, srcStartIdx)
									for i := 0; i < toAppend; i++ {
										execgen.SET(outCol, outStartIdx, val)
										outStartIdx++
									}
								}

								if toAppend < repeatsLeft {
									// We didn't materialize all the rows in the current batch, so
									// we move to the next column.
									if colIdx == len(input.sourceTypes)-1 {
										// This is the last column, so we update the builder state
										// and exit.
										o.builderState.left.numRepeatsIdx += toAppend
										// {{if _JOIN_TYPE.IsSetOp}}
										// {{/*
										// For non-set operation join the builder state is reset once the
										// buffered group has been fully built (at the very bottom of this
										// function), but we might be short-circuiting right now because
										// set operation joins can have partially-built groups.
										// */}}
										if o.builderState.left.setOpLeftSrcIdx == leftGroup.rowEndIdx {
											o.builderState.lBufferedGroupBatch = nil
											o.builderState.left.reset()
										}
										// {{end}}
										return
									}
									// We need to start building the next column
									// with the same initial builder state as the
									// current column.
									o.builderState.left.setBuilderColumnState(initialBuilderState)
									continue LeftColLoop
								}
								// We fully processed the current row, and before moving on to the
								// next one, we need to reset numRepeatsIdx (so that the next row
								// would be repeated leftGroup.numRepeats times).
								o.builderState.left.numRepeatsIdx = 0
							}
							// {{end}}
						}
					// {{end}}
					default:
						colexecerror.InternalError(fmt.Sprintf("unhandled type %s", input.sourceTypes[colIdx].String()))
					}
					if colIdx == len(input.sourceTypes)-1 {
						// We have appended some tuples into the output batch from the current
						// batch (the latter is now fully processed), so we need to adjust
						// destStartIdx accordingly for the next batch.
						destStartIdx = outStartIdx
					} else {
						o.builderState.left.setBuilderColumnState(initialBuilderState)
					}
				}
				// We have processed all tuples in the current batch from the
				// buffered group, so we need to dequeue the next one.
				o.unlimitedAllocator.ReleaseBatch(currentBatch)
				currentBatch, err = bufferedGroup.dequeue(ctx)
				if err != nil {
					colexecerror.InternalError(err)
				}
				o.builderState.lBufferedGroupBatch = currentBatch
				batchLength = currentBatch.Length()
				// We have transitioned to building from a new batch, so we
				// need to update the builder state to build from the beginning
				// of the new batch.
				o.builderState.left.curSrcStartIdx = 0
				o.builderState.left.numRepeatsIdx = 0
				// We also need to update 'initialBuilderState' so that the
				// builder state gets reset correctly in-between different
				// columns in the loop above.
				initialBuilderState = o.builderState.left
			}
			// The loop above has run out of batches, so the stored batch is
			// dropped and the left builder state is reset.
			o.builderState.lBufferedGroupBatch = nil
			o.builderState.left.reset()
		},
	)
}

// {{/*
// This code snippet builds the output corresponding to the right side (i.e. is
// the main body of buildRightGroupsFromBatch()).
// The snippet refers to names that it does not declare itself (input, colIdx,
// src, out, outStartIdx, sel, rightGroups, outputBatchSize,
// initialBuilderState as well as the RightColLoop label), so it can only be
// expanded in a place where all of them are in scope.
func _RIGHT_SWITCH(_JOIN_TYPE joinTypeInfo, _HAS_SELECTION bool, _HAS_NULLS bool) { // */}}
	// {{define "rightSwitch"}}

	switch input.canonicalTypeFamilies[colIdx] {
	// {{range $.Global.Overloads}}
	case _CANONICAL_TYPE_FAMILY:
		switch input.sourceTypes[colIdx].Width() {
		// {{range .WidthOverloads}}
		case _TYPE_WIDTH:
			// src can be nil (see the caller), in which case srcCol keeps its
			// zero value.
			var srcCol _GOTYPESLICE
			if src != nil {
				srcCol = src.TemplateType()
			}
			outCol := out.TemplateType()

			// Loop over every group.
			for ; o.builderState.right.groupsIdx < len(rightGroups); o.builderState.right.groupsIdx++ {
				rightGroup := &rightGroups[o.builderState.right.groupsIdx]
				// Repeat every group numRepeats times.
				for ; o.builderState.right.numRepeatsIdx < rightGroup.numRepeats; o.builderState.right.numRepeatsIdx++ {
					// The sentinel value means that no row of the current
					// repetition has been copied yet, so we start from the
					// first row of the group.
					if o.builderState.right.curSrcStartIdx == zeroMJCPCurSrcStartIdx {
						o.builderState.right.curSrcStartIdx = rightGroup.rowStartIdx
					}
					toAppend := rightGroup.rowEndIdx - o.builderState.right.curSrcStartIdx
					// Never write past the end of the output batch.
					if outStartIdx+toAppend > outputBatchSize {
						toAppend = outputBatchSize - outStartIdx
					}

					// {{if _JOIN_TYPE.IsLeftOuter}}
					// {{/*
					// Null groups on the right can only occur with LEFT OUTER and FULL
					// OUTER joins for both of which IsLeftOuter is true. For other joins,
					// we're omitting this check.
					// */}}
					if rightGroup.nullGroup {
						out.Nulls().SetNullRange(outStartIdx, outStartIdx+toAppend)
					} else
					// {{end}}
					{
						// Optimization in the case that group length is 1, use assign
						// instead of copy.
						if toAppend == 1 {
							// {{if _HAS_SELECTION}}
							// {{if _HAS_NULLS}}
							if src.Nulls().NullAt(sel[o.builderState.right.curSrcStartIdx]) {
								out.Nulls().SetNull(outStartIdx)
							} else
							// {{end}}
							{
								v := execgen.UNSAFEGET(srcCol, sel[o.builderState.right.curSrcStartIdx])
								execgen.SET(outCol, outStartIdx, v)
							}
							// {{else}}
							// {{if _HAS_NULLS}}
							if src.Nulls().NullAt(o.builderState.right.curSrcStartIdx) {
								out.Nulls().SetNull(outStartIdx)
							} else
							// {{end}}
							{
								v := execgen.UNSAFEGET(srcCol, o.builderState.right.curSrcStartIdx)
								execgen.SET(outCol, outStartIdx, v)
							}
							// {{end}}
						} else {
							out.Copy(
								coldata.CopySliceArgs{
									SliceArgs: coldata.SliceArgs{
										Src:         src,
										Sel:         sel,
										DestIdx:     outStartIdx,
										SrcStartIdx: o.builderState.right.curSrcStartIdx,
										SrcEndIdx:   o.builderState.right.curSrcStartIdx + toAppend,
									},
								},
							)
						}
					}

					outStartIdx += toAppend

					// If we haven't materialized all the rows from the group, then we are
					// done with the current column.
					if toAppend < rightGroup.rowEndIdx-o.builderState.right.curSrcStartIdx {
						// If it's the last column, save state and return.
						if colIdx == len(input.sourceTypes)-1 {
							o.builderState.right.curSrcStartIdx += toAppend
							return
						}
						// Otherwise, reset to the initial state and begin the next column.
						o.builderState.right.setBuilderColumnState(initialBuilderState)
						continue RightColLoop
					}
					// The current repetition of the group is complete, so the
					// next one starts from the first row of the group again.
					o.builderState.right.curSrcStartIdx = zeroMJCPCurSrcStartIdx
				}
				o.builderState.right.numRepeatsIdx = zeroMJCPNumRepeatsIdx
			}
			o.builderState.right.groupsIdx = zeroMJCPGroupsIdx
			// {{end}}
		}
	// {{end}}
	default:
		colexecerror.InternalError(fmt.Sprintf("unhandled type %s", input.sourceTypes[colIdx].String()))
	}
	// {{end}}
	// {{/*
}

// */}}

// buildRightGroupsFromBatch takes a []group and repeats each group numRepeats
// times. For example, given an input table:
//  R1 |  R2
//  --------
//  1  |  a
//  1  |  b
// and rightGroups = [{rowStartIdx: 0, rowEndIdx: 2, numRepeats: 3}]
// then buildRightGroupsFromBatch expands this to
//  R1 |  R2
//  --------
//  1  |  a
//  1  |  b
//  1  |  a
//  1  |  b
//  1  |  a
//  1  |  b
// Note: this is different from buildLeftGroupsFromBatch in that each group is
// not expanded but directly copied numRepeats times.
// colOffset is added to the column index in order to find the output vector
// for each input column, and destStartIdx is the position in the output at
// which every column starts being written.
// SIDE EFFECTS: writes into o.output.
func (o *mergeJoin_JOIN_TYPE_STRINGOp) buildRightGroupsFromBatch(
	rightGroups []group, colOffset int, input *mergeJoinInput, batch coldata.Batch, destStartIdx int,
) {
	// Remember the builder state as it is before any column is built so that
	// every column can start from it.
	initialBuilderState := o.builderState.right
	sel := batch.Selection()
	outputBatchSize := o.outputBatchSize

	o.unlimitedAllocator.PerformOperation(
		o.output.ColVecs()[colOffset:colOffset+len(input.sourceTypes)],
		func() {
			// Loop over every column.
		RightColLoop:
			for colIdx := range input.sourceTypes {
				outStartIdx := destStartIdx
				out := o.output.ColVec(colIdx + colOffset)
				// src is left nil when the batch is zero-length, and the checks
				// below guard against that.
				var src coldata.Vec
				if batch.Length() > 0 {
					src = batch.ColVec(colIdx)
				}

				// The two boolean arguments pick the variant of the snippet:
				// the first one reflects whether a selection vector is present
				// and the second one whether src may contain nulls.
				if sel != nil {
					if src != nil && src.MaybeHasNulls() {
						_RIGHT_SWITCH(_JOIN_TYPE, true, true)
					} else {
						_RIGHT_SWITCH(_JOIN_TYPE, true, false)
					}
				} else {
					if src != nil && src.MaybeHasNulls() {
						_RIGHT_SWITCH(_JOIN_TYPE, false, true)
					} else {
						_RIGHT_SWITCH(_JOIN_TYPE, false, false)
					}
				}

				o.builderState.right.setBuilderColumnState(initialBuilderState)
			}
			// All columns have been built without an early exit, so the right
			// builder state is reset.
			o.builderState.right.reset()
		})
}

// buildRightBufferedGroup is similar to buildRightGroupsFromBatch, but it
// builds the output columns corresponding to the right input based on the
// buffered group. The goal is to repeat the whole buffered group
// rightGroup.numRepeats times.
// Note that all other fields of rightGroup are ignored because, by definition,
// all rows in the buffered group are part of rightGroup (i.e. we don't need to
// look at rowStartIdx and rowEndIdx). Also, all rows in the buffered group do
// have a match, so the group can neither be "nullGroup" nor "unmatched".
// Errors returned by dequeue() and rewind() of the buffered group are reported
// through colexecerror.InternalError.
func (o *mergeJoin_JOIN_TYPE_STRINGOp) buildRightBufferedGroup(
	ctx context.Context,
	rightGroup group,
	colOffset int,
	input *mergeJoinInput,
	bufferedGroup mjBufferedGroup,
	destStartIdx int,
) {
	var err error
	o.unlimitedAllocator.PerformOperation(
		o.output.ColVecs()[colOffset:colOffset+len(input.sourceTypes)],
		func() {
			outStartIdx := destStartIdx
			// Repeat the buffered group numRepeats times.
			for ; o.builderState.right.numRepeatsIdx < rightGroup.numRepeats; o.builderState.right.numRepeatsIdx++ {
				// A non-nil batch stored in the builder state is picked up as
				// is (together with the stored position); otherwise, we start
				// with a freshly dequeued batch.
				currentBatch := o.builderState.rBufferedGroupBatch
				if currentBatch == nil {
					currentBatch, err = bufferedGroup.dequeue(ctx)
					if err != nil {
						colexecerror.InternalError(err)
					}
					o.builderState.rBufferedGroupBatch = currentBatch
					o.builderState.right.curSrcStartIdx = 0
				}
				batchLength := currentBatch.Length()
				// A zero-length batch returned by dequeue terminates this loop.
				for batchLength > 0 {
					// The same range of rows is copied for every column, so
					// the number of rows is computed once per batch.
					toAppend := batchLength - o.builderState.right.curSrcStartIdx
					if outStartIdx+toAppend > o.outputBatchSize {
						toAppend = o.outputBatchSize - outStartIdx
					}

					// Loop over every column.
					for colIdx := range input.sourceTypes {
						out := o.output.ColVec(colIdx + colOffset)
						src := currentBatch.ColVec(colIdx)
						switch input.canonicalTypeFamilies[colIdx] {
						// {{range $.Overloads}}
						case _CANONICAL_TYPE_FAMILY:
							switch input.sourceTypes[colIdx].Width() {
							// {{range .WidthOverloads}}
							case _TYPE_WIDTH:
								srcCol := src.TemplateType()
								outCol := out.TemplateType()

								// Optimization in the case that group length is 1, use assign
								// instead of copy.
								if toAppend == 1 {
									if src.Nulls().NullAt(o.builderState.right.curSrcStartIdx) {
										out.Nulls().SetNull(outStartIdx)
									} else {
										v := execgen.UNSAFEGET(srcCol, o.builderState.right.curSrcStartIdx)
										execgen.SET(outCol, outStartIdx, v)
									}
								} else {
									// Note that no selection vector is supplied
									// to this copy.
									out.Copy(
										coldata.CopySliceArgs{
											SliceArgs: coldata.SliceArgs{
												Src:         src,
												DestIdx:     outStartIdx,
												SrcStartIdx: o.builderState.right.curSrcStartIdx,
												SrcEndIdx:   o.builderState.right.curSrcStartIdx + toAppend,
											},
										},
									)
								}
								// {{end}}
							}
						// {{end}}
						default:
							colexecerror.InternalError(fmt.Sprintf("unhandled type %s", input.sourceTypes[colIdx].String()))
						}
					}
					outStartIdx += toAppend

					if toAppend < batchLength-o.builderState.right.curSrcStartIdx {
						// If we haven't materialized all the rows from the batch, then we
						// are ready to emit the output batch.
						o.builderState.right.curSrcStartIdx += toAppend
						return
					}
					// We have fully processed the current batch, so we need to get the
					// next one.
					o.unlimitedAllocator.ReleaseBatch(currentBatch)
					currentBatch, err = bufferedGroup.dequeue(ctx)
					if err != nil {
						colexecerror.InternalError(err)
					}
					o.builderState.rBufferedGroupBatch = currentBatch
					batchLength = currentBatch.Length()
					o.builderState.right.curSrcStartIdx = 0
				}
				// We have fully processed all the batches from the buffered group, so
				// we need to rewind it.
				if err := bufferedGroup.rewind(); err != nil {
					colexecerror.InternalError(err)
				}
				// Clearing the stored batch makes the next repetition dequeue
				// a batch from the rewound group.
				o.builderState.rBufferedGroupBatch = nil
			}
			o.builderState.right.reset()
		})
}

// probe is where we generate the groups slices that are used in the build
// phase. We do this by first assuming that every row in both batches
// contributes to the cross product.
// Then, with every equality column, we
// filter out the rows that don't contribute to the cross product (i.e. they
// don't have a matching row on the other side in the case of an inner join),
// and set the correct cardinality.
// Note that in this phase, we do this for every group, except the last group
// in the batch.
func (o *mergeJoin_JOIN_TYPE_STRINGOp) probe(ctx context.Context) {
	o.groups.reset(o.proberState.lIdx, o.proberState.lLength, o.proberState.rIdx, o.proberState.rLength)
	lSel := o.proberState.lBatch.Selection()
	rSel := o.proberState.rBatch.Selection()
	// Dispatch to one of the four probe body variants depending on whether the
	// left and the right batches have selection vectors.
	if lSel != nil {
		if rSel != nil {
			o.probeBodyLSeltrueRSeltrue(ctx)
		} else {
			o.probeBodyLSeltrueRSelfalse(ctx)
		}
	} else {
		if rSel != nil {
			o.probeBodyLSelfalseRSeltrue(ctx)
		} else {
			o.probeBodyLSelfalseRSelfalse(ctx)
		}
	}
}

// setBuilderSourceToBufferedGroup sets up the builder state to use the
// buffered group.
// Which groups are put into the builder state depends on the join type (see
// the template conditionals in the body). In all cases the builder is switched
// to mjBuildFromBufferedGroup and both buffered groups are marked as needing a
// reset.
func (o *mergeJoin_JOIN_TYPE_STRINGOp) setBuilderSourceToBufferedGroup(ctx context.Context) {
	// {{if and (_JOIN_TYPE.IsLeftAnti) (not _JOIN_TYPE.IsSetOp)}}
	// All tuples in the buffered group have matches, so they are not output in
	// case of LEFT ANTI join.
	o.builderState.lGroups = o.builderState.lGroups[:0]
	// {{else}}
	// The number of tuples in each buffered group doubles as the end index of
	// the (single) group built from it.
	lGroupEndIdx := o.proberState.lBufferedGroup.numTuples
	rGroupEndIdx := o.proberState.rBufferedGroup.numTuples
	// The capacity of builder state lGroups and rGroups is always at least 1
	// given the init.
	o.builderState.lGroups = o.builderState.lGroups[:1]
	o.builderState.rGroups = o.builderState.rGroups[:1]
	// {{if _JOIN_TYPE.IsLeftAnti}}
	// For EXCEPT ALL join we add (lGroupEndIdx - rGroupEndIdx) number
	// (if positive) of unmatched rows.
	if lGroupEndIdx > rGroupEndIdx {
		o.builderState.lGroups[0] = group{
			rowStartIdx: 0,
			rowEndIdx:   lGroupEndIdx - rGroupEndIdx,
			numRepeats:  1,
			toBuild:     lGroupEndIdx - rGroupEndIdx,
			unmatched:   true,
		}
	} else {
		// There is nothing to output, so no left group is kept.
		o.builderState.lGroups = o.builderState.lGroups[:0]
	}
	// {{else if _JOIN_TYPE.IsLeftSemi}}
	numMatched := lGroupEndIdx
	// {{if _JOIN_TYPE.IsSetOp}}
	// For INTERSECT ALL join we add a left group to build of length
	// min(lGroupEndIdx, rGroupEndIdx).
	if rGroupEndIdx < lGroupEndIdx {
		numMatched = rGroupEndIdx
	}
	// {{else}}
	// Remove unused warning.
	_ = rGroupEndIdx
	// {{end}}
	o.builderState.lGroups[0] = group{
		rowStartIdx: 0,
		rowEndIdx:   numMatched,
		numRepeats:  1,
		toBuild:     numMatched,
	}
	// {{else}}
	// In the general case every left row is repeated once per right row and
	// the whole right group is repeated once per left row, so both sides
	// build lGroupEndIdx * rGroupEndIdx rows.
	o.builderState.lGroups[0] = group{
		rowStartIdx: 0,
		rowEndIdx:   lGroupEndIdx,
		numRepeats:  rGroupEndIdx,
		toBuild:     lGroupEndIdx * rGroupEndIdx,
	}
	o.builderState.rGroups[0] = group{
		rowStartIdx: 0,
		rowEndIdx:   rGroupEndIdx,
		numRepeats:  lGroupEndIdx,
		toBuild:     rGroupEndIdx * lGroupEndIdx,
	}
	// {{end}}
	// {{end}}

	o.builderState.buildFrom = mjBuildFromBufferedGroup

	// We cannot yet reset the buffered groups because the builder will be taking
	// input from them. The actual reset will take place on the next call to
	// initProberState().
	o.proberState.lBufferedGroupNeedToReset = true
	o.proberState.rBufferedGroupNeedToReset = true
}

// exhaustLeftSource sets up the builder to process any remaining tuples from
// the left source. It should only be called when the right source has been
// exhausted.
func (o *mergeJoin_JOIN_TYPE_STRINGOp) exhaustLeftSource(ctx context.Context) {
	// {{if or _JOIN_TYPE.IsInner _JOIN_TYPE.IsLeftSemi}}
	// {{/*
	// Remaining tuples from the left source do not have a match, so they are
	// ignored in INNER, LEFT SEMI, and INTERSECT ALL joins (the latter has
	// IsLeftSemi == true).
	// */}}
	// {{end}}
	// {{if or _JOIN_TYPE.IsLeftOuter _JOIN_TYPE.IsLeftAnti}}
	// The capacity of builder state lGroups and rGroups is always at least 1
	// given the init.
	o.builderState.lGroups = o.builderState.lGroups[:1]
	// All tuples of the left batch starting from lIdx form a single unmatched
	// group.
	o.builderState.lGroups[0] = group{
		rowStartIdx: o.proberState.lIdx,
		rowEndIdx:   o.proberState.lLength,
		numRepeats:  1,
		toBuild:     o.proberState.lLength - o.proberState.lIdx,
		unmatched:   true,
	}
	// {{if _JOIN_TYPE.IsLeftOuter}}
	// The null group on the right uses the left-side indices so that it spans
	// the same number of rows as the unmatched left group above.
	o.builderState.rGroups = o.builderState.rGroups[:1]
	o.builderState.rGroups[0] = group{
		rowStartIdx: o.proberState.lIdx,
		rowEndIdx:   o.proberState.lLength,
		numRepeats:  1,
		toBuild:     o.proberState.lLength - o.proberState.lIdx,
		nullGroup:   true,
	}
	// {{end}}

	// Mark the whole left batch as processed by the prober.
	o.proberState.lIdx = o.proberState.lLength
	// {{end}}
	// {{if _JOIN_TYPE.IsRightOuter}}
	// {{/*
	// Remaining tuples from the left source do not have a match, so they are
	// ignored in RIGHT OUTER join.
	// */}}
	// {{end}}
}

// exhaustRightSource sets up the builder to process any remaining tuples from
// the right source. It should only be called when the left source has been
// exhausted.
func (o *mergeJoin_JOIN_TYPE_STRINGOp) exhaustRightSource() {
	// {{if _JOIN_TYPE.IsRightOuter}}
	// The capacity of builder state lGroups and rGroups is always at least 1
	// given the init.
	o.builderState.lGroups = o.builderState.lGroups[:1]
	// The null group on the left uses the right-side indices so that it spans
	// the same number of rows as the unmatched right group below.
	o.builderState.lGroups[0] = group{
		rowStartIdx: o.proberState.rIdx,
		rowEndIdx:   o.proberState.rLength,
		numRepeats:  1,
		toBuild:     o.proberState.rLength - o.proberState.rIdx,
		nullGroup:   true,
	}
	o.builderState.rGroups = o.builderState.rGroups[:1]
	// All tuples of the right batch starting from rIdx form a single unmatched
	// group.
	o.builderState.rGroups[0] = group{
		rowStartIdx: o.proberState.rIdx,
		rowEndIdx:   o.proberState.rLength,
		numRepeats:  1,
		toBuild:     o.proberState.rLength - o.proberState.rIdx,
		unmatched:   true,
	}

	// Mark the whole right batch as processed by the prober.
	o.proberState.rIdx = o.proberState.rLength
	// {{else}}
	// Remaining tuples from the right source do not have a match, so they are
	// ignored in all joins except for RIGHT OUTER and FULL OUTER.
	// {{end}}
}

// calculateOutputCount uses the toBuild field of each group and the output
// batch size to determine the output count. Note that as soon as a group is
// materialized partially or fully to output, its toBuild field is updated
// accordingly.
// The count starts from builderState.outCount and the returned value never
// exceeds o.outputBatchSize. builderState.outFinished is set to true only
// when the loop over all groups completes without hitting that limit.
func (o *mergeJoin_JOIN_TYPE_STRINGOp) calculateOutputCount(groups []group) int {
	count := o.builderState.outCount

	for i := 0; i < len(groups); i++ {
		// {{if _JOIN_TYPE.IsLeftAnti}}
		if !groups[i].unmatched {
			// "Matched" groups are not outputted in LEFT ANTI and EXCEPT ALL
			// joins (for the latter IsLeftAnti == true), so they do not
			// contribute to the output count.
			continue
		}
		// {{end}}
		// Tentatively consume the whole group. If that overshoots the output
		// batch size, the overshoot is put back into toBuild of the group.
		count += groups[i].toBuild
		groups[i].toBuild = 0
		if count > o.outputBatchSize {
			groups[i].toBuild = count - o.outputBatchSize
			count = o.outputBatchSize
			return count
		}
	}
	o.builderState.outFinished = true
	return count
}

// build creates the cross product, and writes it to the output member.
func (o *mergeJoin_JOIN_TYPE_STRINGOp) build(ctx context.Context) {
	// outStartIdx is the number of tuples already in the output before this
	// call, and the new output count is derived from the left groups only.
	outStartIdx := o.builderState.outCount
	o.builderState.outCount = o.calculateOutputCount(o.builderState.lGroups)
	if o.output.Width() != 0 && o.builderState.outCount > outStartIdx {
		// We will be actually building the output if we have columns in the output
		// batch (meaning that we're not doing query like 'SELECT count(*) ...')
		// and when builderState.outCount has increased (meaning that we have
		// something to build).
		switch o.builderState.buildFrom {
		case mjBuildFromBatch:
			o.buildLeftGroupsFromBatch(o.builderState.lGroups, &o.left, o.proberState.lBatch, outStartIdx)
			// {{if not (or _JOIN_TYPE.IsLeftSemi _JOIN_TYPE.IsLeftAnti)}}
			// The right columns are placed after the left ones, hence the
			// column offset equal to the number of the left source types.
			o.buildRightGroupsFromBatch(o.builderState.rGroups, len(o.left.sourceTypes), &o.right, o.proberState.rBatch, outStartIdx)
			// {{end}}
		case mjBuildFromBufferedGroup:
			// When building from the buffered group, only the first group on
			// each side is used.
			o.buildLeftBufferedGroup(ctx, o.builderState.lGroups[0], &o.left, o.proberState.lBufferedGroup, outStartIdx)
			// {{if not (or _JOIN_TYPE.IsLeftSemi _JOIN_TYPE.IsLeftAnti)}}
			o.buildRightBufferedGroup(ctx, o.builderState.rGroups[0], len(o.left.sourceTypes), &o.right, o.proberState.rBufferedGroup, outStartIdx)
			// {{end}}

		default:
			colexecerror.InternalError(fmt.Sprintf("unsupported mjBuildFrom %d", o.builderState.buildFrom))
		}
	}
}

// {{/*
// This code snippet is executed when at least one of the input sources has
// been exhausted. It processes any remaining tuples and then sets up the
// builder.
// The snippet refers to 'o' and 'ctx' without declaring them, so it can only
// be expanded in a place where both are in scope.
func _SOURCE_FINISHED_SWITCH(_JOIN_TYPE joinTypeInfo) { // */}}
	// {{define "sourceFinishedSwitch"}}
	o.outputReady = true
	o.builderState.buildFrom = mjBuildFromBatch
	// {{if or $.JoinType.IsInner $.JoinType.IsLeftSemi}}
	// Note that the call below switches buildFrom to mjBuildFromBufferedGroup.
	o.setBuilderSourceToBufferedGroup(ctx)
	// {{else}}
	// Next, we need to make sure that builder state is set up for a case when
	// neither exhaustLeftSource nor exhaustRightSource is called below. In such
	// scenario the merge joiner is done, so it'll be outputting zero-length
	// batches from now on.
	o.builderState.lGroups = o.builderState.lGroups[:0]
	o.builderState.rGroups = o.builderState.rGroups[:0]
	// {{end}}
	// {{if or $.JoinType.IsLeftOuter $.JoinType.IsLeftAnti}}
	// At least one of the sources is finished. If it was the right one,
	// then we need to emit remaining tuples from the left source with
	// nulls corresponding to the right one. But if the left source is
	// finished, then there is nothing left to do.
	if o.proberState.lIdx < o.proberState.lLength {
		o.exhaustLeftSource(ctx)
		// We unset o.outputReady here because we want to put as many unmatched
		// tuples from the left into the output batch. Once outCount reaches the
		// desired output batch size, the output will be returned.
		o.outputReady = false
	}
	// {{end}}
	// {{if $.JoinType.IsRightOuter}}
	// At least one of the sources is finished. If it was the left one,
	// then we need to emit remaining tuples from the right source with
	// nulls corresponding to the left one. But if the right source is
	// finished, then there is nothing left to do.
	if o.proberState.rIdx < o.proberState.rLength {
		o.exhaustRightSource()
		// We unset o.outputReady here because we want to put as many unmatched
		// tuples from the right into the output batch. Once outCount reaches the
		// desired output batch size, the output will be returned.
		o.outputReady = false
	}
	// {{end}}
	// {{end}}
	// {{/*
}

// */}}

// Next runs the state machine of the merge joiner until there is a batch to
// return. It holds o.mu for the duration of the call and resets the internal
// output batch upfront. The returned batch is either o.output with its length
// set to the number of built tuples or coldata.ZeroBatch once the joiner has
// reached the mjDone state.
func (o *mergeJoin_JOIN_TYPE_STRINGOp) Next(ctx context.Context) coldata.Batch {
	o.mu.Lock()
	defer o.mu.Unlock()
	o.output.ResetInternalBatch()
	for {
		switch o.state {
		case mjEntry:
			o.initProberState(ctx)

			// Note that 'break' below only leaves the switch, so the enclosing
			// loop proceeds to handle the newly set state.
			if o.nonEmptyBufferedGroup() {
				o.state = mjFinishBufferedGroup
				break
			}

			if o.sourceFinished() {
				o.state = mjSourceFinished
				break
			}

			o.state = mjProbe
		case mjSourceFinished:
			_SOURCE_FINISHED_SWITCH(_JOIN_TYPE)
			o.state = mjBuild
		case mjFinishBufferedGroup:
			o.finishProbe(ctx)
			o.setBuilderSourceToBufferedGroup(ctx)
			o.state = mjBuild
		case mjProbe:
			o.probe(ctx)
			o.setBuilderSourceToBatch()
			o.state = mjBuild
		case mjBuild:
			o.build(ctx)

			// Once all groups have been accounted for, the next iteration
			// starts over from the entry state.
			if o.builderState.outFinished {
				o.state = mjEntry
				o.builderState.outFinished = false
			}

			// The output is returned either when it has been explicitly marked
			// as ready or when the output batch is full.
			if o.outputReady || o.builderState.outCount == o.outputBatchSize {
				if o.builderState.outCount == 0 {
					// We have already fully emitted the result of the join, so we
					// transition to "finished" state.
					o.state = mjDone
					continue
				}
				o.output.SetLength(o.builderState.outCount)
				// Reset builder out count.
				o.builderState.outCount = 0
				o.outputReady = false
				return o.output
			}
		case mjDone:
			// Note that resetting of buffered groups will close disk queues
			// (if there are any).
			if o.proberState.lBufferedGroupNeedToReset {
				o.proberState.lBufferedGroup.reset(ctx)
				o.proberState.lBufferedGroupNeedToReset = false
			}
			if o.proberState.rBufferedGroupNeedToReset {
				o.proberState.rBufferedGroup.reset(ctx)
				o.proberState.rBufferedGroupNeedToReset = false
			}
			return coldata.ZeroBatch
		default:
			colexecerror.InternalError(fmt.Sprintf("unexpected merge joiner state in Next: %v", o.state))
		}
	}
}