github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/hashjoiner_test.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package colexec 12 13 import ( 14 "context" 15 "fmt" 16 "runtime" 17 "testing" 18 "unsafe" 19 20 "github.com/cockroachdb/apd" 21 "github.com/cockroachdb/cockroach/pkg/col/coldata" 22 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 23 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase" 24 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror" 25 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 26 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 27 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 28 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 29 "github.com/cockroachdb/cockroach/pkg/sql/types" 30 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 31 "github.com/stretchr/testify/require" 32 ) 33 34 var ( 35 floats = []float64{0.314, 3.14, 31.4, 314} 36 decs []apd.Decimal 37 hjTestCases []*joinTestCase 38 ) 39 40 func init() { 41 // Set up the apd.Decimal values used in tests. 42 decs = make([]apd.Decimal, len(floats)) 43 for i, f := range floats { 44 _, err := decs[i].SetFloat64(f) 45 if err != nil { 46 colexecerror.InternalError(fmt.Sprintf("%v", err)) 47 } 48 } 49 50 hjTestCases = []*joinTestCase{ 51 { 52 description: "0", 53 leftTypes: []*types.T{types.Int}, 54 rightTypes: []*types.T{types.Int}, 55 56 leftTuples: tuples{ 57 {0}, 58 {1}, 59 {2}, 60 {3}, 61 }, 62 rightTuples: tuples{ 63 {-1}, 64 {1}, 65 {3}, 66 {5}, 67 }, 68 69 leftEqCols: []uint32{0}, 70 rightEqCols: []uint32{0}, 71 leftOutCols: []uint32{0}, 72 rightOutCols: []uint32{0}, 73 74 joinType: sqlbase.FullOuterJoin, 75 leftEqColsAreKey: true, 76 rightEqColsAreKey: true, 77 78 expected: tuples{ 79 {nil, -1}, 80 {1, 1}, 81 {3, 3}, 82 {nil, 5}, 83 {0, nil}, 84 {2, nil}, 85 }, 86 }, 87 { 88 description: "1", 89 leftTypes: []*types.T{types.Int}, 90 rightTypes: []*types.T{types.Int}, 91 92 // Test an empty build table. 93 leftTuples: tuples{}, 94 rightTuples: tuples{ 95 {-1}, 96 {1}, 97 {3}, 98 }, 99 100 leftEqCols: []uint32{0}, 101 rightEqCols: []uint32{0}, 102 leftOutCols: []uint32{0}, 103 rightOutCols: []uint32{0}, 104 105 joinType: sqlbase.FullOuterJoin, 106 leftEqColsAreKey: true, 107 108 expected: tuples{ 109 {nil, -1}, 110 {nil, 1}, 111 {nil, 3}, 112 }, 113 }, 114 { 115 description: "2", 116 leftTypes: []*types.T{types.Int}, 117 rightTypes: []*types.T{types.Int}, 118 119 leftTuples: tuples{ 120 {0}, 121 {1}, 122 {2}, 123 {3}, 124 {4}, 125 }, 126 rightTuples: tuples{ 127 {1}, 128 {3}, 129 {5}, 130 }, 131 132 leftEqCols: []uint32{0}, 133 rightEqCols: []uint32{0}, 134 leftOutCols: []uint32{0}, 135 rightOutCols: []uint32{0}, 136 137 joinType: sqlbase.LeftOuterJoin, 138 leftEqColsAreKey: true, 139 rightEqColsAreKey: true, 140 141 expected: tuples{ 142 {1, 1}, 143 {3, 3}, 144 {0, nil}, 145 {2, nil}, 146 {4, nil}, 147 }, 148 }, 149 { 150 description: "3", 151 leftTypes: []*types.T{types.Int}, 152 rightTypes: []*types.T{types.Int}, 153 154 // Test right outer join. 155 leftTuples: tuples{ 156 {0}, 157 {1}, 158 }, 159 rightTuples: tuples{ 160 {1}, 161 {2}, 162 }, 163 164 leftEqCols: []uint32{0}, 165 rightEqCols: []uint32{0}, 166 leftOutCols: []uint32{0}, 167 rightOutCols: []uint32{0}, 168 169 joinType: sqlbase.RightOuterJoin, 170 leftEqColsAreKey: true, 171 rightEqColsAreKey: true, 172 173 expected: tuples{ 174 {1, 1}, 175 {nil, 2}, 176 }, 177 }, 178 { 179 description: "4", 180 leftTypes: []*types.T{types.Int}, 181 rightTypes: []*types.T{types.Int}, 182 183 // Test right outer join with non-distinct left build table with an 184 // unmatched row from the right followed by a matched one. This is a 185 // regression test for #39303 in order to check that probeRowUnmatched 186 // is updated correctly in case of non-distinct build table. 187 leftTuples: tuples{ 188 {0}, 189 {0}, 190 {2}, 191 }, 192 rightTuples: tuples{ 193 {1}, 194 {2}, 195 }, 196 197 leftEqCols: []uint32{0}, 198 rightEqCols: []uint32{0}, 199 leftOutCols: []uint32{0}, 200 rightOutCols: []uint32{0}, 201 202 joinType: sqlbase.RightOuterJoin, 203 rightEqColsAreKey: true, 204 205 expected: tuples{ 206 {nil, 1}, 207 {2, 2}, 208 }, 209 }, 210 { 211 description: "5", 212 leftTypes: []*types.T{types.Int}, 213 rightTypes: []*types.T{types.Int}, 214 215 // Test null handling only on probe column. 216 leftTuples: tuples{ 217 {0}, 218 }, 219 rightTuples: tuples{ 220 {nil}, 221 {0}, 222 }, 223 224 leftEqCols: []uint32{0}, 225 rightEqCols: []uint32{0}, 226 leftOutCols: []uint32{0}, 227 rightOutCols: []uint32{}, 228 229 leftEqColsAreKey: true, 230 rightEqColsAreKey: false, 231 232 expected: tuples{ 233 {0}, 234 }, 235 }, 236 { 237 description: "6", 238 leftTypes: []*types.T{types.Int}, 239 rightTypes: []*types.T{types.Int}, 240 241 // Test null handling only on build column. 242 leftTuples: tuples{ 243 {nil}, 244 {nil}, 245 {1}, 246 {0}, 247 }, 248 rightTuples: tuples{ 249 {1}, 250 {0}, 251 }, 252 253 leftEqCols: []uint32{0}, 254 rightEqCols: []uint32{0}, 255 leftOutCols: []uint32{0}, 256 rightOutCols: []uint32{}, 257 // Note that although right equality columns are key, we want to test 258 // null handling on the build column, so we "lie" here. 259 leftEqColsAreKey: false, 260 rightEqColsAreKey: false, 261 262 expected: tuples{ 263 {1}, 264 {0}, 265 }, 266 }, 267 { 268 description: "7", 269 leftTypes: []*types.T{types.Int, types.Int}, 270 rightTypes: []*types.T{types.Int, types.Int}, 271 272 // Test null handling in output columns. 273 leftTuples: tuples{ 274 {1, nil}, 275 {2, nil}, 276 {3, 1}, 277 {4, 2}, 278 }, 279 rightTuples: tuples{ 280 {1, 2}, 281 {2, nil}, 282 {3, nil}, 283 {4, 4}, 284 }, 285 286 leftEqCols: []uint32{0}, 287 rightEqCols: []uint32{0}, 288 leftOutCols: []uint32{1}, 289 rightOutCols: []uint32{1}, 290 291 leftEqColsAreKey: true, 292 rightEqColsAreKey: true, 293 294 expected: tuples{ 295 {nil, 2}, 296 {nil, nil}, 297 {1, nil}, 298 {2, 4}, 299 }, 300 }, 301 { 302 description: "8", 303 leftTypes: []*types.T{types.Int}, 304 rightTypes: []*types.T{types.Int}, 305 306 // Test null handling in hash join key column. 307 leftTuples: tuples{ 308 {1}, 309 {3}, 310 {nil}, 311 {2}, 312 }, 313 rightTuples: tuples{ 314 {2}, 315 {nil}, 316 {3}, 317 {nil}, 318 {1}, 319 }, 320 321 leftEqCols: []uint32{0}, 322 rightEqCols: []uint32{0}, 323 leftOutCols: []uint32{0}, 324 rightOutCols: []uint32{}, 325 326 leftEqColsAreKey: false, 327 rightEqColsAreKey: false, 328 329 expected: tuples{ 330 {2}, 331 {3}, 332 {1}, 333 }, 334 }, 335 { 336 // Test handling of multiple column non-distinct equality keys. 337 description: "9", 338 leftTypes: []*types.T{types.Int, types.Int, types.Int}, 339 rightTypes: []*types.T{types.Int, types.Int, types.Int}, 340 341 leftTuples: tuples{ 342 {0, 0, 1}, 343 {0, 0, 2}, 344 {1, 0, 3}, 345 {1, 1, 4}, 346 {1, 1, 5}, 347 {0, 0, 6}, 348 }, 349 rightTuples: tuples{ 350 {1, 0, 7}, 351 {0, 0, 8}, 352 {0, 0, 9}, 353 {0, 1, 10}, 354 }, 355 356 leftEqCols: []uint32{0, 1}, 357 rightEqCols: []uint32{0, 1}, 358 leftOutCols: []uint32{2}, 359 rightOutCols: []uint32{2}, 360 361 leftEqColsAreKey: false, 362 rightEqColsAreKey: false, 363 364 expected: tuples{ 365 {3, 7}, 366 {6, 8}, 367 {1, 8}, 368 {2, 8}, 369 {6, 9}, 370 {1, 9}, 371 {2, 9}, 372 }, 373 }, 374 { 375 // Test handling of duplicate equality keys that map to same buckets. 376 description: "10", 377 leftTypes: []*types.T{types.Int}, 378 rightTypes: []*types.T{types.Int}, 379 380 leftTuples: tuples{ 381 {0}, 382 {hashTableNumBuckets}, 383 {hashTableNumBuckets}, 384 {hashTableNumBuckets}, 385 {0}, 386 {hashTableNumBuckets * 2}, 387 {1}, 388 {1}, 389 {hashTableNumBuckets + 1}, 390 }, 391 rightTuples: tuples{ 392 {hashTableNumBuckets}, 393 {hashTableNumBuckets * 2}, 394 {hashTableNumBuckets * 3}, 395 {0}, 396 {1}, 397 {hashTableNumBuckets + 1}, 398 }, 399 400 leftEqCols: []uint32{0}, 401 rightEqCols: []uint32{0}, 402 leftOutCols: []uint32{0}, 403 rightOutCols: []uint32{0}, 404 // Note that although right equality columns are key, we want to test 405 // handling of collisions, so we "lie" here. 406 leftEqColsAreKey: false, 407 rightEqColsAreKey: false, 408 409 expected: tuples{ 410 {hashTableNumBuckets, hashTableNumBuckets}, 411 {hashTableNumBuckets, hashTableNumBuckets}, 412 {hashTableNumBuckets, hashTableNumBuckets}, 413 {hashTableNumBuckets * 2, hashTableNumBuckets * 2}, 414 {0, 0}, 415 {0, 0}, 416 {1, 1}, 417 {1, 1}, 418 {hashTableNumBuckets + 1, hashTableNumBuckets + 1}, 419 }, 420 }, 421 { 422 // Test handling of duplicate equality keys. 423 description: "11", 424 leftTypes: []*types.T{types.Int}, 425 rightTypes: []*types.T{types.Int}, 426 427 leftTuples: tuples{ 428 {0}, 429 {0}, 430 {1}, 431 {1}, 432 {1}, 433 {2}, 434 }, 435 rightTuples: tuples{ 436 {1}, 437 {0}, 438 {2}, 439 {2}, 440 }, 441 442 leftEqCols: []uint32{0}, 443 rightEqCols: []uint32{0}, 444 leftOutCols: []uint32{0}, 445 rightOutCols: []uint32{}, 446 447 leftEqColsAreKey: false, 448 rightEqColsAreKey: false, 449 450 expected: tuples{ 451 {1}, 452 {1}, 453 {1}, 454 {0}, 455 {0}, 456 {2}, 457 {2}, 458 }, 459 }, 460 { 461 // Test handling of various output column types. 462 description: "12", 463 leftTypes: []*types.T{types.Bool, types.Int, types.Bytes, types.Int}, 464 rightTypes: []*types.T{types.Int, types.Float, types.Int4}, 465 466 leftTuples: tuples{ 467 {false, 5, "a", 10}, 468 {true, 3, "b", 30}, 469 {false, 2, "foo", 20}, 470 {false, 6, "bar", 50}, 471 }, 472 rightTuples: tuples{ 473 {1, 1.1, int32(1)}, 474 {2, 2.2, int32(2)}, 475 {3, 3.3, int32(4)}, 476 {4, 4.4, int32(8)}, 477 {5, 5.5, int32(16)}, 478 }, 479 480 leftEqCols: []uint32{1}, 481 rightEqCols: []uint32{0}, 482 leftOutCols: []uint32{1, 2}, 483 rightOutCols: []uint32{0, 2}, 484 485 leftEqColsAreKey: true, 486 rightEqColsAreKey: true, 487 488 expected: tuples{ 489 {2, "foo", 2, int32(2)}, 490 {3, "b", 3, int32(4)}, 491 {5, "a", 5, int32(16)}, 492 }, 493 }, 494 { 495 description: "13", 496 leftTypes: []*types.T{types.Int}, 497 rightTypes: []*types.T{types.Int}, 498 499 // Reverse engineering hash table hash heuristic to find key values that 500 // hash to the same bucket. 501 leftTuples: tuples{ 502 {0}, 503 {hashTableNumBuckets}, 504 {hashTableNumBuckets * 2}, 505 {hashTableNumBuckets * 3}, 506 }, 507 rightTuples: tuples{ 508 {0}, 509 {hashTableNumBuckets}, 510 {hashTableNumBuckets * 3}, 511 }, 512 513 leftEqCols: []uint32{0}, 514 rightEqCols: []uint32{0}, 515 leftOutCols: []uint32{0}, 516 rightOutCols: []uint32{}, 517 518 leftEqColsAreKey: true, 519 rightEqColsAreKey: true, 520 521 expected: tuples{ 522 {0}, 523 {hashTableNumBuckets}, 524 {hashTableNumBuckets * 3}, 525 }, 526 }, 527 { 528 description: "14", 529 leftTypes: []*types.T{types.Int}, 530 rightTypes: []*types.T{types.Int}, 531 532 // Test a N:1 inner join where the right side key has duplicate values. 533 leftTuples: tuples{ 534 {0}, 535 {1}, 536 {2}, 537 {3}, 538 {4}, 539 }, 540 rightTuples: tuples{ 541 {1}, 542 {1}, 543 {1}, 544 {2}, 545 {2}, 546 }, 547 548 leftEqCols: []uint32{0}, 549 rightEqCols: []uint32{0}, 550 leftOutCols: []uint32{0}, 551 rightOutCols: []uint32{0}, 552 553 leftEqColsAreKey: true, 554 rightEqColsAreKey: false, 555 556 expected: tuples{ 557 {1, 1}, 558 {1, 1}, 559 {1, 1}, 560 {2, 2}, 561 {2, 2}, 562 }, 563 }, 564 { 565 description: "15", 566 leftTypes: []*types.T{types.Int, types.Int, types.Int}, 567 rightTypes: []*types.T{types.Int, types.Int, types.Int}, 568 569 // Test inner join on multiple equality columns. 570 leftTuples: tuples{ 571 {0, 0, 10}, 572 {0, 1, 20}, 573 {0, 2, 30}, 574 {1, 1, 40}, 575 {1, 2, 50}, 576 {2, 0, 60}, 577 {2, 1, 70}, 578 }, 579 rightTuples: tuples{ 580 {0, 100, 2}, 581 {1, 200, 1}, 582 {2, 300, 0}, 583 {2, 400, 1}, 584 }, 585 586 leftEqCols: []uint32{0, 1}, 587 rightEqCols: []uint32{0, 2}, 588 leftOutCols: []uint32{0, 1, 2}, 589 rightOutCols: []uint32{1}, 590 591 leftEqColsAreKey: true, 592 rightEqColsAreKey: true, 593 594 expected: tuples{ 595 {0, 2, 30, 100}, 596 {1, 1, 40, 200}, 597 {2, 0, 60, 300}, 598 {2, 1, 70, 400}, 599 }, 600 }, 601 { 602 description: "16", 603 leftTypes: []*types.T{types.Int, types.Int, types.Int}, 604 rightTypes: []*types.T{types.Int, types.Int}, 605 606 // Test multiple column with values that hash to the same bucket. 607 leftTuples: tuples{ 608 {10, 0, 0}, 609 {20, 0, hashTableNumBuckets}, 610 {40, hashTableNumBuckets, 0}, 611 {50, hashTableNumBuckets, hashTableNumBuckets}, 612 {60, hashTableNumBuckets * 2, 0}, 613 {70, hashTableNumBuckets * 2, hashTableNumBuckets}, 614 }, 615 rightTuples: tuples{ 616 {0, hashTableNumBuckets}, 617 {hashTableNumBuckets * 2, hashTableNumBuckets}, 618 {0, 0}, 619 {0, hashTableNumBuckets * 2}, 620 }, 621 622 leftEqCols: []uint32{1, 2}, 623 rightEqCols: []uint32{0, 1}, 624 leftOutCols: []uint32{0, 1, 2}, 625 rightOutCols: []uint32{}, 626 627 leftEqColsAreKey: true, 628 rightEqColsAreKey: true, 629 630 expected: tuples{ 631 {20, 0, hashTableNumBuckets}, 632 {70, hashTableNumBuckets * 2, hashTableNumBuckets}, 633 {10, 0, 0}, 634 }, 635 }, 636 { 637 description: "17", 638 leftTypes: []*types.T{types.Bytes, types.Bool, types.Int2, types.Int4, types.Int, types.Bytes}, 639 rightTypes: []*types.T{types.Int, types.Int4, types.Int2, types.Bool, types.Bytes}, 640 641 // Test multiple equality columns of different types. 642 leftTuples: tuples{ 643 {"foo", false, int16(100), int32(1000), int64(10000), "aaa"}, 644 {"foo", true, 100, 1000, 10000, "bbb"}, 645 {"foo1", false, 100, 1000, 10000, "ccc"}, 646 {"foo", false, 200, 1000, 10000, "ddd"}, 647 {"foo", false, 100, 2000, 10000, "eee"}, 648 {"bar", true, 300, 3000, 30000, "fff"}, 649 }, 650 rightTuples: tuples{ 651 {int64(10000), int32(1000), int16(100), false, "foo1"}, 652 {10000, 1000, 100, false, "foo"}, 653 {30000, 3000, 300, true, "bar"}, 654 {10000, 1000, 200, false, "foo"}, 655 {30000, 3000, 300, false, "bar"}, 656 {10000, 1000, 100, false, "random"}, 657 }, 658 659 leftEqCols: []uint32{0, 1, 2, 3, 4}, 660 rightEqCols: []uint32{4, 3, 2, 1, 0}, 661 leftOutCols: []uint32{5}, 662 rightOutCols: []uint32{}, 663 664 leftEqColsAreKey: true, 665 rightEqColsAreKey: true, 666 667 expected: tuples{ 668 {"ccc"}, 669 {"aaa"}, 670 {"fff"}, 671 {"ddd"}, 672 }, 673 }, 674 { 675 description: "18", 676 leftTypes: []*types.T{types.Float}, 677 rightTypes: []*types.T{types.Float}, 678 679 // Test equality columns of type float. 680 leftTuples: tuples{ 681 {33.333}, 682 {44.4444}, 683 {55.55555}, 684 {44.4444}, 685 }, 686 rightTuples: tuples{ 687 {44.4444}, 688 {55.55555}, 689 {33.333}, 690 }, 691 692 leftEqCols: []uint32{0}, 693 rightEqCols: []uint32{0}, 694 leftOutCols: []uint32{0}, 695 rightOutCols: []uint32{}, 696 697 leftEqColsAreKey: true, 698 rightEqColsAreKey: true, 699 700 expected: tuples{ 701 {55.55555}, 702 {44.4444}, 703 {44.4444}, 704 {33.333}, 705 }, 706 }, 707 { 708 description: "19", 709 leftTypes: []*types.T{types.Int, types.Int, types.Int, types.Int}, 710 rightTypes: []*types.T{types.Int, types.Int, types.Int, types.Int}, 711 712 // Test use right side as build table. 713 leftTuples: tuples{ 714 {2, 4, 8, 16}, 715 {3, 3, 2, 2}, 716 {3, 7, 2, 1}, 717 {5, 4, 3, 2}, 718 }, 719 rightTuples: tuples{ 720 {1, 3, 5, 7}, 721 {1, 1, 1, 1}, 722 {1, 2, 3, 4}, 723 }, 724 725 leftEqCols: []uint32{2, 0}, 726 rightEqCols: []uint32{1, 2}, 727 leftOutCols: []uint32{0, 1, 2, 3}, 728 rightOutCols: []uint32{0, 1, 2, 3}, 729 730 leftEqColsAreKey: true, 731 rightEqColsAreKey: true, 732 733 expected: tuples{ 734 {3, 3, 2, 2, 1, 2, 3, 4}, 735 {3, 7, 2, 1, 1, 2, 3, 4}, 736 {5, 4, 3, 2, 1, 3, 5, 7}, 737 }, 738 }, 739 { 740 description: "20", 741 leftTypes: []*types.T{types.Decimal}, 742 rightTypes: []*types.T{types.Decimal}, 743 744 // Test types.Decimal type as equality column. 745 leftTuples: tuples{ 746 {decs[0]}, 747 {decs[1]}, 748 {decs[2]}, 749 }, 750 rightTuples: tuples{ 751 {decs[2]}, 752 {decs[3]}, 753 {decs[0]}, 754 }, 755 756 leftEqCols: []uint32{0}, 757 rightEqCols: []uint32{0}, 758 leftOutCols: []uint32{}, 759 rightOutCols: []uint32{0}, 760 761 leftEqColsAreKey: true, 762 rightEqColsAreKey: true, 763 764 expected: tuples{ 765 {decs[2]}, 766 {decs[0]}, 767 }, 768 }, 769 { 770 description: "21", 771 leftTypes: []*types.T{types.Int}, 772 rightTypes: []*types.T{types.Int}, 773 774 joinType: sqlbase.LeftSemiJoin, 775 776 leftTuples: tuples{ 777 {0}, 778 {0}, 779 {1}, 780 {2}, 781 }, 782 rightTuples: tuples{ 783 {0}, 784 {0}, 785 {1}, 786 }, 787 788 leftEqCols: []uint32{0}, 789 rightEqCols: []uint32{0}, 790 leftOutCols: []uint32{0}, 791 rightOutCols: []uint32{}, 792 793 leftEqColsAreKey: false, 794 rightEqColsAreKey: false, 795 796 expected: tuples{ 797 {0}, 798 {0}, 799 {1}, 800 }, 801 }, 802 { 803 description: "22", 804 leftTypes: []*types.T{types.Int}, 805 rightTypes: []*types.T{types.Int}, 806 807 joinType: sqlbase.LeftAntiJoin, 808 809 leftTuples: tuples{ 810 {0}, 811 {0}, 812 {1}, 813 {2}, 814 }, 815 rightTuples: tuples{ 816 {0}, 817 {0}, 818 {1}, 819 }, 820 821 leftEqCols: []uint32{0}, 822 rightEqCols: []uint32{0}, 823 leftOutCols: []uint32{0}, 824 rightOutCols: []uint32{}, 825 826 leftEqColsAreKey: false, 827 rightEqColsAreKey: false, 828 829 expected: tuples{ 830 {2}, 831 }, 832 }, 833 { 834 description: "23", 835 leftTypes: []*types.T{types.Int, types.Int}, 836 rightTypes: []*types.T{types.Int, types.Int}, 837 838 // Test ON expression. 839 leftTuples: tuples{ 840 {1, nil}, 841 {2, nil}, 842 {3, 1}, 843 {4, 2}, 844 }, 845 rightTuples: tuples{ 846 {1, 2}, 847 {2, nil}, 848 {3, nil}, 849 {4, 4}, 850 }, 851 852 leftEqCols: []uint32{0}, 853 rightEqCols: []uint32{0}, 854 leftOutCols: []uint32{1}, 855 rightOutCols: []uint32{1}, 856 857 leftEqColsAreKey: true, 858 rightEqColsAreKey: true, 859 860 onExpr: execinfrapb.Expression{Expr: "@1 + @3 > 2 AND @1 + @3 < 8"}, 861 expected: tuples{ 862 {nil, nil}, 863 {1, nil}, 864 }, 865 }, 866 { 867 description: "24", 868 leftTypes: []*types.T{types.Int, types.Int}, 869 rightTypes: []*types.T{types.Int, types.Int}, 870 871 // Test ON expression. 872 leftTuples: tuples{ 873 {1, nil}, 874 {2, nil}, 875 {3, 1}, 876 {4, 2}, 877 }, 878 rightTuples: tuples{ 879 {1, 2}, 880 {2, nil}, 881 {3, nil}, 882 {4, 4}, 883 }, 884 885 leftEqCols: []uint32{0}, 886 rightEqCols: []uint32{0}, 887 leftOutCols: []uint32{1}, 888 rightOutCols: []uint32{1}, 889 890 leftEqColsAreKey: true, 891 rightEqColsAreKey: true, 892 893 onExpr: execinfrapb.Expression{Expr: "@1 + @3 + @4 < 100"}, 894 expected: tuples{ 895 {nil, 2}, 896 {2, 4}, 897 }, 898 }, 899 { 900 description: "25", 901 joinType: sqlbase.IntersectAllJoin, 902 leftTypes: []*types.T{types.Int}, 903 rightTypes: []*types.T{types.Int}, 904 leftTuples: tuples{{1}, {1}, {2}, {2}, {2}, {3}, {3}}, 905 rightTuples: tuples{{1}, {2}, {3}, {3}, {3}}, 906 leftEqCols: []uint32{0}, 907 rightEqCols: []uint32{0}, 908 leftOutCols: []uint32{0}, 909 expected: tuples{{1}, {2}, {3}, {3}}, 910 }, 911 { 912 description: "26", 913 joinType: sqlbase.ExceptAllJoin, 914 leftTypes: []*types.T{types.Int}, 915 rightTypes: []*types.T{types.Int}, 916 leftTuples: tuples{{1}, {1}, {2}, {2}, {2}, {3}, {3}}, 917 rightTuples: tuples{{1}, {2}, {3}, {3}, {3}}, 918 leftEqCols: []uint32{0}, 919 rightEqCols: []uint32{0}, 920 leftOutCols: []uint32{0}, 921 expected: tuples{{1}, {2}, {2}}, 922 }, 923 } 924 } 925 926 // createSpecForHashJoiner creates a hash join processor spec based on a test 927 // case. 928 func createSpecForHashJoiner(tc *joinTestCase) *execinfrapb.ProcessorSpec { 929 hjSpec := &execinfrapb.HashJoinerSpec{ 930 LeftEqColumns: tc.leftEqCols, 931 RightEqColumns: tc.rightEqCols, 932 LeftEqColumnsAreKey: tc.leftEqColsAreKey, 933 RightEqColumnsAreKey: tc.rightEqColsAreKey, 934 OnExpr: tc.onExpr, 935 Type: tc.joinType, 936 } 937 projection := make([]uint32, 0, len(tc.leftOutCols)+len(tc.rightOutCols)) 938 projection = append(projection, tc.leftOutCols...) 939 rColOffset := uint32(len(tc.leftTypes)) 940 for _, outCol := range tc.rightOutCols { 941 projection = append(projection, rColOffset+outCol) 942 } 943 return &execinfrapb.ProcessorSpec{ 944 Input: []execinfrapb.InputSyncSpec{ 945 {ColumnTypes: tc.leftTypes}, 946 {ColumnTypes: tc.rightTypes}, 947 }, 948 Core: execinfrapb.ProcessorCoreUnion{ 949 HashJoiner: hjSpec, 950 }, 951 Post: execinfrapb.PostProcessSpec{ 952 Projection: true, 953 OutputColumns: projection, 954 }, 955 } 956 } 957 958 // runHashJoinTestCase is a helper function that runs a single test case 959 // against a hash join operator (either in-memory or disk-backed one) which is 960 // created by the provided constructor. 961 func runHashJoinTestCase( 962 t *testing.T, 963 tc *joinTestCase, 964 hjOpConstructor func(sources []colexecbase.Operator) (colexecbase.Operator, error), 965 ) { 966 tc.init() 967 inputs := []tuples{tc.leftTuples, tc.rightTuples} 968 typs := [][]*types.T{tc.leftTypes, tc.rightTypes} 969 var runner testRunner 970 if tc.skipAllNullsInjection { 971 // We're omitting all nulls injection test. See comments for each such 972 // test case. 973 runner = runTestsWithoutAllNullsInjection 974 } else { 975 runner = runTestsWithTyps 976 } 977 t.Run(tc.description, func(t *testing.T) { 978 runner(t, inputs, typs, tc.expected, unorderedVerifier, hjOpConstructor) 979 }) 980 } 981 982 func TestHashJoiner(t *testing.T) { 983 defer leaktest.AfterTest(t)() 984 985 ctx := context.Background() 986 st := cluster.MakeTestingClusterSettings() 987 evalCtx := tree.MakeTestingEvalContext(st) 988 defer evalCtx.Stop(ctx) 989 flowCtx := &execinfra.FlowCtx{ 990 EvalCtx: &evalCtx, 991 Cfg: &execinfra.ServerConfig{Settings: st}, 992 } 993 994 for _, outputBatchSize := range []int{1, 17, coldata.BatchSize()} { 995 if outputBatchSize > coldata.BatchSize() { 996 // It is possible for varied coldata.BatchSize() to be smaller than 997 // requested outputBatchSize. Such configuration is invalid, and we skip 998 // it. 999 continue 1000 } 1001 for _, tcs := range [][]*joinTestCase{hjTestCases, mjTestCases} { 1002 for _, tc := range tcs { 1003 for _, tc := range tc.mutateTypes() { 1004 runHashJoinTestCase(t, tc, func(sources []colexecbase.Operator) (colexecbase.Operator, error) { 1005 spec := createSpecForHashJoiner(tc) 1006 args := NewColOperatorArgs{ 1007 Spec: spec, 1008 Inputs: sources, 1009 StreamingMemAccount: testMemAcc, 1010 } 1011 args.TestingKnobs.UseStreamingMemAccountForBuffering = true 1012 args.TestingKnobs.DiskSpillingDisabled = true 1013 result, err := NewColOperator(ctx, flowCtx, args) 1014 if err != nil { 1015 return nil, err 1016 } 1017 if hj, ok := result.Op.(*hashJoiner); ok { 1018 hj.outputBatchSize = outputBatchSize 1019 } 1020 return result.Op, nil 1021 }) 1022 } 1023 } 1024 } 1025 } 1026 } 1027 1028 func BenchmarkHashJoiner(b *testing.B) { 1029 ctx := context.Background() 1030 nCols := 4 1031 sourceTypes := make([]*types.T, nCols) 1032 1033 for colIdx := 0; colIdx < nCols; colIdx++ { 1034 sourceTypes[colIdx] = types.Int 1035 } 1036 1037 batch := testAllocator.NewMemBatch(sourceTypes) 1038 1039 for colIdx := 0; colIdx < nCols; colIdx++ { 1040 col := batch.ColVec(colIdx).Int64() 1041 for i := 0; i < coldata.BatchSize(); i++ { 1042 col[i] = int64(i) 1043 } 1044 } 1045 1046 batch.SetLength(coldata.BatchSize()) 1047 1048 for _, hasNulls := range []bool{false, true} { 1049 b.Run(fmt.Sprintf("nulls=%v", hasNulls), func(b *testing.B) { 1050 1051 if hasNulls { 1052 for colIdx := 0; colIdx < nCols; colIdx++ { 1053 vec := batch.ColVec(colIdx) 1054 vec.Nulls().SetNull(0) 1055 } 1056 } else { 1057 for colIdx := 0; colIdx < nCols; colIdx++ { 1058 vec := batch.ColVec(colIdx) 1059 vec.Nulls().UnsetNulls() 1060 } 1061 } 1062 1063 for _, fullOuter := range []bool{false, true} { 1064 b.Run(fmt.Sprintf("fullOuter=%v", fullOuter), func(b *testing.B) { 1065 for _, rightDistinct := range []bool{true, false} { 1066 b.Run(fmt.Sprintf("distinct=%v", rightDistinct), func(b *testing.B) { 1067 for _, nBatches := range []int{1 << 1, 1 << 8, 1 << 12} { 1068 b.Run(fmt.Sprintf("rows=%d", nBatches*coldata.BatchSize()), func(b *testing.B) { 1069 // 8 (bytes / int64) * nBatches (number of batches) * col.BatchSize() (rows / 1070 // batch) * nCols (number of columns / row) * 2 (number of sources). 1071 b.SetBytes(int64(8 * nBatches * coldata.BatchSize() * nCols * 2)) 1072 b.ResetTimer() 1073 for i := 0; i < b.N; i++ { 1074 leftSource := colexecbase.NewRepeatableBatchSource(testAllocator, batch, sourceTypes) 1075 rightSource := newFiniteBatchSource(batch, sourceTypes, nBatches) 1076 joinType := sqlbase.InnerJoin 1077 if fullOuter { 1078 joinType = sqlbase.FullOuterJoin 1079 } 1080 hjSpec, err := makeHashJoinerSpec( 1081 joinType, 1082 []uint32{0, 1}, []uint32{2, 3}, 1083 sourceTypes, sourceTypes, 1084 rightDistinct, 1085 ) 1086 require.NoError(b, err) 1087 hj := newHashJoiner( 1088 testAllocator, hjSpec, 1089 leftSource, rightSource, 1090 ) 1091 hj.Init() 1092 1093 for i := 0; i < nBatches; i++ { 1094 // Technically, the non-distinct hash join will produce much more 1095 // than nBatches of output. 1096 hj.Next(ctx) 1097 } 1098 } 1099 }) 1100 } 1101 }) 1102 } 1103 }) 1104 } 1105 }) 1106 } 1107 } 1108 1109 // TestHashingDoesNotAllocate ensures that our use of the noescape hack to make 1110 // sure hashing with unsafe.Pointer doesn't allocate still works correctly. 1111 func TestHashingDoesNotAllocate(t *testing.T) { 1112 defer leaktest.AfterTest(t)() 1113 1114 var sum uintptr 1115 foundAllocations := 0 1116 for i := 0; i < 10; i++ { 1117 // Sometimes, Go allocates somewhere else. To make this test not flaky, 1118 // let's just make sure that at least one of the rounds of this loop doesn't 1119 // allocate at all. 1120 s := &runtime.MemStats{} 1121 runtime.ReadMemStats(s) 1122 numAlloc := s.TotalAlloc 1123 i := 10 1124 x := memhash64(noescape(unsafe.Pointer(&i)), 0) 1125 runtime.ReadMemStats(s) 1126 1127 if numAlloc != s.TotalAlloc { 1128 foundAllocations++ 1129 } 1130 sum += x 1131 } 1132 if foundAllocations == 10 { 1133 // Uhoh, we allocated every single time. This probably means we regressed, 1134 // and our hash function allocates. 1135 t.Fatalf("memhash64(noescape(&i)) allocated at least once") 1136 } 1137 t.Log(sum) 1138 } 1139 1140 // TestHashJoinerProjection tests that planning of hash joiner correctly 1141 // handles the "post-joiner" projection. The test uses different types with a 1142 // projection in which output columns from both sides are intertwined so that 1143 // if the projection is not handled correctly, the interface conversion panic 1144 // would occur. 1145 func TestHashJoinerProjection(t *testing.T) { 1146 defer leaktest.AfterTest(t)() 1147 1148 ctx := context.Background() 1149 st := cluster.MakeTestingClusterSettings() 1150 evalCtx := tree.MakeTestingEvalContext(st) 1151 defer evalCtx.Stop(ctx) 1152 flowCtx := &execinfra.FlowCtx{ 1153 EvalCtx: &evalCtx, 1154 Cfg: &execinfra.ServerConfig{ 1155 Settings: st, 1156 }, 1157 } 1158 1159 leftTypes := []*types.T{types.Bool, types.Int, types.Bytes} 1160 rightTypes := []*types.T{types.Int, types.Float, types.Decimal} 1161 leftTuples := tuples{{false, 1, "foo"}} 1162 rightTuples := tuples{{1, 1.1, decs[1]}} 1163 1164 spec := &execinfrapb.ProcessorSpec{ 1165 Core: execinfrapb.ProcessorCoreUnion{ 1166 HashJoiner: &execinfrapb.HashJoinerSpec{ 1167 LeftEqColumns: []uint32{1}, 1168 RightEqColumns: []uint32{0}, 1169 LeftEqColumnsAreKey: true, 1170 RightEqColumnsAreKey: true, 1171 }, 1172 }, 1173 Input: []execinfrapb.InputSyncSpec{ 1174 {ColumnTypes: leftTypes}, 1175 {ColumnTypes: rightTypes}, 1176 }, 1177 Post: execinfrapb.PostProcessSpec{ 1178 Projection: true, 1179 // The "core" of the test - we ask for a projection in which the columns 1180 // from the left and from the right are intertwined. 1181 OutputColumns: []uint32{3, 1, 0, 5, 4, 2}, 1182 }, 1183 } 1184 1185 leftSource := newOpTestInput(1, leftTuples, leftTypes) 1186 rightSource := newOpTestInput(1, rightTuples, rightTypes) 1187 args := NewColOperatorArgs{ 1188 Spec: spec, 1189 Inputs: []colexecbase.Operator{leftSource, rightSource}, 1190 StreamingMemAccount: testMemAcc, 1191 } 1192 args.TestingKnobs.UseStreamingMemAccountForBuffering = true 1193 args.TestingKnobs.DiskSpillingDisabled = true 1194 hjOp, err := NewColOperator(ctx, flowCtx, args) 1195 require.NoError(t, err) 1196 hjOp.Op.Init() 1197 for b := hjOp.Op.Next(ctx); b.Length() > 0; b = hjOp.Op.Next(ctx) { 1198 // The output types should be {Int64, Int64, Bool, Decimal, Float64, Bytes} 1199 // and we check this explicitly. 1200 b.ColVec(0).Int64() 1201 b.ColVec(1).Int64() 1202 b.ColVec(2).Bool() 1203 b.ColVec(3).Decimal() 1204 b.ColVec(4).Float64() 1205 b.ColVec(5).Bytes() 1206 } 1207 }