github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/logictest/testdata/logic_test/interleaved_join (about) 1 # LogicTest: !3node-tenant 2 # The following tables form the interleaved hierarchy: 3 # name: primary key: # rows: 'a' = id mod X : 4 # parent1 (pid1) 40 8 5 # child1 (pid1, cid1) 150 66 6 # grandchild1 (pid1, cid1, gcid1) 410 201 7 # child2 (pid1, cid2, cid3) 15 7 8 # grandchild2 (pid1, cid2, cid3, gcid2) 51 13 9 # parent2 (pid2) 5 2 10 # Additional rows in child1, child2, and grandchild1 with no corresponding 11 # parent row are also inserted. 12 # 13 # All IDs belonging to a table (pid1 --> parent1, cid1 --> child1, cid2,cid3 14 # --> child2, etc.) start from 1 up to (# rows). 15 # Foreign keys are modded by their ancestor's (# rows). For example, for child1 16 # row with cid1=500, we take ((cid1-1) % 200 + 1) = 100 as pid1. 17 # One exception is cid3, which is taken as cid2 % 15. 18 # There's a column 'a' that's modded by a factor. 19 # 20 # This allows us to test the following edge cases (in order of tests): 21 # - one-to-many (parent1 - child1) 22 # - one-to-one and one-to-none (parent1 - child2) 23 # - parent-grandchild (parent1 - grandchild1) 24 # - multiple interleaved columns (child2 - grandchild2) 25 # - additional ancestor above (child2 - grandchild2) 26 # - no interleaved relationship (parent1 - parent2, parent2 - child1) 27 # - TODO(richardwu): sibling-sibling (child1 - child2) 28 29 ################# 30 # Create tables # 31 ################# 32 33 # TODO(solon): Remove the FAMILY declarations when #44699 is resolved. 34 statement ok 35 CREATE TABLE parent1 (pid1 INT PRIMARY KEY, pa1 INT, FAMILY (pid1), FAMILY (pa1)) 36 37 statement ok 38 CREATE TABLE parent2 (pid2 INT PRIMARY KEY, pa2 INT) 39 40 statement ok 41 CREATE TABLE child1 ( 42 pid1 INT, 43 cid1 INT, 44 ca1 INT, 45 PRIMARY KEY(pid1, cid1) 46 ) 47 INTERLEAVE IN PARENT parent1 (pid1) 48 49 statement ok 50 CREATE TABLE child2 ( 51 pid1 INT, 52 cid2 INT, 53 cid3 INT, 54 ca2 INT, 55 PRIMARY KEY(pid1, cid2, cid3) 56 ) 57 INTERLEAVE IN PARENT parent1 (pid1) 58 59 statement ok 60 CREATE TABLE grandchild1 ( 61 pid1 INT, 62 cid1 INT, 63 gcid1 INT, 64 gca1 INT, 65 PRIMARY KEY(pid1, cid1, gcid1) 66 ) 67 INTERLEAVE IN PARENT child1 (pid1, cid1) 68 69 # No foreign key since we are permitting the rows to overflow out of child2 70 # for pid1 > 15. 71 statement ok 72 CREATE TABLE grandchild2 ( 73 pid1 INT, 74 cid2 INT, 75 cid3 INT, 76 gcid2 INT, 77 gca2 INT, 78 PRIMARY KEY(pid1, cid2, cid3, gcid2) 79 ) 80 INTERLEAVE IN PARENT child2 (pid1, cid2, cid3) 81 82 #################### 83 # Insert some rows # 84 #################### 85 86 statement ok 87 INSERT INTO parent1 SELECT 88 pid, 89 mod(pid, 8) 90 FROM 91 generate_series(1, 40) AS ID(pid) 92 93 statement ok 94 INSERT INTO parent2 SELECT 95 pid, 96 mod(pid, 2) 97 FROM 98 generate_series(1, 5) AS ID(pid) 99 100 # child1 has more rows than parent1. 101 statement ok 102 INSERT INTO child1 SELECT 103 mod(cid-1, 40) + 1, 104 cid, 105 mod(cid, 66) 106 FROM 107 generate_series(1, 150) AS ID(cid) 108 109 # Insert additional rows with no correspond parent rows to check for correctness. 110 statement ok 111 INSERT INTO child1 VALUES 112 (-1, -1, -1), 113 (0, 0, 0), 114 (41, 41, 41), 115 (151, 151, 19), 116 (160, 160, 28) 117 118 # child2 has fewer rows than parent1. 119 statement ok 120 INSERT INTO child2 SELECT 121 mod(cid-1, 40) + 1, 122 cid, 123 mod(cid, 14), 124 mod(cid, 7) 125 FROM 126 generate_series(1, 15) AS ID(cid) 127 128 statement ok 129 INSERT INTO child2 VALUES 130 (-1, -1, -1, -1), 131 (0, 0, 0, 0), 132 (16, 16, 2, 2), 133 (20, 20, 6, 6) 134 135 statement ok 136 INSERT INTO grandchild1 SELECT 137 mod(mod(gcid-1, 150), 40) + 1, 138 mod(gcid-1, 150) + 1, 139 gcid, 140 mod(gcid, 201) 141 FROM 142 generate_series(1, 410) AS ID(gcid) 143 144 statement ok 145 INSERT INTO grandchild1 VALUES 146 (-1, -1, -1, -1), 147 (0, 0, 0, 0), 148 (200, 200, 200, 200), 149 (411, 411, 411, 9) 150 151 152 # We let grandchild2.pid1 exceed child2.pid1 (one of the interleaved keys). 153 # So instead of 154 # (gcid1 - 1) % 15 % 40 + 1 155 # we choose to only mod by 40 (nrows of parent1) instead of first modding 156 # by 15 (nrows of child2). 157 statement ok 158 INSERT INTO grandchild2 SELECT 159 mod(gcid-1, 40) + 1, 160 mod(gcid-1, 15) + 1, 161 mod(mod(gcid-1, 15) + 1, 14), 162 gcid, 163 mod(gcid, 13) 164 FROM 165 generate_series(1, 51) AS ID(gcid) 166 167 ##################### 168 # Interleaved joins # 169 ##################### 170 171 # Select over two ranges for parent/child with split at children key. 172 # Returns: 173 # pid1 pa1 cid1 ca1 174 # (pid1 % 8) (cid1 % 66) 175 query IIII rowsort,colnames 176 SELECT * FROM parent1 JOIN child1 USING(pid1) WHERE pid1 >= 3 AND pid1 <= 5 177 ---- 178 pid1 pa1 cid1 ca1 179 3 3 3 3 180 3 3 43 43 181 3 3 83 17 182 3 3 123 57 183 4 4 4 4 184 4 4 44 44 185 4 4 84 18 186 4 4 124 58 187 5 5 5 5 188 5 5 45 45 189 5 5 85 19 190 5 5 125 59 191 192 # Swap parent1 and child1 tables. 193 194 query IIII rowsort,colnames 195 SELECT * FROM child1 JOIN parent1 USING(pid1) WHERE pid1 >= 3 AND pid1 <= 5 196 ---- 197 pid1 cid1 ca1 pa1 198 3 3 3 3 199 3 43 43 3 200 3 83 17 3 201 3 123 57 3 202 4 4 4 4 203 4 44 44 4 204 4 84 18 4 205 4 124 58 4 206 5 5 5 5 207 5 45 45 5 208 5 85 19 5 209 5 125 59 5 210 211 # Select over two ranges for parent/child with split at grandchild key. 212 # Also, rows with pid1 <= 30 should have 4 rows whereas pid1 > 30 should 213 # have 3 rows. 214 # Returns: 215 # parent1.pid1 pa1 child1.pid1 cid1 ca1 216 # (pid1 % 8) (cid1 % 66) 217 query IIIII colnames 218 SELECT * FROM parent1 JOIN child1 ON parent1.pid1 = child1.pid1 WHERE parent1.pid1 >= 29 AND parent1.pid1 <= 31 ORDER BY parent1.pid1 219 ---- 220 pid1 pa1 pid1 cid1 ca1 221 29 5 29 29 29 222 29 5 29 69 3 223 29 5 29 109 43 224 29 5 29 149 17 225 30 6 30 30 30 226 30 6 30 70 4 227 30 6 30 110 44 228 30 6 30 150 18 229 31 7 31 31 31 230 31 7 31 71 5 231 31 7 31 111 45 232 233 # parent-child where pid1 <= 15 have one joined row and pid1 > 15 have no 234 # joined rows (since child2 only has 15 rows up to pid1 = 15). 235 # Returns: 236 # pid1 pa1 cid2 cid3 ca2 237 # (pid1 % 8) (cid2 % 14) (cid2 % 7) 238 query IIIII rowsort,colnames 239 SELECT * FROM parent1 JOIN child2 USING(pid1) WHERE pid1 >= 12 240 ---- 241 pid1 pa1 cid2 cid3 ca2 242 12 4 12 12 5 243 13 5 13 13 6 244 14 6 14 0 0 245 15 7 15 1 1 246 16 0 16 2 2 247 20 4 20 6 6 248 249 # Single gateway node query (node 1). 250 # Returns: 251 # pid1 pa1 cid2 cid3 ca2 252 # (pid1 % 8) (cid2 % 14) (cid2 % 7) 253 # Note pid=21, 31 has no joined rows since child2 only has pid1 <= 15. 254 query IIIII rowsort,colnames 255 SELECT * FROM parent1 JOIN child2 USING(pid1) WHERE pid1 IN (1, 11, 21, 31) 256 ---- 257 pid1 pa1 cid2 cid3 ca2 258 1 1 1 1 1 259 11 3 11 11 4 260 261 # Parent-grandchild. 262 # Returns: 263 # pid1 pa2 cid2 cid3 gcid2 gca2 264 # (pid1 % 8) (cid2 % 14) (gcid2 % 13) 265 # Where clause is on ranges that overlap children, grandchildren, and parent 266 # key splits, respectively. 267 # Rows with pid >= 11 have only one joined row since there are only 51 268 # granchild2 rows. 269 query IIIIII rowsort,colnames 270 SELECT * FROM parent1 JOIN grandchild2 USING(pid1) WHERE 271 pid1 >= 11 AND pid1 <= 13 272 OR pid1 >= 19 AND pid1 <= 21 273 OR pid1 >= 31 AND pid1 <= 33 274 ---- 275 pid1 pa1 cid2 cid3 gcid2 gca2 276 11 3 6 6 51 12 277 11 3 11 11 11 11 278 12 4 12 12 12 12 279 13 5 13 13 13 0 280 19 3 4 4 19 6 281 20 4 5 5 20 7 282 21 5 6 6 21 8 283 31 7 1 1 31 5 284 32 0 2 2 32 6 285 33 1 3 3 33 7 286 287 # Swap parent1 and grandchild2 positions. 288 query IIIIII rowsort,colnames 289 SELECT * FROM grandchild2 JOIN parent1 USING(pid1) WHERE 290 pid1 >= 11 AND pid1 <= 13 291 OR pid1 >= 19 AND pid1 <= 21 292 OR pid1 >= 31 AND pid1 <= 33 293 ---- 294 pid1 cid2 cid3 gcid2 gca2 pa1 295 11 6 6 51 12 3 296 11 11 11 11 11 3 297 12 12 12 12 12 4 298 13 13 13 13 0 5 299 19 4 4 19 6 3 300 20 5 5 20 7 4 301 21 6 6 21 8 5 302 31 1 1 31 5 7 303 32 2 2 32 6 0 304 33 3 3 33 7 1 305 306 # Join on multiple interleaved columns with an overarching ancestor (parent1). 307 # Returns: 308 # child2.pid1 gc2.pid1 child2.cid2 gc2.cid2 child2.cid3 gc2.cid3 child2.ca2 gcid2 gca2 309 # (gcid2 % 40) (cid2 % 14) (gcid2 % 13) 310 query IIIIIIIII colnames,rowsort 311 SELECT * FROM child2 JOIN grandchild2 ON 312 child2.pid1=grandchild2.pid1 313 AND child2.cid2=grandchild2.cid2 314 AND child2.cid3=grandchild2.cid3 315 WHERE 316 child2.pid1 >= 5 AND child2.pid1 <= 7 317 OR child2.cid2 >= 12 AND child2.cid2 <= 14 318 OR gcid2 >= 49 AND gcid2 <= 51 319 ---- 320 pid1 cid2 cid3 ca2 pid1 cid2 cid3 gcid2 gca2 321 5 5 5 5 5 5 5 5 5 322 6 6 6 6 6 6 6 6 6 323 7 7 7 0 7 7 7 7 7 324 12 12 12 5 12 12 12 12 12 325 13 13 13 6 13 13 13 13 0 326 14 14 0 0 14 14 0 14 1 327 328 # Aggregation over parent and child keys. 329 # There are 4 rows for each 10 <= pid1 <= 30 and 3 rows for each 30 < pid1 <= 330 # 39. 331 # We thus have 3 arithmetic series of 10 + ... + 39 and 1 arithmetic series 332 # of 10 + ... + 30 or 333 # sum(pid1) = 3 * (39 - 10 + 1) * (10 + 39)/2 + (30 - 10 + 1) * (10 + 30)/2 = 2625 334 # For sum(cid1), we notice that pid1 = cid1 % 40, thus for every additional 335 # round of rows under a pid1, cid1 is increased by 40. 336 # For each additional round up to the 3rd (2 rounds after the first where 50 <= cid1 <= 79, 337 # 90 <= cid1 <= 119) , we have an additional 338 # 40 * (1 + 2) * (39 - 10 + 1) = 3600 339 # For the 4th round, we have 150 - 130 + 1 = 21 rows (130 <= cid1 <= 150) each 340 # additional row adds 120, thus 341 # sum(cid1) = sum(pid1) + 3600 + 21 * 120 = 8745 342 # For each 343 query RR 344 SELECT sum(parent1.pid1), sum(child1.cid1) FROM parent1 JOIN child1 USING(pid1) WHERE 345 pid1 >= 10 AND pid1 <= 39 346 ---- 347 2625 8745 348 349 ############### 350 # Outer joins # 351 ############### 352 353 # The schema/values for each table are as follows: 354 # Table: pkey: pkey values (same): values: 355 # outer_p1 (pid1) {1, 2, 3, ... 20} 100 + pkey 356 # outer_c1 (pid1, cid1, cid2) {2, 4, 6, ... 28} 200 + pkey 357 # outer_gc1 (pid1, cid1, cid2, gcid1) {4, 8, 12, ... 36} 300 + pkey 358 359 # Split between 4 nodes based on pkey value (p): 360 # node 1: p - 1 mod 20 ∈ [1...5) 361 # node 2: p - 1 mod 20 ∈ [5...10) 362 # node 3: p - 1 mod 20 ∈ [10...15) 363 # node 4: p - 1 mod 20 ∈ [15...20) 364 365 statement ok 366 CREATE TABLE outer_p1 ( 367 pid1 INT PRIMARY KEY, 368 pa1 INT 369 ) 370 371 statement ok 372 CREATE TABLE outer_c1 ( 373 pid1 INT, 374 cid1 INT, 375 cid2 INT, 376 ca1 INT, 377 PRIMARY KEY (pid1, cid1, cid2) 378 ) INTERLEAVE IN PARENT outer_p1 (pid1) 379 380 statement ok 381 CREATE TABLE outer_gc1 ( 382 pid1 INT, 383 cid1 INT, 384 cid2 INT, 385 gcid1 INT, 386 gca1 INT, 387 PRIMARY KEY (pid1, cid1, cid2, gcid1) 388 ) INTERLEAVE IN PARENT outer_c1 (pid1, cid1, cid2) 389 390 statement ok 391 INSERT INTO outer_p1 392 SELECT i, i+100 FROM generate_series(1, 20) AS g(i) 393 394 statement ok 395 INSERT INTO outer_c1 396 SELECT i, i, i, i+200 FROM generate_series(-2, 28, 2) AS g(i) 397 398 statement ok 399 INSERT INTO outer_gc1 400 SELECT i, i, i, i, i+300 FROM generate_series(-4, 36, 4) AS g(i) 401 402 ### Begin OUTER queries 403 404 query IIIII rowsort,colnames 405 SELECT * FROM outer_p1 FULL OUTER JOIN outer_c1 USING (pid1) 406 ---- 407 pid1 pa1 cid1 cid2 ca1 408 -2 NULL -2 -2 198 409 0 NULL 0 0 200 410 1 101 NULL NULL NULL 411 2 102 2 2 202 412 3 103 NULL NULL NULL 413 4 104 4 4 204 414 5 105 NULL NULL NULL 415 6 106 6 6 206 416 7 107 NULL NULL NULL 417 8 108 8 8 208 418 9 109 NULL NULL NULL 419 10 110 10 10 210 420 11 111 NULL NULL NULL 421 12 112 12 12 212 422 13 113 NULL NULL NULL 423 14 114 14 14 214 424 15 115 NULL NULL NULL 425 16 116 16 16 216 426 17 117 NULL NULL NULL 427 18 118 18 18 218 428 19 119 NULL NULL NULL 429 20 120 20 20 220 430 22 NULL 22 22 222 431 24 NULL 24 24 224 432 26 NULL 26 26 226 433 28 NULL 28 28 228 434 435 query IIIIII rowsort,colnames 436 SELECT * FROM outer_gc1 FULL OUTER JOIN outer_c1 USING (pid1, cid1, cid2) 437 ---- 438 pid1 cid1 cid2 gcid1 gca1 ca1 439 -4 -4 -4 -4 296 NULL 440 -2 -2 -2 NULL NULL 198 441 0 0 0 0 300 200 442 2 2 2 NULL NULL 202 443 4 4 4 4 304 204 444 6 6 6 NULL NULL 206 445 8 8 8 8 308 208 446 10 10 10 NULL NULL 210 447 12 12 12 12 312 212 448 14 14 14 NULL NULL 214 449 16 16 16 16 316 216 450 18 18 18 NULL NULL 218 451 20 20 20 20 320 220 452 22 22 22 NULL NULL 222 453 24 24 24 24 324 224 454 26 26 26 NULL NULL 226 455 28 28 28 28 328 228 456 32 32 32 32 332 NULL 457 36 36 36 36 336 NULL 458 459 query IIIII rowsort,colnames 460 SELECT * FROM outer_c1 LEFT OUTER JOIN outer_p1 USING (pid1) WHERE pid1 >= 0 AND pid1 < 40 461 ---- 462 pid1 cid1 cid2 ca1 pa1 463 0 0 0 200 NULL 464 2 2 2 202 102 465 4 4 4 204 104 466 6 6 6 206 106 467 8 8 8 208 108 468 10 10 10 210 110 469 12 12 12 212 112 470 14 14 14 214 114 471 16 16 16 216 116 472 18 18 18 218 118 473 20 20 20 220 120 474 22 22 22 222 NULL 475 24 24 24 224 NULL 476 26 26 26 226 NULL 477 28 28 28 228 NULL 478 479 query IIIIII rowsort,colnames 480 SELECT * FROM outer_p1 RIGHT OUTER JOIN outer_gc1 USING (pid1) WHERE pid1 >= 1 AND pid1 <= 20 481 ---- 482 pid1 pa1 cid1 cid2 gcid1 gca1 483 4 104 4 4 4 304 484 8 108 8 8 8 308 485 12 112 12 12 12 312 486 16 116 16 16 16 316 487 20 120 20 20 20 320 488 489 # Regression test for #22655. 490 491 statement ok 492 CREATE TABLE a (a STRING, b STRING, PRIMARY KEY (a, b)) 493 494 statement ok 495 CREATE TABLE b (a STRING, b STRING, PRIMARY KEY (a, b)) INTERLEAVE IN PARENT a (a, b) 496 497 statement ok 498 SELECT * FROM a JOIN b ON a.a=b.a AND a.b=b.b WHERE a.a='foo' 499 500 subtest ParentChildDifferentSize 501 # Regression test for #22647. Test when child is a few columns larger than parent. 502 statement ok 503 CREATE TABLE small_parent (a STRING PRIMARY KEY, b STRING); INSERT INTO small_parent VALUES ('first', 'second') 504 505 statement ok 506 CREATE TABLE large_child (a STRING PRIMARY KEY, c STRING, d STRING, e STRING, f STRING) INTERLEAVE IN PARENT small_parent (a) 507 508 statement ok 509 INSERT INTO large_child VALUES ('first', 'second_child', 'third_child', 'fourth_child', 'fifth_child') 510 511 query TTTTTT 512 SELECT * FROM large_child JOIN small_parent USING (a) 513 ---- 514 first second_child third_child fourth_child fifth_child second 515 516 # Test with composite keys. 517 statement ok 518 CREATE TABLE small_parent_ck (a STRING, b STRING, c STRING, PRIMARY KEY (a, b)); INSERT INTO small_parent_ck VALUES ('first', 'second', 'third') 519 520 statement ok 521 CREATE TABLE large_child_ck (a STRING, b STRING, d STRING, e STRING, f STRING, PRIMARY KEY (a, b, d)) INTERLEAVE IN PARENT small_parent_ck (a, b) 522 523 statement ok 524 INSERT INTO large_child_ck VALUES ('first', 'second', 'third_child', 'fourth_child', 'fifth_child') 525 526 query TTTTTTT 527 SELECT * FROM large_child_ck JOIN small_parent_ck USING (a) 528 ---- 529 first second third_child fourth_child fifth_child second third 530 531 532 # Test with families. 533 statement ok 534 CREATE TABLE small_parent_fam (a STRING, b STRING, c STRING, PRIMARY KEY (a, b)); INSERT INTO small_parent_fam VALUES ('first', 'second', 'third') 535 536 statement ok 537 CREATE TABLE large_child_fam ( 538 a STRING, 539 b STRING, 540 d STRING, 541 e STRING, 542 f STRING, 543 PRIMARY KEY (a, b, d), 544 FAMILY f1 (a, b, d, e), 545 FAMILY f2 (f) 546 ) INTERLEAVE IN PARENT small_parent_fam (a, b) 547 548 statement ok 549 INSERT INTO large_child_fam VALUES ('first', 'second', 'third_child', 'fourth_child', 'fifth_child') 550 551 query TTTTTTT 552 SELECT * FROM large_child_fam JOIN small_parent_fam USING (a) 553 ---- 554 first second third_child fourth_child fifth_child second third 555 556 557 # Test with parent being much larger than child. 558 statement ok 559 CREATE TABLE large_parent_fam ( 560 a STRING, 561 b STRING, 562 c STRING, 563 d STRING, 564 e STRING, 565 f STRING, 566 PRIMARY KEY (a, b), 567 FAMILY f1 (a, b, c, d), 568 FAMILY f2 (e, f) 569 ) 570 571 statement ok 572 INSERT INTO large_parent_fam VALUES ('first', 'second', 'third', 'fourth', 'fifth', 'sixth') 573 574 statement ok 575 CREATE TABLE small_child_fam ( 576 a STRING, 577 b STRING, 578 g STRING, 579 PRIMARY KEY (a, b) 580 ) INTERLEAVE IN PARENT large_parent_fam (a, b) 581 582 statement ok 583 INSERT INTO small_child_fam VALUES ('first', 'second', 'third_child') 584 585 query TTTTTTTT 586 SELECT * FROM small_child_fam JOIN large_parent_fam USING (a) 587 ---- 588 first second third_child second third fourth fifth sixth 589 590 query T 591 SELECT feature_name FROM crdb_internal.feature_usage WHERE feature_name='sql.plan.interleaved-table-join' AND usage_count > 0 592 ---- 593 sql.plan.interleaved-table-join 594 595 subtest regression_42609 596 597 statement ok 598 CREATE TABLE parent ( 599 a STRING, 600 b STRING, 601 extraParent STRING, 602 PRIMARY KEY (a, b) 603 ) 604 605 statement ok 606 CREATE TABLE child ( 607 a STRING, 608 b STRING, 609 c STRING, 610 extra STRING, 611 PRIMARY KEY (a,b,c) 612 ) INTERLEAVE IN PARENT "parent" (a, b) 613 614 statement ok 615 INSERT INTO parent VALUES ('a', 'b', 'ccc') 616 617 statement ok 618 INSERT INTO child VALUES ('a', 'b', '1', 'extra') 619 620 statement ok 621 CREATE INDEX idx_parent_child on child(a,b) INTERLEAVE IN PARENT parent(a,b) 622 623 # This query strictly uses the interleave index on the child to merge with the parent. 624 query TTT 625 select parent.a, parent.b, child.c from child@{force_index=idx_parent_child} left outer join parent on (parent.a=child.a and parent.b = child.b) 626 ---- 627 a b 1