github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/logictest/testdata/logic_test/inner-join (about)

     1  statement ok
     2  CREATE TABLE abc (a INT, b INT, c INT, PRIMARY KEY (a, b));
     3  INSERT INTO abc VALUES (1, 1, 2), (2, 1, 1), (2, 2, NULL)
     4  
     5  statement ok
     6  CREATE TABLE def (d INT, e INT, f INT, PRIMARY KEY (d, e));
     7  INSERT INTO def VALUES (1, 1, 2), (2, 1, 0), (1, 2, NULL)
     8  
     9  query III rowsort
    10  SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a=d)
    11  ----
    12  1  1  2
    13  2  1  1
    14  2  2  NULL
    15  
    16  # Test lookup inner joins created from semi-joins.
    17  query III rowsort
    18  SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a=f)
    19  ----
    20  2  1  1
    21  2  2  NULL
    22  
    23  query III rowsort
    24  SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a=d AND c=e)
    25  ----
    26  1  1  2
    27  2  1  1
    28  
    29  # A semi-join emits exactly one row for every matching row in the LHS.
    30  # The following test ensures that the SemiJoin doesn't commute into an
    31  # InnerJoin as that guarantee would be lost.
    32  statement ok
    33  TRUNCATE TABLE abc; TRUNCATE TABLE def;
    34  
    35  statement ok
    36  INSERT INTO abc VALUES (1, 1, 1)
    37  
    38  statement ok
    39  INSERT INTO def VALUES (1, 1, 1), (2, 1, 1)
    40  
    41  query III rowsort
    42  SELECT a, b, c FROM abc WHERE EXISTS (SELECT * FROM def WHERE a=d OR a=e)
    43  ----
    44  1  1  1
    45  
    46  # Given that we know the reason the above query would fail if an InnerJoin
    47  # was used - multiple rows emitted for each matching row in the LHS - we
    48  # might think that adding a DistinctOn over the InnerJoin would help.
    49  # This test shows why that wouldn't work. There are two reasons:
    50  #
    51  #  - The columns of the LHS that are emitted aren't guaranteed to be a key.
    52  #    This means that only unique rows are returned when that is not what the
    53  #    SemiJoin should return (notice the (1, 1, 1) row is emitted twice)
    54  #  - We can't handle general filters because of composite datums (values that
    55  #    are equal but not identical). For example the decimals 1, 1.0 and 1.00
    56  #    are equal but have different string representations.
    57  #    The DistinctOn on the RHS would omit important rows in that case.
    58  #
    59  # This tests that the InnerJoin commute rule for semi joins behaves sanely in
    60  # these cases.
    61  statement ok
    62  CREATE TABLE abc_decimal (a DECIMAL, b DECIMAL, c DECIMAL);
    63  INSERT INTO abc_decimal VALUES (1, 1, 1), (1, 1, 1), (1.0, 1.0, 1.0), (1.00, 1.00, 1.00)
    64  
    65  statement ok
    66  CREATE TABLE def_decimal (d DECIMAL, e DECIMAL, f DECIMAL);
    67  INSERT INTO def_decimal VALUES (1, 1, 1), (1.0, 1.0, 1.0), (1.00, 1.00, 1.00)
    68  
    69  query RRR rowsort
    70  SELECT a, b, c FROM abc_decimal WHERE EXISTS (SELECT * FROM def_decimal WHERE a::string=d::string)
    71  ----
    72  1     1     1
    73  1     1     1
    74  1.0   1.0   1.0
    75  1.00  1.00  1.00