github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/logictest/testdata/logic_test/distinct_on (about)

     1  statement ok
     2  CREATE TABLE xyz (
     3    x INT,
     4    y INT,
     5    z INT,
     6    pk1 INT,
     7    pk2 INT,
     8    PRIMARY KEY (pk1, pk2)
     9  )
    10  
    11  statement ok
    12  INSERT INTO xyz VALUES
    13    (1, 1, NULL, 1, 1),
    14    (1, 1, 2, 2, 2),
    15    (1, 1, 2, 3, 3),
    16    (1, 2, 1, 4, 4),
    17    (2, 2, 3, 5, 5),
    18    (4, 5, 6, 6, 6),
    19    (4, 1, 6, 7, 7)
    20  
    21  statement ok
    22  CREATE TABLE abc (
    23    a STRING,
    24    b STRING,
    25    c STRING,
    26    PRIMARY KEY (a, b, c)
    27  )
    28  
    29  statement ok
    30  INSERT INTO abc VALUES
    31    ('1', '1', '1'),
    32    ('1', '1', '2'),
    33    ('1', '2', '2')
    34  
    35  ##################
    36  # Simple queries #
    37  ##################
    38  
    39  # 3/3 columns
    40  
    41  query III rowsort
    42  SELECT DISTINCT ON (x, y, z) x, y, z FROM xyz
    43  ----
    44  1 1 NULL
    45  1 1 2
    46  1 2 1
    47  2 2 3
    48  4 5 6
    49  4 1 6
    50  
    51  query I rowsort
    52  SELECT DISTINCT ON (y, x, z) x FROM xyz
    53  ----
    54  1
    55  1
    56  1
    57  2
    58  4
    59  4
    60  
    61  query I rowsort
    62  SELECT DISTINCT ON (z, y, x) z FROM xyz
    63  ----
    64  NULL
    65  2
    66  1
    67  3
    68  6
    69  6
    70  
    71  query TTT rowsort
    72  SELECT DISTINCT ON (b, c, a) a, c, b FROM abc
    73  ----
    74  1 1 1
    75  1 2 1
    76  1 2 2
    77  
    78  query T rowsort
    79  SELECT DISTINCT ON (b, c, a) a FROM abc
    80  ----
    81  1
    82  1
    83  1
    84  
    85  # We need to rowsort this since the ORDER BY isn't on the entire SELECT columns.
    86  query T rowsort
    87  SELECT DISTINCT ON (c, a, b) b FROM abc ORDER BY b
    88  ----
    89  1
    90  1
    91  2
    92  
    93  
    94  # 2/3 columns
    95  
    96  query II rowsort
    97  SELECT DISTINCT ON (x, y) y, x FROM xyz
    98  ----
    99  1 1
   100  2 1
   101  2 2
   102  5 4
   103  1 4
   104  
   105  query I rowsort
   106  SELECT DISTINCT ON (y, x) x FROM xyz
   107  ----
   108  1
   109  1
   110  2
   111  4
   112  4
   113  
   114  query I rowsort
   115  SELECT DISTINCT ON (x, y) y FROM xyz
   116  ----
   117  1
   118  2
   119  2
   120  5
   121  1
   122  
   123  query TT
   124  SELECT DISTINCT ON (a, c) a, b FROM abc ORDER BY a, c, b
   125  ----
   126  1 1
   127  1 1
   128  
   129  # We wrap this with an ORDER BY otherwise this would be non-deterministic.
   130  query TTT
   131  SELECT DISTINCT ON (c, a) b, c, a FROM abc ORDER BY c, a, b DESC
   132  ----
   133  1 1 1
   134  2 2 1
   135  
   136  
   137  # 1/3 columns
   138  
   139  query I rowsort
   140  SELECT DISTINCT ON (y) y FROM xyz
   141  ----
   142  1
   143  2
   144  5
   145  
   146  query T rowsort
   147  SELECT DISTINCT ON (c) a FROM abc
   148  ----
   149  1
   150  1
   151  
   152  query T rowsort
   153  SELECT DISTINCT ON (b) b FROM abc
   154  ----
   155  1
   156  2
   157  
   158  # We wrap this with an ORDER BY otherwise this would be non-deterministic.
   159  query TTT
   160  SELECT DISTINCT ON (a) a, b, c FROM abc ORDER BY a, b, c
   161  ----
   162  1 1 1
   163  
   164  query TT
   165  SELECT DISTINCT ON (a) a, c FROM abc ORDER BY a, c DESC, b
   166  ----
   167  1 2
   168  
   169  #################
   170  # With ORDER BY #
   171  #################
   172  
   173  statement error SELECT DISTINCT ON expressions must match initial ORDER BY expressions
   174  SELECT DISTINCT ON (x) x, y, z FROM xyz ORDER BY y
   175  
   176  statement error SELECT DISTINCT ON expressions must match initial ORDER BY expressions
   177  SELECT DISTINCT ON (y) x, y, z FROM xyz ORDER BY x, y
   178  
   179  statement error SELECT DISTINCT ON expressions must match initial ORDER BY expressions
   180  SELECT DISTINCT ON (y, z) x, y, z FROM xyz ORDER BY x
   181  
   182  query I
   183  SELECT DISTINCT ON (x) x FROM xyz ORDER BY x DESC
   184  ----
   185  4
   186  2
   187  1
   188  
   189  # We add a filter to eliminate one of the rows that may be flakily returned
   190  # depending on parallel execution of DISTINCT ON.
   191  query III
   192  SELECT DISTINCT ON (x, z) y, z, x FROM xyz WHERE (x,y,z) != (4, 1, 6) ORDER BY z
   193  ----
   194  1 NULL 1
   195  2 1 1
   196  1 2 1
   197  2 3 2
   198  5 6 4
   199  
   200  query III
   201  SELECT DISTINCT ON (x) y, z, x FROM xyz ORDER BY x ASC, z DESC, y DESC
   202  ----
   203  1 2 1
   204  2 3 2
   205  5 6 4
   206  
   207  # Regression test for #35437: Discard extra ordering columns after performing
   208  # DISTINCT operation.
   209  query T
   210  SELECT (SELECT DISTINCT ON (a) a FROM abc ORDER BY a, b||'foo') || 'bar';
   211  ----
   212  1bar
   213  
   214  #####################
   215  # With aggregations #
   216  #####################
   217  
   218  statement error column "y" must appear in the GROUP BY clause or be used in an aggregate function
   219  SELECT DISTINCT ON(max(x)) y FROM xyz
   220  
   221  statement error column "z" must appear in the GROUP BY clause or be used in an aggregate function
   222  SELECT DISTINCT ON(max(x), z) min(y) FROM xyz
   223  
   224  query I
   225  SELECT DISTINCT ON (max(x)) min(y) FROM xyz
   226  ----
   227  1
   228  
   229  query I
   230  SELECT DISTINCT ON (min(x)) max(y) FROM xyz
   231  ----
   232  5
   233  
   234  query T
   235  SELECT DISTINCT ON(min(a), max(b), min(c)) max(c) FROM abc
   236  ----
   237  2
   238  
   239  #################
   240  # With GROUP BY #
   241  #################
   242  
   243  statement error column "x" must appear in the GROUP BY clause or be used in an aggregate function
   244  SELECT DISTINCT ON (x) min(x) FROM xyz GROUP BY y
   245  
   246  query I rowsort
   247  SELECT DISTINCT ON(y) min(x) FROM xyz GROUP BY y
   248  ----
   249  1
   250  1
   251  4
   252  
   253  query I
   254  SELECT DISTINCT ON(min(x)) min(x) FROM xyz GROUP BY y HAVING min(x) = 1
   255  ----
   256  1
   257  
   258  #########################
   259  # With window functions #
   260  #########################
   261  
   262  query I rowsort
   263  SELECT DISTINCT ON(row_number() OVER(ORDER BY (pk1, pk2))) y FROM xyz
   264  ----
   265  1
   266  1
   267  1
   268  2
   269  2
   270  5
   271  1
   272  
   273  query I
   274  SELECT DISTINCT ON(row_number() OVER(ORDER BY (pk1, pk2))) y FROM xyz ORDER BY row_number() OVER(ORDER BY (pk1, pk2)) DESC
   275  ----
   276  1
   277  5
   278  2
   279  2
   280  1
   281  1
   282  1
   283  
   284  ###########################
   285  # With ordinal references #
   286  ###########################
   287  
   288  statement error DISTINCT ON position 2 is not in select list
   289  SELECT DISTINCT ON (2) x FROM xyz
   290  
   291  query I rowsort
   292  SELECT DISTINCT ON (1) x FROM xyz
   293  ----
   294  1
   295  2
   296  4
   297  
   298  query III rowsort
   299  SELECT DISTINCT ON (1,2,3) x, y, z FROM xyz
   300  ----
   301  1  1  NULL
   302  1  1  2
   303  1  2  1
   304  2  2  3
   305  4  5  6
   306  4  1  6
   307  
   308  #########################
   309  # With alias references #
   310  #########################
   311  
   312  # This should prioritize alias (use 'x' as the key).
   313  # This would be non-deterministic if we don't select y (actually x) from the
   314  # subquery.
   315  query I rowsort
   316  SELECT y FROM (SELECT DISTINCT ON(y) x AS y, y AS x FROM xyz)
   317  ----
   318  1
   319  2
   320  4
   321  
   322  # Ignores the alias.
   323  query I rowsort
   324  SELECT DISTINCT ON(x) x AS y FROM xyz
   325  ----
   326  1
   327  2
   328  4
   329  
   330  ##################################
   331  # With nested parentheses/tuples #
   332  ##################################
   333  
   334  query II rowsort
   335  SELECT DISTINCT ON(((x)), (x, y)) x, y FROM xyz
   336  ----
   337  1  1
   338  1  2
   339  2  2
   340  4  5
   341  4  1
   342  
   343  ################################
   344  # Hybrid PK and non-PK queries #
   345  ################################
   346  
   347  # We need to rowsort this since the ORDER BY isn't on the entire SELECT columns.
   348  query III rowsort
   349  SELECT DISTINCT ON(pk1, pk2, x, y) x, y, z FROM xyz ORDER BY x, y
   350  ----
   351  1  1  NULL
   352  1  1  2
   353  1  1  2
   354  1  2  1
   355  2  2  3
   356  4  1  6
   357  4  5  6
   358  
   359  # Ordering only propagates up until distinctNode.
   360  # pk1 ordering does not propagate at all since it's not explicitly needed.
   361  # We add a filter since there could be multiple valid pk1s otherwise for distinct
   362  # rows.
   363  query I rowsort
   364  SELECT DISTINCT ON (x, y, z) pk1 FROM (SELECT * FROM xyz WHERE x >= 2) ORDER BY x
   365  ----
   366  5
   367  6
   368  7
   369  
   370  # Regression tests for #34112: distinct on constant column.
   371  query II
   372  SELECT DISTINCT ON (x) x, y FROM xyz WHERE x = 1 ORDER BY x, y
   373  ----
   374  1 1
   375  
   376  query I
   377  SELECT count(*) FROM (SELECT DISTINCT ON (x) x, y FROM xyz WHERE x = 1 ORDER BY x, y)
   378  ----
   379  1