github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/logictest/testdata/logic_test/distinct_on (about) 1 statement ok 2 CREATE TABLE xyz ( 3 x INT, 4 y INT, 5 z INT, 6 pk1 INT, 7 pk2 INT, 8 PRIMARY KEY (pk1, pk2) 9 ) 10 11 statement ok 12 INSERT INTO xyz VALUES 13 (1, 1, NULL, 1, 1), 14 (1, 1, 2, 2, 2), 15 (1, 1, 2, 3, 3), 16 (1, 2, 1, 4, 4), 17 (2, 2, 3, 5, 5), 18 (4, 5, 6, 6, 6), 19 (4, 1, 6, 7, 7) 20 21 statement ok 22 CREATE TABLE abc ( 23 a STRING, 24 b STRING, 25 c STRING, 26 PRIMARY KEY (a, b, c) 27 ) 28 29 statement ok 30 INSERT INTO abc VALUES 31 ('1', '1', '1'), 32 ('1', '1', '2'), 33 ('1', '2', '2') 34 35 ################## 36 # Simple queries # 37 ################## 38 39 # 3/3 columns 40 41 query III rowsort 42 SELECT DISTINCT ON (x, y, z) x, y, z FROM xyz 43 ---- 44 1 1 NULL 45 1 1 2 46 1 2 1 47 2 2 3 48 4 5 6 49 4 1 6 50 51 query I rowsort 52 SELECT DISTINCT ON (y, x, z) x FROM xyz 53 ---- 54 1 55 1 56 1 57 2 58 4 59 4 60 61 query I rowsort 62 SELECT DISTINCT ON (z, y, x) z FROM xyz 63 ---- 64 NULL 65 2 66 1 67 3 68 6 69 6 70 71 query TTT rowsort 72 SELECT DISTINCT ON (b, c, a) a, c, b FROM abc 73 ---- 74 1 1 1 75 1 2 1 76 1 2 2 77 78 query T rowsort 79 SELECT DISTINCT ON (b, c, a) a FROM abc 80 ---- 81 1 82 1 83 1 84 85 # We need to rowsort this since the ORDER BY isn't on the entire SELECT columns. 86 query T rowsort 87 SELECT DISTINCT ON (c, a, b) b FROM abc ORDER BY b 88 ---- 89 1 90 1 91 2 92 93 94 # 2/3 columns 95 96 query II rowsort 97 SELECT DISTINCT ON (x, y) y, x FROM xyz 98 ---- 99 1 1 100 2 1 101 2 2 102 5 4 103 1 4 104 105 query I rowsort 106 SELECT DISTINCT ON (y, x) x FROM xyz 107 ---- 108 1 109 1 110 2 111 4 112 4 113 114 query I rowsort 115 SELECT DISTINCT ON (x, y) y FROM xyz 116 ---- 117 1 118 2 119 2 120 5 121 1 122 123 query TT 124 SELECT DISTINCT ON (a, c) a, b FROM abc ORDER BY a, c, b 125 ---- 126 1 1 127 1 1 128 129 # We wrap this with an ORDER BY otherwise this would be non-deterministic. 130 query TTT 131 SELECT DISTINCT ON (c, a) b, c, a FROM abc ORDER BY c, a, b DESC 132 ---- 133 1 1 1 134 2 2 1 135 136 137 # 1/3 columns 138 139 query I rowsort 140 SELECT DISTINCT ON (y) y FROM xyz 141 ---- 142 1 143 2 144 5 145 146 query T rowsort 147 SELECT DISTINCT ON (c) a FROM abc 148 ---- 149 1 150 1 151 152 query T rowsort 153 SELECT DISTINCT ON (b) b FROM abc 154 ---- 155 1 156 2 157 158 # We wrap this with an ORDER BY otherwise this would be non-deterministic. 159 query TTT 160 SELECT DISTINCT ON (a) a, b, c FROM abc ORDER BY a, b, c 161 ---- 162 1 1 1 163 164 query TT 165 SELECT DISTINCT ON (a) a, c FROM abc ORDER BY a, c DESC, b 166 ---- 167 1 2 168 169 ################# 170 # With ORDER BY # 171 ################# 172 173 statement error SELECT DISTINCT ON expressions must match initial ORDER BY expressions 174 SELECT DISTINCT ON (x) x, y, z FROM xyz ORDER BY y 175 176 statement error SELECT DISTINCT ON expressions must match initial ORDER BY expressions 177 SELECT DISTINCT ON (y) x, y, z FROM xyz ORDER BY x, y 178 179 statement error SELECT DISTINCT ON expressions must match initial ORDER BY expressions 180 SELECT DISTINCT ON (y, z) x, y, z FROM xyz ORDER BY x 181 182 query I 183 SELECT DISTINCT ON (x) x FROM xyz ORDER BY x DESC 184 ---- 185 4 186 2 187 1 188 189 # We add a filter to eliminate one of the rows that may be flakily returned 190 # depending on parallel execution of DISTINCT ON. 191 query III 192 SELECT DISTINCT ON (x, z) y, z, x FROM xyz WHERE (x,y,z) != (4, 1, 6) ORDER BY z 193 ---- 194 1 NULL 1 195 2 1 1 196 1 2 1 197 2 3 2 198 5 6 4 199 200 query III 201 SELECT DISTINCT ON (x) y, z, x FROM xyz ORDER BY x ASC, z DESC, y DESC 202 ---- 203 1 2 1 204 2 3 2 205 5 6 4 206 207 # Regression test for #35437: Discard extra ordering columns after performing 208 # DISTINCT operation. 209 query T 210 SELECT (SELECT DISTINCT ON (a) a FROM abc ORDER BY a, b||'foo') || 'bar'; 211 ---- 212 1bar 213 214 ##################### 215 # With aggregations # 216 ##################### 217 218 statement error column "y" must appear in the GROUP BY clause or be used in an aggregate function 219 SELECT DISTINCT ON(max(x)) y FROM xyz 220 221 statement error column "z" must appear in the GROUP BY clause or be used in an aggregate function 222 SELECT DISTINCT ON(max(x), z) min(y) FROM xyz 223 224 query I 225 SELECT DISTINCT ON (max(x)) min(y) FROM xyz 226 ---- 227 1 228 229 query I 230 SELECT DISTINCT ON (min(x)) max(y) FROM xyz 231 ---- 232 5 233 234 query T 235 SELECT DISTINCT ON(min(a), max(b), min(c)) max(c) FROM abc 236 ---- 237 2 238 239 ################# 240 # With GROUP BY # 241 ################# 242 243 statement error column "x" must appear in the GROUP BY clause or be used in an aggregate function 244 SELECT DISTINCT ON (x) min(x) FROM xyz GROUP BY y 245 246 query I rowsort 247 SELECT DISTINCT ON(y) min(x) FROM xyz GROUP BY y 248 ---- 249 1 250 1 251 4 252 253 query I 254 SELECT DISTINCT ON(min(x)) min(x) FROM xyz GROUP BY y HAVING min(x) = 1 255 ---- 256 1 257 258 ######################### 259 # With window functions # 260 ######################### 261 262 query I rowsort 263 SELECT DISTINCT ON(row_number() OVER(ORDER BY (pk1, pk2))) y FROM xyz 264 ---- 265 1 266 1 267 1 268 2 269 2 270 5 271 1 272 273 query I 274 SELECT DISTINCT ON(row_number() OVER(ORDER BY (pk1, pk2))) y FROM xyz ORDER BY row_number() OVER(ORDER BY (pk1, pk2)) DESC 275 ---- 276 1 277 5 278 2 279 2 280 1 281 1 282 1 283 284 ########################### 285 # With ordinal references # 286 ########################### 287 288 statement error DISTINCT ON position 2 is not in select list 289 SELECT DISTINCT ON (2) x FROM xyz 290 291 query I rowsort 292 SELECT DISTINCT ON (1) x FROM xyz 293 ---- 294 1 295 2 296 4 297 298 query III rowsort 299 SELECT DISTINCT ON (1,2,3) x, y, z FROM xyz 300 ---- 301 1 1 NULL 302 1 1 2 303 1 2 1 304 2 2 3 305 4 5 6 306 4 1 6 307 308 ######################### 309 # With alias references # 310 ######################### 311 312 # This should prioritize alias (use 'x' as the key). 313 # This would be non-deterministic if we don't select y (actually x) from the 314 # subquery. 315 query I rowsort 316 SELECT y FROM (SELECT DISTINCT ON(y) x AS y, y AS x FROM xyz) 317 ---- 318 1 319 2 320 4 321 322 # Ignores the alias. 323 query I rowsort 324 SELECT DISTINCT ON(x) x AS y FROM xyz 325 ---- 326 1 327 2 328 4 329 330 ################################## 331 # With nested parentheses/tuples # 332 ################################## 333 334 query II rowsort 335 SELECT DISTINCT ON(((x)), (x, y)) x, y FROM xyz 336 ---- 337 1 1 338 1 2 339 2 2 340 4 5 341 4 1 342 343 ################################ 344 # Hybrid PK and non-PK queries # 345 ################################ 346 347 # We need to rowsort this since the ORDER BY isn't on the entire SELECT columns. 348 query III rowsort 349 SELECT DISTINCT ON(pk1, pk2, x, y) x, y, z FROM xyz ORDER BY x, y 350 ---- 351 1 1 NULL 352 1 1 2 353 1 1 2 354 1 2 1 355 2 2 3 356 4 1 6 357 4 5 6 358 359 # Ordering only propagates up until distinctNode. 360 # pk1 ordering does not propagate at all since it's not explicitly needed. 361 # We add a filter since there could be multiple valid pk1s otherwise for distinct 362 # rows. 363 query I rowsort 364 SELECT DISTINCT ON (x, y, z) pk1 FROM (SELECT * FROM xyz WHERE x >= 2) ORDER BY x 365 ---- 366 5 367 6 368 7 369 370 # Regression tests for #34112: distinct on constant column. 371 query II 372 SELECT DISTINCT ON (x) x, y FROM xyz WHERE x = 1 ORDER BY x, y 373 ---- 374 1 1 375 376 query I 377 SELECT count(*) FROM (SELECT DISTINCT ON (x) x, y FROM xyz WHERE x = 1 ORDER BY x, y) 378 ---- 379 1