github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/logictest/testdata/logic_test/distsql_agg (about) 1 # LogicTest: 5node-default-configs 2 3 statement ok 4 CREATE TABLE data (a INT, b INT, c FLOAT, d DECIMAL, PRIMARY KEY (a, b, c, d)) 5 6 # Split into ten parts. 7 statement ok 8 ALTER TABLE data SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i) 9 10 # Relocate the ten parts to the five nodes. 11 statement ok 12 ALTER TABLE data EXPERIMENTAL_RELOCATE 13 SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i) 14 15 # Generate all combinations of values 1 to 10. 16 statement ok 17 INSERT INTO data SELECT a, b, c::FLOAT, d::DECIMAL FROM 18 generate_series(1, 10) AS a(a), 19 generate_series(1, 10) AS b(b), 20 generate_series(1, 10) AS c(c), 21 generate_series(1, 10) AS d(d) 22 23 # Verify data placement. 24 query TTTI colnames,rowsort 25 SELECT start_key, end_key, replicas, lease_holder FROM [SHOW RANGES FROM TABLE data] 26 ---- 27 start_key end_key replicas lease_holder 28 NULL /1 {1} 1 29 /1 /2 {2} 2 30 /2 /3 {3} 3 31 /3 /4 {4} 4 32 /4 /5 {5} 5 33 /5 /6 {1} 1 34 /6 /7 {2} 2 35 /7 /8 {3} 3 36 /8 /9 {4} 4 37 /9 NULL {5} 5 38 39 query R 40 SELECT sum(a) FROM data 41 ---- 42 55000 43 44 query R 45 SELECT sum((a-1)*1000 + (b-1)*100 + (c::INT-1)*10 + (d-1)) FROM data 46 ---- 47 49995000 48 49 query RII 50 SELECT sum(a), count(a), max(a) FROM data 51 ---- 52 55000 10000 10 53 54 query RII 55 SELECT sum(a+b), count(a+b), max(a+b) FROM data 56 ---- 57 110000 10000 20 58 59 query R 60 SELECT sum((a-1)*1000) + sum((b-1)*100) + sum((c::INT-1)*10) + sum(d-1) FROM data 61 ---- 62 49995000 63 64 query RIRI 65 SELECT sum(a), min(b), max(c), count(d) FROM data 66 ---- 67 55000 1 10 10000 68 69 query R 70 SELECT avg(a+b+c::INT+d) FROM data 71 ---- 72 22 73 74 query RR 75 SELECT sum(a), round(stddev(b), 1) FROM data 76 ---- 77 55000 2.9 78 79 query RR 80 SELECT sum(a), round(variance(b), 1) FROM data 81 ---- 82 55000 8.3 83 84 query R 85 SELECT stddev(a+b+c::INT+d) FROM data 86 ---- 87 5.7448498962142608187 88 89 query R 90 SELECT variance(a+b+c::INT+d) FROM data 91 ---- 92 33.0033003300330033 93 94 query RRRRRRR 95 SELECT sum(a), avg(b), sum(c), avg(d), stddev(a), variance(b), sum(a+b+c::INT+d) FROM data 96 ---- 97 55000 5.5 55000 5.5 2.8724249481071304094 8.2508250825082508251 220000 98 99 query RIRIRRR 100 SELECT sum(a), min(b), max(c), count(d), avg(a+b+c::INT+d), stddev(a+b), variance(c::INT+d) FROM data 101 ---- 102 55000 1 10 10000 22 4.0622223185119375800 16.50165016501650165 103 104 query RRRIRRRR 105 SELECT sum(a), stddev(a), avg(a) FILTER (WHERE a > 5), count(b), avg(b), variance(b) FILTER (WHERE b < 8), sum(b) FILTER (WHERE b < 8), stddev(b) FILTER (WHERE b > 2) FROM data 106 ---- 107 55000 2.8724249481071304094 8 10000 5.5 4.0005715102157451064 28000 2.2914310663953007487 108 109 query RRR 110 SELECT sum(a), avg(DISTINCT a), variance(a) FILTER (WHERE a > 0) FROM data 111 ---- 112 55000 5.5 8.2508250825082508251 113 114 query RRIRR 115 SELECT sum(a), avg(a), count(a), stddev(a), variance(a) FROM data 116 ---- 117 55000 5.5 10000 2.8724249481071304094 8.2508250825082508251 118 119 query RRRRR 120 SELECT sum(a), avg(b), sum(a), sum(a), avg(b) FROM data 121 ---- 122 55000 5.5 55000 55000 5.5 123 124 query RRRR 125 SELECT avg(c), sum(c), avg(d), sum(d) FROM data 126 ---- 127 5.5 55000 5.5 55000 128 129 query II 130 SELECT max(a), min(b) FROM data HAVING min(b) > 2 131 ---- 132 133 134 query I rowsort 135 SELECT DISTINCT (a) FROM data 136 ---- 137 1 138 2 139 3 140 4 141 5 142 6 143 7 144 8 145 9 146 10 147 148 query R 149 SELECT SUM (DISTINCT A) FROM data 150 ---- 151 55 152 153 query RR 154 SELECT SUM (DISTINCT A), SUM (DISTINCT B) from data 155 ---- 156 55 55 157 158 query II 159 SELECT DISTINCT a, b FROM data WHERE (a + b + c::INT) = 27 ORDER BY a,b 160 ---- 161 7 10 162 8 9 163 8 10 164 9 8 165 9 9 166 9 10 167 10 7 168 10 8 169 10 9 170 10 10 171 172 query II 173 SELECT DISTINCT a, b FROM data WHERE (a + b + c::INT) = 27 ORDER BY b,a 174 ---- 175 10 7 176 9 8 177 10 8 178 8 9 179 9 9 180 10 9 181 7 10 182 8 10 183 9 10 184 10 10 185 186 query RRR 187 SELECT c, d, sum(a+c::INT) + avg(b+d) FROM data GROUP BY c, d ORDER BY c, d 188 ---- 189 1 1 656.5 190 1 2 657.5 191 1 3 658.5 192 1 4 659.5 193 1 5 660.5 194 1 6 661.5 195 1 7 662.5 196 1 8 663.5 197 1 9 664.5 198 1 10 665.5 199 2 1 756.5 200 2 2 757.5 201 2 3 758.5 202 2 4 759.5 203 2 5 760.5 204 2 6 761.5 205 2 7 762.5 206 2 8 763.5 207 2 9 764.5 208 2 10 765.5 209 3 1 856.5 210 3 2 857.5 211 3 3 858.5 212 3 4 859.5 213 3 5 860.5 214 3 6 861.5 215 3 7 862.5 216 3 8 863.5 217 3 9 864.5 218 3 10 865.5 219 4 1 956.5 220 4 2 957.5 221 4 3 958.5 222 4 4 959.5 223 4 5 960.5 224 4 6 961.5 225 4 7 962.5 226 4 8 963.5 227 4 9 964.5 228 4 10 965.5 229 5 1 1056.5 230 5 2 1057.5 231 5 3 1058.5 232 5 4 1059.5 233 5 5 1060.5 234 5 6 1061.5 235 5 7 1062.5 236 5 8 1063.5 237 5 9 1064.5 238 5 10 1065.5 239 6 1 1156.5 240 6 2 1157.5 241 6 3 1158.5 242 6 4 1159.5 243 6 5 1160.5 244 6 6 1161.5 245 6 7 1162.5 246 6 8 1163.5 247 6 9 1164.5 248 6 10 1165.5 249 7 1 1256.5 250 7 2 1257.5 251 7 3 1258.5 252 7 4 1259.5 253 7 5 1260.5 254 7 6 1261.5 255 7 7 1262.5 256 7 8 1263.5 257 7 9 1264.5 258 7 10 1265.5 259 8 1 1356.5 260 8 2 1357.5 261 8 3 1358.5 262 8 4 1359.5 263 8 5 1360.5 264 8 6 1361.5 265 8 7 1362.5 266 8 8 1363.5 267 8 9 1364.5 268 8 10 1365.5 269 9 1 1456.5 270 9 2 1457.5 271 9 3 1458.5 272 9 4 1459.5 273 9 5 1460.5 274 9 6 1461.5 275 9 7 1462.5 276 9 8 1463.5 277 9 9 1464.5 278 9 10 1465.5 279 10 1 1556.5 280 10 2 1557.5 281 10 3 1558.5 282 10 4 1559.5 283 10 5 1560.5 284 10 6 1561.5 285 10 7 1562.5 286 10 8 1563.5 287 10 9 1564.5 288 10 10 1565.5 289 290 # Test plans with empty streams. 291 statement ok 292 CREATE TABLE one (k INT PRIMARY KEY, v INT) 293 294 statement ok 295 ALTER TABLE one SPLIT AT VALUES (0), (99) 296 297 statement ok 298 ALTER TABLE one EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 0) 299 300 statement ok 301 INSERT INTO one VALUES (1,1), (2,2), (3,3), (4,4), (5,5), (6,6), (7,7), (8,8), (9,9), (10,10) 302 303 statement ok 304 CREATE TABLE two (k INT PRIMARY KEY, v INT); 305 306 statement ok 307 ALTER TABLE two SPLIT AT VALUES (0), (99) 308 309 statement ok 310 ALTER TABLE two EXPERIMENTAL_RELOCATE VALUES (ARRAY[2], 0) 311 312 statement ok 313 INSERT INTO two VALUES (1,1), (2,2), (3,3), (4,4), (5,5), (6,6), (7,7), (8,8), (9,9), (10,10) 314 315 query TTTI colnames,rowsort 316 SELECT start_key, end_key, replicas, lease_holder FROM [SHOW RANGES FROM TABLE one] 317 ---- 318 start_key end_key replicas lease_holder 319 NULL /0 {5} 5 320 /0 /99 {1} 1 321 /99 NULL {5} 5 322 323 query TTTI colnames,rowsort 324 SELECT start_key, end_key, replicas, lease_holder FROM [SHOW RANGES FROM TABLE two] 325 ---- 326 start_key end_key replicas lease_holder 327 NULL /0 {5} 5 328 /0 /99 {2} 2 329 /99 NULL {5} 5 330 331 query I 332 SELECT count(*) FROM one AS a, one AS b, two AS c 333 ---- 334 1000 335 336 query RRR 337 SELECT sum(a), sum(b), sum(c) FROM data GROUP BY d HAVING sum(a+b) > 10 338 ---- 339 5500 5500 5500 340 5500 5500 5500 341 5500 5500 5500 342 5500 5500 5500 343 5500 5500 5500 344 5500 5500 5500 345 5500 5500 5500 346 5500 5500 5500 347 5500 5500 5500 348 5500 5500 5500 349 350 351 query RR rowsort 352 SELECT avg(a+b), c FROM data GROUP BY c, d HAVING c = d 353 ---- 354 11 1 355 11 2 356 11 3 357 11 4 358 11 5 359 11 6 360 11 7 361 11 8 362 11 9 363 11 10 364 365 query RRR rowsort 366 SELECT sum(a+b), sum(a+b) FILTER (WHERE a < d), sum(a+b) FILTER (WHERE a = c) FROM data GROUP BY d 367 ---- 368 11000 NULL 1100 369 11000 650 1100 370 11000 1400 1100 371 11000 3200 1100 372 11000 2250 1100 373 11000 4250 1100 374 11000 5400 1100 375 11000 6650 1100 376 11000 8000 1100 377 11000 9450 1100 378 379 # Same query but restricted to a single range; no local aggregation stage. 380 query RRR rowsort 381 SELECT sum(a+b), sum(a+b) FILTER (WHERE a < d), sum(a+b) FILTER (WHERE a = c) FROM data WHERE a = 1 GROUP BY d 382 ---- 383 650 NULL 65 384 650 650 65 385 650 650 65 386 650 650 65 387 650 650 65 388 650 650 65 389 650 650 65 390 650 650 65 391 650 650 65 392 650 650 65 393 394 query IIRT 395 VALUES (1, 2, 1.0, 'string1'), (4, 3, 2.3, 'string2') 396 ---- 397 1 2 1.0 string1 398 4 3 2.3 string2 399 400 query IIR 401 SELECT max(t.a), min(t.b), avg(t.c) FROM (VALUES (1, 2, 3), (4, 5, 6), (7, 8, 0)) AS t(a, b, c) WHERE b > 3 402 ---- 403 7 5 3 404 405 query ITIR 406 SELECT * FROM (VALUES (1, '222'), (2, '444')) t1(a,b) JOIN (VALUES (1, 100.0), (3, 32.0)) t2(a,b) ON t1.a = t2.a 407 ---- 408 1 222 1 100.0 409 410 statement ok 411 CREATE TABLE nullables (a INT, b INT, c INT, PRIMARY KEY (a)) 412 413 statement ok 414 INSERT INTO nullables VALUES (1,1,1) 415 416 statement ok 417 INSERT INTO nullables VALUES (2,NULL,1) 418 419 query II 420 SELECT c, count(*) FROM nullables GROUP BY c; 421 ---- 422 1 2 423 424 query T 425 SELECT array_agg(a) FROM (SELECT a FROM data WHERE b = 1 AND c = 1.0 AND d = 1.0 ORDER BY a) 426 ---- 427 {1,2,3,4,5,6,7,8,9,10} 428 429 query T 430 SELECT array_agg(ab) FROM (SELECT a*b AS ab FROM data WHERE c = 1.0 AND d = 1.0 ORDER BY a*b) 431 ---- 432 {1,2,2,3,3,4,4,4,5,5,6,6,6,6,7,7,8,8,8,8,9,9,9,10,10,10,10,12,12,12,12,14,14,15,15,16,16,16,18,18,18,18,20,20,20,20,21,21,24,24,24,24,25,27,27,28,28,30,30,30,30,32,32,35,35,36,36,36,40,40,40,40,42,42,45,45,48,48,49,50,50,54,54,56,56,60,60,63,63,64,70,70,72,72,80,80,81,90,90,100} 433 434 query T 435 SELECT json_agg(a) FROM (SELECT a FROM data WHERE b = 1 AND c = 1.0 AND d = 1.0 ORDER BY a) 436 ---- 437 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 438 439 query T 440 SELECT jsonb_agg(a) FROM (SELECT a FROM data WHERE b = 1 AND c = 1.0 AND d = 1.0 ORDER BY a) 441 ---- 442 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 443 444 # Test that orderings on GROUP BY columns are propagated through aggregations. 445 statement ok 446 CREATE TABLE sorted_data (a INT PRIMARY KEY, b INT, c FLOAT, INDEX foo(b)) 447 448 statement ok 449 INSERT INTO sorted_data VALUES 450 (1, 4, 5.0), 451 (2, 3, 3.4), 452 (3, 9, 2.2), 453 (4, 13, 1.99), 454 (5, 2, 5.7), 455 (6, 7, 6.2), 456 (7, 9, 8.9), 457 (8, 1, 1.22), 458 (9, -2, 23.0), 459 (10, 100, -3.1) 460 461 # Split into ten parts. 462 statement ok 463 ALTER TABLE sorted_data SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i) 464 465 # Relocate the ten parts to the five nodes. 466 statement ok 467 ALTER TABLE sorted_data EXPERIMENTAL_RELOCATE 468 SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i) 469 470 # The ordering is on all the GROUP BY columns, and isn't preserved after the 471 # aggregation. 472 query II rowsort 473 SELECT a, max(b) FROM sorted_data GROUP BY a 474 ---- 475 6 7 476 7 9 477 10 100 478 1 4 479 2 3 480 3 9 481 4 13 482 5 2 483 8 1 484 9 -2 485 486 # The ordering is on all the GROUP BY columns, and is preserved after the 487 # aggregation. 488 query II 489 SELECT a, max(b) FROM sorted_data GROUP BY a ORDER BY a 490 ---- 491 1 4 492 2 3 493 3 9 494 4 13 495 5 2 496 6 7 497 7 9 498 8 1 499 9 -2 500 10 100 501 502 # The ordering is on some of the GROUP BY columns, and isn't preserved after 503 # the aggregation. 504 query RII rowsort 505 SELECT c, min(b), a FROM sorted_data GROUP BY a, c 506 ---- 507 8.9 9 7 508 1.99 13 4 509 1.22 1 8 510 3.4 3 2 511 2.2 9 3 512 -3.1 100 10 513 23 -2 9 514 5 4 1 515 5.7 2 5 516 6.2 7 6 517 518 # The ordering is on some of the GROUP BY columns, and is preserved after 519 # the aggregation. 520 query RII 521 SELECT c, min(b), a FROM sorted_data GROUP BY a, c ORDER BY a 522 ---- 523 5 4 1 524 3.4 3 2 525 2.2 9 3 526 1.99 13 4 527 5.7 2 5 528 6.2 7 6 529 8.9 9 7 530 1.22 1 8 531 23 -2 9 532 -3.1 100 10 533 534 # If the underlying ordering isn't from the primary index, it needs to be hinted 535 # for now. 536 query IR rowsort 537 SELECT b, max(c) FROM sorted_data@foo GROUP BY b 538 ---- 539 -2 23 540 1 1.22 541 2 5.7 542 3 3.4 543 4 5 544 7 6.2 545 9 8.9 546 13 1.99 547 100 -3.1 548 549 # Test that a merge join is used on two aggregate subqueries with orderings on 550 # the GROUP BY columns. Note that an ORDER BY is not necessary on the 551 # subqueries. 552 query IRIR rowsort 553 SELECT * FROM (SELECT a, max(c) FROM sorted_data GROUP BY a) JOIN (SELECT b, min(c) FROM sorted_data@foo GROUP BY b) ON a = b 554 ---- 555 1 5 1 1.22 556 2 3.4 2 5.7 557 3 2.2 3 3.4 558 4 1.99 4 5 559 9 23 9 2.2 560 7 8.9 7 6.2 561 562 # Test that zeroNode is being handled correctly. 563 query R 564 SELECT sum(a) FROM data WHERE FALSE 565 ---- 566 NULL 567 568 # Test that statistics aggregate functions. 569 statement ok 570 CREATE TABLE statistics_agg_test (y INT, x INT) 571 572 statement ok 573 INSERT INTO statistics_agg_test SELECT y, y%10 FROM generate_series(1, 100) AS y 574 575 query R 576 SELECT corr(y, x)::decimal FROM statistics_agg_test 577 ---- 578 0.045228963191363145 579 580 # Regression test for #37211 (incorrect ordering between aggregator stages). 581 statement ok 582 CREATE TABLE uv (u INT PRIMARY KEY, v INT); 583 INSERT INTO uv SELECT x, x*10 FROM generate_series(2, 8) AS g(x); 584 585 query R 586 SELECT sum(v) FROM data INNER LOOKUP JOIN uv ON (a=u) GROUP BY u ORDER BY u 587 ---- 588 20000 589 30000 590 40000 591 50000 592 60000 593 70000 594 80000