github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/memo/testdata/stats/groupby (about) 1 exec-ddl 2 CREATE TABLE a (x INT PRIMARY KEY, y INT, z FLOAT NOT NULL, s STRING, UNIQUE (s DESC, z)) 3 ---- 4 5 exec-ddl 6 ALTER TABLE a INJECT STATISTICS '[ 7 { 8 "columns": ["x"], 9 "created_at": "2018-01-01 1:00:00.00000+00:00", 10 "row_count": 2000, 11 "distinct_count": 2000 12 }, 13 { 14 "columns": ["y"], 15 "created_at": "2018-01-01 1:30:00.00000+00:00", 16 "row_count": 2000, 17 "distinct_count": 400 18 }, 19 { 20 "columns": ["s"], 21 "created_at": "2018-01-01 1:30:00.00000+00:00", 22 "row_count": 2000, 23 "distinct_count": 10 24 }, 25 { 26 "columns": ["s","y","z"], 27 "created_at": "2018-01-01 1:40:00.00000+00:00", 28 "row_count": 2000, 29 "distinct_count": 600 30 } 31 ]' 32 ---- 33 34 # No aggregate. 35 build 36 SELECT x FROM a GROUP BY x, y 37 ---- 38 project 39 ├── columns: x:1(int!null) 40 ├── stats: [rows=2000] 41 ├── key: (1) 42 └── group-by 43 ├── columns: x:1(int!null) y:2(int) 44 ├── grouping columns: x:1(int!null) y:2(int) 45 ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0] 46 ├── key: (1) 47 ├── fd: (1)-->(2) 48 └── project 49 ├── columns: x:1(int!null) y:2(int) 50 ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0] 51 ├── key: (1) 52 ├── fd: (1)-->(2) 53 └── scan a 54 ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) 55 ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0] 56 ├── key: (1) 57 └── fd: (1)-->(2-4), (3,4)~~>(1,2) 58 59 # Group by single column key. 60 build 61 SELECT max(y) FROM a GROUP BY x 62 ---- 63 project 64 ├── columns: max:5(int) 65 ├── stats: [rows=2000] 66 └── group-by 67 ├── columns: x:1(int!null) max:5(int) 68 ├── grouping columns: x:1(int!null) 69 ├── stats: [rows=2000, distinct(1)=2000, null(1)=0] 70 ├── key: (1) 71 ├── fd: (1)-->(5) 72 ├── project 73 │ ├── columns: x:1(int!null) y:2(int) 74 │ ├── stats: [rows=2000, distinct(1)=2000, null(1)=0] 75 │ ├── key: (1) 76 │ ├── fd: (1)-->(2) 77 │ └── scan a 78 │ ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) 79 │ ├── stats: [rows=2000, distinct(1)=2000, null(1)=0] 80 │ ├── key: (1) 81 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 82 └── aggregations 83 └── max [as=max:5, type=int, outer=(2)] 84 └── y:2 [type=int] 85 86 # Group by non-key. 87 build 88 SELECT y, sum(z) FROM a GROUP BY y 89 ---- 90 group-by 91 ├── columns: y:2(int) sum:5(float!null) 92 ├── grouping columns: y:2(int) 93 ├── stats: [rows=400, distinct(2)=400, null(2)=0] 94 ├── key: (2) 95 ├── fd: (2)-->(5) 96 ├── project 97 │ ├── columns: y:2(int) z:3(float!null) 98 │ ├── stats: [rows=2000, distinct(2)=400, null(2)=0] 99 │ └── scan a 100 │ ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) 101 │ ├── stats: [rows=2000, distinct(2)=400, null(2)=0] 102 │ ├── key: (1) 103 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 104 └── aggregations 105 └── sum [as=sum:5, type=float, outer=(3)] 106 └── z:3 [type=float] 107 108 build 109 SELECT max(x) FROM a GROUP BY y, z, s 110 ---- 111 project 112 ├── columns: max:5(int!null) 113 ├── stats: [rows=600] 114 └── group-by 115 ├── columns: y:2(int) z:3(float!null) s:4(string) max:5(int!null) 116 ├── grouping columns: y:2(int) z:3(float!null) s:4(string) 117 ├── stats: [rows=600, distinct(2-4)=600, null(2-4)=0] 118 ├── key: (2-4) 119 ├── fd: (3,4)~~>(2), (2-4)-->(5) 120 ├── scan a 121 │ ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) 122 │ ├── stats: [rows=2000, distinct(2-4)=600, null(2-4)=0] 123 │ ├── key: (1) 124 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 125 └── aggregations 126 └── max [as=max:5, type=int, outer=(1)] 127 └── x:1 [type=int] 128 129 build 130 SELECT min(x) FROM a GROUP BY y, z 131 ---- 132 project 133 ├── columns: min:5(int!null) 134 ├── stats: [rows=2000] 135 └── group-by 136 ├── columns: y:2(int) z:3(float!null) min:5(int!null) 137 ├── grouping columns: y:2(int) z:3(float!null) 138 ├── stats: [rows=2000, distinct(2,3)=2000, null(2,3)=0] 139 ├── key: (2,3) 140 ├── fd: (2,3)-->(5) 141 ├── project 142 │ ├── columns: x:1(int!null) y:2(int) z:3(float!null) 143 │ ├── stats: [rows=2000, distinct(2,3)=2000, null(2,3)=0] 144 │ ├── key: (1) 145 │ ├── fd: (1)-->(2,3) 146 │ └── scan a 147 │ ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) 148 │ ├── stats: [rows=2000, distinct(2,3)=2000, null(2,3)=0] 149 │ ├── key: (1) 150 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 151 └── aggregations 152 └── min [as=min:5, type=int, outer=(1)] 153 └── x:1 [type=int] 154 155 build 156 SELECT max(x) FROM a GROUP BY y, z, s HAVING s IN ('a', 'b') 157 ---- 158 project 159 ├── columns: max:5(int!null) 160 ├── stats: [rows=120] 161 └── select 162 ├── columns: y:2(int) z:3(float!null) s:4(string!null) max:5(int!null) 163 ├── stats: [rows=120, distinct(4)=2, null(4)=0] 164 ├── key: (3,4) 165 ├── fd: (3,4)-->(2), (2-4)-->(5) 166 ├── group-by 167 │ ├── columns: y:2(int) z:3(float!null) s:4(string) max:5(int!null) 168 │ ├── grouping columns: y:2(int) z:3(float!null) s:4(string) 169 │ ├── stats: [rows=600, distinct(3)=200, null(3)=0, distinct(4)=10, null(4)=0, distinct(5)=600, null(5)=0, distinct(2-4)=600, null(2-4)=0] 170 │ ├── key: (2-4) 171 │ ├── fd: (3,4)~~>(2), (2-4)-->(5) 172 │ ├── scan a 173 │ │ ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) 174 │ │ ├── stats: [rows=2000, distinct(3)=200, null(3)=0, distinct(4)=10, null(4)=0, distinct(2-4)=600, null(2-4)=0] 175 │ │ ├── key: (1) 176 │ │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 177 │ └── aggregations 178 │ └── max [as=max:5, type=int, outer=(1)] 179 │ └── x:1 [type=int] 180 └── filters 181 └── s:4 IN ('a', 'b') [type=bool, outer=(4), constraints=(/4: [/'a' - /'a'] [/'b' - /'b']; tight)] 182 183 # Estimate the distinct count for an aggregate column. 184 build 185 SELECT sum(x), s FROM a GROUP BY s HAVING sum(x) = 5 186 ---- 187 select 188 ├── columns: sum:5(decimal!null) s:4(string) 189 ├── stats: [rows=1, distinct(5)=1, null(5)=0] 190 ├── key: (4) 191 ├── fd: ()-->(5) 192 ├── group-by 193 │ ├── columns: s:4(string) sum:5(decimal!null) 194 │ ├── grouping columns: s:4(string) 195 │ ├── stats: [rows=10, distinct(4)=10, null(4)=0, distinct(5)=10, null(5)=0] 196 │ ├── key: (4) 197 │ ├── fd: (4)-->(5) 198 │ ├── project 199 │ │ ├── columns: x:1(int!null) s:4(string) 200 │ │ ├── stats: [rows=2000, distinct(4)=10, null(4)=0] 201 │ │ ├── key: (1) 202 │ │ ├── fd: (1)-->(4) 203 │ │ └── scan a 204 │ │ ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) 205 │ │ ├── stats: [rows=2000, distinct(4)=10, null(4)=0] 206 │ │ ├── key: (1) 207 │ │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 208 │ └── aggregations 209 │ └── sum [as=sum:5, type=decimal, outer=(1)] 210 │ └── x:1 [type=int] 211 └── filters 212 └── sum:5 = 5 [type=bool, outer=(5), constraints=(/5: [/5 - /5]; tight), fd=()-->(5)] 213 214 # Scalar GroupBy. 215 build 216 SELECT max(y), sum(z) FROM a HAVING sum(z) = 5.0 217 ---- 218 select 219 ├── columns: max:5(int) sum:6(float!null) 220 ├── cardinality: [0 - 1] 221 ├── stats: [rows=1, distinct(6)=1, null(6)=0] 222 ├── key: () 223 ├── fd: ()-->(5,6) 224 ├── scalar-group-by 225 │ ├── columns: max:5(int) sum:6(float) 226 │ ├── cardinality: [1 - 1] 227 │ ├── stats: [rows=1, distinct(6)=1, null(6)=0] 228 │ ├── key: () 229 │ ├── fd: ()-->(5,6) 230 │ ├── project 231 │ │ ├── columns: y:2(int) z:3(float!null) 232 │ │ ├── stats: [rows=2000] 233 │ │ └── scan a 234 │ │ ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) 235 │ │ ├── stats: [rows=2000] 236 │ │ ├── key: (1) 237 │ │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 238 │ └── aggregations 239 │ ├── max [as=max:5, type=int, outer=(2)] 240 │ │ └── y:2 [type=int] 241 │ └── sum [as=sum:6, type=float, outer=(3)] 242 │ └── z:3 [type=float] 243 └── filters 244 └── sum:6 = 5.0 [type=bool, outer=(6), constraints=(/6: [/5.0 - /5.0]; tight), fd=()-->(6)] 245 246 # Bump up null counts. 247 exec-ddl 248 ALTER TABLE a INJECT STATISTICS '[ 249 { 250 "columns": ["x"], 251 "created_at": "2018-01-01 2:00:00.00000+00:00", 252 "row_count": 2000, 253 "distinct_count": 2000 254 }, 255 { 256 "columns": ["y"], 257 "created_at": "2018-01-01 2:00:00.00000+00:00", 258 "row_count": 2000, 259 "distinct_count": 400, 260 "null_count": 1000 261 }, 262 { 263 "columns": ["s"], 264 "created_at": "2018-01-01 2:00:00.00000+00:00", 265 "row_count": 2000, 266 "distinct_count": 10, 267 "null_count": 1000 268 }, 269 { 270 "columns": ["s","y","z"], 271 "created_at": "2018-01-01 2:10:00.00000+00:00", 272 "row_count": 2000, 273 "distinct_count": 600, 274 "null_count": 1100 275 } 276 ]' 277 ---- 278 279 # No aggregate. 280 build 281 SELECT x FROM a GROUP BY x, y 282 ---- 283 project 284 ├── columns: x:1(int!null) 285 ├── stats: [rows=2000] 286 ├── key: (1) 287 └── group-by 288 ├── columns: x:1(int!null) y:2(int) 289 ├── grouping columns: x:1(int!null) y:2(int) 290 ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0] 291 ├── key: (1) 292 ├── fd: (1)-->(2) 293 └── project 294 ├── columns: x:1(int!null) y:2(int) 295 ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0] 296 ├── key: (1) 297 ├── fd: (1)-->(2) 298 └── scan a 299 ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) 300 ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0] 301 ├── key: (1) 302 └── fd: (1)-->(2-4), (3,4)~~>(1,2) 303 304 # Group by single column key. 305 build 306 SELECT max(y) FROM a GROUP BY x 307 ---- 308 project 309 ├── columns: max:5(int) 310 ├── stats: [rows=2000] 311 └── group-by 312 ├── columns: x:1(int!null) max:5(int) 313 ├── grouping columns: x:1(int!null) 314 ├── stats: [rows=2000, distinct(1)=2000, null(1)=0] 315 ├── key: (1) 316 ├── fd: (1)-->(5) 317 ├── project 318 │ ├── columns: x:1(int!null) y:2(int) 319 │ ├── stats: [rows=2000, distinct(1)=2000, null(1)=0] 320 │ ├── key: (1) 321 │ ├── fd: (1)-->(2) 322 │ └── scan a 323 │ ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) 324 │ ├── stats: [rows=2000, distinct(1)=2000, null(1)=0] 325 │ ├── key: (1) 326 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 327 └── aggregations 328 └── max [as=max:5, type=int, outer=(2)] 329 └── y:2 [type=int] 330 331 # Group by non-key. 332 build 333 SELECT y, sum(z) FROM a GROUP BY y 334 ---- 335 group-by 336 ├── columns: y:2(int) sum:5(float!null) 337 ├── grouping columns: y:2(int) 338 ├── stats: [rows=400, distinct(2)=400, null(2)=1] 339 ├── key: (2) 340 ├── fd: (2)-->(5) 341 ├── project 342 │ ├── columns: y:2(int) z:3(float!null) 343 │ ├── stats: [rows=2000, distinct(2)=400, null(2)=1000] 344 │ └── scan a 345 │ ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) 346 │ ├── stats: [rows=2000, distinct(2)=400, null(2)=1000] 347 │ ├── key: (1) 348 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 349 └── aggregations 350 └── sum [as=sum:5, type=float, outer=(3)] 351 └── z:3 [type=float] 352 353 build 354 SELECT max(x) FROM a GROUP BY y, z, s 355 ---- 356 project 357 ├── columns: max:5(int!null) 358 ├── stats: [rows=600] 359 └── group-by 360 ├── columns: y:2(int) z:3(float!null) s:4(string) max:5(int!null) 361 ├── grouping columns: y:2(int) z:3(float!null) s:4(string) 362 ├── stats: [rows=600, distinct(2-4)=600, null(2-4)=0] 363 ├── key: (2-4) 364 ├── fd: (3,4)~~>(2), (2-4)-->(5) 365 ├── scan a 366 │ ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) 367 │ ├── stats: [rows=2000, distinct(2-4)=600, null(2-4)=0] 368 │ ├── key: (1) 369 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 370 └── aggregations 371 └── max [as=max:5, type=int, outer=(1)] 372 └── x:1 [type=int] 373 374 build 375 SELECT min(x) FROM a GROUP BY y, z 376 ---- 377 project 378 ├── columns: min:5(int!null) 379 ├── stats: [rows=2000] 380 └── group-by 381 ├── columns: y:2(int) z:3(float!null) min:5(int!null) 382 ├── grouping columns: y:2(int) z:3(float!null) 383 ├── stats: [rows=2000, distinct(2,3)=2000, null(2,3)=0] 384 ├── key: (2,3) 385 ├── fd: (2,3)-->(5) 386 ├── project 387 │ ├── columns: x:1(int!null) y:2(int) z:3(float!null) 388 │ ├── stats: [rows=2000, distinct(2,3)=2000, null(2,3)=0] 389 │ ├── key: (1) 390 │ ├── fd: (1)-->(2,3) 391 │ └── scan a 392 │ ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) 393 │ ├── stats: [rows=2000, distinct(2,3)=2000, null(2,3)=0] 394 │ ├── key: (1) 395 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 396 └── aggregations 397 └── min [as=min:5, type=int, outer=(1)] 398 └── x:1 [type=int] 399 400 build 401 SELECT max(x) FROM a GROUP BY y, z, s HAVING s IN ('a', 'b') 402 ---- 403 project 404 ├── columns: max:5(int!null) 405 ├── stats: [rows=133.111111] 406 └── select 407 ├── columns: y:2(int) z:3(float!null) s:4(string!null) max:5(int!null) 408 ├── stats: [rows=133.111111, distinct(4)=2, null(4)=0] 409 ├── key: (3,4) 410 ├── fd: (3,4)-->(2), (2-4)-->(5) 411 ├── group-by 412 │ ├── columns: y:2(int) z:3(float!null) s:4(string) max:5(int!null) 413 │ ├── grouping columns: y:2(int) z:3(float!null) s:4(string) 414 │ ├── stats: [rows=600, distinct(3)=200, null(3)=0, distinct(4)=10, null(4)=1, distinct(5)=600, null(5)=0, distinct(2-4)=600, null(2-4)=0] 415 │ ├── key: (2-4) 416 │ ├── fd: (3,4)~~>(2), (2-4)-->(5) 417 │ ├── scan a 418 │ │ ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) 419 │ │ ├── stats: [rows=2000, distinct(3)=200, null(3)=0, distinct(4)=10, null(4)=1000, distinct(2-4)=600, null(2-4)=0] 420 │ │ ├── key: (1) 421 │ │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 422 │ └── aggregations 423 │ └── max [as=max:5, type=int, outer=(1)] 424 │ └── x:1 [type=int] 425 └── filters 426 └── s:4 IN ('a', 'b') [type=bool, outer=(4), constraints=(/4: [/'a' - /'a'] [/'b' - /'b']; tight)] 427 428 # Estimate the null count for an aggregate column. 429 build 430 SELECT sum(x), s FROM a GROUP BY s HAVING sum(x) = 5 431 ---- 432 select 433 ├── columns: sum:5(decimal!null) s:4(string) 434 ├── stats: [rows=1, distinct(5)=1, null(5)=0] 435 ├── key: (4) 436 ├── fd: ()-->(5) 437 ├── group-by 438 │ ├── columns: s:4(string) sum:5(decimal!null) 439 │ ├── grouping columns: s:4(string) 440 │ ├── stats: [rows=10, distinct(4)=10, null(4)=1, distinct(5)=10, null(5)=0] 441 │ ├── key: (4) 442 │ ├── fd: (4)-->(5) 443 │ ├── project 444 │ │ ├── columns: x:1(int!null) s:4(string) 445 │ │ ├── stats: [rows=2000, distinct(4)=10, null(4)=1000] 446 │ │ ├── key: (1) 447 │ │ ├── fd: (1)-->(4) 448 │ │ └── scan a 449 │ │ ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) 450 │ │ ├── stats: [rows=2000, distinct(4)=10, null(4)=1000] 451 │ │ ├── key: (1) 452 │ │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 453 │ └── aggregations 454 │ └── sum [as=sum:5, type=decimal, outer=(1)] 455 │ └── x:1 [type=int] 456 └── filters 457 └── sum:5 = 5 [type=bool, outer=(5), constraints=(/5: [/5 - /5]; tight), fd=()-->(5)] 458 459 # Regression test for #36442. 460 norm 461 WITH q (a, b) AS (SELECT * FROM (VALUES (true, NULL), (false, NULL), (true, 5))) 462 SELECT 1 463 FROM q 464 WHERE q.a 465 GROUP BY q.b 466 HAVING bool_or(q.a) 467 ---- 468 project 469 ├── columns: "?column?":6(int!null) 470 ├── cardinality: [0 - 3] 471 ├── stats: [rows=1] 472 ├── fd: ()-->(6) 473 ├── select 474 │ ├── columns: b:4(int) bool_or:5(bool!null) 475 │ ├── cardinality: [0 - 3] 476 │ ├── stats: [rows=1, distinct(5)=1, null(5)=0] 477 │ ├── key: (4) 478 │ ├── fd: ()-->(5) 479 │ ├── group-by 480 │ │ ├── columns: b:4(int) bool_or:5(bool!null) 481 │ │ ├── grouping columns: b:4(int) 482 │ │ ├── cardinality: [0 - 3] 483 │ │ ├── stats: [rows=1.29289322, distinct(4)=1.29289322, null(4)=1, distinct(5)=1.29289322, null(5)=0] 484 │ │ ├── key: (4) 485 │ │ ├── fd: (4)-->(5) 486 │ │ ├── select 487 │ │ │ ├── columns: a:3(bool!null) b:4(int) 488 │ │ │ ├── cardinality: [0 - 3] 489 │ │ │ ├── stats: [rows=1.5, distinct(3)=1, null(3)=0, distinct(4)=1.29289322, null(4)=1] 490 │ │ │ ├── fd: ()-->(3) 491 │ │ │ ├── values 492 │ │ │ │ ├── columns: a:3(bool!null) b:4(int) 493 │ │ │ │ ├── cardinality: [3 - 3] 494 │ │ │ │ ├── stats: [rows=3, distinct(3)=2, null(3)=0, distinct(4)=2, null(4)=2] 495 │ │ │ │ ├── (true, NULL) [type=tuple{bool, int}] 496 │ │ │ │ ├── (false, NULL) [type=tuple{bool, int}] 497 │ │ │ │ └── (true, 5) [type=tuple{bool, int}] 498 │ │ │ └── filters 499 │ │ │ └── a:3 [type=bool, outer=(3), constraints=(/3: [/true - /true]; tight), fd=()-->(3)] 500 │ │ └── aggregations 501 │ │ └── bool-or [as=bool_or:5, type=bool, outer=(3)] 502 │ │ └── a:3 [type=bool] 503 │ └── filters 504 │ └── bool_or:5 [type=bool, outer=(5), constraints=(/5: [/true - /true]; tight), fd=()-->(5)] 505 └── projections 506 └── 1 [as="?column?":6, type=int]