github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/logictest/testdata/logic_test/distsql_stats (about) 1 # LogicTest: 5node 5node-metadata 2 3 # Disable automatic stats. 4 statement ok 5 SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false 6 7 statement ok 8 SET CLUSTER SETTING sql.stats.histogram_collection.enabled = false 9 10 statement ok 11 CREATE TABLE data (a INT, b INT, c FLOAT, d DECIMAL, e BOOL, PRIMARY KEY (a, b, c, d), INDEX c_idx (c, d)) 12 13 # Split into ten parts. 14 statement ok 15 ALTER TABLE data SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i) 16 17 # Relocate the ten parts to the five nodes. 18 statement ok 19 ALTER TABLE data EXPERIMENTAL_RELOCATE 20 SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i) 21 22 # Generate all combinations of values 1 to 4. 23 statement ok 24 INSERT INTO data SELECT a, b, c::FLOAT, d::DECIMAL, (a+b+c+d) % 2 = 0 FROM 25 generate_series(1, 4) AS a(a), 26 generate_series(1, 4) AS b(b), 27 generate_series(1, 4) AS c(c), 28 generate_series(1, 4) AS d(d) 29 30 # Verify data placement. 31 query TTTI colnames,rowsort 32 SELECT start_key, end_key, replicas, lease_holder FROM [SHOW RANGES FROM TABLE data] 33 ---- 34 start_key end_key replicas lease_holder 35 NULL /1 {1} 1 36 /1 /2 {2} 2 37 /2 /3 {3} 3 38 /3 /4 {4} 4 39 /4 /5 {5} 5 40 /5 /6 {1} 1 41 /6 /7 {2} 2 42 /7 /8 {3} 3 43 /8 /9 {4} 4 44 /9 NULL {5} 5 45 46 statement ok 47 CREATE STATISTICS s1 ON a FROM data 48 49 query TTIIII colnames 50 SELECT statistics_name, column_names, row_count, distinct_count, null_count, histogram_id 51 FROM [SHOW STATISTICS FOR TABLE data] 52 ---- 53 statistics_name column_names row_count distinct_count null_count histogram_id 54 s1 {a} 256 4 0 NULL 55 56 statement ok 57 SET CLUSTER SETTING sql.stats.histogram_collection.enabled = true 58 59 statement ok 60 CREATE STATISTICS s1 ON a FROM data 61 62 query TTIIIB colnames 63 SELECT 64 statistics_name, 65 column_names, 66 row_count, 67 distinct_count, 68 null_count, 69 histogram_id IS NOT NULL AS has_histogram 70 FROM 71 [SHOW STATISTICS FOR TABLE data]; 72 ---- 73 statistics_name column_names row_count distinct_count null_count has_histogram 74 s1 {a} 256 4 0 true 75 76 let $hist_id_1 77 SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE data] WHERE statistics_name = 's1' 78 79 query TIRI colnames 80 SHOW HISTOGRAM $hist_id_1 81 ---- 82 upper_bound range_rows distinct_range_rows equal_rows 83 1 0 0 64 84 2 0 0 64 85 3 0 0 64 86 4 0 0 64 87 88 statement ok 89 CREATE STATISTICS "" ON b FROM data 90 91 query TTIIIB colnames 92 SELECT 93 statistics_name, 94 column_names, 95 row_count, 96 distinct_count, 97 null_count, 98 histogram_id IS NOT NULL AS has_histogram 99 FROM 100 [SHOW STATISTICS FOR TABLE data]; 101 ---- 102 statistics_name column_names row_count distinct_count null_count has_histogram 103 s1 {a} 256 4 0 true 104 NULL {b} 256 4 0 true 105 106 # Verify that we can package statistics into a json object and later restore them. 107 let $json_stats 108 SHOW STATISTICS USING JSON FOR TABLE data 109 110 statement ok 111 DELETE FROM system.table_statistics 112 113 statement ok 114 ALTER TABLE data INJECT STATISTICS '$json_stats' 115 116 query TTIIIB colnames 117 SELECT 118 statistics_name, 119 column_names, 120 row_count, 121 distinct_count, 122 null_count, 123 histogram_id IS NOT NULL AS has_histogram 124 FROM 125 [SHOW STATISTICS FOR TABLE data]; 126 ---- 127 statistics_name column_names row_count distinct_count null_count has_histogram 128 s1 {a} 256 4 0 true 129 NULL {b} 256 4 0 true 130 131 # Verify that any other statistics are blown away when we INJECT. 132 statement ok 133 CREATE STATISTICS s3 ON c FROM data 134 135 query TTIII colnames 136 SELECT statistics_name, column_names, row_count, distinct_count, null_count FROM [SHOW STATISTICS FOR TABLE data] 137 ---- 138 statistics_name column_names row_count distinct_count null_count 139 s1 {a} 256 4 0 140 NULL {b} 256 4 0 141 s3 {c} 256 4 0 142 143 statement ok 144 ALTER TABLE data INJECT STATISTICS '$json_stats' 145 146 query TTIII colnames 147 SELECT statistics_name, column_names, row_count, distinct_count, null_count FROM [SHOW STATISTICS FOR TABLE data] 148 ---- 149 statistics_name column_names row_count distinct_count null_count 150 s1 {a} 256 4 0 151 NULL {b} 256 4 0 152 153 # Test AS OF SYSTEM TIME 154 155 statement error pgcode 42P01 relation "data" does not exist 156 CREATE STATISTICS s2 ON a FROM data AS OF SYSTEM TIME '2017' 157 158 statement ok 159 CREATE STATISTICS s2 ON a FROM data AS OF SYSTEM TIME '-1us' 160 161 query TTIII colnames 162 SELECT statistics_name, column_names, row_count, distinct_count, null_count FROM [SHOW STATISTICS FOR TABLE data] 163 ---- 164 statistics_name column_names row_count distinct_count null_count 165 NULL {b} 256 4 0 166 s2 {a} 256 4 0 167 168 # 169 # Test default column statistics 170 # 171 172 # Disable multi-column stats to start. 173 statement ok 174 SET CLUSTER SETTING sql.stats.multi_column_collection.enabled = false 175 176 statement ok 177 CREATE STATISTICS s3 FROM data 178 179 # With default column statistics, only index columns (plus boolean columns) 180 # have a histogram_id (specifically the first column in each index). 181 query TIIIB colnames 182 SELECT column_names, row_count, distinct_count, null_count, histogram_id IS NOT NULL AS has_histogram 183 FROM [SHOW STATISTICS FOR TABLE data] 184 WHERE statistics_name = 's3' 185 ---- 186 column_names row_count distinct_count null_count has_histogram 187 {a} 256 4 0 true 188 {c} 256 4 0 true 189 {b} 256 4 0 false 190 {d} 256 4 0 false 191 {e} 256 2 0 true 192 193 194 # Re-enable multi-column stats. 195 statement ok 196 SET CLUSTER SETTING sql.stats.multi_column_collection.enabled = true 197 198 # Add indexes, including duplicate index on column c and columns (a, b). 199 statement ok 200 CREATE INDEX ON data (c DESC, b ASC); CREATE INDEX ON data (b DESC, a); 201 202 statement ok 203 CREATE STATISTICS s4 FROM data 204 205 # Check that stats are only collected once per column. 206 query TIII colnames 207 SELECT column_names, row_count, distinct_count, null_count 208 FROM [SHOW STATISTICS FOR TABLE data] 209 WHERE statistics_name = 's4' 210 ---- 211 column_names row_count distinct_count null_count 212 {a} 256 4 0 213 {a,b} 256 16 0 214 {a,b,c} 256 64 0 215 {a,b,c,d} 256 256 0 216 {c} 256 4 0 217 {c,d} 256 16 0 218 {c,b} 256 16 0 219 {b} 256 4 0 220 {d} 256 4 0 221 {e} 256 2 0 222 223 statement ok 224 DROP INDEX data@c_idx; DROP INDEX data@data_c_b_idx 225 226 statement ok 227 CREATE STATISTICS s5 FROM [53] 228 229 # We should still get stats for column c, but now column c is added later as a 230 # non-index column, resulting in a different ordering of the rows. 231 query TIII colnames 232 SELECT column_names, row_count, distinct_count, null_count 233 FROM [SHOW STATISTICS FOR TABLE data] 234 WHERE statistics_name = 's5' 235 ---- 236 column_names row_count distinct_count null_count 237 {a} 256 4 0 238 {a,b} 256 16 0 239 {a,b,c} 256 64 0 240 {a,b,c,d} 256 256 0 241 {b} 256 4 0 242 {c} 256 4 0 243 {d} 256 4 0 244 {e} 256 2 0 245 246 # Table with a hidden primary key and no other indexes. 247 statement ok 248 CREATE TABLE simple (x INT, y INT) 249 250 statement ok 251 CREATE STATISTICS default_stat1 FROM simple 252 253 query TTIII colnames 254 SELECT statistics_name, column_names, row_count, distinct_count, null_count 255 FROM [SHOW STATISTICS FOR TABLE simple] 256 ---- 257 statistics_name column_names row_count distinct_count null_count 258 default_stat1 {rowid} 0 0 0 259 default_stat1 {x} 0 0 0 260 default_stat1 {y} 0 0 0 261 262 # Add one null row. 263 statement ok 264 INSERT INTO simple VALUES (DEFAULT, DEFAULT) 265 266 # Add an index. 267 statement ok 268 CREATE UNIQUE INDEX ON simple (y) STORING (x) 269 270 statement ok 271 CREATE STATISTICS default_stat2 FROM simple 272 273 # Now stats are collected on the index column y before column x. 274 query TTIII colnames 275 SELECT statistics_name, column_names, row_count, distinct_count, null_count 276 FROM [SHOW STATISTICS FOR TABLE simple] 277 ---- 278 statistics_name column_names row_count distinct_count null_count 279 default_stat2 {rowid} 1 1 0 280 default_stat2 {y} 1 1 1 281 default_stat2 {x} 1 1 1 282 283 # Add a few more rows. 284 statement ok 285 INSERT INTO simple VALUES (DEFAULT, DEFAULT); 286 INSERT INTO simple VALUES (0, DEFAULT); 287 INSERT INTO simple VALUES (DEFAULT, 0); 288 INSERT INTO simple VALUES (0, 1); 289 290 # Add an index. 291 statement ok 292 CREATE INDEX ON simple (x, y) 293 294 statement ok 295 CREATE STATISTICS default_stat3 FROM simple 296 297 query TTIII colnames 298 SELECT statistics_name, column_names, row_count, distinct_count, null_count 299 FROM [SHOW STATISTICS FOR TABLE simple] 300 ---- 301 statistics_name column_names row_count distinct_count null_count 302 default_stat3 {rowid} 5 5 0 303 default_stat3 {y} 5 3 3 304 default_stat3 {x} 5 2 3 305 default_stat3 {x,y} 5 4 2 306 307 let $hist_id_3 308 SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE simple] 309 WHERE statistics_name = 'default_stat3' AND column_names = '{y}' 310 311 # The counts in each bucket should not include null values. 312 query TIRI colnames 313 SHOW HISTOGRAM $hist_id_3 314 ---- 315 upper_bound range_rows distinct_range_rows equal_rows 316 0 0 0 1 317 1 0 0 1 318 319 # 320 # Test numeric references 321 # 322 323 statement ok 324 CREATE STATISTICS s6 ON a FROM [53] 325 326 query TTIII colnames 327 SELECT statistics_name, column_names, row_count, distinct_count, null_count 328 FROM [SHOW STATISTICS FOR TABLE data] 329 ---- 330 statistics_name column_names row_count distinct_count null_count 331 s4 {c,d} 256 16 0 332 s4 {c,b} 256 16 0 333 s5 {a,b} 256 16 0 334 s5 {a,b,c} 256 64 0 335 s5 {a,b,c,d} 256 256 0 336 s5 {b} 256 4 0 337 s5 {c} 256 4 0 338 s5 {d} 256 4 0 339 s5 {e} 256 2 0 340 s6 {a} 256 4 0 341 342 # Combine default columns and numeric reference. 343 statement ok 344 CREATE STATISTICS __auto__ FROM [53] 345 346 query TIII colnames 347 SELECT column_names, row_count, distinct_count, null_count 348 FROM [SHOW STATISTICS FOR TABLE data] 349 WHERE statistics_name = '__auto__' 350 ---- 351 column_names row_count distinct_count null_count 352 {a} 256 4 0 353 {a,b} 256 16 0 354 {a,b,c} 256 64 0 355 {a,b,c,d} 256 256 0 356 {b} 256 4 0 357 {c} 256 4 0 358 {d} 256 4 0 359 {e} 256 2 0 360 361 # 362 # Test delete stats 363 # 364 365 statement ok 366 DROP INDEX data@data_b_a_idx 367 368 statement ok 369 CREATE STATISTICS __auto__ FROM [53]; 370 CREATE STATISTICS __auto__ FROM [53]; 371 CREATE STATISTICS __auto__ FROM [53]; 372 CREATE STATISTICS __auto__ FROM [53]; 373 CREATE STATISTICS __auto__ FROM [53]; 374 CREATE STATISTICS __auto__ FROM [53]; 375 376 # Only the last 4-5 automatic stats should remain for each column. 377 query TT colnames 378 SELECT statistics_name, column_names 379 FROM [SHOW STATISTICS FOR TABLE data] ORDER BY statistics_name, column_names::STRING 380 ---- 381 statistics_name column_names 382 __auto__ {a,b,c,d} 383 __auto__ {a,b,c,d} 384 __auto__ {a,b,c,d} 385 __auto__ {a,b,c,d} 386 __auto__ {a,b,c,d} 387 __auto__ {a,b,c} 388 __auto__ {a,b,c} 389 __auto__ {a,b,c} 390 __auto__ {a,b,c} 391 __auto__ {a,b,c} 392 __auto__ {a,b} 393 __auto__ {a,b} 394 __auto__ {a,b} 395 __auto__ {a,b} 396 __auto__ {a,b} 397 __auto__ {a} 398 __auto__ {a} 399 __auto__ {a} 400 __auto__ {a} 401 __auto__ {a} 402 __auto__ {b} 403 __auto__ {b} 404 __auto__ {b} 405 __auto__ {b} 406 __auto__ {b} 407 __auto__ {c} 408 __auto__ {c} 409 __auto__ {c} 410 __auto__ {c} 411 __auto__ {c} 412 __auto__ {d} 413 __auto__ {d} 414 __auto__ {d} 415 __auto__ {d} 416 __auto__ {d} 417 __auto__ {e} 418 __auto__ {e} 419 __auto__ {e} 420 __auto__ {e} 421 __auto__ {e} 422 s4 {c,b} 423 s4 {c,d} 424 425 statement ok 426 CREATE STATISTICS s7 ON a FROM [53] 427 428 query TT colnames 429 SELECT statistics_name, column_names 430 FROM [SHOW STATISTICS FOR TABLE data] ORDER BY statistics_name, column_names::STRING 431 ---- 432 statistics_name column_names 433 __auto__ {a,b,c,d} 434 __auto__ {a,b,c,d} 435 __auto__ {a,b,c,d} 436 __auto__ {a,b,c,d} 437 __auto__ {a,b,c,d} 438 __auto__ {a,b,c} 439 __auto__ {a,b,c} 440 __auto__ {a,b,c} 441 __auto__ {a,b,c} 442 __auto__ {a,b,c} 443 __auto__ {a,b} 444 __auto__ {a,b} 445 __auto__ {a,b} 446 __auto__ {a,b} 447 __auto__ {a,b} 448 __auto__ {a} 449 __auto__ {a} 450 __auto__ {a} 451 __auto__ {a} 452 __auto__ {b} 453 __auto__ {b} 454 __auto__ {b} 455 __auto__ {b} 456 __auto__ {b} 457 __auto__ {c} 458 __auto__ {c} 459 __auto__ {c} 460 __auto__ {c} 461 __auto__ {c} 462 __auto__ {d} 463 __auto__ {d} 464 __auto__ {d} 465 __auto__ {d} 466 __auto__ {d} 467 __auto__ {e} 468 __auto__ {e} 469 __auto__ {e} 470 __auto__ {e} 471 __auto__ {e} 472 s4 {c,b} 473 s4 {c,d} 474 s7 {a} 475 476 statement ok 477 CREATE STATISTICS s8 ON a FROM [53] 478 479 # s7 is deleted but the automatic stats remain. 480 query TT colnames 481 SELECT statistics_name, column_names 482 FROM [SHOW STATISTICS FOR TABLE data] ORDER BY statistics_name, column_names::STRING 483 ---- 484 statistics_name column_names 485 __auto__ {a,b,c,d} 486 __auto__ {a,b,c,d} 487 __auto__ {a,b,c,d} 488 __auto__ {a,b,c,d} 489 __auto__ {a,b,c,d} 490 __auto__ {a,b,c} 491 __auto__ {a,b,c} 492 __auto__ {a,b,c} 493 __auto__ {a,b,c} 494 __auto__ {a,b,c} 495 __auto__ {a,b} 496 __auto__ {a,b} 497 __auto__ {a,b} 498 __auto__ {a,b} 499 __auto__ {a,b} 500 __auto__ {a} 501 __auto__ {a} 502 __auto__ {a} 503 __auto__ {a} 504 __auto__ {b} 505 __auto__ {b} 506 __auto__ {b} 507 __auto__ {b} 508 __auto__ {b} 509 __auto__ {c} 510 __auto__ {c} 511 __auto__ {c} 512 __auto__ {c} 513 __auto__ {c} 514 __auto__ {d} 515 __auto__ {d} 516 __auto__ {d} 517 __auto__ {d} 518 __auto__ {d} 519 __auto__ {e} 520 __auto__ {e} 521 __auto__ {e} 522 __auto__ {e} 523 __auto__ {e} 524 s4 {c,b} 525 s4 {c,d} 526 s8 {a} 527 528 # Regression test for #33195. 529 statement ok 530 CREATE TABLE t (x int); INSERT INTO t VALUES (1); ALTER TABLE t DROP COLUMN x 531 532 # Ensure that creating stats on a table with no columns does not cause a panic. 533 statement ok 534 CREATE STATISTICS s FROM t 535 536 # Regression test for #35150. 537 statement ok 538 CREATE TABLE groups (data JSON); INSERT INTO groups VALUES ('{"data": {"domain": "github.com"}}') 539 540 # Ensure that trying to create statistics on a JSON column gives an appropriate error. 541 statement error CREATE STATISTICS is not supported for JSON columns 542 CREATE STATISTICS s ON data FROM groups 543 544 # The json column is not included in the default columns. 545 statement ok 546 CREATE STATISTICS s FROM groups 547 548 query TT colnames 549 SELECT statistics_name, column_names 550 FROM [SHOW STATISTICS FOR TABLE groups] ORDER BY statistics_name, column_names::STRING 551 ---- 552 statistics_name column_names 553 s {rowid} 554 555 # Regression test for #35764. 556 statement ok 557 CREATE TABLE users ( 558 profile_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), 559 last_updated TIMESTAMP DEFAULT now(), 560 user_profile JSONB, 561 INVERTED INDEX user_details (user_profile) 562 ) 563 564 statement ok 565 INSERT INTO users (user_profile) VALUES 566 ('{"first_name": "Lola", "last_name": "Dog", "location": "NYC", "online" : true, "friends" : 547}'), 567 ('{"first_name": "Ernie", "status": "Looking for treats", "location" : "Brooklyn"}'), 568 ('{"first_name": "Carl", "last_name": "Kimball", "location": "NYC", "breed": "Boston Terrier"}' 569 ) 570 571 # Ensure that trying to create statistics with default columns does not fail 572 # when there is an inverted index. 573 statement ok 574 CREATE STATISTICS s FROM users 575 576 query TTI colnames 577 SELECT statistics_name, column_names, row_count 578 FROM [SHOW STATISTICS FOR TABLE users] ORDER BY statistics_name, column_names::STRING 579 ---- 580 statistics_name column_names row_count 581 s {last_updated} 3 582 s {profile_id} 3 583 584 # Arrays are supported. 585 statement ok 586 CREATE TABLE arr (x INT[]) 587 588 statement ok 589 INSERT INTO arr VALUES (ARRAY[1,2]), (ARRAY[1,2]), (ARRAY[3,4]), (NULL) 590 591 statement ok 592 CREATE STATISTICS arr_stats FROM arr 593 594 query TTIII colnames 595 SELECT statistics_name, column_names, row_count, distinct_count, null_count 596 FROM [SHOW STATISTICS FOR TABLE arr] ORDER BY statistics_name, column_names::STRING 597 ---- 598 statistics_name column_names row_count distinct_count null_count 599 arr_stats {rowid} 4 4 0 600 arr_stats {x} 4 3 1 601 602 # Regression test for #46964. Do not try to create a histogram on the array column. 603 statement ok 604 CREATE STATISTICS arr_stats_x ON x FROM arr 605 606 query TTIIIB colnames 607 SELECT 608 statistics_name, 609 column_names, 610 row_count, 611 distinct_count, 612 null_count, 613 histogram_id IS NOT NULL AS has_histogram 614 FROM [SHOW STATISTICS FOR TABLE arr] 615 ORDER BY statistics_name, column_names::STRING 616 ---- 617 statistics_name column_names row_count distinct_count null_count has_histogram 618 arr_stats {rowid} 4 4 0 true 619 arr_stats_x {x} 4 3 1 false 620 621 # Test that enum columns always have histograms collected for them. 622 statement ok 623 SET experimental_enable_enums=true; 624 CREATE TYPE e AS ENUM ('hello', 'howdy', 'hi'); 625 CREATE TABLE et (x e, y e, PRIMARY KEY (x)); 626 INSERT INTO et VALUES ('hello', 'hello'), ('howdy', 'howdy'), ('hi', 'hi'); 627 CREATE STATISTICS s FROM et 628 629 query TTIIB colnames,rowsort 630 SELECT 631 statistics_name, 632 column_names, 633 row_count, 634 null_count, 635 histogram_id IS NOT NULL AS has_histogram 636 FROM 637 [SHOW STATISTICS FOR TABLE et] 638 ORDER BY 639 column_names::STRING, created 640 ---- 641 statistics_name column_names row_count null_count has_histogram 642 s {x} 3 0 true 643 s {y} 3 0 true