github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/logictest/testdata/logic_test/distsql_stats (about)

     1  # LogicTest: 5node 5node-metadata
     2  
     3  # Disable automatic stats.
     4  statement ok
     5  SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false
     6  
     7  statement ok
     8  SET CLUSTER SETTING sql.stats.histogram_collection.enabled = false
     9  
    10  statement ok
    11  CREATE TABLE data (a INT, b INT, c FLOAT, d DECIMAL, e BOOL, PRIMARY KEY (a, b, c, d), INDEX c_idx (c, d))
    12  
    13  # Split into ten parts.
    14  statement ok
    15  ALTER TABLE data SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i)
    16  
    17  # Relocate the ten parts to the five nodes.
    18  statement ok
    19  ALTER TABLE data EXPERIMENTAL_RELOCATE
    20    SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i)
    21  
    22  # Generate all combinations of values 1 to 4.
    23  statement ok
    24  INSERT INTO data SELECT a, b, c::FLOAT, d::DECIMAL, (a+b+c+d) % 2 = 0 FROM
    25     generate_series(1, 4) AS a(a),
    26     generate_series(1, 4) AS b(b),
    27     generate_series(1, 4) AS c(c),
    28     generate_series(1, 4) AS d(d)
    29  
    30  # Verify data placement.
    31  query TTTI colnames,rowsort
    32  SELECT start_key, end_key, replicas, lease_holder FROM [SHOW RANGES FROM TABLE data]
    33  ----
    34  start_key  end_key  replicas  lease_holder
    35  NULL       /1       {1}       1
    36  /1         /2       {2}       2
    37  /2         /3       {3}       3
    38  /3         /4       {4}       4
    39  /4         /5       {5}       5
    40  /5         /6       {1}       1
    41  /6         /7       {2}       2
    42  /7         /8       {3}       3
    43  /8         /9       {4}       4
    44  /9         NULL     {5}       5
    45  
    46  statement ok
    47  CREATE STATISTICS s1 ON a FROM data
    48  
    49  query TTIIII colnames
    50  SELECT statistics_name, column_names, row_count, distinct_count, null_count, histogram_id
    51  FROM [SHOW STATISTICS FOR TABLE data]
    52  ----
    53  statistics_name  column_names  row_count  distinct_count  null_count  histogram_id
    54  s1               {a}           256        4               0           NULL
    55  
    56  statement ok
    57  SET CLUSTER SETTING sql.stats.histogram_collection.enabled = true
    58  
    59  statement ok
    60  CREATE STATISTICS s1 ON a FROM data
    61  
    62  query TTIIIB colnames
    63  SELECT
    64  	statistics_name,
    65  	column_names,
    66  	row_count,
    67  	distinct_count,
    68  	null_count,
    69  	histogram_id IS NOT NULL AS has_histogram
    70  FROM
    71  	[SHOW STATISTICS FOR TABLE data];
    72  ----
    73  statistics_name  column_names  row_count  distinct_count  null_count  has_histogram
    74  s1               {a}           256        4               0           true
    75  
    76  let $hist_id_1
    77  SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE data] WHERE statistics_name = 's1'
    78  
    79  query TIRI colnames
    80  SHOW HISTOGRAM $hist_id_1
    81  ----
    82  upper_bound  range_rows  distinct_range_rows  equal_rows
    83  1            0           0                    64
    84  2            0           0                    64
    85  3            0           0                    64
    86  4            0           0                    64
    87  
    88  statement ok
    89  CREATE STATISTICS "" ON b FROM data
    90  
    91  query TTIIIB colnames
    92  SELECT
    93  	statistics_name,
    94  	column_names,
    95  	row_count,
    96  	distinct_count,
    97  	null_count,
    98  	histogram_id IS NOT NULL AS has_histogram
    99  FROM
   100  	[SHOW STATISTICS FOR TABLE data];
   101  ----
   102  statistics_name  column_names  row_count  distinct_count  null_count  has_histogram
   103  s1               {a}           256        4               0           true
   104  NULL             {b}           256        4               0           true
   105  
   106  # Verify that we can package statistics into a json object and later restore them.
   107  let $json_stats
   108  SHOW STATISTICS USING JSON FOR TABLE data
   109  
   110  statement ok
   111  DELETE FROM system.table_statistics
   112  
   113  statement ok
   114  ALTER TABLE data INJECT STATISTICS '$json_stats'
   115  
   116  query TTIIIB colnames
   117  SELECT
   118  	statistics_name,
   119  	column_names,
   120  	row_count,
   121  	distinct_count,
   122  	null_count,
   123  	histogram_id IS NOT NULL AS has_histogram
   124  FROM
   125  	[SHOW STATISTICS FOR TABLE data];
   126  ----
   127  statistics_name  column_names  row_count  distinct_count  null_count  has_histogram
   128  s1               {a}           256        4               0           true
   129  NULL             {b}           256        4               0           true
   130  
   131  # Verify that any other statistics are blown away when we INJECT.
   132  statement ok
   133  CREATE STATISTICS s3 ON c FROM data
   134  
   135  query TTIII colnames
   136  SELECT statistics_name, column_names, row_count, distinct_count, null_count FROM [SHOW STATISTICS FOR TABLE data]
   137  ----
   138  statistics_name  column_names  row_count  distinct_count  null_count
   139  s1               {a}           256        4               0
   140  NULL             {b}           256        4               0
   141  s3               {c}           256        4               0
   142  
   143  statement ok
   144  ALTER TABLE data INJECT STATISTICS '$json_stats'
   145  
   146  query TTIII colnames
   147  SELECT statistics_name, column_names, row_count, distinct_count, null_count FROM [SHOW STATISTICS FOR TABLE data]
   148  ----
   149  statistics_name  column_names  row_count  distinct_count  null_count
   150  s1               {a}           256        4               0
   151  NULL             {b}           256        4               0
   152  
   153  # Test AS OF SYSTEM TIME
   154  
   155  statement error pgcode 42P01 relation "data" does not exist
   156  CREATE STATISTICS s2 ON a FROM data AS OF SYSTEM TIME '2017'
   157  
   158  statement ok
   159  CREATE STATISTICS s2 ON a FROM data AS OF SYSTEM TIME '-1us'
   160  
   161  query TTIII colnames
   162  SELECT statistics_name, column_names, row_count, distinct_count, null_count FROM [SHOW STATISTICS FOR TABLE data]
   163  ----
   164  statistics_name  column_names  row_count  distinct_count  null_count
   165  NULL             {b}           256        4               0
   166  s2               {a}           256        4               0
   167  
   168  #
   169  # Test default column statistics
   170  #
   171  
   172  # Disable multi-column stats to start.
   173  statement ok
   174  SET CLUSTER SETTING sql.stats.multi_column_collection.enabled = false
   175  
   176  statement ok
   177  CREATE STATISTICS s3 FROM data
   178  
   179  # With default column statistics, only index columns (plus boolean columns)
   180  # have a histogram_id (specifically the first column in each index).
   181  query TIIIB colnames
   182  SELECT column_names, row_count, distinct_count, null_count, histogram_id IS NOT NULL AS has_histogram
   183  FROM [SHOW STATISTICS FOR TABLE data]
   184  WHERE statistics_name = 's3'
   185  ----
   186  column_names  row_count  distinct_count  null_count  has_histogram
   187  {a}           256        4               0           true
   188  {c}           256        4               0           true
   189  {b}           256        4               0           false
   190  {d}           256        4               0           false
   191  {e}           256        2               0           true
   192  
   193  
   194  # Re-enable multi-column stats.
   195  statement ok
   196  SET CLUSTER SETTING sql.stats.multi_column_collection.enabled = true
   197  
   198  # Add indexes, including duplicate index on column c and columns (a, b).
   199  statement ok
   200  CREATE INDEX ON data (c DESC, b ASC); CREATE INDEX ON data (b DESC, a);
   201  
   202  statement ok
   203  CREATE STATISTICS s4 FROM data
   204  
   205  # Check that stats are only collected once per column.
   206  query TIII colnames
   207  SELECT column_names, row_count, distinct_count, null_count
   208  FROM [SHOW STATISTICS FOR TABLE data]
   209  WHERE statistics_name = 's4'
   210  ----
   211  column_names  row_count  distinct_count  null_count
   212  {a}           256        4               0
   213  {a,b}         256        16              0
   214  {a,b,c}       256        64              0
   215  {a,b,c,d}     256        256             0
   216  {c}           256        4               0
   217  {c,d}         256        16              0
   218  {c,b}         256        16              0
   219  {b}           256        4               0
   220  {d}           256        4               0
   221  {e}           256        2               0
   222  
   223  statement ok
   224  DROP INDEX data@c_idx; DROP INDEX data@data_c_b_idx
   225  
   226  statement ok
   227  CREATE STATISTICS s5 FROM [53]
   228  
   229  # We should still get stats for column c, but now column c is added later as a
   230  # non-index column, resulting in a different ordering of the rows.
   231  query TIII colnames
   232  SELECT column_names, row_count, distinct_count, null_count
   233  FROM [SHOW STATISTICS FOR TABLE data]
   234  WHERE statistics_name = 's5'
   235  ----
   236  column_names  row_count  distinct_count  null_count
   237  {a}           256        4               0
   238  {a,b}         256        16              0
   239  {a,b,c}       256        64              0
   240  {a,b,c,d}     256        256             0
   241  {b}           256        4               0
   242  {c}           256        4               0
   243  {d}           256        4               0
   244  {e}           256        2               0
   245  
   246  # Table with a hidden primary key and no other indexes.
   247  statement ok
   248  CREATE TABLE simple (x INT, y INT)
   249  
   250  statement ok
   251  CREATE STATISTICS default_stat1 FROM simple
   252  
   253  query TTIII colnames
   254  SELECT statistics_name, column_names, row_count, distinct_count, null_count
   255  FROM [SHOW STATISTICS FOR TABLE simple]
   256  ----
   257  statistics_name  column_names  row_count  distinct_count  null_count
   258  default_stat1    {rowid}       0          0               0
   259  default_stat1    {x}           0          0               0
   260  default_stat1    {y}           0          0               0
   261  
   262  # Add one null row.
   263  statement ok
   264  INSERT INTO simple VALUES (DEFAULT, DEFAULT)
   265  
   266  # Add an index.
   267  statement ok
   268  CREATE UNIQUE INDEX ON simple (y) STORING (x)
   269  
   270  statement ok
   271  CREATE STATISTICS default_stat2 FROM simple
   272  
   273  # Now stats are collected on the index column y before column x.
   274  query TTIII colnames
   275  SELECT statistics_name, column_names, row_count, distinct_count, null_count
   276  FROM [SHOW STATISTICS FOR TABLE simple]
   277  ----
   278  statistics_name  column_names  row_count  distinct_count  null_count
   279  default_stat2    {rowid}       1          1               0
   280  default_stat2    {y}           1          1               1
   281  default_stat2    {x}           1          1               1
   282  
   283  # Add a few more rows.
   284  statement ok
   285  INSERT INTO simple VALUES (DEFAULT, DEFAULT);
   286  INSERT INTO simple VALUES (0, DEFAULT);
   287  INSERT INTO simple VALUES (DEFAULT, 0);
   288  INSERT INTO simple VALUES (0, 1);
   289  
   290  # Add an index.
   291  statement ok
   292  CREATE INDEX ON simple (x, y)
   293  
   294  statement ok
   295  CREATE STATISTICS default_stat3 FROM simple
   296  
   297  query TTIII colnames
   298  SELECT statistics_name, column_names, row_count, distinct_count, null_count
   299  FROM [SHOW STATISTICS FOR TABLE simple]
   300  ----
   301  statistics_name  column_names  row_count  distinct_count  null_count
   302  default_stat3    {rowid}       5          5               0
   303  default_stat3    {y}           5          3               3
   304  default_stat3    {x}           5          2               3
   305  default_stat3    {x,y}         5          4               2
   306  
   307  let $hist_id_3
   308  SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE simple]
   309  WHERE statistics_name = 'default_stat3' AND column_names = '{y}'
   310  
   311  # The counts in each bucket should not include null values.
   312  query TIRI colnames
   313  SHOW HISTOGRAM $hist_id_3
   314  ----
   315  upper_bound  range_rows  distinct_range_rows  equal_rows
   316  0            0           0                    1
   317  1            0           0                    1
   318  
   319  #
   320  # Test numeric references
   321  #
   322  
   323  statement ok
   324  CREATE STATISTICS s6 ON a FROM [53]
   325  
   326  query TTIII colnames
   327  SELECT statistics_name, column_names, row_count, distinct_count, null_count
   328  FROM [SHOW STATISTICS FOR TABLE data]
   329  ----
   330  statistics_name  column_names  row_count  distinct_count  null_count
   331  s4               {c,d}         256        16              0
   332  s4               {c,b}         256        16              0
   333  s5               {a,b}         256        16              0
   334  s5               {a,b,c}       256        64              0
   335  s5               {a,b,c,d}     256        256             0
   336  s5               {b}           256        4               0
   337  s5               {c}           256        4               0
   338  s5               {d}           256        4               0
   339  s5               {e}           256        2               0
   340  s6               {a}           256        4               0
   341  
   342  # Combine default columns and numeric reference.
   343  statement ok
   344  CREATE STATISTICS __auto__ FROM [53]
   345  
   346  query TIII colnames
   347  SELECT column_names, row_count, distinct_count, null_count
   348  FROM [SHOW STATISTICS FOR TABLE data]
   349  WHERE statistics_name = '__auto__'
   350  ----
   351  column_names  row_count  distinct_count  null_count
   352  {a}           256        4               0
   353  {a,b}         256        16              0
   354  {a,b,c}       256        64              0
   355  {a,b,c,d}     256        256             0
   356  {b}           256        4               0
   357  {c}           256        4               0
   358  {d}           256        4               0
   359  {e}           256        2               0
   360  
   361  #
   362  # Test delete stats
   363  #
   364  
   365  statement ok
   366  DROP INDEX data@data_b_a_idx
   367  
   368  statement ok
   369  CREATE STATISTICS __auto__ FROM [53];
   370  CREATE STATISTICS __auto__ FROM [53];
   371  CREATE STATISTICS __auto__ FROM [53];
   372  CREATE STATISTICS __auto__ FROM [53];
   373  CREATE STATISTICS __auto__ FROM [53];
   374  CREATE STATISTICS __auto__ FROM [53];
   375  
   376  # Only the last 4-5 automatic stats should remain for each column.
   377  query TT colnames
   378  SELECT statistics_name, column_names
   379  FROM [SHOW STATISTICS FOR TABLE data] ORDER BY statistics_name, column_names::STRING
   380  ----
   381  statistics_name  column_names
   382  __auto__         {a,b,c,d}
   383  __auto__         {a,b,c,d}
   384  __auto__         {a,b,c,d}
   385  __auto__         {a,b,c,d}
   386  __auto__         {a,b,c,d}
   387  __auto__         {a,b,c}
   388  __auto__         {a,b,c}
   389  __auto__         {a,b,c}
   390  __auto__         {a,b,c}
   391  __auto__         {a,b,c}
   392  __auto__         {a,b}
   393  __auto__         {a,b}
   394  __auto__         {a,b}
   395  __auto__         {a,b}
   396  __auto__         {a,b}
   397  __auto__         {a}
   398  __auto__         {a}
   399  __auto__         {a}
   400  __auto__         {a}
   401  __auto__         {a}
   402  __auto__         {b}
   403  __auto__         {b}
   404  __auto__         {b}
   405  __auto__         {b}
   406  __auto__         {b}
   407  __auto__         {c}
   408  __auto__         {c}
   409  __auto__         {c}
   410  __auto__         {c}
   411  __auto__         {c}
   412  __auto__         {d}
   413  __auto__         {d}
   414  __auto__         {d}
   415  __auto__         {d}
   416  __auto__         {d}
   417  __auto__         {e}
   418  __auto__         {e}
   419  __auto__         {e}
   420  __auto__         {e}
   421  __auto__         {e}
   422  s4               {c,b}
   423  s4               {c,d}
   424  
   425  statement ok
   426  CREATE STATISTICS s7 ON a FROM [53]
   427  
   428  query TT colnames
   429  SELECT statistics_name, column_names
   430  FROM [SHOW STATISTICS FOR TABLE data] ORDER BY statistics_name, column_names::STRING
   431  ----
   432  statistics_name  column_names
   433  __auto__         {a,b,c,d}
   434  __auto__         {a,b,c,d}
   435  __auto__         {a,b,c,d}
   436  __auto__         {a,b,c,d}
   437  __auto__         {a,b,c,d}
   438  __auto__         {a,b,c}
   439  __auto__         {a,b,c}
   440  __auto__         {a,b,c}
   441  __auto__         {a,b,c}
   442  __auto__         {a,b,c}
   443  __auto__         {a,b}
   444  __auto__         {a,b}
   445  __auto__         {a,b}
   446  __auto__         {a,b}
   447  __auto__         {a,b}
   448  __auto__         {a}
   449  __auto__         {a}
   450  __auto__         {a}
   451  __auto__         {a}
   452  __auto__         {b}
   453  __auto__         {b}
   454  __auto__         {b}
   455  __auto__         {b}
   456  __auto__         {b}
   457  __auto__         {c}
   458  __auto__         {c}
   459  __auto__         {c}
   460  __auto__         {c}
   461  __auto__         {c}
   462  __auto__         {d}
   463  __auto__         {d}
   464  __auto__         {d}
   465  __auto__         {d}
   466  __auto__         {d}
   467  __auto__         {e}
   468  __auto__         {e}
   469  __auto__         {e}
   470  __auto__         {e}
   471  __auto__         {e}
   472  s4               {c,b}
   473  s4               {c,d}
   474  s7               {a}
   475  
   476  statement ok
   477  CREATE STATISTICS s8 ON a FROM [53]
   478  
   479  # s7 is deleted but the automatic stats remain.
   480  query TT colnames
   481  SELECT statistics_name, column_names
   482  FROM [SHOW STATISTICS FOR TABLE data] ORDER BY statistics_name, column_names::STRING
   483  ----
   484  statistics_name  column_names
   485  __auto__         {a,b,c,d}
   486  __auto__         {a,b,c,d}
   487  __auto__         {a,b,c,d}
   488  __auto__         {a,b,c,d}
   489  __auto__         {a,b,c,d}
   490  __auto__         {a,b,c}
   491  __auto__         {a,b,c}
   492  __auto__         {a,b,c}
   493  __auto__         {a,b,c}
   494  __auto__         {a,b,c}
   495  __auto__         {a,b}
   496  __auto__         {a,b}
   497  __auto__         {a,b}
   498  __auto__         {a,b}
   499  __auto__         {a,b}
   500  __auto__         {a}
   501  __auto__         {a}
   502  __auto__         {a}
   503  __auto__         {a}
   504  __auto__         {b}
   505  __auto__         {b}
   506  __auto__         {b}
   507  __auto__         {b}
   508  __auto__         {b}
   509  __auto__         {c}
   510  __auto__         {c}
   511  __auto__         {c}
   512  __auto__         {c}
   513  __auto__         {c}
   514  __auto__         {d}
   515  __auto__         {d}
   516  __auto__         {d}
   517  __auto__         {d}
   518  __auto__         {d}
   519  __auto__         {e}
   520  __auto__         {e}
   521  __auto__         {e}
   522  __auto__         {e}
   523  __auto__         {e}
   524  s4               {c,b}
   525  s4               {c,d}
   526  s8               {a}
   527  
   528  # Regression test for #33195.
   529  statement ok
   530  CREATE TABLE t (x int); INSERT INTO t VALUES (1); ALTER TABLE t DROP COLUMN x
   531  
   532  # Ensure that creating stats on a table with no columns does not cause a panic.
   533  statement ok
   534  CREATE STATISTICS s FROM t
   535  
   536  # Regression test for #35150.
   537  statement ok
   538  CREATE TABLE groups (data JSON); INSERT INTO groups VALUES ('{"data": {"domain": "github.com"}}')
   539  
   540  # Ensure that trying to create statistics on a JSON column gives an appropriate error.
   541  statement error CREATE STATISTICS is not supported for JSON columns
   542  CREATE STATISTICS s ON data FROM groups
   543  
   544  # The json column is not included in the default columns.
   545  statement ok
   546  CREATE STATISTICS s FROM groups
   547  
   548  query TT colnames
   549  SELECT statistics_name, column_names
   550  FROM [SHOW STATISTICS FOR TABLE groups] ORDER BY statistics_name, column_names::STRING
   551  ----
   552  statistics_name  column_names
   553  s                {rowid}
   554  
   555  # Regression test for #35764.
   556  statement ok
   557  CREATE TABLE users (
   558    profile_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
   559    last_updated TIMESTAMP DEFAULT now(),
   560    user_profile JSONB,
   561    INVERTED INDEX user_details (user_profile)
   562  )
   563  
   564  statement ok
   565  INSERT INTO users (user_profile) VALUES
   566    ('{"first_name": "Lola", "last_name": "Dog", "location": "NYC", "online" : true, "friends" : 547}'),
   567    ('{"first_name": "Ernie", "status": "Looking for treats", "location" : "Brooklyn"}'),
   568    ('{"first_name": "Carl", "last_name": "Kimball", "location": "NYC", "breed": "Boston Terrier"}'
   569  )
   570  
   571  # Ensure that trying to create statistics with default columns does not fail
   572  # when there is an inverted index.
   573  statement ok
   574  CREATE STATISTICS s FROM users
   575  
   576  query TTI colnames
   577  SELECT statistics_name, column_names, row_count
   578  FROM [SHOW STATISTICS FOR TABLE users] ORDER BY statistics_name, column_names::STRING
   579  ----
   580  statistics_name  column_names    row_count
   581  s                {last_updated}  3
   582  s                {profile_id}    3
   583  
   584  # Arrays are supported.
   585  statement ok
   586  CREATE TABLE arr (x INT[])
   587  
   588  statement ok
   589  INSERT INTO arr VALUES (ARRAY[1,2]), (ARRAY[1,2]), (ARRAY[3,4]), (NULL)
   590  
   591  statement ok
   592  CREATE STATISTICS arr_stats FROM arr
   593  
   594  query TTIII colnames
   595  SELECT statistics_name, column_names, row_count, distinct_count, null_count
   596  FROM [SHOW STATISTICS FOR TABLE arr] ORDER BY statistics_name, column_names::STRING
   597  ----
   598  statistics_name  column_names  row_count  distinct_count  null_count
   599  arr_stats        {rowid}       4          4               0
   600  arr_stats        {x}           4          3               1
   601  
   602  # Regression test for #46964. Do not try to create a histogram on the array column.
   603  statement ok
   604  CREATE STATISTICS arr_stats_x ON x FROM arr
   605  
   606  query TTIIIB colnames
   607  SELECT
   608    statistics_name,
   609    column_names,
   610    row_count,
   611    distinct_count,
   612    null_count,
   613    histogram_id IS NOT NULL AS has_histogram
   614  FROM [SHOW STATISTICS FOR TABLE arr]
   615  ORDER BY statistics_name, column_names::STRING
   616  ----
   617  statistics_name  column_names  row_count  distinct_count  null_count  has_histogram
   618  arr_stats        {rowid}       4          4               0           true
   619  arr_stats_x      {x}           4          3               1           false
   620  
   621  # Test that enum columns always have histograms collected for them.
   622  statement ok
   623  SET experimental_enable_enums=true;
   624  CREATE TYPE e AS ENUM ('hello', 'howdy', 'hi');
   625  CREATE TABLE et (x e, y e, PRIMARY KEY (x));
   626  INSERT INTO et VALUES ('hello', 'hello'), ('howdy', 'howdy'), ('hi', 'hi');
   627  CREATE STATISTICS s FROM et
   628  
   629  query TTIIB colnames,rowsort
   630  SELECT
   631    statistics_name,
   632    column_names,
   633    row_count,
   634    null_count,
   635    histogram_id IS NOT NULL AS has_histogram
   636  FROM
   637    [SHOW STATISTICS FOR TABLE et]
   638  ORDER BY
   639    column_names::STRING, created
   640  ----
   641  statistics_name  column_names  row_count  null_count  has_histogram
   642  s                {x}           3          0           true
   643  s                {y}           3          0           true