github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/exec/execbuilder/testdata/stats (about)

     1  # LogicTest: 5node
     2  
     3  # Tests that verify we retrieve the stats correctly. Note that we can't create
     4  # statistics if distsql mode is OFF.
     5  
     6  statement ok
     7  CREATE TABLE uv (u INT, v INT, INDEX (u) STORING (v), INDEX (v) STORING (u));
     8  INSERT INTO uv VALUES (1, 1), (1, 2), (1, 3), (1, 4), (2, 4), (2, 5), (2, 6), (2, 7)
     9  
    10  statement ok
    11  CREATE STATISTICS u ON u FROM uv;
    12  CREATE STATISTICS v ON v FROM uv
    13  
    14  statement ok
    15  set enable_zigzag_join = false
    16  
    17  # Verify we scan index v which has the more selective constraint.
    18  query TTTTT
    19  EXPLAIN (VERBOSE) SELECT * FROM uv WHERE u = 1 AND v = 1
    20  ----
    21  ·     distributed  true         ·       ·
    22  ·     vectorized   true         ·       ·
    23  scan  ·            ·            (u, v)  ·
    24  ·     table        uv@uv_v_idx  ·       ·
    25  ·     spans        /1-/2        ·       ·
    26  ·     filter       u = 1        ·       ·
    27  
    28  # Verify that injecting different statistics changes the plan.
    29  statement ok
    30  ALTER TABLE uv INJECT STATISTICS '[
    31    {
    32      "columns": ["u"],
    33      "created_at": "2018-01-01 1:00:00.00000+00:00",
    34      "row_count": 100,
    35      "distinct_count": 100
    36    },
    37    {
    38      "columns": ["v"],
    39      "created_at": "2018-01-01 1:00:00.00000+00:00",
    40      "row_count": 100,
    41      "distinct_count": 10
    42    }
    43  ]'
    44  
    45  query TTTTT
    46  EXPLAIN (VERBOSE) SELECT * FROM uv WHERE u = 1 AND v = 1
    47  ----
    48  ·     distributed  true         ·       ·
    49  ·     vectorized   true         ·       ·
    50  scan  ·            ·            (u, v)  ·
    51  ·     table        uv@uv_u_idx  ·       ·
    52  ·     spans        /1-/2        ·       ·
    53  ·     filter       v = 1        ·       ·
    54  
    55  # Verify that injecting different statistics with null counts
    56  # changes the plan.
    57  statement ok
    58  ALTER TABLE uv INJECT STATISTICS '[
    59    {
    60      "columns": ["u"],
    61      "created_at": "2018-01-01 1:00:00.00000+00:00",
    62      "row_count": 100,
    63      "distinct_count": 20,
    64      "null_count": 0
    65    },
    66    {
    67      "columns": ["v"],
    68      "created_at": "2018-01-01 1:00:00.00000+00:00",
    69      "row_count": 100,
    70      "distinct_count": 10,
    71      "null_count": 0
    72    }
    73  ]'
    74  
    75  query TTTTT
    76  EXPLAIN (VERBOSE) SELECT * FROM uv WHERE u = 1 AND v = 1
    77  ----
    78  ·     distributed  true         ·       ·
    79  ·     vectorized   true         ·       ·
    80  scan  ·            ·            (u, v)  ·
    81  ·     table        uv@uv_u_idx  ·       ·
    82  ·     spans        /1-/2        ·       ·
    83  ·     filter       v = 1        ·       ·
    84  
    85  statement ok
    86  ALTER TABLE uv INJECT STATISTICS '[
    87    {
    88      "columns": ["u"],
    89      "created_at": "2018-01-01 1:00:00.00000+00:00",
    90      "row_count": 100,
    91      "distinct_count": 20,
    92      "null_count": 0
    93    },
    94    {
    95      "columns": ["v"],
    96      "created_at": "2018-01-01 1:00:00.00000+00:00",
    97      "row_count": 100,
    98      "distinct_count": 10,
    99      "null_count": 90
   100    }
   101  ]'
   102  
   103  query TTTTT
   104  EXPLAIN (VERBOSE) SELECT * FROM uv WHERE u = 1 AND v = 1
   105  ----
   106  ·     distributed  true         ·       ·
   107  ·     vectorized   true         ·       ·
   108  scan  ·            ·            (u, v)  ·
   109  ·     table        uv@uv_v_idx  ·       ·
   110  ·     spans        /1-/2        ·       ·
   111  ·     filter       u = 1        ·       ·
   112  
   113  statement ok
   114  ALTER TABLE uv INJECT STATISTICS '[
   115    {
   116      "columns": ["u"],
   117      "created_at": "2018-01-01 1:00:00.00000+00:00",
   118      "row_count": 100,
   119      "distinct_count": 20,
   120      "null_count": 0,
   121      "histo_col_type":"INT4",
   122      "histo_buckets":[{
   123        "num_eq":50,
   124        "num_range":0,
   125        "distinct_range":0,
   126        "upper_bound":"1"
   127      },
   128      {
   129        "num_eq":20,
   130        "num_range":0,
   131        "distinct_range":0,
   132        "upper_bound":"2"
   133      },
   134      {
   135        "num_eq":5,
   136        "num_range":8,
   137        "distinct_range":7,
   138        "upper_bound":"10"
   139      },
   140      {
   141        "num_eq":5,
   142        "num_range":12,
   143        "distinct_range":9,
   144        "upper_bound":"20"
   145      }]
   146    },
   147    {
   148      "columns": ["v"],
   149      "created_at": "2018-01-01 1:00:00.00000+00:00",
   150      "row_count": 100,
   151      "distinct_count": 10,
   152      "null_count": 90
   153    },
   154    {
   155      "columns": ["u", "v"],
   156      "created_at": "2018-01-01 1:00:00.00000+00:00",
   157      "row_count": 100,
   158      "distinct_count": 25,
   159      "null_count": 90
   160    }
   161  ]'
   162  
   163  # Test that we respect the session settings for using histograms and
   164  # multi-column stats.
   165  statement ok
   166  set optimizer_use_histograms = false
   167  
   168  query T
   169  EXPLAIN (OPT, VERBOSE) SELECT * FROM uv WHERE u < 30 GROUP BY u, v
   170  ----
   171  distinct-on
   172   ├── columns: u:1 v:2
   173   ├── grouping columns: u:1 v:2
   174   ├── internal-ordering: +1
   175   ├── stats: [rows=20.0617284, distinct(1,2)=20.0617284, null(1,2)=0]
   176   ├── cost: 37.7306173
   177   ├── key: (1,2)
   178   └── scan uv@uv_u_idx
   179        ├── columns: u:1 v:2
   180        ├── constraint: /1/3: (/NULL - /29]
   181        ├── stats: [rows=33.3333333, distinct(1)=6.66666667, null(1)=0, distinct(1,2)=20.0617284, null(1,2)=0]
   182        ├── cost: 36.6766667
   183        ├── ordering: +1
   184        ├── prune: (2)
   185        └── interesting orderings: (+1) (+2)
   186  
   187  statement ok
   188  set optimizer_use_multicol_stats = false
   189  
   190  query T
   191  EXPLAIN (OPT, VERBOSE) SELECT * FROM uv WHERE u < 30 GROUP BY u, v
   192  ----
   193  distinct-on
   194   ├── columns: u:1 v:2
   195   ├── grouping columns: u:1 v:2
   196   ├── internal-ordering: +1
   197   ├── stats: [rows=33.3333333, distinct(1,2)=33.3333333, null(1,2)=0]
   198   ├── cost: 37.8633333
   199   ├── key: (1,2)
   200   └── scan uv@uv_u_idx
   201        ├── columns: u:1 v:2
   202        ├── constraint: /1/3: (/NULL - /29]
   203        ├── stats: [rows=33.3333333, distinct(1)=6.66666667, null(1)=0, distinct(1,2)=33.3333333, null(1,2)=0]
   204        ├── cost: 36.6766667
   205        ├── ordering: +1
   206        ├── prune: (2)
   207        └── interesting orderings: (+1) (+2)
   208  
   209  statement ok
   210  set optimizer_use_histograms = true
   211  
   212  query T
   213  EXPLAIN (OPT, VERBOSE) SELECT * FROM uv WHERE u < 30 GROUP BY u, v
   214  ----
   215  distinct-on
   216   ├── columns: u:1 v:2
   217   ├── grouping columns: u:1 v:2
   218   ├── internal-ordering: +1
   219   ├── stats: [rows=100, distinct(1,2)=100, null(1,2)=0]
   220   ├── cost: 113.53
   221   ├── key: (1,2)
   222   └── scan uv@uv_u_idx
   223        ├── columns: u:1 v:2
   224        ├── constraint: /1/3: (/NULL - /29]
   225        ├── stats: [rows=100, distinct(1)=20, null(1)=0, distinct(1,2)=100, null(1,2)=0]
   226        │   histogram(1)=  0 50  0 20  8  5   12  5
   227        │                <--- 1 --- 2 --- 10 ---- 20
   228        ├── cost: 110.01
   229        ├── ordering: +1
   230        ├── prune: (2)
   231        └── interesting orderings: (+1) (+2)
   232  
   233  statement ok
   234  set optimizer_use_multicol_stats = true
   235  
   236  query T
   237  EXPLAIN (OPT, VERBOSE) SELECT * FROM uv WHERE u < 30 GROUP BY u, v
   238  ----
   239  distinct-on
   240   ├── columns: u:1 v:2
   241   ├── grouping columns: u:1 v:2
   242   ├── internal-ordering: +1
   243   ├── stats: [rows=25, distinct(1,2)=25, null(1,2)=0]
   244   ├── cost: 112.78
   245   ├── key: (1,2)
   246   └── scan uv@uv_u_idx
   247        ├── columns: u:1 v:2
   248        ├── constraint: /1/3: (/NULL - /29]
   249        ├── stats: [rows=100, distinct(1)=20, null(1)=0, distinct(1,2)=25, null(1,2)=0]
   250        │   histogram(1)=  0 50  0 20  8  5   12  5
   251        │                <--- 1 --- 2 --- 10 ---- 20
   252        ├── cost: 110.01
   253        ├── ordering: +1
   254        ├── prune: (2)
   255        └── interesting orderings: (+1) (+2)