github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/memo/testdata/stats/index-join

github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/memo/testdata/stats/index-join (about)

     1  exec-ddl
     2  CREATE TABLE a (x INT PRIMARY KEY, y INT, s STRING, d DECIMAL NOT NULL, UNIQUE (s DESC, d))
     3  ----
     4  
     5  exec-ddl
     6  ALTER TABLE a INJECT STATISTICS '[
     7    {
     8      "columns": ["x"],
     9      "created_at": "2018-01-01 1:00:00.00000+00:00",
    10      "row_count": 2000,
    11      "distinct_count": 2000
    12    },
    13    {
    14      "columns": ["x","y"],
    15      "created_at": "2018-01-01 1:30:00.00000+00:00",
    16      "row_count": 2000,
    17      "distinct_count": 2000
    18    },
    19    {
    20      "columns": ["s"],
    21      "created_at": "2018-01-01 1:30:00.00000+00:00",
    22      "row_count": 2000,
    23      "distinct_count": 10
    24    },
    25    {
    26      "columns": ["y"],
    27      "created_at": "2018-01-01 1:40:00.00000+00:00",
    28      "row_count": 2000,
    29      "distinct_count": 100
    30    }
    31  ]'
    32  ----
    33  
    34  # In order to actually create new logical props for the index join, we
    35  # need to call ConstructIndexJoin, which only happens when there is a
    36  # remaining filter.
    37  opt
    38  SELECT count(*) FROM (SELECT * FROM a WHERE s = 'foo' AND x + y = 10) GROUP BY s, y
    39  ----
    40  project
    41   ├── columns: count:5(int!null)
    42   ├── stats: [rows=49.2384513]
    43   └── group-by
    44        ├── columns: y:2(int) count_rows:5(int!null)
    45        ├── grouping columns: y:2(int)
    46        ├── stats: [rows=49.2384513, distinct(2)=49.2384513, null(2)=0]
    47        ├── key: (2)
    48        ├── fd: (2)-->(5)
    49        ├── select
    50        │    ├── columns: x:1(int!null) y:2(int) s:3(string!null)
    51        │    ├── stats: [rows=66.6666667, distinct(2)=49.2384513, null(2)=0, distinct(3)=1, null(3)=0]
    52        │    ├── key: (1)
    53        │    ├── fd: ()-->(3), (1)-->(2)
    54        │    ├── index-join a
    55        │    │    ├── columns: x:1(int!null) y:2(int) s:3(string)
    56        │    │    ├── stats: [rows=200]
    57        │    │    ├── key: (1)
    58        │    │    ├── fd: ()-->(3), (1)-->(2)
    59        │    │    └── scan a@secondary
    60        │    │         ├── columns: x:1(int!null) s:3(string!null)
    61        │    │         ├── constraint: /-3/4: [/'foo' - /'foo']
    62        │    │         ├── stats: [rows=200, distinct(3)=1, null(3)=0]
    63        │    │         ├── key: (1)
    64        │    │         └── fd: ()-->(3)
    65        │    └── filters
    66        │         └── (x:1 + y:2) = 10 [type=bool, outer=(1,2)]
    67        └── aggregations
    68             └── count-rows [as=count_rows:5, type=int]
    69  
    70  opt colstat=1 colstat=2 colstat=3 colstat=4 colstat=(1,2,3)
    71  SELECT * FROM a WHERE s = 'foo' AND x + y = 10
    72  ----
    73  select
    74   ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
    75   ├── stats: [rows=66.6666667, distinct(1)=66.6666667, null(1)=0, distinct(2)=49.2384513, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=57.5057212, null(4)=0, distinct(1-3)=66.6666667, null(1-3)=0]
    76   ├── key: (1)
    77   ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2)
    78   ├── index-join a
    79   │    ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
    80   │    ├── stats: [rows=200]
    81   │    ├── key: (1)
    82   │    ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1), (3,4)~~>(1,2)
    83   │    └── scan a@secondary
    84   │         ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
    85   │         ├── constraint: /-3/4: [/'foo' - /'foo']
    86   │         ├── stats: [rows=200, distinct(3)=1, null(3)=0]
    87   │         ├── key: (1)
    88   │         └── fd: ()-->(3), (1)-->(4), (4)-->(1)
    89   └── filters
    90        └── (x:1 + y:2) = 10 [type=bool, outer=(1,2)]
    91  
    92  opt colstat=1 colstat=2 colstat=3 colstat=(1,2,3)
    93  SELECT * FROM a WHERE s = 'foo'
    94  ----
    95  index-join a
    96   ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
    97   ├── stats: [rows=200, distinct(1)=200, null(1)=0, distinct(2)=87.8423345, null(2)=0, distinct(3)=1, null(3)=0, distinct(1-3)=200, null(1-3)=0]
    98   ├── key: (1)
    99   ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2)
   100   └── scan a@secondary
   101        ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
   102        ├── constraint: /-3/4: [/'foo' - /'foo']
   103        ├── stats: [rows=200, distinct(3)=1, null(3)=0]
   104        ├── key: (1)
   105        └── fd: ()-->(3), (1)-->(4), (4)-->(1)
   106  
   107  # Note that the row count of the index join does not match the row count of
   108  # the scan, because the index join's row count was carried over from the
   109  # normalized SELECT expression in its memo group (see next test case).
   110  # In order to fix the row count, we need more precise constraint calculation
   111  # for filters.
   112  opt colstat=1 colstat=2 colstat=3 colstat=(2,3) colstat=(1,2,3)
   113  SELECT * FROM a WHERE s = 'foo' OR s = 'bar'
   114  ----
   115  index-join a
   116   ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
   117   ├── stats: [rows=400, distinct(1)=400, null(1)=0, distinct(2)=98.8470785, null(2)=0, distinct(3)=2, null(3)=0, distinct(2,3)=197.694157, null(2,3)=0, distinct(1-3)=400, null(1-3)=0]
   118   ├── key: (1)
   119   ├── fd: (1)-->(2-4), (3,4)-->(1,2)
   120   └── scan a@secondary
   121        ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
   122        ├── constraint: /-3/4
   123        │    ├── [/'foo' - /'foo']
   124        │    └── [/'bar' - /'bar']
   125        ├── stats: [rows=400, distinct(3)=2, null(3)=0]
   126        ├── key: (1)
   127        └── fd: (1)-->(3,4), (3,4)-->(1)
   128  
   129  norm
   130  SELECT * FROM a WHERE s = 'foo' OR s = 'bar'
   131  ----
   132  select
   133   ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
   134   ├── stats: [rows=400, distinct(3)=2, null(3)=0]
   135   ├── key: (1)
   136   ├── fd: (1)-->(2-4), (3,4)-->(1,2)
   137   ├── scan a
   138   │    ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
   139   │    ├── stats: [rows=2000, distinct(1)=2000, null(1)=0, distinct(3)=10, null(3)=0, distinct(4)=200, null(4)=0]
   140   │    ├── key: (1)
   141   │    └── fd: (1)-->(2-4), (3,4)~~>(1,2)
   142   └── filters
   143        └── (s:3 = 'foo') OR (s:3 = 'bar') [type=bool, outer=(3), constraints=(/3: [/'bar' - /'bar'] [/'foo' - /'foo']; tight)]
   144  
   145  # Bump up null counts.
   146  exec-ddl
   147  ALTER TABLE a INJECT STATISTICS '[
   148    {
   149      "columns": ["x"],
   150      "created_at": "2018-01-01 2:00:00.00000+00:00",
   151      "row_count": 2000,
   152      "distinct_count": 2000
   153    },
   154    {
   155      "columns": ["x","y"],
   156      "created_at": "2018-01-01 2:00:00.00000+00:00",
   157      "row_count": 2000,
   158      "distinct_count": 2000,
   159      "null_count": 1000
   160    },
   161    {
   162      "columns": ["s"],
   163      "created_at": "2018-01-01 2:00:00.00000+00:00",
   164      "row_count": 2000,
   165      "distinct_count": 11,
   166      "null_count": 1000
   167    },
   168    {
   169      "columns": ["y"],
   170      "created_at": "2018-01-01 2:00:00.00000+00:00",
   171      "row_count": 2000,
   172      "distinct_count": 101,
   173      "null_count": 1000
   174    }
   175  ]'
   176  ----
   177  
   178  opt colstat=1 colstat=2 colstat=3 colstat=4 colstat=(1,2,3)
   179  SELECT * FROM a WHERE s = 'foo' AND x + y = 10
   180  ----
   181  select
   182   ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
   183   ├── stats: [rows=33.3333333, distinct(1)=33.3333333, null(1)=0, distinct(2)=28.5927601, null(2)=16.6666667, distinct(3)=1, null(3)=0, distinct(4)=30.9412676, null(4)=0, distinct(1-3)=33.3333333, null(1-3)=0]
   184   ├── key: (1)
   185   ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2)
   186   ├── index-join a
   187   │    ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
   188   │    ├── stats: [rows=100]
   189   │    ├── key: (1)
   190   │    ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1), (3,4)~~>(1,2)
   191   │    └── scan a@secondary
   192   │         ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
   193   │         ├── constraint: /-3/4: [/'foo' - /'foo']
   194   │         ├── stats: [rows=100, distinct(3)=1, null(3)=0]
   195   │         ├── key: (1)
   196   │         └── fd: ()-->(3), (1)-->(4), (4)-->(1)
   197   └── filters
   198        └── (x:1 + y:2) = 10 [type=bool, outer=(1,2)]
   199  
   200  opt colstat=1 colstat=2 colstat=3 colstat=(1,2,3)
   201  SELECT * FROM a WHERE s = 'foo'
   202  ----
   203  index-join a
   204   ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
   205   ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=64.4232893, null(2)=50, distinct(3)=1, null(3)=0, distinct(1-3)=100, null(1-3)=0]
   206   ├── key: (1)
   207   ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2)
   208   └── scan a@secondary
   209        ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
   210        ├── constraint: /-3/4: [/'foo' - /'foo']
   211        ├── stats: [rows=100, distinct(3)=1, null(3)=0]
   212        ├── key: (1)
   213        └── fd: ()-->(3), (1)-->(4), (4)-->(1)
   214  
   215  # Note that the row count of the index join does not match the row count of
   216  # the scan, because the index join's row count was carried over from the
   217  # normalized SELECT expression in its memo group (see next test case).
   218  # In order to fix the row count, we need more precise constraint calculation
   219  # for filters.
   220  # Also note that we need to tack on an "s IS NOT NULL" clause to make s a
   221  # non-null column in the logical properties because the constraint
   222  # builder at the SELECT level cannot deduce whether an OR'd filter
   223  # is null-rejecting or not.
   224  opt colstat=1 colstat=2 colstat=3 colstat=(2,3) colstat=(1,2,3)
   225  SELECT * FROM a WHERE (s = 'foo' OR s = 'bar') AND s IS NOT NULL
   226  ----
   227  index-join a
   228   ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
   229   ├── stats: [rows=200, distinct(1)=200, null(1)=0, distinct(2)=88.4618791, null(2)=100, distinct(3)=2, null(3)=0, distinct(2,3)=176.923758, null(2,3)=0, distinct(1-3)=200, null(1-3)=0]
   230   ├── key: (1)
   231   ├── fd: (1)-->(2-4), (3,4)-->(1,2)
   232   └── scan a@secondary
   233        ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
   234        ├── constraint: /-3/4
   235        │    ├── [/'foo' - /'foo']
   236        │    └── [/'bar' - /'bar']
   237        ├── stats: [rows=200, distinct(3)=2, null(3)=0]
   238        ├── key: (1)
   239        └── fd: (1)-->(3,4), (3,4)-->(1)
   240  
   241  norm
   242  SELECT * FROM a WHERE (s = 'foo' OR s = 'bar') AND s IS NOT NULL
   243  ----
   244  select
   245   ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
   246   ├── stats: [rows=200, distinct(3)=2, null(3)=0]
   247   ├── key: (1)
   248   ├── fd: (1)-->(2-4), (3,4)-->(1,2)
   249   ├── scan a
   250   │    ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
   251   │    ├── stats: [rows=2000, distinct(1)=2000, null(1)=0, distinct(3)=11, null(3)=1000, distinct(4)=200, null(4)=0]
   252   │    ├── key: (1)
   253   │    └── fd: (1)-->(2-4), (3,4)~~>(1,2)
   254   └── filters
   255        └── ((s:3 = 'foo') OR (s:3 = 'bar')) AND (s:3 IS NOT NULL) [type=bool, outer=(3), constraints=(/3: [/'bar' - /'bar'] [/'foo' - /'foo']; tight)]