github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/memo/testdata/stats/select

     1  exec-ddl
     2  CREATE TABLE a (x INT PRIMARY KEY, y INT)
     3  ----
     4  
     5  exec-ddl
     6  CREATE TABLE b (x INT, z INT NOT NULL)
     7  ----
     8  
     9  exec-ddl
    10  ALTER TABLE a INJECT STATISTICS '[
    11    {
    12      "columns": ["x"],
    13      "created_at": "2018-01-01 1:00:00.00000+00:00",
    14      "row_count": 5000,
    15      "distinct_count": 5000
    16    },
    17    {
    18      "columns": ["y"],
    19      "created_at": "2018-01-01 1:30:00.00000+00:00",
    20      "row_count": 4000,
    21      "distinct_count": 400
    22    }
    23  ]'
    24  ----
    25  
    26  exec-ddl
    27  ALTER TABLE b INJECT STATISTICS '[
    28    {
    29      "columns": ["x"],
    30      "created_at": "2018-01-01 1:00:00.00000+00:00",
    31      "row_count": 10000,
    32      "distinct_count": 5000
    33    },
    34    {
    35      "columns": ["z"],
    36      "created_at": "2018-01-01 1:30:00.00000+00:00",
    37      "row_count": 10000,
    38      "distinct_count": 100
    39    },
    40    {
    41      "columns": ["rowid"],
    42      "created_at": "2018-01-01 1:30:00.00000+00:00",
    43      "row_count": 10000,
    44      "distinct_count": 10000
    45    }
    46  ]'
    47  ----
    48  
    49  norm
    50  SELECT * FROM a WHERE true
    51  ----
    52  scan a
    53   ├── columns: x:1(int!null) y:2(int)
    54   ├── stats: [rows=4000]
    55   ├── key: (1)
    56   └── fd: (1)-->(2)
    57  
    58  norm
    59  SELECT * FROM a WHERE false
    60  ----
    61  values
    62   ├── columns: x:1(int!null) y:2(int!null)
    63   ├── cardinality: [0 - 0]
    64   ├── stats: [rows=0]
    65   ├── key: ()
    66   └── fd: ()-->(1,2)
    67  
    68  # Distinct values calculation with constraints.
    69  norm
    70  SELECT * FROM b WHERE x = 1 AND z = 2 AND rowid >= 5 AND rowid <= 8
    71  ----
    72  project
    73   ├── columns: x:1(int!null) z:2(int!null)
    74   ├── cardinality: [0 - 4]
    75   ├── stats: [rows=0.8118008]
    76   ├── fd: ()-->(1,2)
    77   └── select
    78        ├── columns: x:1(int!null) z:2(int!null) rowid:3(int!null)
    79        ├── cardinality: [0 - 4]
    80        ├── stats: [rows=0.8118008, distinct(1)=0.8118008, null(1)=0, distinct(2)=0.8118008, null(2)=0, distinct(3)=0.8118008, null(3)=0, distinct(1,2)=0.8118008, null(1,2)=0, distinct(1-3)=0.8118008, null(1-3)=0]
    81        ├── key: (3)
    82        ├── fd: ()-->(1,2)
    83        ├── scan b
    84        │    ├── columns: x:1(int) z:2(int!null) rowid:3(int!null)
    85        │    ├── stats: [rows=10000, distinct(1)=5000, null(1)=0, distinct(2)=100, null(2)=0, distinct(3)=10000, null(3)=0, distinct(1,2)=10000, null(1,2)=0, distinct(1-3)=10000, null(1-3)=0]
    86        │    ├── key: (3)
    87        │    └── fd: (3)-->(1,2)
    88        └── filters
    89             ├── (rowid:3 >= 5) AND (rowid:3 <= 8) [type=bool, outer=(3), constraints=(/3: [/5 - /8]; tight)]
    90             ├── x:1 = 1 [type=bool, outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)]
    91             └── z:2 = 2 [type=bool, outer=(2), constraints=(/2: [/2 - /2]; tight), fd=()-->(2)]
    92  
    93  # Can't determine stats from filter.
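        # A rough sanity check on the estimate below: since no constraint can be
        # derived from x + y < 10, the numbers are consistent with a default
        # selectivity of 1/3 being applied to the 4000-row scan (4000 * 1/3 = 1333.33).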
    94  norm
    95  SELECT * FROM a WHERE x + y < 10
    96  ----
    97  select
    98   ├── columns: x:1(int!null) y:2(int)
    99   ├── stats: [rows=1333.33333]
   100   ├── key: (1)
   101   ├── fd: (1)-->(2)
   102   ├── scan a
   103   │    ├── columns: x:1(int!null) y:2(int)
   104   │    ├── stats: [rows=4000]
   105   │    ├── key: (1)
   106   │    └── fd: (1)-->(2)
   107   └── filters
   108        └── (x:1 + y:2) < 10 [type=bool, outer=(1,2)]
   109  
   110  # Remaining filter.
   111  norm
   112  SELECT * FROM a WHERE y = 5 AND x + y < 10
   113  ----
   114  select
   115   ├── columns: x:1(int!null) y:2(int!null)
   116   ├── stats: [rows=9.33333333, distinct(1)=9.33333333, null(1)=0, distinct(2)=1, null(2)=0, distinct(1,2)=9.33333333, null(1,2)=0]
   117   ├── key: (1)
   118   ├── fd: ()-->(2)
   119   ├── scan a
   120   │    ├── columns: x:1(int!null) y:2(int)
   121   │    ├── stats: [rows=4000, distinct(1)=4000, null(1)=0, distinct(2)=400, null(2)=0, distinct(1,2)=4000, null(1,2)=0]
   122   │    ├── key: (1)
   123   │    └── fd: (1)-->(2)
   124   └── filters
   125        ├── y:2 = 5 [type=bool, outer=(2), constraints=(/2: [/5 - /5]; tight), fd=()-->(2)]
   126        └── x:1 < 5 [type=bool, outer=(1), constraints=(/1: (/NULL - /4]; tight)]
   127  
   128  # Contradiction.
   129  norm
   130  SELECT * FROM a WHERE x IS NULL
   131  ----
   132  select
   133   ├── columns: x:1(int!null) y:2(int)
   134   ├── cardinality: [0 - 1]
   135   ├── stats: [rows=1, distinct(1)=1, null(1)=0]
   136   ├── key: ()
   137   ├── fd: ()-->(1,2)
   138   ├── scan a
   139   │    ├── columns: x:1(int!null) y:2(int)
   140   │    ├── stats: [rows=4000, distinct(1)=4000, null(1)=0]
   141   │    ├── key: (1)
   142   │    └── fd: (1)-->(2)
   143   └── filters
   144        └── x:1 IS NULL [type=bool, outer=(1), constraints=(/1: [/NULL - /NULL]; tight), fd=()-->(1)]
   145  
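        # Working through the estimates below: the range (1000, 2000] contains 1000
        # possible values of x, so distinct(x) drops from 5000 to 1000 and the filter
        # keeps roughly 10000 * 1000/5000 = 2000 rows; grouping by z then yields at
        # most distinct(z) = 100 groups.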
   146  norm
   147  SELECT sum(x) FROM b WHERE x > 1000 AND x <= 2000 GROUP BY z
   148  ----
   149  project
   150   ├── columns: sum:4(decimal!null)
   151   ├── stats: [rows=100]
   152   └── group-by
   153        ├── columns: z:2(int!null) sum:4(decimal!null)
   154        ├── grouping columns: z:2(int!null)
   155        ├── stats: [rows=100, distinct(2)=100, null(2)=0]
   156        ├── key: (2)
   157        ├── fd: (2)-->(4)
   158        ├── select
   159        │    ├── columns: x:1(int!null) z:2(int!null)
   160        │    ├── stats: [rows=2000, distinct(1)=1000, null(1)=0, distinct(2)=100, null(2)=0]
   161        │    ├── scan b
   162        │    │    ├── columns: x:1(int) z:2(int!null)
   163        │    │    └── stats: [rows=10000, distinct(1)=5000, null(1)=0, distinct(2)=100, null(2)=0]
   164        │    └── filters
   165        │         └── (x:1 > 1000) AND (x:1 <= 2000) [type=bool, outer=(1), constraints=(/1: [/1001 - /2000]; tight)]
   166        └── aggregations
   167             └── sum [as=sum:4, type=decimal, outer=(1)]
   168                  └── x:1 [type=int]
   169  
   170  # Regression: the statistics builder used to panic when the end key was NULL
   171  # while trying to compute start/end int boundaries.
   172  exec-ddl
   173  CREATE TABLE idx (x INT PRIMARY KEY, y INT, z INT, INDEX yz (y DESC, z))
   174  ----
   175  
   176  opt
   177  SELECT y FROM idx WHERE y < 5 AND z < 10
   178  ----
   179  project
   180   ├── columns: y:2(int!null)
   181   ├── stats: [rows=311.111111]
   182   └── select
   183        ├── columns: y:2(int!null) z:3(int!null)
   184        ├── stats: [rows=311.111111, distinct(2)=33.3333333, null(2)=0, distinct(3)=33.3333333, null(3)=0, distinct(2,3)=311.111111, null(2,3)=0]
   185        ├── scan idx@yz
   186        │    ├── columns: y:2(int!null) z:3(int)
   187        │    ├── constraint: /-2/3/1: (/4/NULL - /NULL)
   188        │    └── stats: [rows=333.333333, distinct(2)=33.3333333, null(2)=0]
   189        └── filters
   190             └── z:3 < 10 [type=bool, outer=(3), constraints=(/3: (/NULL - /9]; tight)]
   191  
   192  # Regression: certain queries could cause a NaN expected number of rows via a divide-by-zero.
   193  exec-ddl
   194  CREATE TABLE tab0(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT)
   195  ----
   196  
   197  # Note: it's not clear that this still tests the above issue, but I have left
   198  # it here anyway as an interesting test case. I've added another query below
   199  # to regression-test the divide-by-zero issue.
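        # The statistics injected further below give col0 a distinct count of 0 with
        # every value NULL, which appears to be the shape of stats that triggered the
        # NaN row count via divide-by-zero.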
   200  opt
   201  SELECT pk FROM tab0 WHERE
   202    col0 = 1 AND
   203    col0 = 2 AND
   204    (col0 = 1 OR col0 IN (SELECT col3 FROM tab0)) AND
   205    (col0 = 1 OR col0 IN (SELECT col3 FROM tab0))
   206  ----
   207  values
   208   ├── columns: pk:1(int!null)
   209   ├── cardinality: [0 - 0]
   210   ├── stats: [rows=0]
   211   ├── key: ()
   212   └── fd: ()-->(1)
   213  
   214  exec-ddl
   215  ALTER TABLE tab0 INJECT STATISTICS '[
   216  {
   217    "columns": ["col0"],
   218    "created_at": "2018-01-01 1:00:00.00000+00:00",
   219    "row_count": 100,
   220    "distinct_count": 0,
   221    "null_count": 100
   222  },
   223  {
   224    "columns": ["col3"],
   225    "created_at": "2018-01-01 1:00:00.00000+00:00",
   226    "row_count": 100,
   227    "distinct_count": 10
   228  }
   229  ]'
   230  ----
   231  
   232  opt
   233  SELECT count(*) FROM (SELECT * FROM tab0 WHERE col3 = 10) GROUP BY col0
   234  ----
   235  project
   236   ├── columns: count:8(int!null)
   237   ├── stats: [rows=0.999973439]
   238   └── group-by
   239        ├── columns: col0:2(int) count_rows:8(int!null)
   240        ├── grouping columns: col0:2(int)
   241        ├── stats: [rows=0.999973439, distinct(2)=0.999973439, null(2)=0.999973439]
   242        ├── key: (2)
   243        ├── fd: (2)-->(8)
   244        ├── select
   245        │    ├── columns: col0:2(int) col3:5(int!null)
   246        │    ├── stats: [rows=10, distinct(2)=0.999973439, null(2)=10, distinct(5)=1, null(5)=0]
   247        │    ├── fd: ()-->(5)
   248        │    ├── scan tab0
   249        │    │    ├── columns: col0:2(int) col3:5(int)
   250        │    │    └── stats: [rows=100, distinct(2)=1, null(2)=100, distinct(5)=10, null(5)=0]
   251        │    └── filters
   252        │         └── col3:5 = 10 [type=bool, outer=(5), constraints=(/5: [/10 - /10]; tight), fd=()-->(5)]
   253        └── aggregations
   254             └── count-rows [as=count_rows:8, type=int]
   255  
   256  
   257  exec-ddl
   258  CREATE TABLE customers (id INT PRIMARY KEY, name STRING, state STRING)
   259  ----
   260  
   261  exec-ddl
   262  CREATE TABLE order_history (order_id INT, item_id INT, customer_id INT, year INT)
   263  ----
   264  
   265  exec-ddl
   266  CREATE TABLE district (d_id INT, d_w_id INT, d_name STRING, PRIMARY KEY(d_id, d_w_id))
   267  ----
   268  
   269  exec-ddl
   270  ALTER TABLE district INJECT STATISTICS '[
   271  {
   272    "columns": ["d_id"],
   273    "created_at": "2018-01-01 1:00:00.00000+00:00",
   274    "row_count": 100,
   275    "distinct_count": 10
   276  },
   277  {
   278    "columns": ["d_w_id"],
   279    "created_at": "2018-01-01 1:30:00.00000+00:00",
   280    "row_count": 100,
   281    "distinct_count": 10
   282  },
   283  {
   284    "columns": ["d_name"],
   285    "created_at": "2018-01-01 1:30:00.00000+00:00",
   286    "row_count": 100,
   287    "distinct_count": 100
   288  }
   289  ]'
   290  ----
   291  
   292  # This tests selectivityFromReducedCols.
   293  # Since the reduced column set is (d_id, d_name), and
   294  # both columns have distinct count 1, we expect this
   295  # to calculate selectivity through selectivityFromReducedCols.
   296  # The output is the same as the naive approach.
   297  norm
   298  SELECT * FROM district WHERE d_id = 1 AND d_name='bobs_burgers'
   299  ----
   300  select
   301   ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string!null)
   302   ├── stats: [rows=0.91, distinct(1)=0.91, null(1)=0, distinct(3)=0.91, null(3)=0, distinct(1,3)=0.91, null(1,3)=0]
   303   ├── key: (2)
   304   ├── fd: ()-->(1,3)
   305   ├── scan district
   306   │    ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string)
   307   │    ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0, distinct(1,3)=100, null(1,3)=0]
   308   │    ├── key: (1,2)
   309   │    └── fd: (1,2)-->(3)
   310   └── filters
   311        ├── d_id:1 = 1 [type=bool, outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)]
   312        └── d_name:3 = 'bobs_burgers' [type=bool, outer=(3), constraints=(/3: [/'bobs_burgers' - /'bobs_burgers']; tight), fd=()-->(3)]
   313  
   314  norm
   315  SELECT * FROM district WHERE d_id = 1 and d_name LIKE 'bob'
   316  ----
   317  select
   318   ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string!null)
   319   ├── stats: [rows=0.91, distinct(1)=0.91, null(1)=0, distinct(3)=0.91, null(3)=0, distinct(1,3)=0.91, null(1,3)=0]
   320   ├── key: (2)
   321   ├── fd: ()-->(1,3)
   322   ├── scan district
   323   │    ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string)
   324   │    ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0, distinct(1,3)=100, null(1,3)=0]
   325   │    ├── key: (1,2)
   326   │    └── fd: (1,2)-->(3)
   327   └── filters
   328        ├── d_id:1 = 1 [type=bool, outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)]
   329        └── d_name:3 LIKE 'bob' [type=bool, outer=(3), constraints=(/3: [/'bob' - /'bob']; tight), fd=()-->(3)]
   330  
   331  # This tests selectivityFromReducedCols.
   332  # Since (1,2)-->(3), in order to use selectivityFromReducedCols both
   333  # columns in (1,2) must have distinct count 1 after applying the filter.
   334  # Since the constraint on d_id is a range, this fails, and we fall back
   335  # to the naive selectivity estimation.
   336  norm
   337  SELECT * FROM district WHERE d_id > 1 AND d_id < 10 AND d_w_id=10 AND d_name='bobs_burgers'
   338  ----
   339  select
   340   ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string!null)
   341   ├── cardinality: [0 - 8]
   342   ├── stats: [rows=0.827, distinct(1)=0.827, null(1)=0, distinct(2)=0.827, null(2)=0, distinct(3)=0.827, null(3)=0, distinct(2,3)=0.827, null(2,3)=0, distinct(1-3)=0.827, null(1-3)=0]
   343   ├── key: (1)
   344   ├── fd: ()-->(2,3)
   345   ├── scan district
   346   │    ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string)
   347   │    ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0, distinct(2,3)=100, null(2,3)=0, distinct(1-3)=100, null(1-3)=0]
   348   │    ├── key: (1,2)
   349   │    └── fd: (1,2)-->(3)
   350   └── filters
   351        ├── (d_id:1 > 1) AND (d_id:1 < 10) [type=bool, outer=(1), constraints=(/1: [/2 - /9]; tight)]
   352        ├── d_w_id:2 = 10 [type=bool, outer=(2), constraints=(/2: [/10 - /10]; tight), fd=()-->(2)]
   353        └── d_name:3 = 'bobs_burgers' [type=bool, outer=(3), constraints=(/3: [/'bobs_burgers' - /'bobs_burgers']; tight), fd=()-->(3)]
   354  
   355  # This tests selectivityFromReducedCols.
   356  # We don't apply the selectivity from d_name since (1,2)-->(3).
   357  norm
   358  SELECT * FROM district WHERE d_id = 1 AND d_w_id=10 AND d_name='hello'
   359  ----
   360  select
   361   ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string!null)
   362   ├── cardinality: [0 - 1]
   363   ├── stats: [rows=1, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(1,2)=1, null(1,2)=0]
   364   ├── key: ()
   365   ├── fd: ()-->(1-3)
   366   ├── scan district
   367   │    ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string)
   368   │    ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0, distinct(1,2)=100, null(1,2)=0]
   369   │    ├── key: (1,2)
   370   │    └── fd: (1,2)-->(3)
   371   └── filters
   372        ├── d_id:1 = 1 [type=bool, outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)]
   373        ├── d_w_id:2 = 10 [type=bool, outer=(2), constraints=(/2: [/10 - /10]; tight), fd=()-->(2)]
   374        └── d_name:3 = 'hello' [type=bool, outer=(3), constraints=(/3: [/'hello' - /'hello']; tight), fd=()-->(3)]
   375  
   376  exec-ddl
   377  ALTER TABLE customers INJECT STATISTICS '[
   378  {
   379    "columns": ["name"],
   380    "created_at": "2018-01-01 1:00:00.00000+00:00",
   381    "row_count": 10000,
   382    "distinct_count": 500
   383  },
   384  {
   385    "columns": ["id"],
   386    "created_at": "2018-01-01 1:30:00.00000+00:00",
   387    "row_count": 10000,
   388    "distinct_count": 10000
   389  }
   390  ]'
   391  ----
   392  
   393  # This tests selectivityFromReducedCols.
   394  # The following two test cases are paired together. The first has a
   395  # single constraint on a single non-key column. The second query has two
   396  # constraints on columns which form a (determinant, dependent) FD pair.
   397  # The dependent column in this FD pair is the one constrained in the first
   398  # test case. This series of tests demonstrates that the selectivity
   399  # contribution for a (determinant, dependent) FD pair is the
   400  # selectivity of the determinant.
   401  # 1/2 join-subquery-selectivityFromReducedCols tests
   402  
   403  build
   404  SELECT * FROM (SELECT * FROM customers, order_history WHERE id = customer_id)
   405  WHERE name='andy'
   406  ----
   407  select
   408   ├── columns: id:1(int!null) name:2(string!null) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int)
   409   ├── stats: [rows=2.29713221, distinct(2)=1, null(2)=0]
   410   ├── fd: ()-->(2), (1)-->(3), (1)==(6), (6)==(1)
   411   ├── project
   412   │    ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int)
   413   │    ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=0, distinct(6)=99, null(6)=0]
   414   │    ├── fd: (1)-->(2,3), (1)==(6), (6)==(1)
   415   │    └── select
   416   │         ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int) rowid:8(int!null)
   417   │         ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=0, distinct(6)=99, null(6)=0]
   418   │         ├── key: (8)
   419   │         ├── fd: (1)-->(2,3), (8)-->(4-7), (1)==(6), (6)==(1)
   420   │         ├── inner-join (cross)
   421   │         │    ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null)
   422   │         │    ├── stats: [rows=10000000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=0, distinct(6)=100, null(6)=100000, distinct(8)=1000, null(8)=0]
   423   │         │    ├── key: (1,8)
   424   │         │    ├── fd: (1)-->(2,3), (8)-->(4-7)
   425   │         │    ├── scan customers
   426   │         │    │    ├── columns: id:1(int!null) name:2(string) state:3(string)
   427   │         │    │    ├── stats: [rows=10000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=0]
   428   │         │    │    ├── key: (1)
   429   │         │    │    └── fd: (1)-->(2,3)
   430   │         │    ├── scan order_history
   431   │         │    │    ├── columns: order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null)
   432   │         │    │    ├── stats: [rows=1000, distinct(6)=100, null(6)=10, distinct(8)=1000, null(8)=0]
   433   │         │    │    ├── key: (8)
   434   │         │    │    └── fd: (8)-->(4-7)
   435   │         │    └── filters (true)
   436   │         └── filters
   437   │              └── id:1 = customer_id:6 [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)]
   438   └── filters
   439        └── name:2 = 'andy' [type=bool, outer=(2), constraints=(/2: [/'andy' - /'andy']; tight), fd=()-->(2)]
   440  
   441  # This tests selectivityFromReducedCols.
   442  # The previous test case and this one are paired together. The first has a
   443  # single constraint on a single non-key column. The second query has two
   444  # constraints on columns which form a (determinant, dependent) FD pair.
   445  # The dependent column in this FD pair is the one constrained in the first
   446  # test case. This series of tests demonstrates that the selectivity
   447  # contribution for a (determinant, dependent) FD pair is the
   448  # selectivity of the determinant.
   449  # 2/2 join-subquery-selectivityFromReducedCols tests
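        # A quick check against the estimates below: the join subquery is estimated
        # at 990 rows with distinct(id) = 99, so the determinant id = 1 alone
        # contributes selectivity 1/99, giving 990/99 = 10 rows; the dependent
        # name = 'andy' predicate adds no further reduction.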
   450  
   451  build
   452  SELECT * FROM (SELECT * FROM customers, order_history WHERE id = customer_id)
   453  WHERE id = 1 AND name='andy'
   454  ----
   455  select
   456   ├── columns: id:1(int!null) name:2(string!null) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int)
   457   ├── stats: [rows=10, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0]
   458   ├── fd: ()-->(1-3,6), (1)==(6), (6)==(1)
   459   ├── project
   460   │    ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int)
   461   │    ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=0, distinct(6)=99, null(6)=0]
   462   │    ├── fd: (1)-->(2,3), (1)==(6), (6)==(1)
   463   │    └── select
   464   │         ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int) rowid:8(int!null)
   465   │         ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=0, distinct(6)=99, null(6)=0]
   466   │         ├── key: (8)
   467   │         ├── fd: (1)-->(2,3), (8)-->(4-7), (1)==(6), (6)==(1)
   468   │         ├── inner-join (cross)
   469   │         │    ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null)
   470   │         │    ├── stats: [rows=10000000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=0, distinct(6)=100, null(6)=100000, distinct(8)=1000, null(8)=0]
   471   │         │    ├── key: (1,8)
   472   │         │    ├── fd: (1)-->(2,3), (8)-->(4-7)
   473   │         │    ├── scan customers
   474   │         │    │    ├── columns: id:1(int!null) name:2(string) state:3(string)
   475   │         │    │    ├── stats: [rows=10000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=0]
   476   │         │    │    ├── key: (1)
   477   │         │    │    └── fd: (1)-->(2,3)
   478   │         │    ├── scan order_history
   479   │         │    │    ├── columns: order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null)
   480   │         │    │    ├── stats: [rows=1000, distinct(6)=100, null(6)=10, distinct(8)=1000, null(8)=0]
   481   │         │    │    ├── key: (8)
   482   │         │    │    └── fd: (8)-->(4-7)
   483   │         │    └── filters (true)
   484   │         └── filters
   485   │              └── id:1 = customer_id:6 [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)]
   486   └── filters
   487        └── (id:1 = 1) AND (name:2 = 'andy') [type=bool, outer=(1,2), constraints=(/1: [/1 - /1]; /2: [/'andy' - /'andy']; tight), fd=()-->(1,2)]
   488  
   489  # Test equality conditions where all have distinct count 1.
   490  norm
   491  SELECT * FROM order_history WHERE item_id = order_id AND item_id = customer_id AND customer_id = 5
   492  ----
   493  select
   494   ├── columns: order_id:1(int!null) item_id:2(int!null) customer_id:3(int!null) year:4(int)
   495   ├── stats: [rows=0.901, distinct(1)=0.901, null(1)=0, distinct(2)=0.901, null(2)=0, distinct(3)=0.901, null(3)=0, distinct(1-3)=0.901, null(1-3)=0]
   496   ├── fd: ()-->(1-3)
   497   ├── scan order_history
   498   │    ├── columns: order_id:1(int) item_id:2(int) customer_id:3(int) year:4(int)
   499   │    └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10, distinct(3)=100, null(3)=10, distinct(1-3)=1000, null(1-3)=0.001]
   500   └── filters
   501        ├── order_id:1 = 5 [type=bool, outer=(1), constraints=(/1: [/5 - /5]; tight), fd=()-->(1)]
   502        ├── item_id:2 = 5 [type=bool, outer=(2), constraints=(/2: [/5 - /5]; tight), fd=()-->(2)]
   503        └── customer_id:3 = 5 [type=bool, outer=(3), constraints=(/3: [/5 - /5]; tight), fd=()-->(3)]
   504  
   505  # Test equality condition with another condition on one of the attributes.
   506  norm
   507  SELECT * FROM order_history WHERE item_id = order_id AND item_id < 5 AND item_id > 0
   508  ----
   509  select
   510   ├── columns: order_id:1(int!null) item_id:2(int!null) customer_id:3(int) year:4(int)
   511   ├── stats: [rows=0.99, distinct(1)=0.99, null(1)=0, distinct(2)=0.99, null(2)=0]
   512   ├── fd: (1)==(2), (2)==(1)
   513   ├── scan order_history
   514   │    ├── columns: order_id:1(int) item_id:2(int) customer_id:3(int) year:4(int)
   515   │    └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10]
   516   └── filters
   517        ├── (item_id:2 < 5) AND (item_id:2 > 0) [type=bool, outer=(2), constraints=(/2: [/1 - /4]; tight)]
   518        └── item_id:2 = order_id:1 [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)]
   519  
   520  # Test equality condition with another condition on a different attribute.
   521  norm
   522  SELECT * FROM order_history WHERE item_id = order_id AND customer_id < 5 AND customer_id > 0
   523  ----
   524  select
   525   ├── columns: order_id:1(int!null) item_id:2(int!null) customer_id:3(int!null) year:4(int)
   526   ├── stats: [rows=0.9801, distinct(1)=0.9801, null(1)=0, distinct(2)=0.9801, null(2)=0, distinct(3)=0.9801, null(3)=0]
   527   ├── fd: (1)==(2), (2)==(1)
   528   ├── scan order_history
   529   │    ├── columns: order_id:1(int) item_id:2(int) customer_id:3(int) year:4(int)
   530   │    └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10, distinct(3)=100, null(3)=10]
   531   └── filters
   532        ├── (customer_id:3 < 5) AND (customer_id:3 > 0) [type=bool, outer=(3), constraints=(/3: [/1 - /4]; tight)]
   533        └── item_id:2 = order_id:1 [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)]
   534  
   535  # Test equality condition with another filter condition without a constraint.
   536  norm
   537  SELECT * FROM order_history WHERE item_id = order_id AND customer_id % 2 = 0
   538  ----
   539  select
   540   ├── columns: order_id:1(int!null) item_id:2(int!null) customer_id:3(int) year:4(int)
   541   ├── stats: [rows=3.267, distinct(1)=3.267, null(1)=0, distinct(2)=3.267, null(2)=0]
   542   ├── fd: (1)==(2), (2)==(1)
   543   ├── scan order_history
   544   │    ├── columns: order_id:1(int) item_id:2(int) customer_id:3(int) year:4(int)
   545   │    └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10]
   546   └── filters
   547        ├── item_id:2 = order_id:1 [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)]
   548        └── (customer_id:3 % 2) = 0 [type=bool, outer=(3)]
   549  
   550  exec-ddl
   551  CREATE TABLE c (x INT, z INT NOT NULL, UNIQUE INDEX x_idx (x))
   552  ----
   553  
   554  # Test that the distinct and null counts for x are estimated correctly (since it's a weak
   555  # key).
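        # Since x is a weak key (unique index x_idx), the 100-value range [0, 99]
        # can match at most 100 rows; the estimate below lands exactly on that bound,
        # with distinct(x) = 100 and no NULLs surviving the range filter.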
   556  norm
   557  SELECT * FROM c WHERE x >= 0 AND x < 100
   558  ----
   559  select
   560   ├── columns: x:1(int!null) z:2(int!null)
   561   ├── cardinality: [0 - 100]
   562   ├── stats: [rows=100, distinct(1)=100, null(1)=0]
   563   ├── key: (1)
   564   ├── fd: (1)-->(2)
   565   ├── scan c
   566   │    ├── columns: x:1(int) z:2(int!null)
   567   │    ├── stats: [rows=1000, distinct(1)=991, null(1)=10, distinct(2)=100, null(2)=0]
   568   │    ├── lax-key: (1,2)
   569   │    └── fd: (1)~~>(2)
   570   └── filters
   571        └── (x:1 >= 0) AND (x:1 < 100) [type=bool, outer=(1), constraints=(/1: [/0 - /99]; tight)]
   572  
   573  exec-ddl
   574  CREATE TABLE uvw (u INT, v INT, w INT)
   575  ----
   576  
   577  # Test selectivity calculations by applying the two constraints in different
   578  # orders.
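        # The three plans below arrive at slightly different estimates (roughly 0.91,
        # 1.04, and 1 rows) depending on which constraint is applied first, as each
        # select derives its selectivity from the statistics of its immediate input.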
   579  norm
   580  SELECT * FROM uvw WHERE u=v AND u=10
   581  ----
   582  select
   583   ├── columns: u:1(int!null) v:2(int!null) w:3(int)
   584   ├── stats: [rows=0.910810811, distinct(1)=0.910810811, null(1)=0, distinct(2)=0.910810811, null(2)=0, distinct(1,2)=0.910810811, null(1,2)=0]
   585   ├── fd: ()-->(1,2)
   586   ├── scan uvw
   587   │    ├── columns: u:1(int) v:2(int) w:3(int)
   588   │    └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10, distinct(1,2)=1000, null(1,2)=0.1]
   589   └── filters
   590        ├── v:2 = 10 [type=bool, outer=(2), constraints=(/2: [/10 - /10]; tight), fd=()-->(2)]
   591        └── u:1 = 10 [type=bool, outer=(1), constraints=(/1: [/10 - /10]; tight), fd=()-->(1)]
   592  
   593  norm disable=MergeSelects
   594  SELECT * FROM (SELECT * FROM uvw WHERE u=10) WHERE u=v
   595  ----
   596  select
   597   ├── columns: u:1(int!null) v:2(int!null) w:3(int)
   598   ├── stats: [rows=1.03537072, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0]
   599   ├── fd: ()-->(1,2), (1)==(2), (2)==(1)
   600   ├── select
   601   │    ├── columns: u:1(int!null) v:2(int) w:3(int)
   602   │    ├── stats: [rows=10, distinct(1)=1, null(1)=0, distinct(2)=9.5617925, null(2)=0.1]
   603   │    ├── fd: ()-->(1)
   604   │    ├── scan uvw
   605   │    │    ├── columns: u:1(int) v:2(int) w:3(int)
   606   │    │    └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10]
   607   │    └── filters
   608   │         └── u:1 = 10 [type=bool, outer=(1), constraints=(/1: [/10 - /10]; tight), fd=()-->(1)]
   609   └── filters
   610        └── u:1 = v:2 [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)]
   611  
   612  norm disable=MergeSelects
   613  SELECT * FROM (SELECT * FROM uvw WHERE u=v) WHERE u=10
   614  ----
   615  select
   616   ├── columns: u:1(int!null) v:2(int!null) w:3(int)
   617   ├── stats: [rows=1, distinct(1)=1, null(1)=0]
   618   ├── fd: ()-->(1,2), (1)==(2), (2)==(1)
   619   ├── select
   620   │    ├── columns: u:1(int!null) v:2(int!null) w:3(int)
   621   │    ├── stats: [rows=9.801, distinct(1)=9.801, null(1)=0, distinct(2)=9.801, null(2)=0]
   622   │    ├── fd: (1)==(2), (2)==(1)
   623   │    ├── scan uvw
   624   │    │    ├── columns: u:1(int) v:2(int) w:3(int)
   625   │    │    └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10]
   626   │    └── filters
   627   │         └── u:1 = v:2 [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)]
   628   └── filters
   629        └── u:1 = 10 [type=bool, outer=(1), constraints=(/1: [/10 - /10]; tight), fd=()-->(1)]
   630  
   631  exec-ddl
   632  CREATE TABLE lineitem
   633  (
   634      l_orderkey int NOT NULL,
   635      l_partkey int NOT NULL,
   636      l_suppkey int NOT NULL,
   637      l_linenumber int NOT NULL,
   638      l_quantity float NOT NULL,
   639      l_extendedprice float NOT NULL,
   640      l_discount float NOT NULL,
   641      l_tax float NOT NULL,
   642      l_returnflag char(1) NOT NULL,
   643      l_linestatus char(1) NOT NULL,
   644      l_shipdate date NOT NULL,
   645      l_commitdate date NOT NULL,
   646      l_receiptdate date NOT NULL,
   647      l_shipinstruct char(25) NOT NULL,
   648      l_shipmode char(10) NOT NULL,
   649      l_comment varchar(44) NOT NULL,
   650      PRIMARY KEY (l_orderkey, l_linenumber),
   651      INDEX l_ok (l_orderkey ASC),
   652      INDEX l_pk (l_partkey ASC),
   653      INDEX l_sk (l_suppkey ASC),
   654      INDEX l_sd (l_shipdate ASC),
   655      INDEX l_cd (l_commitdate ASC),
   656      INDEX l_rd (l_receiptdate ASC),
   657      INDEX l_pk_sk (l_partkey ASC, l_suppkey ASC),
   658      INDEX l_sk_pk (l_suppkey ASC, l_partkey ASC)
   659  );
   660  ----
   661  
   662  # We can determine that there are exactly 30 days for this range.
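        # Out of the 100 distinct l_shipdate values assumed for the scan, this range
        # covers 30, so the selectivity works out to 30/100 and the estimate below is
        # 1000 * 0.3 = 300 rows.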
   663  opt
   664  SELECT *
   665  FROM lineitem
   666  WHERE
   667      l_shipdate >= DATE '1995-09-01'
   668      AND l_shipdate < DATE '1995-10-01';
   669  ----
   670  select
   671   ├── columns: l_orderkey:1(int!null) l_partkey:2(int!null) l_suppkey:3(int!null) l_linenumber:4(int!null) l_quantity:5(float!null) l_extendedprice:6(float!null) l_discount:7(float!null) l_tax:8(float!null) l_returnflag:9(char!null) l_linestatus:10(char!null) l_shipdate:11(date!null) l_commitdate:12(date!null) l_receiptdate:13(date!null) l_shipinstruct:14(char!null) l_shipmode:15(char!null) l_comment:16(varchar!null)
   672   ├── stats: [rows=300, distinct(11)=30, null(11)=0]
   673   ├── key: (1,4)
   674   ├── fd: (1,4)-->(2,3,5-16)
   675   ├── scan lineitem
   676   │    ├── columns: l_orderkey:1(int!null) l_partkey:2(int!null) l_suppkey:3(int!null) l_linenumber:4(int!null) l_quantity:5(float!null) l_extendedprice:6(float!null) l_discount:7(float!null) l_tax:8(float!null) l_returnflag:9(char!null) l_linestatus:10(char!null) l_shipdate:11(date!null) l_commitdate:12(date!null) l_receiptdate:13(date!null) l_shipinstruct:14(char!null) l_shipmode:15(char!null) l_comment:16(varchar!null)
   677   │    ├── stats: [rows=1000, distinct(1)=100, null(1)=0, distinct(2)=100, null(2)=0, distinct(3)=100, null(3)=0, distinct(4)=100, null(4)=0, distinct(5)=100, null(5)=0, distinct(6)=100, null(6)=0, distinct(7)=100, null(7)=0, distinct(8)=100, null(8)=0, distinct(9)=100, null(9)=0, distinct(10)=100, null(10)=0, distinct(11)=100, null(11)=0, distinct(12)=100, null(12)=0, distinct(13)=100, null(13)=0, distinct(14)=100, null(14)=0, distinct(15)=100, null(15)=0, distinct(16)=100, null(16)=0]
   678   │    ├── key: (1,4)
   679   │    └── fd: (1,4)-->(2,3,5-16)
   680   └── filters
   681        └── (l_shipdate:11 >= '1995-09-01') AND (l_shipdate:11 < '1995-10-01') [type=bool, outer=(11), constraints=(/11: [/'1995-09-01' - /'1995-09-30']; tight)]
   682  
   683  # We cannot determine the number of distinct values exactly since the upper
   684  # bound of the date range is compared to a timestamp rather than a date.
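        # The estimates below are consistent with the default 1/3 selectivity being
        # applied twice: once for the open-ended index constraint (1000 * 1/3 = 333.33
        # rows scanned) and once for the remaining cast filter (333.33 * 1/3 = 111.11).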
   685  opt
   686  SELECT *
   687  FROM lineitem
   688  WHERE
   689      l_shipdate >= DATE '1995-09-01'
   690      AND l_shipdate::timestamptz < DATE '1995-10-01';
   691  ----
   692  index-join lineitem
   693   ├── columns: l_orderkey:1(int!null) l_partkey:2(int!null) l_suppkey:3(int!null) l_linenumber:4(int!null) l_quantity:5(float!null) l_extendedprice:6(float!null) l_discount:7(float!null) l_tax:8(float!null) l_returnflag:9(char!null) l_linestatus:10(char!null) l_shipdate:11(date!null) l_commitdate:12(date!null) l_receiptdate:13(date!null) l_shipinstruct:14(char!null) l_shipmode:15(char!null) l_comment:16(varchar!null)
   694   ├── stats: [rows=111.111111, distinct(11)=33.3333333, null(11)=0]
   695   ├── key: (1,4)
   696   ├── fd: (1,4)-->(2,3,5-16)
   697   └── select
   698        ├── columns: l_orderkey:1(int!null) l_linenumber:4(int!null) l_shipdate:11(date!null)
   699        ├── stats: [rows=111.111111]
   700        ├── key: (1,4)
   701        ├── fd: (1,4)-->(11)
   702        ├── scan lineitem@l_sd
   703        │    ├── columns: l_orderkey:1(int!null) l_linenumber:4(int!null) l_shipdate:11(date!null)
   704        │    ├── constraint: /11/1/4: [/'1995-09-01' - ]
   705        │    ├── stats: [rows=333.333333, distinct(1)=98.265847, null(1)=0, distinct(4)=98.265847, null(4)=0, distinct(11)=33.3333333, null(11)=0]
   706        │    ├── key: (1,4)
   707        │    └── fd: (1,4)-->(11)
   708        └── filters
   709             └── l_shipdate:11::TIMESTAMPTZ < '1995-10-01' [type=bool, outer=(11)]
   710  
   711  # These queries should generate zigzag joins in xform rules. The column statistics
   712  # should be comparable between the norm'd and fully optimized expressions.
   713  opt colstat=11 colstat=12 colstat=(11,12)
   714  SELECT l_shipdate, l_commitdate, l_orderkey, l_linenumber
   715  FROM lineitem
   716  WHERE
   717      l_shipdate = DATE '1995-09-01'
   718      AND l_commitdate = DATE '1995-08-01';
   719  ----
   720  inner-join (zigzag lineitem@l_sd lineitem@l_cd)
   721   ├── columns: l_shipdate:11(date!null) l_commitdate:12(date!null) l_orderkey:1(int!null) l_linenumber:4(int!null)
   722   ├── eq columns: [1 4] = [1 4]
   723   ├── left fixed columns: [11] = ['1995-09-01']
   724   ├── right fixed columns: [12] = ['1995-08-01']
   725   ├── stats: [rows=0.91, distinct(11)=0.91, null(11)=0, distinct(12)=0.91, null(12)=0, distinct(11,12)=0.91, null(11,12)=0]
   726   ├── key: (1,4)
   727   ├── fd: ()-->(11,12)
   728   └── filters
   729        ├── l_shipdate:11 = '1995-09-01' [type=bool, outer=(11), constraints=(/11: [/'1995-09-01' - /'1995-09-01']; tight), fd=()-->(11)]
   730        └── l_commitdate:12 = '1995-08-01' [type=bool, outer=(12), constraints=(/12: [/'1995-08-01' - /'1995-08-01']; tight), fd=()-->(12)]
   731  
   732  norm colstat=11 colstat=12 colstat=(11,12)
   733  SELECT l_shipdate, l_commitdate, l_orderkey, l_linenumber
   734  FROM lineitem
   735  WHERE
   736      l_shipdate = DATE '1995-09-01'
   737      AND l_commitdate = DATE '1995-08-01';
   738  ----
   739  select
   740   ├── columns: l_shipdate:11(date!null) l_commitdate:12(date!null) l_orderkey:1(int!null) l_linenumber:4(int!null)
   741   ├── stats: [rows=0.91, distinct(11)=0.91, null(11)=0, distinct(12)=0.91, null(12)=0, distinct(11,12)=0.91, null(11,12)=0]
   742   ├── key: (1,4)
   743   ├── fd: ()-->(11,12)
   744   ├── scan lineitem
   745   │    ├── columns: l_orderkey:1(int!null) l_linenumber:4(int!null) l_shipdate:11(date!null) l_commitdate:12(date!null)
   746   │    ├── stats: [rows=1000, distinct(1)=100, null(1)=0, distinct(4)=100, null(4)=0, distinct(11)=100, null(11)=0, distinct(12)=100, null(12)=0, distinct(11,12)=1000, null(11,12)=0]
   747   │    ├── key: (1,4)
   748   │    └── fd: (1,4)-->(11,12)
   749   └── filters
   750        ├── l_shipdate:11 = '1995-09-01' [type=bool, outer=(11), constraints=(/11: [/'1995-09-01' - /'1995-09-01']; tight), fd=()-->(11)]
   751        └── l_commitdate:12 = '1995-08-01' [type=bool, outer=(12), constraints=(/12: [/'1995-08-01' - /'1995-08-01']; tight), fd=()-->(12)]
   752  
   753  # These queries should also generate zigzag joins in xform rules, like the
   754  # ones above. These zigzag joins should be nested inside a lookup join on
   755  # the primary index. Since the zigzag join lies in a new memo group, we will
   756  # see the zigzag-join-specific stats/logprops build and colStat functions in
   757  # action. Again, the colstats of the inner zigzag expression should be
   758  # reasonably close to those of the full normalized select expression.
   759  opt colstat=11 colstat=12 colstat=(11,12)
   760  SELECT l_shipdate, l_commitdate, l_orderkey, l_linenumber, l_quantity
   761  FROM lineitem
   762  WHERE
   763      l_shipdate = DATE '1995-09-01'
   764      AND l_commitdate = DATE '1995-08-01';
   765  ----
   766  inner-join (lookup lineitem)
   767   ├── columns: l_shipdate:11(date!null) l_commitdate:12(date!null) l_orderkey:1(int!null) l_linenumber:4(int!null) l_quantity:5(float!null)
   768   ├── key columns: [1 4] = [1 4]
   769   ├── lookup columns are key
   770   ├── stats: [rows=0.91, distinct(11)=0.91, null(11)=0, distinct(12)=0.91, null(12)=0, distinct(11,12)=0.91, null(11,12)=0]
   771   ├── key: (1,4)
   772   ├── fd: ()-->(11,12), (1,4)-->(5)
   773   ├── inner-join (zigzag lineitem@l_sd lineitem@l_cd)
   774   │    ├── columns: l_orderkey:1(int!null) l_linenumber:4(int!null) l_shipdate:11(date!null) l_commitdate:12(date!null)
   775   │    ├── eq columns: [1 4] = [1 4]
   776   │    ├── left fixed columns: [11] = ['1995-09-01']
   777   │    ├── right fixed columns: [12] = ['1995-08-01']
   778   │    ├── stats: [rows=0.91, distinct(11)=0.91, null(11)=0, distinct(12)=0.91, null(12)=0, distinct(11,12)=0.91, null(11,12)=0]
   779   │    ├── fd: ()-->(11,12)
   780   │    └── filters
   781   │         ├── l_shipdate:11 = '1995-09-01' [type=bool, outer=(11), constraints=(/11: [/'1995-09-01' - /'1995-09-01']; tight), fd=()-->(11)]
   782   │         └── l_commitdate:12 = '1995-08-01' [type=bool, outer=(12), constraints=(/12: [/'1995-08-01' - /'1995-08-01']; tight), fd=()-->(12)]
   783   └── filters (true)
   784  
   785  norm colstat=11 colstat=12 colstat=(11,12)
   786  SELECT l_shipdate, l_commitdate, l_orderkey, l_linenumber, l_quantity
   787  FROM lineitem
   788  WHERE
   789      l_shipdate = DATE '1995-09-01'
   790      AND l_commitdate = DATE '1995-08-01';
   791  ----
   792  select
   793   ├── columns: l_shipdate:11(date!null) l_commitdate:12(date!null) l_orderkey:1(int!null) l_linenumber:4(int!null) l_quantity:5(float!null)
   794   ├── stats: [rows=0.91, distinct(11)=0.91, null(11)=0, distinct(12)=0.91, null(12)=0, distinct(11,12)=0.91, null(11,12)=0]
   795   ├── key: (1,4)
   796   ├── fd: ()-->(11,12), (1,4)-->(5)
   797   ├── scan lineitem
   798   │    ├── columns: l_orderkey:1(int!null) l_linenumber:4(int!null) l_quantity:5(float!null) l_shipdate:11(date!null) l_commitdate:12(date!null)
   799   │    ├── stats: [rows=1000, distinct(1)=100, null(1)=0, distinct(4)=100, null(4)=0, distinct(5)=100, null(5)=0, distinct(11)=100, null(11)=0, distinct(12)=100, null(12)=0, distinct(11,12)=1000, null(11,12)=0]
   800   │    ├── key: (1,4)
   801   │    └── fd: (1,4)-->(5,11,12)
   802   └── filters
   803        ├── l_shipdate:11 = '1995-09-01' [type=bool, outer=(11), constraints=(/11: [/'1995-09-01' - /'1995-09-01']; tight), fd=()-->(11)]
   804        └── l_commitdate:12 = '1995-08-01' [type=bool, outer=(12), constraints=(/12: [/'1995-08-01' - /'1995-08-01']; tight), fd=()-->(12)]
   805  
   806  # Create a table with an inverted index to test statistics around
   807  # JSON containment filter operators and zigzag joins.
   808  exec-ddl
   809  CREATE TABLE tjson (a INT PRIMARY KEY, b JSON, c JSON, INVERTED INDEX b_idx (b))
   810  ----
   811  
   812  exec-ddl
   813  ALTER TABLE tjson INJECT STATISTICS '[
   814    {
   815      "columns": ["a"],
   816      "created_at": "2018-01-01 2:00:00.00000+00:00",
   817      "row_count": 5000,
   818      "distinct_count": 5000
   819    },
   820    {
   821      "columns": ["b"],
   822      "created_at": "2018-01-01 2:00:00.00000+00:00",
   823      "row_count": 5000,
   824      "distinct_count": 2500
   825    },
   826    {
   827      "columns": ["c"],
   828      "created_at": "2018-01-01 2:00:00.00000+00:00",
   829      "row_count": 5000,
   830      "distinct_count": 2500
   831    }
   832  ]'
   833  ----
   834  
   835  # Should generate a scan on the inverted index.
   836  opt
   837  SELECT * FROM tjson WHERE b @> '{"a":"b"}'
   838  ----
   839  index-join tjson
   840   ├── columns: a:1(int!null) b:2(jsonb) c:3(jsonb)
   841   ├── stats: [rows=555.555556]
   842   ├── key: (1)
   843   ├── fd: (1)-->(2,3)
   844   └── scan tjson@b_idx
   845        ├── columns: a:1(int!null)
   846        ├── constraint: /2/1: [/'{"a": "b"}' - /'{"a": "b"}']
   847        ├── stats: [rows=555.555556]
   848        └── key: (1)
   849  
   850  # Should generate a zigzag join on the inverted index. Row count should be
   851  # strictly lower than the above scan.
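        # Each JSON path in the containment predicate appears to contribute a
        # selectivity of roughly 1/9: the single-path query above is estimated at
        # 5000/9 = 555.56 rows, while this two-path query drops to 5000/81 = 61.73.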
   852  opt
   853  SELECT * FROM tjson WHERE b @> '{"a":"b", "c":"d"}'
   854  ----
   855  inner-join (lookup tjson)
   856   ├── columns: a:1(int!null) b:2(jsonb) c:3(jsonb)
   857   ├── key columns: [1] = [1]
   858   ├── lookup columns are key
   859   ├── stats: [rows=61.7283951]
   860   ├── key: (1)
   861   ├── fd: (1)-->(2,3)
   862   ├── inner-join (zigzag tjson@b_idx tjson@b_idx)
   863   │    ├── columns: a:1(int!null)
   864   │    ├── eq columns: [1] = [1]
   865   │    ├── left fixed columns: [2] = ['{"a": "b"}']
   866   │    ├── right fixed columns: [2] = ['{"c": "d"}']
   867   │    ├── stats: [rows=61.7283951, distinct(1)=61.7283951, null(1)=0]
   868   │    └── filters (true)
   869   └── filters
   870        └── b:2 @> '{"a": "b", "c": "d"}' [type=bool, outer=(2)]
   871  
   872  # Should generate a select on the table with a JSON filter, since c does not
   873  # have an inverted index.
   874  opt
   875  SELECT * FROM tjson WHERE c @> '{"a":"b"}'
   876  ----
   877  select
   878   ├── columns: a:1(int!null) b:2(jsonb) c:3(jsonb)
   879   ├── stats: [rows=555.555556]
   880   ├── key: (1)
   881   ├── fd: (1)-->(2,3)
   882   ├── scan tjson
   883   │    ├── columns: a:1(int!null) b:2(jsonb) c:3(jsonb)
   884   │    ├── stats: [rows=5000, distinct(1)=5000, null(1)=0]
   885   │    ├── key: (1)
   886   │    └── fd: (1)-->(2,3)
   887   └── filters
   888        └── c:3 @> '{"a": "b"}' [type=bool, outer=(3)]
   889  
   890  # Should have a lower row count than the above case, due to a containment query
   891  # on two JSON paths.
   892  opt
   893  SELECT * FROM tjson WHERE c @> '{"a":"b", "c":"d"}'
   894  ----
   895  select
   896   ├── columns: a:1(int!null) b:2(jsonb) c:3(jsonb)
   897   ├── stats: [rows=61.7283951]
   898   ├── key: (1)
   899   ├── fd: (1)-->(2,3)
   900   ├── scan tjson
   901   │    ├── columns: a:1(int!null) b:2(jsonb) c:3(jsonb)
   902   │    ├── stats: [rows=5000, distinct(1)=5000, null(1)=0]
   903   │    ├── key: (1)
   904   │    └── fd: (1)-->(2,3)
   905   └── filters
   906        └── c:3 @> '{"a": "b", "c": "d"}' [type=bool, outer=(3)]
   907  
   908  # Bump up null counts.
   909  exec-ddl
   910  ALTER TABLE a INJECT STATISTICS '[
   911    {
   912      "columns": ["x"],
   913      "created_at": "2018-01-01 2:00:00.00000+00:00",
   914      "row_count": 5000,
   915      "distinct_count": 5000
   916    },
   917    {
   918      "columns": ["y"],
   919      "created_at": "2018-01-01 2:00:00.00000+00:00",
   920      "row_count": 4000,
   921      "distinct_count": 400,
   922      "null_count": 1000
   923    }
   924  ]'
   925  ----
   926  
   927  exec-ddl
   928  ALTER TABLE b INJECT STATISTICS '[
   929    {
   930      "columns": ["x"],
   931      "created_at": "2018-01-01 2:00:00.00000+00:00",
   932      "row_count": 10000,
   933      "distinct_count": 5000,
   934      "null_count": 2000
   935    },
   936    {
   937      "columns": ["z"],
   938      "created_at": "2018-01-01 2:00:00.00000+00:00",
   939      "row_count": 10000,
   940      "distinct_count": 100
   941    },
   942    {
   943      "columns": ["rowid"],
   944      "created_at": "2018-01-01 1:30:00.00000+00:00",
   945      "row_count": 10000,
   946      "distinct_count": 10000
   947    }
   948  ]'
   949  ----
   950  
   951  exec-ddl
   952  ALTER TABLE c INJECT STATISTICS '[
   953    {
   954      "columns": ["x"],
   955      "created_at": "2018-01-01 2:00:00.00000+00:00",
   956      "row_count": 10000,
   957      "distinct_count": 5000,
   958      "null_count": 5000
   959    },
   960    {
   961      "columns": ["z"],
   962      "created_at": "2018-01-01 2:00:00.00000+00:00",
   963      "row_count": 10000,
   964      "distinct_count": 10000
   965    }
   966  ]'
   967  ----
   968  
   969  # Distinct values calculation with constraints.
   970  norm
   971  SELECT * FROM b WHERE x = 1 AND z = 2 AND rowid >= 5 AND rowid <= 8
   972  ----
   973  project
   974   ├── columns: x:1(int!null) z:2(int!null)
   975   ├── cardinality: [0 - 4]
   976   ├── stats: [rows=0.811440928]
   977   ├── fd: ()-->(1,2)
   978   └── select
   979        ├── columns: x:1(int!null) z:2(int!null) rowid:3(int!null)
   980        ├── cardinality: [0 - 4]
   981        ├── stats: [rows=0.811440928, distinct(1)=0.811440928, null(1)=0, distinct(2)=0.811440928, null(2)=0, distinct(3)=0.811440928, null(3)=0, distinct(1,2)=0.811440928, null(1,2)=0, distinct(1-3)=0.811440928, null(1-3)=0]
   982        ├── key: (3)
   983        ├── fd: ()-->(1,2)
   984        ├── scan b
   985        │    ├── columns: x:1(int) z:2(int!null) rowid:3(int!null)
   986        │    ├── stats: [rows=10000, distinct(1)=5000, null(1)=2000, distinct(2)=100, null(2)=0, distinct(3)=10000, null(3)=0, distinct(1,2)=10000, null(1,2)=0, distinct(1-3)=10000, null(1-3)=0]
   987        │    ├── key: (3)
   988        │    └── fd: (3)-->(1,2)
   989        └── filters
   990             ├── (rowid:3 >= 5) AND (rowid:3 <= 8) [type=bool, outer=(3), constraints=(/3: [/5 - /8]; tight)]
   991             ├── x:1 = 1 [type=bool, outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)]
   992             └── z:2 = 2 [type=bool, outer=(2), constraints=(/2: [/2 - /2]; tight), fd=()-->(2)]
   993  
   994  # Can't determine stats from filter.
   995  norm
   996  SELECT * FROM a WHERE x + y < 10
   997  ----
   998  select
   999   ├── columns: x:1(int!null) y:2(int)
  1000   ├── stats: [rows=1666.66667]
  1001   ├── key: (1)
  1002   ├── fd: (1)-->(2)
  1003   ├── scan a
  1004   │    ├── columns: x:1(int!null) y:2(int)
  1005   │    ├── stats: [rows=5000]
  1006   │    ├── key: (1)
  1007   │    └── fd: (1)-->(2)
  1008   └── filters
  1009        └── (x:1 + y:2) < 10 [type=bool, outer=(1,2)]
  1010  
  1011  # Remaining filter.
  1012  norm
  1013  SELECT * FROM a WHERE y = 5 AND x + y < 10
  1014  ----
  1015  select
  1016   ├── columns: x:1(int!null) y:2(int!null)
  1017   ├── stats: [rows=9.35672515, distinct(1)=9.35672515, null(1)=0, distinct(2)=1, null(2)=0, distinct(1,2)=9.35672515, null(1,2)=0]
  1018   ├── key: (1)
  1019   ├── fd: ()-->(2)
  1020   ├── scan a
  1021   │    ├── columns: x:1(int!null) y:2(int)
  1022   │    ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=1000, distinct(1,2)=5000, null(1,2)=0]
  1023   │    ├── key: (1)
  1024   │    └── fd: (1)-->(2)
  1025   └── filters
  1026        ├── y:2 = 5 [type=bool, outer=(2), constraints=(/2: [/5 - /5]; tight), fd=()-->(2)]
  1027        └── x:1 < 5 [type=bool, outer=(1), constraints=(/1: (/NULL - /4]; tight)]
  1028  
  1029  # Test that the null count for x is propagated correctly (since it's a weak
  1030  # key).
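        # The injected stats give x 5000 NULLs out of 10000 rows; the range filter
        # x >= 0 removes them, so null(x) drops to 0 in the estimate below.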
  1031  norm
  1032  SELECT * FROM c WHERE x >= 0 AND x < 100
  1033  ----
  1034  select
  1035   ├── columns: x:1(int!null) z:2(int!null)
  1036   ├── cardinality: [0 - 100]
  1037   ├── stats: [rows=100, distinct(1)=100, null(1)=0]
  1038   ├── key: (1)
  1039   ├── fd: (1)-->(2)
  1040   ├── scan c
  1041   │    ├── columns: x:1(int) z:2(int!null)
  1042   │    ├── stats: [rows=10000, distinct(1)=5000, null(1)=5000, distinct(2)=10000, null(2)=0]
  1043   │    ├── lax-key: (1,2)
  1044   │    └── fd: (1)~~>(2)
  1045   └── filters
  1046        └── (x:1 >= 0) AND (x:1 < 100) [type=bool, outer=(1), constraints=(/1: [/0 - /99]; tight)]
  1047  
  1048  # Bump up null counts.
  1049  exec-ddl
  1050  ALTER TABLE customers INJECT STATISTICS '[
  1051  {
  1052    "columns": ["name"],
  1053    "created_at": "2018-01-01 1:00:00.00000+00:00",
  1054    "row_count": 10000,
  1055    "distinct_count": 500,
  1056    "null_count": 2000
  1057  },
  1058  {
  1059    "columns": ["id"],
  1060    "created_at": "2018-01-01 1:30:00.00000+00:00",
  1061    "row_count": 10000,
  1062    "distinct_count": 10000
  1063  }
  1064  ]'
  1065  ----
  1066  
  1067  # This tests selectivityFromReducedCols.
  1068  # The following two test cases are paired together. The first has a
  1069  # single constraint on a single non-key column. The second query has two
  1070  # constraints on columns which form a (determinant, dependent) FD pair.
  1071  # The dependent column in this FD pair is the one constrained in the first
  1072  # test case. This series of tests demonstrates that the selectivity
  1073  # contribution for a (determinant, dependent) FD pair is the
  1074  # selectivity of the determinant.
  1075  # 1/2 join-subquery-selectivityFromReducedCols tests
  1076  
  1077  build
  1078  SELECT * FROM (SELECT * FROM customers, order_history WHERE id = customer_id)
  1079  WHERE name='andy'
  1080  ----
  1081  select
  1082   ├── columns: id:1(int!null) name:2(string!null) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int)
  1083   ├── stats: [rows=1.84197978, distinct(2)=1, null(2)=0]
  1084   ├── fd: ()-->(2), (1)-->(3), (1)==(6), (6)==(1)
  1085   ├── project
  1086   │    ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int)
  1087   │    ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=198, distinct(6)=99, null(6)=0]
  1088   │    ├── fd: (1)-->(2,3), (1)==(6), (6)==(1)
  1089   │    └── select
  1090   │         ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int) rowid:8(int!null)
  1091   │         ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=198, distinct(6)=99, null(6)=0]
  1092   │         ├── key: (8)
  1093   │         ├── fd: (1)-->(2,3), (8)-->(4-7), (1)==(6), (6)==(1)
  1094   │         ├── inner-join (cross)
  1095   │         │    ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null)
  1096   │         │    ├── stats: [rows=10000000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=2000000, distinct(6)=100, null(6)=100000, distinct(8)=1000, null(8)=0]
  1097   │         │    ├── key: (1,8)
  1098   │         │    ├── fd: (1)-->(2,3), (8)-->(4-7)
  1099   │         │    ├── scan customers
  1100   │         │    │    ├── columns: id:1(int!null) name:2(string) state:3(string)
  1101   │         │    │    ├── stats: [rows=10000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=2000]
  1102   │         │    │    ├── key: (1)
  1103   │         │    │    └── fd: (1)-->(2,3)
  1104   │         │    ├── scan order_history
  1105   │         │    │    ├── columns: order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null)
  1106   │         │    │    ├── stats: [rows=1000, distinct(6)=100, null(6)=10, distinct(8)=1000, null(8)=0]
  1107   │         │    │    ├── key: (8)
  1108   │         │    │    └── fd: (8)-->(4-7)
  1109   │         │    └── filters (true)
  1110   │         └── filters
  1111   │              └── id:1 = customer_id:6 [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)]
  1112   └── filters
  1113        └── name:2 = 'andy' [type=bool, outer=(2), constraints=(/2: [/'andy' - /'andy']; tight), fd=()-->(2)]
  1114  
  1115  # This tests selectivityFromReducedCols.
  1116  # The previous test case and this one are paired together. The first has a
  1117  # single constraint on a single non-key column. The second query has two
  1118  # constraints on columns which form a (determinant, dependent) FD pair.
  1119  # The dependent column in this FD pair is the one constrained in the first
  1120  # test case. This series of tests demonstrates that the selectivity
  1121  # contribution for a (determinant, dependent) FD pair is the
  1122  # selectivity of the determinant.
  1123  # 2/2 join-subquery-selectivityFromReducedCols tests
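        # With the bumped null counts the check changes slightly: the determinant
        # id = 1 still contributes 990/99 = 10 rows, and the result below (8 rows) is
        # consistent with that being further scaled by the 80% non-null fraction of
        # name (792 of 990 rows).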
  1124  
  1125  build
  1126  SELECT * FROM (SELECT * FROM customers, order_history WHERE id = customer_id)
  1127  WHERE id = 1 AND name='andy'
  1128  ----
  1129  select
  1130   ├── columns: id:1(int!null) name:2(string!null) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int)
  1131   ├── stats: [rows=8, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0]
  1132   ├── fd: ()-->(1-3,6), (1)==(6), (6)==(1)
  1133   ├── project
  1134   │    ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int)
  1135   │    ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=198, distinct(6)=99, null(6)=0]
  1136   │    ├── fd: (1)-->(2,3), (1)==(6), (6)==(1)
  1137   │    └── select
  1138   │         ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int) rowid:8(int!null)
  1139   │         ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=198, distinct(6)=99, null(6)=0]
  1140   │         ├── key: (8)
  1141   │         ├── fd: (1)-->(2,3), (8)-->(4-7), (1)==(6), (6)==(1)
  1142   │         ├── inner-join (cross)
  1143   │         │    ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null)
  1144   │         │    ├── stats: [rows=10000000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=2000000, distinct(6)=100, null(6)=100000, distinct(8)=1000, null(8)=0]
  1145   │         │    ├── key: (1,8)
  1146   │         │    ├── fd: (1)-->(2,3), (8)-->(4-7)
  1147   │         │    ├── scan customers
  1148   │         │    │    ├── columns: id:1(int!null) name:2(string) state:3(string)
  1149   │         │    │    ├── stats: [rows=10000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=2000]
  1150   │         │    │    ├── key: (1)
  1151   │         │    │    └── fd: (1)-->(2,3)
  1152   │         │    ├── scan order_history
  1153   │         │    │    ├── columns: order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null)
  1154   │         │    │    ├── stats: [rows=1000, distinct(6)=100, null(6)=10, distinct(8)=1000, null(8)=0]
  1155   │         │    │    ├── key: (8)
  1156   │         │    │    └── fd: (8)-->(4-7)
  1157   │         │    └── filters (true)
  1158   │         └── filters
  1159   │              └── id:1 = customer_id:6 [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)]
  1160   └── filters
  1161        └── (id:1 = 1) AND (name:2 = 'andy') [type=bool, outer=(1,2), constraints=(/1: [/1 - /1]; /2: [/'andy' - /'andy']; tight), fd=()-->(1,2)]
  1162  
  1163  exec-ddl
  1164  CREATE TABLE nulls (x INT, y INT);
  1165  ----
  1166  
  1167  exec-ddl
  1168  ALTER TABLE nulls INJECT STATISTICS '[
  1169    {
  1170      "columns": ["x"],
  1171      "created_at": "2018-01-01 1:00:00.00000+00:00",
  1172      "row_count": 1000,
  1173      "distinct_count": 0,
  1174      "null_count": 1000
  1175    }
  1176  ]'
  1177  ----
  1178  
  1179  build
  1180  SELECT * FROM nulls WHERE x = y
  1181  ----
  1182  project
  1183   ├── columns: x:1(int!null) y:2(int!null)
  1184   ├── stats: [rows=9.9e-10]
  1185   ├── fd: (1)==(2), (2)==(1)
  1186   └── select
  1187        ├── columns: x:1(int!null) y:2(int!null) rowid:3(int!null)
  1188        ├── stats: [rows=9.9e-10, distinct(1)=1e-10, null(1)=0, distinct(2)=1e-10, null(2)=0]
  1189        ├── key: (3)
  1190        ├── fd: (3)-->(1,2), (1)==(2), (2)==(1)
  1191        ├── scan nulls
  1192        │    ├── columns: x:1(int) y:2(int) rowid:3(int!null)
  1193        │    ├── stats: [rows=1000, distinct(1)=1, null(1)=1000, distinct(2)=100, null(2)=10, distinct(3)=1000, null(3)=0]
  1194        │    ├── key: (3)
  1195        │    └── fd: (3)-->(1,2)
  1196        └── filters
  1197             └── x:1 = y:2 [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)]
  1198  
  1199  build
  1200  SELECT * FROM nulls WHERE x IS NULL
  1201  ----
  1202  project
  1203   ├── columns: x:1(int) y:2(int)
  1204   ├── stats: [rows=1000]
  1205   ├── fd: ()-->(1)
  1206   └── select
  1207        ├── columns: x:1(int) y:2(int) rowid:3(int!null)
  1208        ├── stats: [rows=1000, distinct(1)=1, null(1)=1000]
  1209        ├── key: (3)
  1210        ├── fd: ()-->(1), (3)-->(2)
  1211        ├── scan nulls
  1212        │    ├── columns: x:1(int) y:2(int) rowid:3(int!null)
  1213        │    ├── stats: [rows=1000, distinct(1)=1, null(1)=1000, distinct(3)=1000, null(3)=0]
  1214        │    ├── key: (3)
  1215        │    └── fd: (3)-->(1,2)
  1216        └── filters
  1217             └── x:1 IS NULL [type=bool, outer=(1), constraints=(/1: [/NULL - /NULL]; tight), fd=()-->(1)]
  1218  
  1219  
  1220  # Regression test for #34440. Ensure the null count for x is less than or equal
  1221  # to the row count.
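        # As a rough check against the output below: y = 3 selects about
        # 1000 / distinct(y) = 1000/100 = 10 rows, and the null count of x,
        # 1000 in the base table, is capped at those 10 rows (null(1)=10)
        # instead of exceeding the row count.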
  1222  build colstat=1
  1223  SELECT * FROM nulls WHERE y = 3
  1224  ----
  1225  project
  1226   ├── columns: x:1(int) y:2(int!null)
  1227   ├── stats: [rows=10, distinct(1)=0.999956829, null(1)=10]
  1228   ├── fd: ()-->(2)
  1229   └── select
  1230        ├── columns: x:1(int) y:2(int!null) rowid:3(int!null)
  1231        ├── stats: [rows=10, distinct(1)=0.999956829, null(1)=10, distinct(2)=1, null(2)=0]
  1232        ├── key: (3)
  1233        ├── fd: ()-->(2), (3)-->(1)
  1234        ├── scan nulls
  1235        │    ├── columns: x:1(int) y:2(int) rowid:3(int!null)
  1236        │    ├── stats: [rows=1000, distinct(1)=1, null(1)=1000, distinct(2)=100, null(2)=10, distinct(3)=1000, null(3)=0]
  1237        │    ├── key: (3)
  1238        │    └── fd: (3)-->(1,2)
  1239        └── filters
  1240             └── y:2 = 3 [type=bool, outer=(2), constraints=(/2: [/3 - /3]; tight), fd=()-->(2)]
  1241  
  1242  # Sample testcase using exprgen.
  1243  expr colstat=1 colstat=2
  1244  (Select 
  1245    (FakeRel
  1246      [
  1247        (OutputCols [ (NewColumn "a" "int") (NewColumn "b" "int") (NewColumn "c" "int")] )
  1248        (Cardinality "-")
  1249        (Stats `[
  1250          {
  1251            "columns": ["a"],
  1252            "distinct_count": 100,
  1253            "null_count": 0, 
  1254            "row_count": 100, 
  1255            "created_at": "2018-01-01 1:00:00.00000+00:00"
  1256          },
  1257          {
  1258            "columns": ["b"],
  1259            "distinct_count": 20,
  1260            "null_count": 5, 
  1261            "row_count": 100, 
  1262            "created_at": "2018-01-01 1:00:00.00000+00:00"
  1263          }
  1264        ]`)
  1265      ]
  1266    )
  1267    [ (Eq (Var "b") (Const 1 "int")) ]
  1268  )
  1269  ----
  1270  select
  1271   ├── columns: a:1(int) b:2(int!null) c:3(int)
  1272   ├── stats: [rows=5, distinct(1)=5, null(1)=0, distinct(2)=1, null(2)=0]
  1273   ├── fd: ()-->(2)
  1274   ├── fake-rel
  1275   │    ├── columns: a:1(int) b:2(int) c:3(int)
  1276   │    └── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=20, null(2)=5]
  1277   └── filters
  1278        └── b:2 = 1 [type=bool, outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)]
  1279  
  1280  # Regression test for #37754.
  1281  norm
  1282  SELECT
  1283      1
  1284  FROM
  1285      (
  1286          VALUES
  1287              (true, NULL, B'001000101101110'),
  1288              (true, e'19\x1e':::STRING, NULL)
  1289      )
  1290          AS t (_bool, _string, _bit)
  1291  GROUP BY
  1292      _string, _bit
  1293  HAVING
  1294      min(_bool)
  1295  ----
  1296  project
  1297   ├── columns: "?column?":5(int!null)
  1298   ├── cardinality: [0 - 2]
  1299   ├── stats: [rows=1]
  1300   ├── fd: ()-->(5)
  1301   ├── select
  1302   │    ├── columns: column2:2(string) column3:3(varbit) min:4(bool!null)
  1303   │    ├── cardinality: [0 - 2]
  1304   │    ├── stats: [rows=1, distinct(4)=1, null(4)=0]
  1305   │    ├── key: (2,3)
  1306   │    ├── fd: ()-->(4)
  1307   │    ├── group-by
  1308   │    │    ├── columns: column2:2(string) column3:3(varbit) min:4(bool!null)
  1309   │    │    ├── grouping columns: column2:2(string) column3:3(varbit)
  1310   │    │    ├── cardinality: [1 - 2]
  1311   │    │    ├── stats: [rows=2, distinct(4)=2, null(4)=0, distinct(2,3)=2, null(2,3)=0]
  1312   │    │    ├── key: (2,3)
  1313   │    │    ├── fd: (2,3)-->(4)
  1314   │    │    ├── values
  1315   │    │    │    ├── columns: column1:1(bool!null) column2:2(string) column3:3(varbit)
  1316   │    │    │    ├── cardinality: [2 - 2]
  1317   │    │    │    ├── stats: [rows=2, distinct(2,3)=2, null(2,3)=0]
  1318   │    │    │    ├── (true, NULL, B'001000101101110') [type=tuple{bool, string, varbit}]
  1319   │    │    │    └── (true, e'19\x1e', NULL) [type=tuple{bool, string, varbit}]
  1320   │    │    └── aggregations
  1321   │    │         └── min [as=min:4, type=bool, outer=(1)]
  1322   │    │              └── column1:1 [type=bool]
  1323   │    └── filters
  1324   │         └── min:4 [type=bool, outer=(4), constraints=(/4: [/true - /true]; tight), fd=()-->(4)]
  1325   └── projections
  1326        └── 1 [as="?column?":5, type=int]
  1327  
  1328  # Test that distinct count estimates are correct for date ranges.
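        # As a rough check for the first query below: the constraint spans the
        # 30 days ['1995-10-02', '1995-10-31'], so distinct(d1) is estimated as
        # 30, and with the default stats of 1000 rows and 100 distinct values
        # that works out to about 1000 * 30/100 = 300 rows.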
  1329  exec-ddl
  1330  CREATE TABLE date_test (
  1331      k INT PRIMARY KEY,
  1332      d1 date NOT NULL,
  1333      d2 date NOT NULL,
  1334      d3 date NOT NULL,
  1335      INDEX d1_idx (d1 ASC),
  1336      INDEX d2_idx (d2 DESC)
  1337  )
  1338  ----
  1339  
  1340  opt
  1341  SELECT d1 FROM date_test WHERE d1 > DATE '1995-10-01' AND d1 < DATE '1995-11-01'
  1342  ----
  1343  scan date_test@d1_idx
  1344   ├── columns: d1:2(date!null)
  1345   ├── constraint: /2/1: [/'1995-10-02' - /'1995-10-31']
  1346   └── stats: [rows=300, distinct(2)=30, null(2)=0]
  1347  
  1348  opt
  1349  SELECT d1 FROM date_test WHERE d1 >= DATE '1995-10-01' AND d1 <= DATE '1995-11-01'
  1350  ----
  1351  scan date_test@d1_idx
  1352   ├── columns: d1:2(date!null)
  1353   ├── constraint: /2/1: [/'1995-10-01' - /'1995-11-01']
  1354   └── stats: [rows=320, distinct(2)=32, null(2)=0]
  1355  
  1356  opt
  1357  SELECT d2 FROM date_test WHERE d2 > DATE '1903-10-01' AND d2 <= DATE '1903-11-01'
  1358  ----
  1359  scan date_test@d2_idx
  1360   ├── columns: d2:3(date!null)
  1361   ├── constraint: /-3/1: [/'1903-11-01' - /'1903-10-02']
  1362   └── stats: [rows=310, distinct(3)=31, null(3)=0]
  1363  
  1364  opt
  1365  SELECT d2 FROM date_test WHERE d2 >= DATE '2003-10-01' AND d2 < DATE '2003-11-01'
  1366  ----
  1367  scan date_test@d2_idx
  1368   ├── columns: d2:3(date!null)
  1369   ├── constraint: /-3/1: [/'2003-10-31' - /'2003-10-01']
  1370   └── stats: [rows=310, distinct(3)=31, null(3)=0]
  1371  
  1372  opt
  1373  SELECT d3 FROM date_test WHERE d3 >= DATE '2003-10-01' AND d3 < DATE '2003-11-01'
  1374  ----
  1375  select
  1376   ├── columns: d3:4(date!null)
  1377   ├── stats: [rows=310, distinct(4)=31, null(4)=0]
  1378   ├── scan date_test
  1379   │    ├── columns: d3:4(date!null)
  1380   │    └── stats: [rows=1000, distinct(4)=100, null(4)=0]
  1381   └── filters
  1382        └── (d3:4 >= '2003-10-01') AND (d3:4 < '2003-11-01') [type=bool, outer=(4), constraints=(/4: [/'2003-10-01' - /'2003-10-31']; tight)]
  1383  
  1384  opt
  1385  SELECT d3 FROM date_test WHERE d3 >= DATE '1903-10-01' AND d3 < DATE '2003-10-01'
  1386  ----
  1387  select
  1388   ├── columns: d3:4(date!null)
  1389   ├── stats: [rows=1000, distinct(4)=100, null(4)=0]
  1390   ├── scan date_test
  1391   │    ├── columns: d3:4(date!null)
  1392   │    └── stats: [rows=1000, distinct(4)=100, null(4)=0]
  1393   └── filters
  1394        └── (d3:4 >= '1903-10-01') AND (d3:4 < '2003-10-01') [type=bool, outer=(4), constraints=(/4: [/'1903-10-01' - /'2003-09-30']; tight)]
  1395  
  1396  # Regression test for #38344. Avoid floating point precision errors.
  1397  exec-ddl
  1398  CREATE TABLE t38344 (x BOOL)
  1399  ----
  1400  
  1401  exec-ddl
  1402  ALTER TABLE t38344 INJECT STATISTICS '[
  1403    {
  1404      "columns": ["x"],
  1405      "created_at": "2018-01-01 1:00:00.00000+00:00",
  1406      "row_count": 20000000000,
  1407      "distinct_count": 1,
  1408      "null_count": 20000000000
  1409    }
  1410  ]'
  1411  ----
  1412  
  1413  norm disable=InlineWith
  1414  WITH t(x) AS (
  1415    SELECT (t1.x::int << 5533)::bool OR t2.x  AS x
  1416    FROM t38344 AS t1 LEFT JOIN t38344 AS t2 ON true
  1417  )
  1418  SELECT x FROM t WHERE x
  1419  ----
  1420  with &1 (t)
  1421   ├── columns: x:6(bool!null)
  1422   ├── stats: [rows=1.98e+20, distinct(6)=1, null(6)=0]
  1423   ├── fd: ()-->(6)
  1424   ├── project
  1425   │    ├── columns: x:5(bool)
  1426   │    ├── stats: [rows=4e+20]
  1427   │    ├── left-join (cross)
  1428   │    │    ├── columns: t1.x:1(bool) t2.x:3(bool)
  1429   │    │    ├── stats: [rows=4e+20]
  1430   │    │    ├── scan t1
  1431   │    │    │    ├── columns: t1.x:1(bool)
  1432   │    │    │    └── stats: [rows=2e+10]
  1433   │    │    ├── scan t2
  1434   │    │    │    ├── columns: t2.x:3(bool)
  1435   │    │    │    └── stats: [rows=2e+10]
  1436   │    │    └── filters (true)
  1437   │    └── projections
  1438   │         └── (t1.x:1::INT8 << 5533)::BOOL OR t2.x:3 [as=x:5, type=bool, outer=(1,3)]
  1439   └── select
  1440        ├── columns: x:6(bool!null)
  1441        ├── stats: [rows=1.98e+20, distinct(6)=1, null(6)=0]
  1442        ├── fd: ()-->(6)
  1443        ├── with-scan &1 (t)
  1444        │    ├── columns: x:6(bool)
  1445        │    ├── mapping:
  1446        │    │    └──  x:5(bool) => x:6(bool)
  1447        │    └── stats: [rows=4e+20, distinct(6)=3, null(6)=4e+18]
  1448        └── filters
  1449             └── x:6 [type=bool, outer=(6), constraints=(/6: [/true - /true]; tight), fd=()-->(6)]
  1450  
  1451  # Regression test for #38375. Avoid floating point precision errors.
  1452  exec-ddl
  1453  CREATE TABLE t38375 (x INT, y INT)
  1454  ----
  1455  
  1456  exec-ddl
  1457  ALTER TABLE t38375 INJECT STATISTICS '[
  1458    {
  1459      "columns": ["x"],
  1460      "created_at": "2018-01-01 1:00:00.00000+00:00",
  1461      "row_count": 20000000000,
  1462      "distinct_count": 20000000000,
  1463      "null_count": 20000000000
  1464    },
  1465    {
  1466      "columns": ["y"],
  1467      "created_at": "2018-01-01 1:00:00.00000+00:00",
  1468      "row_count": 20000000000,
  1469      "distinct_count": 10,
  1470      "null_count": 0
  1471    }
  1472  ]'
  1473  ----
  1474  
  1475  opt colstat=2
  1476  SELECT * FROM t38375 WHERE x = 1
  1477  ----
  1478  select
  1479   ├── columns: x:1(int!null) y:2(int)
  1480   ├── stats: [rows=2, distinct(1)=1, null(1)=0, distinct(2)=1.81269262, null(2)=0]
  1481   ├── fd: ()-->(1)
  1482   ├── scan t38375
  1483   │    ├── columns: x:1(int) y:2(int)
  1484   │    └── stats: [rows=2e+10, distinct(1)=2e+10, null(1)=2e+10, distinct(2)=10, null(2)=0]
  1485   └── filters
  1486        └── x:1 = 1 [type=bool, outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)]
  1487  
  1488  # Support OR constraints.
  1489  exec-ddl
  1490  CREATE TABLE nation
  1491  (
  1492      n_nationkey int PRIMARY KEY,
  1493      n_name char(25) NOT NULL,
  1494      n_regionkey int NOT NULL,
  1495      neighbor char(25) NOT NULL,
  1496      INDEX n_rk (n_regionkey ASC)
  1497  )
  1498  ----
  1499  
  1500  exec-ddl
  1501  ALTER TABLE nation INJECT STATISTICS '[
  1502    {
  1503      "columns": ["n_name"],
  1504      "created_at": "2018-01-01 1:00:00.00000+00:00",
  1505      "row_count": 1000000,
  1506      "distinct_count": 2,
  1507      "null_count": 0
  1508    }
  1509  ]'
  1510  ----
  1511  
  1512  opt
  1513  SELECT * FROM nation WHERE n_name = 'FRANCE' OR n_name = 'GERMANY'
  1514  ----
  1515  select
  1516   ├── columns: n_nationkey:1(int!null) n_name:2(char!null) n_regionkey:3(int!null) neighbor:4(char!null)
  1517   ├── stats: [rows=1000000, distinct(2)=2, null(2)=0]
  1518   ├── key: (1)
  1519   ├── fd: (1)-->(2-4)
  1520   ├── scan nation
  1521   │    ├── columns: n_nationkey:1(int!null) n_name:2(char!null) n_regionkey:3(int!null) neighbor:4(char!null)
  1522   │    ├── stats: [rows=1000000, distinct(1)=1000000, null(1)=0, distinct(2)=2, null(2)=0, distinct(3)=100000, null(3)=0, distinct(4)=100000, null(4)=0]
  1523   │    ├── key: (1)
  1524   │    └── fd: (1)-->(2-4)
  1525   └── filters
  1526        └── (n_name:2 = 'FRANCE') OR (n_name:2 = 'GERMANY') [type=bool, outer=(2), constraints=(/2: [/'FRANCE' - /'FRANCE'] [/'GERMANY' - /'GERMANY']; tight)]
  1527  
  1528  opt
  1529  SELECT * FROM nation WHERE (n_name = 'FRANCE' AND neighbor = 'GERMANY') OR (n_name = 'GERMANY' AND neighbor = 'FRANCE')
  1530  ----
  1531  select
  1532   ├── columns: n_nationkey:1(int!null) n_name:2(char!null) n_regionkey:3(int!null) neighbor:4(char!null)
  1533   ├── stats: [rows=6.66666667, distinct(2)=2, null(2)=0, distinct(4)=2, null(4)=0]
  1534   ├── key: (1)
  1535   ├── fd: (1)-->(2-4)
  1536   ├── scan nation
  1537   │    ├── columns: n_nationkey:1(int!null) n_name:2(char!null) n_regionkey:3(int!null) neighbor:4(char!null)
  1538   │    ├── stats: [rows=1000000, distinct(1)=1000000, null(1)=0, distinct(2)=2, null(2)=0, distinct(3)=100000, null(3)=0, distinct(4)=100000, null(4)=0]
  1539   │    ├── key: (1)
  1540   │    └── fd: (1)-->(2-4)
  1541   └── filters
  1542        └── ((n_name:2 = 'FRANCE') AND (neighbor:4 = 'GERMANY')) OR ((n_name:2 = 'GERMANY') AND (neighbor:4 = 'FRANCE')) [type=bool, outer=(2,4), constraints=(/2: [/'FRANCE' - /'FRANCE'] [/'GERMANY' - /'GERMANY']; /4: [/'FRANCE' - /'FRANCE'] [/'GERMANY' - /'GERMANY'])]
  1543  
  1544  opt
  1545  SELECT * FROM nation WHERE (n_name, neighbor) in (('FRANCE', 'GERMANY'), ('GERMANY', 'FRANCE'))
  1546  ----
  1547  select
  1548   ├── columns: n_nationkey:1(int!null) n_name:2(char!null) n_regionkey:3(int!null) neighbor:4(char!null)
  1549   ├── stats: [rows=20, distinct(2)=2, null(2)=0, distinct(4)=2, null(4)=0]
  1550   ├── key: (1)
  1551   ├── fd: (1)-->(2-4)
  1552   ├── scan nation
  1553   │    ├── columns: n_nationkey:1(int!null) n_name:2(char!null) n_regionkey:3(int!null) neighbor:4(char!null)
  1554   │    ├── stats: [rows=1000000, distinct(1)=1000000, null(1)=0, distinct(2)=2, null(2)=0, distinct(3)=100000, null(3)=0, distinct(4)=100000, null(4)=0]
  1555   │    ├── key: (1)
  1556   │    └── fd: (1)-->(2-4)
  1557   └── filters
  1558        └── (n_name:2, neighbor:4) IN (('FRANCE', 'GERMANY'), ('GERMANY', 'FRANCE')) [type=bool, outer=(2,4), constraints=(/2/4: [/'FRANCE'/'GERMANY' - /'FRANCE'/'GERMANY'] [/'GERMANY'/'FRANCE' - /'GERMANY'/'FRANCE']; /4: [/'FRANCE' - /'FRANCE'] [/'GERMANY' - /'GERMANY']; tight)]
  1559  
  1560  # Make sure that the histogram and distinct counts don't interfere with each
  1561  # other during selectivity calculation.
  1562  exec-ddl
  1563  CREATE TABLE hist_and_distinct (
  1564    a INT,
  1565    b INT,
  1566    c INT,
  1567    d INT,
  1568    INDEX idx_a (a)
  1569  )
  1570  ----
  1571  
  1572  exec-ddl
  1573  ALTER TABLE hist_and_distinct INJECT STATISTICS '[
  1574    {
  1575      "columns": ["a"],
  1576      "created_at": "2018-01-01 1:00:00.00000+00:00",
  1577      "row_count": 1000,
  1578      "distinct_count": 40,
  1579      "histo_col_type": "int",
  1580      "histo_buckets": [
  1581        {"num_eq": 0, "num_range": 0, "distinct_range": 0, "upper_bound": "0"},
  1582        {"num_eq": 10, "num_range": 90, "distinct_range": 9, "upper_bound": "10"},
  1583        {"num_eq": 20, "num_range": 180, "distinct_range": 9, "upper_bound": "20"},
  1584        {"num_eq": 30, "num_range": 270, "distinct_range": 9, "upper_bound": "30"},
  1585        {"num_eq": 40, "num_range": 360, "distinct_range": 9, "upper_bound": "40"}
  1586      ]
  1587    },
  1588    {
  1589      "columns": ["b"],
  1590      "created_at": "2018-01-01 1:00:00.00000+00:00",
  1591      "row_count": 1000,
  1592      "distinct_count": 5
  1593    },
  1594    {
  1595      "columns": ["c"],
  1596      "created_at": "2018-01-01 1:00:00.00000+00:00",
  1597      "row_count": 1000,
  1598      "distinct_count": 5
  1599    },
  1600    {
  1601      "columns": ["d"],
  1602      "created_at": "2018-01-01 1:00:00.00000+00:00",
  1603      "row_count": 1000,
  1604      "distinct_count": 120
  1605    }
  1606  ]'
  1607  ----
  1608  
  1609  norm
  1610  SELECT * FROM hist_and_distinct WHERE a = 10 AND b = 10 AND c = 10 AND d >= 10 AND d < 100
  1611  ----
  1612  select
  1613   ├── columns: a:1(int!null) b:2(int!null) c:3(int!null) d:4(int!null)
  1614   ├── stats: [rows=0.39, distinct(1)=0.39, null(1)=0, distinct(2)=0.39, null(2)=0, distinct(3)=0.39, null(3)=0, distinct(4)=0.39, null(4)=0, distinct(1-3)=0.39, null(1-3)=0, distinct(1-4)=0.39, null(1-4)=0]
  1615   │   histogram(1)=  0 0.39
  1616   │                <--- 10
  1617   ├── fd: ()-->(1-3)
  1618   ├── scan hist_and_distinct
  1619   │    ├── columns: a:1(int) b:2(int) c:3(int) d:4(int)
  1620   │    └── stats: [rows=1000, distinct(1)=40, null(1)=0, distinct(2)=5, null(2)=0, distinct(3)=5, null(3)=0, distinct(4)=120, null(4)=0, distinct(1-3)=1000, null(1-3)=0, distinct(1-4)=1000, null(1-4)=0]
  1621   │        histogram(1)=  0  0  90  10  180  20  270  30  360  40
  1622   │                     <--- 0 ---- 10 ----- 20 ----- 30 ----- 40
  1623   └── filters
  1624        ├── (d:4 >= 10) AND (d:4 < 100) [type=bool, outer=(4), constraints=(/4: [/10 - /99]; tight)]
  1625        ├── a:1 = 10 [type=bool, outer=(1), constraints=(/1: [/10 - /10]; tight), fd=()-->(1)]
  1626        ├── b:2 = 10 [type=bool, outer=(2), constraints=(/2: [/10 - /10]; tight), fd=()-->(2)]
  1627        └── c:3 = 10 [type=bool, outer=(3), constraints=(/3: [/10 - /10]; tight), fd=()-->(3)]
  1628  
  1629  # Test that a histogram on a boolean column is used.
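        # As a rough check: the injected histogram puts 900 rows at false and
        # 100 at true, so b = false is estimated at 900 rows below rather than
        # the 1000/2 = 500 rows the distinct count alone would suggest.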
  1630  exec-ddl
  1631  CREATE TABLE hist_bool (a INT, b BOOL)
  1632  ----
  1633  
  1634  exec-ddl
  1635  ALTER TABLE hist_bool INJECT STATISTICS '[
  1636    {
  1637      "columns": ["a"],
  1638      "created_at": "2018-01-01 1:00:00.00000+00:00",
  1639      "row_count": 1000,
  1640      "distinct_count": 40
  1641    },
  1642    {
  1643      "columns": ["b"],
  1644      "created_at": "2018-01-01 1:00:00.00000+00:00",
  1645      "row_count": 1000,
  1646      "distinct_count": 2,
  1647      "histo_col_type": "BOOL",
  1648      "histo_buckets": [
  1649        {"num_eq": 900, "num_range": 0, "distinct_range": 0, "upper_bound": "false"},
  1650        {"num_eq": 100, "num_range": 0, "distinct_range": 0, "upper_bound": "true"}
  1651      ]
  1652    }
  1653  ]'
  1654  ----
  1655  
  1656  norm
  1657  SELECT * FROM hist_bool WHERE b = false
  1658  ----
  1659  select
  1660   ├── columns: a:1(int) b:2(bool!null)
  1661   ├── stats: [rows=900, distinct(2)=1, null(2)=0]
  1662   │   histogram(2)=  0   900
  1663   │                <--- false
  1664   ├── fd: ()-->(2)
  1665   ├── scan hist_bool
  1666   │    ├── columns: a:1(int) b:2(bool)
  1667   │    └── stats: [rows=1000, distinct(2)=2, null(2)=0]
  1668   │        histogram(2)=  0   900   0  100
  1669   │                     <--- false --- true
  1670   └── filters
  1671        └── b:2 = false [type=bool, outer=(2), constraints=(/2: [/false - /false]; tight), fd=()-->(2)]
  1672  
  1673  exec-ddl
  1674  CREATE TABLE t0(c0 INT)
  1675  ----
  1676  
  1677  exec-ddl
  1678  CREATE VIEW v0(c0) AS SELECT CASE WHEN t0.c0 > 0 THEN 1 ELSE t0.rowid END FROM t0
  1679  ----
  1680  
  1681  exec-ddl
  1682  ALTER TABLE t0 INJECT STATISTICS '[
  1683    {
  1684      "columns": ["c0"],
  1685      "created_at": "2020-01-28 03:02:57.841772+00:00",
  1686      "row_count": 3,
  1687      "distinct_count": 1,
  1688      "null_count": 3
  1689    },
  1690    {
  1691      "columns": ["rowid"],
  1692      "created_at": "2020-01-28 03:03:03.012072+00:00",
  1693      "row_count": 2,
  1694      "distinct_count": 2,
  1695      "null_count": 0,
  1696      "histo_buckets": [
  1697        {
  1698          "distinct_range": 0,
  1699          "num_eq": 1,
  1700          "num_range": 0,
  1701          "upper_bound": "3"
  1702        },
  1703        {
  1704          "distinct_range": 0,
  1705          "num_eq": 1,
  1706          "num_range": 0,
  1707          "upper_bound": "4"
  1708        }
  1709      ],
  1710      "histo_col_type": "INT8"
  1711    }
  1712  ]'
  1713  ----
  1714  
  1715  # Regression test for #44418. Make sure inconsistent stats don't cause an
  1716  # error.
  1717  norm
  1718  SELECT * FROM v0 WHERE v0.c0 > 0
  1719  ----
  1720  select
  1721   ├── columns: c0:3(int!null)
  1722   ├── stats: [rows=1, distinct(3)=1, null(3)=0]
  1723   ├── project
  1724   │    ├── columns: rowid:3(int)
  1725   │    ├── stats: [rows=2, distinct(3)=2, null(3)=0]
  1726   │    ├── scan t0
  1727   │    │    ├── columns: c0:1(int) t0.rowid:2(int!null)
  1728   │    │    ├── stats: [rows=2, distinct(1,2)=2, null(1,2)=0]
  1729   │    │    ├── key: (2)
  1730   │    │    └── fd: (2)-->(1)
  1731   │    └── projections
  1732   │         └── CASE WHEN c0:1 > 0 THEN 1 ELSE t0.rowid:2 END [as=rowid:3, type=int, outer=(1,2)]
  1733   └── filters
  1734        └── rowid:3 > 0 [type=bool, outer=(3), constraints=(/3: [/1 - ]; tight)]
  1735  
  1736  exec-ddl
  1737  ALTER TABLE a INJECT STATISTICS '[
  1738    {
  1739      "columns": ["x"],
  1740      "created_at": "2020-01-28 03:02:57.841772+00:00",
  1741      "row_count": 3,
  1742      "distinct_count": 3
  1743    }
  1744  ]'
  1745  ----
  1746  
  1747  # Regression test for #44563. Set a lower bound on the distinct count from
  1748  # the multi-span constraint.
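        # As a rough check against the output below: only the spans
        # [/10 - /10] and [/15 - /15] are countable, so the constraint pins at
        # least 2 distinct values; the estimate settles at distinct(1)=2, or
        # 3 * 2/3 = 2 rows of the 3-row table.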
  1749  norm
  1750  SELECT * FROM a WHERE x <= 5 OR x = 10 OR x = 15
  1751  ----
  1752  select
  1753   ├── columns: x:1(int!null) y:2(int)
  1754   ├── stats: [rows=2, distinct(1)=2, null(1)=0]
  1755   ├── key: (1)
  1756   ├── fd: (1)-->(2)
  1757   ├── scan a
  1758   │    ├── columns: x:1(int!null) y:2(int)
  1759   │    ├── stats: [rows=3, distinct(1)=3, null(1)=0]
  1760   │    ├── key: (1)
  1761   │    └── fd: (1)-->(2)
  1762   └── filters
  1763        └── ((x:1 <= 5) OR (x:1 = 10)) OR (x:1 = 15) [type=bool, outer=(1), constraints=(/1: (/NULL - /5] [/10 - /10] [/15 - /15]; tight)]
  1764  
  1765  exec-ddl
  1766  CREATE TABLE data (
  1767    user_id UUID NOT NULL,
  1768    name VARCHAR(255) NULL,
  1769    created TIMESTAMPTZ,
  1770    INDEX user_id_idx (user_id ASC),
  1771    INDEX name_idx (name ASC),
  1772    INDEX created_idx (created ASC)
  1773  )
  1774  ----
  1775  
  1776  exec-ddl
  1777  ALTER TABLE data INJECT STATISTICS '[
  1778    {
  1779      "columns": ["user_id"],
  1780      "created_at": "2020-01-28 03:02:57.841772+00:00",
  1781      "row_count": 10000,
  1782      "distinct_count": 1000,
  1783      "null_count": 0,
  1784      "histo_buckets": [
  1785        {
  1786          "distinct_range": 0,
  1787          "num_eq": 1,
  1788          "num_range": 0,
  1789          "upper_bound": "3b57b3e4-a68a-9b47-2752-e365d7d8954e"
  1790        },
  1791        {
  1792          "distinct_range": 499,
  1793          "num_eq": 1,
  1794          "num_range": 4998,
  1795          "upper_bound": "6b49a786-387b-d5a2-6582-4e963eb4d537"
  1796        },
  1797        {
  1798          "distinct_range": 499,
  1799          "num_eq": 1,
  1800          "num_range": 4999,
  1801          "upper_bound": "d9739a48-d5be-9a62-e752-34d877e56ba5"
  1802        }
  1803      ],
  1804      "histo_col_type": "UUID"
  1805    },
  1806    {
  1807      "columns": ["name"],
  1808      "created_at": "2020-01-28 03:03:03.012072+00:00",
  1809      "row_count": 10000,
  1810      "distinct_count": 1000,
  1811      "null_count": 0,
  1812      "histo_buckets": [
  1813        {
  1814          "distinct_range": 0,
  1815          "num_eq": 1,
  1816          "num_range": 0,
  1817          "upper_bound": "a"
  1818        },
  1819        {
  1820          "distinct_range": 499,
  1821          "num_eq": 1,
  1822          "num_range": 4998,
  1823          "upper_bound": "b"
  1824        },
  1825        {
  1826          "distinct_range": 499,
  1827          "num_eq": 1,
  1828          "num_range": 4999,
  1829          "upper_bound": "c"
  1830        }
  1831      ],
  1832      "histo_col_type": "STRING"
  1833    },
  1834    {
  1835      "columns": ["created"],
  1836      "created_at": "2020-01-28 03:03:03.012072+00:00",
  1837      "row_count": 10000,
  1838      "distinct_count": 10000,
  1839      "null_count": 0,
  1840      "histo_buckets": [
  1841        {
  1842          "distinct_range": 0,
  1843          "num_eq": 1,
  1844          "num_range": 0,
  1845          "upper_bound": "2020-02-11 07:25:00+00:00"
  1846        },
  1847        {
  1848          "distinct_range": 4998,
  1849          "num_eq": 1,
  1850          "num_range": 4998,
  1851          "upper_bound": "2020-03-21 06:45:41+00:00"
  1852        },
  1853        {
  1854          "distinct_range": 4999,
  1855          "num_eq": 1,
  1856          "num_range": 4999,
  1857          "upper_bound": "2020-04-21 06:25:41+00:00"
  1858        }
  1859      ],
  1860      "histo_col_type": "TIMESTAMPTZ"
  1861    }
  1862  ]'
  1863  ----
  1864  
  1865  # Make sure that using a histogram produces correct stats with equality
  1866  # predicates on data types such as UUID, string, and timestamptz.
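        # For example, in the first query below the value
        # '679d3e56-b985-63d2-5442-e4ba7a8479e3' falls in the histogram bucket
        # with upper bound '6b49a786-387b-d5a2-6582-4e963eb4d537', which has
        # num_range=4998 and distinct_range=499, so the equality is estimated
        # at roughly 4998/499 = 10.016 rows.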
  1867  norm
  1868  SELECT * FROM data WHERE user_id = '679d3e56-b985-63d2-5442-e4ba7a8479e3'
  1869  ----
  1870  select
  1871   ├── columns: user_id:1(uuid!null) name:2(varchar) created:3(timestamptz)
  1872   ├── stats: [rows=10.0160321, distinct(1)=1, null(1)=0]
  1873   │   histogram(1)=  0                  10.016
  1874   │                <--- '679d3e56-b985-63d2-5442-e4ba7a8479e3'
  1875   ├── fd: ()-->(1)
  1876   ├── scan data
  1877   │    ├── columns: user_id:1(uuid!null) name:2(varchar) created:3(timestamptz)
  1878   │    └── stats: [rows=10000, distinct(1)=1000, null(1)=0]
  1879   │        histogram(1)=  0                    1                     4998                    1                     4999                    1
  1880   │                     <--- '3b57b3e4-a68a-9b47-2752-e365d7d8954e' ------ '6b49a786-387b-d5a2-6582-4e963eb4d537' ------ 'd9739a48-d5be-9a62-e752-34d877e56ba5'
  1881   └── filters
  1882        └── user_id:1 = '679d3e56-b985-63d2-5442-e4ba7a8479e3' [type=bool, outer=(1), constraints=(/1: [/'679d3e56-b985-63d2-5442-e4ba7a8479e3' - /'679d3e56-b985-63d2-5442-e4ba7a8479e3']; tight), fd=()-->(1)]
  1883  
  1884  norm
  1885  SELECT * FROM data WHERE name = 'abc'
  1886  ----
  1887  select
  1888   ├── columns: user_id:1(uuid!null) name:2(varchar!null) created:3(timestamptz)
  1889   ├── stats: [rows=10.0160321, distinct(2)=1, null(2)=0]
  1890   │   histogram(2)=  0 10.016
  1891   │                <--- 'abc'
  1892   ├── fd: ()-->(2)
  1893   ├── scan data
  1894   │    ├── columns: user_id:1(uuid!null) name:2(varchar) created:3(timestamptz)
  1895   │    └── stats: [rows=10000, distinct(1)=1000, null(1)=0, distinct(2)=1000, null(2)=0]
  1896   │        histogram(1)=  0                    1                     4998                    1                     4999                    1
  1897   │                     <--- '3b57b3e4-a68a-9b47-2752-e365d7d8954e' ------ '6b49a786-387b-d5a2-6582-4e963eb4d537' ------ 'd9739a48-d5be-9a62-e752-34d877e56ba5'
  1898   │        histogram(2)=  0   1   4998   1   4999   1
  1899   │                     <--- 'a' ------ 'b' ------ 'c'
  1900   └── filters
  1901        └── name:2 = 'abc' [type=bool, outer=(2), constraints=(/2: [/'abc' - /'abc']; tight), fd=()-->(2)]
  1902  
  1903  norm
  1904  SELECT * FROM data WHERE created = '2020-04-11 06:25:41+00:00'
  1905  ----
  1906  select
  1907   ├── columns: user_id:1(uuid!null) name:2(varchar) created:3(timestamptz!null)
  1908   ├── stats: [rows=1, distinct(3)=1, null(3)=0]
  1909   │   histogram(3)=  0               1
  1910   │                <--- '2020-04-11 06:25:41+00:00'
  1911   ├── fd: ()-->(3)
  1912   ├── scan data
  1913   │    ├── columns: user_id:1(uuid!null) name:2(varchar) created:3(timestamptz)
  1914   │    └── stats: [rows=10000, distinct(1)=1000, null(1)=0, distinct(3)=10000, null(3)=0]
  1915   │        histogram(1)=  0                    1                     4998                    1                     4999                    1
  1916   │                     <--- '3b57b3e4-a68a-9b47-2752-e365d7d8954e' ------ '6b49a786-387b-d5a2-6582-4e963eb4d537' ------ 'd9739a48-d5be-9a62-e752-34d877e56ba5'
  1917   │        histogram(3)=  0               1               4998               1               4999               1
  1918   │                     <--- '2020-02-11 07:25:00+00:00' ------ '2020-03-21 06:45:41+00:00' ------ '2020-04-21 06:25:41+00:00'
  1919   └── filters
  1920        └── created:3 = '2020-04-11 06:25:41+00:00' [type=bool, outer=(3), constraints=(/3: [/'2020-04-11 06:25:41+00:00' - /'2020-04-11 06:25:41+00:00']; tight), fd=()-->(3)]
  1921  
  1922  exec-ddl
  1923  ALTER TABLE a INJECT STATISTICS '[
  1924    {
  1925      "columns": ["x"],
  1926      "created_at": "2020-05-26 03:02:57.841772+00:00",
  1927      "row_count": 1000,
  1928      "distinct_count": 1000
  1929    }
  1930  ]'
  1931  ----
  1932  
  1933  # Regression test for #48828. Stats for BETWEEN SYMMETRIC should be based on
  1934  # the tight constraint rather than calculated as 1/3rd of the cardinality.
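        # As a rough check: the tight constraint [/25 - /50] contains 26
        # integers, so distinct(x) is estimated as 26 and, with 1000 rows and
        # 1000 distinct values, the estimate is 26 rows rather than the
        # ~333 rows a flat one-third default would give.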
  1935  norm
  1936  SELECT * FROM a WHERE x BETWEEN SYMMETRIC 25 and 50
  1937  ----
  1938  select
  1939   ├── columns: x:1(int!null) y:2(int)
  1940   ├── cardinality: [0 - 26]
  1941   ├── stats: [rows=26, distinct(1)=26, null(1)=0]
  1942   ├── key: (1)
  1943   ├── fd: (1)-->(2)
  1944   ├── scan a
  1945   │    ├── columns: x:1(int!null) y:2(int)
  1946   │    ├── stats: [rows=1000, distinct(1)=1000, null(1)=0]
  1947   │    ├── key: (1)
  1948   │    └── fd: (1)-->(2)
  1949   └── filters
  1950        └── ((x:1 >= 25) AND (x:1 <= 50)) OR ((x:1 >= 50) AND (x:1 <= 25)) [type=bool, outer=(1), constraints=(/1: [/25 - /50]; tight)]
  1951  
  1952  exec-ddl
  1953  ALTER TABLE b INJECT STATISTICS '[
  1954    {
  1955      "columns": ["x"],
  1956      "created_at": "2020-01-28 03:02:57.841772+00:00",
  1957      "row_count": 10000,
  1958      "distinct_count": 1000
  1959    },
  1960    {
  1961      "columns": ["z"],
  1962      "created_at": "2020-01-28 03:02:57.841772+00:00",
  1963      "row_count": 10000,
  1964      "distinct_count": 100
  1965    } ,
  1966    {
  1967      "columns": ["x","z"],
  1968      "created_at": "2020-01-28 03:02:57.841772+00:00",
  1969      "row_count": 10000,
  1970      "distinct_count": 1500
  1971    }
  1972  ]'
  1973  ----
  1974  
  1975  # Multi-column stats test.
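        # As a rough check: the multi-column stat on (x, z) implies about
        # 10000/1500 = 6.7 rows for x = 1 AND z = 2, whereas treating the
        # columns as independent would give 10000 * (1/1000) * (1/100) = 0.1
        # rows; the estimate below (~6) tracks the multi-column stat.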
  1976  build
  1977  SELECT * FROM b WHERE x = 1 AND z = 2
  1978  ----
  1979  project
  1980   ├── columns: x:1(int!null) z:2(int!null)
  1981   ├── stats: [rows=6.01]
  1982   ├── fd: ()-->(1,2)
  1983   └── select
  1984        ├── columns: x:1(int!null) z:2(int!null) rowid:3(int!null)
  1985        ├── stats: [rows=6.01, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(1,2)=1, null(1,2)=0]
  1986        ├── key: (3)
  1987        ├── fd: ()-->(1,2)
  1988        ├── scan b
  1989        │    ├── columns: x:1(int) z:2(int!null) rowid:3(int!null)
  1990        │    ├── stats: [rows=10000, distinct(1)=1000, null(1)=0, distinct(2)=100, null(2)=0, distinct(3)=10000, null(3)=0, distinct(1,2)=1500, null(1,2)=0]
  1991        │    ├── key: (3)
  1992        │    └── fd: (3)-->(1,2)
  1993        └── filters
  1994             └── (x:1 = 1) AND (z:2 = 2) [type=bool, outer=(1,2), constraints=(/1: [/1 - /1]; /2: [/2 - /2]; tight), fd=()-->(1,2)]