github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/memo/testdata/stats/select

exec-ddl
CREATE TABLE a (x INT PRIMARY KEY, y INT)
----

exec-ddl
CREATE TABLE b (x INT, z INT NOT NULL)
----

exec-ddl
ALTER TABLE a INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 5000,
    "distinct_count": 5000
  },
  {
    "columns": ["y"],
    "created_at": "2018-01-01 1:30:00.00000+00:00",
    "row_count": 4000,
    "distinct_count": 400
  }
]'
----

exec-ddl
ALTER TABLE b INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 5000
  },
  {
    "columns": ["z"],
    "created_at": "2018-01-01 1:30:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 100
  },
  {
    "columns": ["rowid"],
    "created_at": "2018-01-01 1:30:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 10000
  }
]'
----

norm
SELECT * FROM a WHERE true
----
scan a
├── columns: x:1(int!null) y:2(int)
├── stats: [rows=4000]
├── key: (1)
└── fd: (1)-->(2)

norm
SELECT * FROM a WHERE false
----
values
├── columns: x:1(int!null) y:2(int!null)
├── cardinality: [0 - 0]
├── stats: [rows=0]
├── key: ()
└── fd: ()-->(1,2)

# Distinct values calculation with constraints.
norm
SELECT * FROM b WHERE x = 1 AND z = 2 AND rowid >= 5 AND rowid <= 8
----
project
├── columns: x:1(int!null) z:2(int!null)
├── cardinality: [0 - 4]
├── stats: [rows=0.8118008]
├── fd: ()-->(1,2)
└── select
├── columns: x:1(int!null) z:2(int!null) rowid:3(int!null)
├── cardinality: [0 - 4]
├── stats: [rows=0.8118008, distinct(1)=0.8118008, null(1)=0, distinct(2)=0.8118008, null(2)=0, distinct(3)=0.8118008, null(3)=0, distinct(1,2)=0.8118008, null(1,2)=0, distinct(1-3)=0.8118008, null(1-3)=0]
├── key: (3)
├── fd: ()-->(1,2)
├── scan b
│ ├── columns: x:1(int) z:2(int!null) rowid:3(int!null)
│ ├── stats: [rows=10000, distinct(1)=5000, null(1)=0, distinct(2)=100, null(2)=0, distinct(3)=10000, null(3)=0, distinct(1,2)=10000, null(1,2)=0, distinct(1-3)=10000, null(1-3)=0]
│ ├── key: (3)
│ └── fd: (3)-->(1,2)
└── filters
├── (rowid:3 >= 5) AND (rowid:3 <= 8) [type=bool, outer=(3), constraints=(/3: [/5 - /8]; tight)]
├── x:1 = 1 [type=bool, outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)]
└── z:2 = 2 [type=bool, outer=(2), constraints=(/2: [/2 - /2]; tight), fd=()-->(2)]

# Can't determine stats from filter.
norm
SELECT * FROM a WHERE x + y < 10
----
select
├── columns: x:1(int!null) y:2(int)
├── stats: [rows=1333.33333]
├── key: (1)
├── fd: (1)-->(2)
├── scan a
│ ├── columns: x:1(int!null) y:2(int)
│ ├── stats: [rows=4000]
│ ├── key: (1)
│ └── fd: (1)-->(2)
└── filters
└── (x:1 + y:2) < 10 [type=bool, outer=(1,2)]

# Remaining filter.
111 norm 112 SELECT * FROM a WHERE y = 5 AND x + y < 10 113 ---- 114 select 115 ├── columns: x:1(int!null) y:2(int!null) 116 ├── stats: [rows=9.33333333, distinct(1)=9.33333333, null(1)=0, distinct(2)=1, null(2)=0, distinct(1,2)=9.33333333, null(1,2)=0] 117 ├── key: (1) 118 ├── fd: ()-->(2) 119 ├── scan a 120 │ ├── columns: x:1(int!null) y:2(int) 121 │ ├── stats: [rows=4000, distinct(1)=4000, null(1)=0, distinct(2)=400, null(2)=0, distinct(1,2)=4000, null(1,2)=0] 122 │ ├── key: (1) 123 │ └── fd: (1)-->(2) 124 └── filters 125 ├── y:2 = 5 [type=bool, outer=(2), constraints=(/2: [/5 - /5]; tight), fd=()-->(2)] 126 └── x:1 < 5 [type=bool, outer=(1), constraints=(/1: (/NULL - /4]; tight)] 127 128 # Contradiction. 129 norm 130 SELECT * FROM a WHERE x IS NULL 131 ---- 132 select 133 ├── columns: x:1(int!null) y:2(int) 134 ├── cardinality: [0 - 1] 135 ├── stats: [rows=1, distinct(1)=1, null(1)=0] 136 ├── key: () 137 ├── fd: ()-->(1,2) 138 ├── scan a 139 │ ├── columns: x:1(int!null) y:2(int) 140 │ ├── stats: [rows=4000, distinct(1)=4000, null(1)=0] 141 │ ├── key: (1) 142 │ └── fd: (1)-->(2) 143 └── filters 144 └── x:1 IS NULL [type=bool, outer=(1), constraints=(/1: [/NULL - /NULL]; tight), fd=()-->(1)] 145 146 norm 147 SELECT sum(x) FROM b WHERE x > 1000 AND x <= 2000 GROUP BY z 148 ---- 149 project 150 ├── columns: sum:4(decimal!null) 151 ├── stats: [rows=100] 152 └── group-by 153 ├── columns: z:2(int!null) sum:4(decimal!null) 154 ├── grouping columns: z:2(int!null) 155 ├── stats: [rows=100, distinct(2)=100, null(2)=0] 156 ├── key: (2) 157 ├── fd: (2)-->(4) 158 ├── select 159 │ ├── columns: x:1(int!null) z:2(int!null) 160 │ ├── stats: [rows=2000, distinct(1)=1000, null(1)=0, distinct(2)=100, null(2)=0] 161 │ ├── scan b 162 │ │ ├── columns: x:1(int) z:2(int!null) 163 │ │ └── stats: [rows=10000, distinct(1)=5000, null(1)=0, distinct(2)=100, null(2)=0] 164 │ └── filters 165 │ └── (x:1 > 1000) AND (x:1 <= 2000) [type=bool, outer=(1), constraints=(/1: [/1001 - /2000]; tight)] 166 └── aggregations 167 └── sum [as=sum:4, type=decimal, outer=(1)] 168 └── x:1 [type=int] 169 170 # Regression: statistics builder panics when end key is NULL when it's trying 171 # to compute start/end int boundaries. 172 exec-ddl 173 CREATE TABLE idx (x INT PRIMARY KEY, y INT, z INT, INDEX yz (y DESC, z)) 174 ---- 175 176 opt 177 SELECT y FROM idx WHERE y < 5 AND z < 10 178 ---- 179 project 180 ├── columns: y:2(int!null) 181 ├── stats: [rows=311.111111] 182 └── select 183 ├── columns: y:2(int!null) z:3(int!null) 184 ├── stats: [rows=311.111111, distinct(2)=33.3333333, null(2)=0, distinct(3)=33.3333333, null(3)=0, distinct(2,3)=311.111111, null(2,3)=0] 185 ├── scan idx@yz 186 │ ├── columns: y:2(int!null) z:3(int) 187 │ ├── constraint: /-2/3/1: (/4/NULL - /NULL) 188 │ └── stats: [rows=333.333333, distinct(2)=33.3333333, null(2)=0] 189 └── filters 190 └── z:3 < 10 [type=bool, outer=(3), constraints=(/3: (/NULL - /9]; tight)] 191 192 # Regression: certain queries could cause a NaN expected number of rows via a divide-by-zero. 193 exec-ddl 194 CREATE TABLE tab0(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT) 195 ---- 196 197 # Note: it's not clear that this still tests the above issue, but I have left 198 # it here anyway as an interesting test case. I've added another query below 199 # to regression-test the divide-by-zero issue. 
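# (Hedged note: the statistics injected further down give col0 a distinct
# count of 0 with every row NULL, which is the shape of input that previously
# produced a NaN row count through a divide-by-zero.)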
200 opt 201 SELECT pk FROM tab0 WHERE 202 col0 = 1 AND 203 col0 = 2 AND 204 (col0 = 1 OR col0 IN (SELECT col3 FROM tab0)) AND 205 (col0 = 1 OR col0 IN (SELECT col3 FROM tab0)) 206 ---- 207 values 208 ├── columns: pk:1(int!null) 209 ├── cardinality: [0 - 0] 210 ├── stats: [rows=0] 211 ├── key: () 212 └── fd: ()-->(1) 213 214 exec-ddl 215 ALTER TABLE tab0 INJECT STATISTICS '[ 216 { 217 "columns": ["col0"], 218 "created_at": "2018-01-01 1:00:00.00000+00:00", 219 "row_count": 100, 220 "distinct_count": 0, 221 "null_count": 100 222 }, 223 { 224 "columns": ["col3"], 225 "created_at": "2018-01-01 1:00:00.00000+00:00", 226 "row_count": 100, 227 "distinct_count": 10 228 } 229 ]' 230 ---- 231 232 opt 233 SELECT count(*) FROM (SELECT * FROM tab0 WHERE col3 = 10) GROUP BY col0 234 ---- 235 project 236 ├── columns: count:8(int!null) 237 ├── stats: [rows=0.999973439] 238 └── group-by 239 ├── columns: col0:2(int) count_rows:8(int!null) 240 ├── grouping columns: col0:2(int) 241 ├── stats: [rows=0.999973439, distinct(2)=0.999973439, null(2)=0.999973439] 242 ├── key: (2) 243 ├── fd: (2)-->(8) 244 ├── select 245 │ ├── columns: col0:2(int) col3:5(int!null) 246 │ ├── stats: [rows=10, distinct(2)=0.999973439, null(2)=10, distinct(5)=1, null(5)=0] 247 │ ├── fd: ()-->(5) 248 │ ├── scan tab0 249 │ │ ├── columns: col0:2(int) col3:5(int) 250 │ │ └── stats: [rows=100, distinct(2)=1, null(2)=100, distinct(5)=10, null(5)=0] 251 │ └── filters 252 │ └── col3:5 = 10 [type=bool, outer=(5), constraints=(/5: [/10 - /10]; tight), fd=()-->(5)] 253 └── aggregations 254 └── count-rows [as=count_rows:8, type=int] 255 256 257 exec-ddl 258 CREATE TABLE customers (id INT PRIMARY KEY, name STRING, state STRING) 259 ---- 260 261 exec-ddl 262 CREATE TABLE order_history (order_id INT, item_id INT, customer_id INT, year INT) 263 ---- 264 265 exec-ddl 266 CREATE TABLE district (d_id INT, d_w_id INT, d_name STRING, PRIMARY KEY(d_id, d_w_id)) 267 ---- 268 269 exec-ddl 270 ALTER TABLE district INJECT STATISTICS '[ 271 { 272 "columns": ["d_id"], 273 "created_at": "2018-01-01 1:00:00.00000+00:00", 274 "row_count": 100, 275 "distinct_count": 10 276 }, 277 { 278 "columns": ["d_w_id"], 279 "created_at": "2018-01-01 1:30:00.00000+00:00", 280 "row_count": 100, 281 "distinct_count": 10 282 }, 283 { 284 "columns": ["d_name"], 285 "created_at": "2018-01-01 1:30:00.00000+00:00", 286 "row_count": 100, 287 "distinct_count": 100 288 } 289 ]' 290 ---- 291 292 # This tests selectivityFromReducedCols. 293 # Since the reduced column set is (d_id, d_name), and 294 # both columns have distinct count 1, we expect this 295 # to calculate selectivity through selectivityFromReducedCols. 296 # The output is the same as the naive approach. 
norm
SELECT * FROM district WHERE d_id = 1 AND d_name='bobs_burgers'
----
select
├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string!null)
├── stats: [rows=0.91, distinct(1)=0.91, null(1)=0, distinct(3)=0.91, null(3)=0, distinct(1,3)=0.91, null(1,3)=0]
├── key: (2)
├── fd: ()-->(1,3)
├── scan district
│ ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string)
│ ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0, distinct(1,3)=100, null(1,3)=0]
│ ├── key: (1,2)
│ └── fd: (1,2)-->(3)
└── filters
├── d_id:1 = 1 [type=bool, outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)]
└── d_name:3 = 'bobs_burgers' [type=bool, outer=(3), constraints=(/3: [/'bobs_burgers' - /'bobs_burgers']; tight), fd=()-->(3)]

norm
SELECT * FROM district WHERE d_id = 1 and d_name LIKE 'bob'
----
select
├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string!null)
├── stats: [rows=0.91, distinct(1)=0.91, null(1)=0, distinct(3)=0.91, null(3)=0, distinct(1,3)=0.91, null(1,3)=0]
├── key: (2)
├── fd: ()-->(1,3)
├── scan district
│ ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string)
│ ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0, distinct(1,3)=100, null(1,3)=0]
│ ├── key: (1,2)
│ └── fd: (1,2)-->(3)
└── filters
├── d_id:1 = 1 [type=bool, outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)]
└── d_name:3 LIKE 'bob' [type=bool, outer=(3), constraints=(/3: [/'bob' - /'bob']; tight), fd=()-->(3)]

# This tests selectivityFromReducedCols.
# Since (1,2)-->(3), both columns in (1,2) must have distinct count 1 after
# applying the filters in order to use selectivityFromReducedCols. Because
# d_id is constrained by a range rather than a single value, this fails, and
# we fall back to the naive selectivity estimate.
norm
SELECT * FROM district WHERE d_id > 1 AND d_id < 10 AND d_w_id=10 AND d_name='bobs_burgers'
----
select
├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string!null)
├── cardinality: [0 - 8]
├── stats: [rows=0.827, distinct(1)=0.827, null(1)=0, distinct(2)=0.827, null(2)=0, distinct(3)=0.827, null(3)=0, distinct(2,3)=0.827, null(2,3)=0, distinct(1-3)=0.827, null(1-3)=0]
├── key: (1)
├── fd: ()-->(2,3)
├── scan district
│ ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string)
│ ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0, distinct(2,3)=100, null(2,3)=0, distinct(1-3)=100, null(1-3)=0]
│ ├── key: (1,2)
│ └── fd: (1,2)-->(3)
└── filters
├── (d_id:1 > 1) AND (d_id:1 < 10) [type=bool, outer=(1), constraints=(/1: [/2 - /9]; tight)]
├── d_w_id:2 = 10 [type=bool, outer=(2), constraints=(/2: [/10 - /10]; tight), fd=()-->(2)]
└── d_name:3 = 'bobs_burgers' [type=bool, outer=(3), constraints=(/3: [/'bobs_burgers' - /'bobs_burgers']; tight), fd=()-->(3)]

# This tests selectivityFromReducedCols.
# We don't apply the selectivity of d_name, since (1,2)-->(3).
norm
SELECT * FROM district WHERE d_id = 1 AND d_w_id=10 AND d_name='hello'
----
select
├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string!null)
├── cardinality: [0 - 1]
├── stats: [rows=1, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(1,2)=1, null(1,2)=0]
├── key: ()
├── fd: ()-->(1-3)
├── scan district
│ ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string)
│ ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0, distinct(1,2)=100, null(1,2)=0]
│ ├── key: (1,2)
│ └── fd: (1,2)-->(3)
└── filters
├── d_id:1 = 1 [type=bool, outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)]
├── d_w_id:2 = 10 [type=bool, outer=(2), constraints=(/2: [/10 - /10]; tight), fd=()-->(2)]
└── d_name:3 = 'hello' [type=bool, outer=(3), constraints=(/3: [/'hello' - /'hello']; tight), fd=()-->(3)]

exec-ddl
ALTER TABLE customers INJECT STATISTICS '[
  {
    "columns": ["name"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 500
  },
  {
    "columns": ["id"],
    "created_at": "2018-01-01 1:30:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 10000
  }
]'
----

# This tests selectivityFromReducedCols.
# The following two test cases are paired together. The first has a single
# constraint on a non-key column. The second query has two constraints on
# columns that form a (determinant, dependent) FD pair, where the dependent
# column is the one constrained in the first test case. Together these tests
# demonstrate that the selectivity contribution of such a
# (determinant, dependent) pair is the selectivity of the determinant.
# 1/2 join-subquery-selectivityFromReducedCols tests

build
SELECT * FROM (SELECT * FROM customers, order_history WHERE id = customer_id)
WHERE name='andy'
----
select
├── columns: id:1(int!null) name:2(string!null) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int)
├── stats: [rows=2.29713221, distinct(2)=1, null(2)=0]
├── fd: ()-->(2), (1)-->(3), (1)==(6), (6)==(1)
├── project
│ ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int)
│ ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=0, distinct(6)=99, null(6)=0]
│ ├── fd: (1)-->(2,3), (1)==(6), (6)==(1)
│ └── select
│ ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int) rowid:8(int!null)
│ ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=0, distinct(6)=99, null(6)=0]
│ ├── key: (8)
│ ├── fd: (1)-->(2,3), (8)-->(4-7), (1)==(6), (6)==(1)
│ ├── inner-join (cross)
│ │ ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null)
│ │ ├── stats: [rows=10000000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=0, distinct(6)=100, null(6)=100000, distinct(8)=1000, null(8)=0]
│ │ ├── key: (1,8)
│ │ ├── fd: (1)-->(2,3), (8)-->(4-7)
│ │ ├── scan customers
│ │ │ ├── columns: id:1(int!null) name:2(string) state:3(string)
│ │ │ ├── stats: [rows=10000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=0]
│ │ │ ├── key: (1)
│ │ │ └── fd: (1)-->(2,3)
│ │ ├── scan order_history
│ │ │ ├── columns: order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null)
│ │ │ ├── stats: [rows=1000, distinct(6)=100, null(6)=10, distinct(8)=1000, null(8)=0]
│ │ │ ├── key: (8)
│ │ │ └── fd: (8)-->(4-7)
│ │ └── filters (true)
│ └── filters
│ └── id:1 = customer_id:6 [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)]
└── filters
└── name:2 = 'andy' [type=bool, outer=(2), constraints=(/2: [/'andy' - /'andy']; tight), fd=()-->(2)]

# This tests selectivityFromReducedCols.
# The previous test case and the following one are paired together. The first
# has a single constraint on a non-key column. The second query has two
# constraints on columns that form a (determinant, dependent) FD pair, where
# the dependent column is the one constrained in the first test case. Together
# these tests demonstrate that the selectivity contribution of such a
# (determinant, dependent) pair is the selectivity of the determinant.
449 # 2/2 join-subquery-selectivityFromReducedCols tests 450 451 build 452 SELECT * FROM (SELECT * FROM customers, order_history WHERE id = customer_id) 453 WHERE id = 1 AND name='andy' 454 ---- 455 select 456 ├── columns: id:1(int!null) name:2(string!null) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int) 457 ├── stats: [rows=10, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0] 458 ├── fd: ()-->(1-3,6), (1)==(6), (6)==(1) 459 ├── project 460 │ ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int) 461 │ ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=0, distinct(6)=99, null(6)=0] 462 │ ├── fd: (1)-->(2,3), (1)==(6), (6)==(1) 463 │ └── select 464 │ ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int) rowid:8(int!null) 465 │ ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=0, distinct(6)=99, null(6)=0] 466 │ ├── key: (8) 467 │ ├── fd: (1)-->(2,3), (8)-->(4-7), (1)==(6), (6)==(1) 468 │ ├── inner-join (cross) 469 │ │ ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null) 470 │ │ ├── stats: [rows=10000000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=0, distinct(6)=100, null(6)=100000, distinct(8)=1000, null(8)=0] 471 │ │ ├── key: (1,8) 472 │ │ ├── fd: (1)-->(2,3), (8)-->(4-7) 473 │ │ ├── scan customers 474 │ │ │ ├── columns: id:1(int!null) name:2(string) state:3(string) 475 │ │ │ ├── stats: [rows=10000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=0] 476 │ │ │ ├── key: (1) 477 │ │ │ └── fd: (1)-->(2,3) 478 │ │ ├── scan order_history 479 │ │ │ ├── columns: order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null) 480 │ │ │ ├── stats: [rows=1000, distinct(6)=100, null(6)=10, distinct(8)=1000, null(8)=0] 481 │ │ │ ├── key: (8) 482 │ │ │ └── fd: (8)-->(4-7) 483 │ │ └── filters (true) 484 │ └── filters 485 │ └── id:1 = customer_id:6 [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] 486 └── filters 487 └── (id:1 = 1) AND (name:2 = 'andy') [type=bool, outer=(1,2), constraints=(/1: [/1 - /1]; /2: [/'andy' - /'andy']; tight), fd=()-->(1,2)] 488 489 # Test equality conditions where all have distinct count 1. 490 norm 491 SELECT * FROM order_history WHERE item_id = order_id AND item_id = customer_id AND customer_id = 5 492 ---- 493 select 494 ├── columns: order_id:1(int!null) item_id:2(int!null) customer_id:3(int!null) year:4(int) 495 ├── stats: [rows=0.901, distinct(1)=0.901, null(1)=0, distinct(2)=0.901, null(2)=0, distinct(3)=0.901, null(3)=0, distinct(1-3)=0.901, null(1-3)=0] 496 ├── fd: ()-->(1-3) 497 ├── scan order_history 498 │ ├── columns: order_id:1(int) item_id:2(int) customer_id:3(int) year:4(int) 499 │ └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10, distinct(3)=100, null(3)=10, distinct(1-3)=1000, null(1-3)=0.001] 500 └── filters 501 ├── order_id:1 = 5 [type=bool, outer=(1), constraints=(/1: [/5 - /5]; tight), fd=()-->(1)] 502 ├── item_id:2 = 5 [type=bool, outer=(2), constraints=(/2: [/5 - /5]; tight), fd=()-->(2)] 503 └── customer_id:3 = 5 [type=bool, outer=(3), constraints=(/3: [/5 - /5]; tight), fd=()-->(3)] 504 505 # Test equality condition with another condition on one of the attributes. 
506 norm 507 SELECT * FROM order_history WHERE item_id = order_id AND item_id < 5 AND item_id > 0 508 ---- 509 select 510 ├── columns: order_id:1(int!null) item_id:2(int!null) customer_id:3(int) year:4(int) 511 ├── stats: [rows=0.99, distinct(1)=0.99, null(1)=0, distinct(2)=0.99, null(2)=0] 512 ├── fd: (1)==(2), (2)==(1) 513 ├── scan order_history 514 │ ├── columns: order_id:1(int) item_id:2(int) customer_id:3(int) year:4(int) 515 │ └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10] 516 └── filters 517 ├── (item_id:2 < 5) AND (item_id:2 > 0) [type=bool, outer=(2), constraints=(/2: [/1 - /4]; tight)] 518 └── item_id:2 = order_id:1 [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)] 519 520 # Test equality condition with another condition on a different attribute. 521 norm 522 SELECT * FROM order_history WHERE item_id = order_id AND customer_id < 5 AND customer_id > 0 523 ---- 524 select 525 ├── columns: order_id:1(int!null) item_id:2(int!null) customer_id:3(int!null) year:4(int) 526 ├── stats: [rows=0.9801, distinct(1)=0.9801, null(1)=0, distinct(2)=0.9801, null(2)=0, distinct(3)=0.9801, null(3)=0] 527 ├── fd: (1)==(2), (2)==(1) 528 ├── scan order_history 529 │ ├── columns: order_id:1(int) item_id:2(int) customer_id:3(int) year:4(int) 530 │ └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10, distinct(3)=100, null(3)=10] 531 └── filters 532 ├── (customer_id:3 < 5) AND (customer_id:3 > 0) [type=bool, outer=(3), constraints=(/3: [/1 - /4]; tight)] 533 └── item_id:2 = order_id:1 [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)] 534 535 # Test equality condition with another filter condition without a constraint. 536 norm 537 SELECT * FROM order_history WHERE item_id = order_id AND customer_id % 2 = 0 538 ---- 539 select 540 ├── columns: order_id:1(int!null) item_id:2(int!null) customer_id:3(int) year:4(int) 541 ├── stats: [rows=3.267, distinct(1)=3.267, null(1)=0, distinct(2)=3.267, null(2)=0] 542 ├── fd: (1)==(2), (2)==(1) 543 ├── scan order_history 544 │ ├── columns: order_id:1(int) item_id:2(int) customer_id:3(int) year:4(int) 545 │ └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10] 546 └── filters 547 ├── item_id:2 = order_id:1 [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)] 548 └── (customer_id:3 % 2) = 0 [type=bool, outer=(3)] 549 550 exec-ddl 551 CREATE TABLE c (x INT, z INT NOT NULL, UNIQUE INDEX x_idx (x)) 552 ---- 553 554 # Test that the distinct and null counts for x are estimated correctly (since it's a weak 555 # key). 556 norm 557 SELECT * FROM c WHERE x >= 0 AND x < 100 558 ---- 559 select 560 ├── columns: x:1(int!null) z:2(int!null) 561 ├── cardinality: [0 - 100] 562 ├── stats: [rows=100, distinct(1)=100, null(1)=0] 563 ├── key: (1) 564 ├── fd: (1)-->(2) 565 ├── scan c 566 │ ├── columns: x:1(int) z:2(int!null) 567 │ ├── stats: [rows=1000, distinct(1)=991, null(1)=10, distinct(2)=100, null(2)=0] 568 │ ├── lax-key: (1,2) 569 │ └── fd: (1)~~>(2) 570 └── filters 571 └── (x:1 >= 0) AND (x:1 < 100) [type=bool, outer=(1), constraints=(/1: [/0 - /99]; tight)] 572 573 exec-ddl 574 CREATE TABLE uvw (u INT, v INT, w INT) 575 ---- 576 577 # Test selectivity calculations by applying the two constraints in different 578 # orders. 
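# (For orientation: the three plans below apply the same two predicates, u=10
# and u=v, but let different selects absorb them, and the resulting estimates
# of roughly 0.91, 1.04, and 1 rows are close but not identical. This reading
# is inferred from the expected output, not from the statistics code.)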
579 norm 580 SELECT * FROM uvw WHERE u=v AND u=10 581 ---- 582 select 583 ├── columns: u:1(int!null) v:2(int!null) w:3(int) 584 ├── stats: [rows=0.910810811, distinct(1)=0.910810811, null(1)=0, distinct(2)=0.910810811, null(2)=0, distinct(1,2)=0.910810811, null(1,2)=0] 585 ├── fd: ()-->(1,2) 586 ├── scan uvw 587 │ ├── columns: u:1(int) v:2(int) w:3(int) 588 │ └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10, distinct(1,2)=1000, null(1,2)=0.1] 589 └── filters 590 ├── v:2 = 10 [type=bool, outer=(2), constraints=(/2: [/10 - /10]; tight), fd=()-->(2)] 591 └── u:1 = 10 [type=bool, outer=(1), constraints=(/1: [/10 - /10]; tight), fd=()-->(1)] 592 593 norm disable=MergeSelects 594 SELECT * FROM (SELECT * FROM uvw WHERE u=10) WHERE u=v 595 ---- 596 select 597 ├── columns: u:1(int!null) v:2(int!null) w:3(int) 598 ├── stats: [rows=1.03537072, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0] 599 ├── fd: ()-->(1,2), (1)==(2), (2)==(1) 600 ├── select 601 │ ├── columns: u:1(int!null) v:2(int) w:3(int) 602 │ ├── stats: [rows=10, distinct(1)=1, null(1)=0, distinct(2)=9.5617925, null(2)=0.1] 603 │ ├── fd: ()-->(1) 604 │ ├── scan uvw 605 │ │ ├── columns: u:1(int) v:2(int) w:3(int) 606 │ │ └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10] 607 │ └── filters 608 │ └── u:1 = 10 [type=bool, outer=(1), constraints=(/1: [/10 - /10]; tight), fd=()-->(1)] 609 └── filters 610 └── u:1 = v:2 [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)] 611 612 norm disable=MergeSelects 613 SELECT * FROM (SELECT * FROM uvw WHERE u=v) WHERE u=10 614 ---- 615 select 616 ├── columns: u:1(int!null) v:2(int!null) w:3(int) 617 ├── stats: [rows=1, distinct(1)=1, null(1)=0] 618 ├── fd: ()-->(1,2), (1)==(2), (2)==(1) 619 ├── select 620 │ ├── columns: u:1(int!null) v:2(int!null) w:3(int) 621 │ ├── stats: [rows=9.801, distinct(1)=9.801, null(1)=0, distinct(2)=9.801, null(2)=0] 622 │ ├── fd: (1)==(2), (2)==(1) 623 │ ├── scan uvw 624 │ │ ├── columns: u:1(int) v:2(int) w:3(int) 625 │ │ └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10] 626 │ └── filters 627 │ └── u:1 = v:2 [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)] 628 └── filters 629 └── u:1 = 10 [type=bool, outer=(1), constraints=(/1: [/10 - /10]; tight), fd=()-->(1)] 630 631 exec-ddl 632 CREATE TABLE lineitem 633 ( 634 l_orderkey int NOT NULL, 635 l_partkey int NOT NULL, 636 l_suppkey int NOT NULL, 637 l_linenumber int NOT NULL, 638 l_quantity float NOT NULL, 639 l_extendedprice float NOT NULL, 640 l_discount float NOT NULL, 641 l_tax float NOT NULL, 642 l_returnflag char(1) NOT NULL, 643 l_linestatus char(1) NOT NULL, 644 l_shipdate date NOT NULL, 645 l_commitdate date NOT NULL, 646 l_receiptdate date NOT NULL, 647 l_shipinstruct char(25) NOT NULL, 648 l_shipmode char(10) NOT NULL, 649 l_comment varchar(44) NOT NULL, 650 PRIMARY KEY (l_orderkey, l_linenumber), 651 INDEX l_ok (l_orderkey ASC), 652 INDEX l_pk (l_partkey ASC), 653 INDEX l_sk (l_suppkey ASC), 654 INDEX l_sd (l_shipdate ASC), 655 INDEX l_cd (l_commitdate ASC), 656 INDEX l_rd (l_receiptdate ASC), 657 INDEX l_pk_sk (l_partkey ASC, l_suppkey ASC), 658 INDEX l_sk_pk (l_suppkey ASC, l_partkey ASC) 659 ); 660 ---- 661 662 # We can determine that there are exactly 30 days for this range. 
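# (A hedged sketch of the arithmetic, using the default estimates visible in
# the plan below of 1000 rows and 100 distinct values for l_shipdate: the
# constraint [/'1995-09-01' - /'1995-09-30'] spans 30 distinct dates, so the
# selectivity is 30/100 and the expected row count is 1000 * 0.3 = 300.)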
663 opt 664 SELECT * 665 FROM lineitem 666 WHERE 667 l_shipdate >= DATE '1995-09-01' 668 AND l_shipdate < DATE '1995-10-01'; 669 ---- 670 select 671 ├── columns: l_orderkey:1(int!null) l_partkey:2(int!null) l_suppkey:3(int!null) l_linenumber:4(int!null) l_quantity:5(float!null) l_extendedprice:6(float!null) l_discount:7(float!null) l_tax:8(float!null) l_returnflag:9(char!null) l_linestatus:10(char!null) l_shipdate:11(date!null) l_commitdate:12(date!null) l_receiptdate:13(date!null) l_shipinstruct:14(char!null) l_shipmode:15(char!null) l_comment:16(varchar!null) 672 ├── stats: [rows=300, distinct(11)=30, null(11)=0] 673 ├── key: (1,4) 674 ├── fd: (1,4)-->(2,3,5-16) 675 ├── scan lineitem 676 │ ├── columns: l_orderkey:1(int!null) l_partkey:2(int!null) l_suppkey:3(int!null) l_linenumber:4(int!null) l_quantity:5(float!null) l_extendedprice:6(float!null) l_discount:7(float!null) l_tax:8(float!null) l_returnflag:9(char!null) l_linestatus:10(char!null) l_shipdate:11(date!null) l_commitdate:12(date!null) l_receiptdate:13(date!null) l_shipinstruct:14(char!null) l_shipmode:15(char!null) l_comment:16(varchar!null) 677 │ ├── stats: [rows=1000, distinct(1)=100, null(1)=0, distinct(2)=100, null(2)=0, distinct(3)=100, null(3)=0, distinct(4)=100, null(4)=0, distinct(5)=100, null(5)=0, distinct(6)=100, null(6)=0, distinct(7)=100, null(7)=0, distinct(8)=100, null(8)=0, distinct(9)=100, null(9)=0, distinct(10)=100, null(10)=0, distinct(11)=100, null(11)=0, distinct(12)=100, null(12)=0, distinct(13)=100, null(13)=0, distinct(14)=100, null(14)=0, distinct(15)=100, null(15)=0, distinct(16)=100, null(16)=0] 678 │ ├── key: (1,4) 679 │ └── fd: (1,4)-->(2,3,5-16) 680 └── filters 681 └── (l_shipdate:11 >= '1995-09-01') AND (l_shipdate:11 < '1995-10-01') [type=bool, outer=(11), constraints=(/11: [/'1995-09-01' - /'1995-09-30']; tight)] 682 683 # We cannot determine the number of distinct values exactly since the upper 684 # bound of the date range is compared to a timestamp rather than a date. 685 opt 686 SELECT * 687 FROM lineitem 688 WHERE 689 l_shipdate >= DATE '1995-09-01' 690 AND l_shipdate::timestamptz < DATE '1995-10-01'; 691 ---- 692 index-join lineitem 693 ├── columns: l_orderkey:1(int!null) l_partkey:2(int!null) l_suppkey:3(int!null) l_linenumber:4(int!null) l_quantity:5(float!null) l_extendedprice:6(float!null) l_discount:7(float!null) l_tax:8(float!null) l_returnflag:9(char!null) l_linestatus:10(char!null) l_shipdate:11(date!null) l_commitdate:12(date!null) l_receiptdate:13(date!null) l_shipinstruct:14(char!null) l_shipmode:15(char!null) l_comment:16(varchar!null) 694 ├── stats: [rows=111.111111, distinct(11)=33.3333333, null(11)=0] 695 ├── key: (1,4) 696 ├── fd: (1,4)-->(2,3,5-16) 697 └── select 698 ├── columns: l_orderkey:1(int!null) l_linenumber:4(int!null) l_shipdate:11(date!null) 699 ├── stats: [rows=111.111111] 700 ├── key: (1,4) 701 ├── fd: (1,4)-->(11) 702 ├── scan lineitem@l_sd 703 │ ├── columns: l_orderkey:1(int!null) l_linenumber:4(int!null) l_shipdate:11(date!null) 704 │ ├── constraint: /11/1/4: [/'1995-09-01' - ] 705 │ ├── stats: [rows=333.333333, distinct(1)=98.265847, null(1)=0, distinct(4)=98.265847, null(4)=0, distinct(11)=33.3333333, null(11)=0] 706 │ ├── key: (1,4) 707 │ └── fd: (1,4)-->(11) 708 └── filters 709 └── l_shipdate:11::TIMESTAMPTZ < '1995-10-01' [type=bool, outer=(11)] 710 711 # These queries should generate zigzag joins in xform rules. The column statistics 712 # should be comparable between the norm'd and fully optimized expressions. 
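# (For reference, both forms report the same estimates for the fixed columns,
# rows=0.91 with distinct(11)=distinct(12)=0.91, so the zigzag join's column
# statistics do line up with those of the normalized select.)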
713 opt colstat=11 colstat=12 colstat=(11,12) 714 SELECT l_shipdate, l_commitdate, l_orderkey, l_linenumber 715 FROM lineitem 716 WHERE 717 l_shipdate = DATE '1995-09-01' 718 AND l_commitdate = DATE '1995-08-01'; 719 ---- 720 inner-join (zigzag lineitem@l_sd lineitem@l_cd) 721 ├── columns: l_shipdate:11(date!null) l_commitdate:12(date!null) l_orderkey:1(int!null) l_linenumber:4(int!null) 722 ├── eq columns: [1 4] = [1 4] 723 ├── left fixed columns: [11] = ['1995-09-01'] 724 ├── right fixed columns: [12] = ['1995-08-01'] 725 ├── stats: [rows=0.91, distinct(11)=0.91, null(11)=0, distinct(12)=0.91, null(12)=0, distinct(11,12)=0.91, null(11,12)=0] 726 ├── key: (1,4) 727 ├── fd: ()-->(11,12) 728 └── filters 729 ├── l_shipdate:11 = '1995-09-01' [type=bool, outer=(11), constraints=(/11: [/'1995-09-01' - /'1995-09-01']; tight), fd=()-->(11)] 730 └── l_commitdate:12 = '1995-08-01' [type=bool, outer=(12), constraints=(/12: [/'1995-08-01' - /'1995-08-01']; tight), fd=()-->(12)] 731 732 norm colstat=11 colstat=12 colstat=(11,12) 733 SELECT l_shipdate, l_commitdate, l_orderkey, l_linenumber 734 FROM lineitem 735 WHERE 736 l_shipdate = DATE '1995-09-01' 737 AND l_commitdate = DATE '1995-08-01'; 738 ---- 739 select 740 ├── columns: l_shipdate:11(date!null) l_commitdate:12(date!null) l_orderkey:1(int!null) l_linenumber:4(int!null) 741 ├── stats: [rows=0.91, distinct(11)=0.91, null(11)=0, distinct(12)=0.91, null(12)=0, distinct(11,12)=0.91, null(11,12)=0] 742 ├── key: (1,4) 743 ├── fd: ()-->(11,12) 744 ├── scan lineitem 745 │ ├── columns: l_orderkey:1(int!null) l_linenumber:4(int!null) l_shipdate:11(date!null) l_commitdate:12(date!null) 746 │ ├── stats: [rows=1000, distinct(1)=100, null(1)=0, distinct(4)=100, null(4)=0, distinct(11)=100, null(11)=0, distinct(12)=100, null(12)=0, distinct(11,12)=1000, null(11,12)=0] 747 │ ├── key: (1,4) 748 │ └── fd: (1,4)-->(11,12) 749 └── filters 750 ├── l_shipdate:11 = '1995-09-01' [type=bool, outer=(11), constraints=(/11: [/'1995-09-01' - /'1995-09-01']; tight), fd=()-->(11)] 751 └── l_commitdate:12 = '1995-08-01' [type=bool, outer=(12), constraints=(/12: [/'1995-08-01' - /'1995-08-01']; tight), fd=()-->(12)] 752 753 # These queries should also generate zigzag joins in xform rules, like the 754 # ones above. These zigzag joins should be nested inside a lookup join on 755 # the primary index. Since the zigzag join lies in a new memo group, we will 756 # see the zigzag-join-specific stats/logprops build and colStat functions in 757 # action. Again, the colstats of the inner zigzag expression should be 758 # reasonably close to those of the full normalized select expression. 
759 opt colstat=11 colstat=12 colstat=(11,12) 760 SELECT l_shipdate, l_commitdate, l_orderkey, l_linenumber, l_quantity 761 FROM lineitem 762 WHERE 763 l_shipdate = DATE '1995-09-01' 764 AND l_commitdate = DATE '1995-08-01'; 765 ---- 766 inner-join (lookup lineitem) 767 ├── columns: l_shipdate:11(date!null) l_commitdate:12(date!null) l_orderkey:1(int!null) l_linenumber:4(int!null) l_quantity:5(float!null) 768 ├── key columns: [1 4] = [1 4] 769 ├── lookup columns are key 770 ├── stats: [rows=0.91, distinct(11)=0.91, null(11)=0, distinct(12)=0.91, null(12)=0, distinct(11,12)=0.91, null(11,12)=0] 771 ├── key: (1,4) 772 ├── fd: ()-->(11,12), (1,4)-->(5) 773 ├── inner-join (zigzag lineitem@l_sd lineitem@l_cd) 774 │ ├── columns: l_orderkey:1(int!null) l_linenumber:4(int!null) l_shipdate:11(date!null) l_commitdate:12(date!null) 775 │ ├── eq columns: [1 4] = [1 4] 776 │ ├── left fixed columns: [11] = ['1995-09-01'] 777 │ ├── right fixed columns: [12] = ['1995-08-01'] 778 │ ├── stats: [rows=0.91, distinct(11)=0.91, null(11)=0, distinct(12)=0.91, null(12)=0, distinct(11,12)=0.91, null(11,12)=0] 779 │ ├── fd: ()-->(11,12) 780 │ └── filters 781 │ ├── l_shipdate:11 = '1995-09-01' [type=bool, outer=(11), constraints=(/11: [/'1995-09-01' - /'1995-09-01']; tight), fd=()-->(11)] 782 │ └── l_commitdate:12 = '1995-08-01' [type=bool, outer=(12), constraints=(/12: [/'1995-08-01' - /'1995-08-01']; tight), fd=()-->(12)] 783 └── filters (true) 784 785 norm colstat=11 colstat=12 colstat=(11,12) 786 SELECT l_shipdate, l_commitdate, l_orderkey, l_linenumber, l_quantity 787 FROM lineitem 788 WHERE 789 l_shipdate = DATE '1995-09-01' 790 AND l_commitdate = DATE '1995-08-01'; 791 ---- 792 select 793 ├── columns: l_shipdate:11(date!null) l_commitdate:12(date!null) l_orderkey:1(int!null) l_linenumber:4(int!null) l_quantity:5(float!null) 794 ├── stats: [rows=0.91, distinct(11)=0.91, null(11)=0, distinct(12)=0.91, null(12)=0, distinct(11,12)=0.91, null(11,12)=0] 795 ├── key: (1,4) 796 ├── fd: ()-->(11,12), (1,4)-->(5) 797 ├── scan lineitem 798 │ ├── columns: l_orderkey:1(int!null) l_linenumber:4(int!null) l_quantity:5(float!null) l_shipdate:11(date!null) l_commitdate:12(date!null) 799 │ ├── stats: [rows=1000, distinct(1)=100, null(1)=0, distinct(4)=100, null(4)=0, distinct(5)=100, null(5)=0, distinct(11)=100, null(11)=0, distinct(12)=100, null(12)=0, distinct(11,12)=1000, null(11,12)=0] 800 │ ├── key: (1,4) 801 │ └── fd: (1,4)-->(5,11,12) 802 └── filters 803 ├── l_shipdate:11 = '1995-09-01' [type=bool, outer=(11), constraints=(/11: [/'1995-09-01' - /'1995-09-01']; tight), fd=()-->(11)] 804 └── l_commitdate:12 = '1995-08-01' [type=bool, outer=(12), constraints=(/12: [/'1995-08-01' - /'1995-08-01']; tight), fd=()-->(12)] 805 806 # Create a table with an inverted index to test statistics around 807 # JSON containment filter operators and zigzag joins. 808 exec-ddl 809 CREATE TABLE tjson (a INT PRIMARY KEY, b JSON, c JSON, INVERTED INDEX b_idx (b)) 810 ---- 811 812 exec-ddl 813 ALTER TABLE tjson INJECT STATISTICS '[ 814 { 815 "columns": ["a"], 816 "created_at": "2018-01-01 2:00:00.00000+00:00", 817 "row_count": 5000, 818 "distinct_count": 5000 819 }, 820 { 821 "columns": ["b"], 822 "created_at": "2018-01-01 2:00:00.00000+00:00", 823 "row_count": 5000, 824 "distinct_count": 2500 825 }, 826 { 827 "columns": ["c"], 828 "created_at": "2018-01-01 2:00:00.00000+00:00", 829 "row_count": 5000, 830 "distinct_count": 2500 831 } 832 ]' 833 ---- 834 835 # Should generate a scan on the inverted index. 
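# (Hedged observation read off the expected output: the single-path
# containment below is estimated at 5000/9, about 555.56 rows, and the
# two-path variant further down at 5000/81, about 61.73 rows, consistent with
# each containment path contributing roughly a 1/9 selectivity; the exact
# constant is an assumption, not taken from the statistics code.)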
836 opt 837 SELECT * FROM tjson WHERE b @> '{"a":"b"}' 838 ---- 839 index-join tjson 840 ├── columns: a:1(int!null) b:2(jsonb) c:3(jsonb) 841 ├── stats: [rows=555.555556] 842 ├── key: (1) 843 ├── fd: (1)-->(2,3) 844 └── scan tjson@b_idx 845 ├── columns: a:1(int!null) 846 ├── constraint: /2/1: [/'{"a": "b"}' - /'{"a": "b"}'] 847 ├── stats: [rows=555.555556] 848 └── key: (1) 849 850 # Should generate a zigzag join on the inverted index. Row count should be 851 # strictly lower than the above scan. 852 opt 853 SELECT * FROM tjson WHERE b @> '{"a":"b", "c":"d"}' 854 ---- 855 inner-join (lookup tjson) 856 ├── columns: a:1(int!null) b:2(jsonb) c:3(jsonb) 857 ├── key columns: [1] = [1] 858 ├── lookup columns are key 859 ├── stats: [rows=61.7283951] 860 ├── key: (1) 861 ├── fd: (1)-->(2,3) 862 ├── inner-join (zigzag tjson@b_idx tjson@b_idx) 863 │ ├── columns: a:1(int!null) 864 │ ├── eq columns: [1] = [1] 865 │ ├── left fixed columns: [2] = ['{"a": "b"}'] 866 │ ├── right fixed columns: [2] = ['{"c": "d"}'] 867 │ ├── stats: [rows=61.7283951, distinct(1)=61.7283951, null(1)=0] 868 │ └── filters (true) 869 └── filters 870 └── b:2 @> '{"a": "b", "c": "d"}' [type=bool, outer=(2)] 871 872 # Should generate a select on the table with a JSON filter, since c does not 873 # have an inverted index. 874 opt 875 SELECT * FROM tjson WHERE c @> '{"a":"b"}' 876 ---- 877 select 878 ├── columns: a:1(int!null) b:2(jsonb) c:3(jsonb) 879 ├── stats: [rows=555.555556] 880 ├── key: (1) 881 ├── fd: (1)-->(2,3) 882 ├── scan tjson 883 │ ├── columns: a:1(int!null) b:2(jsonb) c:3(jsonb) 884 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0] 885 │ ├── key: (1) 886 │ └── fd: (1)-->(2,3) 887 └── filters 888 └── c:3 @> '{"a": "b"}' [type=bool, outer=(3)] 889 890 # Should have a lower row count than the above case, due to a containment query 891 # on 2 json paths. 892 opt 893 SELECT * FROM tjson WHERE c @> '{"a":"b", "c":"d"}' 894 ---- 895 select 896 ├── columns: a:1(int!null) b:2(jsonb) c:3(jsonb) 897 ├── stats: [rows=61.7283951] 898 ├── key: (1) 899 ├── fd: (1)-->(2,3) 900 ├── scan tjson 901 │ ├── columns: a:1(int!null) b:2(jsonb) c:3(jsonb) 902 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0] 903 │ ├── key: (1) 904 │ └── fd: (1)-->(2,3) 905 └── filters 906 └── c:3 @> '{"a": "b", "c": "d"}' [type=bool, outer=(3)] 907 908 # Bump up null counts. 
909 exec-ddl 910 ALTER TABLE a INJECT STATISTICS '[ 911 { 912 "columns": ["x"], 913 "created_at": "2018-01-01 2:00:00.00000+00:00", 914 "row_count": 5000, 915 "distinct_count": 5000 916 }, 917 { 918 "columns": ["y"], 919 "created_at": "2018-01-01 2:00:00.00000+00:00", 920 "row_count": 4000, 921 "distinct_count": 400, 922 "null_count": 1000 923 } 924 ]' 925 ---- 926 927 exec-ddl 928 ALTER TABLE b INJECT STATISTICS '[ 929 { 930 "columns": ["x"], 931 "created_at": "2018-01-01 2:00:00.00000+00:00", 932 "row_count": 10000, 933 "distinct_count": 5000, 934 "null_count": 2000 935 }, 936 { 937 "columns": ["z"], 938 "created_at": "2018-01-01 2:00:00.00000+00:00", 939 "row_count": 10000, 940 "distinct_count": 100 941 }, 942 { 943 "columns": ["rowid"], 944 "created_at": "2018-01-01 1:30:00.00000+00:00", 945 "row_count": 10000, 946 "distinct_count": 10000 947 } 948 ]' 949 ---- 950 951 exec-ddl 952 ALTER TABLE c INJECT STATISTICS '[ 953 { 954 "columns": ["x"], 955 "created_at": "2018-01-01 2:00:00.00000+00:00", 956 "row_count": 10000, 957 "distinct_count": 5000, 958 "null_count": 5000 959 }, 960 { 961 "columns": ["z"], 962 "created_at": "2018-01-01 2:00:00.00000+00:00", 963 "row_count": 10000, 964 "distinct_count": 10000 965 } 966 ]' 967 ---- 968 969 # Distinct values calculation with constraints. 970 norm 971 SELECT * FROM b WHERE x = 1 AND z = 2 AND rowid >= 5 AND rowid <= 8 972 ---- 973 project 974 ├── columns: x:1(int!null) z:2(int!null) 975 ├── cardinality: [0 - 4] 976 ├── stats: [rows=0.811440928] 977 ├── fd: ()-->(1,2) 978 └── select 979 ├── columns: x:1(int!null) z:2(int!null) rowid:3(int!null) 980 ├── cardinality: [0 - 4] 981 ├── stats: [rows=0.811440928, distinct(1)=0.811440928, null(1)=0, distinct(2)=0.811440928, null(2)=0, distinct(3)=0.811440928, null(3)=0, distinct(1,2)=0.811440928, null(1,2)=0, distinct(1-3)=0.811440928, null(1-3)=0] 982 ├── key: (3) 983 ├── fd: ()-->(1,2) 984 ├── scan b 985 │ ├── columns: x:1(int) z:2(int!null) rowid:3(int!null) 986 │ ├── stats: [rows=10000, distinct(1)=5000, null(1)=2000, distinct(2)=100, null(2)=0, distinct(3)=10000, null(3)=0, distinct(1,2)=10000, null(1,2)=0, distinct(1-3)=10000, null(1-3)=0] 987 │ ├── key: (3) 988 │ └── fd: (3)-->(1,2) 989 └── filters 990 ├── (rowid:3 >= 5) AND (rowid:3 <= 8) [type=bool, outer=(3), constraints=(/3: [/5 - /8]; tight)] 991 ├── x:1 = 1 [type=bool, outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)] 992 └── z:2 = 2 [type=bool, outer=(2), constraints=(/2: [/2 - /2]; tight), fd=()-->(2)] 993 994 # Can't determine stats from filter. 995 norm 996 SELECT * FROM a WHERE x + y < 10 997 ---- 998 select 999 ├── columns: x:1(int!null) y:2(int) 1000 ├── stats: [rows=1666.66667] 1001 ├── key: (1) 1002 ├── fd: (1)-->(2) 1003 ├── scan a 1004 │ ├── columns: x:1(int!null) y:2(int) 1005 │ ├── stats: [rows=5000] 1006 │ ├── key: (1) 1007 │ └── fd: (1)-->(2) 1008 └── filters 1009 └── (x:1 + y:2) < 10 [type=bool, outer=(1,2)] 1010 1011 # Remaining filter. 
norm
SELECT * FROM a WHERE y = 5 AND x + y < 10
----
select
├── columns: x:1(int!null) y:2(int!null)
├── stats: [rows=9.35672515, distinct(1)=9.35672515, null(1)=0, distinct(2)=1, null(2)=0, distinct(1,2)=9.35672515, null(1,2)=0]
├── key: (1)
├── fd: ()-->(2)
├── scan a
│ ├── columns: x:1(int!null) y:2(int)
│ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=1000, distinct(1,2)=5000, null(1,2)=0]
│ ├── key: (1)
│ └── fd: (1)-->(2)
└── filters
├── y:2 = 5 [type=bool, outer=(2), constraints=(/2: [/5 - /5]; tight), fd=()-->(2)]
└── x:1 < 5 [type=bool, outer=(1), constraints=(/1: (/NULL - /4]; tight)]

# Test that the null count for x is propagated correctly (since it's a weak
# key).
norm
SELECT * FROM c WHERE x >= 0 AND x < 100
----
select
├── columns: x:1(int!null) z:2(int!null)
├── cardinality: [0 - 100]
├── stats: [rows=100, distinct(1)=100, null(1)=0]
├── key: (1)
├── fd: (1)-->(2)
├── scan c
│ ├── columns: x:1(int) z:2(int!null)
│ ├── stats: [rows=10000, distinct(1)=5000, null(1)=5000, distinct(2)=10000, null(2)=0]
│ ├── lax-key: (1,2)
│ └── fd: (1)~~>(2)
└── filters
└── (x:1 >= 0) AND (x:1 < 100) [type=bool, outer=(1), constraints=(/1: [/0 - /99]; tight)]

# Bump up null counts.
exec-ddl
ALTER TABLE customers INJECT STATISTICS '[
  {
    "columns": ["name"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 500,
    "null_count": 2000
  },
  {
    "columns": ["id"],
    "created_at": "2018-01-01 1:30:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 10000
  }
]'
----

# This tests selectivityFromReducedCols.
# The following two test cases are paired together. The first has a single
# constraint on a non-key column. The second query has two constraints on
# columns that form a (determinant, dependent) FD pair, where the dependent
# column is the one constrained in the first test case. Together these tests
# demonstrate that the selectivity contribution of such a
# (determinant, dependent) pair is the selectivity of the determinant.
# 1/2 join-subquery-selectivityFromReducedCols tests

build
SELECT * FROM (SELECT * FROM customers, order_history WHERE id = customer_id)
WHERE name='andy'
----
select
├── columns: id:1(int!null) name:2(string!null) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int)
├── stats: [rows=1.84197978, distinct(2)=1, null(2)=0]
├── fd: ()-->(2), (1)-->(3), (1)==(6), (6)==(1)
├── project
│ ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int)
│ ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=198, distinct(6)=99, null(6)=0]
│ ├── fd: (1)-->(2,3), (1)==(6), (6)==(1)
│ └── select
│ ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int) rowid:8(int!null)
│ ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=198, distinct(6)=99, null(6)=0]
│ ├── key: (8)
│ ├── fd: (1)-->(2,3), (8)-->(4-7), (1)==(6), (6)==(1)
│ ├── inner-join (cross)
│ │ ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null)
│ │ ├── stats: [rows=10000000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=2000000, distinct(6)=100, null(6)=100000, distinct(8)=1000, null(8)=0]
│ │ ├── key: (1,8)
│ │ ├── fd: (1)-->(2,3), (8)-->(4-7)
│ │ ├── scan customers
│ │ │ ├── columns: id:1(int!null) name:2(string) state:3(string)
│ │ │ ├── stats: [rows=10000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=2000]
│ │ │ ├── key: (1)
│ │ │ └── fd: (1)-->(2,3)
│ │ ├── scan order_history
│ │ │ ├── columns: order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null)
│ │ │ ├── stats: [rows=1000, distinct(6)=100, null(6)=10, distinct(8)=1000, null(8)=0]
│ │ │ ├── key: (8)
│ │ │ └── fd: (8)-->(4-7)
│ │ └── filters (true)
│ └── filters
│ └── id:1 = customer_id:6 [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)]
└── filters
└── name:2 = 'andy' [type=bool, outer=(2), constraints=(/2: [/'andy' - /'andy']; tight), fd=()-->(2)]

# This tests selectivityFromReducedCols.
# The previous test case and the following one are paired together. The first
# has a single constraint on a non-key column. The second query has two
# constraints on columns that form a (determinant, dependent) FD pair, where
# the dependent column is the one constrained in the first test case. Together
# these tests demonstrate that the selectivity contribution of such a
# (determinant, dependent) pair is the selectivity of the determinant.
1123 # 2/2 join-subquery-selectivityFromReducedCols tests 1124 1125 build 1126 SELECT * FROM (SELECT * FROM customers, order_history WHERE id = customer_id) 1127 WHERE id = 1 AND name='andy' 1128 ---- 1129 select 1130 ├── columns: id:1(int!null) name:2(string!null) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int) 1131 ├── stats: [rows=8, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0] 1132 ├── fd: ()-->(1-3,6), (1)==(6), (6)==(1) 1133 ├── project 1134 │ ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int) 1135 │ ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=198, distinct(6)=99, null(6)=0] 1136 │ ├── fd: (1)-->(2,3), (1)==(6), (6)==(1) 1137 │ └── select 1138 │ ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int!null) year:7(int) rowid:8(int!null) 1139 │ ├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(2)=430.972148, null(2)=198, distinct(6)=99, null(6)=0] 1140 │ ├── key: (8) 1141 │ ├── fd: (1)-->(2,3), (8)-->(4-7), (1)==(6), (6)==(1) 1142 │ ├── inner-join (cross) 1143 │ │ ├── columns: id:1(int!null) name:2(string) state:3(string) order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null) 1144 │ │ ├── stats: [rows=10000000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=2000000, distinct(6)=100, null(6)=100000, distinct(8)=1000, null(8)=0] 1145 │ │ ├── key: (1,8) 1146 │ │ ├── fd: (1)-->(2,3), (8)-->(4-7) 1147 │ │ ├── scan customers 1148 │ │ │ ├── columns: id:1(int!null) name:2(string) state:3(string) 1149 │ │ │ ├── stats: [rows=10000, distinct(1)=10000, null(1)=0, distinct(2)=500, null(2)=2000] 1150 │ │ │ ├── key: (1) 1151 │ │ │ └── fd: (1)-->(2,3) 1152 │ │ ├── scan order_history 1153 │ │ │ ├── columns: order_id:4(int) item_id:5(int) customer_id:6(int) year:7(int) rowid:8(int!null) 1154 │ │ │ ├── stats: [rows=1000, distinct(6)=100, null(6)=10, distinct(8)=1000, null(8)=0] 1155 │ │ │ ├── key: (8) 1156 │ │ │ └── fd: (8)-->(4-7) 1157 │ │ └── filters (true) 1158 │ └── filters 1159 │ └── id:1 = customer_id:6 [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] 1160 └── filters 1161 └── (id:1 = 1) AND (name:2 = 'andy') [type=bool, outer=(1,2), constraints=(/1: [/1 - /1]; /2: [/'andy' - /'andy']; tight), fd=()-->(1,2)] 1162 1163 exec-ddl 1164 CREATE TABLE nulls (x INT, y INT); 1165 ---- 1166 1167 exec-ddl 1168 ALTER TABLE nulls INJECT STATISTICS '[ 1169 { 1170 "columns": ["x"], 1171 "created_at": "2018-01-01 1:00:00.00000+00:00", 1172 "row_count": 1000, 1173 "distinct_count": 0, 1174 "null_count": 1000 1175 } 1176 ]' 1177 ---- 1178 1179 build 1180 SELECT * FROM nulls WHERE x = y 1181 ---- 1182 project 1183 ├── columns: x:1(int!null) y:2(int!null) 1184 ├── stats: [rows=9.9e-10] 1185 ├── fd: (1)==(2), (2)==(1) 1186 └── select 1187 ├── columns: x:1(int!null) y:2(int!null) rowid:3(int!null) 1188 ├── stats: [rows=9.9e-10, distinct(1)=1e-10, null(1)=0, distinct(2)=1e-10, null(2)=0] 1189 ├── key: (3) 1190 ├── fd: (3)-->(1,2), (1)==(2), (2)==(1) 1191 ├── scan nulls 1192 │ ├── columns: x:1(int) y:2(int) rowid:3(int!null) 1193 │ ├── stats: [rows=1000, distinct(1)=1, null(1)=1000, distinct(2)=100, null(2)=10, distinct(3)=1000, null(3)=0] 1194 │ ├── key: (3) 1195 │ └── fd: (3)-->(1,2) 1196 └── filters 1197 └── x:1 = y:2 [type=bool, outer=(1,2), constraints=(/1: (/NULL - ]; /2: (/NULL - ]), fd=(1)==(2), (2)==(1)] 1198 1199 build 1200 
SELECT * FROM nulls WHERE x IS NULL 1201 ---- 1202 project 1203 ├── columns: x:1(int) y:2(int) 1204 ├── stats: [rows=1000] 1205 ├── fd: ()-->(1) 1206 └── select 1207 ├── columns: x:1(int) y:2(int) rowid:3(int!null) 1208 ├── stats: [rows=1000, distinct(1)=1, null(1)=1000] 1209 ├── key: (3) 1210 ├── fd: ()-->(1), (3)-->(2) 1211 ├── scan nulls 1212 │ ├── columns: x:1(int) y:2(int) rowid:3(int!null) 1213 │ ├── stats: [rows=1000, distinct(1)=1, null(1)=1000, distinct(3)=1000, null(3)=0] 1214 │ ├── key: (3) 1215 │ └── fd: (3)-->(1,2) 1216 └── filters 1217 └── x:1 IS NULL [type=bool, outer=(1), constraints=(/1: [/NULL - /NULL]; tight), fd=()-->(1)] 1218 1219 1220 # Regression test for #34440. Ensure the null count for x is less than or equal 1221 # to the row count. 1222 build colstat=1 1223 SELECT * FROM nulls WHERE y = 3 1224 ---- 1225 project 1226 ├── columns: x:1(int) y:2(int!null) 1227 ├── stats: [rows=10, distinct(1)=0.999956829, null(1)=10] 1228 ├── fd: ()-->(2) 1229 └── select 1230 ├── columns: x:1(int) y:2(int!null) rowid:3(int!null) 1231 ├── stats: [rows=10, distinct(1)=0.999956829, null(1)=10, distinct(2)=1, null(2)=0] 1232 ├── key: (3) 1233 ├── fd: ()-->(2), (3)-->(1) 1234 ├── scan nulls 1235 │ ├── columns: x:1(int) y:2(int) rowid:3(int!null) 1236 │ ├── stats: [rows=1000, distinct(1)=1, null(1)=1000, distinct(2)=100, null(2)=10, distinct(3)=1000, null(3)=0] 1237 │ ├── key: (3) 1238 │ └── fd: (3)-->(1,2) 1239 └── filters 1240 └── y:2 = 3 [type=bool, outer=(2), constraints=(/2: [/3 - /3]; tight), fd=()-->(2)] 1241 1242 # Sample testcase using exprgen. 1243 expr colstat=1 colstat=2 1244 (Select 1245 (FakeRel 1246 [ 1247 (OutputCols [ (NewColumn "a" "int") (NewColumn "b" "int") (NewColumn "c" "int")] ) 1248 (Cardinality "-") 1249 (Stats `[ 1250 { 1251 "columns": ["a"], 1252 "distinct_count": 100, 1253 "null_count": 0, 1254 "row_count": 100, 1255 "created_at": "2018-01-01 1:00:00.00000+00:00" 1256 }, 1257 { 1258 "columns": ["b"], 1259 "distinct_count": 20, 1260 "null_count": 5, 1261 "row_count": 100, 1262 "created_at": "2018-01-01 1:00:00.00000+00:00" 1263 } 1264 ]`) 1265 ] 1266 ) 1267 [ (Eq (Var "b") (Const 1 "int")) ] 1268 ) 1269 ---- 1270 select 1271 ├── columns: a:1(int) b:2(int!null) c:3(int) 1272 ├── stats: [rows=5, distinct(1)=5, null(1)=0, distinct(2)=1, null(2)=0] 1273 ├── fd: ()-->(2) 1274 ├── fake-rel 1275 │ ├── columns: a:1(int) b:2(int) c:3(int) 1276 │ └── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=20, null(2)=5] 1277 └── filters 1278 └── b:2 = 1 [type=bool, outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] 1279 1280 # Regression test for #37754. 
1281 norm 1282 SELECT 1283 1 1284 FROM 1285 ( 1286 VALUES 1287 (true, NULL, B'001000101101110'), 1288 (true, e'19\x1e':::STRING, NULL) 1289 ) 1290 AS t (_bool, _string, _bit) 1291 GROUP BY 1292 _string, _bit 1293 HAVING 1294 min(_bool) 1295 ---- 1296 project 1297 ├── columns: "?column?":5(int!null) 1298 ├── cardinality: [0 - 2] 1299 ├── stats: [rows=1] 1300 ├── fd: ()-->(5) 1301 ├── select 1302 │ ├── columns: column2:2(string) column3:3(varbit) min:4(bool!null) 1303 │ ├── cardinality: [0 - 2] 1304 │ ├── stats: [rows=1, distinct(4)=1, null(4)=0] 1305 │ ├── key: (2,3) 1306 │ ├── fd: ()-->(4) 1307 │ ├── group-by 1308 │ │ ├── columns: column2:2(string) column3:3(varbit) min:4(bool!null) 1309 │ │ ├── grouping columns: column2:2(string) column3:3(varbit) 1310 │ │ ├── cardinality: [1 - 2] 1311 │ │ ├── stats: [rows=2, distinct(4)=2, null(4)=0, distinct(2,3)=2, null(2,3)=0] 1312 │ │ ├── key: (2,3) 1313 │ │ ├── fd: (2,3)-->(4) 1314 │ │ ├── values 1315 │ │ │ ├── columns: column1:1(bool!null) column2:2(string) column3:3(varbit) 1316 │ │ │ ├── cardinality: [2 - 2] 1317 │ │ │ ├── stats: [rows=2, distinct(2,3)=2, null(2,3)=0] 1318 │ │ │ ├── (true, NULL, B'001000101101110') [type=tuple{bool, string, varbit}] 1319 │ │ │ └── (true, e'19\x1e', NULL) [type=tuple{bool, string, varbit}] 1320 │ │ └── aggregations 1321 │ │ └── min [as=min:4, type=bool, outer=(1)] 1322 │ │ └── column1:1 [type=bool] 1323 │ └── filters 1324 │ └── min:4 [type=bool, outer=(4), constraints=(/4: [/true - /true]; tight), fd=()-->(4)] 1325 └── projections 1326 └── 1 [as="?column?":5, type=int] 1327 1328 # Test that distinct count estimates are correct for date ranges. 1329 exec-ddl 1330 CREATE TABLE date_test ( 1331 k INT PRIMARY KEY, 1332 d1 date NOT NULL, 1333 d2 date NOT NULL, 1334 d3 date NOT NULL, 1335 INDEX d1_idx (d1 ASC), 1336 INDEX d2_idx (d2 DESC) 1337 ) 1338 ---- 1339 1340 opt 1341 SELECT d1 FROM date_test WHERE d1 > DATE '1995-10-01' AND d1 < DATE '1995-11-01' 1342 ---- 1343 scan date_test@d1_idx 1344 ├── columns: d1:2(date!null) 1345 ├── constraint: /2/1: [/'1995-10-02' - /'1995-10-31'] 1346 └── stats: [rows=300, distinct(2)=30, null(2)=0] 1347 1348 opt 1349 SELECT d1 FROM date_test WHERE d1 >= DATE '1995-10-01' AND d1 <= DATE '1995-11-01' 1350 ---- 1351 scan date_test@d1_idx 1352 ├── columns: d1:2(date!null) 1353 ├── constraint: /2/1: [/'1995-10-01' - /'1995-11-01'] 1354 └── stats: [rows=320, distinct(2)=32, null(2)=0] 1355 1356 opt 1357 SELECT d2 FROM date_test WHERE d2 > DATE '1903-10-01' AND d2 <= DATE '1903-11-01' 1358 ---- 1359 scan date_test@d2_idx 1360 ├── columns: d2:3(date!null) 1361 ├── constraint: /-3/1: [/'1903-11-01' - /'1903-10-02'] 1362 └── stats: [rows=310, distinct(3)=31, null(3)=0] 1363 1364 opt 1365 SELECT d2 FROM date_test WHERE d2 >= DATE '2003-10-01' AND d2 < DATE '2003-11-01' 1366 ---- 1367 scan date_test@d2_idx 1368 ├── columns: d2:3(date!null) 1369 ├── constraint: /-3/1: [/'2003-10-31' - /'2003-10-01'] 1370 └── stats: [rows=310, distinct(3)=31, null(3)=0] 1371 1372 opt 1373 SELECT d3 FROM date_test WHERE d3 >= DATE '2003-10-01' AND d3 < DATE '2003-11-01' 1374 ---- 1375 select 1376 ├── columns: d3:4(date!null) 1377 ├── stats: [rows=310, distinct(4)=31, null(4)=0] 1378 ├── scan date_test 1379 │ ├── columns: d3:4(date!null) 1380 │ └── stats: [rows=1000, distinct(4)=100, null(4)=0] 1381 └── filters 1382 └── (d3:4 >= '2003-10-01') AND (d3:4 < '2003-11-01') [type=bool, outer=(4), constraints=(/4: [/'2003-10-01' - /'2003-10-31']; tight)] 1383 1384 opt 1385 SELECT d3 FROM date_test WHERE d3 >= DATE 
  '1903-10-01' AND d3 < DATE '2003-10-01'
----
select
 ├── columns: d3:4(date!null)
 ├── stats: [rows=1000, distinct(4)=100, null(4)=0]
 ├── scan date_test
 │    ├── columns: d3:4(date!null)
 │    └── stats: [rows=1000, distinct(4)=100, null(4)=0]
 └── filters
      └── (d3:4 >= '1903-10-01') AND (d3:4 < '2003-10-01') [type=bool, outer=(4), constraints=(/4: [/'1903-10-01' - /'2003-09-30']; tight)]

# Regression test for #38344. Avoid floating point precision errors.
exec-ddl
CREATE TABLE t38344 (x BOOL)
----

exec-ddl
ALTER TABLE t38344 INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 20000000000,
    "distinct_count": 1,
    "null_count": 20000000000
  }
]'
----

norm disable=InlineWith
WITH t(x) AS (
  SELECT (t1.x::int << 5533)::bool OR t2.x AS x
  FROM t38344 AS t1 LEFT JOIN t38344 AS t2 ON true
)
SELECT x FROM t WHERE x
----
with &1 (t)
 ├── columns: x:6(bool!null)
 ├── stats: [rows=1.98e+20, distinct(6)=1, null(6)=0]
 ├── fd: ()-->(6)
 ├── project
 │    ├── columns: x:5(bool)
 │    ├── stats: [rows=4e+20]
 │    ├── left-join (cross)
 │    │    ├── columns: t1.x:1(bool) t2.x:3(bool)
 │    │    ├── stats: [rows=4e+20]
 │    │    ├── scan t1
 │    │    │    ├── columns: t1.x:1(bool)
 │    │    │    └── stats: [rows=2e+10]
 │    │    ├── scan t2
 │    │    │    ├── columns: t2.x:3(bool)
 │    │    │    └── stats: [rows=2e+10]
 │    │    └── filters (true)
 │    └── projections
 │         └── (t1.x:1::INT8 << 5533)::BOOL OR t2.x:3 [as=x:5, type=bool, outer=(1,3)]
 └── select
      ├── columns: x:6(bool!null)
      ├── stats: [rows=1.98e+20, distinct(6)=1, null(6)=0]
      ├── fd: ()-->(6)
      ├── with-scan &1 (t)
      │    ├── columns: x:6(bool)
      │    ├── mapping:
      │    │    └── x:5(bool) => x:6(bool)
      │    └── stats: [rows=4e+20, distinct(6)=3, null(6)=4e+18]
      └── filters
           └── x:6 [type=bool, outer=(6), constraints=(/6: [/true - /true]; tight), fd=()-->(6)]

# Regression test for #38375. Avoid floating point precision errors.
exec-ddl
CREATE TABLE t38375 (x INT, y INT)
----

exec-ddl
ALTER TABLE t38375 INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 20000000000,
    "distinct_count": 20000000000,
    "null_count": 20000000000
  },
  {
    "columns": ["y"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 20000000000,
    "distinct_count": 10,
    "null_count": 0
  }
]'
----

opt colstat=2
SELECT * FROM t38375 WHERE x = 1
----
select
 ├── columns: x:1(int!null) y:2(int)
 ├── stats: [rows=2, distinct(1)=1, null(1)=0, distinct(2)=1.81269262, null(2)=0]
 ├── fd: ()-->(1)
 ├── scan t38375
 │    ├── columns: x:1(int) y:2(int)
 │    └── stats: [rows=2e+10, distinct(1)=2e+10, null(1)=2e+10, distinct(2)=10, null(2)=0]
 └── filters
      └── x:1 = 1 [type=bool, outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)]

# Support OR constraints.
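# (A rough sanity check on the first query below: both branches of the OR fall
# on the only 2 distinct values recorded for n_name, so the predicate is not
# expected to filter anything -- selectivity 2/2 = 1, i.e. all 1,000,000 rows.)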
exec-ddl
CREATE TABLE nation
(
  n_nationkey int PRIMARY KEY,
  n_name char(25) NOT NULL,
  n_regionkey int NOT NULL,
  neighbor char(25) NOT NULL,
  INDEX n_rk (n_regionkey ASC)
)
----

exec-ddl
ALTER TABLE nation INJECT STATISTICS '[
  {
    "columns": ["n_name"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 1000000,
    "distinct_count": 2,
    "null_count": 0
  }
]'
----

opt
SELECT * FROM nation WHERE n_name = 'FRANCE' OR n_name = 'GERMANY'
----
select
 ├── columns: n_nationkey:1(int!null) n_name:2(char!null) n_regionkey:3(int!null) neighbor:4(char!null)
 ├── stats: [rows=1000000, distinct(2)=2, null(2)=0]
 ├── key: (1)
 ├── fd: (1)-->(2-4)
 ├── scan nation
 │    ├── columns: n_nationkey:1(int!null) n_name:2(char!null) n_regionkey:3(int!null) neighbor:4(char!null)
 │    ├── stats: [rows=1000000, distinct(1)=1000000, null(1)=0, distinct(2)=2, null(2)=0, distinct(3)=100000, null(3)=0, distinct(4)=100000, null(4)=0]
 │    ├── key: (1)
 │    └── fd: (1)-->(2-4)
 └── filters
      └── (n_name:2 = 'FRANCE') OR (n_name:2 = 'GERMANY') [type=bool, outer=(2), constraints=(/2: [/'FRANCE' - /'FRANCE'] [/'GERMANY' - /'GERMANY']; tight)]

opt
SELECT * FROM nation WHERE (n_name = 'FRANCE' AND neighbor = 'GERMANY') OR (n_name = 'GERMANY' AND neighbor = 'FRANCE')
----
select
 ├── columns: n_nationkey:1(int!null) n_name:2(char!null) n_regionkey:3(int!null) neighbor:4(char!null)
 ├── stats: [rows=6.66666667, distinct(2)=2, null(2)=0, distinct(4)=2, null(4)=0]
 ├── key: (1)
 ├── fd: (1)-->(2-4)
 ├── scan nation
 │    ├── columns: n_nationkey:1(int!null) n_name:2(char!null) n_regionkey:3(int!null) neighbor:4(char!null)
 │    ├── stats: [rows=1000000, distinct(1)=1000000, null(1)=0, distinct(2)=2, null(2)=0, distinct(3)=100000, null(3)=0, distinct(4)=100000, null(4)=0]
 │    ├── key: (1)
 │    └── fd: (1)-->(2-4)
 └── filters
      └── ((n_name:2 = 'FRANCE') AND (neighbor:4 = 'GERMANY')) OR ((n_name:2 = 'GERMANY') AND (neighbor:4 = 'FRANCE')) [type=bool, outer=(2,4), constraints=(/2: [/'FRANCE' - /'FRANCE'] [/'GERMANY' - /'GERMANY']; /4: [/'FRANCE' - /'FRANCE'] [/'GERMANY' - /'GERMANY'])]

opt
SELECT * FROM nation WHERE (n_name, neighbor) in (('FRANCE', 'GERMANY'), ('GERMANY', 'FRANCE'))
----
select
 ├── columns: n_nationkey:1(int!null) n_name:2(char!null) n_regionkey:3(int!null) neighbor:4(char!null)
 ├── stats: [rows=20, distinct(2)=2, null(2)=0, distinct(4)=2, null(4)=0]
 ├── key: (1)
 ├── fd: (1)-->(2-4)
 ├── scan nation
 │    ├── columns: n_nationkey:1(int!null) n_name:2(char!null) n_regionkey:3(int!null) neighbor:4(char!null)
 │    ├── stats: [rows=1000000, distinct(1)=1000000, null(1)=0, distinct(2)=2, null(2)=0, distinct(3)=100000, null(3)=0, distinct(4)=100000, null(4)=0]
 │    ├── key: (1)
 │    └── fd: (1)-->(2-4)
 └── filters
      └── (n_name:2, neighbor:4) IN (('FRANCE', 'GERMANY'), ('GERMANY', 'FRANCE')) [type=bool, outer=(2,4), constraints=(/2/4: [/'FRANCE'/'GERMANY' - /'FRANCE'/'GERMANY'] [/'GERMANY'/'FRANCE' - /'GERMANY'/'FRANCE']; /4: [/'FRANCE' - /'FRANCE'] [/'GERMANY' - /'GERMANY']; tight)]

# Make sure that the histogram and distinct counts don't interfere with each
# other during selectivity calculation.
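# (Rough arithmetic for the estimate below: a = 10 falls on a histogram bucket
# boundary with num_eq = 10, i.e. a 1% selectivity, and b and c each have 5
# distinct values, contributing roughly 1/5 apiece, so 1000 * 0.01 * 0.2 * 0.2
# ~ 0.4 rows even before the range filter on d, in line with the 0.39 shown.)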
exec-ddl
CREATE TABLE hist_and_distinct (
  a INT,
  b INT,
  c INT,
  d INT,
  INDEX idx_a (a)
)
----

exec-ddl
ALTER TABLE hist_and_distinct INJECT STATISTICS '[
  {
    "columns": ["a"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 1000,
    "distinct_count": 40,
    "histo_col_type": "int",
    "histo_buckets": [
      {"num_eq": 0, "num_range": 0, "distinct_range": 0, "upper_bound": "0"},
      {"num_eq": 10, "num_range": 90, "distinct_range": 9, "upper_bound": "10"},
      {"num_eq": 20, "num_range": 180, "distinct_range": 9, "upper_bound": "20"},
      {"num_eq": 30, "num_range": 270, "distinct_range": 9, "upper_bound": "30"},
      {"num_eq": 40, "num_range": 360, "distinct_range": 9, "upper_bound": "40"}
    ]
  },
  {
    "columns": ["b"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 1000,
    "distinct_count": 5
  },
  {
    "columns": ["c"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 1000,
    "distinct_count": 5
  },
  {
    "columns": ["d"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 1000,
    "distinct_count": 120
  }
]'
----

norm
SELECT * FROM hist_and_distinct WHERE a = 10 AND b = 10 AND c = 10 AND d >= 10 AND d < 100
----
select
 ├── columns: a:1(int!null) b:2(int!null) c:3(int!null) d:4(int!null)
 ├── stats: [rows=0.39, distinct(1)=0.39, null(1)=0, distinct(2)=0.39, null(2)=0, distinct(3)=0.39, null(3)=0, distinct(4)=0.39, null(4)=0, distinct(1-3)=0.39, null(1-3)=0, distinct(1-4)=0.39, null(1-4)=0]
 │   histogram(1)= 0 0.39
 │                <--- 10
 ├── fd: ()-->(1-3)
 ├── scan hist_and_distinct
 │    ├── columns: a:1(int) b:2(int) c:3(int) d:4(int)
 │    └── stats: [rows=1000, distinct(1)=40, null(1)=0, distinct(2)=5, null(2)=0, distinct(3)=5, null(3)=0, distinct(4)=120, null(4)=0, distinct(1-3)=1000, null(1-3)=0, distinct(1-4)=1000, null(1-4)=0]
 │         histogram(1)= 0 0 90 10 180 20 270 30 360 40
 │                      <--- 0 ---- 10 ----- 20 ----- 30 ----- 40
 └── filters
      ├── (d:4 >= 10) AND (d:4 < 100) [type=bool, outer=(4), constraints=(/4: [/10 - /99]; tight)]
      ├── a:1 = 10 [type=bool, outer=(1), constraints=(/1: [/10 - /10]; tight), fd=()-->(1)]
      ├── b:2 = 10 [type=bool, outer=(2), constraints=(/2: [/10 - /10]; tight), fd=()-->(2)]
      └── c:3 = 10 [type=bool, outer=(3), constraints=(/3: [/10 - /10]; tight), fd=()-->(3)]

# Test that a histogram on a boolean column is used.
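# (With the histogram below, b = false should match the 'false' bucket's
# num_eq of 900 out of 1000 rows; without it, the distinct count of 2 alone
# would suggest roughly 500 rows.)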
exec-ddl
CREATE TABLE hist_bool (a INT, b BOOL)
----

exec-ddl
ALTER TABLE hist_bool INJECT STATISTICS '[
  {
    "columns": ["a"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 1000,
    "distinct_count": 40
  },
  {
    "columns": ["b"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 1000,
    "distinct_count": 2,
    "histo_col_type": "BOOL",
    "histo_buckets": [
      {"num_eq": 900, "num_range": 0, "distinct_range": 0, "upper_bound": "false"},
      {"num_eq": 100, "num_range": 0, "distinct_range": 0, "upper_bound": "true"}
    ]
  }
]'
----

norm
SELECT * FROM hist_bool WHERE b = false
----
select
 ├── columns: a:1(int) b:2(bool!null)
 ├── stats: [rows=900, distinct(2)=1, null(2)=0]
 │   histogram(2)= 0 900
 │                <--- false
 ├── fd: ()-->(2)
 ├── scan hist_bool
 │    ├── columns: a:1(int) b:2(bool)
 │    └── stats: [rows=1000, distinct(2)=2, null(2)=0]
 │         histogram(2)= 0 900 0 100
 │                      <--- false --- true
 └── filters
      └── b:2 = false [type=bool, outer=(2), constraints=(/2: [/false - /false]; tight), fd=()-->(2)]

exec-ddl
CREATE TABLE t0(c0 INT)
----

exec-ddl
CREATE VIEW v0(c0) AS SELECT CASE WHEN t0.c0 > 0 THEN 1 ELSE t0.rowid END FROM t0
----

exec-ddl
ALTER TABLE t0 INJECT STATISTICS '[
  {
    "columns": ["c0"],
    "created_at": "2020-01-28 03:02:57.841772+00:00",
    "row_count": 3,
    "distinct_count": 1,
    "null_count": 3
  },
  {
    "columns": ["rowid"],
    "created_at": "2020-01-28 03:03:03.012072+00:00",
    "row_count": 2,
    "distinct_count": 2,
    "null_count": 0,
    "histo_buckets": [
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "3"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "4"
      }
    ],
    "histo_col_type": "INT8"
  }
]'
----

# Regression test for #44418. Make sure inconsistent stats don't cause an
# error.
norm
SELECT * FROM v0 WHERE v0.c0 > 0
----
select
 ├── columns: c0:3(int!null)
 ├── stats: [rows=1, distinct(3)=1, null(3)=0]
 ├── project
 │    ├── columns: rowid:3(int)
 │    ├── stats: [rows=2, distinct(3)=2, null(3)=0]
 │    ├── scan t0
 │    │    ├── columns: c0:1(int) t0.rowid:2(int!null)
 │    │    ├── stats: [rows=2, distinct(1,2)=2, null(1,2)=0]
 │    │    ├── key: (2)
 │    │    └── fd: (2)-->(1)
 │    └── projections
 │         └── CASE WHEN c0:1 > 0 THEN 1 ELSE t0.rowid:2 END [as=rowid:3, type=int, outer=(1,2)]
 └── filters
      └── rowid:3 > 0 [type=bool, outer=(3), constraints=(/3: [/1 - ]; tight)]

exec-ddl
ALTER TABLE a INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2020-01-28 03:02:57.841772+00:00",
    "row_count": 3,
    "distinct_count": 3
  }
]'
----

# Regression test for #44563. Set a lower bound on the distinct count from
# the multi-span constraint.
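# (The disjunction below produces the spans (/NULL - /5], [/10 - /10], and
# [/15 - /15]; the two single-value spans presumably supply the lower bound of
# 2 on distinct(x) seen in the plan, even though the injected stats report only
# 3 rows and 3 distinct values.)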
norm
SELECT * FROM a WHERE x <= 5 OR x = 10 OR x = 15
----
select
 ├── columns: x:1(int!null) y:2(int)
 ├── stats: [rows=2, distinct(1)=2, null(1)=0]
 ├── key: (1)
 ├── fd: (1)-->(2)
 ├── scan a
 │    ├── columns: x:1(int!null) y:2(int)
 │    ├── stats: [rows=3, distinct(1)=3, null(1)=0]
 │    ├── key: (1)
 │    └── fd: (1)-->(2)
 └── filters
      └── ((x:1 <= 5) OR (x:1 = 10)) OR (x:1 = 15) [type=bool, outer=(1), constraints=(/1: (/NULL - /5] [/10 - /10] [/15 - /15]; tight)]

exec-ddl
CREATE TABLE data (
  user_id UUID NOT NULL,
  name VARCHAR(255) NULL,
  created TIMESTAMPTZ,
  INDEX user_id_idx (user_id ASC),
  INDEX name_idx (name ASC),
  INDEX created_idx (created ASC)
)
----

exec-ddl
ALTER TABLE data INJECT STATISTICS '[
  {
    "columns": ["user_id"],
    "created_at": "2020-01-28 03:02:57.841772+00:00",
    "row_count": 10000,
    "distinct_count": 1000,
    "null_count": 0,
    "histo_buckets": [
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "3b57b3e4-a68a-9b47-2752-e365d7d8954e"
      },
      {
        "distinct_range": 499,
        "num_eq": 1,
        "num_range": 4998,
        "upper_bound": "6b49a786-387b-d5a2-6582-4e963eb4d537"
      },
      {
        "distinct_range": 499,
        "num_eq": 1,
        "num_range": 4999,
        "upper_bound": "d9739a48-d5be-9a62-e752-34d877e56ba5"
      }
    ],
    "histo_col_type": "UUID"
  },
  {
    "columns": ["name"],
    "created_at": "2020-01-28 03:03:03.012072+00:00",
    "row_count": 10000,
    "distinct_count": 1000,
    "null_count": 0,
    "histo_buckets": [
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "a"
      },
      {
        "distinct_range": 499,
        "num_eq": 1,
        "num_range": 4998,
        "upper_bound": "b"
      },
      {
        "distinct_range": 499,
        "num_eq": 1,
        "num_range": 4999,
        "upper_bound": "c"
      }
    ],
    "histo_col_type": "STRING"
  },
  {
    "columns": ["created"],
    "created_at": "2020-01-28 03:03:03.012072+00:00",
    "row_count": 10000,
    "distinct_count": 10000,
    "null_count": 0,
    "histo_buckets": [
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "2020-02-11 07:25:00+00:00"
      },
      {
        "distinct_range": 4998,
        "num_eq": 1,
        "num_range": 4998,
        "upper_bound": "2020-03-21 06:45:41+00:00"
      },
      {
        "distinct_range": 4999,
        "num_eq": 1,
        "num_range": 4999,
        "upper_bound": "2020-04-21 06:25:41+00:00"
      }
    ],
    "histo_col_type": "TIMESTAMPTZ"
  }
]'
----

# Make sure that using a histogram produces correct stats with equality
# predicates on data types such as UUID, string, and timestamptz.
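# (A rough check for the first two plans below: the equality value falls inside
# a histogram bucket with num_range = 4998 and distinct_range = 499, so the
# per-value estimate is about 4998 / 499 ~ 10.016 rows, matching the
# rows=10.0160321 shown.)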
norm
SELECT * FROM data WHERE user_id = '679d3e56-b985-63d2-5442-e4ba7a8479e3'
----
select
 ├── columns: user_id:1(uuid!null) name:2(varchar) created:3(timestamptz)
 ├── stats: [rows=10.0160321, distinct(1)=1, null(1)=0]
 │   histogram(1)= 0 10.016
 │                <--- '679d3e56-b985-63d2-5442-e4ba7a8479e3'
 ├── fd: ()-->(1)
 ├── scan data
 │    ├── columns: user_id:1(uuid!null) name:2(varchar) created:3(timestamptz)
 │    └── stats: [rows=10000, distinct(1)=1000, null(1)=0]
 │         histogram(1)= 0 1 4998 1 4999 1
 │                      <--- '3b57b3e4-a68a-9b47-2752-e365d7d8954e' ------ '6b49a786-387b-d5a2-6582-4e963eb4d537' ------ 'd9739a48-d5be-9a62-e752-34d877e56ba5'
 └── filters
      └── user_id:1 = '679d3e56-b985-63d2-5442-e4ba7a8479e3' [type=bool, outer=(1), constraints=(/1: [/'679d3e56-b985-63d2-5442-e4ba7a8479e3' - /'679d3e56-b985-63d2-5442-e4ba7a8479e3']; tight), fd=()-->(1)]

norm
SELECT * FROM data WHERE name = 'abc'
----
select
 ├── columns: user_id:1(uuid!null) name:2(varchar!null) created:3(timestamptz)
 ├── stats: [rows=10.0160321, distinct(2)=1, null(2)=0]
 │   histogram(2)= 0 10.016
 │                <--- 'abc'
 ├── fd: ()-->(2)
 ├── scan data
 │    ├── columns: user_id:1(uuid!null) name:2(varchar) created:3(timestamptz)
 │    └── stats: [rows=10000, distinct(1)=1000, null(1)=0, distinct(2)=1000, null(2)=0]
 │         histogram(1)= 0 1 4998 1 4999 1
 │                      <--- '3b57b3e4-a68a-9b47-2752-e365d7d8954e' ------ '6b49a786-387b-d5a2-6582-4e963eb4d537' ------ 'd9739a48-d5be-9a62-e752-34d877e56ba5'
 │         histogram(2)= 0 1 4998 1 4999 1
 │                      <--- 'a' ------ 'b' ------ 'c'
 └── filters
      └── name:2 = 'abc' [type=bool, outer=(2), constraints=(/2: [/'abc' - /'abc']; tight), fd=()-->(2)]

norm
SELECT * FROM data WHERE created = '2020-04-11 06:25:41+00:00'
----
select
 ├── columns: user_id:1(uuid!null) name:2(varchar) created:3(timestamptz!null)
 ├── stats: [rows=1, distinct(3)=1, null(3)=0]
 │   histogram(3)= 0 1
 │                <--- '2020-04-11 06:25:41+00:00'
 ├── fd: ()-->(3)
 ├── scan data
 │    ├── columns: user_id:1(uuid!null) name:2(varchar) created:3(timestamptz)
 │    └── stats: [rows=10000, distinct(1)=1000, null(1)=0, distinct(3)=10000, null(3)=0]
 │         histogram(1)= 0 1 4998 1 4999 1
 │                      <--- '3b57b3e4-a68a-9b47-2752-e365d7d8954e' ------ '6b49a786-387b-d5a2-6582-4e963eb4d537' ------ 'd9739a48-d5be-9a62-e752-34d877e56ba5'
 │         histogram(3)= 0 1 4998 1 4999 1
 │                      <--- '2020-02-11 07:25:00+00:00' ------ '2020-03-21 06:45:41+00:00' ------ '2020-04-21 06:25:41+00:00'
 └── filters
      └── created:3 = '2020-04-11 06:25:41+00:00' [type=bool, outer=(3), constraints=(/3: [/'2020-04-11 06:25:41+00:00' - /'2020-04-11 06:25:41+00:00']; tight), fd=()-->(3)]

exec-ddl
ALTER TABLE a INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2020-05-26 03:02:57.841772+00:00",
    "row_count": 1000,
    "distinct_count": 1000
  }
]'
----

# Regression test for #48828. Stats for BETWEEN SYMMETRIC should be based on
# the tight constraint rather than calculated as 1/3rd of the cardinality.
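# (The predicate normalizes to the tight constraint [/25 - /50], which spans at
# most 50 - 25 + 1 = 26 values; with 1000 rows and 1000 distinct values of x
# injected above, the expected row count below is therefore 26 rather than
# roughly a third of the 1000 rows.)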
norm
SELECT * FROM a WHERE x BETWEEN SYMMETRIC 25 and 50
----
select
 ├── columns: x:1(int!null) y:2(int)
 ├── cardinality: [0 - 26]
 ├── stats: [rows=26, distinct(1)=26, null(1)=0]
 ├── key: (1)
 ├── fd: (1)-->(2)
 ├── scan a
 │    ├── columns: x:1(int!null) y:2(int)
 │    ├── stats: [rows=1000, distinct(1)=1000, null(1)=0]
 │    ├── key: (1)
 │    └── fd: (1)-->(2)
 └── filters
      └── ((x:1 >= 25) AND (x:1 <= 50)) OR ((x:1 >= 50) AND (x:1 <= 25)) [type=bool, outer=(1), constraints=(/1: [/25 - /50]; tight)]

exec-ddl
ALTER TABLE b INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2020-01-28 03:02:57.841772+00:00",
    "row_count": 10000,
    "distinct_count": 1000
  },
  {
    "columns": ["z"],
    "created_at": "2020-01-28 03:02:57.841772+00:00",
    "row_count": 10000,
    "distinct_count": 100
  },
  {
    "columns": ["x","z"],
    "created_at": "2020-01-28 03:02:57.841772+00:00",
    "row_count": 10000,
    "distinct_count": 1500
  }
]'
----

# Multi-column stats test.
build
SELECT * FROM b WHERE x = 1 AND z = 2
----
project
 ├── columns: x:1(int!null) z:2(int!null)
 ├── stats: [rows=6.01]
 ├── fd: ()-->(1,2)
 └── select
      ├── columns: x:1(int!null) z:2(int!null) rowid:3(int!null)
      ├── stats: [rows=6.01, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(1,2)=1, null(1,2)=0]
      ├── key: (3)
      ├── fd: ()-->(1,2)
      ├── scan b
      │    ├── columns: x:1(int) z:2(int!null) rowid:3(int!null)
      │    ├── stats: [rows=10000, distinct(1)=1000, null(1)=0, distinct(2)=100, null(2)=0, distinct(3)=10000, null(3)=0, distinct(1,2)=1500, null(1,2)=0]
      │    ├── key: (3)
      │    └── fd: (3)-->(1,2)
      └── filters
           └── (x:1 = 1) AND (z:2 = 2) [type=bool, outer=(1,2), constraints=(/1: [/1 - /1]; /2: [/2 - /2]; tight), fd=()-->(1,2)]
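# (For the query above, treating x and z as independent would give
# 10000 * (1/1000) * (1/100) = 0.1 rows, while the multi-column distinct count
# alone gives 10000 / 1500 ~ 6.67 rows; the rows=6.01 estimate falls between
# the two, close to the multi-column figure.)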