github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/memo/testdata/stats/join (about) 1 exec-ddl 2 CREATE TABLE xysd (x INT PRIMARY KEY, y INT, s STRING, d DECIMAL NOT NULL, UNIQUE (s DESC, d)) 3 ---- 4 5 exec-ddl 6 CREATE TABLE uv (u INT, v INT NOT NULL) 7 ---- 8 9 exec-ddl 10 ALTER TABLE xysd INJECT STATISTICS '[ 11 { 12 "columns": ["x"], 13 "created_at": "2018-01-01 1:00:00.00000+00:00", 14 "row_count": 5000, 15 "distinct_count": 5000 16 }, 17 { 18 "columns": ["y"], 19 "created_at": "2018-01-01 1:30:00.00000+00:00", 20 "row_count": 5000, 21 "distinct_count": 400 22 } 23 ]' 24 ---- 25 26 exec-ddl 27 ALTER TABLE uv INJECT STATISTICS '[ 28 { 29 "columns": ["u"], 30 "created_at": "2018-01-01 1:00:00.00000+00:00", 31 "row_count": 10000, 32 "distinct_count": 500 33 }, 34 { 35 "columns": ["v"], 36 "created_at": "2018-01-01 1:30:00.00000+00:00", 37 "row_count": 10000, 38 "distinct_count": 100 39 }, 40 { 41 "columns": ["rowid"], 42 "created_at": "2018-01-01 1:30:00.00000+00:00", 43 "row_count": 10000, 44 "distinct_count": 10000 45 } 46 ]' 47 ---- 48 49 norm 50 SELECT * FROM xysd JOIN uv ON true 51 ---- 52 inner-join (cross) 53 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int!null) 54 ├── stats: [rows=50000000] 55 ├── fd: (1)-->(2-4), (3,4)~~>(1,2) 56 ├── scan xysd 57 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 58 │ ├── stats: [rows=5000] 59 │ ├── key: (1) 60 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 61 ├── scan uv 62 │ ├── columns: u:5(int) v:6(int!null) 63 │ └── stats: [rows=10000] 64 └── filters (true) 65 66 norm colstat=1 colstat=2 colstat=3 colstat=4 colstat=5 colstat=6 colstat=(2,5,6) 67 SELECT * FROM xysd JOIN uv ON true 68 ---- 69 inner-join (cross) 70 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int!null) 71 ├── stats: [rows=50000000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=0, distinct(3)=500, null(3)=500000, distinct(4)=500, null(4)=0, distinct(5)=500, null(5)=0, distinct(6)=100, null(6)=0, distinct(2,5,6)=4000000, null(2,5,6)=0] 72 ├── fd: (1)-->(2-4), (3,4)~~>(1,2) 73 ├── scan xysd 74 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 75 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=0, distinct(3)=500, null(3)=50, distinct(4)=500, null(4)=0] 76 │ ├── key: (1) 77 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 78 ├── scan uv 79 │ ├── columns: u:5(int) v:6(int!null) 80 │ └── stats: [rows=10000, distinct(5)=500, null(5)=0, distinct(6)=100, null(6)=0, distinct(5,6)=10000, null(5,6)=0] 81 └── filters (true) 82 83 norm 84 SELECT * FROM xysd JOIN uv ON false 85 ---- 86 values 87 ├── columns: x:1(int!null) y:2(int!null) s:3(string!null) d:4(decimal!null) u:5(int!null) v:6(int!null) 88 ├── cardinality: [0 - 0] 89 ├── stats: [rows=0] 90 ├── key: () 91 └── fd: ()-->(1-6) 92 93 build colstat=2 94 SELECT *, rowid FROM xysd INNER JOIN uv ON x=u 95 ---- 96 inner-join (hash) 97 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int!null) v:6(int!null) rowid:7(int!null) 98 ├── stats: [rows=10000, distinct(1)=500, null(1)=0, distinct(2)=400, null(2)=0, distinct(5)=500, null(5)=0] 99 ├── key: (7) 100 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6), (1)==(5), (5)==(1) 101 ├── scan xysd 102 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 103 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=0, distinct(4)=500, null(4)=0] 104 │ ├── key: (1) 105 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 106 ├── scan uv 107 │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 108 │ ├── stats: [rows=10000, distinct(5)=500, null(5)=0, distinct(6)=100, null(6)=0, distinct(7)=10000, null(7)=0] 109 │ ├── key: (7) 110 │ └── fd: (7)-->(5,6) 111 └── filters 112 └── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 113 114 build 115 SELECT *, rowid FROM xysd LEFT JOIN uv ON x=u 116 ---- 117 left-join (hash) 118 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int) rowid:7(int) 119 ├── stats: [rows=10000, distinct(5)=500, null(5)=0] 120 ├── key: (1,7) 121 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6) 122 ├── scan xysd 123 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 124 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(4)=500, null(4)=0] 125 │ ├── key: (1) 126 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 127 ├── scan uv 128 │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 129 │ ├── stats: [rows=10000, distinct(5)=500, null(5)=0] 130 │ ├── key: (7) 131 │ └── fd: (7)-->(5,6) 132 └── filters 133 └── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 134 135 build 136 SELECT *, rowid FROM xysd RIGHT JOIN uv ON x=u 137 ---- 138 right-join (hash) 139 ├── columns: x:1(int) y:2(int) s:3(string) d:4(decimal) u:5(int) v:6(int!null) rowid:7(int!null) 140 ├── stats: [rows=10000, distinct(1)=500, null(1)=0] 141 ├── key: (7) 142 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(1-6) 143 ├── scan xysd 144 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 145 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0] 146 │ ├── key: (1) 147 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 148 ├── scan uv 149 │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 150 │ ├── stats: [rows=10000, distinct(5)=500, null(5)=0, distinct(6)=100, null(6)=0, distinct(7)=10000, null(7)=0] 151 │ ├── key: (7) 152 │ └── fd: (7)-->(5,6) 153 └── filters 154 └── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 155 156 build 157 SELECT *, rowid FROM xysd FULL JOIN uv ON x=u 158 ---- 159 full-join (hash) 160 ├── columns: x:1(int) y:2(int) s:3(string) d:4(decimal) u:5(int) v:6(int) rowid:7(int) 161 ├── stats: [rows=10000] 162 ├── key: (1,7) 163 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6) 164 ├── scan xysd 165 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 166 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0] 167 │ ├── key: (1) 168 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 169 ├── scan uv 170 │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 171 │ ├── stats: [rows=10000, distinct(5)=500, null(5)=0] 172 │ ├── key: (7) 173 │ └── fd: (7)-->(5,6) 174 └── filters 175 └── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 176 177 build 178 SELECT * FROM xysd, uv 179 ---- 180 project 181 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int!null) 182 ├── stats: [rows=50000000] 183 ├── fd: (1)-->(2-4), (3,4)~~>(1,2) 184 └── inner-join (cross) 185 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int!null) rowid:7(int!null) 186 ├── stats: [rows=50000000] 187 ├── key: (1,7) 188 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6) 189 ├── scan xysd 190 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 191 │ ├── stats: [rows=5000] 192 │ ├── key: (1) 193 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 194 ├── scan uv 195 │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 196 │ ├── stats: [rows=10000] 197 │ ├── key: (7) 198 │ └── fd: (7)-->(5,6) 199 └── filters (true) 200 201 build 202 SELECT * FROM xysd, xysd AS xysd 203 ---- 204 inner-join (cross) 205 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) 206 ├── stats: [rows=25000000] 207 ├── key: (1,5) 208 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (5)-->(6-8), (7,8)~~>(5,6) 209 ├── scan xysd 210 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 211 │ ├── stats: [rows=5000] 212 │ ├── key: (1) 213 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 214 ├── scan xysd 215 │ ├── columns: x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) 216 │ ├── stats: [rows=5000] 217 │ ├── key: (5) 218 │ └── fd: (5)-->(6-8), (7,8)~~>(5,6) 219 └── filters (true) 220 221 build 222 SELECT * FROM xysd, uv WHERE v = 5 223 ---- 224 project 225 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int!null) 226 ├── stats: [rows=500000] 227 ├── fd: ()-->(6), (1)-->(2-4), (3,4)~~>(1,2) 228 └── select 229 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int!null) rowid:7(int!null) 230 ├── stats: [rows=500000, distinct(6)=1, null(6)=0] 231 ├── key: (1,7) 232 ├── fd: ()-->(6), (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5) 233 ├── inner-join (cross) 234 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int!null) rowid:7(int!null) 235 │ ├── stats: [rows=50000000, distinct(1)=5000, null(1)=0, distinct(4)=500, null(4)=0, distinct(6)=100, null(6)=0, distinct(7)=10000, null(7)=0] 236 │ ├── key: (1,7) 237 │ ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6) 238 │ ├── scan xysd 239 │ │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 240 │ │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(4)=500, null(4)=0] 241 │ │ ├── key: (1) 242 │ │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 243 │ ├── scan uv 244 │ │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 245 │ │ ├── stats: [rows=10000, distinct(6)=100, null(6)=0, distinct(7)=10000, null(7)=0] 246 │ │ ├── key: (7) 247 │ │ └── fd: (7)-->(5,6) 248 │ └── filters (true) 249 └── filters 250 └── v:6 = 5 [type=bool, outer=(6), constraints=(/6: [/5 - /5]; tight), fd=()-->(6)] 251 252 # Force calculation of the distinct count for the column set spanning both 253 # tables in the join. 254 build 255 SELECT sum(v), x, v FROM xysd, uv GROUP BY x, v 256 ---- 257 group-by 258 ├── columns: sum:8(decimal!null) x:1(int!null) v:6(int!null) 259 ├── grouping columns: x:1(int!null) v:6(int!null) 260 ├── stats: [rows=500000, distinct(1,6)=500000, null(1,6)=0] 261 ├── key: (1,6) 262 ├── fd: (1,6)-->(8) 263 ├── project 264 │ ├── columns: x:1(int!null) v:6(int!null) 265 │ ├── stats: [rows=50000000, distinct(1,6)=500000, null(1,6)=0] 266 │ └── inner-join (cross) 267 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int!null) rowid:7(int!null) 268 │ ├── stats: [rows=50000000, distinct(1,6)=500000, null(1,6)=0] 269 │ ├── key: (1,7) 270 │ ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6) 271 │ ├── scan xysd 272 │ │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 273 │ │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0] 274 │ │ ├── key: (1) 275 │ │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 276 │ ├── scan uv 277 │ │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 278 │ │ ├── stats: [rows=10000, distinct(6)=100, null(6)=0] 279 │ │ ├── key: (7) 280 │ │ └── fd: (7)-->(5,6) 281 │ └── filters (true) 282 └── aggregations 283 └── sum [as=sum:8, type=decimal, outer=(6)] 284 └── v:6 [type=int] 285 286 # Join selectivity: 1/max(distinct(x), distinct(u)) = 1/5000. 287 norm 288 SELECT sum(v), x, v FROM xysd, uv WHERE x=u GROUP BY x, v 289 ---- 290 group-by 291 ├── columns: sum:8(decimal!null) x:1(int!null) v:6(int!null) 292 ├── grouping columns: x:1(int!null) v:6(int!null) 293 ├── stats: [rows=10000, distinct(1,6)=10000, null(1,6)=0] 294 ├── key: (1,6) 295 ├── fd: (1,6)-->(8) 296 ├── inner-join (hash) 297 │ ├── columns: x:1(int!null) u:5(int!null) v:6(int!null) 298 │ ├── stats: [rows=10000, distinct(1)=500, null(1)=0, distinct(5)=500, null(5)=0, distinct(1,6)=10000, null(1,6)=0] 299 │ ├── fd: (1)==(5), (5)==(1) 300 │ ├── scan xysd 301 │ │ ├── columns: x:1(int!null) 302 │ │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0] 303 │ │ └── key: (1) 304 │ ├── scan uv 305 │ │ ├── columns: u:5(int) v:6(int!null) 306 │ │ └── stats: [rows=10000, distinct(5)=500, null(5)=0, distinct(6)=100, null(6)=0] 307 │ └── filters 308 │ └── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 309 └── aggregations 310 └── sum [as=sum:8, type=decimal, outer=(6)] 311 └── v:6 [type=int] 312 313 # Semi-join. 314 norm 315 SELECT * FROM xysd WHERE EXISTS (SELECT * FROM uv WHERE x=u) 316 ---- 317 semi-join (hash) 318 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 319 ├── stats: [rows=500, distinct(1)=500, null(1)=0] 320 ├── key: (1) 321 ├── fd: (1)-->(2-4), (3,4)~~>(1,2) 322 ├── scan xysd 323 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 324 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(4)=500, null(4)=0] 325 │ ├── key: (1) 326 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 327 ├── scan uv 328 │ ├── columns: u:5(int) 329 │ └── stats: [rows=10000, distinct(5)=500, null(5)=0] 330 └── filters 331 └── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 332 333 # Anti-join. 334 norm 335 SELECT * FROM xysd WHERE NOT EXISTS (SELECT * FROM uv WHERE x=u) 336 ---- 337 anti-join (hash) 338 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 339 ├── stats: [rows=4500] 340 ├── key: (1) 341 ├── fd: (1)-->(2-4), (3,4)~~>(1,2) 342 ├── scan xysd 343 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 344 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(4)=500, null(4)=0] 345 │ ├── key: (1) 346 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 347 ├── scan uv 348 │ ├── columns: u:5(int) 349 │ └── stats: [rows=10000, distinct(5)=500, null(5)=0] 350 └── filters 351 └── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 352 353 # Multiple equality conditions. 354 norm 355 SELECT * FROM xysd JOIN uv ON x=u AND y=v 356 ---- 357 inner-join (hash) 358 ├── columns: x:1(int!null) y:2(int!null) s:3(string) d:4(decimal!null) u:5(int!null) v:6(int!null) 359 ├── stats: [rows=25, distinct(1)=25, null(1)=0, distinct(2)=25, null(2)=0, distinct(5)=25, null(5)=0, distinct(6)=25, null(6)=0] 360 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (1)==(5), (5)==(1), (2)==(6), (6)==(2) 361 ├── scan xysd 362 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 363 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=0, distinct(4)=500, null(4)=0] 364 │ ├── key: (1) 365 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 366 ├── scan uv 367 │ ├── columns: u:5(int) v:6(int!null) 368 │ └── stats: [rows=10000, distinct(5)=500, null(5)=0, distinct(6)=100, null(6)=0] 369 └── filters 370 ├── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 371 └── y:2 = v:6 [type=bool, outer=(2,6), constraints=(/2: (/NULL - ]; /6: (/NULL - ]), fd=(2)==(6), (6)==(2)] 372 373 # Equality condition + extra filters. 374 norm 375 SELECT * FROM xysd JOIN uv ON x=u AND y+v=5 AND y > 0 AND y < 300 376 ---- 377 inner-join (hash) 378 ├── columns: x:1(int!null) y:2(int!null) s:3(string) d:4(decimal!null) u:5(int!null) v:6(int!null) 379 ├── stats: [rows=3333.33333, distinct(1)=500, null(1)=0, distinct(5)=500, null(5)=0] 380 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (1)==(5), (5)==(1) 381 ├── select 382 │ ├── columns: x:1(int!null) y:2(int!null) s:3(string) d:4(decimal!null) 383 │ ├── stats: [rows=3737.5, distinct(1)=3737.5, null(1)=0, distinct(2)=299, null(2)=0, distinct(4)=499.999473, null(4)=0] 384 │ ├── key: (1) 385 │ ├── fd: (1)-->(2-4), (3,4)~~>(1,2) 386 │ ├── scan xysd 387 │ │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 388 │ │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=0, distinct(4)=500, null(4)=0] 389 │ │ ├── key: (1) 390 │ │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 391 │ └── filters 392 │ └── (y:2 > 0) AND (y:2 < 300) [type=bool, outer=(2), constraints=(/2: [/1 - /299]; tight)] 393 ├── scan uv 394 │ ├── columns: u:5(int) v:6(int!null) 395 │ └── stats: [rows=10000, distinct(5)=500, null(5)=0, distinct(6)=100, null(6)=0] 396 └── filters 397 ├── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 398 └── (y:2 + v:6) = 5 [type=bool, outer=(2,6)] 399 400 # Force column statistics calculation for semi-join. 401 norm 402 SELECT count(*) 403 FROM (SELECT * FROM xysd WHERE EXISTS (SELECT * FROM uv WHERE x=u AND y+v=5)) AS a 404 GROUP BY y 405 ---- 406 project 407 ├── columns: count:8(int!null) 408 ├── stats: [rows=138.170075] 409 └── group-by 410 ├── columns: y:2(int) count_rows:8(int!null) 411 ├── grouping columns: y:2(int) 412 ├── stats: [rows=138.170075, distinct(2)=138.170075, null(2)=0] 413 ├── key: (2) 414 ├── fd: (2)-->(8) 415 ├── semi-join (hash) 416 │ ├── columns: x:1(int!null) y:2(int) 417 │ ├── stats: [rows=166.666667, distinct(1)=166.666667, null(1)=0, distinct(2)=138.170075, null(2)=0] 418 │ ├── key: (1) 419 │ ├── fd: (1)-->(2) 420 │ ├── scan xysd 421 │ │ ├── columns: x:1(int!null) y:2(int) 422 │ │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=0] 423 │ │ ├── key: (1) 424 │ │ └── fd: (1)-->(2) 425 │ ├── scan uv 426 │ │ ├── columns: u:5(int) v:6(int!null) 427 │ │ └── stats: [rows=10000, distinct(5)=500, null(5)=0, distinct(6)=100, null(6)=0] 428 │ └── filters 429 │ ├── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 430 │ └── (y:2 + v:6) = 5 [type=bool, outer=(2,6)] 431 └── aggregations 432 └── count-rows [as=count_rows:8, type=int] 433 434 # Force column statistics calculation for anti-join. 435 norm 436 SELECT count(*) 437 FROM (SELECT * FROM xysd WHERE NOT EXISTS (SELECT * FROM uv WHERE x=u AND y+v=5)) AS a 438 GROUP BY y 439 ---- 440 project 441 ├── columns: count:8(int!null) 442 ├── stats: [rows=400] 443 └── group-by 444 ├── columns: y:2(int) count_rows:8(int!null) 445 ├── grouping columns: y:2(int) 446 ├── stats: [rows=400, distinct(2)=400, null(2)=0] 447 ├── key: (2) 448 ├── fd: (2)-->(8) 449 ├── anti-join (hash) 450 │ ├── columns: x:1(int!null) y:2(int) 451 │ ├── stats: [rows=4833.33333, distinct(2)=400, null(2)=0] 452 │ ├── key: (1) 453 │ ├── fd: (1)-->(2) 454 │ ├── scan xysd 455 │ │ ├── columns: x:1(int!null) y:2(int) 456 │ │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=0] 457 │ │ ├── key: (1) 458 │ │ └── fd: (1)-->(2) 459 │ ├── scan uv 460 │ │ ├── columns: u:5(int) v:6(int!null) 461 │ │ └── stats: [rows=10000, distinct(5)=500, null(5)=0, distinct(6)=100, null(6)=0] 462 │ └── filters 463 │ ├── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 464 │ └── (y:2 + v:6) = 5 [type=bool, outer=(2,6)] 465 └── aggregations 466 └── count-rows [as=count_rows:8, type=int] 467 468 # Force column statistics calculation for left join. 469 norm 470 SELECT count(*) 471 FROM (SELECT * FROM xysd LEFT OUTER JOIN uv ON x=u AND y+v=5) AS a 472 GROUP BY y 473 ---- 474 project 475 ├── columns: count:8(int!null) 476 ├── stats: [rows=400] 477 └── group-by 478 ├── columns: y:2(int) count_rows:8(int!null) 479 ├── grouping columns: y:2(int) 480 ├── stats: [rows=400, distinct(2)=400, null(2)=0] 481 ├── key: (2) 482 ├── fd: (2)-->(8) 483 ├── left-join (hash) 484 │ ├── columns: x:1(int!null) y:2(int) u:5(int) v:6(int) 485 │ ├── stats: [rows=5000, distinct(2)=400, null(2)=0, distinct(5)=500, null(5)=1666.66667] 486 │ ├── fd: (1)-->(2) 487 │ ├── scan xysd 488 │ │ ├── columns: x:1(int!null) y:2(int) 489 │ │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=0] 490 │ │ ├── key: (1) 491 │ │ └── fd: (1)-->(2) 492 │ ├── scan uv 493 │ │ ├── columns: u:5(int) v:6(int!null) 494 │ │ └── stats: [rows=10000, distinct(5)=500, null(5)=0] 495 │ └── filters 496 │ ├── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 497 │ └── (y:2 + v:6) = 5 [type=bool, outer=(2,6)] 498 └── aggregations 499 └── count-rows [as=count_rows:8, type=int] 500 501 # Force column statistics calculation for right join. 502 norm 503 SELECT count(*) 504 FROM (SELECT * FROM xysd RIGHT OUTER JOIN uv ON x=u AND y+v=5) AS a 505 GROUP BY y 506 ---- 507 project 508 ├── columns: count:8(int!null) 509 ├── stats: [rows=399.903879] 510 └── group-by 511 ├── columns: y:2(int) count_rows:8(int!null) 512 ├── grouping columns: y:2(int) 513 ├── stats: [rows=399.903879, distinct(2)=399.903879, null(2)=1] 514 ├── key: (2) 515 ├── fd: (2)-->(8) 516 ├── left-join (hash) 517 │ ├── columns: x:1(int) y:2(int) u:5(int) v:6(int!null) 518 │ ├── stats: [rows=10000, distinct(1)=500, null(1)=6666.66667, distinct(2)=399.903879, null(2)=6666.66667] 519 │ ├── fd: (1)-->(2) 520 │ ├── scan uv 521 │ │ ├── columns: u:5(int) v:6(int!null) 522 │ │ └── stats: [rows=10000, distinct(5)=500, null(5)=0, distinct(6)=100, null(6)=0] 523 │ ├── scan xysd 524 │ │ ├── columns: x:1(int!null) y:2(int) 525 │ │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=0] 526 │ │ ├── key: (1) 527 │ │ └── fd: (1)-->(2) 528 │ └── filters 529 │ ├── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 530 │ └── (y:2 + v:6) = 5 [type=bool, outer=(2,6)] 531 └── aggregations 532 └── count-rows [as=count_rows:8, type=int] 533 534 # Force column statistics calculation for outer join. 535 norm 536 SELECT count(*) 537 FROM (SELECT * FROM xysd FULL OUTER JOIN uv ON x=u AND y+v=5) AS a 538 GROUP BY y 539 ---- 540 project 541 ├── columns: count:8(int!null) 542 ├── stats: [rows=400] 543 └── group-by 544 ├── columns: y:2(int) count_rows:8(int!null) 545 ├── grouping columns: y:2(int) 546 ├── stats: [rows=400, distinct(2)=400, null(2)=1] 547 ├── key: (2) 548 ├── fd: (2)-->(8) 549 ├── full-join (hash) 550 │ ├── columns: x:1(int) y:2(int) u:5(int) v:6(int) 551 │ ├── stats: [rows=11666.6667, distinct(2)=400, null(2)=6666.66667] 552 │ ├── fd: (1)-->(2) 553 │ ├── scan xysd 554 │ │ ├── columns: x:1(int!null) y:2(int) 555 │ │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=0] 556 │ │ ├── key: (1) 557 │ │ └── fd: (1)-->(2) 558 │ ├── scan uv 559 │ │ ├── columns: u:5(int) v:6(int!null) 560 │ │ └── stats: [rows=10000, distinct(5)=500, null(5)=0] 561 │ └── filters 562 │ ├── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 563 │ └── (y:2 + v:6) = 5 [type=bool, outer=(2,6)] 564 └── aggregations 565 └── count-rows [as=count_rows:8, type=int] 566 567 exec-ddl 568 CREATE TABLE uvw (u INT, v INT, w INT) 569 ---- 570 571 exec-ddl 572 CREATE TABLE xyz (x INT, y INT, z INT) 573 ---- 574 575 # Verify that two equivalent formulations of a join lead to similar statistics. 576 # In the first case, x=10 is pushed down; in the second case it is part of the 577 # ON condition. The latter formulation happens in practice when we convert to 578 # lookup join (we incorporate the filter back into the ON condition). 579 580 norm disable=(PushFilterIntoJoinLeftAndRight,PushFilterIntoJoinLeft,PushFilterIntoJoinRight,MapFilterIntoJoinLeft,MapFilterIntoJoinRight) 581 SELECT * FROM (SELECT * FROM uvw WHERE w=1) JOIN (SELECT * FROM xyz WHERE x=10) ON u=x 582 ---- 583 inner-join (hash) 584 ├── columns: u:1(int!null) v:2(int) w:3(int!null) x:5(int!null) y:6(int) z:7(int) 585 ├── stats: [rows=10.3537072, distinct(1)=1, null(1)=0, distinct(5)=1, null(5)=0] 586 ├── fd: ()-->(1,3,5), (1)==(5), (5)==(1) 587 ├── select 588 │ ├── columns: u:1(int) v:2(int) w:3(int!null) 589 │ ├── stats: [rows=10, distinct(1)=9.5617925, null(1)=0.1, distinct(3)=1, null(3)=0] 590 │ ├── fd: ()-->(3) 591 │ ├── scan uvw 592 │ │ ├── columns: u:1(int) v:2(int) w:3(int) 593 │ │ └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(3)=100, null(3)=10] 594 │ └── filters 595 │ └── w:3 = 1 [type=bool, outer=(3), constraints=(/3: [/1 - /1]; tight), fd=()-->(3)] 596 ├── select 597 │ ├── columns: x:5(int!null) y:6(int) z:7(int) 598 │ ├── stats: [rows=10, distinct(5)=1, null(5)=0] 599 │ ├── fd: ()-->(5) 600 │ ├── scan xyz 601 │ │ ├── columns: x:5(int) y:6(int) z:7(int) 602 │ │ └── stats: [rows=1000, distinct(5)=100, null(5)=10] 603 │ └── filters 604 │ └── x:5 = 10 [type=bool, outer=(5), constraints=(/5: [/10 - /10]; tight), fd=()-->(5)] 605 └── filters 606 └── u:1 = x:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 607 608 norm disable=(PushFilterIntoJoinLeftAndRight,PushFilterIntoJoinLeft,PushFilterIntoJoinRight,MapFilterIntoJoinLeft,MapFilterIntoJoinRight) 609 SELECT * FROM (SELECT * FROM uvw WHERE w=1) JOIN xyz ON u=x AND x=10 610 ---- 611 inner-join (hash) 612 ├── columns: u:1(int!null) v:2(int) w:3(int!null) x:5(int!null) y:6(int) z:7(int) 613 ├── stats: [rows=10.3537072, distinct(1)=1, null(1)=0, distinct(5)=1, null(5)=0] 614 ├── fd: ()-->(1,3,5), (1)==(5), (5)==(1) 615 ├── select 616 │ ├── columns: u:1(int) v:2(int) w:3(int!null) 617 │ ├── stats: [rows=10, distinct(1)=9.5617925, null(1)=0.1, distinct(3)=1, null(3)=0] 618 │ ├── fd: ()-->(3) 619 │ ├── scan uvw 620 │ │ ├── columns: u:1(int) v:2(int) w:3(int) 621 │ │ └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(3)=100, null(3)=10] 622 │ └── filters 623 │ └── w:3 = 1 [type=bool, outer=(3), constraints=(/3: [/1 - /1]; tight), fd=()-->(3)] 624 ├── scan xyz 625 │ ├── columns: x:5(int) y:6(int) z:7(int) 626 │ └── stats: [rows=1000, distinct(5)=100, null(5)=10] 627 └── filters 628 ├── u:1 = x:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 629 └── x:5 = 10 [type=bool, outer=(5), constraints=(/5: [/10 - /10]; tight), fd=()-->(5)] 630 631 # Bump up null counts. 632 exec-ddl 633 ALTER TABLE xysd INJECT STATISTICS '[ 634 { 635 "columns": ["x"], 636 "created_at": "2018-01-01 2:00:00.00000+00:00", 637 "row_count": 5000, 638 "distinct_count": 5000 639 }, 640 { 641 "columns": ["y"], 642 "created_at": "2018-01-01 2:00:00.00000+00:00", 643 "row_count": 5000, 644 "distinct_count": 400, 645 "null_count": 2500 646 } 647 ]' 648 ---- 649 650 exec-ddl 651 ALTER TABLE uv INJECT STATISTICS '[ 652 { 653 "columns": ["u"], 654 "created_at": "2018-01-01 2:00:00.00000+00:00", 655 "row_count": 10000, 656 "distinct_count": 500, 657 "null_count": 5000 658 }, 659 { 660 "columns": ["v"], 661 "created_at": "2018-01-01 2:00:00.00000+00:00", 662 "row_count": 10000, 663 "distinct_count": 100 664 }, 665 { 666 "columns": ["rowid"], 667 "created_at": "2018-01-01 2:00:00.00000+00:00", 668 "row_count": 10000, 669 "distinct_count": 10000 670 } 671 ]' 672 ---- 673 674 build colstat=2 colstat=(1,2,7) colstat=(2,3) colstat=3 colstat=(3,5) colstat=5 675 SELECT *, rowid FROM xysd INNER JOIN uv ON x=u 676 ---- 677 inner-join (hash) 678 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int!null) v:6(int!null) rowid:7(int!null) 679 ├── stats: [rows=5000, distinct(1)=499, null(1)=0, distinct(2)=399.99851, null(2)=2500, distinct(3)=499.977311, null(3)=50, distinct(5)=499, null(5)=0, distinct(2,3)=3160.69477, null(2,3)=25, distinct(3,5)=5000, null(3,5)=0, distinct(1,2,7)=5000, null(1,2,7)=0] 680 ├── key: (7) 681 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6), (1)==(5), (5)==(1) 682 ├── scan xysd 683 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 684 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=2500, distinct(3)=500, null(3)=50, distinct(4)=500, null(4)=0, distinct(1,2)=5000, null(1,2)=0, distinct(2,3)=5000, null(2,3)=25] 685 │ ├── key: (1) 686 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 687 ├── scan uv 688 │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 689 │ ├── stats: [rows=10000, distinct(5)=500, null(5)=5000, distinct(6)=100, null(6)=0, distinct(7)=10000, null(7)=0] 690 │ ├── key: (7) 691 │ └── fd: (7)-->(5,6) 692 └── filters 693 └── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 694 695 build colstat=2 colstat=(1,2,7) colstat=(2,3) colstat=3 colstat=(3,5) colstat=5 696 SELECT *, rowid FROM xysd LEFT JOIN uv ON x=u 697 ---- 698 left-join (hash) 699 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int) rowid:7(int) 700 ├── stats: [rows=10000, distinct(2)=400, null(2)=5000, distinct(3)=500, null(3)=100, distinct(5)=500, null(5)=0, distinct(2,3)=5000, null(2,3)=50, distinct(3,5)=10000, null(3,5)=50, distinct(1,2,7)=10000, null(1,2,7)=0] 701 ├── key: (1,7) 702 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6) 703 ├── scan xysd 704 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 705 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=2500, distinct(3)=500, null(3)=50, distinct(4)=500, null(4)=0, distinct(1,2)=5000, null(1,2)=0, distinct(2,3)=5000, null(2,3)=25] 706 │ ├── key: (1) 707 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 708 ├── scan uv 709 │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 710 │ ├── stats: [rows=10000, distinct(5)=500, null(5)=5000, distinct(7)=10000, null(7)=0] 711 │ ├── key: (7) 712 │ └── fd: (7)-->(5,6) 713 └── filters 714 └── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 715 716 build colstat=2 colstat=(1,2,7) colstat=(2,3) colstat=3 colstat=(3,5) colstat=5 717 SELECT *, rowid FROM xysd RIGHT JOIN uv ON x=u 718 ---- 719 right-join (hash) 720 ├── columns: x:1(int) y:2(int) s:3(string) d:4(decimal) u:5(int) v:6(int!null) rowid:7(int!null) 721 ├── stats: [rows=10000, distinct(1)=500, null(1)=0, distinct(2)=400, null(2)=5000, distinct(3)=499.999999, null(3)=100, distinct(5)=500, null(5)=5000, distinct(2,3)=4323.45892, null(2,3)=50, distinct(3,5)=10000, null(3,5)=50, distinct(1,2,7)=10000, null(1,2,7)=0] 722 ├── key: (7) 723 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(1-6) 724 ├── scan xysd 725 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 726 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=2500, distinct(3)=500, null(3)=50, distinct(1,2)=5000, null(1,2)=0, distinct(2,3)=5000, null(2,3)=25] 727 │ ├── key: (1) 728 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 729 ├── scan uv 730 │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 731 │ ├── stats: [rows=10000, distinct(5)=500, null(5)=5000, distinct(6)=100, null(6)=0, distinct(7)=10000, null(7)=0] 732 │ ├── key: (7) 733 │ └── fd: (7)-->(5,6) 734 └── filters 735 └── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 736 737 build colstat=2 colstat=(1,2,7) colstat=(2,3) colstat=3 colstat=(3,5) colstat=5 738 SELECT *, rowid FROM xysd FULL JOIN uv ON x=u 739 ---- 740 full-join (hash) 741 ├── columns: x:1(int) y:2(int) s:3(string) d:4(decimal) u:5(int) v:6(int) rowid:7(int) 742 ├── stats: [rows=10000, distinct(2)=400, null(2)=5000, distinct(3)=500, null(3)=100, distinct(5)=500, null(5)=5000, distinct(2,3)=5000, null(2,3)=50, distinct(3,5)=10000, null(3,5)=50, distinct(1,2,7)=10000, null(1,2,7)=0] 743 ├── key: (1,7) 744 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6) 745 ├── scan xysd 746 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 747 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=2500, distinct(3)=500, null(3)=50, distinct(1,2)=5000, null(1,2)=0, distinct(2,3)=5000, null(2,3)=25] 748 │ ├── key: (1) 749 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 750 ├── scan uv 751 │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 752 │ ├── stats: [rows=10000, distinct(5)=500, null(5)=5000, distinct(7)=10000, null(7)=0] 753 │ ├── key: (7) 754 │ └── fd: (7)-->(5,6) 755 └── filters 756 └── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 757 758 # Set one of the columns to non-nullable and see impact on multi-column null counts. 759 build colstat=2 colstat=(1,2,7) colstat=(2,3) colstat=3 colstat=(3,5) colstat=5 760 SELECT *, rowid FROM xysd FULL JOIN uv ON x=u WHERE s IS NOT NULL 761 ---- 762 select 763 ├── columns: x:1(int) y:2(int) s:3(string!null) d:4(decimal) u:5(int) v:6(int) rowid:7(int) 764 ├── stats: [rows=9900, distinct(2)=400, null(2)=4950, distinct(3)=500, null(3)=0, distinct(5)=500, null(5)=4950, distinct(2,3)=4999.5, null(2,3)=0, distinct(3,5)=9900, null(3,5)=0, distinct(1,2,7)=9900, null(1,2,7)=0] 765 ├── key: (1,7) 766 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6) 767 ├── full-join (hash) 768 │ ├── columns: x:1(int) y:2(int) s:3(string) d:4(decimal) u:5(int) v:6(int) rowid:7(int) 769 │ ├── stats: [rows=10000, distinct(2)=400, null(2)=5000, distinct(3)=500, null(3)=100, distinct(5)=500, null(5)=5000, distinct(2,3)=5000, null(2,3)=50, distinct(3,5)=10000, null(3,5)=50, distinct(1,2,7)=10000, null(1,2,7)=0] 770 │ ├── key: (1,7) 771 │ ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6) 772 │ ├── scan xysd 773 │ │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 774 │ │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=2500, distinct(3)=500, null(3)=50, distinct(1,2)=5000, null(1,2)=0, distinct(2,3)=5000, null(2,3)=25] 775 │ │ ├── key: (1) 776 │ │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 777 │ ├── scan uv 778 │ │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 779 │ │ ├── stats: [rows=10000, distinct(5)=500, null(5)=5000, distinct(7)=10000, null(7)=0] 780 │ │ ├── key: (7) 781 │ │ └── fd: (7)-->(5,6) 782 │ └── filters 783 │ └── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 784 └── filters 785 └── s:3 IS NOT NULL [type=bool, outer=(3), constraints=(/3: (/NULL - ]; tight)] 786 787 # Do a full join on a condition that results in 0 rows on one side. All null counts 788 # on the right side should be greater due to expected null-extension of columns. 789 build colstat=2 colstat=(1,2,7) colstat=(2,3) colstat=3 colstat=(3,5) colstat=5 790 SELECT *, rowid FROM xysd FULL JOIN uv ON u > 4 AND u < 2 791 ---- 792 full-join (cross) 793 ├── columns: x:1(int) y:2(int) s:3(string) d:4(decimal) u:5(int) v:6(int) rowid:7(int) 794 ├── stats: [rows=50000000, distinct(2)=400, null(2)=25000000, distinct(3)=500, null(3)=500000, distinct(5)=500, null(5)=25000000, distinct(2,3)=5000, null(2,3)=250000, distinct(3,5)=250000, null(3,5)=250000, distinct(1,2,7)=50000000, null(1,2,7)=0] 795 ├── key: (1,7) 796 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6) 797 ├── scan xysd 798 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 799 │ ├── stats: [rows=5000, distinct(2)=400, null(2)=2500, distinct(3)=500, null(3)=50, distinct(1,2)=5000, null(1,2)=0, distinct(2,3)=5000, null(2,3)=25] 800 │ ├── key: (1) 801 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 802 ├── scan uv 803 │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 804 │ ├── stats: [rows=10000, distinct(5)=500, null(5)=5000, distinct(7)=10000, null(7)=0] 805 │ ├── key: (7) 806 │ └── fd: (7)-->(5,6) 807 └── filters 808 └── (u:5 > 4) AND (u:5 < 2) [type=bool, outer=(5), constraints=(contradiction; tight)] 809 810 build colstat=2 colstat=(1,2,7) 811 SELECT * FROM xysd, uv 812 ---- 813 project 814 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int!null) 815 ├── stats: [rows=50000000, distinct(2)=400, null(2)=25000000, distinct(1,2,7)=50000000, null(1,2,7)=0] 816 ├── fd: (1)-->(2-4), (3,4)~~>(1,2) 817 └── inner-join (cross) 818 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int!null) rowid:7(int!null) 819 ├── stats: [rows=50000000, distinct(2)=400, null(2)=25000000, distinct(1,2,7)=50000000, null(1,2,7)=0] 820 ├── key: (1,7) 821 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6) 822 ├── scan xysd 823 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 824 │ ├── stats: [rows=5000, distinct(2)=400, null(2)=2500, distinct(1,2)=5000, null(1,2)=0] 825 │ ├── key: (1) 826 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 827 ├── scan uv 828 │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 829 │ ├── stats: [rows=10000, distinct(7)=10000, null(7)=0] 830 │ ├── key: (7) 831 │ └── fd: (7)-->(5,6) 832 └── filters (true) 833 834 norm 835 SELECT * FROM xysd WHERE EXISTS(SELECT * FROM uv WHERE x=u) 836 ---- 837 semi-join (hash) 838 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 839 ├── stats: [rows=500, distinct(1)=500, null(1)=0] 840 ├── key: (1) 841 ├── fd: (1)-->(2-4), (3,4)~~>(1,2) 842 ├── scan xysd 843 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 844 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(4)=500, null(4)=0] 845 │ ├── key: (1) 846 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 847 ├── scan uv 848 │ ├── columns: u:5(int) 849 │ └── stats: [rows=10000, distinct(5)=500, null(5)=5000] 850 └── filters 851 └── x:1 = u:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 852 853 norm 854 SELECT * FROM uv WHERE EXISTS(SELECT * FROM xysd WHERE x=u) 855 ---- 856 semi-join (hash) 857 ├── columns: u:1(int) v:2(int!null) 858 ├── stats: [rows=10000, distinct(1)=500, null(1)=0] 859 ├── scan uv 860 │ ├── columns: u:1(int) v:2(int!null) 861 │ └── stats: [rows=10000, distinct(1)=500, null(1)=5000, distinct(2)=100, null(2)=0] 862 ├── scan xysd 863 │ ├── columns: x:4(int!null) 864 │ ├── stats: [rows=5000, distinct(4)=5000, null(4)=0] 865 │ └── key: (4) 866 └── filters 867 └── x:4 = u:1 [type=bool, outer=(1,4), constraints=(/1: (/NULL - ]; /4: (/NULL - ]), fd=(1)==(4), (4)==(1)] 868 869 # Merge join (inner). 870 expr colstat=2 colstat=(1,2,7) colstat=(2,3) colstat=3 colstat=(3,5) colstat=5 871 (MergeJoin 872 (Scan [ (Table "xysd") (Cols "x,y,s,d") ]) 873 (Sort (Scan [ (Table "uv") (Cols "u,v,rowid") ])) 874 [ ] 875 [ 876 (JoinType "inner-join") 877 (LeftEq "+x") 878 (RightEq "+u") 879 (LeftOrdering "+x") 880 (RightOrdering "+u") 881 ] 882 ) 883 ---- 884 inner-join (merge) 885 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int!null) v:6(int!null) rowid:7(int!null) 886 ├── left ordering: +1 887 ├── right ordering: +5 888 ├── stats: [rows=5000, distinct(1)=499, null(1)=0, distinct(2)=400, null(2)=2500, distinct(3)=500, null(3)=50, distinct(5)=499, null(5)=0, distinct(2,3)=5000, null(2,3)=25, distinct(3,5)=5000, null(3,5)=0, distinct(1,2,7)=5000, null(1,2,7)=0] 889 ├── key: (7) 890 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6), (1)==(5), (5)==(1) 891 ├── scan xysd 892 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 893 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=2500, distinct(3)=500, null(3)=50, distinct(4)=500, null(4)=0, distinct(1,2)=5000, null(1,2)=0, distinct(2,3)=5000, null(2,3)=25] 894 │ ├── key: (1) 895 │ ├── fd: (1)-->(2-4), (3,4)~~>(1,2) 896 │ └── ordering: +1 897 ├── sort 898 │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 899 │ ├── stats: [rows=10000, distinct(5)=500, null(5)=5000, distinct(6)=100, null(6)=0, distinct(7)=10000, null(7)=0] 900 │ ├── key: (7) 901 │ ├── fd: (7)-->(5,6) 902 │ ├── ordering: +5 903 │ └── scan uv 904 │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 905 │ ├── stats: [rows=10000, distinct(5)=500, null(5)=5000, distinct(6)=100, null(6)=0, distinct(7)=10000, null(7)=0] 906 │ ├── key: (7) 907 │ └── fd: (7)-->(5,6) 908 └── filters (true) 909 910 # Merge join (left) with extra ON condition. 911 expr colstat=2 colstat=(1,2,7) colstat=(2,3) colstat=3 colstat=(3,5) colstat=5 912 (MergeJoin 913 (Scan [ (Table "xysd") (Cols "x,y,s,d") ]) 914 (Sort (Scan [ (Table "uv") (Cols "u,v,rowid") ])) 915 [ (Gt (Var "y") (Var "v")) ] 916 [ 917 (JoinType "left-join") 918 (LeftEq "+x") 919 (RightEq "+u") 920 (LeftOrdering "+x") 921 (RightOrdering "+u") 922 ] 923 ) 924 ---- 925 left-join (merge) 926 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int) rowid:7(int) 927 ├── left ordering: +1 928 ├── right ordering: +5 929 ├── stats: [rows=5000, distinct(2)=400, null(2)=2500, distinct(3)=500, null(3)=50, distinct(5)=500, null(5)=1666.66667, distinct(6)=100, null(6)=1666.66667, distinct(2,3)=5000, null(2,3)=25, distinct(3,5)=5000, null(3,5)=25, distinct(1,2,7)=5000, null(1,2,7)=0] 930 ├── key: (1,7) 931 ├── fd: (1)-->(2-4), (3,4)~~>(1,2), (7)-->(5,6) 932 ├── scan xysd 933 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 934 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=2500, distinct(3)=500, null(3)=50, distinct(4)=500, null(4)=0, distinct(1,2)=5000, null(1,2)=0, distinct(2,3)=5000, null(2,3)=25] 935 │ ├── key: (1) 936 │ ├── fd: (1)-->(2-4), (3,4)~~>(1,2) 937 │ └── ordering: +1 938 ├── sort 939 │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 940 │ ├── stats: [rows=10000, distinct(5)=500, null(5)=5000, distinct(6)=100, null(6)=0, distinct(7)=10000, null(7)=0] 941 │ ├── key: (7) 942 │ ├── fd: (7)-->(5,6) 943 │ ├── ordering: +5 944 │ └── scan uv 945 │ ├── columns: u:5(int) v:6(int!null) rowid:7(int!null) 946 │ ├── stats: [rows=10000, distinct(5)=500, null(5)=5000, distinct(6)=100, null(6)=0, distinct(7)=10000, null(7)=0] 947 │ ├── key: (7) 948 │ └── fd: (7)-->(5,6) 949 └── filters 950 └── y:2 > v:6 [type=bool, outer=(2,6), constraints=(/2: (/NULL - ]; /6: (/NULL - ])] 951 952 # Check that true filters are handled correctly for all join types. 953 norm 954 SELECT * FROM (SELECT 1) JOIN (SELECT 1 WHERE false) ON true 955 ---- 956 values 957 ├── columns: "?column?":1(int!null) "?column?":2(int!null) 958 ├── cardinality: [0 - 0] 959 ├── stats: [rows=0] 960 ├── key: () 961 └── fd: ()-->(1,2) 962 963 norm 964 SELECT * FROM (SELECT 1) LEFT JOIN (SELECT 1 WHERE false) ON true 965 ---- 966 left-join (cross) 967 ├── columns: "?column?":1(int!null) "?column?":2(int) 968 ├── cardinality: [1 - 1] 969 ├── stats: [rows=1] 970 ├── key: () 971 ├── fd: ()-->(1,2) 972 ├── values 973 │ ├── columns: "?column?":1(int!null) 974 │ ├── cardinality: [1 - 1] 975 │ ├── stats: [rows=1] 976 │ ├── key: () 977 │ ├── fd: ()-->(1) 978 │ └── (1,) [type=tuple{int}] 979 ├── values 980 │ ├── columns: "?column?":2(int!null) 981 │ ├── cardinality: [0 - 0] 982 │ ├── stats: [rows=0] 983 │ ├── key: () 984 │ └── fd: ()-->(2) 985 └── filters (true) 986 987 norm 988 SELECT * FROM (SELECT 1) RIGHT JOIN (SELECT 1 WHERE false) ON true 989 ---- 990 values 991 ├── columns: "?column?":1(int!null) "?column?":2(int!null) 992 ├── cardinality: [0 - 0] 993 ├── stats: [rows=0] 994 ├── key: () 995 └── fd: ()-->(1,2) 996 997 norm 998 SELECT * FROM (SELECT 1) FULL JOIN (SELECT 1 WHERE false) ON true 999 ---- 1000 left-join (cross) 1001 ├── columns: "?column?":1(int!null) "?column?":2(int) 1002 ├── cardinality: [1 - 1] 1003 ├── stats: [rows=1] 1004 ├── key: () 1005 ├── fd: ()-->(1,2) 1006 ├── values 1007 │ ├── columns: "?column?":1(int!null) 1008 │ ├── cardinality: [1 - 1] 1009 │ ├── stats: [rows=1] 1010 │ ├── key: () 1011 │ ├── fd: ()-->(1) 1012 │ └── (1,) [type=tuple{int}] 1013 ├── values 1014 │ ├── columns: "?column?":2(int!null) 1015 │ ├── cardinality: [0 - 0] 1016 │ ├── stats: [rows=0] 1017 │ ├── key: () 1018 │ └── fd: ()-->(2) 1019 └── filters (true) 1020 1021 norm 1022 SELECT * FROM (SELECT 1 WHERE false) JOIN (SELECT 1) ON true 1023 ---- 1024 values 1025 ├── columns: "?column?":1(int!null) "?column?":2(int!null) 1026 ├── cardinality: [0 - 0] 1027 ├── stats: [rows=0] 1028 ├── key: () 1029 └── fd: ()-->(1,2) 1030 1031 norm 1032 SELECT * FROM (SELECT 1 WHERE false) LEFT JOIN (SELECT 1) ON true 1033 ---- 1034 values 1035 ├── columns: "?column?":1(int!null) "?column?":2(int!null) 1036 ├── cardinality: [0 - 0] 1037 ├── stats: [rows=0] 1038 ├── key: () 1039 └── fd: ()-->(1,2) 1040 1041 norm 1042 SELECT * FROM (SELECT 1 WHERE false) RIGHT JOIN (SELECT 1) ON true 1043 ---- 1044 left-join (cross) 1045 ├── columns: "?column?":1(int) "?column?":2(int!null) 1046 ├── cardinality: [1 - 1] 1047 ├── stats: [rows=1] 1048 ├── key: () 1049 ├── fd: ()-->(1,2) 1050 ├── values 1051 │ ├── columns: "?column?":2(int!null) 1052 │ ├── cardinality: [1 - 1] 1053 │ ├── stats: [rows=1] 1054 │ ├── key: () 1055 │ ├── fd: ()-->(2) 1056 │ └── (1,) [type=tuple{int}] 1057 ├── values 1058 │ ├── columns: "?column?":1(int!null) 1059 │ ├── cardinality: [0 - 0] 1060 │ ├── stats: [rows=0] 1061 │ ├── key: () 1062 │ └── fd: ()-->(1) 1063 └── filters (true) 1064 1065 norm 1066 SELECT * FROM (SELECT 1 WHERE false) FULL JOIN (SELECT 1) ON true 1067 ---- 1068 left-join (cross) 1069 ├── columns: "?column?":1(int) "?column?":2(int!null) 1070 ├── cardinality: [1 - 1] 1071 ├── stats: [rows=1] 1072 ├── key: () 1073 ├── fd: ()-->(1,2) 1074 ├── values 1075 │ ├── columns: "?column?":2(int!null) 1076 │ ├── cardinality: [1 - 1] 1077 │ ├── stats: [rows=1] 1078 │ ├── key: () 1079 │ ├── fd: ()-->(2) 1080 │ └── (1,) [type=tuple{int}] 1081 ├── values 1082 │ ├── columns: "?column?":1(int!null) 1083 │ ├── cardinality: [0 - 0] 1084 │ ├── stats: [rows=0] 1085 │ ├── key: () 1086 │ └── fd: ()-->(1) 1087 └── filters (true) 1088 1089 norm 1090 SELECT * FROM (SELECT 1) FULL JOIN (VALUES (1), (2)) ON true 1091 ---- 1092 inner-join (cross) 1093 ├── columns: "?column?":1(int!null) column1:2(int!null) 1094 ├── cardinality: [2 - 2] 1095 ├── stats: [rows=2] 1096 ├── fd: ()-->(1) 1097 ├── values 1098 │ ├── columns: column1:2(int!null) 1099 │ ├── cardinality: [2 - 2] 1100 │ ├── stats: [rows=2] 1101 │ ├── (1,) [type=tuple{int}] 1102 │ └── (2,) [type=tuple{int}] 1103 ├── values 1104 │ ├── columns: "?column?":1(int!null) 1105 │ ├── cardinality: [1 - 1] 1106 │ ├── stats: [rows=1] 1107 │ ├── key: () 1108 │ ├── fd: ()-->(1) 1109 │ └── (1,) [type=tuple{int}] 1110 └── filters (true) 1111 1112 exec-ddl 1113 CREATE TABLE table0 ( 1114 col0 INT4, 1115 col1 BOOL NULL, 1116 col2 BIT(40) NOT NULL 1117 ) 1118 ---- 1119 1120 exec-ddl 1121 CREATE TABLE table1 ( 1122 col0 BIT(23) NULL, 1123 col1 INET NULL 1124 ) 1125 ---- 1126 1127 # Regression test for #38091. 1128 norm 1129 SELECT ( 1130 SELECT 1 1131 FROM table1 1132 LEFT JOIN table1 AS t1 1133 INNER JOIN table0 ON false ON t0.col1 1134 ) 1135 FROM table0 AS t0 1136 ---- 1137 project 1138 ├── columns: "?column?":16(int) 1139 ├── stats: [rows=1000000] 1140 ├── ensure-distinct-on 1141 │ ├── columns: t0.rowid:4(int!null) "?column?":15(int) 1142 │ ├── grouping columns: t0.rowid:4(int!null) 1143 │ ├── error: "more than one row returned by a subquery used as an expression" 1144 │ ├── stats: [rows=1000000] 1145 │ ├── key: (4) 1146 │ ├── fd: (4)-->(15) 1147 │ ├── left-join-apply 1148 │ │ ├── columns: t0.col1:2(bool) t0.rowid:4(int!null) "?column?":15(int) 1149 │ │ ├── stats: [rows=1000000] 1150 │ │ ├── fd: (4)-->(2) 1151 │ │ ├── scan t0 1152 │ │ │ ├── columns: t0.col1:2(bool) t0.rowid:4(int!null) 1153 │ │ │ ├── stats: [rows=1000] 1154 │ │ │ ├── key: (4) 1155 │ │ │ └── fd: (4)-->(2) 1156 │ │ ├── project 1157 │ │ │ ├── columns: "?column?":15(int!null) 1158 │ │ │ ├── outer: (2) 1159 │ │ │ ├── stats: [rows=1000] 1160 │ │ │ ├── fd: ()-->(15) 1161 │ │ │ ├── left-join (cross) 1162 │ │ │ │ ├── outer: (2) 1163 │ │ │ │ ├── stats: [rows=1000, distinct(2)=1, null(2)=0] 1164 │ │ │ │ ├── scan table1 1165 │ │ │ │ │ └── stats: [rows=1000] 1166 │ │ │ │ ├── values 1167 │ │ │ │ │ ├── cardinality: [0 - 0] 1168 │ │ │ │ │ ├── stats: [rows=0] 1169 │ │ │ │ │ └── key: () 1170 │ │ │ │ └── filters 1171 │ │ │ │ └── t0.col1:2 [type=bool, outer=(2), constraints=(/2: [/true - /true]; tight), fd=()-->(2)] 1172 │ │ │ └── projections 1173 │ │ │ └── 1 [as="?column?":15, type=int] 1174 │ │ └── filters (true) 1175 │ └── aggregations 1176 │ └── const-agg [as="?column?":15, type=int, outer=(15)] 1177 │ └── "?column?":15 [type=int] 1178 └── projections 1179 └── "?column?":15 [as="?column?":16, type=int, outer=(15)] 1180 1181 norm colstat=1 colstat=2 1182 SELECT * FROM (SELECT 1) AS a(x) LEFT JOIN (SELECT 2) AS b(x) ON a.x = b.x 1183 ---- 1184 left-join (cross) 1185 ├── columns: x:1(int!null) x:2(int) 1186 ├── cardinality: [1 - 1] 1187 ├── stats: [rows=1, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=1] 1188 ├── key: () 1189 ├── fd: ()-->(1,2) 1190 ├── values 1191 │ ├── columns: "?column?":1(int!null) 1192 │ ├── cardinality: [1 - 1] 1193 │ ├── stats: [rows=1, distinct(1)=1, null(1)=0] 1194 │ ├── key: () 1195 │ ├── fd: ()-->(1) 1196 │ └── (1,) [type=tuple{int}] 1197 ├── values 1198 │ ├── columns: "?column?":2(int!null) 1199 │ ├── cardinality: [0 - 0] 1200 │ ├── stats: [rows=0, distinct(2)=0, null(2)=0] 1201 │ ├── key: () 1202 │ └── fd: ()-->(2) 1203 └── filters (true) 1204 1205 exec-ddl 1206 CREATE TABLE abc (a INT, b INT, c INT, PRIMARY KEY (a, b)); 1207 ---- 1208 1209 exec-ddl 1210 CREATE TABLE def (d INT, e INT, f INT, PRIMARY KEY (d, e)); 1211 ---- 1212 1213 exec-ddl 1214 ALTER TABLE abc INJECT STATISTICS '[ 1215 { 1216 "columns": ["a"], 1217 "created_at": "2018-01-01 1:00:00.00000+00:00", 1218 "row_count": 100, 1219 "distinct_count": 100 1220 }, 1221 { 1222 "columns": ["b"], 1223 "created_at": "2018-01-01 1:00:00.00000+00:00", 1224 "row_count": 100, 1225 "distinct_count": 10 1226 } 1227 ]' 1228 ---- 1229 1230 exec-ddl 1231 ALTER TABLE def INJECT STATISTICS '[ 1232 { 1233 "columns": ["d"], 1234 "created_at": "2018-01-01 1:00:00.00000+00:00", 1235 "row_count": 10000, 1236 "distinct_count": 10000 1237 }, 1238 { 1239 "columns": ["e"], 1240 "created_at": "2018-01-01 1:00:00.00000+00:00", 1241 "row_count": 10000, 1242 "distinct_count": 10000 1243 } 1244 ]' 1245 ---- 1246 1247 # TODO(rytaft): The cardinality estimates here are unrealistically low. 1248 # Maybe revisit this? I doubt there's anything we can do there though. 1249 expr format=show-all colstat=5 colstat=6 colstat=(5, 6) colstat=1 colstat=2 colstat=3 colstat=(1, 2, 3) 1250 (MakeLookupJoin 1251 (Scan [ (Table "abc") (Cols "a,b,c") ]) 1252 [ (JoinType "inner-join") (Table "def") (Index "def@primary") (KeyCols "a,b") (Cols "a,b,c,d,f,e") ] 1253 [ ] 1254 ) 1255 ---- 1256 inner-join (lookup def) 1257 ├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(int) t.public.def.d:4(int!null) t.public.def.e:5(int!null) t.public.def.f:6(int) 1258 ├── key columns: [1 2] = [4 5] 1259 ├── stats: [rows=0.01, distinct(1)=0.01, null(1)=0, distinct(2)=0.01, null(2)=0, distinct(3)=0.00999500175, null(3)=0.0001, distinct(4)=0.01, null(4)=0, distinct(5)=0.01, null(5)=0, distinct(6)=0.00999995009, null(6)=0.0001, distinct(5,6)=0.00999999509, null(5,6)=0, distinct(1-3)=0.0099995001, null(1-3)=0] 1260 ├── cost: 2106.0506 1261 ├── key: (4,5) 1262 ├── fd: (1,2)-->(3), (4,5)-->(6), (1)==(4), (4)==(1), (2)==(5), (5)==(2) 1263 ├── interesting orderings: (+1,+2) 1264 ├── scan t.public.abc 1265 │ ├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(int) 1266 │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=10, null(3)=1, distinct(1-3)=100, null(1-3)=0] 1267 │ ├── cost: 106.02 1268 │ ├── key: (1,2) 1269 │ ├── fd: (1,2)-->(3) 1270 │ ├── prune: (1-3) 1271 │ └── interesting orderings: (+1,+2) 1272 └── filters (true) 1273 1274 # TODO(rytaft): The cardinality estimates for the semi-join are the same as the table. 1275 # The semi-join currently ignores the selectivities of the filters in the On condition. 1276 # We should fix this. 1277 expr format=show-all colstat=5 colstat=6 colstat=(5, 6) colstat=1 colstat=2 colstat=3 colstat=(1, 2, 3) 1278 (MakeLookupJoin 1279 (Scan [ (Table "abc") (Cols "a,b,c") ]) 1280 [ (JoinType "semi-join") (Table "def") (Index "def@primary") (KeyCols "a,b") (Cols "a,b,c,d,e,f") ] 1281 [ ] 1282 ) 1283 ---- 1284 semi-join (lookup def) 1285 ├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(int) 1286 ├── key columns: [1 2] = [4 5] 1287 ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=10, null(3)=1, distinct(5)=1, null(5)=0, distinct(6)=1, null(6)=0, distinct(5,6)=1, null(5,6)=0, distinct(1-3)=100, null(1-3)=0] 1288 ├── cost: 2106.0506 1289 ├── key: (1,2) 1290 ├── fd: (1,2)-->(3) 1291 ├── interesting orderings: (+1,+2) 1292 ├── scan t.public.abc 1293 │ ├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(int) 1294 │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=10, null(3)=1, distinct(1-3)=100, null(1-3)=0] 1295 │ ├── cost: 106.02 1296 │ ├── key: (1,2) 1297 │ ├── fd: (1,2)-->(3) 1298 │ ├── prune: (1-3) 1299 │ └── interesting orderings: (+1,+2) 1300 └── filters (true) 1301 1302 expr format=show-all colstat=5 colstat=6 colstat=(5, 6) colstat=1 colstat=2 colstat=3 colstat=(1, 2, 3) 1303 (MakeLookupJoin 1304 (Scan [ (Table "abc") (Cols "a,b,c") ]) 1305 [ (JoinType "anti-join") (Table "def") (Index "def@primary") (KeyCols "a,b") (Cols "a,b,c,d,e,f") ] 1306 [ ] 1307 ) 1308 ---- 1309 anti-join (lookup def) 1310 ├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(int) 1311 ├── key columns: [1 2] = [4 5] 1312 ├── stats: [rows=1e-10, distinct(1)=1e-10, null(1)=0, distinct(2)=1e-10, null(2)=0, distinct(3)=1e-10, null(3)=1e-10, distinct(5)=1e-10, null(5)=0, distinct(6)=1e-10, null(6)=0, distinct(5,6)=1e-10, null(5,6)=0, distinct(1-3)=1e-10, null(1-3)=0] 1313 ├── cost: 2106.0506 1314 ├── key: (1,2) 1315 ├── fd: (1,2)-->(3) 1316 ├── interesting orderings: (+1,+2) 1317 ├── scan t.public.abc 1318 │ ├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(int) 1319 │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=10, null(3)=1, distinct(1-3)=100, null(1-3)=0] 1320 │ ├── cost: 106.02 1321 │ ├── key: (1,2) 1322 │ ├── fd: (1,2)-->(3) 1323 │ ├── prune: (1-3) 1324 │ └── interesting orderings: (+1,+2) 1325 └── filters (true) 1326 1327 expr format=show-all colstat=5 colstat=6 colstat=(5, 6) colstat=1 colstat=2 colstat=3 colstat=(1, 2, 3) 1328 (MakeLookupJoin 1329 (Scan [ (Table "abc") (Cols "a,b,c") ]) 1330 [ (JoinType "semi-join") (Table "def") (Index "def@primary") (KeyCols "a,b") (Cols "a,b,c,d,e,f") ] 1331 [ (False) ] 1332 ) 1333 ---- 1334 semi-join (lookup def) 1335 ├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(int) 1336 ├── key columns: [1 2] = [4 5] 1337 ├── stats: [rows=0, distinct(1)=0, null(1)=0, distinct(2)=0, null(2)=0, distinct(3)=0, null(3)=0, distinct(5)=0, null(5)=0, distinct(6)=0, null(6)=0, distinct(5,6)=0, null(5,6)=0, distinct(1-3)=0, null(1-3)=0] 1338 ├── cost: 2106.0606 1339 ├── key: (1,2) 1340 ├── fd: (1,2)-->(3) 1341 ├── interesting orderings: (+1,+2) 1342 ├── scan t.public.abc 1343 │ ├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(int) 1344 │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=10, null(3)=1, distinct(1-3)=100, null(1-3)=0] 1345 │ ├── cost: 106.02 1346 │ ├── key: (1,2) 1347 │ ├── fd: (1,2)-->(3) 1348 │ ├── prune: (1-3) 1349 │ └── interesting orderings: (+1,+2) 1350 └── filters 1351 └── false [type=bool] 1352 1353 expr format=show-all colstat=5 colstat=6 colstat=(5, 6) colstat=1 colstat=2 colstat=3 colstat=(1, 2, 3) 1354 (MakeLookupJoin 1355 (Scan [ (Table "abc") (Cols "a,b,c") ]) 1356 [ (JoinType "anti-join") (Table "def") (Index "def@primary") (KeyCols "a,b") (Cols "a,b,c,d,e,f") ] 1357 [ (False) ] 1358 ) 1359 ---- 1360 anti-join (lookup def) 1361 ├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(int) 1362 ├── key columns: [1 2] = [4 5] 1363 ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=10, null(3)=1, distinct(5)=1, null(5)=0, distinct(6)=1, null(6)=0, distinct(5,6)=1, null(5,6)=0, distinct(1-3)=100, null(1-3)=0] 1364 ├── cost: 2106.0606 1365 ├── key: (1,2) 1366 ├── fd: (1,2)-->(3) 1367 ├── interesting orderings: (+1,+2) 1368 ├── scan t.public.abc 1369 │ ├── columns: t.public.abc.a:1(int!null) t.public.abc.b:2(int!null) t.public.abc.c:3(int) 1370 │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=10, null(3)=1, distinct(1-3)=100, null(1-3)=0] 1371 │ ├── cost: 106.02 1372 │ ├── key: (1,2) 1373 │ ├── fd: (1,2)-->(3) 1374 │ ├── prune: (1-3) 1375 │ └── interesting orderings: (+1,+2) 1376 └── filters 1377 └── false [type=bool] 1378 1379 # Regression test for #40460. 1380 opt 1381 SELECT 1382 * 1383 FROM 1384 abc 1385 FULL JOIN (SELECT * FROM abc WHERE false) ON 1386 false 1387 IS NOT DISTINCT FROM not_like_escape( 1388 '', 1389 NULL::STRING, 1390 (SELECT NULL)::STRING 1391 ); 1392 ---- 1393 full-join (cross) 1394 ├── columns: a:1(int) b:2(int) c:3(int) a:4(int) b:5(int) c:6(int) 1395 ├── immutable 1396 ├── stats: [rows=100] 1397 ├── key: (1,2) 1398 ├── fd: (1,2)-->(3-6) 1399 ├── scan abc 1400 │ ├── columns: a:1(int!null) b:2(int!null) c:3(int) 1401 │ ├── stats: [rows=100] 1402 │ ├── key: (1,2) 1403 │ └── fd: (1,2)-->(3) 1404 ├── values 1405 │ ├── columns: a:4(int!null) b:5(int!null) c:6(int!null) 1406 │ ├── cardinality: [0 - 0] 1407 │ ├── stats: [rows=0] 1408 │ ├── key: () 1409 │ └── fd: ()-->(4-6) 1410 └── filters 1411 └── is [type=bool, immutable, subquery] 1412 ├── function: not_like_escape [type=bool] 1413 │ ├── '' [type=string] 1414 │ ├── CAST(NULL AS STRING) [type=string] 1415 │ └── cast: STRING [type=string] 1416 │ └── subquery [type=unknown] 1417 │ └── values 1418 │ ├── columns: "?column?":7(unknown) 1419 │ ├── cardinality: [1 - 1] 1420 │ ├── stats: [rows=1] 1421 │ ├── key: () 1422 │ ├── fd: ()-->(7) 1423 │ └── (NULL,) [type=tuple{unknown}] 1424 └── false [type=bool] 1425 1426 expr 1427 (SemiJoin 1428 (Values 1429 [ (Tuple [ (Const 1 "int") (Const 2 "int") ] "tuple{int}" ) ] 1430 [ (Cols [ (NewColumn "a" "int") (NewColumn "b" "int") ]) ] 1431 ) 1432 (Scan [ (Table "uv") (Cols "u,v,rowid") ]) 1433 [] 1434 [] 1435 ) 1436 ---- 1437 semi-join (cross) 1438 ├── columns: a:1(int!null) b:2(int!null) 1439 ├── cardinality: [0 - 1] 1440 ├── stats: [rows=1] 1441 ├── key: () 1442 ├── fd: ()-->(1,2) 1443 ├── values 1444 │ ├── columns: a:1(int!null) b:2(int!null) 1445 │ ├── cardinality: [1 - 1] 1446 │ ├── stats: [rows=1] 1447 │ ├── key: () 1448 │ ├── fd: ()-->(1,2) 1449 │ └── (1, 2) [type=tuple{int}] 1450 ├── scan uv 1451 │ ├── columns: u:3(int) v:4(int!null) rowid:5(int!null) 1452 │ ├── stats: [rows=10000] 1453 │ ├── key: (5) 1454 │ └── fd: (5)-->(3,4) 1455 └── filters (true) 1456 1457 expr 1458 (AntiJoin 1459 (Values 1460 [ (Tuple [ (Const 1 "int") (Const 2 "int") ] "tuple{int}" ) ] 1461 [ (Cols [ (NewColumn "a" "int") (NewColumn "b" "int") ]) ] 1462 ) 1463 (Scan [ (Table "uv") (Cols "u,v,rowid") ]) 1464 [] 1465 [] 1466 ) 1467 ---- 1468 anti-join (cross) 1469 ├── columns: a:1(int!null) b:2(int!null) 1470 ├── cardinality: [0 - 1] 1471 ├── stats: [rows=1e-10] 1472 ├── key: () 1473 ├── fd: ()-->(1,2) 1474 ├── values 1475 │ ├── columns: a:1(int!null) b:2(int!null) 1476 │ ├── cardinality: [1 - 1] 1477 │ ├── stats: [rows=1] 1478 │ ├── key: () 1479 │ ├── fd: ()-->(1,2) 1480 │ └── (1, 2) [type=tuple{int}] 1481 ├── scan uv 1482 │ ├── columns: u:3(int) v:4(int!null) rowid:5(int!null) 1483 │ ├── stats: [rows=10000] 1484 │ ├── key: (5) 1485 │ └── fd: (5)-->(3,4) 1486 └── filters (true) 1487 1488 exec-ddl 1489 ALTER TABLE xysd INJECT STATISTICS '[ 1490 { 1491 "columns": ["x"], 1492 "created_at": "2018-01-01 1:00:00.00000+00:00", 1493 "row_count": 5000, 1494 "distinct_count": 5000 1495 }, 1496 { 1497 "columns": ["y"], 1498 "created_at": "2018-01-01 1:30:00.00000+00:00", 1499 "row_count": 5000, 1500 "distinct_count": 400 1501 }, 1502 { 1503 "columns": ["s"], 1504 "created_at": "2018-01-01 1:30:00.00000+00:00", 1505 "row_count": 5000, 1506 "distinct_count": 10 1507 } 1508 ]' 1509 ---- 1510 1511 exec-ddl 1512 ALTER TABLE uv INJECT STATISTICS '[ 1513 { 1514 "columns": ["u"], 1515 "created_at": "2018-01-01 1:00:00.00000+00:00", 1516 "row_count": 10000, 1517 "distinct_count": 500 1518 }, 1519 { 1520 "columns": ["v"], 1521 "created_at": "2018-01-01 1:30:00.00000+00:00", 1522 "row_count": 10000, 1523 "distinct_count": 100 1524 }, 1525 { 1526 "columns": ["u","v"], 1527 "created_at": "2018-01-01 1:30:00.00000+00:00", 1528 "row_count": 10000, 1529 "distinct_count": 550 1530 }, 1531 { 1532 "columns": ["rowid"], 1533 "created_at": "2018-01-01 1:30:00.00000+00:00", 1534 "row_count": 10000, 1535 "distinct_count": 10000 1536 } 1537 ]' 1538 ---- 1539 1540 # We use multi-column stats split across the join to estimate the selectivity 1541 # here. 1542 opt 1543 SELECT * FROM xysd, uv WHERE (s = 'foo' AND u = 3 AND v = 4) OR (s = 'bar' AND u = 5 AND v = 6) 1544 ---- 1545 inner-join (cross) 1546 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) u:5(int!null) v:6(int!null) 1547 ├── stats: [rows=11979.6897, distinct(3)=2, null(3)=0, distinct(5)=2, null(5)=0, distinct(6)=2, null(6)=0, distinct(5,6)=2.19138756, null(5,6)=0] 1548 ├── fd: (1)-->(2-4), (3,4)-->(1,2) 1549 ├── scan uv 1550 │ ├── columns: u:5(int) v:6(int!null) 1551 │ └── stats: [rows=10000, distinct(5)=500, null(5)=0, distinct(6)=100, null(6)=0, distinct(5,6)=550, null(5,6)=0] 1552 ├── scan xysd 1553 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 1554 │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(3)=10, null(3)=0, distinct(4)=500, null(4)=0] 1555 │ ├── key: (1) 1556 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 1557 └── filters 1558 └── (((s:3 = 'foo') AND (u:5 = 3)) AND (v:6 = 4)) OR (((s:3 = 'bar') AND (u:5 = 5)) AND (v:6 = 6)) [type=bool, outer=(3,5,6), constraints=(/3: [/'bar' - /'bar'] [/'foo' - /'foo']; /5: [/3 - /3] [/5 - /5]; /6: [/4 - /4] [/6 - /6])]