github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/memo/testdata/stats/lookup-join (about) 1 exec-ddl 2 CREATE TABLE abcd (a INT, b INT, c INT, INDEX (a,b)) 3 ---- 4 5 exec-ddl 6 CREATE TABLE small (m INT, n INT) 7 ---- 8 9 exec-ddl 10 ALTER TABLE small INJECT STATISTICS '[ 11 { 12 "columns": ["m"], 13 "created_at": "2018-01-01 1:00:00.00000+00:00", 14 "row_count": 10, 15 "distinct_count": 10 16 } 17 ]' 18 ---- 19 20 # We can only test lookup stat generation when using non-covering indexes 21 # (that's when we create a group with LookupJoin). We can compare the 22 # statistics with the top-level join, they should be in the same ballpark. 23 24 opt 25 SELECT * FROM small JOIN abcd ON a=m 26 ---- 27 inner-join (lookup abcd) 28 ├── columns: m:1(int!null) n:2(int) a:4(int!null) b:5(int) c:6(int) 29 ├── key columns: [7] = [7] 30 ├── lookup columns are key 31 ├── stats: [rows=99, distinct(1)=10, null(1)=0, distinct(4)=10, null(4)=0] 32 ├── fd: (1)==(4), (4)==(1) 33 ├── inner-join (lookup abcd@secondary) 34 │ ├── columns: m:1(int!null) n:2(int) a:4(int!null) b:5(int) abcd.rowid:7(int!null) 35 │ ├── key columns: [1] = [4] 36 │ ├── stats: [rows=99, distinct(1)=10, null(1)=0, distinct(4)=10, null(4)=0] 37 │ ├── fd: (7)-->(4,5), (1)==(4), (4)==(1) 38 │ ├── scan small 39 │ │ ├── columns: m:1(int) n:2(int) 40 │ │ └── stats: [rows=10, distinct(1)=10, null(1)=0] 41 │ └── filters (true) 42 └── filters (true) 43 44 # Filter that gets pushed down on both sides, but comes back into the ON 45 # condition for the lookup side. 46 opt 47 SELECT * FROM small JOIN abcd ON a=m WHERE n > 2 48 ---- 49 inner-join (lookup abcd) 50 ├── columns: m:1(int!null) n:2(int!null) a:4(int!null) b:5(int) c:6(int) 51 ├── key columns: [7] = [7] 52 ├── lookup columns are key 53 ├── stats: [rows=98.01, distinct(1)=9.9, null(1)=0, distinct(4)=9.9, null(4)=0] 54 ├── fd: (1)==(4), (4)==(1) 55 ├── inner-join (lookup abcd@secondary) 56 │ ├── columns: m:1(int!null) n:2(int!null) a:4(int!null) b:5(int) abcd.rowid:7(int!null) 57 │ ├── key columns: [1] = [4] 58 │ ├── stats: [rows=98.01, distinct(1)=9.9, null(1)=0, distinct(4)=9.9, null(4)=0] 59 │ ├── fd: (7)-->(4,5), (1)==(4), (4)==(1) 60 │ ├── select 61 │ │ ├── columns: m:1(int) n:2(int!null) 62 │ │ ├── stats: [rows=9.9, distinct(1)=9.9, null(1)=0, distinct(2)=1, null(2)=0] 63 │ │ ├── scan small 64 │ │ │ ├── columns: m:1(int) n:2(int) 65 │ │ │ └── stats: [rows=10, distinct(1)=10, null(1)=0, distinct(2)=1, null(2)=0.1] 66 │ │ └── filters 67 │ │ └── n:2 > 2 [type=bool, outer=(2), constraints=(/2: [/3 - ]; tight)] 68 │ └── filters (true) 69 └── filters (true) 70 71 # Filter that applies to the right side and gets pulled back into the ON 72 # condition. 73 opt 74 SELECT * FROM small JOIN abcd ON a=m WHERE b > 2 75 ---- 76 inner-join (lookup abcd) 77 ├── columns: m:1(int!null) n:2(int) a:4(int!null) b:5(int!null) c:6(int) 78 ├── key columns: [7] = [7] 79 ├── lookup columns are key 80 ├── stats: [rows=33.5823697, distinct(1)=10, null(1)=0, distinct(4)=10, null(4)=0] 81 ├── fd: (1)==(4), (4)==(1) 82 ├── inner-join (lookup abcd@secondary) 83 │ ├── columns: m:1(int!null) n:2(int) a:4(int!null) b:5(int!null) abcd.rowid:7(int!null) 84 │ ├── key columns: [1] = [4] 85 │ ├── stats: [rows=33, distinct(1)=10, null(1)=0, distinct(4)=10, null(4)=0, distinct(5)=33, null(5)=0] 86 │ ├── fd: (7)-->(4,5), (1)==(4), (4)==(1) 87 │ ├── scan small 88 │ │ ├── columns: m:1(int) n:2(int) 89 │ │ └── stats: [rows=10, distinct(1)=10, null(1)=0] 90 │ └── filters 91 │ └── b:5 > 2 [type=bool, outer=(5), constraints=(/5: [/3 - ]; tight)] 92 └── filters (true) 93 94 # Filter that can only be applied after the primary index join. 95 opt 96 SELECT * FROM small JOIN abcd ON a=m WHERE c>2 97 ---- 98 inner-join (lookup abcd) 99 ├── columns: m:1(int!null) n:2(int) a:4(int!null) b:5(int) c:6(int!null) 100 ├── key columns: [7] = [7] 101 ├── lookup columns are key 102 ├── stats: [rows=33.5823697, distinct(1)=10, null(1)=0, distinct(4)=10, null(4)=0] 103 ├── fd: (1)==(4), (4)==(1) 104 ├── inner-join (lookup abcd@secondary) 105 │ ├── columns: m:1(int!null) n:2(int) a:4(int!null) b:5(int) abcd.rowid:7(int!null) 106 │ ├── key columns: [1] = [4] 107 │ ├── stats: [rows=99, distinct(1)=10, null(1)=0, distinct(4)=10, null(4)=0] 108 │ ├── fd: (7)-->(4,5), (1)==(4), (4)==(1) 109 │ ├── scan small 110 │ │ ├── columns: m:1(int) n:2(int) 111 │ │ └── stats: [rows=10, distinct(1)=10, null(1)=0] 112 │ └── filters (true) 113 └── filters 114 └── c:6 > 2 [type=bool, outer=(6), constraints=(/6: [/3 - ]; tight)] 115 116 # Multiple equalities. 117 opt 118 SELECT * FROM small JOIN abcd ON a=m AND b=n WHERE c>2 119 ---- 120 inner-join (lookup abcd) 121 ├── columns: m:1(int!null) n:2(int!null) a:4(int!null) b:5(int!null) c:6(int!null) 122 ├── key columns: [7] = [7] 123 ├── lookup columns are key 124 ├── stats: [rows=0.334949339, distinct(1)=0.334949339, null(1)=0, distinct(2)=1e-10, null(2)=0, distinct(4)=0.334949339, null(4)=0, distinct(5)=1e-10, null(5)=0] 125 ├── fd: (1)==(4), (4)==(1), (2)==(5), (5)==(2) 126 ├── inner-join (lookup abcd@secondary) 127 │ ├── columns: m:1(int!null) n:2(int!null) a:4(int!null) b:5(int!null) abcd.rowid:7(int!null) 128 │ ├── key columns: [1 2] = [4 5] 129 │ ├── stats: [rows=0.970299, distinct(1)=0.970299, null(1)=0, distinct(2)=1e-10, null(2)=0, distinct(4)=0.970299, null(4)=0, distinct(5)=1e-10, null(5)=0] 130 │ ├── fd: (7)-->(4,5), (1)==(4), (4)==(1), (2)==(5), (5)==(2) 131 │ ├── scan small 132 │ │ ├── columns: m:1(int) n:2(int) 133 │ │ └── stats: [rows=10, distinct(1)=10, null(1)=0, distinct(2)=1, null(2)=0.1] 134 │ └── filters (true) 135 └── filters 136 └── c:6 > 2 [type=bool, outer=(6), constraints=(/6: [/3 - ]; tight)] 137 138 exec-ddl 139 CREATE TABLE abc (a INT, b INT, c INT, PRIMARY KEY (a, c)) 140 ---- 141 142 exec-ddl 143 CREATE TABLE def (d INT, e INT, f INT, g FLOAT, PRIMARY KEY (f, e), INDEX e_idx (e) STORING (d), INDEX d_idx (d)) 144 ---- 145 146 # Set up the statistics as if the first table is much smaller than the second. 147 exec-ddl 148 ALTER TABLE abc INJECT STATISTICS '[ 149 { 150 "columns": ["a"], 151 "created_at": "2018-01-01 1:00:00.00000+00:00", 152 "row_count": 100, 153 "distinct_count": 100 154 } 155 ]' 156 ---- 157 158 exec-ddl 159 ALTER TABLE def INJECT STATISTICS '[ 160 { 161 "columns": ["d"], 162 "created_at": "2018-01-01 1:00:00.00000+00:00", 163 "row_count": 10000, 164 "distinct_count": 1000 165 }, 166 { 167 "columns": ["e"], 168 "created_at": "2018-01-01 1:00:00.00000+00:00", 169 "row_count": 10000, 170 "distinct_count": 100 171 }, 172 { 173 "columns": ["f"], 174 "created_at": "2018-01-01 1:00:00.00000+00:00", 175 "row_count": 10000, 176 "distinct_count": 10000 177 } 178 ]' 179 ---- 180 181 # The filter a=f is selective, so we expect a lookup join. 182 opt 183 SELECT a, b, c, d, e, f FROM abc JOIN def ON a = f 184 ---- 185 inner-join (lookup def) 186 ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:4(int) e:5(int!null) f:6(int!null) 187 ├── key columns: [1] = [6] 188 ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(6)=100, null(6)=0] 189 ├── key: (3,5,6) 190 ├── fd: (1,3)-->(2), (5,6)-->(4), (1)==(6), (6)==(1) 191 ├── scan abc 192 │ ├── columns: a:1(int!null) b:2(int) c:3(int!null) 193 │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(3)=10, null(3)=0] 194 │ ├── key: (1,3) 195 │ └── fd: (1,3)-->(2) 196 └── filters (true) 197 198 # The filter a=e is not very selective, so we do not expect a lookup join. 199 opt 200 SELECT a, b, c, d, e, f FROM abc JOIN def ON a = e 201 ---- 202 inner-join (merge) 203 ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:4(int) e:5(int!null) f:6(int!null) 204 ├── left ordering: +1 205 ├── right ordering: +5 206 ├── stats: [rows=10000, distinct(1)=100, null(1)=0, distinct(5)=100, null(5)=0] 207 ├── key: (3,5,6) 208 ├── fd: (1,3)-->(2), (5,6)-->(4), (1)==(5), (5)==(1) 209 ├── scan abc 210 │ ├── columns: a:1(int!null) b:2(int) c:3(int!null) 211 │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(3)=10, null(3)=0] 212 │ ├── key: (1,3) 213 │ ├── fd: (1,3)-->(2) 214 │ └── ordering: +1 215 ├── scan def@e_idx 216 │ ├── columns: d:4(int) e:5(int!null) f:6(int!null) 217 │ ├── stats: [rows=10000, distinct(5)=100, null(5)=0, distinct(6)=10000, null(6)=0] 218 │ ├── key: (5,6) 219 │ ├── fd: (5,6)-->(4) 220 │ └── ordering: +5 221 └── filters (true) 222 223 # Check column statistics for lookup join. 224 opt colstat=1 colstat=2 colstat=3 colstat=4 colstat=5 colstat=6 colstat=(2,5,6) 225 SELECT a, b, c, d, e, f FROM abc JOIN DEF ON a = f 226 ---- 227 inner-join (lookup def) 228 ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:4(int) e:5(int!null) f:6(int!null) 229 ├── key columns: [1] = [6] 230 ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=9.99954623, null(2)=1, distinct(3)=9.99954623, null(3)=0, distinct(4)=95.1671064, null(4)=0, distinct(5)=63.2138954, null(5)=0, distinct(6)=100, null(6)=0, distinct(2,5,6)=100, null(2,5,6)=0] 231 ├── key: (3,5,6) 232 ├── fd: (1,3)-->(2), (5,6)-->(4), (1)==(6), (6)==(1) 233 ├── scan abc 234 │ ├── columns: a:1(int!null) b:2(int) c:3(int!null) 235 │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=1, distinct(3)=10, null(3)=0] 236 │ ├── key: (1,3) 237 │ └── fd: (1,3)-->(2) 238 └── filters (true) 239 240 # Check column statistics for double lookup join. 241 opt colstat=7 242 SELECT * FROM abc LEFT JOIN DEF ON a = d AND b = 3 243 ---- 244 left-join (lookup def) 245 ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:4(int) e:5(int) f:6(int) g:7(float) 246 ├── key columns: [6 5] = [6 5] 247 ├── lookup columns are key 248 ├── stats: [rows=100, distinct(4)=10, null(4)=90, distinct(7)=9.95021575, null(7)=91] 249 ├── key: (1,3,5,6) 250 ├── fd: (1,3)-->(2), (5,6)-->(4,7) 251 ├── left-join (lookup def@d_idx) 252 │ ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:4(int) e:5(int) f:6(int) 253 │ ├── key columns: [1] = [4] 254 │ ├── stats: [rows=100, distinct(4)=10, null(4)=90] 255 │ ├── key: (1,3,5,6) 256 │ ├── fd: (1,3)-->(2), (5,6)-->(4) 257 │ ├── scan abc 258 │ │ ├── columns: a:1(int!null) b:2(int) c:3(int!null) 259 │ │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=1, distinct(3)=10, null(3)=0] 260 │ │ ├── key: (1,3) 261 │ │ └── fd: (1,3)-->(2) 262 │ └── filters 263 │ └── b:2 = 3 [type=bool, outer=(2), constraints=(/2: [/3 - /3]; tight), fd=()-->(2)] 264 └── filters (true) 265 266 # The filter a=e is not very selective, so we do not expect a lookup join, even 267 # though there is an additional filter. 268 opt colstat=7 269 SELECT * FROM abc LEFT JOIN DEF ON a = e AND b = 3 270 ---- 271 right-join (hash) 272 ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:4(int) e:5(int) f:6(int) g:7(float) 273 ├── stats: [rows=100, distinct(5)=100, null(5)=0, distinct(7)=95.1671064, null(7)=1] 274 ├── key: (1,3,5,6) 275 ├── fd: (1,3)-->(2), (5,6)-->(4,7) 276 ├── scan def 277 │ ├── columns: d:4(int) e:5(int!null) f:6(int!null) g:7(float) 278 │ ├── stats: [rows=10000, distinct(5)=100, null(5)=0, distinct(7)=1000, null(7)=100] 279 │ ├── key: (5,6) 280 │ └── fd: (5,6)-->(4,7) 281 ├── scan abc 282 │ ├── columns: a:1(int!null) b:2(int) c:3(int!null) 283 │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=1, distinct(3)=10, null(3)=0] 284 │ ├── key: (1,3) 285 │ └── fd: (1,3)-->(2) 286 └── filters 287 ├── a:1 = e:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)] 288 └── b:2 = 3 [type=bool, outer=(2), constraints=(/2: [/3 - /3]; tight), fd=()-->(2)] 289 290 exec-ddl 291 CREATE TABLE t (x INT, y INT, INDEX x_idx (x) STORING (y), INDEX y_idx (y) STORING (x), INDEX xy_idx (x, y)) 292 ---- 293 294 exec-ddl 295 CREATE TABLE u (x INT, y INT, INDEX x_idx (x) STORING (y), INDEX y_idx (y) STORING (x), INDEX xy_idx (x, y)) 296 ---- 297 298 exec-ddl 299 ALTER TABLE t INJECT STATISTICS '[ 300 { 301 "columns": ["x"], 302 "created_at": "2018-01-01 1:00:00.00000+00:00", 303 "row_count": 1000, 304 "distinct_count": 10 305 }, 306 { 307 "columns": ["y"], 308 "created_at": "2018-01-01 1:00:00.00000+00:00", 309 "row_count": 1000, 310 "distinct_count": 10 311 } 312 ]' 313 ---- 314 315 exec-ddl 316 ALTER TABLE u INJECT STATISTICS '[ 317 { 318 "columns": ["x"], 319 "created_at": "2018-01-01 1:00:00.00000+00:00", 320 "row_count": 10, 321 "distinct_count": 2 322 }, 323 { 324 "columns": ["y"], 325 "created_at": "2018-01-01 1:00:00.00000+00:00", 326 "row_count": 10, 327 "distinct_count": 2 328 } 329 ]' 330 ---- 331 332 # Test that the correct index is used for the lookup join. 333 opt 334 SELECT * FROM u WHERE EXISTS (SELECT * FROM t WHERE u.x=t.x AND u.y=t.y); 335 ---- 336 semi-join (lookup t@xy_idx) 337 ├── columns: x:1(int) y:2(int) 338 ├── key columns: [1 2] = [4 5] 339 ├── stats: [rows=10, distinct(1)=2, null(1)=0, distinct(2)=2, null(2)=0] 340 ├── scan u 341 │ ├── columns: u.x:1(int) u.y:2(int) 342 │ └── stats: [rows=10, distinct(1)=2, null(1)=0, distinct(2)=2, null(2)=0] 343 └── filters (true) 344 345 opt 346 SELECT * FROM u WHERE NOT EXISTS (SELECT * FROM t WHERE u.x=t.x AND u.y=t.y); 347 ---- 348 anti-join (lookup t@xy_idx) 349 ├── columns: x:1(int) y:2(int) 350 ├── key columns: [1 2] = [4 5] 351 ├── stats: [rows=1e-10] 352 ├── scan u 353 │ ├── columns: u.x:1(int) u.y:2(int) 354 │ └── stats: [rows=10, distinct(1)=2, null(1)=0, distinct(2)=2, null(2)=0] 355 └── filters (true) 356 357 358 exec-ddl 359 CREATE TABLE medium (m INT, n INT) 360 ---- 361 362 exec-ddl 363 ALTER TABLE medium INJECT STATISTICS '[ 364 { 365 "columns": ["m"], 366 "created_at": "2018-01-01 1:00:00.00000+00:00", 367 "row_count": 40, 368 "distinct_count": 40 369 } 370 ]' 371 ---- 372 373 exec-ddl 374 CREATE TABLE wxyz (w INT, x INT, y INT, z INT, INDEX (x,y,z)) 375 ---- 376 377 exec-ddl 378 ALTER TABLE wxyz INJECT STATISTICS '[ 379 { 380 "columns": ["y"], 381 "created_at": "2018-01-01 1:00:00.00000+00:00", 382 "row_count": 1000, 383 "distinct_count": 11, 384 "histo_col_type": "int", 385 "histo_buckets": [ 386 {"num_eq": 50, "num_range": 0, "distinct_range": 0, "upper_bound": "0"}, 387 {"num_eq": 50, "num_range": 900, "distinct_range": 9, "upper_bound": "10"} 388 ] 389 } 390 ]' 391 ---- 392 393 # Choose the lookup join due to the highly selective constant column. 394 opt 395 SELECT * FROM medium INNER JOIN wxyz ON m=x AND y=10 396 ---- 397 inner-join (lookup wxyz) 398 ├── columns: m:1(int!null) n:2(int) w:4(int) x:5(int!null) y:6(int!null) z:7(int) 399 ├── key columns: [8] = [8] 400 ├── lookup columns are key 401 ├── stats: [rows=49.3441882, distinct(1)=39.1263061, null(1)=0, distinct(5)=39.1263061, null(5)=0] 402 ├── fd: ()-->(6), (1)==(5), (5)==(1) 403 ├── inner-join (lookup wxyz@secondary) 404 │ ├── columns: m:1(int!null) n:2(int) x:5(int!null) y:6(int!null) z:7(int) wxyz.rowid:8(int!null) 405 │ ├── key columns: [1 9] = [5 6] 406 │ ├── stats: [rows=19.8, distinct(1)=19.8, null(1)=0, distinct(5)=19.8, null(5)=0, distinct(6)=1, null(6)=0, distinct(9)=1, null(9)=0] 407 │ ├── fd: ()-->(6), (8)-->(5,7), (1)==(5), (5)==(1) 408 │ ├── project 409 │ │ ├── columns: "project_const_col_@6":9(int!null) m:1(int) n:2(int) 410 │ │ ├── stats: [rows=40, distinct(1)=40, null(1)=0, distinct(9)=1, null(9)=0] 411 │ │ ├── fd: ()-->(9) 412 │ │ ├── scan medium 413 │ │ │ ├── columns: m:1(int) n:2(int) 414 │ │ │ └── stats: [rows=40, distinct(1)=40, null(1)=0] 415 │ │ └── projections 416 │ │ └── 10 [as="project_const_col_@6":9, type=int] 417 │ └── filters (true) 418 └── filters (true) 419 420 exec-ddl 421 ALTER TABLE wxyz INJECT STATISTICS '[ 422 { 423 "columns": ["y"], 424 "created_at": "2018-01-01 1:00:00.00000+00:00", 425 "row_count": 1000, 426 "distinct_count": 11, 427 "histo_col_type": "int", 428 "histo_buckets": [ 429 {"num_eq": 10, "num_range": 0, "distinct_range": 0, "upper_bound": "0"}, 430 {"num_eq": 900, "num_range": 90, "distinct_range": 9, "upper_bound": "10"} 431 ] 432 } 433 ]' 434 ---- 435 436 # With a less selective constant column, the hash join should be chosen instead. 437 opt 438 SELECT * FROM medium INNER JOIN wxyz ON m=x AND y=10 439 ---- 440 inner-join (hash) 441 ├── columns: m:1(int!null) n:2(int) w:4(int) x:5(int!null) y:6(int!null) z:7(int) 442 ├── stats: [rows=356.4, distinct(1)=40, null(1)=0, distinct(5)=40, null(5)=0] 443 ├── fd: ()-->(6), (1)==(5), (5)==(1) 444 ├── select 445 │ ├── columns: w:4(int) x:5(int) y:6(int!null) z:7(int) 446 │ ├── stats: [rows=900, distinct(5)=100, null(5)=9, distinct(6)=1, null(6)=0] 447 │ │ histogram(6)= 0 900 448 │ │ <--- 10 449 │ ├── fd: ()-->(6) 450 │ ├── scan wxyz 451 │ │ ├── columns: w:4(int) x:5(int) y:6(int) z:7(int) 452 │ │ └── stats: [rows=1000, distinct(5)=100, null(5)=10, distinct(6)=11, null(6)=0] 453 │ │ histogram(6)= 0 10 90 900 454 │ │ <--- 0 ---- 10 455 │ └── filters 456 │ └── y:6 = 10 [type=bool, outer=(6), constraints=(/6: [/10 - /10]; tight), fd=()-->(6)] 457 ├── scan medium 458 │ ├── columns: m:1(int) n:2(int) 459 │ └── stats: [rows=40, distinct(1)=40, null(1)=0] 460 └── filters 461 └── m:1 = x:5 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)]