github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/memo/testdata/stats/index-join (about) 1 exec-ddl 2 CREATE TABLE a (x INT PRIMARY KEY, y INT, s STRING, d DECIMAL NOT NULL, UNIQUE (s DESC, d)) 3 ---- 4 5 exec-ddl 6 ALTER TABLE a INJECT STATISTICS '[ 7 { 8 "columns": ["x"], 9 "created_at": "2018-01-01 1:00:00.00000+00:00", 10 "row_count": 2000, 11 "distinct_count": 2000 12 }, 13 { 14 "columns": ["x","y"], 15 "created_at": "2018-01-01 1:30:00.00000+00:00", 16 "row_count": 2000, 17 "distinct_count": 2000 18 }, 19 { 20 "columns": ["s"], 21 "created_at": "2018-01-01 1:30:00.00000+00:00", 22 "row_count": 2000, 23 "distinct_count": 10 24 }, 25 { 26 "columns": ["y"], 27 "created_at": "2018-01-01 1:40:00.00000+00:00", 28 "row_count": 2000, 29 "distinct_count": 100 30 } 31 ]' 32 ---- 33 34 # In order to actually create new logical props for the index join, we 35 # need to call ConstructIndexJoin, which only happens when there is a 36 # remaining filter. 37 opt 38 SELECT count(*) FROM (SELECT * FROM a WHERE s = 'foo' AND x + y = 10) GROUP BY s, y 39 ---- 40 project 41 ├── columns: count:5(int!null) 42 ├── stats: [rows=49.2384513] 43 └── group-by 44 ├── columns: y:2(int) count_rows:5(int!null) 45 ├── grouping columns: y:2(int) 46 ├── stats: [rows=49.2384513, distinct(2)=49.2384513, null(2)=0] 47 ├── key: (2) 48 ├── fd: (2)-->(5) 49 ├── select 50 │ ├── columns: x:1(int!null) y:2(int) s:3(string!null) 51 │ ├── stats: [rows=66.6666667, distinct(2)=49.2384513, null(2)=0, distinct(3)=1, null(3)=0] 52 │ ├── key: (1) 53 │ ├── fd: ()-->(3), (1)-->(2) 54 │ ├── index-join a 55 │ │ ├── columns: x:1(int!null) y:2(int) s:3(string) 56 │ │ ├── stats: [rows=200] 57 │ │ ├── key: (1) 58 │ │ ├── fd: ()-->(3), (1)-->(2) 59 │ │ └── scan a@secondary 60 │ │ ├── columns: x:1(int!null) s:3(string!null) 61 │ │ ├── constraint: /-3/4: [/'foo' - /'foo'] 62 │ │ ├── stats: [rows=200, distinct(3)=1, null(3)=0] 63 │ │ ├── key: (1) 64 │ │ └── fd: ()-->(3) 65 │ └── filters 66 │ └── (x:1 + y:2) = 10 [type=bool, outer=(1,2)] 67 └── aggregations 68 └── count-rows [as=count_rows:5, type=int] 69 70 opt colstat=1 colstat=2 colstat=3 colstat=4 colstat=(1,2,3) 71 SELECT * FROM a WHERE s = 'foo' AND x + y = 10 72 ---- 73 select 74 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) 75 ├── stats: [rows=66.6666667, distinct(1)=66.6666667, null(1)=0, distinct(2)=49.2384513, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=57.5057212, null(4)=0, distinct(1-3)=66.6666667, null(1-3)=0] 76 ├── key: (1) 77 ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2) 78 ├── index-join a 79 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 80 │ ├── stats: [rows=200] 81 │ ├── key: (1) 82 │ ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1), (3,4)~~>(1,2) 83 │ └── scan a@secondary 84 │ ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null) 85 │ ├── constraint: /-3/4: [/'foo' - /'foo'] 86 │ ├── stats: [rows=200, distinct(3)=1, null(3)=0] 87 │ ├── key: (1) 88 │ └── fd: ()-->(3), (1)-->(4), (4)-->(1) 89 └── filters 90 └── (x:1 + y:2) = 10 [type=bool, outer=(1,2)] 91 92 opt colstat=1 colstat=2 colstat=3 colstat=(1,2,3) 93 SELECT * FROM a WHERE s = 'foo' 94 ---- 95 index-join a 96 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) 97 ├── stats: [rows=200, distinct(1)=200, null(1)=0, distinct(2)=87.8423345, null(2)=0, distinct(3)=1, null(3)=0, distinct(1-3)=200, null(1-3)=0] 98 ├── key: (1) 99 ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2) 100 └── scan a@secondary 101 ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null) 102 ├── constraint: /-3/4: [/'foo' - /'foo'] 103 ├── stats: [rows=200, distinct(3)=1, null(3)=0] 104 ├── key: (1) 105 └── fd: ()-->(3), (1)-->(4), (4)-->(1) 106 107 # Note that the row count of the index join does not match the row count of 108 # the scan, because the index join's row count was carried over from the 109 # normalized SELECT expression in its memo group (see next test case). 110 # In order to fix the row count, we need more precise constraint calculation 111 # for filters. 112 opt colstat=1 colstat=2 colstat=3 colstat=(2,3) colstat=(1,2,3) 113 SELECT * FROM a WHERE s = 'foo' OR s = 'bar' 114 ---- 115 index-join a 116 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) 117 ├── stats: [rows=400, distinct(1)=400, null(1)=0, distinct(2)=98.8470785, null(2)=0, distinct(3)=2, null(3)=0, distinct(2,3)=197.694157, null(2,3)=0, distinct(1-3)=400, null(1-3)=0] 118 ├── key: (1) 119 ├── fd: (1)-->(2-4), (3,4)-->(1,2) 120 └── scan a@secondary 121 ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null) 122 ├── constraint: /-3/4 123 │ ├── [/'foo' - /'foo'] 124 │ └── [/'bar' - /'bar'] 125 ├── stats: [rows=400, distinct(3)=2, null(3)=0] 126 ├── key: (1) 127 └── fd: (1)-->(3,4), (3,4)-->(1) 128 129 norm 130 SELECT * FROM a WHERE s = 'foo' OR s = 'bar' 131 ---- 132 select 133 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) 134 ├── stats: [rows=400, distinct(3)=2, null(3)=0] 135 ├── key: (1) 136 ├── fd: (1)-->(2-4), (3,4)-->(1,2) 137 ├── scan a 138 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 139 │ ├── stats: [rows=2000, distinct(1)=2000, null(1)=0, distinct(3)=10, null(3)=0, distinct(4)=200, null(4)=0] 140 │ ├── key: (1) 141 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 142 └── filters 143 └── (s:3 = 'foo') OR (s:3 = 'bar') [type=bool, outer=(3), constraints=(/3: [/'bar' - /'bar'] [/'foo' - /'foo']; tight)] 144 145 # Bump up null counts. 146 exec-ddl 147 ALTER TABLE a INJECT STATISTICS '[ 148 { 149 "columns": ["x"], 150 "created_at": "2018-01-01 2:00:00.00000+00:00", 151 "row_count": 2000, 152 "distinct_count": 2000 153 }, 154 { 155 "columns": ["x","y"], 156 "created_at": "2018-01-01 2:00:00.00000+00:00", 157 "row_count": 2000, 158 "distinct_count": 2000, 159 "null_count": 1000 160 }, 161 { 162 "columns": ["s"], 163 "created_at": "2018-01-01 2:00:00.00000+00:00", 164 "row_count": 2000, 165 "distinct_count": 11, 166 "null_count": 1000 167 }, 168 { 169 "columns": ["y"], 170 "created_at": "2018-01-01 2:00:00.00000+00:00", 171 "row_count": 2000, 172 "distinct_count": 101, 173 "null_count": 1000 174 } 175 ]' 176 ---- 177 178 opt colstat=1 colstat=2 colstat=3 colstat=4 colstat=(1,2,3) 179 SELECT * FROM a WHERE s = 'foo' AND x + y = 10 180 ---- 181 select 182 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) 183 ├── stats: [rows=33.3333333, distinct(1)=33.3333333, null(1)=0, distinct(2)=28.5927601, null(2)=16.6666667, distinct(3)=1, null(3)=0, distinct(4)=30.9412676, null(4)=0, distinct(1-3)=33.3333333, null(1-3)=0] 184 ├── key: (1) 185 ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2) 186 ├── index-join a 187 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 188 │ ├── stats: [rows=100] 189 │ ├── key: (1) 190 │ ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1), (3,4)~~>(1,2) 191 │ └── scan a@secondary 192 │ ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null) 193 │ ├── constraint: /-3/4: [/'foo' - /'foo'] 194 │ ├── stats: [rows=100, distinct(3)=1, null(3)=0] 195 │ ├── key: (1) 196 │ └── fd: ()-->(3), (1)-->(4), (4)-->(1) 197 └── filters 198 └── (x:1 + y:2) = 10 [type=bool, outer=(1,2)] 199 200 opt colstat=1 colstat=2 colstat=3 colstat=(1,2,3) 201 SELECT * FROM a WHERE s = 'foo' 202 ---- 203 index-join a 204 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) 205 ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=64.4232893, null(2)=50, distinct(3)=1, null(3)=0, distinct(1-3)=100, null(1-3)=0] 206 ├── key: (1) 207 ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2) 208 └── scan a@secondary 209 ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null) 210 ├── constraint: /-3/4: [/'foo' - /'foo'] 211 ├── stats: [rows=100, distinct(3)=1, null(3)=0] 212 ├── key: (1) 213 └── fd: ()-->(3), (1)-->(4), (4)-->(1) 214 215 # Note that the row count of the index join does not match the row count of 216 # the scan, because the index join's row count was carried over from the 217 # normalized SELECT expression in its memo group (see next test case). 218 # In order to fix the row count, we need more precise constraint calculation 219 # for filters. 220 # Also note that we need to tack on an "s IS NOT NULL" clause to make s a 221 # non-null column in the logical properties because the constraint 222 # builder at the SELECT level cannot deduce whether an OR'd filter 223 # is null-rejecting or not. 224 opt colstat=1 colstat=2 colstat=3 colstat=(2,3) colstat=(1,2,3) 225 SELECT * FROM a WHERE (s = 'foo' OR s = 'bar') AND s IS NOT NULL 226 ---- 227 index-join a 228 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) 229 ├── stats: [rows=200, distinct(1)=200, null(1)=0, distinct(2)=88.4618791, null(2)=100, distinct(3)=2, null(3)=0, distinct(2,3)=176.923758, null(2,3)=0, distinct(1-3)=200, null(1-3)=0] 230 ├── key: (1) 231 ├── fd: (1)-->(2-4), (3,4)-->(1,2) 232 └── scan a@secondary 233 ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null) 234 ├── constraint: /-3/4 235 │ ├── [/'foo' - /'foo'] 236 │ └── [/'bar' - /'bar'] 237 ├── stats: [rows=200, distinct(3)=2, null(3)=0] 238 ├── key: (1) 239 └── fd: (1)-->(3,4), (3,4)-->(1) 240 241 norm 242 SELECT * FROM a WHERE (s = 'foo' OR s = 'bar') AND s IS NOT NULL 243 ---- 244 select 245 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) 246 ├── stats: [rows=200, distinct(3)=2, null(3)=0] 247 ├── key: (1) 248 ├── fd: (1)-->(2-4), (3,4)-->(1,2) 249 ├── scan a 250 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 251 │ ├── stats: [rows=2000, distinct(1)=2000, null(1)=0, distinct(3)=11, null(3)=1000, distinct(4)=200, null(4)=0] 252 │ ├── key: (1) 253 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 254 └── filters 255 └── ((s:3 = 'foo') OR (s:3 = 'bar')) AND (s:3 IS NOT NULL) [type=bool, outer=(3), constraints=(/3: [/'bar' - /'bar'] [/'foo' - /'foo']; tight)]