github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/memo/testdata/stats/project (about) 1 exec-ddl 2 CREATE TABLE a (x INT PRIMARY KEY, y INT, s STRING, d DECIMAL NOT NULL, UNIQUE (s DESC, d)) 3 ---- 4 5 exec-ddl 6 ALTER TABLE a INJECT STATISTICS '[ 7 { 8 "columns": ["x"], 9 "created_at": "2018-01-01 1:00:00.00000+00:00", 10 "row_count": 2000, 11 "distinct_count": 2000 12 }, 13 { 14 "columns": ["x","y"], 15 "created_at": "2018-01-01 1:30:00.00000+00:00", 16 "row_count": 2000, 17 "distinct_count": 2000 18 }, 19 { 20 "columns": ["s"], 21 "created_at": "2018-01-01 1:30:00.00000+00:00", 22 "row_count": 2000, 23 "distinct_count": 10 24 }, 25 { 26 "columns": ["s","y"], 27 "created_at": "2018-01-01 1:40:00.00000+00:00", 28 "row_count": 2000, 29 "distinct_count": 100 30 } 31 ]' 32 ---- 33 34 build 35 SELECT x FROM a 36 ---- 37 project 38 ├── columns: x:1(int!null) 39 ├── stats: [rows=2000] 40 ├── key: (1) 41 └── scan a 42 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 43 ├── stats: [rows=2000] 44 ├── key: (1) 45 └── fd: (1)-->(2-4), (3,4)~~>(1,2) 46 47 build 48 SELECT y, s FROM a 49 ---- 50 project 51 ├── columns: y:2(int) s:3(string) 52 ├── stats: [rows=2000] 53 └── scan a 54 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 55 ├── stats: [rows=2000] 56 ├── key: (1) 57 └── fd: (1)-->(2-4), (3,4)~~>(1,2) 58 59 build 60 SELECT count(*) FROM (SELECT x, y FROM a) GROUP BY x, y 61 ---- 62 project 63 ├── columns: count:5(int!null) 64 ├── stats: [rows=2000] 65 └── group-by 66 ├── columns: x:1(int!null) y:2(int) count_rows:5(int!null) 67 ├── grouping columns: x:1(int!null) y:2(int) 68 ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0] 69 ├── key: (1) 70 ├── fd: (1)-->(2,5) 71 ├── project 72 │ ├── columns: x:1(int!null) y:2(int) 73 │ ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0] 74 │ ├── key: (1) 75 │ ├── fd: (1)-->(2) 76 │ └── scan a 77 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 78 │ ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0] 79 │ ├── key: (1) 80 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 81 └── aggregations 82 └── count-rows [as=count_rows:5, type=int] 83 84 # Test that the stats are calculated correctly for synthesized columns. 85 build 86 SELECT * FROM (SELECT concat(s, y::string) FROM a) AS q(v) WHERE v = 'foo' 87 ---- 88 select 89 ├── columns: v:5(string!null) 90 ├── stable 91 ├── stats: [rows=20, distinct(5)=1, null(5)=0] 92 ├── fd: ()-->(5) 93 ├── project 94 │ ├── columns: concat:5(string) 95 │ ├── stable 96 │ ├── stats: [rows=2000, distinct(5)=100, null(5)=0] 97 │ ├── scan a 98 │ │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 99 │ │ ├── stats: [rows=2000, distinct(2,3)=100, null(2,3)=0] 100 │ │ ├── key: (1) 101 │ │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 102 │ └── projections 103 │ └── concat(s:3, y:2::STRING) [as=concat:5, type=string, outer=(2,3), stable] 104 └── filters 105 └── concat:5 = 'foo' [type=bool, outer=(5), constraints=(/5: [/'foo' - /'foo']; tight), fd=()-->(5)] 106 107 # Test that stats for synthesized and non-synthesized columns are combined. 108 build 109 SELECT * FROM (SELECT concat(s, y::string), x FROM a) AS q(v, x) GROUP BY v, x 110 ---- 111 group-by 112 ├── columns: v:5(string) x:1(int!null) 113 ├── grouping columns: x:1(int!null) concat:5(string) 114 ├── stable 115 ├── stats: [rows=2000, distinct(1,5)=2000, null(1,5)=0] 116 ├── key: (1) 117 ├── fd: (1)-->(5) 118 └── project 119 ├── columns: concat:5(string) x:1(int!null) 120 ├── stable 121 ├── stats: [rows=2000, distinct(1,5)=2000, null(1,5)=0] 122 ├── key: (1) 123 ├── fd: (1)-->(5) 124 ├── scan a 125 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 126 │ ├── stats: [rows=2000, distinct(1-3)=2000, null(1-3)=0] 127 │ ├── key: (1) 128 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 129 └── projections 130 └── concat(s:3, y:2::STRING) [as=concat:5, type=string, outer=(2,3), stable] 131 132 # No available stats for column y. 133 build 134 SELECT * FROM (SELECT y + 3 AS v FROM a) WHERE v >= 1 AND v <= 100 135 ---- 136 select 137 ├── columns: v:5(int!null) 138 ├── stats: [rows=1000, distinct(5)=100, null(5)=0] 139 ├── project 140 │ ├── columns: v:5(int) 141 │ ├── stats: [rows=2000, distinct(5)=200, null(5)=0] 142 │ ├── scan a 143 │ │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 144 │ │ ├── stats: [rows=2000, distinct(2)=200, null(2)=20] 145 │ │ ├── key: (1) 146 │ │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 147 │ └── projections 148 │ └── y:2 + 3 [as=v:5, type=int, outer=(2)] 149 └── filters 150 └── (v:5 >= 1) AND (v:5 <= 100) [type=bool, outer=(5), constraints=(/5: [/1 - /100]; tight)] 151 152 exec-ddl 153 CREATE TABLE kuv (k INT PRIMARY KEY, u FLOAT, v STRING) 154 ---- 155 156 exec-ddl 157 ALTER TABLE kuv INJECT STATISTICS '[ 158 { 159 "columns": ["k"], 160 "created_at": "2018-01-01 1:00:00.00000+00:00", 161 "row_count": 2000, 162 "distinct_count": 2000 163 }, 164 { 165 "columns": ["v"], 166 "created_at": "2018-01-01 1:30:00.00000+00:00", 167 "row_count": 2000, 168 "distinct_count": 10 169 } 170 ]' 171 ---- 172 173 # Correlated subquery. 174 build 175 SELECT * FROM a WHERE EXISTS (SELECT s < v FROM kuv GROUP BY s < v) 176 ---- 177 select 178 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 179 ├── stats: [rows=666.666667] 180 ├── key: (1) 181 ├── fd: (1)-->(2-4), (3,4)~~>(1,2) 182 ├── scan a 183 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 184 │ ├── stats: [rows=2000, distinct(1)=2000, null(1)=0, distinct(4)=200, null(4)=0] 185 │ ├── key: (1) 186 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 187 └── filters 188 └── exists [type=bool, outer=(3), correlated-subquery] 189 └── group-by 190 ├── columns: column8:8(bool) 191 ├── grouping columns: column8:8(bool) 192 ├── outer: (3) 193 ├── stats: [rows=10, distinct(8)=10, null(8)=0] 194 ├── key: (8) 195 └── project 196 ├── columns: column8:8(bool) 197 ├── outer: (3) 198 ├── stats: [rows=2000, distinct(8)=10, null(8)=0] 199 ├── scan kuv 200 │ ├── columns: k:5(int!null) u:6(float) v:7(string) 201 │ ├── stats: [rows=2000, distinct(7)=10, null(7)=0] 202 │ ├── key: (5) 203 │ └── fd: (5)-->(6,7) 204 └── projections 205 └── s:3 < v:7 [as=column8:8, type=bool, outer=(3,7)] 206 207 # Bump up null counts. 208 exec-ddl 209 ALTER TABLE a INJECT STATISTICS '[ 210 { 211 "columns": ["x"], 212 "created_at": "2018-01-01 2:00:00.00000+00:00", 213 "row_count": 2000, 214 "distinct_count": 2000 215 }, 216 { 217 "columns": ["y"], 218 "created_at": "2018-01-01 2:00:00.00000+00:00", 219 "row_count": 2000, 220 "distinct_count": 500, 221 "null_count": 1000 222 }, 223 { 224 "columns": ["s"], 225 "created_at": "2018-01-01 2:00:00.00000+00:00", 226 "row_count": 2000, 227 "distinct_count": 500, 228 "null_count": 500 229 } 230 ]' 231 ---- 232 233 build colstat=2 colstat=3 colstat=(2,3) 234 SELECT y, s FROM a 235 ---- 236 project 237 ├── columns: y:2(int) s:3(string) 238 ├── stats: [rows=2000, distinct(2)=500, null(2)=1000, distinct(3)=500, null(3)=500, distinct(2,3)=2000, null(2,3)=250] 239 └── scan a 240 ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 241 ├── stats: [rows=2000, distinct(2)=500, null(2)=1000, distinct(3)=500, null(3)=500, distinct(2,3)=2000, null(2,3)=250] 242 ├── key: (1) 243 └── fd: (1)-->(2-4), (3,4)~~>(1,2) 244 245 # Test that the stats are calculated correctly for synthesized columns. 246 build 247 SELECT * FROM (SELECT concat(s, y::string) FROM a) AS q(v) WHERE v = 'foo' 248 ---- 249 select 250 ├── columns: v:5(string!null) 251 ├── stable 252 ├── stats: [rows=1, distinct(5)=1, null(5)=0] 253 ├── fd: ()-->(5) 254 ├── project 255 │ ├── columns: concat:5(string) 256 │ ├── stable 257 │ ├── stats: [rows=2000, distinct(5)=2000, null(5)=0] 258 │ ├── scan a 259 │ │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 260 │ │ ├── stats: [rows=2000, distinct(2,3)=2000, null(2,3)=250] 261 │ │ ├── key: (1) 262 │ │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 263 │ └── projections 264 │ └── concat(s:3, y:2::STRING) [as=concat:5, type=string, outer=(2,3), stable] 265 └── filters 266 └── concat:5 = 'foo' [type=bool, outer=(5), constraints=(/5: [/'foo' - /'foo']; tight), fd=()-->(5)] 267 268 build colstat=5 colstat=6 colstat=(5,6) 269 SELECT NULL, NULLIF(x,y) FROM a 270 ---- 271 project 272 ├── columns: "?column?":5(unknown) nullif:6(int) 273 ├── stats: [rows=2000, distinct(5)=1, null(5)=2000, distinct(6)=2000, null(6)=0, distinct(5,6)=2000, null(5,6)=0] 274 ├── fd: ()-->(5) 275 ├── scan a 276 │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) 277 │ ├── stats: [rows=2000, distinct(1,2)=2000, null(1,2)=0] 278 │ ├── key: (1) 279 │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) 280 └── projections 281 ├── NULL [as="?column?":5, type=unknown] 282 └── CASE x:1 WHEN y:2 THEN NULL ELSE x:1 END [as=nullif:6, type=int, outer=(1,2)]