github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/enginetest/stats_queries.go (about) 1 // Copyright 2023 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package enginetest 16 17 import ( 18 "fmt" 19 "strings" 20 "testing" 21 22 gms "github.com/dolthub/go-mysql-server" 23 "github.com/dolthub/go-mysql-server/enginetest" 24 "github.com/dolthub/go-mysql-server/enginetest/queries" 25 "github.com/dolthub/go-mysql-server/sql" 26 "github.com/dolthub/go-mysql-server/sql/types" 27 "github.com/stretchr/testify/require" 28 29 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 30 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" 31 ) 32 33 // fillerVarchar pushes the tree into level 3 34 var fillerVarchar = strings.Repeat("x", 500) 35 36 var DoltHistogramTests = []queries.ScriptTest{ 37 { 38 Name: "mcv checking", 39 SetUpScript: []string{ 40 "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", 41 "insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')", 42 "analyze table xy", 43 }, 44 Assertions: []queries.ScriptTestAssertion{ 45 { 46 Query: " SELECT mcv_cnt from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv_cnt JSON path '$.mcv_counts')) as dt where table_name = 'xy' and column_name = 'y,z'", 47 Expected: []sql.Row{ 48 {types.JSONDocument{Val: []interface{}{ 49 float64(1), 50 float64(4), 51 float64(1), 52 }}}, 53 }, 54 }, 55 { 56 Query: " SELECT mcv from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv JSON path '$.mcvs[*]')) as dt where table_name = 'xy' and column_name = 'y,z'", 57 Expected: []sql.Row{ 58 {types.JSONDocument{Val: []interface{}{ 59 []interface{}{float64(1), "a"}, 60 []interface{}{float64(0), "a"}, 61 []interface{}{float64(2), "a"}, 62 }}}, 63 }, 64 }, 65 { 66 Query: " SELECT x,z from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(x bigint path '$.upper_bound[0]', z text path '$.upper_bound[1]')) as dt where table_name = 'xy' and column_name = 'y,z'", 67 Expected: []sql.Row{ 68 {2, "a"}, 69 }, 70 }, 71 }, 72 }, 73 { 74 Name: "int pk", 75 SetUpScript: []string{ 76 "CREATE table xy (x bigint primary key, y varchar(500));", 77 fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar), 78 fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar), 79 fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar), 80 "analyze table xy", 81 }, 82 Assertions: []queries.ScriptTestAssertion{ 83 { 84 Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x'", 85 Expected: []sql.Row{{32}}, 86 }, 87 { 88 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'x'", 89 Expected: []sql.Row{{float64(30000)}}, 90 }, 91 { 92 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'x'", 93 Expected: []sql.Row{{float64(0)}}, 94 }, 95 { 96 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'x'", 97 Expected: []sql.Row{{float64(30000)}}, 98 }, 99 { 100 Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'x'", 101 Expected: []sql.Row{{int64(1)}}, 102 }, 103 }, 104 }, 105 { 106 Name: "nulls distinct across chunk boundary", 107 SetUpScript: []string{ 108 "CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));", 109 fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 200) select * from inputs) dt", fillerVarchar), 110 fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 201 union select x+1 from inputs where x < 400) select * from inputs) dt", fillerVarchar), 111 "analyze table xy", 112 }, 113 Assertions: []queries.ScriptTestAssertion{ 114 { 115 Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'", 116 Expected: []sql.Row{{2}}, 117 }, 118 { 119 // bucket boundary duplication 120 Query: "SELECT json_value(histogram, \"$.statistic.distinct_count\", 'signed') from information_schema.column_statistics where column_name = 'z'", 121 Expected: []sql.Row{{202}}, 122 }, 123 { 124 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'z'", 125 Expected: []sql.Row{{float64(400)}}, 126 }, 127 { 128 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'z'", 129 Expected: []sql.Row{{float64(200)}}, 130 }, 131 { 132 // chunk border double count 133 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'z'", 134 Expected: []sql.Row{{float64(202)}}, 135 }, 136 { 137 // max bound count is an all nulls chunk 138 Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'z'", 139 Expected: []sql.Row{{int64(183)}}, 140 }, 141 }, 142 }, 143 { 144 Name: "int index", 145 SetUpScript: []string{ 146 "CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));", 147 fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar), 148 fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar), 149 fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar), 150 "analyze table xy", 151 }, 152 Assertions: []queries.ScriptTestAssertion{ 153 { 154 Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'", 155 Expected: []sql.Row{{152}}, 156 }, 157 { 158 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'z'", 159 Expected: []sql.Row{{float64(30000)}}, 160 }, 161 { 162 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'z'", 163 Expected: []sql.Row{{float64(10000)}}, 164 }, 165 { 166 // border NULL double count 167 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'z'", 168 Expected: []sql.Row{{float64(20036)}}, 169 }, 170 { 171 // max bound count is nulls chunk 172 Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'z'", 173 Expected: []sql.Row{{int64(440)}}, 174 }, 175 }, 176 }, 177 { 178 Name: "multiint index", 179 SetUpScript: []string{ 180 "CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(x, z));", 181 fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar), 182 fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar), 183 fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar), 184 "analyze table xy", 185 }, 186 Assertions: []queries.ScriptTestAssertion{ 187 { 188 Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x,z'", 189 Expected: []sql.Row{{155}}, 190 }, 191 { 192 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'x,z'", 193 Expected: []sql.Row{{float64(30000)}}, 194 }, 195 { 196 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'x,z'", 197 Expected: []sql.Row{{float64(10000)}}, 198 }, 199 { 200 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'x,z'", 201 Expected: []sql.Row{{float64(30000)}}, 202 }, 203 { 204 // max bound count is nulls chunk 205 Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'x,z'", 206 Expected: []sql.Row{{int64(1)}}, 207 }, 208 }, 209 }, 210 { 211 Name: "several int index", 212 SetUpScript: []string{ 213 "CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z), key (x,z));", 214 fmt.Sprintf("insert into xy select x, '%s', x+1 from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar), 215 }, 216 Assertions: []queries.ScriptTestAssertion{ 217 { 218 Query: " SELECT column_name from information_schema.column_statistics", 219 Expected: []sql.Row{}, 220 }, 221 { 222 Query: "analyze table xy", 223 }, 224 { 225 Query: " SELECT column_name from information_schema.column_statistics", 226 Expected: []sql.Row{{"x"}, {"z"}, {"x,z"}}, 227 }, 228 }, 229 }, 230 { 231 Name: "varchar pk", 232 SetUpScript: []string{ 233 "CREATE table xy (x varchar(16) primary key, y varchar(500));", 234 fmt.Sprintf("insert into xy select cast (x as char), '%s' from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar), 235 fmt.Sprintf("insert into xy select cast (x as char), '%s' from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar), 236 fmt.Sprintf("insert into xy select cast (x as char), '%s' from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar), 237 "analyze table xy", 238 }, 239 Assertions: []queries.ScriptTestAssertion{ 240 { 241 Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x'", 242 Expected: []sql.Row{{26}}, 243 }, 244 { 245 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'x'", 246 Expected: []sql.Row{{float64(30000)}}, 247 }, 248 { 249 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'x'", 250 Expected: []sql.Row{{float64(0)}}, 251 }, 252 { 253 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'x'", 254 Expected: []sql.Row{{float64(30000)}}, 255 }, 256 { 257 // max bound count is nulls chunk 258 Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'x'", 259 Expected: []sql.Row{{int64(1)}}, 260 }, 261 }, 262 }, 263 { 264 Name: "int-varchar inverse ordinal pk", 265 SetUpScript: []string{ 266 "CREATE table xy (x varchar(16), y varchar(500), z bigint, primary key(z,x));", 267 fmt.Sprintf("insert into xy select cast (x as char), '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar), 268 fmt.Sprintf("insert into xy select cast (x as char), '%s', x from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar), 269 fmt.Sprintf("insert into xy select cast (x as char), '%s', x from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar), 270 "analyze table xy", 271 }, 272 Assertions: []queries.ScriptTestAssertion{ 273 { 274 Query: " SELECT column_name from information_schema.column_statistics", 275 Expected: []sql.Row{{"z,x"}}, 276 }, 277 { 278 Query: "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z,x'", 279 Expected: []sql.Row{{42}}, 280 }, 281 { 282 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt where table_name = 'xy' and column_name = 'z,x'", 283 Expected: []sql.Row{{float64(30000)}}, 284 }, 285 { 286 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt where table_name = 'xy' and column_name = 'z,x'", 287 Expected: []sql.Row{{float64(0)}}, 288 }, 289 { 290 Query: " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt where table_name = 'xy' and column_name = 'z,x'", 291 Expected: []sql.Row{{float64(30000)}}, 292 }, 293 { 294 // max bound count is nulls chunk 295 Query: " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt where table_name = 'xy' and column_name = 'z,x'", 296 Expected: []sql.Row{{int64(1)}}, 297 }, 298 }, 299 }, 300 } 301 302 var DoltStatsIOTests = []queries.ScriptTest{ 303 { 304 Name: "single-table", 305 SetUpScript: []string{ 306 "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", 307 "insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')", 308 "analyze table xy", 309 }, 310 Assertions: []queries.ScriptTestAssertion{ 311 { 312 Query: "select database_name, table_name, index_name, columns, types from dolt_statistics", 313 Expected: []sql.Row{ 314 {"mydb", "xy", "primary", "x", "bigint"}, 315 {"mydb", "xy", "yz", "y,z", "int,varchar(500)"}, 316 }, 317 }, 318 { 319 Query: fmt.Sprintf("select %s, %s, %s from dolt_statistics", schema.StatsRowCountColName, schema.StatsDistinctCountColName, schema.StatsNullCountColName), 320 Expected: []sql.Row{{uint64(6), uint64(6), uint64(0)}, {uint64(6), uint64(3), uint64(0)}}, 321 }, 322 { 323 Query: fmt.Sprintf("select %s, %s from dolt_statistics", schema.StatsUpperBoundColName, schema.StatsUpperBoundCntColName), 324 Expected: []sql.Row{ 325 {"5", uint64(1)}, 326 {"2,a", uint64(1)}, 327 }, 328 }, 329 { 330 Query: fmt.Sprintf("select %s, %s, %s, %s, %s from dolt_statistics", schema.StatsMcv1ColName, schema.StatsMcv2ColName, schema.StatsMcv3ColName, schema.StatsMcv4ColName, schema.StatsMcvCountsColName), 331 Expected: []sql.Row{ 332 {"5", "1", "2", "", "1,1,1"}, 333 {"1,a", "0,a", "2,a", "", "1,4,1"}, 334 }, 335 }, 336 }, 337 }, 338 { 339 Name: "multi-table", 340 SetUpScript: []string{ 341 "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", 342 "insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')", 343 "CREATE table ab (a bigint primary key, b int, c int, key(b,c));", 344 "insert into ab values (0,0,1), (1,0,1), (2,0,1), (3,0,1), (4,1,1), (5,2,1)", 345 "analyze table xy", 346 "analyze table ab", 347 }, 348 Assertions: []queries.ScriptTestAssertion{ 349 { 350 Query: "select database_name, table_name, index_name, columns, types from dolt_statistics where table_name = 'xy'", 351 Expected: []sql.Row{ 352 {"mydb", "xy", "primary", "x", "bigint"}, 353 {"mydb", "xy", "yz", "y,z", "int,varchar(500)"}, 354 }, 355 }, 356 { 357 Query: fmt.Sprintf("select %s, %s, %s from dolt_statistics where table_name = 'xy'", schema.StatsRowCountColName, schema.StatsDistinctCountColName, schema.StatsNullCountColName), 358 Expected: []sql.Row{{uint64(6), uint64(6), uint64(0)}, {uint64(6), uint64(3), uint64(0)}}, 359 }, 360 { 361 Query: "select `table_name`, `index_name` from dolt_statistics", 362 Expected: []sql.Row{ 363 {"ab", "primary"}, 364 {"ab", "bc"}, 365 {"xy", "primary"}, 366 {"xy", "yz"}, 367 }, 368 }, 369 { 370 Query: "select database_name, table_name, index_name, columns, types from dolt_statistics where table_name = 'ab'", 371 Expected: []sql.Row{ 372 {"mydb", "ab", "primary", "a", "bigint"}, 373 {"mydb", "ab", "bc", "b,c", "int,int"}, 374 }, 375 }, 376 { 377 Query: fmt.Sprintf("select %s, %s, %s from dolt_statistics where table_name = 'ab'", schema.StatsRowCountColName, schema.StatsDistinctCountColName, schema.StatsNullCountColName), 378 Expected: []sql.Row{{uint64(6), uint64(6), uint64(0)}, {uint64(6), uint64(3), uint64(0)}}, 379 }, 380 }, 381 }, 382 { 383 // only edited chunks are scanned and re-written 384 Name: "incremental stats updates", 385 SetUpScript: []string{ 386 "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", 387 "insert into xy values (0,0,'a'), (2,0,'a'), (4,1,'a'), (6,2,'a')", 388 "analyze table xy", 389 "insert into xy values (1,0,'a'), (3,0,'a'), (5,2,'a'), (7,1,'a')", 390 "analyze table xy", 391 }, 392 Assertions: []queries.ScriptTestAssertion{ 393 { 394 Query: fmt.Sprintf("select %s, %s, %s from dolt_statistics where table_name = 'xy'", schema.StatsRowCountColName, schema.StatsDistinctCountColName, schema.StatsNullCountColName), 395 Expected: []sql.Row{ 396 {uint64(8), uint64(8), uint64(0)}, 397 {uint64(8), uint64(3), uint64(0)}, 398 }, 399 }, 400 }, 401 }, 402 { 403 Name: "incremental stats deletes manual analyze", 404 SetUpScript: []string{ 405 "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", 406 "insert into xy select x, 1, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;", 407 "analyze table xy", 408 }, 409 Assertions: []queries.ScriptTestAssertion{ 410 { 411 Query: "select count(*) as cnt from dolt_statistics group by table_name, index_name order by cnt", 412 Expected: []sql.Row{{6}, {7}}, 413 }, 414 { 415 Query: "delete from xy where x > 500", 416 }, 417 { 418 Query: "analyze table xy", 419 }, 420 { 421 Query: "select count(*) from dolt_statistics group by table_name, index_name", 422 Expected: []sql.Row{{4}, {4}}, 423 }, 424 }, 425 }, 426 { 427 Name: "incremental stats deletes auto", 428 SetUpScript: []string{ 429 "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", 430 "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", 431 "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", 432 "insert into xy select x, 1, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;", 433 "analyze table xy", 434 }, 435 Assertions: []queries.ScriptTestAssertion{ 436 { 437 Query: "select count(*) as cnt from dolt_statistics group by table_name, index_name order by cnt", 438 Expected: []sql.Row{{6}, {7}}, 439 }, 440 { 441 Query: "delete from xy where x > 500", 442 }, 443 { 444 Query: "call dolt_stats_restart()", 445 }, 446 { 447 Query: "select sleep(.1)", 448 }, 449 { 450 Query: "select count(*) from dolt_statistics group by table_name, index_name", 451 Expected: []sql.Row{{4}, {4}}, 452 }, 453 }, 454 }, 455 } 456 457 var StatBranchTests = []queries.ScriptTest{ 458 { 459 Name: "multi branch stats", 460 SetUpScript: []string{ 461 "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", 462 "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", 463 "set @@PERSIST.dolt_stats_branches = 'main,feat';", 464 "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", 465 "insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')", 466 "call dolt_commit('-Am', 'xy')", 467 "call dolt_checkout('-b','feat')", 468 "CREATE table ab (a bigint primary key, b int, c int, key(b,c));", 469 "insert into ab values (0,0,1), (1,0,1), (2,0,1), (3,0,1), (4,1,1), (5,2,1)", 470 "call dolt_commit('-Am', 'ab')", 471 "call dolt_checkout('main')", 472 }, 473 Assertions: []queries.ScriptTestAssertion{ 474 { 475 Query: "call dolt_stats_restart()", 476 }, 477 { 478 Query: "select sleep(.1)", 479 }, 480 { 481 Query: "select table_name, index_name, row_count from dolt_statistics", 482 Expected: []sql.Row{ 483 {"xy", "primary", uint64(6)}, 484 {"xy", "yz", uint64(6)}, 485 }, 486 }, 487 { 488 Query: "select table_name, index_name, row_count from dolt_statistics as of 'feat'", 489 Expected: []sql.Row{ 490 {"ab", "primary", uint64(6)}, 491 {"ab", "bc", uint64(6)}, 492 {"xy", "primary", uint64(6)}, 493 {"xy", "yz", uint64(6)}, 494 }, 495 }, 496 { 497 Query: "select table_name, index_name, row_count from dolt_statistics as of 'main'", 498 Expected: []sql.Row{ 499 {"xy", "primary", uint64(6)}, 500 {"xy", "yz", uint64(6)}, 501 }, 502 }, 503 { 504 Query: "call dolt_checkout('feat')", 505 }, 506 { 507 Query: "insert into xy values ('6',3,'a')", 508 }, 509 { 510 Query: "call dolt_commit('-am', 'cm')", 511 }, 512 { 513 Query: "select sleep(.1)", 514 }, 515 { 516 Query: "select table_name, index_name, row_count from dolt_statistics as of 'feat'", 517 Expected: []sql.Row{ 518 {"ab", "primary", uint64(6)}, 519 {"ab", "bc", uint64(6)}, 520 {"xy", "primary", uint64(7)}, 521 {"xy", "yz", uint64(7)}, 522 }, 523 }, 524 { 525 Query: "select table_name, index_name, row_count from dolt_statistics as of 'main'", 526 Expected: []sql.Row{ 527 {"xy", "primary", uint64(6)}, 528 {"xy", "yz", uint64(6)}, 529 }, 530 }, 531 { 532 Query: "call dolt_checkout('feat')", 533 }, 534 { 535 Query: "call dolt_stats_stop()", 536 }, 537 { 538 Query: "call dolt_stats_drop()", 539 }, 540 { 541 Query: "select table_name, index_name, row_count from dolt_statistics as of 'feat'", 542 Expected: []sql.Row{}, 543 }, 544 { 545 // we dropped 'feat', not 'main' 546 Query: "select table_name, index_name, row_count from dolt_statistics as of 'main'", 547 Expected: []sql.Row{ 548 {"xy", "primary", uint64(6)}, 549 {"xy", "yz", uint64(6)}, 550 }, 551 }, 552 }, 553 }, 554 { 555 Name: "issue #7710: branch connection string errors", 556 SetUpScript: []string{ 557 "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", 558 "insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')", 559 "use `mydb/main`", 560 }, 561 Assertions: []queries.ScriptTestAssertion{ 562 { 563 Query: "analyze table xy", 564 Expected: []sql.Row{ 565 {"xy", "analyze", "status", "OK"}, 566 }, 567 }, 568 }, 569 }, 570 } 571 572 var StatProcTests = []queries.ScriptTest{ 573 { 574 Name: "deleting stats removes information_schema access point", 575 SetUpScript: []string{ 576 "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", 577 "insert into xy values (0,0,0)", 578 }, 579 Assertions: []queries.ScriptTestAssertion{ 580 { 581 Query: "analyze table xy", 582 }, 583 { 584 Query: "select count(*) from information_schema.column_statistics", 585 Expected: []sql.Row{{2}}, 586 }, 587 { 588 Query: "call dolt_stats_drop()", 589 }, 590 { 591 Query: "select count(*) from information_schema.column_statistics", 592 Expected: []sql.Row{{0}}, 593 }, 594 }, 595 }, 596 { 597 Name: "restart empty stats panic", 598 SetUpScript: []string{ 599 "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", 600 }, 601 Assertions: []queries.ScriptTestAssertion{ 602 { 603 Query: "analyze table xy", 604 }, 605 { 606 Query: "select count(*) from dolt_statistics", 607 Expected: []sql.Row{{0}}, 608 }, 609 { 610 Query: "set @@GLOBAL.dolt_stats_auto_refresh_threshold = 0", 611 Expected: []sql.Row{{}}, 612 }, 613 { 614 Query: "set @@GLOBAL.dolt_stats_auto_refresh_interval = 0", 615 Expected: []sql.Row{{}}, 616 }, 617 { 618 // don't panic 619 Query: "call dolt_stats_restart()", 620 }, 621 { 622 Query: "select sleep(.1)", 623 }, 624 { 625 Query: "insert into xy values (0,0,0)", 626 }, 627 { 628 Query: "select sleep(.1)", 629 }, 630 { 631 Query: "select count(*) from dolt_statistics", 632 Expected: []sql.Row{{2}}, 633 }, 634 }, 635 }, 636 { 637 Name: "basic start, status, stop loop", 638 SetUpScript: []string{ 639 "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", 640 "insert into xy values (0,0,'a'), (2,0,'a'), (4,1,'a'), (6,2,'a')", 641 }, 642 Assertions: []queries.ScriptTestAssertion{ 643 { 644 Query: "select count(*) from dolt_statistics", 645 Expected: []sql.Row{{0}}, 646 }, 647 { 648 Query: "call dolt_stats_status()", 649 Expected: []sql.Row{{"no active stats thread"}}, 650 }, 651 // set refresh interval arbitrarily high to avoid updating when we restart 652 { 653 Query: "set @@PERSIST.dolt_stats_auto_refresh_interval = 100000;", 654 Expected: []sql.Row{{}}, 655 }, 656 { 657 Query: "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0", 658 Expected: []sql.Row{{}}, 659 }, 660 { 661 Query: "call dolt_stats_restart()", 662 }, 663 { 664 Query: "call dolt_stats_status()", 665 Expected: []sql.Row{{"restarted thread: mydb"}}, 666 }, 667 { 668 Query: "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", 669 Expected: []sql.Row{{}}, 670 }, 671 // new restart picks up 0-interval, will start refreshing immediately 672 { 673 Query: "call dolt_stats_restart()", 674 }, 675 { 676 Query: "select sleep(.1)", 677 }, 678 { 679 Query: "call dolt_stats_status()", 680 Expected: []sql.Row{{"refreshed mydb"}}, 681 }, 682 { 683 Query: "select count(*) from dolt_statistics", 684 Expected: []sql.Row{{2}}, 685 }, 686 // kill refresh thread 687 { 688 Query: "call dolt_stats_stop()", 689 }, 690 { 691 Query: "call dolt_stats_status()", 692 Expected: []sql.Row{{"cancelled thread: mydb"}}, 693 }, 694 // insert without refresh thread will not update stats 695 { 696 Query: "insert into xy values (1,0,'a'), (3,0,'a'), (5,2,'a'), (7,1,'a')", 697 }, 698 { 699 Query: "select sleep(.1)", 700 }, 701 { 702 Query: "call dolt_stats_status()", 703 Expected: []sql.Row{{"cancelled thread: mydb"}}, 704 }, 705 // manual analyze will update stats 706 { 707 Query: "analyze table xy", 708 Expected: []sql.Row{{"xy", "analyze", "status", "OK"}}, 709 }, 710 { 711 Query: "call dolt_stats_status()", 712 Expected: []sql.Row{{"refreshed mydb"}}, 713 }, 714 { 715 Query: "select count(*) from dolt_statistics", 716 Expected: []sql.Row{{2}}, 717 }, 718 // kill refresh thread and delete stats ref 719 { 720 Query: "call dolt_stats_drop()", 721 }, 722 { 723 Query: "call dolt_stats_status()", 724 Expected: []sql.Row{{"dropped"}}, 725 }, 726 { 727 Query: "select count(*) from dolt_statistics", 728 Expected: []sql.Row{{0}}, 729 }, 730 }, 731 }, 732 } 733 734 // TestProviderReloadScriptWithEngine runs the test script given with the engine provided. 735 func TestProviderReloadScriptWithEngine(t *testing.T, e enginetest.QueryEngine, harness enginetest.Harness, script queries.ScriptTest) { 736 ctx := enginetest.NewContext(harness) 737 err := enginetest.CreateNewConnectionForServerEngine(ctx, e) 738 require.NoError(t, err, nil) 739 740 t.Run(script.Name, func(t *testing.T) { 741 for _, statement := range script.SetUpScript { 742 if sh, ok := harness.(enginetest.SkippingHarness); ok { 743 if sh.SkipQueryTest(statement) { 744 t.Skip() 745 } 746 } 747 ctx = ctx.WithQuery(statement) 748 enginetest.RunQueryWithContext(t, e, harness, ctx, statement) 749 } 750 751 assertions := script.Assertions 752 if len(assertions) == 0 { 753 assertions = []queries.ScriptTestAssertion{ 754 { 755 Query: script.Query, 756 Expected: script.Expected, 757 ExpectedErr: script.ExpectedErr, 758 ExpectedIndexes: script.ExpectedIndexes, 759 }, 760 } 761 } 762 763 { 764 // reload provider, get disk stats 765 eng, ok := e.(*gms.Engine) 766 if !ok { 767 t.Errorf("expected *gms.Engine but found: %T", e) 768 } 769 770 err := eng.Analyzer.Catalog.StatsProvider.DropDbStats(ctx, "mydb", false) 771 require.NoError(t, err) 772 773 err = eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).LoadStats(ctx, "mydb", "main") 774 require.NoError(t, err) 775 } 776 777 for _, assertion := range assertions { 778 t.Run(assertion.Query, func(t *testing.T) { 779 if assertion.NewSession { 780 th, ok := harness.(enginetest.TransactionHarness) 781 require.True(t, ok, "ScriptTestAssertion requested a NewSession, "+ 782 "but harness doesn't implement TransactionHarness") 783 ctx = th.NewSession() 784 } 785 786 if sh, ok := harness.(enginetest.SkippingHarness); ok && sh.SkipQueryTest(assertion.Query) { 787 t.Skip() 788 } 789 if assertion.Skip { 790 t.Skip() 791 } 792 793 if assertion.ExpectedErr != nil { 794 enginetest.AssertErr(t, e, harness, assertion.Query, assertion.ExpectedErr) 795 } else if assertion.ExpectedErrStr != "" { 796 enginetest.AssertErrWithCtx(t, e, harness, ctx, assertion.Query, nil, assertion.ExpectedErrStr) 797 } else if assertion.ExpectedWarning != 0 { 798 enginetest.AssertWarningAndTestQuery(t, e, nil, harness, assertion.Query, 799 assertion.Expected, nil, assertion.ExpectedWarning, assertion.ExpectedWarningsCount, 800 assertion.ExpectedWarningMessageSubstring, assertion.SkipResultsCheck) 801 } else if assertion.SkipResultsCheck { 802 enginetest.RunQueryWithContext(t, e, harness, nil, assertion.Query) 803 } else if assertion.CheckIndexedAccess { 804 enginetest.TestQueryWithIndexCheck(t, ctx, e, harness, assertion.Query, assertion.Expected, assertion.ExpectedColumns, assertion.Bindings) 805 } else { 806 var expected = assertion.Expected 807 if enginetest.IsServerEngine(e) && assertion.SkipResultCheckOnServerEngine { 808 // TODO: remove this check in the future 809 expected = nil 810 } 811 enginetest.TestQueryWithContext(t, ctx, e, harness, assertion.Query, expected, assertion.ExpectedColumns, assertion.Bindings) 812 } 813 }) 814 } 815 }) 816 } 817 818 func mustNewStatQual(s string) sql.StatQualifier { 819 qual, _ := sql.NewQualifierFromString(s) 820 return qual 821 }