github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/enginetest/stats_queries.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package enginetest
    16  
    17  import (
    18  	"fmt"
    19  	"strings"
    20  	"testing"
    21  
    22  	gms "github.com/dolthub/go-mysql-server"
    23  	"github.com/dolthub/go-mysql-server/enginetest"
    24  	"github.com/dolthub/go-mysql-server/enginetest/queries"
    25  	"github.com/dolthub/go-mysql-server/sql"
    26  	"github.com/dolthub/go-mysql-server/sql/types"
    27  	"github.com/stretchr/testify/require"
    28  
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    30  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
    31  )
    32  
    33  // fillerVarchar pushes the tree into level 3
    34  var fillerVarchar = strings.Repeat("x", 500)
    35  
    36  var DoltHistogramTests = []queries.ScriptTest{
    37  	{
    38  		Name: "mcv checking",
    39  		SetUpScript: []string{
    40  			"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
    41  			"insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')",
    42  			"analyze table xy",
    43  		},
    44  		Assertions: []queries.ScriptTestAssertion{
    45  			{
    46  				Query: " SELECT mcv_cnt from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv_cnt JSON path '$.mcv_counts')) as dt  where table_name = 'xy' and column_name = 'y,z'",
    47  				Expected: []sql.Row{
    48  					{types.JSONDocument{Val: []interface{}{
    49  						float64(1),
    50  						float64(4),
    51  						float64(1),
    52  					}}},
    53  				},
    54  			},
    55  			{
    56  				Query: " SELECT mcv from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(mcv JSON path '$.mcvs[*]')) as dt  where table_name = 'xy' and column_name = 'y,z'",
    57  				Expected: []sql.Row{
    58  					{types.JSONDocument{Val: []interface{}{
    59  						[]interface{}{float64(1), "a"},
    60  						[]interface{}{float64(0), "a"},
    61  						[]interface{}{float64(2), "a"},
    62  					}}},
    63  				},
    64  			},
    65  			{
    66  				Query: " SELECT x,z from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(x bigint path '$.upper_bound[0]', z text path '$.upper_bound[1]')) as dt  where table_name = 'xy' and column_name = 'y,z'",
    67  				Expected: []sql.Row{
    68  					{2, "a"},
    69  				},
    70  			},
    71  		},
    72  	},
    73  	{
    74  		Name: "int pk",
    75  		SetUpScript: []string{
    76  			"CREATE table xy (x bigint primary key, y varchar(500));",
    77  			fmt.Sprintf("insert into xy select x, '%s' from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
    78  			fmt.Sprintf("insert into xy select x, '%s'  from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar),
    79  			fmt.Sprintf("insert into xy select x, '%s'  from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar),
    80  			"analyze table xy",
    81  		},
    82  		Assertions: []queries.ScriptTestAssertion{
    83  			{
    84  				Query:    "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x'",
    85  				Expected: []sql.Row{{32}},
    86  			},
    87  			{
    88  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt  where table_name = 'xy' and column_name = 'x'",
    89  				Expected: []sql.Row{{float64(30000)}},
    90  			},
    91  			{
    92  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt  where table_name = 'xy' and column_name = 'x'",
    93  				Expected: []sql.Row{{float64(0)}},
    94  			},
    95  			{
    96  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt  where table_name = 'xy' and column_name = 'x'",
    97  				Expected: []sql.Row{{float64(30000)}},
    98  			},
    99  			{
   100  				Query:    " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt  where table_name = 'xy' and column_name = 'x'",
   101  				Expected: []sql.Row{{int64(1)}},
   102  			},
   103  		},
   104  	},
   105  	{
   106  		Name: "nulls distinct across chunk boundary",
   107  		SetUpScript: []string{
   108  			"CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));",
   109  			fmt.Sprintf("insert into xy select x, '%s', x  from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 200) select * from inputs) dt", fillerVarchar),
   110  			fmt.Sprintf("insert into xy select x, '%s', NULL  from (with recursive inputs(x) as (select 201 union select x+1 from inputs where x < 400) select * from inputs) dt", fillerVarchar),
   111  			"analyze table xy",
   112  		},
   113  		Assertions: []queries.ScriptTestAssertion{
   114  			{
   115  				Query:    "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'",
   116  				Expected: []sql.Row{{2}},
   117  			},
   118  			{
   119  				// bucket boundary duplication
   120  				Query:    "SELECT json_value(histogram, \"$.statistic.distinct_count\", 'signed') from information_schema.column_statistics where column_name = 'z'",
   121  				Expected: []sql.Row{{202}},
   122  			},
   123  			{
   124  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt  where table_name = 'xy' and column_name = 'z'",
   125  				Expected: []sql.Row{{float64(400)}},
   126  			},
   127  			{
   128  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt  where table_name = 'xy' and column_name = 'z'",
   129  				Expected: []sql.Row{{float64(200)}},
   130  			},
   131  			{
   132  				// chunk border double count
   133  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt  where table_name = 'xy' and column_name = 'z'",
   134  				Expected: []sql.Row{{float64(202)}},
   135  			},
   136  			{
   137  				// max bound count is an all nulls chunk
   138  				Query:    " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt  where table_name = 'xy' and column_name = 'z'",
   139  				Expected: []sql.Row{{int64(183)}},
   140  			},
   141  		},
   142  	},
   143  	{
   144  		Name: "int index",
   145  		SetUpScript: []string{
   146  			"CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z));",
   147  			fmt.Sprintf("insert into xy select x, '%s', x from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
   148  			fmt.Sprintf("insert into xy select x, '%s', x  from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar),
   149  			fmt.Sprintf("insert into xy select x, '%s', NULL  from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar),
   150  			"analyze table xy",
   151  		},
   152  		Assertions: []queries.ScriptTestAssertion{
   153  			{
   154  				Query:    "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z'",
   155  				Expected: []sql.Row{{152}},
   156  			},
   157  			{
   158  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt  where table_name = 'xy' and column_name = 'z'",
   159  				Expected: []sql.Row{{float64(30000)}},
   160  			},
   161  			{
   162  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt  where table_name = 'xy' and column_name = 'z'",
   163  				Expected: []sql.Row{{float64(10000)}},
   164  			},
   165  			{
   166  				// border NULL double count
   167  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt  where table_name = 'xy' and column_name = 'z'",
   168  				Expected: []sql.Row{{float64(20036)}},
   169  			},
   170  			{
   171  				// max bound count is nulls chunk
   172  				Query:    " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt  where table_name = 'xy' and column_name = 'z'",
   173  				Expected: []sql.Row{{int64(440)}},
   174  			},
   175  		},
   176  	},
   177  	{
   178  		Name: "multiint index",
   179  		SetUpScript: []string{
   180  			"CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(x, z));",
   181  			fmt.Sprintf("insert into xy select x, '%s', x+1  from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
   182  			fmt.Sprintf("insert into xy select x, '%s', x+1  from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar),
   183  			fmt.Sprintf("insert into xy select x, '%s', NULL from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar),
   184  			"analyze table xy",
   185  		},
   186  		Assertions: []queries.ScriptTestAssertion{
   187  			{
   188  				Query:    "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x,z'",
   189  				Expected: []sql.Row{{155}},
   190  			},
   191  			{
   192  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt  where table_name = 'xy' and column_name = 'x,z'",
   193  				Expected: []sql.Row{{float64(30000)}},
   194  			},
   195  			{
   196  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt  where table_name = 'xy' and column_name = 'x,z'",
   197  				Expected: []sql.Row{{float64(10000)}},
   198  			},
   199  			{
   200  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt  where table_name = 'xy' and column_name = 'x,z'",
   201  				Expected: []sql.Row{{float64(30000)}},
   202  			},
   203  			{
   204  				// max bound count is nulls chunk
   205  				Query:    " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt  where table_name = 'xy' and column_name = 'x,z'",
   206  				Expected: []sql.Row{{int64(1)}},
   207  			},
   208  		},
   209  	},
   210  	{
   211  		Name: "several int index",
   212  		SetUpScript: []string{
   213  			"CREATE table xy (x bigint primary key, y varchar(500), z bigint, key(z), key (x,z));",
   214  			fmt.Sprintf("insert into xy select x, '%s', x+1  from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
   215  		},
   216  		Assertions: []queries.ScriptTestAssertion{
   217  			{
   218  				Query:    " SELECT column_name from information_schema.column_statistics",
   219  				Expected: []sql.Row{},
   220  			},
   221  			{
   222  				Query: "analyze table xy",
   223  			},
   224  			{
   225  				Query:    " SELECT column_name from information_schema.column_statistics",
   226  				Expected: []sql.Row{{"x"}, {"z"}, {"x,z"}},
   227  			},
   228  		},
   229  	},
   230  	{
   231  		Name: "varchar pk",
   232  		SetUpScript: []string{
   233  			"CREATE table xy (x varchar(16) primary key, y varchar(500));",
   234  			fmt.Sprintf("insert into xy select cast (x as char), '%s'  from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
   235  			fmt.Sprintf("insert into xy select cast (x as char), '%s'  from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar),
   236  			fmt.Sprintf("insert into xy select cast (x as char), '%s' from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar),
   237  			"analyze table xy",
   238  		},
   239  		Assertions: []queries.ScriptTestAssertion{
   240  			{
   241  				Query:    "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'x'",
   242  				Expected: []sql.Row{{26}},
   243  			},
   244  			{
   245  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt  where table_name = 'xy' and column_name = 'x'",
   246  				Expected: []sql.Row{{float64(30000)}},
   247  			},
   248  			{
   249  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt  where table_name = 'xy' and column_name = 'x'",
   250  				Expected: []sql.Row{{float64(0)}},
   251  			},
   252  			{
   253  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt  where table_name = 'xy' and column_name = 'x'",
   254  				Expected: []sql.Row{{float64(30000)}},
   255  			},
   256  			{
   257  				// max bound count is nulls chunk
   258  				Query:    " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt  where table_name = 'xy' and column_name = 'x'",
   259  				Expected: []sql.Row{{int64(1)}},
   260  			},
   261  		},
   262  	},
   263  	{
   264  		Name: "int-varchar inverse ordinal pk",
   265  		SetUpScript: []string{
   266  			"CREATE table xy (x varchar(16), y varchar(500), z bigint, primary key(z,x));",
   267  			fmt.Sprintf("insert into xy select cast (x as char), '%s', x  from (with recursive inputs(x) as (select 1 union select x+1 from inputs where x < 10000) select * from inputs) dt", fillerVarchar),
   268  			fmt.Sprintf("insert into xy select cast (x as char), '%s', x  from (with recursive inputs(x) as (select 10001 union select x+1 from inputs where x < 20000) select * from inputs) dt", fillerVarchar),
   269  			fmt.Sprintf("insert into xy select cast (x as char), '%s', x from (with recursive inputs(x) as (select 20001 union select x+1 from inputs where x < 30000) select * from inputs) dt", fillerVarchar),
   270  			"analyze table xy",
   271  		},
   272  		Assertions: []queries.ScriptTestAssertion{
   273  			{
   274  				Query:    " SELECT column_name from information_schema.column_statistics",
   275  				Expected: []sql.Row{{"z,x"}},
   276  			},
   277  			{
   278  				Query:    "SELECT json_length(json_extract(histogram, \"$.statistic.buckets\")) from information_schema.column_statistics where column_name = 'z,x'",
   279  				Expected: []sql.Row{{42}},
   280  			},
   281  			{
   282  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.row_count')) as dt  where table_name = 'xy' and column_name = 'z,x'",
   283  				Expected: []sql.Row{{float64(30000)}},
   284  			},
   285  			{
   286  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.null_count')) as dt  where table_name = 'xy' and column_name = 'z,x'",
   287  				Expected: []sql.Row{{float64(0)}},
   288  			},
   289  			{
   290  				Query:    " SELECT sum(cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(cnt int path '$.distinct_count')) as dt  where table_name = 'xy' and column_name = 'z,x'",
   291  				Expected: []sql.Row{{float64(30000)}},
   292  			},
   293  			{
   294  				// max bound count is nulls chunk
   295  				Query:    " SELECT max(bound_cnt) from information_schema.column_statistics join json_table(histogram, '$.statistic.buckets[*]' COLUMNS(bound_cnt int path '$.bound_count')) as dt  where table_name = 'xy' and column_name = 'z,x'",
   296  				Expected: []sql.Row{{int64(1)}},
   297  			},
   298  		},
   299  	},
   300  }
   301  
   302  var DoltStatsIOTests = []queries.ScriptTest{
   303  	{
   304  		Name: "single-table",
   305  		SetUpScript: []string{
   306  			"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
   307  			"insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')",
   308  			"analyze table xy",
   309  		},
   310  		Assertions: []queries.ScriptTestAssertion{
   311  			{
   312  				Query: "select database_name, table_name, index_name, columns, types from dolt_statistics",
   313  				Expected: []sql.Row{
   314  					{"mydb", "xy", "primary", "x", "bigint"},
   315  					{"mydb", "xy", "yz", "y,z", "int,varchar(500)"},
   316  				},
   317  			},
   318  			{
   319  				Query:    fmt.Sprintf("select %s, %s, %s from dolt_statistics", schema.StatsRowCountColName, schema.StatsDistinctCountColName, schema.StatsNullCountColName),
   320  				Expected: []sql.Row{{uint64(6), uint64(6), uint64(0)}, {uint64(6), uint64(3), uint64(0)}},
   321  			},
   322  			{
   323  				Query: fmt.Sprintf("select %s, %s from dolt_statistics", schema.StatsUpperBoundColName, schema.StatsUpperBoundCntColName),
   324  				Expected: []sql.Row{
   325  					{"5", uint64(1)},
   326  					{"2,a", uint64(1)},
   327  				},
   328  			},
   329  			{
   330  				Query: fmt.Sprintf("select %s, %s, %s, %s, %s from dolt_statistics", schema.StatsMcv1ColName, schema.StatsMcv2ColName, schema.StatsMcv3ColName, schema.StatsMcv4ColName, schema.StatsMcvCountsColName),
   331  				Expected: []sql.Row{
   332  					{"5", "1", "2", "", "1,1,1"},
   333  					{"1,a", "0,a", "2,a", "", "1,4,1"},
   334  				},
   335  			},
   336  		},
   337  	},
   338  	{
   339  		Name: "multi-table",
   340  		SetUpScript: []string{
   341  			"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
   342  			"insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')",
   343  			"CREATE table ab (a bigint primary key, b int, c int, key(b,c));",
   344  			"insert into ab values (0,0,1), (1,0,1), (2,0,1), (3,0,1), (4,1,1), (5,2,1)",
   345  			"analyze table xy",
   346  			"analyze table ab",
   347  		},
   348  		Assertions: []queries.ScriptTestAssertion{
   349  			{
   350  				Query: "select database_name, table_name, index_name, columns, types  from dolt_statistics where table_name = 'xy'",
   351  				Expected: []sql.Row{
   352  					{"mydb", "xy", "primary", "x", "bigint"},
   353  					{"mydb", "xy", "yz", "y,z", "int,varchar(500)"},
   354  				},
   355  			},
   356  			{
   357  				Query:    fmt.Sprintf("select %s, %s, %s from dolt_statistics where table_name = 'xy'", schema.StatsRowCountColName, schema.StatsDistinctCountColName, schema.StatsNullCountColName),
   358  				Expected: []sql.Row{{uint64(6), uint64(6), uint64(0)}, {uint64(6), uint64(3), uint64(0)}},
   359  			},
   360  			{
   361  				Query: "select `table_name`, `index_name` from dolt_statistics",
   362  				Expected: []sql.Row{
   363  					{"ab", "primary"},
   364  					{"ab", "bc"},
   365  					{"xy", "primary"},
   366  					{"xy", "yz"},
   367  				},
   368  			},
   369  			{
   370  				Query: "select database_name, table_name, index_name, columns, types  from dolt_statistics where table_name = 'ab'",
   371  				Expected: []sql.Row{
   372  					{"mydb", "ab", "primary", "a", "bigint"},
   373  					{"mydb", "ab", "bc", "b,c", "int,int"},
   374  				},
   375  			},
   376  			{
   377  				Query:    fmt.Sprintf("select %s, %s, %s from dolt_statistics where table_name = 'ab'", schema.StatsRowCountColName, schema.StatsDistinctCountColName, schema.StatsNullCountColName),
   378  				Expected: []sql.Row{{uint64(6), uint64(6), uint64(0)}, {uint64(6), uint64(3), uint64(0)}},
   379  			},
   380  		},
   381  	},
   382  	{
   383  		// only edited chunks are scanned and re-written
   384  		Name: "incremental stats updates",
   385  		SetUpScript: []string{
   386  			"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
   387  			"insert into xy values (0,0,'a'), (2,0,'a'), (4,1,'a'), (6,2,'a')",
   388  			"analyze table xy",
   389  			"insert into xy values (1,0,'a'), (3,0,'a'), (5,2,'a'),  (7,1,'a')",
   390  			"analyze table xy",
   391  		},
   392  		Assertions: []queries.ScriptTestAssertion{
   393  			{
   394  				Query: fmt.Sprintf("select %s, %s, %s from dolt_statistics where table_name = 'xy'", schema.StatsRowCountColName, schema.StatsDistinctCountColName, schema.StatsNullCountColName),
   395  				Expected: []sql.Row{
   396  					{uint64(8), uint64(8), uint64(0)},
   397  					{uint64(8), uint64(3), uint64(0)},
   398  				},
   399  			},
   400  		},
   401  	},
   402  	{
   403  		Name: "incremental stats deletes manual analyze",
   404  		SetUpScript: []string{
   405  			"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
   406  			"insert into xy select x, 1, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
   407  			"analyze table xy",
   408  		},
   409  		Assertions: []queries.ScriptTestAssertion{
   410  			{
   411  				Query:    "select count(*) as cnt from dolt_statistics group by table_name, index_name order by cnt",
   412  				Expected: []sql.Row{{6}, {7}},
   413  			},
   414  			{
   415  				Query: "delete from xy where x > 500",
   416  			},
   417  			{
   418  				Query: "analyze table xy",
   419  			},
   420  			{
   421  				Query:    "select count(*) from dolt_statistics group by table_name, index_name",
   422  				Expected: []sql.Row{{4}, {4}},
   423  			},
   424  		},
   425  	},
   426  	{
   427  		Name: "incremental stats deletes auto",
   428  		SetUpScript: []string{
   429  			"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
   430  			"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
   431  			"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
   432  			"insert into xy select x, 1, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
   433  			"analyze table xy",
   434  		},
   435  		Assertions: []queries.ScriptTestAssertion{
   436  			{
   437  				Query:    "select count(*) as cnt from dolt_statistics group by table_name, index_name order by cnt",
   438  				Expected: []sql.Row{{6}, {7}},
   439  			},
   440  			{
   441  				Query: "delete from xy where x > 500",
   442  			},
   443  			{
   444  				Query: "call dolt_stats_restart()",
   445  			},
   446  			{
   447  				Query: "select sleep(.1)",
   448  			},
   449  			{
   450  				Query:    "select count(*) from dolt_statistics group by table_name, index_name",
   451  				Expected: []sql.Row{{4}, {4}},
   452  			},
   453  		},
   454  	},
   455  }
   456  
   457  var StatBranchTests = []queries.ScriptTest{
   458  	{
   459  		Name: "multi branch stats",
   460  		SetUpScript: []string{
   461  			"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
   462  			"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
   463  			"set @@PERSIST.dolt_stats_branches = 'main,feat';",
   464  			"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
   465  			"insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')",
   466  			"call dolt_commit('-Am', 'xy')",
   467  			"call dolt_checkout('-b','feat')",
   468  			"CREATE table ab (a bigint primary key, b int, c int, key(b,c));",
   469  			"insert into ab values (0,0,1), (1,0,1), (2,0,1), (3,0,1), (4,1,1), (5,2,1)",
   470  			"call dolt_commit('-Am', 'ab')",
   471  			"call dolt_checkout('main')",
   472  		},
   473  		Assertions: []queries.ScriptTestAssertion{
   474  			{
   475  				Query: "call dolt_stats_restart()",
   476  			},
   477  			{
   478  				Query: "select sleep(.1)",
   479  			},
   480  			{
   481  				Query: "select table_name, index_name, row_count from dolt_statistics",
   482  				Expected: []sql.Row{
   483  					{"xy", "primary", uint64(6)},
   484  					{"xy", "yz", uint64(6)},
   485  				},
   486  			},
   487  			{
   488  				Query: "select table_name, index_name, row_count from dolt_statistics as of 'feat'",
   489  				Expected: []sql.Row{
   490  					{"ab", "primary", uint64(6)},
   491  					{"ab", "bc", uint64(6)},
   492  					{"xy", "primary", uint64(6)},
   493  					{"xy", "yz", uint64(6)},
   494  				},
   495  			},
   496  			{
   497  				Query: "select table_name, index_name, row_count from dolt_statistics as of 'main'",
   498  				Expected: []sql.Row{
   499  					{"xy", "primary", uint64(6)},
   500  					{"xy", "yz", uint64(6)},
   501  				},
   502  			},
   503  			{
   504  				Query: "call dolt_checkout('feat')",
   505  			},
   506  			{
   507  				Query: "insert into xy values ('6',3,'a')",
   508  			},
   509  			{
   510  				Query: "call dolt_commit('-am', 'cm')",
   511  			},
   512  			{
   513  				Query: "select sleep(.1)",
   514  			},
   515  			{
   516  				Query: "select table_name, index_name, row_count from dolt_statistics as of 'feat'",
   517  				Expected: []sql.Row{
   518  					{"ab", "primary", uint64(6)},
   519  					{"ab", "bc", uint64(6)},
   520  					{"xy", "primary", uint64(7)},
   521  					{"xy", "yz", uint64(7)},
   522  				},
   523  			},
   524  			{
   525  				Query: "select table_name, index_name, row_count from dolt_statistics as of 'main'",
   526  				Expected: []sql.Row{
   527  					{"xy", "primary", uint64(6)},
   528  					{"xy", "yz", uint64(6)},
   529  				},
   530  			},
   531  			{
   532  				Query: "call dolt_checkout('feat')",
   533  			},
   534  			{
   535  				Query: "call dolt_stats_stop()",
   536  			},
   537  			{
   538  				Query: "call dolt_stats_drop()",
   539  			},
   540  			{
   541  				Query:    "select table_name, index_name, row_count from dolt_statistics as of 'feat'",
   542  				Expected: []sql.Row{},
   543  			},
   544  			{
   545  				// we dropped 'feat', not 'main'
   546  				Query: "select table_name, index_name, row_count from dolt_statistics as of 'main'",
   547  				Expected: []sql.Row{
   548  					{"xy", "primary", uint64(6)},
   549  					{"xy", "yz", uint64(6)},
   550  				},
   551  			},
   552  		},
   553  	},
   554  	{
   555  		Name: "issue #7710: branch connection string errors",
   556  		SetUpScript: []string{
   557  			"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
   558  			"insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')",
   559  			"use `mydb/main`",
   560  		},
   561  		Assertions: []queries.ScriptTestAssertion{
   562  			{
   563  				Query: "analyze table xy",
   564  				Expected: []sql.Row{
   565  					{"xy", "analyze", "status", "OK"},
   566  				},
   567  			},
   568  		},
   569  	},
   570  }
   571  
   572  var StatProcTests = []queries.ScriptTest{
   573  	{
   574  		Name: "deleting stats removes information_schema access point",
   575  		SetUpScript: []string{
   576  			"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
   577  			"insert into xy values (0,0,0)",
   578  		},
   579  		Assertions: []queries.ScriptTestAssertion{
   580  			{
   581  				Query: "analyze table xy",
   582  			},
   583  			{
   584  				Query:    "select count(*) from information_schema.column_statistics",
   585  				Expected: []sql.Row{{2}},
   586  			},
   587  			{
   588  				Query: "call dolt_stats_drop()",
   589  			},
   590  			{
   591  				Query:    "select count(*) from information_schema.column_statistics",
   592  				Expected: []sql.Row{{0}},
   593  			},
   594  		},
   595  	},
   596  	{
   597  		Name: "restart empty stats panic",
   598  		SetUpScript: []string{
   599  			"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
   600  		},
   601  		Assertions: []queries.ScriptTestAssertion{
   602  			{
   603  				Query: "analyze table xy",
   604  			},
   605  			{
   606  				Query:    "select count(*) from dolt_statistics",
   607  				Expected: []sql.Row{{0}},
   608  			},
   609  			{
   610  				Query:    "set @@GLOBAL.dolt_stats_auto_refresh_threshold = 0",
   611  				Expected: []sql.Row{{}},
   612  			},
   613  			{
   614  				Query:    "set @@GLOBAL.dolt_stats_auto_refresh_interval = 0",
   615  				Expected: []sql.Row{{}},
   616  			},
   617  			{
   618  				// don't panic
   619  				Query: "call dolt_stats_restart()",
   620  			},
   621  			{
   622  				Query: "select sleep(.1)",
   623  			},
   624  			{
   625  				Query: "insert into xy values (0,0,0)",
   626  			},
   627  			{
   628  				Query: "select sleep(.1)",
   629  			},
   630  			{
   631  				Query:    "select count(*) from dolt_statistics",
   632  				Expected: []sql.Row{{2}},
   633  			},
   634  		},
   635  	},
   636  	{
   637  		Name: "basic start, status, stop loop",
   638  		SetUpScript: []string{
   639  			"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
   640  			"insert into xy values (0,0,'a'), (2,0,'a'), (4,1,'a'), (6,2,'a')",
   641  		},
   642  		Assertions: []queries.ScriptTestAssertion{
   643  			{
   644  				Query:    "select count(*) from dolt_statistics",
   645  				Expected: []sql.Row{{0}},
   646  			},
   647  			{
   648  				Query:    "call dolt_stats_status()",
   649  				Expected: []sql.Row{{"no active stats thread"}},
   650  			},
   651  			// set refresh interval arbitrarily high to avoid updating when we restart
   652  			{
   653  				Query:    "set @@PERSIST.dolt_stats_auto_refresh_interval = 100000;",
   654  				Expected: []sql.Row{{}},
   655  			},
   656  			{
   657  				Query:    "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0",
   658  				Expected: []sql.Row{{}},
   659  			},
   660  			{
   661  				Query: "call dolt_stats_restart()",
   662  			},
   663  			{
   664  				Query:    "call dolt_stats_status()",
   665  				Expected: []sql.Row{{"restarted thread: mydb"}},
   666  			},
   667  			{
   668  				Query:    "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
   669  				Expected: []sql.Row{{}},
   670  			},
   671  			// new restart picks up 0-interval, will start refreshing immediately
   672  			{
   673  				Query: "call dolt_stats_restart()",
   674  			},
   675  			{
   676  				Query: "select sleep(.1)",
   677  			},
   678  			{
   679  				Query:    "call dolt_stats_status()",
   680  				Expected: []sql.Row{{"refreshed mydb"}},
   681  			},
   682  			{
   683  				Query:    "select count(*) from dolt_statistics",
   684  				Expected: []sql.Row{{2}},
   685  			},
   686  			// kill refresh thread
   687  			{
   688  				Query: "call dolt_stats_stop()",
   689  			},
   690  			{
   691  				Query:    "call dolt_stats_status()",
   692  				Expected: []sql.Row{{"cancelled thread: mydb"}},
   693  			},
   694  			// insert without refresh thread will not update stats
   695  			{
   696  				Query: "insert into xy values (1,0,'a'), (3,0,'a'), (5,2,'a'),  (7,1,'a')",
   697  			},
   698  			{
   699  				Query: "select sleep(.1)",
   700  			},
   701  			{
   702  				Query:    "call dolt_stats_status()",
   703  				Expected: []sql.Row{{"cancelled thread: mydb"}},
   704  			},
   705  			// manual analyze will update stats
   706  			{
   707  				Query:    "analyze table xy",
   708  				Expected: []sql.Row{{"xy", "analyze", "status", "OK"}},
   709  			},
   710  			{
   711  				Query:    "call dolt_stats_status()",
   712  				Expected: []sql.Row{{"refreshed mydb"}},
   713  			},
   714  			{
   715  				Query:    "select count(*) from dolt_statistics",
   716  				Expected: []sql.Row{{2}},
   717  			},
   718  			// kill refresh thread and delete stats ref
   719  			{
   720  				Query: "call dolt_stats_drop()",
   721  			},
   722  			{
   723  				Query:    "call dolt_stats_status()",
   724  				Expected: []sql.Row{{"dropped"}},
   725  			},
   726  			{
   727  				Query:    "select count(*) from dolt_statistics",
   728  				Expected: []sql.Row{{0}},
   729  			},
   730  		},
   731  	},
   732  }
   733  
   734  // TestProviderReloadScriptWithEngine runs the test script given with the engine provided.
   735  func TestProviderReloadScriptWithEngine(t *testing.T, e enginetest.QueryEngine, harness enginetest.Harness, script queries.ScriptTest) {
   736  	ctx := enginetest.NewContext(harness)
   737  	err := enginetest.CreateNewConnectionForServerEngine(ctx, e)
   738  	require.NoError(t, err, nil)
   739  
   740  	t.Run(script.Name, func(t *testing.T) {
   741  		for _, statement := range script.SetUpScript {
   742  			if sh, ok := harness.(enginetest.SkippingHarness); ok {
   743  				if sh.SkipQueryTest(statement) {
   744  					t.Skip()
   745  				}
   746  			}
   747  			ctx = ctx.WithQuery(statement)
   748  			enginetest.RunQueryWithContext(t, e, harness, ctx, statement)
   749  		}
   750  
   751  		assertions := script.Assertions
   752  		if len(assertions) == 0 {
   753  			assertions = []queries.ScriptTestAssertion{
   754  				{
   755  					Query:           script.Query,
   756  					Expected:        script.Expected,
   757  					ExpectedErr:     script.ExpectedErr,
   758  					ExpectedIndexes: script.ExpectedIndexes,
   759  				},
   760  			}
   761  		}
   762  
   763  		{
   764  			// reload provider, get disk stats
   765  			eng, ok := e.(*gms.Engine)
   766  			if !ok {
   767  				t.Errorf("expected *gms.Engine but found: %T", e)
   768  			}
   769  
   770  			err := eng.Analyzer.Catalog.StatsProvider.DropDbStats(ctx, "mydb", false)
   771  			require.NoError(t, err)
   772  
   773  			err = eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).LoadStats(ctx, "mydb", "main")
   774  			require.NoError(t, err)
   775  		}
   776  
   777  		for _, assertion := range assertions {
   778  			t.Run(assertion.Query, func(t *testing.T) {
   779  				if assertion.NewSession {
   780  					th, ok := harness.(enginetest.TransactionHarness)
   781  					require.True(t, ok, "ScriptTestAssertion requested a NewSession, "+
   782  						"but harness doesn't implement TransactionHarness")
   783  					ctx = th.NewSession()
   784  				}
   785  
   786  				if sh, ok := harness.(enginetest.SkippingHarness); ok && sh.SkipQueryTest(assertion.Query) {
   787  					t.Skip()
   788  				}
   789  				if assertion.Skip {
   790  					t.Skip()
   791  				}
   792  
   793  				if assertion.ExpectedErr != nil {
   794  					enginetest.AssertErr(t, e, harness, assertion.Query, assertion.ExpectedErr)
   795  				} else if assertion.ExpectedErrStr != "" {
   796  					enginetest.AssertErrWithCtx(t, e, harness, ctx, assertion.Query, nil, assertion.ExpectedErrStr)
   797  				} else if assertion.ExpectedWarning != 0 {
   798  					enginetest.AssertWarningAndTestQuery(t, e, nil, harness, assertion.Query,
   799  						assertion.Expected, nil, assertion.ExpectedWarning, assertion.ExpectedWarningsCount,
   800  						assertion.ExpectedWarningMessageSubstring, assertion.SkipResultsCheck)
   801  				} else if assertion.SkipResultsCheck {
   802  					enginetest.RunQueryWithContext(t, e, harness, nil, assertion.Query)
   803  				} else if assertion.CheckIndexedAccess {
   804  					enginetest.TestQueryWithIndexCheck(t, ctx, e, harness, assertion.Query, assertion.Expected, assertion.ExpectedColumns, assertion.Bindings)
   805  				} else {
   806  					var expected = assertion.Expected
   807  					if enginetest.IsServerEngine(e) && assertion.SkipResultCheckOnServerEngine {
   808  						// TODO: remove this check in the future
   809  						expected = nil
   810  					}
   811  					enginetest.TestQueryWithContext(t, ctx, e, harness, assertion.Query, expected, assertion.ExpectedColumns, assertion.Bindings)
   812  				}
   813  			})
   814  		}
   815  	})
   816  }
   817  
   818  func mustNewStatQual(s string) sql.StatQualifier {
   819  	qual, _ := sql.NewQualifierFromString(s)
   820  	return qual
   821  }