github.com/dolthub/go-mysql-server@v0.18.0/enginetest/queries/stats_queries.go (about)

     1  // Copyright 2023 DoltHub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package queries
    16  
    17  import (
    18  	"time"
    19  
    20  	"github.com/dolthub/go-mysql-server/sql"
    21  	"github.com/dolthub/go-mysql-server/sql/stats"
    22  	"github.com/dolthub/go-mysql-server/sql/types"
    23  )
    24  
    25  var StatisticsQueries = []ScriptTest{
    26  	{
    27  		Name: "analyze single int column",
    28  		SetUpScript: []string{
    29  			"CREATE TABLE t (i bigint primary key)",
    30  			"INSERT INTO t VALUES (1), (2), (3)",
    31  			"ANALYZE TABLE t",
    32  		},
    33  		Assertions: []ScriptTestAssertion{
    34  			{
    35  				SkipResultCheckOnServerEngine: true, // the non-interface types are not identified over the wire result
    36  				Query:                         "SELECT * FROM information_schema.column_statistics",
    37  				Expected: []sql.Row{
    38  					{"mydb", "t", "i", stats.NewStatistic(3, 3, 0, 24, time.Now(), sql.NewStatQualifier("mydb", "t", "primary"), []string{"i"}, []sql.Type{types.Int64}, []*stats.Bucket{
    39  						stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(1)}, nil, nil),
    40  						stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(2)}, nil, nil),
    41  						stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(3)}, nil, nil),
    42  					}, sql.IndexClassDefault, nil),
    43  					},
    44  				},
    45  			},
    46  		},
    47  	},
    48  	{
    49  		Name: "analyze update/drop",
    50  		SetUpScript: []string{
    51  			"CREATE TABLE t (i bigint primary key, j bigint, key(j))",
    52  			"INSERT INTO t VALUES (1, 4), (2, 5), (3, 6)",
    53  		},
    54  		Assertions: []ScriptTestAssertion{
    55  			{
    56  				Query:    "analyze table t update histogram on (i) using data '{\"row_count\": 40, \"distinct_count\": 40, \"null_count\": 1, \"buckets\": [{\"row_count\": 20, \"distinct_count\": 20, \"upper_bound\": [50], \"bound_count\": 1}, {\"row_count\": 20, \"distinct_count\": 20, \"upper_bound\": [80], \"bound_count\": 1}]}'",
    57  				Expected: []sql.Row{{"t", "histogram", "status", "OK"}},
    58  			},
    59  			{
    60  				SkipResultCheckOnServerEngine: true, // the non-interface types are not identified over the wire result
    61  				Query:                         "SELECT * FROM information_schema.column_statistics",
    62  				Expected: []sql.Row{
    63  					{"mydb", "t", "i", stats.NewStatistic(40, 40, 1, 0, time.Now(), sql.NewStatQualifier("mydb", "t", "primary"), []string{"i"}, []sql.Type{types.Int64}, []*stats.Bucket{
    64  						stats.NewHistogramBucket(20, 20, 0, 1, sql.Row{float64(50)}, nil, nil),
    65  						stats.NewHistogramBucket(20, 20, 0, 1, sql.Row{float64(80)}, nil, nil),
    66  					}, sql.IndexClassDefault, nil),
    67  					},
    68  				},
    69  			},
    70  			{
    71  				Query:    "analyze table t drop histogram on (i)",
    72  				Expected: []sql.Row{{"t", "histogram", "status", "OK"}},
    73  			},
    74  			{
    75  				Query:    "SELECT * FROM information_schema.column_statistics",
    76  				Expected: []sql.Row{},
    77  			},
    78  		},
    79  	},
    80  	{
    81  		Name: "analyze two int columns",
    82  		SetUpScript: []string{
    83  			"CREATE TABLE t (i bigint primary key, j bigint, key(j))",
    84  			"INSERT INTO t VALUES (1, 4), (2, 5), (3, 6)",
    85  			"ANALYZE TABLE t",
    86  		},
    87  		Assertions: []ScriptTestAssertion{
    88  			{
    89  				SkipResultCheckOnServerEngine: true, // the non-interface types are not identified over the wire result
    90  				Query:                         "SELECT * FROM information_schema.column_statistics",
    91  				Expected: []sql.Row{
    92  					{"mydb", "t", "i", stats.NewStatistic(3, 3, 0, 48, time.Now(), sql.NewStatQualifier("mydb", "t", "primary"), []string{"i"}, []sql.Type{types.Int64}, []*stats.Bucket{
    93  						stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(1)}, nil, []sql.Row{}),
    94  						stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(2)}, nil, []sql.Row{}),
    95  						stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(3)}, nil, []sql.Row{}),
    96  					}, sql.IndexClassDefault, nil),
    97  					},
    98  					{"mydb", "t", "j", stats.NewStatistic(3, 3, 0, 48, time.Now(), sql.NewStatQualifier("mydb", "t", "j"), []string{"j"}, []sql.Type{types.Int64}, []*stats.Bucket{
    99  						stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(4)}, nil, []sql.Row{}),
   100  						stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(5)}, nil, []sql.Row{}),
   101  						stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(6)}, nil, []sql.Row{}),
   102  					}, sql.IndexClassDefault, nil),
   103  					},
   104  				},
   105  			},
   106  		},
   107  	},
   108  	{
   109  		Name: "analyze float columns",
   110  		SetUpScript: []string{
   111  			"CREATE TABLE t (i double primary key)",
   112  			"INSERT INTO t VALUES (1.25), (45.25), (7.5), (10.5)",
   113  			"ANALYZE TABLE t",
   114  		},
   115  		Assertions: []ScriptTestAssertion{
   116  			{
   117  				SkipResultCheckOnServerEngine: true, // the non-interface types are not identified over the wire result
   118  				Query:                         "SELECT * FROM information_schema.column_statistics",
   119  				Expected: []sql.Row{
   120  					{"mydb", "t", "i", stats.NewStatistic(4, 4, 0, 32, time.Now(), sql.NewStatQualifier("mydb", "t", "primary"), []string{"i"}, []sql.Type{types.Float64}, []*stats.Bucket{
   121  						stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{float64(1.25)}, nil, []sql.Row{}),
   122  						stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{float64(7.5)}, nil, []sql.Row{}),
   123  						stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{float64(10.5)}, nil, []sql.Row{}),
   124  						stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{float64(45.25)}, nil, []sql.Row{}),
   125  					}, sql.IndexClassDefault, nil),
   126  					},
   127  				},
   128  			},
   129  		},
   130  	},
   131  	{
   132  		Name: "analyze empty table creates stats with 0s",
   133  		SetUpScript: []string{
   134  			"CREATE TABLE t (i float)",
   135  			"ANALYZE TABLE t",
   136  		},
   137  		Assertions: []ScriptTestAssertion{
   138  			{
   139  				Query:    "SELECT * FROM information_schema.column_statistics",
   140  				Expected: []sql.Row{},
   141  			},
   142  		},
   143  	},
   144  	{
   145  		Name: "analyze columns that can't be converted to float throws error",
   146  		SetUpScript: []string{
   147  			"CREATE TABLE t (t longtext)",
   148  			"INSERT INTO t VALUES ('not a number')",
   149  			"ANALYZE TABLE t",
   150  		},
   151  		Assertions: []ScriptTestAssertion{
   152  			{
   153  				Query:    "SELECT * FROM information_schema.column_statistics",
   154  				Expected: []sql.Row{},
   155  			},
   156  		},
   157  	},
   158  	{
   159  		Query: `
   160  		SELECT
   161  			COLUMN_NAME,
   162  			JSON_EXTRACT(HISTOGRAM, '$."number-of-buckets-specified"')
   163  		FROM information_schema.COLUMN_STATISTICS
   164  		WHERE SCHEMA_NAME = 'mydb'
   165  		AND TABLE_NAME = 'mytable'
   166  		`,
   167  		Expected: nil,
   168  	},
   169  }
   170  
   171  type StatsPlanTest struct {
   172  	Name        string
   173  	SetUpScript []string
   174  	Query       string
   175  	Expected    []sql.Row
   176  	IndexName   string
   177  }
   178  
   179  var StatsIndexTests = []ScriptTest{
   180  	{
   181  		// todo deprecate: this is wrong, full prefix match is preferable
   182  		// if not index statistics should be used to indicate
   183  		Name: "choose range over full prefix match",
   184  		SetUpScript: []string{
   185  			"create table xy (x int, y int, z varchar(36) default(uuid()), w varchar(10), key (z), key (y,w), key(x,y))",
   186  			"insert into xy (x,y,w) values (1, 1, 'a'), (2,1,'a'), (3,1,'b'),(4,2,'b'),(5,2,'c')",
   187  			`
   188  analyze table xy update histogram on (x,y) using data
   189  '{
   190      "qualifier": {
   191          "database": "mydb",
   192          "table": "xy",
   193         "index": "xy"
   194      },
   195      "types:":["bigint","bigint"],
   196      "columns":["x", "y"],
   197      "buckets": [
   198        {"upper_bound": [1,1], "row_count": 1},
   199        {"upper_bound": [2,1], "row_count": 1},
   200        {"upper_bound": [3,1], "row_count": 1},
   201        {"upper_bound": [4,2], "row_count": 1},
   202        {"upper_bound": [5,2], "row_count": 1}
   203  ]
   204  }'`,
   205  			`analyze table xy update histogram on (y,w) using data '
   206  {
   207      "qualifier": {
   208          "database": "mydb",
   209          "table": "xy",
   210          "index": "yw"
   211        },    "types:":["bigint","varchar(10)"],
   212      "columns":["y", "w"],
   213      "buckets":[
   214          {"upper_bound": [1,"a"], "row_count": 2},
   215          {"upper_bound": [1,"b"], "row_count": 1},
   216          {"upper_bound": [2,"b"], "row_count": 1},
   217          {"upper_bound": [2,"c"], "row_count": 1}
   218      ]
   219  }'`,
   220  			`analyze table xy update histogram on (z) using data '
   221  {
   222      "qualifier": {
   223          "database": "mydb",
   224          "table": "xy",
   225          "index": "z"
   226        },    "types:":["varchar(36)"],
   227      "columns":["z"],
   228      "buckets":[
   229          {"upper_bound": ["1"], "row_count": 1},
   230          {"upper_bound": ["2"], "row_count": 1},
   231          {"upper_bound": ["3"], "row_count": 1},
   232          {"upper_bound": ["4"], "row_count": 1},
   233  		{"upper_bound": ["5"], "row_count": 1}
   234      ]
   235  }'`,
   236  		},
   237  		Assertions: []ScriptTestAssertion{
   238  			{
   239  				Query:           "select * from xy where x > 4 and y = 1 and w = 'a'",
   240  				Expected:        []sql.Row{},
   241  				ExpectedIndexes: []string{"yw"},
   242  			},
   243  		},
   244  	},
   245  }