github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/statistics/selectivity_test.go

github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/statistics/selectivity_test.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package statistics_test
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"math"
    20  	"os"
    21  	"runtime/pprof"
    22  	"strings"
    23  	"testing"
    24  	"time"
    25  
    26  	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
    27  	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
    28  	. "github.com/whtcorpsinc/check"
    29  	"github.com/whtcorpsinc/errors"
    30  	"github.com/whtcorpsinc/log"
    31  	causetembedded "github.com/whtcorpsinc/milevadb/causet/embedded"
    32  	"github.com/whtcorpsinc/milevadb/causetstore/mockstore"
    33  	"github.com/whtcorpsinc/milevadb/ekv"
    34  	"github.com/whtcorpsinc/milevadb/petri"
    35  	"github.com/whtcorpsinc/milevadb/soliton/codec"
    36  	"github.com/whtcorpsinc/milevadb/soliton/collate"
    37  	"github.com/whtcorpsinc/milevadb/soliton/ranger"
    38  	"github.com/whtcorpsinc/milevadb/soliton/solitonutil"
    39  	"github.com/whtcorpsinc/milevadb/soliton/testkit"
    40  	"github.com/whtcorpsinc/milevadb/soliton/testleak"
    41  	"github.com/whtcorpsinc/milevadb/statistics"
    42  	"github.com/whtcorpsinc/milevadb/statistics/handle"
    43  	"github.com/whtcorpsinc/milevadb/stochastik"
    44  	"github.com/whtcorpsinc/milevadb/stochastikctx"
    45  	"github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx"
    46  	"github.com/whtcorpsinc/milevadb/types"
    47  	"go.uber.org/zap"
    48  	"go.uber.org/zap/zapembedded"
    49  )
    50  
// eps is the absolute tolerance used when comparing an estimated selectivity
// against its expected value.
const eps = 1e-9
    52  
// Register testStatsSuite with the check framework through SerialSuites.
var _ = SerialSuites(&testStatsSuite{})
    54  
// testStatsSuite holds the fixtures shared by the statistics tests in this file.
type testStatsSuite struct {
	// causetstore and do are produced by newStoreWithBootstrap in SetUpSuite
	// and released in TearDownSuite.
	causetstore ekv.CausetStorage
	do          *petri.Petri
	// hook is the log core installed by registerHook.
	hook        *logHook
	// testData is loaded from the "stats_suite" data set under "testdata".
	testData    solitonutil.TestData
}
    61  
    62  func (s *testStatsSuite) SetUpSuite(c *C) {
    63  	testleak.BeforeTest()
    64  	// Add the hook here to avoid data race.
    65  	s.registerHook()
    66  	var err error
    67  	s.causetstore, s.do, err = newStoreWithBootstrap()
    68  	c.Assert(err, IsNil)
    69  	s.testData, err = solitonutil.LoadTestSuiteData("testdata", "stats_suite")
    70  	c.Assert(err, IsNil)
    71  }
    72  
    73  func (s *testStatsSuite) TearDownSuite(c *C) {
    74  	s.do.Close()
    75  	c.Assert(s.causetstore.Close(), IsNil)
    76  	testleak.AfterTest(c)()
    77  	c.Assert(s.testData.GenerateOutputIfNeeded(), IsNil)
    78  }
    79  
    80  func (s *testStatsSuite) registerHook() {
    81  	conf := &log.Config{Level: os.Getenv("log_level"), File: log.FileLogConfig{}}
    82  	_, r, _ := log.InitLogger(conf)
    83  	s.hook = &logHook{r.Core, ""}
    84  	lg := zap.New(s.hook)
    85  	log.ReplaceGlobals(lg, r)
    86  }
    87  
// logHook is a log core that embeds another core and accumulates the text of
// the entries whose message contains "[stats" (see Write).
type logHook struct {
	zapembedded.Core
	// results is the concatenation of every captured message and its fields.
	results string
}
    92  
    93  func (h *logHook) Write(entry zapembedded.Entry, fields []zapembedded.Field) error {
    94  	message := entry.Message
    95  	if idx := strings.Index(message, "[stats"); idx != -1 {
    96  		h.results = h.results + message
    97  		for _, f := range fields {
    98  			h.results = h.results + ", " + f.Key + "=" + h.field2String(f)
    99  		}
   100  	}
   101  	return nil
   102  }
   103  
   104  func (h *logHook) field2String(field zapembedded.Field) string {
   105  	switch field.Type {
   106  	case zapembedded.StringType:
   107  		return field.String
   108  	case zapembedded.Int64Type, zapembedded.Int32Type, zapembedded.Uint32Type:
   109  		return fmt.Sprintf("%v", field.Integer)
   110  	case zapembedded.Float64Type:
   111  		return fmt.Sprintf("%v", math.Float64frombits(uint64(field.Integer)))
   112  	case zapembedded.StringerType:
   113  		return field.Interface.(fmt.Stringer).String()
   114  	}
   115  	return "not support"
   116  }
   117  
   118  func (h *logHook) Check(e zapembedded.Entry, ce *zapembedded.CheckedEntry) *zapembedded.CheckedEntry {
   119  	if h.Enabled(e.Level) {
   120  		return ce.AddCore(e, h)
   121  	}
   122  	return ce
   123  }
   124  
   125  func newStoreWithBootstrap() (ekv.CausetStorage, *petri.Petri, error) {
   126  	causetstore, err := mockstore.NewMockStore()
   127  	if err != nil {
   128  		return nil, nil, errors.Trace(err)
   129  	}
   130  	stochastik.SetSchemaLease(0)
   131  	stochastik.DisableStats4Test()
   132  	petri.RunAutoAnalyze = false
   133  	do, err := stochastik.BootstrapStochastik(causetstore)
   134  	do.SetStatsUFIDelating(true)
   135  	return causetstore, do, errors.Trace(err)
   136  }
   137  
   138  func cleanEnv(c *C, causetstore ekv.CausetStorage, do *petri.Petri) {
   139  	tk := testkit.NewTestKit(c, causetstore)
   140  	tk.MustInterDirc("use test")
   141  	r := tk.MustQuery("show blocks")
   142  	for _, tb := range r.Rows() {
   143  		blockName := tb[0]
   144  		tk.MustInterDirc(fmt.Sprintf("drop causet %v", blockName))
   145  	}
   146  	tk.MustInterDirc("delete from allegrosql.stats_spacetime")
   147  	tk.MustInterDirc("delete from allegrosql.stats_histograms")
   148  	tk.MustInterDirc("delete from allegrosql.stats_buckets")
   149  	do.StatsHandle().Clear()
   150  }
   151  
   152  // generateIntCauset will generate a causet slice, every dimension is begin from 0, end with num - 1.
   153  // If dimension is x, num is y, the total number of causet is y^x. And This slice is sorted.
   154  func (s *testStatsSuite) generateIntCauset(dimension, num int) ([]types.Causet, error) {
   155  	length := int(math.Pow(float64(num), float64(dimension)))
   156  	ret := make([]types.Causet, length)
   157  	if dimension == 1 {
   158  		for i := 0; i < num; i++ {
   159  			ret[i] = types.NewIntCauset(int64(i))
   160  		}
   161  	} else {
   162  		sc := &stmtctx.StatementContext{TimeZone: time.Local}
   163  		// In this way, we can guarantee the causet is in order.
   164  		for i := 0; i < length; i++ {
   165  			data := make([]types.Causet, dimension)
   166  			j := i
   167  			for k := 0; k < dimension; k++ {
   168  				data[dimension-k-1].SetInt64(int64(j % num))
   169  				j = j / num
   170  			}
   171  			bytes, err := codec.EncodeKey(sc, nil, data...)
   172  			if err != nil {
   173  				return nil, err
   174  			}
   175  			ret[i].SetBytes(bytes)
   176  		}
   177  	}
   178  	return ret, nil
   179  }
   180  
   181  // mockStatsHistogram will create a statistics.Histogram, of which the data is uniform distribution.
   182  func mockStatsHistogram(id int64, values []types.Causet, repeat int64, tp *types.FieldType) *statistics.Histogram {
   183  	ndv := len(values)
   184  	histogram := statistics.NewHistogram(id, int64(ndv), 0, 0, tp, ndv, 0)
   185  	for i := 0; i < ndv; i++ {
   186  		histogram.AppendBucket(&values[i], &values[i], repeat*int64(i+1), repeat)
   187  	}
   188  	return histogram
   189  }
   190  
   191  func mockStatsTable(tbl *perceptron.TableInfo, rowCount int64) *statistics.Block {
   192  	histDefCausl := statistics.HistDefCausl{
   193  		PhysicalID:      tbl.ID,
   194  		HavePhysicalID:  true,
   195  		Count:           rowCount,
   196  		DeferredCausets: make(map[int64]*statistics.DeferredCauset, len(tbl.DeferredCausets)),
   197  		Indices:         make(map[int64]*statistics.Index, len(tbl.Indices)),
   198  	}
   199  	statsTbl := &statistics.Block{
   200  		HistDefCausl: histDefCausl,
   201  	}
   202  	return statsTbl
   203  }
   204  
   205  func (s *testStatsSuite) prepareSelectivity(testKit *testkit.TestKit, c *C) *statistics.Block {
   206  	testKit.MustInterDirc("use test")
   207  	testKit.MustInterDirc("drop causet if exists t")
   208  	testKit.MustInterDirc("create causet t(a int primary key, b int, c int, d int, e int, index idx_cd(c, d), index idx_de(d, e))")
   209  
   210  	is := s.do.SchemaReplicant()
   211  	tb, err := is.TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t"))
   212  	c.Assert(err, IsNil)
   213  	tbl := tb.Meta()
   214  
   215  	// mock the statistic causet
   216  	statsTbl := mockStatsTable(tbl, 540)
   217  
   218  	// Set the value of columns' histogram.
   219  	colValues, err := s.generateIntCauset(1, 54)
   220  	c.Assert(err, IsNil)
   221  	for i := 1; i <= 5; i++ {
   222  		statsTbl.DeferredCausets[int64(i)] = &statistics.DeferredCauset{Histogram: *mockStatsHistogram(int64(i), colValues, 10, types.NewFieldType(allegrosql.TypeLonglong)), Info: tbl.DeferredCausets[i-1]}
   223  	}
   224  
   225  	// Set the value of two indices' histograms.
   226  	idxValues, err := s.generateIntCauset(2, 3)
   227  	c.Assert(err, IsNil)
   228  	tp := types.NewFieldType(allegrosql.TypeBlob)
   229  	statsTbl.Indices[1] = &statistics.Index{Histogram: *mockStatsHistogram(1, idxValues, 60, tp), Info: tbl.Indices[0]}
   230  	statsTbl.Indices[2] = &statistics.Index{Histogram: *mockStatsHistogram(2, idxValues, 60, tp), Info: tbl.Indices[1]}
   231  	return statsTbl
   232  }
   233  
   234  func (s *testStatsSuite) TestSelectivity(c *C) {
   235  	defer cleanEnv(c, s.causetstore, s.do)
   236  	testKit := testkit.NewTestKit(c, s.causetstore)
   237  	statsTbl := s.prepareSelectivity(testKit, c)
   238  	is := s.do.SchemaReplicant()
   239  
   240  	longExpr := "0 < a and a = 1 "
   241  	for i := 1; i < 64; i++ {
   242  		longExpr += fmt.Sprintf(" and a > %d ", i)
   243  	}
   244  	tests := []struct {
   245  		exprs       string
   246  		selectivity float64
   247  	}{
   248  		{
   249  			exprs:       "a > 0 and a < 2",
   250  			selectivity: 0.01851851851,
   251  		},
   252  		{
   253  			exprs:       "a >= 1 and a < 2",
   254  			selectivity: 0.01851851851,
   255  		},
   256  		{
   257  			exprs:       "a >= 1 and b > 1 and a < 2",
   258  			selectivity: 0.01783264746,
   259  		},
   260  		{
   261  			exprs:       "a >= 1 and c > 1 and a < 2",
   262  			selectivity: 0.00617283950,
   263  		},
   264  		{
   265  			exprs:       "a >= 1 and c >= 1 and a < 2",
   266  			selectivity: 0.01234567901,
   267  		},
   268  		{
   269  			exprs:       "d = 0 and e = 1",
   270  			selectivity: 0.11111111111,
   271  		},
   272  		{
   273  			exprs:       "b > 1",
   274  			selectivity: 0.96296296296,
   275  		},
   276  		{
   277  			exprs:       "a > 1 and b < 2 and c > 3 and d < 4 and e > 5",
   278  			selectivity: 0,
   279  		},
   280  		{
   281  			exprs:       longExpr,
   282  			selectivity: 0.001,
   283  		},
   284  	}
   285  
   286  	ctx := context.Background()
   287  	for _, tt := range tests {
   288  		allegrosql := "select * from t where " + tt.exprs
   289  		comment := Commentf("for %s", tt.exprs)
   290  		sctx := testKit.Se.(stochastikctx.Context)
   291  		stmts, err := stochastik.Parse(sctx, allegrosql)
   292  		c.Assert(err, IsNil, Commentf("error %v, for expr %s", err, tt.exprs))
   293  		c.Assert(stmts, HasLen, 1)
   294  
   295  		err = causetembedded.Preprocess(sctx, stmts[0], is)
   296  		c.Assert(err, IsNil, comment)
   297  		p, _, err := causetembedded.BuildLogicalCauset(ctx, sctx, stmts[0], is)
   298  		c.Assert(err, IsNil, Commentf("error %v, for building plan, expr %s", err, tt.exprs))
   299  
   300  		sel := p.(causetembedded.LogicalCauset).Children()[0].(*causetembedded.LogicalSelection)
   301  		ds := sel.Children()[0].(*causetembedded.DataSource)
   302  
   303  		histDefCausl := statsTbl.GenerateHistDefCauslFromDeferredCausetInfo(ds.DeferredCausets, ds.Schema().DeferredCausets)
   304  
   305  		ratio, _, err := histDefCausl.Selectivity(sctx, sel.Conditions, nil)
   306  		c.Assert(err, IsNil, comment)
   307  		c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio))
   308  
   309  		histDefCausl.Count *= 10
   310  		ratio, _, err = histDefCausl.Selectivity(sctx, sel.Conditions, nil)
   311  		c.Assert(err, IsNil, comment)
   312  		c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio))
   313  	}
   314  }
   315  
   316  // TestDiscreteDistribution tests the estimation for discrete data distribution. This is more common when the index
   317  // consists several columns, and the first column has small NDV.
   318  func (s *testStatsSuite) TestDiscreteDistribution(c *C) {
   319  	defer cleanEnv(c, s.causetstore, s.do)
   320  	testKit := testkit.NewTestKit(c, s.causetstore)
   321  	testKit.MustInterDirc("use test")
   322  	testKit.MustInterDirc("drop causet if exists t")
   323  	testKit.MustInterDirc("create causet t(a char(10), b int, key idx(a, b))")
   324  	for i := 0; i < 499; i++ {
   325  		testKit.MustInterDirc(fmt.Sprintf("insert into t values ('cn', %d)", i))
   326  	}
   327  	for i := 0; i < 10; i++ {
   328  		testKit.MustInterDirc("insert into t values ('tw', 0)")
   329  	}
   330  	testKit.MustInterDirc("analyze causet t")
   331  	var (
   332  		input  []string
   333  		output [][]string
   334  	)
   335  	s.testData.GetTestCases(c, &input, &output)
   336  	for i, tt := range input {
   337  		s.testData.OnRecord(func() {
   338  			output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(tt).Rows())
   339  		})
   340  		testKit.MustQuery(tt).Check(testkit.Rows(output[i]...))
   341  	}
   342  }
   343  
   344  func (s *testStatsSuite) TestSelectCombinedLowBound(c *C) {
   345  	defer cleanEnv(c, s.causetstore, s.do)
   346  	testKit := testkit.NewTestKit(c, s.causetstore)
   347  	testKit.MustInterDirc("use test")
   348  	testKit.MustInterDirc("drop causet if exists t")
   349  	testKit.MustInterDirc("create causet t(id int auto_increment, kid int, pid int, primary key(id), key(kid, pid))")
   350  	testKit.MustInterDirc("insert into t (kid, pid) values (1,2), (1,3), (1,4),(1, 11), (1, 12), (1, 13), (1, 14), (2, 2), (2, 3), (2, 4)")
   351  	testKit.MustInterDirc("analyze causet t")
   352  	var (
   353  		input  []string
   354  		output [][]string
   355  	)
   356  	s.testData.GetTestCases(c, &input, &output)
   357  	for i, tt := range input {
   358  		s.testData.OnRecord(func() {
   359  			output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(tt).Rows())
   360  		})
   361  		testKit.MustQuery(tt).Check(testkit.Rows(output[i]...))
   362  	}
   363  }
   364  
   365  func getRange(start, end int64) []*ranger.Range {
   366  	ran := &ranger.Range{
   367  		LowVal:  []types.Causet{types.NewIntCauset(start)},
   368  		HighVal: []types.Causet{types.NewIntCauset(end)},
   369  	}
   370  	return []*ranger.Range{ran}
   371  }
   372  
   373  func (s *testStatsSuite) TestOutOfRangeEQEstimation(c *C) {
   374  	defer cleanEnv(c, s.causetstore, s.do)
   375  	testKit := testkit.NewTestKit(c, s.causetstore)
   376  	testKit.MustInterDirc("use test")
   377  	testKit.MustInterDirc("drop causet if exists t")
   378  	testKit.MustInterDirc("create causet t(a int)")
   379  	for i := 0; i < 1000; i++ {
   380  		testKit.MustInterDirc(fmt.Sprintf("insert into t values (%v)", i/4)) // 0 ~ 249
   381  	}
   382  	testKit.MustInterDirc("analyze causet t")
   383  
   384  	h := s.do.StatsHandle()
   385  	causet, err := s.do.SchemaReplicant().TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t"))
   386  	c.Assert(err, IsNil)
   387  	statsTbl := h.GetTableStats(causet.Meta())
   388  	sc := &stmtctx.StatementContext{}
   389  	col := statsTbl.DeferredCausets[causet.Meta().DeferredCausets[0].ID]
   390  	count, err := col.GetDeferredCausetRowCount(sc, getRange(250, 250), 0, false)
   391  	c.Assert(err, IsNil)
   392  	c.Assert(count, Equals, float64(0))
   393  
   394  	for i := 0; i < 8; i++ {
   395  		count, err := col.GetDeferredCausetRowCount(sc, getRange(250, 250), int64(i+1), false)
   396  		c.Assert(err, IsNil)
   397  		c.Assert(count, Equals, math.Min(float64(i+1), 4)) // estRows must be less than modifyCnt
   398  	}
   399  }
   400  
   401  func (s *testStatsSuite) TestEstimationForUnknownValues(c *C) {
   402  	defer cleanEnv(c, s.causetstore, s.do)
   403  	testKit := testkit.NewTestKit(c, s.causetstore)
   404  	testKit.MustInterDirc("use test")
   405  	testKit.MustInterDirc("drop causet if exists t")
   406  	testKit.MustInterDirc("create causet t(a int, b int, key idx(a, b))")
   407  	testKit.MustInterDirc("analyze causet t")
   408  	for i := 0; i < 10; i++ {
   409  		testKit.MustInterDirc(fmt.Sprintf("insert into t values (%d, %d)", i, i))
   410  	}
   411  	h := s.do.StatsHandle()
   412  	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
   413  	testKit.MustInterDirc("analyze causet t")
   414  	for i := 0; i < 10; i++ {
   415  		testKit.MustInterDirc(fmt.Sprintf("insert into t values (%d, %d)", i+10, i+10))
   416  	}
   417  	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
   418  	c.Assert(h.UFIDelate(s.do.SchemaReplicant()), IsNil)
   419  	causet, err := s.do.SchemaReplicant().TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t"))
   420  	c.Assert(err, IsNil)
   421  	statsTbl := h.GetTableStats(causet.Meta())
   422  
   423  	sc := &stmtctx.StatementContext{}
   424  	colID := causet.Meta().DeferredCausets[0].ID
   425  	count, err := statsTbl.GetRowCountByDeferredCausetRanges(sc, colID, getRange(30, 30))
   426  	c.Assert(err, IsNil)
   427  	c.Assert(count, Equals, 0.2)
   428  
   429  	count, err = statsTbl.GetRowCountByDeferredCausetRanges(sc, colID, getRange(9, 30))
   430  	c.Assert(err, IsNil)
   431  	c.Assert(count, Equals, 2.4000000000000004)
   432  
   433  	count, err = statsTbl.GetRowCountByDeferredCausetRanges(sc, colID, getRange(9, math.MaxInt64))
   434  	c.Assert(err, IsNil)
   435  	c.Assert(count, Equals, 2.4000000000000004)
   436  
   437  	idxID := causet.Meta().Indices[0].ID
   438  	count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(30, 30))
   439  	c.Assert(err, IsNil)
   440  	c.Assert(count, Equals, 0.2)
   441  
   442  	count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(9, 30))
   443  	c.Assert(err, IsNil)
   444  	c.Assert(count, Equals, 2.2)
   445  
   446  	testKit.MustInterDirc("truncate causet t")
   447  	testKit.MustInterDirc("insert into t values (null, null)")
   448  	testKit.MustInterDirc("analyze causet t")
   449  	causet, err = s.do.SchemaReplicant().TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t"))
   450  	c.Assert(err, IsNil)
   451  	statsTbl = h.GetTableStats(causet.Meta())
   452  
   453  	colID = causet.Meta().DeferredCausets[0].ID
   454  	count, err = statsTbl.GetRowCountByDeferredCausetRanges(sc, colID, getRange(1, 30))
   455  	c.Assert(err, IsNil)
   456  	c.Assert(count, Equals, 0.0)
   457  
   458  	testKit.MustInterDirc("drop causet t")
   459  	testKit.MustInterDirc("create causet t(a int, b int, index idx(b))")
   460  	testKit.MustInterDirc("insert into t values (1,1)")
   461  	testKit.MustInterDirc("analyze causet t")
   462  	causet, err = s.do.SchemaReplicant().TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t"))
   463  	c.Assert(err, IsNil)
   464  	statsTbl = h.GetTableStats(causet.Meta())
   465  
   466  	colID = causet.Meta().DeferredCausets[0].ID
   467  	count, err = statsTbl.GetRowCountByDeferredCausetRanges(sc, colID, getRange(2, 2))
   468  	c.Assert(err, IsNil)
   469  	c.Assert(count, Equals, 0.0)
   470  
   471  	idxID = causet.Meta().Indices[0].ID
   472  	count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(2, 2))
   473  	c.Assert(err, IsNil)
   474  	c.Assert(count, Equals, 0.0)
   475  }
   476  
   477  func (s *testStatsSuite) TestEstimationUniqueKeyEqualConds(c *C) {
   478  	defer cleanEnv(c, s.causetstore, s.do)
   479  	testKit := testkit.NewTestKit(c, s.causetstore)
   480  	testKit.MustInterDirc("use test")
   481  	testKit.MustInterDirc("drop causet if exists t")
   482  	testKit.MustInterDirc("create causet t(a int, b int, c int, unique key(b))")
   483  	testKit.MustInterDirc("insert into t values (1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5),(6,6,6),(7,7,7)")
   484  	testKit.MustInterDirc("analyze causet t with 4 cmsketch width, 1 cmsketch depth;")
   485  	causet, err := s.do.SchemaReplicant().TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t"))
   486  	c.Assert(err, IsNil)
   487  	statsTbl := s.do.StatsHandle().GetTableStats(causet.Meta())
   488  
   489  	sc := &stmtctx.StatementContext{}
   490  	idxID := causet.Meta().Indices[0].ID
   491  	count, err := statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(7, 7))
   492  	c.Assert(err, IsNil)
   493  	c.Assert(count, Equals, 1.0)
   494  
   495  	count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(6, 6))
   496  	c.Assert(err, IsNil)
   497  	c.Assert(count, Equals, 1.0)
   498  
   499  	colID := causet.Meta().DeferredCausets[0].ID
   500  	count, err = statsTbl.GetRowCountByIntDeferredCausetRanges(sc, colID, getRange(7, 7))
   501  	c.Assert(err, IsNil)
   502  	c.Assert(count, Equals, 1.0)
   503  
   504  	count, err = statsTbl.GetRowCountByIntDeferredCausetRanges(sc, colID, getRange(6, 6))
   505  	c.Assert(err, IsNil)
   506  	c.Assert(count, Equals, 1.0)
   507  }
   508  
   509  func (s *testStatsSuite) TestPrimaryKeySelectivity(c *C) {
   510  	defer cleanEnv(c, s.causetstore, s.do)
   511  	testKit := testkit.NewTestKit(c, s.causetstore)
   512  	testKit.MustInterDirc("use test")
   513  	testKit.MustInterDirc("drop causet if exists t")
   514  	testKit.MustInterDirc("set @@milevadb_enable_clustered_index=0")
   515  	testKit.MustInterDirc("create causet t(a char(10) primary key, b int)")
   516  	var input, output [][]string
   517  	s.testData.GetTestCases(c, &input, &output)
   518  	for i, ts := range input {
   519  		for j, tt := range ts {
   520  			if j != len(ts)-1 {
   521  				testKit.MustInterDirc(tt)
   522  			}
   523  			s.testData.OnRecord(func() {
   524  				if j == len(ts)-1 {
   525  					output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(tt).Rows())
   526  				}
   527  			})
   528  			if j == len(ts)-1 {
   529  				testKit.MustQuery(tt).Check(testkit.Rows(output[i]...))
   530  			}
   531  		}
   532  	}
   533  }
   534  
   535  func BenchmarkSelectivity(b *testing.B) {
   536  	c := &C{}
   537  	s := &testStatsSuite{}
   538  	s.SetUpSuite(c)
   539  	defer s.TearDownSuite(c)
   540  
   541  	testKit := testkit.NewTestKit(c, s.causetstore)
   542  	statsTbl := s.prepareSelectivity(testKit, c)
   543  	is := s.do.SchemaReplicant()
   544  	exprs := "a > 1 and b < 2 and c > 3 and d < 4 and e > 5"
   545  	allegrosql := "select * from t where " + exprs
   546  	comment := Commentf("for %s", exprs)
   547  	sctx := testKit.Se.(stochastikctx.Context)
   548  	stmts, err := stochastik.Parse(sctx, allegrosql)
   549  	c.Assert(err, IsNil, Commentf("error %v, for expr %s", err, exprs))
   550  	c.Assert(stmts, HasLen, 1)
   551  	err = causetembedded.Preprocess(sctx, stmts[0], is)
   552  	c.Assert(err, IsNil, comment)
   553  	p, _, err := causetembedded.BuildLogicalCauset(context.Background(), sctx, stmts[0], is)
   554  	c.Assert(err, IsNil, Commentf("error %v, for building plan, expr %s", err, exprs))
   555  
   556  	file, err := os.Create("cpu.profile")
   557  	c.Assert(err, IsNil)
   558  	defer file.Close()
   559  	pprof.StartCPUProfile(file)
   560  
   561  	b.Run("Selectivity", func(b *testing.B) {
   562  		b.ResetTimer()
   563  		for i := 0; i < b.N; i++ {
   564  			_, _, err := statsTbl.Selectivity(sctx, p.(causetembedded.LogicalCauset).Children()[0].(*causetembedded.LogicalSelection).Conditions, nil)
   565  			c.Assert(err, IsNil)
   566  		}
   567  		b.ReportAllocs()
   568  	})
   569  	pprof.StopCPUProfile()
   570  }
   571  
   572  func (s *testStatsSuite) TestDeferredCausetIndexNullEstimation(c *C) {
   573  	defer cleanEnv(c, s.causetstore, s.do)
   574  	testKit := testkit.NewTestKit(c, s.causetstore)
   575  	testKit.MustInterDirc("use test")
   576  	testKit.MustInterDirc("drop causet if exists t")
   577  	testKit.MustInterDirc("create causet t(a int, b int, c int, index idx_b(b), index idx_c_a(c, a))")
   578  	testKit.MustInterDirc("insert into t values(1,null,1),(2,null,2),(3,3,3),(4,null,4),(null,null,null);")
   579  	h := s.do.StatsHandle()
   580  	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
   581  	testKit.MustInterDirc("analyze causet t")
   582  	var (
   583  		input  []string
   584  		output [][]string
   585  	)
   586  	s.testData.GetTestCases(c, &input, &output)
   587  	for i := 0; i < 5; i++ {
   588  		s.testData.OnRecord(func() {
   589  			output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(input[i]).Rows())
   590  		})
   591  		testKit.MustQuery(input[i]).Check(testkit.Rows(output[i]...))
   592  	}
   593  	// Make sure column stats has been loaded.
   594  	testKit.MustInterDirc(`explain select * from t where a is null`)
   595  	c.Assert(h.LoadNeededHistograms(), IsNil)
   596  	for i := 5; i < len(input); i++ {
   597  		s.testData.OnRecord(func() {
   598  			output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(input[i]).Rows())
   599  		})
   600  		testKit.MustQuery(input[i]).Check(testkit.Rows(output[i]...))
   601  	}
   602  }
   603  
   604  func (s *testStatsSuite) TestUniqCompEqualEst(c *C) {
   605  	defer cleanEnv(c, s.causetstore, s.do)
   606  	testKit := testkit.NewTestKit(c, s.causetstore)
   607  	testKit.MustInterDirc("use test")
   608  	testKit.MustInterDirc("drop causet if exists t")
   609  	testKit.MustInterDirc("create causet t(a int, b int, primary key(a, b))")
   610  	testKit.MustInterDirc("insert into t values(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),(1,7),(1,8),(1,9),(1,10)")
   611  	h := s.do.StatsHandle()
   612  	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
   613  	testKit.MustInterDirc("analyze causet t")
   614  	var (
   615  		input  []string
   616  		output [][]string
   617  	)
   618  	s.testData.GetTestCases(c, &input, &output)
   619  	for i := 0; i < 1; i++ {
   620  		s.testData.OnRecord(func() {
   621  			output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(input[i]).Rows())
   622  		})
   623  		testKit.MustQuery(input[i]).Check(testkit.Rows(output[i]...))
   624  	}
   625  }
   626  
   627  func (s *testStatsSuite) TestSelectivityGreedyAlgo(c *C) {
   628  	nodes := make([]*statistics.StatsNode, 3)
   629  	nodes[0] = statistics.MockStatsNode(1, 3, 2)
   630  	nodes[1] = statistics.MockStatsNode(2, 5, 2)
   631  	nodes[2] = statistics.MockStatsNode(3, 9, 2)
   632  
   633  	// Sets should not overlap on mask, so only nodes[0] is chosen.
   634  	usedSets := statistics.GetUsableSetsByGreedy(nodes)
   635  	c.Assert(len(usedSets), Equals, 1)
   636  	c.Assert(usedSets[0].ID, Equals, int64(1))
   637  
   638  	nodes[0], nodes[1] = nodes[1], nodes[0]
   639  	// Sets chosen should be sblock, so the returned node is still the one with ID 1.
   640  	usedSets = statistics.GetUsableSetsByGreedy(nodes)
   641  	c.Assert(len(usedSets), Equals, 1)
   642  	c.Assert(usedSets[0].ID, Equals, int64(1))
   643  }
   644  
   645  func (s *testStatsSuite) TestDefCauslationDeferredCausetEstimate(c *C) {
   646  	defer cleanEnv(c, s.causetstore, s.do)
   647  	tk := testkit.NewTestKit(c, s.causetstore)
   648  	collate.SetNewDefCauslationEnabledForTest(true)
   649  	defer collate.SetNewDefCauslationEnabledForTest(false)
   650  	tk.MustInterDirc("use test")
   651  	tk.MustInterDirc("drop causet if exists t")
   652  	tk.MustInterDirc("create causet t(a varchar(20) collate utf8mb4_general_ci)")
   653  	tk.MustInterDirc("insert into t values('aaa'), ('bbb'), ('AAA'), ('BBB')")
   654  	h := s.do.StatsHandle()
   655  	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
   656  	tk.MustInterDirc("analyze causet t")
   657  	tk.MustInterDirc("explain select * from t where a = 'aaa'")
   658  	c.Assert(h.LoadNeededHistograms(), IsNil)
   659  	var (
   660  		input  []string
   661  		output [][]string
   662  	)
   663  	s.testData.GetTestCases(c, &input, &output)
   664  	for i := 0; i < len(input); i++ {
   665  		s.testData.OnRecord(func() {
   666  			output[i] = s.testData.ConvertRowsToStrings(tk.MustQuery(input[i]).Rows())
   667  		})
   668  		tk.MustQuery(input[i]).Check(testkit.Rows(output[i]...))
   669  	}
   670  }
   671  
   672  // TestDNFCondSelectivity tests selectivity calculation with DNF conditions covered by using independence assumption.
   673  func (s *testStatsSuite) TestDNFCondSelectivity(c *C) {
   674  	defer cleanEnv(c, s.causetstore, s.do)
   675  	testKit := testkit.NewTestKit(c, s.causetstore)
   676  
   677  	testKit.MustInterDirc("use test")
   678  	testKit.MustInterDirc("drop causet if exists t")
   679  	testKit.MustInterDirc("create causet t(a int, b int, c int, d int)")
   680  	testKit.MustInterDirc("insert into t value(1,5,4,4),(3,4,1,8),(4,2,6,10),(6,7,2,5),(7,1,4,9),(8,9,8,3),(9,1,9,1),(10,6,6,2)")
   681  	testKit.MustInterDirc("alter causet t add index (b)")
   682  	testKit.MustInterDirc("alter causet t add index (d)")
   683  	testKit.MustInterDirc(`analyze causet t`)
   684  
   685  	ctx := context.Background()
   686  	is := s.do.SchemaReplicant()
   687  	h := s.do.StatsHandle()
   688  	tb, err := is.TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t"))
   689  	c.Assert(err, IsNil)
   690  	tblInfo := tb.Meta()
   691  	statsTbl := h.GetTableStats(tblInfo)
   692  
   693  	var (
   694  		input  []string
   695  		output []struct {
   696  			ALLEGROALLEGROSQL string
   697  			Selectivity       float64
   698  		}
   699  	)
   700  	s.testData.GetTestCases(c, &input, &output)
   701  	for i, tt := range input {
   702  		sctx := testKit.Se.(stochastikctx.Context)
   703  		stmts, err := stochastik.Parse(sctx, tt)
   704  		c.Assert(err, IsNil, Commentf("error %v, for allegrosql %s", err, tt))
   705  		c.Assert(stmts, HasLen, 1)
   706  
   707  		err = causetembedded.Preprocess(sctx, stmts[0], is)
   708  		c.Assert(err, IsNil, Commentf("error %v, for allegrosql %s", err, tt))
   709  		p, _, err := causetembedded.BuildLogicalCauset(ctx, sctx, stmts[0], is)
   710  		c.Assert(err, IsNil, Commentf("error %v, for building plan, allegrosql %s", err, tt))
   711  
   712  		sel := p.(causetembedded.LogicalCauset).Children()[0].(*causetembedded.LogicalSelection)
   713  		ds := sel.Children()[0].(*causetembedded.DataSource)
   714  
   715  		histDefCausl := statsTbl.GenerateHistDefCauslFromDeferredCausetInfo(ds.DeferredCausets, ds.Schema().DeferredCausets)
   716  
   717  		ratio, _, err := histDefCausl.Selectivity(sctx, sel.Conditions, nil)
   718  		c.Assert(err, IsNil, Commentf("error %v, for expr %s", err, tt))
   719  		s.testData.OnRecord(func() {
   720  			output[i].ALLEGROALLEGROSQL = tt
   721  			output[i].Selectivity = ratio
   722  		})
   723  		c.Assert(math.Abs(ratio-output[i].Selectivity) < eps, IsTrue,
   724  			Commentf("for %s, needed: %v, got: %v", tt, output[i].Selectivity, ratio))
   725  	}
   726  
   727  	// Test issue 19981
   728  	testKit.MustInterDirc("select * from t where _milevadb_rowid is null or _milevadb_rowid > 7")
   729  }