
     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
    14  package statistics_test
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"math"
    20  	"os"
    21  	"runtime/pprof"
    22  	"strings"
    23  	"testing"
    24  	"time"
    26  	""
    27  	""
    28  	. ""
    29  	""
    30  	""
    31  	causetembedded ""
    32  	""
    33  	""
    34  	""
    35  	""
    36  	""
    37  	""
    38  	""
    39  	""
    40  	""
    41  	""
    42  	""
    43  	""
    44  	""
    45  	""
    46  	""
    47  	""
    48  	""
    49  )
    51  const eps = 1e-9
    53  var _ = SerialSuites(&testStatsSuite{})
    55  type testStatsSuite struct {
    56  	causetstore ekv.CausetStorage
    57  	do          *petri.Petri
    58  	hook        *logHook
    59  	testData    solitonutil.TestData
    60  }
    62  func (s *testStatsSuite) SetUpSuite(c *C) {
    63  	testleak.BeforeTest()
    64  	// Add the hook here to avoid data race.
    65  	s.registerHook()
    66  	var err error
    67  	s.causetstore,, err = newStoreWithBootstrap()
    68  	c.Assert(err, IsNil)
    69  	s.testData, err = solitonutil.LoadTestSuiteData("testdata", "stats_suite")
    70  	c.Assert(err, IsNil)
    71  }
    73  func (s *testStatsSuite) TearDownSuite(c *C) {
    75  	c.Assert(s.causetstore.Close(), IsNil)
    76  	testleak.AfterTest(c)()
    77  	c.Assert(s.testData.GenerateOutputIfNeeded(), IsNil)
    78  }
    80  func (s *testStatsSuite) registerHook() {
    81  	conf := &log.Config{Level: os.Getenv("log_level"), File: log.FileLogConfig{}}
    82  	_, r, _ := log.InitLogger(conf)
    83  	s.hook = &logHook{r.Core, ""}
    84  	lg := zap.New(s.hook)
    85  	log.ReplaceGlobals(lg, r)
    86  }
    88  type logHook struct {
    89  	zapembedded.Core
    90  	results string
    91  }
    93  func (h *logHook) Write(entry zapembedded.Entry, fields []zapembedded.Field) error {
    94  	message := entry.Message
    95  	if idx := strings.Index(message, "[stats"); idx != -1 {
    96  		h.results = h.results + message
    97  		for _, f := range fields {
    98  			h.results = h.results + ", " + f.Key + "=" + h.field2String(f)
    99  		}
   100  	}
   101  	return nil
   102  }
   104  func (h *logHook) field2String(field zapembedded.Field) string {
   105  	switch field.Type {
   106  	case zapembedded.StringType:
   107  		return field.String
   108  	case zapembedded.Int64Type, zapembedded.Int32Type, zapembedded.Uint32Type:
   109  		return fmt.Sprintf("%v", field.Integer)
   110  	case zapembedded.Float64Type:
   111  		return fmt.Sprintf("%v", math.Float64frombits(uint64(field.Integer)))
   112  	case zapembedded.StringerType:
   113  		return field.Interface.(fmt.Stringer).String()
   114  	}
   115  	return "not support"
   116  }
   118  func (h *logHook) Check(e zapembedded.Entry, ce *zapembedded.CheckedEntry) *zapembedded.CheckedEntry {
   119  	if h.Enabled(e.Level) {
   120  		return ce.AddCore(e, h)
   121  	}
   122  	return ce
   123  }
   125  func newStoreWithBootstrap() (ekv.CausetStorage, *petri.Petri, error) {
   126  	causetstore, err := mockstore.NewMockStore()
   127  	if err != nil {
   128  		return nil, nil, errors.Trace(err)
   129  	}
   130  	stochastik.SetSchemaLease(0)
   131  	stochastik.DisableStats4Test()
   132  	petri.RunAutoAnalyze = false
   133  	do, err := stochastik.BootstrapStochastik(causetstore)
   134  	do.SetStatsUFIDelating(true)
   135  	return causetstore, do, errors.Trace(err)
   136  }
   138  func cleanEnv(c *C, causetstore ekv.CausetStorage, do *petri.Petri) {
   139  	tk := testkit.NewTestKit(c, causetstore)
   140  	tk.MustInterDirc("use test")
   141  	r := tk.MustQuery("show blocks")
   142  	for _, tb := range r.Rows() {
   143  		blockName := tb[0]
   144  		tk.MustInterDirc(fmt.Sprintf("drop causet %v", blockName))
   145  	}
   146  	tk.MustInterDirc("delete from allegrosql.stats_spacetime")
   147  	tk.MustInterDirc("delete from allegrosql.stats_histograms")
   148  	tk.MustInterDirc("delete from allegrosql.stats_buckets")
   149  	do.StatsHandle().Clear()
   150  }
   152  // generateIntCauset will generate a causet slice, every dimension is begin from 0, end with num - 1.
   153  // If dimension is x, num is y, the total number of causet is y^x. And This slice is sorted.
   154  func (s *testStatsSuite) generateIntCauset(dimension, num int) ([]types.Causet, error) {
   155  	length := int(math.Pow(float64(num), float64(dimension)))
   156  	ret := make([]types.Causet, length)
   157  	if dimension == 1 {
   158  		for i := 0; i < num; i++ {
   159  			ret[i] = types.NewIntCauset(int64(i))
   160  		}
   161  	} else {
   162  		sc := &stmtctx.StatementContext{TimeZone: time.Local}
   163  		// In this way, we can guarantee the causet is in order.
   164  		for i := 0; i < length; i++ {
   165  			data := make([]types.Causet, dimension)
   166  			j := i
   167  			for k := 0; k < dimension; k++ {
   168  				data[dimension-k-1].SetInt64(int64(j % num))
   169  				j = j / num
   170  			}
   171  			bytes, err := codec.EncodeKey(sc, nil, data...)
   172  			if err != nil {
   173  				return nil, err
   174  			}
   175  			ret[i].SetBytes(bytes)
   176  		}
   177  	}
   178  	return ret, nil
   179  }
   181  // mockStatsHistogram will create a statistics.Histogram, of which the data is uniform distribution.
   182  func mockStatsHistogram(id int64, values []types.Causet, repeat int64, tp *types.FieldType) *statistics.Histogram {
   183  	ndv := len(values)
   184  	histogram := statistics.NewHistogram(id, int64(ndv), 0, 0, tp, ndv, 0)
   185  	for i := 0; i < ndv; i++ {
   186  		histogram.AppendBucket(&values[i], &values[i], repeat*int64(i+1), repeat)
   187  	}
   188  	return histogram
   189  }
   191  func mockStatsTable(tbl *perceptron.TableInfo, rowCount int64) *statistics.Block {
   192  	histDefCausl := statistics.HistDefCausl{
   193  		PhysicalID:      tbl.ID,
   194  		HavePhysicalID:  true,
   195  		Count:           rowCount,
   196  		DeferredCausets: make(map[int64]*statistics.DeferredCauset, len(tbl.DeferredCausets)),
   197  		Indices:         make(map[int64]*statistics.Index, len(tbl.Indices)),
   198  	}
   199  	statsTbl := &statistics.Block{
   200  		HistDefCausl: histDefCausl,
   201  	}
   202  	return statsTbl
   203  }
   205  func (s *testStatsSuite) prepareSelectivity(testKit *testkit.TestKit, c *C) *statistics.Block {
   206  	testKit.MustInterDirc("use test")
   207  	testKit.MustInterDirc("drop causet if exists t")
   208  	testKit.MustInterDirc("create causet t(a int primary key, b int, c int, d int, e int, index idx_cd(c, d), index idx_de(d, e))")
   210  	is :=
   211  	tb, err := is.TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t"))
   212  	c.Assert(err, IsNil)
   213  	tbl := tb.Meta()
   215  	// mock the statistic causet
   216  	statsTbl := mockStatsTable(tbl, 540)
   218  	// Set the value of columns' histogram.
   219  	colValues, err := s.generateIntCauset(1, 54)
   220  	c.Assert(err, IsNil)
   221  	for i := 1; i <= 5; i++ {
   222  		statsTbl.DeferredCausets[int64(i)] = &statistics.DeferredCauset{Histogram: *mockStatsHistogram(int64(i), colValues, 10, types.NewFieldType(allegrosql.TypeLonglong)), Info: tbl.DeferredCausets[i-1]}
   223  	}
   225  	// Set the value of two indices' histograms.
   226  	idxValues, err := s.generateIntCauset(2, 3)
   227  	c.Assert(err, IsNil)
   228  	tp := types.NewFieldType(allegrosql.TypeBlob)
   229  	statsTbl.Indices[1] = &statistics.Index{Histogram: *mockStatsHistogram(1, idxValues, 60, tp), Info: tbl.Indices[0]}
   230  	statsTbl.Indices[2] = &statistics.Index{Histogram: *mockStatsHistogram(2, idxValues, 60, tp), Info: tbl.Indices[1]}
   231  	return statsTbl
   232  }
   234  func (s *testStatsSuite) TestSelectivity(c *C) {
   235  	defer cleanEnv(c, s.causetstore,
   236  	testKit := testkit.NewTestKit(c, s.causetstore)
   237  	statsTbl := s.prepareSelectivity(testKit, c)
   238  	is :=
   240  	longExpr := "0 < a and a = 1 "
   241  	for i := 1; i < 64; i++ {
   242  		longExpr += fmt.Sprintf(" and a > %d ", i)
   243  	}
   244  	tests := []struct {
   245  		exprs       string
   246  		selectivity float64
   247  	}{
   248  		{
   249  			exprs:       "a > 0 and a < 2",
   250  			selectivity: 0.01851851851,
   251  		},
   252  		{
   253  			exprs:       "a >= 1 and a < 2",
   254  			selectivity: 0.01851851851,
   255  		},
   256  		{
   257  			exprs:       "a >= 1 and b > 1 and a < 2",
   258  			selectivity: 0.01783264746,
   259  		},
   260  		{
   261  			exprs:       "a >= 1 and c > 1 and a < 2",
   262  			selectivity: 0.00617283950,
   263  		},
   264  		{
   265  			exprs:       "a >= 1 and c >= 1 and a < 2",
   266  			selectivity: 0.01234567901,
   267  		},
   268  		{
   269  			exprs:       "d = 0 and e = 1",
   270  			selectivity: 0.11111111111,
   271  		},
   272  		{
   273  			exprs:       "b > 1",
   274  			selectivity: 0.96296296296,
   275  		},
   276  		{
   277  			exprs:       "a > 1 and b < 2 and c > 3 and d < 4 and e > 5",
   278  			selectivity: 0,
   279  		},
   280  		{
   281  			exprs:       longExpr,
   282  			selectivity: 0.001,
   283  		},
   284  	}
   286  	ctx := context.Background()
   287  	for _, tt := range tests {
   288  		allegrosql := "select * from t where " + tt.exprs
   289  		comment := Commentf("for %s", tt.exprs)
   290  		sctx := testKit.Se.(stochastikctx.Context)
   291  		stmts, err := stochastik.Parse(sctx, allegrosql)
   292  		c.Assert(err, IsNil, Commentf("error %v, for expr %s", err, tt.exprs))
   293  		c.Assert(stmts, HasLen, 1)
   295  		err = causetembedded.Preprocess(sctx, stmts[0], is)
   296  		c.Assert(err, IsNil, comment)
   297  		p, _, err := causetembedded.BuildLogicalCauset(ctx, sctx, stmts[0], is)
   298  		c.Assert(err, IsNil, Commentf("error %v, for building plan, expr %s", err, tt.exprs))
   300  		sel := p.(causetembedded.LogicalCauset).Children()[0].(*causetembedded.LogicalSelection)
   301  		ds := sel.Children()[0].(*causetembedded.DataSource)
   303  		histDefCausl := statsTbl.GenerateHistDefCauslFromDeferredCausetInfo(ds.DeferredCausets, ds.Schema().DeferredCausets)
   305  		ratio, _, err := histDefCausl.Selectivity(sctx, sel.Conditions, nil)
   306  		c.Assert(err, IsNil, comment)
   307  		c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio))
   309  		histDefCausl.Count *= 10
   310  		ratio, _, err = histDefCausl.Selectivity(sctx, sel.Conditions, nil)
   311  		c.Assert(err, IsNil, comment)
   312  		c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio))
   313  	}
   314  }
   316  // TestDiscreteDistribution tests the estimation for discrete data distribution. This is more common when the index
   317  // consists several columns, and the first column has small NDV.
   318  func (s *testStatsSuite) TestDiscreteDistribution(c *C) {
   319  	defer cleanEnv(c, s.causetstore,
   320  	testKit := testkit.NewTestKit(c, s.causetstore)
   321  	testKit.MustInterDirc("use test")
   322  	testKit.MustInterDirc("drop causet if exists t")
   323  	testKit.MustInterDirc("create causet t(a char(10), b int, key idx(a, b))")
   324  	for i := 0; i < 499; i++ {
   325  		testKit.MustInterDirc(fmt.Sprintf("insert into t values ('cn', %d)", i))
   326  	}
   327  	for i := 0; i < 10; i++ {
   328  		testKit.MustInterDirc("insert into t values ('tw', 0)")
   329  	}
   330  	testKit.MustInterDirc("analyze causet t")
   331  	var (
   332  		input  []string
   333  		output [][]string
   334  	)
   335  	s.testData.GetTestCases(c, &input, &output)
   336  	for i, tt := range input {
   337  		s.testData.OnRecord(func() {
   338  			output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(tt).Rows())
   339  		})
   340  		testKit.MustQuery(tt).Check(testkit.Rows(output[i]...))
   341  	}
   342  }
   344  func (s *testStatsSuite) TestSelectCombinedLowBound(c *C) {
   345  	defer cleanEnv(c, s.causetstore,
   346  	testKit := testkit.NewTestKit(c, s.causetstore)
   347  	testKit.MustInterDirc("use test")
   348  	testKit.MustInterDirc("drop causet if exists t")
   349  	testKit.MustInterDirc("create causet t(id int auto_increment, kid int, pid int, primary key(id), key(kid, pid))")
   350  	testKit.MustInterDirc("insert into t (kid, pid) values (1,2), (1,3), (1,4),(1, 11), (1, 12), (1, 13), (1, 14), (2, 2), (2, 3), (2, 4)")
   351  	testKit.MustInterDirc("analyze causet t")
   352  	var (
   353  		input  []string
   354  		output [][]string
   355  	)
   356  	s.testData.GetTestCases(c, &input, &output)
   357  	for i, tt := range input {
   358  		s.testData.OnRecord(func() {
   359  			output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(tt).Rows())
   360  		})
   361  		testKit.MustQuery(tt).Check(testkit.Rows(output[i]...))
   362  	}
   363  }
   365  func getRange(start, end int64) []*ranger.Range {
   366  	ran := &ranger.Range{
   367  		LowVal:  []types.Causet{types.NewIntCauset(start)},
   368  		HighVal: []types.Causet{types.NewIntCauset(end)},
   369  	}
   370  	return []*ranger.Range{ran}
   371  }
   373  func (s *testStatsSuite) TestOutOfRangeEQEstimation(c *C) {
   374  	defer cleanEnv(c, s.causetstore,
   375  	testKit := testkit.NewTestKit(c, s.causetstore)
   376  	testKit.MustInterDirc("use test")
   377  	testKit.MustInterDirc("drop causet if exists t")
   378  	testKit.MustInterDirc("create causet t(a int)")
   379  	for i := 0; i < 1000; i++ {
   380  		testKit.MustInterDirc(fmt.Sprintf("insert into t values (%v)", i/4)) // 0 ~ 249
   381  	}
   382  	testKit.MustInterDirc("analyze causet t")
   384  	h :=
   385  	causet, err :="test"), perceptron.NewCIStr("t"))
   386  	c.Assert(err, IsNil)
   387  	statsTbl := h.GetTableStats(causet.Meta())
   388  	sc := &stmtctx.StatementContext{}
   389  	col := statsTbl.DeferredCausets[causet.Meta().DeferredCausets[0].ID]
   390  	count, err := col.GetDeferredCausetRowCount(sc, getRange(250, 250), 0, false)
   391  	c.Assert(err, IsNil)
   392  	c.Assert(count, Equals, float64(0))
   394  	for i := 0; i < 8; i++ {
   395  		count, err := col.GetDeferredCausetRowCount(sc, getRange(250, 250), int64(i+1), false)
   396  		c.Assert(err, IsNil)
   397  		c.Assert(count, Equals, math.Min(float64(i+1), 4)) // estRows must be less than modifyCnt
   398  	}
   399  }
   401  func (s *testStatsSuite) TestEstimationForUnknownValues(c *C) {
   402  	defer cleanEnv(c, s.causetstore,
   403  	testKit := testkit.NewTestKit(c, s.causetstore)
   404  	testKit.MustInterDirc("use test")
   405  	testKit.MustInterDirc("drop causet if exists t")
   406  	testKit.MustInterDirc("create causet t(a int, b int, key idx(a, b))")
   407  	testKit.MustInterDirc("analyze causet t")
   408  	for i := 0; i < 10; i++ {
   409  		testKit.MustInterDirc(fmt.Sprintf("insert into t values (%d, %d)", i, i))
   410  	}
   411  	h :=
   412  	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
   413  	testKit.MustInterDirc("analyze causet t")
   414  	for i := 0; i < 10; i++ {
   415  		testKit.MustInterDirc(fmt.Sprintf("insert into t values (%d, %d)", i+10, i+10))
   416  	}
   417  	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
   418  	c.Assert(h.UFIDelate(, IsNil)
   419  	causet, err :="test"), perceptron.NewCIStr("t"))
   420  	c.Assert(err, IsNil)
   421  	statsTbl := h.GetTableStats(causet.Meta())
   423  	sc := &stmtctx.StatementContext{}
   424  	colID := causet.Meta().DeferredCausets[0].ID
   425  	count, err := statsTbl.GetRowCountByDeferredCausetRanges(sc, colID, getRange(30, 30))
   426  	c.Assert(err, IsNil)
   427  	c.Assert(count, Equals, 0.2)
   429  	count, err = statsTbl.GetRowCountByDeferredCausetRanges(sc, colID, getRange(9, 30))
   430  	c.Assert(err, IsNil)
   431  	c.Assert(count, Equals, 2.4000000000000004)
   433  	count, err = statsTbl.GetRowCountByDeferredCausetRanges(sc, colID, getRange(9, math.MaxInt64))
   434  	c.Assert(err, IsNil)
   435  	c.Assert(count, Equals, 2.4000000000000004)
   437  	idxID := causet.Meta().Indices[0].ID
   438  	count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(30, 30))
   439  	c.Assert(err, IsNil)
   440  	c.Assert(count, Equals, 0.2)
   442  	count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(9, 30))
   443  	c.Assert(err, IsNil)
   444  	c.Assert(count, Equals, 2.2)
   446  	testKit.MustInterDirc("truncate causet t")
   447  	testKit.MustInterDirc("insert into t values (null, null)")
   448  	testKit.MustInterDirc("analyze causet t")
   449  	causet, err ="test"), perceptron.NewCIStr("t"))
   450  	c.Assert(err, IsNil)
   451  	statsTbl = h.GetTableStats(causet.Meta())
   453  	colID = causet.Meta().DeferredCausets[0].ID
   454  	count, err = statsTbl.GetRowCountByDeferredCausetRanges(sc, colID, getRange(1, 30))
   455  	c.Assert(err, IsNil)
   456  	c.Assert(count, Equals, 0.0)
   458  	testKit.MustInterDirc("drop causet t")
   459  	testKit.MustInterDirc("create causet t(a int, b int, index idx(b))")
   460  	testKit.MustInterDirc("insert into t values (1,1)")
   461  	testKit.MustInterDirc("analyze causet t")
   462  	causet, err ="test"), perceptron.NewCIStr("t"))
   463  	c.Assert(err, IsNil)
   464  	statsTbl = h.GetTableStats(causet.Meta())
   466  	colID = causet.Meta().DeferredCausets[0].ID
   467  	count, err = statsTbl.GetRowCountByDeferredCausetRanges(sc, colID, getRange(2, 2))
   468  	c.Assert(err, IsNil)
   469  	c.Assert(count, Equals, 0.0)
   471  	idxID = causet.Meta().Indices[0].ID
   472  	count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(2, 2))
   473  	c.Assert(err, IsNil)
   474  	c.Assert(count, Equals, 0.0)
   475  }
   477  func (s *testStatsSuite) TestEstimationUniqueKeyEqualConds(c *C) {
   478  	defer cleanEnv(c, s.causetstore,
   479  	testKit := testkit.NewTestKit(c, s.causetstore)
   480  	testKit.MustInterDirc("use test")
   481  	testKit.MustInterDirc("drop causet if exists t")
   482  	testKit.MustInterDirc("create causet t(a int, b int, c int, unique key(b))")
   483  	testKit.MustInterDirc("insert into t values (1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5),(6,6,6),(7,7,7)")
   484  	testKit.MustInterDirc("analyze causet t with 4 cmsketch width, 1 cmsketch depth;")
   485  	causet, err :="test"), perceptron.NewCIStr("t"))
   486  	c.Assert(err, IsNil)
   487  	statsTbl :=
   489  	sc := &stmtctx.StatementContext{}
   490  	idxID := causet.Meta().Indices[0].ID
   491  	count, err := statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(7, 7))
   492  	c.Assert(err, IsNil)
   493  	c.Assert(count, Equals, 1.0)
   495  	count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(6, 6))
   496  	c.Assert(err, IsNil)
   497  	c.Assert(count, Equals, 1.0)
   499  	colID := causet.Meta().DeferredCausets[0].ID
   500  	count, err = statsTbl.GetRowCountByIntDeferredCausetRanges(sc, colID, getRange(7, 7))
   501  	c.Assert(err, IsNil)
   502  	c.Assert(count, Equals, 1.0)
   504  	count, err = statsTbl.GetRowCountByIntDeferredCausetRanges(sc, colID, getRange(6, 6))
   505  	c.Assert(err, IsNil)
   506  	c.Assert(count, Equals, 1.0)
   507  }
   509  func (s *testStatsSuite) TestPrimaryKeySelectivity(c *C) {
   510  	defer cleanEnv(c, s.causetstore,
   511  	testKit := testkit.NewTestKit(c, s.causetstore)
   512  	testKit.MustInterDirc("use test")
   513  	testKit.MustInterDirc("drop causet if exists t")
   514  	testKit.MustInterDirc("set @@milevadb_enable_clustered_index=0")
   515  	testKit.MustInterDirc("create causet t(a char(10) primary key, b int)")
   516  	var input, output [][]string
   517  	s.testData.GetTestCases(c, &input, &output)
   518  	for i, ts := range input {
   519  		for j, tt := range ts {
   520  			if j != len(ts)-1 {
   521  				testKit.MustInterDirc(tt)
   522  			}
   523  			s.testData.OnRecord(func() {
   524  				if j == len(ts)-1 {
   525  					output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(tt).Rows())
   526  				}
   527  			})
   528  			if j == len(ts)-1 {
   529  				testKit.MustQuery(tt).Check(testkit.Rows(output[i]...))
   530  			}
   531  		}
   532  	}
   533  }
   535  func BenchmarkSelectivity(b *testing.B) {
   536  	c := &C{}
   537  	s := &testStatsSuite{}
   538  	s.SetUpSuite(c)
   539  	defer s.TearDownSuite(c)
   541  	testKit := testkit.NewTestKit(c, s.causetstore)
   542  	statsTbl := s.prepareSelectivity(testKit, c)
   543  	is :=
   544  	exprs := "a > 1 and b < 2 and c > 3 and d < 4 and e > 5"
   545  	allegrosql := "select * from t where " + exprs
   546  	comment := Commentf("for %s", exprs)
   547  	sctx := testKit.Se.(stochastikctx.Context)
   548  	stmts, err := stochastik.Parse(sctx, allegrosql)
   549  	c.Assert(err, IsNil, Commentf("error %v, for expr %s", err, exprs))
   550  	c.Assert(stmts, HasLen, 1)
   551  	err = causetembedded.Preprocess(sctx, stmts[0], is)
   552  	c.Assert(err, IsNil, comment)
   553  	p, _, err := causetembedded.BuildLogicalCauset(context.Background(), sctx, stmts[0], is)
   554  	c.Assert(err, IsNil, Commentf("error %v, for building plan, expr %s", err, exprs))
   556  	file, err := os.Create("cpu.profile")
   557  	c.Assert(err, IsNil)
   558  	defer file.Close()
   559  	pprof.StartCPUProfile(file)
   561  	b.Run("Selectivity", func(b *testing.B) {
   562  		b.ResetTimer()
   563  		for i := 0; i < b.N; i++ {
   564  			_, _, err := statsTbl.Selectivity(sctx, p.(causetembedded.LogicalCauset).Children()[0].(*causetembedded.LogicalSelection).Conditions, nil)
   565  			c.Assert(err, IsNil)
   566  		}
   567  		b.ReportAllocs()
   568  	})
   569  	pprof.StopCPUProfile()
   570  }
   572  func (s *testStatsSuite) TestDeferredCausetIndexNullEstimation(c *C) {
   573  	defer cleanEnv(c, s.causetstore,
   574  	testKit := testkit.NewTestKit(c, s.causetstore)
   575  	testKit.MustInterDirc("use test")
   576  	testKit.MustInterDirc("drop causet if exists t")
   577  	testKit.MustInterDirc("create causet t(a int, b int, c int, index idx_b(b), index idx_c_a(c, a))")
   578  	testKit.MustInterDirc("insert into t values(1,null,1),(2,null,2),(3,3,3),(4,null,4),(null,null,null);")
   579  	h :=
   580  	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
   581  	testKit.MustInterDirc("analyze causet t")
   582  	var (
   583  		input  []string
   584  		output [][]string
   585  	)
   586  	s.testData.GetTestCases(c, &input, &output)
   587  	for i := 0; i < 5; i++ {
   588  		s.testData.OnRecord(func() {
   589  			output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(input[i]).Rows())
   590  		})
   591  		testKit.MustQuery(input[i]).Check(testkit.Rows(output[i]...))
   592  	}
   593  	// Make sure column stats has been loaded.
   594  	testKit.MustInterDirc(`explain select * from t where a is null`)
   595  	c.Assert(h.LoadNeededHistograms(), IsNil)
   596  	for i := 5; i < len(input); i++ {
   597  		s.testData.OnRecord(func() {
   598  			output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(input[i]).Rows())
   599  		})
   600  		testKit.MustQuery(input[i]).Check(testkit.Rows(output[i]...))
   601  	}
   602  }
   604  func (s *testStatsSuite) TestUniqCompEqualEst(c *C) {
   605  	defer cleanEnv(c, s.causetstore,
   606  	testKit := testkit.NewTestKit(c, s.causetstore)
   607  	testKit.MustInterDirc("use test")
   608  	testKit.MustInterDirc("drop causet if exists t")
   609  	testKit.MustInterDirc("create causet t(a int, b int, primary key(a, b))")
   610  	testKit.MustInterDirc("insert into t values(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),(1,7),(1,8),(1,9),(1,10)")
   611  	h :=
   612  	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
   613  	testKit.MustInterDirc("analyze causet t")
   614  	var (
   615  		input  []string
   616  		output [][]string
   617  	)
   618  	s.testData.GetTestCases(c, &input, &output)
   619  	for i := 0; i < 1; i++ {
   620  		s.testData.OnRecord(func() {
   621  			output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(input[i]).Rows())
   622  		})
   623  		testKit.MustQuery(input[i]).Check(testkit.Rows(output[i]...))
   624  	}
   625  }
   627  func (s *testStatsSuite) TestSelectivityGreedyAlgo(c *C) {
   628  	nodes := make([]*statistics.StatsNode, 3)
   629  	nodes[0] = statistics.MockStatsNode(1, 3, 2)
   630  	nodes[1] = statistics.MockStatsNode(2, 5, 2)
   631  	nodes[2] = statistics.MockStatsNode(3, 9, 2)
   633  	// Sets should not overlap on mask, so only nodes[0] is chosen.
   634  	usedSets := statistics.GetUsableSetsByGreedy(nodes)
   635  	c.Assert(len(usedSets), Equals, 1)
   636  	c.Assert(usedSets[0].ID, Equals, int64(1))
   638  	nodes[0], nodes[1] = nodes[1], nodes[0]
   639  	// Sets chosen should be sblock, so the returned node is still the one with ID 1.
   640  	usedSets = statistics.GetUsableSetsByGreedy(nodes)
   641  	c.Assert(len(usedSets), Equals, 1)
   642  	c.Assert(usedSets[0].ID, Equals, int64(1))
   643  }
   645  func (s *testStatsSuite) TestDefCauslationDeferredCausetEstimate(c *C) {
   646  	defer cleanEnv(c, s.causetstore,
   647  	tk := testkit.NewTestKit(c, s.causetstore)
   648  	collate.SetNewDefCauslationEnabledForTest(true)
   649  	defer collate.SetNewDefCauslationEnabledForTest(false)
   650  	tk.MustInterDirc("use test")
   651  	tk.MustInterDirc("drop causet if exists t")
   652  	tk.MustInterDirc("create causet t(a varchar(20) collate utf8mb4_general_ci)")
   653  	tk.MustInterDirc("insert into t values('aaa'), ('bbb'), ('AAA'), ('BBB')")
   654  	h :=
   655  	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
   656  	tk.MustInterDirc("analyze causet t")
   657  	tk.MustInterDirc("explain select * from t where a = 'aaa'")
   658  	c.Assert(h.LoadNeededHistograms(), IsNil)
   659  	var (
   660  		input  []string
   661  		output [][]string
   662  	)
   663  	s.testData.GetTestCases(c, &input, &output)
   664  	for i := 0; i < len(input); i++ {
   665  		s.testData.OnRecord(func() {
   666  			output[i] = s.testData.ConvertRowsToStrings(tk.MustQuery(input[i]).Rows())
   667  		})
   668  		tk.MustQuery(input[i]).Check(testkit.Rows(output[i]...))
   669  	}
   670  }
   672  // TestDNFCondSelectivity tests selectivity calculation with DNF conditions covered by using independence assumption.
   673  func (s *testStatsSuite) TestDNFCondSelectivity(c *C) {
   674  	defer cleanEnv(c, s.causetstore,
   675  	testKit := testkit.NewTestKit(c, s.causetstore)
   677  	testKit.MustInterDirc("use test")
   678  	testKit.MustInterDirc("drop causet if exists t")
   679  	testKit.MustInterDirc("create causet t(a int, b int, c int, d int)")
   680  	testKit.MustInterDirc("insert into t value(1,5,4,4),(3,4,1,8),(4,2,6,10),(6,7,2,5),(7,1,4,9),(8,9,8,3),(9,1,9,1),(10,6,6,2)")
   681  	testKit.MustInterDirc("alter causet t add index (b)")
   682  	testKit.MustInterDirc("alter causet t add index (d)")
   683  	testKit.MustInterDirc(`analyze causet t`)
   685  	ctx := context.Background()
   686  	is :=
   687  	h :=
   688  	tb, err := is.TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t"))
   689  	c.Assert(err, IsNil)
   690  	tblInfo := tb.Meta()
   691  	statsTbl := h.GetTableStats(tblInfo)
   693  	var (
   694  		input  []string
   695  		output []struct {
   696  			ALLEGROALLEGROSQL string
   697  			Selectivity       float64
   698  		}
   699  	)
   700  	s.testData.GetTestCases(c, &input, &output)
   701  	for i, tt := range input {
   702  		sctx := testKit.Se.(stochastikctx.Context)
   703  		stmts, err := stochastik.Parse(sctx, tt)
   704  		c.Assert(err, IsNil, Commentf("error %v, for allegrosql %s", err, tt))
   705  		c.Assert(stmts, HasLen, 1)
   707  		err = causetembedded.Preprocess(sctx, stmts[0], is)
   708  		c.Assert(err, IsNil, Commentf("error %v, for allegrosql %s", err, tt))
   709  		p, _, err := causetembedded.BuildLogicalCauset(ctx, sctx, stmts[0], is)
   710  		c.Assert(err, IsNil, Commentf("error %v, for building plan, allegrosql %s", err, tt))
   712  		sel := p.(causetembedded.LogicalCauset).Children()[0].(*causetembedded.LogicalSelection)
   713  		ds := sel.Children()[0].(*causetembedded.DataSource)
   715  		histDefCausl := statsTbl.GenerateHistDefCauslFromDeferredCausetInfo(ds.DeferredCausets, ds.Schema().DeferredCausets)
   717  		ratio, _, err := histDefCausl.Selectivity(sctx, sel.Conditions, nil)
   718  		c.Assert(err, IsNil, Commentf("error %v, for expr %s", err, tt))
   719  		s.testData.OnRecord(func() {
   720  			output[i].ALLEGROALLEGROSQL = tt
   721  			output[i].Selectivity = ratio
   722  		})
   723  		c.Assert(math.Abs(ratio-output[i].Selectivity) < eps, IsTrue,
   724  			Commentf("for %s, needed: %v, got: %v", tt, output[i].Selectivity, ratio))
   725  	}
   727  	// Test issue 19981
   728  	testKit.MustInterDirc("select * from t where _milevadb_rowid is null or _milevadb_rowid > 7")
   729  }