github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/statistics/selectivity_test.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package statistics_test 15 16 import ( 17 "context" 18 "fmt" 19 "math" 20 "os" 21 "runtime/pprof" 22 "strings" 23 "testing" 24 "time" 25 26 "github.com/whtcorpsinc/BerolinaSQL/allegrosql" 27 "github.com/whtcorpsinc/BerolinaSQL/perceptron" 28 . "github.com/whtcorpsinc/check" 29 "github.com/whtcorpsinc/errors" 30 "github.com/whtcorpsinc/log" 31 causetembedded "github.com/whtcorpsinc/milevadb/causet/embedded" 32 "github.com/whtcorpsinc/milevadb/causetstore/mockstore" 33 "github.com/whtcorpsinc/milevadb/ekv" 34 "github.com/whtcorpsinc/milevadb/petri" 35 "github.com/whtcorpsinc/milevadb/soliton/codec" 36 "github.com/whtcorpsinc/milevadb/soliton/collate" 37 "github.com/whtcorpsinc/milevadb/soliton/ranger" 38 "github.com/whtcorpsinc/milevadb/soliton/solitonutil" 39 "github.com/whtcorpsinc/milevadb/soliton/testkit" 40 "github.com/whtcorpsinc/milevadb/soliton/testleak" 41 "github.com/whtcorpsinc/milevadb/statistics" 42 "github.com/whtcorpsinc/milevadb/statistics/handle" 43 "github.com/whtcorpsinc/milevadb/stochastik" 44 "github.com/whtcorpsinc/milevadb/stochastikctx" 45 "github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx" 46 "github.com/whtcorpsinc/milevadb/types" 47 "go.uber.org/zap" 48 "go.uber.org/zap/zapembedded" 49 ) 50 51 const eps = 1e-9 52 53 var _ = SerialSuites(&testStatsSuite{}) 54 55 type testStatsSuite struct { 56 causetstore ekv.CausetStorage 57 do *petri.Petri 58 hook *logHook 59 testData solitonutil.TestData 60 } 61 62 func (s *testStatsSuite) SetUpSuite(c *C) { 63 testleak.BeforeTest() 64 // Add the hook here to avoid data race. 65 s.registerHook() 66 var err error 67 s.causetstore, s.do, err = newStoreWithBootstrap() 68 c.Assert(err, IsNil) 69 s.testData, err = solitonutil.LoadTestSuiteData("testdata", "stats_suite") 70 c.Assert(err, IsNil) 71 } 72 73 func (s *testStatsSuite) TearDownSuite(c *C) { 74 s.do.Close() 75 c.Assert(s.causetstore.Close(), IsNil) 76 testleak.AfterTest(c)() 77 c.Assert(s.testData.GenerateOutputIfNeeded(), IsNil) 78 } 79 80 func (s *testStatsSuite) registerHook() { 81 conf := &log.Config{Level: os.Getenv("log_level"), File: log.FileLogConfig{}} 82 _, r, _ := log.InitLogger(conf) 83 s.hook = &logHook{r.Core, ""} 84 lg := zap.New(s.hook) 85 log.ReplaceGlobals(lg, r) 86 } 87 88 type logHook struct { 89 zapembedded.Core 90 results string 91 } 92 93 func (h *logHook) Write(entry zapembedded.Entry, fields []zapembedded.Field) error { 94 message := entry.Message 95 if idx := strings.Index(message, "[stats"); idx != -1 { 96 h.results = h.results + message 97 for _, f := range fields { 98 h.results = h.results + ", " + f.Key + "=" + h.field2String(f) 99 } 100 } 101 return nil 102 } 103 104 func (h *logHook) field2String(field zapembedded.Field) string { 105 switch field.Type { 106 case zapembedded.StringType: 107 return field.String 108 case zapembedded.Int64Type, zapembedded.Int32Type, zapembedded.Uint32Type: 109 return fmt.Sprintf("%v", field.Integer) 110 case zapembedded.Float64Type: 111 return fmt.Sprintf("%v", math.Float64frombits(uint64(field.Integer))) 112 case zapembedded.StringerType: 113 return field.Interface.(fmt.Stringer).String() 114 } 115 return "not support" 116 } 117 118 func (h *logHook) Check(e zapembedded.Entry, ce *zapembedded.CheckedEntry) *zapembedded.CheckedEntry { 119 if h.Enabled(e.Level) { 120 return ce.AddCore(e, h) 121 } 122 return ce 123 } 124 125 func newStoreWithBootstrap() (ekv.CausetStorage, *petri.Petri, error) { 126 causetstore, err := mockstore.NewMockStore() 127 if err != nil { 128 return nil, nil, errors.Trace(err) 129 } 130 stochastik.SetSchemaLease(0) 131 stochastik.DisableStats4Test() 132 petri.RunAutoAnalyze = false 133 do, err := stochastik.BootstrapStochastik(causetstore) 134 do.SetStatsUFIDelating(true) 135 return causetstore, do, errors.Trace(err) 136 } 137 138 func cleanEnv(c *C, causetstore ekv.CausetStorage, do *petri.Petri) { 139 tk := testkit.NewTestKit(c, causetstore) 140 tk.MustInterDirc("use test") 141 r := tk.MustQuery("show blocks") 142 for _, tb := range r.Rows() { 143 blockName := tb[0] 144 tk.MustInterDirc(fmt.Sprintf("drop causet %v", blockName)) 145 } 146 tk.MustInterDirc("delete from allegrosql.stats_spacetime") 147 tk.MustInterDirc("delete from allegrosql.stats_histograms") 148 tk.MustInterDirc("delete from allegrosql.stats_buckets") 149 do.StatsHandle().Clear() 150 } 151 152 // generateIntCauset will generate a causet slice, every dimension is begin from 0, end with num - 1. 153 // If dimension is x, num is y, the total number of causet is y^x. And This slice is sorted. 154 func (s *testStatsSuite) generateIntCauset(dimension, num int) ([]types.Causet, error) { 155 length := int(math.Pow(float64(num), float64(dimension))) 156 ret := make([]types.Causet, length) 157 if dimension == 1 { 158 for i := 0; i < num; i++ { 159 ret[i] = types.NewIntCauset(int64(i)) 160 } 161 } else { 162 sc := &stmtctx.StatementContext{TimeZone: time.Local} 163 // In this way, we can guarantee the causet is in order. 164 for i := 0; i < length; i++ { 165 data := make([]types.Causet, dimension) 166 j := i 167 for k := 0; k < dimension; k++ { 168 data[dimension-k-1].SetInt64(int64(j % num)) 169 j = j / num 170 } 171 bytes, err := codec.EncodeKey(sc, nil, data...) 172 if err != nil { 173 return nil, err 174 } 175 ret[i].SetBytes(bytes) 176 } 177 } 178 return ret, nil 179 } 180 181 // mockStatsHistogram will create a statistics.Histogram, of which the data is uniform distribution. 182 func mockStatsHistogram(id int64, values []types.Causet, repeat int64, tp *types.FieldType) *statistics.Histogram { 183 ndv := len(values) 184 histogram := statistics.NewHistogram(id, int64(ndv), 0, 0, tp, ndv, 0) 185 for i := 0; i < ndv; i++ { 186 histogram.AppendBucket(&values[i], &values[i], repeat*int64(i+1), repeat) 187 } 188 return histogram 189 } 190 191 func mockStatsTable(tbl *perceptron.TableInfo, rowCount int64) *statistics.Block { 192 histDefCausl := statistics.HistDefCausl{ 193 PhysicalID: tbl.ID, 194 HavePhysicalID: true, 195 Count: rowCount, 196 DeferredCausets: make(map[int64]*statistics.DeferredCauset, len(tbl.DeferredCausets)), 197 Indices: make(map[int64]*statistics.Index, len(tbl.Indices)), 198 } 199 statsTbl := &statistics.Block{ 200 HistDefCausl: histDefCausl, 201 } 202 return statsTbl 203 } 204 205 func (s *testStatsSuite) prepareSelectivity(testKit *testkit.TestKit, c *C) *statistics.Block { 206 testKit.MustInterDirc("use test") 207 testKit.MustInterDirc("drop causet if exists t") 208 testKit.MustInterDirc("create causet t(a int primary key, b int, c int, d int, e int, index idx_cd(c, d), index idx_de(d, e))") 209 210 is := s.do.SchemaReplicant() 211 tb, err := is.TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t")) 212 c.Assert(err, IsNil) 213 tbl := tb.Meta() 214 215 // mock the statistic causet 216 statsTbl := mockStatsTable(tbl, 540) 217 218 // Set the value of columns' histogram. 219 colValues, err := s.generateIntCauset(1, 54) 220 c.Assert(err, IsNil) 221 for i := 1; i <= 5; i++ { 222 statsTbl.DeferredCausets[int64(i)] = &statistics.DeferredCauset{Histogram: *mockStatsHistogram(int64(i), colValues, 10, types.NewFieldType(allegrosql.TypeLonglong)), Info: tbl.DeferredCausets[i-1]} 223 } 224 225 // Set the value of two indices' histograms. 226 idxValues, err := s.generateIntCauset(2, 3) 227 c.Assert(err, IsNil) 228 tp := types.NewFieldType(allegrosql.TypeBlob) 229 statsTbl.Indices[1] = &statistics.Index{Histogram: *mockStatsHistogram(1, idxValues, 60, tp), Info: tbl.Indices[0]} 230 statsTbl.Indices[2] = &statistics.Index{Histogram: *mockStatsHistogram(2, idxValues, 60, tp), Info: tbl.Indices[1]} 231 return statsTbl 232 } 233 234 func (s *testStatsSuite) TestSelectivity(c *C) { 235 defer cleanEnv(c, s.causetstore, s.do) 236 testKit := testkit.NewTestKit(c, s.causetstore) 237 statsTbl := s.prepareSelectivity(testKit, c) 238 is := s.do.SchemaReplicant() 239 240 longExpr := "0 < a and a = 1 " 241 for i := 1; i < 64; i++ { 242 longExpr += fmt.Sprintf(" and a > %d ", i) 243 } 244 tests := []struct { 245 exprs string 246 selectivity float64 247 }{ 248 { 249 exprs: "a > 0 and a < 2", 250 selectivity: 0.01851851851, 251 }, 252 { 253 exprs: "a >= 1 and a < 2", 254 selectivity: 0.01851851851, 255 }, 256 { 257 exprs: "a >= 1 and b > 1 and a < 2", 258 selectivity: 0.01783264746, 259 }, 260 { 261 exprs: "a >= 1 and c > 1 and a < 2", 262 selectivity: 0.00617283950, 263 }, 264 { 265 exprs: "a >= 1 and c >= 1 and a < 2", 266 selectivity: 0.01234567901, 267 }, 268 { 269 exprs: "d = 0 and e = 1", 270 selectivity: 0.11111111111, 271 }, 272 { 273 exprs: "b > 1", 274 selectivity: 0.96296296296, 275 }, 276 { 277 exprs: "a > 1 and b < 2 and c > 3 and d < 4 and e > 5", 278 selectivity: 0, 279 }, 280 { 281 exprs: longExpr, 282 selectivity: 0.001, 283 }, 284 } 285 286 ctx := context.Background() 287 for _, tt := range tests { 288 allegrosql := "select * from t where " + tt.exprs 289 comment := Commentf("for %s", tt.exprs) 290 sctx := testKit.Se.(stochastikctx.Context) 291 stmts, err := stochastik.Parse(sctx, allegrosql) 292 c.Assert(err, IsNil, Commentf("error %v, for expr %s", err, tt.exprs)) 293 c.Assert(stmts, HasLen, 1) 294 295 err = causetembedded.Preprocess(sctx, stmts[0], is) 296 c.Assert(err, IsNil, comment) 297 p, _, err := causetembedded.BuildLogicalCauset(ctx, sctx, stmts[0], is) 298 c.Assert(err, IsNil, Commentf("error %v, for building plan, expr %s", err, tt.exprs)) 299 300 sel := p.(causetembedded.LogicalCauset).Children()[0].(*causetembedded.LogicalSelection) 301 ds := sel.Children()[0].(*causetembedded.DataSource) 302 303 histDefCausl := statsTbl.GenerateHistDefCauslFromDeferredCausetInfo(ds.DeferredCausets, ds.Schema().DeferredCausets) 304 305 ratio, _, err := histDefCausl.Selectivity(sctx, sel.Conditions, nil) 306 c.Assert(err, IsNil, comment) 307 c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio)) 308 309 histDefCausl.Count *= 10 310 ratio, _, err = histDefCausl.Selectivity(sctx, sel.Conditions, nil) 311 c.Assert(err, IsNil, comment) 312 c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio)) 313 } 314 } 315 316 // TestDiscreteDistribution tests the estimation for discrete data distribution. This is more common when the index 317 // consists several columns, and the first column has small NDV. 318 func (s *testStatsSuite) TestDiscreteDistribution(c *C) { 319 defer cleanEnv(c, s.causetstore, s.do) 320 testKit := testkit.NewTestKit(c, s.causetstore) 321 testKit.MustInterDirc("use test") 322 testKit.MustInterDirc("drop causet if exists t") 323 testKit.MustInterDirc("create causet t(a char(10), b int, key idx(a, b))") 324 for i := 0; i < 499; i++ { 325 testKit.MustInterDirc(fmt.Sprintf("insert into t values ('cn', %d)", i)) 326 } 327 for i := 0; i < 10; i++ { 328 testKit.MustInterDirc("insert into t values ('tw', 0)") 329 } 330 testKit.MustInterDirc("analyze causet t") 331 var ( 332 input []string 333 output [][]string 334 ) 335 s.testData.GetTestCases(c, &input, &output) 336 for i, tt := range input { 337 s.testData.OnRecord(func() { 338 output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(tt).Rows()) 339 }) 340 testKit.MustQuery(tt).Check(testkit.Rows(output[i]...)) 341 } 342 } 343 344 func (s *testStatsSuite) TestSelectCombinedLowBound(c *C) { 345 defer cleanEnv(c, s.causetstore, s.do) 346 testKit := testkit.NewTestKit(c, s.causetstore) 347 testKit.MustInterDirc("use test") 348 testKit.MustInterDirc("drop causet if exists t") 349 testKit.MustInterDirc("create causet t(id int auto_increment, kid int, pid int, primary key(id), key(kid, pid))") 350 testKit.MustInterDirc("insert into t (kid, pid) values (1,2), (1,3), (1,4),(1, 11), (1, 12), (1, 13), (1, 14), (2, 2), (2, 3), (2, 4)") 351 testKit.MustInterDirc("analyze causet t") 352 var ( 353 input []string 354 output [][]string 355 ) 356 s.testData.GetTestCases(c, &input, &output) 357 for i, tt := range input { 358 s.testData.OnRecord(func() { 359 output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(tt).Rows()) 360 }) 361 testKit.MustQuery(tt).Check(testkit.Rows(output[i]...)) 362 } 363 } 364 365 func getRange(start, end int64) []*ranger.Range { 366 ran := &ranger.Range{ 367 LowVal: []types.Causet{types.NewIntCauset(start)}, 368 HighVal: []types.Causet{types.NewIntCauset(end)}, 369 } 370 return []*ranger.Range{ran} 371 } 372 373 func (s *testStatsSuite) TestOutOfRangeEQEstimation(c *C) { 374 defer cleanEnv(c, s.causetstore, s.do) 375 testKit := testkit.NewTestKit(c, s.causetstore) 376 testKit.MustInterDirc("use test") 377 testKit.MustInterDirc("drop causet if exists t") 378 testKit.MustInterDirc("create causet t(a int)") 379 for i := 0; i < 1000; i++ { 380 testKit.MustInterDirc(fmt.Sprintf("insert into t values (%v)", i/4)) // 0 ~ 249 381 } 382 testKit.MustInterDirc("analyze causet t") 383 384 h := s.do.StatsHandle() 385 causet, err := s.do.SchemaReplicant().TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t")) 386 c.Assert(err, IsNil) 387 statsTbl := h.GetTableStats(causet.Meta()) 388 sc := &stmtctx.StatementContext{} 389 col := statsTbl.DeferredCausets[causet.Meta().DeferredCausets[0].ID] 390 count, err := col.GetDeferredCausetRowCount(sc, getRange(250, 250), 0, false) 391 c.Assert(err, IsNil) 392 c.Assert(count, Equals, float64(0)) 393 394 for i := 0; i < 8; i++ { 395 count, err := col.GetDeferredCausetRowCount(sc, getRange(250, 250), int64(i+1), false) 396 c.Assert(err, IsNil) 397 c.Assert(count, Equals, math.Min(float64(i+1), 4)) // estRows must be less than modifyCnt 398 } 399 } 400 401 func (s *testStatsSuite) TestEstimationForUnknownValues(c *C) { 402 defer cleanEnv(c, s.causetstore, s.do) 403 testKit := testkit.NewTestKit(c, s.causetstore) 404 testKit.MustInterDirc("use test") 405 testKit.MustInterDirc("drop causet if exists t") 406 testKit.MustInterDirc("create causet t(a int, b int, key idx(a, b))") 407 testKit.MustInterDirc("analyze causet t") 408 for i := 0; i < 10; i++ { 409 testKit.MustInterDirc(fmt.Sprintf("insert into t values (%d, %d)", i, i)) 410 } 411 h := s.do.StatsHandle() 412 c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil) 413 testKit.MustInterDirc("analyze causet t") 414 for i := 0; i < 10; i++ { 415 testKit.MustInterDirc(fmt.Sprintf("insert into t values (%d, %d)", i+10, i+10)) 416 } 417 c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil) 418 c.Assert(h.UFIDelate(s.do.SchemaReplicant()), IsNil) 419 causet, err := s.do.SchemaReplicant().TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t")) 420 c.Assert(err, IsNil) 421 statsTbl := h.GetTableStats(causet.Meta()) 422 423 sc := &stmtctx.StatementContext{} 424 colID := causet.Meta().DeferredCausets[0].ID 425 count, err := statsTbl.GetRowCountByDeferredCausetRanges(sc, colID, getRange(30, 30)) 426 c.Assert(err, IsNil) 427 c.Assert(count, Equals, 0.2) 428 429 count, err = statsTbl.GetRowCountByDeferredCausetRanges(sc, colID, getRange(9, 30)) 430 c.Assert(err, IsNil) 431 c.Assert(count, Equals, 2.4000000000000004) 432 433 count, err = statsTbl.GetRowCountByDeferredCausetRanges(sc, colID, getRange(9, math.MaxInt64)) 434 c.Assert(err, IsNil) 435 c.Assert(count, Equals, 2.4000000000000004) 436 437 idxID := causet.Meta().Indices[0].ID 438 count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(30, 30)) 439 c.Assert(err, IsNil) 440 c.Assert(count, Equals, 0.2) 441 442 count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(9, 30)) 443 c.Assert(err, IsNil) 444 c.Assert(count, Equals, 2.2) 445 446 testKit.MustInterDirc("truncate causet t") 447 testKit.MustInterDirc("insert into t values (null, null)") 448 testKit.MustInterDirc("analyze causet t") 449 causet, err = s.do.SchemaReplicant().TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t")) 450 c.Assert(err, IsNil) 451 statsTbl = h.GetTableStats(causet.Meta()) 452 453 colID = causet.Meta().DeferredCausets[0].ID 454 count, err = statsTbl.GetRowCountByDeferredCausetRanges(sc, colID, getRange(1, 30)) 455 c.Assert(err, IsNil) 456 c.Assert(count, Equals, 0.0) 457 458 testKit.MustInterDirc("drop causet t") 459 testKit.MustInterDirc("create causet t(a int, b int, index idx(b))") 460 testKit.MustInterDirc("insert into t values (1,1)") 461 testKit.MustInterDirc("analyze causet t") 462 causet, err = s.do.SchemaReplicant().TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t")) 463 c.Assert(err, IsNil) 464 statsTbl = h.GetTableStats(causet.Meta()) 465 466 colID = causet.Meta().DeferredCausets[0].ID 467 count, err = statsTbl.GetRowCountByDeferredCausetRanges(sc, colID, getRange(2, 2)) 468 c.Assert(err, IsNil) 469 c.Assert(count, Equals, 0.0) 470 471 idxID = causet.Meta().Indices[0].ID 472 count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(2, 2)) 473 c.Assert(err, IsNil) 474 c.Assert(count, Equals, 0.0) 475 } 476 477 func (s *testStatsSuite) TestEstimationUniqueKeyEqualConds(c *C) { 478 defer cleanEnv(c, s.causetstore, s.do) 479 testKit := testkit.NewTestKit(c, s.causetstore) 480 testKit.MustInterDirc("use test") 481 testKit.MustInterDirc("drop causet if exists t") 482 testKit.MustInterDirc("create causet t(a int, b int, c int, unique key(b))") 483 testKit.MustInterDirc("insert into t values (1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5),(6,6,6),(7,7,7)") 484 testKit.MustInterDirc("analyze causet t with 4 cmsketch width, 1 cmsketch depth;") 485 causet, err := s.do.SchemaReplicant().TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t")) 486 c.Assert(err, IsNil) 487 statsTbl := s.do.StatsHandle().GetTableStats(causet.Meta()) 488 489 sc := &stmtctx.StatementContext{} 490 idxID := causet.Meta().Indices[0].ID 491 count, err := statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(7, 7)) 492 c.Assert(err, IsNil) 493 c.Assert(count, Equals, 1.0) 494 495 count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(6, 6)) 496 c.Assert(err, IsNil) 497 c.Assert(count, Equals, 1.0) 498 499 colID := causet.Meta().DeferredCausets[0].ID 500 count, err = statsTbl.GetRowCountByIntDeferredCausetRanges(sc, colID, getRange(7, 7)) 501 c.Assert(err, IsNil) 502 c.Assert(count, Equals, 1.0) 503 504 count, err = statsTbl.GetRowCountByIntDeferredCausetRanges(sc, colID, getRange(6, 6)) 505 c.Assert(err, IsNil) 506 c.Assert(count, Equals, 1.0) 507 } 508 509 func (s *testStatsSuite) TestPrimaryKeySelectivity(c *C) { 510 defer cleanEnv(c, s.causetstore, s.do) 511 testKit := testkit.NewTestKit(c, s.causetstore) 512 testKit.MustInterDirc("use test") 513 testKit.MustInterDirc("drop causet if exists t") 514 testKit.MustInterDirc("set @@milevadb_enable_clustered_index=0") 515 testKit.MustInterDirc("create causet t(a char(10) primary key, b int)") 516 var input, output [][]string 517 s.testData.GetTestCases(c, &input, &output) 518 for i, ts := range input { 519 for j, tt := range ts { 520 if j != len(ts)-1 { 521 testKit.MustInterDirc(tt) 522 } 523 s.testData.OnRecord(func() { 524 if j == len(ts)-1 { 525 output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(tt).Rows()) 526 } 527 }) 528 if j == len(ts)-1 { 529 testKit.MustQuery(tt).Check(testkit.Rows(output[i]...)) 530 } 531 } 532 } 533 } 534 535 func BenchmarkSelectivity(b *testing.B) { 536 c := &C{} 537 s := &testStatsSuite{} 538 s.SetUpSuite(c) 539 defer s.TearDownSuite(c) 540 541 testKit := testkit.NewTestKit(c, s.causetstore) 542 statsTbl := s.prepareSelectivity(testKit, c) 543 is := s.do.SchemaReplicant() 544 exprs := "a > 1 and b < 2 and c > 3 and d < 4 and e > 5" 545 allegrosql := "select * from t where " + exprs 546 comment := Commentf("for %s", exprs) 547 sctx := testKit.Se.(stochastikctx.Context) 548 stmts, err := stochastik.Parse(sctx, allegrosql) 549 c.Assert(err, IsNil, Commentf("error %v, for expr %s", err, exprs)) 550 c.Assert(stmts, HasLen, 1) 551 err = causetembedded.Preprocess(sctx, stmts[0], is) 552 c.Assert(err, IsNil, comment) 553 p, _, err := causetembedded.BuildLogicalCauset(context.Background(), sctx, stmts[0], is) 554 c.Assert(err, IsNil, Commentf("error %v, for building plan, expr %s", err, exprs)) 555 556 file, err := os.Create("cpu.profile") 557 c.Assert(err, IsNil) 558 defer file.Close() 559 pprof.StartCPUProfile(file) 560 561 b.Run("Selectivity", func(b *testing.B) { 562 b.ResetTimer() 563 for i := 0; i < b.N; i++ { 564 _, _, err := statsTbl.Selectivity(sctx, p.(causetembedded.LogicalCauset).Children()[0].(*causetembedded.LogicalSelection).Conditions, nil) 565 c.Assert(err, IsNil) 566 } 567 b.ReportAllocs() 568 }) 569 pprof.StopCPUProfile() 570 } 571 572 func (s *testStatsSuite) TestDeferredCausetIndexNullEstimation(c *C) { 573 defer cleanEnv(c, s.causetstore, s.do) 574 testKit := testkit.NewTestKit(c, s.causetstore) 575 testKit.MustInterDirc("use test") 576 testKit.MustInterDirc("drop causet if exists t") 577 testKit.MustInterDirc("create causet t(a int, b int, c int, index idx_b(b), index idx_c_a(c, a))") 578 testKit.MustInterDirc("insert into t values(1,null,1),(2,null,2),(3,3,3),(4,null,4),(null,null,null);") 579 h := s.do.StatsHandle() 580 c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil) 581 testKit.MustInterDirc("analyze causet t") 582 var ( 583 input []string 584 output [][]string 585 ) 586 s.testData.GetTestCases(c, &input, &output) 587 for i := 0; i < 5; i++ { 588 s.testData.OnRecord(func() { 589 output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(input[i]).Rows()) 590 }) 591 testKit.MustQuery(input[i]).Check(testkit.Rows(output[i]...)) 592 } 593 // Make sure column stats has been loaded. 594 testKit.MustInterDirc(`explain select * from t where a is null`) 595 c.Assert(h.LoadNeededHistograms(), IsNil) 596 for i := 5; i < len(input); i++ { 597 s.testData.OnRecord(func() { 598 output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(input[i]).Rows()) 599 }) 600 testKit.MustQuery(input[i]).Check(testkit.Rows(output[i]...)) 601 } 602 } 603 604 func (s *testStatsSuite) TestUniqCompEqualEst(c *C) { 605 defer cleanEnv(c, s.causetstore, s.do) 606 testKit := testkit.NewTestKit(c, s.causetstore) 607 testKit.MustInterDirc("use test") 608 testKit.MustInterDirc("drop causet if exists t") 609 testKit.MustInterDirc("create causet t(a int, b int, primary key(a, b))") 610 testKit.MustInterDirc("insert into t values(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),(1,7),(1,8),(1,9),(1,10)") 611 h := s.do.StatsHandle() 612 c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil) 613 testKit.MustInterDirc("analyze causet t") 614 var ( 615 input []string 616 output [][]string 617 ) 618 s.testData.GetTestCases(c, &input, &output) 619 for i := 0; i < 1; i++ { 620 s.testData.OnRecord(func() { 621 output[i] = s.testData.ConvertRowsToStrings(testKit.MustQuery(input[i]).Rows()) 622 }) 623 testKit.MustQuery(input[i]).Check(testkit.Rows(output[i]...)) 624 } 625 } 626 627 func (s *testStatsSuite) TestSelectivityGreedyAlgo(c *C) { 628 nodes := make([]*statistics.StatsNode, 3) 629 nodes[0] = statistics.MockStatsNode(1, 3, 2) 630 nodes[1] = statistics.MockStatsNode(2, 5, 2) 631 nodes[2] = statistics.MockStatsNode(3, 9, 2) 632 633 // Sets should not overlap on mask, so only nodes[0] is chosen. 634 usedSets := statistics.GetUsableSetsByGreedy(nodes) 635 c.Assert(len(usedSets), Equals, 1) 636 c.Assert(usedSets[0].ID, Equals, int64(1)) 637 638 nodes[0], nodes[1] = nodes[1], nodes[0] 639 // Sets chosen should be sblock, so the returned node is still the one with ID 1. 640 usedSets = statistics.GetUsableSetsByGreedy(nodes) 641 c.Assert(len(usedSets), Equals, 1) 642 c.Assert(usedSets[0].ID, Equals, int64(1)) 643 } 644 645 func (s *testStatsSuite) TestDefCauslationDeferredCausetEstimate(c *C) { 646 defer cleanEnv(c, s.causetstore, s.do) 647 tk := testkit.NewTestKit(c, s.causetstore) 648 collate.SetNewDefCauslationEnabledForTest(true) 649 defer collate.SetNewDefCauslationEnabledForTest(false) 650 tk.MustInterDirc("use test") 651 tk.MustInterDirc("drop causet if exists t") 652 tk.MustInterDirc("create causet t(a varchar(20) collate utf8mb4_general_ci)") 653 tk.MustInterDirc("insert into t values('aaa'), ('bbb'), ('AAA'), ('BBB')") 654 h := s.do.StatsHandle() 655 c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil) 656 tk.MustInterDirc("analyze causet t") 657 tk.MustInterDirc("explain select * from t where a = 'aaa'") 658 c.Assert(h.LoadNeededHistograms(), IsNil) 659 var ( 660 input []string 661 output [][]string 662 ) 663 s.testData.GetTestCases(c, &input, &output) 664 for i := 0; i < len(input); i++ { 665 s.testData.OnRecord(func() { 666 output[i] = s.testData.ConvertRowsToStrings(tk.MustQuery(input[i]).Rows()) 667 }) 668 tk.MustQuery(input[i]).Check(testkit.Rows(output[i]...)) 669 } 670 } 671 672 // TestDNFCondSelectivity tests selectivity calculation with DNF conditions covered by using independence assumption. 673 func (s *testStatsSuite) TestDNFCondSelectivity(c *C) { 674 defer cleanEnv(c, s.causetstore, s.do) 675 testKit := testkit.NewTestKit(c, s.causetstore) 676 677 testKit.MustInterDirc("use test") 678 testKit.MustInterDirc("drop causet if exists t") 679 testKit.MustInterDirc("create causet t(a int, b int, c int, d int)") 680 testKit.MustInterDirc("insert into t value(1,5,4,4),(3,4,1,8),(4,2,6,10),(6,7,2,5),(7,1,4,9),(8,9,8,3),(9,1,9,1),(10,6,6,2)") 681 testKit.MustInterDirc("alter causet t add index (b)") 682 testKit.MustInterDirc("alter causet t add index (d)") 683 testKit.MustInterDirc(`analyze causet t`) 684 685 ctx := context.Background() 686 is := s.do.SchemaReplicant() 687 h := s.do.StatsHandle() 688 tb, err := is.TableByName(perceptron.NewCIStr("test"), perceptron.NewCIStr("t")) 689 c.Assert(err, IsNil) 690 tblInfo := tb.Meta() 691 statsTbl := h.GetTableStats(tblInfo) 692 693 var ( 694 input []string 695 output []struct { 696 ALLEGROALLEGROSQL string 697 Selectivity float64 698 } 699 ) 700 s.testData.GetTestCases(c, &input, &output) 701 for i, tt := range input { 702 sctx := testKit.Se.(stochastikctx.Context) 703 stmts, err := stochastik.Parse(sctx, tt) 704 c.Assert(err, IsNil, Commentf("error %v, for allegrosql %s", err, tt)) 705 c.Assert(stmts, HasLen, 1) 706 707 err = causetembedded.Preprocess(sctx, stmts[0], is) 708 c.Assert(err, IsNil, Commentf("error %v, for allegrosql %s", err, tt)) 709 p, _, err := causetembedded.BuildLogicalCauset(ctx, sctx, stmts[0], is) 710 c.Assert(err, IsNil, Commentf("error %v, for building plan, allegrosql %s", err, tt)) 711 712 sel := p.(causetembedded.LogicalCauset).Children()[0].(*causetembedded.LogicalSelection) 713 ds := sel.Children()[0].(*causetembedded.DataSource) 714 715 histDefCausl := statsTbl.GenerateHistDefCauslFromDeferredCausetInfo(ds.DeferredCausets, ds.Schema().DeferredCausets) 716 717 ratio, _, err := histDefCausl.Selectivity(sctx, sel.Conditions, nil) 718 c.Assert(err, IsNil, Commentf("error %v, for expr %s", err, tt)) 719 s.testData.OnRecord(func() { 720 output[i].ALLEGROALLEGROSQL = tt 721 output[i].Selectivity = ratio 722 }) 723 c.Assert(math.Abs(ratio-output[i].Selectivity) < eps, IsTrue, 724 Commentf("for %s, needed: %v, got: %v", tt, output[i].Selectivity, ratio)) 725 } 726 727 // Test issue 19981 728 testKit.MustInterDirc("select * from t where _milevadb_rowid is null or _milevadb_rowid > 7") 729 }