github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/interlock/analyze.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package interlock

import (
	"bytes"
	"context"
	"fmt"
	"math"
	"math/rand"
	"runtime"
	"sort"
	"strconv"
	"sync"
	"sync/atomic"
	"time"

	"github.com/cznic/mathutil"
	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
	"github.com/whtcorpsinc/BerolinaSQL/ast"
	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
	"github.com/whtcorpsinc/BerolinaSQL/terror"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/failpoint"
	"github.com/whtcorpsinc/fidelpb/go-fidelpb"
	"github.com/whtcorpsinc/milevadb/allegrosql"
	"github.com/whtcorpsinc/milevadb/blockcodec"
	"github.com/whtcorpsinc/milevadb/causet"
	"github.com/whtcorpsinc/milevadb/causet/embedded"
	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb"
	"github.com/whtcorpsinc/milevadb/ekv"
	"github.com/whtcorpsinc/milevadb/metrics"
	"github.com/whtcorpsinc/milevadb/petri"
	"github.com/whtcorpsinc/milevadb/schemareplicant"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/codec"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/soliton/ranger"
	"github.com/whtcorpsinc/milevadb/soliton/sqlexec"
	"github.com/whtcorpsinc/milevadb/statistics"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"github.com/whtcorpsinc/milevadb/stochastikctx/variable"
	"github.com/whtcorpsinc/milevadb/types"
	"go.uber.org/zap"
)

var _ InterlockingDirectorate = &AnalyzeInterDirc{}

// AnalyzeInterDirc represents Analyze interlock.
type AnalyzeInterDirc struct {
	baseInterlockingDirectorate
	tasks []*analyzeTask
	wg    *sync.WaitGroup
}

var (
	// RandSeed is the seed for the math/rand package.
	// It's public for test.
	RandSeed = int64(1)
)

const (
	maxRegionSampleSize = 1000
	maxSketchSize       = 10000
)
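// The ANALYZE flow implemented by Next below is a simple fan-out/fan-in:
// getBuildStatsConcurrency decides the worker count, every task is pushed
// into taskCh, the workers send per-task analyzeResults back on resultCh,
// and the main goroutine persists each result through the stats handle.
// The first worker (isCloseChanThread) waits for its peers and closes
// resultCh, so the collection loop terminates once every worker has either
// finished or panicked.
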
// Next implements the InterlockingDirectorate Next interface.
func (e *AnalyzeInterDirc) Next(ctx context.Context, req *chunk.Chunk) error {
	concurrency, err := getBuildStatsConcurrency(e.ctx)
	if err != nil {
		return err
	}
	taskCh := make(chan *analyzeTask, len(e.tasks))
	resultCh := make(chan analyzeResult, len(e.tasks))
	e.wg.Add(concurrency)
	for i := 0; i < concurrency; i++ {
		go e.analyzeWorker(taskCh, resultCh, i == 0)
	}
	for _, task := range e.tasks {
		statistics.AddNewAnalyzeJob(task.job)
	}
	for _, task := range e.tasks {
		taskCh <- task
	}
	close(taskCh)
	statsHandle := petri.GetPetri(e.ctx).StatsHandle()
	panicCnt := 0
	for panicCnt < concurrency {
		result, ok := <-resultCh
		if !ok {
			break
		}
		if result.Err != nil {
			err = result.Err
			if err == errAnalyzeWorkerPanic {
				panicCnt++
			} else {
				logutil.Logger(ctx).Error("analyze failed", zap.Error(err))
			}
			result.job.Finish(true)
			continue
		}
		for i, hg := range result.Hist {
			err1 := statsHandle.SaveStatsToStorage(result.BlockID.PersistID, result.Count, result.IsIndex, hg, result.Cms[i], 1)
			if err1 != nil {
				err = err1
				logutil.Logger(ctx).Error("save stats to storage failed", zap.Error(err))
				result.job.Finish(true)
				continue
			}
		}
		if err1 := statsHandle.SaveExtendedStatsToStorage(result.BlockID.PersistID, result.ExtStats, false); err1 != nil {
			err = err1
			logutil.Logger(ctx).Error("save extended stats to storage failed", zap.Error(err))
			result.job.Finish(true)
		} else {
			result.job.Finish(false)
		}
	}
	for _, task := range e.tasks {
		statistics.MoveToHistory(task.job)
	}
	if err != nil {
		return err
	}
	return statsHandle.UFIDelate(schemareplicant.GetSchemaReplicant(e.ctx))
}

func getBuildStatsConcurrency(ctx stochastikctx.Context) (int, error) {
	stochastikVars := ctx.GetStochastikVars()
	concurrency, err := variable.GetStochastikSystemVar(stochastikVars, variable.MilevaDBBuildStatsConcurrency)
	if err != nil {
		return 0, err
	}
	c, err := strconv.ParseInt(concurrency, 10, 64)
	return int(c), err
}

type taskType int

const (
	defCausTask taskType = iota
	idxTask
	fastTask
	pkIncrementalTask
	idxIncrementalTask
)

type analyzeTask struct {
	taskType                    taskType
	idxInterDirc                *AnalyzeIndexInterDirc
	defCausInterDirc            *AnalyzeDeferredCausetsInterDirc
	fastInterDirc               *AnalyzeFastInterDirc
	idxIncrementalInterDirc     *analyzeIndexIncrementalInterDirc
	defCausIncrementalInterDirc *analyzePKIncrementalInterDirc
	job                         *statistics.AnalyzeJob
}

var errAnalyzeWorkerPanic = errors.New("analyze worker panic")
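
// analyzeWorker drains taskCh, runs each analyze task, and sends the result
// (or a panic marker) to resultCh. A recovered panic is reported as
// errAnalyzeWorkerPanic so Next can keep counting live workers. The worker
// started with isCloseChanThread waits for the whole WaitGroup and then
// closes resultCh on behalf of all workers.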
func (e *AnalyzeInterDirc) analyzeWorker(taskCh <-chan *analyzeTask, resultCh chan<- analyzeResult, isCloseChanThread bool) {
	var task *analyzeTask
	defer func() {
		if r := recover(); r != nil {
			buf := make([]byte, 4096)
			stackSize := runtime.Stack(buf, false)
			buf = buf[:stackSize]
			logutil.BgLogger().Error("analyze worker panicked", zap.String("stack", string(buf)))
			metrics.PanicCounter.WithLabelValues(metrics.LabelAnalyze).Inc()
			resultCh <- analyzeResult{
				Err: errAnalyzeWorkerPanic,
				job: task.job,
			}
		}
		e.wg.Done()
		if isCloseChanThread {
			e.wg.Wait()
			close(resultCh)
		}
	}()
	for {
		var ok bool
		task, ok = <-taskCh
		if !ok {
			break
		}
		task.job.Start()
		switch task.taskType {
		case defCausTask:
			task.defCausInterDirc.job = task.job
			resultCh <- analyzeDeferredCausetsPushdown(task.defCausInterDirc)
		case idxTask:
			task.idxInterDirc.job = task.job
			resultCh <- analyzeIndexPushdown(task.idxInterDirc)
		case fastTask:
			task.fastInterDirc.job = task.job
			task.job.Start()
			for _, result := range analyzeFastInterDirc(task.fastInterDirc) {
				resultCh <- result
			}
		case pkIncrementalTask:
			task.defCausIncrementalInterDirc.job = task.job
			resultCh <- analyzePKIncremental(task.defCausIncrementalInterDirc)
		case idxIncrementalTask:
			task.idxIncrementalInterDirc.job = task.job
			resultCh <- analyzeIndexIncremental(task.idxIncrementalInterDirc)
		}
	}
}

func analyzeIndexPushdown(idxInterDirc *AnalyzeIndexInterDirc) analyzeResult {
	ranges := ranger.FullRange()
	// For a single-defCausumn index, we do not load null rows from EinsteinDB, so the built histogram would not
	// include null values, and its `NullCount` would be set by the result of another allegrosql call that fetches
	// the null rows. For a multi-defCausumn index, we cannot define null for the rows, so we still use the full
	// range, and the rows containing null fields would exist in the built histograms. Note that the `NullCount`
	// of histograms for a multi-defCausumn index is always 0 then.
	if len(idxInterDirc.idxInfo.DeferredCausets) == 1 {
		ranges = ranger.FullNotNullRange()
	}
	hist, cms, err := idxInterDirc.buildStats(ranges, true)
	if err != nil {
		return analyzeResult{Err: err, job: idxInterDirc.job}
	}
	result := analyzeResult{
		BlockID: idxInterDirc.blockID,
		Hist:    []*statistics.Histogram{hist},
		Cms:     []*statistics.CMSketch{cms},
		IsIndex: 1,
		job:     idxInterDirc.job,
	}
	result.Count = hist.NullCount
	if hist.Len() > 0 {
		result.Count += hist.Buckets[hist.Len()-1].Count
	}
	return result
}
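
// histogramTotalCount is an illustrative sketch and is not called by the
// interlocks in this file: it spells out how the pushdown functions above
// and below derive a total row count from a histogram, namely the NULL row
// count plus the cumulative count stored in the last bucket.
func histogramTotalCount(hist *statistics.Histogram) int64 {
	count := hist.NullCount
	if hist.Len() > 0 {
		count += hist.Buckets[hist.Len()-1].Count
	}
	return count
}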
// AnalyzeIndexInterDirc represents the analyze index push down interlock.
type AnalyzeIndexInterDirc struct {
	ctx            stochastikctx.Context
	blockID        embedded.AnalyzeBlockID
	idxInfo        *perceptron.IndexInfo
	isCommonHandle bool
	concurrency    int
	priority       int
	analyzePB      *fidelpb.AnalyzeReq
	result         allegrosql.SelectResult
	countNullRes   allegrosql.SelectResult
	opts           map[ast.AnalyzeOptionType]uint64
	job            *statistics.AnalyzeJob
}

// fetchAnalyzeResult builds and dispatches the `ekv.Request` from the given ranges, and stores the `SelectResult`
// in the corresponding field based on the input `isNullRange` argument, which indicates whether the range is the
// special null range used by a single-defCausumn index to get the null count.
func (e *AnalyzeIndexInterDirc) fetchAnalyzeResult(ranges []*ranger.Range, isNullRange bool) error {
	var builder allegrosql.RequestBuilder
	var ekvReqBuilder *allegrosql.RequestBuilder
	if e.isCommonHandle && e.idxInfo.Primary {
		ekvReqBuilder = builder.SetCommonHandleRanges(e.ctx.GetStochastikVars().StmtCtx, e.blockID.DefCauslectIDs[0], ranges)
	} else {
		ekvReqBuilder = builder.SetIndexRanges(e.ctx.GetStochastikVars().StmtCtx, e.blockID.DefCauslectIDs[0], e.idxInfo.ID, ranges)
	}
	ekvReq, err := ekvReqBuilder.
		SetAnalyzeRequest(e.analyzePB).
		SetStartTS(math.MaxUint64).
		SetKeepOrder(true).
		SetConcurrency(e.concurrency).
		Build()
	if err != nil {
		return err
	}
	ctx := context.TODO()
	result, err := allegrosql.Analyze(ctx, e.ctx.GetClient(), ekvReq, e.ctx.GetStochastikVars().KVVars, e.ctx.GetStochastikVars().InRestrictedALLEGROSQL)
	if err != nil {
		return err
	}
	result.Fetch(ctx)
	if isNullRange {
		e.countNullRes = result
	} else {
		e.result = result
	}
	return nil
}

func (e *AnalyzeIndexInterDirc) open(ranges []*ranger.Range, considerNull bool) error {
	err := e.fetchAnalyzeResult(ranges, false)
	if err != nil {
		return err
	}
	if considerNull && len(e.idxInfo.DeferredCausets) == 1 {
		ranges = ranger.NullRange()
		err = e.fetchAnalyzeResult(ranges, true)
		if err != nil {
			return err
		}
	}
	return nil
}

func (e *AnalyzeIndexInterDirc) buildStatsFromResult(result allegrosql.SelectResult, needCMS bool) (*statistics.Histogram, *statistics.CMSketch, error) {
	failpoint.Inject("buildStatsFromResult", func(val failpoint.Value) {
		if val.(bool) {
			failpoint.Return(nil, nil, errors.New("mock buildStatsFromResult error"))
		}
	})
	hist := &statistics.Histogram{}
	var cms *statistics.CMSketch
	if needCMS {
		cms = statistics.NewCMSketch(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]))
	}
	for {
		data, err := result.NextRaw(context.TODO())
		if err != nil {
			return nil, nil, err
		}
		if data == nil {
			break
		}
		resp := &fidelpb.AnalyzeIndexResp{}
		err = resp.Unmarshal(data)
		if err != nil {
			return nil, nil, err
		}
		respHist := statistics.HistogramFromProto(resp.Hist)
		e.job.UFIDelate(int64(respHist.TotalEventCount()))
		hist, err = statistics.MergeHistograms(e.ctx.GetStochastikVars().StmtCtx, hist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets]))
		if err != nil {
			return nil, nil, err
		}
		if needCMS {
			if resp.Cms == nil {
				logutil.Logger(context.TODO()).Warn("nil CMS in response", zap.String("causet", e.idxInfo.Block.O), zap.String("index", e.idxInfo.Name.O))
			} else if err := cms.MergeCMSketch(statistics.CMSketchFromProto(resp.Cms), 0); err != nil {
				return nil, nil, err
			}
		}
	}
	err := hist.ExtractTopN(cms, len(e.idxInfo.DeferredCausets), uint32(e.opts[ast.AnalyzeOptNumTopN]))
	if needCMS && cms != nil {
		cms.CalcDefaultValForAnalyze(uint64(hist.NDV))
	}
	return hist, cms, err
}

func (e *AnalyzeIndexInterDirc) buildStats(ranges []*ranger.Range, considerNull bool) (hist *statistics.Histogram, cms *statistics.CMSketch, err error) {
	if err = e.open(ranges, considerNull); err != nil {
		return nil, nil, err
	}
	defer func() {
		err1 := closeAll(e.result, e.countNullRes)
		if err == nil {
			err = err1
		}
	}()
	hist, cms, err = e.buildStatsFromResult(e.result, true)
	if err != nil {
		return nil, nil, err
	}
	if e.countNullRes != nil {
		nullHist, _, err := e.buildStatsFromResult(e.countNullRes, false)
		if err != nil {
			return nil, nil, err
		}
		if l := nullHist.Len(); l > 0 {
			hist.NullCount = nullHist.Buckets[l-1].Count
		}
	}
	hist.ID = e.idxInfo.ID
	return hist, cms, nil
}
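
// analyzeDeferredCausetsPushdown runs one pushed-down defCausumn analyze task:
// it picks the full scan range according to the handle type, builds the
// defCausumn histograms, CM sketches and extended stats, and wraps them into
// a single analyzeResult whose Count comes from the first histogram.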
func analyzeDeferredCausetsPushdown(defCausInterDirc *AnalyzeDeferredCausetsInterDirc) analyzeResult {
	var ranges []*ranger.Range
	if hc := defCausInterDirc.handleDefCauss; hc != nil {
		if hc.IsInt() {
			ranges = ranger.FullIntRange(allegrosql.HasUnsignedFlag(hc.GetDefCaus(0).RetType.Flag))
		} else {
			ranges = ranger.FullNotNullRange()
		}
	} else {
		ranges = ranger.FullIntRange(false)
	}
	hists, cms, extStats, err := defCausInterDirc.buildStats(ranges, true)
	if err != nil {
		return analyzeResult{Err: err, job: defCausInterDirc.job}
	}
	result := analyzeResult{
		BlockID:  defCausInterDirc.blockID,
		Hist:     hists,
		Cms:      cms,
		ExtStats: extStats,
		job:      defCausInterDirc.job,
	}
	hist := hists[0]
	result.Count = hist.NullCount
	if hist.Len() > 0 {
		result.Count += hist.Buckets[hist.Len()-1].Count
	}
	return result
}

// AnalyzeDeferredCausetsInterDirc represents Analyze defCausumns push down interlock.
type AnalyzeDeferredCausetsInterDirc struct {
	ctx            stochastikctx.Context
	blockID        embedded.AnalyzeBlockID
	defcausInfo    []*perceptron.DeferredCausetInfo
	handleDefCauss embedded.HandleDefCauss
	concurrency    int
	priority       int
	analyzePB      *fidelpb.AnalyzeReq
	resultHandler  *blockResultHandler
	opts           map[ast.AnalyzeOptionType]uint64
	job            *statistics.AnalyzeJob
}

func (e *AnalyzeDeferredCausetsInterDirc) open(ranges []*ranger.Range) error {
	e.resultHandler = &blockResultHandler{}
	firstPartRanges, secondPartRanges := splitRanges(ranges, true, false)
	firstResult, err := e.buildResp(firstPartRanges)
	if err != nil {
		return err
	}
	if len(secondPartRanges) == 0 {
		e.resultHandler.open(nil, firstResult)
		return nil
	}
	var secondResult allegrosql.SelectResult
	secondResult, err = e.buildResp(secondPartRanges)
	if err != nil {
		return err
	}
	e.resultHandler.open(firstResult, secondResult)

	return nil
}
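
// buildResp builds the analyze ekv.Request for the given ranges and returns
// the fetched SelectResult; KeepOrder is always set so that the correlation
// of defCausumns can be computed from ordered rows.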
func (e *AnalyzeDeferredCausetsInterDirc) buildResp(ranges []*ranger.Range) (allegrosql.SelectResult, error) {
	var builder allegrosql.RequestBuilder
	var reqBuilder *allegrosql.RequestBuilder
	if e.handleDefCauss != nil && !e.handleDefCauss.IsInt() {
		reqBuilder = builder.SetCommonHandleRanges(e.ctx.GetStochastikVars().StmtCtx, e.blockID.DefCauslectIDs[0], ranges)
	} else {
		reqBuilder = builder.SetBlockRanges(e.blockID.DefCauslectIDs[0], ranges, nil)
	}
	// Always set KeepOrder of the request to be true, in order to compute
	// correct `correlation` of defCausumns.
	ekvReq, err := reqBuilder.
		SetAnalyzeRequest(e.analyzePB).
		SetStartTS(math.MaxUint64).
		SetKeepOrder(true).
		SetConcurrency(e.concurrency).
		Build()
	if err != nil {
		return nil, err
	}
	ctx := context.TODO()
	result, err := allegrosql.Analyze(ctx, e.ctx.GetClient(), ekvReq, e.ctx.GetStochastikVars().KVVars, e.ctx.GetStochastikVars().InRestrictedALLEGROSQL)
	if err != nil {
		return nil, err
	}
	result.Fetch(ctx)
	return result, nil
}

func (e *AnalyzeDeferredCausetsInterDirc) buildStats(ranges []*ranger.Range, needExtStats bool) (hists []*statistics.Histogram, cms []*statistics.CMSketch, extStats *statistics.ExtendedStatsDefCausl, err error) {
	if err = e.open(ranges); err != nil {
		return nil, nil, nil, err
	}
	defer func() {
		if err1 := e.resultHandler.Close(); err1 != nil {
			hists = nil
			cms = nil
			extStats = nil
			err = err1
		}
	}()
	pkHist := &statistics.Histogram{}
	defCauslectors := make([]*statistics.SampleDefCauslector, len(e.defcausInfo))
	for i := range defCauslectors {
		defCauslectors[i] = &statistics.SampleDefCauslector{
			IsMerger:      true,
			FMSketch:      statistics.NewFMSketch(maxSketchSize),
			MaxSampleSize: int64(e.opts[ast.AnalyzeOptNumSamples]),
			CMSketch:      statistics.NewCMSketch(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth])),
		}
	}
	for {
		data, err1 := e.resultHandler.nextRaw(context.TODO())
		if err1 != nil {
			return nil, nil, nil, err1
		}
		if data == nil {
			break
		}
		resp := &fidelpb.AnalyzeDeferredCausetsResp{}
		err = resp.Unmarshal(data)
		if err != nil {
			return nil, nil, nil, err
		}
		sc := e.ctx.GetStochastikVars().StmtCtx
		rowCount := int64(0)
		if hasPkHist(e.handleDefCauss) {
			respHist := statistics.HistogramFromProto(resp.PkHist)
			rowCount = int64(respHist.TotalEventCount())
			pkHist, err = statistics.MergeHistograms(sc, pkHist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets]))
			if err != nil {
				return nil, nil, nil, err
			}
		}
		for i, rc := range resp.DefCauslectors {
			respSample := statistics.SampleDefCauslectorFromProto(rc)
			rowCount = respSample.Count + respSample.NullCount
			defCauslectors[i].MergeSampleDefCauslector(sc, respSample)
		}
		e.job.UFIDelate(rowCount)
	}
	timeZone := e.ctx.GetStochastikVars().Location()
	if hasPkHist(e.handleDefCauss) {
		pkInfo := e.handleDefCauss.GetDefCaus(0)
		pkHist.ID = pkInfo.ID
		err = pkHist.DecodeTo(pkInfo.RetType, timeZone)
		if err != nil {
			return nil, nil, nil, err
		}
		hists = append(hists, pkHist)
		cms = append(cms, nil)
	}
	for i, defCaus := range e.defcausInfo {
		err := defCauslectors[i].ExtractTopN(uint32(e.opts[ast.AnalyzeOptNumTopN]), e.ctx.GetStochastikVars().StmtCtx, &defCaus.FieldType, timeZone)
		if err != nil {
			return nil, nil, nil, err
		}
		for j, s := range defCauslectors[i].Samples {
			defCauslectors[i].Samples[j].Ordinal = j
			defCauslectors[i].Samples[j].Value, err = blockcodec.DecodeDeferredCausetValue(s.Value.GetBytes(), &defCaus.FieldType, timeZone)
			if err != nil {
				return nil, nil, nil, err
			}
		}
		hg, err := statistics.BuildDeferredCauset(e.ctx, int64(e.opts[ast.AnalyzeOptNumBuckets]), defCaus.ID, defCauslectors[i], &defCaus.FieldType)
		if err != nil {
			return nil, nil, nil, err
		}
		hists = append(hists, hg)
		defCauslectors[i].CMSketch.CalcDefaultValForAnalyze(uint64(hg.NDV))
		cms = append(cms, defCauslectors[i].CMSketch)
	}
	if needExtStats {
		statsHandle := petri.GetPetri(e.ctx).StatsHandle()
		extStats, err = statsHandle.BuildExtendedStats(e.blockID.PersistID, e.defcausInfo, defCauslectors)
		if err != nil {
			return nil, nil, nil, err
		}
	}
	return hists, cms, extStats, nil
}
func hasPkHist(handleDefCauss embedded.HandleDefCauss) bool {
	return handleDefCauss != nil && handleDefCauss.IsInt()
}

func pkDefCaussCount(handleDefCauss embedded.HandleDefCauss) int {
	if handleDefCauss == nil {
		return 0
	}
	return handleDefCauss.NumDefCauss()
}

var (
	fastAnalyzeHistogramSample        = metrics.FastAnalyzeHistogram.WithLabelValues(metrics.LblGeneral, "sample")
	fastAnalyzeHistogramAccessRegions = metrics.FastAnalyzeHistogram.WithLabelValues(metrics.LblGeneral, "access_regions")
	fastAnalyzeHistogramScanKeys      = metrics.FastAnalyzeHistogram.WithLabelValues(metrics.LblGeneral, "scan_keys")
)

func analyzeFastInterDirc(exec *AnalyzeFastInterDirc) []analyzeResult {
	hists, cms, err := exec.buildStats()
	if err != nil {
		return []analyzeResult{{Err: err, job: exec.job}}
	}
	var results []analyzeResult
	pkDefCausCount := pkDefCaussCount(exec.handleDefCauss)
	if len(exec.idxsInfo) > 0 {
		for i := pkDefCausCount + len(exec.defcausInfo); i < len(hists); i++ {
			idxResult := analyzeResult{
				BlockID: exec.blockID,
				Hist:    []*statistics.Histogram{hists[i]},
				Cms:     []*statistics.CMSketch{cms[i]},
				IsIndex: 1,
				Count:   hists[i].NullCount,
				job:     exec.job,
			}
			if hists[i].Len() > 0 {
				idxResult.Count += hists[i].Buckets[hists[i].Len()-1].Count
			}
			if exec.rowCount != 0 {
				idxResult.Count = exec.rowCount
			}
			results = append(results, idxResult)
		}
	}
	hist := hists[0]
	defCausResult := analyzeResult{
		BlockID: exec.blockID,
		Hist:    hists[:pkDefCausCount+len(exec.defcausInfo)],
		Cms:     cms[:pkDefCausCount+len(exec.defcausInfo)],
		Count:   hist.NullCount,
		job:     exec.job,
	}
	if hist.Len() > 0 {
		defCausResult.Count += hist.Buckets[hist.Len()-1].Count
	}
	if exec.rowCount != 0 {
		defCausResult.Count = exec.rowCount
	}
	results = append(results, defCausResult)
	return results
}
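
// Note on the slice layout produced by AnalyzeFastInterDirc.runTasks and
// consumed by analyzeFastInterDirc above: entries [0, pkDefCaussCount) hold
// the integer primary-key histogram (if any), entries
// [pkDefCaussCount, pkDefCaussCount+len(defcausInfo)) hold the defCausumn
// histograms, and the remaining entries hold one histogram per index.
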
// AnalyzeFastInterDirc represents Fast Analyze interlock.
type AnalyzeFastInterDirc struct {
	ctx            stochastikctx.Context
	blockID        embedded.AnalyzeBlockID
	handleDefCauss embedded.HandleDefCauss
	defcausInfo    []*perceptron.DeferredCausetInfo
	idxsInfo       []*perceptron.IndexInfo
	concurrency    int
	opts           map[ast.AnalyzeOptionType]uint64
	tblInfo        *perceptron.BlockInfo
	cache          *einsteindb.RegionCache
	wg             *sync.WaitGroup
	rowCount       int64
	sampCursor     int32
	sampTasks      []*einsteindb.KeyLocation
	scanTasks      []*einsteindb.KeyLocation
	defCauslectors []*statistics.SampleDefCauslector
	randSeed       int64
	job            *statistics.AnalyzeJob
	estSampStep    uint32
}

func (e *AnalyzeFastInterDirc) calculateEstimateSampleStep() (err error) {
	allegrosql := fmt.Sprintf("select flag from allegrosql.stats_histograms where block_id = %d;", e.blockID.PersistID)
	var rows []chunk.Event
	rows, _, err = e.ctx.(sqlexec.RestrictedALLEGROSQLInterlockingDirectorate).InterDircRestrictedALLEGROSQL(allegrosql)
	if err != nil {
		return
	}
	var historyEventCount uint64
	hasBeenAnalyzed := len(rows) != 0 && rows[0].GetInt64(0) == statistics.AnalyzeFlag
	if hasBeenAnalyzed {
		historyEventCount = uint64(petri.GetPetri(e.ctx).StatsHandle().GetPartitionStats(e.tblInfo, e.blockID.PersistID).Count)
	} else {
		dbInfo, ok := petri.GetPetri(e.ctx).SchemaReplicant().SchemaByBlock(e.tblInfo)
		if !ok {
			err = errors.Errorf("database not found for causet '%s'", e.tblInfo.Name)
			return
		}
		var rollbackFn func() error
		rollbackFn, err = e.activateTxnForEventCount()
		if err != nil {
			return
		}
		defer func() {
			if rollbackFn != nil {
				err = rollbackFn()
			}
		}()
		var partition string
		if e.tblInfo.ID != e.blockID.PersistID {
			for _, definition := range e.tblInfo.Partition.Definitions {
				if definition.ID == e.blockID.PersistID {
					partition = fmt.Sprintf(" partition(%s)", definition.Name.L)
					break
				}
			}
		}
		allegrosql := fmt.Sprintf("select count(*) from %s.%s", dbInfo.Name.L, e.tblInfo.Name.L)
		if len(partition) > 0 {
			allegrosql += partition
		}
		var recordSets []sqlexec.RecordSet
		recordSets, err = e.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircuteInternal(context.TODO(), allegrosql)
		if err != nil {
			return
		}
		if len(recordSets) == 0 {
			err = errors.Trace(errors.Errorf("empty record set"))
			return
		}
		defer func() {
			for _, r := range recordSets {
				terror.Call(r.Close)
			}
		}()
		chk := recordSets[0].NewChunk()
		err = recordSets[0].Next(context.TODO(), chk)
		if err != nil {
			return
		}
		e.rowCount = chk.GetEvent(0).GetInt64(0)
		historyEventCount = uint64(e.rowCount)
	}
	totalSampSize := e.opts[ast.AnalyzeOptNumSamples]
	e.estSampStep = uint32(historyEventCount / totalSampSize)
	return
}
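
// activateTxnForEventCount makes sure a transaction is active before the row
// count query above runs: if no transaction is valid it issues an internal
// "begin" and returns a rollback function for the caller to defer, and it
// lowers the priority, isolation level and cache-filling of the transaction
// used for the count.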
func (e *AnalyzeFastInterDirc) activateTxnForEventCount() (rollbackFn func() error, err error) {
	txn, err := e.ctx.Txn(true)
	if err != nil {
		if ekv.ErrInvalidTxn.Equal(err) {
			_, err := e.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircuteInternal(context.TODO(), "begin")
			if err != nil {
				return nil, errors.Trace(err)
			}
			rollbackFn = func() error {
				_, err := e.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircuteInternal(context.TODO(), "rollback")
				return err
			}
		} else {
			return nil, errors.Trace(err)
		}
	}
	txn.SetOption(ekv.Priority, ekv.PriorityLow)
	txn.SetOption(ekv.IsolationLevel, ekv.RC)
	txn.SetOption(ekv.NotFillCache, true)
	return rollbackFn, nil
}

// buildSampTask builds the sample tasks.
func (e *AnalyzeFastInterDirc) buildSampTask() (err error) {
	bo := einsteindb.NewBackofferWithVars(context.Background(), 500, nil)
	causetstore, _ := e.ctx.GetStore().(einsteindb.CausetStorage)
	e.cache = causetstore.GetRegionCache()
	startKey, endKey := blockcodec.GetBlockHandleKeyRange(e.blockID.DefCauslectIDs[0])
	targetKey := startKey
	accessRegionsCounter := 0
	for {
		// Search for the region which contains the targetKey.
		loc, err := e.cache.LocateKey(bo, targetKey)
		if err != nil {
			return err
		}
		if bytes.Compare(endKey, loc.StartKey) < 0 {
			break
		}
		accessRegionsCounter++

		// Set the next search key.
		targetKey = loc.EndKey

		// If the KV pairs in the region all belong to the causet, add it to the sample tasks.
		if bytes.Compare(startKey, loc.StartKey) <= 0 && len(loc.EndKey) != 0 && bytes.Compare(loc.EndKey, endKey) <= 0 {
			e.sampTasks = append(e.sampTasks, loc)
			continue
		}

		e.scanTasks = append(e.scanTasks, loc)
		if bytes.Compare(loc.StartKey, startKey) < 0 {
			loc.StartKey = startKey
		}
		if bytes.Compare(endKey, loc.EndKey) < 0 || len(loc.EndKey) == 0 {
			loc.EndKey = endKey
			break
		}
	}
	fastAnalyzeHistogramAccessRegions.Observe(float64(accessRegionsCounter))

	return nil
}

func (e *AnalyzeFastInterDirc) decodeValues(handle ekv.Handle, sValue []byte, wantDefCauss map[int64]*types.FieldType) (values map[int64]types.Causet, err error) {
	loc := e.ctx.GetStochastikVars().Location()
	values, err = blockcodec.DecodeEventToCausetMap(sValue, wantDefCauss, loc)
	if err != nil || e.handleDefCauss == nil {
		return values, err
	}
	wantDefCauss = make(map[int64]*types.FieldType, e.handleDefCauss.NumDefCauss())
	handleDefCausIDs := make([]int64, e.handleDefCauss.NumDefCauss())
	for i := 0; i < e.handleDefCauss.NumDefCauss(); i++ {
		c := e.handleDefCauss.GetDefCaus(i)
		handleDefCausIDs[i] = c.ID
		wantDefCauss[c.ID] = c.RetType
	}
	return blockcodec.DecodeHandleToCausetMap(handle, handleDefCausIDs, wantDefCauss, loc, values)
}

func (e *AnalyzeFastInterDirc) getValueByInfo(defCausInfo *perceptron.DeferredCausetInfo, values map[int64]types.Causet) (types.Causet, error) {
	val, ok := values[defCausInfo.ID]
	if !ok {
		return causet.GetDefCausOriginDefaultValue(e.ctx, defCausInfo)
	}
	return val, nil
}
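
// uFIDelateDefCauslectorSamples decodes one sampled event and writes it into
// the sample defCauslectors at position samplePos: first the primary-key
// defCausumns, then the plain defCausumns, and finally one encoded entry per
// analyzed index.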
func (e *AnalyzeFastInterDirc) uFIDelateDefCauslectorSamples(sValue []byte, sKey ekv.Key, samplePos int32) (err error) {
	var handle ekv.Handle
	handle, err = blockcodec.DecodeEventKey(sKey)
	if err != nil {
		return err
	}

	// Decode defcaus for the analyzed causet.
	wantDefCauss := make(map[int64]*types.FieldType, len(e.defcausInfo))
	for _, defCaus := range e.defcausInfo {
		wantDefCauss[defCaus.ID] = &defCaus.FieldType
	}

	// Pre-build the index->defcaus relationship and refill wantDefCauss if it does not exist yet (analyze index).
	index2DefCauss := make([][]*perceptron.DeferredCausetInfo, len(e.idxsInfo))
	for i, idxInfo := range e.idxsInfo {
		for _, idxDefCaus := range idxInfo.DeferredCausets {
			defCausInfo := e.tblInfo.DeferredCausets[idxDefCaus.Offset]
			index2DefCauss[i] = append(index2DefCauss[i], defCausInfo)
			wantDefCauss[defCausInfo.ID] = &defCausInfo.FieldType
		}
	}

	// Decode the defcaus values in order.
	var values map[int64]types.Causet
	values, err = e.decodeValues(handle, sValue, wantDefCauss)
	if err != nil {
		return err
	}
	// UFIDelate the primary key defCauslector.
	pkDefCaussCount := pkDefCaussCount(e.handleDefCauss)
	for i := 0; i < pkDefCaussCount; i++ {
		defCaus := e.handleDefCauss.GetDefCaus(i)
		v, ok := values[defCaus.ID]
		if !ok {
			return errors.Trace(errors.Errorf("Primary key defCausumn not found"))
		}
		if e.defCauslectors[i].Samples[samplePos] == nil {
			e.defCauslectors[i].Samples[samplePos] = &statistics.SampleItem{}
		}
		e.defCauslectors[i].Samples[samplePos].Handle = handle
		e.defCauslectors[i].Samples[samplePos].Value = v
	}

	// UFIDelate the defCausumns' defCauslectors.
	for j, defCausInfo := range e.defcausInfo {
		v, err := e.getValueByInfo(defCausInfo, values)
		if err != nil {
			return err
		}
		if e.defCauslectors[pkDefCaussCount+j].Samples[samplePos] == nil {
			e.defCauslectors[pkDefCaussCount+j].Samples[samplePos] = &statistics.SampleItem{}
		}
		e.defCauslectors[pkDefCaussCount+j].Samples[samplePos].Handle = handle
		e.defCauslectors[pkDefCaussCount+j].Samples[samplePos].Value = v
	}
	// UFIDelate the indexes' defCauslectors.
	for j, idxInfo := range e.idxsInfo {
		idxVals := make([]types.Causet, 0, len(idxInfo.DeferredCausets))
		defcaus := index2DefCauss[j]
		for _, defCausInfo := range defcaus {
			v, err := e.getValueByInfo(defCausInfo, values)
			if err != nil {
				return err
			}
			idxVals = append(idxVals, v)
		}
		var bytes []byte
		bytes, err = codec.EncodeKey(e.ctx.GetStochastikVars().StmtCtx, bytes, idxVals...)
		if err != nil {
			return err
		}
		if e.defCauslectors[len(e.defcausInfo)+pkDefCaussCount+j].Samples[samplePos] == nil {
			e.defCauslectors[len(e.defcausInfo)+pkDefCaussCount+j].Samples[samplePos] = &statistics.SampleItem{}
		}
		e.defCauslectors[len(e.defcausInfo)+pkDefCaussCount+j].Samples[samplePos].Handle = handle
		e.defCauslectors[len(e.defcausInfo)+pkDefCaussCount+j].Samples[samplePos].Value = types.NewBytesCauset(bytes)
	}
	return nil
}

func (e *AnalyzeFastInterDirc) handleBatchSeekResponse(ekvMap map[string][]byte) (err error) {
	length := int32(len(ekvMap))
	newCursor := atomic.AddInt32(&e.sampCursor, length)
	samplePos := newCursor - length
	for sKey, sValue := range ekvMap {
		exceedNeededSampleCounts := uint64(samplePos) >= e.opts[ast.AnalyzeOptNumSamples]
		if exceedNeededSampleCounts {
			atomic.StoreInt32(&e.sampCursor, int32(e.opts[ast.AnalyzeOptNumSamples]))
			break
		}
		err = e.uFIDelateDefCauslectorSamples(sValue, ekv.Key(sKey), samplePos)
		if err != nil {
			return err
		}
		samplePos++
	}
	return nil
}
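
// reservoirSampleSketch is an illustrative sketch of the reservoir sampling
// idea used by handleScanIter below; it is not called by the interlock. Once
// the reservoir of size k is full, the i-th scanned key replaces a random
// slot with probability k/i, which keeps every key equally likely to stay in
// the sample.
func reservoirSampleSketch(keys []int64, k int, seed int64) []int64 {
	rander := rand.New(rand.NewSource(seed))
	reservoir := make([]int64, 0, k)
	for i, key := range keys {
		if len(reservoir) < k {
			reservoir = append(reservoir, key)
			continue
		}
		if p := rander.Intn(i + 1); p < k {
			reservoir[p] = key
		}
	}
	return reservoir
}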
func (e *AnalyzeFastInterDirc) handleScanIter(iter ekv.Iterator) (scanKeysSize int, err error) {
	rander := rand.New(rand.NewSource(e.randSeed))
	sampleSize := int64(e.opts[ast.AnalyzeOptNumSamples])
	for ; iter.Valid() && err == nil; err = iter.Next() {
		// reservoir sampling
		scanKeysSize++
		randNum := rander.Int63n(int64(e.sampCursor) + int64(scanKeysSize))
		if randNum > sampleSize && e.sampCursor == int32(sampleSize) {
			continue
		}

		p := rander.Int31n(int32(sampleSize))
		if e.sampCursor < int32(sampleSize) {
			p = e.sampCursor
			e.sampCursor++
		}

		err = e.uFIDelateDefCauslectorSamples(iter.Value(), iter.Key(), p)
		if err != nil {
			return
		}
	}
	return
}

func (e *AnalyzeFastInterDirc) handleScanTasks(bo *einsteindb.Backoffer) (keysSize int, err error) {
	snapshot, err := e.ctx.GetStore().(einsteindb.CausetStorage).GetSnapshot(ekv.MaxVersion)
	if err != nil {
		return 0, err
	}
	if e.ctx.GetStochastikVars().GetReplicaRead().IsFollowerRead() {
		snapshot.SetOption(ekv.ReplicaRead, ekv.ReplicaReadFollower)
	}
	for _, t := range e.scanTasks {
		iter, err := snapshot.Iter(t.StartKey, t.EndKey)
		if err != nil {
			return keysSize, err
		}
		size, err := e.handleScanIter(iter)
		keysSize += size
		if err != nil {
			return keysSize, err
		}
	}
	return keysSize, nil
}

func (e *AnalyzeFastInterDirc) handleSampTasks(workID int, step uint32, err *error) {
	defer e.wg.Done()
	var snapshot ekv.Snapshot
	snapshot, *err = e.ctx.GetStore().(einsteindb.CausetStorage).GetSnapshot(ekv.MaxVersion)
	if *err != nil {
		return
	}
	snapshot.SetOption(ekv.NotFillCache, true)
	snapshot.SetOption(ekv.IsolationLevel, ekv.RC)
	snapshot.SetOption(ekv.Priority, ekv.PriorityLow)
	if e.ctx.GetStochastikVars().GetReplicaRead().IsFollowerRead() {
		snapshot.SetOption(ekv.ReplicaRead, ekv.ReplicaReadFollower)
	}

	rander := rand.New(rand.NewSource(e.randSeed))
	for i := workID; i < len(e.sampTasks); i += e.concurrency {
		task := e.sampTasks[i]
		// randomize the estimate step in range [step - 2 * sqrt(step), step]
		if step > 4 { // 2*sqrt(x) < x
			lower, upper := step-uint32(2*math.Sqrt(float64(step))), step
			step = uint32(rander.Intn(int(upper-lower))) + lower
		}
		snapshot.SetOption(ekv.SampleStep, step)
		ekvMap := make(map[string][]byte)
		var iter ekv.Iterator
		iter, *err = snapshot.Iter(task.StartKey, task.EndKey)
		if *err != nil {
			return
		}
		for iter.Valid() {
			ekvMap[string(iter.Key())] = iter.Value()
			*err = iter.Next()
			if *err != nil {
				return
			}
		}
		fastAnalyzeHistogramSample.Observe(float64(len(ekvMap)))

		*err = e.handleBatchSeekResponse(ekvMap)
		if *err != nil {
			return
		}
	}
}
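
// buildDeferredCausetStats builds the histogram and the CM sketch with top-N
// for one defCausumn (or the integer primary key) from its sample defCauslector;
// NULL samples only bump NullCount and are excluded from the encoded data.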
func (e *AnalyzeFastInterDirc) buildDeferredCausetStats(ID int64, defCauslector *statistics.SampleDefCauslector, tp *types.FieldType, rowCount int64) (*statistics.Histogram, *statistics.CMSketch, error) {
	data := make([][]byte, 0, len(defCauslector.Samples))
	for i, sample := range defCauslector.Samples {
		sample.Ordinal = i
		if sample.Value.IsNull() {
			defCauslector.NullCount++
			continue
		}
		bytes, err := blockcodec.EncodeValue(e.ctx.GetStochastikVars().StmtCtx, nil, sample.Value)
		if err != nil {
			return nil, nil, err
		}
		data = append(data, bytes)
	}
	// Build CMSketch.
	cmSketch, ndv, scaleRatio := statistics.NewCMSketchWithTopN(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]), data, uint32(e.opts[ast.AnalyzeOptNumTopN]), uint64(rowCount))
	// Build Histogram.
	hist, err := statistics.BuildDeferredCausetHist(e.ctx, int64(e.opts[ast.AnalyzeOptNumBuckets]), ID, defCauslector, tp, rowCount, int64(ndv), defCauslector.NullCount*int64(scaleRatio))
	return hist, cmSketch, err
}

func (e *AnalyzeFastInterDirc) buildIndexStats(idxInfo *perceptron.IndexInfo, defCauslector *statistics.SampleDefCauslector, rowCount int64) (*statistics.Histogram, *statistics.CMSketch, error) {
	data := make([][][]byte, len(idxInfo.DeferredCausets))
	for _, sample := range defCauslector.Samples {
		var preLen int
		remained := sample.Value.GetBytes()
		// We need to insert each prefix value into the CM Sketch.
		for i := 0; i < len(idxInfo.DeferredCausets); i++ {
			var err error
			var value []byte
			value, remained, err = codec.CutOne(remained)
			if err != nil {
				return nil, nil, err
			}
			preLen += len(value)
			data[i] = append(data[i], sample.Value.GetBytes()[:preLen])
		}
	}
	numTop := uint32(e.opts[ast.AnalyzeOptNumTopN])
	cmSketch, ndv, scaleRatio := statistics.NewCMSketchWithTopN(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]), data[0], numTop, uint64(rowCount))
	// Build a CM Sketch for each prefix and merge them into one.
	for i := 1; i < len(idxInfo.DeferredCausets); i++ {
		var curCMSketch *statistics.CMSketch
		// `ndv` should be the ndv of the full index, so just rewrite it here.
		curCMSketch, ndv, scaleRatio = statistics.NewCMSketchWithTopN(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]), data[i], numTop, uint64(rowCount))
		err := cmSketch.MergeCMSketch(curCMSketch, numTop)
		if err != nil {
			return nil, nil, err
		}
	}
	// Build Histogram.
	hist, err := statistics.BuildDeferredCausetHist(e.ctx, int64(e.opts[ast.AnalyzeOptNumBuckets]), idxInfo.ID, defCauslector, types.NewFieldType(allegrosql.TypeBlob), rowCount, int64(ndv), defCauslector.NullCount*int64(scaleRatio))
	return hist, cmSketch, err
}
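
// runTasks drives one fast-analyze round: it allocates one sample defCauslector
// per primary-key defCausumn, defCausumn and index, fans the sample tasks out to
// e.concurrency goroutines, scans the remaining regions directly, and then
// builds histograms and CM sketches from the defCauslected samples.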
func (e *AnalyzeFastInterDirc) runTasks() ([]*statistics.Histogram, []*statistics.CMSketch, error) {
	errs := make([]error, e.concurrency)
	pkDefCausCount := pkDefCaussCount(e.handleDefCauss)
	// defCauslect defCausumn samples, primary key samples and index samples.
	length := len(e.defcausInfo) + pkDefCausCount + len(e.idxsInfo)
	e.defCauslectors = make([]*statistics.SampleDefCauslector, length)
	for i := range e.defCauslectors {
		e.defCauslectors[i] = &statistics.SampleDefCauslector{
			MaxSampleSize: int64(e.opts[ast.AnalyzeOptNumSamples]),
			Samples:       make([]*statistics.SampleItem, e.opts[ast.AnalyzeOptNumSamples]),
		}
	}

	e.wg.Add(e.concurrency)
	bo := einsteindb.NewBackofferWithVars(context.Background(), 500, nil)
	for i := 0; i < e.concurrency; i++ {
		go e.handleSampTasks(i, e.estSampStep, &errs[i])
	}
	e.wg.Wait()
	for _, err := range errs {
		if err != nil {
			return nil, nil, err
		}
	}

	scanKeysSize, err := e.handleScanTasks(bo)
	fastAnalyzeHistogramScanKeys.Observe(float64(scanKeysSize))
	if err != nil {
		return nil, nil, err
	}

	stats := petri.GetPetri(e.ctx).StatsHandle()
	var rowCount int64 = 0
	if stats.Lease() > 0 {
		if t := stats.GetPartitionStats(e.tblInfo, e.blockID.PersistID); !t.Pseudo {
			rowCount = t.Count
		}
	}
	hists, cms := make([]*statistics.Histogram, length), make([]*statistics.CMSketch, length)
	for i := 0; i < length; i++ {
		// Build defCauslector properties.
		defCauslector := e.defCauslectors[i]
		defCauslector.Samples = defCauslector.Samples[:e.sampCursor]
		sort.Slice(defCauslector.Samples, func(i, j int) bool {
			return defCauslector.Samples[i].Handle.Compare(defCauslector.Samples[j].Handle) < 0
		})
		defCauslector.CalcTotalSize()
		// Adjust the event count in case the count of `tblStats` is not accurate and too small.
		rowCount = mathutil.MaxInt64(rowCount, int64(len(defCauslector.Samples)))
		// Scale the total defCausumn size.
		if len(defCauslector.Samples) > 0 {
			defCauslector.TotalSize *= rowCount / int64(len(defCauslector.Samples))
		}
		if i < pkDefCausCount {
			pkDefCaus := e.handleDefCauss.GetDefCaus(i)
			hists[i], cms[i], err = e.buildDeferredCausetStats(pkDefCaus.ID, e.defCauslectors[i], pkDefCaus.RetType, rowCount)
		} else if i < pkDefCausCount+len(e.defcausInfo) {
			hists[i], cms[i], err = e.buildDeferredCausetStats(e.defcausInfo[i-pkDefCausCount].ID, e.defCauslectors[i], &e.defcausInfo[i-pkDefCausCount].FieldType, rowCount)
		} else {
			hists[i], cms[i], err = e.buildIndexStats(e.idxsInfo[i-pkDefCausCount-len(e.defcausInfo)], e.defCauslectors[i], rowCount)
		}
		if err != nil {
			return nil, nil, err
		}
	}
	return hists, cms, nil
}

func (e *AnalyzeFastInterDirc) buildStats() (hists []*statistics.Histogram, cms []*statistics.CMSketch, err error) {
	// RandSeed is overridden only in unit tests; in non-test environments it
	// stays 1, so the seed falls back to time.Now().UnixNano() to make the
	// random sequences differ between runs.
	if RandSeed == 1 {
		e.randSeed = time.Now().UnixNano()
	} else {
		e.randSeed = RandSeed
	}

	err = e.buildSampTask()
	if err != nil {
		return nil, nil, err
	}

	return e.runTasks()
}
// AnalyzeTestFastInterDirc is for fast sample in unit test.
type AnalyzeTestFastInterDirc struct {
	AnalyzeFastInterDirc
	Ctx             stochastikctx.Context
	PhysicalBlockID int64
	HandleDefCauss  embedded.HandleDefCauss
	DefCaussInfo    []*perceptron.DeferredCausetInfo
	IdxsInfo        []*perceptron.IndexInfo
	Concurrency     int
	DefCauslectors  []*statistics.SampleDefCauslector
	TblInfo         *perceptron.BlockInfo
	Opts            map[ast.AnalyzeOptionType]uint64
}

// TestFastSample only tests the fast sample path in unit tests.
func (e *AnalyzeTestFastInterDirc) TestFastSample() error {
	e.ctx = e.Ctx
	e.handleDefCauss = e.HandleDefCauss
	e.defcausInfo = e.DefCaussInfo
	e.idxsInfo = e.IdxsInfo
	e.concurrency = e.Concurrency
	e.blockID = embedded.AnalyzeBlockID{PersistID: e.PhysicalBlockID, DefCauslectIDs: []int64{e.PhysicalBlockID}}
	e.wg = &sync.WaitGroup{}
	e.job = &statistics.AnalyzeJob{}
	e.tblInfo = e.TblInfo
	e.opts = e.Opts
	_, _, err := e.buildStats()
	e.DefCauslectors = e.defCauslectors
	return err
}

type analyzeIndexIncrementalInterDirc struct {
	AnalyzeIndexInterDirc
	oldHist *statistics.Histogram
	oldCMS  *statistics.CMSketch
}

func analyzeIndexIncremental(idxInterDirc *analyzeIndexIncrementalInterDirc) analyzeResult {
	startPos := idxInterDirc.oldHist.GetUpper(idxInterDirc.oldHist.Len() - 1)
	values, _, err := codec.DecodeRange(startPos.GetBytes(), len(idxInterDirc.idxInfo.DeferredCausets), nil, nil)
	if err != nil {
		return analyzeResult{Err: err, job: idxInterDirc.job}
	}
	ran := ranger.Range{LowVal: values, HighVal: []types.Causet{types.MaxValueCauset()}}
	hist, cms, err := idxInterDirc.buildStats([]*ranger.Range{&ran}, false)
	if err != nil {
		return analyzeResult{Err: err, job: idxInterDirc.job}
	}
	hist, err = statistics.MergeHistograms(idxInterDirc.ctx.GetStochastikVars().StmtCtx, idxInterDirc.oldHist, hist, int(idxInterDirc.opts[ast.AnalyzeOptNumBuckets]))
	if err != nil {
		return analyzeResult{Err: err, job: idxInterDirc.job}
	}
	if idxInterDirc.oldCMS != nil && cms != nil {
		err = cms.MergeCMSketch4IncrementalAnalyze(idxInterDirc.oldCMS, uint32(idxInterDirc.opts[ast.AnalyzeOptNumTopN]))
		if err != nil {
			return analyzeResult{Err: err, job: idxInterDirc.job}
		}
		cms.CalcDefaultValForAnalyze(uint64(hist.NDV))
	}
	result := analyzeResult{
		BlockID: idxInterDirc.blockID,
		Hist:    []*statistics.Histogram{hist},
		Cms:     []*statistics.CMSketch{cms},
		IsIndex: 1,
		job:     idxInterDirc.job,
	}
	result.Count = hist.NullCount
	if hist.Len() > 0 {
		result.Count += hist.Buckets[hist.Len()-1].Count
	}
	return result
}

type analyzePKIncrementalInterDirc struct {
	AnalyzeDeferredCausetsInterDirc
	oldHist *statistics.Histogram
}
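
// analyzePKIncremental only analyzes the handle range above the upper bound
// of the previously built histogram (the low bound is exclusive) and merges
// the new buckets into the old histogram.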
func analyzePKIncremental(defCausInterDirc *analyzePKIncrementalInterDirc) analyzeResult {
	var maxVal types.Causet
	pkInfo := defCausInterDirc.handleDefCauss.GetDefCaus(0)
	if allegrosql.HasUnsignedFlag(pkInfo.RetType.Flag) {
		maxVal = types.NewUintCauset(math.MaxUint64)
	} else {
		maxVal = types.NewIntCauset(math.MaxInt64)
	}
	startPos := *defCausInterDirc.oldHist.GetUpper(defCausInterDirc.oldHist.Len() - 1)
	ran := ranger.Range{LowVal: []types.Causet{startPos}, LowExclude: true, HighVal: []types.Causet{maxVal}}
	hists, _, _, err := defCausInterDirc.buildStats([]*ranger.Range{&ran}, false)
	if err != nil {
		return analyzeResult{Err: err, job: defCausInterDirc.job}
	}
	hist := hists[0]
	hist, err = statistics.MergeHistograms(defCausInterDirc.ctx.GetStochastikVars().StmtCtx, defCausInterDirc.oldHist, hist, int(defCausInterDirc.opts[ast.AnalyzeOptNumBuckets]))
	if err != nil {
		return analyzeResult{Err: err, job: defCausInterDirc.job}
	}
	result := analyzeResult{
		BlockID: defCausInterDirc.blockID,
		Hist:    []*statistics.Histogram{hist},
		Cms:     []*statistics.CMSketch{nil},
		job:     defCausInterDirc.job,
	}
	if hist.Len() > 0 {
		result.Count += hist.Buckets[hist.Len()-1].Count
	}
	return result
}

// analyzeResult is used to represent analyze result.
type analyzeResult struct {
	BlockID  embedded.AnalyzeBlockID
	Hist     []*statistics.Histogram
	Cms      []*statistics.CMSketch
	ExtStats *statistics.ExtendedStatsDefCausl
	Count    int64
	IsIndex  int
	Err      error
	job      *statistics.AnalyzeJob
}