github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/analyze.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package interlock
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  	"fmt"
    20  	"math"
    21  	"math/rand"
    22  	"runtime"
    23  	"sort"
    24  	"strconv"
    25  	"sync"
    26  	"sync/atomic"
    27  	"time"
    28  
    29  	"github.com/cznic/mathutil"
    30  	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
    31  	"github.com/whtcorpsinc/BerolinaSQL/ast"
    32  	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
    33  	"github.com/whtcorpsinc/BerolinaSQL/terror"
    34  	"github.com/whtcorpsinc/errors"
    35  	"github.com/whtcorpsinc/failpoint"
    36  	"github.com/whtcorpsinc/fidelpb/go-fidelpb"
    37  	"github.com/whtcorpsinc/milevadb/allegrosql"
    38  	"github.com/whtcorpsinc/milevadb/blockcodec"
    39  	"github.com/whtcorpsinc/milevadb/causet"
    40  	"github.com/whtcorpsinc/milevadb/causet/embedded"
    41  	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb"
    42  	"github.com/whtcorpsinc/milevadb/ekv"
    43  	"github.com/whtcorpsinc/milevadb/metrics"
    44  	"github.com/whtcorpsinc/milevadb/petri"
    45  	"github.com/whtcorpsinc/milevadb/schemareplicant"
    46  	"github.com/whtcorpsinc/milevadb/soliton/chunk"
    47  	"github.com/whtcorpsinc/milevadb/soliton/codec"
    48  	"github.com/whtcorpsinc/milevadb/soliton/logutil"
    49  	"github.com/whtcorpsinc/milevadb/soliton/ranger"
    50  	"github.com/whtcorpsinc/milevadb/soliton/sqlexec"
    51  	"github.com/whtcorpsinc/milevadb/statistics"
    52  	"github.com/whtcorpsinc/milevadb/stochastikctx"
    53  	"github.com/whtcorpsinc/milevadb/stochastikctx/variable"
    54  	"github.com/whtcorpsinc/milevadb/types"
    55  	"go.uber.org/zap"
    56  )
    57  
// Compile-time assertion that AnalyzeInterDirc implements the
// InterlockingDirectorate interface.
var _ InterlockingDirectorate = &AnalyzeInterDirc{}

// AnalyzeInterDirc represents Analyze interlock.
type AnalyzeInterDirc struct {
	baseInterlockingDirectorate
	// tasks is the list of analyze work units to run; Next fans them out to
	// worker goroutines.
	tasks []*analyzeTask
	// wg tracks the worker goroutines spawned in Next; the designated
	// channel-closing worker waits on it before closing the result channel.
	wg *sync.WaitGroup
}
    66  
var (
	// RandSeed is the seed for randing package.
	// It's public for test.
	RandSeed = int64(1)
)

const (
	// maxRegionSampleSize presumably caps the per-region sample count for the
	// fast-analyze path — confirm at its usage site (not in this chunk).
	maxRegionSampleSize = 1000
	// maxSketchSize is the FM sketch size used when building defCausumn
	// sample defCauslectors (see AnalyzeDeferredCausetsInterDirc.buildStats).
	maxSketchSize = 10000
)
    77  
    78  // Next implements the InterlockingDirectorate Next interface.
    79  func (e *AnalyzeInterDirc) Next(ctx context.Context, req *chunk.Chunk) error {
    80  	concurrency, err := getBuildStatsConcurrency(e.ctx)
    81  	if err != nil {
    82  		return err
    83  	}
    84  	taskCh := make(chan *analyzeTask, len(e.tasks))
    85  	resultCh := make(chan analyzeResult, len(e.tasks))
    86  	e.wg.Add(concurrency)
    87  	for i := 0; i < concurrency; i++ {
    88  		go e.analyzeWorker(taskCh, resultCh, i == 0)
    89  	}
    90  	for _, task := range e.tasks {
    91  		statistics.AddNewAnalyzeJob(task.job)
    92  	}
    93  	for _, task := range e.tasks {
    94  		taskCh <- task
    95  	}
    96  	close(taskCh)
    97  	statsHandle := petri.GetPetri(e.ctx).StatsHandle()
    98  	panicCnt := 0
    99  	for panicCnt < concurrency {
   100  		result, ok := <-resultCh
   101  		if !ok {
   102  			break
   103  		}
   104  		if result.Err != nil {
   105  			err = result.Err
   106  			if err == errAnalyzeWorkerPanic {
   107  				panicCnt++
   108  			} else {
   109  				logutil.Logger(ctx).Error("analyze failed", zap.Error(err))
   110  			}
   111  			result.job.Finish(true)
   112  			continue
   113  		}
   114  		for i, hg := range result.Hist {
   115  			err1 := statsHandle.SaveStatsToStorage(result.BlockID.PersistID, result.Count, result.IsIndex, hg, result.Cms[i], 1)
   116  			if err1 != nil {
   117  				err = err1
   118  				logutil.Logger(ctx).Error("save stats to storage failed", zap.Error(err))
   119  				result.job.Finish(true)
   120  				continue
   121  			}
   122  		}
   123  		if err1 := statsHandle.SaveExtendedStatsToStorage(result.BlockID.PersistID, result.ExtStats, false); err1 != nil {
   124  			err = err1
   125  			logutil.Logger(ctx).Error("save extended stats to storage failed", zap.Error(err))
   126  			result.job.Finish(true)
   127  		} else {
   128  			result.job.Finish(false)
   129  		}
   130  	}
   131  	for _, task := range e.tasks {
   132  		statistics.MoveToHistory(task.job)
   133  	}
   134  	if err != nil {
   135  		return err
   136  	}
   137  	return statsHandle.UFIDelate(schemareplicant.GetSchemaReplicant(e.ctx))
   138  }
   139  
   140  func getBuildStatsConcurrency(ctx stochastikctx.Context) (int, error) {
   141  	stochastikVars := ctx.GetStochastikVars()
   142  	concurrency, err := variable.GetStochastikSystemVar(stochastikVars, variable.MilevaDBBuildStatsConcurrency)
   143  	if err != nil {
   144  		return 0, err
   145  	}
   146  	c, err := strconv.ParseInt(concurrency, 10, 64)
   147  	return int(c), err
   148  }
   149  
// taskType enumerates the kinds of analyze work an analyzeWorker can receive;
// each value dispatches to the matching analyze function in analyzeWorker.
type taskType int

const (
	// defCausTask analyzes causet defCausumns via the push-down path.
	defCausTask taskType = iota
	// idxTask analyzes an index via the push-down path.
	idxTask
	// fastTask runs the sampling-based fast analyze.
	fastTask
	// pkIncrementalTask incrementally analyzes the primary key.
	pkIncrementalTask
	// idxIncrementalTask incrementally analyzes an index.
	idxIncrementalTask
)
   159  
// analyzeTask is one unit of analyze work. Only the interlock field matching
// taskType is consulted (see the switch in analyzeWorker).
type analyzeTask struct {
	taskType                    taskType
	idxInterDirc                *AnalyzeIndexInterDirc
	defCausInterDirc            *AnalyzeDeferredCausetsInterDirc
	fastInterDirc               *AnalyzeFastInterDirc
	idxIncrementalInterDirc     *analyzeIndexIncrementalInterDirc
	defCausIncrementalInterDirc *analyzePKIncrementalInterDirc
	// job is the progress/bookkeeping entry registered with the statistics
	// subsystem; it is assigned into the chosen interlock before running.
	job *statistics.AnalyzeJob
}
   169  
// errAnalyzeWorkerPanic is the sentinel error a worker reports after
// recovering from a panic; Next counts these to decide when to stop waiting.
var errAnalyzeWorkerPanic = errors.New("analyze worker panic")
   171  
   172  func (e *AnalyzeInterDirc) analyzeWorker(taskCh <-chan *analyzeTask, resultCh chan<- analyzeResult, isCloseChanThread bool) {
   173  	var task *analyzeTask
   174  	defer func() {
   175  		if r := recover(); r != nil {
   176  			buf := make([]byte, 4096)
   177  			stackSize := runtime.Stack(buf, false)
   178  			buf = buf[:stackSize]
   179  			logutil.BgLogger().Error("analyze worker panicked", zap.String("stack", string(buf)))
   180  			metrics.PanicCounter.WithLabelValues(metrics.LabelAnalyze).Inc()
   181  			resultCh <- analyzeResult{
   182  				Err: errAnalyzeWorkerPanic,
   183  				job: task.job,
   184  			}
   185  		}
   186  		e.wg.Done()
   187  		if isCloseChanThread {
   188  			e.wg.Wait()
   189  			close(resultCh)
   190  		}
   191  	}()
   192  	for {
   193  		var ok bool
   194  		task, ok = <-taskCh
   195  		if !ok {
   196  			break
   197  		}
   198  		task.job.Start()
   199  		switch task.taskType {
   200  		case defCausTask:
   201  			task.defCausInterDirc.job = task.job
   202  			resultCh <- analyzeDeferredCausetsPushdown(task.defCausInterDirc)
   203  		case idxTask:
   204  			task.idxInterDirc.job = task.job
   205  			resultCh <- analyzeIndexPushdown(task.idxInterDirc)
   206  		case fastTask:
   207  			task.fastInterDirc.job = task.job
   208  			task.job.Start()
   209  			for _, result := range analyzeFastInterDirc(task.fastInterDirc) {
   210  				resultCh <- result
   211  			}
   212  		case pkIncrementalTask:
   213  			task.defCausIncrementalInterDirc.job = task.job
   214  			resultCh <- analyzePKIncremental(task.defCausIncrementalInterDirc)
   215  		case idxIncrementalTask:
   216  			task.idxIncrementalInterDirc.job = task.job
   217  			resultCh <- analyzeIndexIncremental(task.idxIncrementalInterDirc)
   218  		}
   219  	}
   220  }
   221  
   222  func analyzeIndexPushdown(idxInterDirc *AnalyzeIndexInterDirc) analyzeResult {
   223  	ranges := ranger.FullRange()
   224  	// For single-defCausumn index, we do not load null rows from EinsteinDB, so the built histogram would not include
   225  	// null values, and its `NullCount` would be set by result of another allegrosql call to get null rows.
   226  	// For multi-defCausumn index, we cannot define null for the rows, so we still use full range, and the rows
   227  	// containing null fields would exist in built histograms. Note that, the `NullCount` of histograms for
   228  	// multi-defCausumn index is always 0 then.
   229  	if len(idxInterDirc.idxInfo.DeferredCausets) == 1 {
   230  		ranges = ranger.FullNotNullRange()
   231  	}
   232  	hist, cms, err := idxInterDirc.buildStats(ranges, true)
   233  	if err != nil {
   234  		return analyzeResult{Err: err, job: idxInterDirc.job}
   235  	}
   236  	result := analyzeResult{
   237  		BlockID: idxInterDirc.blockID,
   238  		Hist:    []*statistics.Histogram{hist},
   239  		Cms:     []*statistics.CMSketch{cms},
   240  		IsIndex: 1,
   241  		job:     idxInterDirc.job,
   242  	}
   243  	result.Count = hist.NullCount
   244  	if hist.Len() > 0 {
   245  		result.Count += hist.Buckets[hist.Len()-1].Count
   246  	}
   247  	return result
   248  }
   249  
// AnalyzeIndexInterDirc represents analyze index push down interlock.
type AnalyzeIndexInterDirc struct {
	ctx            stochastikctx.Context
	blockID        embedded.AnalyzeBlockID
	idxInfo        *perceptron.IndexInfo
	isCommonHandle bool
	concurrency    int
	priority       int
	analyzePB      *fidelpb.AnalyzeReq
	// result receives the main analyze response stream; countNullRes receives
	// the optional null-range probe used by single-defCausumn indexes to get
	// the null count (see fetchAnalyzeResult).
	result       allegrosql.SelectResult
	countNullRes allegrosql.SelectResult
	// opts carries the ANALYZE options (bucket count, sketch sizes, ...).
	opts map[ast.AnalyzeOptionType]uint64
	job  *statistics.AnalyzeJob
}
   264  
   265  // fetchAnalyzeResult builds and dispatches the `ekv.Request` from given ranges, and stores the `SelectResult`
   266  // in corresponding fields based on the input `isNullRange` argument, which indicates if the range is the
   267  // special null range for single-defCausumn index to get the null count.
   268  func (e *AnalyzeIndexInterDirc) fetchAnalyzeResult(ranges []*ranger.Range, isNullRange bool) error {
   269  	var builder allegrosql.RequestBuilder
   270  	var ekvReqBuilder *allegrosql.RequestBuilder
   271  	if e.isCommonHandle && e.idxInfo.Primary {
   272  		ekvReqBuilder = builder.SetCommonHandleRanges(e.ctx.GetStochastikVars().StmtCtx, e.blockID.DefCauslectIDs[0], ranges)
   273  	} else {
   274  		ekvReqBuilder = builder.SetIndexRanges(e.ctx.GetStochastikVars().StmtCtx, e.blockID.DefCauslectIDs[0], e.idxInfo.ID, ranges)
   275  	}
   276  	ekvReq, err := ekvReqBuilder.
   277  		SetAnalyzeRequest(e.analyzePB).
   278  		SetStartTS(math.MaxUint64).
   279  		SetKeepOrder(true).
   280  		SetConcurrency(e.concurrency).
   281  		Build()
   282  	if err != nil {
   283  		return err
   284  	}
   285  	ctx := context.TODO()
   286  	result, err := allegrosql.Analyze(ctx, e.ctx.GetClient(), ekvReq, e.ctx.GetStochastikVars().KVVars, e.ctx.GetStochastikVars().InRestrictedALLEGROSQL)
   287  	if err != nil {
   288  		return err
   289  	}
   290  	result.Fetch(ctx)
   291  	if isNullRange {
   292  		e.countNullRes = result
   293  	} else {
   294  		e.result = result
   295  	}
   296  	return nil
   297  }
   298  
   299  func (e *AnalyzeIndexInterDirc) open(ranges []*ranger.Range, considerNull bool) error {
   300  	err := e.fetchAnalyzeResult(ranges, false)
   301  	if err != nil {
   302  		return err
   303  	}
   304  	if considerNull && len(e.idxInfo.DeferredCausets) == 1 {
   305  		ranges = ranger.NullRange()
   306  		err = e.fetchAnalyzeResult(ranges, true)
   307  		if err != nil {
   308  			return err
   309  		}
   310  	}
   311  	return nil
   312  }
   313  
// buildStatsFromResult drains a SelectResult of AnalyzeIndexResp chunks and
// merges them into a single histogram and — when needCMS is set — a CM
// sketch. The failpoint at the top lets tests inject a failure.
func (e *AnalyzeIndexInterDirc) buildStatsFromResult(result allegrosql.SelectResult, needCMS bool) (*statistics.Histogram, *statistics.CMSketch, error) {
	failpoint.Inject("buildStatsFromResult", func(val failpoint.Value) {
		if val.(bool) {
			failpoint.Return(nil, nil, errors.New("mock buildStatsFromResult error"))
		}
	})
	hist := &statistics.Histogram{}
	var cms *statistics.CMSketch
	if needCMS {
		cms = statistics.NewCMSketch(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]))
	}
	for {
		data, err := result.NextRaw(context.TODO())
		if err != nil {
			return nil, nil, err
		}
		// nil data marks the end of the response stream.
		if data == nil {
			break
		}
		resp := &fidelpb.AnalyzeIndexResp{}
		err = resp.Unmarshal(data)
		if err != nil {
			return nil, nil, err
		}
		respHist := statistics.HistogramFromProto(resp.Hist)
		// Report incremental progress to the analyze job.
		e.job.UFIDelate(int64(respHist.TotalEventCount()))
		hist, err = statistics.MergeHistograms(e.ctx.GetStochastikVars().StmtCtx, hist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets]))
		if err != nil {
			return nil, nil, err
		}
		if needCMS {
			// A missing sketch is tolerated (logged), not treated as fatal.
			if resp.Cms == nil {
				logutil.Logger(context.TODO()).Warn("nil CMS in response", zap.String("causet", e.idxInfo.Block.O), zap.String("index", e.idxInfo.Name.O))
			} else if err := cms.MergeCMSketch(statistics.CMSketchFromProto(resp.Cms), 0); err != nil {
				return nil, nil, err
			}
		}
	}
	// ExtractTopN's error is deliberately returned only after the default
	// value for the sketch has been computed.
	err := hist.ExtractTopN(cms, len(e.idxInfo.DeferredCausets), uint32(e.opts[ast.AnalyzeOptNumTopN]))
	if needCMS && cms != nil {
		cms.CalcDefaultValForAnalyze(uint64(hist.NDV))
	}
	return hist, cms, err
}
   358  
// buildStats opens the analyze requests over the given ranges and builds the
// index histogram and CM sketch. When the null-range probe ran, its row count
// is folded into hist.NullCount. Both select results are closed on exit; a
// close error is surfaced (via the named return) only if no other error
// occurred first.
func (e *AnalyzeIndexInterDirc) buildStats(ranges []*ranger.Range, considerNull bool) (hist *statistics.Histogram, cms *statistics.CMSketch, err error) {
	if err = e.open(ranges, considerNull); err != nil {
		return nil, nil, err
	}
	defer func() {
		err1 := closeAll(e.result, e.countNullRes)
		if err == nil {
			err = err1
		}
	}()
	hist, cms, err = e.buildStatsFromResult(e.result, true)
	if err != nil {
		return nil, nil, err
	}
	if e.countNullRes != nil {
		// The null probe's histogram is only used for its total row count.
		nullHist, _, err := e.buildStatsFromResult(e.countNullRes, false)
		if err != nil {
			return nil, nil, err
		}
		if l := nullHist.Len(); l > 0 {
			hist.NullCount = nullHist.Buckets[l-1].Count
		}
	}
	hist.ID = e.idxInfo.ID
	return hist, cms, nil
}
   385  
   386  func analyzeDeferredCausetsPushdown(defCausInterDirc *AnalyzeDeferredCausetsInterDirc) analyzeResult {
   387  	var ranges []*ranger.Range
   388  	if hc := defCausInterDirc.handleDefCauss; hc != nil {
   389  		if hc.IsInt() {
   390  			ranges = ranger.FullIntRange(allegrosql.HasUnsignedFlag(hc.GetDefCaus(0).RetType.Flag))
   391  		} else {
   392  			ranges = ranger.FullNotNullRange()
   393  		}
   394  	} else {
   395  		ranges = ranger.FullIntRange(false)
   396  	}
   397  	hists, cms, extStats, err := defCausInterDirc.buildStats(ranges, true)
   398  	if err != nil {
   399  		return analyzeResult{Err: err, job: defCausInterDirc.job}
   400  	}
   401  	result := analyzeResult{
   402  		BlockID:  defCausInterDirc.blockID,
   403  		Hist:     hists,
   404  		Cms:      cms,
   405  		ExtStats: extStats,
   406  		job:      defCausInterDirc.job,
   407  	}
   408  	hist := hists[0]
   409  	result.Count = hist.NullCount
   410  	if hist.Len() > 0 {
   411  		result.Count += hist.Buckets[hist.Len()-1].Count
   412  	}
   413  	return result
   414  }
   415  
// AnalyzeDeferredCausetsInterDirc represents Analyze defCausumns push down interlock.
type AnalyzeDeferredCausetsInterDirc struct {
	ctx     stochastikctx.Context
	blockID embedded.AnalyzeBlockID
	// defcausInfo lists the defCausumns being analyzed.
	defcausInfo []*perceptron.DeferredCausetInfo
	// handleDefCauss describes the handle defcausumns; nil when the causet
	// has none (see analyzeDeferredCausetsPushdown/hasPkHist).
	handleDefCauss embedded.HandleDefCauss
	concurrency    int
	priority       int
	analyzePB      *fidelpb.AnalyzeReq
	// resultHandler multiplexes the one or two select results opened in open().
	resultHandler *blockResultHandler
	// opts carries the ANALYZE options (bucket count, sample size, ...).
	opts map[ast.AnalyzeOptionType]uint64
	job  *statistics.AnalyzeJob
}
   429  
   430  func (e *AnalyzeDeferredCausetsInterDirc) open(ranges []*ranger.Range) error {
   431  	e.resultHandler = &blockResultHandler{}
   432  	firstPartRanges, secondPartRanges := splitRanges(ranges, true, false)
   433  	firstResult, err := e.buildResp(firstPartRanges)
   434  	if err != nil {
   435  		return err
   436  	}
   437  	if len(secondPartRanges) == 0 {
   438  		e.resultHandler.open(nil, firstResult)
   439  		return nil
   440  	}
   441  	var secondResult allegrosql.SelectResult
   442  	secondResult, err = e.buildResp(secondPartRanges)
   443  	if err != nil {
   444  		return err
   445  	}
   446  	e.resultHandler.open(firstResult, secondResult)
   447  
   448  	return nil
   449  }
   450  
   451  func (e *AnalyzeDeferredCausetsInterDirc) buildResp(ranges []*ranger.Range) (allegrosql.SelectResult, error) {
   452  	var builder allegrosql.RequestBuilder
   453  	var reqBuilder *allegrosql.RequestBuilder
   454  	if e.handleDefCauss != nil && !e.handleDefCauss.IsInt() {
   455  		reqBuilder = builder.SetCommonHandleRanges(e.ctx.GetStochastikVars().StmtCtx, e.blockID.DefCauslectIDs[0], ranges)
   456  	} else {
   457  		reqBuilder = builder.SetBlockRanges(e.blockID.DefCauslectIDs[0], ranges, nil)
   458  	}
   459  	// Always set KeepOrder of the request to be true, in order to compute
   460  	// correct `correlation` of defCausumns.
   461  	ekvReq, err := reqBuilder.
   462  		SetAnalyzeRequest(e.analyzePB).
   463  		SetStartTS(math.MaxUint64).
   464  		SetKeepOrder(true).
   465  		SetConcurrency(e.concurrency).
   466  		Build()
   467  	if err != nil {
   468  		return nil, err
   469  	}
   470  	ctx := context.TODO()
   471  	result, err := allegrosql.Analyze(ctx, e.ctx.GetClient(), ekvReq, e.ctx.GetStochastikVars().KVVars, e.ctx.GetStochastikVars().InRestrictedALLEGROSQL)
   472  	if err != nil {
   473  		return nil, err
   474  	}
   475  	result.Fetch(ctx)
   476  	return result, nil
   477  }
   478  
// buildStats drains the analyze responses and builds one histogram (plus CM
// sketch) per defCausumn, preceded by the integer PK histogram when the
// causet has an int handle. When needExtStats is set, extended statistics are
// additionally built from the sample defCauslectors. The result handler is
// closed on exit; a close error wipes the partial results.
func (e *AnalyzeDeferredCausetsInterDirc) buildStats(ranges []*ranger.Range, needExtStats bool) (hists []*statistics.Histogram, cms []*statistics.CMSketch, extStats *statistics.ExtendedStatsDefCausl, err error) {
	if err = e.open(ranges); err != nil {
		return nil, nil, nil, err
	}
	defer func() {
		if err1 := e.resultHandler.Close(); err1 != nil {
			hists = nil
			cms = nil
			extStats = nil
			err = err1
		}
	}()
	pkHist := &statistics.Histogram{}
	// One sample defCauslector per analyzed defCausumn; responses are merged
	// into them chunk by chunk.
	defCauslectors := make([]*statistics.SampleDefCauslector, len(e.defcausInfo))
	for i := range defCauslectors {
		defCauslectors[i] = &statistics.SampleDefCauslector{
			IsMerger:      true,
			FMSketch:      statistics.NewFMSketch(maxSketchSize),
			MaxSampleSize: int64(e.opts[ast.AnalyzeOptNumSamples]),
			CMSketch:      statistics.NewCMSketch(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth])),
		}
	}
	for {
		data, err1 := e.resultHandler.nextRaw(context.TODO())
		if err1 != nil {
			return nil, nil, nil, err1
		}
		// nil data marks the end of the response stream.
		if data == nil {
			break
		}
		resp := &fidelpb.AnalyzeDeferredCausetsResp{}
		err = resp.Unmarshal(data)
		if err != nil {
			return nil, nil, nil, err
		}
		sc := e.ctx.GetStochastikVars().StmtCtx
		rowCount := int64(0)
		if hasPkHist(e.handleDefCauss) {
			// Int-handle blocks carry a PK histogram alongside the samples.
			respHist := statistics.HistogramFromProto(resp.PkHist)
			rowCount = int64(respHist.TotalEventCount())
			pkHist, err = statistics.MergeHistograms(sc, pkHist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets]))
			if err != nil {
				return nil, nil, nil, err
			}
		}
		for i, rc := range resp.DefCauslectors {
			respSample := statistics.SampleDefCauslectorFromProto(rc)
			// NOTE(review): rowCount is overwritten (not accumulated) on each
			// iteration, so the job progress below reflects only the last
			// defCauslector's count — confirm this is intended.
			rowCount = respSample.Count + respSample.NullCount
			defCauslectors[i].MergeSampleDefCauslector(sc, respSample)
		}
		e.job.UFIDelate(rowCount)
	}
	timeZone := e.ctx.GetStochastikVars().Location()
	if hasPkHist(e.handleDefCauss) {
		pkInfo := e.handleDefCauss.GetDefCaus(0)
		pkHist.ID = pkInfo.ID
		err = pkHist.DecodeTo(pkInfo.RetType, timeZone)
		if err != nil {
			return nil, nil, nil, err
		}
		// The PK has no CM sketch; append nil to keep cms aligned with hists.
		hists = append(hists, pkHist)
		cms = append(cms, nil)
	}
	for i, defCaus := range e.defcausInfo {
		err := defCauslectors[i].ExtractTopN(uint32(e.opts[ast.AnalyzeOptNumTopN]), e.ctx.GetStochastikVars().StmtCtx, &defCaus.FieldType, timeZone)
		if err != nil {
			return nil, nil, nil, err
		}
		// Decode the raw sampled bytes into typed causets before building
		// the defCausumn histogram.
		for j, s := range defCauslectors[i].Samples {
			defCauslectors[i].Samples[j].Ordinal = j
			defCauslectors[i].Samples[j].Value, err = blockcodec.DecodeDeferredCausetValue(s.Value.GetBytes(), &defCaus.FieldType, timeZone)
			if err != nil {
				return nil, nil, nil, err
			}
		}
		hg, err := statistics.BuildDeferredCauset(e.ctx, int64(e.opts[ast.AnalyzeOptNumBuckets]), defCaus.ID, defCauslectors[i], &defCaus.FieldType)
		if err != nil {
			return nil, nil, nil, err
		}
		hists = append(hists, hg)
		defCauslectors[i].CMSketch.CalcDefaultValForAnalyze(uint64(hg.NDV))
		cms = append(cms, defCauslectors[i].CMSketch)
	}
	if needExtStats {
		statsHandle := petri.GetPetri(e.ctx).StatsHandle()
		extStats, err = statsHandle.BuildExtendedStats(e.blockID.PersistID, e.defcausInfo, defCauslectors)
		if err != nil {
			return nil, nil, nil, err
		}
	}
	return hists, cms, extStats, nil
}
   571  
   572  func hasPkHist(handleDefCauss embedded.HandleDefCauss) bool {
   573  	return handleDefCauss != nil && handleDefCauss.IsInt()
   574  }
   575  
   576  func pkDefCaussCount(handleDefCauss embedded.HandleDefCauss) int {
   577  	if handleDefCauss == nil {
   578  		return 0
   579  	}
   580  	return handleDefCauss.NumDefCauss()
   581  }
   582  
// Histogram metrics for the fast-analyze path, one per observed stage
// (access_regions is recorded in buildSampTask; the others are presumably
// recorded by the sampling/scanning code elsewhere in this file).
var (
	fastAnalyzeHistogramSample        = metrics.FastAnalyzeHistogram.WithLabelValues(metrics.LblGeneral, "sample")
	fastAnalyzeHistogramAccessRegions = metrics.FastAnalyzeHistogram.WithLabelValues(metrics.LblGeneral, "access_regions")
	fastAnalyzeHistogramScanKeys      = metrics.FastAnalyzeHistogram.WithLabelValues(metrics.LblGeneral, "scan_keys")
)
   588  
   589  func analyzeFastInterDirc(exec *AnalyzeFastInterDirc) []analyzeResult {
   590  	hists, cms, err := exec.buildStats()
   591  	if err != nil {
   592  		return []analyzeResult{{Err: err, job: exec.job}}
   593  	}
   594  	var results []analyzeResult
   595  	pkDefCausCount := pkDefCaussCount(exec.handleDefCauss)
   596  	if len(exec.idxsInfo) > 0 {
   597  		for i := pkDefCausCount + len(exec.defcausInfo); i < len(hists); i++ {
   598  			idxResult := analyzeResult{
   599  				BlockID: exec.blockID,
   600  				Hist:    []*statistics.Histogram{hists[i]},
   601  				Cms:     []*statistics.CMSketch{cms[i]},
   602  				IsIndex: 1,
   603  				Count:   hists[i].NullCount,
   604  				job:     exec.job,
   605  			}
   606  			if hists[i].Len() > 0 {
   607  				idxResult.Count += hists[i].Buckets[hists[i].Len()-1].Count
   608  			}
   609  			if exec.rowCount != 0 {
   610  				idxResult.Count = exec.rowCount
   611  			}
   612  			results = append(results, idxResult)
   613  		}
   614  	}
   615  	hist := hists[0]
   616  	defCausResult := analyzeResult{
   617  		BlockID: exec.blockID,
   618  		Hist:    hists[:pkDefCausCount+len(exec.defcausInfo)],
   619  		Cms:     cms[:pkDefCausCount+len(exec.defcausInfo)],
   620  		Count:   hist.NullCount,
   621  		job:     exec.job,
   622  	}
   623  	if hist.Len() > 0 {
   624  		defCausResult.Count += hist.Buckets[hist.Len()-1].Count
   625  	}
   626  	if exec.rowCount != 0 {
   627  		defCausResult.Count = exec.rowCount
   628  	}
   629  	results = append(results, defCausResult)
   630  	return results
   631  }
   632  
// AnalyzeFastInterDirc represents Fast Analyze interlock.
type AnalyzeFastInterDirc struct {
	ctx            stochastikctx.Context
	blockID        embedded.AnalyzeBlockID
	handleDefCauss embedded.HandleDefCauss
	defcausInfo    []*perceptron.DeferredCausetInfo
	idxsInfo       []*perceptron.IndexInfo
	concurrency    int
	opts           map[ast.AnalyzeOptionType]uint64
	tblInfo        *perceptron.BlockInfo
	// cache is the region cache, populated in buildSampTask.
	cache *einsteindb.RegionCache
	wg    *sync.WaitGroup
	// rowCount is the causet row count from `select count(*)`; it stays 0
	// when the historical stats count was used instead
	// (see calculateEstimateSampleStep).
	rowCount   int64
	sampCursor int32
	// sampTasks holds regions fully inside the causet's handle range (to be
	// sampled); scanTasks holds regions only partially overlapping it, clipped
	// to the causet's range (to be scanned). See buildSampTask.
	sampTasks      []*einsteindb.KeyLocation
	scanTasks      []*einsteindb.KeyLocation
	defCauslectors []*statistics.SampleDefCauslector
	randSeed       int64
	job            *statistics.AnalyzeJob
	// estSampStep is the estimated sampling step computed in
	// calculateEstimateSampleStep.
	estSampStep uint32
}
   654  
   655  func (e *AnalyzeFastInterDirc) calculateEstimateSampleStep() (err error) {
   656  	allegrosql := fmt.Sprintf("select flag from allegrosql.stats_histograms where block_id = %d;", e.blockID.PersistID)
   657  	var rows []chunk.Event
   658  	rows, _, err = e.ctx.(sqlexec.RestrictedALLEGROSQLInterlockingDirectorate).InterDircRestrictedALLEGROSQL(allegrosql)
   659  	if err != nil {
   660  		return
   661  	}
   662  	var historyEventCount uint64
   663  	hasBeenAnalyzed := len(rows) != 0 && rows[0].GetInt64(0) == statistics.AnalyzeFlag
   664  	if hasBeenAnalyzed {
   665  		historyEventCount = uint64(petri.GetPetri(e.ctx).StatsHandle().GetPartitionStats(e.tblInfo, e.blockID.PersistID).Count)
   666  	} else {
   667  		dbInfo, ok := petri.GetPetri(e.ctx).SchemaReplicant().SchemaByBlock(e.tblInfo)
   668  		if !ok {
   669  			err = errors.Errorf("database not found for causet '%s'", e.tblInfo.Name)
   670  			return
   671  		}
   672  		var rollbackFn func() error
   673  		rollbackFn, err = e.activateTxnForEventCount()
   674  		if err != nil {
   675  			return
   676  		}
   677  		defer func() {
   678  			if rollbackFn != nil {
   679  				err = rollbackFn()
   680  			}
   681  		}()
   682  		var partition string
   683  		if e.tblInfo.ID != e.blockID.PersistID {
   684  			for _, definition := range e.tblInfo.Partition.Definitions {
   685  				if definition.ID == e.blockID.PersistID {
   686  					partition = fmt.Sprintf(" partition(%s)", definition.Name.L)
   687  					break
   688  				}
   689  			}
   690  		}
   691  		allegrosql := fmt.Sprintf("select count(*) from %s.%s", dbInfo.Name.L, e.tblInfo.Name.L)
   692  		if len(partition) > 0 {
   693  			allegrosql += partition
   694  		}
   695  		var recordSets []sqlexec.RecordSet
   696  		recordSets, err = e.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircuteInternal(context.TODO(), allegrosql)
   697  		if err != nil || len(recordSets) == 0 {
   698  			return
   699  		}
   700  		if len(recordSets) == 0 {
   701  			err = errors.Trace(errors.Errorf("empty record set"))
   702  			return
   703  		}
   704  		defer func() {
   705  			for _, r := range recordSets {
   706  				terror.Call(r.Close)
   707  			}
   708  		}()
   709  		chk := recordSets[0].NewChunk()
   710  		err = recordSets[0].Next(context.TODO(), chk)
   711  		if err != nil {
   712  			return
   713  		}
   714  		e.rowCount = chk.GetEvent(0).GetInt64(0)
   715  		historyEventCount = uint64(e.rowCount)
   716  	}
   717  	totalSampSize := e.opts[ast.AnalyzeOptNumSamples]
   718  	e.estSampStep = uint32(historyEventCount / totalSampSize)
   719  	return
   720  }
   721  
   722  func (e *AnalyzeFastInterDirc) activateTxnForEventCount() (rollbackFn func() error, err error) {
   723  	txn, err := e.ctx.Txn(true)
   724  	if err != nil {
   725  		if ekv.ErrInvalidTxn.Equal(err) {
   726  			_, err := e.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircuteInternal(context.TODO(), "begin")
   727  			if err != nil {
   728  				return nil, errors.Trace(err)
   729  			}
   730  			rollbackFn = func() error {
   731  				_, err := e.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircuteInternal(context.TODO(), "rollback")
   732  				return err
   733  			}
   734  		} else {
   735  			return nil, errors.Trace(err)
   736  		}
   737  	}
   738  	txn.SetOption(ekv.Priority, ekv.PriorityLow)
   739  	txn.SetOption(ekv.IsolationLevel, ekv.RC)
   740  	txn.SetOption(ekv.NotFillCache, true)
   741  	return nil, nil
   742  }
   743  
// buildSampTask build sample tasks.
// It walks the regions covering the causet's handle key range: regions fully
// contained in the range become sample tasks, while regions that only
// partially overlap are clipped to the range and become scan tasks.
func (e *AnalyzeFastInterDirc) buildSampTask() (err error) {
	bo := einsteindb.NewBackofferWithVars(context.Background(), 500, nil)
	causetstore, _ := e.ctx.GetStore().(einsteindb.CausetStorage)
	e.cache = causetstore.GetRegionCache()
	startKey, endKey := blockcodec.GetBlockHandleKeyRange(e.blockID.DefCauslectIDs[0])
	targetKey := startKey
	accessRegionsCounter := 0
	for {
		// Search for the region which contains the targetKey.
		loc, err := e.cache.LocateKey(bo, targetKey)
		if err != nil {
			return err
		}
		// The located region starts past the end of the causet's range: done.
		if bytes.Compare(endKey, loc.StartKey) < 0 {
			break
		}
		accessRegionsCounter++

		// Set the next search key.
		targetKey = loc.EndKey

		// If the KV pairs in the region all belonging to the causet, add it to the sample task.
		if bytes.Compare(startKey, loc.StartKey) <= 0 && len(loc.EndKey) != 0 && bytes.Compare(loc.EndKey, endKey) <= 0 {
			e.sampTasks = append(e.sampTasks, loc)
			continue
		}

		// Partial overlap: clip the location to the causet's range and record
		// it as a scan task. A clipped (or unbounded) end key also ends the
		// walk, since nothing of the causet lies beyond it.
		e.scanTasks = append(e.scanTasks, loc)
		if bytes.Compare(loc.StartKey, startKey) < 0 {
			loc.StartKey = startKey
		}
		if bytes.Compare(endKey, loc.EndKey) < 0 || len(loc.EndKey) == 0 {
			loc.EndKey = endKey
			break
		}
	}
	fastAnalyzeHistogramAccessRegions.Observe(float64(accessRegionsCounter))

	return nil
}
   785  
   786  func (e *AnalyzeFastInterDirc) decodeValues(handle ekv.Handle, sValue []byte, wantDefCauss map[int64]*types.FieldType) (values map[int64]types.Causet, err error) {
   787  	loc := e.ctx.GetStochastikVars().Location()
   788  	values, err = blockcodec.DecodeEventToCausetMap(sValue, wantDefCauss, loc)
   789  	if err != nil || e.handleDefCauss == nil {
   790  		return values, err
   791  	}
   792  	wantDefCauss = make(map[int64]*types.FieldType, e.handleDefCauss.NumDefCauss())
   793  	handleDefCausIDs := make([]int64, e.handleDefCauss.NumDefCauss())
   794  	for i := 0; i < e.handleDefCauss.NumDefCauss(); i++ {
   795  		c := e.handleDefCauss.GetDefCaus(i)
   796  		handleDefCausIDs[i] = c.ID
   797  		wantDefCauss[c.ID] = c.RetType
   798  	}
   799  	return blockcodec.DecodeHandleToCausetMap(handle, handleDefCausIDs, wantDefCauss, loc, values)
   800  }
   801  
   802  func (e *AnalyzeFastInterDirc) getValueByInfo(defCausInfo *perceptron.DeferredCausetInfo, values map[int64]types.Causet) (types.Causet, error) {
   803  	val, ok := values[defCausInfo.ID]
   804  	if !ok {
   805  		return causet.GetDefCausOriginDefaultValue(e.ctx, defCausInfo)
   806  	}
   807  	return val, nil
   808  }
   809  
   810  func (e *AnalyzeFastInterDirc) uFIDelateDefCauslectorSamples(sValue []byte, sKey ekv.Key, samplePos int32) (err error) {
   811  	var handle ekv.Handle
   812  	handle, err = blockcodec.DecodeEventKey(sKey)
   813  	if err != nil {
   814  		return err
   815  	}
   816  
   817  	// Decode defcaus for analyze causet
   818  	wantDefCauss := make(map[int64]*types.FieldType, len(e.defcausInfo))
   819  	for _, defCaus := range e.defcausInfo {
   820  		wantDefCauss[defCaus.ID] = &defCaus.FieldType
   821  	}
   822  
   823  	// Pre-build index->defcaus relationship and refill wantDefCauss if not exists(analyze index)
   824  	index2DefCauss := make([][]*perceptron.DeferredCausetInfo, len(e.idxsInfo))
   825  	for i, idxInfo := range e.idxsInfo {
   826  		for _, idxDefCaus := range idxInfo.DeferredCausets {
   827  			defCausInfo := e.tblInfo.DeferredCausets[idxDefCaus.Offset]
   828  			index2DefCauss[i] = append(index2DefCauss[i], defCausInfo)
   829  			wantDefCauss[defCausInfo.ID] = &defCausInfo.FieldType
   830  		}
   831  	}
   832  
   833  	// Decode the defcaus value in order.
   834  	var values map[int64]types.Causet
   835  	values, err = e.decodeValues(handle, sValue, wantDefCauss)
   836  	if err != nil {
   837  		return err
   838  	}
   839  	// UFIDelate the primary key defCauslector.
   840  	pkDefCaussCount := pkDefCaussCount(e.handleDefCauss)
   841  	for i := 0; i < pkDefCaussCount; i++ {
   842  		defCaus := e.handleDefCauss.GetDefCaus(i)
   843  		v, ok := values[defCaus.ID]
   844  		if !ok {
   845  			return errors.Trace(errors.Errorf("Primary key defCausumn not found"))
   846  		}
   847  		if e.defCauslectors[i].Samples[samplePos] == nil {
   848  			e.defCauslectors[i].Samples[samplePos] = &statistics.SampleItem{}
   849  		}
   850  		e.defCauslectors[i].Samples[samplePos].Handle = handle
   851  		e.defCauslectors[i].Samples[samplePos].Value = v
   852  	}
   853  
   854  	// UFIDelate the defCausumns' defCauslectors.
   855  	for j, defCausInfo := range e.defcausInfo {
   856  		v, err := e.getValueByInfo(defCausInfo, values)
   857  		if err != nil {
   858  			return err
   859  		}
   860  		if e.defCauslectors[pkDefCaussCount+j].Samples[samplePos] == nil {
   861  			e.defCauslectors[pkDefCaussCount+j].Samples[samplePos] = &statistics.SampleItem{}
   862  		}
   863  		e.defCauslectors[pkDefCaussCount+j].Samples[samplePos].Handle = handle
   864  		e.defCauslectors[pkDefCaussCount+j].Samples[samplePos].Value = v
   865  	}
   866  	// UFIDelate the indexes' defCauslectors.
   867  	for j, idxInfo := range e.idxsInfo {
   868  		idxVals := make([]types.Causet, 0, len(idxInfo.DeferredCausets))
   869  		defcaus := index2DefCauss[j]
   870  		for _, defCausInfo := range defcaus {
   871  			v, err := e.getValueByInfo(defCausInfo, values)
   872  			if err != nil {
   873  				return err
   874  			}
   875  			idxVals = append(idxVals, v)
   876  		}
   877  		var bytes []byte
   878  		bytes, err = codec.EncodeKey(e.ctx.GetStochastikVars().StmtCtx, bytes, idxVals...)
   879  		if err != nil {
   880  			return err
   881  		}
   882  		if e.defCauslectors[len(e.defcausInfo)+pkDefCaussCount+j].Samples[samplePos] == nil {
   883  			e.defCauslectors[len(e.defcausInfo)+pkDefCaussCount+j].Samples[samplePos] = &statistics.SampleItem{}
   884  		}
   885  		e.defCauslectors[len(e.defcausInfo)+pkDefCaussCount+j].Samples[samplePos].Handle = handle
   886  		e.defCauslectors[len(e.defcausInfo)+pkDefCaussCount+j].Samples[samplePos].Value = types.NewBytesCauset(bytes)
   887  	}
   888  	return nil
   889  }
   890  
   891  func (e *AnalyzeFastInterDirc) handleBatchSeekResponse(ekvMap map[string][]byte) (err error) {
   892  	length := int32(len(ekvMap))
   893  	newCursor := atomic.AddInt32(&e.sampCursor, length)
   894  	samplePos := newCursor - length
   895  	for sKey, sValue := range ekvMap {
   896  		exceedNeededSampleCounts := uint64(samplePos) >= e.opts[ast.AnalyzeOptNumSamples]
   897  		if exceedNeededSampleCounts {
   898  			atomic.StoreInt32(&e.sampCursor, int32(e.opts[ast.AnalyzeOptNumSamples]))
   899  			break
   900  		}
   901  		err = e.uFIDelateDefCauslectorSamples(sValue, ekv.Key(sKey), samplePos)
   902  		if err != nil {
   903  			return err
   904  		}
   905  		samplePos++
   906  	}
   907  	return nil
   908  }
   909  
// handleScanIter performs reservoir sampling over all KV pairs produced by
// iter, writing the retained pairs through uFIDelateDefCauslectorSamples. It
// returns the number of keys scanned. NOTE(review): the iterator is drained
// but never closed here — confirm whether callers rely on snapshot lifetime.
func (e *AnalyzeFastInterDirc) handleScanIter(iter ekv.Iterator) (scanKeysSize int, err error) {
	rander := rand.New(rand.NewSource(e.randSeed))
	sampleSize := int64(e.opts[ast.AnalyzeOptNumSamples])
	for ; iter.Valid() && err == nil; err = iter.Next() {
		// reservoir sampling
		scanKeysSize++
		// Draw over [0, sampCursor + scanned); once the reservoir is full,
		// keys whose draw falls past the reservoir are skipped.
		randNum := rander.Int63n(int64(e.sampCursor) + int64(scanKeysSize))
		if randNum > sampleSize && e.sampCursor == int32(sampleSize) {
			continue
		}

		// Pick a random victim slot; while the reservoir is still filling,
		// append at the cursor instead of replacing.
		p := rander.Int31n(int32(sampleSize))
		if e.sampCursor < int32(sampleSize) {
			p = e.sampCursor
			e.sampCursor++
		}

		err = e.uFIDelateDefCauslectorSamples(iter.Value(), iter.Key(), p)
		if err != nil {
			return
		}
	}
	return
}
   934  
   935  func (e *AnalyzeFastInterDirc) handleScanTasks(bo *einsteindb.Backoffer) (keysSize int, err error) {
   936  	snapshot, err := e.ctx.GetStore().(einsteindb.CausetStorage).GetSnapshot(ekv.MaxVersion)
   937  	if err != nil {
   938  		return 0, err
   939  	}
   940  	if e.ctx.GetStochastikVars().GetReplicaRead().IsFollowerRead() {
   941  		snapshot.SetOption(ekv.ReplicaRead, ekv.ReplicaReadFollower)
   942  	}
   943  	for _, t := range e.scanTasks {
   944  		iter, err := snapshot.Iter(t.StartKey, t.EndKey)
   945  		if err != nil {
   946  			return keysSize, err
   947  		}
   948  		size, err := e.handleScanIter(iter)
   949  		keysSize += size
   950  		if err != nil {
   951  			return keysSize, err
   952  		}
   953  	}
   954  	return keysSize, nil
   955  }
   956  
// handleSampTasks is the worker body for sample tasks: worker workID processes
// every e.concurrency-th task. Errors are reported through *err (one slot per
// worker, so no locking is needed) and the WaitGroup is released on return.
func (e *AnalyzeFastInterDirc) handleSampTasks(workID int, step uint32, err *error) {
	defer e.wg.Done()
	var snapshot ekv.Snapshot
	snapshot, *err = e.ctx.GetStore().(einsteindb.CausetStorage).GetSnapshot(ekv.MaxVersion)
	if *err != nil {
		return
	}
	// Sampling is best-effort: don't pollute the block cache, read at RC
	// isolation, and run at low priority.
	snapshot.SetOption(ekv.NotFillCache, true)
	snapshot.SetOption(ekv.IsolationLevel, ekv.RC)
	snapshot.SetOption(ekv.Priority, ekv.PriorityLow)
	if e.ctx.GetStochastikVars().GetReplicaRead().IsFollowerRead() {
		snapshot.SetOption(ekv.ReplicaRead, ekv.ReplicaReadFollower)
	}

	rander := rand.New(rand.NewSource(e.randSeed))
	for i := workID; i < len(e.sampTasks); i += e.concurrency {
		task := e.sampTasks[i]
		// randomize the estimate step in range [step - 2 * sqrt(step), step]
		// NOTE(review): `step` is re-randomized from its previous (already
		// randomized) value each iteration, so it can drift downward across
		// tasks — confirm this is intended rather than using the original step.
		if step > 4 { // 2*sqrt(x) < x
			lower, upper := step-uint32(2*math.Sqrt(float64(step))), step
			step = uint32(rander.Intn(int(upper-lower))) + lower
		}
		snapshot.SetOption(ekv.SampleStep, step)
		ekvMap := make(map[string][]byte)
		var iter ekv.Iterator
		iter, *err = snapshot.Iter(task.StartKey, task.EndKey)
		if *err != nil {
			return
		}
		// Drain the (server-side sampled) iterator into a map keyed by raw key.
		for iter.Valid() {
			ekvMap[string(iter.Key())] = iter.Value()
			*err = iter.Next()
			if *err != nil {
				return
			}
		}
		fastAnalyzeHistogramSample.Observe(float64(len(ekvMap)))

		*err = e.handleBatchSeekResponse(ekvMap)
		if *err != nil {
			return
		}
	}
}
  1001  
  1002  func (e *AnalyzeFastInterDirc) buildDeferredCausetStats(ID int64, defCauslector *statistics.SampleDefCauslector, tp *types.FieldType, rowCount int64) (*statistics.Histogram, *statistics.CMSketch, error) {
  1003  	data := make([][]byte, 0, len(defCauslector.Samples))
  1004  	for i, sample := range defCauslector.Samples {
  1005  		sample.Ordinal = i
  1006  		if sample.Value.IsNull() {
  1007  			defCauslector.NullCount++
  1008  			continue
  1009  		}
  1010  		bytes, err := blockcodec.EncodeValue(e.ctx.GetStochastikVars().StmtCtx, nil, sample.Value)
  1011  		if err != nil {
  1012  			return nil, nil, err
  1013  		}
  1014  		data = append(data, bytes)
  1015  	}
  1016  	// Build CMSketch.
  1017  	cmSketch, ndv, scaleRatio := statistics.NewCMSketchWithTopN(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]), data, uint32(e.opts[ast.AnalyzeOptNumTopN]), uint64(rowCount))
  1018  	// Build Histogram.
  1019  	hist, err := statistics.BuildDeferredCausetHist(e.ctx, int64(e.opts[ast.AnalyzeOptNumBuckets]), ID, defCauslector, tp, rowCount, int64(ndv), defCauslector.NullCount*int64(scaleRatio))
  1020  	return hist, cmSketch, err
  1021  }
  1022  
  1023  func (e *AnalyzeFastInterDirc) buildIndexStats(idxInfo *perceptron.IndexInfo, defCauslector *statistics.SampleDefCauslector, rowCount int64) (*statistics.Histogram, *statistics.CMSketch, error) {
  1024  	data := make([][][]byte, len(idxInfo.DeferredCausets))
  1025  	for _, sample := range defCauslector.Samples {
  1026  		var preLen int
  1027  		remained := sample.Value.GetBytes()
  1028  		// We need to insert each prefix values into CM Sketch.
  1029  		for i := 0; i < len(idxInfo.DeferredCausets); i++ {
  1030  			var err error
  1031  			var value []byte
  1032  			value, remained, err = codec.CutOne(remained)
  1033  			if err != nil {
  1034  				return nil, nil, err
  1035  			}
  1036  			preLen += len(value)
  1037  			data[i] = append(data[i], sample.Value.GetBytes()[:preLen])
  1038  		}
  1039  	}
  1040  	numTop := uint32(e.opts[ast.AnalyzeOptNumTopN])
  1041  	cmSketch, ndv, scaleRatio := statistics.NewCMSketchWithTopN(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]), data[0], numTop, uint64(rowCount))
  1042  	// Build CM Sketch for each prefix and merge them into one.
  1043  	for i := 1; i < len(idxInfo.DeferredCausets); i++ {
  1044  		var curCMSketch *statistics.CMSketch
  1045  		// `ndv` should be the ndv of full index, so just rewrite it here.
  1046  		curCMSketch, ndv, scaleRatio = statistics.NewCMSketchWithTopN(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]), data[i], numTop, uint64(rowCount))
  1047  		err := cmSketch.MergeCMSketch(curCMSketch, numTop)
  1048  		if err != nil {
  1049  			return nil, nil, err
  1050  		}
  1051  	}
  1052  	// Build Histogram.
  1053  	hist, err := statistics.BuildDeferredCausetHist(e.ctx, int64(e.opts[ast.AnalyzeOptNumBuckets]), idxInfo.ID, defCauslector, types.NewFieldType(allegrosql.TypeBlob), rowCount, int64(ndv), defCauslector.NullCount*int64(scaleRatio))
  1054  	return hist, cmSketch, err
  1055  }
  1056  
// runTasks launches e.concurrency sample workers, then runs the scan tasks
// sequentially, and finally builds a histogram and CM sketch per defCauslector.
// The defCauslector layout is: handle defCausumns, then analyzed defCausumns,
// then one defCauslector per index.
func (e *AnalyzeFastInterDirc) runTasks() ([]*statistics.Histogram, []*statistics.CMSketch, error) {
	// One error slot per worker: each goroutine writes only its own slot.
	errs := make([]error, e.concurrency)
	pkDefCausCount := pkDefCaussCount(e.handleDefCauss)
	// defCauslect defCausumn samples and primary key samples and index samples.
	length := len(e.defcausInfo) + pkDefCausCount + len(e.idxsInfo)
	e.defCauslectors = make([]*statistics.SampleDefCauslector, length)
	for i := range e.defCauslectors {
		e.defCauslectors[i] = &statistics.SampleDefCauslector{
			MaxSampleSize: int64(e.opts[ast.AnalyzeOptNumSamples]),
			Samples:       make([]*statistics.SampleItem, e.opts[ast.AnalyzeOptNumSamples]),
		}
	}

	e.wg.Add(e.concurrency)
	bo := einsteindb.NewBackofferWithVars(context.Background(), 500, nil)
	for i := 0; i < e.concurrency; i++ {
		go e.handleSampTasks(i, e.estSampStep, &errs[i])
	}
	e.wg.Wait()
	for _, err := range errs {
		if err != nil {
			return nil, nil, err
		}
	}

	scanKeysSize, err := e.handleScanTasks(bo)
	fastAnalyzeHistogramScanKeys.Observe(float64(scanKeysSize))
	if err != nil {
		return nil, nil, err
	}

	// Use the cached stats for the event count when stats maintenance is on
	// (lease > 0) and the cached stats are not pseudo.
	stats := petri.GetPetri(e.ctx).StatsHandle()
	var rowCount int64 = 0
	if stats.Lease() > 0 {
		if t := stats.GetPartitionStats(e.tblInfo, e.blockID.PersistID); !t.Pseudo {
			rowCount = t.Count
		}
	}
	hists, cms := make([]*statistics.Histogram, length), make([]*statistics.CMSketch, length)
	for i := 0; i < length; i++ {
		// Build defCauslector properties.
		defCauslector := e.defCauslectors[i]
		// Trim to the slots actually filled, then sort samples by handle.
		defCauslector.Samples = defCauslector.Samples[:e.sampCursor]
		sort.Slice(defCauslector.Samples, func(i, j int) bool {
			return defCauslector.Samples[i].Handle.Compare(defCauslector.Samples[j].Handle) < 0
		})
		defCauslector.CalcTotalSize()
		// Adjust the event count in case the count of `tblStats` is not accurate and too small.
		rowCount = mathutil.MaxInt64(rowCount, int64(len(defCauslector.Samples)))
		// Scale the total defCausumn size.
		if len(defCauslector.Samples) > 0 {
			defCauslector.TotalSize *= rowCount / int64(len(defCauslector.Samples))
		}
		// Dispatch by position: handle defCausumns, then defCausumns, then indexes.
		if i < pkDefCausCount {
			pkDefCaus := e.handleDefCauss.GetDefCaus(i)
			hists[i], cms[i], err = e.buildDeferredCausetStats(pkDefCaus.ID, e.defCauslectors[i], pkDefCaus.RetType, rowCount)
		} else if i < pkDefCausCount+len(e.defcausInfo) {
			hists[i], cms[i], err = e.buildDeferredCausetStats(e.defcausInfo[i-pkDefCausCount].ID, e.defCauslectors[i], &e.defcausInfo[i-pkDefCausCount].FieldType, rowCount)
		} else {
			hists[i], cms[i], err = e.buildIndexStats(e.idxsInfo[i-pkDefCausCount-len(e.defcausInfo)], e.defCauslectors[i], rowCount)
		}
		if err != nil {
			return nil, nil, err
		}
	}
	return hists, cms, nil
}
  1124  
  1125  func (e *AnalyzeFastInterDirc) buildStats() (hists []*statistics.Histogram, cms []*statistics.CMSketch, err error) {
  1126  	// To set rand seed, it's for unit test.
  1127  	// To ensure that random sequences are different in non-test environments, RandSeed must be set time.Now().
  1128  	if RandSeed == 1 {
  1129  		e.randSeed = time.Now().UnixNano()
  1130  	} else {
  1131  		e.randSeed = RandSeed
  1132  	}
  1133  
  1134  	err = e.buildSampTask()
  1135  	if err != nil {
  1136  		return nil, nil, err
  1137  	}
  1138  
  1139  	return e.runTasks()
  1140  }
  1141  
// AnalyzeTestFastInterDirc is for fast sample in unit test. The exported
// fields mirror the unexported fields of AnalyzeFastInterDirc so a test can
// configure the run; TestFastSample copies them across before building stats.
type AnalyzeTestFastInterDirc struct {
	AnalyzeFastInterDirc
	Ctx             stochastikctx.Context // stochastik context for the analyze run
	PhysicalBlockID int64 // physical causet (or partition) ID to analyze
	HandleDefCauss  embedded.HandleDefCauss // handle (primary key) defCausumns
	DefCaussInfo    []*perceptron.DeferredCausetInfo // defCausumns to analyze
	IdxsInfo        []*perceptron.IndexInfo // indexes to analyze
	Concurrency     int // number of sample workers
	DefCauslectors  []*statistics.SampleDefCauslector // filled after TestFastSample for inspection
	TblInfo         *perceptron.BlockInfo // causet schemaReplicant info
	Opts            map[ast.AnalyzeOptionType]uint64 // analyze options (samples, buckets, ...)
}
  1155  
  1156  // TestFastSample only test the fast sample in unit test.
  1157  func (e *AnalyzeTestFastInterDirc) TestFastSample() error {
  1158  	e.ctx = e.Ctx
  1159  	e.handleDefCauss = e.HandleDefCauss
  1160  	e.defcausInfo = e.DefCaussInfo
  1161  	e.idxsInfo = e.IdxsInfo
  1162  	e.concurrency = e.Concurrency
  1163  	e.blockID = embedded.AnalyzeBlockID{PersistID: e.PhysicalBlockID, DefCauslectIDs: []int64{e.PhysicalBlockID}}
  1164  	e.wg = &sync.WaitGroup{}
  1165  	e.job = &statistics.AnalyzeJob{}
  1166  	e.tblInfo = e.TblInfo
  1167  	e.opts = e.Opts
  1168  	_, _, err := e.buildStats()
  1169  	e.DefCauslectors = e.defCauslectors
  1170  	return err
  1171  }
  1172  
// analyzeIndexIncrementalInterDirc does an incremental analyze on an index:
// it keeps the previously built histogram/CM sketch and only analyzes entries
// beyond the old histogram's upper bound.
type analyzeIndexIncrementalInterDirc struct {
	AnalyzeIndexInterDirc
	oldHist *statistics.Histogram // histogram from the previous analyze run
	oldCMS  *statistics.CMSketch // CM sketch from the previous analyze run
}
  1178  
  1179  func analyzeIndexIncremental(idxInterDirc *analyzeIndexIncrementalInterDirc) analyzeResult {
  1180  	startPos := idxInterDirc.oldHist.GetUpper(idxInterDirc.oldHist.Len() - 1)
  1181  	values, _, err := codec.DecodeRange(startPos.GetBytes(), len(idxInterDirc.idxInfo.DeferredCausets), nil, nil)
  1182  	if err != nil {
  1183  		return analyzeResult{Err: err, job: idxInterDirc.job}
  1184  	}
  1185  	ran := ranger.Range{LowVal: values, HighVal: []types.Causet{types.MaxValueCauset()}}
  1186  	hist, cms, err := idxInterDirc.buildStats([]*ranger.Range{&ran}, false)
  1187  	if err != nil {
  1188  		return analyzeResult{Err: err, job: idxInterDirc.job}
  1189  	}
  1190  	hist, err = statistics.MergeHistograms(idxInterDirc.ctx.GetStochastikVars().StmtCtx, idxInterDirc.oldHist, hist, int(idxInterDirc.opts[ast.AnalyzeOptNumBuckets]))
  1191  	if err != nil {
  1192  		return analyzeResult{Err: err, job: idxInterDirc.job}
  1193  	}
  1194  	if idxInterDirc.oldCMS != nil && cms != nil {
  1195  		err = cms.MergeCMSketch4IncrementalAnalyze(idxInterDirc.oldCMS, uint32(idxInterDirc.opts[ast.AnalyzeOptNumTopN]))
  1196  		if err != nil {
  1197  			return analyzeResult{Err: err, job: idxInterDirc.job}
  1198  		}
  1199  		cms.CalcDefaultValForAnalyze(uint64(hist.NDV))
  1200  	}
  1201  	result := analyzeResult{
  1202  		BlockID: idxInterDirc.blockID,
  1203  		Hist:    []*statistics.Histogram{hist},
  1204  		Cms:     []*statistics.CMSketch{cms},
  1205  		IsIndex: 1,
  1206  		job:     idxInterDirc.job,
  1207  	}
  1208  	result.Count = hist.NullCount
  1209  	if hist.Len() > 0 {
  1210  		result.Count += hist.Buckets[hist.Len()-1].Count
  1211  	}
  1212  	return result
  1213  }
  1214  
// analyzePKIncrementalInterDirc does an incremental analyze on the primary
// key: it keeps the previously built histogram and only analyzes rows beyond
// its upper bound.
type analyzePKIncrementalInterDirc struct {
	AnalyzeDeferredCausetsInterDirc
	oldHist *statistics.Histogram // histogram from the previous analyze run
}
  1219  
  1220  func analyzePKIncremental(defCausInterDirc *analyzePKIncrementalInterDirc) analyzeResult {
  1221  	var maxVal types.Causet
  1222  	pkInfo := defCausInterDirc.handleDefCauss.GetDefCaus(0)
  1223  	if allegrosql.HasUnsignedFlag(pkInfo.RetType.Flag) {
  1224  		maxVal = types.NewUintCauset(math.MaxUint64)
  1225  	} else {
  1226  		maxVal = types.NewIntCauset(math.MaxInt64)
  1227  	}
  1228  	startPos := *defCausInterDirc.oldHist.GetUpper(defCausInterDirc.oldHist.Len() - 1)
  1229  	ran := ranger.Range{LowVal: []types.Causet{startPos}, LowExclude: true, HighVal: []types.Causet{maxVal}}
  1230  	hists, _, _, err := defCausInterDirc.buildStats([]*ranger.Range{&ran}, false)
  1231  	if err != nil {
  1232  		return analyzeResult{Err: err, job: defCausInterDirc.job}
  1233  	}
  1234  	hist := hists[0]
  1235  	hist, err = statistics.MergeHistograms(defCausInterDirc.ctx.GetStochastikVars().StmtCtx, defCausInterDirc.oldHist, hist, int(defCausInterDirc.opts[ast.AnalyzeOptNumBuckets]))
  1236  	if err != nil {
  1237  		return analyzeResult{Err: err, job: defCausInterDirc.job}
  1238  	}
  1239  	result := analyzeResult{
  1240  		BlockID: defCausInterDirc.blockID,
  1241  		Hist:    []*statistics.Histogram{hist},
  1242  		Cms:     []*statistics.CMSketch{nil},
  1243  		job:     defCausInterDirc.job,
  1244  	}
  1245  	if hist.Len() > 0 {
  1246  		result.Count += hist.Buckets[hist.Len()-1].Count
  1247  	}
  1248  	return result
  1249  }
  1250  
// analyzeResult is used to represent analyze result.
type analyzeResult struct {
	BlockID  embedded.AnalyzeBlockID // the causet (and partition) the stats belong to
	Hist     []*statistics.Histogram // one histogram per analyzed defCausumn/index
	Cms      []*statistics.CMSketch // one CM sketch per analyzed defCausumn/index (entries may be nil)
	ExtStats *statistics.ExtendedStatsDefCausl // extended statistics, if defCauslected
	Count    int64 // total event count derived from the histogram
	IsIndex  int // 1 when the result is for an index, 0 otherwise
	Err      error // non-nil when the analyze job failed
	job      *statistics.AnalyzeJob // job descriptor for progress reporting
}