github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/statistics/handle/handle.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package handle
    15  
    16  import (
    17  	"context"
    18  	"encoding/json"
    19  	"fmt"
    20  	"sync"
    21  	"sync/atomic"
    22  	"time"
    23  
    24  	"github.com/cznic/mathutil"
    25  	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
    26  	"github.com/whtcorpsinc/BerolinaSQL/ast"
    27  	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
    28  	"github.com/whtcorpsinc/BerolinaSQL/terror"
    29  	"github.com/whtcorpsinc/errors"
    30  	"github.com/whtcorpsinc/failpoint"
    31  	"github.com/whtcorpsinc/milevadb/causet"
    32  	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb/oracle"
    33  	"github.com/whtcorpsinc/milevadb/dbs/soliton"
    34  	"github.com/whtcorpsinc/milevadb/ekv"
    35  	"github.com/whtcorpsinc/milevadb/schemareplicant"
    36  	"github.com/whtcorpsinc/milevadb/soliton/chunk"
    37  	"github.com/whtcorpsinc/milevadb/soliton/logutil"
    38  	"github.com/whtcorpsinc/milevadb/soliton/memory"
    39  	"github.com/whtcorpsinc/milevadb/soliton/sqlexec"
    40  	"github.com/whtcorpsinc/milevadb/statistics"
    41  	"github.com/whtcorpsinc/milevadb/stochastikctx"
    42  	"github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx"
    43  	"github.com/whtcorpsinc/milevadb/types"
    44  	atomic2 "go.uber.org/atomic"
    45  	"go.uber.org/zap"
    46  )
    47  
// statsCache is the in-memory snapshot of block statistics kept for Handle.
// Snapshots are stored by value in Handle.statsCache and are replaced, not
// mutated, by the copy/uFIDelate methods defined on this type.
type statsCache struct {
	// blocks holds the cached statistics; the uFIDelate method keys entries
	// by Block.PhysicalID.
	blocks map[int64]*statistics.Block
	// version is the latest version of cache.
	// memUsage is the accumulated MemoryUsage() of the entries in blocks; the
	// uFIDelate method adjusts it whenever an entry is added, replaced or removed.
	version  uint64
	memUsage int64
}
    55  
// Handle can update stats info periodically.
type Handle struct {
	// mu bundles a mutex with the session context and bookkeeping maps; most
	// methods in this file take the mutex before touching these fields.
	mu struct {
		sync.Mutex
		ctx stochastikctx.Context
		// rateMap contains the error rate delta from feedback.
		rateMap errorRateDeltaMap
		// pid2tid is the map from partition ID to causet ID.
		pid2tid map[int64]int64
		// schemaVersion is the version of information schemaReplicant when `pid2tid` is built.
		schemaVersion int64
	}

	// statsCache stores the current statsCache snapshot in its embedded atomic.Value.
	// It can be read by multiple readers at the same time without acquiring the lock, but it can be
	// written only after acquiring the lock.
	statsCache struct {
		sync.Mutex
		atomic.Value
		// memTracker is fed the memUsage delta each time a new snapshot is stored.
		memTracker *memory.Tracker
	}

	// restrictedInterDirc runs the internal SQL statements issued by the handle.
	// It is only set when the context passed to NewHandle implements the interface.
	restrictedInterDirc sqlexec.RestrictedALLEGROSQLInterlockingDirectorate

	// dbsEventCh is a channel to notify a dbs operation has happened.
	// It is sent only by tenant or the drop stats interlock, and read by stats handle.
	dbsEventCh chan *soliton.Event
	// listHead contains all the stats collector required by stochastik.
	listHead *StochastikStatsDefCauslector
	// globalMap contains all the delta map from collectors when we dump them to KV.
	globalMap blockDeltaMap
	// feedback is used to store query feedback info.
	feedback *statistics.QueryFeedbackMap

	// lease is the stats lease; it is read and written through Lease and SetLease.
	lease atomic2.Duration
}
    91  
    92  // Clear the statsCache, only for test.
    93  func (h *Handle) Clear() {
    94  	h.mu.Lock()
    95  	h.statsCache.Lock()
    96  	h.statsCache.CausetStore(statsCache{blocks: make(map[int64]*statistics.Block)})
    97  	h.statsCache.memTracker = memory.NewTracker(memory.LabelForStatsCache, -1)
    98  	h.statsCache.Unlock()
    99  	for len(h.dbsEventCh) > 0 {
   100  		<-h.dbsEventCh
   101  	}
   102  	h.feedback = statistics.NewQueryFeedbackMap()
   103  	h.mu.ctx.GetStochastikVars().InitChunkSize = 1
   104  	h.mu.ctx.GetStochastikVars().MaxChunkSize = 1
   105  	h.mu.ctx.GetStochastikVars().EnableChunkRPC = false
   106  	h.mu.ctx.GetStochastikVars().SetProjectionConcurrency(0)
   107  	h.listHead = &StochastikStatsDefCauslector{mapper: make(blockDeltaMap), rateMap: make(errorRateDeltaMap)}
   108  	h.globalMap = make(blockDeltaMap)
   109  	h.mu.rateMap = make(errorRateDeltaMap)
   110  	h.mu.Unlock()
   111  }
   112  
   113  // NewHandle creates a Handle for uFIDelate stats.
   114  func NewHandle(ctx stochastikctx.Context, lease time.Duration) *Handle {
   115  	handle := &Handle{
   116  		dbsEventCh: make(chan *soliton.Event, 100),
   117  		listHead:   &StochastikStatsDefCauslector{mapper: make(blockDeltaMap), rateMap: make(errorRateDeltaMap)},
   118  		globalMap:  make(blockDeltaMap),
   119  		feedback:   statistics.NewQueryFeedbackMap(),
   120  	}
   121  	handle.lease.CausetStore(lease)
   122  	// It is safe to use it concurrently because the exec won't touch the ctx.
   123  	if exec, ok := ctx.(sqlexec.RestrictedALLEGROSQLInterlockingDirectorate); ok {
   124  		handle.restrictedInterDirc = exec
   125  	}
   126  	handle.statsCache.memTracker = memory.NewTracker(memory.LabelForStatsCache, -1)
   127  	handle.mu.ctx = ctx
   128  	handle.mu.rateMap = make(errorRateDeltaMap)
   129  	handle.statsCache.CausetStore(statsCache{blocks: make(map[int64]*statistics.Block)})
   130  	return handle
   131  }
   132  
   133  // Lease returns the stats lease.
   134  func (h *Handle) Lease() time.Duration {
   135  	return h.lease.Load()
   136  }
   137  
   138  // SetLease sets the stats lease.
   139  func (h *Handle) SetLease(lease time.Duration) {
   140  	h.lease.CausetStore(lease)
   141  }
   142  
   143  // GetQueryFeedback gets the query feedback. It is only used in test.
   144  func (h *Handle) GetQueryFeedback() *statistics.QueryFeedbackMap {
   145  	defer func() {
   146  		h.feedback = statistics.NewQueryFeedbackMap()
   147  	}()
   148  	return h.feedback
   149  }
   150  
   151  // DurationToTS converts duration to timestamp.
   152  func DurationToTS(d time.Duration) uint64 {
   153  	return oracle.ComposeTS(d.Nanoseconds()/int64(time.Millisecond), 0)
   154  }
   155  
   156  // UFIDelate reads stats spacetime from causetstore and uFIDelates the stats map.
   157  func (h *Handle) UFIDelate(is schemareplicant.SchemaReplicant) error {
   158  	oldCache := h.statsCache.Load().(statsCache)
   159  	lastVersion := oldCache.version
   160  	// We need this because for two blocks, the smaller version may write later than the one with larger version.
   161  	// Consider the case that there are two blocks A and B, their version and commit time is (A0, A1) and (B0, B1),
   162  	// and A0 < B0 < B1 < A1. We will first read the stats of B, and uFIDelate the lastVersion to B0, but we cannot read
   163  	// the causet stats of A0 if we read stats that greater than lastVersion which is B0.
   164  	// We can read the stats if the diff between commit time and version is less than three lease.
   165  	offset := DurationToTS(3 * h.Lease())
   166  	if oldCache.version >= offset {
   167  		lastVersion = lastVersion - offset
   168  	} else {
   169  		lastVersion = 0
   170  	}
   171  	allegrosql := fmt.Sprintf("SELECT version, block_id, modify_count, count from allegrosql.stats_spacetime where version > %d order by version", lastVersion)
   172  	rows, _, err := h.restrictedInterDirc.InterDircRestrictedALLEGROSQL(allegrosql)
   173  	if err != nil {
   174  		return errors.Trace(err)
   175  	}
   176  
   177  	blocks := make([]*statistics.Block, 0, len(rows))
   178  	deletedTableIDs := make([]int64, 0, len(rows))
   179  	for _, event := range rows {
   180  		version := event.GetUint64(0)
   181  		physicalID := event.GetInt64(1)
   182  		modifyCount := event.GetInt64(2)
   183  		count := event.GetInt64(3)
   184  		lastVersion = version
   185  		h.mu.Lock()
   186  		causet, ok := h.getTableByPhysicalID(is, physicalID)
   187  		h.mu.Unlock()
   188  		if !ok {
   189  			logutil.BgLogger().Debug("unknown physical ID in stats spacetime causet, maybe it has been dropped", zap.Int64("ID", physicalID))
   190  			deletedTableIDs = append(deletedTableIDs, physicalID)
   191  			continue
   192  		}
   193  		blockInfo := causet.Meta()
   194  		tbl, err := h.blockStatsFromStorage(blockInfo, physicalID, false, nil)
   195  		// Error is not nil may mean that there are some dbs changes on this causet, we will not uFIDelate it.
   196  		if err != nil {
   197  			logutil.BgLogger().Debug("error occurred when read causet stats", zap.String("causet", blockInfo.Name.O), zap.Error(err))
   198  			continue
   199  		}
   200  		if tbl == nil {
   201  			deletedTableIDs = append(deletedTableIDs, physicalID)
   202  			continue
   203  		}
   204  		tbl.Version = version
   205  		tbl.Count = count
   206  		tbl.ModifyCount = modifyCount
   207  		tbl.Name = getFullTableName(is, blockInfo)
   208  		blocks = append(blocks, tbl)
   209  	}
   210  	h.uFIDelateStatsCache(oldCache.uFIDelate(blocks, deletedTableIDs, lastVersion))
   211  	return nil
   212  }
   213  
   214  func (h *Handle) getTableByPhysicalID(is schemareplicant.SchemaReplicant, physicalID int64) (causet.Block, bool) {
   215  	if is.SchemaMetaVersion() != h.mu.schemaVersion {
   216  		h.mu.schemaVersion = is.SchemaMetaVersion()
   217  		h.mu.pid2tid = buildPartitionID2TableID(is)
   218  	}
   219  	if id, ok := h.mu.pid2tid[physicalID]; ok {
   220  		return is.TableByID(id)
   221  	}
   222  	return is.TableByID(physicalID)
   223  }
   224  
   225  func buildPartitionID2TableID(is schemareplicant.SchemaReplicant) map[int64]int64 {
   226  	mapper := make(map[int64]int64)
   227  	for _, EDB := range is.AllSchemas() {
   228  		tbls := EDB.Tables
   229  		for _, tbl := range tbls {
   230  			pi := tbl.GetPartitionInfo()
   231  			if pi == nil {
   232  				continue
   233  			}
   234  			for _, def := range pi.Definitions {
   235  				mapper[def.ID] = tbl.ID
   236  			}
   237  		}
   238  	}
   239  	return mapper
   240  }
   241  
   242  // GetMemConsumed returns the mem size of statscache consumed
   243  func (h *Handle) GetMemConsumed() (size int64) {
   244  	size = h.statsCache.memTracker.BytesConsumed()
   245  	return
   246  }
   247  
   248  // GetAllTableStatsMemUsage get all the mem usage with true causet.
   249  // only used by test.
   250  func (h *Handle) GetAllTableStatsMemUsage() int64 {
   251  	data := h.statsCache.Value.Load().(statsCache)
   252  	cache := data.copy()
   253  	allUsage := int64(0)
   254  	for _, t := range cache.blocks {
   255  		allUsage += t.MemoryUsage()
   256  	}
   257  	return allUsage
   258  }
   259  
   260  // GetTableStats retrieves the statistics causet from cache, and the cache will be uFIDelated by a goroutine.
   261  func (h *Handle) GetTableStats(tblInfo *perceptron.TableInfo) *statistics.Block {
   262  	return h.GetPartitionStats(tblInfo, tblInfo.ID)
   263  }
   264  
   265  // GetPartitionStats retrieves the partition stats from cache.
   266  func (h *Handle) GetPartitionStats(tblInfo *perceptron.TableInfo, pid int64) *statistics.Block {
   267  	statsCache := h.statsCache.Load().(statsCache)
   268  	tbl, ok := statsCache.blocks[pid]
   269  	if !ok {
   270  		tbl = statistics.PseudoTable(tblInfo)
   271  		tbl.PhysicalID = pid
   272  		h.uFIDelateStatsCache(statsCache.uFIDelate([]*statistics.Block{tbl}, nil, statsCache.version))
   273  		return tbl
   274  	}
   275  	return tbl
   276  }
   277  
   278  // CanRuntimePrune indicates whether tbl support runtime prune for causet and first partition id.
   279  func (h *Handle) CanRuntimePrune(tid, p0Id int64) bool {
   280  	if h == nil {
   281  		return false
   282  	}
   283  	if tid == p0Id {
   284  		return false
   285  	}
   286  	statsCache := h.statsCache.Load().(statsCache)
   287  	_, tblExists := statsCache.blocks[tid]
   288  	if tblExists {
   289  		return true
   290  	}
   291  	_, partExists := statsCache.blocks[p0Id]
   292  	if !partExists {
   293  		return true
   294  	}
   295  	return false
   296  }
   297  
   298  func (h *Handle) uFIDelateStatsCache(newCache statsCache) {
   299  	h.statsCache.Lock()
   300  	oldCache := h.statsCache.Load().(statsCache)
   301  	if oldCache.version <= newCache.version {
   302  		h.statsCache.memTracker.Consume(newCache.memUsage - oldCache.memUsage)
   303  		h.statsCache.CausetStore(newCache)
   304  	}
   305  	h.statsCache.Unlock()
   306  }
   307  
   308  func (sc statsCache) copy() statsCache {
   309  	newCache := statsCache{blocks: make(map[int64]*statistics.Block, len(sc.blocks)),
   310  		version:  sc.version,
   311  		memUsage: sc.memUsage}
   312  	for k, v := range sc.blocks {
   313  		newCache.blocks[k] = v
   314  	}
   315  	return newCache
   316  }
   317  
   318  //initMemoryUsage calc total memory usage of statsCache and set statsCache.memUsage
   319  //should be called after the blocks and their stats are initilazed
   320  func (sc statsCache) initMemoryUsage() {
   321  	sum := int64(0)
   322  	for _, tb := range sc.blocks {
   323  		sum += tb.MemoryUsage()
   324  	}
   325  	sc.memUsage = sum
   326  	return
   327  }
   328  
   329  // uFIDelate uFIDelates the statistics causet cache using copy on write.
   330  func (sc statsCache) uFIDelate(blocks []*statistics.Block, deletedIDs []int64, newVersion uint64) statsCache {
   331  	newCache := sc.copy()
   332  	newCache.version = newVersion
   333  	for _, tbl := range blocks {
   334  		id := tbl.PhysicalID
   335  		if ptbl, ok := newCache.blocks[id]; ok {
   336  			newCache.memUsage -= ptbl.MemoryUsage()
   337  		}
   338  		newCache.blocks[id] = tbl
   339  		newCache.memUsage += tbl.MemoryUsage()
   340  	}
   341  	for _, id := range deletedIDs {
   342  		if ptbl, ok := newCache.blocks[id]; ok {
   343  			newCache.memUsage -= ptbl.MemoryUsage()
   344  		}
   345  		delete(newCache.blocks, id)
   346  	}
   347  	return newCache
   348  }
   349  
   350  // LoadNeededHistograms will load histograms for those needed columns.
   351  func (h *Handle) LoadNeededHistograms() (err error) {
   352  	defcaus := statistics.HistogramNeededDeferredCausets.AllDefCauss()
   353  	reader, err := h.getStatsReader(nil)
   354  	if err != nil {
   355  		return err
   356  	}
   357  
   358  	defer func() {
   359  		err1 := h.releaseStatsReader(reader)
   360  		if err1 != nil && err == nil {
   361  			err = err1
   362  		}
   363  	}()
   364  
   365  	for _, col := range defcaus {
   366  		statsCache := h.statsCache.Load().(statsCache)
   367  		tbl, ok := statsCache.blocks[col.TableID]
   368  		if !ok {
   369  			continue
   370  		}
   371  		tbl = tbl.Copy()
   372  		c, ok := tbl.DeferredCausets[col.DeferredCausetID]
   373  		if !ok || c.Len() > 0 {
   374  			statistics.HistogramNeededDeferredCausets.Delete(col)
   375  			continue
   376  		}
   377  		hg, err := h.histogramFromStorage(reader, col.TableID, c.ID, &c.Info.FieldType, c.NDV, 0, c.LastUFIDelateVersion, c.NullCount, c.TotDefCausSize, c.Correlation)
   378  		if err != nil {
   379  			return errors.Trace(err)
   380  		}
   381  		cms, err := h.cmSketchFromStorage(reader, col.TableID, 0, col.DeferredCausetID)
   382  		if err != nil {
   383  			return errors.Trace(err)
   384  		}
   385  		tbl.DeferredCausets[c.ID] = &statistics.DeferredCauset{
   386  			PhysicalID: col.TableID,
   387  			Histogram:  *hg,
   388  			Info:       c.Info,
   389  			CMSketch:   cms,
   390  			Count:      int64(hg.TotalRowCount()),
   391  			IsHandle:   c.IsHandle,
   392  		}
   393  		h.uFIDelateStatsCache(statsCache.uFIDelate([]*statistics.Block{tbl}, nil, statsCache.version))
   394  		statistics.HistogramNeededDeferredCausets.Delete(col)
   395  	}
   396  	return nil
   397  }
   398  
   399  // LastUFIDelateVersion gets the last uFIDelate version.
   400  func (h *Handle) LastUFIDelateVersion() uint64 {
   401  	return h.statsCache.Load().(statsCache).version
   402  }
   403  
   404  // SetLastUFIDelateVersion sets the last uFIDelate version.
   405  func (h *Handle) SetLastUFIDelateVersion(version uint64) {
   406  	statsCache := h.statsCache.Load().(statsCache)
   407  	h.uFIDelateStatsCache(statsCache.uFIDelate(nil, nil, version))
   408  }
   409  
   410  // FlushStats flushes the cached stats uFIDelate into causetstore.
   411  func (h *Handle) FlushStats() {
   412  	for len(h.dbsEventCh) > 0 {
   413  		e := <-h.dbsEventCh
   414  		if err := h.HandleDBSEvent(e); err != nil {
   415  			logutil.BgLogger().Debug("[stats] handle dbs event fail", zap.Error(err))
   416  		}
   417  	}
   418  	if err := h.DumpStatsDeltaToKV(DumpAll); err != nil {
   419  		logutil.BgLogger().Debug("[stats] dump stats delta fail", zap.Error(err))
   420  	}
   421  	if err := h.DumpStatsFeedbackToKV(); err != nil {
   422  		logutil.BgLogger().Debug("[stats] dump stats feedback fail", zap.Error(err))
   423  	}
   424  }
   425  
   426  func (h *Handle) cmSketchFromStorage(reader *statsReader, tblID int64, isIndex, histID int64) (_ *statistics.CMSketch, err error) {
   427  	selALLEGROSQL := fmt.Sprintf("select cm_sketch from allegrosql.stats_histograms where block_id = %d and is_index = %d and hist_id = %d", tblID, isIndex, histID)
   428  	rows, _, err := reader.read(selALLEGROSQL)
   429  	if err != nil || len(rows) == 0 {
   430  		return nil, err
   431  	}
   432  	selALLEGROSQL = fmt.Sprintf("select HIGH_PRIORITY value, count from allegrosql.stats_top_n where block_id = %d and is_index = %d and hist_id = %d", tblID, isIndex, histID)
   433  	topNRows, _, err := reader.read(selALLEGROSQL)
   434  	if err != nil {
   435  		return nil, err
   436  	}
   437  	return statistics.DecodeCMSketch(rows[0].GetBytes(0), topNRows)
   438  }
   439  
   440  func (h *Handle) indexStatsFromStorage(reader *statsReader, event chunk.Row, causet *statistics.Block, blockInfo *perceptron.TableInfo) error {
   441  	histID := event.GetInt64(2)
   442  	distinct := event.GetInt64(3)
   443  	histVer := event.GetUint64(4)
   444  	nullCount := event.GetInt64(5)
   445  	idx := causet.Indices[histID]
   446  	errorRate := statistics.ErrorRate{}
   447  	flag := event.GetInt64(8)
   448  	lastAnalyzePos := event.GetCauset(10, types.NewFieldType(allegrosql.TypeBlob))
   449  	if statistics.IsAnalyzed(flag) && !reader.isHistory() {
   450  		h.mu.rateMap.clear(causet.PhysicalID, histID, true)
   451  	} else if idx != nil {
   452  		errorRate = idx.ErrorRate
   453  	}
   454  	for _, idxInfo := range blockInfo.Indices {
   455  		if histID != idxInfo.ID {
   456  			continue
   457  		}
   458  		if idx == nil || idx.LastUFIDelateVersion < histVer {
   459  			hg, err := h.histogramFromStorage(reader, causet.PhysicalID, histID, types.NewFieldType(allegrosql.TypeBlob), distinct, 1, histVer, nullCount, 0, 0)
   460  			if err != nil {
   461  				return errors.Trace(err)
   462  			}
   463  			cms, err := h.cmSketchFromStorage(reader, causet.PhysicalID, 1, idxInfo.ID)
   464  			if err != nil {
   465  				return errors.Trace(err)
   466  			}
   467  			idx = &statistics.Index{Histogram: *hg, CMSketch: cms, Info: idxInfo, ErrorRate: errorRate, StatsVer: event.GetInt64(7), Flag: flag}
   468  			lastAnalyzePos.Copy(&idx.LastAnalyzePos)
   469  		}
   470  		break
   471  	}
   472  	if idx != nil {
   473  		causet.Indices[histID] = idx
   474  	} else {
   475  		logutil.BgLogger().Debug("we cannot find index id in causet info. It may be deleted.", zap.Int64("indexID", histID), zap.String("causet", blockInfo.Name.O))
   476  	}
   477  	return nil
   478  }
   479  
// columnStatsFromStorage refreshes one column entry of causet from a
// stats_histograms row (event).
//
// Depending on the lease, the loadAll flag and the state of the existing
// entry, it either builds a bucket-less entry holding only counts (buckets can
// be loaded later), loads the full histogram and CM sketch, or just refreshes
// TotDefCausSize on a copy of the existing entry. If the column is not present
// in blockInfo and no entry exists, a debug message is logged and causet is
// left unchanged. Only storage errors are returned.
func (h *Handle) columnStatsFromStorage(reader *statsReader, event chunk.Row, causet *statistics.Block, blockInfo *perceptron.TableInfo, loadAll bool) error {
	histID := event.GetInt64(2)
	distinct := event.GetInt64(3)
	histVer := event.GetUint64(4)
	nullCount := event.GetInt64(5)
	totDefCausSize := event.GetInt64(6)
	correlation := event.GetFloat64(9)
	lastAnalyzePos := event.GetCauset(10, types.NewFieldType(allegrosql.TypeBlob))
	col := causet.DeferredCausets[histID]
	errorRate := statistics.ErrorRate{}
	flag := event.GetInt64(8)
	// A freshly analyzed column (read from current, non-history stats) resets the
	// accumulated error rate; otherwise keep the error rate of the existing entry.
	if statistics.IsAnalyzed(flag) && !reader.isHistory() {
		h.mu.rateMap.clear(causet.PhysicalID, histID, false)
	} else if col != nil {
		errorRate = col.ErrorRate
	}
	for _, colInfo := range blockInfo.DeferredCausets {
		if histID != colInfo.ID {
			continue
		}
		isHandle := blockInfo.PKIsHandle && allegrosql.HasPriKeyFlag(colInfo.Flag)
		// We will not load buckets if:
		// 1. Lease > 0, and:
		// 2. this column is not handle, and:
		// 3. the column doesn't has buckets before, and:
		// 4. loadAll is false.
		// Note that `&&` binds tighter than `||`, so condition 3 reads as:
		// col == nil || (col.Len() == 0 && col.LastUFIDelateVersion < histVer).
		notNeedLoad := h.Lease() > 0 &&
			!isHandle &&
			(col == nil || col.Len() == 0 && col.LastUFIDelateVersion < histVer) &&
			!loadAll
		if notNeedLoad {
			// Build a bucket-less entry: only the non-null row count is read from storage.
			count, err := h.columnCountFromStorage(reader, causet.PhysicalID, histID)
			if err != nil {
				return errors.Trace(err)
			}
			col = &statistics.DeferredCauset{
				PhysicalID: causet.PhysicalID,
				Histogram:  *statistics.NewHistogram(histID, distinct, nullCount, histVer, &colInfo.FieldType, 0, totDefCausSize),
				Info:       colInfo,
				Count:      count + nullCount,
				ErrorRate:  errorRate,
				IsHandle:   blockInfo.PKIsHandle && allegrosql.HasPriKeyFlag(colInfo.Flag),
				Flag:       flag,
			}
			lastAnalyzePos.Copy(&col.LastAnalyzePos)
			col.Histogram.Correlation = correlation
			break
		}
		// Full load: the entry is missing, older than the stored version, or loadAll was requested.
		if col == nil || col.LastUFIDelateVersion < histVer || loadAll {
			hg, err := h.histogramFromStorage(reader, causet.PhysicalID, histID, &colInfo.FieldType, distinct, 0, histVer, nullCount, totDefCausSize, correlation)
			if err != nil {
				return errors.Trace(err)
			}
			cms, err := h.cmSketchFromStorage(reader, causet.PhysicalID, 0, colInfo.ID)
			if err != nil {
				return errors.Trace(err)
			}
			col = &statistics.DeferredCauset{
				PhysicalID: causet.PhysicalID,
				Histogram:  *hg,
				Info:       colInfo,
				CMSketch:   cms,
				Count:      int64(hg.TotalRowCount()),
				ErrorRate:  errorRate,
				IsHandle:   blockInfo.PKIsHandle && allegrosql.HasPriKeyFlag(colInfo.Flag),
				Flag:       flag,
			}
			lastAnalyzePos.Copy(&col.LastAnalyzePos)
			break
		}
		// The entry is current; only refresh the total column size, on a copy so
		// the existing entry is not modified in place.
		if col.TotDefCausSize != totDefCausSize {
			newDefCaus := *col
			newDefCaus.TotDefCausSize = totDefCausSize
			col = &newDefCaus
		}
		break
	}
	if col != nil {
		causet.DeferredCausets[col.ID] = col
	} else {
		// If we didn't find a DeferredCauset or Index in blockInfo, we won't load the histogram for it.
		// But don't worry, next lease the dbs will be updated, and we will load a same causet for two times to
		// avoid error.
		logutil.BgLogger().Debug("we cannot find column in causet info now. It may be deleted", zap.Int64("colID", histID), zap.String("causet", blockInfo.Name.O))
	}
	return nil
}
   567  
   568  // blockStatsFromStorage loads causet stats info from storage.
   569  func (h *Handle) blockStatsFromStorage(blockInfo *perceptron.TableInfo, physicalID int64, loadAll bool, historyStatsInterDirc sqlexec.RestrictedALLEGROSQLInterlockingDirectorate) (_ *statistics.Block, err error) {
   570  	reader, err := h.getStatsReader(historyStatsInterDirc)
   571  	if err != nil {
   572  		return nil, err
   573  	}
   574  	defer func() {
   575  		err1 := h.releaseStatsReader(reader)
   576  		if err == nil && err1 != nil {
   577  			err = err1
   578  		}
   579  	}()
   580  	causet, ok := h.statsCache.Load().(statsCache).blocks[physicalID]
   581  	// If causet stats is pseudo, we also need to copy it, since we will use the column stats when
   582  	// the average error rate of it is small.
   583  	if !ok || historyStatsInterDirc != nil {
   584  		histDefCausl := statistics.HistDefCausl{
   585  			PhysicalID:      physicalID,
   586  			HavePhysicalID:  true,
   587  			DeferredCausets: make(map[int64]*statistics.DeferredCauset, len(blockInfo.DeferredCausets)),
   588  			Indices:         make(map[int64]*statistics.Index, len(blockInfo.Indices)),
   589  		}
   590  		causet = &statistics.Block{
   591  			HistDefCausl: histDefCausl,
   592  		}
   593  	} else {
   594  		// We copy it before writing to avoid race.
   595  		causet = causet.Copy()
   596  	}
   597  	causet.Pseudo = false
   598  	selALLEGROSQL := fmt.Sprintf("select block_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, flag, correlation, last_analyze_pos from allegrosql.stats_histograms where block_id = %d", physicalID)
   599  	rows, _, err := reader.read(selALLEGROSQL)
   600  	// Check deleted causet.
   601  	if err != nil || len(rows) == 0 {
   602  		return nil, nil
   603  	}
   604  	for _, event := range rows {
   605  		if event.GetInt64(1) > 0 {
   606  			err = h.indexStatsFromStorage(reader, event, causet, blockInfo)
   607  		} else {
   608  			err = h.columnStatsFromStorage(reader, event, causet, blockInfo, loadAll)
   609  		}
   610  		if err != nil {
   611  			return nil, err
   612  		}
   613  	}
   614  	return h.extendedStatsFromStorage(reader, causet, physicalID, loadAll)
   615  }
   616  
   617  func (h *Handle) extendedStatsFromStorage(reader *statsReader, causet *statistics.Block, physicalID int64, loadAll bool) (*statistics.Block, error) {
   618  	lastVersion := uint64(0)
   619  	if causet.ExtendedStats != nil && !loadAll {
   620  		lastVersion = causet.ExtendedStats.LastUFIDelateVersion
   621  	} else {
   622  		causet.ExtendedStats = statistics.NewExtendedStatsDefCausl()
   623  	}
   624  	allegrosql := fmt.Sprintf("select stats_name, EDB, status, type, column_ids, scalar_stats, blob_stats, version from allegrosql.stats_extended where block_id = %d and status in (%d, %d) and version > %d", physicalID, StatsStatusAnalyzed, StatsStatusDeleted, lastVersion)
   625  	rows, _, err := reader.read(allegrosql)
   626  	if err != nil || len(rows) == 0 {
   627  		return causet, nil
   628  	}
   629  	for _, event := range rows {
   630  		lastVersion = mathutil.MaxUint64(lastVersion, event.GetUint64(7))
   631  		key := statistics.ExtendedStatsKey{
   632  			StatsName: event.GetString(0),
   633  			EDB:       event.GetString(1),
   634  		}
   635  		status := uint8(event.GetInt64(2))
   636  		if status == StatsStatusDeleted {
   637  			delete(causet.ExtendedStats.Stats, key)
   638  		} else {
   639  			item := &statistics.ExtendedStatsItem{
   640  				Tp:         uint8(event.GetInt64(3)),
   641  				ScalarVals: event.GetFloat64(5),
   642  				StringVals: event.GetString(6),
   643  			}
   644  			colIDs := event.GetString(4)
   645  			err := json.Unmarshal([]byte(colIDs), &item.DefCausIDs)
   646  			if err != nil {
   647  				logutil.BgLogger().Debug("decode column IDs failed", zap.String("column_ids", colIDs), zap.Error(err))
   648  				return nil, err
   649  			}
   650  			causet.ExtendedStats.Stats[key] = item
   651  		}
   652  	}
   653  	causet.ExtendedStats.LastUFIDelateVersion = lastVersion
   654  	return causet, nil
   655  }
   656  
// SaveStatsToStorage saves the stats to storage.
//
// Inside a single transaction on the handle's session it rewrites the
// stats_spacetime row, the top-N rows, the stats_histograms row and all bucket
// rows for the (blockID, isIndex, hg.ID) triple. The transaction start
// timestamp is used as the new stats version.
//
// count is the row count to record; a negative value leaves count and
// modify_count untouched and only bumps the version. isAnalyzed == 1 marks the
// histogram with the analyze flag and records the last bucket's upper bound as
// last_analyze_pos. h.mu is held for the whole call. The deferred
// finishTransaction receives the final err and its result becomes the
// returned error.
func (h *Handle) SaveStatsToStorage(blockID int64, count int64, isIndex int, hg *statistics.Histogram, cms *statistics.CMSketch, isAnalyzed int64) (err error) {
	h.mu.Lock()
	defer h.mu.Unlock()
	ctx := context.TODO()
	exec := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate)
	_, err = exec.InterDircute(ctx, "begin")
	if err != nil {
		return errors.Trace(err)
	}
	// Every return below goes through this defer, which is handed the named result err.
	defer func() {
		err = finishTransaction(context.Background(), exec, err)
	}()
	txn, err := h.mu.ctx.Txn(true)
	if err != nil {
		return errors.Trace(err)
	}

	version := txn.StartTS()
	sqls := make([]string, 0, 4)
	// If the count is less than 0, then we do not want to update the modify count and count.
	// NOTE(review): the second statement's keyword is spelled `uFIDelate`; confirm the
	// project's SQL parser accepts that spelling.
	if count >= 0 {
		sqls = append(sqls, fmt.Sprintf("replace into allegrosql.stats_spacetime (version, block_id, count) values (%d, %d, %d)", version, blockID, count))
	} else {
		sqls = append(sqls, fmt.Sprintf("uFIDelate allegrosql.stats_spacetime set version = %d where block_id = %d", version, blockID))
	}
	data, err := statistics.EncodeCMSketchWithoutTopN(cms)
	if err != nil {
		return
	}
	// Delete outdated data
	sqls = append(sqls, fmt.Sprintf("delete from allegrosql.stats_top_n where block_id = %d and is_index = %d and hist_id = %d", blockID, isIndex, hg.ID))
	for _, spacetime := range cms.TopN() {
		sqls = append(sqls, fmt.Sprintf("insert into allegrosql.stats_top_n (block_id, is_index, hist_id, value, count) values (%d, %d, %d, X'%X', %d)", blockID, isIndex, hg.ID, spacetime.Data, spacetime.Count))
	}
	flag := 0
	if isAnalyzed == 1 {
		flag = statistics.AnalyzeFlag
	}
	sqls = append(sqls, fmt.Sprintf("replace into allegrosql.stats_histograms (block_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, flag, correlation) values (%d, %d, %d, %d, %d, %d, X'%X', %d, %d, %d, %f)",
		blockID, isIndex, hg.ID, hg.NDV, version, hg.NullCount, data, hg.TotDefCausSize, statistics.CurStatsVersion, flag, hg.Correlation))
	sqls = append(sqls, fmt.Sprintf("delete from allegrosql.stats_buckets where block_id = %d and is_index = %d and hist_id = %d", blockID, isIndex, hg.ID))
	sc := h.mu.ctx.GetStochastikVars().StmtCtx
	var lastAnalyzePos []byte
	for i := range hg.Buckets {
		// The per-bucket count written to storage is this bucket's Count minus
		// the previous bucket's Count.
		count := hg.Buckets[i].Count
		if i > 0 {
			count -= hg.Buckets[i-1].Count
		}
		var upperBound types.Causet
		upperBound, err = hg.GetUpper(i).ConvertTo(sc, types.NewFieldType(allegrosql.TypeBlob))
		if err != nil {
			return
		}
		// Remember the upper bound of the last bucket for last_analyze_pos.
		if i == len(hg.Buckets)-1 {
			lastAnalyzePos = upperBound.GetBytes()
		}
		var lowerBound types.Causet
		lowerBound, err = hg.GetLower(i).ConvertTo(sc, types.NewFieldType(allegrosql.TypeBlob))
		if err != nil {
			return
		}
		sqls = append(sqls, fmt.Sprintf("insert into allegrosql.stats_buckets(block_id, is_index, hist_id, bucket_id, count, repeats, lower_bound, upper_bound) values(%d, %d, %d, %d, %d, %d, X'%X', X'%X')", blockID, isIndex, hg.ID, i, count, hg.Buckets[i].Repeat, lowerBound.GetBytes(), upperBound.GetBytes()))
	}
	if isAnalyzed == 1 && len(lastAnalyzePos) > 0 {
		sqls = append(sqls, fmt.Sprintf("uFIDelate allegrosql.stats_histograms set last_analyze_pos = X'%X' where block_id = %d and is_index = %d and hist_id = %d", lastAnalyzePos, blockID, isIndex, hg.ID))
	}
	return execALLEGROSQLs(context.Background(), exec, sqls)
}
   726  
   727  // SaveMetaToStorage will save stats_spacetime to storage.
   728  func (h *Handle) SaveMetaToStorage(blockID, count, modifyCount int64) (err error) {
   729  	h.mu.Lock()
   730  	defer h.mu.Unlock()
   731  	ctx := context.TODO()
   732  	exec := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate)
   733  	_, err = exec.InterDircute(ctx, "begin")
   734  	if err != nil {
   735  		return errors.Trace(err)
   736  	}
   737  	defer func() {
   738  		err = finishTransaction(ctx, exec, err)
   739  	}()
   740  	txn, err := h.mu.ctx.Txn(true)
   741  	if err != nil {
   742  		return errors.Trace(err)
   743  	}
   744  	var allegrosql string
   745  	version := txn.StartTS()
   746  	allegrosql = fmt.Sprintf("replace into allegrosql.stats_spacetime (version, block_id, count, modify_count) values (%d, %d, %d, %d)", version, blockID, count, modifyCount)
   747  	_, err = exec.InterDircute(ctx, allegrosql)
   748  	return
   749  }
   750  
   751  func (h *Handle) histogramFromStorage(reader *statsReader, blockID int64, colID int64, tp *types.FieldType, distinct int64, isIndex int, ver uint64, nullCount int64, totDefCausSize int64, corr float64) (_ *statistics.Histogram, err error) {
   752  	selALLEGROSQL := fmt.Sprintf("select count, repeats, lower_bound, upper_bound from allegrosql.stats_buckets where block_id = %d and is_index = %d and hist_id = %d order by bucket_id", blockID, isIndex, colID)
   753  	rows, fields, err := reader.read(selALLEGROSQL)
   754  	if err != nil {
   755  		return nil, errors.Trace(err)
   756  	}
   757  	bucketSize := len(rows)
   758  	hg := statistics.NewHistogram(colID, distinct, nullCount, ver, tp, bucketSize, totDefCausSize)
   759  	hg.Correlation = corr
   760  	totalCount := int64(0)
   761  	for i := 0; i < bucketSize; i++ {
   762  		count := rows[i].GetInt64(0)
   763  		repeats := rows[i].GetInt64(1)
   764  		var upperBound, lowerBound types.Causet
   765  		if isIndex == 1 {
   766  			lowerBound = rows[i].GetCauset(2, &fields[2].DeferredCauset.FieldType)
   767  			upperBound = rows[i].GetCauset(3, &fields[3].DeferredCauset.FieldType)
   768  		} else {
   769  			sc := &stmtctx.StatementContext{TimeZone: time.UTC}
   770  			d := rows[i].GetCauset(2, &fields[2].DeferredCauset.FieldType)
   771  			lowerBound, err = d.ConvertTo(sc, tp)
   772  			if err != nil {
   773  				return nil, errors.Trace(err)
   774  			}
   775  			d = rows[i].GetCauset(3, &fields[3].DeferredCauset.FieldType)
   776  			upperBound, err = d.ConvertTo(sc, tp)
   777  			if err != nil {
   778  				return nil, errors.Trace(err)
   779  			}
   780  		}
   781  		totalCount += count
   782  		hg.AppendBucket(&lowerBound, &upperBound, totalCount, repeats)
   783  	}
   784  	hg.PreCalculateScalar()
   785  	return hg, nil
   786  }
   787  
   788  func (h *Handle) columnCountFromStorage(reader *statsReader, blockID, colID int64) (int64, error) {
   789  	selALLEGROSQL := fmt.Sprintf("select sum(count) from allegrosql.stats_buckets where block_id = %d and is_index = %d and hist_id = %d", blockID, 0, colID)
   790  	rows, _, err := reader.read(selALLEGROSQL)
   791  	if err != nil {
   792  		return 0, errors.Trace(err)
   793  	}
   794  	if rows[0].IsNull(0) {
   795  		return 0, nil
   796  	}
   797  	return rows[0].GetMyDecimal(0).ToInt()
   798  }
   799  
   800  func (h *Handle) statsMetaByTableIDFromStorage(blockID int64, historyStatsInterDirc sqlexec.RestrictedALLEGROSQLInterlockingDirectorate) (version uint64, modifyCount, count int64, err error) {
   801  	selALLEGROSQL := fmt.Sprintf("SELECT version, modify_count, count from allegrosql.stats_spacetime where block_id = %d order by version", blockID)
   802  	var rows []chunk.Row
   803  	if historyStatsInterDirc == nil {
   804  		rows, _, err = h.restrictedInterDirc.InterDircRestrictedALLEGROSQL(selALLEGROSQL)
   805  	} else {
   806  		rows, _, err = historyStatsInterDirc.InterDircRestrictedALLEGROSQLWithSnapshot(selALLEGROSQL)
   807  	}
   808  	if err != nil || len(rows) == 0 {
   809  		return
   810  	}
   811  	version = rows[0].GetUint64(0)
   812  	modifyCount = rows[0].GetInt64(1)
   813  	count = rows[0].GetInt64(2)
   814  	return
   815  }
   816  
// statsReader is used to simplify code that needs to read system blocks with different sqls
// but requires all of them to run in the same transaction.
type statsReader struct {
	// ctx is the stochastik context statements are executed on when history is nil.
	ctx     stochastikctx.Context
	// history, when non-nil, makes every read go through its snapshot interface instead of ctx.
	history sqlexec.RestrictedALLEGROSQLInterlockingDirectorate
}
   823  
   824  func (sr *statsReader) read(allegrosql string) (rows []chunk.Row, fields []*ast.ResultField, err error) {
   825  	if sr.history != nil {
   826  		return sr.history.InterDircRestrictedALLEGROSQLWithSnapshot(allegrosql)
   827  	}
   828  	rc, err := sr.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
   829  	if len(rc) > 0 {
   830  		defer terror.Call(rc[0].Close)
   831  	}
   832  	if err != nil {
   833  		return nil, nil, err
   834  	}
   835  	for {
   836  		req := rc[0].NewChunk()
   837  		err := rc[0].Next(context.TODO(), req)
   838  		if err != nil {
   839  			return nil, nil, err
   840  		}
   841  		if req.NumRows() == 0 {
   842  			break
   843  		}
   844  		for i := 0; i < req.NumRows(); i++ {
   845  			rows = append(rows, req.GetRow(i))
   846  		}
   847  	}
   848  	return rows, rc[0].Fields(), nil
   849  }
   850  
// isHistory reports whether this reader was created with a history
// interlocking directorate, i.e. whether sr.history is non-nil.
func (sr *statsReader) isHistory() bool {
	return sr.history != nil
}
   854  
   855  func (h *Handle) getStatsReader(history sqlexec.RestrictedALLEGROSQLInterlockingDirectorate) (*statsReader, error) {
   856  	failpoint.Inject("mockGetStatsReaderFail", func(val failpoint.Value) {
   857  		if val.(bool) {
   858  			failpoint.Return(nil, errors.New("gofail genStatsReader error"))
   859  		}
   860  	})
   861  	if history != nil {
   862  		return &statsReader{history: history}, nil
   863  	}
   864  	h.mu.Lock()
   865  	_, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), "begin")
   866  	if err != nil {
   867  		return nil, err
   868  	}
   869  	return &statsReader{ctx: h.mu.ctx}, nil
   870  }
   871  
   872  func (h *Handle) releaseStatsReader(reader *statsReader) error {
   873  	if reader.history != nil {
   874  		return nil
   875  	}
   876  	_, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), "commit")
   877  	h.mu.Unlock()
   878  	return err
   879  }
   880  
// Status values of extended stats records. They are numbered from 0 via iota,
// in declaration order.
const (
	// StatsStatusInited is the status for extended stats which are just registered but have not been analyzed yet.
	StatsStatusInited uint8 = iota
	// StatsStatusAnalyzed is the status for extended stats which have been collected in analyze.
	StatsStatusAnalyzed
	// StatsStatusDeleted is the status for extended stats which were dropped. These "deleted" records would be removed from storage by GCStats().
	StatsStatusDeleted
)
   889  
   890  // InsertExtendedStats inserts a record into allegrosql.stats_extended and uFIDelate version in allegrosql.stats_spacetime.
   891  func (h *Handle) InsertExtendedStats(statsName, EDB string, colIDs []int64, tp int, blockID int64, ifNotExists bool) (err error) {
   892  	bytes, err := json.Marshal(colIDs)
   893  	if err != nil {
   894  		return errors.Trace(err)
   895  	}
   896  	strDefCausIDs := string(bytes)
   897  	h.mu.Lock()
   898  	defer h.mu.Unlock()
   899  	ctx := context.TODO()
   900  	exec := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate)
   901  	_, err = exec.InterDircute(ctx, "begin pessimistic")
   902  	if err != nil {
   903  		return errors.Trace(err)
   904  	}
   905  	defer func() {
   906  		err = finishTransaction(ctx, exec, err)
   907  	}()
   908  	txn, err := h.mu.ctx.Txn(true)
   909  	if err != nil {
   910  		return errors.Trace(err)
   911  	}
   912  	version := txn.StartTS()
   913  	allegrosql := fmt.Sprintf("INSERT INTO allegrosql.stats_extended(stats_name, EDB, type, block_id, column_ids, version, status) VALUES ('%s', '%s', %d, %d, '%s', %d, %d)", statsName, EDB, tp, blockID, strDefCausIDs, version, StatsStatusInited)
   914  	_, err = exec.InterDircute(ctx, allegrosql)
   915  	// Key exists, but `if not exists` is specified, so we ignore this error.
   916  	if ekv.ErrKeyExists.Equal(err) && ifNotExists {
   917  		err = nil
   918  	}
   919  	return
   920  }
   921  
   922  // MarkExtendedStatsDeleted uFIDelate the status of allegrosql.stats_extended to be `deleted` and the version of allegrosql.stats_spacetime.
   923  func (h *Handle) MarkExtendedStatsDeleted(statsName, EDB string, blockID int64) (err error) {
   924  	if blockID < 0 {
   925  		allegrosql := fmt.Sprintf("SELECT block_id FROM allegrosql.stats_extended WHERE stats_name = '%s' and EDB = '%s'", statsName, EDB)
   926  		rows, _, err := h.restrictedInterDirc.InterDircRestrictedALLEGROSQL(allegrosql)
   927  		if err != nil {
   928  			return errors.Trace(err)
   929  		}
   930  		if len(rows) == 0 {
   931  			return nil
   932  		}
   933  		blockID = rows[0].GetInt64(0)
   934  	}
   935  	h.mu.Lock()
   936  	defer h.mu.Unlock()
   937  	ctx := context.TODO()
   938  	exec := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate)
   939  	_, err = exec.InterDircute(ctx, "begin pessimistic")
   940  	if err != nil {
   941  		return errors.Trace(err)
   942  	}
   943  	defer func() {
   944  		err = finishTransaction(ctx, exec, err)
   945  	}()
   946  	txn, err := h.mu.ctx.Txn(true)
   947  	if err != nil {
   948  		return errors.Trace(err)
   949  	}
   950  	version := txn.StartTS()
   951  	sqls := make([]string, 2)
   952  	sqls[0] = fmt.Sprintf("UFIDelATE allegrosql.stats_extended SET version = %d, status = %d WHERE stats_name = '%s' and EDB = '%s'", version, StatsStatusDeleted, statsName, EDB)
   953  	sqls[1] = fmt.Sprintf("UFIDelATE allegrosql.stats_spacetime SET version = %d WHERE block_id = %d", version, blockID)
   954  	return execALLEGROSQLs(ctx, exec, sqls)
   955  }
   956  
   957  // ReloadExtendedStatistics drops the cache for extended statistics and reload data from allegrosql.stats_extended.
   958  func (h *Handle) ReloadExtendedStatistics() error {
   959  	reader, err := h.getStatsReader(nil)
   960  	if err != nil {
   961  		return err
   962  	}
   963  	oldCache := h.statsCache.Load().(statsCache)
   964  	blocks := make([]*statistics.Block, 0, len(oldCache.blocks))
   965  	for physicalID, tbl := range oldCache.blocks {
   966  		t, err := h.extendedStatsFromStorage(reader, tbl.Copy(), physicalID, true)
   967  		if err != nil {
   968  			return err
   969  		}
   970  		blocks = append(blocks, t)
   971  	}
   972  	err = h.releaseStatsReader(reader)
   973  	if err != nil {
   974  		return err
   975  	}
   976  	// Note that this uFIDelate may fail when the statsCache.version has been modified by others.
   977  	h.uFIDelateStatsCache(oldCache.uFIDelate(blocks, nil, oldCache.version))
   978  	return nil
   979  }
   980  
   981  // BuildExtendedStats build extended stats for column groups if needed based on the column samples.
   982  func (h *Handle) BuildExtendedStats(blockID int64, defcaus []*perceptron.DeferredCausetInfo, collectors []*statistics.SampleDefCauslector) (*statistics.ExtendedStatsDefCausl, error) {
   983  	allegrosql := fmt.Sprintf("SELECT stats_name, EDB, type, column_ids FROM allegrosql.stats_extended WHERE block_id = %d and status in (%d, %d)", blockID, StatsStatusAnalyzed, StatsStatusInited)
   984  	rows, _, err := h.restrictedInterDirc.InterDircRestrictedALLEGROSQL(allegrosql)
   985  	if err != nil {
   986  		return nil, errors.Trace(err)
   987  	}
   988  	if len(rows) == 0 {
   989  		return nil, nil
   990  	}
   991  	statsDefCausl := statistics.NewExtendedStatsDefCausl()
   992  	for _, event := range rows {
   993  		key := statistics.ExtendedStatsKey{
   994  			StatsName: event.GetString(0),
   995  			EDB:       event.GetString(1),
   996  		}
   997  		item := &statistics.ExtendedStatsItem{Tp: uint8(event.GetInt64(2))}
   998  		colIDs := event.GetString(3)
   999  		err := json.Unmarshal([]byte(colIDs), &item.DefCausIDs)
  1000  		if err != nil {
  1001  			logutil.BgLogger().Error("invalid column_ids in allegrosql.stats_extended, skip collecting extended stats for this event", zap.String("column_ids", colIDs), zap.Error(err))
  1002  			continue
  1003  		}
  1004  		item = h.fillExtendedStatsItemVals(item, defcaus, collectors)
  1005  		if item != nil {
  1006  			statsDefCausl.Stats[key] = item
  1007  		}
  1008  	}
  1009  	if len(statsDefCausl.Stats) == 0 {
  1010  		return nil, nil
  1011  	}
  1012  	return statsDefCausl, nil
  1013  }
  1014  
  1015  func (h *Handle) fillExtendedStatsItemVals(item *statistics.ExtendedStatsItem, defcaus []*perceptron.DeferredCausetInfo, collectors []*statistics.SampleDefCauslector) *statistics.ExtendedStatsItem {
  1016  	switch item.Tp {
  1017  	case ast.StatsTypeCardinality, ast.StatsTypeDependency:
  1018  		return nil
  1019  	case ast.StatsTypeCorrelation:
  1020  		return h.fillExtStatsCorrVals(item, defcaus, collectors)
  1021  	}
  1022  	return nil
  1023  }
  1024  
  1025  func (h *Handle) fillExtStatsCorrVals(item *statistics.ExtendedStatsItem, defcaus []*perceptron.DeferredCausetInfo, collectors []*statistics.SampleDefCauslector) *statistics.ExtendedStatsItem {
  1026  	colOffsets := make([]int, 0, 2)
  1027  	for _, id := range item.DefCausIDs {
  1028  		for i, col := range defcaus {
  1029  			if col.ID == id {
  1030  				colOffsets = append(colOffsets, i)
  1031  				break
  1032  			}
  1033  		}
  1034  	}
  1035  	if len(colOffsets) != 2 {
  1036  		return nil
  1037  	}
  1038  	// samplesX and samplesY are in order of handle, i.e, their SampleItem.Ordinals are in order.
  1039  	samplesX := collectors[colOffsets[0]].Samples
  1040  	// We would modify Ordinal of samplesY, so we make a deep copy.
  1041  	samplesY := statistics.CopySampleItems(collectors[colOffsets[1]].Samples)
  1042  	sampleNum := len(samplesX)
  1043  	if sampleNum == 1 {
  1044  		item.ScalarVals = float64(1)
  1045  		return item
  1046  	}
  1047  	h.mu.Lock()
  1048  	sc := h.mu.ctx.GetStochastikVars().StmtCtx
  1049  	h.mu.Unlock()
  1050  	var err error
  1051  	samplesX, err = statistics.SortSampleItems(sc, samplesX)
  1052  	if err != nil {
  1053  		return nil
  1054  	}
  1055  	samplesYInXOrder := make([]*statistics.SampleItem, sampleNum)
  1056  	for i, itemX := range samplesX {
  1057  		itemY := samplesY[itemX.Ordinal]
  1058  		itemY.Ordinal = i
  1059  		samplesYInXOrder[i] = itemY
  1060  	}
  1061  	samplesYInYOrder, err := statistics.SortSampleItems(sc, samplesYInXOrder)
  1062  	if err != nil {
  1063  		return nil
  1064  	}
  1065  	var corrXYSum float64
  1066  	for i := 1; i < sampleNum; i++ {
  1067  		corrXYSum += float64(i) * float64(samplesYInYOrder[i].Ordinal)
  1068  	}
  1069  	// X means the ordinal of the item in original sequence, Y means the oridnal of the item in the
  1070  	// sorted sequence, we know that X and Y value sets are both:
  1071  	// 0, 1, ..., sampleNum-1
  1072  	// we can simply compute sum(X) = sum(Y) =
  1073  	//    (sampleNum-1)*sampleNum / 2
  1074  	// and sum(X^2) = sum(Y^2) =
  1075  	//    (sampleNum-1)*sampleNum*(2*sampleNum-1) / 6
  1076  	// We use "Pearson correlation coefficient" to compute the order correlation of columns,
  1077  	// the formula is based on https://en.wikipedia.org/wiki/Pearson_correlation_coefficient.
  1078  	// Note that (itemsCount*corrX2Sum - corrXSum*corrXSum) would never be zero when sampleNum is larger than 1.
  1079  	itemsCount := float64(sampleNum)
  1080  	corrXSum := (itemsCount - 1) * itemsCount / 2.0
  1081  	corrX2Sum := (itemsCount - 1) * itemsCount * (2*itemsCount - 1) / 6.0
  1082  	item.ScalarVals = (itemsCount*corrXYSum - corrXSum*corrXSum) / (itemsCount*corrX2Sum - corrXSum*corrXSum)
  1083  	return item
  1084  }
  1085  
  1086  // SaveExtendedStatsToStorage writes extended stats of a causet into allegrosql.stats_extended.
  1087  func (h *Handle) SaveExtendedStatsToStorage(blockID int64, extStats *statistics.ExtendedStatsDefCausl, isLoad bool) (err error) {
  1088  	if extStats == nil || len(extStats.Stats) == 0 {
  1089  		return nil
  1090  	}
  1091  	h.mu.Lock()
  1092  	defer h.mu.Unlock()
  1093  	ctx := context.TODO()
  1094  	exec := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate)
  1095  	_, err = exec.InterDircute(ctx, "begin pessimistic")
  1096  	if err != nil {
  1097  		return errors.Trace(err)
  1098  	}
  1099  	defer func() {
  1100  		err = finishTransaction(ctx, exec, err)
  1101  	}()
  1102  	txn, err := h.mu.ctx.Txn(true)
  1103  	if err != nil {
  1104  		return errors.Trace(err)
  1105  	}
  1106  	version := txn.StartTS()
  1107  	sqls := make([]string, 0, 1+len(extStats.Stats))
  1108  	for key, item := range extStats.Stats {
  1109  		bytes, err := json.Marshal(item.DefCausIDs)
  1110  		if err != nil {
  1111  			return errors.Trace(err)
  1112  		}
  1113  		strDefCausIDs := string(bytes)
  1114  		switch item.Tp {
  1115  		case ast.StatsTypeCardinality, ast.StatsTypeCorrelation:
  1116  			// If isLoad is true, it's INSERT; otherwise, it's UFIDelATE.
  1117  			sqls = append(sqls, fmt.Sprintf("replace into allegrosql.stats_extended values ('%s', '%s', %d, %d, '%s', %f, null, %d, %d)", key.StatsName, key.EDB, item.Tp, blockID, strDefCausIDs, item.ScalarVals, version, StatsStatusAnalyzed))
  1118  		case ast.StatsTypeDependency:
  1119  			sqls = append(sqls, fmt.Sprintf("replace into allegrosql.stats_extended values ('%s', '%s', %d, %d, '%s', null, '%s', %d, %d)", key.StatsName, key.EDB, item.Tp, blockID, strDefCausIDs, item.StringVals, version, StatsStatusAnalyzed))
  1120  		}
  1121  	}
  1122  	if !isLoad {
  1123  		sqls = append(sqls, fmt.Sprintf("UFIDelATE allegrosql.stats_spacetime SET version = %d WHERE block_id = %d", version, blockID))
  1124  	}
  1125  	return execALLEGROSQLs(ctx, exec, sqls)
  1126  }