github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/statistics/handle/update.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package handle

import (
	"bytes"
	"context"
	"fmt"
	"math"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
	"github.com/whtcorpsinc/BerolinaSQL/terror"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/log"
	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb/oracle"
	"github.com/whtcorpsinc/milevadb/ekv"
	"github.com/whtcorpsinc/milevadb/metrics"
	"github.com/whtcorpsinc/milevadb/schemareplicant"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/codec"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/soliton/ranger"
	"github.com/whtcorpsinc/milevadb/soliton/sqlexec"
	"github.com/whtcorpsinc/milevadb/soliton/timeutil"
	"github.com/whtcorpsinc/milevadb/statistics"
	"github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx"
	"github.com/whtcorpsinc/milevadb/stochastikctx/variable"
	"github.com/whtcorpsinc/milevadb/types"
	"go.uber.org/zap"
)

type blockDeltaMap map[int64]variable.TableDelta

func (m blockDeltaMap) uFIDelate(id int64, delta int64, count int64, colSize *map[int64]int64) {
	item := m[id]
	item.Delta += delta
	item.Count += count
	if item.DefCausSize == nil {
		item.DefCausSize = make(map[int64]int64)
	}
	if colSize != nil {
		for key, val := range *colSize {
			item.DefCausSize[key] += val
		}
	}
	m[id] = item
}

type errorRateDelta struct {
	PkID         int64
	PkErrorRate  *statistics.ErrorRate
	IdxErrorRate map[int64]*statistics.ErrorRate
}

type errorRateDeltaMap map[int64]errorRateDelta

func (m errorRateDeltaMap) uFIDelate(blockID int64, histID int64, rate float64, isIndex bool) {
	item := m[blockID]
	if isIndex {
		if item.IdxErrorRate == nil {
			item.IdxErrorRate = make(map[int64]*statistics.ErrorRate)
		}
		if item.IdxErrorRate[histID] == nil {
			item.IdxErrorRate[histID] = &statistics.ErrorRate{}
		}
		item.IdxErrorRate[histID].UFIDelate(rate)
	} else {
		if item.PkErrorRate == nil {
			item.PkID = histID
			item.PkErrorRate = &statistics.ErrorRate{}
		}
		item.PkErrorRate.UFIDelate(rate)
	}
	m[blockID] = item
}

func (m errorRateDeltaMap) merge(deltaMap errorRateDeltaMap) {
	for blockID, item := range deltaMap {
		tbl := m[blockID]
		for histID, errorRate := range item.IdxErrorRate {
			if tbl.IdxErrorRate == nil {
				tbl.IdxErrorRate = make(map[int64]*statistics.ErrorRate)
			}
			if tbl.IdxErrorRate[histID] == nil {
				tbl.IdxErrorRate[histID] = &statistics.ErrorRate{}
			}
			tbl.IdxErrorRate[histID].Merge(errorRate)
		}
		if item.PkErrorRate != nil {
			if tbl.PkErrorRate == nil {
				tbl.PkID = item.PkID
				tbl.PkErrorRate = &statistics.ErrorRate{}
			}
			tbl.PkErrorRate.Merge(item.PkErrorRate)
		}
		m[blockID] = tbl
	}
}

func (m errorRateDeltaMap) clear(blockID int64, histID int64, isIndex bool) {
	item := m[blockID]
	if isIndex {
		delete(item.IdxErrorRate, histID)
	} else {
		item.PkErrorRate = nil
	}
	m[blockID] = item
}

func (h *Handle) merge(s *StochastikStatsDefCauslector, rateMap errorRateDeltaMap) {
	for id, item := range s.mapper {
		h.globalMap.uFIDelate(id, item.Delta, item.Count, &item.DefCausSize)
	}
	s.mapper = make(blockDeltaMap)
	rateMap.merge(s.rateMap)
	s.rateMap = make(errorRateDeltaMap)
	h.feedback.Merge(s.feedback)
	s.feedback = statistics.NewQueryFeedbackMap()
}

// StochastikStatsDefCauslector is a list item that holds the delta mapper. If you want to write or read mapper, you must lock it first.
type StochastikStatsDefCauslector struct {
	sync.Mutex

	mapper   blockDeltaMap
	feedback *statistics.QueryFeedbackMap
	rateMap  errorRateDeltaMap
	next     *StochastikStatsDefCauslector
	// deleted is set to true when a session is closed. Every time we sweep the list, we remove the collectors that are no longer needed.
	deleted bool
}

// Delete only sets the deleted flag to true; the collector is removed from the list when DumpStatsDeltaToKV is called.
func (s *StochastikStatsDefCauslector) Delete() {
	s.Lock()
	defer s.Unlock()
	s.deleted = true
}

// UFIDelate updates the delta and count for one table ID.
func (s *StochastikStatsDefCauslector) UFIDelate(id int64, delta int64, count int64, colSize *map[int64]int64) {
	s.Lock()
	defer s.Unlock()
	s.mapper.uFIDelate(id, delta, count, colSize)
}

var (
	// MinLogScanCount is the minimum scan count for a feedback to be logged.
	MinLogScanCount = int64(1000)
	// MinLogErrorRate is the minimum error rate for a feedback to be logged.
	MinLogErrorRate = 0.5
)

// StoreQueryFeedback merges the feedback into the stats collector.
func (s *StochastikStatsDefCauslector) StoreQueryFeedback(feedback interface{}, h *Handle) error {
	q := feedback.(*statistics.QueryFeedback)
	if !q.Valid || q.Hist == nil {
		return nil
	}
	err := h.RecalculateExpectCount(q)
	if err != nil {
		return errors.Trace(err)
	}
	rate := q.CalcErrorRate()
	if !(rate >= MinLogErrorRate && (q.Actual() >= MinLogScanCount || q.Expected >= MinLogScanCount)) {
		return nil
	}
	metrics.SignificantFeedbackCounter.Inc()
	metrics.StatsInaccuracyRate.Observe(rate)
	if log.GetLevel() == zap.DebugLevel {
		h.logDetailedInfo(q)
	}
	s.Lock()
	defer s.Unlock()
	isIndex := q.Tp == statistics.IndexType
	s.rateMap.uFIDelate(q.PhysicalID, q.Hist.ID, rate, isIndex)
	s.feedback.Append(q)
	return nil
}

// NewStochastikStatsDefCauslector allocates a stats collector for a session.
func (h *Handle) NewStochastikStatsDefCauslector() *StochastikStatsDefCauslector {
	h.listHead.Lock()
	defer h.listHead.Unlock()
	newDefCauslector := &StochastikStatsDefCauslector{
		mapper:   make(blockDeltaMap),
		rateMap:  make(errorRateDeltaMap),
		next:     h.listHead.next,
		feedback: statistics.NewQueryFeedbackMap(),
	}
	h.listHead.next = newDefCauslector
	return newDefCauslector
}

var (
	// DumpStatsDeltaRatio is the lower bound of `Modify Count / Table Count` for a stats delta to be dumped.
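	// For example (an illustrative figure, derived from needDumpStatsDelta below): with the
	// default ratio of 1/10000, a table whose cached stats count is 1,000,000 rows has its
	// delta dumped once more than 100 rows have been modified, or in any case once
	// dumpStatsMaxDuration (one hour) has passed since the delta was first recorded.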
	DumpStatsDeltaRatio = 1 / 10000.0
	// dumpStatsMaxDuration is the max duration since the last update.
	dumpStatsMaxDuration = time.Hour
)

// needDumpStatsDelta returns true when the delta should be dumped: either a large enough portion
// of the table has been modified, or the delta has been held for longer than dumpStatsMaxDuration.
func needDumpStatsDelta(h *Handle, id int64, item variable.TableDelta, currentTime time.Time) bool {
	if item.InitTime.IsZero() {
		item.InitTime = currentTime
	}
	tbl, ok := h.statsCache.Load().(statsCache).blocks[id]
	if !ok {
		// No need to dump if the stats is invalid.
		return false
	}
	if currentTime.Sub(item.InitTime) > dumpStatsMaxDuration {
		// Dump the stats to kv at least once an hour.
		return true
	}
	if tbl.Count == 0 || float64(item.Count)/float64(tbl.Count) > DumpStatsDeltaRatio {
		// Dump the stats when there are many modifications.
		return true
	}
	return false
}

type dumpMode bool

const (
	// DumpAll indicates dumping all the delta info into kv.
	DumpAll dumpMode = true
	// DumFIDelelta indicates dumping only part of the delta info into kv.
	DumFIDelelta dumpMode = false
)

// sweepList loops over the list, merges each session's local stats into the handle,
// and removes the collectors of closed sessions.
func (h *Handle) sweepList() {
	prev := h.listHead
	prev.Lock()
	errorRateMap := make(errorRateDeltaMap)
	for curr := prev.next; curr != nil; curr = curr.next {
		curr.Lock()
		// Merge the session stats into the handle and the error rate map.
		h.merge(curr, errorRateMap)
		if curr.deleted {
			prev.next = curr.next
			// Since the session is already closed, we can safely unlock it here.
			curr.Unlock()
		} else {
			// Unlock the previous collector, so we hold at most two sessions' locks at the same time.
			prev.Unlock()
			prev = curr
		}
	}
	prev.Unlock()
	h.mu.Lock()
	h.mu.rateMap.merge(errorRateMap)
	h.mu.Unlock()
	h.siftFeedbacks()
}

// siftFeedbacks eliminates feedbacks that overlap with others. It is a tradeoff between
// feedback accuracy and its overhead.
func (h *Handle) siftFeedbacks() {
	sc := &stmtctx.StatementContext{TimeZone: time.UTC}
	for k, qs := range h.feedback.Feedbacks {
		fbs := make([]statistics.Feedback, 0, len(qs)*2)
		for _, q := range qs {
			fbs = append(fbs, q.Feedback...)
		}
		if len(fbs) == 0 {
			delete(h.feedback.Feedbacks, k)
			continue
		}
		h.feedback.Feedbacks[k] = h.feedback.Feedbacks[k][:1]
		h.feedback.Feedbacks[k][0].Feedback, _ = statistics.NonOverlappedFeedbacks(sc, fbs)
	}
	h.feedback.Size = len(h.feedback.Feedbacks)
}

// DumpStatsDeltaToKV sweeps the whole list and updates the global map, then dumps every table held in the map to KV.
// If the mode is `DumFIDelelta`, it only dumps the deltas whose `Modify Count / Table Count` exceeds the ratio.
func (h *Handle) DumpStatsDeltaToKV(mode dumpMode) error {
	h.sweepList()
	currentTime := time.Now()
	for id, item := range h.globalMap {
		if mode == DumFIDelelta && !needDumpStatsDelta(h, id, item, currentTime) {
			continue
		}
		uFIDelated, err := h.dumpTableStatCountToKV(id, item)
		if err != nil {
			return errors.Trace(err)
		}
		if uFIDelated {
			h.globalMap.uFIDelate(id, -item.Delta, -item.Count, nil)
		}
		if err = h.dumpTableStatDefCausSizeToKV(id, item); err != nil {
			return errors.Trace(err)
		}
		if uFIDelated {
			delete(h.globalMap, id)
		} else {
			m := h.globalMap[id]
			m.DefCausSize = nil
			h.globalMap[id] = m
		}
	}
	return nil
}

// dumpTableStatCountToKV dumps the count delta of a single table to KV and updates the version.
func (h *Handle) dumpTableStatCountToKV(id int64, delta variable.TableDelta) (uFIDelated bool, err error) {
	if delta.Count == 0 {
		return true, nil
	}
	h.mu.Lock()
	defer h.mu.Unlock()
	ctx := context.TODO()
	exec := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate)
	_, err = exec.InterDircute(ctx, "begin")
	if err != nil {
		return false, errors.Trace(err)
	}
	defer func() {
		err = finishTransaction(context.Background(), exec, err)
	}()

	txn, err := h.mu.ctx.Txn(true)
	if err != nil {
		return false, errors.Trace(err)
	}
	startTS := txn.StartTS()
	var allegrosql string
	if delta.Delta < 0 {
		allegrosql = fmt.Sprintf("uFIDelate allegrosql.stats_spacetime set version = %d, count = count - %d, modify_count = modify_count + %d where block_id = %d and count >= %d", startTS, -delta.Delta, delta.Count, id, -delta.Delta)
	} else {
		allegrosql = fmt.Sprintf("uFIDelate allegrosql.stats_spacetime set version = %d, count = count + %d, modify_count = modify_count + %d where block_id = %d", startTS, delta.Delta, delta.Count, id)
	}
	err = execALLEGROSQLs(context.Background(), exec, []string{allegrosql})
	uFIDelated = h.mu.ctx.GetStochastikVars().StmtCtx.AffectedRows() > 0
	return
}

func (h *Handle) dumpTableStatDefCausSizeToKV(id int64, delta variable.TableDelta) error {
	if len(delta.DefCausSize) == 0 {
		return nil
	}
	values := make([]string, 0, len(delta.DefCausSize))
	for histID, deltaDefCausSize := range delta.DefCausSize {
		if deltaDefCausSize == 0 {
			continue
		}
		values = append(values, fmt.Sprintf("(%d, 0, %d, 0, %d)", id, histID, deltaDefCausSize))
	}
	if len(values) == 0 {
		return nil
	}
	allegrosql := fmt.Sprintf("insert into allegrosql.stats_histograms (block_id, is_index, hist_id, distinct_count, tot_col_size) "+
		"values %s on duplicate key uFIDelate tot_col_size = tot_col_size + values(tot_col_size)", strings.Join(values, ","))
	_, _, err := h.restrictedInterDirc.InterDircRestrictedALLEGROSQL(allegrosql)
	return errors.Trace(err)
}

// DumpStatsFeedbackToKV dumps the stats feedback to KV.
func (h *Handle) DumpStatsFeedbackToKV() error {
	var err error
	for _, fbs := range h.feedback.Feedbacks {
		for _, fb := range fbs {
			if fb.Tp == statistics.PkType {
				err = h.DumpFeedbackToKV(fb)
			} else {
				t, ok := h.statsCache.Load().(statsCache).blocks[fb.PhysicalID]
				if ok {
					err = h.DumpFeedbackForIndex(fb, t)
				}
			}
			if err != nil {
				// For simplicity, we just drop other feedbacks in case of error.
				break
			}
		}
	}
	h.feedback = statistics.NewQueryFeedbackMap()
	return errors.Trace(err)
}

// DumpFeedbackToKV dumps the given feedback to the physical kv layer.
func (h *Handle) DumpFeedbackToKV(fb *statistics.QueryFeedback) error {
	vals, err := statistics.EncodeFeedback(fb)
	if err != nil {
		logutil.BgLogger().Debug("error occurred when encoding feedback", zap.Error(err))
		return nil
	}
	var isIndex int64
	if fb.Tp == statistics.IndexType {
		isIndex = 1
	}
	allegrosql := fmt.Sprintf("insert into allegrosql.stats_feedback (block_id, hist_id, is_index, feedback) values "+
		"(%d, %d, %d, X'%X')", fb.PhysicalID, fb.Hist.ID, isIndex, vals)
	h.mu.Lock()
	_, err = h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
	h.mu.Unlock()
	if err != nil {
		metrics.DumpFeedbackCounter.WithLabelValues(metrics.LblError).Inc()
	} else {
		metrics.DumpFeedbackCounter.WithLabelValues(metrics.LblOK).Inc()
	}
	return errors.Trace(err)
}

// UFIDelateStatsByLocalFeedback updates statistics using the local feedback.
// Currently, we dump the feedback with a period of 10 minutes, which means
// it takes 10 minutes for a feedback to take effect. However, we can use the
// feedback locally on this milevadb-server, so it can be used in a more timely manner.
func (h *Handle) UFIDelateStatsByLocalFeedback(is schemareplicant.SchemaReplicant) {
	h.sweepList()
	for _, fbs := range h.feedback.Feedbacks {
		for _, fb := range fbs {
			h.mu.Lock()
			causet, ok := h.getTableByPhysicalID(is, fb.PhysicalID)
			h.mu.Unlock()
			if !ok {
				continue
			}
			tblStats := h.GetPartitionStats(causet.Meta(), fb.PhysicalID)
			newTblStats := tblStats.Copy()
			if fb.Tp == statistics.IndexType {
				idx, ok := tblStats.Indices[fb.Hist.ID]
				if !ok || idx.Histogram.Len() == 0 {
					continue
				}
				newIdx := *idx
				eqFB, ranFB := statistics.SplitFeedbackByQueryType(fb.Feedback)
				newIdx.CMSketch = statistics.UFIDelateCMSketch(idx.CMSketch, eqFB)
				newIdx.Histogram = *statistics.UFIDelateHistogram(&idx.Histogram, &statistics.QueryFeedback{Feedback: ranFB})
				newIdx.Histogram.PreCalculateScalar()
				newIdx.Flag = statistics.ResetAnalyzeFlag(newIdx.Flag)
				newTblStats.Indices[fb.Hist.ID] = &newIdx
			} else {
				col, ok := tblStats.DeferredCausets[fb.Hist.ID]
				if !ok || col.Histogram.Len() == 0 {
					continue
				}
				newDefCaus := *col
				// Only use the range queries to update the primary key stats.
				_, ranFB := statistics.SplitFeedbackByQueryType(fb.Feedback)
				newFB := &statistics.QueryFeedback{Feedback: ranFB}
				newFB = newFB.DecodeIntValues()
				newDefCaus.Histogram = *statistics.UFIDelateHistogram(&col.Histogram, newFB)
				newDefCaus.Flag = statistics.ResetAnalyzeFlag(newDefCaus.Flag)
				newTblStats.DeferredCausets[fb.Hist.ID] = &newDefCaus
			}
			oldCache := h.statsCache.Load().(statsCache)
			h.uFIDelateStatsCache(oldCache.uFIDelate([]*statistics.Block{newTblStats}, nil, oldCache.version))
		}
	}
}

// UFIDelateErrorRate updates the error rate of columns from h.rateMap into the cache.
func (h *Handle) UFIDelateErrorRate(is schemareplicant.SchemaReplicant) {
	h.mu.Lock()
	tbls := make([]*statistics.Block, 0, len(h.mu.rateMap))
	for id, item := range h.mu.rateMap {
		causet, ok := h.getTableByPhysicalID(is, id)
		if !ok {
			continue
		}
		tbl := h.GetPartitionStats(causet.Meta(), id).Copy()
		if item.PkErrorRate != nil && tbl.DeferredCausets[item.PkID] != nil {
			col := *tbl.DeferredCausets[item.PkID]
			col.ErrorRate.Merge(item.PkErrorRate)
			tbl.DeferredCausets[item.PkID] = &col
		}
		for key, val := range item.IdxErrorRate {
			if tbl.Indices[key] == nil {
				continue
			}
			idx := *tbl.Indices[key]
			idx.ErrorRate.Merge(val)
			tbl.Indices[key] = &idx
		}
		tbls = append(tbls, tbl)
		delete(h.mu.rateMap, id)
	}
	h.mu.Unlock()
	oldCache := h.statsCache.Load().(statsCache)
	h.uFIDelateStatsCache(oldCache.uFIDelate(tbls, nil, oldCache.version))
}

// HandleUFIDelateStats updates the stats using feedback.
func (h *Handle) HandleUFIDelateStats(is schemareplicant.SchemaReplicant) error {
	allegrosql := "SELECT distinct block_id from allegrosql.stats_feedback"
	blocks, _, err := h.restrictedInterDirc.InterDircRestrictedALLEGROSQL(allegrosql)
	if err != nil {
		return errors.Trace(err)
	}
	if len(blocks) == 0 {
		return nil
	}

	for _, ptbl := range blocks {
		// This closure lets `defer` work normally, so that `Close()` is called before any return.
		err = func() error {
			tbl := ptbl.GetInt64(0)
			allegrosql = fmt.Sprintf("select block_id, hist_id, is_index, feedback from allegrosql.stats_feedback where block_id=%d order by hist_id, is_index", tbl)
			rc, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
			if len(rc) > 0 {
				defer terror.Call(rc[0].Close)
			}
			if err != nil {
				return errors.Trace(err)
			}
			blockID, histID, isIndex := int64(-1), int64(-1), int64(-1)
			var rows []chunk.Row
			for {
				req := rc[0].NewChunk()
				iter := chunk.NewIterator4Chunk(req)
				err := rc[0].Next(context.TODO(), req)
				if err != nil {
					return errors.Trace(err)
				}
				if req.NumRows() == 0 {
					if len(rows) > 0 {
						if err := h.handleSingleHistogramUFIDelate(is, rows); err != nil {
							return errors.Trace(err)
						}
					}
					break
				}
				for event := iter.Begin(); event != iter.End(); event = iter.Next() {
					// `len(rows) > 100000` limits the number of buffered rows to avoid OOM.
					if event.GetInt64(0) != blockID || event.GetInt64(1) != histID || event.GetInt64(2) != isIndex || len(rows) > 100000 {
						if len(rows) > 0 {
							if err := h.handleSingleHistogramUFIDelate(is, rows); err != nil {
								return errors.Trace(err)
							}
						}
						blockID, histID, isIndex = event.GetInt64(0), event.GetInt64(1), event.GetInt64(2)
						rows = rows[:0]
					}
					rows = append(rows, event)
				}
			}
			return nil
		}()
		if err != nil {
			return err
		}
	}
	return nil
}

// handleSingleHistogramUFIDelate updates the Histogram and CM Sketch using these feedbacks. All the feedbacks for
// the same index or column are gathered in `rows`.
func (h *Handle) handleSingleHistogramUFIDelate(is schemareplicant.SchemaReplicant, rows []chunk.Row) (err error) {
	physicalTableID, histID, isIndex := rows[0].GetInt64(0), rows[0].GetInt64(1), rows[0].GetInt64(2)
	defer func() {
		if err == nil {
			err = errors.Trace(h.deleteOutdatedFeedback(physicalTableID, histID, isIndex))
		}
	}()
	h.mu.Lock()
	causet, ok := h.getTableByPhysicalID(is, physicalTableID)
	h.mu.Unlock()
	// The table has been deleted.
	if !ok {
		return nil
	}
	var tbl *statistics.Block
	if causet.Meta().GetPartitionInfo() != nil {
		tbl = h.GetPartitionStats(causet.Meta(), physicalTableID)
	} else {
		tbl = h.GetTableStats(causet.Meta())
	}
	var cms *statistics.CMSketch
	var hist *statistics.Histogram
	if isIndex == 1 {
		idx, ok := tbl.Indices[histID]
		if ok && idx.Histogram.Len() > 0 {
			idxHist := idx.Histogram
			hist = &idxHist
			cms = idx.CMSketch.Copy()
		}
	} else {
		col, ok := tbl.DeferredCausets[histID]
		if ok && col.Histogram.Len() > 0 {
			colHist := col.Histogram
			hist = &colHist
		}
	}
	// The column or index has been deleted.
	if hist == nil {
		return nil
	}
	q := &statistics.QueryFeedback{}
	for _, event := range rows {
		err1 := statistics.DecodeFeedback(event.GetBytes(3), q, cms, hist.Tp)
		if err1 != nil {
			logutil.BgLogger().Debug("decode feedback failed", zap.Error(err1))
		}
	}
	err = h.dumpStatsUFIDelateToKV(physicalTableID, isIndex, q, hist, cms)
	return errors.Trace(err)
}

func (h *Handle) deleteOutdatedFeedback(blockID, histID, isIndex int64) error {
	h.mu.Lock()
	defer h.mu.Unlock()
	hasData := true
	for hasData {
		allegrosql := fmt.Sprintf("delete from allegrosql.stats_feedback where block_id = %d and hist_id = %d and is_index = %d limit 10000", blockID, histID, isIndex)
		_, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
		if err != nil {
			return errors.Trace(err)
		}
		hasData = h.mu.ctx.GetStochastikVars().StmtCtx.AffectedRows() > 0
	}
	return nil
}

func (h *Handle) dumpStatsUFIDelateToKV(blockID, isIndex int64, q *statistics.QueryFeedback, hist *statistics.Histogram, cms *statistics.CMSketch) error {
	hist = statistics.UFIDelateHistogram(hist, q)
	err := h.SaveStatsToStorage(blockID, -1, int(isIndex), hist, cms, 0)
	metrics.UFIDelateStatsCounter.WithLabelValues(metrics.RetLabel(err)).Inc()
	return errors.Trace(err)
}

const (
	// StatsTenantKey is the stats tenant path that is saved to etcd.
	StatsTenantKey = "/milevadb/stats/tenant"
	// StatsPrompt is the prompt for the stats tenant manager.
	StatsPrompt = "stats"
)

// AutoAnalyzeMinCnt means that if the row count of a table is less than this value, we don't need to do auto analyze.
var AutoAnalyzeMinCnt int64 = 1000

// TableAnalyzed checks if the table has been analyzed.
func TableAnalyzed(tbl *statistics.Block) bool {
	for _, col := range tbl.DeferredCausets {
		if col.Count > 0 {
			return true
		}
	}
	for _, idx := range tbl.Indices {
		if idx.Histogram.Len() > 0 {
			return true
		}
	}
	return false
}

// NeedAnalyzeTable checks if we need to analyze the table:
// 1. If the table has never been analyzed, we need to analyze it when it has
//    not been modified for a while.
// 2. If the table had been analyzed before, we need to analyze it when
//    "tbl.ModifyCount/tbl.Count > autoAnalyzeRatio" and the current time is
//    between `start` and `end`.
func NeedAnalyzeTable(tbl *statistics.Block, limit time.Duration, autoAnalyzeRatio float64, start, end, now time.Time) (bool, string) {
	analyzed := TableAnalyzed(tbl)
	if !analyzed {
		t := time.Unix(0, oracle.ExtractPhysical(tbl.Version)*int64(time.Millisecond))
		dur := time.Since(t)
		return dur >= limit, fmt.Sprintf("causet unanalyzed, time since last uFIDelated %vs", dur)
	}
	// Auto analyze is disabled.
	if autoAnalyzeRatio == 0 {
		return false, ""
	}
	// No need to analyze it.
	if float64(tbl.ModifyCount)/float64(tbl.Count) <= autoAnalyzeRatio {
		return false, ""
	}
	// Test if the current time is within the time period.
	return timeutil.WithinDayTimePeriod(start, end, now), fmt.Sprintf("too many modifications(%v/%v>%v)", tbl.ModifyCount, tbl.Count, autoAnalyzeRatio)
}

func (h *Handle) getAutoAnalyzeParameters() map[string]string {
	allegrosql := fmt.Sprintf("select variable_name, variable_value from allegrosql.global_variables where variable_name in ('%s', '%s', '%s')",
		variable.MilevaDBAutoAnalyzeRatio, variable.MilevaDBAutoAnalyzeStartTime, variable.MilevaDBAutoAnalyzeEndTime)
	rows, _, err := h.restrictedInterDirc.InterDircRestrictedALLEGROSQL(allegrosql)
	if err != nil {
		return map[string]string{}
	}
	parameters := make(map[string]string, len(rows))
	for _, event := range rows {
		parameters[event.GetString(0)] = event.GetString(1)
	}
	return parameters
}

func parseAutoAnalyzeRatio(ratio string) float64 {
	autoAnalyzeRatio, err := strconv.ParseFloat(ratio, 64)
	if err != nil {
		return variable.DefAutoAnalyzeRatio
	}
	return math.Max(autoAnalyzeRatio, 0)
}

func parseAnalyzePeriod(start, end string) (time.Time, time.Time, error) {
	if start == "" {
		start = variable.DefAutoAnalyzeStartTime
	}
	if end == "" {
		end = variable.DefAutoAnalyzeEndTime
	}
	s, err := time.ParseInLocation(variable.FullDayTimeFormat, start, time.UTC)
	if err != nil {
		return s, s, errors.Trace(err)
	}
	e, err := time.ParseInLocation(variable.FullDayTimeFormat, end, time.UTC)
	return s, e, err
}

// HandleAutoAnalyze analyzes the newly created table or index.
func (h *Handle) HandleAutoAnalyze(is schemareplicant.SchemaReplicant) {
	dbs := is.AllSchemaNames()
	parameters := h.getAutoAnalyzeParameters()
	autoAnalyzeRatio := parseAutoAnalyzeRatio(parameters[variable.MilevaDBAutoAnalyzeRatio])
	start, end, err := parseAnalyzePeriod(parameters[variable.MilevaDBAutoAnalyzeStartTime], parameters[variable.MilevaDBAutoAnalyzeEndTime])
	if err != nil {
		logutil.BgLogger().Error("[stats] parse auto analyze period failed", zap.Error(err))
		return
	}
	for _, EDB := range dbs {
		tbls := is.SchemaTables(perceptron.NewCIStr(EDB))
		for _, tbl := range tbls {
			tblInfo := tbl.Meta()
			pi := tblInfo.GetPartitionInfo()
			if pi == nil {
				statsTbl := h.GetTableStats(tblInfo)
				allegrosql := "analyze causet `" + EDB + "`.`" + tblInfo.Name.O + "`"
				analyzed := h.autoAnalyzeTable(tblInfo, statsTbl, start, end, autoAnalyzeRatio, allegrosql)
				if analyzed {
					return
				}
				continue
			}
			for _, def := range pi.Definitions {
				allegrosql := "analyze causet `" + EDB + "`.`" + tblInfo.Name.O + "`" + " partition `" + def.Name.O + "`"
				statsTbl := h.GetPartitionStats(tblInfo, def.ID)
				analyzed := h.autoAnalyzeTable(tblInfo, statsTbl, start, end, autoAnalyzeRatio, allegrosql)
				if analyzed {
					return
				}
				continue
			}
		}
	}
}

func (h *Handle) autoAnalyzeTable(tblInfo *perceptron.TableInfo, statsTbl *statistics.Block, start, end time.Time, ratio float64, allegrosql string) bool {
	if statsTbl.Pseudo || statsTbl.Count < AutoAnalyzeMinCnt {
		return false
	}
	if needAnalyze, reason := NeedAnalyzeTable(statsTbl, 20*h.Lease(), ratio, start, end, time.Now()); needAnalyze {
		logutil.BgLogger().Info("[stats] auto analyze triggered", zap.String("allegrosql", allegrosql), zap.String("reason", reason))
		h.execAutoAnalyze(allegrosql)
		return true
	}
	for _, idx := range tblInfo.Indices {
		if _, ok := statsTbl.Indices[idx.ID]; !ok && idx.State == perceptron.StatePublic {
			allegrosql = fmt.Sprintf("%s index `%s`", allegrosql, idx.Name.O)
			logutil.BgLogger().Info("[stats] auto analyze for unanalyzed", zap.String("allegrosql", allegrosql))
			h.execAutoAnalyze(allegrosql)
			return true
		}
	}
	return false
}

func (h *Handle) execAutoAnalyze(allegrosql string) {
	startTime := time.Now()
	_, _, err := h.restrictedInterDirc.InterDircRestrictedALLEGROSQL(allegrosql)
	dur := time.Since(startTime)
	metrics.AutoAnalyzeHistogram.Observe(dur.Seconds())
	if err != nil {
		logutil.BgLogger().Error("[stats] auto analyze failed", zap.String("allegrosql", allegrosql), zap.Duration("cost_time", dur), zap.Error(err))
		metrics.AutoAnalyzeCounter.WithLabelValues("failed").Inc()
	} else {
		metrics.AutoAnalyzeCounter.WithLabelValues("succ").Inc()
	}
}

// formatBuckets formats the buckets from lowBkt to highBkt.
func formatBuckets(hg *statistics.Histogram, lowBkt, highBkt, idxDefCauss int) string {
	if lowBkt == highBkt {
		return hg.BucketToString(lowBkt, idxDefCauss)
	}
	if lowBkt+1 == highBkt {
		return fmt.Sprintf("%s, %s", hg.BucketToString(lowBkt, idxDefCauss), hg.BucketToString(highBkt, idxDefCauss))
	}
	// We do not care about the middle buckets.
	return fmt.Sprintf("%s, (%d buckets, total count %d), %s", hg.BucketToString(lowBkt, idxDefCauss),
		highBkt-lowBkt-1, hg.Buckets[highBkt-1].Count-hg.Buckets[lowBkt].Count, hg.BucketToString(highBkt, idxDefCauss))
}

func colRangeToStr(c *statistics.DeferredCauset, ran *ranger.Range, actual int64, factor float64) string {
	lowCount, lowBkt := c.LessRowCountWithBktIdx(ran.LowVal[0])
	highCount, highBkt := c.LessRowCountWithBktIdx(ran.HighVal[0])
	return fmt.Sprintf("range: %s, actual: %d, expected: %d, buckets: {%s}", ran.String(), actual,
		int64((highCount-lowCount)*factor), formatBuckets(&c.Histogram, lowBkt, highBkt, 0))
}

func logForIndexRange(idx *statistics.Index, ran *ranger.Range, actual int64, factor float64) string {
	sc := &stmtctx.StatementContext{TimeZone: time.UTC}
	lb, err := codec.EncodeKey(sc, nil, ran.LowVal...)
	if err != nil {
		return ""
	}
	rb, err := codec.EncodeKey(sc, nil, ran.HighVal...)
	if err != nil {
		return ""
	}
	if idx.CMSketch != nil && bytes.Compare(ekv.Key(lb).PrefixNext(), rb) >= 0 {
		str, err := types.CausetsToString(ran.LowVal, true)
		if err != nil {
			return ""
		}
		return fmt.Sprintf("value: %s, actual: %d, expected: %d", str, actual, int64(float64(idx.QueryBytes(lb))*factor))
	}
	l, r := types.NewBytesCauset(lb), types.NewBytesCauset(rb)
	lowCount, lowBkt := idx.LessRowCountWithBktIdx(l)
	highCount, highBkt := idx.LessRowCountWithBktIdx(r)
	return fmt.Sprintf("range: %s, actual: %d, expected: %d, histogram: {%s}", ran.String(), actual,
		int64((highCount-lowCount)*factor), formatBuckets(&idx.Histogram, lowBkt, highBkt, len(idx.Info.DeferredCausets)))
}

func logForIndex(prefix string, t *statistics.Block, idx *statistics.Index, ranges []*ranger.Range, actual []int64, factor float64) {
	sc := &stmtctx.StatementContext{TimeZone: time.UTC}
	if idx.CMSketch == nil || idx.StatsVer != statistics.Version1 {
		for i, ran := range ranges {
			logutil.BgLogger().Debug(prefix, zap.String("index", idx.Info.Name.O), zap.String("rangeStr", logForIndexRange(idx, ran, actual[i], factor)))
		}
		return
	}
	for i, ran := range ranges {
		rangePosition := statistics.GetOrdinalOfRangeCond(sc, ran)
		// The range contains only an equality query or only a range query.
		if rangePosition == 0 || rangePosition == len(ran.LowVal) {
			logutil.BgLogger().Debug(prefix, zap.String("index", idx.Info.Name.O), zap.String("rangeStr", logForIndexRange(idx, ran, actual[i], factor)))
			continue
		}
		equalityString, err := types.CausetsToString(ran.LowVal[:rangePosition], true)
		if err != nil {
			continue
		}
		bytes, err := codec.EncodeKey(sc, nil, ran.LowVal[:rangePosition]...)
		if err != nil {
			continue
		}
		equalityCount := idx.CMSketch.QueryBytes(bytes)
		rang := ranger.Range{
			LowVal:  []types.Causet{ran.LowVal[rangePosition]},
			HighVal: []types.Causet{ran.HighVal[rangePosition]},
		}
		colName := idx.Info.DeferredCausets[rangePosition].Name.L
		// Prefer index stats over column stats.
		if idxHist := t.IndexStartWithDeferredCauset(colName); idxHist != nil && idxHist.Histogram.Len() > 0 {
			rangeString := logForIndexRange(idxHist, &rang, -1, factor)
			logutil.BgLogger().Debug(prefix, zap.String("index", idx.Info.Name.O), zap.Int64("actual", actual[i]),
				zap.String("equality", equalityString), zap.Uint64("expected equality", equalityCount),
				zap.String("range", rangeString))
		} else if colHist := t.DeferredCausetByName(colName); colHist != nil && colHist.Histogram.Len() > 0 {
			err = convertRangeType(&rang, colHist.Tp, time.UTC)
			if err == nil {
				rangeString := colRangeToStr(colHist, &rang, -1, factor)
				logutil.BgLogger().Debug(prefix, zap.String("index", idx.Info.Name.O), zap.Int64("actual", actual[i]),
					zap.String("equality", equalityString), zap.Uint64("expected equality", equalityCount),
					zap.String("range", rangeString))
			}
		} else {
			count, err := statistics.GetPseudoRowCountByDeferredCausetRanges(sc, float64(t.Count), []*ranger.Range{&rang}, 0)
			if err == nil {
				logutil.BgLogger().Debug(prefix, zap.String("index", idx.Info.Name.O), zap.Int64("actual", actual[i]),
					zap.String("equality", equalityString), zap.Uint64("expected equality", equalityCount),
					zap.Stringer("range", &rang), zap.Float64("pseudo count", math.Round(count)))
			}
		}
	}
}

func (h *Handle) logDetailedInfo(q *statistics.QueryFeedback) {
	t, ok := h.statsCache.Load().(statsCache).blocks[q.PhysicalID]
	if !ok {
		return
	}
	isIndex := q.Hist.IsIndexHist()
	ranges, err := q.DecodeToRanges(isIndex)
	if err != nil {
		logutil.BgLogger().Debug("decode to ranges failed", zap.Error(err))
		return
	}
	actual := make([]int64, 0, len(q.Feedback))
	for _, fb := range q.Feedback {
		actual = append(actual, fb.Count)
	}
	logPrefix := fmt.Sprintf("[stats-feedback] %s", t.Name)
	if isIndex {
		idx := t.Indices[q.Hist.ID]
		if idx == nil || idx.Histogram.Len() == 0 {
			return
		}
		logForIndex(logPrefix, t, idx, ranges, actual, idx.GetIncreaseFactor(t.Count))
	} else {
		c := t.DeferredCausets[q.Hist.ID]
		if c == nil || c.Histogram.Len() == 0 {
			return
		}
		logForPK(logPrefix, c, ranges, actual, c.GetIncreaseFactor(t.Count))
	}
}

func logForPK(prefix string, c *statistics.DeferredCauset, ranges []*ranger.Range, actual []int64, factor float64) {
	for i, ran := range ranges {
		if ran.LowVal[0].GetInt64()+1 >= ran.HighVal[0].GetInt64() {
			continue
		}
		logutil.BgLogger().Debug(prefix, zap.String("column", c.Info.Name.O), zap.String("rangeStr", colRangeToStr(c, ran, actual[i], factor)))
	}
}

// RecalculateExpectCount recalculates the expected row count if the original count was estimated using pseudo statistics.
func (h *Handle) RecalculateExpectCount(q *statistics.QueryFeedback) error {
	t, ok := h.statsCache.Load().(statsCache).blocks[q.PhysicalID]
	if !ok {
		return nil
	}
	blockPseudo := t.Pseudo || t.IsOutdated()
	if !blockPseudo {
		return nil
	}
	isIndex := q.Hist.Tp.Tp == allegrosql.TypeBlob
	id := q.Hist.ID
	if isIndex && (t.Indices[id] == nil || !t.Indices[id].NotAccurate()) {
		return nil
	}
	if !isIndex && (t.DeferredCausets[id] == nil || !t.DeferredCausets[id].NotAccurate()) {
		return nil
	}

	sc := &stmtctx.StatementContext{TimeZone: time.UTC}
	ranges, err := q.DecodeToRanges(isIndex)
	if err != nil {
		return errors.Trace(err)
	}
	expected := 0.0
	if isIndex {
		idx := t.Indices[id]
		expected, err = idx.GetRowCount(sc, ranges, t.ModifyCount)
		expected *= idx.GetIncreaseFactor(t.Count)
	} else {
		c := t.DeferredCausets[id]
		expected, err = c.GetDeferredCausetRowCount(sc, ranges, t.ModifyCount, true)
		expected *= c.GetIncreaseFactor(t.Count)
	}
	q.Expected = int64(expected)
	return err
}

func (h *Handle) dumpRangeFeedback(sc *stmtctx.StatementContext, ran *ranger.Range, rangeCount float64, q *statistics.QueryFeedback) error {
	lowIsNull := ran.LowVal[0].IsNull()
	if q.Tp == statistics.IndexType {
		lower, err := codec.EncodeKey(sc, nil, ran.LowVal[0])
		if err != nil {
			return errors.Trace(err)
		}
		upper, err := codec.EncodeKey(sc, nil, ran.HighVal[0])
		if err != nil {
			return errors.Trace(err)
		}
		ran.LowVal[0].SetBytes(lower)
		ran.HighVal[0].SetBytes(upper)
	} else {
		if !statistics.SupportDeferredCausetType(q.Hist.Tp) {
			return nil
		}
		if ran.LowVal[0].HoTT() == types.HoTTMinNotNull {
			ran.LowVal[0] = types.GetMinValue(q.Hist.Tp)
		}
		if ran.HighVal[0].HoTT() == types.HoTTMaxValue {
			ran.HighVal[0] = types.GetMaxValue(q.Hist.Tp)
		}
	}
	ranges, ok := q.Hist.SplitRange(sc, []*ranger.Range{ran}, q.Tp == statistics.IndexType)
	if !ok {
		logutil.BgLogger().Debug("type of histogram and ranges mismatch")
		return nil
	}
	counts := make([]float64, 0, len(ranges))
	sum := 0.0
	for i, r := range ranges {
		// Though after `SplitRange` we may have ranges like `[l, r]`, we still use
		// `BetweenRowCount` to compute the estimation, since the feedback ranges are all in `[l, r)`
		// form; that is to say, we ignore the exclusiveness of the ranges produced by `SplitRange`
		// and just use their boundary values.
		count := q.Hist.BetweenRowCount(r.LowVal[0], r.HighVal[0])
		// We have to include the histogram's `NullCount` for [l, r) cases where l is null, because
		// `BetweenRowCount` does not include null values of the lower bound.
		if i == 0 && lowIsNull {
			count += float64(q.Hist.NullCount)
		}
		sum += count
		counts = append(counts, count)
	}
	if sum <= 1 {
		return nil
	}
	// We assume that each part contributes the same error rate.
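	// For example (an illustrative figure, derived from the code below): if `SplitRange` produces
	// two sub-ranges with histogram estimates {30, 10} (sum = 40) and the feedback observed 80 rows
	// for the whole range, adjustFactor is 80/40 = 2 and the dumped per-range counts become {60, 20}.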
	adjustFactor := rangeCount / sum
	for i, r := range ranges {
		q.Feedback = append(q.Feedback, statistics.Feedback{Lower: &r.LowVal[0], Upper: &r.HighVal[0], Count: int64(counts[i] * adjustFactor)})
	}
	return errors.Trace(h.DumpFeedbackToKV(q))
}

func convertRangeType(ran *ranger.Range, ft *types.FieldType, loc *time.Location) error {
	err := statistics.ConvertCausetsType(ran.LowVal, ft, loc)
	if err != nil {
		return err
	}
	return statistics.ConvertCausetsType(ran.HighVal, ft, loc)
}

// DumpFeedbackForIndex dumps the feedback for an index.
// For queries that contain both equality and range conditions, we split them and update the stats accordingly.
func (h *Handle) DumpFeedbackForIndex(q *statistics.QueryFeedback, t *statistics.Block) error {
	idx, ok := t.Indices[q.Hist.ID]
	if !ok {
		return nil
	}
	sc := &stmtctx.StatementContext{TimeZone: time.UTC}
	if idx.CMSketch == nil || idx.StatsVer != statistics.Version1 {
		return h.DumpFeedbackToKV(q)
	}
	ranges, err := q.DecodeToRanges(true)
	if err != nil {
		logutil.BgLogger().Debug("decode feedback ranges fail", zap.Error(err))
		return nil
	}
	for i, ran := range ranges {
		rangePosition := statistics.GetOrdinalOfRangeCond(sc, ran)
		// The range contains only an equality query or only a range query.
		if rangePosition == 0 || rangePosition == len(ran.LowVal) {
			continue
		}

		bytes, err := codec.EncodeKey(sc, nil, ran.LowVal[:rangePosition]...)
		if err != nil {
			logutil.BgLogger().Debug("encode keys fail", zap.Error(err))
			continue
		}
		equalityCount := float64(idx.CMSketch.QueryBytes(bytes)) * idx.GetIncreaseFactor(t.Count)
		rang := &ranger.Range{
			LowVal:  []types.Causet{ran.LowVal[rangePosition]},
			HighVal: []types.Causet{ran.HighVal[rangePosition]},
		}
		colName := idx.Info.DeferredCausets[rangePosition].Name.L
		var rangeCount float64
		rangeFB := &statistics.QueryFeedback{PhysicalID: q.PhysicalID}
		// Prefer index stats over column stats.
		if idx := t.IndexStartWithDeferredCauset(colName); idx != nil && idx.Histogram.Len() != 0 {
			rangeCount, err = t.GetRowCountByIndexRanges(sc, idx.ID, []*ranger.Range{rang})
			rangeFB.Tp, rangeFB.Hist = statistics.IndexType, &idx.Histogram
		} else if col := t.DeferredCausetByName(colName); col != nil && col.Histogram.Len() != 0 {
			err = convertRangeType(rang, col.Tp, time.UTC)
			if err == nil {
				rangeCount, err = t.GetRowCountByDeferredCausetRanges(sc, col.ID, []*ranger.Range{rang})
				rangeFB.Tp, rangeFB.Hist = statistics.DefCausType, &col.Histogram
			}
		} else {
			continue
		}
		if err != nil {
			logutil.BgLogger().Debug("get event count by ranges fail", zap.Error(err))
			continue
		}

		equalityCount, rangeCount = getNewCountForIndex(equalityCount, rangeCount, float64(t.Count), float64(q.Feedback[i].Count))
		value := types.NewBytesCauset(bytes)
		q.Feedback[i] = statistics.Feedback{Lower: &value, Upper: &value, Count: int64(equalityCount)}
		err = h.dumpRangeFeedback(sc, rang, rangeCount, rangeFB)
		if err != nil {
			logutil.BgLogger().Debug("dump range feedback fail", zap.Error(err))
			continue
		}
	}
	return errors.Trace(h.DumpFeedbackToKV(q))
}

// minAdjustFactor is the minimum adjust factor of each index feedback.
// We use it to avoid adjusting too much when the independence assumption fails.
const minAdjustFactor = 0.7

// getNewCountForIndex adjusts the estimated `eqCount` and `rangeCount` according to the real count.
// We assume that `eqCount` and `rangeCount` contribute the same error rate.
func getNewCountForIndex(eqCount, rangeCount, totalCount, realCount float64) (float64, float64) {
	estimate := (eqCount / totalCount) * (rangeCount / totalCount) * totalCount
	if estimate <= 1 {
		return eqCount, rangeCount
	}
	adjustFactor := math.Sqrt(realCount / estimate)
	adjustFactor = math.Max(adjustFactor, minAdjustFactor)
	return eqCount * adjustFactor, rangeCount * adjustFactor
}
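// A worked example of getNewCountForIndex (illustrative only; the inputs are assumed, not taken
// from the code above): for eqCount = 100, rangeCount = 50, totalCount = 1000 and realCount = 20,
//
//	estimate     = (100/1000) * (50/1000) * 1000            = 5
//	adjustFactor = max(sqrt(20/5), minAdjustFactor)          = 2
//
// so the adjusted counts returned are (200, 100).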