github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/statistics/handle/bootstrap.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package handle

import (
	"context"
	"fmt"

	"github.com/cznic/mathutil"
	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
	"github.com/whtcorpsinc/BerolinaSQL/terror"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/milevadb/schemareplicant"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/soliton/sqlexec"
	"github.com/whtcorpsinc/milevadb/statistics"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"github.com/whtcorpsinc/milevadb/types"
	"go.uber.org/zap"
)

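// initStatsMeta4Chunk creates a skeleton statistics.Block for every event in a
// stats_spacetime chunk and stores it in the cache keyed by physical ID. Events
// whose physical ID is no longer present in the schema are skipped.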
func (h *Handle) initStatsMeta4Chunk(is schemareplicant.SchemaReplicant, cache *statsCache, iter *chunk.Iterator4Chunk) {
	for event := iter.Begin(); event != iter.End(); event = iter.Next() {
		physicalID := event.GetInt64(1)
		causet, ok := h.getTableByPhysicalID(is, physicalID)
		if !ok {
			logutil.BgLogger().Debug("unknown physical ID in stats spacetime causet, maybe it has been dropped", zap.Int64("ID", physicalID))
			continue
		}
		blockInfo := causet.Meta()
		newHistDefCausl := statistics.HistDefCausl{
			PhysicalID:      physicalID,
			HavePhysicalID:  true,
			Count:           event.GetInt64(3),
			ModifyCount:     event.GetInt64(2),
			DeferredCausets: make(map[int64]*statistics.DeferredCauset, len(blockInfo.DeferredCausets)),
			Indices:         make(map[int64]*statistics.Index, len(blockInfo.Indices)),
		}
		tbl := &statistics.Block{
			HistDefCausl: newHistDefCausl,
			Version:      event.GetUint64(0),
			Name:         getFullTableName(is, blockInfo),
		}
		cache.blocks[physicalID] = tbl
	}
}

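// initStatsMeta reads allegrosql.stats_spacetime chunk by chunk and builds the initial
// statsCache: one statistics.Block per physical ID with its version, count, and modify
// count, but with no histograms attached yet.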
func (h *Handle) initStatsMeta(is schemareplicant.SchemaReplicant) (statsCache, error) {
	allegrosql := "select HIGH_PRIORITY version, block_id, modify_count, count from allegrosql.stats_spacetime"
	rc, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
	if len(rc) > 0 {
		defer terror.Call(rc[0].Close)
	}
	if err != nil {
		return statsCache{}, errors.Trace(err)
	}
	blocks := statsCache{blocks: make(map[int64]*statistics.Block)}
	req := rc[0].NewChunk()
	iter := chunk.NewIterator4Chunk(req)
	for {
		err := rc[0].Next(context.TODO(), req)
		if err != nil {
			return statsCache{}, errors.Trace(err)
		}
		if req.NumRows() == 0 {
			break
		}
		h.initStatsMeta4Chunk(is, &blocks, iter)
	}
	return blocks, nil
}

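// initStatsHistograms4Chunk decodes one chunk of allegrosql.stats_histograms events.
// Events with is_index > 0 become statistics.Index entries (histogram plus CMSketch);
// the rest become statistics.DeferredCauset entries. Events that reference an unknown
// causet, index, or column are skipped.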
func (h *Handle) initStatsHistograms4Chunk(is schemareplicant.SchemaReplicant, cache *statsCache, iter *chunk.Iterator4Chunk) {
	for event := iter.Begin(); event != iter.End(); event = iter.Next() {
		causet, ok := cache.blocks[event.GetInt64(0)]
		if !ok {
			continue
		}
		id, ndv, nullCount, version, totDefCausSize := event.GetInt64(2), event.GetInt64(3), event.GetInt64(5), event.GetUint64(4), event.GetInt64(7)
		lastAnalyzePos := event.GetCauset(11, types.NewFieldType(allegrosql.TypeBlob))
		tbl, _ := h.getTableByPhysicalID(is, causet.PhysicalID)
		if event.GetInt64(1) > 0 {
			var idxInfo *perceptron.IndexInfo
			for _, idx := range tbl.Meta().Indices {
				if idx.ID == id {
					idxInfo = idx
					break
				}
			}
			if idxInfo == nil {
				continue
			}
			cms, err := statistics.DecodeCMSketch(event.GetBytes(6), nil)
			if err != nil {
				cms = nil
				terror.Log(errors.Trace(err))
			}
			hist := statistics.NewHistogram(id, ndv, nullCount, version, types.NewFieldType(allegrosql.TypeBlob), chunk.InitialCapacity, 0)
			index := &statistics.Index{
				Histogram: *hist,
				CMSketch:  cms,
				Info:      idxInfo,
				StatsVer:  event.GetInt64(8),
				Flag:      event.GetInt64(10),
			}
			lastAnalyzePos.Copy(&index.LastAnalyzePos)
			causet.Indices[hist.ID] = index
		} else {
			var colInfo *perceptron.DeferredCausetInfo
			for _, col := range tbl.Meta().DeferredCausets {
				if col.ID == id {
					colInfo = col
					break
				}
			}
			if colInfo == nil {
				continue
			}
			hist := statistics.NewHistogram(id, ndv, nullCount, version, &colInfo.FieldType, 0, totDefCausSize)
			hist.Correlation = event.GetFloat64(9)
			col := &statistics.DeferredCauset{
				Histogram:  *hist,
				PhysicalID: causet.PhysicalID,
				Info:       colInfo,
				Count:      nullCount,
				IsHandle:   tbl.Meta().PKIsHandle && allegrosql.HasPriKeyFlag(colInfo.Flag),
				Flag:       event.GetInt64(10),
			}
			lastAnalyzePos.Copy(&col.LastAnalyzePos)
			causet.DeferredCausets[hist.ID] = col
		}
	}
}

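// initStatsHistograms loads every event of allegrosql.stats_histograms and attaches the
// histogram metadata (NDV, null count, CMSketch, flags, last analyze position) to the
// blocks that initStatsMeta has already put into the cache.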
func (h *Handle) initStatsHistograms(is schemareplicant.SchemaReplicant, cache *statsCache) error {
	allegrosql := "select HIGH_PRIORITY block_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from allegrosql.stats_histograms"
	rc, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
	if len(rc) > 0 {
		defer terror.Call(rc[0].Close)
	}
	if err != nil {
		return errors.Trace(err)
	}
	req := rc[0].NewChunk()
	iter := chunk.NewIterator4Chunk(req)
	for {
		err := rc[0].Next(context.TODO(), req)
		if err != nil {
			return errors.Trace(err)
		}
		if req.NumRows() == 0 {
			break
		}
		h.initStatsHistograms4Chunk(is, cache, iter)
	}
	return nil
}

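// initStatsTopN4Chunk appends the top-n (value, count) pairs from one chunk of
// allegrosql.stats_top_n to the CMSketch of the matching index. The value bytes are
// copied because the underlying chunk buffer is reused across Next calls.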
func (h *Handle) initStatsTopN4Chunk(cache *statsCache, iter *chunk.Iterator4Chunk) {
	for event := iter.Begin(); event != iter.End(); event = iter.Next() {
		causet, ok := cache.blocks[event.GetInt64(0)]
		if !ok {
			continue
		}
		idx, ok := causet.Indices[event.GetInt64(1)]
		if !ok || idx.CMSketch == nil {
			continue
		}
		data := make([]byte, len(event.GetBytes(2)))
		copy(data, event.GetBytes(2))
		idx.CMSketch.AppendTopN(data, event.GetUint64(3))
	}
}

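// initStatsTopN loads the index top-n values from allegrosql.stats_top_n (is_index = 1)
// and merges them into the CMSketches created by initStatsHistograms.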
func (h *Handle) initStatsTopN(cache *statsCache) error {
	allegrosql := "select HIGH_PRIORITY block_id, hist_id, value, count from allegrosql.stats_top_n where is_index = 1"
	rc, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
	if len(rc) > 0 {
		defer terror.Call(rc[0].Close)
	}
	if err != nil {
		return errors.Trace(err)
	}
	req := rc[0].NewChunk()
	iter := chunk.NewIterator4Chunk(req)
	for {
		err := rc[0].Next(context.TODO(), req)
		if err != nil {
			return errors.Trace(err)
		}
		if req.NumRows() == 0 {
			break
		}
		h.initStatsTopN4Chunk(cache, iter)
	}
	return nil
}

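// initStatsBuckets4Chunk appends one chunk of allegrosql.stats_buckets events to the
// matching histograms. Index bounds are kept as raw bytes. For columns, the event count
// is always accumulated into Count, but bucket bounds are decoded and appended only for
// primary-key columns; if decoding a bound fails, the column stats are dropped.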
func initStatsBuckets4Chunk(ctx stochastikctx.Context, cache *statsCache, iter *chunk.Iterator4Chunk) {
	for event := iter.Begin(); event != iter.End(); event = iter.Next() {
		blockID, isIndex, histID := event.GetInt64(0), event.GetInt64(1), event.GetInt64(2)
		causet, ok := cache.blocks[blockID]
		if !ok {
			continue
		}
		var lower, upper types.Causet
		var hist *statistics.Histogram
		if isIndex > 0 {
			index, ok := causet.Indices[histID]
			if !ok {
				continue
			}
			hist = &index.Histogram
			lower, upper = types.NewBytesCauset(event.GetBytes(5)), types.NewBytesCauset(event.GetBytes(6))
		} else {
			column, ok := causet.DeferredCausets[histID]
			if !ok {
				continue
			}
			column.Count += event.GetInt64(3)
			if !allegrosql.HasPriKeyFlag(column.Info.Flag) {
				continue
			}
			hist = &column.Histogram
			d := types.NewBytesCauset(event.GetBytes(5))
			var err error
			lower, err = d.ConvertTo(ctx.GetStochastikVars().StmtCtx, &column.Info.FieldType)
			if err != nil {
				logutil.BgLogger().Debug("decode bucket lower bound failed", zap.Error(err))
				delete(causet.DeferredCausets, histID)
				continue
			}
			d = types.NewBytesCauset(event.GetBytes(6))
			upper, err = d.ConvertTo(ctx.GetStochastikVars().StmtCtx, &column.Info.FieldType)
			if err != nil {
				logutil.BgLogger().Debug("decode bucket upper bound failed", zap.Error(err))
				delete(causet.DeferredCausets, histID)
				continue
			}
		}
		hist.AppendBucket(&lower, &upper, event.GetInt64(3), event.GetInt64(4))
	}
}

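// initStatsBuckets loads allegrosql.stats_buckets ordered by bucket_id, appends the
// buckets to the cached histograms, converts the per-bucket counts into cumulative
// counts, pre-calculates scalar bounds, and records the newest causet version on the cache.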
func (h *Handle) initStatsBuckets(cache *statsCache) error {
	allegrosql := "select HIGH_PRIORITY block_id, is_index, hist_id, count, repeats, lower_bound, upper_bound from allegrosql.stats_buckets order by block_id, is_index, hist_id, bucket_id"
	rc, err := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), allegrosql)
	if len(rc) > 0 {
		defer terror.Call(rc[0].Close)
	}
	if err != nil {
		return errors.Trace(err)
	}
	req := rc[0].NewChunk()
	iter := chunk.NewIterator4Chunk(req)
	for {
		err := rc[0].Next(context.TODO(), req)
		if err != nil {
			return errors.Trace(err)
		}
		if req.NumRows() == 0 {
			break
		}
		initStatsBuckets4Chunk(h.mu.ctx, cache, iter)
	}
	lastVersion := uint64(0)
	for _, causet := range cache.blocks {
		lastVersion = mathutil.MaxUint64(lastVersion, causet.Version)
		for _, idx := range causet.Indices {
			for i := 1; i < idx.Len(); i++ {
				idx.Buckets[i].Count += idx.Buckets[i-1].Count
			}
			idx.PreCalculateScalar()
		}
		for _, col := range causet.DeferredCausets {
			for i := 1; i < col.Len(); i++ {
				col.Buckets[i].Count += col.Buckets[i-1].Count
			}
			col.PreCalculateScalar()
		}
	}
	cache.version = lastVersion
	return nil
}

// InitStats initializes the stats cache using the full-load strategy: stats spacetime,
// histograms, top-n values, and buckets are all read inside a single transaction.
func (h *Handle) InitStats(is schemareplicant.SchemaReplicant) (err error) {
	h.mu.Lock()
	defer func() {
		_, err1 := h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), "commit")
		if err == nil && err1 != nil {
			err = err1
		}
		h.mu.Unlock()
	}()
	_, err = h.mu.ctx.(sqlexec.ALLEGROSQLInterlockingDirectorate).InterDircute(context.TODO(), "begin")
	if err != nil {
		return err
	}
	cache, err := h.initStatsMeta(is)
	if err != nil {
		return errors.Trace(err)
	}
	err = h.initStatsHistograms(is, &cache)
	if err != nil {
		return errors.Trace(err)
	}
	err = h.initStatsTopN(&cache)
	if err != nil {
		return err
	}
	err = h.initStatsBuckets(&cache)
	if err != nil {
		return errors.Trace(err)
	}
	cache.initMemoryUsage()
	h.uFIDelateStatsCache(cache)
	return nil
}

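// getFullTableName returns "schema.causet" for the given causet info, falling back to
// the numeric ID when no schema contains a causet with that name and ID.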
func getFullTableName(is schemareplicant.SchemaReplicant, tblInfo *perceptron.TableInfo) string {
	for _, schemaReplicant := range is.AllSchemas() {
		if t, err := is.TableByName(schemaReplicant.Name, tblInfo.Name); err == nil {
			if t.Meta().ID == tblInfo.ID {
				return schemaReplicant.Name.O + "." + tblInfo.Name.O
			}
		}
	}
	return fmt.Sprintf("%d", tblInfo.ID)
}