github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/plan/stats.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package plan
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"math"
    22  	"sort"
    23  	"strings"
    24  	"time"
    25  
    26  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    27  
    28  	"github.com/matrixorigin/matrixone/pkg/catalog"
    29  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    30  	"github.com/matrixorigin/matrixone/pkg/container/types"
    31  	"github.com/matrixorigin/matrixone/pkg/logutil"
    32  	"github.com/matrixorigin/matrixone/pkg/objectio"
    33  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    34  	pb "github.com/matrixorigin/matrixone/pkg/pb/statsinfo"
    35  	"github.com/matrixorigin/matrixone/pkg/sql/util"
    36  	v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2"
    37  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/options"
    38  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    39  )
    40  
    41  const DefaultBlockMaxRows = 8192
    42  const BlockNumForceOneCN = 200
    43  const highNDVcolumnThreshHold = 0.95
    44  const statsCacheInitSize = 128
    45  const statsCacheMaxSize = 8192
    46  
// StatsCache maps a table ID to the statistics collected for that table.
// The methods defined on it in this file read and write the map without
// taking any lock.
type StatsCache struct {
	// cache is keyed by table ID.
	cache map[uint64]*pb.StatsInfo
}
    50  
    51  func NewStatsCache() *StatsCache {
    52  	return &StatsCache{
    53  		cache: make(map[uint64]*pb.StatsInfo, statsCacheInitSize),
    54  	}
    55  }
    56  
    57  // GetStatsInfo returns the stats info and if the info in the cache needs to be updated.
    58  func (sc *StatsCache) GetStatsInfo(tableID uint64, create bool) *pb.StatsInfo {
    59  	if sc == nil {
    60  		return nil
    61  	}
    62  	if s, ok := sc.cache[tableID]; ok {
    63  		return s
    64  	}
    65  	if create {
    66  		if len(sc.cache) > statsCacheMaxSize {
    67  			sc.cache = make(map[uint64]*pb.StatsInfo, statsCacheInitSize)
    68  			logutil.Infof("statscache entries more than %v in long session, release memory and create new cachepool", statsCacheMaxSize)
    69  		}
    70  		s := NewStatsInfo()
    71  		sc.cache[tableID] = s
    72  		return s
    73  	} else {
    74  		return nil
    75  	}
    76  }
    77  
    78  // SetStatsInfo updates the stats info in the cache.
    79  func (sc *StatsCache) SetStatsInfo(tableID uint64, s *pb.StatsInfo) {
    80  	if sc == nil {
    81  		return
    82  	}
    83  	sc.cache[tableID] = s
    84  }
    85  
    86  func NewStatsInfo() *pb.StatsInfo {
    87  	return &pb.StatsInfo{
    88  		NdvMap:             make(map[string]float64),
    89  		MinValMap:          make(map[string]float64),
    90  		MaxValMap:          make(map[string]float64),
    91  		DataTypeMap:        make(map[string]uint64),
    92  		NullCntMap:         make(map[string]uint64),
    93  		SizeMap:            make(map[string]uint64),
    94  		ShuffleRangeMap:    make(map[string]*pb.ShuffleRange),
    95  		BlockNumber:        0,
    96  		ApproxObjectNumber: 0,
    97  		TableCnt:           0,
    98  	}
    99  }
   100  
// InfoFromZoneMap carries raw per-table statistics that UpdateStatsInfo
// copies into a pb.StatsInfo. The slice fields are indexed by column
// position (the same index used for tableDef.Cols in UpdateStatsInfo) and
// are allocated with one slot per column by NewInfoFromZoneMap.
type InfoFromZoneMap struct {
	ColumnZMs            []objectio.ZoneMap // per-column zone map; min/max buffers are decoded from it
	DataTypes            []types.Type       // per-column type; its Oid selects how min/max are decoded
	ColumnNDVs           []float64          // per-column number of distinct values
	NullCnts             []int64            // per-column null count
	ShuffleRanges        []*pb.ShuffleRange // per-column shuffle range, may be nil
	ColumnSize           []int64            // per-column size
	BlockNumber          int64
	AccurateObjectNumber int64
	ApproxObjectNumber   int64
	TableCnt             float64 // table row count
}
   113  
   114  func NewInfoFromZoneMap(lenCols int) *InfoFromZoneMap {
   115  	info := &InfoFromZoneMap{
   116  		ColumnZMs:     make([]objectio.ZoneMap, lenCols),
   117  		DataTypes:     make([]types.Type, lenCols),
   118  		ColumnNDVs:    make([]float64, lenCols),
   119  		NullCnts:      make([]int64, lenCols),
   120  		ColumnSize:    make([]int64, lenCols),
   121  		ShuffleRanges: make([]*pb.ShuffleRange, lenCols),
   122  	}
   123  	return info
   124  }
   125  
   126  func UpdateStatsInfo(info *InfoFromZoneMap, tableDef *plan.TableDef, s *pb.StatsInfo) {
   127  	start := time.Now()
   128  	defer func() {
   129  		v2.TxnStatementUpdateStatsInfoMapHistogram.Observe(time.Since(start).Seconds())
   130  	}()
   131  	s.ApproxObjectNumber = info.ApproxObjectNumber
   132  	s.AccurateObjectNumber = info.AccurateObjectNumber
   133  	s.BlockNumber = info.BlockNumber
   134  	s.TableCnt = info.TableCnt
   135  	s.TableName = tableDef.Name
   136  	//calc ndv with min,max,distinct value in zonemap, blocknumer and column type
   137  	//set info in statsInfo
   138  	for i, coldef := range tableDef.Cols[:len(tableDef.Cols)-1] {
   139  		colName := coldef.Name
   140  		s.NdvMap[colName] = info.ColumnNDVs[i]
   141  		s.DataTypeMap[colName] = uint64(info.DataTypes[i].Oid)
   142  		s.NullCntMap[colName] = uint64(info.NullCnts[i])
   143  		s.SizeMap[colName] = uint64(info.ColumnSize[i])
   144  
   145  		if !info.ColumnZMs[i].IsInited() {
   146  			s.MinValMap[colName] = 0
   147  			s.MaxValMap[colName] = 0
   148  			continue
   149  		}
   150  		switch info.DataTypes[i].Oid {
   151  		case types.T_bit:
   152  			s.MinValMap[colName] = float64(types.DecodeUint64(info.ColumnZMs[i].GetMinBuf()))
   153  			s.MaxValMap[colName] = float64(types.DecodeUint64(info.ColumnZMs[i].GetMaxBuf()))
   154  		case types.T_int8:
   155  			s.MinValMap[colName] = float64(types.DecodeInt8(info.ColumnZMs[i].GetMinBuf()))
   156  			s.MaxValMap[colName] = float64(types.DecodeInt8(info.ColumnZMs[i].GetMaxBuf()))
   157  		case types.T_int16:
   158  			s.MinValMap[colName] = float64(types.DecodeInt16(info.ColumnZMs[i].GetMinBuf()))
   159  			s.MaxValMap[colName] = float64(types.DecodeInt16(info.ColumnZMs[i].GetMaxBuf()))
   160  		case types.T_int32:
   161  			s.MinValMap[colName] = float64(types.DecodeInt32(info.ColumnZMs[i].GetMinBuf()))
   162  			s.MaxValMap[colName] = float64(types.DecodeInt32(info.ColumnZMs[i].GetMaxBuf()))
   163  		case types.T_int64:
   164  			s.MinValMap[colName] = float64(types.DecodeInt64(info.ColumnZMs[i].GetMinBuf()))
   165  			s.MaxValMap[colName] = float64(types.DecodeInt64(info.ColumnZMs[i].GetMaxBuf()))
   166  		case types.T_uint8:
   167  			s.MinValMap[colName] = float64(types.DecodeUint8(info.ColumnZMs[i].GetMinBuf()))
   168  			s.MaxValMap[colName] = float64(types.DecodeUint8(info.ColumnZMs[i].GetMaxBuf()))
   169  		case types.T_uint16:
   170  			s.MinValMap[colName] = float64(types.DecodeUint16(info.ColumnZMs[i].GetMinBuf()))
   171  			s.MaxValMap[colName] = float64(types.DecodeUint16(info.ColumnZMs[i].GetMaxBuf()))
   172  		case types.T_uint32:
   173  			s.MinValMap[colName] = float64(types.DecodeUint32(info.ColumnZMs[i].GetMinBuf()))
   174  			s.MaxValMap[colName] = float64(types.DecodeUint32(info.ColumnZMs[i].GetMaxBuf()))
   175  		case types.T_uint64:
   176  			s.MinValMap[colName] = float64(types.DecodeUint64(info.ColumnZMs[i].GetMinBuf()))
   177  			s.MaxValMap[colName] = float64(types.DecodeUint64(info.ColumnZMs[i].GetMaxBuf()))
   178  		case types.T_date:
   179  			s.MinValMap[colName] = float64(types.DecodeDate(info.ColumnZMs[i].GetMinBuf()))
   180  			s.MaxValMap[colName] = float64(types.DecodeDate(info.ColumnZMs[i].GetMaxBuf()))
   181  		case types.T_time:
   182  			s.MinValMap[colName] = float64(types.DecodeTime(info.ColumnZMs[i].GetMinBuf()))
   183  			s.MaxValMap[colName] = float64(types.DecodeTime(info.ColumnZMs[i].GetMaxBuf()))
   184  		case types.T_timestamp:
   185  			s.MinValMap[colName] = float64(types.DecodeTimestamp(info.ColumnZMs[i].GetMinBuf()))
   186  			s.MaxValMap[colName] = float64(types.DecodeTimestamp(info.ColumnZMs[i].GetMaxBuf()))
   187  		case types.T_datetime:
   188  			s.MinValMap[colName] = float64(types.DecodeDatetime(info.ColumnZMs[i].GetMinBuf()))
   189  			s.MaxValMap[colName] = float64(types.DecodeDatetime(info.ColumnZMs[i].GetMaxBuf()))
   190  		case types.T_char, types.T_varchar, types.T_text:
   191  			s.MinValMap[colName] = float64(ByteSliceToUint64(info.ColumnZMs[i].GetMinBuf()))
   192  			s.MaxValMap[colName] = float64(ByteSliceToUint64(info.ColumnZMs[i].GetMaxBuf()))
   193  		}
   194  
   195  		if info.ShuffleRanges[i] != nil {
   196  			if s.MinValMap[colName] != s.MaxValMap[colName] &&
   197  				s.TableCnt > HashMapSizeForShuffle &&
   198  				info.ColumnNDVs[i] >= ShuffleThreshHoldOfNDV &&
   199  				!util.JudgeIsCompositeClusterByColumn(colName) &&
   200  				colName != catalog.CPrimaryKeyColName {
   201  				info.ShuffleRanges[i].Eval()
   202  				s.ShuffleRangeMap[colName] = info.ShuffleRanges[i]
   203  			}
   204  			info.ShuffleRanges[i] = nil
   205  		}
   206  	}
   207  }
   208  
   209  // cols in one table, return if ndv of  multi column is high enough
   210  func isHighNdvCols(cols []int32, tableDef *TableDef, builder *QueryBuilder) bool {
   211  	if tableDef == nil {
   212  		return false
   213  	}
   214  	// first to check if it is primary key.
   215  	if containsAllPKs(cols, tableDef) {
   216  		return true
   217  	}
   218  
   219  	s := builder.getStatsInfoByTableID(tableDef.TblId)
   220  	if s == nil {
   221  		return false
   222  	}
   223  	var totalNDV float64 = 1
   224  	for i := range cols {
   225  		totalNDV *= s.NdvMap[tableDef.Cols[cols[i]].Name]
   226  	}
   227  	return totalNDV > s.TableCnt*highNDVcolumnThreshHold
   228  }
   229  
   230  func (builder *QueryBuilder) getColNDVRatio(cols []int32, tableDef *TableDef) float64 {
   231  	if tableDef == nil {
   232  		return 0
   233  	}
   234  	// first to check if it is primary key.
   235  	if containsAllPKs(cols, tableDef) {
   236  		return 1
   237  	}
   238  
   239  	s := builder.getStatsInfoByTableID(tableDef.TblId)
   240  	if s == nil {
   241  		return 0
   242  	}
   243  	var totalNDV float64 = 1
   244  	for i := range cols {
   245  		totalNDV *= s.NdvMap[tableDef.Cols[cols[i]].Name]
   246  	}
   247  	result := totalNDV / s.TableCnt
   248  	if result > 1 {
   249  		result = 1
   250  	}
   251  	return result
   252  }
   253  
   254  func (builder *QueryBuilder) getStatsInfoByTableID(tableID uint64) *pb.StatsInfo {
   255  	if builder == nil {
   256  		return nil
   257  	}
   258  	sc := builder.compCtx.GetStatsCache()
   259  	if sc == nil {
   260  		return nil
   261  	}
   262  	return sc.GetStatsInfo(tableID, false)
   263  }
   264  
   265  func (builder *QueryBuilder) getStatsInfoByCol(col *plan.ColRef) *pb.StatsInfo {
   266  	if builder == nil {
   267  		return nil
   268  	}
   269  	sc := builder.compCtx.GetStatsCache()
   270  	if sc == nil {
   271  		return nil
   272  	}
   273  	tableDef, ok := builder.tag2Table[col.RelPos]
   274  	if !ok {
   275  		return nil
   276  	}
   277  	//fix column name
   278  	if len(col.Name) == 0 {
   279  		col.Name = tableDef.Cols[col.ColPos].Name
   280  	}
   281  	return sc.GetStatsInfo(tableDef.TblId, false)
   282  }
   283  
   284  func (builder *QueryBuilder) getColNdv(col *plan.ColRef) float64 {
   285  	s := builder.getStatsInfoByCol(col)
   286  	if s == nil {
   287  		return -1
   288  	}
   289  	return s.NdvMap[col.Name]
   290  }
   291  
   292  func getNullSelectivity(arg *plan.Expr, builder *QueryBuilder, isnull bool) float64 {
   293  	switch exprImpl := arg.Expr.(type) {
   294  	case *plan.Expr_Col:
   295  		col := exprImpl.Col
   296  		s := builder.getStatsInfoByCol(col)
   297  		if s == nil {
   298  			break
   299  		}
   300  		nullCnt := float64(s.NullCntMap[col.Name])
   301  		if isnull {
   302  			return nullCnt / s.TableCnt
   303  		} else {
   304  			return 1 - (nullCnt / s.TableCnt)
   305  		}
   306  	}
   307  
   308  	if isnull {
   309  		return 0.1
   310  	} else {
   311  		return 0.9
   312  	}
   313  }
   314  
   315  // this function is used to calculate the ndv of expressions,
   316  // like year(l_orderdate), substring(phone_number), and assume col is the first argument
   317  // if only the ndv of column is needed, please call getColNDV
   318  // if this function fail, it will return -1
   319  func getExprNdv(expr *plan.Expr, builder *QueryBuilder) float64 {
   320  	switch exprImpl := expr.Expr.(type) {
   321  	case *plan.Expr_F:
   322  		funcName := exprImpl.F.Func.ObjName
   323  		switch funcName {
   324  		case "year":
   325  			return getExprNdv(exprImpl.F.Args[0], builder) / 365
   326  		case "substring":
   327  			// no good way to calc ndv for substring
   328  			return math.Min(getExprNdv(exprImpl.F.Args[0], builder), 25)
   329  		default:
   330  			return getExprNdv(exprImpl.F.Args[0], builder)
   331  		}
   332  	case *plan.Expr_Col:
   333  		return builder.getColNdv(exprImpl.Col)
   334  	}
   335  	return -1
   336  }
   337  
   338  func estimateEqualitySelectivity(expr *plan.Expr, builder *QueryBuilder) float64 {
   339  	// only filter like func(col)=1 or col=? can estimate outcnt
   340  	// and only 1 colRef is allowd in the filter. otherwise, no good method to calculate
   341  	col := extractColRefInFilter(expr)
   342  	if col == nil {
   343  		return 0.01
   344  	}
   345  	ndv := getExprNdv(expr, builder)
   346  	if ndv > 0 {
   347  		return 1 / ndv
   348  	}
   349  	return 0.01
   350  }
   351  
   352  func calcSelectivityByMinMax(funcName string, min, max float64, typ types.T, vals []*plan.Literal) (ret float64) {
   353  	switch funcName {
   354  	case ">", ">=":
   355  		if val, ok := getFloat64Value(typ, vals[0]); ok {
   356  			ret = (max - val + 1) / (max - min)
   357  		}
   358  	case "<", "<=":
   359  		if val, ok := getFloat64Value(typ, vals[0]); ok {
   360  			ret = (val - min + 1) / (max - min)
   361  		}
   362  	case "between":
   363  		if lb, ok := getFloat64Value(typ, vals[0]); ok {
   364  			if ub, ok := getFloat64Value(typ, vals[1]); ok {
   365  				ret = (ub - lb + 1) / (max - min)
   366  			}
   367  		}
   368  	default:
   369  		ret = 0.3
   370  	}
   371  	if ret < 0 {
   372  		ret = 0
   373  	}
   374  	if ret > 1 {
   375  		ret = 1
   376  	}
   377  	return ret
   378  }
   379  
   380  func getFloat64Value(typ types.T, lit *plan.Literal) (float64, bool) {
   381  	switch typ {
   382  	case types.T_float32:
   383  		if val, valOk := lit.Value.(*plan.Literal_Fval); valOk {
   384  			return float64(val.Fval), true
   385  		}
   386  	case types.T_float64:
   387  		if val, valOk := lit.Value.(*plan.Literal_Dval); valOk {
   388  			return val.Dval, true
   389  		}
   390  	case types.T_int8:
   391  		if val, valOk := lit.Value.(*plan.Literal_I8Val); valOk {
   392  			return float64(val.I8Val), true
   393  		}
   394  	case types.T_int16:
   395  		if val, valOk := lit.Value.(*plan.Literal_I16Val); valOk {
   396  			return float64(val.I16Val), true
   397  		}
   398  	case types.T_int32:
   399  		if val, valOk := lit.Value.(*plan.Literal_I32Val); valOk {
   400  			return float64(val.I32Val), true
   401  		}
   402  	case types.T_int64:
   403  		if val, valOk := lit.Value.(*plan.Literal_I64Val); valOk {
   404  			return float64(val.I64Val), true
   405  		}
   406  	case types.T_uint8:
   407  		if val, valOk := lit.Value.(*plan.Literal_U8Val); valOk {
   408  			return float64(val.U8Val), true
   409  		}
   410  	case types.T_uint16:
   411  		if val, valOk := lit.Value.(*plan.Literal_U16Val); valOk {
   412  			return float64(val.U16Val), true
   413  		}
   414  	case types.T_uint32:
   415  		if val, valOk := lit.Value.(*plan.Literal_U32Val); valOk {
   416  			return float64(val.U32Val), true
   417  		}
   418  	case types.T_uint64:
   419  		if val, valOk := lit.Value.(*plan.Literal_U64Val); valOk {
   420  			return float64(val.U64Val), true
   421  		}
   422  	case types.T_date:
   423  		if val, valOk := lit.Value.(*plan.Literal_Dateval); valOk {
   424  			return float64(val.Dateval), true
   425  		}
   426  	case types.T_datetime:
   427  		if val, valOk := lit.Value.(*plan.Literal_Datetimeval); valOk {
   428  			return float64(val.Datetimeval), true
   429  		}
   430  	}
   431  
   432  	return 0, false
   433  }
   434  
   435  func estimateNonEqualitySelectivity(expr *plan.Expr, funcName string, builder *QueryBuilder) float64 {
   436  	// only filter like func(col)>1 , or (col=1) or (col=2) can estimate outcnt
   437  	// and only 1 colRef is allowd in the filter. otherwise, no good method to calculate
   438  	col := extractColRefInFilter(expr)
   439  	if col == nil {
   440  		return 0.1
   441  	}
   442  	s := builder.getStatsInfoByCol(col)
   443  	if s == nil {
   444  		return 0.1
   445  	}
   446  	//check strict filter, otherwise can not estimate outcnt by min/max val
   447  	col, litType, literals, colFnName := extractColRefAndLiteralsInFilter(expr)
   448  	if col != nil && len(literals) > 0 {
   449  		typ := types.T(s.DataTypeMap[col.Name])
   450  		if !(typ.IsInteger() || typ.IsDateRelate()) {
   451  			return 0.1
   452  		}
   453  
   454  		switch colFnName {
   455  		case "":
   456  			return calcSelectivityByMinMax(funcName, s.MinValMap[col.Name], s.MaxValMap[col.Name], typ, literals)
   457  		case "year":
   458  			switch typ {
   459  			case types.T_date:
   460  				minVal := types.Date(s.MinValMap[col.Name])
   461  				maxVal := types.Date(s.MaxValMap[col.Name])
   462  				return calcSelectivityByMinMax(funcName, float64(minVal.Year()), float64(maxVal.Year()), litType, literals)
   463  			case types.T_datetime:
   464  				// TODO
   465  			}
   466  		}
   467  	}
   468  
   469  	return 0.1
   470  }
   471  
   472  func estimateExprSelectivity(expr *plan.Expr, builder *QueryBuilder) float64 {
   473  	if expr == nil {
   474  		return 1
   475  	}
   476  
   477  	switch exprImpl := expr.Expr.(type) {
   478  	case *plan.Expr_F:
   479  		funcName := exprImpl.F.Func.ObjName
   480  		switch funcName {
   481  		case "=":
   482  			return estimateEqualitySelectivity(expr, builder)
   483  		case "!=", "<>":
   484  			return 0.9
   485  		case ">", "<", ">=", "<=", "between":
   486  			return estimateNonEqualitySelectivity(expr, funcName, builder)
   487  		case "and":
   488  			sel1 := estimateExprSelectivity(exprImpl.F.Args[0], builder)
   489  			sel2 := estimateExprSelectivity(exprImpl.F.Args[1], builder)
   490  			if canMergeToBetweenAnd(exprImpl.F.Args[0], exprImpl.F.Args[1]) && (sel1+sel2) > 1 {
   491  				return sel1 + sel2 - 1
   492  			} else {
   493  				return andSelectivity(sel1, sel2)
   494  			}
   495  		case "or":
   496  			sel1 := estimateExprSelectivity(exprImpl.F.Args[0], builder)
   497  			sel2 := estimateExprSelectivity(exprImpl.F.Args[1], builder)
   498  			return orSelectivity(sel1, sel2)
   499  		case "not":
   500  			return 1 - estimateExprSelectivity(exprImpl.F.Args[0], builder)
   501  		case "like":
   502  			return 0.2
   503  		case "prefix_eq":
   504  			ndv := getExprNdv(expr, builder)
   505  			if ndv > 10 {
   506  				return 10 / ndv
   507  			}
   508  			return 0.5
   509  		case "in":
   510  			card := float64(exprImpl.F.Args[1].GetVec().Len)
   511  			ndv := getExprNdv(expr, builder)
   512  			if ndv > card {
   513  				return card / ndv
   514  			}
   515  			return 1
   516  		case "prefix_in":
   517  			card := float64(exprImpl.F.Args[1].GetVec().Len)
   518  			ndv := getExprNdv(expr, builder)
   519  			if ndv > 10*card {
   520  				return 10 * card / ndv
   521  			}
   522  			return 0.5
   523  		case "prefix_between":
   524  			return 0.1
   525  		case "isnull", "is_null":
   526  			return getNullSelectivity(exprImpl.F.Args[0], builder, true)
   527  		case "isnotnull", "is_not_null":
   528  			return getNullSelectivity(exprImpl.F.Args[0], builder, false)
   529  		default:
   530  			return 0.15
   531  		}
   532  	case *plan.Expr_Lit:
   533  		return 1
   534  	}
   535  	return 1
   536  }
   537  
   538  func estimateFilterWeight(expr *plan.Expr, w float64) float64 {
   539  	switch expr.Typ.Id {
   540  	case int32(types.T_decimal64):
   541  		w += 64
   542  	case int32(types.T_decimal128):
   543  		w += 128
   544  	case int32(types.T_float32), int32(types.T_float64):
   545  		w += 8
   546  	case int32(types.T_char), int32(types.T_varchar), int32(types.T_text), int32(types.T_json):
   547  		w += 4
   548  	}
   549  	switch exprImpl := expr.Expr.(type) {
   550  	case *plan.Expr_F:
   551  		funcImpl := exprImpl.F
   552  		switch funcImpl.Func.GetObjName() {
   553  		case "like":
   554  			w += 10
   555  		case "cast":
   556  			w += 3
   557  		case "in":
   558  			w += 2
   559  		case "<>", "!=":
   560  			w += 1.2
   561  		case "<", "<=":
   562  			w += 1.1
   563  		default:
   564  			w += 1
   565  		}
   566  		for _, child := range exprImpl.F.Args {
   567  			w += estimateFilterWeight(child, 0)
   568  		}
   569  	}
   570  	return w
   571  }
   572  
   573  // harsh estimate of block selectivity, will improve it in the future
   574  func estimateFilterBlockSelectivity(ctx context.Context, expr *plan.Expr, tableDef *plan.TableDef, s *pb.StatsInfo) float64 {
   575  	if !ExprIsZonemappable(ctx, expr) {
   576  		return 1
   577  	}
   578  	col := extractColRefInFilter(expr)
   579  	if col != nil {
   580  		blocksel := calcBlockSelectivityUsingShuffleRange(s.ShuffleRangeMap[col.Name], expr.Selectivity)
   581  		switch GetSortOrder(tableDef, col.ColPos) {
   582  		case 0:
   583  			blocksel = math.Min(blocksel, 0.2)
   584  		case 1:
   585  			return math.Min(blocksel, 0.5)
   586  		case 2:
   587  			return math.Min(blocksel, 0.7)
   588  		}
   589  		return blocksel
   590  	}
   591  	return 1
   592  }
   593  
   594  func rewriteFilterListByStats(ctx context.Context, nodeID int32, builder *QueryBuilder) {
   595  	node := builder.qry.Nodes[nodeID]
   596  	if len(node.Children) > 0 {
   597  		for _, child := range node.Children {
   598  			rewriteFilterListByStats(ctx, child, builder)
   599  		}
   600  	}
   601  	switch node.NodeType {
   602  	case plan.Node_TABLE_SCAN:
   603  		if node.ObjRef != nil && len(node.FilterList) >= 1 {
   604  			sort.Slice(node.FilterList, func(i, j int) bool {
   605  				cost1 := estimateFilterWeight(node.FilterList[i], 0) * estimateExprSelectivity(node.FilterList[i], builder)
   606  				cost2 := estimateFilterWeight(node.FilterList[j], 0) * estimateExprSelectivity(node.FilterList[j], builder)
   607  				return cost1 <= cost2
   608  			})
   609  			sort.Slice(node.BlockFilterList, func(i, j int) bool {
   610  				blockSel1 := node.BlockFilterList[i].Selectivity
   611  				blockSel2 := node.BlockFilterList[j].Selectivity
   612  				return blockSel1 <= blockSel2
   613  			})
   614  		}
   615  	}
   616  }
   617  
   618  func ReCalcNodeStats(nodeID int32, builder *QueryBuilder, recursive bool, leafNode bool, needResetHashMapStats bool) {
   619  	node := builder.qry.Nodes[nodeID]
   620  	if recursive {
   621  		if len(node.Children) > 0 {
   622  			for _, child := range node.Children {
   623  				ReCalcNodeStats(child, builder, recursive, leafNode, needResetHashMapStats)
   624  			}
   625  		}
   626  	}
   627  
   628  	var leftStats, rightStats, childStats *Stats
   629  	if len(node.Children) == 1 {
   630  		childStats = builder.qry.Nodes[node.Children[0]].Stats
   631  	} else if len(node.Children) == 2 {
   632  		leftStats = builder.qry.Nodes[node.Children[0]].Stats
   633  		rightStats = builder.qry.Nodes[node.Children[1]].Stats
   634  	}
   635  
   636  	if node.Stats == nil {
   637  		if node.NodeType != plan.Node_EXTERNAL_SCAN && node.NodeType != plan.Node_TABLE_SCAN {
   638  			node.Stats = DefaultStats()
   639  		}
   640  	}
   641  
   642  	switch node.NodeType {
   643  	case plan.Node_JOIN:
   644  		if needResetHashMapStats {
   645  			resetHashMapStats(node.Stats)
   646  		}
   647  
   648  		ndv := math.Min(leftStats.Outcnt, rightStats.Outcnt)
   649  		if ndv < 1 {
   650  			ndv = 1
   651  		}
   652  		//assume all join is not cross join
   653  		//will fix this in the future
   654  		//isCrossJoin := (len(node.OnList) == 0)
   655  		isCrossJoin := false
   656  		selectivity := math.Pow(rightStats.Selectivity, math.Pow(leftStats.Selectivity, 0.2))
   657  		selectivity_out := andSelectivity(leftStats.Selectivity, rightStats.Selectivity)
   658  
   659  		for _, pred := range node.OnList {
   660  			if pred.Ndv <= 0 {
   661  				pred.Ndv = getExprNdv(pred, builder)
   662  			}
   663  		}
   664  
   665  		switch node.JoinType {
   666  		case plan.Node_INNER:
   667  			outcnt := leftStats.Outcnt * rightStats.Outcnt / ndv
   668  			if !isCrossJoin {
   669  				outcnt *= selectivity
   670  			}
   671  			if outcnt < rightStats.Outcnt && leftStats.Selectivity > 0.95 {
   672  				outcnt = rightStats.Outcnt
   673  			}
   674  			node.Stats.Outcnt = outcnt
   675  			node.Stats.Cost = leftStats.Cost + rightStats.Cost
   676  			node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt
   677  			node.Stats.Selectivity = selectivity_out
   678  
   679  		case plan.Node_LEFT:
   680  			node.Stats.Outcnt = leftStats.Outcnt
   681  			node.Stats.Cost = leftStats.Cost + rightStats.Cost
   682  			node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt
   683  			node.Stats.Selectivity = selectivity_out
   684  
   685  		case plan.Node_RIGHT:
   686  			node.Stats.Outcnt = rightStats.Outcnt
   687  			node.Stats.Cost = leftStats.Cost + rightStats.Cost
   688  			node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt
   689  			node.Stats.Selectivity = selectivity_out
   690  
   691  		case plan.Node_OUTER:
   692  			node.Stats.Outcnt = leftStats.Outcnt + rightStats.Outcnt
   693  			node.Stats.Cost = leftStats.Cost + rightStats.Cost
   694  			node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt
   695  			node.Stats.Selectivity = selectivity_out
   696  
   697  		case plan.Node_SEMI, plan.Node_INDEX:
   698  			node.Stats.Outcnt = leftStats.Outcnt * selectivity
   699  			node.Stats.Cost = leftStats.Cost + rightStats.Cost
   700  			node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt
   701  			node.Stats.Selectivity = selectivity_out
   702  
   703  		case plan.Node_ANTI:
   704  			node.Stats.Outcnt = leftStats.Outcnt * (1 - rightStats.Selectivity) * 0.5
   705  			node.Stats.Cost = leftStats.Cost + rightStats.Cost
   706  			node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt
   707  			node.Stats.Selectivity = selectivity_out
   708  
   709  		case plan.Node_SINGLE, plan.Node_MARK:
   710  			node.Stats.Outcnt = leftStats.Outcnt
   711  			node.Stats.Cost = leftStats.Cost + rightStats.Cost
   712  			node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt
   713  			node.Stats.Selectivity = selectivity_out
   714  		}
   715  
   716  	case plan.Node_AGG:
   717  		if needResetHashMapStats {
   718  			resetHashMapStats(node.Stats)
   719  		}
   720  		if len(node.GroupBy) > 0 {
   721  			incnt := childStats.Outcnt
   722  			outcnt := 1.0
   723  			for _, groupby := range node.GroupBy {
   724  				ndv := getExprNdv(groupby, builder)
   725  				if ndv > 1 {
   726  					groupby.Ndv = ndv
   727  					outcnt *= ndv
   728  				}
   729  			}
   730  			if outcnt > incnt {
   731  				outcnt = math.Min(incnt, outcnt*math.Pow(childStats.Selectivity, 0.8))
   732  			}
   733  			node.Stats.Outcnt = outcnt
   734  			node.Stats.Cost = incnt + outcnt
   735  			node.Stats.HashmapStats.HashmapSize = outcnt
   736  			node.Stats.Selectivity = 1
   737  			if len(node.FilterList) > 0 {
   738  				node.Stats.Outcnt *= 0.0001
   739  				node.Stats.Selectivity *= 0.0001
   740  			}
   741  		} else {
   742  			node.Stats.Outcnt = 1
   743  			node.Stats.Cost = childStats.Cost
   744  			node.Stats.HashmapStats.HashmapSize = 1
   745  			node.Stats.Selectivity = 1
   746  		}
   747  
   748  	case plan.Node_UNION:
   749  		if needResetHashMapStats {
   750  			resetHashMapStats(node.Stats)
   751  		}
   752  		node.Stats.Outcnt = (leftStats.Outcnt + rightStats.Outcnt) * 0.7
   753  		node.Stats.Cost = leftStats.Outcnt + rightStats.Outcnt
   754  		node.Stats.Selectivity = 1
   755  		node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt
   756  
   757  	case plan.Node_UNION_ALL:
   758  		node.Stats.Outcnt = leftStats.Outcnt + rightStats.Outcnt
   759  		node.Stats.Cost = leftStats.Outcnt + rightStats.Outcnt
   760  		node.Stats.Selectivity = 1
   761  
   762  	case plan.Node_INTERSECT:
   763  		if needResetHashMapStats {
   764  			resetHashMapStats(node.Stats)
   765  		}
   766  		node.Stats.Outcnt = math.Min(leftStats.Outcnt, rightStats.Outcnt) * 0.5
   767  		node.Stats.Cost = leftStats.Outcnt + rightStats.Outcnt
   768  		node.Stats.Selectivity = 1
   769  		node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt
   770  
   771  	case plan.Node_INTERSECT_ALL:
   772  		if needResetHashMapStats {
   773  			resetHashMapStats(node.Stats)
   774  		}
   775  		node.Stats.Outcnt = math.Min(leftStats.Outcnt, rightStats.Outcnt) * 0.7
   776  		node.Stats.Cost = leftStats.Outcnt + rightStats.Outcnt
   777  		node.Stats.Selectivity = 1
   778  		node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt
   779  
   780  	case plan.Node_MINUS:
   781  		if needResetHashMapStats {
   782  			resetHashMapStats(node.Stats)
   783  		}
   784  		minus := math.Max(leftStats.Outcnt, rightStats.Outcnt) - math.Min(leftStats.Outcnt, rightStats.Outcnt)
   785  		node.Stats.Outcnt = minus * 0.5
   786  		node.Stats.Cost = leftStats.Outcnt + rightStats.Outcnt
   787  		node.Stats.Selectivity = 1
   788  		node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt
   789  
   790  	case plan.Node_MINUS_ALL:
   791  		if needResetHashMapStats {
   792  			resetHashMapStats(node.Stats)
   793  		}
   794  		minus := math.Max(leftStats.Outcnt, rightStats.Outcnt) - math.Min(leftStats.Outcnt, rightStats.Outcnt)
   795  		node.Stats.Outcnt = minus * 0.7
   796  		node.Stats.Cost = leftStats.Outcnt + rightStats.Outcnt
   797  		node.Stats.Selectivity = 1
   798  		node.Stats.HashmapStats.HashmapSize = rightStats.Outcnt
   799  
   800  	case plan.Node_VALUE_SCAN:
   801  		if node.RowsetData != nil {
   802  			rowCount := float64(node.RowsetData.RowCount)
   803  			node.Stats.TableCnt = rowCount
   804  			node.Stats.BlockNum = int32(rowCount/float64(options.DefaultBlockMaxRows) + 1)
   805  			node.Stats.Cost = rowCount
   806  			node.Stats.Outcnt = rowCount
   807  			node.Stats.Selectivity = 1
   808  		}
   809  
   810  	case plan.Node_SINK_SCAN:
   811  		sourceNode := builder.qry.Steps[node.GetSourceStep()[0]]
   812  		node.Stats = builder.qry.Nodes[sourceNode].Stats
   813  
   814  	case plan.Node_RECURSIVE_SCAN:
   815  		sourceNode := builder.qry.Steps[node.GetSourceStep()[0]]
   816  		node.Stats = builder.qry.Nodes[sourceNode].Stats
   817  
   818  	case plan.Node_EXTERNAL_SCAN:
   819  		//calc for external scan is heavy, avoid recalc of this
   820  		if node.Stats == nil || node.Stats.TableCnt == 0 {
   821  			node.Stats = getExternalStats(node, builder)
   822  		}
   823  
   824  	case plan.Node_TABLE_SCAN:
   825  		//calc for scan is heavy. use leafNode to judge if scan need to recalculate
   826  		if node.ObjRef != nil && leafNode {
   827  			if len(node.BindingTags) > 0 {
   828  				builder.tag2Table[node.BindingTags[0]] = node.TableDef
   829  			}
   830  			newStats := calcScanStats(node, builder)
   831  			if needResetHashMapStats {
   832  				resetHashMapStats(newStats)
   833  			}
   834  			node.Stats = newStats
   835  		}
   836  
   837  	case plan.Node_FILTER:
   838  		//filters which can not push down to scan nodes. hard to estimate selectivity
   839  		node.Stats.Outcnt = childStats.Outcnt * 0.05
   840  		if node.Stats.Outcnt < 1 {
   841  			node.Stats.Outcnt = 1
   842  		}
   843  		node.Stats.Cost = childStats.Cost
   844  		node.Stats.Selectivity = 0.05
   845  
   846  	case plan.Node_FUNCTION_SCAN:
   847  		if !computeFunctionScan(node.TableDef.TblFunc.Name, node.TblFuncExprList, node.Stats) {
   848  			if len(node.Children) > 0 && childStats != nil {
   849  				node.Stats.Outcnt = childStats.Outcnt
   850  				node.Stats.Cost = childStats.Outcnt
   851  				node.Stats.Selectivity = childStats.Selectivity
   852  			}
   853  		}
   854  
   855  	case plan.Node_INSERT:
   856  		if len(node.Children) > 0 && childStats != nil {
   857  			node.Stats.Outcnt = childStats.Outcnt
   858  			node.Stats.Cost = childStats.Outcnt
   859  			node.Stats.Selectivity = childStats.Selectivity
   860  			node.Stats.Rowsize = GetRowSizeFromTableDef(node.TableDef, true) * 0.8
   861  		}
   862  
   863  	default:
   864  		if len(node.Children) > 0 && childStats != nil {
   865  			node.Stats.Outcnt = childStats.Outcnt
   866  			node.Stats.Cost = childStats.Outcnt
   867  			node.Stats.Selectivity = childStats.Selectivity
   868  		}
   869  	}
   870  
   871  	// if there is a limit, outcnt is limit number
   872  	if node.Limit != nil {
   873  		limitExpr := DeepCopyExpr(node.Limit)
   874  		if _, ok := limitExpr.Expr.(*plan.Expr_F); ok {
   875  			if !hasParam(limitExpr) {
   876  				limitExpr, _ = ConstantFold(batch.EmptyForConstFoldBatch, limitExpr, builder.compCtx.GetProcess(), true)
   877  			}
   878  		}
   879  		if cExpr, ok := limitExpr.Expr.(*plan.Expr_Lit); ok {
   880  			if c, ok := cExpr.Lit.Value.(*plan.Literal_I64Val); ok {
   881  				node.Stats.Outcnt = float64(c.I64Val)
   882  				node.Stats.Selectivity = node.Stats.Outcnt / node.Stats.Cost
   883  			}
   884  		}
   885  	}
   886  }
   887  
   888  func computeFunctionScan(name string, exprs []*Expr, nodeStat *Stats) bool {
   889  	if name != "generate_series" {
   890  		return false
   891  	}
   892  	var cost float64
   893  	var canGetCost bool
   894  	if len(exprs) == 2 {
   895  		if exprs[0].Typ.Id != exprs[1].Typ.Id {
   896  			return false
   897  		}
   898  		cost, canGetCost = getCost(exprs[0], exprs[1], nil)
   899  	} else if len(exprs) == 3 {
   900  		if !(exprs[0].Typ.Id == exprs[1].Typ.Id && exprs[1].Typ.Id == exprs[2].Typ.Id) {
   901  			return false
   902  		}
   903  		cost, canGetCost = getCost(exprs[0], exprs[1], exprs[2])
   904  	} else {
   905  		return false
   906  	}
   907  	if !canGetCost {
   908  		return false
   909  	}
   910  	nodeStat.Outcnt = cost
   911  	nodeStat.TableCnt = cost
   912  	nodeStat.Cost = cost
   913  	nodeStat.Selectivity = 1
   914  	return true
   915  }
   916  
   917  func getCost(start *Expr, end *Expr, step *Expr) (float64, bool) {
   918  	var startNum, endNum, stepNum float64
   919  	var flag1, flag2, flag3 bool
   920  	getInt32Val := func(e *Expr) (float64, bool) {
   921  		if s, ok := e.Expr.(*plan.Expr_Lit); ok {
   922  			if v, ok := s.Lit.Value.(*plan.Literal_I32Val); ok && !s.Lit.Isnull {
   923  				return float64(v.I32Val), true
   924  			}
   925  		}
   926  		return 0, false
   927  	}
   928  	getInt64Val := func(e *Expr) (float64, bool) {
   929  		if s, ok := e.Expr.(*plan.Expr_Lit); ok {
   930  			if v, ok := s.Lit.Value.(*plan.Literal_I64Val); ok && !s.Lit.Isnull {
   931  				return float64(v.I64Val), true
   932  			}
   933  		}
   934  		return 0, false
   935  	}
   936  
   937  	switch start.Typ.Id {
   938  	case int32(types.T_int32):
   939  		startNum, flag1 = getInt32Val(start)
   940  		endNum, flag2 = getInt32Val(end)
   941  		flag3 = true
   942  		if step != nil {
   943  			stepNum, flag3 = getInt32Val(step)
   944  		}
   945  		if !(flag1 && flag2 && flag3) {
   946  			return 0, false
   947  		}
   948  	case int32(types.T_int64):
   949  		startNum, flag1 = getInt64Val(start)
   950  		endNum, flag2 = getInt64Val(end)
   951  		flag3 = true
   952  		if step != nil {
   953  			stepNum, flag3 = getInt64Val(step)
   954  		}
   955  		if !(flag1 && flag2 && flag3) {
   956  			return 0, false
   957  		}
   958  	}
   959  	if step == nil {
   960  		if startNum > endNum {
   961  			stepNum = -1
   962  		} else {
   963  			stepNum = 1
   964  		}
   965  	}
   966  	ret := (endNum - startNum) / stepNum
   967  	if ret < 0 {
   968  		return 0, false
   969  	}
   970  	return ret, true
   971  }
   972  
   973  func foldTableScanFilters(proc *process.Process, qry *Query, nodeId int32) error {
   974  	node := qry.Nodes[nodeId]
   975  	if node.NodeType == plan.Node_TABLE_SCAN && len(node.FilterList) > 0 {
   976  		for i, e := range node.FilterList {
   977  			foldedExpr, err := ConstantFold(batch.EmptyForConstFoldBatch, e, proc, false)
   978  			if err != nil {
   979  				return err
   980  			}
   981  			node.FilterList[i] = foldedExpr
   982  		}
   983  	}
   984  	for _, childId := range node.Children {
   985  		err := foldTableScanFilters(proc, qry, childId)
   986  		if err != nil {
   987  			return err
   988  		}
   989  	}
   990  	return nil
   991  }
   992  
   993  func recalcStatsByRuntimeFilter(node *plan.Node, joinNode *plan.Node, runtimeFilterSel float64) {
   994  	if node.NodeType != plan.Node_TABLE_SCAN {
   995  		return
   996  	}
   997  	node.Stats.Cost *= runtimeFilterSel
   998  	node.Stats.Outcnt *= runtimeFilterSel
   999  	if node.Stats.Cost < 1 {
  1000  		node.Stats.Cost = 1
  1001  	}
  1002  	node.Stats.BlockNum = int32(node.Stats.Outcnt/2) + 1
  1003  }
  1004  
  1005  func calcScanStats(node *plan.Node, builder *QueryBuilder) *plan.Stats {
  1006  	if builder.skipStats {
  1007  		return DefaultStats()
  1008  	}
  1009  	if InternalTable(node.TableDef) {
  1010  		return DefaultStats()
  1011  	}
  1012  	if shouldReturnMinimalStats(node) {
  1013  		return DefaultMinimalStats()
  1014  	}
  1015  
  1016  	//ts := timestamp.Timestamp{}
  1017  	//if node.ScanTS != nil {
  1018  	//	ts = *node.ScanTS
  1019  	//}
  1020  
  1021  	scanSnapshot := node.ScanSnapshot
  1022  	if scanSnapshot == nil {
  1023  		scanSnapshot = &Snapshot{}
  1024  	}
  1025  
  1026  	s, err := builder.compCtx.Stats(node.ObjRef, *scanSnapshot)
  1027  	if err != nil || s == nil {
  1028  		return DefaultStats()
  1029  	}
  1030  
  1031  	stats := new(plan.Stats)
  1032  	stats.TableCnt = s.TableCnt
  1033  	var blockSel float64 = 1
  1034  
  1035  	var blockExprList []*plan.Expr
  1036  	for i := range node.FilterList {
  1037  		node.FilterList[i].Selectivity = estimateExprSelectivity(node.FilterList[i], builder)
  1038  		currentBlockSel := estimateFilterBlockSelectivity(builder.GetContext(), node.FilterList[i], node.TableDef, s)
  1039  		if builder.optimizerHints != nil {
  1040  			if builder.optimizerHints.blockFilter == 1 { //always trying to pushdown blockfilters if zonemappable
  1041  				if ExprIsZonemappable(builder.GetContext(), node.FilterList[i]) {
  1042  					copyOfExpr := DeepCopyExpr(node.FilterList[i])
  1043  					copyOfExpr.Selectivity = currentBlockSel
  1044  					blockExprList = append(blockExprList, copyOfExpr)
  1045  				}
  1046  			} else if builder.optimizerHints.blockFilter == 2 { // never pushdown blockfilters
  1047  				node.BlockFilterList = nil
  1048  			} else {
  1049  				if currentBlockSel < 1 || strings.HasPrefix(node.TableDef.Name, catalog.IndexTableNamePrefix) {
  1050  					copyOfExpr := DeepCopyExpr(node.FilterList[i])
  1051  					copyOfExpr.Selectivity = currentBlockSel
  1052  					blockExprList = append(blockExprList, copyOfExpr)
  1053  				}
  1054  			}
  1055  		} else {
  1056  			if currentBlockSel < 1 || strings.HasPrefix(node.TableDef.Name, catalog.IndexTableNamePrefix) {
  1057  				copyOfExpr := DeepCopyExpr(node.FilterList[i])
  1058  				copyOfExpr.Selectivity = currentBlockSel
  1059  				blockExprList = append(blockExprList, copyOfExpr)
  1060  			}
  1061  		}
  1062  		blockSel = andSelectivity(blockSel, currentBlockSel)
  1063  	}
  1064  	node.BlockFilterList = blockExprList
  1065  	stats.Selectivity = estimateExprSelectivity(colexec.RewriteFilterExprList(node.FilterList), builder)
  1066  	stats.Outcnt = stats.Selectivity * stats.TableCnt
  1067  	stats.Cost = stats.TableCnt * blockSel
  1068  	stats.BlockNum = int32(float64(s.BlockNumber)*blockSel) + 1
  1069  
  1070  	// if there is a limit, outcnt is limit number
  1071  	if node.Limit != nil {
  1072  		if cExpr, ok := node.Limit.Expr.(*plan.Expr_Lit); ok {
  1073  			if c, ok := cExpr.Lit.Value.(*plan.Literal_I64Val); ok {
  1074  				stats.Outcnt = float64(c.I64Val)
  1075  				stats.BlockNum = int32(((stats.Outcnt / stats.Selectivity) / DefaultBlockMaxRows) + 1)
  1076  				stats.Cost = float64(stats.BlockNum * DefaultBlockMaxRows)
  1077  			}
  1078  		}
  1079  	}
  1080  
  1081  	return stats
  1082  }
  1083  
  1084  func shouldReturnMinimalStats(node *plan.Node) bool {
  1085  	return false
  1086  }
  1087  
  1088  func InternalTable(tableDef *TableDef) bool {
  1089  	switch tableDef.TblId {
  1090  	case catalog.MO_DATABASE_ID, catalog.MO_TABLES_ID, catalog.MO_COLUMNS_ID:
  1091  		return true
  1092  	}
  1093  	if strings.HasPrefix(tableDef.Name, "sys_") {
  1094  		return true
  1095  	}
  1096  	if strings.HasPrefix(tableDef.Name, "mo_") {
  1097  		return true
  1098  	}
  1099  	return false
  1100  }
  1101  
  1102  func DefaultHugeStats() *plan.Stats {
  1103  	stats := new(Stats)
  1104  	stats.TableCnt = 10000000
  1105  	stats.Cost = 10000000
  1106  	stats.Outcnt = 10000000
  1107  	stats.Selectivity = 1
  1108  	stats.BlockNum = 1000
  1109  	return stats
  1110  }
  1111  
  1112  func DefaultStats() *plan.Stats {
  1113  	stats := new(Stats)
  1114  	stats.TableCnt = 1000
  1115  	stats.Cost = 1000
  1116  	stats.Outcnt = 1000
  1117  	stats.Selectivity = 1
  1118  	stats.BlockNum = 1
  1119  	return stats
  1120  }
  1121  
  1122  func DefaultMinimalStats() *plan.Stats {
  1123  	stats := new(Stats)
  1124  	stats.TableCnt = 100000
  1125  	stats.Cost = 10
  1126  	stats.Outcnt = 10
  1127  	stats.Selectivity = 0.0001
  1128  	stats.BlockNum = 1
  1129  	return stats
  1130  }
  1131  
  1132  func resetHashMapStats(stats *plan.Stats) {
  1133  	if stats.HashmapStats == nil {
  1134  		stats.HashmapStats = &plan.HashMapStats{}
  1135  	} else {
  1136  		stats.HashmapStats.HashmapSize = 0
  1137  		stats.HashmapStats.HashOnPK = false
  1138  		stats.HashmapStats.Shuffle = false
  1139  	}
  1140  }
  1141  
  1142  func (builder *QueryBuilder) determineBuildAndProbeSide(nodeID int32, recursive bool) {
  1143  	if builder.optimizerHints != nil && builder.optimizerHints.joinOrdering != 0 {
  1144  		return
  1145  	}
  1146  
  1147  	node := builder.qry.Nodes[nodeID]
  1148  	if recursive && len(node.Children) > 0 {
  1149  		for _, child := range node.Children {
  1150  			builder.determineBuildAndProbeSide(child, recursive)
  1151  		}
  1152  	}
  1153  	if node.NodeType != plan.Node_JOIN {
  1154  		return
  1155  	}
  1156  
  1157  	leftChild := builder.qry.Nodes[node.Children[0]]
  1158  	rightChild := builder.qry.Nodes[node.Children[1]]
  1159  	if rightChild.NodeType == plan.Node_FUNCTION_SCAN {
  1160  		return
  1161  	}
  1162  
  1163  	switch node.JoinType {
  1164  	case plan.Node_INNER, plan.Node_OUTER:
  1165  		if leftChild.Stats.Outcnt < rightChild.Stats.Outcnt {
  1166  			node.Children[0], node.Children[1] = node.Children[1], node.Children[0]
  1167  
  1168  		}
  1169  
  1170  	case plan.Node_LEFT, plan.Node_SEMI, plan.Node_ANTI:
  1171  		//right joins does not support non equal join for now
  1172  		if builder.IsEquiJoin(node) && leftChild.Stats.Outcnt*1.2 < rightChild.Stats.Outcnt && !builder.haveOnDuplicateKey {
  1173  			node.BuildOnLeft = true
  1174  		}
  1175  	}
  1176  
  1177  	if builder.hasRecursiveScan(builder.qry.Nodes[node.Children[1]]) {
  1178  		node.Children[0], node.Children[1] = node.Children[1], node.Children[0]
  1179  	}
  1180  }
  1181  
  1182  func (builder *QueryBuilder) hasRecursiveScan(node *plan.Node) bool {
  1183  	if node.NodeType == plan.Node_RECURSIVE_SCAN {
  1184  		return true
  1185  	}
  1186  	for _, nodeID := range node.Children {
  1187  		if builder.hasRecursiveScan(builder.qry.Nodes[nodeID]) {
  1188  			return true
  1189  		}
  1190  	}
  1191  	return false
  1192  }
  1193  
  1194  func compareStats(stats1, stats2 *Stats) bool {
  1195  	// selectivity is first considered to reduce data
  1196  	// when selectivity very close, we first join smaller table
  1197  	if math.Abs(stats1.Selectivity-stats2.Selectivity) > 0.01 {
  1198  		return stats1.Selectivity < stats2.Selectivity
  1199  	} else {
  1200  		// todo we need to calculate ndv of outcnt here
  1201  		return stats1.Outcnt < stats2.Outcnt
  1202  	}
  1203  }
  1204  
  1205  func andSelectivity(s1, s2 float64) float64 {
  1206  	if s1 > 0.15 || s2 > 0.15 || s1*s2 > 0.1 {
  1207  		return s1 * s2
  1208  	}
  1209  	return math.Min(s1, s2) * math.Max(math.Pow(s1, s2), math.Pow(s2, s1))
  1210  }
  1211  
  1212  func orSelectivity(s1, s2 float64) float64 {
  1213  	var s float64
  1214  	if math.Abs(s1-s2) < 0.001 && s1 < 0.2 {
  1215  		s = s1 + s2
  1216  	} else {
  1217  		s = math.Max(s1, s2) * 1.5
  1218  	}
  1219  	if s > 1 {
  1220  		return 1
  1221  	} else {
  1222  		return s
  1223  	}
  1224  }
  1225  
  1226  const blockThresholdForTpQuery = 16
  1227  
  1228  func IsTpQuery(qry *plan.Query) bool {
  1229  	for _, node := range qry.GetNodes() {
  1230  		stats := node.Stats
  1231  		if stats == nil || stats.BlockNum > blockThresholdForTpQuery {
  1232  			return false
  1233  		}
  1234  	}
  1235  	return true
  1236  }
  1237  
  1238  func ReCalcQueryStats(builder *QueryBuilder, query *plan.Query) {
  1239  	for _, rootID := range builder.qry.Steps {
  1240  		ReCalcNodeStats(rootID, builder, true, false, true)
  1241  	}
  1242  }
  1243  
  1244  func PrintStats(qry *plan.Query) string {
  1245  	buf := bytes.NewBuffer(make([]byte, 0, 1024*64))
  1246  	buf.WriteString("Print Stats: \n")
  1247  	for _, node := range qry.GetNodes() {
  1248  		stats := node.Stats
  1249  		buf.WriteString(fmt.Sprintf("Node ID: %v, Node Type %v, ", node.NodeId, node.NodeType))
  1250  		if stats == nil {
  1251  			buf.WriteString("Stats: nil\n")
  1252  		} else {
  1253  			buf.WriteString(fmt.Sprintf("blocknum %v, outcnt %v \n", node.Stats.BlockNum, node.Stats.Outcnt))
  1254  		}
  1255  	}
  1256  	return buf.String()
  1257  }
  1258  
  1259  func DeepCopyStats(stats *plan.Stats) *plan.Stats {
  1260  	if stats == nil {
  1261  		return nil
  1262  	}
  1263  	var hashmapStats *plan.HashMapStats
  1264  	if stats.HashmapStats != nil {
  1265  		hashmapStats = &plan.HashMapStats{
  1266  			HashmapSize:   stats.HashmapStats.HashmapSize,
  1267  			HashOnPK:      stats.HashmapStats.HashOnPK,
  1268  			Shuffle:       stats.HashmapStats.Shuffle,
  1269  			ShuffleColIdx: stats.HashmapStats.ShuffleColIdx,
  1270  			ShuffleType:   stats.HashmapStats.ShuffleType,
  1271  			ShuffleColMin: stats.HashmapStats.ShuffleColMin,
  1272  			ShuffleColMax: stats.HashmapStats.ShuffleColMax,
  1273  			ShuffleMethod: stats.HashmapStats.ShuffleMethod,
  1274  		}
  1275  	}
  1276  	return &plan.Stats{
  1277  		BlockNum:     stats.BlockNum,
  1278  		Rowsize:      stats.Rowsize,
  1279  		Cost:         stats.Cost,
  1280  		Outcnt:       stats.Outcnt,
  1281  		TableCnt:     stats.TableCnt,
  1282  		Selectivity:  stats.Selectivity,
  1283  		HashmapStats: hashmapStats,
  1284  		ForceOneCN:   stats.ForceOneCN,
  1285  	}
  1286  }
  1287  
  1288  func calcBlockSelectivityUsingShuffleRange(s *pb.ShuffleRange, sel float64) float64 {
  1289  	if s == nil {
  1290  		if sel <= 0.01 {
  1291  			return sel * 100
  1292  		} else {
  1293  			return 1
  1294  		}
  1295  	}
  1296  	ret := sel * math.Pow(500, math.Pow(s.Overlap, 2))
  1297  	if ret > 1 {
  1298  		ret = 1
  1299  	}
  1300  	return ret
  1301  }
  1302  
  1303  func (builder *QueryBuilder) canSkipStats() bool {
  1304  	//for now ,only skip stats for select count(*) from xx
  1305  	if len(builder.qry.Steps) != 1 || len(builder.qry.Nodes) != 3 {
  1306  		return false
  1307  	}
  1308  	project := builder.qry.Nodes[builder.qry.Steps[0]]
  1309  	if project.NodeType != plan.Node_PROJECT {
  1310  		return false
  1311  	}
  1312  	agg := builder.qry.Nodes[project.Children[0]]
  1313  	if agg.NodeType != plan.Node_AGG {
  1314  		return false
  1315  	}
  1316  	if len(agg.AggList) != 1 || len(agg.GroupBy) != 0 {
  1317  		return false
  1318  	}
  1319  	if agg.AggList[0].GetF() == nil || agg.AggList[0].GetF().Func.ObjName != "starcount" {
  1320  		return false
  1321  	}
  1322  	scan := builder.qry.Nodes[agg.Children[0]]
  1323  	return scan.NodeType == plan.Node_TABLE_SCAN
  1324  }