github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/petri/acyclic/causet/embedded/stats.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package embedded
    15  
    16  import (
    17  	"math"
    18  	"sort"
    19  
    20  	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
    21  	"github.com/whtcorpsinc/BerolinaSQL/ast"
    22  	"github.com/whtcorpsinc/errors"
    23  	"github.com/whtcorpsinc/milevadb/causet/property"
    24  	"github.com/whtcorpsinc/milevadb/causet/soliton"
    25  	"github.com/whtcorpsinc/milevadb/memex"
    26  	"github.com/whtcorpsinc/milevadb/soliton/logutil"
    27  	"github.com/whtcorpsinc/milevadb/soliton/ranger"
    28  	"github.com/whtcorpsinc/milevadb/statistics"
    29  	"github.com/whtcorpsinc/milevadb/types"
    30  	"go.uber.org/zap"
    31  )
    32  
// StatsCount returns the estimated row count recorded in this physical plan's
// StatsInfo. It assumes p.stats has already been derived (non-nil).
func (p *basePhysicalCauset) StatsCount() float64 {
	return p.stats.RowCount
}
    36  
    37  // DeriveStats implement LogicalCauset DeriveStats interface.
    38  func (p *LogicalBlockDual) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
    39  	if p.stats != nil {
    40  		return p.stats, nil
    41  	}
    42  	profile := &property.StatsInfo{
    43  		RowCount:    float64(p.RowCount),
    44  		Cardinality: make(map[int64]float64, selfSchema.Len()),
    45  	}
    46  	for _, col := range selfSchema.DeferredCausets {
    47  		profile.Cardinality[col.UniqueID] = float64(p.RowCount)
    48  	}
    49  	p.stats = profile
    50  	return p.stats, nil
    51  }
    52  
    53  // DeriveStats implement LogicalCauset DeriveStats interface.
    54  func (p *LogicalMemBlock) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
    55  	if p.stats != nil {
    56  		return p.stats, nil
    57  	}
    58  	statsBlock := statistics.PseudoBlock(p.BlockInfo)
    59  	stats := &property.StatsInfo{
    60  		RowCount:     float64(statsBlock.Count),
    61  		Cardinality:  make(map[int64]float64, len(p.BlockInfo.DeferredCausets)),
    62  		HistDefCausl: statsBlock.GenerateHistDefCauslFromDeferredCausetInfo(p.BlockInfo.DeferredCausets, p.schemaReplicant.DeferredCausets),
    63  		StatsVersion: statistics.PseudoVersion,
    64  	}
    65  	for _, col := range selfSchema.DeferredCausets {
    66  		stats.Cardinality[col.UniqueID] = float64(statsBlock.Count)
    67  	}
    68  	p.stats = stats
    69  	return p.stats, nil
    70  }
    71  
    72  // DeriveStats implement LogicalCauset DeriveStats interface.
    73  func (p *LogicalShow) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
    74  	if p.stats != nil {
    75  		return p.stats, nil
    76  	}
    77  	// A fake count, just to avoid panic now.
    78  	p.stats = getFakeStats(selfSchema)
    79  	return p.stats, nil
    80  }
    81  
    82  func getFakeStats(schemaReplicant *memex.Schema) *property.StatsInfo {
    83  	profile := &property.StatsInfo{
    84  		RowCount:    1,
    85  		Cardinality: make(map[int64]float64, schemaReplicant.Len()),
    86  	}
    87  	for _, col := range schemaReplicant.DeferredCausets {
    88  		profile.Cardinality[col.UniqueID] = 1
    89  	}
    90  	return profile
    91  }
    92  
    93  // DeriveStats implement LogicalCauset DeriveStats interface.
    94  func (p *LogicalShowDBSJobs) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
    95  	if p.stats != nil {
    96  		return p.stats, nil
    97  	}
    98  	// A fake count, just to avoid panic now.
    99  	p.stats = getFakeStats(selfSchema)
   100  	return p.stats, nil
   101  }
   102  
// RecursiveDeriveStats4Test is an exporter just for test.
// It derives stats for the whole plan tree rooted at p with no column groups.
func RecursiveDeriveStats4Test(p LogicalCauset) (*property.StatsInfo, error) {
	return p.recursiveDeriveStats(nil)
}
   107  
// GetStats4Test is an exporter just for test.
// It returns the plan's already-derived StatsInfo without recomputing it.
func GetStats4Test(p LogicalCauset) *property.StatsInfo {
	return p.statsInfo()
}
   112  
   113  func (p *baseLogicalCauset) recursiveDeriveStats(colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
   114  	childStats := make([]*property.StatsInfo, len(p.children))
   115  	childSchema := make([]*memex.Schema, len(p.children))
   116  	cumDefCausGroups := p.self.ExtractDefCausGroups(colGroups)
   117  	for i, child := range p.children {
   118  		childProfile, err := child.recursiveDeriveStats(cumDefCausGroups)
   119  		if err != nil {
   120  			return nil, err
   121  		}
   122  		childStats[i] = childProfile
   123  		childSchema[i] = child.Schema()
   124  	}
   125  	return p.self.DeriveStats(childStats, p.self.Schema(), childSchema, colGroups)
   126  }
   127  
// ExtractDefCausGroups implements LogicalCauset ExtractDefCausGroups interface.
// The base implementation propagates no column groups to children; operators
// that can maintain GroupNDVs override this.
func (p *baseLogicalCauset) ExtractDefCausGroups(_ [][]*memex.DeferredCauset) [][]*memex.DeferredCauset {
	return nil
}
   132  
   133  // DeriveStats implement LogicalCauset DeriveStats interface.
   134  func (p *baseLogicalCauset) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
   135  	if len(childStats) == 1 {
   136  		p.stats = childStats[0]
   137  		return p.stats, nil
   138  	}
   139  	if len(childStats) > 1 {
   140  		err := ErrInternal.GenWithStack("LogicalCausets with more than one child should implement their own DeriveStats().")
   141  		return nil, err
   142  	}
   143  	if p.stats != nil {
   144  		return p.stats, nil
   145  	}
   146  	profile := &property.StatsInfo{
   147  		RowCount:    float64(1),
   148  		Cardinality: make(map[int64]float64, selfSchema.Len()),
   149  	}
   150  	for _, col := range selfSchema.DeferredCausets {
   151  		profile.Cardinality[col.UniqueID] = 1
   152  	}
   153  	p.stats = profile
   154  	return profile, nil
   155  }
   156  
   157  // getDeferredCausetNDV computes estimated NDV of specified column using the original
   158  // histogram of `DataSource` which is retrieved from storage(not the derived one).
   159  func (ds *DataSource) getDeferredCausetNDV(colID int64) (ndv float64) {
   160  	hist, ok := ds.statisticBlock.DeferredCausets[colID]
   161  	if ok && hist.Count > 0 {
   162  		factor := float64(ds.statisticBlock.Count) / float64(hist.Count)
   163  		ndv = float64(hist.NDV) * factor
   164  	} else {
   165  		ndv = float64(ds.statisticBlock.Count) * distinctFactor
   166  	}
   167  	return ndv
   168  }
   169  
   170  func (ds *DataSource) getGroupNDVs(colGroups [][]*memex.DeferredCauset) []property.GroupNDV {
   171  	if colGroups == nil {
   172  		return nil
   173  	}
   174  	tbl := ds.blockStats.HistDefCausl
   175  	ndvs := make([]property.GroupNDV, 0, len(colGroups))
   176  	for idxID, idx := range tbl.Indices {
   177  		idxDefCauss := make([]int64, len(tbl.Idx2DeferredCausetIDs[idxID]))
   178  		copy(idxDefCauss, tbl.Idx2DeferredCausetIDs[idxID])
   179  		sort.Slice(idxDefCauss, func(i, j int) bool {
   180  			return idxDefCauss[i] < idxDefCauss[j]
   181  		})
   182  		for _, g := range colGroups {
   183  			// We only want those exact matches.
   184  			if len(g) != len(idxDefCauss) {
   185  				continue
   186  			}
   187  			match := true
   188  			for i, col := range g {
   189  				// Both slices are sorted according to UniqueID.
   190  				if col.UniqueID != idxDefCauss[i] {
   191  					match = false
   192  					break
   193  				}
   194  			}
   195  			if match {
   196  				ndv := property.GroupNDV{
   197  					DefCauss: idxDefCauss,
   198  					NDV:      float64(idx.NDV),
   199  				}
   200  				ndvs = append(ndvs, ndv)
   201  				break
   202  			}
   203  		}
   204  	}
   205  	return ndvs
   206  }
   207  
   208  func (ds *DataSource) initStats(colGroups [][]*memex.DeferredCauset) {
   209  	if ds.blockStats != nil {
   210  		// Reload GroupNDVs since colGroups may have changed.
   211  		ds.blockStats.GroupNDVs = ds.getGroupNDVs(colGroups)
   212  		return
   213  	}
   214  	if ds.statisticBlock == nil {
   215  		ds.statisticBlock = getStatsBlock(ds.ctx, ds.blockInfo, ds.causet.Meta().ID)
   216  	}
   217  	blockStats := &property.StatsInfo{
   218  		RowCount:     float64(ds.statisticBlock.Count),
   219  		Cardinality:  make(map[int64]float64, ds.schemaReplicant.Len()),
   220  		HistDefCausl: ds.statisticBlock.GenerateHistDefCauslFromDeferredCausetInfo(ds.DeferredCausets, ds.schemaReplicant.DeferredCausets),
   221  		StatsVersion: ds.statisticBlock.Version,
   222  	}
   223  	if ds.statisticBlock.Pseudo {
   224  		blockStats.StatsVersion = statistics.PseudoVersion
   225  	}
   226  	for _, col := range ds.schemaReplicant.DeferredCausets {
   227  		blockStats.Cardinality[col.UniqueID] = ds.getDeferredCausetNDV(col.ID)
   228  	}
   229  	ds.blockStats = blockStats
   230  	ds.blockStats.GroupNDVs = ds.getGroupNDVs(colGroups)
   231  	ds.TblDefCausHists = ds.statisticBlock.ID2UniqueID(ds.TblDefCauss)
   232  }
   233  
   234  func (ds *DataSource) deriveStatsByFilter(conds memex.CNFExprs, filledPaths []*soliton.AccessPath) *property.StatsInfo {
   235  	selectivity, nodes, err := ds.blockStats.HistDefCausl.Selectivity(ds.ctx, conds, filledPaths)
   236  	if err != nil {
   237  		logutil.BgLogger().Debug("something wrong happened, use the default selectivity", zap.Error(err))
   238  		selectivity = SelectionFactor
   239  	}
   240  	stats := ds.blockStats.Scale(selectivity)
   241  	if ds.ctx.GetStochastikVars().OptimizerSelectivityLevel >= 1 {
   242  		stats.HistDefCausl = stats.HistDefCausl.NewHistDefCauslBySelectivity(ds.ctx.GetStochastikVars().StmtCtx, nodes)
   243  	}
   244  	return stats
   245  }
   246  
// DeriveStats implement LogicalCauset DeriveStats interface.
// It initializes causet-level stats, fills and prunes access paths (keeping
// only one path when a point/empty range is found), and finally considers
// generating IndexMerge paths for DNF conditions.
func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
	if ds.stats != nil && len(colGroups) == 0 {
		return ds.stats, nil
	}
	ds.initStats(colGroups)
	if ds.stats != nil {
		// Just reload the GroupNDVs.
		// Re-scaling blockStats by the previously derived selectivity picks up
		// the refreshed GroupNDVs without redoing full derivation.
		selectivity := ds.stats.RowCount / ds.blockStats.RowCount
		ds.stats = ds.blockStats.Scale(selectivity)
		return ds.stats, nil
	}
	// PushDownNot here can convert query 'not (a != 1)' to 'a = 1'.
	for i, expr := range ds.pushedDownConds {
		ds.pushedDownConds[i] = memex.PushDownNot(ds.ctx, expr)
	}
	// Fill access conditions/ranges for every index path before estimating.
	for _, path := range ds.possibleAccessPaths {
		if path.IsBlockPath() {
			continue
		}
		err := ds.fillIndexPath(path, ds.pushedDownConds)
		if err != nil {
			return nil, err
		}
	}
	ds.stats = ds.deriveStatsByFilter(ds.pushedDownConds, ds.possibleAccessPaths)
	for _, path := range ds.possibleAccessPaths {
		if path.IsBlockPath() {
			noIntervalRanges, err := ds.deriveBlockPathStats(path, ds.pushedDownConds, false)
			if err != nil {
				return nil, err
			}
			// If we have point or empty range, just remove other possible paths.
			if noIntervalRanges || len(path.Ranges) == 0 {
				ds.possibleAccessPaths[0] = path
				ds.possibleAccessPaths = ds.possibleAccessPaths[:1]
				break
			}
			continue
		}
		noIntervalRanges := ds.deriveIndexPathStats(path, ds.pushedDownConds, false)
		// If we have empty range, or point range on unique index, just remove other possible paths.
		if (noIntervalRanges && path.Index.Unique) || len(path.Ranges) == 0 {
			ds.possibleAccessPaths[0] = path
			ds.possibleAccessPaths = ds.possibleAccessPaths[:1]
			break
		}
	}

	// TODO: implement UnionScan + IndexMerge
	isReadOnlyTxn := true
	txn, err := ds.ctx.Txn(false)
	if err != nil {
		return nil, err
	}
	if txn.Valid() && !txn.IsReadOnly() {
		isReadOnlyTxn = false
	}
	// Consider the IndexMergePath. Now, we just generate `IndexMergePath` in DNF case.
	isPossibleIdxMerge := len(ds.pushedDownConds) > 0 && len(ds.possibleAccessPaths) > 1
	stochastikAndStmtPermission := (ds.ctx.GetStochastikVars().GetEnableIndexMerge() || len(ds.indexMergeHints) > 0) && !ds.ctx.GetStochastikVars().StmtCtx.NoIndexMergeHint
	// If there is an index path, we current do not consider `IndexMergePath`.
	needConsiderIndexMerge := true
	for i := 1; i < len(ds.possibleAccessPaths); i++ {
		if len(ds.possibleAccessPaths[i].AccessConds) != 0 {
			needConsiderIndexMerge = false
			break
		}
	}
	if isPossibleIdxMerge && stochastikAndStmtPermission && needConsiderIndexMerge && isReadOnlyTxn {
		ds.generateAndPruneIndexMergePath(ds.indexMergeHints != nil)
	} else if len(ds.indexMergeHints) > 0 {
		// Hints were given but IndexMerge is not applicable here; drop them with a warning.
		ds.indexMergeHints = nil
		ds.ctx.GetStochastikVars().StmtCtx.AppendWarning(errors.Errorf("IndexMerge is inapplicable or disabled"))
	}
	return ds.stats, nil
}
   324  
   325  func (ds *DataSource) generateAndPruneIndexMergePath(needPrune bool) {
   326  	regularPathCount := len(ds.possibleAccessPaths)
   327  	ds.generateIndexMergeOrPaths()
   328  	// If without hints, it means that `enableIndexMerge` is true
   329  	if len(ds.indexMergeHints) == 0 {
   330  		return
   331  	}
   332  	// With hints and without generated IndexMerge paths
   333  	if regularPathCount == len(ds.possibleAccessPaths) {
   334  		ds.indexMergeHints = nil
   335  		ds.ctx.GetStochastikVars().StmtCtx.AppendWarning(errors.Errorf("IndexMerge is inapplicable or disabled"))
   336  		return
   337  	}
   338  	// Do not need to consider the regular paths in find_best_task().
   339  	if needPrune {
   340  		ds.possibleAccessPaths = ds.possibleAccessPaths[regularPathCount:]
   341  	}
   342  }
   343  
// DeriveStats implements LogicalCauset DeriveStats interface.
// It derives the scan's stats from the source causet filtered by AccessConds,
// and builds the handle ranges the scan will read.
func (ts *LogicalBlockScan) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (_ *property.StatsInfo, err error) {
	ts.Source.initStats(nil)
	// PushDownNot here can convert query 'not (a != 1)' to 'a = 1'.
	for i, expr := range ts.AccessConds {
		// TODO The memexs may be shared by BlockScan and several IndexScans, there would be redundant
		// `PushDownNot` function call in multiple `DeriveStats` then.
		ts.AccessConds[i] = memex.PushDownNot(ts.ctx, expr)
	}
	ts.stats = ts.Source.deriveStatsByFilter(ts.AccessConds, nil)
	sc := ts.SCtx().GetStochastikVars().StmtCtx
	// ts.Handle could be nil if PK is Handle, and PK column has been pruned.
	// TODO: support clustered index.
	if ts.HandleDefCauss != nil {
		ts.Ranges, err = ranger.BuildBlockRange(ts.AccessConds, sc, ts.HandleDefCauss.GetDefCaus(0).RetType)
	} else {
		// No handle column available: scan the full integer handle range,
		// signed unless the PK-is-handle column is flagged unsigned.
		isUnsigned := false
		if ts.Source.blockInfo.PKIsHandle {
			if pkDefCausInfo := ts.Source.blockInfo.GetPkDefCausInfo(); pkDefCausInfo != nil {
				isUnsigned = allegrosql.HasUnsignedFlag(pkDefCausInfo.Flag)
			}
		}
		ts.Ranges = ranger.FullIntRange(isUnsigned)
	}
	// err is the named return, set only by BuildBlockRange above.
	if err != nil {
		return nil, err
	}
	return ts.stats, nil
}
   373  
// DeriveStats implements LogicalCauset DeriveStats interface.
// It derives the scan's stats from the source causet filtered by AccessConds
// and resolves the index columns (prefix and full forms) for this scan.
func (is *LogicalIndexScan) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
	is.Source.initStats(nil)
	// PushDownNot normalizes conditions like 'not (a != 1)' into 'a = 1'.
	for i, expr := range is.AccessConds {
		is.AccessConds[i] = memex.PushDownNot(is.ctx, expr)
	}
	is.stats = is.Source.deriveStatsByFilter(is.AccessConds, nil)
	if len(is.AccessConds) == 0 {
		is.Ranges = ranger.FullRange()
	}
	is.IdxDefCauss, is.IdxDefCausLens = memex.IndexInfo2PrefixDefCauss(is.DeferredCausets, selfSchema.DeferredCausets, is.Index)
	is.FullIdxDefCauss, is.FullIdxDefCausLens = memex.IndexInfo2DefCauss(is.DeferredCausets, selfSchema.DeferredCausets, is.Index)
	// For a fully-resolved, non-unique, non-primary index, append the signed
	// integer handle column (if present) to the index columns.
	if !is.Index.Unique && !is.Index.Primary && len(is.Index.DeferredCausets) == len(is.IdxDefCauss) {
		handleDefCaus := is.getPKIsHandleDefCaus(selfSchema)
		if handleDefCaus != nil && !allegrosql.HasUnsignedFlag(handleDefCaus.RetType.Flag) {
			is.IdxDefCauss = append(is.IdxDefCauss, handleDefCaus)
			is.IdxDefCausLens = append(is.IdxDefCausLens, types.UnspecifiedLength)
		}
	}
	return is.stats, nil
}
   395  
   396  // getIndexMergeOrPath generates all possible IndexMergeOrPaths.
   397  func (ds *DataSource) generateIndexMergeOrPaths() {
   398  	usedIndexCount := len(ds.possibleAccessPaths)
   399  	for i, cond := range ds.pushedDownConds {
   400  		sf, ok := cond.(*memex.ScalarFunction)
   401  		if !ok || sf.FuncName.L != ast.LogicOr {
   402  			continue
   403  		}
   404  		var partialPaths = make([]*soliton.AccessPath, 0, usedIndexCount)
   405  		dnfItems := memex.FlattenDNFConditions(sf)
   406  		for _, item := range dnfItems {
   407  			cnfItems := memex.SplitCNFItems(item)
   408  			itemPaths := ds.accessPathsForConds(cnfItems, usedIndexCount)
   409  			if len(itemPaths) == 0 {
   410  				partialPaths = nil
   411  				break
   412  			}
   413  			partialPath := ds.buildIndexMergePartialPath(itemPaths)
   414  			if partialPath == nil {
   415  				partialPaths = nil
   416  				break
   417  			}
   418  			partialPaths = append(partialPaths, partialPath)
   419  		}
   420  		if len(partialPaths) > 1 {
   421  			possiblePath := ds.buildIndexMergeOrPath(partialPaths, i)
   422  			if possiblePath != nil {
   423  				ds.possibleAccessPaths = append(ds.possibleAccessPaths, possiblePath)
   424  			}
   425  		}
   426  	}
   427  }
   428  
   429  // isInIndexMergeHints checks whether current index or primary key is in IndexMerge hints.
   430  func (ds *DataSource) isInIndexMergeHints(name string) bool {
   431  	if len(ds.indexMergeHints) == 0 {
   432  		return true
   433  	}
   434  	for _, hint := range ds.indexMergeHints {
   435  		if hint.indexHint == nil || len(hint.indexHint.IndexNames) == 0 {
   436  			return true
   437  		}
   438  		for _, hintName := range hint.indexHint.IndexNames {
   439  			if name == hintName.String() {
   440  				return true
   441  			}
   442  		}
   443  	}
   444  	return false
   445  }
   446  
// accessPathsForConds generates all possible index paths for conditions.
// For each existing access path (causet or index) it builds a fresh path whose
// access conditions come solely from `conditions`. Paths that leave causet
// filters behind or have no access conditions are discarded; a point/empty
// range short-circuits the search to that single path.
func (ds *DataSource) accessPathsForConds(conditions []memex.Expression, usedIndexCount int) []*soliton.AccessPath {
	var results = make([]*soliton.AccessPath, 0, usedIndexCount)
	for i := 0; i < usedIndexCount; i++ {
		path := &soliton.AccessPath{}
		if ds.possibleAccessPaths[i].IsBlockPath() {
			// The primary (causet) path can be excluded by IndexMerge hints.
			if !ds.isInIndexMergeHints("primary") {
				continue
			}
			if ds.blockInfo.IsCommonHandle {
				path.IsCommonHandlePath = true
				path.Index = ds.possibleAccessPaths[i].Index
			} else {
				path.IsIntHandlePath = true
			}
			noIntervalRanges, err := ds.deriveBlockPathStats(path, conditions, true)
			if err != nil {
				// Best effort: skip this path rather than failing the whole search.
				logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err))
				continue
			}
			if len(path.BlockFilters) > 0 || len(path.AccessConds) == 0 {
				// If AccessConds is empty or blockFilter is not empty, we ignore the access path.
				// Now these conditions are too strict.
				// For example, a allegrosql `select * from t where a > 1 or (b < 2 and c > 3)` and causet `t` with indexes
				// on a and b separately. we can generate a `IndexMergePath` with causet filter `a > 1 or (b < 2 and c > 3)`.
				// TODO: solve the above case
				continue
			}
			// If we have point or empty range, just remove other possible paths.
			if noIntervalRanges || len(path.Ranges) == 0 {
				if len(results) == 0 {
					results = append(results, path)
				} else {
					results[0] = path
					results = results[:1]
				}
				break
			}
		} else {
			path.Index = ds.possibleAccessPaths[i].Index
			// This index can be excluded by IndexMerge hints, by name.
			if !ds.isInIndexMergeHints(path.Index.Name.L) {
				continue
			}
			err := ds.fillIndexPath(path, conditions)
			if err != nil {
				logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err))
				continue
			}
			noIntervalRanges := ds.deriveIndexPathStats(path, conditions, true)
			if len(path.BlockFilters) > 0 || len(path.AccessConds) == 0 {
				// If AccessConds is empty or blockFilter is not empty, we ignore the access path.
				// Now these conditions are too strict.
				// For example, a allegrosql `select * from t where a > 1 or (b < 2 and c > 3)` and causet `t` with indexes
				// on a and b separately. we can generate a `IndexMergePath` with causet filter `a > 1 or (b < 2 and c > 3)`.
				// TODO: solve the above case
				continue
			}
			// If we have empty range, or point range on unique index, just remove other possible paths.
			if (noIntervalRanges && path.Index.Unique) || len(path.Ranges) == 0 {
				if len(results) == 0 {
					results = append(results, path)
				} else {
					results[0] = path
					results = results[:1]
				}
				break
			}
		}
		results = append(results, path)
	}
	return results
}
   519  
   520  // buildIndexMergePartialPath chooses the best index path from all possible paths.
   521  // Now we just choose the index with most columns.
   522  // We should improve this strategy, because it is not always better to choose index
   523  // with most columns, e.g, filter is c > 1 and the input indexes are c and c_d_e,
   524  // the former one is enough, and it is less expensive in execution compared with the latter one.
   525  // TODO: improve strategy of the partial path selection
   526  func (ds *DataSource) buildIndexMergePartialPath(indexAccessPaths []*soliton.AccessPath) *soliton.AccessPath {
   527  	if len(indexAccessPaths) == 1 {
   528  		return indexAccessPaths[0]
   529  	}
   530  
   531  	maxDefCaussIndex := 0
   532  	maxDefCauss := len(indexAccessPaths[0].IdxDefCauss)
   533  	for i := 1; i < len(indexAccessPaths); i++ {
   534  		current := len(indexAccessPaths[i].IdxDefCauss)
   535  		if current > maxDefCauss {
   536  			maxDefCaussIndex = i
   537  			maxDefCauss = current
   538  		}
   539  	}
   540  	return indexAccessPaths[maxDefCaussIndex]
   541  }
   542  
   543  // buildIndexMergeOrPath generates one possible IndexMergePath.
   544  func (ds *DataSource) buildIndexMergeOrPath(partialPaths []*soliton.AccessPath, current int) *soliton.AccessPath {
   545  	indexMergePath := &soliton.AccessPath{PartialIndexPaths: partialPaths}
   546  	indexMergePath.BlockFilters = append(indexMergePath.BlockFilters, ds.pushedDownConds[:current]...)
   547  	indexMergePath.BlockFilters = append(indexMergePath.BlockFilters, ds.pushedDownConds[current+1:]...)
   548  	return indexMergePath
   549  }
   550  
   551  // DeriveStats implement LogicalCauset DeriveStats interface.
   552  func (p *LogicalSelection) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
   553  	if p.stats != nil {
   554  		return p.stats, nil
   555  	}
   556  	p.stats = childStats[0].Scale(SelectionFactor)
   557  	p.stats.GroupNDVs = nil
   558  	return p.stats, nil
   559  }
   560  
   561  // DeriveStats implement LogicalCauset DeriveStats interface.
   562  func (p *LogicalUnionAll) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
   563  	if p.stats != nil {
   564  		return p.stats, nil
   565  	}
   566  	p.stats = &property.StatsInfo{
   567  		Cardinality: make(map[int64]float64, selfSchema.Len()),
   568  	}
   569  	for _, childProfile := range childStats {
   570  		p.stats.RowCount += childProfile.RowCount
   571  		for _, col := range selfSchema.DeferredCausets {
   572  			p.stats.Cardinality[col.UniqueID] += childProfile.Cardinality[col.UniqueID]
   573  		}
   574  	}
   575  	return p.stats, nil
   576  }
   577  
   578  func deriveLimitStats(childProfile *property.StatsInfo, limitCount float64) *property.StatsInfo {
   579  	stats := &property.StatsInfo{
   580  		RowCount:    math.Min(limitCount, childProfile.RowCount),
   581  		Cardinality: make(map[int64]float64, len(childProfile.Cardinality)),
   582  	}
   583  	for id, c := range childProfile.Cardinality {
   584  		stats.Cardinality[id] = math.Min(c, stats.RowCount)
   585  	}
   586  	return stats
   587  }
   588  
   589  // DeriveStats implement LogicalCauset DeriveStats interface.
   590  func (p *LogicalLimit) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
   591  	if p.stats != nil {
   592  		return p.stats, nil
   593  	}
   594  	p.stats = deriveLimitStats(childStats[0], float64(p.Count))
   595  	return p.stats, nil
   596  }
   597  
   598  // DeriveStats implement LogicalCauset DeriveStats interface.
   599  func (lt *LogicalTopN) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
   600  	if lt.stats != nil {
   601  		return lt.stats, nil
   602  	}
   603  	lt.stats = deriveLimitStats(childStats[0], float64(lt.Count))
   604  	return lt.stats, nil
   605  }
   606  
   607  // getCardinality will return the Cardinality of a couple of columns. We simply return the max one, because we cannot know
   608  // the Cardinality for multi-dimension attributes properly. This is a simple and naive scheme of Cardinality estimation.
   609  func getCardinality(defcaus []*memex.DeferredCauset, schemaReplicant *memex.Schema, profile *property.StatsInfo) float64 {
   610  	cardinality := 1.0
   611  	indices := schemaReplicant.DeferredCausetsIndices(defcaus)
   612  	if indices == nil {
   613  		logutil.BgLogger().Error("column not found in schemaReplicant", zap.Any("columns", defcaus), zap.String("schemaReplicant", schemaReplicant.String()))
   614  		return cardinality
   615  	}
   616  	for _, idx := range indices {
   617  		// It is a very elementary estimation.
   618  		col := schemaReplicant.DeferredCausets[idx]
   619  		cardinality = math.Max(cardinality, profile.Cardinality[col.UniqueID])
   620  	}
   621  	return cardinality
   622  }
   623  
   624  func (p *LogicalProjection) getGroupNDVs(colGroups [][]*memex.DeferredCauset, childProfile *property.StatsInfo, selfSchema *memex.Schema) []property.GroupNDV {
   625  	if len(colGroups) == 0 || len(childProfile.GroupNDVs) == 0 {
   626  		return nil
   627  	}
   628  	exprDefCaus2ProjDefCaus := make(map[int64]int64)
   629  	for i, expr := range p.Exprs {
   630  		exprDefCaus, ok := expr.(*memex.DeferredCauset)
   631  		if !ok {
   632  			continue
   633  		}
   634  		exprDefCaus2ProjDefCaus[exprDefCaus.UniqueID] = selfSchema.DeferredCausets[i].UniqueID
   635  	}
   636  	ndvs := make([]property.GroupNDV, 0, len(childProfile.GroupNDVs))
   637  	for _, childGroupNDV := range childProfile.GroupNDVs {
   638  		projDefCauss := make([]int64, len(childGroupNDV.DefCauss))
   639  		for i, col := range childGroupNDV.DefCauss {
   640  			projDefCaus, ok := exprDefCaus2ProjDefCaus[col]
   641  			if !ok {
   642  				projDefCauss = nil
   643  				break
   644  			}
   645  			projDefCauss[i] = projDefCaus
   646  		}
   647  		if projDefCauss == nil {
   648  			continue
   649  		}
   650  		sort.Slice(projDefCauss, func(i, j int) bool {
   651  			return projDefCauss[i] < projDefCauss[j]
   652  		})
   653  		groupNDV := property.GroupNDV{
   654  			DefCauss: projDefCauss,
   655  			NDV:      childGroupNDV.NDV,
   656  		}
   657  		ndvs = append(ndvs, groupNDV)
   658  	}
   659  	return ndvs
   660  }
   661  
   662  // DeriveStats implement LogicalCauset DeriveStats interface.
   663  func (p *LogicalProjection) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
   664  	childProfile := childStats[0]
   665  	if p.stats != nil {
   666  		// Reload GroupNDVs since colGroups may have changed.
   667  		p.stats.GroupNDVs = p.getGroupNDVs(colGroups, childProfile, selfSchema)
   668  		return p.stats, nil
   669  	}
   670  	p.stats = &property.StatsInfo{
   671  		RowCount:    childProfile.RowCount,
   672  		Cardinality: make(map[int64]float64, len(p.Exprs)),
   673  	}
   674  	for i, expr := range p.Exprs {
   675  		defcaus := memex.ExtractDeferredCausets(expr)
   676  		p.stats.Cardinality[selfSchema.DeferredCausets[i].UniqueID] = getCardinality(defcaus, childSchema[0], childProfile)
   677  	}
   678  	p.stats.GroupNDVs = p.getGroupNDVs(colGroups, childProfile, selfSchema)
   679  	return p.stats, nil
   680  }
   681  
   682  // ExtractDefCausGroups implements LogicalCauset ExtractDefCausGroups interface.
   683  func (p *LogicalProjection) ExtractDefCausGroups(colGroups [][]*memex.DeferredCauset) [][]*memex.DeferredCauset {
   684  	if len(colGroups) == 0 {
   685  		return nil
   686  	}
   687  	extDefCausGroups, _ := p.Schema().ExtractDefCausGroups(colGroups)
   688  	if len(extDefCausGroups) == 0 {
   689  		return nil
   690  	}
   691  	extracted := make([][]*memex.DeferredCauset, 0, len(extDefCausGroups))
   692  	for _, defcaus := range extDefCausGroups {
   693  		exprs := make([]*memex.DeferredCauset, len(defcaus))
   694  		allDefCauss := true
   695  		for i, offset := range defcaus {
   696  			col, ok := p.Exprs[offset].(*memex.DeferredCauset)
   697  			// TODO: for functional dependent projections like `col1 + 1` -> `col2`, we can maintain GroupNDVs actually.
   698  			if !ok {
   699  				allDefCauss = false
   700  				break
   701  			}
   702  			exprs[i] = col
   703  		}
   704  		if allDefCauss {
   705  			extracted = append(extracted, memex.SortDeferredCausets(exprs))
   706  		}
   707  	}
   708  	return extracted
   709  }
   710  
   711  func (la *LogicalAggregation) getGroupNDVs(colGroups [][]*memex.DeferredCauset, childProfile *property.StatsInfo, selfSchema *memex.Schema, gbyDefCauss []*memex.DeferredCauset) []property.GroupNDV {
   712  	if len(colGroups) == 0 || len(childProfile.GroupNDVs) == 0 {
   713  		return nil
   714  	}
   715  	// Check if the child profile provides GroupNDV for the GROUP BY columns.
   716  	// Note that gbyDefCauss may not be the exact GROUP BY columns, e.g, GROUP BY a+b,
   717  	// but we have no other approaches for the cardinality estimation of these cases
   718  	// except for using the independent assumption, unless we can use stats of memex index.
   719  	gbyDefCauss = memex.SortDeferredCausets(gbyDefCauss)
   720  	for _, groupNDV := range childProfile.GroupNDVs {
   721  		if len(gbyDefCauss) != len(groupNDV.DefCauss) {
   722  			continue
   723  		}
   724  		match := true
   725  		for i, col := range groupNDV.DefCauss {
   726  			if col != gbyDefCauss[i].UniqueID {
   727  				match = false
   728  				break
   729  			}
   730  		}
   731  		if match {
   732  			return []property.GroupNDV{groupNDV}
   733  		}
   734  	}
   735  	return nil
   736  }
   737  
   738  // DeriveStats implement LogicalCauset DeriveStats interface.
   739  func (la *LogicalAggregation) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
   740  	childProfile := childStats[0]
   741  	gbyDefCauss := make([]*memex.DeferredCauset, 0, len(la.GroupByItems))
   742  	for _, gbyExpr := range la.GroupByItems {
   743  		defcaus := memex.ExtractDeferredCausets(gbyExpr)
   744  		gbyDefCauss = append(gbyDefCauss, defcaus...)
   745  	}
   746  	if la.stats != nil {
   747  		// Reload GroupNDVs since colGroups may have changed.
   748  		la.stats.GroupNDVs = la.getGroupNDVs(colGroups, childProfile, selfSchema, gbyDefCauss)
   749  		return la.stats, nil
   750  	}
   751  	cardinality := getCardinality(gbyDefCauss, childSchema[0], childProfile)
   752  	la.stats = &property.StatsInfo{
   753  		RowCount:    cardinality,
   754  		Cardinality: make(map[int64]float64, selfSchema.Len()),
   755  	}
   756  	// We cannot estimate the Cardinality for every output, so we use a conservative strategy.
   757  	for _, col := range selfSchema.DeferredCausets {
   758  		la.stats.Cardinality[col.UniqueID] = cardinality
   759  	}
   760  	la.inputCount = childProfile.RowCount
   761  	la.stats.GroupNDVs = la.getGroupNDVs(colGroups, childProfile, selfSchema, gbyDefCauss)
   762  	return la.stats, nil
   763  }
   764  
   765  // ExtractDefCausGroups implements LogicalCauset ExtractDefCausGroups interface.
   766  func (la *LogicalAggregation) ExtractDefCausGroups(_ [][]*memex.DeferredCauset) [][]*memex.DeferredCauset {
   767  	// Parent colGroups would be dicarded, because aggregation would make NDV of colGroups
   768  	// which does not match GroupByItems invalid.
   769  	// Note that gbyDefCauss may not be the exact GROUP BY columns, e.g, GROUP BY a+b,
   770  	// but we have no other approaches for the cardinality estimation of these cases
   771  	// except for using the independent assumption, unless we can use stats of memex index.
   772  	gbyDefCauss := make([]*memex.DeferredCauset, 0, len(la.GroupByItems))
   773  	for _, gbyExpr := range la.GroupByItems {
   774  		defcaus := memex.ExtractDeferredCausets(gbyExpr)
   775  		gbyDefCauss = append(gbyDefCauss, defcaus...)
   776  	}
   777  	if len(gbyDefCauss) > 0 {
   778  		return [][]*memex.DeferredCauset{memex.SortDeferredCausets(gbyDefCauss)}
   779  	}
   780  	return nil
   781  }
   782  
   783  func (p *LogicalJoin) getGroupNDVs(colGroups [][]*memex.DeferredCauset, childStats []*property.StatsInfo) []property.GroupNDV {
   784  	outerIdx := int(-1)
   785  	if p.JoinType == LeftOuterJoin || p.JoinType == LeftOuterSemiJoin || p.JoinType == AntiLeftOuterSemiJoin {
   786  		outerIdx = 0
   787  	} else if p.JoinType == RightOuterJoin {
   788  		outerIdx = 1
   789  	}
   790  	if outerIdx >= 0 && len(colGroups) > 0 {
   791  		return childStats[outerIdx].GroupNDVs
   792  	}
   793  	return nil
   794  }
   795  
// DeriveStats implement LogicalCauset DeriveStats interface.
// If the type of join is SemiJoin, the selectivity of it will be same as selection's.
// If the type of join is LeftOuterSemiJoin, it will not add or remove any event. The last column is a boolean value, whose Cardinality should be two.
// If the type of join is inner/outer join, the output of join(s, t) should be N(s) * N(t) / (V(s.key) * V(t.key)) * Min(s.key, t.key).
// N(s) stands for the number of rows in relation s. V(s.key) means the Cardinality of join key in s.
// This is a quite simple strategy: We assume every bucket of relation which will participate join has the same number of rows, and apply cross join for
// every matched bucket.
func (p *LogicalJoin) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
	if p.stats != nil {
		// Reload GroupNDVs since colGroups may have changed.
		p.stats.GroupNDVs = p.getGroupNDVs(colGroups, childStats)
		return p.stats, nil
	}
	leftProfile, rightProfile := childStats[0], childStats[1]
	leftJoinKeys, rightJoinKeys, _, _ := p.GetJoinKeys()
	// Estimate the row count of an unfiltered equi-join (or cartesian product
	// when there are no equal conditions); cached for later cost calculations.
	helper := &fullJoinRowCountHelper{
		cartesian:     0 == len(p.EqualConditions),
		leftProfile:   leftProfile,
		rightProfile:  rightProfile,
		leftJoinKeys:  leftJoinKeys,
		rightJoinKeys: rightJoinKeys,
		leftSchema:    childSchema[0],
		rightSchema:   childSchema[1],
	}
	p.equalCondOutCnt = helper.estimate()
	if p.JoinType == SemiJoin || p.JoinType == AntiSemiJoin {
		// (Anti-)semi join keeps only left rows; treat it like a selection and
		// scale both the row count and every left column's NDV by SelectionFactor.
		// NOTE(review): this branch returns without reloading GroupNDVs like the
		// others do; presumably benign because getGroupNDVs yields nil for semi
		// joins — confirm before relying on cached GroupNDVs here.
		p.stats = &property.StatsInfo{
			RowCount:    leftProfile.RowCount * SelectionFactor,
			Cardinality: make(map[int64]float64, len(leftProfile.Cardinality)),
		}
		for id, c := range leftProfile.Cardinality {
			p.stats.Cardinality[id] = c * SelectionFactor
		}
		return p.stats, nil
	}
	if p.JoinType == LeftOuterSemiJoin || p.JoinType == AntiLeftOuterSemiJoin {
		// Left-outer-semi keeps every left row and appends one boolean column,
		// whose NDV is therefore 2.
		p.stats = &property.StatsInfo{
			RowCount:    leftProfile.RowCount,
			Cardinality: make(map[int64]float64, selfSchema.Len()),
		}
		for id, c := range leftProfile.Cardinality {
			p.stats.Cardinality[id] = c
		}
		p.stats.Cardinality[selfSchema.DeferredCausets[selfSchema.Len()-1].UniqueID] = 2.0
		p.stats.GroupNDVs = p.getGroupNDVs(colGroups, childStats)
		return p.stats, nil
	}
	count := p.equalCondOutCnt
	// An outer join emits at least one row per outer-side row.
	if p.JoinType == LeftOuterJoin {
		count = math.Max(count, leftProfile.RowCount)
	} else if p.JoinType == RightOuterJoin {
		count = math.Max(count, rightProfile.RowCount)
	}
	// A column's NDV cannot exceed the output row count.
	cardinality := make(map[int64]float64, selfSchema.Len())
	for id, c := range leftProfile.Cardinality {
		cardinality[id] = math.Min(c, count)
	}
	for id, c := range rightProfile.Cardinality {
		cardinality[id] = math.Min(c, count)
	}
	p.stats = &property.StatsInfo{
		RowCount:    count,
		Cardinality: cardinality,
	}
	p.stats.GroupNDVs = p.getGroupNDVs(colGroups, childStats)
	return p.stats, nil
}
   863  
   864  // ExtractDefCausGroups implements LogicalCauset ExtractDefCausGroups interface.
   865  func (p *LogicalJoin) ExtractDefCausGroups(colGroups [][]*memex.DeferredCauset) [][]*memex.DeferredCauset {
   866  	leftJoinKeys, rightJoinKeys, _, _ := p.GetJoinKeys()
   867  	extracted := make([][]*memex.DeferredCauset, 0, 2+len(colGroups))
   868  	if len(leftJoinKeys) > 1 && (p.JoinType == InnerJoin || p.JoinType == LeftOuterJoin || p.JoinType == RightOuterJoin) {
   869  		extracted = append(extracted, memex.SortDeferredCausets(leftJoinKeys), memex.SortDeferredCausets(rightJoinKeys))
   870  	}
   871  	var outerSchema *memex.Schema
   872  	if p.JoinType == LeftOuterJoin || p.JoinType == LeftOuterSemiJoin || p.JoinType == AntiLeftOuterSemiJoin {
   873  		outerSchema = p.Children()[0].Schema()
   874  	} else if p.JoinType == RightOuterJoin {
   875  		outerSchema = p.Children()[1].Schema()
   876  	}
   877  	if len(colGroups) == 0 || outerSchema == nil {
   878  		return extracted
   879  	}
   880  	_, offsets := outerSchema.ExtractDefCausGroups(colGroups)
   881  	if len(offsets) == 0 {
   882  		return extracted
   883  	}
   884  	for _, offset := range offsets {
   885  		extracted = append(extracted, colGroups[offset])
   886  	}
   887  	return extracted
   888  }
   889  
// fullJoinRowCountHelper bundles the inputs needed to estimate the row count
// of an unfiltered join between two children (see estimate).
type fullJoinRowCountHelper struct {
	cartesian     bool // true when the join has no equal conditions (cross join)
	leftProfile   *property.StatsInfo
	rightProfile  *property.StatsInfo
	leftJoinKeys  []*memex.DeferredCauset
	rightJoinKeys []*memex.DeferredCauset
	leftSchema    *memex.Schema
	rightSchema   *memex.Schema
}
   899  
   900  func (h *fullJoinRowCountHelper) estimate() float64 {
   901  	if h.cartesian {
   902  		return h.leftProfile.RowCount * h.rightProfile.RowCount
   903  	}
   904  	leftKeyCardinality := getCardinality(h.leftJoinKeys, h.leftSchema, h.leftProfile)
   905  	rightKeyCardinality := getCardinality(h.rightJoinKeys, h.rightSchema, h.rightProfile)
   906  	count := h.leftProfile.RowCount * h.rightProfile.RowCount / math.Max(leftKeyCardinality, rightKeyCardinality)
   907  	return count
   908  }
   909  
   910  func (la *LogicalApply) getGroupNDVs(colGroups [][]*memex.DeferredCauset, childStats []*property.StatsInfo) []property.GroupNDV {
   911  	if len(colGroups) > 0 && (la.JoinType == LeftOuterSemiJoin || la.JoinType == AntiLeftOuterSemiJoin || la.JoinType == LeftOuterJoin) {
   912  		return childStats[0].GroupNDVs
   913  	}
   914  	return nil
   915  }
   916  
   917  // DeriveStats implement LogicalCauset DeriveStats interface.
   918  func (la *LogicalApply) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
   919  	if la.stats != nil {
   920  		// Reload GroupNDVs since colGroups may have changed.
   921  		la.stats.GroupNDVs = la.getGroupNDVs(colGroups, childStats)
   922  		return la.stats, nil
   923  	}
   924  	leftProfile := childStats[0]
   925  	la.stats = &property.StatsInfo{
   926  		RowCount:    leftProfile.RowCount,
   927  		Cardinality: make(map[int64]float64, selfSchema.Len()),
   928  	}
   929  	for id, c := range leftProfile.Cardinality {
   930  		la.stats.Cardinality[id] = c
   931  	}
   932  	if la.JoinType == LeftOuterSemiJoin || la.JoinType == AntiLeftOuterSemiJoin {
   933  		la.stats.Cardinality[selfSchema.DeferredCausets[selfSchema.Len()-1].UniqueID] = 2.0
   934  	} else {
   935  		for i := childSchema[0].Len(); i < selfSchema.Len(); i++ {
   936  			la.stats.Cardinality[selfSchema.DeferredCausets[i].UniqueID] = leftProfile.RowCount
   937  		}
   938  	}
   939  	la.stats.GroupNDVs = la.getGroupNDVs(colGroups, childStats)
   940  	return la.stats, nil
   941  }
   942  
   943  // ExtractDefCausGroups implements LogicalCauset ExtractDefCausGroups interface.
   944  func (la *LogicalApply) ExtractDefCausGroups(colGroups [][]*memex.DeferredCauset) [][]*memex.DeferredCauset {
   945  	var outerSchema *memex.Schema
   946  	// Apply doesn't have RightOuterJoin.
   947  	if la.JoinType == LeftOuterJoin || la.JoinType == LeftOuterSemiJoin || la.JoinType == AntiLeftOuterSemiJoin {
   948  		outerSchema = la.Children()[0].Schema()
   949  	}
   950  	if len(colGroups) == 0 || outerSchema == nil {
   951  		return nil
   952  	}
   953  	_, offsets := outerSchema.ExtractDefCausGroups(colGroups)
   954  	if len(offsets) == 0 {
   955  		return nil
   956  	}
   957  	extracted := make([][]*memex.DeferredCauset, len(offsets))
   958  	for i, offset := range offsets {
   959  		extracted[i] = colGroups[offset]
   960  	}
   961  	return extracted
   962  }
   963  
   964  // Exists and MaxOneRow produce at most one event, so we set the RowCount of stats one.
   965  func getSingletonStats(schemaReplicant *memex.Schema) *property.StatsInfo {
   966  	ret := &property.StatsInfo{
   967  		RowCount:    1.0,
   968  		Cardinality: make(map[int64]float64, schemaReplicant.Len()),
   969  	}
   970  	for _, col := range schemaReplicant.DeferredCausets {
   971  		ret.Cardinality[col.UniqueID] = 1
   972  	}
   973  	return ret
   974  }
   975  
   976  // DeriveStats implement LogicalCauset DeriveStats interface.
   977  func (p *LogicalMaxOneRow) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, _ [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
   978  	if p.stats != nil {
   979  		return p.stats, nil
   980  	}
   981  	p.stats = getSingletonStats(selfSchema)
   982  	return p.stats, nil
   983  }
   984  
   985  func (p *LogicalWindow) getGroupNDVs(colGroups [][]*memex.DeferredCauset, childStats []*property.StatsInfo) []property.GroupNDV {
   986  	if len(colGroups) > 0 {
   987  		return childStats[0].GroupNDVs
   988  	}
   989  	return nil
   990  }
   991  
   992  // DeriveStats implement LogicalCauset DeriveStats interface.
   993  func (p *LogicalWindow) DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) {
   994  	if p.stats != nil {
   995  		// Reload GroupNDVs since colGroups may have changed.
   996  		p.stats.GroupNDVs = p.getGroupNDVs(colGroups, childStats)
   997  		return p.stats, nil
   998  	}
   999  	childProfile := childStats[0]
  1000  	p.stats = &property.StatsInfo{
  1001  		RowCount:    childProfile.RowCount,
  1002  		Cardinality: make(map[int64]float64, selfSchema.Len()),
  1003  	}
  1004  	childLen := selfSchema.Len() - len(p.WindowFuncDescs)
  1005  	for i := 0; i < childLen; i++ {
  1006  		id := selfSchema.DeferredCausets[i].UniqueID
  1007  		p.stats.Cardinality[id] = childProfile.Cardinality[id]
  1008  	}
  1009  	for i := childLen; i < selfSchema.Len(); i++ {
  1010  		p.stats.Cardinality[selfSchema.DeferredCausets[i].UniqueID] = childProfile.RowCount
  1011  	}
  1012  	p.stats.GroupNDVs = p.getGroupNDVs(colGroups, childStats)
  1013  	return p.stats, nil
  1014  }
  1015  
  1016  // ExtractDefCausGroups implements LogicalCauset ExtractDefCausGroups interface.
  1017  func (p *LogicalWindow) ExtractDefCausGroups(colGroups [][]*memex.DeferredCauset) [][]*memex.DeferredCauset {
  1018  	if len(colGroups) == 0 {
  1019  		return nil
  1020  	}
  1021  	childSchema := p.Children()[0].Schema()
  1022  	_, offsets := childSchema.ExtractDefCausGroups(colGroups)
  1023  	if len(offsets) == 0 {
  1024  		return nil
  1025  	}
  1026  	extracted := make([][]*memex.DeferredCauset, len(offsets))
  1027  	for i, offset := range offsets {
  1028  		extracted[i] = colGroups[offset]
  1029  	}
  1030  	return extracted
  1031  }