github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/allegrosql/optimize.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package cascades
    15  
    16  import (
    17  	"container/list"
    18  	"math"
    19  
    20  	causetembedded "github.com/whtcorpsinc/milevadb/causet/embedded"
    21  	"github.com/whtcorpsinc/milevadb/causet/memo"
    22  	"github.com/whtcorpsinc/milevadb/causet/property"
    23  	"github.com/whtcorpsinc/milevadb/memex"
    24  	"github.com/whtcorpsinc/milevadb/stochastikctx"
    25  )
    26  
    27  // DefaultOptimizer is the optimizer which contains all of the default
    28  // transformation and implementation rules.
    29  var DefaultOptimizer = NewOptimizer()
    30  
    31  // Optimizer is the struct for cascades optimizer.
    32  type Optimizer struct {
    33  	transformationMemruleBatches []TransformationMemruleBatch
    34  	implementationMemruleMap     map[memo.Operand][]ImplementationMemrule
    35  }
    36  
    37  // NewOptimizer returns a cascades optimizer with default transformation
    38  // rules and implementation rules.
    39  func NewOptimizer() *Optimizer {
    40  	return &Optimizer{
    41  		transformationMemruleBatches: DefaultMemruleBatches,
    42  		implementationMemruleMap:     defaultImplementationMap,
    43  	}
    44  }
    45  
    46  // ResetTransformationMemrules resets the transformationMemruleBatches of the optimizer, and returns the optimizer.
    47  func (opt *Optimizer) ResetTransformationMemrules(ruleBatches ...TransformationMemruleBatch) *Optimizer {
    48  	opt.transformationMemruleBatches = ruleBatches
    49  	return opt
    50  }
    51  
    52  // ResetImplementationMemrules resets the implementationMemruleMap of the optimizer, and returns the optimizer.
    53  func (opt *Optimizer) ResetImplementationMemrules(rules map[memo.Operand][]ImplementationMemrule) *Optimizer {
    54  	opt.implementationMemruleMap = rules
    55  	return opt
    56  }
    57  
    58  // GetImplementationMemrules gets all the candidate implementation rules of the optimizer
    59  // for the logical plan node.
    60  func (opt *Optimizer) GetImplementationMemrules(node causetembedded.LogicalCauset) []ImplementationMemrule {
    61  	return opt.implementationMemruleMap[memo.GetOperand(node)]
    62  }
    63  
    64  // FindBestCauset is the optimization entrance of the cascades causet. The
    65  // optimization is composed of 3 phases: preprocessing, exploration and implementation.
    66  //
    67  //------------------------------------------------------------------------------
    68  // Phase 1: Preprocessing
    69  //------------------------------------------------------------------------------
    70  //
    71  // The target of this phase is to preprocess the plan tree by some heuristic
    72  // rules which should always be beneficial, for example DeferredCauset Pruning.
    73  //
    74  //------------------------------------------------------------------------------
    75  // Phase 2: Exploration
    76  //------------------------------------------------------------------------------
    77  //
    78  // The target of this phase is to explore all the logically equivalent
    79  // memexs by exploring all the equivalent group memexs of each group.
    80  //
    81  // At the very beginning, there is only one group memex in a Group. After
    82  // applying some transformation rules on certain memexs of the Group, all
    83  // the equivalent memexs are found and stored in the Group. This procedure
    84  // can be regarded as searching for a weak connected component in a directed
    85  // graph, where nodes are memexs and directed edges are the transformation
    86  // rules.
    87  //
    88  //------------------------------------------------------------------------------
    89  // Phase 3: Implementation
    90  //------------------------------------------------------------------------------
    91  //
    92  // The target of this phase is to search the best physical plan for a Group
    93  // which satisfies a certain required physical property.
    94  //
    95  // In this phase, we need to enumerate all the applicable implementation rules
    96  // for each memex in each group under the required physical property. A
    97  // memo structure is used for a group to reduce the repeated search on the same
    98  // required physical property.
    99  func (opt *Optimizer) FindBestCauset(sctx stochastikctx.Context, logical causetembedded.LogicalCauset) (p causetembedded.PhysicalCauset, cost float64, err error) {
   100  	logical, err = opt.onPhasePreprocessing(sctx, logical)
   101  	if err != nil {
   102  		return nil, 0, err
   103  	}
   104  	rootGroup := memo.Convert2Group(logical)
   105  	err = opt.onPhaseExploration(sctx, rootGroup)
   106  	if err != nil {
   107  		return nil, 0, err
   108  	}
   109  	p, cost, err = opt.onPhaseImplementation(sctx, rootGroup)
   110  	if err != nil {
   111  		return nil, 0, err
   112  	}
   113  	err = p.ResolveIndices()
   114  	return p, cost, err
   115  }
   116  
   117  func (opt *Optimizer) onPhasePreprocessing(sctx stochastikctx.Context, plan causetembedded.LogicalCauset) (causetembedded.LogicalCauset, error) {
   118  	err := plan.PruneDeferredCausets(plan.Schema().DeferredCausets)
   119  	if err != nil {
   120  		return nil, err
   121  	}
   122  	return plan, nil
   123  }
   124  
   125  func (opt *Optimizer) onPhaseExploration(sctx stochastikctx.Context, g *memo.Group) error {
   126  	for round, ruleBatch := range opt.transformationMemruleBatches {
   127  		for !g.Explored(round) {
   128  			err := opt.exploreGroup(g, round, ruleBatch)
   129  			if err != nil {
   130  				return err
   131  			}
   132  		}
   133  	}
   134  	return nil
   135  }
   136  
   137  func (opt *Optimizer) exploreGroup(g *memo.Group, round int, ruleBatch TransformationMemruleBatch) error {
   138  	if g.Explored(round) {
   139  		return nil
   140  	}
   141  	g.SetExplored(round)
   142  
   143  	for elem := g.Equivalents.Front(); elem != nil; elem = elem.Next() {
   144  		curExpr := elem.Value.(*memo.GroupExpr)
   145  		if curExpr.Explored(round) {
   146  			continue
   147  		}
   148  		curExpr.SetExplored(round)
   149  
   150  		// Explore child groups firstly.
   151  		for _, childGroup := range curExpr.Children {
   152  			for !childGroup.Explored(round) {
   153  				if err := opt.exploreGroup(childGroup, round, ruleBatch); err != nil {
   154  					return err
   155  				}
   156  			}
   157  		}
   158  
   159  		eraseCur, err := opt.findMoreEquiv(g, elem, round, ruleBatch)
   160  		if err != nil {
   161  			return err
   162  		}
   163  		if eraseCur {
   164  			g.Delete(curExpr)
   165  		}
   166  	}
   167  	return nil
   168  }
   169  
   170  // findMoreEquiv finds and applies the matched transformation rules.
   171  func (opt *Optimizer) findMoreEquiv(g *memo.Group, elem *list.Element, round int, ruleBatch TransformationMemruleBatch) (eraseCur bool, err error) {
   172  	expr := elem.Value.(*memo.GroupExpr)
   173  	operand := memo.GetOperand(expr.ExprNode)
   174  	for _, rule := range ruleBatch[operand] {
   175  		pattern := rule.GetPattern()
   176  		if !pattern.Operand.Match(operand) {
   177  			continue
   178  		}
   179  		// Create a binding of the current Group memex and the pattern of
   180  		// the transformation rule to enumerate all the possible memexs.
   181  		iter := memo.NewExprIterFromGroupElem(elem, pattern)
   182  		for ; iter != nil && iter.Matched(); iter.Next() {
   183  			if !rule.Match(iter) {
   184  				continue
   185  			}
   186  
   187  			newExprs, eraseOld, eraseAll, err := rule.OnTransform(iter)
   188  			if err != nil {
   189  				return false, err
   190  			}
   191  
   192  			if eraseAll {
   193  				g.DeleteAll()
   194  				for _, e := range newExprs {
   195  					g.Insert(e)
   196  				}
   197  				// If we delete all of the other GroupExprs, we can break the search.
   198  				g.SetExplored(round)
   199  				return false, nil
   200  			}
   201  
   202  			eraseCur = eraseCur || eraseOld
   203  			for _, e := range newExprs {
   204  				if !g.Insert(e) {
   205  					continue
   206  				}
   207  				// If the new Group memex is successfully inserted into the
   208  				// current Group, mark the Group as unexplored to enable the exploration
   209  				// on the new Group memexs.
   210  				g.SetUnexplored(round)
   211  			}
   212  		}
   213  	}
   214  	return eraseCur, nil
   215  }
   216  
   217  // fillGroupStats computes Stats property for each Group recursively.
   218  func (opt *Optimizer) fillGroupStats(g *memo.Group) (err error) {
   219  	if g.Prop.Stats != nil {
   220  		return nil
   221  	}
   222  	// All GroupExpr in a Group should share same LogicalProperty, so just use
   223  	// first one to compute Stats property.
   224  	elem := g.Equivalents.Front()
   225  	expr := elem.Value.(*memo.GroupExpr)
   226  	childStats := make([]*property.StatsInfo, len(expr.Children))
   227  	childSchema := make([]*memex.Schema, len(expr.Children))
   228  	for i, childGroup := range expr.Children {
   229  		err = opt.fillGroupStats(childGroup)
   230  		if err != nil {
   231  			return err
   232  		}
   233  		childStats[i] = childGroup.Prop.Stats
   234  		childSchema[i] = childGroup.Prop.Schema
   235  	}
   236  	planNode := expr.ExprNode
   237  	g.Prop.Stats, err = planNode.DeriveStats(childStats, g.Prop.Schema, childSchema, nil)
   238  	return err
   239  }
   240  
   241  // onPhaseImplementation starts implementation physical operators from given root Group.
   242  func (opt *Optimizer) onPhaseImplementation(sctx stochastikctx.Context, g *memo.Group) (causetembedded.PhysicalCauset, float64, error) {
   243  	prop := &property.PhysicalProperty{
   244  		ExpectedCnt: math.MaxFloat64,
   245  	}
   246  	preparePossibleProperties(g, make(map[*memo.Group][][]*memex.DeferredCauset))
   247  	// TODO replace MaxFloat64 costLimit by variable from sctx, or other sources.
   248  	impl, err := opt.implGroup(g, prop, math.MaxFloat64)
   249  	if err != nil {
   250  		return nil, 0, err
   251  	}
   252  	if impl == nil {
   253  		return nil, 0, causetembedded.ErrInternal.GenWithStackByArgs("Can't find a proper physical plan for this query")
   254  	}
   255  	return impl.GetCauset(), impl.GetCost(), nil
   256  }
   257  
   258  // implGroup finds the best Implementation which satisfies the required
   259  // physical property for a Group. The best Implementation should have the
   260  // lowest cost among all the applicable Implementations.
   261  //
   262  // g:			the Group to be implemented.
   263  // reqPhysProp: the required physical property.
   264  // costLimit:   the maximum cost of all the Implementations.
   265  func (opt *Optimizer) implGroup(g *memo.Group, reqPhysProp *property.PhysicalProperty, costLimit float64) (memo.Implementation, error) {
   266  	groupImpl := g.GetImpl(reqPhysProp)
   267  	if groupImpl != nil {
   268  		if groupImpl.GetCost() <= costLimit {
   269  			return groupImpl, nil
   270  		}
   271  		return nil, nil
   272  	}
   273  	// Handle implementation rules for each equivalent GroupExpr.
   274  	var childImpls []memo.Implementation
   275  	err := opt.fillGroupStats(g)
   276  	if err != nil {
   277  		return nil, err
   278  	}
   279  	outCount := math.Min(g.Prop.Stats.RowCount, reqPhysProp.ExpectedCnt)
   280  	for elem := g.Equivalents.Front(); elem != nil; elem = elem.Next() {
   281  		curExpr := elem.Value.(*memo.GroupExpr)
   282  		impls, err := opt.implGroupExpr(curExpr, reqPhysProp)
   283  		if err != nil {
   284  			return nil, err
   285  		}
   286  		for _, impl := range impls {
   287  			childImpls = childImpls[:0]
   288  			for i, childGroup := range curExpr.Children {
   289  				childImpl, err := opt.implGroup(childGroup, impl.GetCauset().GetChildReqProps(i), impl.GetCostLimit(costLimit, childImpls...))
   290  				if err != nil {
   291  					return nil, err
   292  				}
   293  				if childImpl == nil {
   294  					impl.SetCost(math.MaxFloat64)
   295  					break
   296  				}
   297  				childImpls = append(childImpls, childImpl)
   298  			}
   299  			if impl.GetCost() == math.MaxFloat64 {
   300  				continue
   301  			}
   302  			implCost := impl.CalcCost(outCount, childImpls...)
   303  			if implCost > costLimit {
   304  				continue
   305  			}
   306  			if groupImpl == nil || groupImpl.GetCost() > implCost {
   307  				groupImpl = impl.AttachChildren(childImpls...)
   308  				costLimit = implCost
   309  			}
   310  		}
   311  	}
   312  	// Handle enforcer rules for required physical property.
   313  	for _, rule := range GetEnforcerMemrules(g, reqPhysProp) {
   314  		newReqPhysProp := rule.NewProperty(reqPhysProp)
   315  		enforceCost := rule.GetEnforceCost(g)
   316  		childImpl, err := opt.implGroup(g, newReqPhysProp, costLimit-enforceCost)
   317  		if err != nil {
   318  			return nil, err
   319  		}
   320  		if childImpl == nil {
   321  			continue
   322  		}
   323  		impl := rule.OnEnforce(reqPhysProp, childImpl)
   324  		implCost := enforceCost + childImpl.GetCost()
   325  		impl.SetCost(implCost)
   326  		if groupImpl == nil || groupImpl.GetCost() > implCost {
   327  			groupImpl = impl
   328  			costLimit = implCost
   329  		}
   330  	}
   331  	if groupImpl == nil || groupImpl.GetCost() == math.MaxFloat64 {
   332  		return nil, nil
   333  	}
   334  	g.InsertImpl(reqPhysProp, groupImpl)
   335  	return groupImpl, nil
   336  }
   337  
   338  func (opt *Optimizer) implGroupExpr(cur *memo.GroupExpr, reqPhysProp *property.PhysicalProperty) (impls []memo.Implementation, err error) {
   339  	for _, rule := range opt.GetImplementationMemrules(cur.ExprNode) {
   340  		if !rule.Match(cur, reqPhysProp) {
   341  			continue
   342  		}
   343  		curImpls, err := rule.OnImplement(cur, reqPhysProp)
   344  		if err != nil {
   345  			return nil, err
   346  		}
   347  		impls = append(impls, curImpls...)
   348  	}
   349  	return impls, nil
   350  }
   351  
   352  // preparePossibleProperties recursively calls LogicalCauset PreparePossibleProperties
   353  // interface. It will fulfill the the possible properties fields of LogicalAggregation
   354  // and LogicalJoin.
   355  func preparePossibleProperties(g *memo.Group, propertyMap map[*memo.Group][][]*memex.DeferredCauset) [][]*memex.DeferredCauset {
   356  	if prop, ok := propertyMap[g]; ok {
   357  		return prop
   358  	}
   359  	groupPropertyMap := make(map[string][]*memex.DeferredCauset)
   360  	for elem := g.Equivalents.Front(); elem != nil; elem = elem.Next() {
   361  		expr := elem.Value.(*memo.GroupExpr)
   362  		childrenProperties := make([][][]*memex.DeferredCauset, len(expr.Children))
   363  		for i, child := range expr.Children {
   364  			childrenProperties[i] = preparePossibleProperties(child, propertyMap)
   365  		}
   366  		exprProperties := expr.ExprNode.PreparePossibleProperties(expr.Schema(), childrenProperties...)
   367  		for _, newPropDefCauss := range exprProperties {
   368  			// Check if the prop has already been in `groupPropertyMap`.
   369  			newProp := property.PhysicalProperty{Items: property.ItemsFromDefCauss(newPropDefCauss, true)}
   370  			key := newProp.HashCode()
   371  			if _, ok := groupPropertyMap[string(key)]; !ok {
   372  				groupPropertyMap[string(key)] = newPropDefCauss
   373  			}
   374  		}
   375  	}
   376  	resultProps := make([][]*memex.DeferredCauset, 0, len(groupPropertyMap))
   377  	for _, prop := range groupPropertyMap {
   378  		resultProps = append(resultProps, prop)
   379  	}
   380  	propertyMap[g] = resultProps
   381  	return resultProps
   382  }