github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/allegrosql/optimize.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package cascades 15 16 import ( 17 "container/list" 18 "math" 19 20 causetembedded "github.com/whtcorpsinc/milevadb/causet/embedded" 21 "github.com/whtcorpsinc/milevadb/causet/memo" 22 "github.com/whtcorpsinc/milevadb/causet/property" 23 "github.com/whtcorpsinc/milevadb/memex" 24 "github.com/whtcorpsinc/milevadb/stochastikctx" 25 ) 26 27 // DefaultOptimizer is the optimizer which contains all of the default 28 // transformation and implementation rules. 29 var DefaultOptimizer = NewOptimizer() 30 31 // Optimizer is the struct for cascades optimizer. 32 type Optimizer struct { 33 transformationMemruleBatches []TransformationMemruleBatch 34 implementationMemruleMap map[memo.Operand][]ImplementationMemrule 35 } 36 37 // NewOptimizer returns a cascades optimizer with default transformation 38 // rules and implementation rules. 39 func NewOptimizer() *Optimizer { 40 return &Optimizer{ 41 transformationMemruleBatches: DefaultMemruleBatches, 42 implementationMemruleMap: defaultImplementationMap, 43 } 44 } 45 46 // ResetTransformationMemrules resets the transformationMemruleBatches of the optimizer, and returns the optimizer. 47 func (opt *Optimizer) ResetTransformationMemrules(ruleBatches ...TransformationMemruleBatch) *Optimizer { 48 opt.transformationMemruleBatches = ruleBatches 49 return opt 50 } 51 52 // ResetImplementationMemrules resets the implementationMemruleMap of the optimizer, and returns the optimizer. 53 func (opt *Optimizer) ResetImplementationMemrules(rules map[memo.Operand][]ImplementationMemrule) *Optimizer { 54 opt.implementationMemruleMap = rules 55 return opt 56 } 57 58 // GetImplementationMemrules gets all the candidate implementation rules of the optimizer 59 // for the logical plan node. 60 func (opt *Optimizer) GetImplementationMemrules(node causetembedded.LogicalCauset) []ImplementationMemrule { 61 return opt.implementationMemruleMap[memo.GetOperand(node)] 62 } 63 64 // FindBestCauset is the optimization entrance of the cascades causet. The 65 // optimization is composed of 3 phases: preprocessing, exploration and implementation. 66 // 67 //------------------------------------------------------------------------------ 68 // Phase 1: Preprocessing 69 //------------------------------------------------------------------------------ 70 // 71 // The target of this phase is to preprocess the plan tree by some heuristic 72 // rules which should always be beneficial, for example DeferredCauset Pruning. 73 // 74 //------------------------------------------------------------------------------ 75 // Phase 2: Exploration 76 //------------------------------------------------------------------------------ 77 // 78 // The target of this phase is to explore all the logically equivalent 79 // memexs by exploring all the equivalent group memexs of each group. 80 // 81 // At the very beginning, there is only one group memex in a Group. After 82 // applying some transformation rules on certain memexs of the Group, all 83 // the equivalent memexs are found and stored in the Group. This procedure 84 // can be regarded as searching for a weak connected component in a directed 85 // graph, where nodes are memexs and directed edges are the transformation 86 // rules. 87 // 88 //------------------------------------------------------------------------------ 89 // Phase 3: Implementation 90 //------------------------------------------------------------------------------ 91 // 92 // The target of this phase is to search the best physical plan for a Group 93 // which satisfies a certain required physical property. 94 // 95 // In this phase, we need to enumerate all the applicable implementation rules 96 // for each memex in each group under the required physical property. A 97 // memo structure is used for a group to reduce the repeated search on the same 98 // required physical property. 99 func (opt *Optimizer) FindBestCauset(sctx stochastikctx.Context, logical causetembedded.LogicalCauset) (p causetembedded.PhysicalCauset, cost float64, err error) { 100 logical, err = opt.onPhasePreprocessing(sctx, logical) 101 if err != nil { 102 return nil, 0, err 103 } 104 rootGroup := memo.Convert2Group(logical) 105 err = opt.onPhaseExploration(sctx, rootGroup) 106 if err != nil { 107 return nil, 0, err 108 } 109 p, cost, err = opt.onPhaseImplementation(sctx, rootGroup) 110 if err != nil { 111 return nil, 0, err 112 } 113 err = p.ResolveIndices() 114 return p, cost, err 115 } 116 117 func (opt *Optimizer) onPhasePreprocessing(sctx stochastikctx.Context, plan causetembedded.LogicalCauset) (causetembedded.LogicalCauset, error) { 118 err := plan.PruneDeferredCausets(plan.Schema().DeferredCausets) 119 if err != nil { 120 return nil, err 121 } 122 return plan, nil 123 } 124 125 func (opt *Optimizer) onPhaseExploration(sctx stochastikctx.Context, g *memo.Group) error { 126 for round, ruleBatch := range opt.transformationMemruleBatches { 127 for !g.Explored(round) { 128 err := opt.exploreGroup(g, round, ruleBatch) 129 if err != nil { 130 return err 131 } 132 } 133 } 134 return nil 135 } 136 137 func (opt *Optimizer) exploreGroup(g *memo.Group, round int, ruleBatch TransformationMemruleBatch) error { 138 if g.Explored(round) { 139 return nil 140 } 141 g.SetExplored(round) 142 143 for elem := g.Equivalents.Front(); elem != nil; elem = elem.Next() { 144 curExpr := elem.Value.(*memo.GroupExpr) 145 if curExpr.Explored(round) { 146 continue 147 } 148 curExpr.SetExplored(round) 149 150 // Explore child groups firstly. 151 for _, childGroup := range curExpr.Children { 152 for !childGroup.Explored(round) { 153 if err := opt.exploreGroup(childGroup, round, ruleBatch); err != nil { 154 return err 155 } 156 } 157 } 158 159 eraseCur, err := opt.findMoreEquiv(g, elem, round, ruleBatch) 160 if err != nil { 161 return err 162 } 163 if eraseCur { 164 g.Delete(curExpr) 165 } 166 } 167 return nil 168 } 169 170 // findMoreEquiv finds and applies the matched transformation rules. 171 func (opt *Optimizer) findMoreEquiv(g *memo.Group, elem *list.Element, round int, ruleBatch TransformationMemruleBatch) (eraseCur bool, err error) { 172 expr := elem.Value.(*memo.GroupExpr) 173 operand := memo.GetOperand(expr.ExprNode) 174 for _, rule := range ruleBatch[operand] { 175 pattern := rule.GetPattern() 176 if !pattern.Operand.Match(operand) { 177 continue 178 } 179 // Create a binding of the current Group memex and the pattern of 180 // the transformation rule to enumerate all the possible memexs. 181 iter := memo.NewExprIterFromGroupElem(elem, pattern) 182 for ; iter != nil && iter.Matched(); iter.Next() { 183 if !rule.Match(iter) { 184 continue 185 } 186 187 newExprs, eraseOld, eraseAll, err := rule.OnTransform(iter) 188 if err != nil { 189 return false, err 190 } 191 192 if eraseAll { 193 g.DeleteAll() 194 for _, e := range newExprs { 195 g.Insert(e) 196 } 197 // If we delete all of the other GroupExprs, we can break the search. 198 g.SetExplored(round) 199 return false, nil 200 } 201 202 eraseCur = eraseCur || eraseOld 203 for _, e := range newExprs { 204 if !g.Insert(e) { 205 continue 206 } 207 // If the new Group memex is successfully inserted into the 208 // current Group, mark the Group as unexplored to enable the exploration 209 // on the new Group memexs. 210 g.SetUnexplored(round) 211 } 212 } 213 } 214 return eraseCur, nil 215 } 216 217 // fillGroupStats computes Stats property for each Group recursively. 218 func (opt *Optimizer) fillGroupStats(g *memo.Group) (err error) { 219 if g.Prop.Stats != nil { 220 return nil 221 } 222 // All GroupExpr in a Group should share same LogicalProperty, so just use 223 // first one to compute Stats property. 224 elem := g.Equivalents.Front() 225 expr := elem.Value.(*memo.GroupExpr) 226 childStats := make([]*property.StatsInfo, len(expr.Children)) 227 childSchema := make([]*memex.Schema, len(expr.Children)) 228 for i, childGroup := range expr.Children { 229 err = opt.fillGroupStats(childGroup) 230 if err != nil { 231 return err 232 } 233 childStats[i] = childGroup.Prop.Stats 234 childSchema[i] = childGroup.Prop.Schema 235 } 236 planNode := expr.ExprNode 237 g.Prop.Stats, err = planNode.DeriveStats(childStats, g.Prop.Schema, childSchema, nil) 238 return err 239 } 240 241 // onPhaseImplementation starts implementation physical operators from given root Group. 242 func (opt *Optimizer) onPhaseImplementation(sctx stochastikctx.Context, g *memo.Group) (causetembedded.PhysicalCauset, float64, error) { 243 prop := &property.PhysicalProperty{ 244 ExpectedCnt: math.MaxFloat64, 245 } 246 preparePossibleProperties(g, make(map[*memo.Group][][]*memex.DeferredCauset)) 247 // TODO replace MaxFloat64 costLimit by variable from sctx, or other sources. 248 impl, err := opt.implGroup(g, prop, math.MaxFloat64) 249 if err != nil { 250 return nil, 0, err 251 } 252 if impl == nil { 253 return nil, 0, causetembedded.ErrInternal.GenWithStackByArgs("Can't find a proper physical plan for this query") 254 } 255 return impl.GetCauset(), impl.GetCost(), nil 256 } 257 258 // implGroup finds the best Implementation which satisfies the required 259 // physical property for a Group. The best Implementation should have the 260 // lowest cost among all the applicable Implementations. 261 // 262 // g: the Group to be implemented. 263 // reqPhysProp: the required physical property. 264 // costLimit: the maximum cost of all the Implementations. 265 func (opt *Optimizer) implGroup(g *memo.Group, reqPhysProp *property.PhysicalProperty, costLimit float64) (memo.Implementation, error) { 266 groupImpl := g.GetImpl(reqPhysProp) 267 if groupImpl != nil { 268 if groupImpl.GetCost() <= costLimit { 269 return groupImpl, nil 270 } 271 return nil, nil 272 } 273 // Handle implementation rules for each equivalent GroupExpr. 274 var childImpls []memo.Implementation 275 err := opt.fillGroupStats(g) 276 if err != nil { 277 return nil, err 278 } 279 outCount := math.Min(g.Prop.Stats.RowCount, reqPhysProp.ExpectedCnt) 280 for elem := g.Equivalents.Front(); elem != nil; elem = elem.Next() { 281 curExpr := elem.Value.(*memo.GroupExpr) 282 impls, err := opt.implGroupExpr(curExpr, reqPhysProp) 283 if err != nil { 284 return nil, err 285 } 286 for _, impl := range impls { 287 childImpls = childImpls[:0] 288 for i, childGroup := range curExpr.Children { 289 childImpl, err := opt.implGroup(childGroup, impl.GetCauset().GetChildReqProps(i), impl.GetCostLimit(costLimit, childImpls...)) 290 if err != nil { 291 return nil, err 292 } 293 if childImpl == nil { 294 impl.SetCost(math.MaxFloat64) 295 break 296 } 297 childImpls = append(childImpls, childImpl) 298 } 299 if impl.GetCost() == math.MaxFloat64 { 300 continue 301 } 302 implCost := impl.CalcCost(outCount, childImpls...) 303 if implCost > costLimit { 304 continue 305 } 306 if groupImpl == nil || groupImpl.GetCost() > implCost { 307 groupImpl = impl.AttachChildren(childImpls...) 308 costLimit = implCost 309 } 310 } 311 } 312 // Handle enforcer rules for required physical property. 313 for _, rule := range GetEnforcerMemrules(g, reqPhysProp) { 314 newReqPhysProp := rule.NewProperty(reqPhysProp) 315 enforceCost := rule.GetEnforceCost(g) 316 childImpl, err := opt.implGroup(g, newReqPhysProp, costLimit-enforceCost) 317 if err != nil { 318 return nil, err 319 } 320 if childImpl == nil { 321 continue 322 } 323 impl := rule.OnEnforce(reqPhysProp, childImpl) 324 implCost := enforceCost + childImpl.GetCost() 325 impl.SetCost(implCost) 326 if groupImpl == nil || groupImpl.GetCost() > implCost { 327 groupImpl = impl 328 costLimit = implCost 329 } 330 } 331 if groupImpl == nil || groupImpl.GetCost() == math.MaxFloat64 { 332 return nil, nil 333 } 334 g.InsertImpl(reqPhysProp, groupImpl) 335 return groupImpl, nil 336 } 337 338 func (opt *Optimizer) implGroupExpr(cur *memo.GroupExpr, reqPhysProp *property.PhysicalProperty) (impls []memo.Implementation, err error) { 339 for _, rule := range opt.GetImplementationMemrules(cur.ExprNode) { 340 if !rule.Match(cur, reqPhysProp) { 341 continue 342 } 343 curImpls, err := rule.OnImplement(cur, reqPhysProp) 344 if err != nil { 345 return nil, err 346 } 347 impls = append(impls, curImpls...) 348 } 349 return impls, nil 350 } 351 352 // preparePossibleProperties recursively calls LogicalCauset PreparePossibleProperties 353 // interface. It will fulfill the the possible properties fields of LogicalAggregation 354 // and LogicalJoin. 355 func preparePossibleProperties(g *memo.Group, propertyMap map[*memo.Group][][]*memex.DeferredCauset) [][]*memex.DeferredCauset { 356 if prop, ok := propertyMap[g]; ok { 357 return prop 358 } 359 groupPropertyMap := make(map[string][]*memex.DeferredCauset) 360 for elem := g.Equivalents.Front(); elem != nil; elem = elem.Next() { 361 expr := elem.Value.(*memo.GroupExpr) 362 childrenProperties := make([][][]*memex.DeferredCauset, len(expr.Children)) 363 for i, child := range expr.Children { 364 childrenProperties[i] = preparePossibleProperties(child, propertyMap) 365 } 366 exprProperties := expr.ExprNode.PreparePossibleProperties(expr.Schema(), childrenProperties...) 367 for _, newPropDefCauss := range exprProperties { 368 // Check if the prop has already been in `groupPropertyMap`. 369 newProp := property.PhysicalProperty{Items: property.ItemsFromDefCauss(newPropDefCauss, true)} 370 key := newProp.HashCode() 371 if _, ok := groupPropertyMap[string(key)]; !ok { 372 groupPropertyMap[string(key)] = newPropDefCauss 373 } 374 } 375 } 376 resultProps := make([][]*memex.DeferredCauset, 0, len(groupPropertyMap)) 377 for _, prop := range groupPropertyMap { 378 resultProps = append(resultProps, prop) 379 } 380 propertyMap[g] = resultProps 381 return resultProps 382 }