github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/allegrosql/plan.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package embedded 15 16 import ( 17 "fmt" 18 "math" 19 "strconv" 20 21 "github.com/cznic/mathutil" 22 "github.com/whtcorpsinc/errors" 23 "github.com/whtcorpsinc/fidelpb/go-fidelpb" 24 "github.com/whtcorpsinc/milevadb/causet/property" 25 "github.com/whtcorpsinc/milevadb/causet/soliton" 26 "github.com/whtcorpsinc/milevadb/ekv" 27 "github.com/whtcorpsinc/milevadb/memex" 28 "github.com/whtcorpsinc/milevadb/soliton/stringutil" 29 "github.com/whtcorpsinc/milevadb/stochastikctx" 30 "github.com/whtcorpsinc/milevadb/types" 31 ) 32 33 // Causet is the description of an execution flow. 34 // It is created from ast.Node first, then optimized by the optimizer, 35 // finally used by the interlock to create a Cursor which executes the memex. 36 type Causet interface { 37 // Get the schemaReplicant. 38 Schema() *memex.Schema 39 40 // Get the ID. 41 ID() int 42 43 // TP get the plan type. 44 TP() string 45 46 // Get the ID in explain memex 47 ExplainID() fmt.Stringer 48 49 // ExplainInfo returns operator information to be explained. 50 ExplainInfo() string 51 52 // replaceExprDeferredCausets replace all the column reference in the plan's memex node. 53 replaceExprDeferredCausets(replace map[string]*memex.DeferredCauset) 54 55 SCtx() stochastikctx.Context 56 57 // property.StatsInfo will return the property.StatsInfo for this plan. 58 statsInfo() *property.StatsInfo 59 60 // OutputNames returns the outputting names of each column. 61 OutputNames() types.NameSlice 62 63 // SetOutputNames sets the outputting name by the given slice. 64 SetOutputNames(names types.NameSlice) 65 66 SelectBlockOffset() int 67 } 68 69 func enforceProperty(p *property.PhysicalProperty, tsk task, ctx stochastikctx.Context) task { 70 if p.IsEmpty() || tsk.plan() == nil { 71 return tsk 72 } 73 tsk = finishCopTask(ctx, tsk) 74 sortReqProp := &property.PhysicalProperty{TaskTp: property.RootTaskType, Items: p.Items, ExpectedCnt: math.MaxFloat64} 75 sort := PhysicalSort{ByItems: make([]*soliton.ByItems, 0, len(p.Items))}.Init(ctx, tsk.plan().statsInfo(), tsk.plan().SelectBlockOffset(), sortReqProp) 76 for _, col := range p.Items { 77 sort.ByItems = append(sort.ByItems, &soliton.ByItems{Expr: col.DefCaus, Desc: col.Desc}) 78 } 79 return sort.attach2Task(tsk) 80 } 81 82 // optimizeByShuffle insert `PhysicalShuffle` to optimize performance by running in a parallel manner. 83 func optimizeByShuffle(pp PhysicalCauset, tsk task, ctx stochastikctx.Context) task { 84 if tsk.plan() == nil { 85 return tsk 86 } 87 88 // Don't use `tsk.plan()` here, which will probably be different from `pp`. 89 // Eg., when `pp` is `NominalSort`, `tsk.plan()` would be its child. 90 switch p := pp.(type) { 91 case *PhysicalWindow: 92 if shuffle := optimizeByShuffle4Window(p, ctx); shuffle != nil { 93 return shuffle.attach2Task(tsk) 94 } 95 } 96 return tsk 97 } 98 99 func optimizeByShuffle4Window(pp *PhysicalWindow, ctx stochastikctx.Context) *PhysicalShuffle { 100 concurrency := ctx.GetStochastikVars().WindowConcurrency() 101 if concurrency <= 1 { 102 return nil 103 } 104 105 sort, ok := pp.Children()[0].(*PhysicalSort) 106 if !ok { 107 // Multi-thread executing on SORTED data source is not effective enough by current implementation. 108 // TODO: Implement a better one. 109 return nil 110 } 111 tail, dataSource := sort, sort.Children()[0] 112 113 partitionBy := make([]*memex.DeferredCauset, 0, len(pp.PartitionBy)) 114 for _, item := range pp.PartitionBy { 115 partitionBy = append(partitionBy, item.DefCaus) 116 } 117 NDV := int(getCardinality(partitionBy, dataSource.Schema(), dataSource.statsInfo())) 118 if NDV <= 1 { 119 return nil 120 } 121 concurrency = mathutil.Min(concurrency, NDV) 122 123 byItems := make([]memex.Expression, 0, len(pp.PartitionBy)) 124 for _, item := range pp.PartitionBy { 125 byItems = append(byItems, item.DefCaus) 126 } 127 reqProp := &property.PhysicalProperty{ExpectedCnt: math.MaxFloat64} 128 shuffle := PhysicalShuffle{ 129 Concurrency: concurrency, 130 Tail: tail, 131 DataSource: dataSource, 132 SplitterType: PartitionHashSplitterType, 133 HashByItems: byItems, 134 }.Init(ctx, pp.statsInfo(), pp.SelectBlockOffset(), reqProp) 135 return shuffle 136 } 137 138 // LogicalCauset is a tree of logical operators. 139 // We can do a lot of logical optimizations to it, like predicate pushdown and column pruning. 140 type LogicalCauset interface { 141 Causet 142 143 // HashCode encodes a LogicalCauset to fast compare whether a LogicalCauset equals to another. 144 // We use a strict encode method here which ensures there is no conflict. 145 HashCode() []byte 146 147 // PredicatePushDown pushes down the predicates in the where/on/having clauses as deeply as possible. 148 // It will accept a predicate that is an memex slice, and return the memexs that can't be pushed. 149 // Because it might change the root if the having clause exists, we need to return a plan that represents a new root. 150 PredicatePushDown([]memex.Expression) ([]memex.Expression, LogicalCauset) 151 152 // PruneDeferredCausets prunes the unused columns. 153 PruneDeferredCausets([]*memex.DeferredCauset) error 154 155 // findBestTask converts the logical plan to the physical plan. It's a new interface. 156 // It is called recursively from the parent to the children to create the result physical plan. 157 // Some logical plans will convert the children to the physical plans in different ways, and return the one 158 // With the lowest cost and how many plans are found in this function. 159 // planCounter is a counter for causet to force a plan. 160 // If planCounter > 0, the clock_th plan generated in this function will be returned. 161 // If planCounter = 0, the plan generated in this function will not be considered. 162 // If planCounter = -1, then we will not force plan. 163 findBestTask(prop *property.PhysicalProperty, planCounter *CausetCounterTp) (task, int64, error) 164 165 // BuildKeyInfo will collect the information of unique keys into schemaReplicant. 166 // Because this method is also used in cascades causet, we cannot use 167 // things like `p.schemaReplicant` or `p.children` inside it. We should use the `selfSchema` 168 // and `childSchema` instead. 169 BuildKeyInfo(selfSchema *memex.Schema, childSchema []*memex.Schema) 170 171 // pushDownTopN will push down the topN or limit operator during logical optimization. 172 pushDownTopN(topN *LogicalTopN) LogicalCauset 173 174 // recursiveDeriveStats derives statistic info between plans. 175 recursiveDeriveStats(colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) 176 177 // DeriveStats derives statistic info for current plan node given child stats. 178 // We need selfSchema, childSchema here because it makes this method can be used in 179 // cascades causet, where LogicalCauset might not record its children or schemaReplicant. 180 DeriveStats(childStats []*property.StatsInfo, selfSchema *memex.Schema, childSchema []*memex.Schema, colGroups [][]*memex.DeferredCauset) (*property.StatsInfo, error) 181 182 // ExtractDefCausGroups extracts column groups from child operator whose DNVs are required by the current operator. 183 // For example, if current operator is LogicalAggregation of `Group By a, b`, we indicate the child operators to maintain 184 // and propagate the NDV info of column group (a, b), to improve the event count estimation of current LogicalAggregation. 185 // The parameter colGroups are column groups required by upper operators, besides from the column groups derived from 186 // current operator, we should pass down parent colGroups to child operator as many as possible. 187 ExtractDefCausGroups(colGroups [][]*memex.DeferredCauset) [][]*memex.DeferredCauset 188 189 // PreparePossibleProperties is only used for join and aggregation. Like group by a,b,c, all permutation of (a,b,c) is 190 // valid, but the ordered indices in leaf plan is limited. So we can get all possible order properties by a pre-walking. 191 PreparePossibleProperties(schemaReplicant *memex.Schema, childrenProperties ...[][]*memex.DeferredCauset) [][]*memex.DeferredCauset 192 193 // exhaustPhysicalCausets generates all possible plans that can match the required property. 194 // It will return: 195 // 1. All possible plans that can match the required property. 196 // 2. Whether the ALLEGROALLEGROSQL hint can work. Return true if there is no hint. 197 exhaustPhysicalCausets(*property.PhysicalProperty) (physicalCausets []PhysicalCauset, hintCanWork bool) 198 199 // ExtractCorrelatedDefCauss extracts correlated columns inside the LogicalCauset. 200 ExtractCorrelatedDefCauss() []*memex.CorrelatedDeferredCauset 201 202 // MaxOneRow means whether this operator only returns max one event. 203 MaxOneRow() bool 204 205 // Get all the children. 206 Children() []LogicalCauset 207 208 // SetChildren sets the children for the plan. 209 SetChildren(...LogicalCauset) 210 211 // SetChild sets the ith child for the plan. 212 SetChild(i int, child LogicalCauset) 213 214 // rollBackTaskMap roll back all taskMap's logs after TimeStamp TS. 215 rollBackTaskMap(TS uint64) 216 } 217 218 // PhysicalCauset is a tree of the physical operators. 219 type PhysicalCauset interface { 220 Causet 221 222 // attach2Task makes the current physical plan as the father of task's physicalCauset and uFIDelates the cost of 223 // current task. If the child's task is cop task, some operator may close this task and return a new rootTask. 224 attach2Task(...task) task 225 226 // ToPB converts physical plan to fidelpb interlock. 227 ToPB(ctx stochastikctx.Context, storeType ekv.StoreType) (*fidelpb.InterlockingDirectorate, error) 228 229 // getChildReqProps gets the required property by child index. 230 GetChildReqProps(idx int) *property.PhysicalProperty 231 232 // StatsCount returns the count of property.StatsInfo for this plan. 233 StatsCount() float64 234 235 // ExtractCorrelatedDefCauss extracts correlated columns inside the PhysicalCauset. 236 ExtractCorrelatedDefCauss() []*memex.CorrelatedDeferredCauset 237 238 // Get all the children. 239 Children() []PhysicalCauset 240 241 // SetChildren sets the children for the plan. 242 SetChildren(...PhysicalCauset) 243 244 // SetChild sets the ith child for the plan. 245 SetChild(i int, child PhysicalCauset) 246 247 // ResolveIndices resolves the indices for columns. After doing this, the columns can evaluate the rows by their indices. 248 ResolveIndices() error 249 250 // Stats returns the StatsInfo of the plan. 251 Stats() *property.StatsInfo 252 253 // ExplainNormalizedInfo returns operator normalized information for generating digest. 254 ExplainNormalizedInfo() string 255 256 // Clone clones this physical plan. 257 Clone() (PhysicalCauset, error) 258 } 259 260 type baseLogicalCauset struct { 261 baseCauset 262 263 taskMap map[string]task 264 // taskMapBak forms a backlog stack of taskMap, used to roll back the taskMap. 265 taskMapBak []string 266 // taskMapBakTS stores the timestamps of logs. 267 taskMapBakTS []uint64 268 self LogicalCauset 269 maxOneRow bool 270 children []LogicalCauset 271 } 272 273 func (p *baseLogicalCauset) MaxOneRow() bool { 274 return p.maxOneRow 275 } 276 277 // ExplainInfo implements Causet interface. 278 func (p *baseLogicalCauset) ExplainInfo() string { 279 return "" 280 } 281 282 type basePhysicalCauset struct { 283 baseCauset 284 285 childrenReqProps []*property.PhysicalProperty 286 self PhysicalCauset 287 children []PhysicalCauset 288 } 289 290 func (p *basePhysicalCauset) cloneWithSelf(newSelf PhysicalCauset) (*basePhysicalCauset, error) { 291 base := &basePhysicalCauset{ 292 baseCauset: p.baseCauset, 293 self: newSelf, 294 } 295 for _, child := range p.children { 296 cloned, err := child.Clone() 297 if err != nil { 298 return nil, err 299 } 300 base.children = append(base.children, cloned) 301 } 302 for _, prop := range p.childrenReqProps { 303 base.childrenReqProps = append(base.childrenReqProps, prop.Clone()) 304 } 305 return base, nil 306 } 307 308 // Clone implements PhysicalCauset interface. 309 func (p *basePhysicalCauset) Clone() (PhysicalCauset, error) { 310 return nil, errors.Errorf("%T doesn't support cloning", p.self) 311 } 312 313 // ExplainInfo implements Causet interface. 314 func (p *basePhysicalCauset) ExplainInfo() string { 315 return "" 316 } 317 318 // ExplainInfo implements Causet interface. 319 func (p *basePhysicalCauset) ExplainNormalizedInfo() string { 320 return "" 321 } 322 323 func (p *basePhysicalCauset) GetChildReqProps(idx int) *property.PhysicalProperty { 324 return p.childrenReqProps[idx] 325 } 326 327 // ExtractCorrelatedDefCauss implements PhysicalCauset interface. 328 func (p *basePhysicalCauset) ExtractCorrelatedDefCauss() []*memex.CorrelatedDeferredCauset { 329 return nil 330 } 331 332 // GetlogicalTS4TaskMap get the logical TimeStamp now to help rollback the TaskMap changes after that. 333 func (p *baseLogicalCauset) GetlogicalTS4TaskMap() uint64 { 334 p.ctx.GetStochastikVars().StmtCtx.TaskMapBakTS += 1 335 return p.ctx.GetStochastikVars().StmtCtx.TaskMapBakTS 336 } 337 338 func (p *baseLogicalCauset) rollBackTaskMap(TS uint64) { 339 if !p.ctx.GetStochastikVars().StmtCtx.StmtHints.TaskMapNeedBackUp() { 340 return 341 } 342 if len(p.taskMapBak) > 0 { 343 // Rollback all the logs with TimeStamp TS. 344 N := len(p.taskMapBak) 345 for i := 0; i < N; i++ { 346 cur := p.taskMapBak[i] 347 if p.taskMapBakTS[i] < TS { 348 continue 349 } 350 351 // Remove the i_th log. 352 p.taskMapBak = append(p.taskMapBak[:i], p.taskMapBak[i+1:]...) 353 p.taskMapBakTS = append(p.taskMapBakTS[:i], p.taskMapBakTS[i+1:]...) 354 i-- 355 N-- 356 357 // Roll back taskMap. 358 p.taskMap[cur] = nil 359 } 360 } 361 for _, child := range p.children { 362 child.rollBackTaskMap(TS) 363 } 364 } 365 366 func (p *baseLogicalCauset) getTask(prop *property.PhysicalProperty) task { 367 key := prop.HashCode() 368 return p.taskMap[string(key)] 369 } 370 371 func (p *baseLogicalCauset) storeTask(prop *property.PhysicalProperty, task task) { 372 key := prop.HashCode() 373 if p.ctx.GetStochastikVars().StmtCtx.StmtHints.TaskMapNeedBackUp() { 374 // Empty string for useless change. 375 TS := p.GetlogicalTS4TaskMap() 376 p.taskMapBakTS = append(p.taskMapBakTS, TS) 377 p.taskMapBak = append(p.taskMapBak, string(key)) 378 } 379 p.taskMap[string(key)] = task 380 } 381 382 // HasMaxOneRow returns if the LogicalCauset will output at most one event. 383 func HasMaxOneRow(p LogicalCauset, childMaxOneRow []bool) bool { 384 if len(childMaxOneRow) == 0 { 385 // The reason why we use this check is that, this function 386 // is used both in causet/embedded and causet/cascades. 387 // In cascades causet, LogicalCauset may have no `children`. 388 return false 389 } 390 switch x := p.(type) { 391 case *LogicalLock, *LogicalLimit, *LogicalSort, *LogicalSelection, 392 *LogicalApply, *LogicalProjection, *LogicalWindow, *LogicalAggregation: 393 return childMaxOneRow[0] 394 case *LogicalMaxOneRow: 395 return true 396 case *LogicalJoin: 397 switch x.JoinType { 398 case SemiJoin, AntiSemiJoin, LeftOuterSemiJoin, AntiLeftOuterSemiJoin: 399 return childMaxOneRow[0] 400 default: 401 return childMaxOneRow[0] && childMaxOneRow[1] 402 } 403 } 404 return false 405 } 406 407 // BuildKeyInfo implements LogicalCauset BuildKeyInfo interface. 408 func (p *baseLogicalCauset) BuildKeyInfo(selfSchema *memex.Schema, childSchema []*memex.Schema) { 409 childMaxOneRow := make([]bool, len(p.children)) 410 for i := range p.children { 411 childMaxOneRow[i] = p.children[i].MaxOneRow() 412 } 413 p.maxOneRow = HasMaxOneRow(p.self, childMaxOneRow) 414 } 415 416 // BuildKeyInfo implements LogicalCauset BuildKeyInfo interface. 417 func (p *logicalSchemaProducer) BuildKeyInfo(selfSchema *memex.Schema, childSchema []*memex.Schema) { 418 selfSchema.Keys = nil 419 p.baseLogicalCauset.BuildKeyInfo(selfSchema, childSchema) 420 } 421 422 func newBaseCauset(ctx stochastikctx.Context, tp string, offset int) baseCauset { 423 ctx.GetStochastikVars().CausetID++ 424 id := ctx.GetStochastikVars().CausetID 425 return baseCauset{ 426 tp: tp, 427 id: id, 428 ctx: ctx, 429 blockOffset: offset, 430 } 431 } 432 433 func newBaseLogicalCauset(ctx stochastikctx.Context, tp string, self LogicalCauset, offset int) baseLogicalCauset { 434 return baseLogicalCauset{ 435 taskMap: make(map[string]task), 436 taskMapBak: make([]string, 0, 10), 437 taskMapBakTS: make([]uint64, 0, 10), 438 baseCauset: newBaseCauset(ctx, tp, offset), 439 self: self, 440 } 441 } 442 443 func newBasePhysicalCauset(ctx stochastikctx.Context, tp string, self PhysicalCauset, offset int) basePhysicalCauset { 444 return basePhysicalCauset{ 445 baseCauset: newBaseCauset(ctx, tp, offset), 446 self: self, 447 } 448 } 449 450 func (p *baseLogicalCauset) ExtractCorrelatedDefCauss() []*memex.CorrelatedDeferredCauset { 451 return nil 452 } 453 454 // PruneDeferredCausets implements LogicalCauset interface. 455 func (p *baseLogicalCauset) PruneDeferredCausets(parentUsedDefCauss []*memex.DeferredCauset) error { 456 if len(p.children) == 0 { 457 return nil 458 } 459 return p.children[0].PruneDeferredCausets(parentUsedDefCauss) 460 } 461 462 // baseCauset implements base Causet interface. 463 // Should be used as embedded struct in Causet implementations. 464 type baseCauset struct { 465 tp string 466 id int 467 ctx stochastikctx.Context 468 stats *property.StatsInfo 469 blockOffset int 470 } 471 472 // OutputNames returns the outputting names of each column. 473 func (p *baseCauset) OutputNames() types.NameSlice { 474 return nil 475 } 476 477 func (p *baseCauset) SetOutputNames(names types.NameSlice) { 478 } 479 480 func (p *baseCauset) replaceExprDeferredCausets(replace map[string]*memex.DeferredCauset) { 481 } 482 483 // ID implements Causet ID interface. 484 func (p *baseCauset) ID() int { 485 return p.id 486 } 487 488 // property.StatsInfo implements the Causet interface. 489 func (p *baseCauset) statsInfo() *property.StatsInfo { 490 return p.stats 491 } 492 493 // ExplainInfo implements Causet interface. 494 func (p *baseCauset) ExplainInfo() string { 495 return "N/A" 496 } 497 498 func (p *baseCauset) ExplainID() fmt.Stringer { 499 return stringutil.MemoizeStr(func() string { 500 return p.tp + "_" + strconv.Itoa(p.id) 501 }) 502 } 503 504 // TP implements Causet interface. 505 func (p *baseCauset) TP() string { 506 return p.tp 507 } 508 509 func (p *baseCauset) SelectBlockOffset() int { 510 return p.blockOffset 511 } 512 513 // Stats implements Causet Stats interface. 514 func (p *baseCauset) Stats() *property.StatsInfo { 515 return p.stats 516 } 517 518 // Schema implements Causet Schema interface. 519 func (p *baseLogicalCauset) Schema() *memex.Schema { 520 return p.children[0].Schema() 521 } 522 523 func (p *baseLogicalCauset) OutputNames() types.NameSlice { 524 return p.children[0].OutputNames() 525 } 526 527 func (p *baseLogicalCauset) SetOutputNames(names types.NameSlice) { 528 p.children[0].SetOutputNames(names) 529 } 530 531 // Schema implements Causet Schema interface. 532 func (p *basePhysicalCauset) Schema() *memex.Schema { 533 return p.children[0].Schema() 534 } 535 536 // Children implements LogicalCauset Children interface. 537 func (p *baseLogicalCauset) Children() []LogicalCauset { 538 return p.children 539 } 540 541 // Children implements PhysicalCauset Children interface. 542 func (p *basePhysicalCauset) Children() []PhysicalCauset { 543 return p.children 544 } 545 546 // SetChildren implements LogicalCauset SetChildren interface. 547 func (p *baseLogicalCauset) SetChildren(children ...LogicalCauset) { 548 p.children = children 549 } 550 551 // SetChildren implements PhysicalCauset SetChildren interface. 552 func (p *basePhysicalCauset) SetChildren(children ...PhysicalCauset) { 553 p.children = children 554 } 555 556 // SetChild implements LogicalCauset SetChild interface. 557 func (p *baseLogicalCauset) SetChild(i int, child LogicalCauset) { 558 p.children[i] = child 559 } 560 561 // SetChild implements PhysicalCauset SetChild interface. 562 func (p *basePhysicalCauset) SetChild(i int, child PhysicalCauset) { 563 p.children[i] = child 564 } 565 566 // Context implements Causet Context interface. 567 func (p *baseCauset) SCtx() stochastikctx.Context { 568 return p.ctx 569 }