github.com/dolthub/go-mysql-server@v0.18.0/sql/memo/join_order_builder.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package memo 16 17 import ( 18 "errors" 19 "fmt" 20 "math" 21 "math/bits" 22 "strings" 23 24 "github.com/dolthub/go-mysql-server/sql" 25 "github.com/dolthub/go-mysql-server/sql/expression" 26 "github.com/dolthub/go-mysql-server/sql/plan" 27 ) 28 29 // joinOrderBuilder enumerates valid plans for a join tree. We build the join 30 // tree bottom up, first joining single nodes with join condition "edges", then 31 // single nodes to hypernodes (1+n), and finally hyper nodes to 32 // other hypernodes (n+m). 33 // 34 // Every valid combination of subtrees is considered with two exceptions. 35 // 36 // 1) Cross joins and other joins with degenerate predicates are never pushed 37 // lower in the tree. 38 // 39 // 2) Transformations that are valid but create degenerate filters (new 40 // cross joins) are not considered. 41 // 42 // The logic for this module is sourced from 43 // https://www.researchgate.net/publication/262216932_On_the_correct_and_complete_enumeration_of_the_core_search_space 44 // with help from 45 // https://github.com/cockroachdb/cockroach/blob/master/pkg/sql/opt/xform/join_order_builder.go. 46 // 47 // Two theoretical observations underpin the enumeration: 48 // 49 // 1) Either associativity or l-asscom can be applied to left nesting, but not 50 // both. Either associativity or r-asscom can be applied to right nesting, but 51 // not both. 52 // 53 // 2) Every transformation in the core search space (of two operators) can be 54 // reached by one commutative transformation on each operator and one assoc, 55 // l-asscom, or r-asscom. 56 // 57 // We use these assumptions to implement the dbSube enumeration search, using a 58 // CD-C conflict detection rules to encode reordering applicability: 59 // 60 // 1) Use bitsets to iterate all combinations of join plan subtrees, starting 61 // with two tables and building upwards. For example, a join A x B x C would 62 // start with 10 x 01 (A x B) and build up to sets 100 x 011 (A x (BC)) and 101 63 // x 010 ((AC) x B). 64 // 65 // 2) Attempt to make a new plan with every combination of subtrees 66 // (hypernodes) for every join operator. This takes the form (op s1 s2), where 67 // s1 is the right subtree, s2 is the left subtree, and op is the edge 68 // corresponding to a specific join type and filter. Most of the time one 69 // operator => one edge, except when join conjunctions are split to make 70 // multiple edges for one join operator. We differentiate innerEdges and 71 // nonInnerEdges to avoid innerJoins plans overwriting the often slower 72 // nonInner join plans. A successful edge between two sets adds a join plan to 73 // the memo. 74 // 75 // 3) Save the best plan for every memo group (unordered group of table joins) 76 // moving upwards through the tree, finishing with the optimal memo group for 77 // the join including every table. 78 // 79 // Applicability rules: 80 // 81 // We build applicability rules before exhaustive enumeration to filter valid 82 // plans. We consider a reordering valid by checking: 83 // 84 // 1) Transform compatibility: a Lookup table between two join operator types 85 // 86 // 2) Eligibility sets: left and right subtree dependencies required for a 87 // valid reordering. The syntactic eligibility set (SES) is the table 88 // dependencies of the edge's filter. For example, the SES for a filter a.x = 89 // b.y is (a,b). The total eligibility set (TES) is an expansion of the SES 90 // that conceptually behaves similarly to an SES; i.e. only hypernodes that 91 // completely intersect an edge's TES are valid. The difference is that TES is 92 // expanded based on the original edge's join operator subdependencies. The 93 // paper referenced encodes an algorithms for building a TES to fully encode 94 // associativity, left-asscom, and right-asscom (commutativity is a unary 95 // operator property). 96 // 97 // For example, the INNER JOIN in the query below is subject to assoc(left 98 // join, inner join) = false: 99 // 100 // SELECT * 101 // FROM (SELECT * FROM ab LEFT JOIN uv ON a = u) 102 // INNER JOIN xy 103 // ON x = v 104 // 105 // As a result, the inner join's TES, initialized as the SES(xy, uv), is 106 // expanded to include ab. The final TES(xy, uv, ab) invalidates 107 // LEFT JOIN association during exhaustive enumeration: 108 // 109 // SELECT * 110 // FROM (SELECT * FROM ab LEFT JOIN xy ON x = v) 111 // INNER JOIN uv 112 // ON a = u 113 // 114 // Note the disconnect between the global nature of the TES, which is built 115 // fully before enumeration, and local nature of testing every enumeration 116 // against the pre-computed TES. 117 // 118 // In special cases, the TES is not expressive enough to represent certain 119 // table dependencies. Conflict rules of the form R1 -> R2 are an escape hatch 120 // to require the dependency table set R2 when any subset of R1 is present in a 121 // candidate plan. 122 // 123 // TODO: null rejecting tables 124 type joinOrderBuilder struct { 125 // plans maps from a set of base relations to the memo group for the join tree 126 // that contains those relations (and only those relations). As an example, 127 // the group for [xy, ab, uv] might contain the join trees (xy, (ab, uv)), 128 // ((xy, ab), uv), (ab, (xy, uv)), etc. 129 // 130 // The group for a single base relation is the base relation itself. 131 m *Memo 132 plans map[vertexSet]*ExprGroup 133 edges []edge 134 vertices []RelExpr 135 vertexGroups []GroupId 136 vertexTableIds []sql.TableId 137 innerEdges edgeSet 138 nonInnerEdges edgeSet 139 newPlanCb func(j *joinOrderBuilder, rel RelExpr) 140 forceFastDFSLookupForTest bool 141 hasCrossJoin bool 142 } 143 144 func NewJoinOrderBuilder(memo *Memo) *joinOrderBuilder { 145 return &joinOrderBuilder{ 146 plans: make(map[vertexSet]*ExprGroup), 147 m: memo, 148 vertices: make([]RelExpr, 0), 149 vertexGroups: make([]GroupId, 0), 150 } 151 } 152 153 var ErrUnsupportedReorderNode = errors.New("unsupported join reorder node") 154 155 // useFastReorder determines whether to skip the current brute force join planning and use an alternate 156 // planning algorithm that analyzes the join tree to find a sequence that can be implemented purely as lookup joins. 157 // Currently we only use it for large joins (20+ tables) with no join hints. 158 func (j *joinOrderBuilder) useFastReorder() bool { 159 if j.forceFastDFSLookupForTest { 160 return true 161 } 162 if j.m.hints.order != nil { 163 return false 164 } 165 return len(j.vertices) > 15 166 } 167 168 func (j *joinOrderBuilder) ReorderJoin(n sql.Node) { 169 j.populateSubgraph(n) 170 if j.useFastReorder() { 171 j.buildSingleLookupPlan() 172 return 173 } else if j.hasCrossJoin { 174 // Rely on FastReorder to avoid plans that drop filters with cross joins 175 if j.buildSingleLookupPlan() { 176 return 177 } 178 } 179 // TODO: consider if buildSingleLookupPlan can/should run after ensureClosure. This could allow us to use analysis 180 // from ensureClosure in buildSingleLookupPlan, but the equivalence sets could create multiple possible join orders 181 // for the single-lookup plan, which would complicate things. 182 j.ensureClosure(j.m.root) 183 j.dbSube() 184 return 185 } 186 187 // populateSubgraph recursively tracks new join nodes as edges and new 188 // leaf nodes as vertices to the joinOrderBuilder graph, returning 189 // the subgraph's newly tracked vertices and edges. 190 func (j *joinOrderBuilder) populateSubgraph(n sql.Node) (vertexSet, edgeSet, *ExprGroup) { 191 var group *ExprGroup 192 startV := j.allVertices() 193 startE := j.allEdges() 194 // build operator 195 switch n := n.(type) { 196 case *plan.Filter: 197 return j.buildFilter(n.Child, n.Expression) 198 case *plan.Having: 199 return j.buildFilter(n.Child, n.Cond) 200 case *plan.Limit: 201 _, _, group = j.populateSubgraph(n.Child) 202 group.RelProps.Limit = n.Limit 203 case *plan.Project: 204 return j.buildProject(n) 205 case *plan.Sort: 206 _, _, group = j.populateSubgraph(n.Child) 207 group.RelProps.sort = n.SortFields 208 case *plan.Distinct: 209 _, _, group = j.populateSubgraph(n.Child) 210 group.RelProps.Distinct = HashDistinctOp 211 case *plan.Max1Row: 212 return j.buildMax1Row(n) 213 case *plan.JoinNode: 214 group = j.buildJoinOp(n) 215 if n.Op == plan.JoinTypeCross { 216 j.hasCrossJoin = true 217 } 218 case *plan.SetOp: 219 group = j.buildJoinLeaf(n) 220 case sql.NameableNode: 221 group = j.buildJoinLeaf(n.(plan.TableIdNode)) 222 case *plan.StripRowNode: 223 return j.populateSubgraph(n.Child) 224 case *plan.CachedResults: 225 return j.populateSubgraph(n.Child) 226 default: 227 err := fmt.Errorf("%w: %T", ErrUnsupportedReorderNode, n) 228 j.m.HandleErr(err) 229 } 230 return j.allVertices().difference(startV), j.allEdges().Difference(startE), group 231 } 232 233 // buildSingleLookupPlan attempts to build a plan consisting only of lookup joins. 234 func (j *joinOrderBuilder) buildSingleLookupPlan() bool { 235 fds := j.m.root.RelProps.FuncDeps() 236 fdKey, hasKey := fds.StrictKey() 237 // fdKey is a set of columns which constrain all other columns in the join. 238 // If a chain of lookups exist, then the columns in fdKey must be in the innermost join. 239 if !hasKey { 240 return false 241 } 242 // We need to include all of the fdKey columns in the innermost join. 243 // For now, we just handle the case where the key is exactly one column. 244 if fdKey.Len() != 1 { 245 return false 246 } 247 for _, edge := range j.edges { 248 if !edge.op.joinType.IsInner() { 249 // This optimization currently only supports inner joins. 250 return false 251 } 252 } 253 keyColumn, _ := fdKey.Next(1) 254 var currentlyJoinedTables sql.FastIntSet 255 var currentlyJoinedVertexes vertexSet 256 for i, n := range j.m.Root().RelProps.TableIdNodes() { 257 if n.Columns().Contains(keyColumn) { 258 currentlyJoinedTables.Add(int(n.Id())) 259 currentlyJoinedVertexes = currentlyJoinedVertexes.add(uint64(i)) 260 break 261 } 262 } 263 264 // removedEdges contains the edges that have already been incorporated into the new plan, so we don't repeat them. 265 var succ bool 266 removedEdges := edgeSet{} 267 for removedEdges.Len() < len(j.vertices)-1 { 268 type joinCandidate struct { 269 nextEdgeIdx int 270 nextTableId sql.TableId 271 } 272 var joinCandidates []joinCandidate 273 274 // Find all possible filters that could be used for the next join in the sequence. 275 // Store their corresponding edge and table ids in `joinCandidates`. 276 for i, edge := range j.edges { 277 if removedEdges.Contains(i) { 278 continue 279 } 280 if len(edge.filters) == 0 { 281 continue 282 } 283 if len(edge.filters) != 1 { 284 panic("Found an edge with multiple filters (that was previously validated as an inner join.) This shouldn't be possible.") 285 } 286 filter := edge.filters[0] 287 _, tables, _ := getExprScalarProps(filter) 288 if tables.Len() != 2 { 289 // We have encountered a filter condition more complicated than a simple equality check. 290 // We probably can't optimize this, so bail out. 291 return false 292 } 293 firstTab, _ := tables.Next(1) 294 secondTab, _ := tables.Next(firstTab + 1) 295 if currentlyJoinedTables.Contains(firstTab) { 296 joinCandidates = append(joinCandidates, joinCandidate{ 297 nextEdgeIdx: i, 298 nextTableId: sql.TableId(secondTab), 299 }) 300 } else if currentlyJoinedTables.Contains(secondTab) { 301 joinCandidates = append(joinCandidates, joinCandidate{ 302 nextEdgeIdx: i, 303 nextTableId: sql.TableId(firstTab), 304 }) 305 } 306 } 307 308 if len(joinCandidates) > 1 { 309 // We end up here if there are multiple possible choices for the next join. 310 // This could happen if there are redundant rules. For now, we bail out if this happens. 311 return false 312 } 313 314 if len(joinCandidates) == 0 { 315 // There are still tables left to join, but no more filters that match the already joined tables. 316 // This can happen, for instance, if the remaining table is a single-row table that was cross-joined. 317 // It's probably safe to just join the remaining tables here. 318 remainingVertexes := j.allVertices().difference(currentlyJoinedVertexes) 319 for idx, ok := remainingVertexes.next(0); ok; idx, ok = remainingVertexes.next(idx + 1) { 320 nextVertex := newBitSet(idx) 321 j.addJoin(plan.JoinTypeCross, currentlyJoinedVertexes, nextVertex, nil, nil, false) 322 323 currentlyJoinedVertexes = currentlyJoinedVertexes.union(nextVertex) 324 } 325 return false 326 } 327 328 nextEdgeIdx := joinCandidates[0].nextEdgeIdx 329 nextTableId := joinCandidates[0].nextTableId 330 331 var nextVertex vertexSet 332 for i, id := range j.vertexTableIds { 333 if id == nextTableId { 334 nextVertex = nextVertex.add(uint64(i)) 335 break 336 } 337 } 338 339 edge := j.edges[nextEdgeIdx] 340 341 isRedundant := edge.joinIsRedundant(currentlyJoinedVertexes, nextVertex) 342 j.addJoin(plan.JoinTypeInner, currentlyJoinedVertexes, nextVertex, j.edges[nextEdgeIdx].filters, nil, isRedundant) 343 344 currentlyJoinedVertexes = currentlyJoinedVertexes.union(nextVertex) 345 currentlyJoinedTables.Add(int(nextTableId)) 346 removedEdges.Add(nextEdgeIdx) 347 succ = true 348 } 349 return succ 350 } 351 352 // ensureClosure adds the closure of all transitive equivalency groups 353 // to the join tree. Each transitive edge will add an inner edge, filter, 354 // and join group that inherit join type and tree depth from the original 355 // join tree. 356 func (j *joinOrderBuilder) ensureClosure(grp *ExprGroup) { 357 fds := grp.RelProps.FuncDeps() 358 for _, set := range fds.Equiv().Sets() { 359 for col1, hasNext1 := set.Next(1); hasNext1; col1, hasNext1 = set.Next(col1 + 1) { 360 for col2, hasNext2 := set.Next(col1 + 1); hasNext2; col2, hasNext2 = set.Next(col2 + 1) { 361 if !j.hasEqEdge(col1, col2) { 362 j.makeTransitiveEdge(col1, col2) 363 } 364 } 365 } 366 } 367 } 368 369 // hasEqEdge returns true if the inner edges include a direct equality between 370 // the two given columns (e.g. x = a). 371 func (j joinOrderBuilder) hasEqEdge(leftCol, rightCol sql.ColumnId) bool { 372 for idx, ok := j.innerEdges.Next(0); ok; idx, ok = j.innerEdges.Next(idx + 1) { 373 for _, f := range j.edges[idx].filters { 374 var l *expression.GetField 375 var r *expression.GetField 376 switch f := f.(type) { 377 case *expression.Equals: 378 l, _ = f.Left().(*expression.GetField) 379 r, _ = f.Right().(*expression.GetField) 380 case *expression.NullSafeEquals: 381 l, _ = f.Left().(*expression.GetField) 382 r, _ = f.Right().(*expression.GetField) 383 } 384 if l == nil || r == nil { 385 continue 386 } 387 if (r.Id() == leftCol && l.Id() == rightCol) || 388 (r.Id() == rightCol && l.Id() == leftCol) { 389 return true 390 } 391 } 392 } 393 return false 394 } 395 396 func (j *joinOrderBuilder) findVertexFromCol(col sql.ColumnId) (vertexIndex, GroupId) { 397 for i, v := range j.vertices { 398 if t, ok := v.(SourceRel); ok { 399 if t.Group().RelProps.FuncDeps().All().Contains(col) { 400 return vertexIndex(i), t.Group().Id 401 } 402 } 403 } 404 panic("vertex not found") 405 } 406 407 func (j *joinOrderBuilder) findVertexFromGroup(grp GroupId) vertexIndex { 408 for i, v := range j.vertices { 409 if t, ok := v.(SourceRel); ok { 410 if t.Group().Id == grp { 411 return vertexIndex(i) 412 } 413 } 414 } 415 panic("vertex not found") 416 } 417 418 // makeTransitiveEdge constructs a new join tree edge and memo group 419 // on an equality filter between two columns. 420 func (j *joinOrderBuilder) makeTransitiveEdge(col1, col2 sql.ColumnId) { 421 var vert vertexSet 422 v1, _ := j.findVertexFromCol(col1) 423 v2, _ := j.findVertexFromCol(col2) 424 vert = vert.add(v1).add(v2) 425 426 // find edge where the vertices are provided but partitioned 427 var op *operator 428 for _, e := range j.edges { 429 if vert.isSubsetOf(e.op.leftVertices.union(e.op.rightVertices)) && 430 !vert.isSubsetOf(e.op.leftVertices) && 431 !vert.isSubsetOf(e.op.rightVertices) { 432 op = e.op 433 break 434 } 435 } 436 if op == nil || op.joinType.IsPartial() { 437 // columns are common to one table, not a join edge 438 // or, we are trying to semi join after columns have maybe been projected away 439 return 440 } 441 442 var gf1, gf2 *expression.GetField 443 for _, e := range j.edges { 444 if gf1 != nil && gf2 != nil { 445 break 446 } 447 for _, f := range e.filters { 448 if cmp, ok := f.(expression.Comparer); ok { 449 if gf, ok := cmp.Left().(*expression.GetField); ok && gf.Id() == col1 { 450 gf1 = gf 451 } else if ok && gf.Id() == col2 { 452 gf2 = gf 453 } 454 if gf, ok := cmp.Right().(*expression.GetField); ok && gf.Id() == col1 { 455 gf1 = gf 456 } else if ok && gf.Id() == col2 { 457 gf2 = gf 458 } 459 } 460 } 461 } 462 if gf1 == nil || gf2 == nil { 463 return 464 } 465 466 j.edges = append(j.edges, *j.makeEdge(op, expression.NewEquals(gf1, gf2))) 467 j.innerEdges.Add(len(j.edges) - 1) 468 469 } 470 471 func (j *joinOrderBuilder) buildJoinOp(n *plan.JoinNode) *ExprGroup { 472 leftV, leftE, _ := j.populateSubgraph(n.Left()) 473 rightV, rightE, _ := j.populateSubgraph(n.Right()) 474 typ := n.JoinType() 475 if typ.IsPhysical() { 476 typ = plan.JoinTypeInner 477 } 478 isInner := typ.IsInner() 479 op := &operator{ 480 joinType: typ, 481 leftVertices: leftV, 482 rightVertices: rightV, 483 leftEdges: leftE, 484 rightEdges: rightE, 485 } 486 487 filters := expression.SplitConjunction(n.JoinCond()) 488 union := leftV.union(rightV) 489 group, ok := j.plans[union] 490 if !ok { 491 // TODO: memo and root should be initialized prior to join planning 492 left := j.plans[leftV] 493 right := j.plans[rightV] 494 group = j.memoize(op.joinType, left, right, filters, nil) 495 j.plans[union] = group 496 j.m.root = group 497 } 498 499 if !isInner { 500 j.buildNonInnerEdge(op, filters...) 501 } else { 502 j.buildInnerEdge(op, filters...) 503 } 504 return group 505 } 506 507 func (j *joinOrderBuilder) buildFilter(child sql.Node, e sql.Expression) (vertexSet, edgeSet, *ExprGroup) { 508 // memoize child 509 childV, childE, childGrp := j.populateSubgraph(child) 510 511 filterGrp := j.m.MemoizeFilter(nil, childGrp, expression.SplitConjunction(e)) 512 513 // filter will absorb child relation for join reordering 514 j.plans[childV] = filterGrp 515 516 return childV, childE, filterGrp 517 } 518 519 func (j *joinOrderBuilder) buildProject(n *plan.Project) (vertexSet, edgeSet, *ExprGroup) { 520 // memoize child 521 childV, childE, childGrp := j.populateSubgraph(n.Child) 522 523 projGrp := j.m.MemoizeProject(nil, childGrp, n.Projections) 524 525 // filter will absorb child relation for join reordering 526 j.plans[childV] = projGrp 527 return childV, childE, projGrp 528 } 529 530 func (j *joinOrderBuilder) buildMax1Row(n *plan.Max1Row) (vertexSet, edgeSet, *ExprGroup) { 531 // memoize child 532 childV, childE, childGrp := j.populateSubgraph(n.Child) 533 534 max1Grp := j.m.MemoizeMax1Row(nil, childGrp) 535 536 j.plans[childV] = max1Grp 537 return childV, childE, max1Grp 538 } 539 540 func (j *joinOrderBuilder) buildJoinLeaf(n plan.TableIdNode) *ExprGroup { 541 j.checkSize() 542 543 var rel SourceRel 544 b := &sourceBase{relBase: &relBase{}} 545 switch n := n.(type) { 546 case *plan.ResolvedTable: 547 rel = &TableScan{sourceBase: b, Table: n} 548 case *plan.TableAlias: 549 rel = &TableAlias{sourceBase: b, Table: n} 550 case *plan.RecursiveTable: 551 rel = &RecursiveTable{sourceBase: b, Table: n} 552 case *plan.SubqueryAlias: 553 rel = &SubqueryAlias{sourceBase: b, Table: n} 554 case *plan.RecursiveCte: 555 rel = &RecursiveCte{sourceBase: b, Table: n} 556 case *plan.IndexedTableAccess: 557 rel = &TableScan{sourceBase: b, Table: n.TableNode.(plan.TableIdNode)} 558 case *plan.ValueDerivedTable: 559 rel = &Values{sourceBase: b, Table: n} 560 case *plan.JSONTable: 561 rel = &JSONTable{sourceBase: b, Table: n} 562 case sql.TableFunction: 563 rel = &TableFunc{sourceBase: b, Table: n} 564 case *plan.EmptyTable: 565 rel = &EmptyTable{sourceBase: b, Table: n} 566 case *plan.SetOp: 567 rel = &SetOp{sourceBase: b, Table: n} 568 default: 569 err := fmt.Errorf("%w: %T", ErrUnsupportedReorderNode, n) 570 j.m.HandleErr(err) 571 } 572 573 j.vertices = append(j.vertices, rel) 574 575 // Initialize the plan for this vertex. 576 idx := vertexIndex(len(j.vertices) - 1) 577 relSet := vertexSet(0).add(idx) 578 grp := j.m.memoizeSourceRel(rel) 579 j.plans[relSet] = grp 580 j.vertexGroups = append(j.vertexGroups, grp.Id) 581 j.vertexTableIds = append(j.vertexTableIds, n.Id()) 582 return grp 583 } 584 585 func (j *joinOrderBuilder) buildInnerEdge(op *operator, filters ...sql.Expression) { 586 if len(filters) == 0 { 587 // cross join 588 j.edges = append(j.edges, *j.makeEdge(op)) 589 j.innerEdges.Add(len(j.edges) - 1) 590 return 591 } 592 for _, f := range filters { 593 j.edges = append(j.edges, *j.makeEdge(op, f)) 594 j.innerEdges.Add(len(j.edges) - 1) 595 } 596 } 597 598 func (j *joinOrderBuilder) buildNonInnerEdge(op *operator, filters ...sql.Expression) { 599 // only single edge for non-inner 600 j.edges = append(j.edges, *j.makeEdge(op, filters...)) 601 j.nonInnerEdges.Add(len(j.edges) - 1) 602 } 603 604 func (j *joinOrderBuilder) makeEdge(op *operator, filters ...sql.Expression) *edge { 605 // edge is an instance of operator with a unique set of transform rules depending 606 // on the subset of filters used 607 e := &edge{ 608 op: op, 609 filters: filters, 610 } 611 // we build the graph upwards. so when me make this edge, all 612 // of the dependency operators and edges have already been constructed 613 // TODO: validate malformed join clauses like `ab join xy on a = u` 614 // prior to join planning, execBuilder currently throws getField errors 615 // for these 616 e.populateEdgeProps(j.vertexTableIds, j.edges) 617 return e 618 } 619 620 // checkSize prevents more than 64 tables 621 func (j *joinOrderBuilder) checkSize() { 622 if len(j.vertices) > 1<<7 { 623 panic("tried joining > 64 tables") 624 } 625 } 626 627 // dpSube iterates all disjoint combinations of table sets, 628 // adding plans to the tree when we find two sets that can 629 // be joined 630 func (j *joinOrderBuilder) dbSube() { 631 all := j.allVertices() 632 for subset := vertexSet(1); subset <= all; subset++ { 633 if subset.isSingleton() { 634 continue 635 } 636 for s1 := vertexSet(1); s1 <= subset/2; s1++ { 637 if !s1.isSubsetOf(subset) { 638 continue 639 } 640 s2 := subset.difference(s1) 641 j.addPlans(s1, s2) 642 } 643 } 644 } 645 646 func setPrinter(all, s1, s2 vertexSet) { 647 s1Arr := make([]string, all.len()) 648 for i := range s1Arr { 649 s1Arr[i] = "0" 650 } 651 s2Arr := make([]string, all.len()) 652 for i := range s2Arr { 653 s2Arr[i] = "0" 654 } 655 for idx, ok := s1.next(0); ok; idx, ok = s1.next(idx + 1) { 656 s1Arr[idx] = "1" 657 } 658 for idx, ok := s2.next(0); ok; idx, ok = s2.next(idx + 1) { 659 s2Arr[idx] = "1" 660 } 661 fmt.Printf("s1: %s, s2: %s\n", strings.Join(s1Arr, ""), strings.Join(s2Arr, "")) 662 } 663 664 // addPlans finds operators that let us join (s1 op s2) and (s2 op s1). 665 func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) { 666 // all inner filters could be applied 667 if j.plans[s1] == nil || j.plans[s2] == nil { 668 // Both inputs must have plans. 669 // need this to prevent cross-joins higher in tree 670 return 671 } 672 673 //TODO collect all inner join filters that can be used as select filters 674 //TODO collect functional dependencies to avoid redundant filters 675 //TODO relational nodes track functional dependencies 676 677 var innerJoinFilters []sql.Expression 678 var addInnerJoin bool 679 var isRedundant bool 680 for i, ok := j.innerEdges.Next(0); ok; i, ok = j.innerEdges.Next(i + 1) { 681 e := &j.edges[i] 682 // Ensure that this edge forms a valid connection between the two sets. 683 if e.applicable(s1, s2) { 684 if e.filters != nil { 685 innerJoinFilters = append(innerJoinFilters, e.filters...) 686 } 687 isRedundant = isRedundant || e.joinIsRedundant(s1, s2) 688 addInnerJoin = true 689 } 690 } 691 692 // Because transitive closure can accidentally replace nonInner op with inner op, 693 // avoid building inner plans when an op has a valid nonInner plan. 694 for i, ok := j.nonInnerEdges.Next(0); ok; i, ok = j.nonInnerEdges.Next(i + 1) { 695 e := &j.edges[i] 696 if e.applicable(s1, s2) { 697 j.addJoin(e.op.joinType, s1, s2, e.filters, innerJoinFilters, e.joinIsRedundant(s1, s2)) 698 return 699 } 700 if e.applicable(s2, s1) { 701 // This is necessary because we only iterate s1 up to subset / 2 702 // in DPSube() 703 j.addJoin(e.op.joinType, s2, s1, e.filters, innerJoinFilters, e.joinIsRedundant(s2, s1)) 704 return 705 } 706 } 707 708 if addInnerJoin { 709 // Construct an inner join. Don't add in the case when a non-inner join has 710 // already been constructed, because doing so can lead to a case where an 711 // inner join replaces a non-inner join. 712 if innerJoinFilters == nil { 713 j.addJoin(plan.JoinTypeCross, s1, s2, nil, nil, isRedundant) 714 } else { 715 j.addJoin(plan.JoinTypeInner, s1, s2, innerJoinFilters, nil, isRedundant) 716 } 717 } 718 } 719 720 func (j *joinOrderBuilder) addJoin(op plan.JoinType, s1, s2 vertexSet, joinFilter, selFilters []sql.Expression, isRedundant bool) { 721 if s1.intersects(s2) { 722 panic("sets are not disjoint") 723 } 724 union := s1.union(s2) 725 left := j.plans[s1] 726 right := j.plans[s2] 727 728 group, ok := j.plans[union] 729 if !isRedundant { 730 if !ok { 731 group = j.memoize(op, left, right, joinFilter, selFilters) 732 j.plans[union] = group 733 } else { 734 j.addJoinToGroup(op, left, right, joinFilter, selFilters, group) 735 } 736 } 737 738 if commute(op) { 739 j.addJoinToGroup(op, right, left, joinFilter, selFilters, group) 740 } 741 } 742 743 // addJoinToGroup adds a new plan to existing groups 744 func (j *joinOrderBuilder) addJoinToGroup( 745 op plan.JoinType, 746 left *ExprGroup, 747 right *ExprGroup, 748 joinFilter []sql.Expression, 749 selFilter []sql.Expression, 750 group *ExprGroup, 751 ) { 752 if f, ok := group.First.(*Filter); ok { 753 group = f.Child 754 } 755 rel := j.constructJoin(op, left, right, joinFilter, group) 756 group.Prepend(rel) 757 return 758 } 759 760 // memoize 761 func (j *joinOrderBuilder) memoize( 762 op plan.JoinType, 763 left *ExprGroup, 764 right *ExprGroup, 765 joinFilter []sql.Expression, 766 selFilter []sql.Expression, 767 ) *ExprGroup { 768 rel := j.constructJoin(op, left, right, joinFilter, nil) 769 return j.m.NewExprGroup(rel) 770 } 771 772 func (j *joinOrderBuilder) constructJoin( 773 op plan.JoinType, 774 left *ExprGroup, 775 right *ExprGroup, 776 joinFilter []sql.Expression, 777 group *ExprGroup, 778 ) RelExpr { 779 var rel RelExpr 780 b := &JoinBase{ 781 Op: op, 782 relBase: &relBase{g: group}, 783 Left: left, 784 Right: right, 785 Filter: joinFilter, 786 } 787 switch op { 788 case plan.JoinTypeCross: 789 rel = &CrossJoin{b} 790 case plan.JoinTypeInner: 791 rel = &InnerJoin{b} 792 case plan.JoinTypeFullOuter: 793 rel = &FullOuterJoin{b} 794 case plan.JoinTypeLeftOuter: 795 rel = &LeftJoin{b} 796 case plan.JoinTypeSemi: 797 rel = &SemiJoin{b} 798 case plan.JoinTypeAnti: 799 rel = &AntiJoin{b} 800 case plan.JoinTypeLateralInner, plan.JoinTypeLateralCross, 801 plan.JoinTypeLateralRight, plan.JoinTypeLateralLeft: 802 rel = &LateralJoin{b} 803 b.Op = op 804 default: 805 panic(fmt.Sprintf("unexpected join type: %s", op)) 806 } 807 808 if j.newPlanCb != nil { 809 j.newPlanCb(j, rel) 810 } 811 812 return rel 813 } 814 815 func (j *joinOrderBuilder) allVertices() vertexSet { 816 // all bits set to one 817 return vertexSet((1 << len(j.vertices)) - 1) 818 } 819 820 func (j *joinOrderBuilder) allEdges() edgeSet { 821 all := edgeSet{} 822 for i := range j.edges { 823 all.Add(i) 824 } 825 return all 826 } 827 828 // operator contains the properties of a join operator from the original join 829 // tree. It is used in calculating the total eligibility sets for edges from any 830 // 'parent' joins which were originally above this one in the tree. 831 type operator struct { 832 // joinType is the operator type of the original join operator. 833 joinType plan.JoinType 834 835 // leftVertices is the set of vertexes (base relations) that were in the left 836 // input of the original join operator. 837 leftVertices vertexSet 838 839 // rightVertices is the set of vertexes (base relations) that were in the 840 // right input of the original join operator. 841 rightVertices vertexSet 842 843 // leftEdges is the set of edges that were constructed from join operators 844 // that were in the left input of the original join operator. 845 leftEdges edgeSet 846 847 // rightEdgers is the set of edges that were constructed from join operators 848 // that were in the right input of the original join operator. 849 rightEdges edgeSet 850 } 851 852 // edge is a generalization of a join edge that embeds rules for 853 // determining the applicability of arbitrary subtrees. An edge is added to the 854 // join graph when a new plan can be constructed between two vertexSet. 855 type edge struct { 856 // op is the original join node source for the edge. there are multiple edges 857 // per op for inner joins with conjunct-predicate join conditions. Different predicates 858 // will have different conflict rules. 859 op *operator 860 861 // filters is the set of join filters that will be used to construct new join 862 // ON conditions. 863 filters []sql.Expression 864 freeVars sql.ColSet 865 866 // nullRejectedRels is the set of vertexes on which nulls are rejected by the 867 // filters. We do not set any nullRejectedRels currently, which is not accurate 868 // but prevents potentially invalid transformations. 869 nullRejectedRels vertexSet 870 871 // ses is the syntactic eligibility set of the edge; in other words, it is the 872 // set of base relations (tables) referenced by the filters field. 873 ses vertexSet 874 875 // tes is the total eligibility set of the edge. The TES gives the set of base 876 // relations (vertexes) that must be in the input of any join that uses the 877 // filters from this edge in its ON condition. The TES is initialized with the 878 // SES, and then expanded by the conflict detection algorithm. 879 tes vertexSet 880 881 // rules is a set of conflict rules which must evaluate to true in order for 882 // a join between two sets of vertexes to be valid. 883 rules []conflictRule 884 } 885 886 func (e *edge) populateEdgeProps(tableIds []sql.TableId, edges []edge) { 887 var tables sql.FastIntSet 888 var cols sql.ColSet 889 if len(e.filters) > 0 { 890 for _, e := range e.filters { 891 eCols, eTabs, _ := getExprScalarProps(e) 892 cols = cols.Union(eCols) 893 tables = tables.Union(eTabs) 894 } 895 } 896 897 // TODO vertexes and tableIds? 898 899 e.freeVars = cols 900 901 // TODO implement, we currently limit transforms assuming no strong null safety 902 //e.nullRejectedRels = e.nullRejectingTables(nullAccepting, allNames, allV) 903 904 //SES is vertexSet of all tables referenced in cols 905 e.calcSES(tables, tableIds) 906 // use CD-C to expand dependency sets for operators 907 // front load preventing applicable operators that would push crossjoins 908 e.calcTES(edges) 909 } 910 911 func (e *edge) String() string { 912 b := strings.Builder{} 913 b.WriteString("edge\n") 914 b.WriteString(fmt.Sprintf(" - joinType: %s\n", e.op.joinType.String())) 915 if e.filters != nil { 916 b.WriteString(" - on: ") 917 sep := "" 918 for _, e := range e.filters { 919 b.WriteString(fmt.Sprintf("%s%s", sep, e.String())) 920 } 921 b.WriteString("\n") 922 } 923 b.WriteString(fmt.Sprintf(" - free vars: %s\n", e.freeVars.String())) 924 b.WriteString(fmt.Sprintf(" - ses: %s\n", e.ses.String())) 925 b.WriteString(fmt.Sprintf(" - tes: %s\n", e.tes.String())) 926 b.WriteString(fmt.Sprintf(" - nullRej: %s\n", e.nullRejectedRels.String())) 927 return b.String() 928 } 929 930 // nullRejectingTables is a subset of the SES such that for every 931 // null rejecting table, if all attributes of the table are null, 932 // we can make a strong guarantee that the edge filters will not 933 // evaluate to TRUE (FALSE or NULL are OK). 934 // 935 // For example, the filters (a.x = b.x OR a.x IS NOT NULL) is null 936 // rejecting on (b), but not (a). 937 // 938 // A second more complicated example is null rejecting both on (a,b): 939 // 940 // CASE 941 // WHEN a.x IS NOT NULL THEN a.x = b.x 942 // WHEN a.x <=> 2 THEN TRUE 943 // ELSE NULL 944 // END 945 // 946 // Refer to https://dl.acm.org/doi/10.1145/244810.244812 for more examples. 947 // TODO implement this 948 func (e *edge) nullRejectingTables(nullAccepting []sql.Expression, allNames []string, allV vertexSet) vertexSet { 949 panic("not implemented") 950 } 951 952 // calcSES updates the syntactic eligibility set for an edge. An SES 953 // represents all tables this edge's filters requires as input. 954 func (e *edge) calcSES(tables sql.FastIntSet, tableIds []sql.TableId) { 955 ses := vertexSet(0) 956 for i, ok := tables.Next(0); ok; i, ok = tables.Next(i + 1) { 957 for j, tabId := range tableIds { 958 // table ids, group ids, and vertex ids are all distinct 959 if sql.TableId(i) == tabId { 960 ses = ses.add(vertexIndex(j)) 961 break 962 } 963 } 964 } 965 e.ses = ses 966 } 967 968 // calcTES in place updates an edge's total eligibility set. TES is a way 969 // to expand the eligibility sets (the table dependencies) for an edge to 970 // prevent invalid plans. Most of this is verbatim from the paper, but we 971 // add additional restrictions for cross and left joins. 972 func (e *edge) calcTES(edges []edge) { 973 e.tes = e.ses 974 975 // Degenerate predicates include 1) cross joins and 2) inner joins 976 // whose filters do not restrict that cardinality of the subtree 977 // inputs. We check for both by comparing i) the filter SES to ii) the tables 978 // provided by the left/right subtrees. If one or both do not overlap, 979 // the degenerate edge will be frozen in reference to the original plan 980 // by expanding the TES to require the left/right subtree dependencies. 981 // 982 // note: this is different from the paper, which instead adds a check 983 // to applicable: 984 // op.leftVertices.intersect(s1) || op.rightVertices.intersect(s2) 985 // An operation is only applicable if the left tree provides a subset 986 // of s1 or the right tree provides a subset of s2. This is logically 987 // equivalent to expanding the TES here, but front-loads this logic 988 // because a bigger TES earlier reduces the conflict checking work. 989 if !e.tes.intersects(e.op.leftVertices) { 990 e.tes = e.tes.union(e.op.leftVertices) 991 } 992 if !e.tes.intersects(e.op.rightVertices) { 993 e.tes = e.tes.union(e.op.rightVertices) 994 } 995 996 // left join can't be moved such that we transpose left-dependencies 997 if e.op.joinType.IsLeftOuter() { 998 e.tes = e.tes.union(e.op.leftVertices) 999 } 1000 1001 // CD-C algorithm 1002 // Note: the ordering of the transform(eA, eB) functions are important. 1003 // eA is the subtree child edge targeted for rearrangement. If the ordering 1004 // is switched, the output is nondeterministic. 1005 1006 // iterate every eA in STO(left(eB)) 1007 eB := e 1008 for idx, ok := eB.op.leftEdges.Next(0); ok; idx, ok = eB.op.leftEdges.Next(idx + 1) { 1009 if eB.op.leftVertices.isSubsetOf(eB.tes) { 1010 // Fast path to break out early: the TES includes all relations from the 1011 // left input. 1012 break 1013 } 1014 eA := &edges[idx] 1015 if !assoc(eA, eB) { 1016 // The edges are not associative, so add a conflict rule mapping from the 1017 // right input relations of the child to its left input relations. 1018 rule := conflictRule{from: eA.op.rightVertices} 1019 if eA.op.leftVertices.intersects(eA.ses) { 1020 // A less restrictive conflict rule can be added in this case. 1021 rule.to = eA.op.leftVertices.intersection(eA.ses) 1022 } else { 1023 rule.to = eA.op.leftVertices 1024 } 1025 eB.addRule(rule) 1026 } 1027 if !leftAsscom(eA, eB) { 1028 // Left-asscom does not hold, so add a conflict rule mapping from the 1029 // left input relations of the child to its right input relations. 1030 rule := conflictRule{from: eA.op.leftVertices} 1031 if eA.op.rightVertices.intersects(eA.ses) { 1032 // A less restrictive conflict rule can be added in this case. 1033 rule.to = eA.op.rightVertices.intersection(eA.ses) 1034 } else { 1035 rule.to = eA.op.rightVertices 1036 } 1037 eB.addRule(rule) 1038 } 1039 } 1040 1041 for idx, ok := e.op.rightEdges.Next(0); ok; idx, ok = e.op.rightEdges.Next(idx + 1) { 1042 if e.op.rightVertices.isSubsetOf(e.tes) { 1043 // Fast path to break out early: the TES includes all relations from the 1044 // right input. 1045 break 1046 } 1047 eA := &edges[idx] 1048 if !assoc(eB, eA) { 1049 // The edges are not associative, so add a conflict rule mapping from the 1050 // left input relations of the child to its right input relations. 1051 rule := conflictRule{from: eA.op.leftVertices} 1052 if eA.op.rightVertices.intersects(eA.ses) { 1053 // A less restrictive conflict rule can be added in this case. 1054 rule.to = eA.op.rightVertices.intersection(eA.ses) 1055 } else { 1056 rule.to = eA.op.rightVertices 1057 } 1058 eB.addRule(rule) 1059 } 1060 if !rightAsscom(eB, eA) { 1061 // Right-asscom does not hold, so add a conflict rule mapping from the 1062 // right input relations of the child to its left input relations. 1063 rule := conflictRule{from: eA.op.rightVertices} 1064 if eA.op.leftVertices.intersects(eA.ses) { 1065 // A less restrictive conflict rule can be added in this case. 1066 rule.to = eA.op.leftVertices.intersection(eA.ses) 1067 } else { 1068 rule.to = eA.op.leftVertices 1069 } 1070 eB.addRule(rule) 1071 } 1072 } 1073 } 1074 1075 // addRule adds the given conflict rule to the edge. Before the rule is added to 1076 // the rules set, an effort is made to eliminate the need for the rule. 1077 func (e *edge) addRule(rule conflictRule) { 1078 if rule.from.intersects(e.tes) { 1079 // If the 'from' relation set intersects the total eligibility set, simply 1080 // add the 'to' set to the TES because the rule will always be triggered. 1081 e.tes = e.tes.union(rule.to) 1082 return 1083 } 1084 if rule.to.isSubsetOf(e.tes) { 1085 // If the 'to' relation set is a subset of the total eligibility set, the 1086 // rule is a do-nothing. 1087 return 1088 } 1089 e.rules = append(e.rules, rule) 1090 } 1091 1092 func (e *edge) applicable(s1, s2 vertexSet) bool { 1093 if !e.checkRules(s1, s2) { 1094 // The conflict rules for this edge are not satisfied for a join between s1 1095 // and s2. 1096 return false 1097 } 1098 switch e.op.joinType { 1099 case plan.JoinTypeInner: 1100 // The TES must be a subset of the relations of the candidate join inputs. In 1101 // addition, the TES must intersect both s1 and s2 (the edge must connect the 1102 // two vertex sets). 1103 return e.tes.isSubsetOf(s1.union(s2)) && e.tes.intersects(s1) && e.tes.intersects(s2) 1104 default: 1105 // The left TES must be a subset of the s1 relations, and the right TES must 1106 // be a subset of the s2 relations. In addition, the TES must intersect both 1107 // s1 and s2 (the edge must connect the two vertex sets). 1108 return e.tes.intersection(e.op.leftVertices).isSubsetOf(s1) && 1109 e.tes.intersection(e.op.rightVertices).isSubsetOf(s2) && 1110 e.tes.intersects(s1) && e.tes.intersects(s2) 1111 } 1112 } 1113 1114 // checkRules iterates through the edge's rules and returns false if a conflict 1115 // is detected for the given sets of join input relations. Otherwise, returns 1116 // true. 1117 func (e *edge) checkRules(s1, s2 vertexSet) bool { 1118 s := s1.union(s2) 1119 for _, rule := range e.rules { 1120 if rule.from.intersects(s) && !rule.to.isSubsetOf(s) { 1121 // The join is invalid because it does not obey this conflict rule. 1122 return false 1123 } 1124 } 1125 return true 1126 } 1127 1128 // joinIsRedundant returns true if a join between the two sets of base relations 1129 // was already present in the original join tree. If so, enumerating this join 1130 // would be redundant, so it should be skipped. 1131 func (e *edge) joinIsRedundant(s1, s2 vertexSet) bool { 1132 return e.op.leftVertices == s1 && e.op.rightVertices == s2 1133 } 1134 1135 type assocTransform func(eA, eB *edge) bool 1136 1137 // assoc checks whether the associate is applicable 1138 // to a binary operator tree. We consider 1) generating cross joins, 1139 // and 2) the left/right operator join types for this specific transform. 1140 // The below is a valid association that generates no crossjoin: 1141 // 1142 // (e2 op_a_12 e1) op_b_13 e3 1143 // => 1144 // e2 op_a_12 (e1 op_b_13 e3) 1145 // 1146 // note: important to compare edge ordering for left deep tree. 1147 func assoc(eA, eB *edge) bool { 1148 if eB.ses.intersects(eA.op.leftVertices) || eA.ses.intersects(eB.op.rightVertices) { 1149 // associating two operators can estrange the distant relation. 1150 // for example: 1151 // (e2 op_a_12 e1) op_b_13 e3 1152 // => 1153 // e2 op_a_12 (e1 op_b_13 e3) 1154 // The first operator, a, takes explicit dependencies on e1 and e2. 1155 // The second operator, b, takes explicit dependencies on e1 and e3. 1156 // Associating these two will isolate e2 from op_b for the downward 1157 // transform, and e3 from op_a on the upward transform, both of which 1158 // are valid. The same is not true for the transform below: 1159 // (e2 op_a_12 e1) op_b_32 e3 1160 // => 1161 // e2 op_a_12 (e1 op_b_32 e3) 1162 // Isolating e2 from op_b makes op_b degenerate, producing a cross join. 1163 return false 1164 } 1165 return checkProperty(assocTable, eA, eB) 1166 } 1167 1168 // leftAsscom checks whether the left-associate+commute is applicable 1169 // to a binary operator tree. We consider 1) generating cross joins, 1170 // and 2) the left/right operator join types for this specific transform. 1171 // For example: 1172 // 1173 // (e1 op_a_12 e2) op_b_13 e3 1174 // => 1175 // (e1 op_b_13 e3) op_a_12 e2 1176 func leftAsscom(eA, eB *edge) bool { 1177 if eB.ses.intersects(eA.op.rightVertices) || eA.ses.intersects(eB.op.rightVertices) { 1178 // Associating two operators can estrange the distant relation. 1179 // For example: 1180 // (e1 op_a_12 e2) op_b_23 e3 1181 // => 1182 // (e1 op_b_23 e3) op_a_12 e2 1183 // Isolating e2 from op_b makes op_b degenerate, producing a cross join. 1184 return false 1185 } 1186 return checkProperty(leftAsscomTable, eA, eB) 1187 } 1188 1189 // rAsscom checks whether the right-associate+commute is applicable 1190 // to a binary operator tree. We consider 1) generating cross joins, 1191 // and 2) the left/right operator join types for this specific transform. 1192 // For example: 1193 // 1194 // e1 op_b_13 (e2 op_a_23 e3) 1195 // => 1196 // e2 op_a_23 (e1 op_b_13 e3) 1197 func rightAsscom(eA, eB *edge) bool { 1198 if eB.ses.intersects(eA.op.leftVertices) || eA.ses.intersects(eB.op.leftVertices) { 1199 // Associating two operators can estrange the distant relation. 1200 // For example: 1201 // e3 op_b_23 (e1 op_a_12 e3) 1202 // => 1203 // e2 op_a_12 (e1 op_b_23 e3) 1204 // Isolating e2 from op_b makes op_b degenerate, producing a cross join. 1205 return false 1206 } 1207 return checkProperty(rightAsscomTable, eA, eB) 1208 } 1209 1210 // commute transforms an operator tree by alternating child 1211 // join ordering. 1212 // For example: 1213 // 1214 // e1 op e2 1215 // => 1216 // e2 op e1 1217 func commute(op plan.JoinType) bool { 1218 return op == plan.JoinTypeInner || op == plan.JoinTypeCross 1219 } 1220 1221 // conflictRule is a pair of vertex sets which carry the requirement that if the 1222 // 'from' set intersects a set of prospective join input relations, then the 1223 // 'to' set must be a subset of the input relations (from -> to). Take the 1224 // following query as an example: 1225 // 1226 // SELECT * FROM xy 1227 // INNER JOIN (SELECT * FROM ab LEFT JOIN uv ON a = u) 1228 // ON x = a 1229 // 1230 // During execution of the CD-C algorithm, the following conflict rule would 1231 // be added to inner join edge: [uv -> ab]. This means that, for any join that 1232 // uses this edge, the presence of uv in the set of input relations implies the 1233 // presence of ab. This prevents an inner join between relations xy and uv 1234 // (since then ab would not be an input relation). Note that, in practice, this 1235 // conflict rule would be absorbed into the TES because ab is a part of the 1236 // inner join edge's SES (see the addRule func). 1237 type conflictRule struct { 1238 from vertexSet 1239 to vertexSet 1240 } 1241 1242 // lookupTableEntry is an entry in one of the join property Lookup tables 1243 // defined below (associative, left-asscom and right-asscom properties). A 1244 // lookupTableEntry can be unconditionally true or false, as well as true 1245 // conditional on the null-rejecting properties of the edge filters. 1246 type lookupTableEntry uint8 1247 1248 const ( 1249 // never indicates that the transformation represented by the table entry is 1250 // unconditionally incorrect. 1251 never lookupTableEntry = 0 1252 1253 // always indicates that the transformation represented by the table entry is 1254 // unconditionally correct. 1255 always lookupTableEntry = 1 << (iota - 1) 1256 1257 // filterA indicates that the filters of the "A" join edge must reject 1258 // nulls for the set of vertexes specified by rejectsOnLeftA, rejectsOnRightA, 1259 // etc. 1260 filterA 1261 1262 // filterB indicates that the filters of the "B" join edge must reject 1263 // nulls for the set of vertexes specified by rejectsOnLeftA, rejectsOnRightA, 1264 // etc. 1265 filterB 1266 1267 // rejectsOnLeftA indicates that the filters must reject nulls for the left 1268 // input relations of edge "A". 1269 rejectsOnLeftA 1270 1271 // rejectsOnRightA indicates that the filters must reject nulls for the right 1272 // input relations of edge "A". 1273 rejectsOnRightA 1274 1275 // rejectsOnRightB indicates that the filters must reject nulls for the right 1276 // input relations of edge "B". 1277 rejectsOnRightB 1278 1279 // table2Note1 indicates that the filters of edge "B" must reject nulls on 1280 // the relations of the right input of edge "A". 1281 // Citations: [8] Table 2 Footnote 1. 1282 table2Note1 = filterB | rejectsOnRightA 1283 1284 // table2Note2 indicates that the filters of operators "A" and "B" must reject 1285 // nulls on the relations of the right input of edge "A". 1286 // Citations: [8] Table 2 Footnote 2. 1287 table2Note2 = (filterA | filterB) | rejectsOnRightA 1288 1289 // table3Note1 indicates that the filters of edge "A" must reject nulls on 1290 // the relations of the left input of edge "A". 1291 // Citations: [8] Table 3 Footnote 1. 1292 table3Note1 = filterA | rejectsOnLeftA 1293 1294 // table3Note2 indicates that the filters of edge "B" must reject nulls on 1295 // the relations of the right input of edge "B". 1296 // Citations: [8] Table 3 Footnote 1]2. 1297 table3Note2 = filterB | rejectsOnRightB 1298 1299 // table3Note3 indicates that the filters of operators "A" and "B" must reject 1300 // nulls on the relations of the left input of edge "A". 1301 // Citations: [8] Table 3 Footnote 3. 1302 table3Note3 = (filterA | filterB) | rejectsOnLeftA 1303 1304 // table3Note4 indicates that the filters of operators "A" and "B" must reject 1305 // nulls on the relations of the right input of edge "B". 1306 // Citations: [8] Table 3 Footnote 4. 1307 table3Note4 = (filterA | filterB) | rejectsOnRightB 1308 ) 1309 1310 // assocTable is a Lookup table indicating whether it is correct to apply the 1311 // associative transformation to pairs of join operators. 1312 // citations: [8] table 2 1313 var assocTable = [8][8]lookupTableEntry{ 1314 // cross-B inner-B semi-B anti-B left-B full-B group-B lateral-B 1315 /* cross-A */ {always, always, always, always, always, never, always, never}, 1316 /* inner-A */ {always, always, always, always, always, never, always, never}, 1317 /* semi-A */ {never, never, never, never, never, never, never, never}, 1318 /* anti-A */ {never, never, never, never, never, never, never, never}, 1319 /* left-A */ {never, never, never, never, table2Note1, never, never, never}, 1320 /* full-A */ {never, never, never, never, table2Note1, table2Note2, never}, 1321 /* group-A */ {never, never, never, never, never, never, never, never}, 1322 /* lateral-A */ {never, never, never, never, never, never, never, never}, 1323 } 1324 1325 // leftAsscomTable is a Lookup table indicating whether it is correct to apply 1326 // the left-asscom transformation to pairs of join operators. 1327 // citations: [8] table 3 1328 var leftAsscomTable = [8][8]lookupTableEntry{ 1329 // cross-A inner-B semi-B anti-B left-B full-B group-B lateral-B 1330 /* cross-A */ {always, always, always, always, always, never, always, never}, 1331 /* inner-A */ {always, always, always, always, always, never, always, never}, 1332 /* semi-A */ {always, always, always, always, always, never, always, never}, 1333 /* anti-A */ {always, always, always, always, always, never, always, never}, 1334 /* left-A */ {always, always, always, always, always, table3Note1, always, never}, 1335 /* full-A */ {never, never, never, never, table3Note2, table3Note3, never, never}, 1336 /* group-A */ {always, always, always, always, always, never, always, never}, 1337 /* lateral-A */ {never, never, never, never, never, never, never, never}, 1338 } 1339 1340 // rightAsscomTable is a Lookup table indicating whether it is correct to apply 1341 // the right-asscom transformation to pairs of join operators. 1342 // citations: [8] table 3 1343 var rightAsscomTable = [8][8]lookupTableEntry{ 1344 // cross-B inner-B semi-B anti-B left-B full-B group-B lateral-B 1345 /* cross-A */ {always, always, never, never, never, never, never, never}, 1346 /* inner-A */ {always, always, never, never, never, never, never, never}, 1347 /* semi-A */ {never, never, never, never, never, never, never, never}, 1348 /* anti-A */ {never, never, never, never, never, never, never, never}, 1349 /* left-A */ {never, never, never, never, never, never, never}, 1350 /* full-A */ {never, never, never, never, never, table3Note4, never, never}, 1351 /* group-A */ {never, never, never, never, never, never, never, never}, 1352 /* lateral-A */ {never, never, never, never, never, never, never, never}, 1353 } 1354 1355 // checkProperty returns true if the transformation represented by the given 1356 // property Lookup table is allowed for the two given edges. Note that while 1357 // most table entries are either true or false, some are conditionally true, 1358 // depending on the null-rejecting properties of the edge filters (for example, 1359 // association for two full joins). 1360 func checkProperty(table [8][8]lookupTableEntry, edgeA, edgeB *edge) bool { 1361 entry := table[getOpIdx(edgeA)][getOpIdx(edgeB)] 1362 1363 if entry == never { 1364 // Application of this transformation property is unconditionally incorrect. 1365 return false 1366 } 1367 if entry == always { 1368 // Application of this transformation property is unconditionally correct. 1369 return true 1370 } 1371 1372 // This property is conditionally applicable. Get the relations that must be 1373 // null-rejected by the filters. 1374 var candidateNullRejectRels vertexSet 1375 if entry&rejectsOnLeftA != 0 { 1376 // Filters must null-reject on the left input vertexes of edgeA. 1377 candidateNullRejectRels = edgeA.op.leftVertices 1378 } else if entry&rejectsOnRightA != 0 { 1379 // Filters must null-reject on the right input vertexes of edgeA. 1380 candidateNullRejectRels = edgeA.op.rightVertices 1381 } else if entry&rejectsOnRightB != 0 { 1382 // Filters must null-reject on the right input vertexes of edgeB. 1383 candidateNullRejectRels = edgeA.op.rightVertices 1384 } 1385 1386 // Check whether the edge filters reject nulls on nullRejectRelations. 1387 if entry&filterA != 0 { 1388 // The filters of edgeA must reject nulls on one or more of the relations in 1389 // nullRejectRelations. 1390 if !edgeA.nullRejectedRels.intersects(candidateNullRejectRels) { 1391 return false 1392 } 1393 } 1394 if entry&filterB != 0 { 1395 // The filters of edgeB must reject nulls on one or more of the relations in 1396 // nullRejectRelations. 1397 if !edgeB.nullRejectedRels.intersects(candidateNullRejectRels) { 1398 return false 1399 } 1400 } 1401 return true 1402 } 1403 1404 // getOpIdx returns an index into the join property static Lookup tables given an edge 1405 // with an associated edge type. I originally used int(joinType), but this is fragile 1406 // to reordering the type definitions. 1407 func getOpIdx(e *edge) int { 1408 switch e.op.joinType { 1409 case plan.JoinTypeCross: 1410 return 0 1411 case plan.JoinTypeInner: 1412 return 1 1413 case plan.JoinTypeSemi: 1414 return 2 1415 case plan.JoinTypeAnti: 1416 return 3 1417 case plan.JoinTypeLeftOuter: 1418 return 4 1419 case plan.JoinTypeFullOuter: 1420 return 5 1421 case plan.JoinTypeGroupBy: 1422 return 6 1423 case plan.JoinTypeLateralInner, plan.JoinTypeLateralCross, 1424 plan.JoinTypeLateralRight, plan.JoinTypeLateralLeft: 1425 return 7 1426 default: 1427 panic(fmt.Sprintf("invalid operator: %v", e.op.joinType)) 1428 } 1429 } 1430 1431 type edgeSet = sql.FastIntSet 1432 1433 type bitSet uint64 1434 1435 // vertexSet represents a set of base relations that form the vertexes of the 1436 // join graph. 1437 type vertexSet = bitSet 1438 1439 const maxSetSize = 63 1440 1441 // vertexIndex represents the ordinal position of a base relation in the 1442 // JoinOrderBuilder vertexes field. vertexIndex must be less than maxSetSize. 1443 type vertexIndex = uint64 1444 1445 func newBitSet(idxs ...uint64) (res bitSet) { 1446 for _, idx := range idxs { 1447 res = res.add(idx) 1448 } 1449 return res 1450 } 1451 1452 // add returns a copy of the bitSet with the given element added. 1453 func (s bitSet) add(idx uint64) bitSet { 1454 if idx > maxSetSize { 1455 panic(fmt.Sprintf("cannot insert %d into bitSet", idx)) 1456 } 1457 return s | (1 << idx) 1458 } 1459 1460 // remove returns a copy of the bitSet with the given element removed. 1461 func (s bitSet) remove(idx uint64) bitSet { 1462 if idx > maxSetSize { 1463 panic(fmt.Sprintf("%d is invalid index for bitSet", idx)) 1464 } 1465 return s & ^(1 << idx) 1466 } 1467 1468 // contains returns whether a bitset contains a given element. 1469 func (s bitSet) contains(idx uint64) bool { 1470 if idx > maxSetSize { 1471 panic(fmt.Sprintf("%d is invalid index for bitSet", idx)) 1472 } 1473 return s&(1<<idx) != 0 1474 } 1475 1476 // union returns the set union of this set with the given set. 1477 func (s bitSet) union(o bitSet) bitSet { 1478 return s | o 1479 } 1480 1481 // intersection returns the set intersection of this set with the given set. 1482 func (s bitSet) intersection(o bitSet) bitSet { 1483 return s & o 1484 } 1485 1486 // difference returns the set difference of this set with the given set. 1487 func (s bitSet) difference(o bitSet) bitSet { 1488 return s & ^o 1489 } 1490 1491 // intersects returns true if this set and the given set intersect. 1492 func (s bitSet) intersects(o bitSet) bool { 1493 return s.intersection(o) != 0 1494 } 1495 1496 // isSubsetOf returns true if this set is a subset of the given set. 1497 func (s bitSet) isSubsetOf(o bitSet) bool { 1498 return s.union(o) == o 1499 } 1500 1501 // isSingleton returns true if the set has exactly one element. 1502 func (s bitSet) isSingleton() bool { 1503 return s > 0 && (s&(s-1)) == 0 1504 } 1505 1506 // next returns the next element in the set after the given start index, and 1507 // a bool indicating whether such an element exists. 1508 func (s bitSet) next(startVal uint64) (elem uint64, ok bool) { 1509 if startVal < maxSetSize { 1510 if ntz := bits.TrailingZeros64(uint64(s >> startVal)); ntz < 64 { 1511 return startVal + uint64(ntz), true 1512 } 1513 } 1514 return uint64(math.MaxInt64), false 1515 } 1516 1517 // len returns the number of elements in the set. 1518 func (s bitSet) len() int { 1519 return bits.OnesCount64(uint64(s)) 1520 } 1521 1522 func (s bitSet) String() string { 1523 var str string 1524 var i vertexSet = 1 1525 cnt := 0 1526 for cnt < s.len() { 1527 if (i & s) != 0 { 1528 str += "1" 1529 cnt++ 1530 } else { 1531 str += "0" 1532 } 1533 i = i << 1 1534 } 1535 return str 1536 }