github.com/dolthub/go-mysql-server@v0.18.0/sql/memo/join_order_builder.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package memo
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"math"
    21  	"math/bits"
    22  	"strings"
    23  
    24  	"github.com/dolthub/go-mysql-server/sql"
    25  	"github.com/dolthub/go-mysql-server/sql/expression"
    26  	"github.com/dolthub/go-mysql-server/sql/plan"
    27  )
    28  
    29  // joinOrderBuilder enumerates valid plans for a join tree.  We build the join
    30  // tree bottom up, first joining single nodes with join condition "edges", then
    31  // single nodes to hypernodes (1+n), and finally hyper nodes to
    32  // other hypernodes (n+m).
    33  //
    34  // Every valid combination of subtrees is considered with two exceptions.
    35  //
    36  // 1) Cross joins and other joins with degenerate predicates are never pushed
    37  // lower in the tree.
    38  //
    39  // 2) Transformations that are valid but create degenerate filters (new
    40  // cross joins) are not considered.
    41  //
    42  // The logic for this module is sourced from
    43  // https://www.researchgate.net/publication/262216932_On_the_correct_and_complete_enumeration_of_the_core_search_space
    44  // with help from
    45  // https://github.com/cockroachdb/cockroach/blob/master/pkg/sql/opt/xform/join_order_builder.go.
    46  //
    47  // Two theoretical observations underpin the enumeration:
    48  //
    49  // 1) Either associativity or l-asscom can be applied to left nesting, but not
    50  // both. Either associativity or r-asscom can be applied to right nesting, but
    51  // not both.
    52  //
    53  // 2) Every transformation in the core search space (of two operators) can be
    54  // reached by one commutative transformation on each operator and one assoc,
    55  // l-asscom, or r-asscom.
    56  //
    57  // We use these assumptions to implement the dbSube enumeration search, using a
    58  // CD-C conflict detection rules to encode reordering applicability:
    59  //
    60  // 1) Use bitsets to iterate all combinations of join plan subtrees, starting
    61  // with two tables and building upwards. For example, a join A x B x C would
    62  // start with 10 x 01 (A x B) and build up to sets 100 x 011 (A x (BC)) and 101
    63  // x 010 ((AC) x B).
    64  //
    65  // 2) Attempt to make a new plan with every combination of subtrees
    66  // (hypernodes) for every join operator. This takes the form (op s1 s2), where
    67  // s1 is the right subtree, s2 is the left subtree, and op is the edge
    68  // corresponding to a specific join type and filter. Most of the time one
    69  // operator => one edge, except when join conjunctions are split to make
    70  // multiple edges for one join operator. We differentiate innerEdges and
    71  // nonInnerEdges to avoid innerJoins plans overwriting the often slower
    72  // nonInner join plans. A successful edge between two sets adds a join plan to
    73  // the memo.
    74  //
    75  // 3) Save the best plan for every memo group (unordered group of table joins)
    76  // moving upwards through the tree, finishing with the optimal memo group for
    77  // the join including every table.
    78  //
    79  // Applicability rules:
    80  //
    81  // We build applicability rules before exhaustive enumeration to filter valid
    82  // plans. We consider a reordering valid by checking:
    83  //
    84  // 1) Transform compatibility: a Lookup table between two join operator types
    85  //
    86  // 2) Eligibility sets: left and right subtree dependencies required for a
    87  // valid reordering. The syntactic eligibility set (SES) is the table
    88  // dependencies of the edge's filter. For example, the SES for a filter a.x =
    89  // b.y is (a,b).  The total eligibility set (TES) is an expansion of the SES
    90  // that conceptually behaves similarly to an SES; i.e. only hypernodes that
    91  // completely intersect an edge's TES are valid. The difference is that TES is
    92  // expanded based on the original edge's join operator subdependencies.  The
    93  // paper referenced encodes an algorithms for building a TES to fully encode
    94  // associativity, left-asscom, and right-asscom (commutativity is a unary
    95  // operator property).
    96  //
    97  // For example, the INNER JOIN in the query below is subject to assoc(left
    98  // join, inner join) = false:
    99  //
   100  //	SELECT *
   101  //	FROM (SELECT * FROM ab LEFT JOIN uv ON a = u)
   102  //	INNER JOIN xy
   103  //	ON x = v
   104  //
   105  // As a result, the inner join's TES, initialized as the SES(xy, uv), is
   106  // expanded to include ab. The final TES(xy, uv, ab) invalidates
   107  // LEFT JOIN association during exhaustive enumeration:
   108  //
   109  //	SELECT *
   110  //	FROM (SELECT * FROM ab LEFT JOIN xy ON x = v)
   111  //	INNER JOIN uv
   112  //	ON a = u
   113  //
   114  // Note the disconnect between the global nature of the TES, which is built
   115  // fully before enumeration, and local nature of testing every enumeration
   116  // against the pre-computed TES.
   117  //
   118  // In special cases, the TES is not expressive enough to represent certain
   119  // table dependencies. Conflict rules of the form R1 -> R2 are an escape hatch
   120  // to require the dependency table set R2 when any subset of R1 is present in a
   121  // candidate plan.
   122  //
   123  // TODO: null rejecting tables
   124  type joinOrderBuilder struct {
   125  	// plans maps from a set of base relations to the memo group for the join tree
   126  	// that contains those relations (and only those relations). As an example,
   127  	// the group for [xy, ab, uv] might contain the join trees (xy, (ab, uv)),
   128  	// ((xy, ab), uv), (ab, (xy, uv)), etc.
   129  	//
   130  	// The group for a single base relation is the base relation itself.
   131  	m                         *Memo
   132  	plans                     map[vertexSet]*ExprGroup
   133  	edges                     []edge
   134  	vertices                  []RelExpr
   135  	vertexGroups              []GroupId
   136  	vertexTableIds            []sql.TableId
   137  	innerEdges                edgeSet
   138  	nonInnerEdges             edgeSet
   139  	newPlanCb                 func(j *joinOrderBuilder, rel RelExpr)
   140  	forceFastDFSLookupForTest bool
   141  	hasCrossJoin              bool
   142  }
   143  
   144  func NewJoinOrderBuilder(memo *Memo) *joinOrderBuilder {
   145  	return &joinOrderBuilder{
   146  		plans:        make(map[vertexSet]*ExprGroup),
   147  		m:            memo,
   148  		vertices:     make([]RelExpr, 0),
   149  		vertexGroups: make([]GroupId, 0),
   150  	}
   151  }
   152  
   153  var ErrUnsupportedReorderNode = errors.New("unsupported join reorder node")
   154  
   155  // useFastReorder determines whether to skip the current brute force join planning and use an alternate
   156  // planning algorithm that analyzes the join tree to find a sequence that can be implemented purely as lookup joins.
   157  // Currently we only use it for large joins (20+ tables) with no join hints.
   158  func (j *joinOrderBuilder) useFastReorder() bool {
   159  	if j.forceFastDFSLookupForTest {
   160  		return true
   161  	}
   162  	if j.m.hints.order != nil {
   163  		return false
   164  	}
   165  	return len(j.vertices) > 15
   166  }
   167  
   168  func (j *joinOrderBuilder) ReorderJoin(n sql.Node) {
   169  	j.populateSubgraph(n)
   170  	if j.useFastReorder() {
   171  		j.buildSingleLookupPlan()
   172  		return
   173  	} else if j.hasCrossJoin {
   174  		// Rely on FastReorder to avoid plans that drop filters with cross joins
   175  		if j.buildSingleLookupPlan() {
   176  			return
   177  		}
   178  	}
   179  	// TODO: consider if buildSingleLookupPlan can/should run after ensureClosure. This could allow us to use analysis
   180  	// from ensureClosure in buildSingleLookupPlan, but the equivalence sets could create multiple possible join orders
   181  	// for the single-lookup plan, which would complicate things.
   182  	j.ensureClosure(j.m.root)
   183  	j.dbSube()
   184  	return
   185  }
   186  
   187  // populateSubgraph recursively tracks new join nodes as edges and new
   188  // leaf nodes as vertices to the joinOrderBuilder graph, returning
   189  // the subgraph's newly tracked vertices and edges.
   190  func (j *joinOrderBuilder) populateSubgraph(n sql.Node) (vertexSet, edgeSet, *ExprGroup) {
   191  	var group *ExprGroup
   192  	startV := j.allVertices()
   193  	startE := j.allEdges()
   194  	// build operator
   195  	switch n := n.(type) {
   196  	case *plan.Filter:
   197  		return j.buildFilter(n.Child, n.Expression)
   198  	case *plan.Having:
   199  		return j.buildFilter(n.Child, n.Cond)
   200  	case *plan.Limit:
   201  		_, _, group = j.populateSubgraph(n.Child)
   202  		group.RelProps.Limit = n.Limit
   203  	case *plan.Project:
   204  		return j.buildProject(n)
   205  	case *plan.Sort:
   206  		_, _, group = j.populateSubgraph(n.Child)
   207  		group.RelProps.sort = n.SortFields
   208  	case *plan.Distinct:
   209  		_, _, group = j.populateSubgraph(n.Child)
   210  		group.RelProps.Distinct = HashDistinctOp
   211  	case *plan.Max1Row:
   212  		return j.buildMax1Row(n)
   213  	case *plan.JoinNode:
   214  		group = j.buildJoinOp(n)
   215  		if n.Op == plan.JoinTypeCross {
   216  			j.hasCrossJoin = true
   217  		}
   218  	case *plan.SetOp:
   219  		group = j.buildJoinLeaf(n)
   220  	case sql.NameableNode:
   221  		group = j.buildJoinLeaf(n.(plan.TableIdNode))
   222  	case *plan.StripRowNode:
   223  		return j.populateSubgraph(n.Child)
   224  	case *plan.CachedResults:
   225  		return j.populateSubgraph(n.Child)
   226  	default:
   227  		err := fmt.Errorf("%w: %T", ErrUnsupportedReorderNode, n)
   228  		j.m.HandleErr(err)
   229  	}
   230  	return j.allVertices().difference(startV), j.allEdges().Difference(startE), group
   231  }
   232  
   233  // buildSingleLookupPlan attempts to build a plan consisting only of lookup joins.
   234  func (j *joinOrderBuilder) buildSingleLookupPlan() bool {
   235  	fds := j.m.root.RelProps.FuncDeps()
   236  	fdKey, hasKey := fds.StrictKey()
   237  	// fdKey is a set of columns which constrain all other columns in the join.
   238  	// If a chain of lookups exist, then the columns in fdKey must be in the innermost join.
   239  	if !hasKey {
   240  		return false
   241  	}
   242  	// We need to include all of the fdKey columns in the innermost join.
   243  	// For now, we just handle the case where the key is exactly one column.
   244  	if fdKey.Len() != 1 {
   245  		return false
   246  	}
   247  	for _, edge := range j.edges {
   248  		if !edge.op.joinType.IsInner() {
   249  			// This optimization currently only supports inner joins.
   250  			return false
   251  		}
   252  	}
   253  	keyColumn, _ := fdKey.Next(1)
   254  	var currentlyJoinedTables sql.FastIntSet
   255  	var currentlyJoinedVertexes vertexSet
   256  	for i, n := range j.m.Root().RelProps.TableIdNodes() {
   257  		if n.Columns().Contains(keyColumn) {
   258  			currentlyJoinedTables.Add(int(n.Id()))
   259  			currentlyJoinedVertexes = currentlyJoinedVertexes.add(uint64(i))
   260  			break
   261  		}
   262  	}
   263  
   264  	// removedEdges contains the edges that have already been incorporated into the new plan, so we don't repeat them.
   265  	var succ bool
   266  	removedEdges := edgeSet{}
   267  	for removedEdges.Len() < len(j.vertices)-1 {
   268  		type joinCandidate struct {
   269  			nextEdgeIdx int
   270  			nextTableId sql.TableId
   271  		}
   272  		var joinCandidates []joinCandidate
   273  
   274  		// Find all possible filters that could be used for the next join in the sequence.
   275  		// Store their corresponding edge and table ids in `joinCandidates`.
   276  		for i, edge := range j.edges {
   277  			if removedEdges.Contains(i) {
   278  				continue
   279  			}
   280  			if len(edge.filters) == 0 {
   281  				continue
   282  			}
   283  			if len(edge.filters) != 1 {
   284  				panic("Found an edge with multiple filters (that was previously validated as an inner join.) This shouldn't be possible.")
   285  			}
   286  			filter := edge.filters[0]
   287  			_, tables, _ := getExprScalarProps(filter)
   288  			if tables.Len() != 2 {
   289  				// We have encountered a filter condition more complicated than a simple equality check.
   290  				// We probably can't optimize this, so bail out.
   291  				return false
   292  			}
   293  			firstTab, _ := tables.Next(1)
   294  			secondTab, _ := tables.Next(firstTab + 1)
   295  			if currentlyJoinedTables.Contains(firstTab) {
   296  				joinCandidates = append(joinCandidates, joinCandidate{
   297  					nextEdgeIdx: i,
   298  					nextTableId: sql.TableId(secondTab),
   299  				})
   300  			} else if currentlyJoinedTables.Contains(secondTab) {
   301  				joinCandidates = append(joinCandidates, joinCandidate{
   302  					nextEdgeIdx: i,
   303  					nextTableId: sql.TableId(firstTab),
   304  				})
   305  			}
   306  		}
   307  
   308  		if len(joinCandidates) > 1 {
   309  			// We end up here if there are multiple possible choices for the next join.
   310  			// This could happen if there are redundant rules. For now, we bail out if this happens.
   311  			return false
   312  		}
   313  
   314  		if len(joinCandidates) == 0 {
   315  			// There are still tables left to join, but no more filters that match the already joined tables.
   316  			// This can happen, for instance, if the remaining table is a single-row table that was cross-joined.
   317  			// It's probably safe to just join the remaining tables here.
   318  			remainingVertexes := j.allVertices().difference(currentlyJoinedVertexes)
   319  			for idx, ok := remainingVertexes.next(0); ok; idx, ok = remainingVertexes.next(idx + 1) {
   320  				nextVertex := newBitSet(idx)
   321  				j.addJoin(plan.JoinTypeCross, currentlyJoinedVertexes, nextVertex, nil, nil, false)
   322  
   323  				currentlyJoinedVertexes = currentlyJoinedVertexes.union(nextVertex)
   324  			}
   325  			return false
   326  		}
   327  
   328  		nextEdgeIdx := joinCandidates[0].nextEdgeIdx
   329  		nextTableId := joinCandidates[0].nextTableId
   330  
   331  		var nextVertex vertexSet
   332  		for i, id := range j.vertexTableIds {
   333  			if id == nextTableId {
   334  				nextVertex = nextVertex.add(uint64(i))
   335  				break
   336  			}
   337  		}
   338  
   339  		edge := j.edges[nextEdgeIdx]
   340  
   341  		isRedundant := edge.joinIsRedundant(currentlyJoinedVertexes, nextVertex)
   342  		j.addJoin(plan.JoinTypeInner, currentlyJoinedVertexes, nextVertex, j.edges[nextEdgeIdx].filters, nil, isRedundant)
   343  
   344  		currentlyJoinedVertexes = currentlyJoinedVertexes.union(nextVertex)
   345  		currentlyJoinedTables.Add(int(nextTableId))
   346  		removedEdges.Add(nextEdgeIdx)
   347  		succ = true
   348  	}
   349  	return succ
   350  }
   351  
   352  // ensureClosure adds the closure of all transitive equivalency groups
   353  // to the join tree. Each transitive edge will add an inner edge, filter,
   354  // and join group that inherit join type and tree depth from the original
   355  // join tree.
   356  func (j *joinOrderBuilder) ensureClosure(grp *ExprGroup) {
   357  	fds := grp.RelProps.FuncDeps()
   358  	for _, set := range fds.Equiv().Sets() {
   359  		for col1, hasNext1 := set.Next(1); hasNext1; col1, hasNext1 = set.Next(col1 + 1) {
   360  			for col2, hasNext2 := set.Next(col1 + 1); hasNext2; col2, hasNext2 = set.Next(col2 + 1) {
   361  				if !j.hasEqEdge(col1, col2) {
   362  					j.makeTransitiveEdge(col1, col2)
   363  				}
   364  			}
   365  		}
   366  	}
   367  }
   368  
   369  // hasEqEdge returns true if the inner edges include a direct equality between
   370  // the two given columns (e.g. x = a).
   371  func (j joinOrderBuilder) hasEqEdge(leftCol, rightCol sql.ColumnId) bool {
   372  	for idx, ok := j.innerEdges.Next(0); ok; idx, ok = j.innerEdges.Next(idx + 1) {
   373  		for _, f := range j.edges[idx].filters {
   374  			var l *expression.GetField
   375  			var r *expression.GetField
   376  			switch f := f.(type) {
   377  			case *expression.Equals:
   378  				l, _ = f.Left().(*expression.GetField)
   379  				r, _ = f.Right().(*expression.GetField)
   380  			case *expression.NullSafeEquals:
   381  				l, _ = f.Left().(*expression.GetField)
   382  				r, _ = f.Right().(*expression.GetField)
   383  			}
   384  			if l == nil || r == nil {
   385  				continue
   386  			}
   387  			if (r.Id() == leftCol && l.Id() == rightCol) ||
   388  				(r.Id() == rightCol && l.Id() == leftCol) {
   389  				return true
   390  			}
   391  		}
   392  	}
   393  	return false
   394  }
   395  
   396  func (j *joinOrderBuilder) findVertexFromCol(col sql.ColumnId) (vertexIndex, GroupId) {
   397  	for i, v := range j.vertices {
   398  		if t, ok := v.(SourceRel); ok {
   399  			if t.Group().RelProps.FuncDeps().All().Contains(col) {
   400  				return vertexIndex(i), t.Group().Id
   401  			}
   402  		}
   403  	}
   404  	panic("vertex not found")
   405  }
   406  
   407  func (j *joinOrderBuilder) findVertexFromGroup(grp GroupId) vertexIndex {
   408  	for i, v := range j.vertices {
   409  		if t, ok := v.(SourceRel); ok {
   410  			if t.Group().Id == grp {
   411  				return vertexIndex(i)
   412  			}
   413  		}
   414  	}
   415  	panic("vertex not found")
   416  }
   417  
   418  // makeTransitiveEdge constructs a new join tree edge and memo group
   419  // on an equality filter between two columns.
   420  func (j *joinOrderBuilder) makeTransitiveEdge(col1, col2 sql.ColumnId) {
   421  	var vert vertexSet
   422  	v1, _ := j.findVertexFromCol(col1)
   423  	v2, _ := j.findVertexFromCol(col2)
   424  	vert = vert.add(v1).add(v2)
   425  
   426  	// find edge where the vertices are provided but partitioned
   427  	var op *operator
   428  	for _, e := range j.edges {
   429  		if vert.isSubsetOf(e.op.leftVertices.union(e.op.rightVertices)) &&
   430  			!vert.isSubsetOf(e.op.leftVertices) &&
   431  			!vert.isSubsetOf(e.op.rightVertices) {
   432  			op = e.op
   433  			break
   434  		}
   435  	}
   436  	if op == nil || op.joinType.IsPartial() {
   437  		// columns are common to one table, not a join edge
   438  		// or, we are trying to semi join after columns have maybe been projected away
   439  		return
   440  	}
   441  
   442  	var gf1, gf2 *expression.GetField
   443  	for _, e := range j.edges {
   444  		if gf1 != nil && gf2 != nil {
   445  			break
   446  		}
   447  		for _, f := range e.filters {
   448  			if cmp, ok := f.(expression.Comparer); ok {
   449  				if gf, ok := cmp.Left().(*expression.GetField); ok && gf.Id() == col1 {
   450  					gf1 = gf
   451  				} else if ok && gf.Id() == col2 {
   452  					gf2 = gf
   453  				}
   454  				if gf, ok := cmp.Right().(*expression.GetField); ok && gf.Id() == col1 {
   455  					gf1 = gf
   456  				} else if ok && gf.Id() == col2 {
   457  					gf2 = gf
   458  				}
   459  			}
   460  		}
   461  	}
   462  	if gf1 == nil || gf2 == nil {
   463  		return
   464  	}
   465  
   466  	j.edges = append(j.edges, *j.makeEdge(op, expression.NewEquals(gf1, gf2)))
   467  	j.innerEdges.Add(len(j.edges) - 1)
   468  
   469  }
   470  
   471  func (j *joinOrderBuilder) buildJoinOp(n *plan.JoinNode) *ExprGroup {
   472  	leftV, leftE, _ := j.populateSubgraph(n.Left())
   473  	rightV, rightE, _ := j.populateSubgraph(n.Right())
   474  	typ := n.JoinType()
   475  	if typ.IsPhysical() {
   476  		typ = plan.JoinTypeInner
   477  	}
   478  	isInner := typ.IsInner()
   479  	op := &operator{
   480  		joinType:      typ,
   481  		leftVertices:  leftV,
   482  		rightVertices: rightV,
   483  		leftEdges:     leftE,
   484  		rightEdges:    rightE,
   485  	}
   486  
   487  	filters := expression.SplitConjunction(n.JoinCond())
   488  	union := leftV.union(rightV)
   489  	group, ok := j.plans[union]
   490  	if !ok {
   491  		// TODO: memo and root should be initialized prior to join planning
   492  		left := j.plans[leftV]
   493  		right := j.plans[rightV]
   494  		group = j.memoize(op.joinType, left, right, filters, nil)
   495  		j.plans[union] = group
   496  		j.m.root = group
   497  	}
   498  
   499  	if !isInner {
   500  		j.buildNonInnerEdge(op, filters...)
   501  	} else {
   502  		j.buildInnerEdge(op, filters...)
   503  	}
   504  	return group
   505  }
   506  
   507  func (j *joinOrderBuilder) buildFilter(child sql.Node, e sql.Expression) (vertexSet, edgeSet, *ExprGroup) {
   508  	// memoize child
   509  	childV, childE, childGrp := j.populateSubgraph(child)
   510  
   511  	filterGrp := j.m.MemoizeFilter(nil, childGrp, expression.SplitConjunction(e))
   512  
   513  	// filter will absorb child relation for join reordering
   514  	j.plans[childV] = filterGrp
   515  
   516  	return childV, childE, filterGrp
   517  }
   518  
   519  func (j *joinOrderBuilder) buildProject(n *plan.Project) (vertexSet, edgeSet, *ExprGroup) {
   520  	// memoize child
   521  	childV, childE, childGrp := j.populateSubgraph(n.Child)
   522  
   523  	projGrp := j.m.MemoizeProject(nil, childGrp, n.Projections)
   524  
   525  	// filter will absorb child relation for join reordering
   526  	j.plans[childV] = projGrp
   527  	return childV, childE, projGrp
   528  }
   529  
   530  func (j *joinOrderBuilder) buildMax1Row(n *plan.Max1Row) (vertexSet, edgeSet, *ExprGroup) {
   531  	// memoize child
   532  	childV, childE, childGrp := j.populateSubgraph(n.Child)
   533  
   534  	max1Grp := j.m.MemoizeMax1Row(nil, childGrp)
   535  
   536  	j.plans[childV] = max1Grp
   537  	return childV, childE, max1Grp
   538  }
   539  
   540  func (j *joinOrderBuilder) buildJoinLeaf(n plan.TableIdNode) *ExprGroup {
   541  	j.checkSize()
   542  
   543  	var rel SourceRel
   544  	b := &sourceBase{relBase: &relBase{}}
   545  	switch n := n.(type) {
   546  	case *plan.ResolvedTable:
   547  		rel = &TableScan{sourceBase: b, Table: n}
   548  	case *plan.TableAlias:
   549  		rel = &TableAlias{sourceBase: b, Table: n}
   550  	case *plan.RecursiveTable:
   551  		rel = &RecursiveTable{sourceBase: b, Table: n}
   552  	case *plan.SubqueryAlias:
   553  		rel = &SubqueryAlias{sourceBase: b, Table: n}
   554  	case *plan.RecursiveCte:
   555  		rel = &RecursiveCte{sourceBase: b, Table: n}
   556  	case *plan.IndexedTableAccess:
   557  		rel = &TableScan{sourceBase: b, Table: n.TableNode.(plan.TableIdNode)}
   558  	case *plan.ValueDerivedTable:
   559  		rel = &Values{sourceBase: b, Table: n}
   560  	case *plan.JSONTable:
   561  		rel = &JSONTable{sourceBase: b, Table: n}
   562  	case sql.TableFunction:
   563  		rel = &TableFunc{sourceBase: b, Table: n}
   564  	case *plan.EmptyTable:
   565  		rel = &EmptyTable{sourceBase: b, Table: n}
   566  	case *plan.SetOp:
   567  		rel = &SetOp{sourceBase: b, Table: n}
   568  	default:
   569  		err := fmt.Errorf("%w: %T", ErrUnsupportedReorderNode, n)
   570  		j.m.HandleErr(err)
   571  	}
   572  
   573  	j.vertices = append(j.vertices, rel)
   574  
   575  	// Initialize the plan for this vertex.
   576  	idx := vertexIndex(len(j.vertices) - 1)
   577  	relSet := vertexSet(0).add(idx)
   578  	grp := j.m.memoizeSourceRel(rel)
   579  	j.plans[relSet] = grp
   580  	j.vertexGroups = append(j.vertexGroups, grp.Id)
   581  	j.vertexTableIds = append(j.vertexTableIds, n.Id())
   582  	return grp
   583  }
   584  
   585  func (j *joinOrderBuilder) buildInnerEdge(op *operator, filters ...sql.Expression) {
   586  	if len(filters) == 0 {
   587  		// cross join
   588  		j.edges = append(j.edges, *j.makeEdge(op))
   589  		j.innerEdges.Add(len(j.edges) - 1)
   590  		return
   591  	}
   592  	for _, f := range filters {
   593  		j.edges = append(j.edges, *j.makeEdge(op, f))
   594  		j.innerEdges.Add(len(j.edges) - 1)
   595  	}
   596  }
   597  
   598  func (j *joinOrderBuilder) buildNonInnerEdge(op *operator, filters ...sql.Expression) {
   599  	// only single edge for non-inner
   600  	j.edges = append(j.edges, *j.makeEdge(op, filters...))
   601  	j.nonInnerEdges.Add(len(j.edges) - 1)
   602  }
   603  
   604  func (j *joinOrderBuilder) makeEdge(op *operator, filters ...sql.Expression) *edge {
   605  	// edge is an instance of operator with a unique set of transform rules depending
   606  	// on the subset of filters used
   607  	e := &edge{
   608  		op:      op,
   609  		filters: filters,
   610  	}
   611  	// we build the graph upwards. so when me make this edge, all
   612  	// of the dependency operators and edges have already been constructed
   613  	// TODO: validate malformed join clauses like `ab join xy on a = u`
   614  	// prior to join planning, execBuilder currently throws getField errors
   615  	// for these
   616  	e.populateEdgeProps(j.vertexTableIds, j.edges)
   617  	return e
   618  }
   619  
   620  // checkSize prevents more than 64 tables
   621  func (j *joinOrderBuilder) checkSize() {
   622  	if len(j.vertices) > 1<<7 {
   623  		panic("tried joining > 64 tables")
   624  	}
   625  }
   626  
   627  // dpSube iterates all disjoint combinations of table sets,
   628  // adding plans to the tree when we find two sets that can
   629  // be joined
   630  func (j *joinOrderBuilder) dbSube() {
   631  	all := j.allVertices()
   632  	for subset := vertexSet(1); subset <= all; subset++ {
   633  		if subset.isSingleton() {
   634  			continue
   635  		}
   636  		for s1 := vertexSet(1); s1 <= subset/2; s1++ {
   637  			if !s1.isSubsetOf(subset) {
   638  				continue
   639  			}
   640  			s2 := subset.difference(s1)
   641  			j.addPlans(s1, s2)
   642  		}
   643  	}
   644  }
   645  
   646  func setPrinter(all, s1, s2 vertexSet) {
   647  	s1Arr := make([]string, all.len())
   648  	for i := range s1Arr {
   649  		s1Arr[i] = "0"
   650  	}
   651  	s2Arr := make([]string, all.len())
   652  	for i := range s2Arr {
   653  		s2Arr[i] = "0"
   654  	}
   655  	for idx, ok := s1.next(0); ok; idx, ok = s1.next(idx + 1) {
   656  		s1Arr[idx] = "1"
   657  	}
   658  	for idx, ok := s2.next(0); ok; idx, ok = s2.next(idx + 1) {
   659  		s2Arr[idx] = "1"
   660  	}
   661  	fmt.Printf("s1: %s, s2: %s\n", strings.Join(s1Arr, ""), strings.Join(s2Arr, ""))
   662  }
   663  
   664  // addPlans finds operators that let us join (s1 op s2) and (s2 op s1).
   665  func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) {
   666  	// all inner filters could be applied
   667  	if j.plans[s1] == nil || j.plans[s2] == nil {
   668  		// Both inputs must have plans.
   669  		// need this to prevent cross-joins higher in tree
   670  		return
   671  	}
   672  
   673  	//TODO collect all inner join filters that can be used as select filters
   674  	//TODO collect functional dependencies to avoid redundant filters
   675  	//TODO relational nodes track functional dependencies
   676  
   677  	var innerJoinFilters []sql.Expression
   678  	var addInnerJoin bool
   679  	var isRedundant bool
   680  	for i, ok := j.innerEdges.Next(0); ok; i, ok = j.innerEdges.Next(i + 1) {
   681  		e := &j.edges[i]
   682  		// Ensure that this edge forms a valid connection between the two sets.
   683  		if e.applicable(s1, s2) {
   684  			if e.filters != nil {
   685  				innerJoinFilters = append(innerJoinFilters, e.filters...)
   686  			}
   687  			isRedundant = isRedundant || e.joinIsRedundant(s1, s2)
   688  			addInnerJoin = true
   689  		}
   690  	}
   691  
   692  	// Because transitive closure can accidentally replace nonInner op with inner op,
   693  	// avoid building inner plans when an op has a valid nonInner plan.
   694  	for i, ok := j.nonInnerEdges.Next(0); ok; i, ok = j.nonInnerEdges.Next(i + 1) {
   695  		e := &j.edges[i]
   696  		if e.applicable(s1, s2) {
   697  			j.addJoin(e.op.joinType, s1, s2, e.filters, innerJoinFilters, e.joinIsRedundant(s1, s2))
   698  			return
   699  		}
   700  		if e.applicable(s2, s1) {
   701  			// This is necessary because we only iterate s1 up to subset / 2
   702  			// in DPSube()
   703  			j.addJoin(e.op.joinType, s2, s1, e.filters, innerJoinFilters, e.joinIsRedundant(s2, s1))
   704  			return
   705  		}
   706  	}
   707  
   708  	if addInnerJoin {
   709  		// Construct an inner join. Don't add in the case when a non-inner join has
   710  		// already been constructed, because doing so can lead to a case where an
   711  		// inner join replaces a non-inner join.
   712  		if innerJoinFilters == nil {
   713  			j.addJoin(plan.JoinTypeCross, s1, s2, nil, nil, isRedundant)
   714  		} else {
   715  			j.addJoin(plan.JoinTypeInner, s1, s2, innerJoinFilters, nil, isRedundant)
   716  		}
   717  	}
   718  }
   719  
   720  func (j *joinOrderBuilder) addJoin(op plan.JoinType, s1, s2 vertexSet, joinFilter, selFilters []sql.Expression, isRedundant bool) {
   721  	if s1.intersects(s2) {
   722  		panic("sets are not disjoint")
   723  	}
   724  	union := s1.union(s2)
   725  	left := j.plans[s1]
   726  	right := j.plans[s2]
   727  
   728  	group, ok := j.plans[union]
   729  	if !isRedundant {
   730  		if !ok {
   731  			group = j.memoize(op, left, right, joinFilter, selFilters)
   732  			j.plans[union] = group
   733  		} else {
   734  			j.addJoinToGroup(op, left, right, joinFilter, selFilters, group)
   735  		}
   736  	}
   737  
   738  	if commute(op) {
   739  		j.addJoinToGroup(op, right, left, joinFilter, selFilters, group)
   740  	}
   741  }
   742  
   743  // addJoinToGroup adds a new plan to existing groups
   744  func (j *joinOrderBuilder) addJoinToGroup(
   745  	op plan.JoinType,
   746  	left *ExprGroup,
   747  	right *ExprGroup,
   748  	joinFilter []sql.Expression,
   749  	selFilter []sql.Expression,
   750  	group *ExprGroup,
   751  ) {
   752  	if f, ok := group.First.(*Filter); ok {
   753  		group = f.Child
   754  	}
   755  	rel := j.constructJoin(op, left, right, joinFilter, group)
   756  	group.Prepend(rel)
   757  	return
   758  }
   759  
   760  // memoize
   761  func (j *joinOrderBuilder) memoize(
   762  	op plan.JoinType,
   763  	left *ExprGroup,
   764  	right *ExprGroup,
   765  	joinFilter []sql.Expression,
   766  	selFilter []sql.Expression,
   767  ) *ExprGroup {
   768  	rel := j.constructJoin(op, left, right, joinFilter, nil)
   769  	return j.m.NewExprGroup(rel)
   770  }
   771  
   772  func (j *joinOrderBuilder) constructJoin(
   773  	op plan.JoinType,
   774  	left *ExprGroup,
   775  	right *ExprGroup,
   776  	joinFilter []sql.Expression,
   777  	group *ExprGroup,
   778  ) RelExpr {
   779  	var rel RelExpr
   780  	b := &JoinBase{
   781  		Op:      op,
   782  		relBase: &relBase{g: group},
   783  		Left:    left,
   784  		Right:   right,
   785  		Filter:  joinFilter,
   786  	}
   787  	switch op {
   788  	case plan.JoinTypeCross:
   789  		rel = &CrossJoin{b}
   790  	case plan.JoinTypeInner:
   791  		rel = &InnerJoin{b}
   792  	case plan.JoinTypeFullOuter:
   793  		rel = &FullOuterJoin{b}
   794  	case plan.JoinTypeLeftOuter:
   795  		rel = &LeftJoin{b}
   796  	case plan.JoinTypeSemi:
   797  		rel = &SemiJoin{b}
   798  	case plan.JoinTypeAnti:
   799  		rel = &AntiJoin{b}
   800  	case plan.JoinTypeLateralInner, plan.JoinTypeLateralCross,
   801  		plan.JoinTypeLateralRight, plan.JoinTypeLateralLeft:
   802  		rel = &LateralJoin{b}
   803  		b.Op = op
   804  	default:
   805  		panic(fmt.Sprintf("unexpected join type: %s", op))
   806  	}
   807  
   808  	if j.newPlanCb != nil {
   809  		j.newPlanCb(j, rel)
   810  	}
   811  
   812  	return rel
   813  }
   814  
   815  func (j *joinOrderBuilder) allVertices() vertexSet {
   816  	// all bits set to one
   817  	return vertexSet((1 << len(j.vertices)) - 1)
   818  }
   819  
   820  func (j *joinOrderBuilder) allEdges() edgeSet {
   821  	all := edgeSet{}
   822  	for i := range j.edges {
   823  		all.Add(i)
   824  	}
   825  	return all
   826  }
   827  
   828  // operator contains the properties of a join operator from the original join
   829  // tree. It is used in calculating the total eligibility sets for edges from any
   830  // 'parent' joins which were originally above this one in the tree.
   831  type operator struct {
   832  	// joinType is the operator type of the original join operator.
   833  	joinType plan.JoinType
   834  
   835  	// leftVertices is the set of vertexes (base relations) that were in the left
   836  	// input of the original join operator.
   837  	leftVertices vertexSet
   838  
   839  	// rightVertices is the set of vertexes (base relations) that were in the
   840  	// right input of the original join operator.
   841  	rightVertices vertexSet
   842  
   843  	// leftEdges is the set of edges that were constructed from join operators
   844  	// that were in the left input of the original join operator.
   845  	leftEdges edgeSet
   846  
   847  	// rightEdgers is the set of edges that were constructed from join operators
   848  	// that were in the right input of the original join operator.
   849  	rightEdges edgeSet
   850  }
   851  
   852  // edge is a generalization of a join edge that embeds rules for
   853  // determining the applicability of arbitrary subtrees. An edge is added to the
   854  // join graph when a new plan can be constructed between two vertexSet.
   855  type edge struct {
   856  	// op is the original join node source for the edge. there are multiple edges
   857  	// per op for inner joins with conjunct-predicate join conditions. Different predicates
   858  	// will have different conflict rules.
   859  	op *operator
   860  
   861  	// filters is the set of join filters that will be used to construct new join
   862  	// ON conditions.
   863  	filters  []sql.Expression
   864  	freeVars sql.ColSet
   865  
   866  	// nullRejectedRels is the set of vertexes on which nulls are rejected by the
   867  	// filters. We do not set any nullRejectedRels currently, which is not accurate
   868  	// but prevents potentially invalid transformations.
   869  	nullRejectedRels vertexSet
   870  
   871  	// ses is the syntactic eligibility set of the edge; in other words, it is the
   872  	// set of base relations (tables) referenced by the filters field.
   873  	ses vertexSet
   874  
   875  	// tes is the total eligibility set of the edge. The TES gives the set of base
   876  	// relations (vertexes) that must be in the input of any join that uses the
   877  	// filters from this edge in its ON condition. The TES is initialized with the
   878  	// SES, and then expanded by the conflict detection algorithm.
   879  	tes vertexSet
   880  
   881  	// rules is a set of conflict rules which must evaluate to true in order for
   882  	// a join between two sets of vertexes to be valid.
   883  	rules []conflictRule
   884  }
   885  
   886  func (e *edge) populateEdgeProps(tableIds []sql.TableId, edges []edge) {
   887  	var tables sql.FastIntSet
   888  	var cols sql.ColSet
   889  	if len(e.filters) > 0 {
   890  		for _, e := range e.filters {
   891  			eCols, eTabs, _ := getExprScalarProps(e)
   892  			cols = cols.Union(eCols)
   893  			tables = tables.Union(eTabs)
   894  		}
   895  	}
   896  
   897  	// TODO vertexes and tableIds?
   898  
   899  	e.freeVars = cols
   900  
   901  	// TODO implement, we currently limit transforms assuming no strong null safety
   902  	//e.nullRejectedRels = e.nullRejectingTables(nullAccepting, allNames, allV)
   903  
   904  	//SES is vertexSet of all tables referenced in cols
   905  	e.calcSES(tables, tableIds)
   906  	// use CD-C to expand dependency sets for operators
   907  	// front load preventing applicable operators that would push crossjoins
   908  	e.calcTES(edges)
   909  }
   910  
   911  func (e *edge) String() string {
   912  	b := strings.Builder{}
   913  	b.WriteString("edge\n")
   914  	b.WriteString(fmt.Sprintf("  - joinType: %s\n", e.op.joinType.String()))
   915  	if e.filters != nil {
   916  		b.WriteString(" - on: ")
   917  		sep := ""
   918  		for _, e := range e.filters {
   919  			b.WriteString(fmt.Sprintf("%s%s", sep, e.String()))
   920  		}
   921  		b.WriteString("\n")
   922  	}
   923  	b.WriteString(fmt.Sprintf("  - free vars: %s\n", e.freeVars.String()))
   924  	b.WriteString(fmt.Sprintf("  - ses: %s\n", e.ses.String()))
   925  	b.WriteString(fmt.Sprintf("  - tes: %s\n", e.tes.String()))
   926  	b.WriteString(fmt.Sprintf("  - nullRej: %s\n", e.nullRejectedRels.String()))
   927  	return b.String()
   928  }
   929  
   930  // nullRejectingTables is a subset of the SES such that for every
   931  // null rejecting table, if all attributes of the table are null,
   932  // we can make a strong guarantee that the edge filters will not
   933  // evaluate to TRUE (FALSE or NULL are OK).
   934  //
   935  // For example, the filters (a.x = b.x OR a.x IS NOT NULL) is null
   936  // rejecting on (b), but not (a).
   937  //
   938  // A second more complicated example is null rejecting both on (a,b):
   939  //
   940  //	CASE
   941  //	  WHEN a.x IS NOT NULL THEN a.x = b.x
   942  //	  WHEN a.x <=> 2 THEN TRUE
   943  //	  ELSE NULL
   944  //	END
   945  //
   946  // Refer to https://dl.acm.org/doi/10.1145/244810.244812 for more examples.
   947  // TODO implement this
   948  func (e *edge) nullRejectingTables(nullAccepting []sql.Expression, allNames []string, allV vertexSet) vertexSet {
   949  	panic("not implemented")
   950  }
   951  
   952  // calcSES updates the syntactic eligibility set for an edge. An SES
   953  // represents all tables this edge's filters requires as input.
   954  func (e *edge) calcSES(tables sql.FastIntSet, tableIds []sql.TableId) {
   955  	ses := vertexSet(0)
   956  	for i, ok := tables.Next(0); ok; i, ok = tables.Next(i + 1) {
   957  		for j, tabId := range tableIds {
   958  			// table ids, group ids, and vertex ids are all distinct
   959  			if sql.TableId(i) == tabId {
   960  				ses = ses.add(vertexIndex(j))
   961  				break
   962  			}
   963  		}
   964  	}
   965  	e.ses = ses
   966  }
   967  
   968  // calcTES in place updates an edge's total eligibility set. TES is a way
   969  // to expand the eligibility sets (the table dependencies) for an edge to
   970  // prevent invalid plans. Most of this is verbatim from the paper, but we
   971  // add additional restrictions for cross and left joins.
   972  func (e *edge) calcTES(edges []edge) {
   973  	e.tes = e.ses
   974  
   975  	// Degenerate predicates include 1) cross joins and 2) inner joins
   976  	// whose filters do not restrict that cardinality of the subtree
   977  	// inputs. We check for both by comparing i) the filter SES to ii) the tables
   978  	// provided by the left/right subtrees. If one or both do not overlap,
   979  	// the degenerate edge will be frozen in reference to the original plan
   980  	// by expanding the TES to require the left/right subtree dependencies.
   981  	//
   982  	//	 note: this is different from the paper, which instead adds a check
   983  	//   to applicable:
   984  	//     op.leftVertices.intersect(s1) || op.rightVertices.intersect(s2)
   985  	//   An operation is only applicable if the left tree provides a subset
   986  	//   of s1 or the right tree provides a subset of s2. This is logically
   987  	//   equivalent to expanding the TES here, but front-loads this logic
   988  	//   because a bigger TES earlier reduces the conflict checking work.
   989  	if !e.tes.intersects(e.op.leftVertices) {
   990  		e.tes = e.tes.union(e.op.leftVertices)
   991  	}
   992  	if !e.tes.intersects(e.op.rightVertices) {
   993  		e.tes = e.tes.union(e.op.rightVertices)
   994  	}
   995  
   996  	// left join can't be moved such that we transpose left-dependencies
   997  	if e.op.joinType.IsLeftOuter() {
   998  		e.tes = e.tes.union(e.op.leftVertices)
   999  	}
  1000  
  1001  	// CD-C algorithm
  1002  	// Note: the ordering of the transform(eA, eB) functions are important.
  1003  	// eA is the subtree child edge targeted for rearrangement. If the ordering
  1004  	// is switched, the output is nondeterministic.
  1005  
  1006  	// iterate every eA in STO(left(eB))
  1007  	eB := e
  1008  	for idx, ok := eB.op.leftEdges.Next(0); ok; idx, ok = eB.op.leftEdges.Next(idx + 1) {
  1009  		if eB.op.leftVertices.isSubsetOf(eB.tes) {
  1010  			// Fast path to break out early: the TES includes all relations from the
  1011  			// left input.
  1012  			break
  1013  		}
  1014  		eA := &edges[idx]
  1015  		if !assoc(eA, eB) {
  1016  			// The edges are not associative, so add a conflict rule mapping from the
  1017  			// right input relations of the child to its left input relations.
  1018  			rule := conflictRule{from: eA.op.rightVertices}
  1019  			if eA.op.leftVertices.intersects(eA.ses) {
  1020  				// A less restrictive conflict rule can be added in this case.
  1021  				rule.to = eA.op.leftVertices.intersection(eA.ses)
  1022  			} else {
  1023  				rule.to = eA.op.leftVertices
  1024  			}
  1025  			eB.addRule(rule)
  1026  		}
  1027  		if !leftAsscom(eA, eB) {
  1028  			// Left-asscom does not hold, so add a conflict rule mapping from the
  1029  			// left input relations of the child to its right input relations.
  1030  			rule := conflictRule{from: eA.op.leftVertices}
  1031  			if eA.op.rightVertices.intersects(eA.ses) {
  1032  				// A less restrictive conflict rule can be added in this case.
  1033  				rule.to = eA.op.rightVertices.intersection(eA.ses)
  1034  			} else {
  1035  				rule.to = eA.op.rightVertices
  1036  			}
  1037  			eB.addRule(rule)
  1038  		}
  1039  	}
  1040  
  1041  	for idx, ok := e.op.rightEdges.Next(0); ok; idx, ok = e.op.rightEdges.Next(idx + 1) {
  1042  		if e.op.rightVertices.isSubsetOf(e.tes) {
  1043  			// Fast path to break out early: the TES includes all relations from the
  1044  			// right input.
  1045  			break
  1046  		}
  1047  		eA := &edges[idx]
  1048  		if !assoc(eB, eA) {
  1049  			// The edges are not associative, so add a conflict rule mapping from the
  1050  			// left input relations of the child to its right input relations.
  1051  			rule := conflictRule{from: eA.op.leftVertices}
  1052  			if eA.op.rightVertices.intersects(eA.ses) {
  1053  				// A less restrictive conflict rule can be added in this case.
  1054  				rule.to = eA.op.rightVertices.intersection(eA.ses)
  1055  			} else {
  1056  				rule.to = eA.op.rightVertices
  1057  			}
  1058  			eB.addRule(rule)
  1059  		}
  1060  		if !rightAsscom(eB, eA) {
  1061  			// Right-asscom does not hold, so add a conflict rule mapping from the
  1062  			// right input relations of the child to its left input relations.
  1063  			rule := conflictRule{from: eA.op.rightVertices}
  1064  			if eA.op.leftVertices.intersects(eA.ses) {
  1065  				// A less restrictive conflict rule can be added in this case.
  1066  				rule.to = eA.op.leftVertices.intersection(eA.ses)
  1067  			} else {
  1068  				rule.to = eA.op.leftVertices
  1069  			}
  1070  			eB.addRule(rule)
  1071  		}
  1072  	}
  1073  }
  1074  
  1075  // addRule adds the given conflict rule to the edge. Before the rule is added to
  1076  // the rules set, an effort is made to eliminate the need for the rule.
  1077  func (e *edge) addRule(rule conflictRule) {
  1078  	if rule.from.intersects(e.tes) {
  1079  		// If the 'from' relation set intersects the total eligibility set, simply
  1080  		// add the 'to' set to the TES because the rule will always be triggered.
  1081  		e.tes = e.tes.union(rule.to)
  1082  		return
  1083  	}
  1084  	if rule.to.isSubsetOf(e.tes) {
  1085  		// If the 'to' relation set is a subset of the total eligibility set, the
  1086  		// rule is a do-nothing.
  1087  		return
  1088  	}
  1089  	e.rules = append(e.rules, rule)
  1090  }
  1091  
  1092  func (e *edge) applicable(s1, s2 vertexSet) bool {
  1093  	if !e.checkRules(s1, s2) {
  1094  		// The conflict rules for this edge are not satisfied for a join between s1
  1095  		// and s2.
  1096  		return false
  1097  	}
  1098  	switch e.op.joinType {
  1099  	case plan.JoinTypeInner:
  1100  		// The TES must be a subset of the relations of the candidate join inputs. In
  1101  		// addition, the TES must intersect both s1 and s2 (the edge must connect the
  1102  		// two vertex sets).
  1103  		return e.tes.isSubsetOf(s1.union(s2)) && e.tes.intersects(s1) && e.tes.intersects(s2)
  1104  	default:
  1105  		// The left TES must be a subset of the s1 relations, and the right TES must
  1106  		// be a subset of the s2 relations. In addition, the TES must intersect both
  1107  		// s1 and s2 (the edge must connect the two vertex sets).
  1108  		return e.tes.intersection(e.op.leftVertices).isSubsetOf(s1) &&
  1109  			e.tes.intersection(e.op.rightVertices).isSubsetOf(s2) &&
  1110  			e.tes.intersects(s1) && e.tes.intersects(s2)
  1111  	}
  1112  }
  1113  
  1114  // checkRules iterates through the edge's rules and returns false if a conflict
  1115  // is detected for the given sets of join input relations. Otherwise, returns
  1116  // true.
  1117  func (e *edge) checkRules(s1, s2 vertexSet) bool {
  1118  	s := s1.union(s2)
  1119  	for _, rule := range e.rules {
  1120  		if rule.from.intersects(s) && !rule.to.isSubsetOf(s) {
  1121  			// The join is invalid because it does not obey this conflict rule.
  1122  			return false
  1123  		}
  1124  	}
  1125  	return true
  1126  }
  1127  
  1128  // joinIsRedundant returns true if a join between the two sets of base relations
  1129  // was already present in the original join tree. If so, enumerating this join
  1130  // would be redundant, so it should be skipped.
  1131  func (e *edge) joinIsRedundant(s1, s2 vertexSet) bool {
  1132  	return e.op.leftVertices == s1 && e.op.rightVertices == s2
  1133  }
  1134  
  1135  type assocTransform func(eA, eB *edge) bool
  1136  
  1137  // assoc checks whether the associate is applicable
  1138  // to a binary operator tree. We consider 1) generating cross joins,
  1139  // and 2) the left/right operator join types for this specific transform.
  1140  // The below is a valid association that generates no crossjoin:
  1141  //
  1142  //	(e2 op_a_12 e1) op_b_13 e3
  1143  //	=>
  1144  //	e2 op_a_12 (e1 op_b_13 e3)
  1145  //
  1146  // note: important to compare edge ordering for left deep tree.
  1147  func assoc(eA, eB *edge) bool {
  1148  	if eB.ses.intersects(eA.op.leftVertices) || eA.ses.intersects(eB.op.rightVertices) {
  1149  		// associating two operators can estrange the distant relation.
  1150  		// for example:
  1151  		//   (e2 op_a_12 e1) op_b_13 e3
  1152  		//   =>
  1153  		//   e2 op_a_12 (e1 op_b_13 e3)
  1154  		// The first operator, a, takes explicit dependencies on e1 and e2.
  1155  		// The second operator, b, takes explicit dependencies on e1 and e3.
  1156  		// Associating these two will isolate e2 from op_b for the downward
  1157  		// transform, and e3 from op_a on the upward transform, both of which
  1158  		// are valid. The same is not true for the transform below:
  1159  		//   (e2 op_a_12 e1) op_b_32 e3
  1160  		//   =>
  1161  		//   e2 op_a_12 (e1 op_b_32 e3)
  1162  		// Isolating e2 from op_b makes op_b degenerate, producing a cross join.
  1163  		return false
  1164  	}
  1165  	return checkProperty(assocTable, eA, eB)
  1166  }
  1167  
  1168  // leftAsscom checks whether the left-associate+commute is applicable
  1169  // to a binary operator tree. We consider 1) generating cross joins,
  1170  // and 2) the left/right operator join types for this specific transform.
  1171  // For example:
  1172  //
  1173  //	(e1 op_a_12 e2) op_b_13 e3
  1174  //	=>
  1175  //	(e1 op_b_13 e3) op_a_12 e2
  1176  func leftAsscom(eA, eB *edge) bool {
  1177  	if eB.ses.intersects(eA.op.rightVertices) || eA.ses.intersects(eB.op.rightVertices) {
  1178  		// Associating two operators can estrange the distant relation.
  1179  		// For example:
  1180  		//	(e1 op_a_12 e2) op_b_23 e3
  1181  		//	=>
  1182  		//	(e1 op_b_23 e3) op_a_12 e2
  1183  		// Isolating e2 from op_b makes op_b degenerate, producing a cross join.
  1184  		return false
  1185  	}
  1186  	return checkProperty(leftAsscomTable, eA, eB)
  1187  }
  1188  
  1189  // rAsscom checks whether the right-associate+commute is applicable
  1190  // to a binary operator tree. We consider 1) generating cross joins,
  1191  // and 2) the left/right operator join types for this specific transform.
  1192  // For example:
  1193  //
  1194  //	e1 op_b_13 (e2 op_a_23 e3)
  1195  //	=>
  1196  //	e2 op_a_23 (e1 op_b_13 e3)
  1197  func rightAsscom(eA, eB *edge) bool {
  1198  	if eB.ses.intersects(eA.op.leftVertices) || eA.ses.intersects(eB.op.leftVertices) {
  1199  		// Associating two operators can estrange the distant relation.
  1200  		// For example:
  1201  		//	e3 op_b_23 (e1 op_a_12 e3)
  1202  		//	=>
  1203  		//	e2 op_a_12 (e1 op_b_23 e3)
  1204  		// Isolating e2 from op_b makes op_b degenerate, producing a cross join.
  1205  		return false
  1206  	}
  1207  	return checkProperty(rightAsscomTable, eA, eB)
  1208  }
  1209  
  1210  // commute transforms an operator tree by alternating child
  1211  // join ordering.
  1212  // For example:
  1213  //
  1214  //	e1 op e2
  1215  //	=>
  1216  //	e2 op e1
  1217  func commute(op plan.JoinType) bool {
  1218  	return op == plan.JoinTypeInner || op == plan.JoinTypeCross
  1219  }
  1220  
  1221  // conflictRule is a pair of vertex sets which carry the requirement that if the
  1222  // 'from' set intersects a set of prospective join input relations, then the
  1223  // 'to' set must be a subset of the input relations (from -> to). Take the
  1224  // following query as an example:
  1225  //
  1226  //	SELECT * FROM xy
  1227  //	INNER JOIN (SELECT * FROM ab LEFT JOIN uv ON a = u)
  1228  //	ON x = a
  1229  //
  1230  // During execution of the CD-C algorithm, the following conflict rule would
  1231  // be added to inner join edge: [uv -> ab]. This means that, for any join that
  1232  // uses this edge, the presence of uv in the set of input relations implies the
  1233  // presence of ab. This prevents an inner join between relations xy and uv
  1234  // (since then ab would not be an input relation). Note that, in practice, this
  1235  // conflict rule would be absorbed into the TES because ab is a part of the
  1236  // inner join edge's SES (see the addRule func).
  1237  type conflictRule struct {
  1238  	from vertexSet
  1239  	to   vertexSet
  1240  }
  1241  
  1242  // lookupTableEntry is an entry in one of the join property Lookup tables
  1243  // defined below (associative, left-asscom and right-asscom properties). A
  1244  // lookupTableEntry can be unconditionally true or false, as well as true
  1245  // conditional on the null-rejecting properties of the edge filters.
  1246  type lookupTableEntry uint8
  1247  
  1248  const (
  1249  	// never indicates that the transformation represented by the table entry is
  1250  	// unconditionally incorrect.
  1251  	never lookupTableEntry = 0
  1252  
  1253  	// always indicates that the transformation represented by the table entry is
  1254  	// unconditionally correct.
  1255  	always lookupTableEntry = 1 << (iota - 1)
  1256  
  1257  	// filterA indicates that the filters of the "A" join edge must reject
  1258  	// nulls for the set of vertexes specified by rejectsOnLeftA, rejectsOnRightA,
  1259  	// etc.
  1260  	filterA
  1261  
  1262  	// filterB indicates that the filters of the "B" join edge must reject
  1263  	// nulls for the set of vertexes specified by rejectsOnLeftA, rejectsOnRightA,
  1264  	// etc.
  1265  	filterB
  1266  
  1267  	// rejectsOnLeftA indicates that the filters must reject nulls for the left
  1268  	// input relations of edge "A".
  1269  	rejectsOnLeftA
  1270  
  1271  	// rejectsOnRightA indicates that the filters must reject nulls for the right
  1272  	// input relations of edge "A".
  1273  	rejectsOnRightA
  1274  
  1275  	// rejectsOnRightB indicates that the filters must reject nulls for the right
  1276  	// input relations of edge "B".
  1277  	rejectsOnRightB
  1278  
  1279  	// table2Note1 indicates that the filters of edge "B" must reject nulls on
  1280  	// the relations of the right input of edge "A".
  1281  	// Citations: [8] Table 2 Footnote 1.
  1282  	table2Note1 = filterB | rejectsOnRightA
  1283  
  1284  	// table2Note2 indicates that the filters of operators "A" and "B" must reject
  1285  	// nulls on the relations of the right input of edge "A".
  1286  	// Citations: [8] Table 2 Footnote 2.
  1287  	table2Note2 = (filterA | filterB) | rejectsOnRightA
  1288  
  1289  	// table3Note1 indicates that the filters of edge "A" must reject nulls on
  1290  	// the relations of the left input of edge "A".
  1291  	// Citations: [8] Table 3 Footnote 1.
  1292  	table3Note1 = filterA | rejectsOnLeftA
  1293  
  1294  	// table3Note2 indicates that the filters of edge "B" must reject nulls on
  1295  	// the relations of the right input of edge "B".
  1296  	// Citations: [8] Table 3 Footnote 1]2.
  1297  	table3Note2 = filterB | rejectsOnRightB
  1298  
  1299  	// table3Note3 indicates that the filters of operators "A" and "B" must reject
  1300  	// nulls on the relations of the left input of edge "A".
  1301  	// Citations: [8] Table 3 Footnote 3.
  1302  	table3Note3 = (filterA | filterB) | rejectsOnLeftA
  1303  
  1304  	// table3Note4 indicates that the filters of operators "A" and "B" must reject
  1305  	// nulls on the relations of the right input of edge "B".
  1306  	// Citations: [8] Table 3 Footnote 4.
  1307  	table3Note4 = (filterA | filterB) | rejectsOnRightB
  1308  )
  1309  
  1310  // assocTable is a Lookup table indicating whether it is correct to apply the
  1311  // associative transformation to pairs of join operators.
  1312  // citations: [8] table 2
  1313  var assocTable = [8][8]lookupTableEntry{
  1314  	//             cross-B inner-B semi-B  anti-B  left-B  full-B group-B lateral-B
  1315  	/* cross-A   */ {always, always, always, always, always, never, always, never},
  1316  	/* inner-A   */ {always, always, always, always, always, never, always, never},
  1317  	/* semi-A    */ {never, never, never, never, never, never, never, never},
  1318  	/* anti-A    */ {never, never, never, never, never, never, never, never},
  1319  	/* left-A    */ {never, never, never, never, table2Note1, never, never, never},
  1320  	/* full-A    */ {never, never, never, never, table2Note1, table2Note2, never},
  1321  	/* group-A   */ {never, never, never, never, never, never, never, never},
  1322  	/* lateral-A */ {never, never, never, never, never, never, never, never},
  1323  }
  1324  
  1325  // leftAsscomTable is a Lookup table indicating whether it is correct to apply
  1326  // the left-asscom transformation to pairs of join operators.
  1327  // citations: [8] table 3
  1328  var leftAsscomTable = [8][8]lookupTableEntry{
  1329  	//             cross-A inner-B semi-B  anti-B  left-B  full-B group-B lateral-B
  1330  	/* cross-A   */ {always, always, always, always, always, never, always, never},
  1331  	/* inner-A   */ {always, always, always, always, always, never, always, never},
  1332  	/* semi-A    */ {always, always, always, always, always, never, always, never},
  1333  	/* anti-A    */ {always, always, always, always, always, never, always, never},
  1334  	/* left-A    */ {always, always, always, always, always, table3Note1, always, never},
  1335  	/* full-A    */ {never, never, never, never, table3Note2, table3Note3, never, never},
  1336  	/* group-A   */ {always, always, always, always, always, never, always, never},
  1337  	/* lateral-A */ {never, never, never, never, never, never, never, never},
  1338  }
  1339  
  1340  // rightAsscomTable is a Lookup table indicating whether it is correct to apply
  1341  // the right-asscom transformation to pairs of join operators.
  1342  // citations: [8] table 3
  1343  var rightAsscomTable = [8][8]lookupTableEntry{
  1344  	//             cross-B inner-B semi-B anti-B left-B full-B group-B lateral-B
  1345  	/* cross-A */ {always, always, never, never, never, never, never, never},
  1346  	/* inner-A */ {always, always, never, never, never, never, never, never},
  1347  	/* semi-A  */ {never, never, never, never, never, never, never, never},
  1348  	/* anti-A  */ {never, never, never, never, never, never, never, never},
  1349  	/* left-A  */ {never, never, never, never, never, never, never},
  1350  	/* full-A  */ {never, never, never, never, never, table3Note4, never, never},
  1351  	/* group-A */ {never, never, never, never, never, never, never, never},
  1352  	/* lateral-A */ {never, never, never, never, never, never, never, never},
  1353  }
  1354  
  1355  // checkProperty returns true if the transformation represented by the given
  1356  // property Lookup table is allowed for the two given edges. Note that while
  1357  // most table entries are either true or false, some are conditionally true,
  1358  // depending on the null-rejecting properties of the edge filters (for example,
  1359  // association for two full joins).
  1360  func checkProperty(table [8][8]lookupTableEntry, edgeA, edgeB *edge) bool {
  1361  	entry := table[getOpIdx(edgeA)][getOpIdx(edgeB)]
  1362  
  1363  	if entry == never {
  1364  		// Application of this transformation property is unconditionally incorrect.
  1365  		return false
  1366  	}
  1367  	if entry == always {
  1368  		// Application of this transformation property is unconditionally correct.
  1369  		return true
  1370  	}
  1371  
  1372  	// This property is conditionally applicable. Get the relations that must be
  1373  	// null-rejected by the filters.
  1374  	var candidateNullRejectRels vertexSet
  1375  	if entry&rejectsOnLeftA != 0 {
  1376  		// Filters must null-reject on the left input vertexes of edgeA.
  1377  		candidateNullRejectRels = edgeA.op.leftVertices
  1378  	} else if entry&rejectsOnRightA != 0 {
  1379  		// Filters must null-reject on the right input vertexes of edgeA.
  1380  		candidateNullRejectRels = edgeA.op.rightVertices
  1381  	} else if entry&rejectsOnRightB != 0 {
  1382  		// Filters must null-reject on the right input vertexes of edgeB.
  1383  		candidateNullRejectRels = edgeA.op.rightVertices
  1384  	}
  1385  
  1386  	// Check whether the edge filters reject nulls on nullRejectRelations.
  1387  	if entry&filterA != 0 {
  1388  		// The filters of edgeA must reject nulls on one or more of the relations in
  1389  		// nullRejectRelations.
  1390  		if !edgeA.nullRejectedRels.intersects(candidateNullRejectRels) {
  1391  			return false
  1392  		}
  1393  	}
  1394  	if entry&filterB != 0 {
  1395  		// The filters of edgeB must reject nulls on one or more of the relations in
  1396  		// nullRejectRelations.
  1397  		if !edgeB.nullRejectedRels.intersects(candidateNullRejectRels) {
  1398  			return false
  1399  		}
  1400  	}
  1401  	return true
  1402  }
  1403  
  1404  // getOpIdx returns an index into the join property static Lookup tables given an edge
  1405  // with an associated edge type. I originally used int(joinType), but this is fragile
  1406  // to reordering the type definitions.
  1407  func getOpIdx(e *edge) int {
  1408  	switch e.op.joinType {
  1409  	case plan.JoinTypeCross:
  1410  		return 0
  1411  	case plan.JoinTypeInner:
  1412  		return 1
  1413  	case plan.JoinTypeSemi:
  1414  		return 2
  1415  	case plan.JoinTypeAnti:
  1416  		return 3
  1417  	case plan.JoinTypeLeftOuter:
  1418  		return 4
  1419  	case plan.JoinTypeFullOuter:
  1420  		return 5
  1421  	case plan.JoinTypeGroupBy:
  1422  		return 6
  1423  	case plan.JoinTypeLateralInner, plan.JoinTypeLateralCross,
  1424  		plan.JoinTypeLateralRight, plan.JoinTypeLateralLeft:
  1425  		return 7
  1426  	default:
  1427  		panic(fmt.Sprintf("invalid operator: %v", e.op.joinType))
  1428  	}
  1429  }
  1430  
  1431  type edgeSet = sql.FastIntSet
  1432  
  1433  type bitSet uint64
  1434  
  1435  // vertexSet represents a set of base relations that form the vertexes of the
  1436  // join graph.
  1437  type vertexSet = bitSet
  1438  
  1439  const maxSetSize = 63
  1440  
  1441  // vertexIndex represents the ordinal position of a base relation in the
  1442  // JoinOrderBuilder vertexes field. vertexIndex must be less than maxSetSize.
  1443  type vertexIndex = uint64
  1444  
  1445  func newBitSet(idxs ...uint64) (res bitSet) {
  1446  	for _, idx := range idxs {
  1447  		res = res.add(idx)
  1448  	}
  1449  	return res
  1450  }
  1451  
  1452  // add returns a copy of the bitSet with the given element added.
  1453  func (s bitSet) add(idx uint64) bitSet {
  1454  	if idx > maxSetSize {
  1455  		panic(fmt.Sprintf("cannot insert %d into bitSet", idx))
  1456  	}
  1457  	return s | (1 << idx)
  1458  }
  1459  
  1460  // remove returns a copy of the bitSet with the given element removed.
  1461  func (s bitSet) remove(idx uint64) bitSet {
  1462  	if idx > maxSetSize {
  1463  		panic(fmt.Sprintf("%d is invalid index for bitSet", idx))
  1464  	}
  1465  	return s & ^(1 << idx)
  1466  }
  1467  
  1468  // contains returns whether a bitset contains a given element.
  1469  func (s bitSet) contains(idx uint64) bool {
  1470  	if idx > maxSetSize {
  1471  		panic(fmt.Sprintf("%d is invalid index for bitSet", idx))
  1472  	}
  1473  	return s&(1<<idx) != 0
  1474  }
  1475  
  1476  // union returns the set union of this set with the given set.
  1477  func (s bitSet) union(o bitSet) bitSet {
  1478  	return s | o
  1479  }
  1480  
  1481  // intersection returns the set intersection of this set with the given set.
  1482  func (s bitSet) intersection(o bitSet) bitSet {
  1483  	return s & o
  1484  }
  1485  
  1486  // difference returns the set difference of this set with the given set.
  1487  func (s bitSet) difference(o bitSet) bitSet {
  1488  	return s & ^o
  1489  }
  1490  
  1491  // intersects returns true if this set and the given set intersect.
  1492  func (s bitSet) intersects(o bitSet) bool {
  1493  	return s.intersection(o) != 0
  1494  }
  1495  
  1496  // isSubsetOf returns true if this set is a subset of the given set.
  1497  func (s bitSet) isSubsetOf(o bitSet) bool {
  1498  	return s.union(o) == o
  1499  }
  1500  
  1501  // isSingleton returns true if the set has exactly one element.
  1502  func (s bitSet) isSingleton() bool {
  1503  	return s > 0 && (s&(s-1)) == 0
  1504  }
  1505  
  1506  // next returns the next element in the set after the given start index, and
  1507  // a bool indicating whether such an element exists.
  1508  func (s bitSet) next(startVal uint64) (elem uint64, ok bool) {
  1509  	if startVal < maxSetSize {
  1510  		if ntz := bits.TrailingZeros64(uint64(s >> startVal)); ntz < 64 {
  1511  			return startVal + uint64(ntz), true
  1512  		}
  1513  	}
  1514  	return uint64(math.MaxInt64), false
  1515  }
  1516  
  1517  // len returns the number of elements in the set.
  1518  func (s bitSet) len() int {
  1519  	return bits.OnesCount64(uint64(s))
  1520  }
  1521  
  1522  func (s bitSet) String() string {
  1523  	var str string
  1524  	var i vertexSet = 1
  1525  	cnt := 0
  1526  	for cnt < s.len() {
  1527  		if (i & s) != 0 {
  1528  			str += "1"
  1529  			cnt++
  1530  		} else {
  1531  			str += "0"
  1532  		}
  1533  		i = i << 1
  1534  	}
  1535  	return str
  1536  }