github.com/dolthub/go-mysql-server@v0.18.0/sql/analyzer/resolve_subqueries.go (about)

     1  // Copyright 2020-2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package analyzer
    16  
    17  import (
    18  	"github.com/dolthub/go-mysql-server/sql"
    19  	"github.com/dolthub/go-mysql-server/sql/analyzer/analyzererrors"
    20  	"github.com/dolthub/go-mysql-server/sql/plan"
    21  	"github.com/dolthub/go-mysql-server/sql/transform"
    22  )
    23  
    24  // resolveSubqueries runs analysis on each subquery expression and subquery alias in the specified node tree.
    25  // Subqueries are processed from the top down and a new scope level is created for each subquery when it is sent
    26  // to be analyzed.
    27  func resolveSubqueries(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) {
    28  	span, ctx := ctx.Span("resolve_subqueries")
    29  	defer span.End()
    30  
    31  	return resolveSubqueriesHelper(ctx, a, n, scope, sel, false)
    32  }
    33  
    34  func addLeftTablesToScope(outerScope *plan.Scope, leftNode sql.Node) *plan.Scope {
    35  	resTbls := getTablesByName(leftNode)
    36  	subScope := outerScope
    37  	for _, tbl := range resTbls {
    38  		subScope = subScope.NewScopeInJoin(tbl)
    39  	}
    40  	subScope.SetJoin(true)
    41  	return subScope
    42  }
    43  
    44  // finalizeSubqueryLateral ensures that all SubqueryAliases with IsLateral set to true have their children also set to true.
    45  func finalizeSubqueryLateral(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) {
    46  	return transform.NodeWithOpaque(n, func(n sql.Node) (sql.Node, transform.TreeIdentity, error) {
    47  		if parentSQA, ok := n.(*plan.SubqueryAlias); ok && parentSQA.IsLateral {
    48  			newSqaChild, sqaSame, sqaErr := transform.NodeWithOpaque(parentSQA.Child, func(n sql.Node) (sql.Node, transform.TreeIdentity, error) {
    49  				if sqa, ok := n.(*plan.SubqueryAlias); ok {
    50  					sqa.IsLateral = true
    51  					return sqa, transform.NewTree, nil
    52  				}
    53  				return n, transform.SameTree, nil
    54  			})
    55  			if sqaErr != nil {
    56  				return n, transform.SameTree, sqaErr
    57  			}
    58  			if sqaSame {
    59  				return n, transform.SameTree, nil
    60  			}
    61  			newSqa, err := parentSQA.WithChildren(newSqaChild)
    62  			if err != nil {
    63  				return n, transform.SameTree, err
    64  			}
    65  			return newSqa, transform.NewTree, nil
    66  		}
    67  		return n, transform.SameTree, nil
    68  	})
    69  }
    70  
    71  // finalizeSubqueries runs the final analysis pass on subquery expressions and subquery aliases in the node tree to ensure
    72  // they are fully resolved and that the plan is ready to be executed. The logic is similar to when subqueries are initially
    73  // resolved with resolveSubqueries, but with a few important differences:
    74  //   - finalizeSubqueries processes each subquery once, finalizing parent before child scopes, and should only be included
    75  //     when analyzing a root node at the top of the plan.
    76  //   - resolveSubqueries skips pruneColumns and optimizeJoins for subquery expressions and only runs the OnceBeforeDefault
    77  //     rule set on subquery aliases.
    78  //   - finalizeSubqueries runs a full analysis pass on subquery expressions and runs all rule batches except for OnceBeforeDefault.
    79  func finalizeSubqueries(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) {
    80  	span, ctx := ctx.Span("finalize_subqueries")
    81  	defer span.End()
    82  
    83  	node, same1, err := finalizeSubqueriesHelper(ctx, a, n, scope, sel)
    84  	if err != nil {
    85  		return nil, transform.SameTree, err
    86  	}
    87  
    88  	newNode, same2, err := finalizeSubqueryLateral(ctx, a, node, scope, sel)
    89  	if err != nil {
    90  		return nil, transform.SameTree, err
    91  	}
    92  
    93  	return newNode, same1 && same2, nil
    94  }
    95  
    96  // finalizeSubqueriesHelper finalizes all subqueries and subquery expressions,
    97  // fixing parent scopes before recursing into child nodes.
    98  func finalizeSubqueriesHelper(ctx *sql.Context, a *Analyzer, node sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) {
    99  	var joinParent *plan.JoinNode
   100  	var selFunc transform.SelectorFunc = func(c transform.Context) bool {
   101  		if jp, ok := c.Node.(*plan.JoinNode); ok {
   102  			joinParent = jp
   103  		}
   104  		return true
   105  	}
   106  
   107  	var conFunc transform.CtxFunc = func(c transform.Context) (sql.Node, transform.TreeIdentity, error) {
   108  		n := c.Node
   109  		if sqa, ok := n.(*plan.SubqueryAlias); ok {
   110  			var newSqa sql.Node
   111  			var same2 transform.TreeIdentity
   112  			var err error
   113  			// NOTE: this only really fixes one level of subquery with two joins.
   114  			// This patch will likely not fix cases with more deeply nested joins and subqueries.
   115  			// A real fix would be to re-examine indexes after everything.
   116  			if sqa.OuterScopeVisibility && joinParent != nil {
   117  				if stripChild, ok := joinParent.Right().(*plan.StripRowNode); ok && stripChild.Child == sqa {
   118  					subScope := scope.NewScopeInJoin(joinParent.Children()[0])
   119  					subScope.SetLateralJoin(joinParent.Op.IsLateral())
   120  					newSqa, same2, err = analyzeSubqueryAlias(ctx, a, sqa, subScope, sel, true)
   121  				} else {
   122  					// IsLateral means that the subquery should have visibility into the left scope.
   123  					if sqa.IsLateral {
   124  						subScope := addLeftTablesToScope(scope, joinParent.Left())
   125  						subScope.SetLateralJoin(true)
   126  						newSqa, same2, err = analyzeSubqueryAlias(ctx, a, sqa, subScope, sel, true)
   127  					} else {
   128  						newSqa, same2, err = analyzeSubqueryAlias(ctx, a, sqa, scope, sel, true)
   129  					}
   130  				}
   131  			} else {
   132  				// IsLateral means that the subquery should have visibility into the left scope.
   133  				if joinParent != nil && sqa.IsLateral {
   134  					subScope := addLeftTablesToScope(scope, joinParent.Left())
   135  					subScope.SetLateralJoin(true)
   136  					newSqa, same2, err = analyzeSubqueryAlias(ctx, a, sqa, subScope, sel, true)
   137  				} else {
   138  					newSqa, same2, err = analyzeSubqueryAlias(ctx, a, sqa, scope, sel, true)
   139  				}
   140  			}
   141  
   142  			if err != nil {
   143  				return n, transform.SameTree, err
   144  			}
   145  
   146  			newNode, same1, err := finalizeSubqueriesHelper(ctx, a, newSqa.(*plan.SubqueryAlias).Child, scope.NewScopeFromSubqueryAlias(sqa), sel)
   147  			if err != nil {
   148  				return n, transform.SameTree, err
   149  			}
   150  
   151  			if same1 && same2 {
   152  				return n, transform.SameTree, nil
   153  			} else {
   154  				newNode, err = newSqa.WithChildren(newNode)
   155  				return newNode, transform.NewTree, err
   156  			}
   157  		}
   158  		return transform.OneNodeExprsWithNode(n, func(node sql.Node, e sql.Expression) (sql.Expression, transform.TreeIdentity, error) {
   159  			if sq, ok := e.(*plan.Subquery); ok {
   160  				newSq, same2, err := analyzeSubqueryExpression(ctx, a, node, sq, scope, sel, true)
   161  				if err != nil {
   162  					if analyzererrors.ErrValidationResolved.Is(err) {
   163  						// if a parent is unresolved, we want to dig deeper to find the unresolved
   164  						// child dependency
   165  						_, _, err := finalizeSubqueriesHelper(ctx, a, sq.Query, scope.NewScopeFromSubqueryExpression(node, sq.Correlated()), sel)
   166  						if err != nil {
   167  							return e, transform.SameTree, err
   168  						}
   169  					}
   170  					return e, transform.SameTree, err
   171  				}
   172  				newExpr, same1, err := finalizeSubqueriesHelper(ctx, a, newSq.(*plan.Subquery).Query, scope.NewScopeFromSubqueryExpression(node, newSq.(*plan.Subquery).Correlated()), sel)
   173  				if err != nil {
   174  					return e, transform.SameTree, err
   175  				}
   176  
   177  				if same1 && same2 {
   178  					return e, transform.SameTree, nil
   179  				} else {
   180  					return newSq.(*plan.Subquery).WithQuery(newExpr), transform.NewTree, nil
   181  				}
   182  			} else {
   183  				return e, transform.SameTree, nil
   184  			}
   185  		})
   186  	}
   187  
   188  	return transform.NodeWithCtx(node, selFunc, conFunc)
   189  }
   190  
   191  func resolveSubqueriesHelper(ctx *sql.Context, a *Analyzer, node sql.Node, scope *plan.Scope, sel RuleSelector, finalize bool) (sql.Node, transform.TreeIdentity, error) {
   192  	return transform.NodeWithCtx(node, nil, func(c transform.Context) (sql.Node, transform.TreeIdentity, error) {
   193  		n := c.Node
   194  		if sqa, ok := n.(*plan.SubqueryAlias); ok {
   195  			// IsLateral means that the subquery should have visibility into the left scope.
   196  			if parent, ok := c.Parent.(*plan.JoinNode); ok && sqa.IsLateral {
   197  				subScope := addLeftTablesToScope(scope, parent.Left())
   198  				return analyzeSubqueryAlias(ctx, a, sqa, subScope, sel, finalize)
   199  			}
   200  			return analyzeSubqueryAlias(ctx, a, sqa, scope, sel, finalize)
   201  		} else {
   202  			return transform.OneNodeExprsWithNode(n, func(node sql.Node, e sql.Expression) (sql.Expression, transform.TreeIdentity, error) {
   203  				if sq, ok := e.(*plan.Subquery); ok {
   204  					return analyzeSubqueryExpression(ctx, a, n, sq, scope, sel, finalize)
   205  				} else {
   206  					return e, transform.SameTree, nil
   207  				}
   208  			})
   209  		}
   210  	})
   211  }
   212  
   213  // flattenTableAliases transforms TableAlias nodes that contain a SubqueryAlias or TableAlias node as the immediate
   214  // child so that the top level TableAlias is removed and the nested SubqueryAlias or nested TableAlias is the new top
   215  // level node, making sure to capture the alias name and transfer it to the new node. The parser doesn't directly
   216  // create this nested structure; it occurs as the execution plan is built and altered during analysis, for
   217  // example with CTEs that get plugged into the execution plan as the analyzer processes it.
   218  func flattenTableAliases(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) {
   219  	span, ctx := ctx.Span("flatten_table_aliases")
   220  	defer span.End()
   221  	return transform.Node(n, func(n sql.Node) (sql.Node, transform.TreeIdentity, error) {
   222  		switch n := n.(type) {
   223  		case *plan.TableAlias:
   224  			if sa, isSA := n.Children()[0].(*plan.SubqueryAlias); isSA {
   225  				return sa.WithName(n.Name()), transform.NewTree, nil
   226  			}
   227  			if ta, isTA := n.Children()[0].(*plan.TableAlias); isTA {
   228  				return ta.WithName(n.Name()), transform.NewTree, nil
   229  			}
   230  			return n, transform.SameTree, nil
   231  		default:
   232  			return n, transform.SameTree, nil
   233  		}
   234  	})
   235  }
   236  
   237  // analyzeSubqueryExpression runs analysis on the specified subquery expression, |sq|. The specified node |n| is the node
   238  // that contains the subquery expression and |finalize| indicates if this is the final run of the analyzer on the query
   239  // before execution, which means all analyzer rules are included, otherwise SubqueryExprResolveSelector is used to prevent
   240  // running pruneColumns and optimizeJoins for all non-final analysis passes.
   241  func analyzeSubqueryExpression(ctx *sql.Context, a *Analyzer, n sql.Node, sq *plan.Subquery, scope *plan.Scope, sel RuleSelector, finalize bool) (sql.Expression, transform.TreeIdentity, error) {
   242  	// We always analyze subquery expressions even if they are resolved, since other transformations to the surrounding
   243  	// query might cause them to need to shift their field indexes.
   244  	subqueryCtx, cancelFunc := ctx.NewSubContext()
   245  	defer cancelFunc()
   246  
   247  	var analyzed sql.Node
   248  	var err error
   249  	if finalize {
   250  		analyzed, _, err = a.analyzeStartingAtBatch(subqueryCtx, sq.Query,
   251  			scope.NewScopeFromSubqueryExpression(n, sq.Correlated()), "default-rules", NewFinalizeSubquerySel(sel))
   252  	} else {
   253  		analyzed, _, err = a.analyzeThroughBatch(subqueryCtx, sq.Query,
   254  			scope.NewScopeFromSubqueryExpression(n, sq.Correlated()), "default-rules", NewResolveSubqueryExprSelector(sel))
   255  	}
   256  	if err != nil {
   257  		// We ignore certain errors during non-final passes of the analyzer, deferring them to later analysis passes.
   258  		// Specifically, if the subquery isn't resolved or a column can't be found in the scope node, wait until a later pass.
   259  		if !finalize && (analyzererrors.ErrValidationResolved.Is(err) || sql.ErrTableColumnNotFound.Is(err) || sql.ErrColumnNotFound.Is(err)) {
   260  			// keep the work we have and defer remainder of analysis of this subquery until a later pass
   261  			return sq.WithQuery(analyzed), transform.NewTree, nil
   262  		}
   263  		return nil, transform.SameTree, err
   264  	}
   265  
   266  	//todo(max): Infinite cycles with subqueries, unions, ctes, catalog.
   267  	// we squashed most negative errors, where a rule fails to report a plan change
   268  	// to the expense of positive errors, where a rule reports a change when the plan
   269  	// is the same before/after.
   270  	// .Resolved() might be useful for fixing these bugs.
   271  	return sq.WithQuery(StripPassthroughNodes(analyzed)).WithExecBuilder(a.ExecBuilder), transform.NewTree, nil
   272  }
   273  
   274  // analyzeSubqueryAlias runs analysis on the specified subquery alias, |sqa|. The |finalize| parameter indicates if this is
   275  // the final run of the analyzer on the query before execution, which means all rules, starting from the default-rules
   276  // batch are processed, otherwise only the once-before-default batch of rules is processed for all other non-final passes.
   277  func analyzeSubqueryAlias(ctx *sql.Context, a *Analyzer, sqa *plan.SubqueryAlias, scope *plan.Scope, sel RuleSelector, finalize bool) (sql.Node, transform.TreeIdentity, error) {
   278  	subScope := scope.NewScopeFromSubqueryAlias(sqa)
   279  
   280  	var child sql.Node
   281  	var same transform.TreeIdentity
   282  	var err error
   283  	if finalize {
   284  		child, same, err = a.analyzeStartingAtBatch(ctx, sqa.Child, subScope, "default-rules", NewFinalizeSubquerySel(sel))
   285  	} else {
   286  		child, same, err = a.analyzeThroughBatch(ctx, sqa.Child, subScope, "default-rules", sel)
   287  	}
   288  	if err != nil {
   289  		return nil, same, err
   290  	}
   291  
   292  	if len(sqa.ColumnNames) > 0 {
   293  		schemaLen := schemaLength(child)
   294  		if schemaLen != len(sqa.ColumnNames) {
   295  			return nil, transform.SameTree, sql.ErrColumnCountMismatch.New()
   296  		}
   297  	}
   298  	if same {
   299  		return sqa, transform.SameTree, nil
   300  	}
   301  	newn, err := sqa.WithChildren(StripPassthroughNodes(child))
   302  	return newn, transform.NewTree, err
   303  }
   304  
   305  // StripPassthroughNodes strips all top-level passthrough nodes meant to apply only to top-level queries (query
   306  // tracking, transaction logic, etc) from the node tree given and return the first non-passthrough child element. This
   307  // is useful for when we invoke the analyzer recursively when e.g. analyzing subqueries or triggers
   308  // TODO: instead of stripping this node off after analysis, it would be better to just not add it in the first place.
   309  func StripPassthroughNodes(n sql.Node) sql.Node {
   310  	nodeIsPassthrough := true
   311  	for nodeIsPassthrough {
   312  		switch tn := n.(type) {
   313  		case *plan.QueryProcess:
   314  			n = tn.Child()
   315  		case *plan.TransactionCommittingNode:
   316  			n = tn.Child()
   317  		default:
   318  			nodeIsPassthrough = false
   319  		}
   320  	}
   321  
   322  	return n
   323  }
   324  
   325  // cacheSubqueryAlisesInJoins will look for joins against subquery aliases that
   326  // will repeatedly execute the subquery, and will insert a *plan.CachedResults
   327  // node on top of those nodes. The left-most child of a join root is an exception
   328  // that cannot be cached.
   329  func cacheSubqueryAliasesInJoins(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) {
   330  	var recurse func(n sql.Node, parentCached, inJoin, rootJoinT1 bool) (sql.Node, transform.TreeIdentity, error)
   331  	recurse = func(n sql.Node, parentCached, inJoin, foundFirstRel bool) (sql.Node, transform.TreeIdentity, error) {
   332  		_, isOp := n.(sql.OpaqueNode)
   333  		var isCacheableSq bool
   334  		var isCachedRs bool
   335  		var isMax1Row bool
   336  		switch n := n.(type) {
   337  		case *plan.JoinNode:
   338  			if !inJoin {
   339  				inJoin = true
   340  				foundFirstRel = false
   341  			}
   342  		case *plan.SubqueryAlias:
   343  			isCacheableSq = n.CanCacheResults()
   344  		case *plan.CachedResults:
   345  			isCachedRs = true
   346  		case *plan.Max1Row:
   347  			isMax1Row = true
   348  		default:
   349  
   350  		}
   351  
   352  		doCache := isCacheableSq && inJoin && !parentCached
   353  		childInJoin := inJoin && !isOp
   354  
   355  		if inJoin && !foundFirstRel {
   356  			switch n.(type) {
   357  			case sql.Nameable:
   358  				doCache = false
   359  				foundFirstRel = true
   360  			default:
   361  			}
   362  		}
   363  
   364  		children := n.Children()
   365  		var newChildren []sql.Node
   366  		for i, c := range children {
   367  			child, same, _ := recurse(c, doCache || isCachedRs || isMax1Row, childInJoin, foundFirstRel)
   368  			if !same {
   369  				if newChildren == nil {
   370  					newChildren = make([]sql.Node, len(children))
   371  					copy(newChildren, children)
   372  				}
   373  				newChildren[i] = child
   374  			}
   375  		}
   376  
   377  		if len(newChildren) == 0 && !doCache {
   378  			return n, transform.SameTree, nil
   379  		}
   380  
   381  		ret := n
   382  		if len(newChildren) > 0 {
   383  			ret, _ = ret.WithChildren(newChildren...)
   384  		}
   385  		if doCache {
   386  			ret = plan.NewCachedResults(n)
   387  		}
   388  		return ret, transform.NewTree, nil
   389  	}
   390  	return recurse(n, false, false, false)
   391  }