github.com/dolthub/go-mysql-server@v0.18.0/sql/analyzer/hoist_filters.go (about)

     1  package analyzer
     2  
     3  import (
     4  	"fmt"
     5  
     6  	"github.com/dolthub/go-mysql-server/sql"
     7  	"github.com/dolthub/go-mysql-server/sql/expression"
     8  	"github.com/dolthub/go-mysql-server/sql/plan"
     9  	"github.com/dolthub/go-mysql-server/sql/transform"
    10  )
    11  
    12  // hoistOutOfScopeFilters pulls filters upwards into the parent scope
    13  // to decorrelate subqueries for further optimizations.
    14  //
    15  // select * from xy where exists (select * from uv where x = 1)
    16  // =>
    17  // select * from xy where x = 1 and exists (select * from uv)
    18  func hoistOutOfScopeFilters(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) {
    19  	switch n.(type) {
    20  	case *plan.TriggerBeginEndBlock:
    21  		return n, transform.SameTree, nil
    22  	default:
    23  	}
    24  
    25  	inCorr := sql.ColSet{}
    26  	if sq, ok := n.(*plan.SubqueryAlias); ok {
    27  		inCorr = sq.Correlated
    28  	}
    29  
    30  	// todo: seems like inCorr/outCorr should match
    31  	ret, same, filters, outCorr, err := recurseSubqueryForOuterFilters(n, a, inCorr)
    32  	if len(filters) != 0 {
    33  		return n, transform.SameTree, fmt.Errorf("rule 'hoistOutOfScopeFilters' tried to hoist filters above root node")
    34  	}
    35  
    36  	if sq, ok := ret.(*plan.SubqueryAlias); ok {
    37  		ret = sq.WithCorrelated(outCorr)
    38  	}
    39  	return ret, same, err
    40  }
    41  
    42  // recurseSubqueryForOuterFilters recursively hoists filters that belong
    43  // to an outer scope (maybe higher than the parent). We do a DFS for hoisting
    44  // subquery filters. We do a BFS to extract hoistable filters from subquery
    45  // expressions before checking the normalized subquery and its hoisted
    46  // filters for further hoisting.
    47  func recurseSubqueryForOuterFilters(n sql.Node, a *Analyzer, corr sql.ColSet) (sql.Node, transform.TreeIdentity, []sql.Expression, sql.ColSet, error) {
    48  	var hoistFilters []sql.Expression
    49  	var newCorr sql.ColSet
    50  	ret, same, err := transform.Node(n, func(n sql.Node) (sql.Node, transform.TreeIdentity, error) {
    51  		sq, _ := n.(*plan.SubqueryAlias)
    52  		if sq != nil {
    53  			corrIn := corr.Union(sq.Correlated)
    54  			newQ, same, hoisted, subCorr, err := recurseSubqueryForOuterFilters(sq.Child, a, corrIn)
    55  			if err != nil {
    56  				return n, transform.SameTree, err
    57  			}
    58  			if same {
    59  				return n, transform.SameTree, nil
    60  			}
    61  			if len(hoisted) > 0 {
    62  				hoistFilters = append(hoistFilters, hoisted...)
    63  			}
    64  			newCorr = newCorr.Union(subCorr)
    65  			return sq.WithChild(newQ).WithCorrelated(subCorr), transform.NewTree, nil
    66  		}
    67  		f, _ := n.(*plan.Filter)
    68  		if f == nil {
    69  			return n, transform.SameTree, nil
    70  		}
    71  
    72  		var keepFilters []sql.Expression
    73  		allSame := transform.SameTree
    74  		queue := expression.SplitConjunction(f.Expression)
    75  		for len(queue) > 0 {
    76  			e := queue[0]
    77  			queue = queue[1:]
    78  
    79  			var not bool
    80  			if n, ok := e.(*expression.Not); ok {
    81  				not = true
    82  				e = n.Child
    83  			}
    84  
    85  			var sq *plan.Subquery
    86  			switch e := e.(type) {
    87  			case *plan.InSubquery:
    88  				sq, _ = e.RightChild.(*plan.Subquery)
    89  			case *plan.ExistsSubquery:
    90  				sq = e.Query
    91  			default:
    92  			}
    93  
    94  			// only try to pull filters from correlated subqueries
    95  			if sq != nil && !sq.Correlated().Empty() {
    96  				children := e.Children()
    97  				corrIn := corr.Union(sq.Correlated())
    98  				newQ, same, hoisted, subCorr, err := recurseSubqueryForOuterFilters(sq.Query, a, corrIn)
    99  				if err != nil {
   100  					return n, transform.SameTree, err
   101  				}
   102  				newCorr = newCorr.Union(subCorr)
   103  				allSame = allSame && same
   104  				newSq := sq.WithQuery(newQ)
   105  				newSq = newSq.WithCorrelated(subCorr)
   106  				children[len(children)-1] = newSq
   107  				e, _ = e.WithChildren(children...)
   108  
   109  				if len(hoisted) > 0 {
   110  					if not {
   111  						// hoisted are tied to parent NOT, more elegant simplification
   112  						// required to expose individual expressions for further hoisting
   113  						e = expression.JoinAnd(e, expression.JoinAnd(hoisted...))
   114  					} else {
   115  						queue = append(queue, hoisted...)
   116  					}
   117  				}
   118  			}
   119  
   120  			if not {
   121  				e = expression.NewNot(e)
   122  			}
   123  
   124  			inScope, outOfScope := partitionFilterByScope(e, corr)
   125  			if !inScope.Empty() {
   126  				// maintain reference to correlations that aren't hoisted
   127  				newCorr = newCorr.Union(outOfScope)
   128  				keepFilters = append(keepFilters, e)
   129  			} else {
   130  				// nothing tethers the subquery to this scope
   131  				hoistFilters = append(hoistFilters, e)
   132  			}
   133  		}
   134  
   135  		if len(hoistFilters) > 0 {
   136  			allSame = transform.NewTree
   137  		}
   138  		if allSame {
   139  			return n, transform.SameTree, nil
   140  		}
   141  
   142  		if corr.Empty() {
   143  			// rootscope or equivalent, there is no benefit from hoisting
   144  			// we should materialize filters
   145  			newFilters := append(keepFilters, hoistFilters...)
   146  			hoistFilters = hoistFilters[:0]
   147  			return plan.NewFilter(expression.JoinAnd(newFilters...), f.Child), transform.NewTree, nil
   148  		}
   149  
   150  		if len(keepFilters) == 0 {
   151  			return f.Child, transform.NewTree, nil
   152  		}
   153  		ret := plan.NewFilter(expression.JoinAnd(keepFilters...), f.Child)
   154  		return ret, transform.NewTree, nil
   155  	})
   156  	return ret, same, hoistFilters, newCorr, err
   157  }
   158  
   159  // partitionFilterByScope returns two colsets that include the in and
   160  // out-of-scope columns referenced in this expression.
   161  func partitionFilterByScope(e sql.Expression, corr sql.ColSet) (inScope, outOfScope sql.ColSet) {
   162  	transform.InspectExpr(e, func(e sql.Expression) bool {
   163  		switch e := e.(type) {
   164  		case *expression.GetField:
   165  			// we're searching for anything in-scope
   166  			// return true if not correlated from outerscope
   167  			id := e.Id()
   168  			if corr.Contains(id) {
   169  				outOfScope.Add(id)
   170  			} else {
   171  				inScope.Add(id)
   172  			}
   173  		case *plan.Subquery:
   174  			// TODO cache in-scope on subqueries?
   175  			transform.Inspect(e.Query, func(n sql.Node) bool {
   176  				if ne, ok := n.(sql.Expressioner); ok {
   177  					for _, e := range ne.Expressions() {
   178  						in, out := partitionFilterByScope(e, corr)
   179  						inScope = inScope.Union(in)
   180  						outOfScope = outOfScope.Union(out)
   181  					}
   182  				}
   183  				return true
   184  			})
   185  		default:
   186  		}
   187  		return false
   188  	})
   189  	return
   190  }