github.com/dolthub/go-mysql-server@v0.18.0/sql/analyzer/pushdown.go (about)

     1  // Copyright 2020-2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package analyzer
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"github.com/dolthub/go-mysql-server/sql"
    21  	"github.com/dolthub/go-mysql-server/sql/expression"
    22  	"github.com/dolthub/go-mysql-server/sql/plan"
    23  	"github.com/dolthub/go-mysql-server/sql/transform"
    24  )
    25  
    26  // pushFilters moves filter nodes down to their appropriate relations.
    27  // Filters that reference a single relation will wrap their target tables.
    28  // Filters that reference multiple tables will move as low in the join tree
    29  // as is appropriate. We never move a filter without deleting from the source.
    30  // Related rules: hoistOutOfScopeFilters, moveJoinConditionsToFilter.
    31  func pushFilters(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) {
    32  	span, ctx := ctx.Span("push_filters")
    33  	defer span.End()
    34  
    35  	if !canDoPushdown(n) {
    36  		return n, transform.SameTree, nil
    37  	}
    38  
    39  	pushdownAboveTables := func(n sql.Node, filters *filterSet) (sql.Node, transform.TreeIdentity, error) {
    40  		return transform.NodeWithCtx(n, filterPushdownChildSelector, func(c transform.Context) (sql.Node, transform.TreeIdentity, error) {
    41  			switch node := c.Node.(type) {
    42  			case *plan.Filter:
    43  				// Notably, filters are allowed to be pushed through other filters.
    44  				// This prevents filters hoisted from join conditions from being
    45  				// orphaned in the middle of join trees.
    46  				if f, ok := node.Child.(*plan.Filter); ok {
    47  					if node.Expression == f.Expression {
    48  						return f, transform.NewTree, nil
    49  					}
    50  					return plan.NewFilter(expression.JoinAnd(node.Expression, f.Expression), f.Child), transform.NewTree, nil
    51  				}
    52  				return node, transform.SameTree, nil
    53  			case *plan.TableAlias, *plan.ResolvedTable, *plan.ValueDerivedTable, sql.TableFunction:
    54  				table, same, err := pushdownFiltersToAboveTable(ctx, a, node.(sql.NameableNode), scope, filters)
    55  				if err != nil {
    56  					return nil, transform.SameTree, err
    57  				}
    58  				if same {
    59  					return node, transform.SameTree, nil
    60  				}
    61  				return table, transform.NewTree, nil
    62  			default:
    63  				return node, transform.SameTree, nil
    64  			}
    65  		})
    66  	}
    67  
    68  	tableAliases, err := getTableAliases(n, scope)
    69  	if err != nil {
    70  		return nil, transform.SameTree, err
    71  	}
    72  
    73  	// For each filter node, we want to push its predicates as low as possible.
    74  	return transform.Node(n, func(node sql.Node) (sql.Node, transform.TreeIdentity, error) {
    75  		switch n := node.(type) {
    76  		case *plan.Filter:
    77  			switch n.Child.(type) {
    78  			case *plan.TableAlias, *plan.ResolvedTable, *plan.IndexedTableAccess, *plan.ValueDerivedTable:
    79  				// can't push any lower
    80  				return n, transform.SameTree, nil
    81  			default:
    82  			}
    83  			// Find all col exprs and group them by the table they mention so that we can keep track of which ones
    84  			// have been pushed down and need to be removed from the parent filter
    85  			filtersByTable := getFiltersByTable(n)
    86  			filters := newFilterSet(n.Expression, filtersByTable, tableAliases)
    87  
    88  			// move filter predicates directly above their respective tables in joins
    89  			ret, same, err := pushdownAboveTables(n, filters)
    90  			if same || err != nil {
    91  				return n, transform.SameTree, err
    92  			}
    93  
    94  			retF, ok := ret.(*plan.Filter)
    95  			if !ok {
    96  				return n, transform.SameTree, fmt.Errorf("pushdown mistakenly converted filter to non-filter: %T", ret)
    97  			}
    98  			// remove handled
    99  			newF := removePushedDownPredicates(ctx, a, retF, filters)
   100  			if newF != nil {
   101  				same = transform.NewTree
   102  				ret = newF
   103  			}
   104  			return ret, same, nil
   105  		default:
   106  			return n, transform.SameTree, nil
   107  		}
   108  	})
   109  }
   110  
   111  // pushdownSubqueryAliasFilters attempts to push conditions in filters down to
   112  // individual subquery aliases.
   113  func pushdownSubqueryAliasFilters(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) {
   114  	span, ctx := ctx.Span("pushdown_subquery_alias_filters")
   115  	defer span.End()
   116  
   117  	if !canDoPushdown(n) {
   118  		return n, transform.SameTree, nil
   119  	}
   120  
   121  	if !hasSubqueryAlias(n) {
   122  		return n, transform.SameTree, nil
   123  	}
   124  
   125  	tableAliases, err := getTableAliases(n, scope)
   126  	if err != nil {
   127  		return nil, transform.SameTree, err
   128  	}
   129  
   130  	return transformPushdownSubqueryAliasFilters(ctx, a, n, scope, tableAliases)
   131  }
   132  
   133  func hasSubqueryAlias(n sql.Node) bool {
   134  	return transform.InspectUp(n, func(n sql.Node) bool {
   135  		_, isSubq := n.(*plan.SubqueryAlias)
   136  		return isSubq
   137  	})
   138  }
   139  
   140  // canDoPushdown returns whether the node given can safely be analyzed for pushdown
   141  func canDoPushdown(n sql.Node) bool {
   142  	if !n.Resolved() {
   143  		return false
   144  	}
   145  
   146  	if plan.IsNoRowNode(n) {
   147  		return false
   148  	}
   149  
   150  	// The values of an insert are analyzed in isolation, so they do get pushdown treatment. But no other DML
   151  	// statements should get pushdown to their target tables.
   152  	switch n.(type) {
   153  	case *plan.InsertInto:
   154  		return false
   155  	}
   156  
   157  	return true
   158  }
   159  
   160  // Pushing down a filter is incompatible with the secondary table in a Left
   161  // or Right join. If we push a predicate on the secondary table below the
   162  // join, we end up not evaluating it in all cases (since the secondary table
   163  // result is sometimes null in these types of joins). It must be evaluated
   164  // only after the join result is computed.
   165  func filterPushdownChildSelector(c transform.Context) bool {
   166  	switch c.Node.(type) {
   167  	case *plan.Limit:
   168  		return false
   169  	}
   170  
   171  	switch n := c.Parent.(type) {
   172  	case *plan.TableAlias:
   173  		return false
   174  	case *plan.Window:
   175  		// Windows operate across the rows they see and cannot have
   176  		// filters pushed below them. Instead, the step will be run
   177  		// again by the Transform function, starting at this node.
   178  		return false
   179  	case *plan.JoinNode:
   180  		switch {
   181  		case n.Op.IsMerge():
   182  			return false
   183  		case n.Op.IsLookup():
   184  			if n.JoinType().IsLeftOuter() {
   185  				return c.ChildNum == 0
   186  			}
   187  			return true
   188  		case n.Op.IsLeftOuter():
   189  			return c.ChildNum == 0
   190  		default:
   191  		}
   192  	default:
   193  	}
   194  	return true
   195  }
   196  
   197  func transformPushdownSubqueryAliasFilters(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, tableAliases TableAliases) (sql.Node, transform.TreeIdentity, error) {
   198  	var filters *filterSet
   199  
   200  	transformFilterNode := func(n *plan.Filter) (sql.Node, transform.TreeIdentity, error) {
   201  		return transform.NodeWithCtx(n, filterPushdownChildSelector, func(c transform.Context) (sql.Node, transform.TreeIdentity, error) {
   202  			switch node := c.Node.(type) {
   203  			case *plan.Filter:
   204  				newF := removePushedDownPredicates(ctx, a, node, filters)
   205  				if newF == nil {
   206  					return node, transform.SameTree, nil
   207  				}
   208  				return newF, transform.NewTree, nil
   209  			case *plan.SubqueryAlias:
   210  				return pushdownFiltersUnderSubqueryAlias(ctx, a, node, filters)
   211  			default:
   212  				return node, transform.SameTree, nil
   213  			}
   214  		})
   215  	}
   216  
   217  	// For each filter node, we want to push its predicates as low as possible.
   218  	return transform.Node(n, func(n sql.Node) (sql.Node, transform.TreeIdentity, error) {
   219  		switch n := n.(type) {
   220  		case *plan.Filter:
   221  			// First step is to find all col exprs and group them by the table they mention.
   222  			filtersByTable := getFiltersByTable(n)
   223  			filters = newFilterSet(n.Expression, filtersByTable, tableAliases)
   224  			return transformFilterNode(n)
   225  		default:
   226  			return n, transform.SameTree, nil
   227  		}
   228  	})
   229  }
   230  
   231  // pushdownFiltersToAboveTable introduces a filter node with the given predicate
   232  func pushdownFiltersToAboveTable(
   233  	ctx *sql.Context,
   234  	a *Analyzer,
   235  	tableNode sql.NameableNode,
   236  	scope *plan.Scope,
   237  	filters *filterSet,
   238  ) (sql.Node, transform.TreeIdentity, error) {
   239  	table := getTable(tableNode)
   240  	if table == nil || plan.IsDualTable(table) {
   241  		return tableNode, transform.SameTree, nil
   242  	}
   243  
   244  	// Move any remaining filters for the table directly above the table itself
   245  	var pushedDownFilterExpression sql.Expression
   246  	if tableFilters := filters.availableFiltersForTable(ctx, tableNode.Name()); len(tableFilters) > 0 {
   247  		filters.markFiltersHandled(tableFilters...)
   248  		pushedDownFilterExpression = expression.JoinAnd(tableFilters...)
   249  
   250  		a.Log(
   251  			"pushed down filters %s above table %q, %d filters handled of %d",
   252  			tableFilters,
   253  			tableNode.Name(),
   254  			len(tableFilters),
   255  			len(tableFilters),
   256  		)
   257  	}
   258  
   259  	switch tableNode.(type) {
   260  	case *plan.ResolvedTable, *plan.TableAlias, *plan.ValueDerivedTable:
   261  		if pushedDownFilterExpression != nil {
   262  			return plan.NewFilter(pushedDownFilterExpression, tableNode), transform.NewTree, nil
   263  		}
   264  
   265  		return tableNode, transform.SameTree, nil
   266  	default:
   267  		return nil, transform.SameTree, ErrInvalidNodeType.New("pushdownFiltersToAboveTable", tableNode)
   268  	}
   269  }
   270  
   271  // pushdownFiltersUnderSubqueryAlias takes |filters| applying to the subquery
   272  // alias a moves them under the subquery alias. Because the subquery alias is
   273  // Opaque, it behaves a little bit like a FilteredTable, and pushing the
   274  // filters down below it can help find index usage opportunities later in the
   275  // analysis phase.
   276  func pushdownFiltersUnderSubqueryAlias(ctx *sql.Context, a *Analyzer, sa *plan.SubqueryAlias, filters *filterSet) (sql.Node, transform.TreeIdentity, error) {
   277  	if sa.ScopeMapping == nil {
   278  		return sa, transform.SameTree, nil
   279  	}
   280  	handled := filters.availableFiltersForTable(ctx, sa.Name())
   281  	if len(handled) == 0 {
   282  		return sa, transform.SameTree, nil
   283  	}
   284  	filters.markFiltersHandled(handled...)
   285  	// |handled| is in terms of the parent schema, and in particular the
   286  	// |Source| is the alias name. Rewrite it to refer to the |sa.Child|
   287  	// schema instead.
   288  	expressionsForChild := make([]sql.Expression, len(handled))
   289  	var err error
   290  	for i, h := range handled {
   291  		expressionsForChild[i], _, err = transform.Expr(h, func(e sql.Expression) (sql.Expression, transform.TreeIdentity, error) {
   292  			if gt, ok := e.(*expression.GetField); ok {
   293  				gf, ok := sa.ScopeMapping[gt.Id()]
   294  				if !ok {
   295  					return e, transform.SameTree, fmt.Errorf("unable to find child with id: %d", gt.Index())
   296  				}
   297  				return gf, transform.NewTree, nil
   298  			}
   299  			return e, transform.SameTree, nil
   300  		})
   301  		if err != nil {
   302  			return sa, transform.SameTree, err
   303  		}
   304  	}
   305  
   306  	n, err := sa.WithChildren(plan.NewFilter(expression.JoinAnd(expressionsForChild...), sa.Child))
   307  	if err != nil {
   308  		return nil, transform.SameTree, err
   309  	}
   310  	return n, transform.NewTree, nil
   311  }
   312  
   313  // removePushedDownPredicates removes all handled filter predicates from the filter given and returns. If all
   314  // predicates have been handled, it replaces the filter with its child.
   315  func removePushedDownPredicates(ctx *sql.Context, a *Analyzer, node *plan.Filter, filters *filterSet) sql.Node {
   316  	if filters.handledCount() == 0 {
   317  		a.Log("no handled filters, leaving filter untouched")
   318  		return nil
   319  	}
   320  
   321  	// figure out if the filter's filters were all handled
   322  	filterExpressions := expression.SplitConjunction(node.Expression)
   323  	unhandled := subtractExprSet(filterExpressions, filters.handledFilters)
   324  	if len(unhandled) == 0 {
   325  		a.Log("filter node has no unhandled filters, so it will be removed")
   326  		return node.Child
   327  	}
   328  
   329  	if len(unhandled) == len(filterExpressions) {
   330  		a.Log("no filters removed from filter node")
   331  		return nil
   332  	}
   333  
   334  	a.Log(
   335  		"filters removed from filter node: %s\nfilter has now %d filters: %s",
   336  		filters.handledFilters,
   337  		len(unhandled),
   338  		unhandled,
   339  	)
   340  
   341  	return plan.NewFilter(expression.JoinAnd(unhandled...), node.Child)
   342  }