vitess.io/vitess@v0.16.2/go/vt/vtgate/planbuilder/operators/subquery_planning.go (about)

     1  /*
     2  Copyright 2022 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package operators
    18  
    19  import (
    20  	"vitess.io/vitess/go/vt/sqlparser"
    21  	"vitess.io/vitess/go/vt/vterrors"
    22  	"vitess.io/vitess/go/vt/vtgate/engine"
    23  	"vitess.io/vitess/go/vt/vtgate/evalengine"
    24  	"vitess.io/vitess/go/vt/vtgate/planbuilder/operators/ops"
    25  	"vitess.io/vitess/go/vt/vtgate/planbuilder/operators/rewrite"
    26  	"vitess.io/vitess/go/vt/vtgate/planbuilder/plancontext"
    27  )
    28  
    29  func optimizeSubQuery(ctx *plancontext.PlanningContext, op *SubQuery) (ops.Operator, rewrite.TreeIdentity, error) {
    30  	var unmerged []*SubQueryOp
    31  
    32  	// first loop over the subqueries and try to merge them into the outer plan
    33  	outer := op.Outer
    34  	for _, inner := range op.Inner {
    35  		innerOp := inner.Inner
    36  
    37  		var preds []sqlparser.Expr
    38  		preds, innerOp = unresolvedAndSource(ctx, innerOp)
    39  		merger := func(a, b *Route) (*Route, error) {
    40  			return mergeSubQueryOp(ctx, a, b, inner)
    41  		}
    42  
    43  		newInner := &SubQueryInner{
    44  			Inner:             inner.Inner,
    45  			ExtractedSubquery: inner.ExtractedSubquery,
    46  		}
    47  		merged, err := tryMergeSubQueryOp(ctx, outer, innerOp, newInner, preds, merger)
    48  		if err != nil {
    49  			return nil, rewrite.SameTree, err
    50  		}
    51  
    52  		if merged != nil {
    53  			outer = merged
    54  			continue
    55  		}
    56  
    57  		if len(preds) == 0 {
    58  			// uncorrelated queries
    59  			sq := &SubQueryOp{
    60  				Extracted: inner.ExtractedSubquery,
    61  				Inner:     innerOp,
    62  			}
    63  			unmerged = append(unmerged, sq)
    64  			continue
    65  		}
    66  
    67  		if inner.ExtractedSubquery.OpCode == int(engine.PulloutExists) {
    68  			correlatedTree, err := createCorrelatedSubqueryOp(ctx, innerOp, outer, preds, inner.ExtractedSubquery)
    69  			if err != nil {
    70  				return nil, rewrite.SameTree, err
    71  			}
    72  			outer = correlatedTree
    73  			continue
    74  		}
    75  
    76  		return nil, rewrite.SameTree, vterrors.VT12001("cross-shard correlated subquery")
    77  	}
    78  
    79  	for _, tree := range unmerged {
    80  		tree.Outer = outer
    81  		outer = tree
    82  	}
    83  	return outer, rewrite.NewTree, nil
    84  }
    85  
    86  func unresolvedAndSource(ctx *plancontext.PlanningContext, op ops.Operator) ([]sqlparser.Expr, ops.Operator) {
    87  	preds := UnresolvedPredicates(op, ctx.SemTable)
    88  	if filter, ok := op.(*Filter); ok {
    89  		if ctx.SemTable.ASTEquals().Exprs(preds, filter.Predicates) {
    90  			// if we are seeing a single filter with only these predicates,
    91  			// we can throw away the filter and just use the source
    92  			return preds, filter.Source
    93  		}
    94  	}
    95  
    96  	return preds, op
    97  }
    98  
    99  func mergeSubQueryOp(ctx *plancontext.PlanningContext, outer *Route, inner *Route, subq *SubQueryInner) (*Route, error) {
   100  	subq.ExtractedSubquery.NeedsRewrite = true
   101  	outer.SysTableTableSchema = append(outer.SysTableTableSchema, inner.SysTableTableSchema...)
   102  	for k, v := range inner.SysTableTableName {
   103  		if outer.SysTableTableName == nil {
   104  			outer.SysTableTableName = map[string]evalengine.Expr{}
   105  		}
   106  		outer.SysTableTableName[k] = v
   107  	}
   108  
   109  	// When merging an inner query with its outer query, we can remove the
   110  	// inner query from the list of predicates that can influence routing of
   111  	// the outer query.
   112  	//
   113  	// Note that not all inner queries necessarily are part of the routing
   114  	// predicates list, so this might be a no-op.
   115  	subQueryWasPredicate := false
   116  	for i, predicate := range outer.SeenPredicates {
   117  		if ctx.SemTable.EqualsExpr(predicate, subq.ExtractedSubquery) {
   118  			outer.SeenPredicates = append(outer.SeenPredicates[:i], outer.SeenPredicates[i+1:]...)
   119  
   120  			subQueryWasPredicate = true
   121  
   122  			// The `ExtractedSubquery` of an inner query is unique (due to the uniqueness of bind variable names)
   123  			// so we can stop after the first match.
   124  			break
   125  		}
   126  	}
   127  
   128  	err := outer.resetRoutingSelections(ctx)
   129  	if err != nil {
   130  		return nil, err
   131  	}
   132  
   133  	if subQueryWasPredicate {
   134  		// Copy Vindex predicates from the inner route to the upper route.
   135  		// If we can route based on some of these predicates, the routing can improve
   136  		outer.VindexPreds = append(outer.VindexPreds, inner.VindexPreds...)
   137  
   138  		if inner.RouteOpCode == engine.None {
   139  			outer.setSelectNoneOpcode()
   140  		}
   141  	}
   142  
   143  	outer.MergedWith = append(outer.MergedWith, inner)
   144  
   145  	return outer, nil
   146  }
   147  
   148  func isMergeable(ctx *plancontext.PlanningContext, query sqlparser.SelectStatement, op ops.Operator) bool {
   149  	validVindex := func(expr sqlparser.Expr) bool {
   150  		sc := findColumnVindex(ctx, op, expr)
   151  		return sc != nil && sc.IsUnique()
   152  	}
   153  
   154  	if query.GetLimit() != nil {
   155  		return false
   156  	}
   157  
   158  	sel, ok := query.(*sqlparser.Select)
   159  	if !ok {
   160  		return false
   161  	}
   162  
   163  	if len(sel.GroupBy) > 0 {
   164  		// iff we are grouping, we need to check that we can perform the grouping inside a single shard, and we check that
   165  		// by checking that one of the grouping expressions used is a unique single column vindex.
   166  		// TODO: we could also support the case where all the columns of a multi-column vindex are used in the grouping
   167  		for _, gb := range sel.GroupBy {
   168  			if validVindex(gb) {
   169  				return true
   170  			}
   171  		}
   172  		return false
   173  	}
   174  
   175  	// if we have grouping, we have already checked that it's safe, and don't need to check for aggregations
   176  	// but if we don't have groupings, we need to check if there are aggregations that will mess with us
   177  	if sqlparser.ContainsAggregation(sel.SelectExprs) {
   178  		return false
   179  	}
   180  
   181  	if sqlparser.ContainsAggregation(sel.Having) {
   182  		return false
   183  	}
   184  
   185  	return true
   186  }
   187  
   188  func tryMergeSubQueryOp(
   189  	ctx *plancontext.PlanningContext,
   190  	outer, subq ops.Operator,
   191  	subQueryInner *SubQueryInner,
   192  	joinPredicates []sqlparser.Expr,
   193  	merger mergeFunc,
   194  ) (ops.Operator, error) {
   195  	switch outerOp := outer.(type) {
   196  	case *Filter:
   197  		op, err := tryMergeSubQueryOp(ctx, outerOp.Source, subq, subQueryInner, joinPredicates, merger)
   198  		if err != nil || op == nil {
   199  			return nil, err
   200  		}
   201  		outerOp.Source = op
   202  		return outerOp, nil
   203  	case *Route:
   204  		return tryMergeSubqueryWithRoute(ctx, subq, outerOp, joinPredicates, merger, subQueryInner)
   205  	case *ApplyJoin:
   206  		return tryMergeSubqueryWithJoin(ctx, subq, outerOp, joinPredicates, merger, subQueryInner)
   207  	default:
   208  		return nil, nil
   209  	}
   210  }
   211  
   212  func tryMergeSubqueryWithRoute(
   213  	ctx *plancontext.PlanningContext,
   214  	subq ops.Operator,
   215  	outerOp *Route,
   216  	joinPredicates []sqlparser.Expr,
   217  	merger mergeFunc,
   218  	subQueryInner *SubQueryInner,
   219  ) (ops.Operator, error) {
   220  	subqueryRoute, isRoute := subq.(*Route)
   221  	if !isRoute {
   222  		return nil, nil
   223  	}
   224  
   225  	if outerOp.RouteOpCode == engine.Reference && !subqueryRoute.IsSingleShard() {
   226  		return nil, nil
   227  	}
   228  
   229  	merged, err := tryMerge(ctx, outerOp, subq, joinPredicates, merger)
   230  	if err != nil {
   231  		return nil, err
   232  	}
   233  
   234  	// If the subqueries could be merged here, we're done
   235  	if merged != nil {
   236  		return merged, err
   237  	}
   238  
   239  	if !isMergeable(ctx, subQueryInner.ExtractedSubquery.Subquery.Select, subq) {
   240  		return nil, nil
   241  	}
   242  
   243  	// Special case: Inner query won't return any results / is not routable.
   244  	if subqueryRoute.RouteOpCode == engine.None {
   245  		merged, err := merger(outerOp, subqueryRoute)
   246  		if err != nil {
   247  			return nil, err
   248  		}
   249  		return merged, err
   250  	}
   251  
   252  	// Inner subqueries can be merged with the outer subquery as long as
   253  	// the inner query is a single column selection, and that single column has a matching
   254  	// vindex on the outer query's operand.
   255  	if canMergeSubqueryOnColumnSelection(ctx, outerOp, subqueryRoute, subQueryInner.ExtractedSubquery) {
   256  		merged, err := merger(outerOp, subqueryRoute)
   257  
   258  		if err != nil {
   259  			return nil, err
   260  		}
   261  
   262  		if merged != nil {
   263  			// since we inlined the subquery into the outer query, new vindex options might have been enabled,
   264  			// so we go over our current options to check if anything better has come up.
   265  			merged.PickBestAvailableVindex()
   266  			return merged, err
   267  		}
   268  	}
   269  	return nil, nil
   270  }
   271  
   272  func tryMergeSubqueryWithJoin(
   273  	ctx *plancontext.PlanningContext,
   274  	subq ops.Operator,
   275  	outerOp *ApplyJoin,
   276  	joinPredicates []sqlparser.Expr,
   277  	merger mergeFunc,
   278  	subQueryInner *SubQueryInner,
   279  ) (ops.PhysicalOperator, error) {
   280  	// Trying to merge the subquery with the left-hand or right-hand side of the join
   281  
   282  	if outerOp.LeftJoin {
   283  		return nil, nil
   284  	}
   285  	newMergefunc := func(a, b *Route) (*Route, error) {
   286  		rt, err := merger(a, b)
   287  		if err != nil {
   288  			return nil, err
   289  		}
   290  		outerOp.RHS, err = rewriteColumnsInSubqueryOpForJoin(ctx, outerOp.RHS, outerOp, subQueryInner)
   291  		return rt, err
   292  	}
   293  	merged, err := tryMergeSubQueryOp(ctx, outerOp.LHS, subq, subQueryInner, joinPredicates, newMergefunc)
   294  	if err != nil {
   295  		return nil, err
   296  	}
   297  	if merged != nil {
   298  		outerOp.LHS = merged
   299  		return outerOp, nil
   300  	}
   301  
   302  	newMergefunc = func(a, b *Route) (*Route, error) {
   303  		rt, err := merger(a, b)
   304  		if err != nil {
   305  			return nil, err
   306  		}
   307  		outerOp.LHS, err = rewriteColumnsInSubqueryOpForJoin(ctx, outerOp.LHS, outerOp, subQueryInner)
   308  		return rt, err
   309  	}
   310  	merged, err = tryMergeSubQueryOp(ctx, outerOp.RHS, subq, subQueryInner, joinPredicates, newMergefunc)
   311  	if err != nil {
   312  		return nil, err
   313  	}
   314  	if merged != nil {
   315  		outerOp.RHS = merged
   316  		return outerOp, nil
   317  	}
   318  	return nil, nil
   319  }
   320  
   321  // rewriteColumnsInSubqueryOpForJoin rewrites the columns that appear from the other side
   322  // of the join. For example, let's say we merged a subquery on the right side of a join tree
   323  // If it was using any columns from the left side then they need to be replaced by bind variables supplied
   324  // from that side.
   325  // outerTree is the joinTree within whose children the subquery lives in
   326  // the child of joinTree which does not contain the subquery is the otherTree
   327  func rewriteColumnsInSubqueryOpForJoin(
   328  	ctx *plancontext.PlanningContext,
   329  	innerOp ops.Operator,
   330  	outerTree *ApplyJoin,
   331  	subQueryInner *SubQueryInner,
   332  ) (ops.Operator, error) {
   333  	resultInnerOp := innerOp
   334  	var rewriteError error
   335  	// go over the entire expression in the subquery
   336  	sqlparser.SafeRewrite(subQueryInner.ExtractedSubquery.Original, nil, func(cursor *sqlparser.Cursor) bool {
   337  		node, ok := cursor.Node().(*sqlparser.ColName)
   338  		if !ok {
   339  			return true
   340  		}
   341  
   342  		// check whether the column name belongs to the other side of the join tree
   343  		if !ctx.SemTable.RecursiveDeps(node).IsSolvedBy(TableID(resultInnerOp)) {
   344  			return true
   345  		}
   346  
   347  		// get the bindVariable for that column name and replace it in the subquery
   348  		bindVar := ctx.ReservedVars.ReserveColName(node)
   349  		cursor.Replace(sqlparser.NewArgument(bindVar))
   350  		// check whether the bindVariable already exists in the joinVars of the other tree
   351  		_, alreadyExists := outerTree.Vars[bindVar]
   352  		if alreadyExists {
   353  			return true
   354  		}
   355  		// if it does not exist, then push this as an output column there and add it to the joinVars
   356  		offset, err := resultInnerOp.AddColumn(ctx, node)
   357  		if err != nil {
   358  			rewriteError = err
   359  			return false
   360  		}
   361  		outerTree.Vars[bindVar] = offset
   362  		return true
   363  	})
   364  
   365  	// update the dependencies for the subquery by removing the dependencies from the innerOp
   366  	tableSet := ctx.SemTable.Direct[subQueryInner.ExtractedSubquery.Subquery]
   367  	ctx.SemTable.Direct[subQueryInner.ExtractedSubquery.Subquery] = tableSet.Remove(TableID(resultInnerOp))
   368  	tableSet = ctx.SemTable.Recursive[subQueryInner.ExtractedSubquery.Subquery]
   369  	ctx.SemTable.Recursive[subQueryInner.ExtractedSubquery.Subquery] = tableSet.Remove(TableID(resultInnerOp))
   370  
   371  	// return any error while rewriting
   372  	return resultInnerOp, rewriteError
   373  }
   374  
   375  func createCorrelatedSubqueryOp(
   376  	ctx *plancontext.PlanningContext,
   377  	innerOp, outerOp ops.Operator,
   378  	preds []sqlparser.Expr,
   379  	extractedSubquery *sqlparser.ExtractedSubquery,
   380  ) (*CorrelatedSubQueryOp, error) {
   381  	newOuter, err := RemovePredicate(ctx, extractedSubquery, outerOp)
   382  	if err != nil {
   383  		return nil, vterrors.VT12001("EXISTS sub-queries are only supported with AND clause")
   384  	}
   385  
   386  	resultOuterOp := newOuter
   387  	vars := map[string]int{}
   388  	bindVars := map[*sqlparser.ColName]string{}
   389  	var lhsCols []*sqlparser.ColName
   390  	for _, pred := range preds {
   391  		var rewriteError error
   392  		sqlparser.SafeRewrite(pred, nil, func(cursor *sqlparser.Cursor) bool {
   393  			node, ok := cursor.Node().(*sqlparser.ColName)
   394  			if !ok {
   395  				return true
   396  			}
   397  
   398  			nodeDeps := ctx.SemTable.RecursiveDeps(node)
   399  			if !nodeDeps.IsSolvedBy(TableID(resultOuterOp)) {
   400  				return true
   401  			}
   402  
   403  			// check whether the bindVariable already exists in the map
   404  			// we do so by checking that the column names are the same and their recursive dependencies are the same
   405  			// so the column names `user.a` and `a` would be considered equal as long as both are bound to the same table
   406  			for colName, bindVar := range bindVars {
   407  				if ctx.SemTable.EqualsExpr(node, colName) {
   408  					cursor.Replace(sqlparser.NewArgument(bindVar))
   409  					return true
   410  				}
   411  			}
   412  
   413  			// get the bindVariable for that column name and replace it in the predicate
   414  			bindVar := ctx.ReservedVars.ReserveColName(node)
   415  			cursor.Replace(sqlparser.NewArgument(bindVar))
   416  			// store it in the map for future comparisons
   417  			bindVars[node] = bindVar
   418  
   419  			// if it does not exist, then push this as an output column in the outerOp and add it to the joinVars
   420  			offset, err := resultOuterOp.AddColumn(ctx, node)
   421  			if err != nil {
   422  				rewriteError = err
   423  				return true
   424  			}
   425  			lhsCols = append(lhsCols, node)
   426  			vars[bindVar] = offset
   427  			return true
   428  		})
   429  		if rewriteError != nil {
   430  			return nil, rewriteError
   431  		}
   432  		var err error
   433  		innerOp, err = innerOp.AddPredicate(ctx, pred)
   434  		if err != nil {
   435  			return nil, err
   436  		}
   437  	}
   438  	return &CorrelatedSubQueryOp{
   439  		Outer:      resultOuterOp,
   440  		Inner:      innerOp,
   441  		Extracted:  extractedSubquery,
   442  		Vars:       vars,
   443  		LHSColumns: lhsCols,
   444  	}, nil
   445  }
   446  
   447  // canMergeSubqueryOnColumnSelection will return true if the predicate used allows us to merge the two subqueries
   448  // into a single Route. This can be done if we are comparing two columns that contain data that is guaranteed
   449  // to exist on the same shard.
   450  func canMergeSubqueryOnColumnSelection(ctx *plancontext.PlanningContext, a, b *Route, predicate *sqlparser.ExtractedSubquery) bool {
   451  	left := predicate.OtherSide
   452  	opCode := predicate.OpCode
   453  	if opCode != int(engine.PulloutValue) && opCode != int(engine.PulloutIn) {
   454  		return false
   455  	}
   456  
   457  	lVindex := findColumnVindex(ctx, a, left)
   458  	if lVindex == nil || !lVindex.IsUnique() {
   459  		return false
   460  	}
   461  
   462  	rightSelection := extractSingleColumnSubquerySelection(predicate.Subquery)
   463  	if rightSelection == nil {
   464  		return false
   465  	}
   466  
   467  	rVindex := findColumnVindex(ctx, b, rightSelection)
   468  	if rVindex == nil {
   469  		return false
   470  	}
   471  	return rVindex == lVindex
   472  }
   473  
   474  // Searches for the single column returned from a subquery, like the `col` in `(SELECT col FROM tbl)`
   475  func extractSingleColumnSubquerySelection(subquery *sqlparser.Subquery) *sqlparser.ColName {
   476  	if subquery.Select.GetColumnCount() != 1 {
   477  		return nil
   478  	}
   479  
   480  	columnExpr := subquery.Select.GetColumns()[0]
   481  
   482  	aliasedExpr, ok := columnExpr.(*sqlparser.AliasedExpr)
   483  	if !ok {
   484  		return nil
   485  	}
   486  
   487  	return getColName(aliasedExpr.Expr)
   488  }