github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/optbuilder/subquery.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package optbuilder
    12  
    13  import (
    14  	"context"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/server/telemetry"
    17  	"github.com/cockroachdb/cockroach/pkg/sql/opt"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    24  	"github.com/cockroachdb/errors"
    25  )
    26  
    27  const multiRowSubqueryErrText = "more than one row returned by a subquery used as an expression"
    28  
    29  // subquery represents a subquery expression in an expression tree
    30  // after it has been type-checked and added to the memo.
    31  type subquery struct {
    32  	// Subquery is the AST Subquery expression.
    33  	*tree.Subquery
    34  
    35  	// cols contains the output columns of the subquery.
    36  	cols []scopeColumn
    37  
    38  	// node is the top level memo node of the subquery.
    39  	node memo.RelExpr
    40  
    41  	// ordering is the ordering requested by the subquery.
    42  	// It is only consulted in certain cases, however (such as the
    43  	// ArrayFlatten operation).
    44  	ordering opt.Ordering
    45  
    46  	// wrapInTuple is true if the subquery return type should be wrapped in a
    47  	// tuple. This is true for subqueries that may return multiple rows in
    48  	// comparison expressions (e.g., IN, ANY, ALL) and EXISTS expressions.
    49  	wrapInTuple bool
    50  
    51  	// typ is the lazily resolved type of the subquery.
    52  	typ *types.T
    53  
    54  	// outerCols stores the set of outer columns in the subquery. These are
    55  	// columns which are referenced within the subquery but are bound in an
    56  	// outer scope.
    57  	outerCols opt.ColSet
    58  
    59  	// desiredNumColumns specifies the desired number of columns for the subquery.
    60  	// Specifying -1 for desiredNumColumns allows the subquery to return any
    61  	// number of columns and is used when the normal type checking machinery will
    62  	// verify that the correct number of columns is returned.
    63  	desiredNumColumns int
    64  
    65  	// extraColsAllowed indicates that extra columns built from the subquery
    66  	// (such as columns for which orderings have been requested) will not be
    67  	// stripped away.
    68  	extraColsAllowed bool
    69  
    70  	// scope is the input scope of the subquery. It is needed to lazily build
    71  	// the subquery in TypeCheck.
    72  	scope *scope
    73  }
    74  
    75  // isMultiRow returns whether the subquery can return multiple rows.
    76  func (s *subquery) isMultiRow() bool {
    77  	return s.wrapInTuple && !s.Exists
    78  }
    79  
    80  // Walk is part of the tree.Expr interface.
    81  func (s *subquery) Walk(v tree.Visitor) tree.Expr {
    82  	return s
    83  }
    84  
    85  // TypeCheck is part of the tree.Expr interface.
    86  func (s *subquery) TypeCheck(
    87  	_ context.Context, _ *tree.SemaContext, desired *types.T,
    88  ) (tree.TypedExpr, error) {
    89  	if s.typ != nil {
    90  		return s, nil
    91  	}
    92  
    93  	// Convert desired to an array of desired types for building the subquery.
    94  	desiredTypes := desired.TupleContents()
    95  
    96  	// Build the subquery. We cannot build the subquery earlier because we do
    97  	// not know the desired types until TypeCheck is called.
    98  	s.buildSubquery(desiredTypes)
    99  
   100  	// The typing for subqueries is complex, but regular.
   101  	//
   102  	// * If the subquery is part of an EXISTS statement:
   103  	//
   104  	//   The type of the subquery is always "bool".
   105  	//
   106  	// * If the subquery is used in a single-row context:
   107  	//
   108  	//   - If the subquery returns a single column with type "U", the type of the
   109  	//     subquery is the type of the column "U". For example:
   110  	//
   111  	//       SELECT 1 = (SELECT 1)
   112  	//
   113  	//     The type of the subquery is "int".
   114  	//
   115  	//   - If the subquery returns multiple columns, the type of the subquery is
   116  	//     "tuple{C}" where "C" expands to all of the types of the columns of the
   117  	//     subquery. For example:
   118  	//
   119  	//       SELECT (1, 'a') = (SELECT 1, 'a')
   120  	//
   121  	//     The type of the subquery is "tuple{int,string}"
   122  	//
   123  	// * If the subquery is used in a multi-row context:
   124  	//
   125  	//   - If the subquery returns a single column with type "U", the type of the
   126  	//     subquery is the singleton tuple of type "U": "tuple{U}". For example:
   127  	//
   128  	//       SELECT 1 IN (SELECT 1)
   129  	//
   130  	//     The type of the subquery's columns is "int" and the type of the
   131  	//     subquery is "tuple{int}".
   132  	//
   133  	//   - If the subquery returns multiple columns, the type of the subquery is
   134  	//     "tuple{tuple{C}}" where "C expands to all of the types of the columns
   135  	//     of the subquery. For example:
   136  	//
   137  	//       SELECT (1, 'a') IN (SELECT 1, 'a')
   138  	//
   139  	//     The types of the subquery's columns are "int" and "string". These are
   140  	//     wrapped into "tuple{int,string}" to form the row type. And these are
   141  	//     wrapped again to form the subquery type "tuple{tuple{int,string}}".
   142  	//
   143  	// Note that these rules produce a somewhat surprising equivalence:
   144  	//
   145  	//   SELECT (SELECT 1, 2) = (SELECT (1, 2))
   146  	//
   147  	// A subquery which returns a single column tuple is equivalent to a subquery
   148  	// which returns the elements of the tuple as individual columns. While
   149  	// surprising, this is necessary for regularity and in order to handle:
   150  	//
   151  	//   SELECT 1 IN (SELECT 1)
   152  	//
   153  	// Without that auto-unwrapping of single-column subqueries, this query would
   154  	// type check as "<int> IN <tuple{tuple{int}}>" which would fail.
   155  
   156  	if s.Exists {
   157  		s.typ = types.Bool
   158  		return s, nil
   159  	}
   160  
   161  	if len(s.cols) == 1 {
   162  		s.typ = s.cols[0].typ
   163  	} else {
   164  		contents := make([]*types.T, len(s.cols))
   165  		labels := make([]string, len(s.cols))
   166  		for i := range s.cols {
   167  			contents[i] = s.cols[i].typ
   168  			labels[i] = string(s.cols[i].name)
   169  		}
   170  		s.typ = types.MakeLabeledTuple(contents, labels)
   171  	}
   172  
   173  	if s.wrapInTuple {
   174  		// The subquery is in a multi-row context. For example:
   175  		//
   176  		//   SELECT 1 IN (SELECT * FROM t)
   177  		//
   178  		// Wrap the type in a tuple.
   179  		//
   180  		// TODO(peter): Using a tuple type to represent a multi-row
   181  		// subquery works with the current type checking code, but seems
   182  		// semantically incorrect. A tuple represents a fixed number of
   183  		// elements. Instead, we should introduce a new vtuple type.
   184  		s.typ = types.MakeTuple([]*types.T{s.typ})
   185  	}
   186  
   187  	return s, nil
   188  }
   189  
   190  // ResolvedType is part of the tree.TypedExpr interface.
   191  func (s *subquery) ResolvedType() *types.T {
   192  	return s.typ
   193  }
   194  
   195  // Eval is part of the tree.TypedExpr interface.
   196  func (s *subquery) Eval(_ *tree.EvalContext) (tree.Datum, error) {
   197  	panic(errors.AssertionFailedf("subquery must be replaced before evaluation"))
   198  }
   199  
   200  // buildSubquery builds a relational expression that represents this subquery.
   201  // It stores the resulting relational expression in s.node, and also updates
   202  // s.cols and s.ordering with the output columns and ordering of the subquery.
   203  func (s *subquery) buildSubquery(desiredTypes []*types.T) {
   204  	if s.scope.replaceSRFs {
   205  		// We need to save and restore the previous value of the replaceSRFs field in
   206  		// case we are recursively called within a subquery context.
   207  		defer func() { s.scope.replaceSRFs = true }()
   208  		s.scope.replaceSRFs = false
   209  	}
   210  
   211  	// Save and restore the previous value of s.builder.subquery in case we are
   212  	// recursively called within a subquery context.
   213  	outer := s.scope.builder.subquery
   214  	defer func() { s.scope.builder.subquery = outer }()
   215  	s.scope.builder.subquery = s
   216  
   217  	// We must push() here so that the columns in s.scope are correctly identified
   218  	// as outer columns.
   219  	outScope := s.scope.builder.buildStmt(s.Subquery.Select, desiredTypes, s.scope.push())
   220  	ord := outScope.ordering
   221  
   222  	// Treat the subquery result as an anonymous data source (i.e. column names
   223  	// are not qualified). Remove hidden columns, as they are not accessible
   224  	// outside the subquery.
   225  	outScope.setTableAlias("")
   226  	outScope.removeHiddenCols()
   227  
   228  	if s.desiredNumColumns > 0 && len(outScope.cols) != s.desiredNumColumns {
   229  		n := len(outScope.cols)
   230  		switch s.desiredNumColumns {
   231  		case 1:
   232  			panic(pgerror.Newf(pgcode.Syntax,
   233  				"subquery must return only one column, found %d", n))
   234  		default:
   235  			panic(pgerror.Newf(pgcode.Syntax,
   236  				"subquery must return %d columns, found %d", s.desiredNumColumns, n))
   237  		}
   238  	}
   239  
   240  	if len(outScope.extraCols) > 0 && !s.extraColsAllowed {
   241  		// We need to add a projection to remove the extra columns.
   242  		projScope := outScope.push()
   243  		projScope.appendColumnsFromScope(outScope)
   244  		projScope.expr = s.scope.builder.constructProject(outScope.expr.(memo.RelExpr), projScope.cols)
   245  		outScope = projScope
   246  	}
   247  
   248  	s.cols = outScope.cols
   249  	s.node = outScope.expr.(memo.RelExpr)
   250  	s.ordering = ord
   251  }
   252  
   253  // buildSubqueryProjection ensures that a subquery returns exactly one column.
   254  // If the original subquery has more than one column, buildSubqueryProjection
   255  // wraps it in a projection which has a single tuple column containing all the
   256  // original columns: tuple{col1, col2...}.
   257  func (b *Builder) buildSubqueryProjection(
   258  	s *subquery, inScope *scope,
   259  ) (out memo.RelExpr, outScope *scope) {
   260  	out = s.node
   261  	outScope = inScope.replace()
   262  
   263  	switch len(s.cols) {
   264  	case 0:
   265  		// This can be obtained with:
   266  		// CREATE TABLE t(x INT); ALTER TABLE t DROP COLUMN x;
   267  		// SELECT (SELECT * FROM t) = (SELECT * FROM t);
   268  		panic(pgerror.Newf(pgcode.Syntax,
   269  			"subquery must return only one column"))
   270  
   271  	case 1:
   272  		outScope.cols = append(outScope.cols, s.cols[0])
   273  
   274  	default:
   275  		// Wrap the subquery in a projection with a single column.
   276  		// col1, col2... from the subquery becomes tuple{col1, col2...} in the
   277  		// projection.
   278  		cols := make(tree.Exprs, len(s.cols))
   279  		els := make(memo.ScalarListExpr, len(s.cols))
   280  		contents := make([]*types.T, len(s.cols))
   281  		for i := range s.cols {
   282  			cols[i] = &s.cols[i]
   283  			contents[i] = s.cols[i].ResolvedType()
   284  			els[i] = b.factory.ConstructVariable(s.cols[i].id)
   285  		}
   286  		typ := types.MakeTuple(contents)
   287  
   288  		texpr := tree.NewTypedTuple(typ, cols)
   289  		tup := b.factory.ConstructTuple(els, typ)
   290  		col := b.synthesizeColumn(outScope, "", texpr.ResolvedType(), texpr, tup)
   291  		out = b.constructProject(out, []scopeColumn{*col})
   292  	}
   293  
   294  	telemetry.Inc(sqltelemetry.SubqueryUseCounter)
   295  
   296  	return out, outScope
   297  }
   298  
   299  // buildSingleRowSubquery builds a set of memo groups that represent the given
   300  // subquery. This function should only be called for subqueries in a single-row
   301  // context, such as `SELECT (1, 'a') = (SELECT 1, 'a')`.
   302  //
   303  // See Builder.buildStmt for a description of the remaining input and
   304  // return values.
   305  func (b *Builder) buildSingleRowSubquery(
   306  	s *subquery, inScope *scope,
   307  ) (out opt.ScalarExpr, outScope *scope) {
   308  	subqueryPrivate := memo.SubqueryPrivate{OriginalExpr: s.Subquery}
   309  	if s.Exists {
   310  		return b.factory.ConstructExists(s.node, &subqueryPrivate), inScope
   311  	}
   312  
   313  	var input memo.RelExpr
   314  	input, outScope = b.buildSubqueryProjection(s, inScope)
   315  
   316  	// Wrap the subquery in a Max1Row operator to enforce that it should return
   317  	// at most one row. Max1Row may be removed by the optimizer later if it can
   318  	// prove statically that the subquery always returns at most one row.
   319  	input = b.factory.ConstructMax1Row(input, multiRowSubqueryErrText)
   320  
   321  	out = b.factory.ConstructSubquery(input, &subqueryPrivate)
   322  	return out, outScope
   323  }
   324  
   325  // buildMultiRowSubquery transforms a multi-row subquery into a single-row
   326  // subquery for IN, NOT IN, ANY, SOME and ALL expressions. It performs the
   327  // transformation using the Any operator, which returns true if any of the
   328  // values returned by the subquery are true, else returns NULL if any of the
   329  // values are NULL, else returns false.
   330  //
   331  // We use the following transformations:
   332  //
   333  //   <var> IN (<subquery>)
   334  //     ==> ConstructAny(<subquery>, <var>, EqOp)
   335  //
   336  //   <var> NOT IN (<subquery>)
   337  //    ==> ConstructNot(ConstructAny(<subquery>, <var>, EqOp))
   338  //
   339  //   <var> <comp> {SOME|ANY}(<subquery>)
   340  //     ==> ConstructAny(<subquery>, <var>, <comp>)
   341  //
   342  //   <var> <comp> ALL(<subquery>)
   343  //     ==> ConstructNot(ConstructAny(<subquery>, <var>, Negate(<comp>)))
   344  //
   345  func (b *Builder) buildMultiRowSubquery(
   346  	c *tree.ComparisonExpr, inScope *scope, colRefs *opt.ColSet,
   347  ) (out opt.ScalarExpr, outScope *scope) {
   348  	var input memo.RelExpr
   349  	s := c.Right.(*subquery)
   350  	input, outScope = b.buildSubqueryProjection(s, inScope)
   351  
   352  	scalar := b.buildScalar(c.TypedLeft(), inScope, nil, nil, colRefs)
   353  	outScope = outScope.replace()
   354  
   355  	var cmp opt.Operator
   356  	switch c.Operator {
   357  	case tree.In, tree.NotIn:
   358  		// <var> = x
   359  		cmp = opt.EqOp
   360  
   361  	case tree.Any, tree.Some, tree.All:
   362  		// <var> <comp> x
   363  		cmp = opt.ComparisonOpMap[c.SubOperator]
   364  		if c.Operator == tree.All {
   365  			// NOT(<var> <comp> x)
   366  			cmp = opt.NegateOpMap[cmp]
   367  		}
   368  
   369  	default:
   370  		panic(errors.AssertionFailedf(
   371  			"buildMultiRowSubquery called with operator %v", c.Operator,
   372  		))
   373  	}
   374  
   375  	// Construct the outer Any(...) operator.
   376  	out = b.factory.ConstructAny(input, scalar, &memo.SubqueryPrivate{
   377  		Cmp:          cmp,
   378  		OriginalExpr: s.Subquery,
   379  	})
   380  	switch c.Operator {
   381  	case tree.NotIn, tree.All:
   382  		// NOT Any(...)
   383  		out = b.factory.ConstructNot(out)
   384  	}
   385  
   386  	return out, outScope
   387  }
   388  
   389  var _ tree.Expr = &subquery{}
   390  var _ tree.TypedExpr = &subquery{}
   391  
   392  // SubqueryExpr implements the SubqueryExpr interface.
   393  func (*subquery) SubqueryExpr() {}
   394  
   395  var _ tree.SubqueryExpr = &subquery{}