github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/optbuilder/groupby.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package optbuilder

// This file has builder code specific to aggregations (queries with GROUP BY,
// HAVING, or aggregate functions).
//
// We build such queries using three operators:
//
//  - a pre-projection: a ProjectOp which generates the columns needed for
//    the aggregation:
//      - group by expressions
//      - arguments to aggregation functions
//
//  - the aggregation: a GroupByOp which has the pre-projection as the
//    input and produces columns with the results of the aggregation
//    functions. The group by columns are also passed through.
//
//  - a post-projection: calculates expressions using the results of the
//    aggregations; this is analogous to the ProjectOp that we would use for a
//    no-aggregation Select.
//
// For example:
//   SELECT 1 + MIN(v*2) FROM kv GROUP BY k+3
//
//   pre-projection:  k+3 (as col1), v*2 (as col2)
//   aggregation:     group by col1, calculate MIN(col2) (as col3)
//   post-projection: 1 + col3

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/sql/opt"
	"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
	"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/errors"
)

// groupby information stored in scopes.
type groupby struct {
	// We use two scopes:
	//   - aggInScope contains columns that are used as input by the
	//     GroupBy operator, specifically:
	//      - columns for the arguments to aggregate functions
	//      - grouping columns (from the GROUP BY)
	//     The grouping columns always come second.
	//
	//   - aggOutScope contains columns that are produced by the GroupBy operator,
	//     specifically:
	//      - columns for the results of the aggregate functions.
	//      - the grouping columns
	//
	// For example:
	//
	//   SELECT 1 + MIN(v*2) FROM kv GROUP BY k+3
	//
	//   aggInScope:   v*2 (as col2), k+3 (as col1)
	//   aggOutScope:  MIN(col2) (as col3), k+3 (as col1)
	//
	// Any aggregate functions which contain column references to this scope
	// trigger the creation of new grouping columns in the grouping scope. In
	// addition, if an aggregate function contains no column references, then the
	// aggregate will be added to the "nearest" grouping scope. For example:
	//   SELECT MAX(1) FROM t1
	//
	// TODO(radu): we aren't really using these as scopes, just as scopeColumn
	// containers. Perhaps move the necessary functionality into a separate
	// structure and pass that instead of scopes.
	aggInScope  *scope
	aggOutScope *scope

	// aggs contains information about aggregate functions encountered.
	aggs []aggregateInfo

	// groupStrs contains a string representation of each GROUP BY expression
	// using symbolic notation. These strings are used to determine if SELECT
	// and HAVING expressions contain sub-expressions matching a GROUP BY
	// expression. This enables queries such as:
	//   SELECT x+y FROM t GROUP BY x+y
	// but not:
	//   SELECT x+y FROM t GROUP BY y+x
	//
	// Each string maps to the grouping column in an aggOutScope scope that
	// projects that expression.
	groupStrs groupByStrSet

	// buildingGroupingCols is true while the grouping columns are being built.
	// It is used to ensure that the builder does not throw a grouping error
	// prematurely.
	buildingGroupingCols bool
}

// groupByStrSet is a set of stringified GROUP BY expressions that map to the
// grouping column in an aggOutScope scope that projects that expression. It
// is used to enforce scoping rules, since any non-aggregate, variable
// expression in the SELECT list must be a GROUP BY expression or be composed
// of GROUP BY expressions. For example, this query is legal:
//
//   SELECT COUNT(*), k + v FROM kv GROUP BY k, v
//
// but this query is not:
//
//   SELECT COUNT(*), k + v FROM kv GROUP BY k - v
//
type groupByStrSet map[string]*scopeColumn

// hasNonCommutativeAggregates returns true if any of the aggregates is
// non-commutative, that is, ordering sensitive with an explicit ORDER BY
// (see isCommutative).
func (g *groupby) hasNonCommutativeAggregates() bool {
	for i := range g.aggs {
		if !g.aggs[i].isCommutative() {
			return true
		}
	}
	return false
}

// groupingCols returns the columns in the aggInScope corresponding to grouping
// columns.
func (g *groupby) groupingCols() []scopeColumn {
	// Grouping cols are always clustered at the end of the column list.
	return g.aggInScope.cols[len(g.aggInScope.cols)-len(g.groupStrs):]
}

// aggregateArgCols returns the columns in the aggInScope corresponding to
// arguments to aggregate functions. If the aggregate has a filter, the column
// corresponding to the filter's input will immediately follow the arguments.
func (g *groupby) aggregateArgCols() []scopeColumn {
	return g.aggInScope.cols[:len(g.aggInScope.cols)-len(g.groupStrs)]
}

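// For illustration (an informal sketch, not part of the original comments):
// given
//   SELECT sum(a) FILTER (WHERE b > 0) FROM t GROUP BY c
// the aggInScope column list is laid out as
//   a (sum argument), b > 0 (filter input), c (grouping column)
// so aggregateArgCols returns the first two columns and groupingCols returns
// the last one.
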
// aggregateResultCols returns the columns in the aggOutScope corresponding
// to aggregate functions.
func (g *groupby) aggregateResultCols() []scopeColumn {
	// Aggregates are always clustered at the beginning of the column list, in
	// the same order as g.aggs.
	return g.aggOutScope.cols[:len(g.aggs)]
}

// hasAggregates returns true if the enclosing scope has aggregate functions.
func (g *groupby) hasAggregates() bool {
	return len(g.aggs) > 0
}

// findAggregate finds the given aggregate among the bound variables
// in this scope. Returns nil if the aggregate is not found.
func (g *groupby) findAggregate(agg aggregateInfo) *scopeColumn {
	if g.aggs == nil {
		return nil
	}

	for i, a := range g.aggs {
		// Find an existing aggregate that uses the same function overload.
		if a.def.Overload == agg.def.Overload && a.distinct == agg.distinct && a.filter == agg.filter {
			// Now check that the arguments are identical.
			if len(a.args) == len(agg.args) {
				match := true
				for j, arg := range a.args {
					if arg != agg.args[j] {
						match = false
						break
					}
				}

				// If agg is ordering sensitive, check if the orders match as well.
				if match && !agg.isCommutative() {
					if len(a.OrderBy) != len(agg.OrderBy) {
						match = false
					} else {
						for j := range a.OrderBy {
							if !a.OrderBy[j].Equal(agg.OrderBy[j]) {
								match = false
								break
							}
						}
					}
				}

				if match {
					// Aggregate already exists, so return information about the
					// existing column that computes it.
					return &g.aggregateResultCols()[i]
				}
			}
		}
	}

	return nil
}

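// For example (illustrative), in
//   SELECT sum(x), sum(x) + 1 FROM t
// the second sum(x) matches the first on overload, arguments, DISTINCT, and
// FILTER, so findAggregate returns the existing result column and the
// aggregate is computed only once. By contrast, sum(DISTINCT x) would not
// match sum(x), because the distinct flags differ.
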
// aggregateInfo stores information about an aggregation function call.
type aggregateInfo struct {
	*tree.FuncExpr

	def      memo.FunctionPrivate
	distinct bool
	args     memo.ScalarListExpr
	filter   opt.ScalarExpr

	// col is the output column of the aggregation.
	col *scopeColumn

	// colRefs contains the set of columns referenced by the arguments of the
	// aggregation. It is used to determine the appropriate scope for this
	// aggregation.
	colRefs opt.ColSet
}

// Walk is part of the tree.Expr interface.
func (a *aggregateInfo) Walk(v tree.Visitor) tree.Expr {
	return a
}

// TypeCheck is part of the tree.Expr interface.
func (a *aggregateInfo) TypeCheck(
	ctx context.Context, semaCtx *tree.SemaContext, desired *types.T,
) (tree.TypedExpr, error) {
	if _, err := a.FuncExpr.TypeCheck(ctx, semaCtx, desired); err != nil {
		return nil, err
	}
	return a, nil
}

// isOrderedSetAggregate returns true if the given aggregate operator is an
// ordered-set aggregate.
func (a aggregateInfo) isOrderedSetAggregate() bool {
	switch a.def.Name {
	case "percentile_disc_impl", "percentile_cont_impl":
		return true
	default:
		return false
	}
}

// isOrderingSensitive returns true if the given aggregate operator is
// ordering sensitive. That is, it can give different results based on the
// order in which values are fed to it.
func (a aggregateInfo) isOrderingSensitive() bool {
	if a.isOrderedSetAggregate() {
		return true
	}
	switch a.def.Name {
	case "array_agg", "concat_agg", "string_agg", "json_agg", "jsonb_agg":
		return true
	default:
		return false
	}
}

// isCommutative checks whether the aggregate is commutative, that is, whether
// it is ordering insensitive or no ordering was specified.
func (a aggregateInfo) isCommutative() bool {
	return a.OrderBy == nil || !a.isOrderingSensitive()
}

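// To make the distinction concrete (examples ours): sum(x) is commutative,
// and so is array_agg(x) without an ORDER BY (the result order is simply
// unspecified). Only something like
//   array_agg(x ORDER BY y)
// is non-commutative: it is ordering sensitive and an ordering was explicitly
// requested, so it must see its input rows in that order.
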
// Eval is part of the tree.TypedExpr interface.
func (a *aggregateInfo) Eval(_ *tree.EvalContext) (tree.Datum, error) {
	panic(errors.AssertionFailedf("aggregateInfo must be replaced before evaluation"))
}

var _ tree.Expr = &aggregateInfo{}
var _ tree.TypedExpr = &aggregateInfo{}

func (b *Builder) needsAggregation(sel *tree.SelectClause, scope *scope) bool {
	// We have an aggregation if:
	//  - we have a GROUP BY, or
	//  - we have a HAVING clause, or
	//  - we have aggregate functions in the SELECT, DISTINCT ON and/or ORDER BY expressions.
	return len(sel.GroupBy) > 0 ||
		sel.Having != nil ||
		(scope.groupby != nil && scope.groupby.hasAggregates())
}

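// Note that a query can need aggregation without a GROUP BY clause. For
// example (ours):
//   SELECT max(a) FROM t
// has no grouping columns, but hasAggregates() becomes true once max(a) is
// encountered, and the entire input forms a single group (see the
// ScalarGroupBy case in constructGroupBy below).
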
func (b *Builder) constructGroupBy(
	input memo.RelExpr, groupingColSet opt.ColSet, aggCols []scopeColumn, ordering opt.Ordering,
) memo.RelExpr {
	aggs := make(memo.AggregationsExpr, 0, len(aggCols))

	// Deduplicate the columns; we don't need to produce the same aggregation
	// multiple times.
	colSet := opt.ColSet{}
	for i := range aggCols {
		if id, scalar := aggCols[i].id, aggCols[i].scalar; !colSet.Contains(id) {
			if scalar == nil {
				// A "pass through" column (i.e. a VariableOp) is not legal as an
				// aggregation.
				panic(errors.AssertionFailedf("variable as aggregation"))
			}
			aggs = append(aggs, b.factory.ConstructAggregationsItem(scalar, id))
			colSet.Add(id)
		}
	}

	private := memo.GroupingPrivate{GroupingCols: groupingColSet}

	// The ordering of the GROUP BY is inherited from the input. This ordering is
	// only useful for intra-group ordering (for order-sensitive aggregations like
	// ARRAY_AGG). So we add the grouping columns as optional columns.
	private.Ordering.FromOrderingWithOptCols(ordering, groupingColSet)

	if groupingColSet.Empty() {
		return b.factory.ConstructScalarGroupBy(input, aggs, &private)
	}
	return b.factory.ConstructGroupBy(input, aggs, &private)
}

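// As a usage note (example ours): the ScalarGroupBy case matters for empty
// inputs. With
//   SELECT sum(x) FROM t
// the grouping column set is empty and ScalarGroupBy produces exactly one
// row (sum is NULL if t is empty), whereas
//   SELECT sum(x) FROM t GROUP BY y
// constructs a GroupBy that produces zero rows when t is empty.
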
// buildGroupingColumns builds the grouping columns and adds them to the
// groupby scopes that will be used to build the aggregation expression.
func (b *Builder) buildGroupingColumns(sel *tree.SelectClause, projectionsScope, fromScope *scope) {
	if fromScope.groupby == nil {
		fromScope.initGrouping()
	}
	g := fromScope.groupby

	// The "from" columns are visible to any grouping expressions.
	b.buildGroupingList(sel.GroupBy, sel.Exprs, projectionsScope, fromScope)

	// Copy the grouping columns to the aggOutScope.
	g.aggOutScope.appendColumns(g.groupingCols())
}

// buildAggregation builds the aggregation operators and constructs the
// GroupBy expression. Returns the output scope for the aggregation operation.
func (b *Builder) buildAggregation(having opt.ScalarExpr, fromScope *scope) (outScope *scope) {
	g := fromScope.groupby

	groupingCols := g.groupingCols()

	// Build ColSet of grouping columns.
	var groupingColSet opt.ColSet
	for i := range groupingCols {
		groupingColSet.Add(groupingCols[i].id)
	}

	// If there are any non-commutative aggregates (ordering-sensitive
	// aggregates with an explicit ORDER BY), build the aggregations as window
	// functions over each group.
	if g.hasNonCommutativeAggregates() {
		return b.buildAggregationAsWindow(groupingColSet, having, fromScope)
	}

	aggInfos := g.aggs

	// Construct the aggregation operators.
	haveOrderingSensitiveAgg := false
	aggCols := g.aggregateResultCols()
	argCols := g.aggregateArgCols()
	var fromCols opt.ColSet
	if b.subquery != nil {
		// Only calculate the set of fromScope columns if it will be used below.
		fromCols = fromScope.colSet()
	}
	for i, agg := range aggInfos {
		// First accumulate the arguments to the aggregate function. These are
		// always variables referencing columns in the GroupBy input expression,
		// except in the case of string_agg, where the second argument must be
		// a constant expression.
		args := make([]opt.ScalarExpr, 0, 2)
		for range agg.args {
			colID := argCols[0].id
			args = append(args, b.factory.ConstructVariable(colID))

			// Skip past argCols that have been handled. There may be a variable
			// number of them per aggregate, so we need to set up for the next
			// aggregate function.
			argCols = argCols[1:]
		}

		// Construct the aggregate function from its name and arguments and store
		// it in the corresponding scope column.
		aggCols[i].scalar = b.constructAggregate(agg.def.Name, args)

		// Wrap the aggregate function with an AggDistinct operator if DISTINCT
		// was specified in the query.
		if agg.distinct {
			aggCols[i].scalar = b.factory.ConstructAggDistinct(aggCols[i].scalar)
		}

		// Wrap the aggregate function or the AggDistinct in an AggFilter operator
		// if FILTER (WHERE ...) was specified in the query.
		// TODO(justin): add a norm rule to push these filters below GroupBy where
		// possible.
		if agg.filter != nil {
			// The column containing the filter expression always comes after the
			// argument columns (which have already been processed).
			colID := argCols[0].id
			argCols = argCols[1:]
			variable := b.factory.ConstructVariable(colID)
			aggCols[i].scalar = b.factory.ConstructAggFilter(aggCols[i].scalar, variable)
		}

		if agg.isOrderingSensitive() {
			haveOrderingSensitiveAgg = true
		}

		if b.subquery != nil {
			// Update the subquery with any outer columns from the aggregate
			// arguments. The outer columns were not added in finishBuildScalarRef
			// because at the time the arguments were built, we did not know which
			// was the appropriate scope for the aggregation. (buildAggregateFunction
			// ensures that finishBuildScalarRef doesn't add the outer columns by
			// temporarily setting b.subquery to nil. See buildAggregateFunction
			// for more details.)
			b.subquery.outerCols.UnionWith(agg.colRefs.Difference(fromCols))
		}
	}

	if haveOrderingSensitiveAgg {
		g.aggInScope.copyOrdering(fromScope)
	}

	// Construct the pre-projection, which renders the grouping columns and the
	// aggregate arguments, as well as any additional order by columns.
	b.constructProjectForScope(fromScope, g.aggInScope)

	g.aggOutScope.expr = b.constructGroupBy(
		g.aggInScope.expr.(memo.RelExpr),
		groupingColSet,
		aggCols,
		g.aggInScope.ordering,
	)

	// Wrap with the HAVING filter if it exists.
	if having != nil {
		input := g.aggOutScope.expr.(memo.RelExpr)
		filters := memo.FiltersExpr{b.factory.ConstructFiltersItem(having)}
		g.aggOutScope.expr = b.factory.ConstructSelect(input, filters)
	}

	return g.aggOutScope
}

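// To illustrate the wrapping order above (sketch, not from the original
// comments): for
//   SELECT count(DISTINCT x) FILTER (WHERE y > 0) FROM t
// the scalar stored in the aggregate's output column is, conceptually,
//   AggFilter(AggDistinct(Count(x)), y > 0)
// where y > 0 is referenced via the variable for its pre-projected column;
// the FILTER is applied outermost, on top of the DISTINCT.
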
// analyzeHaving analyzes the HAVING clause and returns it as a typed
// expression. fromScope contains the name bindings that are visible for this
// HAVING clause (e.g., passed in from an enclosing statement).
func (b *Builder) analyzeHaving(having *tree.Where, fromScope *scope) tree.TypedExpr {
	if having == nil {
		return nil
	}

	// We need to save and restore the previous value of the field in semaCtx
	// in case we are recursively called within a subquery context.
	defer b.semaCtx.Properties.Restore(b.semaCtx.Properties)
	b.semaCtx.Properties.Require(
		exprKindHaving.String(), tree.RejectWindowApplications|tree.RejectGenerators,
	)
	fromScope.context = exprKindHaving
	return fromScope.resolveAndRequireType(having.Expr, types.Bool)
}

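// For example (ours, following the Require call above): window functions and
// set-generating functions are rejected in HAVING, so a query like
//   SELECT v, count(*) FROM kv GROUP BY v HAVING row_number() OVER () > 1
// fails during type-checking, as does a HAVING clause that calls
// generate_series.
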
// buildHaving builds a set of memo groups that represent the given HAVING
// clause. fromScope contains the name bindings that are visible for this HAVING
// clause (e.g., passed in from an enclosing statement).
//
// The return value is the top-level scalar expression built for this HAVING
// clause.
func (b *Builder) buildHaving(having tree.TypedExpr, fromScope *scope) opt.ScalarExpr {
	if having == nil {
		return nil
	}

	return b.buildScalar(having, fromScope, nil, nil, nil)
}

// buildGroupingList builds a set of memo groups that represent a list of
// GROUP BY expressions, adding the group-by expressions as columns to
// aggInScope and populating groupStrs.
//
// groupBy          The given GROUP BY expressions.
// selects          The select expressions are needed in case one of the
//                  GROUP BY expressions is an index into the select list.
//                  For example,
//                      SELECT count(*), k FROM t GROUP BY 2
//                  indicates that the grouping is on the second select
//                  expression, k.
// projectionsScope The scope that contains the columns for the SELECT targets
//                  (used when GROUP BY refers to a target by alias).
// fromScope        The scope for the input to the aggregation (the FROM
//                  clause).
func (b *Builder) buildGroupingList(
	groupBy tree.GroupBy, selects tree.SelectExprs, projectionsScope *scope, fromScope *scope,
) {
	g := fromScope.groupby
	g.groupStrs = make(groupByStrSet, len(groupBy))
	if g.aggInScope.cols == nil {
		g.aggInScope.cols = make([]scopeColumn, 0, len(groupBy))
	}

	// The buildingGroupingCols flag is used to ensure that a grouping error is
	// not raised prematurely. For example:
	//   SELECT count(*), a FROM ab GROUP BY a
	// is legal, but
	//   SELECT count(*), b FROM ab GROUP BY a
	// will throw the error, `column "b" must appear in the GROUP BY clause or be
	// used in an aggregate function`. The builder cannot know whether there is
	// a grouping error until the grouping columns are fully built.
	g.buildingGroupingCols = true
	for _, e := range groupBy {
		b.buildGrouping(e, selects, projectionsScope, fromScope, g.aggInScope)
	}
	g.buildingGroupingCols = false
}

// buildGrouping builds a set of memo groups that represent a GROUP BY
// expression. The expression (or expressions, if we have a star) is added to
// groupStrs and to the aggInScope.
//
// groupBy          The given GROUP BY expression.
// selects          The select expressions are needed in case the GROUP BY
//                  expression is an index into the select list.
// projectionsScope The scope that contains the columns for the SELECT targets
//                  (used when GROUP BY refers to a target by alias).
// fromScope        The scope for the input to the aggregation (the FROM
//                  clause).
// aggInScope       The scope that will contain the grouping expressions as well
//                  as the aggregate function arguments.
func (b *Builder) buildGrouping(
	groupBy tree.Expr, selects tree.SelectExprs, projectionsScope, fromScope, aggInScope *scope,
) {
	// Unwrap parenthesized expressions like "((a))" to "a".
	groupBy = tree.StripParens(groupBy)
	alias := ""

	// Comment below pasted from PostgreSQL (findTargetListEntrySQL92 in
	// src/backend/parser/parse_clause.c).
	//
	// Handle two special cases as mandated by the SQL92 spec:
	//
	// 1. Bare ColumnName (no qualifier or subscripts)
	//    For a bare identifier, we search for a matching column name
	//    in the existing target list.  Multiple matches are an error
	//    unless they refer to identical values; for example,
	//    we allow  SELECT a, a FROM table ORDER BY a
	//    but not   SELECT a AS b, b FROM table ORDER BY b
	//    If no match is found, we fall through and treat the identifier
	//    as an expression.
	//    For GROUP BY, it is incorrect to match the grouping item against
	//    targetlist entries: according to SQL92, an identifier in GROUP BY
	//    is a reference to a column name exposed by FROM, not to a target
	//    list column.  However, many implementations (including pre-7.0
	//    PostgreSQL) accept this anyway.  So for GROUP BY, we look first
	//    to see if the identifier matches any FROM column name, and only
	//    try for a targetlist name if it doesn't.  This ensures that we
	//    adhere to the spec in the case where the name could be both.
	//    DISTINCT ON isn't in the standard, so we can do what we like there;
	//    we choose to make it work like ORDER BY, on the rather flimsy
	//    grounds that ordinary DISTINCT works on targetlist entries.
	//
	// 2. IntegerConstant
	//    This means to use the n'th item in the existing target list.
	//    Note that it would make no sense to order/group/distinct by an
	//    actual constant, so this does not create a conflict with SQL99.
	//    GROUP BY column-number is not allowed by SQL92, but since
	//    the standard has no other behavior defined for this syntax,
	//    we may as well accept this common extension.

	// This function sets groupBy and alias in these special cases.
	func() {
		// Check whether the GROUP BY clause refers to a column in the SELECT list
		// by index, e.g. `SELECT a, SUM(b) FROM y GROUP BY 1` (case 2 above).
		if col := colIndex(len(selects), groupBy, "GROUP BY"); col != -1 {
			groupBy, alias = selects[col].Expr, string(selects[col].As)
			return
		}

		if name, ok := groupBy.(*tree.UnresolvedName); ok {
			if name.NumParts != 1 || name.Star {
				return
			}
			// Case 1 above.
			targetName := name.Parts[0]

			// We must prefer a match against a FROM-clause column (but ignore upper
			// scopes); in this case we let the general case below handle the reference.
			for i := range fromScope.cols {
				if string(fromScope.cols[i].name) == targetName {
					return
				}
			}
			// See if it matches exactly one of the target list entries.
			var match *scopeColumn
			for i := range projectionsScope.cols {
				if col := &projectionsScope.cols[i]; string(col.name) == targetName {
					if match != nil {
						// Multiple matches are only allowed if they refer to identical
						// expressions.
						if match.getExprStr() != col.getExprStr() {
							panic(pgerror.Newf(pgcode.AmbiguousColumn, "GROUP BY %q is ambiguous", targetName))
						}
					}
					match = col
				}
			}
			if match != nil {
				groupBy, alias = match.expr, targetName
			}
		}
	}()

	// We need to save and restore the previous value of the field in semaCtx
	// in case we are recursively called within a subquery context.
	defer b.semaCtx.Properties.Restore(b.semaCtx.Properties)

	// Make sure the GROUP BY columns have no special functions.
	b.semaCtx.Properties.Require(exprKindGroupBy.String(), tree.RejectSpecial)
	fromScope.context = exprKindGroupBy

	// Resolve types, expand stars, and flatten tuples.
	exprs := b.expandStarAndResolveType(groupBy, fromScope)
	exprs = flattenTuples(exprs)

	// Finally, build each of the GROUP BY columns.
	for _, e := range exprs {
		// If a grouping column has already been added, don't add it again.
		// GROUP BY a, a is semantically equivalent to GROUP BY a.
		exprStr := symbolicExprStr(e)
		if _, ok := fromScope.groupby.groupStrs[exprStr]; ok {
			continue
		}

		// Save a representation of the GROUP BY expression for validation of the
		// SELECT and HAVING expressions. This enables queries such as:
		//   SELECT x+y FROM t GROUP BY x+y
		col := b.addColumn(aggInScope, alias, e)
		b.buildScalar(e, fromScope, aggInScope, col, nil)
		fromScope.groupby.groupStrs[exprStr] = col
	}
}

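// Putting the two SQL92 special cases together (examples ours):
//   SELECT k+3 AS x, min(v) FROM kv GROUP BY 1   -- case 2: ordinal
//   SELECT k+3 AS x, min(v) FROM kv GROUP BY x   -- case 1: target alias
// both group by the expression k+3. If kv also had a column named x, GROUP BY
// x would instead refer to the FROM-clause column, per the preference
// described above.
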
// buildAggArg builds a scalar expression which is used as an input in some form
// to an aggregate expression. The scopeColumn for the built expression will
// be added to tempScope.
func (b *Builder) buildAggArg(
	e tree.TypedExpr, info *aggregateInfo, tempScope, fromScope *scope,
) opt.ScalarExpr {
	// This synthesizes a new tempScope column, unless the argument is a
	// simple VariableOp.
	col := b.addColumn(tempScope, "" /* alias */, e)
	b.buildScalar(e, fromScope, tempScope, col, &info.colRefs)
	if col.scalar != nil {
		return col.scalar
	}
	return b.factory.ConstructVariable(col.id)
}

// translateAggName translates the aggregate name if needed. This is used
// to override the output column name of an aggregation.
// See isOrderedSetAggregate.
func translateAggName(name string) string {
	switch name {
	case "percentile_disc_impl":
		return "percentile_disc"
	case "percentile_cont_impl":
		return "percentile_cont"
	}
	return name
}

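// For example (illustrative; the _impl overloads are internal): an
// ordered-set aggregate invocation such as
//   percentile_cont(0.5) WITHIN GROUP (ORDER BY x)
// is built via the internal percentile_cont_impl overload, and
// translateAggName maps the name back so that the synthesized output column
// is labeled percentile_cont rather than the _impl variant.
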
// buildAggregateFunction is called when we are building a function which is an
// aggregate. Any non-trivial parameters (i.e., not simple column references)
// to the aggregate function are extracted and added to aggInScope. The
// aggregate function expression itself is added to aggOutScope. For example:
//
//   SELECT SUM(x+1) FROM xy
//   =>
//   aggInScope : x+1 AS column1
//   aggOutScope: SUM(column1)
//
// buildAggregateFunction returns a pointer to the aggregateInfo containing
// the function definition, fully built arguments, and the aggregate output
// column.
func (b *Builder) buildAggregateFunction(
	f *tree.FuncExpr, def *memo.FunctionPrivate, fromScope *scope,
) *aggregateInfo {
	tempScope := fromScope.startAggFunc()
	tempScopeColsBefore := len(tempScope.cols)

	info := aggregateInfo{
		FuncExpr: f,
		def:      *def,
		distinct: (f.Type == tree.DistinctFuncType),
		args:     make(memo.ScalarListExpr, len(f.Exprs)),
	}

	// Temporarily set b.subquery to nil so we don't add outer columns to the
	// wrong scope.
	subq := b.subquery
	b.subquery = nil
	defer func() { b.subquery = subq }()

	for i, pexpr := range f.Exprs {
		info.args[i] = b.buildAggArg(pexpr.(tree.TypedExpr), &info, tempScope, fromScope)
	}

	// If we have a filter, add it to tempScope after all the arguments. We'll
	// later extract the column that gets added here in buildAggregation.
	if f.Filter != nil {
		info.filter = b.buildAggArg(f.Filter.(tree.TypedExpr), &info, tempScope, fromScope)
	}

	// If we have ORDER BY, add the ordering columns to the tempScope.
	if f.OrderBy != nil {
		for _, o := range f.OrderBy {
			b.buildAggArg(o.Expr.(tree.TypedExpr), &info, tempScope, fromScope)
		}
	}

	// Find the appropriate aggregation scopes for this aggregate now that we
	// know which columns it references. If necessary, we'll move the columns
	// for the arguments from tempScope to aggInScope below.
	g := fromScope.endAggFunc(info.colRefs)

	// If we already have the same aggregation, reuse it. Otherwise add it
	// to the list of aggregates that need to be computed by the groupby
	// expression and synthesize a column for the aggregation result.
	info.col = g.findAggregate(info)
	if info.col == nil {
		// Translate the function name if needed.
		funcName := translateAggName(def.Name)

		// Leave the scalar expression nil for now; it will be filled in later by
		// the buildAggregation method.
		info.col = b.synthesizeColumn(g.aggOutScope, funcName, f.ResolvedType(), f, nil /* scalar */)

		// Move the columns for the aggregate input expressions to the correct scope.
		if g.aggInScope != tempScope {
			g.aggInScope.cols = append(g.aggInScope.cols, tempScope.cols[tempScopeColsBefore:]...)
			tempScope.cols = tempScope.cols[:tempScopeColsBefore]
		}

		// Add the aggregate to the list of aggregates that need to be computed by
		// the groupby expression.
		g.aggs = append(g.aggs, info)
	} else {
		// Undo the addition of the argument columns.
		// TODO(radu): is there a cleaner way to do this?
		tempScope.cols = tempScope.cols[:tempScopeColsBefore]
	}

	return &info
}

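// A scoping example (ours): in
//   SELECT (SELECT max(t1.a) FROM t2) FROM t1
// the aggregate's only column reference, t1.a, is bound in the outer query,
// so endAggFunc resolves the aggregate to the outer scope; max(t1.a) is
// computed there and the subquery merely references its result. This is why
// column references are tracked in info.colRefs while b.subquery is
// temporarily nil.
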
func (b *Builder) constructWindowFn(name string, args []opt.ScalarExpr) opt.ScalarExpr {
	switch name {
	case "rank":
		return b.factory.ConstructRank()
	case "row_number":
		return b.factory.ConstructRowNumber()
	case "dense_rank":
		return b.factory.ConstructDenseRank()
	case "percent_rank":
		return b.factory.ConstructPercentRank()
	case "cume_dist":
		return b.factory.ConstructCumeDist()
	case "ntile":
		return b.factory.ConstructNtile(args[0])
	case "lag":
		return b.factory.ConstructLag(args[0], args[1], args[2])
	case "lead":
		return b.factory.ConstructLead(args[0], args[1], args[2])
	case "first_value":
		return b.factory.ConstructFirstValue(args[0])
	case "last_value":
		return b.factory.ConstructLastValue(args[0])
	case "nth_value":
		return b.factory.ConstructNthValue(args[0], args[1])
	default:
		return b.constructAggregate(name, args)
	}
}

func (b *Builder) constructAggregate(name string, args []opt.ScalarExpr) opt.ScalarExpr {
	switch name {
	case "array_agg":
		return b.factory.ConstructArrayAgg(args[0])
	case "avg":
		return b.factory.ConstructAvg(args[0])
	case "bit_and":
		return b.factory.ConstructBitAndAgg(args[0])
	case "bit_or":
		return b.factory.ConstructBitOrAgg(args[0])
	case "bool_and", "every":
		return b.factory.ConstructBoolAnd(args[0])
	case "bool_or":
		return b.factory.ConstructBoolOr(args[0])
	case "concat_agg":
		return b.factory.ConstructConcatAgg(args[0])
	case "corr":
		return b.factory.ConstructCorr(args[0], args[1])
	case "count":
		return b.factory.ConstructCount(args[0])
	case "count_rows":
		return b.factory.ConstructCountRows()
	case "max":
		return b.factory.ConstructMax(args[0])
	case "min":
		return b.factory.ConstructMin(args[0])
	case "sum_int":
		return b.factory.ConstructSumInt(args[0])
	case "sum":
		return b.factory.ConstructSum(args[0])
	case "sqrdiff":
		return b.factory.ConstructSqrDiff(args[0])
	case "variance", "var_samp":
		return b.factory.ConstructVariance(args[0])
	case "stddev", "stddev_samp":
		return b.factory.ConstructStdDev(args[0])
	case "xor_agg":
		return b.factory.ConstructXorAgg(args[0])
	case "json_agg":
		return b.factory.ConstructJsonAgg(args[0])
	case "jsonb_agg":
		return b.factory.ConstructJsonbAgg(args[0])
	case "string_agg":
		return b.factory.ConstructStringAgg(args[0], args[1])
	case "percentile_disc_impl":
		return b.factory.ConstructPercentileDisc(args[0], args[1])
	case "percentile_cont_impl":
		return b.factory.ConstructPercentileCont(args[0], args[1])
	}
	panic(errors.AssertionFailedf("unhandled aggregate: %s", name))
}

func isAggregate(def *tree.FunctionDefinition) bool {
	return def.Class == tree.AggregateClass
}

func isWindow(def *tree.FunctionDefinition) bool {
	return def.Class == tree.WindowClass
}

func isGenerator(def *tree.FunctionDefinition) bool {
	return def.Class == tree.GeneratorClass
}

func isSQLFn(def *tree.FunctionDefinition) bool {
	return def.Class == tree.SQLClass
}

func newGroupingError(name *tree.Name) error {
	return pgerror.Newf(pgcode.Grouping,
		"column \"%s\" must appear in the GROUP BY clause or be used in an aggregate function",
		tree.ErrString(name),
	)
}

// allowImplicitGroupingColumn returns true if col is part of a table and the
// groupby metadata indicates that we are grouping on the entire PK of that
// table. In that case, we can allow col as an "implicit" grouping column, even
// if it is not specified in the query.
func (b *Builder) allowImplicitGroupingColumn(colID opt.ColumnID, g *groupby) bool {
	md := b.factory.Metadata()
	colMeta := md.ColumnMeta(colID)
	if colMeta.Table == 0 {
		return false
	}
	// Get all the PK columns.
	tab := md.Table(colMeta.Table)
	var pkCols opt.ColSet
	if tab.IndexCount() == 0 {
		// Virtual tables have no indexes.
		return false
	}
	primaryIndex := tab.Index(cat.PrimaryIndex)
	for i := 0; i < primaryIndex.KeyColumnCount(); i++ {
		pkCols.Add(colMeta.Table.ColumnID(primaryIndex.Column(i).Ordinal))
	}
	// Remove PK columns that are grouping cols and see if there's anything left.
	groupingCols := g.groupingCols()
	for i := range groupingCols {
		pkCols.Remove(groupingCols[i].id)
	}
	return pkCols.Empty()
}
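
// For example (ours): if kv has primary key (k), then
//   SELECT k, v, count(*) FROM kv GROUP BY k
// can treat v as an implicit grouping column: grouping on the entire primary
// key makes every other column of the table functionally dependent on the
// grouping columns, so admitting v cannot change the groups.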