github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/optbuilder/groupby.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package optbuilder

// This file has builder code specific to aggregations (queries with GROUP BY,
// HAVING, or aggregate functions).
//
// We build such queries using three operators:
//
//  - a pre-projection: a ProjectOp which generates the columns needed for
//    the aggregation:
//      - group by expressions
//      - arguments to aggregation functions
//
//  - the aggregation: a GroupByOp which has the pre-projection as the
//    input and produces columns with the results of the aggregation
//    functions. The group by columns are also passed through.
//
//  - a post-projection: calculates expressions using the results of the
//    aggregations; this is analogous to the ProjectOp that we would use for a
//    no-aggregation Select.
//
// For example:
//   SELECT 1 + MIN(v*2) FROM kv GROUP BY k+3
//
//   pre-projection:  k+3 (as col1), v*2 (as col2)
//   aggregation:     group by col1, calculate MIN(col2) (as col3)
//   post-projection: 1 + col3

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/sql/opt"
	"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
	"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/errors"
)

// groupby holds the grouping/aggregation information stored in scopes.
type groupby struct {
	// We use two scopes:
	//   - aggInScope contains columns that are used as input by the
	//     GroupBy operator, specifically:
	//      - columns for the arguments to aggregate functions
	//      - grouping columns (from the GROUP BY)
	//     The grouping columns always come second.
	//
	//   - aggOutScope contains columns that are produced by the GroupBy
	//     operator, specifically:
	//      - columns for the results of the aggregate functions.
	//      - the grouping columns
	//
	// For example:
	//
	//   SELECT 1 + MIN(v*2) FROM kv GROUP BY k+3
	//
	//   aggInScope:  v*2 (as col2), k+3 (as col1)
	//   aggOutScope: MIN(col2) (as col3), k+3 (as col1)
	//
	// Any aggregate functions which contain column references to this scope
	// trigger the creation of new grouping columns in the grouping scope. In
	// addition, if an aggregate function contains no column references, then
	// the aggregate will be added to the "nearest" grouping scope. For example:
	//   SELECT MAX(1) FROM t1
	//
	// TODO(radu): we aren't really using these as scopes, just as scopeColumn
	// containers. Perhaps move the necessary functionality into a separate
	// structure and pass that instead of scopes.
	aggInScope  *scope
	aggOutScope *scope

	// aggs contains information about aggregate functions encountered.
	aggs []aggregateInfo

	// groupStrs contains a string representation of each GROUP BY expression
	// using symbolic notation. These strings are used to determine if SELECT
	// and HAVING expressions contain sub-expressions matching a GROUP BY
	// expression. This enables queries such as:
	//   SELECT x+y FROM t GROUP BY x+y
	// but not:
	//   SELECT x+y FROM t GROUP BY y+x
	//
	// Each string maps to the grouping column in an aggOutScope scope that
	// projects that expression.
	groupStrs groupByStrSet

	// buildingGroupingCols is true while the grouping columns are being built.
	// It is used to ensure that the builder does not throw a grouping error
	// prematurely.
	buildingGroupingCols bool
}

// groupByStrSet is a set of stringified GROUP BY expressions that map to the
// grouping column in an aggOutScope scope that projects that expression. It
// is used to enforce scoping rules, since any non-aggregate, variable
// expression in the SELECT list must be a GROUP BY expression or be composed
// of GROUP BY expressions. For example, this query is legal:
//
//   SELECT COUNT(*), k + v FROM kv GROUP BY k, v
//
// but this query is not:
//
//   SELECT COUNT(*), k + v FROM kv GROUP BY k - v
type groupByStrSet map[string]*scopeColumn
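
// For illustration, a minimal sketch of what groupStrs enables (the exact
// keys come from symbolicExprStr and are not spelled out here). Given:
//
//   SELECT x+y, COUNT(*) FROM t GROUP BY x+y
//
// groupStrs contains one entry keyed on the symbolic form of x+y, mapping to
// the grouping column for that expression. When the SELECT list is later
// built, the x+y sub-expression matches this entry and is replaced by a
// reference to the grouping column, instead of being rebuilt from x and y
// (which would raise a grouping error, since x and y themselves are not
// grouping columns).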

// hasNonCommutativeAggregates checks whether any of the aggregates are
// non-commutative or ordering sensitive.
func (g *groupby) hasNonCommutativeAggregates() bool {
	for i := range g.aggs {
		if !g.aggs[i].isCommutative() {
			return true
		}
	}
	return false
}

// groupingCols returns the columns in the aggInScope corresponding to grouping
// columns.
func (g *groupby) groupingCols() []scopeColumn {
	// Grouping cols are always clustered at the end of the column list.
	return g.aggInScope.cols[len(g.aggInScope.cols)-len(g.groupStrs):]
}

// aggregateArgCols returns the columns in the aggInScope corresponding to
// arguments to aggregate functions. If the aggregate has a filter, the column
// corresponding to the filter's input will immediately follow the arguments.
func (g *groupby) aggregateArgCols() []scopeColumn {
	return g.aggInScope.cols[:len(g.aggInScope.cols)-len(g.groupStrs)]
}

// aggregateResultCols returns the columns in the aggOutScope corresponding
// to aggregate functions.
func (g *groupby) aggregateResultCols() []scopeColumn {
	// Aggregates are always clustered at the beginning of the column list, in
	// the same order as s.groupby.aggs.
	return g.aggOutScope.cols[:len(g.aggs)]
}

// hasAggregates returns true if the enclosing scope has aggregate functions.
func (g *groupby) hasAggregates() bool {
	return len(g.aggs) > 0
}

// findAggregate finds the given aggregate among the bound variables
// in this scope. Returns nil if the aggregate is not found.
func (g *groupby) findAggregate(agg aggregateInfo) *scopeColumn {
	if g.aggs == nil {
		return nil
	}

	for i, a := range g.aggs {
		// Find an existing aggregate that uses the same function overload.
		if a.def.Overload == agg.def.Overload && a.distinct == agg.distinct && a.filter == agg.filter {
			// Now check that the arguments are identical.
			if len(a.args) == len(agg.args) {
				match := true
				for j, arg := range a.args {
					if arg != agg.args[j] {
						match = false
						break
					}
				}

				// If agg is ordering sensitive, check if the orders match as well.
				if match && !agg.isCommutative() {
					if len(a.OrderBy) != len(agg.OrderBy) {
						match = false
					} else {
						for j := range a.OrderBy {
							if !a.OrderBy[j].Equal(agg.OrderBy[j]) {
								match = false
								break
							}
						}
					}
				}

				if match {
					// Aggregate already exists, so return information about the
					// existing column that computes it.
					return &g.aggregateResultCols()[i]
				}
			}
		}
	}

	return nil
}
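
// For illustration, the deduplication findAggregate enables. Given:
//
//   SELECT SUM(v), SUM(v) + 1 FROM kv GROUP BY k
//
// the second occurrence of SUM(v) has the same overload, arguments, DISTINCT
// flag, and filter as the first, so findAggregate returns the column already
// synthesized for it: only one SumOp is computed by the GroupBy, and the
// post-projection references that single output column twice.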

// aggregateInfo stores information about an aggregation function call.
type aggregateInfo struct {
	*tree.FuncExpr

	def      memo.FunctionPrivate
	distinct bool
	args     memo.ScalarListExpr
	filter   opt.ScalarExpr

	// col is the output column of the aggregation.
	col *scopeColumn

	// colRefs contains the set of columns referenced by the arguments of the
	// aggregation. It is used to determine the appropriate scope for this
	// aggregation.
	colRefs opt.ColSet
}

// Walk is part of the tree.Expr interface.
func (a *aggregateInfo) Walk(v tree.Visitor) tree.Expr {
	return a
}

// TypeCheck is part of the tree.Expr interface.
func (a *aggregateInfo) TypeCheck(
	ctx context.Context, semaCtx *tree.SemaContext, desired *types.T,
) (tree.TypedExpr, error) {
	if _, err := a.FuncExpr.TypeCheck(ctx, semaCtx, desired); err != nil {
		return nil, err
	}
	return a, nil
}

// isOrderedSetAggregate returns true if the given aggregate operator is an
// ordered-set aggregate.
func (a aggregateInfo) isOrderedSetAggregate() bool {
	switch a.def.Name {
	case "percentile_disc_impl", "percentile_cont_impl":
		return true
	default:
		return false
	}
}

// isOrderingSensitive returns true if the given aggregate operator is
// ordering sensitive. That is, it can give different results based on the
// order values are fed to it.
func (a aggregateInfo) isOrderingSensitive() bool {
	if a.isOrderedSetAggregate() {
		return true
	}
	switch a.def.Name {
	case "array_agg", "concat_agg", "string_agg", "json_agg", "jsonb_agg":
		return true
	default:
		return false
	}
}

// isCommutative checks whether the aggregate is commutative. That is, if it is
// ordering insensitive or if no ordering is specified.
func (a aggregateInfo) isCommutative() bool {
	return a.OrderBy == nil || !a.isOrderingSensitive()
}

// Eval is part of the tree.TypedExpr interface.
func (a *aggregateInfo) Eval(_ *tree.EvalContext) (tree.Datum, error) {
	panic(errors.AssertionFailedf("aggregateInfo must be replaced before evaluation"))
}

var _ tree.Expr = &aggregateInfo{}
var _ tree.TypedExpr = &aggregateInfo{}
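
// For illustration, how the predicates above combine:
//
//   SUM(x ORDER BY y)       -> commutative: SUM is not ordering sensitive,
//                              so the ORDER BY is irrelevant.
//   ARRAY_AGG(x)            -> commutative: ordering sensitive, but no
//                              ordering was specified.
//   ARRAY_AGG(x ORDER BY y) -> non-commutative: the result depends on the
//                              order in which values are fed to it, which
//                              makes buildAggregation fall back to building
//                              the aggregation as a window function over
//                              each group.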

func (b *Builder) needsAggregation(sel *tree.SelectClause, scope *scope) bool {
	// We have an aggregation if:
	//  - we have a GROUP BY, or
	//  - we have a HAVING clause, or
	//  - we have aggregate functions in the SELECT, DISTINCT ON and/or
	//    ORDER BY expressions.
	return len(sel.GroupBy) > 0 ||
		sel.Having != nil ||
		(scope.groupby != nil && scope.groupby.hasAggregates())
}

func (b *Builder) constructGroupBy(
	input memo.RelExpr, groupingColSet opt.ColSet, aggCols []scopeColumn, ordering opt.Ordering,
) memo.RelExpr {
	aggs := make(memo.AggregationsExpr, 0, len(aggCols))

	// Deduplicate the columns; we don't need to produce the same aggregation
	// multiple times.
	colSet := opt.ColSet{}
	for i := range aggCols {
		if id, scalar := aggCols[i].id, aggCols[i].scalar; !colSet.Contains(id) {
			if scalar == nil {
				// A "pass through" column (i.e. a VariableOp) is not legal as an
				// aggregation.
				panic(errors.AssertionFailedf("variable as aggregation"))
			}
			aggs = append(aggs, b.factory.ConstructAggregationsItem(scalar, id))
			colSet.Add(id)
		}
	}

	private := memo.GroupingPrivate{GroupingCols: groupingColSet}

	// The ordering of the GROUP BY is inherited from the input. This ordering is
	// only useful for intra-group ordering (for order-sensitive aggregations like
	// ARRAY_AGG). So we add the grouping columns as optional columns.
	private.Ordering.FromOrderingWithOptCols(ordering, groupingColSet)

	if groupingColSet.Empty() {
		return b.factory.ConstructScalarGroupBy(input, aggs, &private)
	}
	return b.factory.ConstructGroupBy(input, aggs, &private)
}
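
// For illustration, the shape of the expressions constructGroupBy produces.
// With no grouping columns a ScalarGroupBy is built, which always emits
// exactly one row (even over empty input); with grouping columns a GroupBy
// emits one row per group:
//
//   SELECT COUNT(*) FROM kv            -> ScalarGroupBy (returns 0 on no rows)
//   SELECT COUNT(*) FROM kv GROUP BY k -> GroupBy (returns no rows on no rows)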

// buildGroupingColumns builds the grouping columns and adds them to the
// groupby scopes that will be used to build the aggregation expression.
func (b *Builder) buildGroupingColumns(sel *tree.SelectClause, projectionsScope, fromScope *scope) {
	if fromScope.groupby == nil {
		fromScope.initGrouping()
	}
	g := fromScope.groupby

	// The "from" columns are visible to any grouping expressions.
	b.buildGroupingList(sel.GroupBy, sel.Exprs, projectionsScope, fromScope)

	// Copy the grouping columns to the aggOutScope.
	g.aggOutScope.appendColumns(g.groupingCols())
}

// buildAggregation builds the aggregation operators and constructs the
// GroupBy expression. Returns the output scope for the aggregation operation.
func (b *Builder) buildAggregation(having opt.ScalarExpr, fromScope *scope) (outScope *scope) {
	g := fromScope.groupby

	groupingCols := g.groupingCols()

	// Build the ColSet of grouping columns.
	var groupingColSet opt.ColSet
	for i := range groupingCols {
		groupingColSet.Add(groupingCols[i].id)
	}

	// If there are any aggregates that are ordering sensitive, build the
	// aggregations as window functions over each group.
	if g.hasNonCommutativeAggregates() {
		return b.buildAggregationAsWindow(groupingColSet, having, fromScope)
	}

	aggInfos := g.aggs

	// Construct the aggregation operators.
	haveOrderingSensitiveAgg := false
	aggCols := g.aggregateResultCols()
	argCols := g.aggregateArgCols()
	var fromCols opt.ColSet
	if b.subquery != nil {
		// Only calculate the set of fromScope columns if it will be used below.
		fromCols = fromScope.colSet()
	}
	for i, agg := range aggInfos {
		// First accumulate the arguments to the aggregate function. These are
		// always variables referencing columns in the GroupBy input expression,
		// except in the case of string_agg, where the second argument must be
		// a constant expression.
		args := make([]opt.ScalarExpr, 0, 2)
		for range agg.args {
			colID := argCols[0].id
			args = append(args, b.factory.ConstructVariable(colID))

			// Skip past argCols that have been handled. There may be a variable
			// number of them, so we need to set up for the next aggregate function.
			argCols = argCols[1:]
		}

		// Construct the aggregate function from its name and arguments and store
		// it in the corresponding scope column.
		aggCols[i].scalar = b.constructAggregate(agg.def.Name, args)

		// Wrap the aggregate function with an AggDistinct operator if DISTINCT
		// was specified in the query.
		if agg.distinct {
			aggCols[i].scalar = b.factory.ConstructAggDistinct(aggCols[i].scalar)
		}

		// Wrap the aggregate function or the AggDistinct in an AggFilter operator
		// if FILTER (WHERE ...) was specified in the query.
		// TODO(justin): add a norm rule to push these filters below GroupBy where
		// possible.
		if agg.filter != nil {
			// The column containing the filter expression is always after the
			// argument columns (which have already been processed).
			colID := argCols[0].id
			argCols = argCols[1:]
			variable := b.factory.ConstructVariable(colID)
			aggCols[i].scalar = b.factory.ConstructAggFilter(aggCols[i].scalar, variable)
		}

		if agg.isOrderingSensitive() {
			haveOrderingSensitiveAgg = true
		}

		if b.subquery != nil {
			// Update the subquery with any outer columns from the aggregate
			// arguments. The outer columns were not added in finishBuildScalarRef
			// because at the time the arguments were built, we did not know which
			// was the appropriate scope for the aggregation. (buildAggregateFunction
			// ensures that finishBuildScalarRef doesn't add the outer columns by
			// temporarily setting b.subquery to nil. See buildAggregateFunction
			// for more details.)
			b.subquery.outerCols.UnionWith(agg.colRefs.Difference(fromCols))
		}
	}

	if haveOrderingSensitiveAgg {
		g.aggInScope.copyOrdering(fromScope)
	}

	// Construct the pre-projection, which renders the grouping columns and the
	// aggregate arguments, as well as any additional order by columns.
	b.constructProjectForScope(fromScope, g.aggInScope)

	g.aggOutScope.expr = b.constructGroupBy(
		g.aggInScope.expr.(memo.RelExpr),
		groupingColSet,
		aggCols,
		g.aggInScope.ordering,
	)

	// Wrap with the HAVING filter if it exists.
	if having != nil {
		input := g.aggOutScope.expr.(memo.RelExpr)
		filters := memo.FiltersExpr{b.factory.ConstructFiltersItem(having)}
		g.aggOutScope.expr = b.factory.ConstructSelect(input, filters)
	}

	return g.aggOutScope
}
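
// For illustration, how DISTINCT and FILTER wrap the aggregate. Given:
//
//   SELECT COUNT(DISTINCT v) FILTER (WHERE v > 0) FROM kv GROUP BY k
//
// the pre-projection renders v, v > 0, and k; buildAggregation then builds
// (roughly) the scalar
//
//   AggFilter(AggDistinct(Count(v)), Variable(<v > 0 column>))
//
// i.e. the innermost operator is the aggregate itself, AggDistinct is layered
// on when DISTINCT was specified, and AggFilter is outermost, referencing the
// extra boolean column that was added to aggInScope right after the argument
// columns.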

// analyzeHaving analyzes the HAVING clause and returns it as a typed
// expression. fromScope contains the name bindings that are visible for this
// HAVING clause (e.g., passed in from an enclosing statement).
func (b *Builder) analyzeHaving(having *tree.Where, fromScope *scope) tree.TypedExpr {
	if having == nil {
		return nil
	}

	// We need to save and restore the previous value of the field in semaCtx
	// in case we are recursively called within a subquery context.
	defer b.semaCtx.Properties.Restore(b.semaCtx.Properties)
	b.semaCtx.Properties.Require(
		exprKindHaving.String(), tree.RejectWindowApplications|tree.RejectGenerators,
	)
	fromScope.context = exprKindHaving
	return fromScope.resolveAndRequireType(having.Expr, types.Bool)
}

// buildHaving builds a set of memo groups that represent the given HAVING
// clause. fromScope contains the name bindings that are visible for this
// HAVING clause (e.g., passed in from an enclosing statement).
//
// The return value is the top-level scalar expression built for this
// HAVING clause.
func (b *Builder) buildHaving(having tree.TypedExpr, fromScope *scope) opt.ScalarExpr {
	if having == nil {
		return nil
	}

	return b.buildScalar(having, fromScope, nil, nil, nil)
}

// buildGroupingList builds a set of memo groups that represent a list of
// GROUP BY expressions, adding the group-by expressions as columns to
// aggInScope and populating groupStrs.
//
// groupBy   The given GROUP BY expressions.
// selects   The select expressions are needed in case one of the GROUP BY
//           expressions is an index into the select list. For example,
//             SELECT count(*), k FROM t GROUP BY 2
//           indicates that the grouping is on the second select expression, k.
// fromScope The scope for the input to the aggregation (the FROM clause).
func (b *Builder) buildGroupingList(
	groupBy tree.GroupBy, selects tree.SelectExprs, projectionsScope *scope, fromScope *scope,
) {
	g := fromScope.groupby
	g.groupStrs = make(groupByStrSet, len(groupBy))
	if g.aggInScope.cols == nil {
		g.aggInScope.cols = make([]scopeColumn, 0, len(groupBy))
	}

	// The buildingGroupingCols flag is used to ensure that a grouping error is
	// not raised prematurely. For example:
	//   SELECT count(*), a FROM ab GROUP BY a
	// is legal, but
	//   SELECT count(*), b FROM ab GROUP BY a
	// will throw the error, `column "b" must appear in the GROUP BY clause or be
	// used in an aggregate function`. The builder cannot know whether there is
	// a grouping error until the grouping columns are fully built.
	g.buildingGroupingCols = true
	for _, e := range groupBy {
		b.buildGrouping(e, selects, projectionsScope, fromScope, g.aggInScope)
	}
	g.buildingGroupingCols = false
}
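
// For illustration, where HAVING lands in the built expression. Given:
//
//   SELECT k FROM kv GROUP BY k HAVING SUM(v) > 10
//
// the SUM(v) aggregate is computed by the GroupBy (it is added to aggOutScope
// like any other aggregate), and buildAggregation wraps the GroupBy output in
// a Select whose filter is the scalar built from SUM(v) > 10, referencing the
// aggregate's output column.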

// buildGrouping builds a set of memo groups that represent a GROUP BY
// expression. The expression (or expressions, if we have a star) is added to
// groupStrs and to the aggInScope.
//
// groupBy          The given GROUP BY expression.
// selects          The select expressions are needed in case the GROUP BY
//                  expression is an index into the select list.
// projectionsScope The scope that contains the columns for the SELECT targets
//                  (used when GROUP BY refers to a target by alias).
// fromScope        The scope for the input to the aggregation (the FROM
//                  clause).
// aggInScope       The scope that will contain the grouping expressions as
//                  well as the aggregate function arguments.
func (b *Builder) buildGrouping(
	groupBy tree.Expr, selects tree.SelectExprs, projectionsScope, fromScope, aggInScope *scope,
) {
	// Unwrap parenthesized expressions like "((a))" to "a".
	groupBy = tree.StripParens(groupBy)
	alias := ""

	// Comment below pasted from PostgreSQL (findTargetListEntrySQL92 in
	// src/backend/parser/parse_clause.c).
	//
	// Handle two special cases as mandated by the SQL92 spec:
	//
	// 1. Bare ColumnName (no qualifier or subscripts)
	//    For a bare identifier, we search for a matching column name
	//    in the existing target list. Multiple matches are an error
	//    unless they refer to identical values; for example,
	//    we allow SELECT a, a FROM table ORDER BY a
	//    but not SELECT a AS b, b FROM table ORDER BY b
	//    If no match is found, we fall through and treat the identifier
	//    as an expression.
	//    For GROUP BY, it is incorrect to match the grouping item against
	//    targetlist entries: according to SQL92, an identifier in GROUP BY
	//    is a reference to a column name exposed by FROM, not to a target
	//    list column. However, many implementations (including pre-7.0
	//    PostgreSQL) accept this anyway. So for GROUP BY, we look first
	//    to see if the identifier matches any FROM column name, and only
	//    try for a targetlist name if it doesn't. This ensures that we
	//    adhere to the spec in the case where the name could be both.
	//    DISTINCT ON isn't in the standard, so we can do what we like there;
	//    we choose to make it work like ORDER BY, on the rather flimsy
	//    grounds that ordinary DISTINCT works on targetlist entries.
	//
	// 2. IntegerConstant
	//    This means to use the n'th item in the existing target list.
	//    Note that it would make no sense to order/group/distinct by an
	//    actual constant, so this does not create a conflict with SQL99.
	//    GROUP BY column-number is not allowed by SQL92, but since
	//    the standard has no other behavior defined for this syntax,
	//    we may as well accept this common extension.

	// This function sets groupBy and alias in these special cases.
	func() {
		// Check whether the GROUP BY clause refers to a column in the SELECT list
		// by index, e.g. `SELECT a, SUM(b) FROM y GROUP BY 1` (case 2 above).
		if col := colIndex(len(selects), groupBy, "GROUP BY"); col != -1 {
			groupBy, alias = selects[col].Expr, string(selects[col].As)
			return
		}

		if name, ok := groupBy.(*tree.UnresolvedName); ok {
			if name.NumParts != 1 || name.Star {
				return
			}
			// Case 1 above.
			targetName := name.Parts[0]

			// We must prefer a match against a FROM-clause column (but ignore
			// upper scopes); in this case we let the general case below handle
			// the reference.
			for i := range fromScope.cols {
				if string(fromScope.cols[i].name) == targetName {
					return
				}
			}
			// See if it matches exactly one of the target list entries.
			var match *scopeColumn
			for i := range projectionsScope.cols {
				if col := &projectionsScope.cols[i]; string(col.name) == targetName {
					if match != nil {
						// Multiple matches are only allowed if they refer to identical
						// expressions.
						if match.getExprStr() != col.getExprStr() {
							panic(pgerror.Newf(pgcode.AmbiguousColumn, "GROUP BY %q is ambiguous", targetName))
						}
					}
					match = col
				}
			}
			if match != nil {
				groupBy, alias = match.expr, targetName
			}
		}
	}()

	// We need to save and restore the previous value of the field in semaCtx
	// in case we are recursively called within a subquery context.
	defer b.semaCtx.Properties.Restore(b.semaCtx.Properties)

	// Make sure the GROUP BY columns have no special functions.
	b.semaCtx.Properties.Require(exprKindGroupBy.String(), tree.RejectSpecial)
	fromScope.context = exprKindGroupBy

	// Resolve types, expand stars, and flatten tuples.
	exprs := b.expandStarAndResolveType(groupBy, fromScope)
	exprs = flattenTuples(exprs)

	// Finally, build each of the GROUP BY columns.
	for _, e := range exprs {
		// If a grouping column has already been added, don't add it again.
		// GROUP BY a, a is semantically equivalent to GROUP BY a.
		exprStr := symbolicExprStr(e)
		if _, ok := fromScope.groupby.groupStrs[exprStr]; ok {
			continue
		}

		// Save a representation of the GROUP BY expression for validation of the
		// SELECT and HAVING expressions. This enables queries such as:
		//   SELECT x+y FROM t GROUP BY x+y
		col := b.addColumn(aggInScope, alias, e)
		b.buildScalar(e, fromScope, aggInScope, col, nil)
		fromScope.groupby.groupStrs[exprStr] = col
	}
}
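
// For illustration, how the two SQL92 special cases above resolve, assuming a
// table kv(k, v):
//
//   SELECT k+v AS total FROM kv GROUP BY total  -- case 1: "total" matches no
//                                               -- FROM column, so the SELECT
//                                               -- target k+v is grouped on.
//   SELECT k+v AS k     FROM kv GROUP BY k      -- case 1: "k" matches a FROM
//                                               -- column first, so grouping
//                                               -- is on column k, not k+v.
//   SELECT k+v          FROM kv GROUP BY 1      -- case 2: ordinal 1 refers to
//                                               -- the first target, k+v.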

// buildAggArg builds a scalar expression which is used as an input in some
// form to an aggregate expression. The scopeColumn for the built expression
// will be added to tempScope.
func (b *Builder) buildAggArg(
	e tree.TypedExpr, info *aggregateInfo, tempScope, fromScope *scope,
) opt.ScalarExpr {
	// This synthesizes a new tempScope column, unless the argument is a
	// simple VariableOp.
	col := b.addColumn(tempScope, "" /* alias */, e)
	b.buildScalar(e, fromScope, tempScope, col, &info.colRefs)
	if col.scalar != nil {
		return col.scalar
	}
	return b.factory.ConstructVariable(col.id)
}

// translateAggName translates the aggregate name if needed. This is used
// to override the output column name of an aggregation.
// See isOrderedSetAggregate.
func translateAggName(name string) string {
	switch name {
	case "percentile_disc_impl":
		return "percentile_disc"
	case "percentile_cont_impl":
		return "percentile_cont"
	}
	return name
}

// buildAggregateFunction is called when we are building a function which is an
// aggregate. Any non-trivial parameters (i.e. not a simple column reference)
// to the aggregate function are extracted and added to aggInScope. The
// aggregate function expression itself is added to aggOutScope. For example:
//
//   SELECT SUM(x+1) FROM xy
//   =>
//   aggInScope : x+1 AS column1
//   aggOutScope: SUM(column1)
//
// buildAggregateFunction returns a pointer to the aggregateInfo containing
// the function definition, fully built arguments, and the aggregate output
// column.
func (b *Builder) buildAggregateFunction(
	f *tree.FuncExpr, def *memo.FunctionPrivate, fromScope *scope,
) *aggregateInfo {
	tempScope := fromScope.startAggFunc()
	tempScopeColsBefore := len(tempScope.cols)

	info := aggregateInfo{
		FuncExpr: f,
		def:      *def,
		distinct: (f.Type == tree.DistinctFuncType),
		args:     make(memo.ScalarListExpr, len(f.Exprs)),
	}

	// Temporarily set b.subquery to nil so we don't add outer columns to the
	// wrong scope.
	subq := b.subquery
	b.subquery = nil
	defer func() { b.subquery = subq }()

	for i, pexpr := range f.Exprs {
		info.args[i] = b.buildAggArg(pexpr.(tree.TypedExpr), &info, tempScope, fromScope)
	}

	// If we have a filter, add it to tempScope after all the arguments. We'll
	// later extract the column that gets added here in buildAggregation.
	if f.Filter != nil {
		info.filter = b.buildAggArg(f.Filter.(tree.TypedExpr), &info, tempScope, fromScope)
	}

	// If we have ORDER BY, add the ordering columns to the tempScope.
	if f.OrderBy != nil {
		for _, o := range f.OrderBy {
			b.buildAggArg(o.Expr.(tree.TypedExpr), &info, tempScope, fromScope)
		}
	}

	// Find the appropriate aggregation scopes for this aggregate now that we
	// know which columns it references. If necessary, we'll move the columns
	// for the arguments from tempScope to aggInScope below.
	g := fromScope.endAggFunc(info.colRefs)

	// If we already have the same aggregation, reuse it. Otherwise add it
	// to the list of aggregates that need to be computed by the groupby
	// expression and synthesize a column for the aggregation result.
	info.col = g.findAggregate(info)
	if info.col == nil {
		// Translate the function name if needed.
		funcName := translateAggName(def.Name)

		// Use 0 as the group for now; it will be filled in later by the
		// buildAggregation method.
		info.col = b.synthesizeColumn(g.aggOutScope, funcName, f.ResolvedType(), f, nil /* scalar */)

		// Move the columns for the aggregate input expressions to the correct
		// scope.
		if g.aggInScope != tempScope {
			g.aggInScope.cols = append(g.aggInScope.cols, tempScope.cols[tempScopeColsBefore:]...)
			tempScope.cols = tempScope.cols[:tempScopeColsBefore]
		}

		// Add the aggregate to the list of aggregates that need to be computed
		// by the groupby expression.
		g.aggs = append(g.aggs, info)
	} else {
		// Undo the adding of the args.
		// TODO(radu): is there a cleaner way to do this?
		tempScope.cols = tempScope.cols[:tempScopeColsBefore]
	}

	return &info
}
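
// For illustration, the effect of translateAggName on ordered-set aggregates
// (the desugaring to the "_impl" overloads happens before this file is
// reached). Given something like:
//
//   SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY v) FROM kv
//
// the aggregate is built through the internal "percentile_cont_impl"
// overload, but the column synthesized in aggOutScope is named
// percentile_cont, so the internal name never leaks into result column
// labels.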

func (b *Builder) constructWindowFn(name string, args []opt.ScalarExpr) opt.ScalarExpr {
	switch name {
	case "rank":
		return b.factory.ConstructRank()
	case "row_number":
		return b.factory.ConstructRowNumber()
	case "dense_rank":
		return b.factory.ConstructDenseRank()
	case "percent_rank":
		return b.factory.ConstructPercentRank()
	case "cume_dist":
		return b.factory.ConstructCumeDist()
	case "ntile":
		return b.factory.ConstructNtile(args[0])
	case "lag":
		return b.factory.ConstructLag(args[0], args[1], args[2])
	case "lead":
		return b.factory.ConstructLead(args[0], args[1], args[2])
	case "first_value":
		return b.factory.ConstructFirstValue(args[0])
	case "last_value":
		return b.factory.ConstructLastValue(args[0])
	case "nth_value":
		return b.factory.ConstructNthValue(args[0], args[1])
	default:
		return b.constructAggregate(name, args)
	}
}

func (b *Builder) constructAggregate(name string, args []opt.ScalarExpr) opt.ScalarExpr {
	switch name {
	case "array_agg":
		return b.factory.ConstructArrayAgg(args[0])
	case "avg":
		return b.factory.ConstructAvg(args[0])
	case "bit_and":
		return b.factory.ConstructBitAndAgg(args[0])
	case "bit_or":
		return b.factory.ConstructBitOrAgg(args[0])
	case "bool_and", "every":
		return b.factory.ConstructBoolAnd(args[0])
	case "bool_or":
		return b.factory.ConstructBoolOr(args[0])
	case "concat_agg":
		return b.factory.ConstructConcatAgg(args[0])
	case "corr":
		return b.factory.ConstructCorr(args[0], args[1])
	case "count":
		return b.factory.ConstructCount(args[0])
	case "count_rows":
		return b.factory.ConstructCountRows()
	case "max":
		return b.factory.ConstructMax(args[0])
	case "min":
		return b.factory.ConstructMin(args[0])
	case "sum_int":
		return b.factory.ConstructSumInt(args[0])
	case "sum":
		return b.factory.ConstructSum(args[0])
	case "sqrdiff":
		return b.factory.ConstructSqrDiff(args[0])
	case "variance", "var_samp":
		return b.factory.ConstructVariance(args[0])
	case "stddev", "stddev_samp":
		return b.factory.ConstructStdDev(args[0])
	case "xor_agg":
		return b.factory.ConstructXorAgg(args[0])
	case "json_agg":
		return b.factory.ConstructJsonAgg(args[0])
	case "jsonb_agg":
		return b.factory.ConstructJsonbAgg(args[0])
	case "string_agg":
		return b.factory.ConstructStringAgg(args[0], args[1])
	case "percentile_disc_impl":
		return b.factory.ConstructPercentileDisc(args[0], args[1])
	case "percentile_cont_impl":
		return b.factory.ConstructPercentileCont(args[0], args[1])
	}
	panic(errors.AssertionFailedf("unhandled aggregate: %s", name))
}
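
// For illustration, why constructWindowFn falls through to constructAggregate:
// any aggregate function can also be used as a window function, e.g.
//
//   SELECT SUM(v) OVER (PARTITION BY k) FROM kv
//
// "sum" has no dedicated window operator, so the default case builds the same
// SumOp that a grouped aggregation would use; the window machinery then
// applies it over each row's frame instead of over a group.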

func isAggregate(def *tree.FunctionDefinition) bool {
	return def.Class == tree.AggregateClass
}

func isWindow(def *tree.FunctionDefinition) bool {
	return def.Class == tree.WindowClass
}

func isGenerator(def *tree.FunctionDefinition) bool {
	return def.Class == tree.GeneratorClass
}

func isSQLFn(def *tree.FunctionDefinition) bool {
	return def.Class == tree.SQLClass
}

func newGroupingError(name *tree.Name) error {
	return pgerror.Newf(pgcode.Grouping,
		"column \"%s\" must appear in the GROUP BY clause or be used in an aggregate function",
		tree.ErrString(name),
	)
}

// allowImplicitGroupingColumn returns true if col is part of a table and the
// groupby metadata indicates that we are grouping on the entire PK of that
// table. In that case, we can allow col as an "implicit" grouping column, even
// if it is not specified in the query.
func (b *Builder) allowImplicitGroupingColumn(colID opt.ColumnID, g *groupby) bool {
	md := b.factory.Metadata()
	colMeta := md.ColumnMeta(colID)
	if colMeta.Table == 0 {
		return false
	}
	// Get all the PK columns.
	tab := md.Table(colMeta.Table)
	var pkCols opt.ColSet
	if tab.IndexCount() == 0 {
		// Virtual tables have no indexes.
		return false
	}
	primaryIndex := tab.Index(cat.PrimaryIndex)
	for i := 0; i < primaryIndex.KeyColumnCount(); i++ {
		pkCols.Add(colMeta.Table.ColumnID(primaryIndex.Column(i).Ordinal))
	}
	// Remove PK columns that are grouping cols and see if there's anything left.
	groupingCols := g.groupingCols()
	for i := range groupingCols {
		pkCols.Remove(groupingCols[i].id)
	}
	return pkCols.Empty()
}
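
// For illustration, the case allowImplicitGroupingColumn enables. Assuming a
// table kv with primary key (k):
//
//   SELECT v FROM kv GROUP BY k
//
// v is neither a grouping column nor inside an aggregate, but because the
// grouping columns cover the entire primary key, each group contains exactly
// one row and v has a single well-defined value per group, so no grouping
// error is raised for it.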