github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/optbuilder/distinct.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package optbuilder
    12  
    13  import (
    14  	"github.com/cockroachdb/cockroach/pkg/sql/opt"
    15  	"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
    16  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    17  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    19  )
    20  
    21  // constructDistinct wraps inScope.group in a DistinctOn operator corresponding
    22  // to a SELECT DISTINCT statement.
    23  func (b *Builder) constructDistinct(inScope *scope) memo.RelExpr {
    24  	// We are doing a distinct along all the projected columns.
    25  	var private memo.GroupingPrivate
    26  	for i := range inScope.cols {
    27  		if !inScope.cols[i].hidden {
    28  			private.GroupingCols.Add(inScope.cols[i].id)
    29  		}
    30  	}
    31  
    32  	// Check that the ordering only refers to projected columns.
    33  	// This will cause an error for queries like:
    34  	//   SELECT DISTINCT a FROM t ORDER BY b
    35  	// Note: this behavior is consistent with PostgreSQL.
    36  	for _, col := range inScope.ordering {
    37  		if !private.GroupingCols.Contains(col.ID()) {
    38  			panic(pgerror.Newf(
    39  				pgcode.InvalidColumnReference,
    40  				"for SELECT DISTINCT, ORDER BY expressions must appear in select list",
    41  			))
    42  		}
    43  	}
    44  
    45  	// We don't set def.Ordering. Because the ordering can only refer to projected
    46  	// columns, it does not affect the results; it doesn't need to be required of
    47  	// the DistinctOn input.
    48  	input := inScope.expr.(memo.RelExpr)
    49  	return b.factory.ConstructDistinctOn(input, memo.EmptyAggregationsExpr, &private)
    50  }
    51  
    52  // buildDistinctOn builds a set of memo groups that represent a DISTINCT ON
    53  // expression. If nullsAreDistinct is true, then construct the UpsertDistinctOn
    54  // operator rather than the DistinctOn operator (see the UpsertDistinctOn
    55  // operator comment for details on the differences). The errorOnDup parameter
    56  // controls whether multiple rows in the same distinct group trigger an error.
    57  // This can only take on a value in the EnsureDistinctOn and
    58  // EnsureUpsertDistinctOn cases.
    59  func (b *Builder) buildDistinctOn(
    60  	distinctOnCols opt.ColSet, inScope *scope, nullsAreDistinct bool, errorOnDup string,
    61  ) (outScope *scope) {
    62  	// When there is a DISTINCT ON clause, the ORDER BY clause is restricted to either:
    63  	//  1. Contain a subset of columns from the ON list, or
    64  	//  2. Start with a permutation of all columns from the ON list.
    65  	//
    66  	// In case 1, the ORDER BY simply specifies an output ordering as usual.
    67  	// Example:
    68  	//   SELECT DISTINCT ON (a, b) c, d FROM t ORDER BY a, b
    69  	//
    70  	// In case 2, the ORDER BY columns serve two separate semantic purposes:
    71  	//  - the prefix that contains the ON columns specifies an output ordering;
    72  	//  - the rest of the columns affect the actual results of the query: for each
    73  	//    set of values, the chosen row is the first according to that ordering.
    74  	// Example:
    75  	//   SELECT DISTINCT ON (a) b, c FROM t ORDER BY a, e
    76  	//   This means: for each value of a, choose the (b, c) from the row with the
    77  	//   smallest e value, and order these results by a.
    78  	//
    79  	// Note: this behavior is consistent with PostgreSQL.
    80  
    81  	// Check that the DISTINCT ON expressions match the initial ORDER BY
    82  	// expressions.
    83  	var seen opt.ColSet
    84  	for _, col := range inScope.ordering {
    85  		if !distinctOnCols.Contains(col.ID()) {
    86  			panic(pgerror.Newf(
    87  				pgcode.InvalidColumnReference,
    88  				"SELECT DISTINCT ON expressions must match initial ORDER BY expressions",
    89  			))
    90  		}
    91  		seen.Add(col.ID())
    92  		if seen.Equals(distinctOnCols) {
    93  			// All DISTINCT ON columns showed up; other columns are allowed in the
    94  			// rest of the ORDER BY (case 2 above).
    95  			break
    96  		}
    97  	}
    98  
    99  	private := memo.GroupingPrivate{GroupingCols: distinctOnCols.Copy(),
   100  		NullsAreDistinct: nullsAreDistinct, ErrorOnDup: errorOnDup}
   101  
   102  	// The ordering is used for intra-group ordering. Ordering with respect to the
   103  	// DISTINCT ON columns doesn't affect intra-group ordering, so we add these
   104  	// columns as optional.
   105  	private.Ordering.FromOrderingWithOptCols(inScope.ordering, distinctOnCols)
   106  
   107  	// Set up a new scope for the output of DISTINCT ON. This scope differs from
   108  	// the input scope in that it doesn't have "extra" ORDER BY columns, e.g.
   109  	// column e in case 2 example:
   110  	//   SELECT DISTINCT ON (a) b, c FROM t ORDER BY a, e
   111  	//
   112  	//             +-------------------------+
   113  	//             |   inScope  |  outScope  |
   114  	// +-----------+------------+------------+
   115  	// |   cols    |    b, c    |    b, c    |
   116  	// | extraCols |    a, e    |     a      |
   117  	// | ordering  |   a+, e+   |     a+     |
   118  	// +-----------+------------+------------+
   119  	outScope = inScope.replace()
   120  	outScope.cols = make([]scopeColumn, 0, len(inScope.cols))
   121  	// Add the output columns.
   122  	for i := range inScope.cols {
   123  		outScope.cols = append(outScope.cols, inScope.cols[i])
   124  	}
   125  
   126  	// Add any extra ON columns.
   127  	outScope.extraCols = make([]scopeColumn, 0, len(inScope.extraCols))
   128  	for i := range inScope.extraCols {
   129  		if distinctOnCols.Contains(inScope.extraCols[i].id) {
   130  			outScope.extraCols = append(outScope.extraCols, inScope.extraCols[i])
   131  		}
   132  	}
   133  
   134  	// Retain the prefix of the ordering that refers to the ON columns.
   135  	outScope.ordering = inScope.ordering
   136  	for i, col := range inScope.ordering {
   137  		if !distinctOnCols.Contains(col.ID()) {
   138  			outScope.ordering = outScope.ordering[:i]
   139  			break
   140  		}
   141  	}
   142  
   143  	aggs := make(memo.AggregationsExpr, 0, len(inScope.cols))
   144  
   145  	// Build FirstAgg for all visible columns except the DistinctOnCols
   146  	// (and eliminate duplicates).
   147  	excluded := distinctOnCols.Copy()
   148  	for i := range outScope.cols {
   149  		if id := outScope.cols[i].id; !excluded.Contains(id) {
   150  			excluded.Add(id)
   151  			aggs = append(aggs, b.factory.ConstructAggregationsItem(
   152  				b.factory.ConstructFirstAgg(b.factory.ConstructVariable(id)),
   153  				id,
   154  			))
   155  		}
   156  	}
   157  
   158  	input := inScope.expr.(memo.RelExpr)
   159  	if nullsAreDistinct {
   160  		if errorOnDup == "" {
   161  			outScope.expr = b.factory.ConstructUpsertDistinctOn(input, aggs, &private)
   162  		} else {
   163  			outScope.expr = b.factory.ConstructEnsureUpsertDistinctOn(input, aggs, &private)
   164  		}
   165  	} else {
   166  		if errorOnDup == "" {
   167  			outScope.expr = b.factory.ConstructDistinctOn(input, aggs, &private)
   168  		} else {
   169  			outScope.expr = b.factory.ConstructEnsureDistinctOn(input, aggs, &private)
   170  		}
   171  	}
   172  	return outScope
   173  }
   174  
   175  // analyzeDistinctOnArgs analyzes the DISTINCT ON columns and adds the
   176  // resulting typed expressions to distinctOnScope.
   177  func (b *Builder) analyzeDistinctOnArgs(
   178  	distinctOn tree.DistinctOn, inScope, projectionsScope *scope,
   179  ) (distinctOnScope *scope) {
   180  	if len(distinctOn) == 0 {
   181  		return nil
   182  	}
   183  
   184  	distinctOnScope = inScope.push()
   185  	distinctOnScope.cols = make([]scopeColumn, 0, len(distinctOn))
   186  
   187  	// We need to save and restore the previous value of the field in
   188  	// semaCtx in case we are recursively called within a subquery
   189  	// context.
   190  	defer b.semaCtx.Properties.Restore(b.semaCtx.Properties)
   191  	b.semaCtx.Properties.Require(exprKindDistinctOn.String(), tree.RejectGenerators)
   192  	inScope.context = exprKindDistinctOn
   193  
   194  	for i := range distinctOn {
   195  		b.analyzeExtraArgument(distinctOn[i], inScope, projectionsScope, distinctOnScope)
   196  	}
   197  	return distinctOnScope
   198  }
   199  
   200  // buildDistinctOnArgs builds the DISTINCT ON columns, adding to
   201  // projectionsScope.extraCols as necessary.
   202  // The set of DISTINCT ON columns is stored in projectionsScope.distinctOnCols.
   203  func (b *Builder) buildDistinctOnArgs(inScope, projectionsScope, distinctOnScope *scope) {
   204  	if distinctOnScope == nil {
   205  		return
   206  	}
   207  
   208  	for i := range distinctOnScope.cols {
   209  		b.addOrderByOrDistinctOnColumn(
   210  			inScope, projectionsScope, distinctOnScope, &distinctOnScope.cols[i],
   211  		)
   212  	}
   213  	projectionsScope.addExtraColumns(distinctOnScope.cols)
   214  	projectionsScope.distinctOnCols = distinctOnScope.colSet()
   215  }