github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/optbuilder/distinct.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package optbuilder 12 13 import ( 14 "github.com/cockroachdb/cockroach/pkg/sql/opt" 15 "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" 16 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" 17 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" 18 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 19 ) 20 21 // constructDistinct wraps inScope.group in a DistinctOn operator corresponding 22 // to a SELECT DISTINCT statement. 23 func (b *Builder) constructDistinct(inScope *scope) memo.RelExpr { 24 // We are doing a distinct along all the projected columns. 25 var private memo.GroupingPrivate 26 for i := range inScope.cols { 27 if !inScope.cols[i].hidden { 28 private.GroupingCols.Add(inScope.cols[i].id) 29 } 30 } 31 32 // Check that the ordering only refers to projected columns. 33 // This will cause an error for queries like: 34 // SELECT DISTINCT a FROM t ORDER BY b 35 // Note: this behavior is consistent with PostgreSQL. 36 for _, col := range inScope.ordering { 37 if !private.GroupingCols.Contains(col.ID()) { 38 panic(pgerror.Newf( 39 pgcode.InvalidColumnReference, 40 "for SELECT DISTINCT, ORDER BY expressions must appear in select list", 41 )) 42 } 43 } 44 45 // We don't set def.Ordering. Because the ordering can only refer to projected 46 // columns, it does not affect the results; it doesn't need to be required of 47 // the DistinctOn input. 48 input := inScope.expr.(memo.RelExpr) 49 return b.factory.ConstructDistinctOn(input, memo.EmptyAggregationsExpr, &private) 50 } 51 52 // buildDistinctOn builds a set of memo groups that represent a DISTINCT ON 53 // expression. If nullsAreDistinct is true, then construct the UpsertDistinctOn 54 // operator rather than the DistinctOn operator (see the UpsertDistinctOn 55 // operator comment for details on the differences). The errorOnDup parameter 56 // controls whether multiple rows in the same distinct group trigger an error. 57 // This can only take on a value in the EnsureDistinctOn and 58 // EnsureUpsertDistinctOn cases. 59 func (b *Builder) buildDistinctOn( 60 distinctOnCols opt.ColSet, inScope *scope, nullsAreDistinct bool, errorOnDup string, 61 ) (outScope *scope) { 62 // When there is a DISTINCT ON clause, the ORDER BY clause is restricted to either: 63 // 1. Contain a subset of columns from the ON list, or 64 // 2. Start with a permutation of all columns from the ON list. 65 // 66 // In case 1, the ORDER BY simply specifies an output ordering as usual. 67 // Example: 68 // SELECT DISTINCT ON (a, b) c, d FROM t ORDER BY a, b 69 // 70 // In case 2, the ORDER BY columns serve two separate semantic purposes: 71 // - the prefix that contains the ON columns specifies an output ordering; 72 // - the rest of the columns affect the actual results of the query: for each 73 // set of values, the chosen row is the first according to that ordering. 74 // Example: 75 // SELECT DISTINCT ON (a) b, c FROM t ORDER BY a, e 76 // This means: for each value of a, choose the (b, c) from the row with the 77 // smallest e value, and order these results by a. 78 // 79 // Note: this behavior is consistent with PostgreSQL. 80 81 // Check that the DISTINCT ON expressions match the initial ORDER BY 82 // expressions. 83 var seen opt.ColSet 84 for _, col := range inScope.ordering { 85 if !distinctOnCols.Contains(col.ID()) { 86 panic(pgerror.Newf( 87 pgcode.InvalidColumnReference, 88 "SELECT DISTINCT ON expressions must match initial ORDER BY expressions", 89 )) 90 } 91 seen.Add(col.ID()) 92 if seen.Equals(distinctOnCols) { 93 // All DISTINCT ON columns showed up; other columns are allowed in the 94 // rest of the ORDER BY (case 2 above). 95 break 96 } 97 } 98 99 private := memo.GroupingPrivate{GroupingCols: distinctOnCols.Copy(), 100 NullsAreDistinct: nullsAreDistinct, ErrorOnDup: errorOnDup} 101 102 // The ordering is used for intra-group ordering. Ordering with respect to the 103 // DISTINCT ON columns doesn't affect intra-group ordering, so we add these 104 // columns as optional. 105 private.Ordering.FromOrderingWithOptCols(inScope.ordering, distinctOnCols) 106 107 // Set up a new scope for the output of DISTINCT ON. This scope differs from 108 // the input scope in that it doesn't have "extra" ORDER BY columns, e.g. 109 // column e in case 2 example: 110 // SELECT DISTINCT ON (a) b, c FROM t ORDER BY a, e 111 // 112 // +-------------------------+ 113 // | inScope | outScope | 114 // +-----------+------------+------------+ 115 // | cols | b, c | b, c | 116 // | extraCols | a, e | a | 117 // | ordering | a+, e+ | a+ | 118 // +-----------+------------+------------+ 119 outScope = inScope.replace() 120 outScope.cols = make([]scopeColumn, 0, len(inScope.cols)) 121 // Add the output columns. 122 for i := range inScope.cols { 123 outScope.cols = append(outScope.cols, inScope.cols[i]) 124 } 125 126 // Add any extra ON columns. 127 outScope.extraCols = make([]scopeColumn, 0, len(inScope.extraCols)) 128 for i := range inScope.extraCols { 129 if distinctOnCols.Contains(inScope.extraCols[i].id) { 130 outScope.extraCols = append(outScope.extraCols, inScope.extraCols[i]) 131 } 132 } 133 134 // Retain the prefix of the ordering that refers to the ON columns. 135 outScope.ordering = inScope.ordering 136 for i, col := range inScope.ordering { 137 if !distinctOnCols.Contains(col.ID()) { 138 outScope.ordering = outScope.ordering[:i] 139 break 140 } 141 } 142 143 aggs := make(memo.AggregationsExpr, 0, len(inScope.cols)) 144 145 // Build FirstAgg for all visible columns except the DistinctOnCols 146 // (and eliminate duplicates). 147 excluded := distinctOnCols.Copy() 148 for i := range outScope.cols { 149 if id := outScope.cols[i].id; !excluded.Contains(id) { 150 excluded.Add(id) 151 aggs = append(aggs, b.factory.ConstructAggregationsItem( 152 b.factory.ConstructFirstAgg(b.factory.ConstructVariable(id)), 153 id, 154 )) 155 } 156 } 157 158 input := inScope.expr.(memo.RelExpr) 159 if nullsAreDistinct { 160 if errorOnDup == "" { 161 outScope.expr = b.factory.ConstructUpsertDistinctOn(input, aggs, &private) 162 } else { 163 outScope.expr = b.factory.ConstructEnsureUpsertDistinctOn(input, aggs, &private) 164 } 165 } else { 166 if errorOnDup == "" { 167 outScope.expr = b.factory.ConstructDistinctOn(input, aggs, &private) 168 } else { 169 outScope.expr = b.factory.ConstructEnsureDistinctOn(input, aggs, &private) 170 } 171 } 172 return outScope 173 } 174 175 // analyzeDistinctOnArgs analyzes the DISTINCT ON columns and adds the 176 // resulting typed expressions to distinctOnScope. 177 func (b *Builder) analyzeDistinctOnArgs( 178 distinctOn tree.DistinctOn, inScope, projectionsScope *scope, 179 ) (distinctOnScope *scope) { 180 if len(distinctOn) == 0 { 181 return nil 182 } 183 184 distinctOnScope = inScope.push() 185 distinctOnScope.cols = make([]scopeColumn, 0, len(distinctOn)) 186 187 // We need to save and restore the previous value of the field in 188 // semaCtx in case we are recursively called within a subquery 189 // context. 190 defer b.semaCtx.Properties.Restore(b.semaCtx.Properties) 191 b.semaCtx.Properties.Require(exprKindDistinctOn.String(), tree.RejectGenerators) 192 inScope.context = exprKindDistinctOn 193 194 for i := range distinctOn { 195 b.analyzeExtraArgument(distinctOn[i], inScope, projectionsScope, distinctOnScope) 196 } 197 return distinctOnScope 198 } 199 200 // buildDistinctOnArgs builds the DISTINCT ON columns, adding to 201 // projectionsScope.extraCols as necessary. 202 // The set of DISTINCT ON columns is stored in projectionsScope.distinctOnCols. 203 func (b *Builder) buildDistinctOnArgs(inScope, projectionsScope, distinctOnScope *scope) { 204 if distinctOnScope == nil { 205 return 206 } 207 208 for i := range distinctOnScope.cols { 209 b.addOrderByOrDistinctOnColumn( 210 inScope, projectionsScope, distinctOnScope, &distinctOnScope.cols[i], 211 ) 212 } 213 projectionsScope.addExtraColumns(distinctOnScope.cols) 214 projectionsScope.distinctOnCols = distinctOnScope.colSet() 215 }