github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/optbuilder/subquery.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package optbuilder 12 13 import ( 14 "context" 15 16 "github.com/cockroachdb/cockroach/pkg/server/telemetry" 17 "github.com/cockroachdb/cockroach/pkg/sql/opt" 18 "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" 19 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" 20 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" 21 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 22 "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" 23 "github.com/cockroachdb/cockroach/pkg/sql/types" 24 "github.com/cockroachdb/errors" 25 ) 26 27 const multiRowSubqueryErrText = "more than one row returned by a subquery used as an expression" 28 29 // subquery represents a subquery expression in an expression tree 30 // after it has been type-checked and added to the memo. 31 type subquery struct { 32 // Subquery is the AST Subquery expression. 33 *tree.Subquery 34 35 // cols contains the output columns of the subquery. 36 cols []scopeColumn 37 38 // node is the top level memo node of the subquery. 39 node memo.RelExpr 40 41 // ordering is the ordering requested by the subquery. 42 // It is only consulted in certain cases, however (such as the 43 // ArrayFlatten operation). 44 ordering opt.Ordering 45 46 // wrapInTuple is true if the subquery return type should be wrapped in a 47 // tuple. This is true for subqueries that may return multiple rows in 48 // comparison expressions (e.g., IN, ANY, ALL) and EXISTS expressions. 49 wrapInTuple bool 50 51 // typ is the lazily resolved type of the subquery. 52 typ *types.T 53 54 // outerCols stores the set of outer columns in the subquery. These are 55 // columns which are referenced within the subquery but are bound in an 56 // outer scope. 57 outerCols opt.ColSet 58 59 // desiredNumColumns specifies the desired number of columns for the subquery. 60 // Specifying -1 for desiredNumColumns allows the subquery to return any 61 // number of columns and is used when the normal type checking machinery will 62 // verify that the correct number of columns is returned. 63 desiredNumColumns int 64 65 // extraColsAllowed indicates that extra columns built from the subquery 66 // (such as columns for which orderings have been requested) will not be 67 // stripped away. 68 extraColsAllowed bool 69 70 // scope is the input scope of the subquery. It is needed to lazily build 71 // the subquery in TypeCheck. 72 scope *scope 73 } 74 75 // isMultiRow returns whether the subquery can return multiple rows. 76 func (s *subquery) isMultiRow() bool { 77 return s.wrapInTuple && !s.Exists 78 } 79 80 // Walk is part of the tree.Expr interface. 81 func (s *subquery) Walk(v tree.Visitor) tree.Expr { 82 return s 83 } 84 85 // TypeCheck is part of the tree.Expr interface. 86 func (s *subquery) TypeCheck( 87 _ context.Context, _ *tree.SemaContext, desired *types.T, 88 ) (tree.TypedExpr, error) { 89 if s.typ != nil { 90 return s, nil 91 } 92 93 // Convert desired to an array of desired types for building the subquery. 94 desiredTypes := desired.TupleContents() 95 96 // Build the subquery. We cannot build the subquery earlier because we do 97 // not know the desired types until TypeCheck is called. 98 s.buildSubquery(desiredTypes) 99 100 // The typing for subqueries is complex, but regular. 101 // 102 // * If the subquery is part of an EXISTS statement: 103 // 104 // The type of the subquery is always "bool". 105 // 106 // * If the subquery is used in a single-row context: 107 // 108 // - If the subquery returns a single column with type "U", the type of the 109 // subquery is the type of the column "U". For example: 110 // 111 // SELECT 1 = (SELECT 1) 112 // 113 // The type of the subquery is "int". 114 // 115 // - If the subquery returns multiple columns, the type of the subquery is 116 // "tuple{C}" where "C" expands to all of the types of the columns of the 117 // subquery. For example: 118 // 119 // SELECT (1, 'a') = (SELECT 1, 'a') 120 // 121 // The type of the subquery is "tuple{int,string}" 122 // 123 // * If the subquery is used in a multi-row context: 124 // 125 // - If the subquery returns a single column with type "U", the type of the 126 // subquery is the singleton tuple of type "U": "tuple{U}". For example: 127 // 128 // SELECT 1 IN (SELECT 1) 129 // 130 // The type of the subquery's columns is "int" and the type of the 131 // subquery is "tuple{int}". 132 // 133 // - If the subquery returns multiple columns, the type of the subquery is 134 // "tuple{tuple{C}}" where "C expands to all of the types of the columns 135 // of the subquery. For example: 136 // 137 // SELECT (1, 'a') IN (SELECT 1, 'a') 138 // 139 // The types of the subquery's columns are "int" and "string". These are 140 // wrapped into "tuple{int,string}" to form the row type. And these are 141 // wrapped again to form the subquery type "tuple{tuple{int,string}}". 142 // 143 // Note that these rules produce a somewhat surprising equivalence: 144 // 145 // SELECT (SELECT 1, 2) = (SELECT (1, 2)) 146 // 147 // A subquery which returns a single column tuple is equivalent to a subquery 148 // which returns the elements of the tuple as individual columns. While 149 // surprising, this is necessary for regularity and in order to handle: 150 // 151 // SELECT 1 IN (SELECT 1) 152 // 153 // Without that auto-unwrapping of single-column subqueries, this query would 154 // type check as "<int> IN <tuple{tuple{int}}>" which would fail. 155 156 if s.Exists { 157 s.typ = types.Bool 158 return s, nil 159 } 160 161 if len(s.cols) == 1 { 162 s.typ = s.cols[0].typ 163 } else { 164 contents := make([]*types.T, len(s.cols)) 165 labels := make([]string, len(s.cols)) 166 for i := range s.cols { 167 contents[i] = s.cols[i].typ 168 labels[i] = string(s.cols[i].name) 169 } 170 s.typ = types.MakeLabeledTuple(contents, labels) 171 } 172 173 if s.wrapInTuple { 174 // The subquery is in a multi-row context. For example: 175 // 176 // SELECT 1 IN (SELECT * FROM t) 177 // 178 // Wrap the type in a tuple. 179 // 180 // TODO(peter): Using a tuple type to represent a multi-row 181 // subquery works with the current type checking code, but seems 182 // semantically incorrect. A tuple represents a fixed number of 183 // elements. Instead, we should introduce a new vtuple type. 184 s.typ = types.MakeTuple([]*types.T{s.typ}) 185 } 186 187 return s, nil 188 } 189 190 // ResolvedType is part of the tree.TypedExpr interface. 191 func (s *subquery) ResolvedType() *types.T { 192 return s.typ 193 } 194 195 // Eval is part of the tree.TypedExpr interface. 196 func (s *subquery) Eval(_ *tree.EvalContext) (tree.Datum, error) { 197 panic(errors.AssertionFailedf("subquery must be replaced before evaluation")) 198 } 199 200 // buildSubquery builds a relational expression that represents this subquery. 201 // It stores the resulting relational expression in s.node, and also updates 202 // s.cols and s.ordering with the output columns and ordering of the subquery. 203 func (s *subquery) buildSubquery(desiredTypes []*types.T) { 204 if s.scope.replaceSRFs { 205 // We need to save and restore the previous value of the replaceSRFs field in 206 // case we are recursively called within a subquery context. 207 defer func() { s.scope.replaceSRFs = true }() 208 s.scope.replaceSRFs = false 209 } 210 211 // Save and restore the previous value of s.builder.subquery in case we are 212 // recursively called within a subquery context. 213 outer := s.scope.builder.subquery 214 defer func() { s.scope.builder.subquery = outer }() 215 s.scope.builder.subquery = s 216 217 // We must push() here so that the columns in s.scope are correctly identified 218 // as outer columns. 219 outScope := s.scope.builder.buildStmt(s.Subquery.Select, desiredTypes, s.scope.push()) 220 ord := outScope.ordering 221 222 // Treat the subquery result as an anonymous data source (i.e. column names 223 // are not qualified). Remove hidden columns, as they are not accessible 224 // outside the subquery. 225 outScope.setTableAlias("") 226 outScope.removeHiddenCols() 227 228 if s.desiredNumColumns > 0 && len(outScope.cols) != s.desiredNumColumns { 229 n := len(outScope.cols) 230 switch s.desiredNumColumns { 231 case 1: 232 panic(pgerror.Newf(pgcode.Syntax, 233 "subquery must return only one column, found %d", n)) 234 default: 235 panic(pgerror.Newf(pgcode.Syntax, 236 "subquery must return %d columns, found %d", s.desiredNumColumns, n)) 237 } 238 } 239 240 if len(outScope.extraCols) > 0 && !s.extraColsAllowed { 241 // We need to add a projection to remove the extra columns. 242 projScope := outScope.push() 243 projScope.appendColumnsFromScope(outScope) 244 projScope.expr = s.scope.builder.constructProject(outScope.expr.(memo.RelExpr), projScope.cols) 245 outScope = projScope 246 } 247 248 s.cols = outScope.cols 249 s.node = outScope.expr.(memo.RelExpr) 250 s.ordering = ord 251 } 252 253 // buildSubqueryProjection ensures that a subquery returns exactly one column. 254 // If the original subquery has more than one column, buildSubqueryProjection 255 // wraps it in a projection which has a single tuple column containing all the 256 // original columns: tuple{col1, col2...}. 257 func (b *Builder) buildSubqueryProjection( 258 s *subquery, inScope *scope, 259 ) (out memo.RelExpr, outScope *scope) { 260 out = s.node 261 outScope = inScope.replace() 262 263 switch len(s.cols) { 264 case 0: 265 // This can be obtained with: 266 // CREATE TABLE t(x INT); ALTER TABLE t DROP COLUMN x; 267 // SELECT (SELECT * FROM t) = (SELECT * FROM t); 268 panic(pgerror.Newf(pgcode.Syntax, 269 "subquery must return only one column")) 270 271 case 1: 272 outScope.cols = append(outScope.cols, s.cols[0]) 273 274 default: 275 // Wrap the subquery in a projection with a single column. 276 // col1, col2... from the subquery becomes tuple{col1, col2...} in the 277 // projection. 278 cols := make(tree.Exprs, len(s.cols)) 279 els := make(memo.ScalarListExpr, len(s.cols)) 280 contents := make([]*types.T, len(s.cols)) 281 for i := range s.cols { 282 cols[i] = &s.cols[i] 283 contents[i] = s.cols[i].ResolvedType() 284 els[i] = b.factory.ConstructVariable(s.cols[i].id) 285 } 286 typ := types.MakeTuple(contents) 287 288 texpr := tree.NewTypedTuple(typ, cols) 289 tup := b.factory.ConstructTuple(els, typ) 290 col := b.synthesizeColumn(outScope, "", texpr.ResolvedType(), texpr, tup) 291 out = b.constructProject(out, []scopeColumn{*col}) 292 } 293 294 telemetry.Inc(sqltelemetry.SubqueryUseCounter) 295 296 return out, outScope 297 } 298 299 // buildSingleRowSubquery builds a set of memo groups that represent the given 300 // subquery. This function should only be called for subqueries in a single-row 301 // context, such as `SELECT (1, 'a') = (SELECT 1, 'a')`. 302 // 303 // See Builder.buildStmt for a description of the remaining input and 304 // return values. 305 func (b *Builder) buildSingleRowSubquery( 306 s *subquery, inScope *scope, 307 ) (out opt.ScalarExpr, outScope *scope) { 308 subqueryPrivate := memo.SubqueryPrivate{OriginalExpr: s.Subquery} 309 if s.Exists { 310 return b.factory.ConstructExists(s.node, &subqueryPrivate), inScope 311 } 312 313 var input memo.RelExpr 314 input, outScope = b.buildSubqueryProjection(s, inScope) 315 316 // Wrap the subquery in a Max1Row operator to enforce that it should return 317 // at most one row. Max1Row may be removed by the optimizer later if it can 318 // prove statically that the subquery always returns at most one row. 319 input = b.factory.ConstructMax1Row(input, multiRowSubqueryErrText) 320 321 out = b.factory.ConstructSubquery(input, &subqueryPrivate) 322 return out, outScope 323 } 324 325 // buildMultiRowSubquery transforms a multi-row subquery into a single-row 326 // subquery for IN, NOT IN, ANY, SOME and ALL expressions. It performs the 327 // transformation using the Any operator, which returns true if any of the 328 // values returned by the subquery are true, else returns NULL if any of the 329 // values are NULL, else returns false. 330 // 331 // We use the following transformations: 332 // 333 // <var> IN (<subquery>) 334 // ==> ConstructAny(<subquery>, <var>, EqOp) 335 // 336 // <var> NOT IN (<subquery>) 337 // ==> ConstructNot(ConstructAny(<subquery>, <var>, EqOp)) 338 // 339 // <var> <comp> {SOME|ANY}(<subquery>) 340 // ==> ConstructAny(<subquery>, <var>, <comp>) 341 // 342 // <var> <comp> ALL(<subquery>) 343 // ==> ConstructNot(ConstructAny(<subquery>, <var>, Negate(<comp>))) 344 // 345 func (b *Builder) buildMultiRowSubquery( 346 c *tree.ComparisonExpr, inScope *scope, colRefs *opt.ColSet, 347 ) (out opt.ScalarExpr, outScope *scope) { 348 var input memo.RelExpr 349 s := c.Right.(*subquery) 350 input, outScope = b.buildSubqueryProjection(s, inScope) 351 352 scalar := b.buildScalar(c.TypedLeft(), inScope, nil, nil, colRefs) 353 outScope = outScope.replace() 354 355 var cmp opt.Operator 356 switch c.Operator { 357 case tree.In, tree.NotIn: 358 // <var> = x 359 cmp = opt.EqOp 360 361 case tree.Any, tree.Some, tree.All: 362 // <var> <comp> x 363 cmp = opt.ComparisonOpMap[c.SubOperator] 364 if c.Operator == tree.All { 365 // NOT(<var> <comp> x) 366 cmp = opt.NegateOpMap[cmp] 367 } 368 369 default: 370 panic(errors.AssertionFailedf( 371 "buildMultiRowSubquery called with operator %v", c.Operator, 372 )) 373 } 374 375 // Construct the outer Any(...) operator. 376 out = b.factory.ConstructAny(input, scalar, &memo.SubqueryPrivate{ 377 Cmp: cmp, 378 OriginalExpr: s.Subquery, 379 }) 380 switch c.Operator { 381 case tree.NotIn, tree.All: 382 // NOT Any(...) 383 out = b.factory.ConstructNot(out) 384 } 385 386 return out, outScope 387 } 388 389 var _ tree.Expr = &subquery{} 390 var _ tree.TypedExpr = &subquery{} 391 392 // SubqueryExpr implements the SubqueryExpr interface. 393 func (*subquery) SubqueryExpr() {} 394 395 var _ tree.SubqueryExpr = &subquery{}