github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/exec/execbuilder/relational.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package execbuilder 12 13 import ( 14 "bytes" 15 "context" 16 "fmt" 17 "math" 18 19 "github.com/cockroachdb/cockroach/pkg/server/telemetry" 20 "github.com/cockroachdb/cockroach/pkg/sql/opt" 21 "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" 22 "github.com/cockroachdb/cockroach/pkg/sql/opt/exec" 23 "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" 24 "github.com/cockroachdb/cockroach/pkg/sql/opt/norm" 25 "github.com/cockroachdb/cockroach/pkg/sql/opt/ordering" 26 "github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical" 27 "github.com/cockroachdb/cockroach/pkg/sql/opt/xform" 28 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" 29 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" 30 "github.com/cockroachdb/cockroach/pkg/sql/sem/builtins" 31 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 32 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 33 "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" 34 "github.com/cockroachdb/cockroach/pkg/util" 35 "github.com/cockroachdb/cockroach/pkg/util/encoding" 36 "github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented" 37 "github.com/cockroachdb/cockroach/pkg/util/log" 38 "github.com/cockroachdb/errors" 39 ) 40 41 type execPlan struct { 42 root exec.Node 43 44 // outputCols is a map from opt.ColumnID to exec.NodeColumnOrdinal. It maps 45 // columns in the output set of a relational expression to indices in the 46 // result columns of the exec.Node. 47 // 48 // The reason we need to keep track of this (instead of using just the 49 // relational properties) is that the relational properties don't force a 50 // single "schema": any ordering of the output columns is possible. We choose 51 // the schema that is most convenient: for scans, we use the table's column 52 // ordering. Consider: 53 // SELECT a, b FROM t WHERE a = b 54 // and the following two cases: 55 // 1. The table is defined as (k INT PRIMARY KEY, a INT, b INT). The scan will 56 // return (k, a, b). 57 // 2. The table is defined as (k INT PRIMARY KEY, b INT, a INT). The scan will 58 // return (k, b, a). 59 // In these two cases, the relational properties are effectively the same. 60 // 61 // An alternative to this would be to always use a "canonical" schema, for 62 // example the output columns in increasing index order. However, this would 63 // require a lot of otherwise unnecessary projections. 64 // 65 // Note: conceptually, this could be a ColList; however, the map is more 66 // convenient when converting VariableOps to IndexedVars. 67 outputCols opt.ColMap 68 } 69 70 // numOutputCols returns the number of columns emitted by the execPlan's Node. 71 // This will typically be equal to ep.outputCols.Len(), but might be different 72 // if the node outputs the same optimizer ColumnID multiple times. 73 // TODO(justin): we should keep track of this instead of computing it each time. 
74 func (ep *execPlan) numOutputCols() int { 75 return numOutputColsInMap(ep.outputCols) 76 } 77 78 // numOutputColsInMap returns the number of slots required to fill in all of 79 // the columns referred to by this ColMap. 80 func numOutputColsInMap(m opt.ColMap) int { 81 max, ok := m.MaxValue() 82 if !ok { 83 return 0 84 } 85 return max + 1 86 } 87 88 // makeBuildScalarCtx returns a buildScalarCtx that can be used with expressions 89 // that refer the output columns of this plan. 90 func (ep *execPlan) makeBuildScalarCtx() buildScalarCtx { 91 return buildScalarCtx{ 92 ivh: tree.MakeIndexedVarHelper(nil /* container */, ep.numOutputCols()), 93 ivarMap: ep.outputCols, 94 } 95 } 96 97 // getNodeColumnOrdinal takes a column that is known to be produced by the execPlan 98 // and returns the ordinal index of that column in the result columns of the 99 // node. 100 func (ep *execPlan) getNodeColumnOrdinal(col opt.ColumnID) exec.NodeColumnOrdinal { 101 ord, ok := ep.outputCols.Get(int(col)) 102 if !ok { 103 panic(errors.AssertionFailedf("column %d not in input", log.Safe(col))) 104 } 105 return exec.NodeColumnOrdinal(ord) 106 } 107 108 func (ep *execPlan) getNodeColumnOrdinalSet(cols opt.ColSet) exec.NodeColumnOrdinalSet { 109 var res exec.NodeColumnOrdinalSet 110 cols.ForEach(func(colID opt.ColumnID) { 111 res.Add(int(ep.getNodeColumnOrdinal(colID))) 112 }) 113 return res 114 } 115 116 // reqOrdering converts the provided ordering of a relational expression to an 117 // OutputOrdering (according to the outputCols map). 118 func (ep *execPlan) reqOrdering(expr memo.RelExpr) exec.OutputOrdering { 119 return exec.OutputOrdering(ep.sqlOrdering(expr.ProvidedPhysical().Ordering)) 120 } 121 122 // sqlOrdering converts an Ordering to a ColumnOrdering (according to the 123 // outputCols map). 124 func (ep *execPlan) sqlOrdering(ordering opt.Ordering) sqlbase.ColumnOrdering { 125 if ordering.Empty() { 126 return nil 127 } 128 colOrder := make(sqlbase.ColumnOrdering, len(ordering)) 129 for i := range ordering { 130 colOrder[i].ColIdx = int(ep.getNodeColumnOrdinal(ordering[i].ID())) 131 if ordering[i].Descending() { 132 colOrder[i].Direction = encoding.Descending 133 } else { 134 colOrder[i].Direction = encoding.Ascending 135 } 136 } 137 138 return colOrder 139 } 140 141 func (b *Builder) buildRelational(e memo.RelExpr) (execPlan, error) { 142 var ep execPlan 143 var err error 144 145 if opt.IsDDLOp(e) { 146 // Mark the statement as containing DDL for use 147 // in the SQL executor. 148 b.IsDDL = true 149 150 // This will set the system DB trigger for transactions containing 151 // schema-modifying statements that have no effect, such as 152 // `BEGIN; INSERT INTO ...; CREATE TABLE IF NOT EXISTS ...; COMMIT;` 153 // where the table already exists. This will generate some false schema 154 // cache refreshes, but that's expected to be quite rare in practice. 155 if err := b.evalCtx.Txn.SetSystemConfigTrigger(); err != nil { 156 return execPlan{}, errors.WithSecondaryError( 157 unimplemented.NewWithIssuef(26508, 158 "schema change statement cannot follow a statement that has written in the same transaction"), 159 err) 160 } 161 } 162 163 // Raise error if mutation op is part of a read-only transaction. 164 if opt.IsMutationOp(e) && b.evalCtx.TxnReadOnly { 165 return execPlan{}, pgerror.Newf(pgcode.ReadOnlySQLTransaction, 166 "cannot execute %s in a read-only transaction", b.statementTag(e)) 167 } 168 169 // Collect usage telemetry for relational node, if appropriate. 
170 if !b.disableTelemetry { 171 if c := opt.OpTelemetryCounters[e.Op()]; c != nil { 172 telemetry.Inc(c) 173 } 174 } 175 176 var saveTableName string 177 if b.nameGen != nil { 178 // Don't save tables for operators that don't produce any columns (most 179 // importantly, for SET which is used to disable saving of tables). 180 if !e.Relational().OutputCols.Empty() { 181 // This function must be called in a pre-order traversal of the tree. 182 saveTableName = b.nameGen.GenerateName(e.Op()) 183 } 184 } 185 186 switch t := e.(type) { 187 case *memo.ValuesExpr: 188 ep, err = b.buildValues(t) 189 190 case *memo.ScanExpr: 191 ep, err = b.buildScan(t) 192 193 case *memo.SelectExpr: 194 ep, err = b.buildSelect(t) 195 196 case *memo.ProjectExpr: 197 ep, err = b.buildProject(t) 198 199 case *memo.GroupByExpr, *memo.ScalarGroupByExpr: 200 ep, err = b.buildGroupBy(e) 201 202 case *memo.DistinctOnExpr, *memo.EnsureDistinctOnExpr, *memo.UpsertDistinctOnExpr, 203 *memo.EnsureUpsertDistinctOnExpr: 204 ep, err = b.buildDistinct(t) 205 206 case *memo.LimitExpr, *memo.OffsetExpr: 207 ep, err = b.buildLimitOffset(e) 208 209 case *memo.SortExpr: 210 ep, err = b.buildSort(t) 211 212 case *memo.IndexJoinExpr: 213 ep, err = b.buildIndexJoin(t) 214 215 case *memo.LookupJoinExpr: 216 ep, err = b.buildLookupJoin(t) 217 218 case *memo.GeoLookupJoinExpr: 219 ep, err = b.buildGeoLookupJoin(t) 220 221 case *memo.ZigzagJoinExpr: 222 ep, err = b.buildZigzagJoin(t) 223 224 case *memo.OrdinalityExpr: 225 ep, err = b.buildOrdinality(t) 226 227 case *memo.MergeJoinExpr: 228 ep, err = b.buildMergeJoin(t) 229 230 case *memo.Max1RowExpr: 231 ep, err = b.buildMax1Row(t) 232 233 case *memo.ProjectSetExpr: 234 ep, err = b.buildProjectSet(t) 235 236 case *memo.WindowExpr: 237 ep, err = b.buildWindow(t) 238 239 case *memo.SequenceSelectExpr: 240 ep, err = b.buildSequenceSelect(t) 241 242 case *memo.InsertExpr: 243 ep, err = b.buildInsert(t) 244 245 case *memo.UpdateExpr: 246 ep, err = b.buildUpdate(t) 247 248 case *memo.UpsertExpr: 249 ep, err = b.buildUpsert(t) 250 251 case *memo.DeleteExpr: 252 ep, err = b.buildDelete(t) 253 254 case *memo.CreateTableExpr: 255 ep, err = b.buildCreateTable(t) 256 257 case *memo.CreateViewExpr: 258 ep, err = b.buildCreateView(t) 259 260 case *memo.WithExpr: 261 ep, err = b.buildWith(t) 262 263 case *memo.WithScanExpr: 264 ep, err = b.buildWithScan(t) 265 266 case *memo.RecursiveCTEExpr: 267 ep, err = b.buildRecursiveCTE(t) 268 269 case *memo.ExplainExpr: 270 ep, err = b.buildExplain(t) 271 272 case *memo.ShowTraceForSessionExpr: 273 ep, err = b.buildShowTrace(t) 274 275 case *memo.OpaqueRelExpr, *memo.OpaqueMutationExpr, *memo.OpaqueDDLExpr: 276 ep, err = b.buildOpaque(t.Private().(*memo.OpaqueRelPrivate)) 277 278 case *memo.AlterTableSplitExpr: 279 ep, err = b.buildAlterTableSplit(t) 280 281 case *memo.AlterTableUnsplitExpr: 282 ep, err = b.buildAlterTableUnsplit(t) 283 284 case *memo.AlterTableUnsplitAllExpr: 285 ep, err = b.buildAlterTableUnsplitAll(t) 286 287 case *memo.AlterTableRelocateExpr: 288 ep, err = b.buildAlterTableRelocate(t) 289 290 case *memo.ControlJobsExpr: 291 ep, err = b.buildControlJobs(t) 292 293 case *memo.CancelQueriesExpr: 294 ep, err = b.buildCancelQueries(t) 295 296 case *memo.CancelSessionsExpr: 297 ep, err = b.buildCancelSessions(t) 298 299 case *memo.ExportExpr: 300 ep, err = b.buildExport(t) 301 302 default: 303 switch { 304 case opt.IsSetOp(e): 305 ep, err = b.buildSetOp(e) 306 307 case opt.IsJoinNonApplyOp(e): 308 ep, err = b.buildHashJoin(e) 309 310 case 
opt.IsJoinApplyOp(e): 311 ep, err = b.buildApplyJoin(e) 312 313 default: 314 err = errors.AssertionFailedf("no execbuild for %T", t) 315 } 316 } 317 if err != nil { 318 return execPlan{}, err 319 } 320 321 // In race builds, assert that the exec plan output columns match the opt 322 // plan output columns. 323 if util.RaceEnabled { 324 optCols := e.Relational().OutputCols 325 var execCols opt.ColSet 326 ep.outputCols.ForEach(func(key, val int) { 327 execCols.Add(opt.ColumnID(key)) 328 }) 329 if !execCols.Equals(optCols) { 330 return execPlan{}, errors.AssertionFailedf( 331 "exec columns do not match opt columns: expected %v, got %v", optCols, execCols) 332 } 333 } 334 335 if saveTableName != "" { 336 ep, err = b.applySaveTable(ep, e, saveTableName) 337 if err != nil { 338 return execPlan{}, err 339 } 340 } 341 342 // Wrap the expression in a render expression if presentation requires it. 343 if p := e.RequiredPhysical(); !p.Presentation.Any() { 344 ep, err = b.applyPresentation(ep, p) 345 } 346 return ep, err 347 } 348 349 func (b *Builder) buildValues(values *memo.ValuesExpr) (execPlan, error) { 350 rows, err := b.buildValuesRows(values) 351 if err != nil { 352 return execPlan{}, err 353 } 354 return b.constructValues(rows, values.Cols) 355 } 356 357 func (b *Builder) buildValuesRows(values *memo.ValuesExpr) ([][]tree.TypedExpr, error) { 358 numCols := len(values.Cols) 359 360 rows := make([][]tree.TypedExpr, len(values.Rows)) 361 rowBuf := make([]tree.TypedExpr, len(rows)*numCols) 362 scalarCtx := buildScalarCtx{} 363 for i := range rows { 364 tup := values.Rows[i].(*memo.TupleExpr) 365 if len(tup.Elems) != numCols { 366 return nil, fmt.Errorf("inconsistent row length %d vs %d", len(tup.Elems), numCols) 367 } 368 // Chop off prefix of rowBuf and limit its capacity. 369 rows[i] = rowBuf[:numCols:numCols] 370 rowBuf = rowBuf[numCols:] 371 var err error 372 for j := 0; j < numCols; j++ { 373 rows[i][j], err = b.buildScalar(&scalarCtx, tup.Elems[j]) 374 if err != nil { 375 return nil, err 376 } 377 } 378 } 379 return rows, nil 380 } 381 382 func (b *Builder) constructValues(rows [][]tree.TypedExpr, cols opt.ColList) (execPlan, error) { 383 md := b.mem.Metadata() 384 resultCols := make(sqlbase.ResultColumns, len(cols)) 385 for i, col := range cols { 386 colMeta := md.ColumnMeta(col) 387 resultCols[i].Name = colMeta.Alias 388 resultCols[i].Typ = colMeta.Type 389 } 390 node, err := b.factory.ConstructValues(rows, resultCols) 391 if err != nil { 392 return execPlan{}, err 393 } 394 ep := execPlan{root: node} 395 for i, col := range cols { 396 ep.outputCols.Set(int(col), i) 397 } 398 399 return ep, nil 400 } 401 402 // getColumns returns the set of column ordinals in the table for the set of 403 // column IDs, along with a mapping from the column IDs to output ordinals 404 // (starting with outputOrdinalStart). 405 func (b *Builder) getColumns( 406 cols opt.ColSet, tableID opt.TableID, 407 ) (exec.TableColumnOrdinalSet, opt.ColMap) { 408 var needed exec.TableColumnOrdinalSet 409 var output opt.ColMap 410 411 columnCount := b.mem.Metadata().Table(tableID).DeletableColumnCount() 412 n := 0 413 for i := 0; i < columnCount; i++ { 414 colID := tableID.ColumnID(i) 415 if cols.Contains(colID) { 416 needed.Add(i) 417 output.Set(int(colID), n) 418 n++ 419 } 420 } 421 422 return needed, output 423 } 424 425 // indexConstraintMaxResults returns the maximum number of results for a scan; 426 // the scan is guaranteed never to return more results than this. Iff this hint 427 // is invalid, 0 is returned. 
428 func (b *Builder) indexConstraintMaxResults(scan *memo.ScanExpr) uint64 { 429 c := scan.Constraint 430 if c == nil || c.IsContradiction() || c.IsUnconstrained() { 431 return 0 432 } 433 434 numCols := c.Columns.Count() 435 var indexCols opt.ColSet 436 for i := 0; i < numCols; i++ { 437 indexCols.Add(c.Columns.Get(i).ID()) 438 } 439 rel := scan.Relational() 440 if !rel.FuncDeps.ColsAreLaxKey(indexCols) { 441 return 0 442 } 443 444 return c.CalculateMaxResults(b.evalCtx, indexCols, rel.NotNullCols) 445 } 446 447 func (b *Builder) buildScan(scan *memo.ScanExpr) (execPlan, error) { 448 md := b.mem.Metadata() 449 tab := md.Table(scan.Table) 450 451 // Check if we tried to force a specific index but there was no Scan with that 452 // index in the memo. 453 if scan.Flags.ForceIndex && scan.Flags.Index != scan.Index { 454 idx := tab.Index(scan.Flags.Index) 455 var err error 456 if idx.IsInverted() { 457 err = fmt.Errorf("index \"%s\" is inverted and cannot be used for this query", idx.Name()) 458 } else { 459 // This should never happen. 460 err = fmt.Errorf("index \"%s\" cannot be used for this query", idx.Name()) 461 } 462 return execPlan{}, err 463 } 464 465 needed, output := b.getColumns(scan.Cols, scan.Table) 466 res := execPlan{outputCols: output} 467 468 // Get the estimated row count from the statistics. 469 // Note: if this memo was originally created as part of a PREPARE 470 // statement or was stored in the query cache, the column stats would have 471 // been removed by DetachMemo. Update that function if the column stats are 472 // needed here in the future. 473 rowCount := scan.Relational().Stats.RowCount 474 if !scan.Relational().Stats.Available { 475 // When there are no statistics available, we construct a scan node with 476 // the estimated row count of zero rows. 477 rowCount = 0 478 } 479 480 if scan.PartitionConstrainedScan { 481 sqltelemetry.IncrementPartitioningCounter(sqltelemetry.PartitionConstrainedScan) 482 } 483 484 softLimit := int64(math.Ceil(scan.RequiredPhysical().LimitHint)) 485 hardLimit := scan.HardLimit.RowCount() 486 487 locking := scan.Locking 488 if b.forceForUpdateLocking { 489 locking = forUpdateLocking 490 } 491 492 root, err := b.factory.ConstructScan( 493 tab, 494 tab.Index(scan.Index), 495 needed, 496 scan.Constraint, 497 hardLimit, 498 softLimit, 499 // HardLimit.Reverse() is taken into account by ScanIsReverse. 500 ordering.ScanIsReverse(scan, &scan.RequiredPhysical().Ordering), 501 b.indexConstraintMaxResults(scan), 502 res.reqOrdering(scan), 503 rowCount, 504 locking, 505 ) 506 if err != nil { 507 return execPlan{}, err 508 } 509 res.root = root 510 return res, nil 511 } 512 513 func (b *Builder) buildSelect(sel *memo.SelectExpr) (execPlan, error) { 514 input, err := b.buildRelational(sel.Input) 515 if err != nil { 516 return execPlan{}, err 517 } 518 ctx := input.makeBuildScalarCtx() 519 filter, err := b.buildScalar(&ctx, &sel.Filters) 520 if err != nil { 521 return execPlan{}, err 522 } 523 // A filtering node does not modify the schema. 524 res := execPlan{outputCols: input.outputCols} 525 reqOrder := res.reqOrdering(sel) 526 res.root, err = b.factory.ConstructFilter(input.root, filter, reqOrder) 527 if err != nil { 528 return execPlan{}, err 529 } 530 return res, nil 531 } 532 533 // applySimpleProject adds a simple projection on top of an existing plan. 534 func (b *Builder) applySimpleProject( 535 input execPlan, cols opt.ColSet, providedOrd opt.Ordering, 536 ) (execPlan, error) { 537 // We have only pass-through columns. 
538 colList := make([]exec.NodeColumnOrdinal, 0, cols.Len()) 539 var res execPlan 540 cols.ForEach(func(i opt.ColumnID) { 541 res.outputCols.Set(int(i), len(colList)) 542 colList = append(colList, input.getNodeColumnOrdinal(i)) 543 }) 544 var err error 545 res.root, err = b.factory.ConstructSimpleProject( 546 input.root, colList, nil /* colNames */, exec.OutputOrdering(res.sqlOrdering(providedOrd)), 547 ) 548 if err != nil { 549 return execPlan{}, err 550 } 551 return res, nil 552 } 553 554 func (b *Builder) buildProject(prj *memo.ProjectExpr) (execPlan, error) { 555 md := b.mem.Metadata() 556 input, err := b.buildRelational(prj.Input) 557 if err != nil { 558 return execPlan{}, err 559 } 560 561 projections := prj.Projections 562 if len(projections) == 0 { 563 // We have only pass-through columns. 564 return b.applySimpleProject(input, prj.Passthrough, prj.ProvidedPhysical().Ordering) 565 } 566 567 var res execPlan 568 exprs := make(tree.TypedExprs, 0, len(projections)+prj.Passthrough.Len()) 569 cols := make(sqlbase.ResultColumns, 0, len(exprs)) 570 ctx := input.makeBuildScalarCtx() 571 for i := range projections { 572 item := &projections[i] 573 expr, err := b.buildScalar(&ctx, item.Element) 574 if err != nil { 575 return execPlan{}, err 576 } 577 res.outputCols.Set(int(item.Col), i) 578 exprs = append(exprs, expr) 579 cols = append(cols, sqlbase.ResultColumn{ 580 Name: md.ColumnMeta(item.Col).Alias, 581 Typ: item.Typ, 582 }) 583 } 584 prj.Passthrough.ForEach(func(colID opt.ColumnID) { 585 res.outputCols.Set(int(colID), len(exprs)) 586 indexedVar := b.indexedVar(&ctx, md, colID) 587 exprs = append(exprs, indexedVar) 588 meta := md.ColumnMeta(colID) 589 cols = append(cols, sqlbase.ResultColumn{ 590 Name: meta.Alias, 591 Typ: meta.Type, 592 }) 593 }) 594 reqOrdering := res.reqOrdering(prj) 595 res.root, err = b.factory.ConstructRender(input.root, cols, exprs, reqOrdering) 596 if err != nil { 597 return execPlan{}, err 598 } 599 return res, nil 600 } 601 602 func (b *Builder) buildApplyJoin(join memo.RelExpr) (execPlan, error) { 603 switch join.Op() { 604 case opt.InnerJoinApplyOp, opt.LeftJoinApplyOp, opt.SemiJoinApplyOp, opt.AntiJoinApplyOp: 605 default: 606 return execPlan{}, fmt.Errorf("couldn't execute correlated subquery with op %s", join.Op()) 607 } 608 joinType := joinOpToJoinType(join.Op()) 609 leftExpr := join.Child(0).(memo.RelExpr) 610 leftProps := leftExpr.Relational() 611 rightExpr := join.Child(1).(memo.RelExpr) 612 rightProps := rightExpr.Relational() 613 filters := join.Child(2).(*memo.FiltersExpr) 614 615 leftPlan, err := b.buildRelational(leftExpr) 616 if err != nil { 617 return execPlan{}, err 618 } 619 620 // Make a copy of the required props for the right side. 621 rightRequiredProps := *rightExpr.RequiredPhysical() 622 // The right-hand side will produce the output columns in order. 623 rightRequiredProps.Presentation = b.makePresentation(rightProps.OutputCols) 624 625 // leftBoundCols is the set of columns that this apply join binds. 626 leftBoundCols := leftProps.OutputCols.Intersection(rightProps.OuterCols) 627 // leftBoundColMap is a map from opt.ColumnID to opt.ColumnOrdinal that maps 628 // a column bound by the left side of this apply join to the column ordinal 629 // in the left side that contains the binding. 
630 var leftBoundColMap opt.ColMap 631 for col, ok := leftBoundCols.Next(0); ok; col, ok = leftBoundCols.Next(col + 1) { 632 v, ok := leftPlan.outputCols.Get(int(col)) 633 if !ok { 634 return execPlan{}, fmt.Errorf("couldn't find binding column %d in left output columns", col) 635 } 636 leftBoundColMap.Set(int(col), v) 637 } 638 639 // Now, the cool part! We set up an ApplyJoinPlanRightSideFn which plans the 640 // right side given a particular left side row. We do this planning in a 641 // separate memo, but we use the same exec.Factory. 642 // 643 // Note: we put o outside of the function so we allocate it only once. 644 var o xform.Optimizer 645 planRightSideFn := func(leftRow tree.Datums) (exec.Plan, error) { 646 o.Init(b.evalCtx, b.catalog) 647 f := o.Factory() 648 649 // Copy the right expression into a new memo, replacing each bound column 650 // with the corresponding value from the left row. 651 var replaceFn norm.ReplaceFunc 652 replaceFn = func(e opt.Expr) opt.Expr { 653 switch t := e.(type) { 654 case *memo.VariableExpr: 655 if leftOrd, ok := leftBoundColMap.Get(int(t.Col)); ok { 656 return f.ConstructConstVal(leftRow[leftOrd], t.Typ) 657 } 658 } 659 return f.CopyAndReplaceDefault(e, replaceFn) 660 } 661 f.CopyAndReplace(rightExpr, &rightRequiredProps, replaceFn) 662 663 newRightSide, err := o.Optimize() 664 if err != nil { 665 return nil, err 666 } 667 668 eb := New(b.factory, f.Memo(), b.catalog, newRightSide, b.evalCtx) 669 eb.disableTelemetry = true 670 plan, err := eb.Build() 671 if err != nil { 672 if errors.IsAssertionFailure(err) { 673 // Enhance the error with the EXPLAIN (OPT, VERBOSE) of the inner 674 // expression. 675 fmtFlags := memo.ExprFmtHideQualifications | memo.ExprFmtHideScalars | memo.ExprFmtHideTypes 676 explainOpt := o.FormatExpr(newRightSide, fmtFlags) 677 err = errors.WithDetailf(err, "newRightSide:\n%s", explainOpt) 678 } 679 return nil, err 680 } 681 return plan, nil 682 } 683 684 // The right plan will always produce the columns in the presentation, in 685 // the same order. 686 var rightOutputCols opt.ColMap 687 for i := range rightRequiredProps.Presentation { 688 rightOutputCols.Set(int(rightRequiredProps.Presentation[i].ID), i) 689 } 690 allCols := joinOutputMap(leftPlan.outputCols, rightOutputCols) 691 692 var onExpr tree.TypedExpr 693 if len(*filters) != 0 { 694 scalarCtx := buildScalarCtx{ 695 ivh: tree.MakeIndexedVarHelper(nil /* container */, numOutputColsInMap(allCols)), 696 ivarMap: allCols, 697 } 698 onExpr, err = b.buildScalar(&scalarCtx, filters) 699 if err != nil { 700 return execPlan{}, err 701 } 702 } 703 704 var outputCols opt.ColMap 705 if joinType == sqlbase.LeftSemiJoin || joinType == sqlbase.LeftAntiJoin { 706 // For semi and anti join, only the left columns are output. 707 outputCols = leftPlan.outputCols 708 } else { 709 outputCols = allCols 710 } 711 712 ep := execPlan{outputCols: outputCols} 713 714 ep.root, err = b.factory.ConstructApplyJoin( 715 joinType, 716 leftPlan.root, 717 b.presentationToResultColumns(rightRequiredProps.Presentation), 718 onExpr, 719 planRightSideFn, 720 ) 721 if err != nil { 722 return execPlan{}, err 723 } 724 return ep, nil 725 } 726 727 // makePresentation creates a Presentation that contains the given columns, in 728 // order of their IDs. 
729 func (b *Builder) makePresentation(cols opt.ColSet) physical.Presentation { 730 md := b.mem.Metadata() 731 result := make(physical.Presentation, 0, cols.Len()) 732 cols.ForEach(func(col opt.ColumnID) { 733 result = append(result, opt.AliasedColumn{ 734 Alias: md.ColumnMeta(col).Alias, 735 ID: col, 736 }) 737 }) 738 return result 739 } 740 741 // presentationToResultColumns returns ResultColumns corresponding to the 742 // columns in a presentation. 743 func (b *Builder) presentationToResultColumns(pres physical.Presentation) sqlbase.ResultColumns { 744 md := b.mem.Metadata() 745 result := make(sqlbase.ResultColumns, len(pres)) 746 for i := range pres { 747 result[i] = sqlbase.ResultColumn{ 748 Name: pres[i].Alias, 749 Typ: md.ColumnMeta(pres[i].ID).Type, 750 } 751 } 752 return result 753 } 754 755 func (b *Builder) buildHashJoin(join memo.RelExpr) (execPlan, error) { 756 if f := join.Private().(*memo.JoinPrivate).Flags; !f.Has(memo.AllowHashJoinStoreRight) { 757 // We need to do a bit of reverse engineering here to determine what the 758 // hint was. 759 hint := tree.AstLookup 760 if f.Has(memo.AllowMergeJoin) { 761 hint = tree.AstMerge 762 } 763 764 return execPlan{}, errors.Errorf( 765 "could not produce a query plan conforming to the %s JOIN hint", hint, 766 ) 767 } 768 769 joinType := joinOpToJoinType(join.Op()) 770 leftExpr := join.Child(0).(memo.RelExpr) 771 rightExpr := join.Child(1).(memo.RelExpr) 772 filters := join.Child(2).(*memo.FiltersExpr) 773 774 leftEq, rightEq := memo.ExtractJoinEqualityColumns( 775 leftExpr.Relational().OutputCols, 776 rightExpr.Relational().OutputCols, 777 *filters, 778 ) 779 if !b.disableTelemetry { 780 if len(leftEq) > 0 { 781 telemetry.Inc(sqltelemetry.JoinAlgoHashUseCounter) 782 } else { 783 telemetry.Inc(sqltelemetry.JoinAlgoCrossUseCounter) 784 } 785 telemetry.Inc(opt.JoinTypeToUseCounter(join.Op())) 786 } 787 788 left, right, onExpr, outputCols, err := b.initJoinBuild( 789 leftExpr, 790 rightExpr, 791 memo.ExtractRemainingJoinFilters(*filters, leftEq, rightEq), 792 joinType, 793 ) 794 if err != nil { 795 return execPlan{}, err 796 } 797 ep := execPlan{outputCols: outputCols} 798 799 // Convert leftEq/rightEq to ordinals. 
800 eqColsBuf := make([]exec.NodeColumnOrdinal, 2*len(leftEq)) 801 leftEqOrdinals := eqColsBuf[:len(leftEq):len(leftEq)] 802 rightEqOrdinals := eqColsBuf[len(leftEq):] 803 for i := range leftEq { 804 leftEqOrdinals[i] = left.getNodeColumnOrdinal(leftEq[i]) 805 rightEqOrdinals[i] = right.getNodeColumnOrdinal(rightEq[i]) 806 } 807 808 leftEqColsAreKey := leftExpr.Relational().FuncDeps.ColsAreStrictKey(leftEq.ToSet()) 809 rightEqColsAreKey := rightExpr.Relational().FuncDeps.ColsAreStrictKey(rightEq.ToSet()) 810 811 ep.root, err = b.factory.ConstructHashJoin( 812 joinType, 813 left.root, right.root, 814 leftEqOrdinals, rightEqOrdinals, 815 leftEqColsAreKey, rightEqColsAreKey, 816 onExpr, 817 ) 818 if err != nil { 819 return execPlan{}, err 820 } 821 return ep, nil 822 } 823 824 func (b *Builder) buildMergeJoin(join *memo.MergeJoinExpr) (execPlan, error) { 825 if !b.disableTelemetry { 826 telemetry.Inc(sqltelemetry.JoinAlgoMergeUseCounter) 827 telemetry.Inc(opt.JoinTypeToUseCounter(join.JoinType)) 828 } 829 830 joinType := joinOpToJoinType(join.JoinType) 831 832 left, right, onExpr, outputCols, err := b.initJoinBuild( 833 join.Left, join.Right, join.On, joinType, 834 ) 835 if err != nil { 836 return execPlan{}, err 837 } 838 leftOrd := left.sqlOrdering(join.LeftEq) 839 rightOrd := right.sqlOrdering(join.RightEq) 840 ep := execPlan{outputCols: outputCols} 841 reqOrd := ep.reqOrdering(join) 842 leftEqColsAreKey := join.Left.Relational().FuncDeps.ColsAreStrictKey(join.LeftEq.ColSet()) 843 rightEqColsAreKey := join.Right.Relational().FuncDeps.ColsAreStrictKey(join.RightEq.ColSet()) 844 ep.root, err = b.factory.ConstructMergeJoin( 845 joinType, 846 left.root, right.root, 847 onExpr, 848 leftOrd, rightOrd, reqOrd, 849 leftEqColsAreKey, rightEqColsAreKey, 850 ) 851 if err != nil { 852 return execPlan{}, err 853 } 854 return ep, nil 855 } 856 857 // initJoinBuild builds the inputs to the join as well as the ON expression. 858 func (b *Builder) initJoinBuild( 859 leftChild memo.RelExpr, 860 rightChild memo.RelExpr, 861 filters memo.FiltersExpr, 862 joinType sqlbase.JoinType, 863 ) (leftPlan, rightPlan execPlan, onExpr tree.TypedExpr, outputCols opt.ColMap, _ error) { 864 leftPlan, err := b.buildRelational(leftChild) 865 if err != nil { 866 return execPlan{}, execPlan{}, nil, opt.ColMap{}, err 867 } 868 rightPlan, err = b.buildRelational(rightChild) 869 if err != nil { 870 return execPlan{}, execPlan{}, nil, opt.ColMap{}, err 871 } 872 873 allCols := joinOutputMap(leftPlan.outputCols, rightPlan.outputCols) 874 875 ctx := buildScalarCtx{ 876 ivh: tree.MakeIndexedVarHelper(nil /* container */, numOutputColsInMap(allCols)), 877 ivarMap: allCols, 878 } 879 880 if len(filters) != 0 { 881 onExpr, err = b.buildScalar(&ctx, &filters) 882 if err != nil { 883 return execPlan{}, execPlan{}, nil, opt.ColMap{}, err 884 } 885 } 886 887 if joinType == sqlbase.LeftSemiJoin || joinType == sqlbase.LeftAntiJoin { 888 // For semi and anti join, only the left columns are output. 889 return leftPlan, rightPlan, onExpr, leftPlan.outputCols, nil 890 } 891 return leftPlan, rightPlan, onExpr, allCols, nil 892 } 893 894 // joinOutputMap determines the outputCols map for a (non-semi/anti) join, given 895 // the outputCols maps for its inputs. 
896 func joinOutputMap(left, right opt.ColMap) opt.ColMap { 897 numLeftCols := numOutputColsInMap(left) 898 899 res := left.Copy() 900 right.ForEach(func(colIdx, rightIdx int) { 901 res.Set(colIdx, rightIdx+numLeftCols) 902 }) 903 return res 904 } 905 906 func joinOpToJoinType(op opt.Operator) sqlbase.JoinType { 907 switch op { 908 case opt.InnerJoinOp, opt.InnerJoinApplyOp: 909 return sqlbase.InnerJoin 910 911 case opt.LeftJoinOp, opt.LeftJoinApplyOp: 912 return sqlbase.LeftOuterJoin 913 914 case opt.RightJoinOp: 915 return sqlbase.RightOuterJoin 916 917 case opt.FullJoinOp: 918 return sqlbase.FullOuterJoin 919 920 case opt.SemiJoinOp, opt.SemiJoinApplyOp: 921 return sqlbase.LeftSemiJoin 922 923 case opt.AntiJoinOp, opt.AntiJoinApplyOp: 924 return sqlbase.LeftAntiJoin 925 926 default: 927 panic(errors.AssertionFailedf("not a join op %s", log.Safe(op))) 928 } 929 } 930 931 func (b *Builder) buildGroupBy(groupBy memo.RelExpr) (execPlan, error) { 932 input, err := b.buildGroupByInput(groupBy) 933 if err != nil { 934 return execPlan{}, err 935 } 936 937 var ep execPlan 938 groupingCols := groupBy.Private().(*memo.GroupingPrivate).GroupingCols 939 groupingColIdx := make([]exec.NodeColumnOrdinal, 0, groupingCols.Len()) 940 for i, ok := groupingCols.Next(0); ok; i, ok = groupingCols.Next(i + 1) { 941 ep.outputCols.Set(int(i), len(groupingColIdx)) 942 groupingColIdx = append(groupingColIdx, input.getNodeColumnOrdinal(i)) 943 } 944 945 aggregations := *groupBy.Child(1).(*memo.AggregationsExpr) 946 aggInfos := make([]exec.AggInfo, len(aggregations)) 947 for i := range aggregations { 948 item := &aggregations[i] 949 agg := item.Agg 950 951 var filterOrd exec.NodeColumnOrdinal = -1 952 if aggFilter, ok := agg.(*memo.AggFilterExpr); ok { 953 filter, ok := aggFilter.Filter.(*memo.VariableExpr) 954 if !ok { 955 return execPlan{}, errors.AssertionFailedf("only VariableOp args supported") 956 } 957 filterOrd = input.getNodeColumnOrdinal(filter.Col) 958 agg = aggFilter.Input 959 } 960 961 distinct := false 962 if aggDistinct, ok := agg.(*memo.AggDistinctExpr); ok { 963 distinct = true 964 agg = aggDistinct.Input 965 } 966 967 name, overload := memo.FindAggregateOverload(agg) 968 969 // Accumulate variable arguments in argCols and constant arguments in 970 // constArgs. Constant arguments must follow variable arguments. 
971 var argCols []exec.NodeColumnOrdinal 972 var constArgs tree.Datums 973 for j, n := 0, agg.ChildCount(); j < n; j++ { 974 child := agg.Child(j) 975 if variable, ok := child.(*memo.VariableExpr); ok { 976 if len(constArgs) != 0 { 977 return execPlan{}, errors.Errorf("constant args must come after variable args") 978 } 979 argCols = append(argCols, input.getNodeColumnOrdinal(variable.Col)) 980 } else { 981 if len(argCols) == 0 { 982 return execPlan{}, errors.Errorf("a constant arg requires at least one variable arg") 983 } 984 constArgs = append(constArgs, memo.ExtractConstDatum(child)) 985 } 986 } 987 988 aggInfos[i] = exec.AggInfo{ 989 FuncName: name, 990 Builtin: overload, 991 Distinct: distinct, 992 ResultType: item.Agg.DataType(), 993 ArgCols: argCols, 994 ConstArgs: constArgs, 995 Filter: filterOrd, 996 } 997 ep.outputCols.Set(int(item.Col), len(groupingColIdx)+i) 998 } 999 1000 if groupBy.Op() == opt.ScalarGroupByOp { 1001 ep.root, err = b.factory.ConstructScalarGroupBy(input.root, aggInfos) 1002 } else { 1003 groupBy := groupBy.(*memo.GroupByExpr) 1004 groupingColOrder := input.sqlOrdering(ordering.StreamingGroupingColOrdering( 1005 &groupBy.GroupingPrivate, &groupBy.RequiredPhysical().Ordering, 1006 )) 1007 reqOrdering := ep.reqOrdering(groupBy) 1008 ep.root, err = b.factory.ConstructGroupBy( 1009 input.root, groupingColIdx, groupingColOrder, aggInfos, reqOrdering, 1010 ) 1011 } 1012 if err != nil { 1013 return execPlan{}, err 1014 } 1015 return ep, nil 1016 } 1017 1018 func (b *Builder) buildDistinct(distinct memo.RelExpr) (execPlan, error) { 1019 private := distinct.Private().(*memo.GroupingPrivate) 1020 1021 if private.GroupingCols.Empty() { 1022 // A DistinctOn with no grouping columns should have been converted to a 1023 // LIMIT 1 or Max1Row by normalization rules. 1024 return execPlan{}, fmt.Errorf("cannot execute distinct on no columns") 1025 } 1026 input, err := b.buildGroupByInput(distinct) 1027 if err != nil { 1028 return execPlan{}, err 1029 } 1030 1031 distinctCols := input.getNodeColumnOrdinalSet(private.GroupingCols) 1032 var orderedCols exec.NodeColumnOrdinalSet 1033 ordering := ordering.StreamingGroupingColOrdering( 1034 private, &distinct.RequiredPhysical().Ordering, 1035 ) 1036 for i := range ordering { 1037 orderedCols.Add(int(input.getNodeColumnOrdinal(ordering[i].ID()))) 1038 } 1039 ep := execPlan{outputCols: input.outputCols} 1040 1041 reqOrdering := ep.reqOrdering(distinct) 1042 ep.root, err = b.factory.ConstructDistinct( 1043 input.root, distinctCols, orderedCols, reqOrdering, 1044 private.NullsAreDistinct, private.ErrorOnDup) 1045 if err != nil { 1046 return execPlan{}, err 1047 } 1048 1049 // buildGroupByInput can add extra sort column(s), so discard those if they 1050 // are present by using an additional projection. 
1051 outCols := distinct.Relational().OutputCols 1052 if input.outputCols.Len() == outCols.Len() { 1053 return ep, nil 1054 } 1055 return b.ensureColumns( 1056 ep, opt.ColSetToList(outCols), nil /* colNames */, distinct.ProvidedPhysical().Ordering, 1057 ) 1058 } 1059 1060 func (b *Builder) buildGroupByInput(groupBy memo.RelExpr) (execPlan, error) { 1061 groupByInput := groupBy.Child(0).(memo.RelExpr) 1062 input, err := b.buildRelational(groupByInput) 1063 if err != nil { 1064 return execPlan{}, err 1065 } 1066 1067 // TODO(radu): this is a one-off fix for an otherwise bigger gap: we should 1068 // have a more general mechanism (through physical properties or otherwise) to 1069 // figure out unneeded columns and project them away as necessary. The 1070 // optimizer doesn't guarantee that it adds ProjectOps everywhere. 1071 // 1072 // We address just the GroupBy case for now because there is a particularly 1073 // important case with COUNT(*) where we can remove all input columns, which 1074 // leads to significant speedup. 1075 private := groupBy.Private().(*memo.GroupingPrivate) 1076 neededCols := private.GroupingCols.Copy() 1077 aggs := *groupBy.Child(1).(*memo.AggregationsExpr) 1078 for i := range aggs { 1079 neededCols.UnionWith(memo.ExtractAggInputColumns(aggs[i].Agg)) 1080 } 1081 1082 // In rare cases, we might need a column only for its ordering, for example: 1083 // SELECT concat_agg(s) FROM (SELECT s FROM kv ORDER BY k) 1084 // In this case we can't project the column away as it is still needed by 1085 // distsql to maintain the desired ordering. 1086 for _, c := range groupByInput.ProvidedPhysical().Ordering { 1087 neededCols.Add(c.ID()) 1088 } 1089 1090 if neededCols.Equals(groupByInput.Relational().OutputCols) { 1091 // All columns produced by the input are used. 1092 return input, nil 1093 } 1094 1095 // The input is producing columns that are not useful; set up a projection. 1096 cols := make([]exec.NodeColumnOrdinal, 0, neededCols.Len()) 1097 var newOutputCols opt.ColMap 1098 for colID, ok := neededCols.Next(0); ok; colID, ok = neededCols.Next(colID + 1) { 1099 ordinal, ordOk := input.outputCols.Get(int(colID)) 1100 if !ordOk { 1101 panic(errors.AssertionFailedf("needed column not produced by group-by input")) 1102 } 1103 newOutputCols.Set(int(colID), len(cols)) 1104 cols = append(cols, exec.NodeColumnOrdinal(ordinal)) 1105 } 1106 1107 input.outputCols = newOutputCols 1108 reqOrdering := input.reqOrdering(groupByInput) 1109 input.root, err = b.factory.ConstructSimpleProject( 1110 input.root, cols, nil /* colNames */, reqOrdering, 1111 ) 1112 if err != nil { 1113 return execPlan{}, err 1114 } 1115 return input, nil 1116 } 1117 1118 func (b *Builder) buildSetOp(set memo.RelExpr) (execPlan, error) { 1119 leftExpr := set.Child(0).(memo.RelExpr) 1120 left, err := b.buildRelational(leftExpr) 1121 if err != nil { 1122 return execPlan{}, err 1123 } 1124 rightExpr := set.Child(1).(memo.RelExpr) 1125 right, err := b.buildRelational(rightExpr) 1126 if err != nil { 1127 return execPlan{}, err 1128 } 1129 1130 private := set.Private().(*memo.SetPrivate) 1131 1132 // We need to make sure that the two sides render the columns in the same 1133 // order; otherwise we add projections. 1134 // 1135 // In most cases the projection is needed only to reorder the columns, but not 1136 // always. For example: 1137 // (SELECT a, a, b FROM ab) UNION (SELECT x, y, z FROM xyz) 1138 // The left input could be just a scan that produces two columns. 
1139 // 1140 // TODO(radu): we don't have to respect the exact order in the two ColLists; 1141 // if one side has the right columns but in a different permutation, we could 1142 // set up a matching projection on the other side. For example: 1143 // (SELECT b, c, a FROM abc) UNION (SELECT z, y, x FROM xyz) 1144 // The expression for this could be a UnionOp on top of two ScanOps (any 1145 // internal projections could be removed by normalization rules). 1146 // The scans produce columns `a, b, c` and `x, y, z` respectively. We could 1147 // leave `b, c, a` as is and project the other side to `x, z, y`. 1148 // Note that (unless this is part of a larger query) the presentation property 1149 // will ensure that the columns are presented correctly in the output (i.e. in 1150 // the order `b, c, a`). 1151 left, err = b.ensureColumns( 1152 left, private.LeftCols, nil /* colNames */, leftExpr.ProvidedPhysical().Ordering, 1153 ) 1154 if err != nil { 1155 return execPlan{}, err 1156 } 1157 right, err = b.ensureColumns( 1158 right, private.RightCols, nil /* colNames */, rightExpr.ProvidedPhysical().Ordering, 1159 ) 1160 if err != nil { 1161 return execPlan{}, err 1162 } 1163 1164 var typ tree.UnionType 1165 var all bool 1166 switch set.Op() { 1167 case opt.UnionOp: 1168 typ, all = tree.UnionOp, false 1169 case opt.UnionAllOp: 1170 typ, all = tree.UnionOp, true 1171 case opt.IntersectOp: 1172 typ, all = tree.IntersectOp, false 1173 case opt.IntersectAllOp: 1174 typ, all = tree.IntersectOp, true 1175 case opt.ExceptOp: 1176 typ, all = tree.ExceptOp, false 1177 case opt.ExceptAllOp: 1178 typ, all = tree.ExceptOp, true 1179 default: 1180 panic(errors.AssertionFailedf("invalid operator %s", log.Safe(set.Op()))) 1181 } 1182 1183 node, err := b.factory.ConstructSetOp(typ, all, left.root, right.root) 1184 if err != nil { 1185 return execPlan{}, err 1186 } 1187 ep := execPlan{root: node} 1188 for i, col := range private.OutCols { 1189 ep.outputCols.Set(int(col), i) 1190 } 1191 return ep, nil 1192 } 1193 1194 // buildLimitOffset builds a plan for a LimitOp or OffsetOp 1195 func (b *Builder) buildLimitOffset(e memo.RelExpr) (execPlan, error) { 1196 input, err := b.buildRelational(e.Child(0).(memo.RelExpr)) 1197 if err != nil { 1198 return execPlan{}, err 1199 } 1200 // LIMIT/OFFSET expression should never need buildScalarContext, because it 1201 // can't refer to the input expression. 
1202 expr, err := b.buildScalar(nil, e.Child(1).(opt.ScalarExpr)) 1203 if err != nil { 1204 return execPlan{}, err 1205 } 1206 var node exec.Node 1207 if e.Op() == opt.LimitOp { 1208 node, err = b.factory.ConstructLimit(input.root, expr, nil) 1209 } else { 1210 node, err = b.factory.ConstructLimit(input.root, nil, expr) 1211 } 1212 if err != nil { 1213 return execPlan{}, err 1214 } 1215 return execPlan{root: node, outputCols: input.outputCols}, nil 1216 } 1217 1218 func (b *Builder) buildSort(sort *memo.SortExpr) (execPlan, error) { 1219 input, err := b.buildRelational(sort.Input) 1220 if err != nil { 1221 return execPlan{}, err 1222 } 1223 1224 ordering := sort.ProvidedPhysical().Ordering 1225 inputOrdering := sort.Input.ProvidedPhysical().Ordering 1226 alreadyOrderedPrefix := 0 1227 for i := range inputOrdering { 1228 if i == len(ordering) { 1229 return execPlan{}, errors.AssertionFailedf("sort ordering already provided by input") 1230 } 1231 if inputOrdering[i] != ordering[i] { 1232 break 1233 } 1234 alreadyOrderedPrefix = i + 1 1235 } 1236 1237 node, err := b.factory.ConstructSort(input.root, input.sqlOrdering(ordering), alreadyOrderedPrefix) 1238 if err != nil { 1239 return execPlan{}, err 1240 } 1241 return execPlan{root: node, outputCols: input.outputCols}, nil 1242 } 1243 1244 func (b *Builder) buildOrdinality(ord *memo.OrdinalityExpr) (execPlan, error) { 1245 input, err := b.buildRelational(ord.Input) 1246 if err != nil { 1247 return execPlan{}, err 1248 } 1249 1250 colName := b.mem.Metadata().ColumnMeta(ord.ColID).Alias 1251 1252 node, err := b.factory.ConstructOrdinality(input.root, colName) 1253 if err != nil { 1254 return execPlan{}, err 1255 } 1256 1257 // We have one additional ordinality column, which is ordered at the end of 1258 // the list. 1259 outputCols := input.outputCols.Copy() 1260 outputCols.Set(int(ord.ColID), outputCols.Len()) 1261 1262 return execPlan{root: node, outputCols: outputCols}, nil 1263 } 1264 1265 func (b *Builder) buildIndexJoin(join *memo.IndexJoinExpr) (execPlan, error) { 1266 input, err := b.buildRelational(join.Input) 1267 if err != nil { 1268 return execPlan{}, err 1269 } 1270 1271 md := b.mem.Metadata() 1272 tab := md.Table(join.Table) 1273 1274 // TODO(radu): the distsql implementation of index join assumes that the input 1275 // starts with the PK columns in order (#40749). 
1276 pri := tab.Index(cat.PrimaryIndex) 1277 keyCols := make([]exec.NodeColumnOrdinal, pri.KeyColumnCount()) 1278 for i := range keyCols { 1279 keyCols[i] = input.getNodeColumnOrdinal(join.Table.ColumnID(pri.Column(i).Ordinal)) 1280 } 1281 1282 cols := join.Cols 1283 needed, output := b.getColumns(cols, join.Table) 1284 res := execPlan{outputCols: output} 1285 res.root, err = b.factory.ConstructIndexJoin( 1286 input.root, tab, keyCols, needed, res.reqOrdering(join), 1287 ) 1288 if err != nil { 1289 return execPlan{}, err 1290 } 1291 1292 return res, nil 1293 } 1294 1295 func (b *Builder) buildLookupJoin(join *memo.LookupJoinExpr) (execPlan, error) { 1296 if !b.disableTelemetry { 1297 telemetry.Inc(sqltelemetry.JoinAlgoLookupUseCounter) 1298 telemetry.Inc(opt.JoinTypeToUseCounter(join.JoinType)) 1299 } 1300 1301 input, err := b.buildRelational(join.Input) 1302 if err != nil { 1303 return execPlan{}, err 1304 } 1305 1306 md := b.mem.Metadata() 1307 1308 keyCols := make([]exec.NodeColumnOrdinal, len(join.KeyCols)) 1309 for i, c := range join.KeyCols { 1310 keyCols[i] = input.getNodeColumnOrdinal(c) 1311 } 1312 1313 inputCols := join.Input.Relational().OutputCols 1314 lookupCols := join.Cols.Difference(inputCols) 1315 1316 lookupOrdinals, lookupColMap := b.getColumns(lookupCols, join.Table) 1317 allCols := joinOutputMap(input.outputCols, lookupColMap) 1318 1319 res := execPlan{outputCols: allCols} 1320 if join.JoinType == opt.SemiJoinOp || join.JoinType == opt.AntiJoinOp { 1321 // For semi and anti join, only the left columns are output. 1322 res.outputCols = input.outputCols 1323 } 1324 1325 ctx := buildScalarCtx{ 1326 ivh: tree.MakeIndexedVarHelper(nil /* container */, allCols.Len()), 1327 ivarMap: allCols, 1328 } 1329 onExpr, err := b.buildScalar(&ctx, &join.On) 1330 if err != nil { 1331 return execPlan{}, err 1332 } 1333 1334 tab := md.Table(join.Table) 1335 idx := tab.Index(join.Index) 1336 var eqCols opt.ColSet 1337 for i := range join.KeyCols { 1338 eqCols.Add(join.Table.ColumnID(idx.Column(i).Ordinal)) 1339 } 1340 1341 res.root, err = b.factory.ConstructLookupJoin( 1342 joinOpToJoinType(join.JoinType), 1343 input.root, 1344 tab, 1345 idx, 1346 keyCols, 1347 join.LookupColsAreTableKey, 1348 lookupOrdinals, 1349 onExpr, 1350 res.reqOrdering(join), 1351 ) 1352 if err != nil { 1353 return execPlan{}, err 1354 } 1355 1356 // Apply a post-projection if Cols doesn't contain all input columns. 1357 if !inputCols.SubsetOf(join.Cols) { 1358 return b.applySimpleProject(res, join.Cols, join.ProvidedPhysical().Ordering) 1359 } 1360 return res, nil 1361 } 1362 1363 func (b *Builder) buildGeoLookupJoin(join *memo.GeoLookupJoinExpr) (execPlan, error) { 1364 input, err := b.buildRelational(join.Input) 1365 if err != nil { 1366 return execPlan{}, err 1367 } 1368 1369 md := b.mem.Metadata() 1370 1371 inputCols := join.Input.Relational().OutputCols 1372 lookupCols := join.Cols.Difference(inputCols) 1373 1374 lookupOrdinals, lookupColMap := b.getColumns(lookupCols, join.Table) 1375 allCols := joinOutputMap(input.outputCols, lookupColMap) 1376 1377 res := execPlan{outputCols: allCols} 1378 if join.JoinType == opt.SemiJoinOp || join.JoinType == opt.AntiJoinOp { 1379 // For semi and anti join, only the left columns are output. 
1380 res.outputCols = input.outputCols 1381 } 1382 1383 ctx := buildScalarCtx{ 1384 ivh: tree.MakeIndexedVarHelper(nil /* container */, allCols.Len()), 1385 ivarMap: allCols, 1386 } 1387 onExpr, err := b.buildScalar(&ctx, &join.On) 1388 if err != nil { 1389 return execPlan{}, err 1390 } 1391 1392 tab := md.Table(join.Table) 1393 idx := tab.Index(join.Index) 1394 1395 res.root, err = b.factory.ConstructGeoLookupJoin( 1396 joinOpToJoinType(join.JoinType), 1397 join.GeoRelationshipType, 1398 input.root, 1399 tab, 1400 idx, 1401 input.getNodeColumnOrdinal(join.GeoCol), 1402 lookupOrdinals, 1403 onExpr, 1404 res.reqOrdering(join), 1405 ) 1406 if err != nil { 1407 return execPlan{}, err 1408 } 1409 1410 // Apply a post-projection if Cols doesn't contain all input columns. 1411 if !inputCols.SubsetOf(join.Cols) { 1412 return b.applySimpleProject(res, join.Cols, join.ProvidedPhysical().Ordering) 1413 } 1414 return res, nil 1415 } 1416 1417 func (b *Builder) buildZigzagJoin(join *memo.ZigzagJoinExpr) (execPlan, error) { 1418 md := b.mem.Metadata() 1419 1420 leftTable := md.Table(join.LeftTable) 1421 rightTable := md.Table(join.RightTable) 1422 leftIndex := leftTable.Index(join.LeftIndex) 1423 rightIndex := rightTable.Index(join.RightIndex) 1424 1425 leftEqCols := make([]exec.NodeColumnOrdinal, len(join.LeftEqCols)) 1426 rightEqCols := make([]exec.NodeColumnOrdinal, len(join.RightEqCols)) 1427 for i := range join.LeftEqCols { 1428 leftEqCols[i] = exec.NodeColumnOrdinal(join.LeftTable.ColumnOrdinal(join.LeftEqCols[i])) 1429 rightEqCols[i] = exec.NodeColumnOrdinal(join.RightTable.ColumnOrdinal(join.RightEqCols[i])) 1430 } 1431 leftCols := md.TableMeta(join.LeftTable).IndexColumns(join.LeftIndex).Intersection(join.Cols) 1432 rightCols := md.TableMeta(join.RightTable).IndexColumns(join.RightIndex).Intersection(join.Cols) 1433 // Remove duplicate columns, if any. 1434 rightCols.DifferenceWith(leftCols) 1435 1436 leftOrdinals, leftColMap := b.getColumns(leftCols, join.LeftTable) 1437 rightOrdinals, rightColMap := b.getColumns(rightCols, join.RightTable) 1438 1439 allCols := joinOutputMap(leftColMap, rightColMap) 1440 1441 res := execPlan{outputCols: allCols} 1442 1443 ctx := buildScalarCtx{ 1444 ivh: tree.MakeIndexedVarHelper(nil /* container */, leftColMap.Len()+rightColMap.Len()), 1445 ivarMap: allCols, 1446 } 1447 onExpr, err := b.buildScalar(&ctx, &join.On) 1448 if err != nil { 1449 return execPlan{}, err 1450 } 1451 1452 // Build the fixed value scalars. These are represented as one value node 1453 // per side of the join, containing one row/tuple with fixed values for 1454 // a prefix of that index's columns. 
1455 fixedVals := make([]exec.Node, 2) 1456 fixedCols := []opt.ColList{join.LeftFixedCols, join.RightFixedCols} 1457 for i := range join.FixedVals { 1458 tup := join.FixedVals[i].(*memo.TupleExpr) 1459 valExprs := make([]tree.TypedExpr, len(tup.Elems)) 1460 for j := range tup.Elems { 1461 valExprs[j], err = b.buildScalar(&ctx, tup.Elems[j]) 1462 if err != nil { 1463 return execPlan{}, err 1464 } 1465 } 1466 valuesPlan, err := b.constructValues([][]tree.TypedExpr{valExprs}, fixedCols[i]) 1467 if err != nil { 1468 return execPlan{}, err 1469 } 1470 fixedVals[i] = valuesPlan.root 1471 } 1472 1473 res.root, err = b.factory.ConstructZigzagJoin( 1474 leftTable, 1475 leftIndex, 1476 rightTable, 1477 rightIndex, 1478 leftEqCols, 1479 rightEqCols, 1480 leftOrdinals, 1481 rightOrdinals, 1482 onExpr, 1483 fixedVals, 1484 res.reqOrdering(join), 1485 ) 1486 if err != nil { 1487 return execPlan{}, err 1488 } 1489 1490 return res, nil 1491 } 1492 1493 func (b *Builder) buildMax1Row(max1Row *memo.Max1RowExpr) (execPlan, error) { 1494 input, err := b.buildRelational(max1Row.Input) 1495 if err != nil { 1496 return execPlan{}, err 1497 } 1498 1499 node, err := b.factory.ConstructMax1Row(input.root, max1Row.ErrorText) 1500 if err != nil { 1501 return execPlan{}, err 1502 } 1503 return execPlan{root: node, outputCols: input.outputCols}, nil 1504 } 1505 1506 func (b *Builder) buildWith(with *memo.WithExpr) (execPlan, error) { 1507 value, err := b.buildRelational(with.Binding) 1508 if err != nil { 1509 return execPlan{}, err 1510 } 1511 1512 var label bytes.Buffer 1513 fmt.Fprintf(&label, "buffer %d", with.ID) 1514 if with.Name != "" { 1515 fmt.Fprintf(&label, " (%s)", with.Name) 1516 } 1517 1518 buffer, err := b.factory.ConstructBuffer(value.root, label.String()) 1519 if err != nil { 1520 return execPlan{}, err 1521 } 1522 1523 // TODO(justin): if the binding here has a spoolNode at its root, we can 1524 // remove it, since subquery execution also guarantees complete execution. 1525 1526 // Add the buffer as a subquery so it gets executed ahead of time, and is 1527 // available to be referenced by other queries. 1528 b.subqueries = append(b.subqueries, exec.Subquery{ 1529 ExprNode: with.OriginalExpr, 1530 // TODO(justin): this is wasteful: both the subquery and the bufferNode 1531 // will buffer up all the results. This should be fixed by either making 1532 // the buffer point directly to the subquery results or adding a new 1533 // subquery mode that reads and discards all rows. This could possibly also 1534 // be fixed by ensuring that bufferNode exhausts its input (and forcing it 1535 // to behave like a spoolNode) and using the EXISTS mode. 1536 Mode: exec.SubqueryAllRows, 1537 Root: buffer, 1538 }) 1539 1540 b.addBuiltWithExpr(with.ID, value.outputCols, buffer) 1541 1542 return b.buildRelational(with.Main) 1543 } 1544 1545 func (b *Builder) buildRecursiveCTE(rec *memo.RecursiveCTEExpr) (execPlan, error) { 1546 initial, err := b.buildRelational(rec.Initial) 1547 if err != nil { 1548 return execPlan{}, err 1549 } 1550 1551 // Make sure we have the columns in the correct order. 1552 initial, err = b.ensureColumns(initial, rec.InitialCols, nil /* colNames */, nil /* ordering */) 1553 if err != nil { 1554 return execPlan{}, err 1555 } 1556 1557 // Renumber the columns so they match the columns expected by the recursive 1558 // query. 
1559 initial.outputCols = util.FastIntMap{} 1560 for i, col := range rec.OutCols { 1561 initial.outputCols.Set(int(col), i) 1562 } 1563 1564 // To implement exec.RecursiveCTEIterationFn, we create a special Builder. 1565 1566 innerBldTemplate := &Builder{ 1567 factory: b.factory, 1568 mem: b.mem, 1569 catalog: b.catalog, 1570 evalCtx: b.evalCtx, 1571 // If the recursive query itself contains CTEs, building it in the function 1572 // below will add to withExprs. Cap the slice to force reallocation on any 1573 // appends, so that they don't overwrite overwrite later appends by our 1574 // original builder. 1575 withExprs: b.withExprs[:len(b.withExprs):len(b.withExprs)], 1576 } 1577 1578 fn := func(bufferRef exec.BufferNode) (exec.Plan, error) { 1579 // Use a separate builder each time. 1580 innerBld := *innerBldTemplate 1581 innerBld.addBuiltWithExpr(rec.WithID, initial.outputCols, bufferRef) 1582 plan, err := innerBld.build(rec.Recursive) 1583 if err != nil { 1584 return nil, err 1585 } 1586 // Ensure columns are output in the same order. 1587 plan, err = innerBld.ensureColumns( 1588 plan, rec.RecursiveCols, nil /* colNames */, nil, /* ordering */ 1589 ) 1590 if err != nil { 1591 return nil, err 1592 } 1593 return innerBld.factory.ConstructPlan(plan.root, innerBld.subqueries, innerBld.cascades, innerBld.checks) 1594 } 1595 1596 label := fmt.Sprintf("working buffer (%s)", rec.Name) 1597 var ep execPlan 1598 ep.root, err = b.factory.ConstructRecursiveCTE(initial.root, fn, label) 1599 if err != nil { 1600 return execPlan{}, err 1601 } 1602 for i, col := range rec.OutCols { 1603 ep.outputCols.Set(int(col), i) 1604 } 1605 return ep, nil 1606 } 1607 1608 func (b *Builder) buildWithScan(withScan *memo.WithScanExpr) (execPlan, error) { 1609 e := b.findBuiltWithExpr(withScan.With) 1610 if e == nil { 1611 err := errors.WithHint( 1612 errors.Errorf("couldn't find WITH expression %q with ID %d", withScan.Name, withScan.With), 1613 "references to WITH expressions from correlated subqueries are unsupported", 1614 ) 1615 return execPlan{}, err 1616 } 1617 1618 var label bytes.Buffer 1619 fmt.Fprintf(&label, "buffer %d", withScan.With) 1620 if withScan.Name != "" { 1621 fmt.Fprintf(&label, " (%s)", withScan.Name) 1622 } 1623 1624 node, err := b.factory.ConstructScanBuffer(e.bufferNode, label.String()) 1625 if err != nil { 1626 return execPlan{}, err 1627 } 1628 res := execPlan{root: node} 1629 1630 if maxVal, _ := e.outputCols.MaxValue(); len(withScan.InCols) == maxVal+1 { 1631 // We are outputting all columns. Just set up the map. 1632 1633 // The ColumnIDs from the With expression need to get remapped according to 1634 // the mapping in the withScan to get the actual colMap for this expression. 1635 for i := range withScan.InCols { 1636 idx, _ := e.outputCols.Get(int(withScan.InCols[i])) 1637 res.outputCols.Set(int(withScan.OutCols[i]), idx) 1638 } 1639 } else { 1640 // We need a projection. 
1641 cols := make([]exec.NodeColumnOrdinal, len(withScan.InCols)) 1642 for i := range withScan.InCols { 1643 col, ok := e.outputCols.Get(int(withScan.InCols[i])) 1644 if !ok { 1645 panic(errors.AssertionFailedf("column %d not in input", log.Safe(withScan.InCols[i]))) 1646 } 1647 cols[i] = exec.NodeColumnOrdinal(col) 1648 res.outputCols.Set(int(withScan.OutCols[i]), i) 1649 } 1650 res.root, err = b.factory.ConstructSimpleProject( 1651 res.root, cols, nil, /* colNames */ 1652 exec.OutputOrdering(res.sqlOrdering(withScan.ProvidedPhysical().Ordering)), 1653 ) 1654 if err != nil { 1655 return execPlan{}, err 1656 } 1657 } 1658 return res, nil 1659 1660 } 1661 1662 func (b *Builder) buildProjectSet(projectSet *memo.ProjectSetExpr) (execPlan, error) { 1663 input, err := b.buildRelational(projectSet.Input) 1664 if err != nil { 1665 return execPlan{}, err 1666 } 1667 1668 zip := projectSet.Zip 1669 md := b.mem.Metadata() 1670 scalarCtx := input.makeBuildScalarCtx() 1671 1672 exprs := make(tree.TypedExprs, len(zip)) 1673 zipCols := make(sqlbase.ResultColumns, 0, len(zip)) 1674 numColsPerGen := make([]int, len(zip)) 1675 1676 ep := execPlan{outputCols: input.outputCols} 1677 n := ep.numOutputCols() 1678 1679 for i := range zip { 1680 item := &zip[i] 1681 exprs[i], err = b.buildScalar(&scalarCtx, item.Fn) 1682 if err != nil { 1683 return execPlan{}, err 1684 } 1685 1686 for _, col := range item.Cols { 1687 colMeta := md.ColumnMeta(col) 1688 zipCols = append(zipCols, sqlbase.ResultColumn{Name: colMeta.Alias, Typ: colMeta.Type}) 1689 1690 ep.outputCols.Set(int(col), n) 1691 n++ 1692 } 1693 1694 numColsPerGen[i] = len(item.Cols) 1695 } 1696 1697 ep.root, err = b.factory.ConstructProjectSet(input.root, exprs, zipCols, numColsPerGen) 1698 if err != nil { 1699 return execPlan{}, err 1700 } 1701 1702 return ep, nil 1703 } 1704 1705 func (b *Builder) resultColumn(id opt.ColumnID) sqlbase.ResultColumn { 1706 colMeta := b.mem.Metadata().ColumnMeta(id) 1707 return sqlbase.ResultColumn{ 1708 Name: colMeta.Alias, 1709 Typ: colMeta.Type, 1710 } 1711 } 1712 1713 // extractFromOffset extracts the start bound expression of a window function 1714 // that uses the OFFSET windowing mode for its start bound. 1715 func (b *Builder) extractFromOffset(e opt.ScalarExpr) (_ opt.ScalarExpr, ok bool) { 1716 if opt.IsWindowOp(e) || opt.IsAggregateOp(e) { 1717 return nil, false 1718 } 1719 if modifier, ok := e.(*memo.WindowFromOffsetExpr); ok { 1720 return modifier.Offset, true 1721 } 1722 return b.extractFromOffset(e.Child(0).(opt.ScalarExpr)) 1723 } 1724 1725 // extractToOffset extracts the end bound expression of a window function 1726 // that uses the OFFSET windowing mode for its end bound. 1727 func (b *Builder) extractToOffset(e opt.ScalarExpr) (_ opt.ScalarExpr, ok bool) { 1728 if opt.IsWindowOp(e) || opt.IsAggregateOp(e) { 1729 return nil, false 1730 } 1731 if modifier, ok := e.(*memo.WindowToOffsetExpr); ok { 1732 return modifier.Offset, true 1733 } 1734 return b.extractToOffset(e.Child(0).(opt.ScalarExpr)) 1735 } 1736 1737 // extractFilter extracts a FILTER expression from a window function tower. 1738 // Returns the expression and true if there was a filter, and false otherwise. 
func (b *Builder) extractFilter(e opt.ScalarExpr) (opt.ScalarExpr, bool) {
	if opt.IsWindowOp(e) || opt.IsAggregateOp(e) {
		return nil, false
	}
	if filter, ok := e.(*memo.AggFilterExpr); ok {
		return filter.Filter, true
	}
	return b.extractFilter(e.Child(0).(opt.ScalarExpr))
}

// extractWindowFunction extracts the window function being computed from a
// potential tower of modifiers attached to the Function field of a
// WindowsItem.
func (b *Builder) extractWindowFunction(e opt.ScalarExpr) opt.ScalarExpr {
	if opt.IsWindowOp(e) || opt.IsAggregateOp(e) {
		return e
	}
	return b.extractWindowFunction(e.Child(0).(opt.ScalarExpr))
}

func (b *Builder) isOffsetMode(boundType tree.WindowFrameBoundType) bool {
	return boundType == tree.OffsetPreceding || boundType == tree.OffsetFollowing
}

func (b *Builder) buildFrame(input execPlan, w *memo.WindowsItem) (*tree.WindowFrame, error) {
	scalarCtx := input.makeBuildScalarCtx()
	newDef := &tree.WindowFrame{
		Mode: w.Frame.Mode,
		Bounds: tree.WindowFrameBounds{
			StartBound: &tree.WindowFrameBound{
				BoundType: w.Frame.StartBoundType,
			},
			EndBound: &tree.WindowFrameBound{
				BoundType: w.Frame.EndBoundType,
			},
		},
		Exclusion: w.Frame.FrameExclusion,
	}
	if boundExpr, ok := b.extractFromOffset(w.Function); ok {
		if !b.isOffsetMode(w.Frame.StartBoundType) {
			panic(errors.AssertionFailedf("expected offset to only be present in offset mode"))
		}
		offset, err := b.buildScalar(&scalarCtx, boundExpr)
		if err != nil {
			return nil, err
		}
		if offset == tree.DNull {
			return nil, pgerror.Newf(pgcode.NullValueNotAllowed, "frame starting offset must not be null")
		}
		newDef.Bounds.StartBound.OffsetExpr = offset
	}

	if boundExpr, ok := b.extractToOffset(w.Function); ok {
		if !b.isOffsetMode(newDef.Bounds.EndBound.BoundType) {
			panic(errors.AssertionFailedf("expected offset to only be present in offset mode"))
		}
		offset, err := b.buildScalar(&scalarCtx, boundExpr)
		if err != nil {
			return nil, err
		}
		if offset == tree.DNull {
			return nil, pgerror.Newf(pgcode.NullValueNotAllowed, "frame ending offset must not be null")
		}
		newDef.Bounds.EndBound.OffsetExpr = offset
	}
	return newDef, nil
}

func (b *Builder) buildWindow(w *memo.WindowExpr) (execPlan, error) {
	input, err := b.buildRelational(w.Input)
	if err != nil {
		return execPlan{}, err
	}

	// Rearrange the input so that it has all the passthrough columns followed
	// by all the argument columns.

	passthrough := w.Input.Relational().OutputCols

	desiredCols := opt.ColList{}
	passthrough.ForEach(func(i opt.ColumnID) {
		desiredCols = append(desiredCols, i)
	})

	// TODO(justin): this call to ensureColumns is kind of unfortunate because it
	// can result in an extra render beneath each window function. Figure out a
	// way to alleviate this.
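	// Every window function argument is itself a column of the input, so
	// desiredCols is simply the input's output columns in ColSet (ascending
	// column ID) order; ensureColumns adds a projection only when the input does
	// not already produce them in exactly that order (see needProjection).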
	input, err = b.ensureColumns(input, desiredCols, nil, opt.Ordering{})
	if err != nil {
		return execPlan{}, err
	}

	ctx := input.makeBuildScalarCtx()

	ord := w.Ordering.ToOrdering()

	orderingExprs := make(tree.OrderBy, len(ord))
	for i, c := range ord {
		direction := tree.Ascending
		if c.Descending() {
			direction = tree.Descending
		}
		orderingExprs[i] = &tree.Order{
			Expr:      b.indexedVar(&ctx, b.mem.Metadata(), c.ID()),
			Direction: direction,
		}
	}

	partitionIdxs := make([]exec.NodeColumnOrdinal, w.Partition.Len())
	partitionExprs := make(tree.Exprs, w.Partition.Len())

	i := 0
	w.Partition.ForEach(func(col opt.ColumnID) {
		ordinal, _ := input.outputCols.Get(int(col))
		partitionIdxs[i] = exec.NodeColumnOrdinal(ordinal)
		partitionExprs[i] = b.indexedVar(&ctx, b.mem.Metadata(), col)
		i++
	})

	argIdxs := make([][]exec.NodeColumnOrdinal, len(w.Windows))
	filterIdxs := make([]int, len(w.Windows))
	exprs := make([]*tree.FuncExpr, len(w.Windows))

	for i := range w.Windows {
		item := &w.Windows[i]
		fn := b.extractWindowFunction(item.Function)
		name, overload := memo.FindWindowOverload(fn)
		if !b.disableTelemetry {
			telemetry.Inc(sqltelemetry.WindowFunctionCounter(name))
		}
		props, _ := builtins.GetBuiltinProperties(name)

		args := make([]tree.TypedExpr, fn.ChildCount())
		argIdxs[i] = make([]exec.NodeColumnOrdinal, fn.ChildCount())
		for j, n := 0, fn.ChildCount(); j < n; j++ {
			col := fn.Child(j).(*memo.VariableExpr).Col
			args[j] = b.indexedVar(&ctx, b.mem.Metadata(), col)
			idx, _ := input.outputCols.Get(int(col))
			argIdxs[i][j] = exec.NodeColumnOrdinal(idx)
		}

		frame, err := b.buildFrame(input, item)
		if err != nil {
			return execPlan{}, err
		}

		var builtFilter tree.TypedExpr
		filter, ok := b.extractFilter(item.Function)
		if ok {
			f, ok := filter.(*memo.VariableExpr)
			if !ok {
				panic(errors.AssertionFailedf("expected FILTER expression to be a VariableExpr"))
			}
			filterIdxs[i], _ = input.outputCols.Get(int(f.Col))

			builtFilter, err = b.buildScalar(&ctx, filter)
			if err != nil {
				return execPlan{}, err
			}
		} else {
			filterIdxs[i] = -1
		}

		exprs[i] = tree.NewTypedFuncExpr(
			tree.WrapFunction(name),
			0,
			args,
			builtFilter,
			&tree.WindowDef{
				Partitions: partitionExprs,
				OrderBy:    orderingExprs,
				Frame:      frame,
			},
			overload.FixedReturnType(),
			props,
			overload,
		)
	}

	resultCols := make(sqlbase.ResultColumns, w.Relational().OutputCols.Len())

	// All the passthrough cols will keep their ordinal index.
	passthrough.ForEach(func(col opt.ColumnID) {
		ordinal, _ := input.outputCols.Get(int(col))
		resultCols[ordinal] = b.resultColumn(col)
	})

	var outputCols opt.ColMap
	input.outputCols.ForEach(func(key, val int) {
		if passthrough.Contains(opt.ColumnID(key)) {
			outputCols.Set(key, val)
		}
	})

	outputIdxs := make([]int, len(w.Windows))

	// Because of the way we arranged the input columns, we will be outputting
	// the window columns at the end (which is exactly what the execution engine
	// will do as well).
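	// For example, with two passthrough columns and two window functions, the
	// result columns are laid out as (passthrough0, passthrough1, window0,
	// window1) and outputIdxs is [2, 3].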
	windowStart := passthrough.Len()
	for i := range w.Windows {
		resultCols[windowStart+i] = b.resultColumn(w.Windows[i].Col)
		outputCols.Set(int(w.Windows[i].Col), windowStart+i)
		outputIdxs[i] = windowStart + i
	}

	var rangeOffsetColumn exec.NodeColumnOrdinal
	if !ord.Empty() {
		// The range offset column is only meaningful when the window has an
		// ordering (RANGE mode with an offset requires ORDER BY).
		idx, _ := input.outputCols.Get(int(w.RangeOffsetColumn))
		rangeOffsetColumn = exec.NodeColumnOrdinal(idx)
	}
	node, err := b.factory.ConstructWindow(input.root, exec.WindowInfo{
		Cols:              resultCols,
		Exprs:             exprs,
		OutputIdxs:        outputIdxs,
		ArgIdxs:           argIdxs,
		FilterIdxs:        filterIdxs,
		Partition:         partitionIdxs,
		Ordering:          input.sqlOrdering(ord),
		RangeOffsetColumn: rangeOffsetColumn,
	})
	if err != nil {
		return execPlan{}, err
	}

	return execPlan{
		root:       node,
		outputCols: outputCols,
	}, nil
}

func (b *Builder) buildSequenceSelect(seqSel *memo.SequenceSelectExpr) (execPlan, error) {
	seq := b.mem.Metadata().Sequence(seqSel.Sequence)
	node, err := b.factory.ConstructSequenceSelect(seq)
	if err != nil {
		return execPlan{}, err
	}

	ep := execPlan{root: node}
	for i, c := range seqSel.Cols {
		ep.outputCols.Set(int(c), i)
	}

	return ep, nil
}

func (b *Builder) applySaveTable(
	input execPlan, e memo.RelExpr, saveTableName string,
) (execPlan, error) {
	name := tree.NewTableName(tree.Name(opt.SaveTablesDatabase), tree.Name(saveTableName))

	// Ensure that the column names are unique and match the names used by the
	// opttester.
	outputCols := e.Relational().OutputCols
	colNames := make([]string, outputCols.Len())
	colNameGen := memo.NewColumnNameGenerator(e)
	for col, ok := outputCols.Next(0); ok; col, ok = outputCols.Next(col + 1) {
		ord, _ := input.outputCols.Get(int(col))
		colNames[ord] = colNameGen.GenerateName(col)
	}

	var err error
	input.root, err = b.factory.ConstructSaveTable(input.root, name, colNames)
	if err != nil {
		return execPlan{}, err
	}
	return input, err
}

func (b *Builder) buildOpaque(opaque *memo.OpaqueRelPrivate) (execPlan, error) {
	node, err := b.factory.ConstructOpaque(opaque.Metadata)
	if err != nil {
		return execPlan{}, err
	}

	ep := execPlan{root: node}
	for i, c := range opaque.Columns {
		ep.outputCols.Set(int(c), i)
	}

	return ep, nil
}

// needProjection figures out what projection is needed on top of the input plan
// to produce the given list of columns. If the input plan already produces
// the columns (in the same order), returns needProj=false.
func (b *Builder) needProjection(
	input execPlan, colList opt.ColList,
) (_ []exec.NodeColumnOrdinal, needProj bool) {
	if input.numOutputCols() == len(colList) {
		identity := true
		for i, col := range colList {
			if ord, ok := input.outputCols.Get(int(col)); !ok || ord != i {
				identity = false
				break
			}
		}
		if identity {
			return nil, false
		}
	}
	cols := make([]exec.NodeColumnOrdinal, 0, len(colList))
	for _, col := range colList {
		if col != 0 {
			cols = append(cols, input.getNodeColumnOrdinal(col))
		}
	}
	return cols, true
}

// ensureColumns applies a projection as necessary to make the output match the
// given list of columns; colNames is optional.
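// When a projection is needed, the resulting plan maps colList[i] to output
// ordinal i, and the provided ordering (if any) is translated into the
// projection's required output ordering.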
func (b *Builder) ensureColumns(
	input execPlan, colList opt.ColList, colNames []string, provided opt.Ordering,
) (execPlan, error) {
	cols, needProj := b.needProjection(input, colList)
	if !needProj {
		// No projection necessary.
		if colNames != nil {
			var err error
			input.root, err = b.factory.RenameColumns(input.root, colNames)
			if err != nil {
				return execPlan{}, err
			}
		}
		return input, nil
	}
	var res execPlan
	for i, col := range colList {
		res.outputCols.Set(int(col), i)
	}
	reqOrdering := exec.OutputOrdering(res.sqlOrdering(provided))
	var err error
	res.root, err = b.factory.ConstructSimpleProject(input.root, cols, colNames, reqOrdering)
	return res, err
}

// applyPresentation adds a projection to a plan to satisfy a required
// Presentation property.
func (b *Builder) applyPresentation(input execPlan, p *physical.Required) (execPlan, error) {
	pres := p.Presentation
	colList := make(opt.ColList, len(pres))
	colNames := make([]string, len(pres))
	for i := range pres {
		colList[i] = pres[i].ID
		colNames[i] = pres[i].Alias
	}
	// The ordering is not useful for a top-level projection (it is used by the
	// distsql planner for internal nodes); we might not even be able to represent
	// it because it can refer to columns not in the presentation.
	return b.ensureColumns(input, colList, colNames, nil /* provided */)
}

// getEnvData consolidates the information that must be presented in
// EXPLAIN (opt, env).
func (b *Builder) getEnvData() exec.ExplainEnvData {
	envOpts := exec.ExplainEnvData{ShowEnv: true}
	var err error
	envOpts.Tables, envOpts.Sequences, envOpts.Views, err = b.mem.Metadata().AllDataSourceNames(
		func(ds cat.DataSource) (cat.DataSourceName, error) {
			return b.catalog.FullyQualifiedName(context.TODO(), ds)
		},
	)
	if err != nil {
		panic(err)
	}

	return envOpts
}

// statementTag returns a string that can be used in an error message regarding
// the given expression.
func (b *Builder) statementTag(expr memo.RelExpr) string {
	switch expr.Op() {
	case opt.OpaqueRelOp, opt.OpaqueMutationOp, opt.OpaqueDDLOp:
		return expr.Private().(*memo.OpaqueRelPrivate).Metadata.String()

	default:
		return expr.Op().SyntaxTag()
	}
}