github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/exec/execbuilder/relational.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package execbuilder
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"fmt"
    17  	"math"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/server/telemetry"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/opt"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/opt/exec"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/opt/norm"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/opt/ordering"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical"
    27  	"github.com/cockroachdb/cockroach/pkg/sql/opt/xform"
    28  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    29  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
    30  	"github.com/cockroachdb/cockroach/pkg/sql/sem/builtins"
    31  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    32  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    33  	"github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry"
    34  	"github.com/cockroachdb/cockroach/pkg/util"
    35  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    36  	"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
    37  	"github.com/cockroachdb/cockroach/pkg/util/log"
    38  	"github.com/cockroachdb/errors"
    39  )
    40  
    41  type execPlan struct {
    42  	root exec.Node
    43  
    44  	// outputCols is a map from opt.ColumnID to exec.NodeColumnOrdinal. It maps
    45  	// columns in the output set of a relational expression to indices in the
    46  	// result columns of the exec.Node.
    47  	//
    48  	// The reason we need to keep track of this (instead of using just the
    49  	// relational properties) is that the relational properties don't force a
    50  	// single "schema": any ordering of the output columns is possible. We choose
    51  	// the schema that is most convenient: for scans, we use the table's column
    52  	// ordering. Consider:
    53  	//   SELECT a, b FROM t WHERE a = b
    54  	// and the following two cases:
    55  	//   1. The table is defined as (k INT PRIMARY KEY, a INT, b INT). The scan will
    56  	//      return (k, a, b).
    57  	//   2. The table is defined as (k INT PRIMARY KEY, b INT, a INT). The scan will
    58  	//      return (k, b, a).
    59  	// In these two cases, the relational properties are effectively the same.
    60  	//
    61  	// An alternative to this would be to always use a "canonical" schema, for
    62  	// example the output columns in increasing index order. However, this would
    63  	// require a lot of otherwise unnecessary projections.
    64  	//
    65  	// Note: conceptually, this could be a ColList; however, the map is more
    66  	// convenient when converting VariableOps to IndexedVars.
    67  	outputCols opt.ColMap
    68  }
    69  
    70  // numOutputCols returns the number of columns emitted by the execPlan's Node.
    71  // This will typically be equal to ep.outputCols.Len(), but might be different
    72  // if the node outputs the same optimizer ColumnID multiple times.
    73  // TODO(justin): we should keep track of this instead of computing it each time.
    74  func (ep *execPlan) numOutputCols() int {
    75  	return numOutputColsInMap(ep.outputCols)
    76  }
    77  
    78  // numOutputColsInMap returns the number of slots required to fill in all of
    79  // the columns referred to by this ColMap.
    80  func numOutputColsInMap(m opt.ColMap) int {
    81  	max, ok := m.MaxValue()
    82  	if !ok {
    83  		return 0
    84  	}
    85  	return max + 1
    86  }
    87  
    88  // makeBuildScalarCtx returns a buildScalarCtx that can be used with expressions
    89  // that refer the output columns of this plan.
    90  func (ep *execPlan) makeBuildScalarCtx() buildScalarCtx {
    91  	return buildScalarCtx{
    92  		ivh:     tree.MakeIndexedVarHelper(nil /* container */, ep.numOutputCols()),
    93  		ivarMap: ep.outputCols,
    94  	}
    95  }
    96  
    97  // getNodeColumnOrdinal takes a column that is known to be produced by the execPlan
    98  // and returns the ordinal index of that column in the result columns of the
    99  // node.
   100  func (ep *execPlan) getNodeColumnOrdinal(col opt.ColumnID) exec.NodeColumnOrdinal {
   101  	ord, ok := ep.outputCols.Get(int(col))
   102  	if !ok {
   103  		panic(errors.AssertionFailedf("column %d not in input", log.Safe(col)))
   104  	}
   105  	return exec.NodeColumnOrdinal(ord)
   106  }
   107  
   108  func (ep *execPlan) getNodeColumnOrdinalSet(cols opt.ColSet) exec.NodeColumnOrdinalSet {
   109  	var res exec.NodeColumnOrdinalSet
   110  	cols.ForEach(func(colID opt.ColumnID) {
   111  		res.Add(int(ep.getNodeColumnOrdinal(colID)))
   112  	})
   113  	return res
   114  }
   115  
   116  // reqOrdering converts the provided ordering of a relational expression to an
   117  // OutputOrdering (according to the outputCols map).
   118  func (ep *execPlan) reqOrdering(expr memo.RelExpr) exec.OutputOrdering {
   119  	return exec.OutputOrdering(ep.sqlOrdering(expr.ProvidedPhysical().Ordering))
   120  }
   121  
   122  // sqlOrdering converts an Ordering to a ColumnOrdering (according to the
   123  // outputCols map).
   124  func (ep *execPlan) sqlOrdering(ordering opt.Ordering) sqlbase.ColumnOrdering {
   125  	if ordering.Empty() {
   126  		return nil
   127  	}
   128  	colOrder := make(sqlbase.ColumnOrdering, len(ordering))
   129  	for i := range ordering {
   130  		colOrder[i].ColIdx = int(ep.getNodeColumnOrdinal(ordering[i].ID()))
   131  		if ordering[i].Descending() {
   132  			colOrder[i].Direction = encoding.Descending
   133  		} else {
   134  			colOrder[i].Direction = encoding.Ascending
   135  		}
   136  	}
   137  
   138  	return colOrder
   139  }
   140  
   141  func (b *Builder) buildRelational(e memo.RelExpr) (execPlan, error) {
   142  	var ep execPlan
   143  	var err error
   144  
   145  	if opt.IsDDLOp(e) {
   146  		// Mark the statement as containing DDL for use
   147  		// in the SQL executor.
   148  		b.IsDDL = true
   149  
   150  		// This will set the system DB trigger for transactions containing
   151  		// schema-modifying statements that have no effect, such as
   152  		// `BEGIN; INSERT INTO ...; CREATE TABLE IF NOT EXISTS ...; COMMIT;`
   153  		// where the table already exists. This will generate some false schema
   154  		// cache refreshes, but that's expected to be quite rare in practice.
   155  		if err := b.evalCtx.Txn.SetSystemConfigTrigger(); err != nil {
   156  			return execPlan{}, errors.WithSecondaryError(
   157  				unimplemented.NewWithIssuef(26508,
   158  					"schema change statement cannot follow a statement that has written in the same transaction"),
   159  				err)
   160  		}
   161  	}
   162  
   163  	// Raise error if mutation op is part of a read-only transaction.
   164  	if opt.IsMutationOp(e) && b.evalCtx.TxnReadOnly {
   165  		return execPlan{}, pgerror.Newf(pgcode.ReadOnlySQLTransaction,
   166  			"cannot execute %s in a read-only transaction", b.statementTag(e))
   167  	}
   168  
   169  	// Collect usage telemetry for relational node, if appropriate.
   170  	if !b.disableTelemetry {
   171  		if c := opt.OpTelemetryCounters[e.Op()]; c != nil {
   172  			telemetry.Inc(c)
   173  		}
   174  	}
   175  
   176  	var saveTableName string
   177  	if b.nameGen != nil {
   178  		// Don't save tables for operators that don't produce any columns (most
   179  		// importantly, for SET which is used to disable saving of tables).
   180  		if !e.Relational().OutputCols.Empty() {
   181  			// This function must be called in a pre-order traversal of the tree.
   182  			saveTableName = b.nameGen.GenerateName(e.Op())
   183  		}
   184  	}
   185  
   186  	switch t := e.(type) {
   187  	case *memo.ValuesExpr:
   188  		ep, err = b.buildValues(t)
   189  
   190  	case *memo.ScanExpr:
   191  		ep, err = b.buildScan(t)
   192  
   193  	case *memo.SelectExpr:
   194  		ep, err = b.buildSelect(t)
   195  
   196  	case *memo.ProjectExpr:
   197  		ep, err = b.buildProject(t)
   198  
   199  	case *memo.GroupByExpr, *memo.ScalarGroupByExpr:
   200  		ep, err = b.buildGroupBy(e)
   201  
   202  	case *memo.DistinctOnExpr, *memo.EnsureDistinctOnExpr, *memo.UpsertDistinctOnExpr,
   203  		*memo.EnsureUpsertDistinctOnExpr:
   204  		ep, err = b.buildDistinct(t)
   205  
   206  	case *memo.LimitExpr, *memo.OffsetExpr:
   207  		ep, err = b.buildLimitOffset(e)
   208  
   209  	case *memo.SortExpr:
   210  		ep, err = b.buildSort(t)
   211  
   212  	case *memo.IndexJoinExpr:
   213  		ep, err = b.buildIndexJoin(t)
   214  
   215  	case *memo.LookupJoinExpr:
   216  		ep, err = b.buildLookupJoin(t)
   217  
   218  	case *memo.GeoLookupJoinExpr:
   219  		ep, err = b.buildGeoLookupJoin(t)
   220  
   221  	case *memo.ZigzagJoinExpr:
   222  		ep, err = b.buildZigzagJoin(t)
   223  
   224  	case *memo.OrdinalityExpr:
   225  		ep, err = b.buildOrdinality(t)
   226  
   227  	case *memo.MergeJoinExpr:
   228  		ep, err = b.buildMergeJoin(t)
   229  
   230  	case *memo.Max1RowExpr:
   231  		ep, err = b.buildMax1Row(t)
   232  
   233  	case *memo.ProjectSetExpr:
   234  		ep, err = b.buildProjectSet(t)
   235  
   236  	case *memo.WindowExpr:
   237  		ep, err = b.buildWindow(t)
   238  
   239  	case *memo.SequenceSelectExpr:
   240  		ep, err = b.buildSequenceSelect(t)
   241  
   242  	case *memo.InsertExpr:
   243  		ep, err = b.buildInsert(t)
   244  
   245  	case *memo.UpdateExpr:
   246  		ep, err = b.buildUpdate(t)
   247  
   248  	case *memo.UpsertExpr:
   249  		ep, err = b.buildUpsert(t)
   250  
   251  	case *memo.DeleteExpr:
   252  		ep, err = b.buildDelete(t)
   253  
   254  	case *memo.CreateTableExpr:
   255  		ep, err = b.buildCreateTable(t)
   256  
   257  	case *memo.CreateViewExpr:
   258  		ep, err = b.buildCreateView(t)
   259  
   260  	case *memo.WithExpr:
   261  		ep, err = b.buildWith(t)
   262  
   263  	case *memo.WithScanExpr:
   264  		ep, err = b.buildWithScan(t)
   265  
   266  	case *memo.RecursiveCTEExpr:
   267  		ep, err = b.buildRecursiveCTE(t)
   268  
   269  	case *memo.ExplainExpr:
   270  		ep, err = b.buildExplain(t)
   271  
   272  	case *memo.ShowTraceForSessionExpr:
   273  		ep, err = b.buildShowTrace(t)
   274  
   275  	case *memo.OpaqueRelExpr, *memo.OpaqueMutationExpr, *memo.OpaqueDDLExpr:
   276  		ep, err = b.buildOpaque(t.Private().(*memo.OpaqueRelPrivate))
   277  
   278  	case *memo.AlterTableSplitExpr:
   279  		ep, err = b.buildAlterTableSplit(t)
   280  
   281  	case *memo.AlterTableUnsplitExpr:
   282  		ep, err = b.buildAlterTableUnsplit(t)
   283  
   284  	case *memo.AlterTableUnsplitAllExpr:
   285  		ep, err = b.buildAlterTableUnsplitAll(t)
   286  
   287  	case *memo.AlterTableRelocateExpr:
   288  		ep, err = b.buildAlterTableRelocate(t)
   289  
   290  	case *memo.ControlJobsExpr:
   291  		ep, err = b.buildControlJobs(t)
   292  
   293  	case *memo.CancelQueriesExpr:
   294  		ep, err = b.buildCancelQueries(t)
   295  
   296  	case *memo.CancelSessionsExpr:
   297  		ep, err = b.buildCancelSessions(t)
   298  
   299  	case *memo.ExportExpr:
   300  		ep, err = b.buildExport(t)
   301  
   302  	default:
   303  		switch {
   304  		case opt.IsSetOp(e):
   305  			ep, err = b.buildSetOp(e)
   306  
   307  		case opt.IsJoinNonApplyOp(e):
   308  			ep, err = b.buildHashJoin(e)
   309  
   310  		case opt.IsJoinApplyOp(e):
   311  			ep, err = b.buildApplyJoin(e)
   312  
   313  		default:
   314  			err = errors.AssertionFailedf("no execbuild for %T", t)
   315  		}
   316  	}
   317  	if err != nil {
   318  		return execPlan{}, err
   319  	}
   320  
   321  	// In race builds, assert that the exec plan output columns match the opt
   322  	// plan output columns.
   323  	if util.RaceEnabled {
   324  		optCols := e.Relational().OutputCols
   325  		var execCols opt.ColSet
   326  		ep.outputCols.ForEach(func(key, val int) {
   327  			execCols.Add(opt.ColumnID(key))
   328  		})
   329  		if !execCols.Equals(optCols) {
   330  			return execPlan{}, errors.AssertionFailedf(
   331  				"exec columns do not match opt columns: expected %v, got %v", optCols, execCols)
   332  		}
   333  	}
   334  
   335  	if saveTableName != "" {
   336  		ep, err = b.applySaveTable(ep, e, saveTableName)
   337  		if err != nil {
   338  			return execPlan{}, err
   339  		}
   340  	}
   341  
   342  	// Wrap the expression in a render expression if presentation requires it.
   343  	if p := e.RequiredPhysical(); !p.Presentation.Any() {
   344  		ep, err = b.applyPresentation(ep, p)
   345  	}
   346  	return ep, err
   347  }
   348  
   349  func (b *Builder) buildValues(values *memo.ValuesExpr) (execPlan, error) {
   350  	rows, err := b.buildValuesRows(values)
   351  	if err != nil {
   352  		return execPlan{}, err
   353  	}
   354  	return b.constructValues(rows, values.Cols)
   355  }
   356  
   357  func (b *Builder) buildValuesRows(values *memo.ValuesExpr) ([][]tree.TypedExpr, error) {
   358  	numCols := len(values.Cols)
   359  
   360  	rows := make([][]tree.TypedExpr, len(values.Rows))
   361  	rowBuf := make([]tree.TypedExpr, len(rows)*numCols)
   362  	scalarCtx := buildScalarCtx{}
   363  	for i := range rows {
   364  		tup := values.Rows[i].(*memo.TupleExpr)
   365  		if len(tup.Elems) != numCols {
   366  			return nil, fmt.Errorf("inconsistent row length %d vs %d", len(tup.Elems), numCols)
   367  		}
   368  		// Chop off prefix of rowBuf and limit its capacity.
   369  		rows[i] = rowBuf[:numCols:numCols]
   370  		rowBuf = rowBuf[numCols:]
   371  		var err error
   372  		for j := 0; j < numCols; j++ {
   373  			rows[i][j], err = b.buildScalar(&scalarCtx, tup.Elems[j])
   374  			if err != nil {
   375  				return nil, err
   376  			}
   377  		}
   378  	}
   379  	return rows, nil
   380  }
   381  
   382  func (b *Builder) constructValues(rows [][]tree.TypedExpr, cols opt.ColList) (execPlan, error) {
   383  	md := b.mem.Metadata()
   384  	resultCols := make(sqlbase.ResultColumns, len(cols))
   385  	for i, col := range cols {
   386  		colMeta := md.ColumnMeta(col)
   387  		resultCols[i].Name = colMeta.Alias
   388  		resultCols[i].Typ = colMeta.Type
   389  	}
   390  	node, err := b.factory.ConstructValues(rows, resultCols)
   391  	if err != nil {
   392  		return execPlan{}, err
   393  	}
   394  	ep := execPlan{root: node}
   395  	for i, col := range cols {
   396  		ep.outputCols.Set(int(col), i)
   397  	}
   398  
   399  	return ep, nil
   400  }
   401  
   402  // getColumns returns the set of column ordinals in the table for the set of
   403  // column IDs, along with a mapping from the column IDs to output ordinals
   404  // (starting with outputOrdinalStart).
   405  func (b *Builder) getColumns(
   406  	cols opt.ColSet, tableID opt.TableID,
   407  ) (exec.TableColumnOrdinalSet, opt.ColMap) {
   408  	var needed exec.TableColumnOrdinalSet
   409  	var output opt.ColMap
   410  
   411  	columnCount := b.mem.Metadata().Table(tableID).DeletableColumnCount()
   412  	n := 0
   413  	for i := 0; i < columnCount; i++ {
   414  		colID := tableID.ColumnID(i)
   415  		if cols.Contains(colID) {
   416  			needed.Add(i)
   417  			output.Set(int(colID), n)
   418  			n++
   419  		}
   420  	}
   421  
   422  	return needed, output
   423  }
   424  
   425  // indexConstraintMaxResults returns the maximum number of results for a scan;
   426  // the scan is guaranteed never to return more results than this. Iff this hint
   427  // is invalid, 0 is returned.
   428  func (b *Builder) indexConstraintMaxResults(scan *memo.ScanExpr) uint64 {
   429  	c := scan.Constraint
   430  	if c == nil || c.IsContradiction() || c.IsUnconstrained() {
   431  		return 0
   432  	}
   433  
   434  	numCols := c.Columns.Count()
   435  	var indexCols opt.ColSet
   436  	for i := 0; i < numCols; i++ {
   437  		indexCols.Add(c.Columns.Get(i).ID())
   438  	}
   439  	rel := scan.Relational()
   440  	if !rel.FuncDeps.ColsAreLaxKey(indexCols) {
   441  		return 0
   442  	}
   443  
   444  	return c.CalculateMaxResults(b.evalCtx, indexCols, rel.NotNullCols)
   445  }
   446  
   447  func (b *Builder) buildScan(scan *memo.ScanExpr) (execPlan, error) {
   448  	md := b.mem.Metadata()
   449  	tab := md.Table(scan.Table)
   450  
   451  	// Check if we tried to force a specific index but there was no Scan with that
   452  	// index in the memo.
   453  	if scan.Flags.ForceIndex && scan.Flags.Index != scan.Index {
   454  		idx := tab.Index(scan.Flags.Index)
   455  		var err error
   456  		if idx.IsInverted() {
   457  			err = fmt.Errorf("index \"%s\" is inverted and cannot be used for this query", idx.Name())
   458  		} else {
   459  			// This should never happen.
   460  			err = fmt.Errorf("index \"%s\" cannot be used for this query", idx.Name())
   461  		}
   462  		return execPlan{}, err
   463  	}
   464  
   465  	needed, output := b.getColumns(scan.Cols, scan.Table)
   466  	res := execPlan{outputCols: output}
   467  
   468  	// Get the estimated row count from the statistics.
   469  	// Note: if this memo was originally created as part of a PREPARE
   470  	// statement or was stored in the query cache, the column stats would have
   471  	// been removed by DetachMemo. Update that function if the column stats are
   472  	// needed here in the future.
   473  	rowCount := scan.Relational().Stats.RowCount
   474  	if !scan.Relational().Stats.Available {
   475  		// When there are no statistics available, we construct a scan node with
   476  		// the estimated row count of zero rows.
   477  		rowCount = 0
   478  	}
   479  
   480  	if scan.PartitionConstrainedScan {
   481  		sqltelemetry.IncrementPartitioningCounter(sqltelemetry.PartitionConstrainedScan)
   482  	}
   483  
   484  	softLimit := int64(math.Ceil(scan.RequiredPhysical().LimitHint))
   485  	hardLimit := scan.HardLimit.RowCount()
   486  
   487  	locking := scan.Locking
   488  	if b.forceForUpdateLocking {
   489  		locking = forUpdateLocking
   490  	}
   491  
   492  	root, err := b.factory.ConstructScan(
   493  		tab,
   494  		tab.Index(scan.Index),
   495  		needed,
   496  		scan.Constraint,
   497  		hardLimit,
   498  		softLimit,
   499  		// HardLimit.Reverse() is taken into account by ScanIsReverse.
   500  		ordering.ScanIsReverse(scan, &scan.RequiredPhysical().Ordering),
   501  		b.indexConstraintMaxResults(scan),
   502  		res.reqOrdering(scan),
   503  		rowCount,
   504  		locking,
   505  	)
   506  	if err != nil {
   507  		return execPlan{}, err
   508  	}
   509  	res.root = root
   510  	return res, nil
   511  }
   512  
   513  func (b *Builder) buildSelect(sel *memo.SelectExpr) (execPlan, error) {
   514  	input, err := b.buildRelational(sel.Input)
   515  	if err != nil {
   516  		return execPlan{}, err
   517  	}
   518  	ctx := input.makeBuildScalarCtx()
   519  	filter, err := b.buildScalar(&ctx, &sel.Filters)
   520  	if err != nil {
   521  		return execPlan{}, err
   522  	}
   523  	// A filtering node does not modify the schema.
   524  	res := execPlan{outputCols: input.outputCols}
   525  	reqOrder := res.reqOrdering(sel)
   526  	res.root, err = b.factory.ConstructFilter(input.root, filter, reqOrder)
   527  	if err != nil {
   528  		return execPlan{}, err
   529  	}
   530  	return res, nil
   531  }
   532  
   533  // applySimpleProject adds a simple projection on top of an existing plan.
   534  func (b *Builder) applySimpleProject(
   535  	input execPlan, cols opt.ColSet, providedOrd opt.Ordering,
   536  ) (execPlan, error) {
   537  	// We have only pass-through columns.
   538  	colList := make([]exec.NodeColumnOrdinal, 0, cols.Len())
   539  	var res execPlan
   540  	cols.ForEach(func(i opt.ColumnID) {
   541  		res.outputCols.Set(int(i), len(colList))
   542  		colList = append(colList, input.getNodeColumnOrdinal(i))
   543  	})
   544  	var err error
   545  	res.root, err = b.factory.ConstructSimpleProject(
   546  		input.root, colList, nil /* colNames */, exec.OutputOrdering(res.sqlOrdering(providedOrd)),
   547  	)
   548  	if err != nil {
   549  		return execPlan{}, err
   550  	}
   551  	return res, nil
   552  }
   553  
   554  func (b *Builder) buildProject(prj *memo.ProjectExpr) (execPlan, error) {
   555  	md := b.mem.Metadata()
   556  	input, err := b.buildRelational(prj.Input)
   557  	if err != nil {
   558  		return execPlan{}, err
   559  	}
   560  
   561  	projections := prj.Projections
   562  	if len(projections) == 0 {
   563  		// We have only pass-through columns.
   564  		return b.applySimpleProject(input, prj.Passthrough, prj.ProvidedPhysical().Ordering)
   565  	}
   566  
   567  	var res execPlan
   568  	exprs := make(tree.TypedExprs, 0, len(projections)+prj.Passthrough.Len())
   569  	cols := make(sqlbase.ResultColumns, 0, len(exprs))
   570  	ctx := input.makeBuildScalarCtx()
   571  	for i := range projections {
   572  		item := &projections[i]
   573  		expr, err := b.buildScalar(&ctx, item.Element)
   574  		if err != nil {
   575  			return execPlan{}, err
   576  		}
   577  		res.outputCols.Set(int(item.Col), i)
   578  		exprs = append(exprs, expr)
   579  		cols = append(cols, sqlbase.ResultColumn{
   580  			Name: md.ColumnMeta(item.Col).Alias,
   581  			Typ:  item.Typ,
   582  		})
   583  	}
   584  	prj.Passthrough.ForEach(func(colID opt.ColumnID) {
   585  		res.outputCols.Set(int(colID), len(exprs))
   586  		indexedVar := b.indexedVar(&ctx, md, colID)
   587  		exprs = append(exprs, indexedVar)
   588  		meta := md.ColumnMeta(colID)
   589  		cols = append(cols, sqlbase.ResultColumn{
   590  			Name: meta.Alias,
   591  			Typ:  meta.Type,
   592  		})
   593  	})
   594  	reqOrdering := res.reqOrdering(prj)
   595  	res.root, err = b.factory.ConstructRender(input.root, cols, exprs, reqOrdering)
   596  	if err != nil {
   597  		return execPlan{}, err
   598  	}
   599  	return res, nil
   600  }
   601  
   602  func (b *Builder) buildApplyJoin(join memo.RelExpr) (execPlan, error) {
   603  	switch join.Op() {
   604  	case opt.InnerJoinApplyOp, opt.LeftJoinApplyOp, opt.SemiJoinApplyOp, opt.AntiJoinApplyOp:
   605  	default:
   606  		return execPlan{}, fmt.Errorf("couldn't execute correlated subquery with op %s", join.Op())
   607  	}
   608  	joinType := joinOpToJoinType(join.Op())
   609  	leftExpr := join.Child(0).(memo.RelExpr)
   610  	leftProps := leftExpr.Relational()
   611  	rightExpr := join.Child(1).(memo.RelExpr)
   612  	rightProps := rightExpr.Relational()
   613  	filters := join.Child(2).(*memo.FiltersExpr)
   614  
   615  	leftPlan, err := b.buildRelational(leftExpr)
   616  	if err != nil {
   617  		return execPlan{}, err
   618  	}
   619  
   620  	// Make a copy of the required props for the right side.
   621  	rightRequiredProps := *rightExpr.RequiredPhysical()
   622  	// The right-hand side will produce the output columns in order.
   623  	rightRequiredProps.Presentation = b.makePresentation(rightProps.OutputCols)
   624  
   625  	// leftBoundCols is the set of columns that this apply join binds.
   626  	leftBoundCols := leftProps.OutputCols.Intersection(rightProps.OuterCols)
   627  	// leftBoundColMap is a map from opt.ColumnID to opt.ColumnOrdinal that maps
   628  	// a column bound by the left side of this apply join to the column ordinal
   629  	// in the left side that contains the binding.
   630  	var leftBoundColMap opt.ColMap
   631  	for col, ok := leftBoundCols.Next(0); ok; col, ok = leftBoundCols.Next(col + 1) {
   632  		v, ok := leftPlan.outputCols.Get(int(col))
   633  		if !ok {
   634  			return execPlan{}, fmt.Errorf("couldn't find binding column %d in left output columns", col)
   635  		}
   636  		leftBoundColMap.Set(int(col), v)
   637  	}
   638  
   639  	// Now, the cool part! We set up an ApplyJoinPlanRightSideFn which plans the
   640  	// right side given a particular left side row. We do this planning in a
   641  	// separate memo, but we use the same exec.Factory.
   642  	//
   643  	// Note: we put o outside of the function so we allocate it only once.
   644  	var o xform.Optimizer
   645  	planRightSideFn := func(leftRow tree.Datums) (exec.Plan, error) {
   646  		o.Init(b.evalCtx, b.catalog)
   647  		f := o.Factory()
   648  
   649  		// Copy the right expression into a new memo, replacing each bound column
   650  		// with the corresponding value from the left row.
   651  		var replaceFn norm.ReplaceFunc
   652  		replaceFn = func(e opt.Expr) opt.Expr {
   653  			switch t := e.(type) {
   654  			case *memo.VariableExpr:
   655  				if leftOrd, ok := leftBoundColMap.Get(int(t.Col)); ok {
   656  					return f.ConstructConstVal(leftRow[leftOrd], t.Typ)
   657  				}
   658  			}
   659  			return f.CopyAndReplaceDefault(e, replaceFn)
   660  		}
   661  		f.CopyAndReplace(rightExpr, &rightRequiredProps, replaceFn)
   662  
   663  		newRightSide, err := o.Optimize()
   664  		if err != nil {
   665  			return nil, err
   666  		}
   667  
   668  		eb := New(b.factory, f.Memo(), b.catalog, newRightSide, b.evalCtx)
   669  		eb.disableTelemetry = true
   670  		plan, err := eb.Build()
   671  		if err != nil {
   672  			if errors.IsAssertionFailure(err) {
   673  				// Enhance the error with the EXPLAIN (OPT, VERBOSE) of the inner
   674  				// expression.
   675  				fmtFlags := memo.ExprFmtHideQualifications | memo.ExprFmtHideScalars | memo.ExprFmtHideTypes
   676  				explainOpt := o.FormatExpr(newRightSide, fmtFlags)
   677  				err = errors.WithDetailf(err, "newRightSide:\n%s", explainOpt)
   678  			}
   679  			return nil, err
   680  		}
   681  		return plan, nil
   682  	}
   683  
   684  	// The right plan will always produce the columns in the presentation, in
   685  	// the same order.
   686  	var rightOutputCols opt.ColMap
   687  	for i := range rightRequiredProps.Presentation {
   688  		rightOutputCols.Set(int(rightRequiredProps.Presentation[i].ID), i)
   689  	}
   690  	allCols := joinOutputMap(leftPlan.outputCols, rightOutputCols)
   691  
   692  	var onExpr tree.TypedExpr
   693  	if len(*filters) != 0 {
   694  		scalarCtx := buildScalarCtx{
   695  			ivh:     tree.MakeIndexedVarHelper(nil /* container */, numOutputColsInMap(allCols)),
   696  			ivarMap: allCols,
   697  		}
   698  		onExpr, err = b.buildScalar(&scalarCtx, filters)
   699  		if err != nil {
   700  			return execPlan{}, err
   701  		}
   702  	}
   703  
   704  	var outputCols opt.ColMap
   705  	if joinType == sqlbase.LeftSemiJoin || joinType == sqlbase.LeftAntiJoin {
   706  		// For semi and anti join, only the left columns are output.
   707  		outputCols = leftPlan.outputCols
   708  	} else {
   709  		outputCols = allCols
   710  	}
   711  
   712  	ep := execPlan{outputCols: outputCols}
   713  
   714  	ep.root, err = b.factory.ConstructApplyJoin(
   715  		joinType,
   716  		leftPlan.root,
   717  		b.presentationToResultColumns(rightRequiredProps.Presentation),
   718  		onExpr,
   719  		planRightSideFn,
   720  	)
   721  	if err != nil {
   722  		return execPlan{}, err
   723  	}
   724  	return ep, nil
   725  }
   726  
   727  // makePresentation creates a Presentation that contains the given columns, in
   728  // order of their IDs.
   729  func (b *Builder) makePresentation(cols opt.ColSet) physical.Presentation {
   730  	md := b.mem.Metadata()
   731  	result := make(physical.Presentation, 0, cols.Len())
   732  	cols.ForEach(func(col opt.ColumnID) {
   733  		result = append(result, opt.AliasedColumn{
   734  			Alias: md.ColumnMeta(col).Alias,
   735  			ID:    col,
   736  		})
   737  	})
   738  	return result
   739  }
   740  
   741  // presentationToResultColumns returns ResultColumns corresponding to the
   742  // columns in a presentation.
   743  func (b *Builder) presentationToResultColumns(pres physical.Presentation) sqlbase.ResultColumns {
   744  	md := b.mem.Metadata()
   745  	result := make(sqlbase.ResultColumns, len(pres))
   746  	for i := range pres {
   747  		result[i] = sqlbase.ResultColumn{
   748  			Name: pres[i].Alias,
   749  			Typ:  md.ColumnMeta(pres[i].ID).Type,
   750  		}
   751  	}
   752  	return result
   753  }
   754  
   755  func (b *Builder) buildHashJoin(join memo.RelExpr) (execPlan, error) {
   756  	if f := join.Private().(*memo.JoinPrivate).Flags; !f.Has(memo.AllowHashJoinStoreRight) {
   757  		// We need to do a bit of reverse engineering here to determine what the
   758  		// hint was.
   759  		hint := tree.AstLookup
   760  		if f.Has(memo.AllowMergeJoin) {
   761  			hint = tree.AstMerge
   762  		}
   763  
   764  		return execPlan{}, errors.Errorf(
   765  			"could not produce a query plan conforming to the %s JOIN hint", hint,
   766  		)
   767  	}
   768  
   769  	joinType := joinOpToJoinType(join.Op())
   770  	leftExpr := join.Child(0).(memo.RelExpr)
   771  	rightExpr := join.Child(1).(memo.RelExpr)
   772  	filters := join.Child(2).(*memo.FiltersExpr)
   773  
   774  	leftEq, rightEq := memo.ExtractJoinEqualityColumns(
   775  		leftExpr.Relational().OutputCols,
   776  		rightExpr.Relational().OutputCols,
   777  		*filters,
   778  	)
   779  	if !b.disableTelemetry {
   780  		if len(leftEq) > 0 {
   781  			telemetry.Inc(sqltelemetry.JoinAlgoHashUseCounter)
   782  		} else {
   783  			telemetry.Inc(sqltelemetry.JoinAlgoCrossUseCounter)
   784  		}
   785  		telemetry.Inc(opt.JoinTypeToUseCounter(join.Op()))
   786  	}
   787  
   788  	left, right, onExpr, outputCols, err := b.initJoinBuild(
   789  		leftExpr,
   790  		rightExpr,
   791  		memo.ExtractRemainingJoinFilters(*filters, leftEq, rightEq),
   792  		joinType,
   793  	)
   794  	if err != nil {
   795  		return execPlan{}, err
   796  	}
   797  	ep := execPlan{outputCols: outputCols}
   798  
   799  	// Convert leftEq/rightEq to ordinals.
   800  	eqColsBuf := make([]exec.NodeColumnOrdinal, 2*len(leftEq))
   801  	leftEqOrdinals := eqColsBuf[:len(leftEq):len(leftEq)]
   802  	rightEqOrdinals := eqColsBuf[len(leftEq):]
   803  	for i := range leftEq {
   804  		leftEqOrdinals[i] = left.getNodeColumnOrdinal(leftEq[i])
   805  		rightEqOrdinals[i] = right.getNodeColumnOrdinal(rightEq[i])
   806  	}
   807  
   808  	leftEqColsAreKey := leftExpr.Relational().FuncDeps.ColsAreStrictKey(leftEq.ToSet())
   809  	rightEqColsAreKey := rightExpr.Relational().FuncDeps.ColsAreStrictKey(rightEq.ToSet())
   810  
   811  	ep.root, err = b.factory.ConstructHashJoin(
   812  		joinType,
   813  		left.root, right.root,
   814  		leftEqOrdinals, rightEqOrdinals,
   815  		leftEqColsAreKey, rightEqColsAreKey,
   816  		onExpr,
   817  	)
   818  	if err != nil {
   819  		return execPlan{}, err
   820  	}
   821  	return ep, nil
   822  }
   823  
   824  func (b *Builder) buildMergeJoin(join *memo.MergeJoinExpr) (execPlan, error) {
   825  	if !b.disableTelemetry {
   826  		telemetry.Inc(sqltelemetry.JoinAlgoMergeUseCounter)
   827  		telemetry.Inc(opt.JoinTypeToUseCounter(join.JoinType))
   828  	}
   829  
   830  	joinType := joinOpToJoinType(join.JoinType)
   831  
   832  	left, right, onExpr, outputCols, err := b.initJoinBuild(
   833  		join.Left, join.Right, join.On, joinType,
   834  	)
   835  	if err != nil {
   836  		return execPlan{}, err
   837  	}
   838  	leftOrd := left.sqlOrdering(join.LeftEq)
   839  	rightOrd := right.sqlOrdering(join.RightEq)
   840  	ep := execPlan{outputCols: outputCols}
   841  	reqOrd := ep.reqOrdering(join)
   842  	leftEqColsAreKey := join.Left.Relational().FuncDeps.ColsAreStrictKey(join.LeftEq.ColSet())
   843  	rightEqColsAreKey := join.Right.Relational().FuncDeps.ColsAreStrictKey(join.RightEq.ColSet())
   844  	ep.root, err = b.factory.ConstructMergeJoin(
   845  		joinType,
   846  		left.root, right.root,
   847  		onExpr,
   848  		leftOrd, rightOrd, reqOrd,
   849  		leftEqColsAreKey, rightEqColsAreKey,
   850  	)
   851  	if err != nil {
   852  		return execPlan{}, err
   853  	}
   854  	return ep, nil
   855  }
   856  
   857  // initJoinBuild builds the inputs to the join as well as the ON expression.
   858  func (b *Builder) initJoinBuild(
   859  	leftChild memo.RelExpr,
   860  	rightChild memo.RelExpr,
   861  	filters memo.FiltersExpr,
   862  	joinType sqlbase.JoinType,
   863  ) (leftPlan, rightPlan execPlan, onExpr tree.TypedExpr, outputCols opt.ColMap, _ error) {
   864  	leftPlan, err := b.buildRelational(leftChild)
   865  	if err != nil {
   866  		return execPlan{}, execPlan{}, nil, opt.ColMap{}, err
   867  	}
   868  	rightPlan, err = b.buildRelational(rightChild)
   869  	if err != nil {
   870  		return execPlan{}, execPlan{}, nil, opt.ColMap{}, err
   871  	}
   872  
   873  	allCols := joinOutputMap(leftPlan.outputCols, rightPlan.outputCols)
   874  
   875  	ctx := buildScalarCtx{
   876  		ivh:     tree.MakeIndexedVarHelper(nil /* container */, numOutputColsInMap(allCols)),
   877  		ivarMap: allCols,
   878  	}
   879  
   880  	if len(filters) != 0 {
   881  		onExpr, err = b.buildScalar(&ctx, &filters)
   882  		if err != nil {
   883  			return execPlan{}, execPlan{}, nil, opt.ColMap{}, err
   884  		}
   885  	}
   886  
   887  	if joinType == sqlbase.LeftSemiJoin || joinType == sqlbase.LeftAntiJoin {
   888  		// For semi and anti join, only the left columns are output.
   889  		return leftPlan, rightPlan, onExpr, leftPlan.outputCols, nil
   890  	}
   891  	return leftPlan, rightPlan, onExpr, allCols, nil
   892  }
   893  
   894  // joinOutputMap determines the outputCols map for a (non-semi/anti) join, given
   895  // the outputCols maps for its inputs.
   896  func joinOutputMap(left, right opt.ColMap) opt.ColMap {
   897  	numLeftCols := numOutputColsInMap(left)
   898  
   899  	res := left.Copy()
   900  	right.ForEach(func(colIdx, rightIdx int) {
   901  		res.Set(colIdx, rightIdx+numLeftCols)
   902  	})
   903  	return res
   904  }
   905  
   906  func joinOpToJoinType(op opt.Operator) sqlbase.JoinType {
   907  	switch op {
   908  	case opt.InnerJoinOp, opt.InnerJoinApplyOp:
   909  		return sqlbase.InnerJoin
   910  
   911  	case opt.LeftJoinOp, opt.LeftJoinApplyOp:
   912  		return sqlbase.LeftOuterJoin
   913  
   914  	case opt.RightJoinOp:
   915  		return sqlbase.RightOuterJoin
   916  
   917  	case opt.FullJoinOp:
   918  		return sqlbase.FullOuterJoin
   919  
   920  	case opt.SemiJoinOp, opt.SemiJoinApplyOp:
   921  		return sqlbase.LeftSemiJoin
   922  
   923  	case opt.AntiJoinOp, opt.AntiJoinApplyOp:
   924  		return sqlbase.LeftAntiJoin
   925  
   926  	default:
   927  		panic(errors.AssertionFailedf("not a join op %s", log.Safe(op)))
   928  	}
   929  }
   930  
// buildGroupBy builds the plan for a grouping operator. The input is built via
// buildGroupByInput. The resulting plan outputs the grouping columns first (in
// the iteration order of the grouping column set), followed by one column per
// aggregation, in the order of the aggregations list.
//
// If the operator is a ScalarGroupByOp the node is constructed with
// ConstructScalarGroupBy; otherwise groupBy is treated as a *memo.GroupByExpr
// and the node is constructed with ConstructGroupBy.
func (b *Builder) buildGroupBy(groupBy memo.RelExpr) (execPlan, error) {
	input, err := b.buildGroupByInput(groupBy)
	if err != nil {
		return execPlan{}, err
	}

	var ep execPlan
	groupingCols := groupBy.Private().(*memo.GroupingPrivate).GroupingCols
	// groupingColIdx holds, for each grouping column, its ordinal in the input
	// node; the position in this slice is the column's ordinal in the output.
	groupingColIdx := make([]exec.NodeColumnOrdinal, 0, groupingCols.Len())
	for i, ok := groupingCols.Next(0); ok; i, ok = groupingCols.Next(i + 1) {
		ep.outputCols.Set(int(i), len(groupingColIdx))
		groupingColIdx = append(groupingColIdx, input.getNodeColumnOrdinal(i))
	}

	aggregations := *groupBy.Child(1).(*memo.AggregationsExpr)
	aggInfos := make([]exec.AggInfo, len(aggregations))
	for i := range aggregations {
		item := &aggregations[i]
		agg := item.Agg

		// Peel off an optional AggFilter wrapper. filterOrd stays at -1 when the
		// aggregation has no filter; the filter itself must be a plain variable.
		var filterOrd exec.NodeColumnOrdinal = -1
		if aggFilter, ok := agg.(*memo.AggFilterExpr); ok {
			filter, ok := aggFilter.Filter.(*memo.VariableExpr)
			if !ok {
				return execPlan{}, errors.AssertionFailedf("only VariableOp args supported")
			}
			filterOrd = input.getNodeColumnOrdinal(filter.Col)
			agg = aggFilter.Input
		}

		// Peel off an optional AggDistinct wrapper (checked after the filter
		// wrapper has been removed).
		distinct := false
		if aggDistinct, ok := agg.(*memo.AggDistinctExpr); ok {
			distinct = true
			agg = aggDistinct.Input
		}

		// The overload is looked up on the unwrapped aggregate.
		name, overload := memo.FindAggregateOverload(agg)

		// Accumulate variable arguments in argCols and constant arguments in
		// constArgs. Constant arguments must follow variable arguments.
		var argCols []exec.NodeColumnOrdinal
		var constArgs tree.Datums
		for j, n := 0, agg.ChildCount(); j < n; j++ {
			child := agg.Child(j)
			if variable, ok := child.(*memo.VariableExpr); ok {
				if len(constArgs) != 0 {
					return execPlan{}, errors.Errorf("constant args must come after variable args")
				}
				argCols = append(argCols, input.getNodeColumnOrdinal(variable.Col))
			} else {
				if len(argCols) == 0 {
					return execPlan{}, errors.Errorf("a constant arg requires at least one variable arg")
				}
				constArgs = append(constArgs, memo.ExtractConstDatum(child))
			}
		}

		// Note that ResultType is taken from the original item.Agg (including
		// any filter/distinct wrappers), not from the unwrapped agg.
		aggInfos[i] = exec.AggInfo{
			FuncName:   name,
			Builtin:    overload,
			Distinct:   distinct,
			ResultType: item.Agg.DataType(),
			ArgCols:    argCols,
			ConstArgs:  constArgs,
			Filter:     filterOrd,
		}
		// Aggregation outputs follow the grouping columns.
		ep.outputCols.Set(int(item.Col), len(groupingColIdx)+i)
	}

	if groupBy.Op() == opt.ScalarGroupByOp {
		ep.root, err = b.factory.ConstructScalarGroupBy(input.root, aggInfos)
	} else {
		groupBy := groupBy.(*memo.GroupByExpr)
		// The streaming grouping-column ordering is expressed in terms of the
		// input's node columns; the required ordering in terms of the output.
		groupingColOrder := input.sqlOrdering(ordering.StreamingGroupingColOrdering(
			&groupBy.GroupingPrivate, &groupBy.RequiredPhysical().Ordering,
		))
		reqOrdering := ep.reqOrdering(groupBy)
		ep.root, err = b.factory.ConstructGroupBy(
			input.root, groupingColIdx, groupingColOrder, aggInfos, reqOrdering,
		)
	}
	if err != nil {
		return execPlan{}, err
	}
	return ep, nil
}
  1017  
  1018  func (b *Builder) buildDistinct(distinct memo.RelExpr) (execPlan, error) {
  1019  	private := distinct.Private().(*memo.GroupingPrivate)
  1020  
  1021  	if private.GroupingCols.Empty() {
  1022  		// A DistinctOn with no grouping columns should have been converted to a
  1023  		// LIMIT 1 or Max1Row by normalization rules.
  1024  		return execPlan{}, fmt.Errorf("cannot execute distinct on no columns")
  1025  	}
  1026  	input, err := b.buildGroupByInput(distinct)
  1027  	if err != nil {
  1028  		return execPlan{}, err
  1029  	}
  1030  
  1031  	distinctCols := input.getNodeColumnOrdinalSet(private.GroupingCols)
  1032  	var orderedCols exec.NodeColumnOrdinalSet
  1033  	ordering := ordering.StreamingGroupingColOrdering(
  1034  		private, &distinct.RequiredPhysical().Ordering,
  1035  	)
  1036  	for i := range ordering {
  1037  		orderedCols.Add(int(input.getNodeColumnOrdinal(ordering[i].ID())))
  1038  	}
  1039  	ep := execPlan{outputCols: input.outputCols}
  1040  
  1041  	reqOrdering := ep.reqOrdering(distinct)
  1042  	ep.root, err = b.factory.ConstructDistinct(
  1043  		input.root, distinctCols, orderedCols, reqOrdering,
  1044  		private.NullsAreDistinct, private.ErrorOnDup)
  1045  	if err != nil {
  1046  		return execPlan{}, err
  1047  	}
  1048  
  1049  	// buildGroupByInput can add extra sort column(s), so discard those if they
  1050  	// are present by using an additional projection.
  1051  	outCols := distinct.Relational().OutputCols
  1052  	if input.outputCols.Len() == outCols.Len() {
  1053  		return ep, nil
  1054  	}
  1055  	return b.ensureColumns(
  1056  		ep, opt.ColSetToList(outCols), nil /* colNames */, distinct.ProvidedPhysical().Ordering,
  1057  	)
  1058  }
  1059  
  1060  func (b *Builder) buildGroupByInput(groupBy memo.RelExpr) (execPlan, error) {
  1061  	groupByInput := groupBy.Child(0).(memo.RelExpr)
  1062  	input, err := b.buildRelational(groupByInput)
  1063  	if err != nil {
  1064  		return execPlan{}, err
  1065  	}
  1066  
  1067  	// TODO(radu): this is a one-off fix for an otherwise bigger gap: we should
  1068  	// have a more general mechanism (through physical properties or otherwise) to
  1069  	// figure out unneeded columns and project them away as necessary. The
  1070  	// optimizer doesn't guarantee that it adds ProjectOps everywhere.
  1071  	//
  1072  	// We address just the GroupBy case for now because there is a particularly
  1073  	// important case with COUNT(*) where we can remove all input columns, which
  1074  	// leads to significant speedup.
  1075  	private := groupBy.Private().(*memo.GroupingPrivate)
  1076  	neededCols := private.GroupingCols.Copy()
  1077  	aggs := *groupBy.Child(1).(*memo.AggregationsExpr)
  1078  	for i := range aggs {
  1079  		neededCols.UnionWith(memo.ExtractAggInputColumns(aggs[i].Agg))
  1080  	}
  1081  
  1082  	// In rare cases, we might need a column only for its ordering, for example:
  1083  	//   SELECT concat_agg(s) FROM (SELECT s FROM kv ORDER BY k)
  1084  	// In this case we can't project the column away as it is still needed by
  1085  	// distsql to maintain the desired ordering.
  1086  	for _, c := range groupByInput.ProvidedPhysical().Ordering {
  1087  		neededCols.Add(c.ID())
  1088  	}
  1089  
  1090  	if neededCols.Equals(groupByInput.Relational().OutputCols) {
  1091  		// All columns produced by the input are used.
  1092  		return input, nil
  1093  	}
  1094  
  1095  	// The input is producing columns that are not useful; set up a projection.
  1096  	cols := make([]exec.NodeColumnOrdinal, 0, neededCols.Len())
  1097  	var newOutputCols opt.ColMap
  1098  	for colID, ok := neededCols.Next(0); ok; colID, ok = neededCols.Next(colID + 1) {
  1099  		ordinal, ordOk := input.outputCols.Get(int(colID))
  1100  		if !ordOk {
  1101  			panic(errors.AssertionFailedf("needed column not produced by group-by input"))
  1102  		}
  1103  		newOutputCols.Set(int(colID), len(cols))
  1104  		cols = append(cols, exec.NodeColumnOrdinal(ordinal))
  1105  	}
  1106  
  1107  	input.outputCols = newOutputCols
  1108  	reqOrdering := input.reqOrdering(groupByInput)
  1109  	input.root, err = b.factory.ConstructSimpleProject(
  1110  		input.root, cols, nil /* colNames */, reqOrdering,
  1111  	)
  1112  	if err != nil {
  1113  		return execPlan{}, err
  1114  	}
  1115  	return input, nil
  1116  }
  1117  
  1118  func (b *Builder) buildSetOp(set memo.RelExpr) (execPlan, error) {
  1119  	leftExpr := set.Child(0).(memo.RelExpr)
  1120  	left, err := b.buildRelational(leftExpr)
  1121  	if err != nil {
  1122  		return execPlan{}, err
  1123  	}
  1124  	rightExpr := set.Child(1).(memo.RelExpr)
  1125  	right, err := b.buildRelational(rightExpr)
  1126  	if err != nil {
  1127  		return execPlan{}, err
  1128  	}
  1129  
  1130  	private := set.Private().(*memo.SetPrivate)
  1131  
  1132  	// We need to make sure that the two sides render the columns in the same
  1133  	// order; otherwise we add projections.
  1134  	//
  1135  	// In most cases the projection is needed only to reorder the columns, but not
  1136  	// always. For example:
  1137  	//  (SELECT a, a, b FROM ab) UNION (SELECT x, y, z FROM xyz)
  1138  	// The left input could be just a scan that produces two columns.
  1139  	//
  1140  	// TODO(radu): we don't have to respect the exact order in the two ColLists;
  1141  	// if one side has the right columns but in a different permutation, we could
  1142  	// set up a matching projection on the other side. For example:
  1143  	//   (SELECT b, c, a FROM abc) UNION (SELECT z, y, x FROM xyz)
  1144  	// The expression for this could be a UnionOp on top of two ScanOps (any
  1145  	// internal projections could be removed by normalization rules).
  1146  	// The scans produce columns `a, b, c` and `x, y, z` respectively. We could
  1147  	// leave `b, c, a` as is and project the other side to `x, z, y`.
  1148  	// Note that (unless this is part of a larger query) the presentation property
  1149  	// will ensure that the columns are presented correctly in the output (i.e. in
  1150  	// the order `b, c, a`).
  1151  	left, err = b.ensureColumns(
  1152  		left, private.LeftCols, nil /* colNames */, leftExpr.ProvidedPhysical().Ordering,
  1153  	)
  1154  	if err != nil {
  1155  		return execPlan{}, err
  1156  	}
  1157  	right, err = b.ensureColumns(
  1158  		right, private.RightCols, nil /* colNames */, rightExpr.ProvidedPhysical().Ordering,
  1159  	)
  1160  	if err != nil {
  1161  		return execPlan{}, err
  1162  	}
  1163  
  1164  	var typ tree.UnionType
  1165  	var all bool
  1166  	switch set.Op() {
  1167  	case opt.UnionOp:
  1168  		typ, all = tree.UnionOp, false
  1169  	case opt.UnionAllOp:
  1170  		typ, all = tree.UnionOp, true
  1171  	case opt.IntersectOp:
  1172  		typ, all = tree.IntersectOp, false
  1173  	case opt.IntersectAllOp:
  1174  		typ, all = tree.IntersectOp, true
  1175  	case opt.ExceptOp:
  1176  		typ, all = tree.ExceptOp, false
  1177  	case opt.ExceptAllOp:
  1178  		typ, all = tree.ExceptOp, true
  1179  	default:
  1180  		panic(errors.AssertionFailedf("invalid operator %s", log.Safe(set.Op())))
  1181  	}
  1182  
  1183  	node, err := b.factory.ConstructSetOp(typ, all, left.root, right.root)
  1184  	if err != nil {
  1185  		return execPlan{}, err
  1186  	}
  1187  	ep := execPlan{root: node}
  1188  	for i, col := range private.OutCols {
  1189  		ep.outputCols.Set(int(col), i)
  1190  	}
  1191  	return ep, nil
  1192  }
  1193  
  1194  // buildLimitOffset builds a plan for a LimitOp or OffsetOp
  1195  func (b *Builder) buildLimitOffset(e memo.RelExpr) (execPlan, error) {
  1196  	input, err := b.buildRelational(e.Child(0).(memo.RelExpr))
  1197  	if err != nil {
  1198  		return execPlan{}, err
  1199  	}
  1200  	// LIMIT/OFFSET expression should never need buildScalarContext, because it
  1201  	// can't refer to the input expression.
  1202  	expr, err := b.buildScalar(nil, e.Child(1).(opt.ScalarExpr))
  1203  	if err != nil {
  1204  		return execPlan{}, err
  1205  	}
  1206  	var node exec.Node
  1207  	if e.Op() == opt.LimitOp {
  1208  		node, err = b.factory.ConstructLimit(input.root, expr, nil)
  1209  	} else {
  1210  		node, err = b.factory.ConstructLimit(input.root, nil, expr)
  1211  	}
  1212  	if err != nil {
  1213  		return execPlan{}, err
  1214  	}
  1215  	return execPlan{root: node, outputCols: input.outputCols}, nil
  1216  }
  1217  
  1218  func (b *Builder) buildSort(sort *memo.SortExpr) (execPlan, error) {
  1219  	input, err := b.buildRelational(sort.Input)
  1220  	if err != nil {
  1221  		return execPlan{}, err
  1222  	}
  1223  
  1224  	ordering := sort.ProvidedPhysical().Ordering
  1225  	inputOrdering := sort.Input.ProvidedPhysical().Ordering
  1226  	alreadyOrderedPrefix := 0
  1227  	for i := range inputOrdering {
  1228  		if i == len(ordering) {
  1229  			return execPlan{}, errors.AssertionFailedf("sort ordering already provided by input")
  1230  		}
  1231  		if inputOrdering[i] != ordering[i] {
  1232  			break
  1233  		}
  1234  		alreadyOrderedPrefix = i + 1
  1235  	}
  1236  
  1237  	node, err := b.factory.ConstructSort(input.root, input.sqlOrdering(ordering), alreadyOrderedPrefix)
  1238  	if err != nil {
  1239  		return execPlan{}, err
  1240  	}
  1241  	return execPlan{root: node, outputCols: input.outputCols}, nil
  1242  }
  1243  
  1244  func (b *Builder) buildOrdinality(ord *memo.OrdinalityExpr) (execPlan, error) {
  1245  	input, err := b.buildRelational(ord.Input)
  1246  	if err != nil {
  1247  		return execPlan{}, err
  1248  	}
  1249  
  1250  	colName := b.mem.Metadata().ColumnMeta(ord.ColID).Alias
  1251  
  1252  	node, err := b.factory.ConstructOrdinality(input.root, colName)
  1253  	if err != nil {
  1254  		return execPlan{}, err
  1255  	}
  1256  
  1257  	// We have one additional ordinality column, which is ordered at the end of
  1258  	// the list.
  1259  	outputCols := input.outputCols.Copy()
  1260  	outputCols.Set(int(ord.ColID), outputCols.Len())
  1261  
  1262  	return execPlan{root: node, outputCols: outputCols}, nil
  1263  }
  1264  
  1265  func (b *Builder) buildIndexJoin(join *memo.IndexJoinExpr) (execPlan, error) {
  1266  	input, err := b.buildRelational(join.Input)
  1267  	if err != nil {
  1268  		return execPlan{}, err
  1269  	}
  1270  
  1271  	md := b.mem.Metadata()
  1272  	tab := md.Table(join.Table)
  1273  
  1274  	// TODO(radu): the distsql implementation of index join assumes that the input
  1275  	// starts with the PK columns in order (#40749).
  1276  	pri := tab.Index(cat.PrimaryIndex)
  1277  	keyCols := make([]exec.NodeColumnOrdinal, pri.KeyColumnCount())
  1278  	for i := range keyCols {
  1279  		keyCols[i] = input.getNodeColumnOrdinal(join.Table.ColumnID(pri.Column(i).Ordinal))
  1280  	}
  1281  
  1282  	cols := join.Cols
  1283  	needed, output := b.getColumns(cols, join.Table)
  1284  	res := execPlan{outputCols: output}
  1285  	res.root, err = b.factory.ConstructIndexJoin(
  1286  		input.root, tab, keyCols, needed, res.reqOrdering(join),
  1287  	)
  1288  	if err != nil {
  1289  		return execPlan{}, err
  1290  	}
  1291  
  1292  	return res, nil
  1293  }
  1294  
  1295  func (b *Builder) buildLookupJoin(join *memo.LookupJoinExpr) (execPlan, error) {
  1296  	if !b.disableTelemetry {
  1297  		telemetry.Inc(sqltelemetry.JoinAlgoLookupUseCounter)
  1298  		telemetry.Inc(opt.JoinTypeToUseCounter(join.JoinType))
  1299  	}
  1300  
  1301  	input, err := b.buildRelational(join.Input)
  1302  	if err != nil {
  1303  		return execPlan{}, err
  1304  	}
  1305  
  1306  	md := b.mem.Metadata()
  1307  
  1308  	keyCols := make([]exec.NodeColumnOrdinal, len(join.KeyCols))
  1309  	for i, c := range join.KeyCols {
  1310  		keyCols[i] = input.getNodeColumnOrdinal(c)
  1311  	}
  1312  
  1313  	inputCols := join.Input.Relational().OutputCols
  1314  	lookupCols := join.Cols.Difference(inputCols)
  1315  
  1316  	lookupOrdinals, lookupColMap := b.getColumns(lookupCols, join.Table)
  1317  	allCols := joinOutputMap(input.outputCols, lookupColMap)
  1318  
  1319  	res := execPlan{outputCols: allCols}
  1320  	if join.JoinType == opt.SemiJoinOp || join.JoinType == opt.AntiJoinOp {
  1321  		// For semi and anti join, only the left columns are output.
  1322  		res.outputCols = input.outputCols
  1323  	}
  1324  
  1325  	ctx := buildScalarCtx{
  1326  		ivh:     tree.MakeIndexedVarHelper(nil /* container */, allCols.Len()),
  1327  		ivarMap: allCols,
  1328  	}
  1329  	onExpr, err := b.buildScalar(&ctx, &join.On)
  1330  	if err != nil {
  1331  		return execPlan{}, err
  1332  	}
  1333  
  1334  	tab := md.Table(join.Table)
  1335  	idx := tab.Index(join.Index)
  1336  	var eqCols opt.ColSet
  1337  	for i := range join.KeyCols {
  1338  		eqCols.Add(join.Table.ColumnID(idx.Column(i).Ordinal))
  1339  	}
  1340  
  1341  	res.root, err = b.factory.ConstructLookupJoin(
  1342  		joinOpToJoinType(join.JoinType),
  1343  		input.root,
  1344  		tab,
  1345  		idx,
  1346  		keyCols,
  1347  		join.LookupColsAreTableKey,
  1348  		lookupOrdinals,
  1349  		onExpr,
  1350  		res.reqOrdering(join),
  1351  	)
  1352  	if err != nil {
  1353  		return execPlan{}, err
  1354  	}
  1355  
  1356  	// Apply a post-projection if Cols doesn't contain all input columns.
  1357  	if !inputCols.SubsetOf(join.Cols) {
  1358  		return b.applySimpleProject(res, join.Cols, join.ProvidedPhysical().Ordering)
  1359  	}
  1360  	return res, nil
  1361  }
  1362  
  1363  func (b *Builder) buildGeoLookupJoin(join *memo.GeoLookupJoinExpr) (execPlan, error) {
  1364  	input, err := b.buildRelational(join.Input)
  1365  	if err != nil {
  1366  		return execPlan{}, err
  1367  	}
  1368  
  1369  	md := b.mem.Metadata()
  1370  
  1371  	inputCols := join.Input.Relational().OutputCols
  1372  	lookupCols := join.Cols.Difference(inputCols)
  1373  
  1374  	lookupOrdinals, lookupColMap := b.getColumns(lookupCols, join.Table)
  1375  	allCols := joinOutputMap(input.outputCols, lookupColMap)
  1376  
  1377  	res := execPlan{outputCols: allCols}
  1378  	if join.JoinType == opt.SemiJoinOp || join.JoinType == opt.AntiJoinOp {
  1379  		// For semi and anti join, only the left columns are output.
  1380  		res.outputCols = input.outputCols
  1381  	}
  1382  
  1383  	ctx := buildScalarCtx{
  1384  		ivh:     tree.MakeIndexedVarHelper(nil /* container */, allCols.Len()),
  1385  		ivarMap: allCols,
  1386  	}
  1387  	onExpr, err := b.buildScalar(&ctx, &join.On)
  1388  	if err != nil {
  1389  		return execPlan{}, err
  1390  	}
  1391  
  1392  	tab := md.Table(join.Table)
  1393  	idx := tab.Index(join.Index)
  1394  
  1395  	res.root, err = b.factory.ConstructGeoLookupJoin(
  1396  		joinOpToJoinType(join.JoinType),
  1397  		join.GeoRelationshipType,
  1398  		input.root,
  1399  		tab,
  1400  		idx,
  1401  		input.getNodeColumnOrdinal(join.GeoCol),
  1402  		lookupOrdinals,
  1403  		onExpr,
  1404  		res.reqOrdering(join),
  1405  	)
  1406  	if err != nil {
  1407  		return execPlan{}, err
  1408  	}
  1409  
  1410  	// Apply a post-projection if Cols doesn't contain all input columns.
  1411  	if !inputCols.SubsetOf(join.Cols) {
  1412  		return b.applySimpleProject(res, join.Cols, join.ProvidedPhysical().Ordering)
  1413  	}
  1414  	return res, nil
  1415  }
  1416  
// buildZigzagJoin builds the plan for a zigzag join between two indexes. Unlike
// the other join builders in this file it has no relational inputs to build:
// both sides are described directly by a table and an index.
//
// The output consists of the left index's columns that are in join.Cols,
// followed by the right index's columns that are in join.Cols and were not
// already provided by the left side.
func (b *Builder) buildZigzagJoin(join *memo.ZigzagJoinExpr) (execPlan, error) {
	md := b.mem.Metadata()

	leftTable := md.Table(join.LeftTable)
	rightTable := md.Table(join.RightTable)
	leftIndex := leftTable.Index(join.LeftIndex)
	rightIndex := rightTable.Index(join.RightIndex)

	// The equality columns are expressed as table column ordinals. The loop
	// indexes RightEqCols by positions of LeftEqCols.
	// NOTE(review): this assumes both lists have the same length - confirm.
	leftEqCols := make([]exec.NodeColumnOrdinal, len(join.LeftEqCols))
	rightEqCols := make([]exec.NodeColumnOrdinal, len(join.RightEqCols))
	for i := range join.LeftEqCols {
		leftEqCols[i] = exec.NodeColumnOrdinal(join.LeftTable.ColumnOrdinal(join.LeftEqCols[i]))
		rightEqCols[i] = exec.NodeColumnOrdinal(join.RightTable.ColumnOrdinal(join.RightEqCols[i]))
	}
	leftCols := md.TableMeta(join.LeftTable).IndexColumns(join.LeftIndex).Intersection(join.Cols)
	rightCols := md.TableMeta(join.RightTable).IndexColumns(join.RightIndex).Intersection(join.Cols)
	// Remove duplicate columns, if any.
	rightCols.DifferenceWith(leftCols)

	leftOrdinals, leftColMap := b.getColumns(leftCols, join.LeftTable)
	rightOrdinals, rightColMap := b.getColumns(rightCols, join.RightTable)

	allCols := joinOutputMap(leftColMap, rightColMap)

	res := execPlan{outputCols: allCols}

	// The same scalar context is used for the ON expression and for the fixed
	// value expressions below.
	ctx := buildScalarCtx{
		ivh:     tree.MakeIndexedVarHelper(nil /* container */, leftColMap.Len()+rightColMap.Len()),
		ivarMap: allCols,
	}
	onExpr, err := b.buildScalar(&ctx, &join.On)
	if err != nil {
		return execPlan{}, err
	}

	// Build the fixed value scalars. These are represented as one value node
	// per side of the join, containing one row/tuple with fixed values for
	// a prefix of that index's columns.
	// NOTE(review): fixedVals and fixedCols both have exactly two entries, so
	// this presumably expects join.FixedVals to have at most two elements
	// (left, then right) - confirm.
	fixedVals := make([]exec.Node, 2)
	fixedCols := []opt.ColList{join.LeftFixedCols, join.RightFixedCols}
	for i := range join.FixedVals {
		tup := join.FixedVals[i].(*memo.TupleExpr)
		valExprs := make([]tree.TypedExpr, len(tup.Elems))
		for j := range tup.Elems {
			valExprs[j], err = b.buildScalar(&ctx, tup.Elems[j])
			if err != nil {
				return execPlan{}, err
			}
		}
		// This err is local to the loop body and shadows the outer one.
		valuesPlan, err := b.constructValues([][]tree.TypedExpr{valExprs}, fixedCols[i])
		if err != nil {
			return execPlan{}, err
		}
		fixedVals[i] = valuesPlan.root
	}

	res.root, err = b.factory.ConstructZigzagJoin(
		leftTable,
		leftIndex,
		rightTable,
		rightIndex,
		leftEqCols,
		rightEqCols,
		leftOrdinals,
		rightOrdinals,
		onExpr,
		fixedVals,
		res.reqOrdering(join),
	)
	if err != nil {
		return execPlan{}, err
	}

	return res, nil
}
  1492  
  1493  func (b *Builder) buildMax1Row(max1Row *memo.Max1RowExpr) (execPlan, error) {
  1494  	input, err := b.buildRelational(max1Row.Input)
  1495  	if err != nil {
  1496  		return execPlan{}, err
  1497  	}
  1498  
  1499  	node, err := b.factory.ConstructMax1Row(input.root, max1Row.ErrorText)
  1500  	if err != nil {
  1501  		return execPlan{}, err
  1502  	}
  1503  	return execPlan{root: node, outputCols: input.outputCols}, nil
  1504  }
  1505  
  1506  func (b *Builder) buildWith(with *memo.WithExpr) (execPlan, error) {
  1507  	value, err := b.buildRelational(with.Binding)
  1508  	if err != nil {
  1509  		return execPlan{}, err
  1510  	}
  1511  
  1512  	var label bytes.Buffer
  1513  	fmt.Fprintf(&label, "buffer %d", with.ID)
  1514  	if with.Name != "" {
  1515  		fmt.Fprintf(&label, " (%s)", with.Name)
  1516  	}
  1517  
  1518  	buffer, err := b.factory.ConstructBuffer(value.root, label.String())
  1519  	if err != nil {
  1520  		return execPlan{}, err
  1521  	}
  1522  
  1523  	// TODO(justin): if the binding here has a spoolNode at its root, we can
  1524  	// remove it, since subquery execution also guarantees complete execution.
  1525  
  1526  	// Add the buffer as a subquery so it gets executed ahead of time, and is
  1527  	// available to be referenced by other queries.
  1528  	b.subqueries = append(b.subqueries, exec.Subquery{
  1529  		ExprNode: with.OriginalExpr,
  1530  		// TODO(justin): this is wasteful: both the subquery and the bufferNode
  1531  		// will buffer up all the results.  This should be fixed by either making
  1532  		// the buffer point directly to the subquery results or adding a new
  1533  		// subquery mode that reads and discards all rows. This could possibly also
  1534  		// be fixed by ensuring that bufferNode exhausts its input (and forcing it
  1535  		// to behave like a spoolNode) and using the EXISTS mode.
  1536  		Mode: exec.SubqueryAllRows,
  1537  		Root: buffer,
  1538  	})
  1539  
  1540  	b.addBuiltWithExpr(with.ID, value.outputCols, buffer)
  1541  
  1542  	return b.buildRelational(with.Main)
  1543  }
  1544  
// buildRecursiveCTE builds the plan for a recursive CTE. The initial
// expression is built eagerly and projected to rec.InitialCols; the recursive
// expression is not built here but inside an iteration function that is
// handed to ConstructRecursiveCTE. The output columns are rec.OutCols, in
// order.
func (b *Builder) buildRecursiveCTE(rec *memo.RecursiveCTEExpr) (execPlan, error) {
	initial, err := b.buildRelational(rec.Initial)
	if err != nil {
		return execPlan{}, err
	}

	// Make sure we have the columns in the correct order.
	initial, err = b.ensureColumns(initial, rec.InitialCols, nil /* colNames */, nil /* ordering */)
	if err != nil {
		return execPlan{}, err
	}

	// Renumber the columns so they match the columns expected by the recursive
	// query.
	initial.outputCols = util.FastIntMap{}
	for i, col := range rec.OutCols {
		initial.outputCols.Set(int(col), i)
	}

	// To implement exec.RecursiveCTEIterationFn, we create a special Builder.

	innerBldTemplate := &Builder{
		factory: b.factory,
		mem:     b.mem,
		catalog: b.catalog,
		evalCtx: b.evalCtx,
		// If the recursive query itself contains CTEs, building it in the function
		// below will add to withExprs. Cap the slice to force reallocation on any
		// appends, so that they don't overwrite later appends by our original
		// builder.
		withExprs: b.withExprs[:len(b.withExprs):len(b.withExprs)],
	}

	// fn builds the plan for the recursive expression, with rec.WithID bound to
	// the given buffer. Each call works on its own copy of the template builder.
	fn := func(bufferRef exec.BufferNode) (exec.Plan, error) {
		// Use a separate builder each time.
		innerBld := *innerBldTemplate
		innerBld.addBuiltWithExpr(rec.WithID, initial.outputCols, bufferRef)
		plan, err := innerBld.build(rec.Recursive)
		if err != nil {
			return nil, err
		}
		// Ensure columns are output in the same order.
		plan, err = innerBld.ensureColumns(
			plan, rec.RecursiveCols, nil /* colNames */, nil, /* ordering */
		)
		if err != nil {
			return nil, err
		}
		// The plan carries whatever subqueries, cascades and checks the inner
		// builder accumulated.
		return innerBld.factory.ConstructPlan(plan.root, innerBld.subqueries, innerBld.cascades, innerBld.checks)
	}

	label := fmt.Sprintf("working buffer (%s)", rec.Name)
	var ep execPlan
	ep.root, err = b.factory.ConstructRecursiveCTE(initial.root, fn, label)
	if err != nil {
		return execPlan{}, err
	}
	for i, col := range rec.OutCols {
		ep.outputCols.Set(int(col), i)
	}
	return ep, nil
}
  1607  
  1608  func (b *Builder) buildWithScan(withScan *memo.WithScanExpr) (execPlan, error) {
  1609  	e := b.findBuiltWithExpr(withScan.With)
  1610  	if e == nil {
  1611  		err := errors.WithHint(
  1612  			errors.Errorf("couldn't find WITH expression %q with ID %d", withScan.Name, withScan.With),
  1613  			"references to WITH expressions from correlated subqueries are unsupported",
  1614  		)
  1615  		return execPlan{}, err
  1616  	}
  1617  
  1618  	var label bytes.Buffer
  1619  	fmt.Fprintf(&label, "buffer %d", withScan.With)
  1620  	if withScan.Name != "" {
  1621  		fmt.Fprintf(&label, " (%s)", withScan.Name)
  1622  	}
  1623  
  1624  	node, err := b.factory.ConstructScanBuffer(e.bufferNode, label.String())
  1625  	if err != nil {
  1626  		return execPlan{}, err
  1627  	}
  1628  	res := execPlan{root: node}
  1629  
  1630  	if maxVal, _ := e.outputCols.MaxValue(); len(withScan.InCols) == maxVal+1 {
  1631  		// We are outputting all columns. Just set up the map.
  1632  
  1633  		// The ColumnIDs from the With expression need to get remapped according to
  1634  		// the mapping in the withScan to get the actual colMap for this expression.
  1635  		for i := range withScan.InCols {
  1636  			idx, _ := e.outputCols.Get(int(withScan.InCols[i]))
  1637  			res.outputCols.Set(int(withScan.OutCols[i]), idx)
  1638  		}
  1639  	} else {
  1640  		// We need a projection.
  1641  		cols := make([]exec.NodeColumnOrdinal, len(withScan.InCols))
  1642  		for i := range withScan.InCols {
  1643  			col, ok := e.outputCols.Get(int(withScan.InCols[i]))
  1644  			if !ok {
  1645  				panic(errors.AssertionFailedf("column %d not in input", log.Safe(withScan.InCols[i])))
  1646  			}
  1647  			cols[i] = exec.NodeColumnOrdinal(col)
  1648  			res.outputCols.Set(int(withScan.OutCols[i]), i)
  1649  		}
  1650  		res.root, err = b.factory.ConstructSimpleProject(
  1651  			res.root, cols, nil, /* colNames */
  1652  			exec.OutputOrdering(res.sqlOrdering(withScan.ProvidedPhysical().Ordering)),
  1653  		)
  1654  		if err != nil {
  1655  			return execPlan{}, err
  1656  		}
  1657  	}
  1658  	return res, nil
  1659  
  1660  }
  1661  
  1662  func (b *Builder) buildProjectSet(projectSet *memo.ProjectSetExpr) (execPlan, error) {
  1663  	input, err := b.buildRelational(projectSet.Input)
  1664  	if err != nil {
  1665  		return execPlan{}, err
  1666  	}
  1667  
  1668  	zip := projectSet.Zip
  1669  	md := b.mem.Metadata()
  1670  	scalarCtx := input.makeBuildScalarCtx()
  1671  
  1672  	exprs := make(tree.TypedExprs, len(zip))
  1673  	zipCols := make(sqlbase.ResultColumns, 0, len(zip))
  1674  	numColsPerGen := make([]int, len(zip))
  1675  
  1676  	ep := execPlan{outputCols: input.outputCols}
  1677  	n := ep.numOutputCols()
  1678  
  1679  	for i := range zip {
  1680  		item := &zip[i]
  1681  		exprs[i], err = b.buildScalar(&scalarCtx, item.Fn)
  1682  		if err != nil {
  1683  			return execPlan{}, err
  1684  		}
  1685  
  1686  		for _, col := range item.Cols {
  1687  			colMeta := md.ColumnMeta(col)
  1688  			zipCols = append(zipCols, sqlbase.ResultColumn{Name: colMeta.Alias, Typ: colMeta.Type})
  1689  
  1690  			ep.outputCols.Set(int(col), n)
  1691  			n++
  1692  		}
  1693  
  1694  		numColsPerGen[i] = len(item.Cols)
  1695  	}
  1696  
  1697  	ep.root, err = b.factory.ConstructProjectSet(input.root, exprs, zipCols, numColsPerGen)
  1698  	if err != nil {
  1699  		return execPlan{}, err
  1700  	}
  1701  
  1702  	return ep, nil
  1703  }
  1704  
  1705  func (b *Builder) resultColumn(id opt.ColumnID) sqlbase.ResultColumn {
  1706  	colMeta := b.mem.Metadata().ColumnMeta(id)
  1707  	return sqlbase.ResultColumn{
  1708  		Name: colMeta.Alias,
  1709  		Typ:  colMeta.Type,
  1710  	}
  1711  }
  1712  
  1713  // extractFromOffset extracts the start bound expression of a window function
  1714  // that uses the OFFSET windowing mode for its start bound.
  1715  func (b *Builder) extractFromOffset(e opt.ScalarExpr) (_ opt.ScalarExpr, ok bool) {
  1716  	if opt.IsWindowOp(e) || opt.IsAggregateOp(e) {
  1717  		return nil, false
  1718  	}
  1719  	if modifier, ok := e.(*memo.WindowFromOffsetExpr); ok {
  1720  		return modifier.Offset, true
  1721  	}
  1722  	return b.extractFromOffset(e.Child(0).(opt.ScalarExpr))
  1723  }
  1724  
  1725  // extractToOffset extracts the end bound expression of a window function
  1726  // that uses the OFFSET windowing mode for its end bound.
  1727  func (b *Builder) extractToOffset(e opt.ScalarExpr) (_ opt.ScalarExpr, ok bool) {
  1728  	if opt.IsWindowOp(e) || opt.IsAggregateOp(e) {
  1729  		return nil, false
  1730  	}
  1731  	if modifier, ok := e.(*memo.WindowToOffsetExpr); ok {
  1732  		return modifier.Offset, true
  1733  	}
  1734  	return b.extractToOffset(e.Child(0).(opt.ScalarExpr))
  1735  }
  1736  
  1737  // extractFilter extracts a FILTER expression from a window function tower.
  1738  // Returns the expression and true if there was a filter, and false otherwise.
  1739  func (b *Builder) extractFilter(e opt.ScalarExpr) (opt.ScalarExpr, bool) {
  1740  	if opt.IsWindowOp(e) || opt.IsAggregateOp(e) {
  1741  		return nil, false
  1742  	}
  1743  	if filter, ok := e.(*memo.AggFilterExpr); ok {
  1744  		return filter.Filter, true
  1745  	}
  1746  	return b.extractFilter(e.Child(0).(opt.ScalarExpr))
  1747  }
  1748  
  1749  // extractWindowFunction extracts the window function being computed from a
  1750  // potential tower of modifiers attached to the Function field of a
  1751  // WindowsItem.
  1752  func (b *Builder) extractWindowFunction(e opt.ScalarExpr) opt.ScalarExpr {
  1753  	if opt.IsWindowOp(e) || opt.IsAggregateOp(e) {
  1754  		return e
  1755  	}
  1756  	return b.extractWindowFunction(e.Child(0).(opt.ScalarExpr))
  1757  }
  1758  
  1759  func (b *Builder) isOffsetMode(boundType tree.WindowFrameBoundType) bool {
  1760  	return boundType == tree.OffsetPreceding || boundType == tree.OffsetFollowing
  1761  }
  1762  
  1763  func (b *Builder) buildFrame(input execPlan, w *memo.WindowsItem) (*tree.WindowFrame, error) {
  1764  	scalarCtx := input.makeBuildScalarCtx()
  1765  	newDef := &tree.WindowFrame{
  1766  		Mode: w.Frame.Mode,
  1767  		Bounds: tree.WindowFrameBounds{
  1768  			StartBound: &tree.WindowFrameBound{
  1769  				BoundType: w.Frame.StartBoundType,
  1770  			},
  1771  			EndBound: &tree.WindowFrameBound{
  1772  				BoundType: w.Frame.EndBoundType,
  1773  			},
  1774  		},
  1775  		Exclusion: w.Frame.FrameExclusion,
  1776  	}
  1777  	if boundExpr, ok := b.extractFromOffset(w.Function); ok {
  1778  		if !b.isOffsetMode(w.Frame.StartBoundType) {
  1779  			panic(errors.AssertionFailedf("expected offset to only be present in offset mode"))
  1780  		}
  1781  		offset, err := b.buildScalar(&scalarCtx, boundExpr)
  1782  		if err != nil {
  1783  			return nil, err
  1784  		}
  1785  		if offset == tree.DNull {
  1786  			return nil, pgerror.Newf(pgcode.NullValueNotAllowed, "frame starting offset must not be null")
  1787  		}
  1788  		newDef.Bounds.StartBound.OffsetExpr = offset
  1789  	}
  1790  
  1791  	if boundExpr, ok := b.extractToOffset(w.Function); ok {
  1792  		if !b.isOffsetMode(newDef.Bounds.EndBound.BoundType) {
  1793  			panic(errors.AssertionFailedf("expected offset to only be present in offset mode"))
  1794  		}
  1795  		offset, err := b.buildScalar(&scalarCtx, boundExpr)
  1796  		if err != nil {
  1797  			return nil, err
  1798  		}
  1799  		if offset == tree.DNull {
  1800  			return nil, pgerror.Newf(pgcode.NullValueNotAllowed, "frame ending offset must not be null")
  1801  		}
  1802  		newDef.Bounds.EndBound.OffsetExpr = offset
  1803  	}
  1804  	return newDef, nil
  1805  }
  1806  
  1807  func (b *Builder) buildWindow(w *memo.WindowExpr) (execPlan, error) {
  1808  	input, err := b.buildRelational(w.Input)
  1809  	if err != nil {
  1810  		return execPlan{}, err
  1811  	}
  1812  
  1813  	// Rearrange the input so that the input has all the passthrough columns
  1814  	// followed by all the argument columns.
  1815  
  1816  	passthrough := w.Input.Relational().OutputCols
  1817  
  1818  	desiredCols := opt.ColList{}
  1819  	passthrough.ForEach(func(i opt.ColumnID) {
  1820  		desiredCols = append(desiredCols, i)
  1821  	})
  1822  
  1823  	// TODO(justin): this call to ensureColumns is kind of unfortunate because it
  1824  	// can result in an extra render beneath each window function. Figure out a
  1825  	// way to alleviate this.
  1826  	input, err = b.ensureColumns(input, desiredCols, nil, opt.Ordering{})
  1827  	if err != nil {
  1828  		return execPlan{}, err
  1829  	}
  1830  
  1831  	ctx := input.makeBuildScalarCtx()
  1832  
  1833  	ord := w.Ordering.ToOrdering()
  1834  
  1835  	orderingExprs := make(tree.OrderBy, len(ord))
  1836  	for i, c := range ord {
  1837  		direction := tree.Ascending
  1838  		if c.Descending() {
  1839  			direction = tree.Descending
  1840  		}
  1841  		orderingExprs[i] = &tree.Order{
  1842  			Expr:      b.indexedVar(&ctx, b.mem.Metadata(), c.ID()),
  1843  			Direction: direction,
  1844  		}
  1845  	}
  1846  
  1847  	partitionIdxs := make([]exec.NodeColumnOrdinal, w.Partition.Len())
  1848  	partitionExprs := make(tree.Exprs, w.Partition.Len())
  1849  
  1850  	i := 0
  1851  	w.Partition.ForEach(func(col opt.ColumnID) {
  1852  		ordinal, _ := input.outputCols.Get(int(col))
  1853  		partitionIdxs[i] = exec.NodeColumnOrdinal(ordinal)
  1854  		partitionExprs[i] = b.indexedVar(&ctx, b.mem.Metadata(), col)
  1855  		i++
  1856  	})
  1857  
  1858  	argIdxs := make([][]exec.NodeColumnOrdinal, len(w.Windows))
  1859  	filterIdxs := make([]int, len(w.Windows))
  1860  	exprs := make([]*tree.FuncExpr, len(w.Windows))
  1861  
  1862  	for i := range w.Windows {
  1863  		item := &w.Windows[i]
  1864  		fn := b.extractWindowFunction(item.Function)
  1865  		name, overload := memo.FindWindowOverload(fn)
  1866  		if !b.disableTelemetry {
  1867  			telemetry.Inc(sqltelemetry.WindowFunctionCounter(name))
  1868  		}
  1869  		props, _ := builtins.GetBuiltinProperties(name)
  1870  
  1871  		args := make([]tree.TypedExpr, fn.ChildCount())
  1872  		argIdxs[i] = make([]exec.NodeColumnOrdinal, fn.ChildCount())
  1873  		for j, n := 0, fn.ChildCount(); j < n; j++ {
  1874  			col := fn.Child(j).(*memo.VariableExpr).Col
  1875  			args[j] = b.indexedVar(&ctx, b.mem.Metadata(), col)
  1876  			idx, _ := input.outputCols.Get(int(col))
  1877  			argIdxs[i][j] = exec.NodeColumnOrdinal(idx)
  1878  		}
  1879  
  1880  		frame, err := b.buildFrame(input, item)
  1881  		if err != nil {
  1882  			return execPlan{}, err
  1883  		}
  1884  
  1885  		var builtFilter tree.TypedExpr
  1886  		filter, ok := b.extractFilter(item.Function)
  1887  		if ok {
  1888  			f, ok := filter.(*memo.VariableExpr)
  1889  			if !ok {
  1890  				panic(errors.AssertionFailedf("expected FILTER expression to be a VariableExpr"))
  1891  			}
  1892  			filterIdxs[i], _ = input.outputCols.Get(int(f.Col))
  1893  
  1894  			builtFilter, err = b.buildScalar(&ctx, filter)
  1895  			if err != nil {
  1896  				return execPlan{}, err
  1897  			}
  1898  		} else {
  1899  			filterIdxs[i] = -1
  1900  		}
  1901  
  1902  		exprs[i] = tree.NewTypedFuncExpr(
  1903  			tree.WrapFunction(name),
  1904  			0,
  1905  			args,
  1906  			builtFilter,
  1907  			&tree.WindowDef{
  1908  				Partitions: partitionExprs,
  1909  				OrderBy:    orderingExprs,
  1910  				Frame:      frame,
  1911  			},
  1912  			overload.FixedReturnType(),
  1913  			props,
  1914  			overload,
  1915  		)
  1916  	}
  1917  
  1918  	resultCols := make(sqlbase.ResultColumns, w.Relational().OutputCols.Len())
  1919  
  1920  	// All the passthrough cols will keep their ordinal index.
  1921  	passthrough.ForEach(func(col opt.ColumnID) {
  1922  		ordinal, _ := input.outputCols.Get(int(col))
  1923  		resultCols[ordinal] = b.resultColumn(col)
  1924  	})
  1925  
  1926  	var outputCols opt.ColMap
  1927  	input.outputCols.ForEach(func(key, val int) {
  1928  		if passthrough.Contains(opt.ColumnID(key)) {
  1929  			outputCols.Set(key, val)
  1930  		}
  1931  	})
  1932  
  1933  	outputIdxs := make([]int, len(w.Windows))
  1934  
  1935  	// Because of the way we arranged the input columns, we will be outputting
  1936  	// the window columns at the end (which is exactly what the execution engine
  1937  	// will do as well).
  1938  	windowStart := passthrough.Len()
  1939  	for i := range w.Windows {
  1940  		resultCols[windowStart+i] = b.resultColumn(w.Windows[i].Col)
  1941  		outputCols.Set(int(w.Windows[i].Col), windowStart+i)
  1942  		outputIdxs[i] = windowStart + i
  1943  	}
  1944  
  1945  	var rangeOffsetColumn exec.NodeColumnOrdinal
  1946  	if ord.Empty() {
  1947  		idx, _ := input.outputCols.Get(int(w.RangeOffsetColumn))
  1948  		rangeOffsetColumn = exec.NodeColumnOrdinal(idx)
  1949  	}
  1950  	node, err := b.factory.ConstructWindow(input.root, exec.WindowInfo{
  1951  		Cols:              resultCols,
  1952  		Exprs:             exprs,
  1953  		OutputIdxs:        outputIdxs,
  1954  		ArgIdxs:           argIdxs,
  1955  		FilterIdxs:        filterIdxs,
  1956  		Partition:         partitionIdxs,
  1957  		Ordering:          input.sqlOrdering(ord),
  1958  		RangeOffsetColumn: rangeOffsetColumn,
  1959  	})
  1960  	if err != nil {
  1961  		return execPlan{}, err
  1962  	}
  1963  
  1964  	return execPlan{
  1965  		root:       node,
  1966  		outputCols: outputCols,
  1967  	}, nil
  1968  }
  1969  
  1970  func (b *Builder) buildSequenceSelect(seqSel *memo.SequenceSelectExpr) (execPlan, error) {
  1971  	seq := b.mem.Metadata().Sequence(seqSel.Sequence)
  1972  	node, err := b.factory.ConstructSequenceSelect(seq)
  1973  	if err != nil {
  1974  		return execPlan{}, err
  1975  	}
  1976  
  1977  	ep := execPlan{root: node}
  1978  	for i, c := range seqSel.Cols {
  1979  		ep.outputCols.Set(int(c), i)
  1980  	}
  1981  
  1982  	return ep, nil
  1983  }
  1984  
  1985  func (b *Builder) applySaveTable(
  1986  	input execPlan, e memo.RelExpr, saveTableName string,
  1987  ) (execPlan, error) {
  1988  	name := tree.NewTableName(tree.Name(opt.SaveTablesDatabase), tree.Name(saveTableName))
  1989  
  1990  	// Ensure that the column names are unique and match the names used by the
  1991  	// opttester.
  1992  	outputCols := e.Relational().OutputCols
  1993  	colNames := make([]string, outputCols.Len())
  1994  	colNameGen := memo.NewColumnNameGenerator(e)
  1995  	for col, ok := outputCols.Next(0); ok; col, ok = outputCols.Next(col + 1) {
  1996  		ord, _ := input.outputCols.Get(int(col))
  1997  		colNames[ord] = colNameGen.GenerateName(col)
  1998  	}
  1999  
  2000  	var err error
  2001  	input.root, err = b.factory.ConstructSaveTable(input.root, name, colNames)
  2002  	if err != nil {
  2003  		return execPlan{}, err
  2004  	}
  2005  	return input, err
  2006  }
  2007  
  2008  func (b *Builder) buildOpaque(opaque *memo.OpaqueRelPrivate) (execPlan, error) {
  2009  	node, err := b.factory.ConstructOpaque(opaque.Metadata)
  2010  	if err != nil {
  2011  		return execPlan{}, err
  2012  	}
  2013  
  2014  	ep := execPlan{root: node}
  2015  	for i, c := range opaque.Columns {
  2016  		ep.outputCols.Set(int(c), i)
  2017  	}
  2018  
  2019  	return ep, nil
  2020  }
  2021  
  2022  // needProjection figures out what projection is needed on top of the input plan
  2023  // to produce the given list of columns. If the input plan already produces
  2024  // the columns (in the same order), returns needProj=false.
  2025  func (b *Builder) needProjection(
  2026  	input execPlan, colList opt.ColList,
  2027  ) (_ []exec.NodeColumnOrdinal, needProj bool) {
  2028  	if input.numOutputCols() == len(colList) {
  2029  		identity := true
  2030  		for i, col := range colList {
  2031  			if ord, ok := input.outputCols.Get(int(col)); !ok || ord != i {
  2032  				identity = false
  2033  				break
  2034  			}
  2035  		}
  2036  		if identity {
  2037  			return nil, false
  2038  		}
  2039  	}
  2040  	cols := make([]exec.NodeColumnOrdinal, 0, len(colList))
  2041  	for _, col := range colList {
  2042  		if col != 0 {
  2043  			cols = append(cols, input.getNodeColumnOrdinal(col))
  2044  		}
  2045  	}
  2046  	return cols, true
  2047  }
  2048  
  2049  // ensureColumns applies a projection as necessary to make the output match the
  2050  // given list of columns; colNames is optional.
  2051  func (b *Builder) ensureColumns(
  2052  	input execPlan, colList opt.ColList, colNames []string, provided opt.Ordering,
  2053  ) (execPlan, error) {
  2054  	cols, needProj := b.needProjection(input, colList)
  2055  	if !needProj {
  2056  		// No projection necessary.
  2057  		if colNames != nil {
  2058  			var err error
  2059  			input.root, err = b.factory.RenameColumns(input.root, colNames)
  2060  			if err != nil {
  2061  				return execPlan{}, err
  2062  			}
  2063  		}
  2064  		return input, nil
  2065  	}
  2066  	var res execPlan
  2067  	for i, col := range colList {
  2068  		res.outputCols.Set(int(col), i)
  2069  	}
  2070  	reqOrdering := exec.OutputOrdering(res.sqlOrdering(provided))
  2071  	var err error
  2072  	res.root, err = b.factory.ConstructSimpleProject(input.root, cols, colNames, reqOrdering)
  2073  	return res, err
  2074  }
  2075  
  2076  // applyPresentation adds a projection to a plan to satisfy a required
  2077  // Presentation property.
  2078  func (b *Builder) applyPresentation(input execPlan, p *physical.Required) (execPlan, error) {
  2079  	pres := p.Presentation
  2080  	colList := make(opt.ColList, len(pres))
  2081  	colNames := make([]string, len(pres))
  2082  	for i := range pres {
  2083  		colList[i] = pres[i].ID
  2084  		colNames[i] = pres[i].Alias
  2085  	}
  2086  	// The ordering is not useful for a top-level projection (it is used by the
  2087  	// distsql planner for internal nodes); we might not even be able to represent
  2088  	// it because it can refer to columns not in the presentation.
  2089  	return b.ensureColumns(input, colList, colNames, nil /* provided */)
  2090  }
  2091  
  2092  // getEnvData consolidates the information that must be presented in
  2093  // EXPLAIN (opt, env).
  2094  func (b *Builder) getEnvData() exec.ExplainEnvData {
  2095  	envOpts := exec.ExplainEnvData{ShowEnv: true}
  2096  	var err error
  2097  	envOpts.Tables, envOpts.Sequences, envOpts.Views, err = b.mem.Metadata().AllDataSourceNames(
  2098  		func(ds cat.DataSource) (cat.DataSourceName, error) {
  2099  			return b.catalog.FullyQualifiedName(context.TODO(), ds)
  2100  		},
  2101  	)
  2102  	if err != nil {
  2103  		panic(err)
  2104  	}
  2105  
  2106  	return envOpts
  2107  }
  2108  
  2109  // statementTag returns a string that can be used in an error message regarding
  2110  // the given expression.
  2111  func (b *Builder) statementTag(expr memo.RelExpr) string {
  2112  	switch expr.Op() {
  2113  	case opt.OpaqueRelOp, opt.OpaqueMutationOp, opt.OpaqueDDLOp:
  2114  		return expr.Private().(*memo.OpaqueRelPrivate).Metadata.String()
  2115  
  2116  	default:
  2117  		return expr.Op().SyntaxTag()
  2118  	}
  2119  }