github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/physicalplan/physical_plan.go

// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

// This file defines structures and basic functionality that is useful when
// building distsql plans. It does not contain the actual physical planning
// code.

package physicalplan

import (
	"fmt"
	"math"

	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
)

// Processor contains the information associated with a processor in a plan.
type Processor struct {
	// Node where the processor must be instantiated.
	Node roachpb.NodeID

	// Spec for the processor; note that the StreamEndpointSpecs in the input
	// synchronizers and output routers are not set until the end of the
	// planning process.
	Spec execinfrapb.ProcessorSpec
}

// ProcessorIdx identifies a processor by its index in PhysicalPlan.Processors.
type ProcessorIdx int

// Stream connects the output router of one processor to an input synchronizer
// of another processor.
type Stream struct {
	// SourceProcessor index (within the same plan).
	SourceProcessor ProcessorIdx

	// SourceRouterSlot identifies the position of this stream among the streams
	// that originate from the same router. This is important when routing by
	// hash, where the order of the streams in the OutputRouterSpec matters.
	SourceRouterSlot int

	// DestProcessor index (within the same plan).
	DestProcessor ProcessorIdx

	// DestInput identifies the input of DestProcessor (some processors have
	// multiple inputs).
	DestInput int
}
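
// For example (hypothetical processor indexes), a hash-distributed stage with
// two buckets wires each slot of a source router to a distinct destination;
// slot k must end up at position k of the router's Streams list:
//
//	// stream for bucket 0 of processor 3's router, feeding input 0 of processor 5
//	s0 := Stream{SourceProcessor: 3, SourceRouterSlot: 0, DestProcessor: 5, DestInput: 0}
//	// stream for bucket 1 of the same router, feeding processor 6
//	s1 := Stream{SourceProcessor: 3, SourceRouterSlot: 1, DestProcessor: 6, DestInput: 0}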

// PhysicalPlan represents a network of processors and streams along with
// information about the results output by this network. The results come from
// unconnected output routers of a subset of processors; all these routers
// output the same kind of data (same schema).
type PhysicalPlan struct {
	// Processors in the plan.
	Processors []Processor

	// LocalProcessors contains all of the planNodeToRowSourceWrappers that were
	// installed in this physical plan to wrap any planNodes that couldn't be
	// properly translated into DistSQL processors. This will be empty if no
	// wrapping had to happen.
	LocalProcessors []execinfra.LocalProcessor

	// LocalProcessorIndexes contains pointers to all of the RowSourceIdx fields
	// of the LocalPlanNodeSpecs that were created. This list is in the same
	// order as LocalProcessors, and is kept up-to-date so that
	// LocalPlanNodeSpecs always have the correct index into the LocalProcessors
	// slice.
	LocalProcessorIndexes []*uint32

	// Streams accumulates the streams in the plan - both local (intra-node) and
	// remote (inter-node); when we have a final plan, the streams are used to
	// generate processor input and output specs (see PopulateEndpoints).
	Streams []Stream

	// ResultRouters identifies the output routers which output the results of
	// the plan. These are the routers to which we have to connect new streams
	// in order to extend the plan.
	//
	// The processors which have these routers are all part of the same "stage":
	// they have the same "schema" and PostProcessSpec.
	//
	// We assume all processors have a single output so we only need the
	// processor index.
	ResultRouters []ProcessorIdx

	// ResultTypes is the schema (column types) of the rows produced by the
	// ResultRouters.
	//
	// This is aliased with InputSyncSpec.ColumnTypes, so it must not be
	// modified in-place during planning.
	ResultTypes []*types.T

	// MergeOrdering is the ordering guarantee for the result streams that must
	// be maintained when the streams eventually merge. The column indexes refer
	// to columns for the rows produced by ResultRouters.
	//
	// Empty when there is a single result router. The reason is that
	// maintaining an ordering sometimes requires adding columns to streams for
	// the sole reason of correctly merging the streams later (see
	// AddProjection); we don't want to pay this cost if we don't have multiple
	// streams to merge.
	MergeOrdering execinfrapb.Ordering

	// Used internally for numbering stages.
	stageCounter int32

	// Used internally to avoid creating flow IDs for local flows. This boolean
	// specifies whether there is more than one node involved in a plan.
	remotePlan bool

	// MaxEstimatedRowCount tracks the maximum estimated row count that a table
	// reader in this plan will output. This information is used to decide
	// whether to use the vectorized execution engine.
	MaxEstimatedRowCount uint64
	// TotalEstimatedScannedRows is the sum of the row count estimate of all the
	// table readers in the plan.
	TotalEstimatedScannedRows uint64
}

// NewStageID creates a stage identifier that can be used in processor specs.
func (p *PhysicalPlan) NewStageID() int32 {
	p.stageCounter++
	return p.stageCounter
}

// AddProcessor adds a processor to a PhysicalPlan and returns the index that
// can be used to refer to that processor.
func (p *PhysicalPlan) AddProcessor(proc Processor) ProcessorIdx {
	idx := ProcessorIdx(len(p.Processors))
	p.Processors = append(p.Processors, proc)
	return idx
}

// SetMergeOrdering sets p.MergeOrdering.
func (p *PhysicalPlan) SetMergeOrdering(o execinfrapb.Ordering) {
	if len(p.ResultRouters) > 1 {
		p.MergeOrdering = o
	} else {
		p.MergeOrdering = execinfrapb.Ordering{}
	}
}
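
// For example (hypothetical plan; gatewayNodeID and valuesSpec are assumed to
// be in scope), a minimal single-processor plan registers one Values processor
// and makes it the initial result router, much like emptyPlan below:
//
//	pIdx := p.AddProcessor(Processor{
//		Node: gatewayNodeID,
//		Spec: execinfrapb.ProcessorSpec{
//			Core:   execinfrapb.ProcessorCoreUnion{Values: &valuesSpec},
//			Output: []execinfrapb.OutputRouterSpec{{Type: execinfrapb.OutputRouterSpec_PASS_THROUGH}},
//		},
//	})
//	p.ResultRouters = []ProcessorIdx{pIdx}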

// AddNoGroupingStage adds a processor for each result router, on the same node
// as the source of the stream; all processors have the same core. This is for
// stages that correspond to logical blocks that don't require any grouping
// (e.g. evaluator, sorting, etc).
func (p *PhysicalPlan) AddNoGroupingStage(
	core execinfrapb.ProcessorCoreUnion,
	post execinfrapb.PostProcessSpec,
	outputTypes []*types.T,
	newOrdering execinfrapb.Ordering,
) {
	p.AddNoGroupingStageWithCoreFunc(
		func(_ int, _ *Processor) execinfrapb.ProcessorCoreUnion { return core },
		post,
		outputTypes,
		newOrdering,
	)
}

// AddNoGroupingStageWithCoreFunc is like AddNoGroupingStage, but creates a
// core spec based on the input processor's spec.
func (p *PhysicalPlan) AddNoGroupingStageWithCoreFunc(
	coreFunc func(int, *Processor) execinfrapb.ProcessorCoreUnion,
	post execinfrapb.PostProcessSpec,
	outputTypes []*types.T,
	newOrdering execinfrapb.Ordering,
) {
	stageID := p.NewStageID()
	for i, resultProc := range p.ResultRouters {
		prevProc := &p.Processors[resultProc]

		proc := Processor{
			Node: prevProc.Node,
			Spec: execinfrapb.ProcessorSpec{
				Input: []execinfrapb.InputSyncSpec{{
					Type:        execinfrapb.InputSyncSpec_UNORDERED,
					ColumnTypes: p.ResultTypes,
				}},
				Core: coreFunc(int(resultProc), prevProc),
				Post: post,
				Output: []execinfrapb.OutputRouterSpec{{
					Type: execinfrapb.OutputRouterSpec_PASS_THROUGH,
				}},
				StageID: stageID,
			},
		}

		pIdx := p.AddProcessor(proc)

		p.Streams = append(p.Streams, Stream{
			SourceProcessor:  resultProc,
			DestProcessor:    pIdx,
			SourceRouterSlot: 0,
			DestInput:        0,
		})

		p.ResultRouters[i] = pIdx
	}
	p.ResultTypes = outputTypes
	p.SetMergeOrdering(newOrdering)
}
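
// For example, AddRendering and AddFilter below use a no-op core to attach a
// fresh PostProcessSpec on top of the last stage without moving any rows:
//
//	p.AddNoGroupingStage(
//		execinfrapb.ProcessorCoreUnion{Noop: &execinfrapb.NoopCoreSpec{}},
//		execinfrapb.PostProcessSpec{},
//		p.ResultTypes,
//		p.MergeOrdering,
//	)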

// MergeResultStreams connects a set of resultRouters to a synchronizer. The
// synchronizer is configured with the provided ordering.
func (p *PhysicalPlan) MergeResultStreams(
	resultRouters []ProcessorIdx,
	sourceRouterSlot int,
	ordering execinfrapb.Ordering,
	destProcessor ProcessorIdx,
	destInput int,
) {
	proc := &p.Processors[destProcessor]
	if len(ordering.Columns) == 0 || len(resultRouters) == 1 {
		proc.Spec.Input[destInput].Type = execinfrapb.InputSyncSpec_UNORDERED
	} else {
		proc.Spec.Input[destInput].Type = execinfrapb.InputSyncSpec_ORDERED
		proc.Spec.Input[destInput].Ordering = ordering
	}

	for _, resultProc := range resultRouters {
		p.Streams = append(p.Streams, Stream{
			SourceProcessor:  resultProc,
			SourceRouterSlot: sourceRouterSlot,
			DestProcessor:    destProcessor,
			DestInput:        destInput,
		})
	}
}

// AddSingleGroupStage adds a "single group" stage (one that cannot be
// parallelized) which consists of a single processor on the specified node.
// The result routers of the previous stage are all connected to this
// processor.
func (p *PhysicalPlan) AddSingleGroupStage(
	nodeID roachpb.NodeID,
	core execinfrapb.ProcessorCoreUnion,
	post execinfrapb.PostProcessSpec,
	outputTypes []*types.T,
) {
	proc := Processor{
		Node: nodeID,
		Spec: execinfrapb.ProcessorSpec{
			Input: []execinfrapb.InputSyncSpec{{
				// The other fields will be filled in by MergeResultStreams.
				ColumnTypes: p.ResultTypes,
			}},
			Core: core,
			Post: post,
			Output: []execinfrapb.OutputRouterSpec{{
				Type: execinfrapb.OutputRouterSpec_PASS_THROUGH,
			}},
			StageID: p.NewStageID(),
		},
	}

	pIdx := p.AddProcessor(proc)

	// Connect the result routers to the processor.
	p.MergeResultStreams(p.ResultRouters, 0, p.MergeOrdering, pIdx, 0)

	// We now have a single result stream.
	p.ResultRouters = p.ResultRouters[:1]
	p.ResultRouters[0] = pIdx

	p.ResultTypes = outputTypes
	p.MergeOrdering = execinfrapb.Ordering{}
}

// CheckLastStagePost checks that the processors of the last stage of the
// PhysicalPlan have identical post-processing, returning an error if not.
func (p *PhysicalPlan) CheckLastStagePost() error {
	post := p.Processors[p.ResultRouters[0]].Spec.Post

	// All processors of a stage should be identical in terms of
	// post-processing; verify this assumption.
	for i := 1; i < len(p.ResultRouters); i++ {
		pi := &p.Processors[p.ResultRouters[i]].Spec.Post
		if pi.Filter != post.Filter ||
			pi.Projection != post.Projection ||
			len(pi.OutputColumns) != len(post.OutputColumns) ||
			len(pi.RenderExprs) != len(post.RenderExprs) {
			return errors.Errorf("inconsistent post-processing: %v vs %v", post, pi)
		}
		for j, col := range pi.OutputColumns {
			if col != post.OutputColumns[j] {
				return errors.Errorf("inconsistent post-processing: %v vs %v", post, pi)
			}
		}
		for j, expr := range pi.RenderExprs {
			if expr != post.RenderExprs[j] {
				return errors.Errorf("inconsistent post-processing: %v vs %v", post, pi)
			}
		}
	}

	return nil
}

// GetLastStagePost returns the PostProcessSpec for the processors in the last
// stage (ResultRouters).
func (p *PhysicalPlan) GetLastStagePost() execinfrapb.PostProcessSpec {
	if err := p.CheckLastStagePost(); err != nil {
		panic(err)
	}
	return p.Processors[p.ResultRouters[0]].Spec.Post
}

// SetLastStagePost changes the PostProcessSpec of the processors in the last
// stage (ResultRouters).
// The caller must update the ordering via SetMergeOrdering.
func (p *PhysicalPlan) SetLastStagePost(post execinfrapb.PostProcessSpec, outputTypes []*types.T) {
	for _, pIdx := range p.ResultRouters {
		p.Processors[pIdx].Spec.Post = post
	}
	p.ResultTypes = outputTypes
}

func isIdentityProjection(columns []uint32, numExistingCols int) bool {
	if len(columns) != numExistingCols {
		return false
	}
	for i, c := range columns {
		if c != uint32(i) {
			return false
		}
	}
	return true
}
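
// For example, isIdentityProjection([]uint32{0, 1, 2}, 3) is true, while
// isIdentityProjection([]uint32{0, 2}, 3) and
// isIdentityProjection([]uint32{1, 0, 2}, 3) are both false.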

// AddProjection applies a projection to a plan. The new plan outputs the
// columns of the old plan as listed in the slice. The Ordering is updated;
// columns in the ordering are added to the projection as needed.
//
// The PostProcessSpec may not be updated if the resulting projection keeps all
// the columns in their original order.
//
// Note: the columns slice is relinquished to this function, which can modify
// it or use it directly in specs.
func (p *PhysicalPlan) AddProjection(columns []uint32) {
	// If the projection we are trying to apply projects every column, don't
	// update the spec.
	if isIdentityProjection(columns, len(p.ResultTypes)) {
		return
	}

	// Update the ordering.
	if len(p.MergeOrdering.Columns) > 0 {
		newOrdering := make([]execinfrapb.Ordering_Column, len(p.MergeOrdering.Columns))
		for i, c := range p.MergeOrdering.Columns {
			// Look for the column in the new projection.
			found := -1
			for j, projCol := range columns {
				if projCol == c.ColIdx {
					found = j
				}
			}
			if found == -1 {
				// We have a column that is not in the projection but will be
				// necessary later when the streams are merged; add it.
				found = len(columns)
				columns = append(columns, c.ColIdx)
			}
			newOrdering[i].ColIdx = uint32(found)
			newOrdering[i].Direction = c.Direction
		}
		p.MergeOrdering.Columns = newOrdering
	}

	newResultTypes := make([]*types.T, len(columns))
	for i, c := range columns {
		newResultTypes[i] = p.ResultTypes[c]
	}

	post := p.GetLastStagePost()

	if post.RenderExprs != nil {
		// Apply the projection to the existing rendering; in other words, keep
		// only the renders needed by the new output columns, and reorder them
		// accordingly.
		oldRenders := post.RenderExprs
		post.RenderExprs = make([]execinfrapb.Expression, len(columns))
		for i, c := range columns {
			post.RenderExprs[i] = oldRenders[c]
		}
	} else {
		// There is no existing rendering; we can use OutputColumns to set the
		// projection.
		if post.Projection {
			// We already had a projection: compose it with the new one.
			for i, c := range columns {
				columns[i] = post.OutputColumns[c]
			}
		}
		post.OutputColumns = columns
		post.Projection = true
	}

	p.SetLastStagePost(post, newResultTypes)
}
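
// For example (hypothetical column indexes), with ResultTypes (A, B, C) and a
// MergeOrdering on column 1 (B):
//
//	p.AddProjection([]uint32{2, 0})
//
// keeps B alive for the later merge by growing the projection to [2, 0, 1],
// so the output schema is (C, A, B) and the ordering is remapped to column 2.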

// exprColumn returns the column that is referenced by the expression, if the
// expression is just an IndexedVar.
//
// See MakeExpression for a description of indexVarMap.
func exprColumn(expr tree.TypedExpr, indexVarMap []int) (int, bool) {
	v, ok := expr.(*tree.IndexedVar)
	if !ok {
		return -1, false
	}
	return indexVarMap[v.Idx], true
}

// AddRendering adds a rendering (expression evaluation) to the output of a
// plan. The rendering is achieved either through an adjustment on the last
// stage post-process spec, or via a new stage.
//
// The Ordering is updated; columns in the ordering are added to the render
// expressions as necessary.
//
// See MakeExpression for a description of indexVarMap.
func (p *PhysicalPlan) AddRendering(
	exprs []tree.TypedExpr, exprCtx ExprContext, indexVarMap []int, outTypes []*types.T,
) error {
	// First check whether we need an Evaluator, or whether we are just
	// shuffling values. We also check if the rendering is a no-op ("identity").
	needRendering := false
	identity := (len(exprs) == len(p.ResultTypes))

	for exprIdx, e := range exprs {
		varIdx, ok := exprColumn(e, indexVarMap)
		if !ok {
			needRendering = true
			break
		}
		identity = identity && (varIdx == exprIdx)
	}

	if !needRendering {
		if identity {
			// Nothing to do.
			return nil
		}
		// We don't need to do any rendering: the expressions effectively
		// describe just a projection.
		cols := make([]uint32, len(exprs))
		for i, e := range exprs {
			streamCol, _ := exprColumn(e, indexVarMap)
			if streamCol == -1 {
				panic(fmt.Sprintf("render %d refers to column not in source: %s", i, e))
			}
			cols[i] = uint32(streamCol)
		}
		p.AddProjection(cols)
		return nil
	}

	post := p.GetLastStagePost()
	if len(post.RenderExprs) > 0 {
		post = execinfrapb.PostProcessSpec{}
		// The last stage contains render expressions. The new renders refer to
		// the output of these, so we need to add another "no-op" stage to which
		// to attach the new rendering.
		p.AddNoGroupingStage(
			execinfrapb.ProcessorCoreUnion{Noop: &execinfrapb.NoopCoreSpec{}},
			post,
			p.ResultTypes,
			p.MergeOrdering,
		)
	}

	compositeMap := indexVarMap
	if post.Projection {
		compositeMap = reverseProjection(post.OutputColumns, indexVarMap)
	}
	post.RenderExprs = make([]execinfrapb.Expression, len(exprs))
	for i, e := range exprs {
		var err error
		post.RenderExprs[i], err = MakeExpression(e, exprCtx, compositeMap)
		if err != nil {
			return err
		}
	}

	if len(p.MergeOrdering.Columns) > 0 {
		outTypes = outTypes[:len(outTypes):len(outTypes)]
		newOrdering := make([]execinfrapb.Ordering_Column, len(p.MergeOrdering.Columns))
		for i, c := range p.MergeOrdering.Columns {
			found := -1
			// Look for the column in the new projection.
			for exprIdx, e := range exprs {
				if varIdx, ok := exprColumn(e, indexVarMap); ok && varIdx == int(c.ColIdx) {
					found = exprIdx
					break
				}
			}
			if found == -1 {
				// We have a column that is not being rendered but will be necessary
				// later when the streams are merged; add it.

				// The new expression refers to column post.OutputColumns[c.ColIdx].
				internalColIdx := c.ColIdx
				if post.Projection {
					internalColIdx = post.OutputColumns[internalColIdx]
				}
				newExpr, err := MakeExpression(tree.NewTypedOrdinalReference(
					int(internalColIdx),
					p.ResultTypes[c.ColIdx]),
					exprCtx, nil /* indexVarMap */)
				if err != nil {
					return err
				}

				found = len(post.RenderExprs)
				post.RenderExprs = append(post.RenderExprs, newExpr)
				outTypes = append(outTypes, p.ResultTypes[c.ColIdx])
			}
			newOrdering[i].ColIdx = uint32(found)
			newOrdering[i].Direction = c.Direction
		}
		p.MergeOrdering.Columns = newOrdering
	}

	post.Projection = false
	post.OutputColumns = nil
	p.SetLastStagePost(post, outTypes)
	return nil
}
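
// For example (hypothetical expressions), if every render is a bare column
// reference, AddRendering reduces to a projection and adds no new stage: with
// indexVarMap [0 1 2], the renders (IndexedVar(2), IndexedVar(0)) simply
// become p.AddProjection([]uint32{2, 0}).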

// reverseProjection remaps expression variable indices to refer to internal
// columns (i.e. before post-processing) of a processor instead of output
// columns (i.e. after post-processing).
//
// Inputs:
//   indexVarMap is a mapping from columns that appear in an expression
//               (planNode columns) to columns in the output stream of a
//               processor.
//   outputColumns is the list of output columns in the processor's
//                 PostProcessSpec; it is effectively a mapping from the output
//                 schema to the internal schema of a processor.
//
// Result: a "composite map" that maps the planNode columns to the internal
// columns of the processor.
//
// For efficiency, the indexVarMap and the resulting map are represented as
// slices, with missing elements having values -1.
//
// Used when adding expressions (filtering, rendering) to a processor's
// PostProcessSpec. For example:
//
//   TableReader // table columns A,B,C,D
//   Internal schema (before post-processing): A, B, C, D
//   OutputColumns:  [1 3]
//   Output schema (after post-processing): B, D
//
//   Expression "B < D" might be represented as:
//     IndexedVar(4) < IndexedVar(1)
//   with associated indexVarMap:
//     [-1 1 -1 -1 0]  // 1->1, 4->0
//   This is effectively equivalent to "IndexedVar(0) < IndexedVar(1)"; 0 means
//   the first output column (B), 1 means the second output column (D).
//
//   To get an index var map that refers to the internal schema:
//     reverseProjection(
//       [1 3],           // OutputColumns
//       [-1 1 -1 -1 0],
//     ) =
//       [-1 3 -1 -1 1]   // 1->3, 4->1
//   This is effectively equivalent to "IndexedVar(1) < IndexedVar(3)"; 1
//   means the second internal column (B), 3 means the fourth internal
//   column (D).
func reverseProjection(outputColumns []uint32, indexVarMap []int) []int {
	if indexVarMap == nil {
		panic("no indexVarMap")
	}
	compositeMap := make([]int, len(indexVarMap))
	for i, col := range indexVarMap {
		if col == -1 {
			compositeMap[i] = -1
		} else {
			compositeMap[i] = int(outputColumns[col])
		}
	}
	return compositeMap
}

// AddFilter adds a filter on the output of a plan. The filter is added either
// as a post-processing step to the last stage or to a new "no-op" stage, as
// necessary.
//
// See MakeExpression for a description of indexVarMap.
func (p *PhysicalPlan) AddFilter(
	expr tree.TypedExpr, exprCtx ExprContext, indexVarMap []int,
) error {
	if expr == nil {
		return errors.Errorf("nil filter")
	}
	post := p.GetLastStagePost()
	if len(post.RenderExprs) > 0 || post.Offset != 0 || post.Limit != 0 {
		// The last stage contains render expressions or a limit. The filter
		// refers to the output as described by the existing spec, so we need to
		// add another "no-op" stage to which to attach the filter.
		//
		// In general, we might be able to push the filter "through" the
		// rendering; but the higher level planning code should figure this out
		// when propagating filters.
		post = execinfrapb.PostProcessSpec{}
		p.AddNoGroupingStage(
			execinfrapb.ProcessorCoreUnion{Noop: &execinfrapb.NoopCoreSpec{}},
			post,
			p.ResultTypes,
			p.MergeOrdering,
		)
	}

	compositeMap := indexVarMap
	if post.Projection {
		compositeMap = reverseProjection(post.OutputColumns, indexVarMap)
	}
	filter, err := MakeExpression(expr, exprCtx, compositeMap)
	if err != nil {
		return err
	}
	if !post.Filter.Empty() {
		// Either Expr or LocalExpr will be set (not both).
		if filter.Expr != "" {
			filter.Expr = fmt.Sprintf("(%s) AND (%s)", post.Filter.Expr, filter.Expr)
		} else if filter.LocalExpr != nil {
			filter.LocalExpr = tree.NewTypedAndExpr(
				post.Filter.LocalExpr,
				filter.LocalExpr,
			)
		}
	}
	for _, pIdx := range p.ResultRouters {
		p.Processors[pIdx].Spec.Post.Filter = filter
	}
	return nil
}
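
// For example (hypothetical serialized expressions), if the last stage already
// carries the filter @1 > 5 and AddFilter is called with a filter that
// serializes to @2 = 3, the stage's combined filter becomes
// (@1 > 5) AND (@2 = 3).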

// emptyPlan creates a plan with a single processor that generates no rows; the
// output stream has the given types.
func emptyPlan(types []*types.T, node roachpb.NodeID) PhysicalPlan {
	s := execinfrapb.ValuesCoreSpec{
		Columns: make([]execinfrapb.DatumInfo, len(types)),
	}
	for i, t := range types {
		s.Columns[i].Encoding = sqlbase.DatumEncoding_VALUE
		s.Columns[i].Type = t
	}

	return PhysicalPlan{
		Processors: []Processor{{
			Node: node,
			Spec: execinfrapb.ProcessorSpec{
				Core:   execinfrapb.ProcessorCoreUnion{Values: &s},
				Output: make([]execinfrapb.OutputRouterSpec, 1),
			},
		}},
		ResultRouters: []ProcessorIdx{0},
		ResultTypes:   types,
	}
}

// AddLimit adds a limit and/or offset to the results of the current plan. If
// there are multiple result streams, they are joined into a single processor
// that is placed on the given node.
//
// For no limit, count should be MaxInt64.
func (p *PhysicalPlan) AddLimit(
	count int64, offset int64, exprCtx ExprContext, node roachpb.NodeID,
) error {
	if count < 0 {
		return errors.Errorf("negative limit")
	}
	if offset < 0 {
		return errors.Errorf("negative offset")
	}
	// limitZero is set to true if the limit is a legitimate LIMIT 0 requested
	// by the user. This needs to be tracked as a separate condition because
	// DistSQL uses count=0 to mean no limit, not a limit of 0. Normally,
	// DistSQL will short circuit 0-limit plans, but wrapped local planNodes
	// sometimes need to be fully executed despite having a 0 limit, so if we do
	// in fact have a limit-0 case when there are local planNodes around, we add
	// an empty plan instead of completely eliding the 0-limit plan.
	limitZero := false
	if count == 0 {
		if len(p.LocalProcessors) == 0 {
			*p = emptyPlan(p.ResultTypes, node)
			return nil
		}
		count = 1
		limitZero = true
	}

	if len(p.ResultRouters) == 1 {
		// We only have one processor producing results. Just update its
		// PostProcessSpec.
		// SELECT FROM (SELECT OFFSET 10 LIMIT 1000) OFFSET 5 LIMIT 20 becomes
		// SELECT OFFSET 10+5 LIMIT min(1000-5, 20).
		post := p.GetLastStagePost()
		if offset != 0 {
			if post.Limit > 0 && post.Limit <= uint64(offset) {
				// The previous limit is not enough to reach the offset; we know
				// there will be no results. For example:
				//   SELECT * FROM (SELECT * FROM .. LIMIT 5) OFFSET 10
				// TODO(radu): perform this optimization while propagating filters
				// instead of having to detect it here.
				if len(p.LocalProcessors) == 0 {
					// Even though we know there will be no results, we don't elide
					// the plan if there are local processors. See the comment above
					// limitZero for why.
					*p = emptyPlan(p.ResultTypes, node)
					return nil
				}
				count = 1
				limitZero = true
			}
			// If we're collapsing an offset into a stage that already has a
			// limit, we have to be careful, since offsets are always applied
			// first, before limits. So, if the last stage already has a limit, we
			// subtract the offset from that limit to preserve correctness.
			//
			// As an example, consider the requirement of applying an offset of 3
			// on top of a limit of 10. In this case, we need to emit 7 result
			// rows. But just propagating the offset blindly would produce 10
			// result rows, an incorrect result.
			post.Offset += uint64(offset)
			if post.Limit > 0 {
				// Note that this can't fall below 0 - we would have already caught
				// this case above and returned an empty plan.
				post.Limit -= uint64(offset)
			}
		}
		if count != math.MaxInt64 && (post.Limit == 0 || post.Limit > uint64(count)) {
			post.Limit = uint64(count)
		}
		p.SetLastStagePost(post, p.ResultTypes)
		if limitZero {
			if err := p.AddFilter(tree.DBoolFalse, exprCtx, nil); err != nil {
				return err
			}
		}
		return nil
	}

	// We have multiple processors producing results. We will add a single
	// processor stage that limits. As an optimization, we also set a
	// "local" limit on each processor producing results.
	if count != math.MaxInt64 {
		post := p.GetLastStagePost()
		// If we have OFFSET 10 LIMIT 5, we may need as many as 15 rows from any
		// processor.
		localLimit := uint64(count + offset)
		if post.Limit == 0 || post.Limit > localLimit {
			post.Limit = localLimit
			p.SetLastStagePost(post, p.ResultTypes)
		}
	}

	post := execinfrapb.PostProcessSpec{
		Offset: uint64(offset),
	}
	if count != math.MaxInt64 {
		post.Limit = uint64(count)
	}
	p.AddSingleGroupStage(
		node,
		execinfrapb.ProcessorCoreUnion{Noop: &execinfrapb.NoopCoreSpec{}},
		post,
		p.ResultTypes,
	)
	if limitZero {
		if err := p.AddFilter(tree.DBoolFalse, exprCtx, nil); err != nil {
			return err
		}
	}
	return nil
}
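
// For example (hypothetical plan), p.AddLimit(5, 10, exprCtx, node) over three
// result routers sets a local Limit of 15 on each producer (offset rows might
// all come from one stream) and then adds a single no-op stage on node with
// PostProcessSpec{Offset: 10, Limit: 5}.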

// PopulateEndpoints processes p.Streams and adds the corresponding
// StreamEndpointSpecs to the processors' input and output specs. This should
// be used when the plan is completed and ready to be executed.
func (p *PhysicalPlan) PopulateEndpoints() {
	// Note: instead of using p.Streams, we could fill in the input/output specs
	// directly throughout the planning code, but this makes the rest of the
	// code a bit simpler.
	for sIdx, s := range p.Streams {
		p1 := &p.Processors[s.SourceProcessor]
		p2 := &p.Processors[s.DestProcessor]
		endpoint := execinfrapb.StreamEndpointSpec{StreamID: execinfrapb.StreamID(sIdx)}
		if p1.Node == p2.Node {
			endpoint.Type = execinfrapb.StreamEndpointSpec_LOCAL
		} else {
			endpoint.Type = execinfrapb.StreamEndpointSpec_REMOTE
		}
		p2.Spec.Input[s.DestInput].Streams = append(p2.Spec.Input[s.DestInput].Streams, endpoint)
		if endpoint.Type == execinfrapb.StreamEndpointSpec_REMOTE {
			if !p.remotePlan {
				p.remotePlan = true
			}
			endpoint.TargetNodeID = p2.Node
		}

		router := &p1.Spec.Output[0]
		// We are about to put this stream at position len(router.Streams) in the
		// router; verify that this matches the sourceRouterSlot. We expect it to
		// because the streams should be in order; if that assumption changes we
		// can reorder them here according to sourceRouterSlot.
		if len(router.Streams) != s.SourceRouterSlot {
			panic(fmt.Sprintf(
				"sourceRouterSlot mismatch: %d, expected %d", len(router.Streams), s.SourceRouterSlot,
			))
		}
		router.Streams = append(router.Streams, endpoint)
	}
}

// GenerateFlowSpecs takes a plan (with populated endpoints) and generates the
// set of FlowSpecs (one per node involved in the plan).
//
// gateway is the current node's NodeID.
func (p *PhysicalPlan) GenerateFlowSpecs(
	gateway roachpb.NodeID,
) map[roachpb.NodeID]*execinfrapb.FlowSpec {
	// Only generate a flow ID for a remote plan because it will need to be
	// referenced by remote nodes when connecting streams. This ID generation is
	// skipped for performance reasons on local flows.
	flowID := execinfrapb.FlowID{}
	if p.remotePlan {
		flowID.UUID = uuid.MakeV4()
	}
	flows := make(map[roachpb.NodeID]*execinfrapb.FlowSpec, 1)

	for _, proc := range p.Processors {
		flowSpec, ok := flows[proc.Node]
		if !ok {
			flowSpec = NewFlowSpec(flowID, gateway)
			flows[proc.Node] = flowSpec
		}
		flowSpec.Processors = append(flowSpec.Processors, proc.Spec)
	}
	return flows
}
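
// For example (hypothetical placement), a plan with processors on nodes 1, 1,
// and 2 produces two FlowSpecs: the flow for node 1 carries two
// ProcessorSpecs, the flow for node 2 carries one, and, assuming
// PopulateEndpoints marked the plan remote, both share the same freshly
// generated FlowID.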

// MergePlans merges the processors and streams of two plans into a new plan.
// The result routers for each side are also returned (they point at processors
// in the merged plan).
func MergePlans(
	left, right *PhysicalPlan,
) (mergedPlan PhysicalPlan, leftRouters []ProcessorIdx, rightRouters []ProcessorIdx) {
	mergedPlan.Processors = append(left.Processors, right.Processors...)
	rightProcStart := ProcessorIdx(len(left.Processors))

	mergedPlan.Streams = append(left.Streams, right.Streams...)

	// Update the processor indices in the right streams.
	for i := len(left.Streams); i < len(mergedPlan.Streams); i++ {
		mergedPlan.Streams[i].SourceProcessor += rightProcStart
		mergedPlan.Streams[i].DestProcessor += rightProcStart
	}

	// Renumber the stages from the right plan.
	for i := rightProcStart; int(i) < len(mergedPlan.Processors); i++ {
		s := &mergedPlan.Processors[i].Spec
		if s.StageID != 0 {
			s.StageID += left.stageCounter
		}
	}
	mergedPlan.stageCounter = left.stageCounter + right.stageCounter

	mergedPlan.LocalProcessors = append(left.LocalProcessors, right.LocalProcessors...)
	mergedPlan.LocalProcessorIndexes = append(left.LocalProcessorIndexes, right.LocalProcessorIndexes...)
	// Update the local processor indices in the right streams.
	for i := len(left.LocalProcessorIndexes); i < len(mergedPlan.LocalProcessorIndexes); i++ {
		*mergedPlan.LocalProcessorIndexes[i] += uint32(len(left.LocalProcessorIndexes))
	}

	leftRouters = left.ResultRouters
	rightRouters = append([]ProcessorIdx(nil), right.ResultRouters...)
	// Update the processor indices in the right routers.
	for i := range rightRouters {
		rightRouters[i] += rightProcStart
	}

	mergedPlan.TotalEstimatedScannedRows = left.TotalEstimatedScannedRows + right.TotalEstimatedScannedRows
	// NB(dt): AFAIK no one looks at the MaxEstimatedRowCount of the overall
	// plan but it is maintained here too just for completeness.
	mergedPlan.MaxEstimatedRowCount = left.MaxEstimatedRowCount
	if right.MaxEstimatedRowCount > mergedPlan.MaxEstimatedRowCount {
		mergedPlan.MaxEstimatedRowCount = right.MaxEstimatedRowCount
	}

	return mergedPlan, leftRouters, rightRouters
}

// MergeResultTypes reconciles the ResultTypes between two plans. It enforces
// that each pair of ColumnTypes must either match or be null, in which case
// the non-null type is used. This logic is necessary for cases like
// SELECT NULL UNION SELECT 1.
func MergeResultTypes(left, right []*types.T) ([]*types.T, error) {
	if len(left) != len(right) {
		return nil, errors.Errorf("ResultTypes length mismatch: %d and %d", len(left), len(right))
	}
	merged := make([]*types.T, len(left))
	for i := range left {
		leftType, rightType := left[i], right[i]
		if rightType.Family() == types.UnknownFamily {
			merged[i] = leftType
		} else if leftType.Family() == types.UnknownFamily {
			merged[i] = rightType
		} else if equivalentTypes(leftType, rightType) {
			merged[i] = leftType
		} else {
			return nil, errors.Errorf(
				"conflicting ColumnTypes: %s and %s", leftType.DebugString(), rightType.DebugString())
		}
	}
	return merged, nil
}

// equivalentTypes checks whether a column type is equivalent to another for
// the purpose of UNION. Precision, Width, Oid, etc. do not affect the merging
// of values.
func equivalentTypes(c, other *types.T) bool {
	return c.Equivalent(other)
}
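
// For example, for SELECT NULL UNION SELECT 1 the left ResultTypes are
// [Unknown] and the right are [Int]; MergeResultTypes returns [Int]. A
// mismatched non-null pair such as [Int] and [String] returns an error.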

// AddJoinStage adds join processors at each of the specified nodes, and wires
// the left and right-side outputs to these processors.
func (p *PhysicalPlan) AddJoinStage(
	nodes []roachpb.NodeID,
	core execinfrapb.ProcessorCoreUnion,
	post execinfrapb.PostProcessSpec,
	leftEqCols, rightEqCols []uint32,
	leftTypes, rightTypes []*types.T,
	leftMergeOrd, rightMergeOrd execinfrapb.Ordering,
	leftRouters, rightRouters []ProcessorIdx,
) {
	pIdxStart := ProcessorIdx(len(p.Processors))
	stageID := p.NewStageID()

	for _, n := range nodes {
		inputs := make([]execinfrapb.InputSyncSpec, 0, 2)
		inputs = append(inputs, execinfrapb.InputSyncSpec{ColumnTypes: leftTypes})
		inputs = append(inputs, execinfrapb.InputSyncSpec{ColumnTypes: rightTypes})

		proc := Processor{
			Node: n,
			Spec: execinfrapb.ProcessorSpec{
				Input:   inputs,
				Core:    core,
				Post:    post,
				Output:  []execinfrapb.OutputRouterSpec{{Type: execinfrapb.OutputRouterSpec_PASS_THROUGH}},
				StageID: stageID,
			},
		}
		p.Processors = append(p.Processors, proc)
	}

	if len(nodes) > 1 {
		// Parallel hash or merge join: we distribute rows (by hash of
		// equality columns) to len(nodes) join processors.

		// Set up the left routers.
		for _, resultProc := range leftRouters {
			p.Processors[resultProc].Spec.Output[0] = execinfrapb.OutputRouterSpec{
				Type:        execinfrapb.OutputRouterSpec_BY_HASH,
				HashColumns: leftEqCols,
			}
		}
		// Set up the right routers.
		for _, resultProc := range rightRouters {
			p.Processors[resultProc].Spec.Output[0] = execinfrapb.OutputRouterSpec{
				Type:        execinfrapb.OutputRouterSpec_BY_HASH,
				HashColumns: rightEqCols,
			}
		}
	}
	p.ResultRouters = p.ResultRouters[:0]

	// Connect the left and right routers to the output joiners. Each joiner
	// corresponds to a hash bucket.
	for bucket := 0; bucket < len(nodes); bucket++ {
		pIdx := pIdxStart + ProcessorIdx(bucket)

		// Connect left routers to the processor's first input. Currently the
		// join node doesn't care about the orderings of the left and right
		// results.
		p.MergeResultStreams(leftRouters, bucket, leftMergeOrd, pIdx, 0)
		// Connect right routers to the processor's second input if it has one.
		p.MergeResultStreams(rightRouters, bucket, rightMergeOrd, pIdx, 1)

		p.ResultRouters = append(p.ResultRouters, pIdx)
	}
}
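
// For example (hypothetical two-node hash join), with nodes [1 2] each side's
// result routers are switched to BY_HASH on the equality columns; the joiner
// on node 1 reads router slot 0 of every source and the joiner on node 2
// reads slot 1, so rows with equal keys always meet at the same joiner.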

// AddDistinctSetOpStage creates a distinct stage and a join stage to implement
// INTERSECT and EXCEPT plans.
//
// TODO(abhimadan): If there's a strong key on the left or right side, we
// can elide the distinct stage on that side.
func (p *PhysicalPlan) AddDistinctSetOpStage(
	nodes []roachpb.NodeID,
	joinCore execinfrapb.ProcessorCoreUnion,
	distinctCores []execinfrapb.ProcessorCoreUnion,
	post execinfrapb.PostProcessSpec,
	eqCols []uint32,
	leftTypes, rightTypes []*types.T,
	leftMergeOrd, rightMergeOrd execinfrapb.Ordering,
	leftRouters, rightRouters []ProcessorIdx,
) {
	const numSides = 2
	inputResultTypes := [numSides][]*types.T{leftTypes, rightTypes}
	inputMergeOrderings := [numSides]execinfrapb.Ordering{leftMergeOrd, rightMergeOrd}
	inputResultRouters := [numSides][]ProcessorIdx{leftRouters, rightRouters}

	// Create distinct stages for the left and right sides, where left and right
	// sources are sent by hash to the node which will contain the join
	// processor. The distinct stage must come before the join stage for EXCEPT
	// queries to produce correct results (e.g., (VALUES (1),(1),(2)) EXCEPT
	// (VALUES (1)) would return (1),(2) instead of (2) if there were no
	// distinct processor before the EXCEPT ALL join).
	distinctIdxStart := len(p.Processors)
	distinctProcs := make(map[roachpb.NodeID][]ProcessorIdx)

	for side, types := range inputResultTypes {
		distinctStageID := p.NewStageID()
		for _, n := range nodes {
			proc := Processor{
				Node: n,
				Spec: execinfrapb.ProcessorSpec{
					Input: []execinfrapb.InputSyncSpec{
						{ColumnTypes: types},
					},
					Core:    distinctCores[side],
					Post:    execinfrapb.PostProcessSpec{},
					Output:  []execinfrapb.OutputRouterSpec{{Type: execinfrapb.OutputRouterSpec_PASS_THROUGH}},
					StageID: distinctStageID,
				},
			}
			pIdx := p.AddProcessor(proc)
			distinctProcs[n] = append(distinctProcs[n], pIdx)
		}
	}

	if len(nodes) > 1 {
		// Set up the left routers.
		for _, resultProc := range leftRouters {
			p.Processors[resultProc].Spec.Output[0] = execinfrapb.OutputRouterSpec{
				Type:        execinfrapb.OutputRouterSpec_BY_HASH,
				HashColumns: eqCols,
			}
		}
		// Set up the right routers.
		for _, resultProc := range rightRouters {
			p.Processors[resultProc].Spec.Output[0] = execinfrapb.OutputRouterSpec{
				Type:        execinfrapb.OutputRouterSpec_BY_HASH,
				HashColumns: eqCols,
			}
		}
	}

	// Connect the left and right streams to the distinct processors.
	for side, routers := range inputResultRouters {
		// Get the processor index offset for the current side.
		sideOffset := side * len(nodes)
		for bucket := 0; bucket < len(nodes); bucket++ {
			pIdx := ProcessorIdx(distinctIdxStart + sideOffset + bucket)
			p.MergeResultStreams(routers, bucket, inputMergeOrderings[side], pIdx, 0)
		}
	}

	// Create a join stage, where the distinct processors on the same node are
	// connected to a join processor.
	joinStageID := p.NewStageID()
	p.ResultRouters = p.ResultRouters[:0]

	for _, n := range nodes {
		proc := Processor{
			Node: n,
			Spec: execinfrapb.ProcessorSpec{
				Input: []execinfrapb.InputSyncSpec{
					{ColumnTypes: leftTypes},
					{ColumnTypes: rightTypes},
				},
				Core:    joinCore,
				Post:    post,
				Output:  []execinfrapb.OutputRouterSpec{{Type: execinfrapb.OutputRouterSpec_PASS_THROUGH}},
				StageID: joinStageID,
			},
		}
		pIdx := p.AddProcessor(proc)

		for side, distinctProc := range distinctProcs[n] {
			p.Streams = append(p.Streams, Stream{
				SourceProcessor:  distinctProc,
				SourceRouterSlot: 0,
				DestProcessor:    pIdx,
				DestInput:        side,
			})
		}

		p.ResultRouters = append(p.ResultRouters, pIdx)
	}
}
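
// For example (hypothetical two-node INTERSECT), AddDistinctSetOpStage with
// nodes [1 2] adds four distinct processors: the left-side pair on nodes 1
// and 2, then the right-side pair. distinctProcs[n] therefore holds
// [leftDistinct, rightDistinct] for each node n, and the side index doubles
// as the joiner's DestInput, wiring left to input 0 and right to input 1.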

// EnsureSingleStreamPerNode goes over the ResultRouters and merges any group
// of routers that are on the same node, using a no-op processor.
//
// TODO(radu): a no-op processor is not ideal if the next processor is on the
// same node. A fix for that is much more complicated, requiring remembering
// extra state in the PhysicalPlan.
func (p *PhysicalPlan) EnsureSingleStreamPerNode() {
	// Fast path - check if we need to do anything.
	var nodes util.FastIntSet
	var foundDuplicates bool
	for _, pIdx := range p.ResultRouters {
		proc := &p.Processors[pIdx]
		if nodes.Contains(int(proc.Node)) {
			foundDuplicates = true
			break
		}
		nodes.Add(int(proc.Node))
	}
	if !foundDuplicates {
		return
	}
	streams := make([]ProcessorIdx, 0, 2)

	for i := 0; i < len(p.ResultRouters); i++ {
		pIdx := p.ResultRouters[i]
		node := p.Processors[p.ResultRouters[i]].Node
		streams = append(streams[:0], pIdx)
		// Find all streams on the same node.
		for j := i + 1; j < len(p.ResultRouters); {
			if p.Processors[p.ResultRouters[j]].Node == node {
				streams = append(streams, p.ResultRouters[j])
				// Remove the stream.
				copy(p.ResultRouters[j:], p.ResultRouters[j+1:])
				p.ResultRouters = p.ResultRouters[:len(p.ResultRouters)-1]
			} else {
				j++
			}
		}
		if len(streams) == 1 {
			// Nothing to do for this node.
			continue
		}

		// Merge the streams into a no-op processor.
		proc := Processor{
			Node: node,
			Spec: execinfrapb.ProcessorSpec{
				Input: []execinfrapb.InputSyncSpec{{
					// The other fields will be filled in by MergeResultStreams.
					ColumnTypes: p.ResultTypes,
				}},
				Core:   execinfrapb.ProcessorCoreUnion{Noop: &execinfrapb.NoopCoreSpec{}},
				Output: []execinfrapb.OutputRouterSpec{{Type: execinfrapb.OutputRouterSpec_PASS_THROUGH}},
			},
		}
		mergedProcIdx := p.AddProcessor(proc)
		p.MergeResultStreams(streams, 0 /* sourceRouterSlot */, p.MergeOrdering, mergedProcIdx, 0 /* destInput */)
		p.ResultRouters[i] = mergedProcIdx
	}
}
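
// For example (hypothetical routers), if ResultRouters point at processors on
// nodes [1 2 1], the two node-1 streams are merged into a new no-op processor
// on node 1: the third router is removed, the first is replaced by the no-op's
// index, and the node-2 router is left untouched.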