github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/execinfra/processorsbase.go

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package execinfra
    12  
    13  import (
    14  	"context"
    15  	"math"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    22  	"github.com/cockroachdb/cockroach/pkg/util"
    23  	"github.com/cockroachdb/cockroach/pkg/util/log"
    24  	"github.com/cockroachdb/cockroach/pkg/util/mon"
    25  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    26  	"github.com/cockroachdb/errors"
    27  	opentracing "github.com/opentracing/opentracing-go"
    28  )
    29  
    30  // Processor is a common interface implemented by all processors, used by the
    31  // higher-level flow orchestration code.
    32  type Processor interface {
    33  	// OutputTypes returns the column types of the results (that are to be fed
    34  	// through an output router).
    35  	OutputTypes() []*types.T
    36  
    37  	// Run is the main loop of the processor.
    38  	Run(context.Context)
    39  }
    40  
    41  // ProcOutputHelper is a helper type that performs filtering and projection on
    42  // the output of a processor.
    43  type ProcOutputHelper struct {
    44  	numInternalCols int
    45  	// output can be optionally passed in for use with EmitRow and
    46  	// rowexec.emitHelper.
    47  	// If output is nil, one can invoke ProcessRow to obtain the
    48  	// post-processed row directly.
    49  	output   RowReceiver
    50  	RowAlloc sqlbase.EncDatumRowAlloc
    51  
    52  	filter *ExprHelper
    53  	// renderExprs has length > 0 if we have a rendering. Only one of renderExprs
    54  	// and outputCols can be set.
    55  	renderExprs []ExprHelper
    56  	// outputCols is non-nil if we have a projection. Only one of renderExprs and
    57  	// outputCols can be set. Note that 0-length projections are possible, in
    58  	// which case outputCols will be 0-length but non-nil.
    59  	outputCols []uint32
    60  
    61  	outputRow sqlbase.EncDatumRow
    62  
    63  	// OutputTypes is the schema of the rows produced by the processor after
    64  	// post-processing (i.e. the rows that are pushed through a router).
    65  	//
    66  	// If renderExprs is set, these types correspond to the types of those
    67  	// expressions.
    68  	// If outputCols is set, these types correspond to the types of
    69  	// those columns.
    70  	// If neither is set, this is the internal schema of the processor.
    71  	OutputTypes []*types.T
    72  
    73  	// offset is the number of rows that are suppressed.
    74  	offset uint64
    75  	// maxRowIdx is the number of rows after which we can stop (offset + limit),
    76  	// or MaxUint64 if there is no limit.
    77  	maxRowIdx uint64
    78  
    79  	rowIdx uint64
    80  }
    81  
    82  // Reset resets this ProcOutputHelper, retaining allocated memory in its slices.
    83  func (h *ProcOutputHelper) Reset() {
    84  	*h = ProcOutputHelper{
    85  		renderExprs: h.renderExprs[:0],
    86  		OutputTypes: h.OutputTypes[:0],
    87  	}
    88  }
    89  
    90  // Init sets up a ProcOutputHelper. The types describe the internal schema of
    91  // the processor (as described for each processor core spec); they can be
    92  // omitted if there is no filtering expression.
    93  // Note that the types slice may be stored directly; the caller should not
    94  // modify it.
    95  func (h *ProcOutputHelper) Init(
    96  	post *execinfrapb.PostProcessSpec, typs []*types.T, evalCtx *tree.EvalContext, output RowReceiver,
    97  ) error {
    98  	if !post.Projection && len(post.OutputColumns) > 0 {
    99  		return errors.Errorf("post-processing has projection unset but output columns set: %s", post)
   100  	}
   101  	if post.Projection && len(post.RenderExprs) > 0 {
   102  		return errors.Errorf("post-processing has both projection and rendering: %s", post)
   103  	}
   104  	h.output = output
   105  	h.numInternalCols = len(typs)
   106  	if post.Filter != (execinfrapb.Expression{}) {
   107  		h.filter = &ExprHelper{}
   108  		if err := h.filter.Init(post.Filter, typs, evalCtx); err != nil {
   109  			return err
   110  		}
   111  	}
   112  	if post.Projection {
   113  		for _, col := range post.OutputColumns {
   114  			if int(col) >= h.numInternalCols {
   115  				return errors.Errorf("invalid output column %d (only %d available)", col, h.numInternalCols)
   116  			}
   117  		}
   118  		h.outputCols = post.OutputColumns
   119  		if h.outputCols == nil {
   120  			// nil indicates no projection; use an empty slice.
   121  			h.outputCols = make([]uint32, 0)
   122  		}
   123  		nOutputCols := len(h.outputCols)
   124  		if cap(h.OutputTypes) >= nOutputCols {
   125  			h.OutputTypes = h.OutputTypes[:nOutputCols]
   126  		} else {
   127  			h.OutputTypes = make([]*types.T, nOutputCols)
   128  		}
   129  		for i, c := range h.outputCols {
   130  			h.OutputTypes[i] = typs[c]
   131  		}
   132  	} else if nRenders := len(post.RenderExprs); nRenders > 0 {
   133  		if cap(h.renderExprs) >= nRenders {
   134  			h.renderExprs = h.renderExprs[:nRenders]
   135  		} else {
   136  			h.renderExprs = make([]ExprHelper, nRenders)
   137  		}
   138  		if cap(h.OutputTypes) >= nRenders {
   139  			h.OutputTypes = h.OutputTypes[:nRenders]
   140  		} else {
   141  			h.OutputTypes = make([]*types.T, nRenders)
   142  		}
   143  		for i, expr := range post.RenderExprs {
   144  			h.renderExprs[i] = ExprHelper{}
   145  			if err := h.renderExprs[i].Init(expr, typs, evalCtx); err != nil {
   146  				return err
   147  			}
   148  			h.OutputTypes[i] = h.renderExprs[i].Expr.ResolvedType()
   149  		}
   150  	} else {
   151  		// No rendering or projection.
   152  		if cap(h.OutputTypes) >= len(typs) {
   153  			h.OutputTypes = h.OutputTypes[:len(typs)]
   154  		} else {
   155  			h.OutputTypes = make([]*types.T, len(typs))
   156  		}
   157  		copy(h.OutputTypes, typs)
   158  	}
   159  	if h.outputCols != nil || len(h.renderExprs) > 0 {
   160  		// We're rendering or projecting, so allocate an output row.
   161  		h.outputRow = h.RowAlloc.AllocRow(len(h.OutputTypes))
   162  	}
   163  
   164  	h.offset = post.Offset
   165  	if post.Limit == 0 || post.Limit >= math.MaxUint64-h.offset {
   166  		h.maxRowIdx = math.MaxUint64
   167  	} else {
   168  		h.maxRowIdx = h.offset + post.Limit
   169  	}
   170  
   171  	return nil
   172  }
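         // A minimal usage sketch (not part of the original file) of how Init and
         // ProcessRow fit together. It assumes a PostProcessSpec that projects the
         // first column and keeps at most 10 rows, and that typs, evalCtx, ctx, and
         // row are already in scope:
         //
         //   var out ProcOutputHelper
         //   post := &execinfrapb.PostProcessSpec{
         //     Projection:    true,
         //     OutputColumns: []uint32{0},
         //     Limit:         10,
         //   }
         //   // With a nil output, ProcessRow is used to obtain the post-processed
         //   // rows directly (see the comment on the output field).
         //   if err := out.Init(post, typs, evalCtx, nil /* output */); err != nil {
         //     return err
         //   }
         //   outRow, moreRowsOK, err := out.ProcessRow(ctx, row)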
   173  
   174  // NeededColumns calculates the set of internal processor columns that are
   175  // actually used by the post-processing stage.
   176  func (h *ProcOutputHelper) NeededColumns() (colIdxs util.FastIntSet) {
   177  	if h.outputCols == nil && len(h.renderExprs) == 0 {
   178  		// No projection or rendering; all columns are needed.
   179  		colIdxs.AddRange(0, h.numInternalCols-1)
   180  		return colIdxs
   181  	}
   182  
   183  	// Add all explicit output columns.
   184  	for _, c := range h.outputCols {
   185  		colIdxs.Add(int(c))
   186  	}
   187  
   188  	for i := 0; i < h.numInternalCols; i++ {
   189  		// See if filter requires this column.
   190  		if h.filter != nil && h.filter.Vars.IndexedVarUsed(i) {
   191  			colIdxs.Add(i)
   192  			continue
   193  		}
   194  
   195  		// See if render expressions require this column.
   196  		for j := range h.renderExprs {
   197  			if h.renderExprs[j].Vars.IndexedVarUsed(i) {
   198  				colIdxs.Add(i)
   199  				break
   200  			}
   201  		}
   202  	}
   203  
   204  	return colIdxs
   205  }
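         // For illustration (a hypothetical configuration, not taken from this file):
         // with outputCols = []uint32{0} and a filter expression that references the
         // third internal column, the returned set contains exactly columns 0 and 2:
         //
         //   needed := h.NeededColumns()
         //   // needed.Contains(0) == true; needed.Contains(2) == true
         //   // Columns outside the set need not be decoded by the processor.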
   206  
   207  // EmitRow sends a row through the post-processing stage. The same row can be
   208  // reused.
   209  //
   210  // It returns the consumer's status that was observed when pushing this row. If
   211  // an error is returned, it's coming from the ProcOutputHelper's filtering or
   212  // rendering processing; the output has not been closed and it's the caller's
   213  // responsibility to push the error to the output.
   214  //
   215  // Note: check out rowexec.emitHelper() for a useful wrapper.
   216  func (h *ProcOutputHelper) EmitRow(
   217  	ctx context.Context, row sqlbase.EncDatumRow,
   218  ) (ConsumerStatus, error) {
   219  	if h.output == nil {
   220  		panic("output RowReceiver not initialized for emitting rows")
   221  	}
   222  
   223  	outRow, ok, err := h.ProcessRow(ctx, row)
   224  	if err != nil {
   225  		// The status doesn't matter.
   226  		return NeedMoreRows, err
   227  	}
   228  	if outRow == nil {
   229  		if ok {
   230  			return NeedMoreRows, nil
   231  		}
   232  		return DrainRequested, nil
   233  	}
   234  
   235  	if log.V(3) {
   236  		log.InfofDepth(ctx, 1, "pushing row %s", outRow.String(h.OutputTypes))
   237  	}
   238  	if r := h.output.Push(outRow, nil); r != NeedMoreRows {
   239  		log.VEventf(ctx, 1, "no more rows required. drain requested: %t",
   240  			r == DrainRequested)
   241  		return r, nil
   242  	}
   243  	if h.rowIdx == h.maxRowIdx {
   244  		log.VEventf(ctx, 1, "hit row limit; asking producer to drain")
   245  		return DrainRequested, nil
   246  	}
   247  	status := NeedMoreRows
   248  	if !ok {
   249  		status = DrainRequested
   250  	}
   251  	return status, nil
   252  }
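         // A rough usage sketch, assuming a hand-rolled production loop rather than
         // rowexec.emitHelper, and a hypothetical nextRow() producer:
         //
         //   for {
         //     row := nextRow()
         //     if row == nil {
         //       break
         //     }
         //     status, err := h.EmitRow(ctx, row)
         //     if err != nil {
         //       // EmitRow doesn't close the output on error; push the error ourselves.
         //       h.Output().Push(nil /* row */, &execinfrapb.ProducerMetadata{Err: err})
         //       break
         //     }
         //     if status != NeedMoreRows {
         //       // DrainRequested (or ConsumerClosed): stop producing rows.
         //       break
         //     }
         //   }
         //   h.Close()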
   253  
    254  // ProcessRow sends the given row through the post-processing stage and returns
   255  // the post-processed row. Results from ProcessRow aren't safe past the next call
   256  // to ProcessRow.
   257  //
   258  // The moreRowsOK retval is true if more rows can be processed, false if the
   259  // limit has been reached (if there's a limit). Upon seeing a false value, the
   260  // caller is expected to start draining. Note that both a row and
   261  // moreRowsOK=false can be returned at the same time: the row that satisfies the
    262  // limit is returned together with moreRowsOK=false (i.e. a drain request). In that case,
   263  // the caller is supposed to both deal with the row and start draining.
   264  func (h *ProcOutputHelper) ProcessRow(
   265  	ctx context.Context, row sqlbase.EncDatumRow,
   266  ) (_ sqlbase.EncDatumRow, moreRowsOK bool, _ error) {
   267  	if h.rowIdx >= h.maxRowIdx {
   268  		return nil, false, nil
   269  	}
   270  
   271  	if h.filter != nil {
   272  		// Filtering.
   273  		passes, err := h.filter.EvalFilter(row)
   274  		if err != nil {
   275  			return nil, false, err
   276  		}
   277  		if !passes {
   278  			if log.V(4) {
   279  				log.Infof(ctx, "filtered out row %s", row.String(h.filter.Types))
   280  			}
   281  			return nil, true, nil
   282  		}
   283  	}
   284  	h.rowIdx++
   285  	if h.rowIdx <= h.offset {
   286  		// Suppress row.
   287  		return nil, true, nil
   288  	}
   289  
   290  	if len(h.renderExprs) > 0 {
   291  		// Rendering.
   292  		for i := range h.renderExprs {
   293  			datum, err := h.renderExprs[i].Eval(row)
   294  			if err != nil {
   295  				return nil, false, err
   296  			}
   297  			h.outputRow[i] = sqlbase.DatumToEncDatum(h.OutputTypes[i], datum)
   298  		}
   299  	} else if h.outputCols != nil {
   300  		// Projection.
   301  		for i, col := range h.outputCols {
   302  			h.outputRow[i] = row[col]
   303  		}
   304  	} else {
   305  		// No rendering or projection.
   306  		return row, h.rowIdx < h.maxRowIdx, nil
   307  	}
   308  
   309  	// If this row satisfies the limit, the caller is told to drain.
   310  	return h.outputRow, h.rowIdx < h.maxRowIdx, nil
   311  }
   312  
   313  // Output returns the output of the ProcOutputHelper.
   314  func (h *ProcOutputHelper) Output() RowReceiver {
   315  	return h.output
   316  }
   317  
   318  // Close signals to the output that there will be no more rows.
   319  func (h *ProcOutputHelper) Close() {
   320  	h.output.ProducerDone()
   321  }
   322  
   323  // consumerClosed stops output of additional rows from ProcessRow.
   324  func (h *ProcOutputHelper) consumerClosed() {
   325  	h.rowIdx = h.maxRowIdx
   326  }
   327  
   328  // ProcessorConstructor is a function that creates a Processor. It is
   329  // abstracted away so that we could create mixed flows (i.e. a vectorized flow
   330  // with wrapped processors) without bringing a dependency on sql/rowexec
   331  // package into sql/colexec package.
   332  type ProcessorConstructor func(
   333  	ctx context.Context,
   334  	flowCtx *FlowCtx,
   335  	processorID int32,
   336  	core *execinfrapb.ProcessorCoreUnion,
   337  	post *execinfrapb.PostProcessSpec,
   338  	inputs []RowSource,
   339  	outputs []RowReceiver,
   340  	localProcessors []LocalProcessor,
   341  ) (Processor, error)
   342  
   343  // ProcessorBase is supposed to be embedded by Processors. It provides
   344  // facilities for dealing with filtering and projection (through a
   345  // ProcOutputHelper) and for implementing the RowSource interface (draining,
   346  // trailing metadata).
   347  //
    348  // If a Processor implements the RowSource interface, its implementation is
   349  // expected to look something like this:
   350  //
   351  //   // concatProcessor concatenates rows from two sources (first returns rows
   352  //   // from the left, then from the right).
   353  //   type concatProcessor struct {
   354  //     ProcessorBase
   355  //     l, r RowSource
   356  //
   357  //     // leftConsumed is set once we've exhausted the left input; once set, we start
   358  //     // consuming the right input.
   359  //     leftConsumed bool
   360  //   }
   361  //
   362  //   func newConcatProcessor(
    363  //     flowCtx *FlowCtx, l RowSource, r RowSource, post *PostProcessSpec, output RowReceiver,
   364  //   ) (*concatProcessor, error) {
   365  //     p := &concatProcessor{l: l, r: r}
   366  //     if err := p.init(
    367  //       post, l.OutputTypes(), flowCtx, output,
   368  //       // We pass the inputs to the helper, to be consumed by DrainHelper() later.
   369  //       ProcStateOpts{
   370  //         InputsToDrain: []RowSource{l, r},
   371  //         // If the proc needed to return any metadata at the end other than the
   372  //         // tracing info, or if it needed to cleanup any resources other than those
   373  //         // handled by InternalClose() (say, close some memory account), it'd pass
   374  //         // a TrailingMetaCallback here.
   375  //       },
   376  //     ); err != nil {
   377  //       return nil, err
   378  //     }
   379  //     return p, nil
   380  //   }
   381  //
   382  //   // Start is part of the RowSource interface.
   383  //   func (p *concatProcessor) Start(ctx context.Context) context.Context {
   384  //     p.l.Start(ctx)
   385  //     p.r.Start(ctx)
   386  //     return p.StartInternal(ctx, concatProcName)
   387  //   }
   388  //
   389  //   // Next is part of the RowSource interface.
   390  //   func (p *concatProcessor) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) {
   391  //     // Loop while we haven't produced a row or a metadata record. We loop around
    392  //     // in several cases, including when the filtering rejects an incoming row.
   393  //     for p.State == StateRunning {
   394  //       var row sqlbase.EncDatumRow
   395  //       var meta *ProducerMetadata
   396  //       if !p.leftConsumed {
   397  //         row, meta = p.l.Next()
   398  //       } else {
   399  //         row, meta = p.r.Next()
   400  //       }
   401  //
   402  //       if meta != nil {
   403  //         // If we got an error, we need to forward it along and remember that we're
   404  //         // draining.
   405  //         if meta.Err != nil {
   406  //           p.MoveToDraining(nil /* err */)
   407  //         }
   408  //         return nil, meta
   409  //       }
   410  //       if row == nil {
   411  //         if !p.leftConsumed {
   412  //           p.leftConsumed = true
   413  //         } else {
   414  //           // In this case we know that both inputs are consumed, so we could
   415  //           // transition directly to StateTrailingMeta, but implementations are
   416  //           // encouraged to just use MoveToDraining() for uniformity; DrainHelper()
    417  //           // will transition to StateTrailingMeta quickly.
   418  //           p.MoveToDraining(nil /* err */)
   419  //           break
   420  //         }
   421  //         continue
   422  //       }
   423  //
   424  //       if outRow := p.ProcessRowHelper(row); outRow != nil {
   425  //         return outRow, nil
   426  //       }
   427  //     }
   428  //     return nil, p.DrainHelper()
   429  //   }
   430  //
   431  //   // ConsumerDone is part of the RowSource interface.
   432  //   func (p *concatProcessor) ConsumerDone() {
   433  //     p.MoveToDraining(nil /* err */)
   434  //   }
   435  //
   436  //   // ConsumerClosed is part of the RowSource interface.
   437  //   func (p *concatProcessor) ConsumerClosed() {
   438  //     // The consumer is done, Next() will not be called again.
   439  //     p.InternalClose()
   440  //   }
   441  //
   442  type ProcessorBase struct {
   443  	self RowSource
   444  
   445  	processorID int32
   446  
   447  	Out     ProcOutputHelper
   448  	FlowCtx *FlowCtx
   449  
   450  	// EvalCtx is used for expression evaluation. It overrides the one in flowCtx.
   451  	EvalCtx *tree.EvalContext
   452  
   453  	// MemMonitor is the processor's memory monitor.
   454  	MemMonitor *mon.BytesMonitor
   455  
   456  	// Closed is set by InternalClose(). Once set, the processor's tracing span
   457  	// has been closed.
   458  	Closed bool
   459  
   460  	// Ctx and span contain the tracing state while the processor is active
   461  	// (i.e. hasn't been closed). Initialized using flowCtx.Ctx (which should not be otherwise
   462  	// used).
   463  	Ctx  context.Context
   464  	span opentracing.Span
   465  	// origCtx is the context from which ctx was derived. InternalClose() resets
   466  	// ctx to this.
   467  	origCtx context.Context
   468  
   469  	State procState
   470  
   471  	// FinishTrace, if set, will be called before getting the trace data from
   472  	// the span and adding the recording to the trailing metadata. Useful for
   473  	// adding any extra information (e.g. stats) that should be captured in a
   474  	// trace.
   475  	FinishTrace func()
   476  
   477  	// trailingMetaCallback, if set, will be called by moveToTrailingMeta(). The
   478  	// callback is expected to close all inputs, do other cleanup on the processor
   479  	// (including calling InternalClose()) and generate the trailing meta that
   480  	// needs to be returned to the consumer. As a special case,
   481  	// moveToTrailingMeta() handles getting the tracing information into
   482  	// trailingMeta, so the callback doesn't need to worry about that.
   483  	//
   484  	// If no callback is specified, InternalClose() will be called automatically.
   485  	// So, if no trailing metadata other than the trace needs to be returned (and
   486  	// other than what has otherwise been manually put in trailingMeta) and no
   487  	// closing other than InternalClose is needed, then no callback needs to be
   488  	// specified.
   489  	trailingMetaCallback func(context.Context) []execinfrapb.ProducerMetadata
   490  	// trailingMeta is scratch space where metadata is stored to be returned
   491  	// later.
   492  	trailingMeta []execinfrapb.ProducerMetadata
   493  
   494  	// inputsToDrain, if not empty, contains inputs to be drained by
   495  	// DrainHelper(). MoveToDraining() calls ConsumerDone() on them,
    496  // InternalClose() calls ConsumerClosed() on them.
   497  	//
   498  	// ConsumerDone() is called on all inputs at once and then inputs are drained
   499  	// one by one (in StateDraining, inputsToDrain[0] is the one currently being
   500  	// drained).
   501  	inputsToDrain []RowSource
   502  }
   503  
   504  // Reset resets this ProcessorBase, retaining allocated memory in slices.
   505  func (pb *ProcessorBase) Reset() {
   506  	pb.Out.Reset()
   507  	*pb = ProcessorBase{
   508  		Out:           pb.Out,
   509  		trailingMeta:  pb.trailingMeta[:0],
   510  		inputsToDrain: pb.inputsToDrain[:0],
   511  	}
   512  }
   513  
   514  // procState represents the standard states that a processor can be in. These
   515  // states are relevant when the processor is using the draining utilities in
   516  // ProcessorBase.
   517  type procState int
   518  
   519  //go:generate stringer -type=procState
   520  const (
   521  	// StateRunning is the common state of a processor: it's producing rows for
   522  	// its consumer and forwarding metadata from its input. Different processors
   523  	// might have sub-states internally.
   524  	//
   525  	// If the consumer calls ConsumerDone or if the ProcOutputHelper.maxRowIdx is
   526  	// reached, then the processor will transition to StateDraining. If the input
   527  	// is exhausted, then the processor can transition to StateTrailingMeta
    528  // directly, although most processors go through StateDraining.
   529  	StateRunning procState = iota
   530  
   531  	// StateDraining is the state in which the processor is forwarding metadata
   532  	// from its input and otherwise ignoring all rows. Once the input is
   533  	// exhausted, the processor will transition to StateTrailingMeta.
   534  	//
   535  	// In StateDraining, processors are required to swallow
   536  	// ReadWithinUncertaintyIntervalErrors received from its sources. We're
   537  	// already draining, so we don't care about whatever data generated this
   538  	// uncertainty error. Besides generally seeming like a good idea, doing this
   539  	// allows us to offer a nice guarantee to SQL clients: a read-only query that
   540  	// produces at most one row, run as an implicit txn, never produces retriable
   541  	// errors, regardless of the size of the row being returned (in relation to
   542  	// the size of the result buffer on the connection). One would naively expect
   543  	// that to be true: either the error happens before any rows have been
   544  	// delivered to the client, in which case the auto-retries kick in, or, if a
   545  	// row has been delivered, then the query is done and so how can there be an
   546  	// error? What our naive friend is ignoring is that, if it weren't for this
   547  	// code, it'd be possible for a retriable error to sneak in after the query's
   548  	// limit has been satisfied but while processors are still draining. Note
   549  	// that uncertainty errors are not retried automatically by the leaf
    550  // TxnCoordSenders (i.e. by the refresh txn interceptor).
   551  	//
   552  	// Other categories of errors might be safe to ignore too; however we
   553  	// can't ignore all of them. Generally, we need to ensure that all the
   554  	// trailing metadata (e.g. LeafTxnFinalState's) make it to the gateway for
   555  	// successful flows. If an error is telling us that some metadata might
   556  	// have been dropped, we can't ignore that.
   557  	StateDraining
   558  
   559  	// StateTrailingMeta is the state in which the processor is outputting final
   560  	// metadata such as the tracing information or the LeafTxnFinalState. Once all the
   561  	// trailing metadata has been produced, the processor transitions to
   562  	// StateExhausted.
   563  	StateTrailingMeta
   564  
   565  	// StateExhausted is the state of a processor that has no more rows or
   566  	// metadata to produce.
   567  	StateExhausted
   568  )
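         // To summarize the states above, the typical lifecycle is
         //
         //   StateRunning -> StateDraining -> StateTrailingMeta -> StateExhausted
         //
         // with StateDraining skipped when a processor has no inputs to drain.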
   569  
   570  // MoveToDraining switches the processor to the StateDraining. Only metadata is
   571  // returned from now on. In this state, the processor is expected to drain its
   572  // inputs (commonly by using DrainHelper()).
   573  //
    574  // If the processor has no input (ProcStateOpts.InputsToDrain was not specified
    575  // at Init() time), then we move straight to StateTrailingMeta.
   576  //
   577  // An error can be optionally passed. It will be the first piece of metadata
   578  // returned by DrainHelper().
   579  func (pb *ProcessorBase) MoveToDraining(err error) {
   580  	if pb.State != StateRunning {
   581  		// Calling MoveToDraining in any state is allowed in order to facilitate the
   582  		// ConsumerDone() implementations that just call this unconditionally.
   583  		// However, calling it with an error in states other than StateRunning is
   584  		// not permitted.
   585  		if err != nil {
   586  			log.Fatalf(pb.Ctx, "MoveToDraining called in state %s with err: %s",
   587  				pb.State, err)
   588  		}
   589  		return
   590  	}
   591  
   592  	if err != nil {
   593  		pb.trailingMeta = append(pb.trailingMeta, execinfrapb.ProducerMetadata{Err: err})
   594  	}
   595  	if len(pb.inputsToDrain) > 0 {
   596  		// We go to StateDraining here. DrainHelper() will transition to
   597  		// StateTrailingMeta when the inputs are drained (including if the inputs
   598  		// are already drained).
   599  		pb.State = StateDraining
   600  		for _, input := range pb.inputsToDrain {
   601  			input.ConsumerDone()
   602  		}
   603  	} else {
   604  		pb.moveToTrailingMeta()
   605  	}
   606  }
   607  
    608  // DrainHelper is supposed to be used in StateDraining and StateTrailingMeta.
   609  // It deals with optionally draining an input and returning trailing meta. It
   610  // also moves from StateDraining to StateTrailingMeta when appropriate.
   611  func (pb *ProcessorBase) DrainHelper() *execinfrapb.ProducerMetadata {
   612  	if pb.State == StateRunning {
   613  		log.Fatal(pb.Ctx, "drain helper called in StateRunning")
   614  	}
   615  
   616  	// trailingMeta always has priority; it seems like a good idea because it
   617  	// causes metadata to be sent quickly after it is produced (e.g. the error
   618  	// passed to MoveToDraining()).
   619  	if len(pb.trailingMeta) > 0 {
   620  		return pb.popTrailingMeta()
   621  	}
   622  
   623  	if pb.State != StateDraining {
   624  		return nil
   625  	}
   626  
   627  	// Ignore all rows; only return meta.
   628  	for {
   629  		input := pb.inputsToDrain[0]
   630  
   631  		row, meta := input.Next()
   632  		if row == nil && meta == nil {
   633  			pb.inputsToDrain = pb.inputsToDrain[1:]
   634  			if len(pb.inputsToDrain) == 0 {
   635  				pb.moveToTrailingMeta()
   636  				return pb.popTrailingMeta()
   637  			}
   638  			continue
   639  		}
   640  		if meta != nil {
   641  			// Swallow ReadWithinUncertaintyIntervalErrors. See comments on
   642  			// StateDraining.
   643  			if err := meta.Err; err != nil {
   644  				// We only look for UnhandledRetryableErrors. Local reads (which would
   645  				// be transformed by the Root TxnCoordSender into
   646  				// TransactionRetryWithProtoRefreshErrors) don't have any uncertainty.
   647  				if ure := (*roachpb.UnhandledRetryableError)(nil); errors.As(err, &ure) {
   648  					uncertain := ure.PErr.Detail.GetReadWithinUncertaintyInterval()
   649  					if uncertain != nil {
   650  						continue
   651  					}
   652  				}
   653  			}
   654  			return meta
   655  		}
   656  	}
   657  }
   658  
   659  // popTrailingMeta peels off one piece of trailing metadata or advances to
   660  // StateExhausted if there's no more trailing metadata.
   661  func (pb *ProcessorBase) popTrailingMeta() *execinfrapb.ProducerMetadata {
   662  	if len(pb.trailingMeta) > 0 {
   663  		meta := &pb.trailingMeta[0]
   664  		pb.trailingMeta = pb.trailingMeta[1:]
   665  		return meta
   666  	}
   667  	pb.State = StateExhausted
   668  	return nil
   669  }
   670  
   671  // moveToTrailingMeta switches the processor to the "trailing meta" state: only
   672  // trailing metadata is returned from now on. For simplicity, processors are
   673  // encouraged to always use MoveToDraining() instead of this method, even when
    674  // there's nothing to drain. MoveToDraining() or DrainHelper() will internally call
   675  // moveToTrailingMeta().
   676  //
   677  // trailingMetaCallback, if any, is called; it is expected to close the
   678  // processor's inputs.
   679  //
   680  // This method is to be called when the processor is done producing rows and
   681  // draining its inputs (if it wants to drain them).
   682  func (pb *ProcessorBase) moveToTrailingMeta() {
   683  	if pb.State == StateTrailingMeta || pb.State == StateExhausted {
   684  		log.Fatalf(pb.Ctx, "moveToTrailingMeta called in state: %s", pb.State)
   685  	}
   686  
   687  	if pb.FinishTrace != nil {
   688  		pb.FinishTrace()
   689  	}
   690  
   691  	pb.State = StateTrailingMeta
   692  	if pb.span != nil {
   693  		if trace := GetTraceData(pb.Ctx); trace != nil {
   694  			pb.trailingMeta = append(pb.trailingMeta, execinfrapb.ProducerMetadata{TraceData: trace})
   695  		}
   696  	}
   697  	// trailingMetaCallback is called after reading the tracing data because it
   698  	// generally calls InternalClose, indirectly, which switches the context and
   699  	// the span.
   700  	if pb.trailingMetaCallback != nil {
   701  		pb.trailingMeta = append(pb.trailingMeta, pb.trailingMetaCallback(pb.Ctx)...)
   702  	} else {
   703  		pb.InternalClose()
   704  	}
   705  }
   706  
   707  // ProcessRowHelper is a wrapper on top of ProcOutputHelper.ProcessRow(). It
   708  // takes care of handling errors and drain requests by moving the processor to
   709  // StateDraining.
   710  //
   711  // It takes a row and returns the row after processing. The return value can be
   712  // nil, in which case the caller shouldn't return anything to its consumer; it
   713  // should continue processing other rows, with the awareness that the processor
   714  // might have been transitioned to the draining phase.
   715  func (pb *ProcessorBase) ProcessRowHelper(row sqlbase.EncDatumRow) sqlbase.EncDatumRow {
   716  	outRow, ok, err := pb.Out.ProcessRow(pb.Ctx, row)
   717  	if err != nil {
   718  		pb.MoveToDraining(err)
   719  		return nil
   720  	}
   721  	if !ok {
   722  		pb.MoveToDraining(nil /* err */)
   723  	}
   724  	// Note that outRow might be nil here.
   725  	// TODO(yuzefovich): there is a problem with this logging when MetadataTest*
   726  	// processors are planned - there is a mismatch between the row and the
   727  	// output types (rendering is added to the stage of test processors and the
   728  	// actual processors that are inputs to the test ones have an unset post
   729  	// processing; I think that we need to set the post processing on the stages
   730  	// of processors below the test ones).
   731  	//if outRow != nil && log.V(3) && pb.Ctx != nil {
   732  	//	log.InfofDepth(pb.Ctx, 1, "pushing row %s", outRow.String(pb.Out.OutputTypes))
   733  	//}
   734  	return outRow
   735  }
   736  
   737  // OutputTypes is part of the processor interface.
   738  func (pb *ProcessorBase) OutputTypes() []*types.T {
   739  	return pb.Out.OutputTypes
   740  }
   741  
   742  // Run is part of the processor interface.
   743  func (pb *ProcessorBase) Run(ctx context.Context) {
   744  	if pb.Out.output == nil {
   745  		panic("processor output not initialized for emitting rows")
   746  	}
   747  	ctx = pb.self.Start(ctx)
   748  	Run(ctx, pb.self, pb.Out.output)
   749  }
   750  
   751  // ProcStateOpts contains fields used by the ProcessorBase's family of functions
   752  // that deal with draining and trailing metadata: the ProcessorBase implements
   753  // generic useful functionality that needs to call back into the Processor.
   754  type ProcStateOpts struct {
   755  	// TrailingMetaCallback, if specified, is a callback to be called by
    756  // moveToTrailingMeta(). See ProcessorBase.trailingMetaCallback.
   757  	TrailingMetaCallback func(context.Context) []execinfrapb.ProducerMetadata
   758  	// InputsToDrain, if specified, will be drained by DrainHelper().
   759  	// MoveToDraining() calls ConsumerDone() on them, InternalClose() calls
   760  	// ConsumerClosed() on them.
   761  	InputsToDrain []RowSource
   762  }
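         // A hedged sketch of filling these in; the memory account p.memAcc is a
         // hypothetical resource not handled by InternalClose(), and p and input are
         // assumed to be in scope:
         //
         //   opts := ProcStateOpts{
         //     InputsToDrain: []RowSource{input},
         //     TrailingMetaCallback: func(ctx context.Context) []execinfrapb.ProducerMetadata {
         //       p.memAcc.Close(ctx)
         //       p.InternalClose()
         //       return nil // nothing beyond the trace needs to be returned
         //     },
         //   }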
   763  
   764  // Init initializes the ProcessorBase.
   765  func (pb *ProcessorBase) Init(
   766  	self RowSource,
   767  	post *execinfrapb.PostProcessSpec,
   768  	types []*types.T,
   769  	flowCtx *FlowCtx,
   770  	processorID int32,
   771  	output RowReceiver,
   772  	memMonitor *mon.BytesMonitor,
   773  	opts ProcStateOpts,
   774  ) error {
   775  	return pb.InitWithEvalCtx(
   776  		self, post, types, flowCtx, flowCtx.NewEvalCtx(), processorID, output, memMonitor, opts,
   777  	)
   778  }
   779  
   780  // InitWithEvalCtx initializes the ProcessorBase with a given EvalContext.
   781  func (pb *ProcessorBase) InitWithEvalCtx(
   782  	self RowSource,
   783  	post *execinfrapb.PostProcessSpec,
   784  	types []*types.T,
   785  	flowCtx *FlowCtx,
   786  	evalCtx *tree.EvalContext,
   787  	processorID int32,
   788  	output RowReceiver,
   789  	memMonitor *mon.BytesMonitor,
   790  	opts ProcStateOpts,
   791  ) error {
   792  	pb.self = self
   793  	pb.FlowCtx = flowCtx
   794  	pb.EvalCtx = evalCtx
   795  	pb.processorID = processorID
   796  	pb.MemMonitor = memMonitor
   797  	pb.trailingMetaCallback = opts.TrailingMetaCallback
   798  	pb.inputsToDrain = opts.InputsToDrain
   799  
   800  	// Hydrate all types used in the processor.
   801  	if err := execinfrapb.HydrateTypeSlice(evalCtx, types); err != nil {
   802  		return err
   803  	}
   804  
   805  	return pb.Out.Init(post, types, pb.EvalCtx, output)
   806  }
   807  
   808  // AddInputToDrain adds an input to drain when moving the processor to a
   809  // draining state.
   810  func (pb *ProcessorBase) AddInputToDrain(input RowSource) {
   811  	pb.inputsToDrain = append(pb.inputsToDrain, input)
   812  }
   813  
   814  // AppendTrailingMeta appends metadata to the trailing metadata without changing
   815  // the state to draining (as opposed to MoveToDraining).
   816  func (pb *ProcessorBase) AppendTrailingMeta(meta execinfrapb.ProducerMetadata) {
   817  	pb.trailingMeta = append(pb.trailingMeta, meta)
   818  }
   819  
   820  // ProcessorSpan creates a child span for a processor (if we are doing any
   821  // tracing). The returned span needs to be finished using tracing.FinishSpan.
   822  func ProcessorSpan(ctx context.Context, name string) (context.Context, opentracing.Span) {
   823  	return tracing.ChildSpanSeparateRecording(ctx, name)
   824  }
   825  
   826  // StartInternal prepares the ProcessorBase for execution. It returns the
   827  // annotated context that's also stored in pb.Ctx.
   828  func (pb *ProcessorBase) StartInternal(ctx context.Context, name string) context.Context {
   829  	pb.origCtx = ctx
   830  	pb.Ctx, pb.span = ProcessorSpan(ctx, name)
   831  	if pb.span != nil && tracing.IsRecording(pb.span) {
   832  		pb.span.SetTag(execinfrapb.FlowIDTagKey, pb.FlowCtx.ID.String())
   833  		pb.span.SetTag(execinfrapb.ProcessorIDTagKey, pb.processorID)
   834  	}
   835  	pb.EvalCtx.Context = pb.Ctx
   836  	return pb.Ctx
   837  }
   838  
   839  // InternalClose helps processors implement the RowSource interface, performing
   840  // common close functionality. Returns true iff the processor was not already
   841  // closed.
   842  //
   843  // Notably, it calls ConsumerClosed() on all the inputsToDrain.
   844  //
   845  //   if pb.InternalClose() {
   846  //     // Perform processor specific close work.
   847  //   }
   848  func (pb *ProcessorBase) InternalClose() bool {
   849  	closing := !pb.Closed
   850  	// Protection around double closing is useful for allowing ConsumerClosed() to
   851  	// be called on processors that have already closed themselves by moving to
   852  	// StateTrailingMeta.
   853  	if closing {
   854  		for _, input := range pb.inputsToDrain {
   855  			input.ConsumerClosed()
   856  		}
   857  
   858  		pb.Closed = true
   859  		tracing.FinishSpan(pb.span)
   860  		pb.span = nil
   861  		// Reset the context so that any incidental uses after this point do not
   862  		// access the finished span.
   863  		pb.Ctx = pb.origCtx
   864  
   865  		// This prevents Next() from returning more rows.
   866  		pb.Out.consumerClosed()
   867  	}
   868  	return closing
   869  }
   870  
   871  // ConsumerDone is part of the RowSource interface.
   872  func (pb *ProcessorBase) ConsumerDone() {
   873  	pb.MoveToDraining(nil /* err */)
   874  }
   875  
   876  // NewMonitor is a utility function used by processors to create a new
   877  // memory monitor with the given name and start it. The returned monitor must
   878  // be closed.
   879  func NewMonitor(ctx context.Context, parent *mon.BytesMonitor, name string) *mon.BytesMonitor {
   880  	monitor := mon.MakeMonitorInheritWithLimit(name, 0 /* limit */, parent)
   881  	monitor.Start(ctx, parent, mon.BoundAccount{})
   882  	return &monitor
   883  }
   884  
   885  // NewLimitedMonitor is a utility function used by processors to create a new
   886  // limited memory monitor with the given name and start it. The returned
    887  // monitor must be closed. The limit is normally determined by
    888  // SettingWorkMemBytes, overridden by config.TestingKnobs.MemoryLimitBytes when
    889  // that is set; it is forced to 1 if config.TestingKnobs.ForceDiskSpill is set.
   890  func NewLimitedMonitor(
   891  	ctx context.Context, parent *mon.BytesMonitor, config *ServerConfig, name string,
   892  ) *mon.BytesMonitor {
   893  	limit := GetWorkMemLimit(config)
   894  	if config.TestingKnobs.ForceDiskSpill {
   895  		limit = 1
   896  	}
   897  	limitedMon := mon.MakeMonitorInheritWithLimit(name, limit, parent)
   898  	limitedMon.Start(ctx, parent, mon.BoundAccount{})
   899  	return &limitedMon
   900  }
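         // A short usage sketch; the processor value p, the monitor name, and the
         // parent monitor/config (flowCtx.EvalCtx.Mon, flowCtx.Cfg) are assumptions
         // about the surrounding processor and FlowCtx, only NewLimitedMonitor's
         // signature comes from this file:
         //
         //   // In the processor's constructor:
         //   p.MemMonitor = NewLimitedMonitor(ctx, flowCtx.EvalCtx.Mon, flowCtx.Cfg, "sorter-limited")
         //
         //   // In the processor's close path (e.g. from its trailingMetaCallback,
         //   // after InternalClose()):
         //   p.MemMonitor.Stop(ctx)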
   901  
   902  // LocalProcessor is a RowSourcedProcessor that needs to be initialized with
   903  // its post processing spec and output row receiver. Most processors can accept
   904  // these objects at creation time.
   905  type LocalProcessor interface {
   906  	RowSourcedProcessor
   907  	// InitWithOutput initializes this processor.
   908  	InitWithOutput(post *execinfrapb.PostProcessSpec, output RowReceiver) error
   909  	// SetInput initializes this LocalProcessor with an input RowSource. Not all
   910  	// LocalProcessors need inputs, but this needs to be called if a
   911  	// LocalProcessor expects to get its data from another RowSource.
   912  	SetInput(ctx context.Context, input RowSource) error
   913  }
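         // A wiring sketch (the concrete localProc, post, output, and input values are
         // hypothetical; the two methods are the ones declared above):
         //
         //   if err := localProc.InitWithOutput(post, output); err != nil {
         //     return err
         //   }
         //   if err := localProc.SetInput(ctx, input); err != nil {
         //     return err
         //   }
         //   // localProc can now be planned and Run like any other Processor.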