github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/execinfra/base.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package execinfra
    12  
    13  import (
    14  	"context"
    15  	"sync"
    16  	"sync/atomic"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/kv"
    19  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    23  	"github.com/cockroachdb/cockroach/pkg/util/log"
    24  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    25  	"github.com/cockroachdb/cockroach/pkg/util/uuid"
    26  	"github.com/cockroachdb/errors"
    27  	"github.com/opentracing/opentracing-go"
    28  )
    29  
    30  // RowChannelBufSize is the default buffer size, in messages, of the channel backing a RowChannel; InitWithNumSenders uses it.
    31  const RowChannelBufSize = 16
    32  
    33  // ConsumerStatus is the type returned by RowReceiver.Push(), informing a
    34  // producer of its consumer's state. Its underlying type is uint32; this file accesses it with sync/atomic uint32 operations.
    35  type ConsumerStatus uint32
    36  
    37  //go:generate stringer -type=ConsumerStatus
    38  
    39  const (
    40  	// NeedMoreRows indicates that the consumer is still expecting more rows. It is the zero value of ConsumerStatus.
    41  	NeedMoreRows ConsumerStatus = iota
    42  	// DrainRequested indicates that the consumer will not process any more data
    43  	// rows, but will still accept trailing metadata from the producer.
    44  	DrainRequested
    45  	// ConsumerClosed indicates that the consumer will not process any more data
    46  	// rows or metadata. This is also commonly returned when the consumer has
    47  	// encountered an error.
    48  	ConsumerClosed
    49  )
    50  
    51  // RowReceiver is any component of a flow that receives rows from another
    52  // component. It can be an input synchronizer, a router, or a mailbox.
    53  type RowReceiver interface {
    54  	// Push sends a record to the consumer of this RowReceiver. Exactly one of
    55  	// row/meta must be specified (i.e. either row needs to be non-nil or meta
    56  	// needs to be non-Empty()). May block.
    57  	//
    58  	// The return value indicates the current status of the consumer. Depending on
    59  	// it, producers are expected to drain or shut down. In all cases,
    60  	// ProducerDone() needs to be called (after draining is done, if draining was
    61  	// requested).
    62  	//
    63  	// Unless specifically permitted by the underlying implementation (see
    64  	// copyingRowReceiver, for example), the sender must not modify the row
    65  	// and the metadata after calling this function.
    66  	//
    67  	// After DrainRequested is returned, it is expected that all future calls only
    68  	// carry metadata (however, that is not enforced and implementations should be
    69  	// prepared to discard non-metadata rows). If ConsumerClosed is returned,
    70  	// implementations have to ignore further calls to Push() (such calls are
    71  	// allowed because there might be multiple producers for a single RowReceiver
    72  	// and they might not all be aware of the last status returned).
    73  	//
    74  	// Implementations of Push() must be thread-safe.
    75  	Push(row sqlbase.EncDatumRow, meta *execinfrapb.ProducerMetadata) ConsumerStatus
    76  
    77  	// Types returns the types of the EncDatumRows that this RowReceiver expects
    78  	// to be pushed.
    79  	Types() []*types.T
    80  
    81  	// ProducerDone is called when the producer has pushed all the rows and
    82  	// metadata; it causes the RowReceiver to process all rows and clean up.
    83  	//
    84  	// ProducerDone() cannot be called concurrently with Push(), and after it
    85  	// is called, no other method can be called.
    86  	ProducerDone()
    87  }
    88  
    89  // RowSource is any component of a flow that produces rows that can be consumed
    90  // by another component.
    91  //
    92  // Communication components generally (e.g. RowBuffer, RowChannel) implement
    93  // this interface. Some processors also implement it (in addition to
    94  // implementing the Processor interface) - in which case those
    95  // processors can be "fused" with their consumer (i.e. run in the consumer's
    96  // goroutine).
    97  type RowSource interface {
    98  	// OutputTypes returns the schema for the rows in this source.
    99  	OutputTypes() []*types.T
   100  
   101  	// Start prepares the RowSource for future Next() calls and takes in the
   102  	// context in which these future calls should operate. Start needs to be
   103  	// called before Next/ConsumerDone/ConsumerClosed.
   104  	//
   105  	// RowSources that consume other RowSources are expected to Start() their
   106  	// inputs.
   107  	//
   108  	// Implementations are expected to hold on to the provided context. They may
   109  	// choose to derive and annotate it (Processors generally do). For convenience,
   110  	// the possibly updated context is returned.
   111  	Start(context.Context) context.Context
   112  
   113  	// Next returns the next record from the source. At most one of the return
   114  	// values will be non-empty. Both of them can be empty when the RowSource has
   115  	// been exhausted - no more records are coming and any further method calls
   116  	// will be no-ops.
   117  	//
   118  	// EncDatumRows returned by Next() are only valid until the next call to
   119  	// Next(), although the EncDatums inside them stay valid forever.
   120  	//
   121  	// A ProducerMetadata record may contain an error. In that case, this
   122  	// interface is oblivious about the semantics: implementers may continue
   123  	// returning different rows on future calls, or may return an empty record
   124  	// (thus asking the consumer to stop asking for rows). In particular,
   125  	// implementers are not required to only return metadata records from this
   126  	// point on (which means, for example, that they're not required to
   127  	// automatically ask every producer to drain, in case there are multiple
   128  	// producers). Therefore, consumers need to be aware that some rows might have
   129  	// been skipped in case they continue to consume rows. Usually a consumer
   130  	// should react to an error by calling ConsumerDone(), thus asking the
   131  	// RowSource to drain, and separately discard any future data rows. A consumer
   132  	// receiving an error should also call ConsumerDone() on any other input it
   133  	// has.
   134  	Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata)
   135  
   136  	// ConsumerDone lets the source know that we will not need any more data
   137  	// rows. The source is expected to start draining and only send metadata
   138  	// rows. May be called multiple times on a RowSource, even after
   139  	// ConsumerClosed has been called.
   140  	//
   141  	// May block. If the consumer of the source stops consuming rows before
   142  	// Next indicates that there are no more rows, ConsumerDone() and/or
   143  	// ConsumerClosed() must be called; it is a no-op to call these methods after
   144  	// all the rows were consumed (i.e. after Next() returned an empty row).
   145  	ConsumerDone()
   146  
   147  	// ConsumerClosed informs the source that the consumer is done and will not
   148  	// make any more calls to Next(). Must only be called once on a given
   149  	// RowSource.
   150  	//
   151  	// Like ConsumerDone(), if the consumer of the source stops consuming rows
   152  	// before Next indicates that there are no more rows, ConsumerDone() and/or
   153  	// ConsumerClosed() must be called; it is a no-op to call these methods after
   154  	// all the rows were consumed (i.e. after Next() returned an empty row).
   155  	ConsumerClosed()
   156  }
   157  
   158  // RowSourcedProcessor is the union of RowSource and Processor: a RowSource that additionally exposes Run.
   159  type RowSourcedProcessor interface {
   160  	RowSource
   161  	Run(context.Context) // NOTE(review): presumably the Processor method; Processor is declared outside this file - confirm.
   162  }
   163  
   164  // Run reads records from the source and outputs them to the receiver, properly
   165  // draining the source of metadata and closing both the source and receiver.
   166  //
   167  // src needs to have been Start()ed before calling this.
   168  func Run(ctx context.Context, src RowSource, dst RowReceiver) {
   169  	for {
   170  		row, meta := src.Next()
   171  		// Emit the row; stop if no more rows are needed.
   172  		if row != nil || meta != nil {
   173  			switch dst.Push(row, meta) {
   174  			case NeedMoreRows:
   175  				continue
   176  			case DrainRequested:
   177  				DrainAndForwardMetadata(ctx, src, dst)
   178  				dst.ProducerDone()
   179  				return
   180  			case ConsumerClosed:
   181  				src.ConsumerClosed()
   182  				dst.ProducerDone()
   183  				return
   184  			}
   185  		}
   186  		// row == nil && meta == nil: the source has been fully drained.
   187  		dst.ProducerDone()
   188  		return
   189  	}
   190  }
   191  
   192  // Releasable is an interface for objects that can be Released back into a
   193  // memory pool when finished.
   194  type Releasable interface {
   195  	// Release allows this object to be returned to a memory pool. Objects must
   196  	// not be used after Release is called.
   197  	Release()
   198  }
   199  
   200  // DrainAndForwardMetadata calls src.ConsumerDone() (thus asking src for
   201  // draining metadata) and then forwards all the metadata to dst.
   202  //
   203  // When this returns, src has been properly closed (regardless of the presence
   204  // or absence of an error). dst, however, has not been closed; someone else must
   205  // call dst.ProducerDone() when all producers have finished draining.
   206  //
   207  // It is OK to call DrainAndForwardMetadata() multiple times concurrently on the
   208  // same dst (as RowReceiver.Push() is guaranteed to be thread safe).
   209  func DrainAndForwardMetadata(ctx context.Context, src RowSource, dst RowReceiver) {
   210  	src.ConsumerDone()
   211  	for {
   212  		row, meta := src.Next()
   213  		if meta == nil {
   214  			if row == nil {
   215  				return
   216  			}
   217  			continue
   218  		}
   219  		if row != nil {
   220  			log.Fatalf(
   221  				ctx, "both row data and metadata in the same record. row: %s meta: %+v",
   222  				row.String(src.OutputTypes()), meta,
   223  			)
   224  		}
   225  
   226  		switch dst.Push(nil /* row */, meta) {
   227  		case ConsumerClosed:
   228  			src.ConsumerClosed()
   229  			return
   230  		case NeedMoreRows:
   231  		case DrainRequested:
   232  		}
   233  	}
   234  }
   235  
   236  // GetTraceData returns the trace data.
   237  func GetTraceData(ctx context.Context) []tracing.RecordedSpan {
   238  	if sp := opentracing.SpanFromContext(ctx); sp != nil {
   239  		return tracing.GetRecording(sp)
   240  	}
   241  	return nil
   242  }
   243  
   244  // SendTraceData collects the tracing information from the ctx and pushes it to
   245  // dst. The ConsumerStatus returned by dst is ignored.
   246  //
   247  // Note that the tracing data is distinct between different processors, since
   248  // each one gets its own trace "recording group".
   249  func SendTraceData(ctx context.Context, dst RowReceiver) {
   250  	if rec := GetTraceData(ctx); rec != nil {
   251  		dst.Push(nil /* row */, &execinfrapb.ProducerMetadata{TraceData: rec})
   252  	}
   253  }
   254  
   255  // GetLeafTxnFinalState returns the txn metadata from a transaction if
   256  // it is present and the transaction is a leaf transaction. It returns
   257  // nil when called on a Root. This is done as a convenience allowing
   258  // DistSQL processors to be oblivious about whether they're running in
   259  // a Leaf or a Root.
   260  //
   261  // NOTE(andrei): As of 04/2018, the txn is shared by all processors scheduled on
   262  // a node, and so it's possible for multiple processors to send the same
   263  // LeafTxnFinalState. The root TxnCoordSender doesn't care if it receives the same
   264  // thing multiple times.
   265  func GetLeafTxnFinalState(ctx context.Context, txn *kv.Txn) *roachpb.LeafTxnFinalState {
   266  	if txn.Type() != kv.LeafTxn {
   267  		return nil
   268  	}
   269  	txnMeta, err := txn.GetLeafTxnFinalState(ctx)
   270  	if err != nil {
   271  		// TODO(knz): plumb errors through the callers.
   272  		panic(errors.Wrap(err, "in execinfra.GetLeafTxnFinalState"))
   273  	}
   274  
   275  	if txnMeta.Txn.ID == uuid.Nil {
   276  		return nil
   277  	}
   278  	return &txnMeta
   279  }
   280  
   281  // DrainAndClose is a version of DrainAndForwardMetadata that drains multiple
   282  // sources. These sources are assumed to be the only producers left for dst, so
   283  // dst is closed once they're all exhausted (this is different from
   284  // DrainAndForwardMetadata).
   285  //
   286  // If cause is specified, it is forwarded to the consumer before all the drain
   287  // metadata. This is intended to have been the error, if any, that caused the
   288  // draining.
   289  //
   290  // pushTrailingMeta is called after draining the sources and before calling
   291  // dst.ProducerDone(). It gives the caller the opportunity to push some trailing
   292  // metadata (e.g. tracing information and txn updates, if applicable).
   293  //
   294  // srcs can be nil.
   295  //
   296  // All errors are forwarded to the producer.
   297  func DrainAndClose(
   298  	ctx context.Context,
   299  	dst RowReceiver,
   300  	cause error,
   301  	pushTrailingMeta func(context.Context),
   302  	srcs ...RowSource,
   303  ) {
   304  	if cause != nil {
   305  		// We ignore the returned ConsumerStatus and rely on the
   306  		// DrainAndForwardMetadata() calls below to close srcs in all cases.
   307  		_ = dst.Push(nil /* row */, &execinfrapb.ProducerMetadata{Err: cause})
   308  	}
   309  	if len(srcs) > 0 {
   310  		var wg sync.WaitGroup
   311  		for _, input := range srcs[1:] {
   312  			wg.Add(1)
   313  			go func(input RowSource) {
   314  				DrainAndForwardMetadata(ctx, input, dst)
   315  				wg.Done()
   316  			}(input)
   317  		}
   318  		DrainAndForwardMetadata(ctx, srcs[0], dst)
   319  		wg.Wait()
   320  	}
   321  	pushTrailingMeta(ctx)
   322  	dst.ProducerDone()
   323  }
   324  
   325  // NoMetadataRowSource is a wrapper on top of a RowSource that automatically
   326  // forwards metadata to a RowReceiver. Data rows are returned through an
   327  // interface similar to RowSource, except that, since metadata is taken care of,
   328  // only the data rows (and errors, see NextRow) are returned.
   329  //
   330  // The point of this struct is that it'd be burdensome for some row consumers to
   331  // have to deal with metadata.
   332  type NoMetadataRowSource struct {
   333  	src          RowSource // wrapped source that NextRow pulls records from
   334  	metadataSink RowReceiver // receives the non-error metadata that NextRow encounters
   335  }
   336  
   337  // MakeNoMetadataRowSource builds a NoMetadataRowSource.
   338  func MakeNoMetadataRowSource(src RowSource, sink RowReceiver) NoMetadataRowSource {
   339  	return NoMetadataRowSource{src: src, metadataSink: sink}
   340  }
   341  
   342  // NextRow is analogous to RowSource.Next. If the producer sends an error, we
   343  // can't just forward it to metadataSink. We need to let the consumer know so
   344  // that it's not under the impression that everything is hunky-dory and it can
   345  // continue consuming rows. So, this interface returns the error. Just like with
   346  // a raw RowSource, the consumer should generally call ConsumerDone() and drain.
   347  func (rs *NoMetadataRowSource) NextRow() (sqlbase.EncDatumRow, error) {
   348  	for {
   349  		row, meta := rs.src.Next()
   350  		if meta == nil {
   351  			return row, nil
   352  		}
   353  		if meta.Err != nil {
   354  			return nil, meta.Err
   355  		}
   356  		// We forward the metadata and ignore the returned ConsumerStatus. There's
   357  		// no good way to use that status here; eventually the consumer of this
   358  		// NoMetadataRowSource will figure out the same status and act on it as soon
   359  		// as a non-metadata row is received.
   360  		_ = rs.metadataSink.Push(nil /* row */, meta)
   361  	}
   362  }
   363  
   364  // RowChannelMsg is the message used in the channels that implement
   365  // local physical streams (i.e. the RowChannels).
   366  type RowChannelMsg struct {
   367  	// Only one of these fields will be set.
   368  	Row  sqlbase.EncDatumRow
   369  	Meta *execinfrapb.ProducerMetadata
   370  }
   371  
   372  // rowSourceBase provides common functionality for RowSource implementations
   373  // that need to track consumer status. It is intended to be used by RowSource
   374  // implementations into which data is pushed by a producer async, as opposed to
   375  // RowSources that pull data synchronously from their inputs (which don't need
   376  // to deal with concurrent calls to ConsumerDone() / ConsumerClosed()).
   377  // Things like the RowChannel fall in the first category; processors generally
   378  // fall in the latter.
   379  type rowSourceBase struct {
   380  	// ConsumerStatus is an atomic used in the implementation of the
   381  	// RowSource.Consumer{Done,Closed} methods to signal that the consumer is
   382  	// done accepting rows or is no longer accepting data. Access it only through sync/atomic.
   383  	ConsumerStatus ConsumerStatus
   384  }
   385  
   386  // consumerDone helps processors implement RowSource.ConsumerDone.
   387  func (rb *rowSourceBase) consumerDone() {
   388  	atomic.CompareAndSwapUint32((*uint32)(&rb.ConsumerStatus),
   389  		uint32(NeedMoreRows), uint32(DrainRequested))
   390  }
   391  
   392  // consumerClosed helps processors implement RowSource.ConsumerClosed. The name
   393  // is only used for debug messages.
   394  func (rb *rowSourceBase) consumerClosed(name string) {
   395  	status := ConsumerStatus(atomic.LoadUint32((*uint32)(&rb.ConsumerStatus)))
   396  	if status == ConsumerClosed {
   397  		log.ReportOrPanic(context.Background(), nil, "%s already closed", log.Safe(name))
   398  	}
   399  	atomic.StoreUint32((*uint32)(&rb.ConsumerStatus), uint32(ConsumerClosed))
   400  }
   401  
   402  // RowChannel is a thin layer over a RowChannelMsg channel, which can be used to
   403  // transfer rows between goroutines. It implements both RowReceiver and RowSource.
   404  type RowChannel struct {
   405  	rowSourceBase
   406  
   407  	types []*types.T
   408  
   409  	// The channel on which rows are delivered. This is the receive-only view of dataChan.
   410  	C <-chan RowChannelMsg
   411  
   412  	// dataChan is the same channel as C; Push sends on it and ProducerDone closes it.
   413  	dataChan chan RowChannelMsg
   414  
   415  	// numSenders is an atomic counter that keeps track of how many senders have
   416  	// yet to call ProducerDone().
   417  	numSenders int32
   418  }
   419  
   420  var _ RowReceiver = &RowChannel{}
   421  var _ RowSource = &RowChannel{}
   422  
   423  // InitWithNumSenders initializes the RowChannel with the default buffer size.
   424  // numSenders is the number of producers that will be pushing to this channel.
   425  // RowChannel will not be closed until it receives numSenders calls to
   426  // ProducerDone().
   427  func (rc *RowChannel) InitWithNumSenders(types []*types.T, numSenders int) {
   428  	rc.InitWithBufSizeAndNumSenders(types, RowChannelBufSize, numSenders)
   429  }
   430  
   431  // InitWithBufSizeAndNumSenders initializes the RowChannel with a given buffer
   432  // size and number of senders.
   433  func (rc *RowChannel) InitWithBufSizeAndNumSenders(types []*types.T, chanBufSize, numSenders int) {
   434  	rc.types = types
   435  	rc.dataChan = make(chan RowChannelMsg, chanBufSize)
   436  	rc.C = rc.dataChan
   437  	atomic.StoreInt32(&rc.numSenders, int32(numSenders))
   438  }
   439  
   440  // Push is part of the RowReceiver interface.
   441  func (rc *RowChannel) Push(
   442  	row sqlbase.EncDatumRow, meta *execinfrapb.ProducerMetadata,
   443  ) ConsumerStatus {
   444  	consumerStatus := ConsumerStatus(
   445  		atomic.LoadUint32((*uint32)(&rc.ConsumerStatus)))
   446  	switch consumerStatus {
   447  	case NeedMoreRows:
   448  		rc.dataChan <- RowChannelMsg{Row: row, Meta: meta}
   449  	case DrainRequested:
   450  		// If we're draining, only forward metadata.
   451  		if meta != nil {
   452  			rc.dataChan <- RowChannelMsg{Meta: meta}
   453  		}
   454  	case ConsumerClosed:
   455  		// If the consumer is gone, swallow all the rows and the metadata.
   456  	}
   457  	return consumerStatus
   458  }
   459  
   460  // ProducerDone is part of the RowReceiver interface.
   461  func (rc *RowChannel) ProducerDone() {
   462  	newVal := atomic.AddInt32(&rc.numSenders, -1)
   463  	if newVal < 0 {
   464  		panic("too many ProducerDone() calls")
   465  	}
   466  	if newVal == 0 {
   467  		close(rc.dataChan)
   468  	}
   469  }
   470  
   471  // OutputTypes is part of the RowSource interface.
   472  func (rc *RowChannel) OutputTypes() []*types.T {
   473  	return rc.types
   474  }
   475  
   476  // Start is part of the RowSource interface.
   477  func (rc *RowChannel) Start(ctx context.Context) context.Context { return ctx }
   478  
   479  // Next is part of the RowSource interface.
   480  func (rc *RowChannel) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) {
   481  	d, ok := <-rc.C
   482  	if !ok {
   483  		// No more rows.
   484  		return nil, nil
   485  	}
   486  	return d.Row, d.Meta
   487  }
   488  
   489  // ConsumerDone is part of the RowSource interface.
   490  func (rc *RowChannel) ConsumerDone() {
   491  	rc.consumerDone()
   492  }
   493  
   494  // ConsumerClosed is part of the RowSource interface.
   495  func (rc *RowChannel) ConsumerClosed() {
   496  	rc.consumerClosed("RowChannel")
   497  	numSenders := atomic.LoadInt32(&rc.numSenders)
   498  	// Drain (at most) numSenders messages in case senders are blocked trying to
   499  	// emit a row.
   500  	// Note that, if the producer is done, then it has also closed the
   501  	// channel this will not block. The producer might be neither blocked nor
   502  	// closed, though; hence the no data case.
   503  	for i := int32(0); i < numSenders; i++ {
   504  		select {
   505  		case <-rc.dataChan:
   506  		default:
   507  		}
   508  	}
   509  }
   510  
   511  // Types is part of the RowReceiver interface.
   512  func (rc *RowChannel) Types() []*types.T {
   513  	return rc.types
   514  }