github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/flowinfra/outbox.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package flowinfra
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"io"
    17  	"sync"
    18  	"time"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    21  	"github.com/cockroachdb/cockroach/pkg/rpc"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    26  	"github.com/cockroachdb/cockroach/pkg/util/contextutil"
    27  	"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
    28  	"github.com/cockroachdb/cockroach/pkg/util/log"
    29  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    30  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    31  	opentracing "github.com/opentracing/opentracing-go"
    32  	"google.golang.org/grpc"
    33  )
    34  
    35  const outboxBufRows = 16
    36  const outboxFlushPeriod = 100 * time.Microsecond
    37  
    38  type flowStream interface {
    39  	Send(*execinfrapb.ProducerMessage) error
    40  	Recv() (*execinfrapb.ConsumerSignal, error)
    41  }
    42  
// Outbox implements an outgoing mailbox as a RowReceiver that receives rows and
// sends them to a gRPC stream. Its core logic runs in a goroutine. We send rows
// when we accumulate outboxBufRows or every outboxFlushPeriod (whichever comes
// first).
type Outbox struct {
	// RowChannel implements the RowReceiver interface.
	execinfra.RowChannel

	// flowCtx is the flow's context (provided to NewOutbox or installed later
	// through SetFlowCtx), streamID is the ID of the outgoing stream, and
	// nodeID is the node that mainLoop dials when no stream has been provided
	// up front.
	flowCtx  *execinfra.FlowCtx
	streamID execinfrapb.StreamID
	nodeID   roachpb.NodeID
	// The rows received from the RowChannel will be forwarded on this stream once
	// it is established. It is reset to nil when the stream must not be used any
	// more (after a send error or an error signaled by the consumer).
	stream flowStream

	// encoder accumulates rows and metadata and turns them into the
	// ProducerMessages that are sent on the stream.
	encoder StreamEncoder
	// numRows is the number of entries that have been accumulated in the encoder
	// since the last successful flush. addRow increments it for metadata records
	// as well as for rows.
	numRows int

	// flowCtxCancel is the cancellation function for this flow's ctx; context
	// cancellation is used to stop processors on this flow. It is invoked
	// whenever the consumer returns an error on the stream above. Set in
	// Start().
	flowCtxCancel context.CancelFunc

	// err is the error (if any) recorded by run() once the main loop has
	// finished and the stream has been closed; it is exposed through Err().
	err error

	// statsCollectionEnabled is set by mainLoop when the outbox's tracing span
	// is recording. When set, flush adds the size of every message it sends to
	// stats.BytesSent.
	statsCollectionEnabled bool
	stats                  OutboxStats
}
    73  
    74  var _ execinfra.RowReceiver = &Outbox{}
    75  var _ Startable = &Outbox{}
    76  
    77  // NewOutbox creates a new Outbox.
    78  func NewOutbox(
    79  	flowCtx *execinfra.FlowCtx,
    80  	nodeID roachpb.NodeID,
    81  	flowID execinfrapb.FlowID,
    82  	streamID execinfrapb.StreamID,
    83  ) *Outbox {
    84  	m := &Outbox{flowCtx: flowCtx, nodeID: nodeID}
    85  	m.encoder.SetHeaderFields(flowID, streamID)
    86  	m.streamID = streamID
    87  	return m
    88  }
    89  
    90  // NewOutboxSyncFlowStream sets up an outbox for the special "sync flow"
    91  // stream. The flow context should be provided via SetFlowCtx when it is
    92  // available.
    93  func NewOutboxSyncFlowStream(stream execinfrapb.DistSQL_RunSyncFlowServer) *Outbox {
    94  	return &Outbox{stream: stream}
    95  }
    96  
// SetFlowCtx sets the flow context for the Outbox. It is needed for outboxes
// created through NewOutboxSyncFlowStream, which does not take a flow context.
// The main loop dereferences the flow context (e.g. to reach the Stopper), so
// it has to be set before the outbox is started.
func (m *Outbox) SetFlowCtx(flowCtx *execinfra.FlowCtx) {
	m.flowCtx = flowCtx
}
   101  
   102  // Init initializes the Outbox.
   103  func (m *Outbox) Init(typs []*types.T) {
   104  	if typs == nil {
   105  		// We check for nil to detect uninitialized cases; but we support 0-length
   106  		// rows.
   107  		typs = make([]*types.T, 0)
   108  	}
   109  	m.RowChannel.InitWithNumSenders(typs, 1)
   110  	m.encoder.Init(typs)
   111  }
   112  
   113  // addRow encodes a row into rowBuf. If enough rows were accumulated, flush() is
   114  // called.
   115  //
   116  // If an error is returned, the outbox's stream might or might not be usable; if
   117  // it's not usable, it will have been set to nil. The error might be a
   118  // communication error, in which case the other side of the stream should get it
   119  // too, or it might be an encoding error, in which case we've forwarded it on
   120  // the stream.
   121  func (m *Outbox) addRow(
   122  	ctx context.Context, row sqlbase.EncDatumRow, meta *execinfrapb.ProducerMetadata,
   123  ) error {
   124  	mustFlush := false
   125  	var encodingErr error
   126  	if meta != nil {
   127  		m.encoder.AddMetadata(ctx, *meta)
   128  		// If we hit an error, let's forward it ASAP. The consumer will probably
   129  		// close.
   130  		mustFlush = meta.Err != nil
   131  	} else {
   132  		encodingErr = m.encoder.AddRow(row)
   133  		if encodingErr != nil {
   134  			m.encoder.AddMetadata(ctx, execinfrapb.ProducerMetadata{Err: encodingErr})
   135  			mustFlush = true
   136  		}
   137  	}
   138  	m.numRows++
   139  	var flushErr error
   140  	if m.numRows >= outboxBufRows || mustFlush {
   141  		flushErr = m.flush(ctx)
   142  	}
   143  	if encodingErr != nil {
   144  		return encodingErr
   145  	}
   146  	return flushErr
   147  }
   148  
   149  // flush sends the rows accumulated so far in a ProducerMessage. Any error
   150  // returned indicates that sending a message on the outbox's stream failed, and
   151  // thus the stream can't be used any more. The stream is also set to nil if
   152  // an error is returned.
   153  func (m *Outbox) flush(ctx context.Context) error {
   154  	if m.numRows == 0 && m.encoder.HasHeaderBeenSent() {
   155  		return nil
   156  	}
   157  	msg := m.encoder.FormMessage(ctx)
   158  	if m.statsCollectionEnabled {
   159  		m.stats.BytesSent += int64(msg.Size())
   160  	}
   161  
   162  	if log.V(3) {
   163  		log.Infof(ctx, "flushing outbox")
   164  	}
   165  	sendErr := m.stream.Send(msg)
   166  	for _, rpm := range msg.Data.Metadata {
   167  		if metricsMeta, ok := rpm.Value.(*execinfrapb.RemoteProducerMetadata_Metrics_); ok {
   168  			metricsMeta.Metrics.Release()
   169  		}
   170  	}
   171  	if sendErr != nil {
   172  		// Make sure the stream is not used any more.
   173  		m.stream = nil
   174  		if log.V(1) {
   175  			log.Errorf(ctx, "outbox flush error: %s", sendErr)
   176  		}
   177  	} else if log.V(3) {
   178  		log.Infof(ctx, "outbox flushed")
   179  	}
   180  	if sendErr != nil {
   181  		return sendErr
   182  	}
   183  
   184  	m.numRows = 0
   185  	return nil
   186  }
   187  
// mainLoop reads from m.RowChannel and writes to the output stream through
// addRow()/flush() until the producer doesn't have any more data to send or an
// error happened.
//
// If the consumer asks the producer to drain, mainLoop() will relay this
// information and, again, wait until the producer doesn't have any more data to
// send (the producer is supposed to only send trailing metadata once it
// receives this signal).
//
// If an error is returned, it's either a communication error from the outbox's
// stream, or otherwise the error has already been forwarded on the stream.
// Depending on the specific error, the stream might or might not need to be
// closed. In case it doesn't, m.stream has been set to nil.
func (m *Outbox) mainLoop(ctx context.Context) error {
	// No matter what happens, we need to make sure we close our RowChannel, since
	// writers could be writing to it as soon as we are started.
	defer m.RowChannel.ConsumerClosed()

	// Derive a span for the outbox. Stats are only collected (and the span only
	// tagged with the flow and stream IDs) when that span is recording.
	var span opentracing.Span
	ctx, span = execinfra.ProcessorSpan(ctx, "outbox")
	if span != nil && tracing.IsRecording(span) {
		m.statsCollectionEnabled = true
		span.SetTag(execinfrapb.FlowIDTagKey, m.flowCtx.ID.String())
		span.SetTag(execinfrapb.StreamIDTagKey, m.streamID)
	}
	// spanFinished specifies whether we called tracing.FinishSpan on the span.
	// Some code paths (e.g. stats collection) need to prematurely call
	// FinishSpan to get trace data.
	spanFinished := false
	defer func() {
		if !spanFinished {
			tracing.FinishSpan(span)
		}
	}()

	// An outbox built by NewOutbox has no stream yet: dial the target node and
	// open a FlowStream RPC. An outbox built by NewOutboxSyncFlowStream already
	// has its stream and skips this.
	if m.stream == nil {
		var conn *grpc.ClientConn
		var err error
		conn, err = m.flowCtx.Cfg.NodeDialer.DialNoBreaker(ctx, m.nodeID, rpc.DefaultClass)
		if err != nil {
			// Log any Dial errors. This does not have a verbosity check due to being
			// a critical part of query execution: if this step doesn't work, the
			// receiving side might end up hanging or timing out.
			log.Infof(ctx, "outbox: connection dial error: %+v", err)
			return err
		}
		client := execinfrapb.NewDistSQLClient(conn)
		if log.V(2) {
			log.Infof(ctx, "outbox: calling FlowStream")
		}
		// The context used here escapes, so it has to be a background context.
		m.stream, err = client.FlowStream(context.TODO())
		if err != nil {
			if log.V(1) {
				log.Infof(ctx, "FlowStream error: %s", err)
			}
			return err
		}
		if log.V(2) {
			log.Infof(ctx, "outbox: FlowStream returned")
		}
	}

	// flushTimer triggers a flush of buffered entries; it is (re)armed below
	// when the first entry of a batch has been buffered.
	var flushTimer timeutil.Timer
	defer flushTimer.Stop()

	// draining is set once the consumer has requested a drain; from then on rows
	// are dropped and only metadata is forwarded.
	draining := false

	// TODO(andrei): It's unfortunate that we're spawning a goroutine for every
	// outgoing stream, but I'm not sure what to do instead. The streams don't
	// have a non-blocking API. We could start this goroutine only after a
	// timeout, but that timeout would affect queries that use flows with
	// LimitHint's (so, queries where the consumer is expected to quickly ask the
	// producer to drain). Perhaps what we want is a way to tell when all the rows
	// corresponding to the first KV batch have been sent and only start the
	// goroutine if more batches are needed to satisfy the query.
	//
	// The listener gets its own cancelable context so that it is told to stop
	// (through the deferred cancel) whenever mainLoop returns.
	listenToConsumerCtx, cancel := contextutil.WithCancel(ctx)
	drainCh, err := m.listenForDrainSignalFromConsumer(listenToConsumerCtx)
	defer cancel()
	if err != nil {
		return err
	}

	// Send a first message that will contain the header (i.e. the StreamID), so
	// that the stream is properly initialized on the consumer. The consumer has
	// a timeout in which inbound streams must be established.
	if err := m.flush(ctx); err != nil {
		return err
	}

	for {
		select {
		case msg, ok := <-m.RowChannel.C:
			if !ok {
				// No more data.
				if m.statsCollectionEnabled {
					// Flush first so that what is still buffered is accounted for in
					// m.stats.BytesSent before the stats are attached to the span.
					err := m.flush(ctx)
					if err != nil {
						return err
					}
					if m.flowCtx.Cfg.TestingKnobs.DeterministicStats {
						m.stats.BytesSent = 0
					}
					tracing.SetSpanStats(span, &m.stats)
					// Finish the span now (rather than in the defer above) so that its
					// trace data can be retrieved and sent to the consumer below.
					tracing.FinishSpan(span)
					spanFinished = true
					if trace := execinfra.GetTraceData(ctx); trace != nil {
						err := m.addRow(ctx, nil, &execinfrapb.ProducerMetadata{TraceData: trace})
						if err != nil {
							return err
						}
					}
				}
				// Send whatever is left and finish.
				return m.flush(ctx)
			}
			if !draining || msg.Meta != nil {
				// If we're draining, we ignore all the rows and just send metadata.
				err := m.addRow(ctx, msg.Row, msg.Meta)
				if err != nil {
					return err
				}
				if msg.Meta != nil {
					// Now that we have added metadata, it is safe to release it to the
					// pool.
					msg.Meta.Release()
				}
				// If the message to add was metadata, a flush was already forced. If
				// this is our first row, restart the flushTimer.
				if m.numRows == 1 {
					flushTimer.Reset(outboxFlushPeriod)
				}
			}
		case <-flushTimer.C:
			// Mark the timer's channel as consumed, as timeutil.Timer requires
			// before the timer is reset again.
			flushTimer.Read = true
			err := m.flush(ctx)
			if err != nil {
				return err
			}
		case drainSignal := <-drainCh:
			if drainSignal.err != nil {
				// Stop work from proceeding in this flow. This also causes FlowStream
				// RPCs that have this node as consumer to return errors.
				m.flowCtxCancel()
				// The consumer either doesn't care any more (it returned from the
				// FlowStream RPC with an error if the outbox established the stream or
				// it canceled the client context if the consumer established the
				// stream through a RunSyncFlow RPC), or there was a communication error
				// and the stream is dead. In any case, the stream has been closed and
				// the consumer will not consume more rows from this outbox. Make sure
				// the stream is not used any more.
				m.stream = nil
				return drainSignal.err
			}
			// Only the first signal is acted upon: receiving from a nil channel
			// blocks forever, so this case is never selected again.
			drainCh = nil
			if drainSignal.drainRequested {
				// Enter draining mode.
				draining = true
				m.RowChannel.ConsumerDone()
			} else {
				// No draining required. We're done; no need to consume any more.
				// m.RowChannel.ConsumerClosed() is called in a defer above.
				return nil
			}
		}
	}
}
   354  
// drainSignal is a signal received from the consumer telling the producer that
// it doesn't need any more rows and optionally asking the producer to drain.
// Values of this type are produced by the task started in
// listenForDrainSignalFromConsumer and consumed by mainLoop.
type drainSignal struct {
	// drainRequested, if set, means that the consumer is interested in the
	// trailing metadata that the producer might have. If not set, the producer
	// should close immediately (the consumer is probably gone by now).
	drainRequested bool
	// err, if set, is either the error that the consumer returned when closing
	// the FlowStream RPC or a communication error. It is the non-EOF error
	// returned by Recv() on the stream.
	err error
}
   366  
// listenForDrainSignalFromConsumer returns a channel that will be pinged once the
// consumer has closed its send-side of the stream, or has sent a drain signal.
//
// This method starts an async task (through the flow's Stopper) that will run
// until either the consumer closes the stream or until the caller cancels the
// context. The caller has to cancel the context once it no longer reads from
// the channel, otherwise the task might block forever when attempting to write
// to the channel.
//
// An error is returned only if the task could not be started.
func (m *Outbox) listenForDrainSignalFromConsumer(ctx context.Context) (<-chan drainSignal, error) {
	// The channel has room for one signal, so a single signal can be delivered
	// without the main loop being ready to receive it.
	ch := make(chan drainSignal, 1)

	// Capture the stream in a local: the main loop may set m.stream to nil
	// while the task below is still using it.
	stream := m.stream
	if err := m.flowCtx.Cfg.Stopper.RunAsyncTask(ctx, "drain", func(ctx context.Context) {
		// sendDrainSignal delivers a signal to the main loop and reports whether
		// it was delivered.
		sendDrainSignal := func(drainRequested bool, err error) bool {
			select {
			case ch <- drainSignal{drainRequested: drainRequested, err: err}:
				return true
			case <-ctx.Done():
				// Listening for consumer signals has been canceled. This generally
				// means that the main outbox routine is no longer listening to these
				// signals but, in the RunSyncFlow case, it may also mean that the
				// client (the consumer) has canceled the RPC. In that case, the main
				// routine is still listening (and this branch of the select has been
				// randomly selected; the other was also available), so we have to
				// notify it. Thus, we attempt sending again.
				select {
				case ch <- drainSignal{drainRequested: drainRequested, err: err}:
					return true
				default:
					return false
				}
			}
		}

		for {
			signal, err := stream.Recv()
			if err == io.EOF {
				// The consumer closed its side of the stream without an error: report
				// that no draining is needed and stop listening.
				sendDrainSignal(false, nil)
				return
			}
			if err != nil {
				// Any other error is handed to the main loop as-is.
				sendDrainSignal(false, err)
				return
			}
			switch {
			case signal.DrainRequest != nil:
				// Relay the drain request; keep listening unless the signal could not
				// be delivered.
				if !sendDrainSignal(true, nil) {
					return
				}
			case signal.SetupFlowRequest != nil:
				log.Fatalf(ctx, "Unexpected SetupFlowRequest. "+
					"This SyncFlow specific message should have been handled in RunSyncFlow.")
			case signal.Handshake != nil:
				// Handshakes are only recorded in the trace.
				log.Eventf(ctx, "Consumer sent handshake. Consuming flow scheduled: %t",
					signal.Handshake.ConsumerScheduled)
			}
		}
	}); err != nil {
		return nil, err
	}
	return ch, nil
}
   428  
   429  func (m *Outbox) run(ctx context.Context, wg *sync.WaitGroup) {
   430  	err := m.mainLoop(ctx)
   431  	if stream, ok := m.stream.(execinfrapb.DistSQL_FlowStreamClient); ok {
   432  		closeErr := stream.CloseSend()
   433  		if err == nil {
   434  			err = closeErr
   435  		}
   436  	}
   437  	m.err = err
   438  	if wg != nil {
   439  		wg.Done()
   440  	}
   441  }
   442  
   443  // Start starts the outbox.
   444  func (m *Outbox) Start(ctx context.Context, wg *sync.WaitGroup, flowCtxCancel context.CancelFunc) {
   445  	if m.Types() == nil {
   446  		panic("outbox not initialized")
   447  	}
   448  	if wg != nil {
   449  		wg.Add(1)
   450  	}
   451  	m.flowCtxCancel = flowCtxCancel
   452  	go m.run(ctx, wg)
   453  }
   454  
// Err returns the error (if any occurred) while Outbox was running. The value
// is the one recorded by run() after the main loop has finished.
//
// NOTE(review): m.err is written by the outbox's goroutine without any
// synchronization visible in this file; presumably callers only call Err after
// that goroutine has finished (e.g. after waiting on the WaitGroup passed to
// Start) — confirm against callers.
func (m *Outbox) Err() error {
	return m.err
}
   459  
// outboxTagPrefix is prepended to the keys of the map returned by
// (*OutboxStats).Stats.
const outboxTagPrefix = "outbox."
   461  
   462  // Stats implements the SpanStats interface.
   463  func (os *OutboxStats) Stats() map[string]string {
   464  	statsMap := make(map[string]string)
   465  	statsMap[outboxTagPrefix+"bytes_sent"] = humanizeutil.IBytes(os.BytesSent)
   466  	return statsMap
   467  }
   468  
   469  // StatsForQueryPlan implements the DistSQLSpanStats interface.
   470  func (os *OutboxStats) StatsForQueryPlan() []string {
   471  	return []string{fmt.Sprintf("bytes sent: %s", humanizeutil.IBytes(os.BytesSent))}
   472  }