github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colflow/colrpc/inbox.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package colrpc
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"io"
    17  	"sync"
    18  
    19  	"github.com/apache/arrow/go/arrow/array"
    20  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    21  	"github.com/cockroachdb/cockroach/pkg/col/colserde"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    27  	"github.com/cockroachdb/cockroach/pkg/util/log"
    28  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    29  	"github.com/cockroachdb/logtags"
    30  )
    31  
    32  // flowStreamServer is a utility interface used to mock out the RPC layer.
    33  type flowStreamServer interface {
    34  	Send(*execinfrapb.ConsumerSignal) error
    35  	Recv() (*execinfrapb.ProducerMessage, error)
    36  }
    37  
// Inbox is used to expose data from remote flows through an exec.Operator
// interface. FlowStream RPC handlers should call RunWithStream (which blocks
// until operation terminates, gracefully or unexpectedly) to pass the stream
// to the inbox. Next may be called before RunWithStream, it will just block
// until the stream is made available or its context is canceled. Note that
// ownership of the stream is passed from the RunWithStream goroutine to the
// Next goroutine. In exchange, the RunWithStream goroutine receives the first
// context passed into Next and listens for cancellation. Returning from
// RunWithStream (or more specifically, the RPC handler) will unblock Next by
// closing the stream.
type Inbox struct {
	colexecbase.ZeroInputNode
	// typs are the column types given to NewInbox; the converter, the
	// serializer and the scratch batch are all created from them.
	typs []*types.T

	// converter turns deserialized arrow data into a coldata.Batch (see the
	// ArrowToBatch call in Next).
	converter *colserde.ArrowBatchConverter
	// serializer deserializes the raw bytes of a producer message into arrow
	// data (see the Deserialize call in Next).
	serializer *colserde.RecordBatchSerializer

	// streamID is used to overwrite a caller's streamID
	// in the ctx argument of Next and DrainMeta.
	streamID execinfrapb.StreamID

	// streamCh is the channel over which the stream is passed from the stream
	// handler to the reader goroutine.
	streamCh chan flowStreamServer
	// contextCh is the channel over which the reader goroutine passes the first
	// context to the stream handler so that it can listen for context
	// cancellation.
	contextCh chan context.Context

	// timeoutCh is the channel over which an error will be sent if the reader
	// goroutine should exit while waiting for a stream.
	timeoutCh chan error

	// errCh is the channel that RunWithStream will block on, waiting until the
	// Inbox does not need a stream any more. An error will only be sent on this
	// channel in the event of a cancellation or a non-io.EOF error originating
	// from a stream.Recv. Closing the channel (see closeLocked) makes
	// RunWithStream read a nil error.
	errCh chan error

	// ctxInterceptorFn is a callback to expose the inbox's context
	// right after init. To be used for unit testing.
	ctxInterceptorFn func(context.Context)

	// We need two mutexes because a single mutex is insufficient to handle
	// concurrent calls to Next() and DrainMeta(). See comment in DrainMeta.
	//
	// stateMu protects the bookkeeping state below; it is released by Next
	// while it is blocked in Recv.
	stateMu struct {
		syncutil.Mutex
		// initialized prevents double initialization. Should not be used by the
		// RunWithStream goroutine.
		initialized bool
		// done prevents double closing. It should not be used by the RunWithStream
		// goroutine.
		done bool
		// nextRunning indicates whether Next goroutine is running at the moment.
		nextRunning bool
		// nextExited is a condition variable on which DrainMeta might block in
		// order to wait for Next goroutine to exit. Its locker is stateMu.
		nextExited *sync.Cond
		// nextShouldExit indicates to Next goroutine that it should exit. It must
		// only be updated by DrainMeta goroutine.
		nextShouldExit bool
		// bufferedMeta buffers any metadata found in Next when reading from the
		// stream and is returned by DrainMeta.
		bufferedMeta []execinfrapb.ProducerMetadata
	}

	// streamMu is held around calls to Recv on the stream so that only one
	// goroutine (Next or DrainMeta) is receiving at a time.
	streamMu struct {
		syncutil.Mutex
		// stream is the RPC stream. It is set when RunWithStream is called but
		// only the Next and DrainMeta goroutines may access it.
		stream flowStreamServer
	}

	// scratch holds state that is reused across calls to Next.
	scratch struct {
		// data is the destination of deserialization; it is truncated and
		// refilled for every message that carries raw bytes.
		data []*array.Data
		// b is the batch that Next populates and returns, so a returned batch
		// is overwritten by the following call to Next.
		b coldata.Batch
	}
}
   116  
   117  var _ colexecbase.Operator = &Inbox{}
   118  
   119  // NewInbox creates a new Inbox.
   120  func NewInbox(
   121  	allocator *colmem.Allocator, typs []*types.T, streamID execinfrapb.StreamID,
   122  ) (*Inbox, error) {
   123  	c, err := colserde.NewArrowBatchConverter(typs)
   124  	if err != nil {
   125  		return nil, err
   126  	}
   127  	s, err := colserde.NewRecordBatchSerializer(typs)
   128  	if err != nil {
   129  		return nil, err
   130  	}
   131  	i := &Inbox{
   132  		typs:       typs,
   133  		converter:  c,
   134  		serializer: s,
   135  		streamID:   streamID,
   136  		streamCh:   make(chan flowStreamServer, 1),
   137  		contextCh:  make(chan context.Context, 1),
   138  		timeoutCh:  make(chan error, 1),
   139  		errCh:      make(chan error, 1),
   140  	}
   141  	i.scratch.data = make([]*array.Data, len(typs))
   142  	i.scratch.b = allocator.NewMemBatch(typs)
   143  	i.stateMu.bufferedMeta = make([]execinfrapb.ProducerMetadata, 0)
   144  	i.stateMu.nextExited = sync.NewCond(&i.stateMu)
   145  	return i, nil
   146  }
   147  
   148  // maybeInitLocked calls Inbox.initLocked if the inbox is not initialized and
   149  // returns an error if the initialization was not successful. Usually this is
   150  // because the given context is canceled before the remote stream arrives.
   151  // NOTE: i.stateMu *must* be held when calling this function.
   152  func (i *Inbox) maybeInitLocked(ctx context.Context) error {
   153  	if !i.stateMu.initialized {
   154  		if err := i.initLocked(ctx); err != nil {
   155  			return err
   156  		}
   157  		i.stateMu.initialized = true
   158  	}
   159  	return nil
   160  }
   161  
   162  // initLocked initializes the Inbox for operation by blocking until
   163  // RunWithStream sets the stream to read from. ctx ownership is retained until
   164  // the stream arrives (to allow for unblocking the wait for a stream), at which
   165  // point ownership is transferred to RunWithStream. This should only be called
   166  // from the reader goroutine when it needs a stream.
   167  // NOTE: i.stateMu *must* be held when calling this function because it is
   168  // sufficient to protect access to i.streamMu.stream since the stream will only
   169  // be accessed after the initialization.
   170  func (i *Inbox) initLocked(ctx context.Context) error {
   171  	// Wait for the stream to be initialized. We're essentially waiting for the
   172  	// remote connection.
   173  	select {
   174  	case i.streamMu.stream = <-i.streamCh:
   175  	case err := <-i.timeoutCh:
   176  		i.errCh <- fmt.Errorf("%s: remote stream arrived too late", err)
   177  		return err
   178  	case <-ctx.Done():
   179  		i.errCh <- fmt.Errorf("%s: Inbox while waiting for stream", ctx.Err())
   180  		return ctx.Err()
   181  	}
   182  
   183  	if i.ctxInterceptorFn != nil {
   184  		i.ctxInterceptorFn(ctx)
   185  	}
   186  	i.contextCh <- ctx
   187  	return nil
   188  }
   189  
   190  // closeLocked closes the inbox, ensuring that any call to RunWithStream will
   191  // return immediately. closeLocked is idempotent.
   192  // NOTE: i.stateMu *must* be held when calling this function.
   193  func (i *Inbox) closeLocked() {
   194  	if !i.stateMu.done {
   195  		i.stateMu.done = true
   196  		close(i.errCh)
   197  	}
   198  }
   199  
   200  // RunWithStream sets the Inbox's stream and waits until either streamCtx is
   201  // canceled, a caller of Next cancels the first context passed into Next, or
   202  // an EOF is encountered on the stream by the Next goroutine.
   203  func (i *Inbox) RunWithStream(streamCtx context.Context, stream flowStreamServer) error {
   204  	streamCtx = logtags.AddTag(streamCtx, "streamID", i.streamID)
   205  	log.VEvent(streamCtx, 2, "Inbox handling stream")
   206  	defer log.VEvent(streamCtx, 2, "Inbox exited stream handler")
   207  	// Pass the stream to the reader goroutine (non-blocking) and get the context
   208  	// to listen for cancellation.
   209  	i.streamCh <- stream
   210  	var readerCtx context.Context
   211  	select {
   212  	case err := <-i.errCh:
   213  		return err
   214  	case readerCtx = <-i.contextCh:
   215  		log.VEvent(streamCtx, 2, "Inbox reader arrived")
   216  	case <-streamCtx.Done():
   217  		return fmt.Errorf("%s: streamCtx while waiting for reader (remote client canceled)", streamCtx.Err())
   218  	}
   219  
   220  	// Now wait for one of the events described in the method comment. If a
   221  	// cancellation is encountered, nothing special must be done to cancel the
   222  	// reader goroutine as returning from the handler will close the stream.
   223  	select {
   224  	case err := <-i.errCh:
   225  		// nil will be read from errCh when the channel is closed.
   226  		return err
   227  	case <-readerCtx.Done():
   228  		// The reader canceled the stream.
   229  		return fmt.Errorf("%s: readerCtx in Inbox stream handler (local reader canceled)", readerCtx.Err())
   230  	case <-streamCtx.Done():
   231  		// The client canceled the stream.
   232  		return fmt.Errorf("%s: streamCtx in Inbox stream handler (remote client canceled)", streamCtx.Err())
   233  	}
   234  }
   235  
// Timeout sends the given error to any readers waiting for a stream to be
// established (i.e. RunWithStream to be called). The error is delivered over
// timeoutCh, which initLocked selects on while it waits for the stream.
func (i *Inbox) Timeout(err error) {
	// timeoutCh is created with a buffer of one in NewInbox.
	i.timeoutCh <- err
}
   241  
// Init is part of the Operator interface. It is a no-op: the stream is set up
// lazily via maybeInitLocked on the first call to Next or DrainMeta.
func (i *Inbox) Init() {}
   244  
// Next returns the next batch. It will block until there is data available.
// The Inbox will exit when either the context passed in on the first call to
// Next is canceled or when DrainMeta goroutine tells it to do so.
//
// A zero-length batch is returned when the inbox is already done, when the
// stream reports io.EOF, or when DrainMeta has asked Next to exit. Metadata
// found on the stream is buffered for DrainMeta and is never returned from
// Next. The returned batch is the inbox's scratch batch, so it is overwritten
// by the following call to Next.
func (i *Inbox) Next(ctx context.Context) coldata.Batch {
	i.stateMu.Lock()
	// stateMuLocked tracks whether this goroutine currently holds i.stateMu so
	// that the recover handler below can restore the expected lock state.
	stateMuLocked := true
	i.stateMu.nextRunning = true
	// This defer is registered first and therefore runs last: it tells a
	// waiting DrainMeta that Next has exited and releases i.stateMu.
	defer func() {
		i.stateMu.nextRunning = false
		i.stateMu.nextExited.Signal()
		i.stateMu.Unlock()
	}()
	if i.stateMu.done {
		return coldata.ZeroBatch
	}

	ctx = logtags.AddTag(ctx, "streamID", i.streamID)

	defer func() {
		// Catch any panics that occur and close the errCh in order to not leak the
		// goroutine listening for context cancellation. errCh must still be closed
		// during normal termination.
		if err := recover(); err != nil {
			if !stateMuLocked {
				// The panic occurred while we were Recv'ing when we were holding
				// i.streamMu and were not holding i.stateMu.
				i.stateMu.Lock()
				i.streamMu.Unlock()
			}
			i.closeLocked()
			// Hand the recovered value back to the colexecerror machinery.
			colexecerror.InternalError(err)
		}
	}()

	// NOTE: It is very important to close i.errCh only when execution terminates
	// ungracefully or when DrainMeta has been called (which indicates a graceful
	// termination). DrainMeta will use the stream to read any remaining metadata
	// after Next returns a zero-length batch during normal execution.
	if err := i.maybeInitLocked(ctx); err != nil {
		// An error occurred while initializing the Inbox and is likely caused by
		// the connection issues. It is expected that such an error can occur.
		colexecerror.ExpectedError(err)
	}

	for {
		// DrainMeta goroutine indicated to us that we should exit. We do so
		// without closing errCh since DrainMeta still needs the stream.
		if i.stateMu.nextShouldExit {
			return coldata.ZeroBatch
		}

		// Swap i.stateMu for i.streamMu around the blocking Recv so that
		// DrainMeta can acquire i.stateMu while we are waiting for a message.
		i.stateMu.Unlock()
		stateMuLocked = false
		i.streamMu.Lock()
		m, err := i.streamMu.stream.Recv()
		i.streamMu.Unlock()
		i.stateMu.Lock()
		stateMuLocked = true
		if err != nil {
			if err == io.EOF {
				// Done.
				i.closeLocked()
				return coldata.ZeroBatch
			}
			// Report the failure to RunWithStream before propagating it.
			// NOTE(review): ExpectedError presumably does not return (the error
			// appears to be propagated by panicking, which the deferred recover
			// above handles); otherwise m would be dereferenced below after a
			// failed Recv - confirm.
			i.errCh <- err
			colexecerror.ExpectedError(err)
		}
		if len(m.Data.Metadata) != 0 {
			for _, rpm := range m.Data.Metadata {
				meta, ok := execinfrapb.RemoteProducerMetaToLocalMeta(ctx, rpm)
				if !ok {
					// Skip metadata that could not be converted.
					continue
				}
				i.stateMu.bufferedMeta = append(i.stateMu.bufferedMeta, meta)
			}
			// Continue until we get the next batch or EOF.
			continue
		}
		if len(m.Data.RawBytes) == 0 {
			// Protect against Deserialization panics by skipping empty messages.
			continue
		}
		// Reuse the scratch arrow data and scratch batch for every message.
		i.scratch.data = i.scratch.data[:0]
		if err := i.serializer.Deserialize(&i.scratch.data, m.Data.RawBytes); err != nil {
			colexecerror.InternalError(err)
		}
		if err := i.converter.ArrowToBatch(i.scratch.data, i.scratch.b); err != nil {
			colexecerror.InternalError(err)
		}
		return i.scratch.b
	}
}
   337  
   338  func (i *Inbox) sendDrainSignal(ctx context.Context) error {
   339  	log.VEvent(ctx, 2, "Inbox sending drain signal to Outbox")
   340  	// It is safe to Send without holding the mutex because it is legal to call
   341  	// Send and Recv from different goroutines.
   342  	if err := i.streamMu.stream.Send(&execinfrapb.ConsumerSignal{DrainRequest: &execinfrapb.DrainRequest{}}); err != nil {
   343  		if log.V(1) {
   344  			log.Warningf(ctx, "Inbox unable to send drain signal to Outbox: %+v", err)
   345  		}
   346  		return err
   347  	}
   348  	return nil
   349  }
   350  
   351  // DrainMeta is part of the MetadataGenerator interface. DrainMeta may be
   352  // called concurrently with Next.
   353  // Note: DrainMeta will cause Next goroutine to finish.
   354  func (i *Inbox) DrainMeta(ctx context.Context) []execinfrapb.ProducerMetadata {
   355  	i.stateMu.Lock()
   356  	defer i.stateMu.Unlock()
   357  	allMeta := i.stateMu.bufferedMeta
   358  	i.stateMu.bufferedMeta = i.stateMu.bufferedMeta[:0]
   359  
   360  	if i.stateMu.done {
   361  		return allMeta
   362  	}
   363  
   364  	ctx = logtags.AddTag(ctx, "streamID", i.streamID)
   365  
   366  	// We want draining the Inbox to work regardless of whether or not we have a
   367  	// goroutine in Next. We essentially need to do two things: 1) Is the stream
   368  	// safe to use? If yes, then 2) Make sure nobody else is receiving.
   369  	// Unfortunately, there is no way to cancel a Recv on a stream, so we need to
   370  	// do this by sending the message. However, we can't unconditionally send a
   371  	// message since we don't know the state of the stream (is it initialized?).
   372  	// This leaves us with having two separate mutexes, one for the state and
   373  	// another one for the stream (to make sure we wait until the Next goroutine
   374  	// has finished Recv'ing).
   375  	drainSignalSent := false
   376  	if i.stateMu.initialized {
   377  		if err := i.sendDrainSignal(ctx); err != nil {
   378  			return allMeta
   379  		}
   380  		drainSignalSent = true
   381  		i.stateMu.nextShouldExit = true
   382  		for i.stateMu.nextRunning {
   383  			i.stateMu.nextExited.Wait()
   384  		}
   385  		// It is possible that Next goroutine has buffered more metadata, so we
   386  		// need to grab it.
   387  		allMeta = append(allMeta, i.stateMu.bufferedMeta...)
   388  		i.stateMu.bufferedMeta = i.stateMu.bufferedMeta[:0]
   389  	}
   390  
   391  	// Note that unlocking defer from above will execute after this defer because
   392  	// the unlocking one will be pushed below on the stack, so we still will have
   393  	// the lock when this one is executed.
   394  	defer i.closeLocked()
   395  
   396  	if err := i.maybeInitLocked(ctx); err != nil {
   397  		if log.V(1) {
   398  			log.Warningf(ctx, "Inbox unable to initialize stream while draining metadata: %+v", err)
   399  		}
   400  		return allMeta
   401  	}
   402  	if !drainSignalSent {
   403  		if err := i.sendDrainSignal(ctx); err != nil {
   404  			return allMeta
   405  		}
   406  	}
   407  
   408  	i.streamMu.Lock()
   409  	defer i.streamMu.Unlock()
   410  	for {
   411  		msg, err := i.streamMu.stream.Recv()
   412  		if err != nil {
   413  			if err == io.EOF {
   414  				break
   415  			}
   416  			if log.V(1) {
   417  				log.Warningf(ctx, "Inbox Recv connection error while draining metadata: %+v", err)
   418  			}
   419  			return allMeta
   420  		}
   421  		for _, remoteMeta := range msg.Data.Metadata {
   422  			meta, ok := execinfrapb.RemoteProducerMetaToLocalMeta(ctx, remoteMeta)
   423  			if !ok {
   424  				continue
   425  			}
   426  			allMeta = append(allMeta, meta)
   427  		}
   428  	}
   429  
   430  	return allMeta
   431  }