github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colflow/colrpc/inbox_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package colrpc
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"fmt"
    17  	"io"
    18  	"sync"
    19  	"sync/atomic"
    20  	"testing"
    21  	"time"
    22  
    23  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    24  	"github.com/cockroachdb/cockroach/pkg/col/coldatatestutils"
    25  	"github.com/cockroachdb/cockroach/pkg/col/colserde"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
    27  	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
    28  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    29  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    30  	"github.com/cockroachdb/cockroach/pkg/testutils"
    31  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    32  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    33  	"github.com/cockroachdb/errors"
    34  	"github.com/stretchr/testify/require"
    35  )
    36  
    37  type callbackFlowStreamServer struct {
    38  	server flowStreamServer
    39  	sendCb func()
    40  	recvCb func()
    41  }
    42  
    43  func (s callbackFlowStreamServer) Send(cs *execinfrapb.ConsumerSignal) error {
    44  	if s.sendCb != nil {
    45  		s.sendCb()
    46  	}
    47  	return s.server.Send(cs)
    48  }
    49  
    50  func (s callbackFlowStreamServer) Recv() (*execinfrapb.ProducerMessage, error) {
    51  	if s.recvCb != nil {
    52  		s.recvCb()
    53  	}
    54  	return s.server.Recv()
    55  }
    56  
    57  var _ flowStreamServer = callbackFlowStreamServer{}
    58  
    59  func TestInboxCancellation(t *testing.T) {
    60  	defer leaktest.AfterTest(t)()
    61  
    62  	typs := []*types.T{types.Int}
    63  	t.Run("ReaderWaitingForStreamHandler", func(t *testing.T) {
    64  		inbox, err := NewInbox(testAllocator, typs, execinfrapb.StreamID(0))
    65  		require.NoError(t, err)
    66  		ctx, cancelFn := context.WithCancel(context.Background())
    67  		// Cancel the context.
    68  		cancelFn()
    69  		// Next should not block if the context is canceled.
    70  		err = colexecerror.CatchVectorizedRuntimeError(func() { inbox.Next(ctx) })
    71  		require.True(t, testutils.IsError(err, "context canceled"), err)
    72  		// Now, the remote stream arrives.
    73  		err = inbox.RunWithStream(context.Background(), mockFlowStreamServer{})
    74  		require.True(t, testutils.IsError(err, "while waiting for stream"), err)
    75  	})
    76  
    77  	t.Run("DuringRecv", func(t *testing.T) {
    78  		rpcLayer := makeMockFlowStreamRPCLayer()
    79  		inbox, err := NewInbox(testAllocator, typs, execinfrapb.StreamID(0))
    80  		require.NoError(t, err)
    81  		ctx, cancelFn := context.WithCancel(context.Background())
    82  
    83  		// Setup reader and stream.
    84  		go func() {
    85  			inbox.Next(ctx)
    86  		}()
    87  		recvCalled := make(chan struct{})
    88  		streamHandlerErrCh := handleStream(context.Background(), inbox, callbackFlowStreamServer{
    89  			server: rpcLayer.server,
    90  			recvCb: func() {
    91  				recvCalled <- struct{}{}
    92  			},
    93  		}, func() { close(rpcLayer.server.csChan) })
    94  
    95  		// Now wait for the Inbox to call Recv on the stream.
    96  		<-recvCalled
    97  
    98  		// Cancel the context.
    99  		cancelFn()
   100  		err = <-streamHandlerErrCh
   101  		require.True(t, testutils.IsError(err, "readerCtx in Inbox stream handler"), err)
   102  
   103  		// The mock RPC layer does not unblock the Recv for us on the server side,
   104  		// so manually send an io.EOF to the reader goroutine.
   105  		close(rpcLayer.server.pmChan)
   106  	})
   107  
   108  	t.Run("StreamHandlerWaitingForReader", func(t *testing.T) {
   109  		rpcLayer := makeMockFlowStreamRPCLayer()
   110  		inbox, err := NewInbox(testAllocator, typs, execinfrapb.StreamID(0))
   111  		require.NoError(t, err)
   112  
   113  		ctx, cancelFn := context.WithCancel(context.Background())
   114  
   115  		cancelFn()
   116  		// A stream arrives but there is no reader.
   117  		err = <-handleStream(ctx, inbox, rpcLayer.server, func() { close(rpcLayer.client.csChan) })
   118  		require.True(t, testutils.IsError(err, "while waiting for reader"), err)
   119  	})
   120  }
   121  
   122  // TestInboxNextPanicDoesntLeakGoroutines verifies that goroutines that are
   123  // spawned as part of an Inbox's normal operation are cleaned up even on a
   124  // panic.
   125  func TestInboxNextPanicDoesntLeakGoroutines(t *testing.T) {
   126  	defer leaktest.AfterTest(t)()
   127  
   128  	inbox, err := NewInbox(testAllocator, []*types.T{types.Int}, execinfrapb.StreamID(0))
   129  	require.NoError(t, err)
   130  
   131  	rpcLayer := makeMockFlowStreamRPCLayer()
   132  	streamHandlerErrCh := handleStream(context.Background(), inbox, rpcLayer.server, func() { close(rpcLayer.client.csChan) })
   133  
   134  	m := &execinfrapb.ProducerMessage{}
   135  	m.Data.RawBytes = []byte("garbage")
   136  
   137  	go func() {
   138  		_ = rpcLayer.client.Send(m)
   139  	}()
   140  
   141  	// inbox.Next should panic given that the deserializer will encounter garbage
   142  	// data.
   143  	require.Panics(t, func() { inbox.Next(context.Background()) })
   144  
   145  	// We require no error from the stream handler as nothing was canceled. The
   146  	// panic is bubbled up through the Next chain on the Inbox's host.
   147  	require.NoError(t, <-streamHandlerErrCh)
   148  }
   149  
   150  func TestInboxTimeout(t *testing.T) {
   151  	defer leaktest.AfterTest(t)()
   152  
   153  	ctx := context.Background()
   154  
   155  	inbox, err := NewInbox(testAllocator, []*types.T{types.Int}, execinfrapb.StreamID(0))
   156  	require.NoError(t, err)
   157  
   158  	var (
   159  		readerErrCh = make(chan error)
   160  		rpcLayer    = makeMockFlowStreamRPCLayer()
   161  	)
   162  	go func() {
   163  		readerErrCh <- colexecerror.CatchVectorizedRuntimeError(func() { inbox.Next(ctx) })
   164  	}()
   165  
   166  	// Timeout the inbox.
   167  	const timeoutErr = "timeout error"
   168  	inbox.Timeout(errors.New(timeoutErr))
   169  
   170  	// Ensure that the reader gets the error.
   171  	readerErr := <-readerErrCh
   172  	require.True(t, testutils.IsError(readerErr, timeoutErr), readerErr)
   173  
   174  	// And now the stream arrives.
   175  	streamHandlerErrCh := handleStream(ctx, inbox, rpcLayer.server, nil /* doneFn */)
   176  	streamErr := <-streamHandlerErrCh
   177  	require.True(t, testutils.IsError(streamErr, "stream arrived too late"), streamErr)
   178  }
   179  
   180  // TestInboxShutdown is a random test that spawns a goroutine for handling a
   181  // FlowStream RPC (setting up an inbound stream, or RunWithStream), a goroutine
   182  // to read from an Inbox (Next goroutine), and a goroutine to drain the Inbox
   183  // (DrainMeta goroutine). These goroutines race against each other and the
   184  // desired state is that everything is cleaned up at the end. Examples of
   185  // scenarios that are tested by this test include but are not limited to:
   186  //  - DrainMeta called before Next and before a stream arrives.
   187  //  - DrainMeta called concurrently with Next with an active stream.
   188  //  - A forceful cancellation of Next but no call to DrainMeta.
   189  func TestInboxShutdown(t *testing.T) {
   190  	defer leaktest.AfterTest(t)()
   191  
   192  	var (
   193  		rng, _ = randutil.NewPseudoRand()
   194  		// infiniteBatches will influence whether or not we're likely to test a
   195  		// graceful shutdown (since other shutdown mechanisms might happen before
   196  		// we reach the end of the data stream). If infiniteBatches is true,
   197  		// shutdown scenarios in the middle of data processing are always tested. If
   198  		// false, they sometimes will be.
   199  		infiniteBatches    = rng.Float64() < 0.5
   200  		drainMetaSleep     = time.Millisecond * time.Duration(rng.Intn(10))
   201  		nextSleep          = time.Millisecond * time.Duration(rng.Intn(10))
   202  		runWithStreamSleep = time.Millisecond * time.Duration(rng.Intn(10))
   203  		typs               = []*types.T{types.Int}
   204  		batch              = coldatatestutils.RandomBatch(testAllocator, rng, typs, coldata.BatchSize(), 0 /* length */, rng.Float64())
   205  	)
   206  
   207  	for _, runDrainMetaGoroutine := range []bool{false, true} {
   208  		for _, runNextGoroutine := range []bool{false, true} {
   209  			for _, runRunWithStreamGoroutine := range []bool{false, true} {
   210  				if runDrainMetaGoroutine == false && runNextGoroutine == false && runRunWithStreamGoroutine == true {
   211  					// This is sort of like a remote node connecting to the inbox, but the
   212  					// inbox will never be spawned. This is dealt with by another part of
   213  					// the code (the flow registry times out inbound RPCs if a consumer is
   214  					// not scheduled in time), so this case is skipped.
   215  					continue
   216  				}
   217  				rpcLayer := makeMockFlowStreamRPCLayer()
   218  
   219  				t.Run(fmt.Sprintf(
   220  					"drain=%t/next=%t/stream=%t/inf=%t",
   221  					runDrainMetaGoroutine, runNextGoroutine, runRunWithStreamGoroutine, infiniteBatches,
   222  				), func(t *testing.T) {
   223  					inboxCtx, inboxCancel := context.WithCancel(context.Background())
   224  					inboxMemAccount := testMemMonitor.MakeBoundAccount()
   225  					defer inboxMemAccount.Close(inboxCtx)
   226  					inbox, err := NewInbox(
   227  						colmem.NewAllocator(inboxCtx, &inboxMemAccount, coldata.StandardColumnFactory),
   228  						typs, execinfrapb.StreamID(0),
   229  					)
   230  					require.NoError(t, err)
   231  					c, err := colserde.NewArrowBatchConverter(typs)
   232  					require.NoError(t, err)
   233  					r, err := colserde.NewRecordBatchSerializer(typs)
   234  					require.NoError(t, err)
   235  
   236  					goroutines := []struct {
   237  						name           string
   238  						asyncOperation func() chan error
   239  					}{
   240  						{
   241  							name: "RunWithStream",
   242  							asyncOperation: func() chan error {
   243  								errCh := make(chan error)
   244  								go func() {
   245  									var wg sync.WaitGroup
   246  									defer close(errCh)
   247  									if runWithStreamSleep != 0 {
   248  										time.Sleep(runWithStreamSleep)
   249  									}
   250  									if !runRunWithStreamGoroutine {
   251  										// The inbox needs to be timed out. This is called by the inbound
   252  										// stream code during normal operation. This timeout simulates a
   253  										// stream not arriving in time.
   254  										inbox.Timeout(errors.New("artificial timeout"))
   255  										return
   256  									}
   257  									quitSending := make(chan struct{})
   258  									wg.Add(1)
   259  									go func() {
   260  										defer wg.Done()
   261  										arrowData, err := c.BatchToArrow(batch)
   262  										if err != nil {
   263  											errCh <- err
   264  											return
   265  										}
   266  										var buffer bytes.Buffer
   267  										_, _, err = r.Serialize(&buffer, arrowData)
   268  										if err != nil {
   269  											errCh <- err
   270  											return
   271  										}
   272  										var draining uint32
   273  										if runDrainMetaGoroutine {
   274  											// Listen for the drain signal.
   275  											wg.Add(1)
   276  											go func() {
   277  												defer wg.Done()
   278  												for {
   279  													cs, err := rpcLayer.client.Recv()
   280  													if cs != nil && cs.DrainRequest != nil {
   281  														atomic.StoreUint32(&draining, 1)
   282  														return
   283  													}
   284  													// TODO(asubiotto): Generate some metadata and test
   285  													//  that it is received.
   286  													if err != nil {
   287  														if err == io.EOF {
   288  															return
   289  														}
   290  														errCh <- err
   291  													}
   292  												}
   293  											}()
   294  										}
   295  										msg := &execinfrapb.ProducerMessage{Data: execinfrapb.ProducerData{RawBytes: buffer.Bytes()}}
   296  										batchesToSend := rng.Intn(65536)
   297  										for i := 0; infiniteBatches || i < batchesToSend; i++ {
   298  											if atomic.LoadUint32(&draining) == 1 {
   299  												break
   300  											}
   301  											quitLoop := false
   302  											select {
   303  											case rpcLayer.client.pmChan <- msg:
   304  											case <-quitSending:
   305  												quitLoop = true
   306  											}
   307  											if quitLoop {
   308  												break
   309  											}
   310  										}
   311  										if err := rpcLayer.client.CloseSend(); err != nil {
   312  											errCh <- err
   313  										}
   314  									}()
   315  									// Use context.Background() because it's separate from the
   316  									// inbox context.
   317  									handleErr := <-handleStream(context.Background(), inbox, rpcLayer.server, func() { close(rpcLayer.server.csChan) })
   318  									close(quitSending)
   319  									wg.Wait()
   320  									errCh <- handleErr
   321  								}()
   322  								return errCh
   323  							},
   324  						},
   325  						{
   326  							name: "Next",
   327  							asyncOperation: func() chan error {
   328  								errCh := make(chan error)
   329  								go func() {
   330  									defer close(errCh)
   331  									if !runNextGoroutine {
   332  										return
   333  									}
   334  									if nextSleep != 0 {
   335  										time.Sleep(nextSleep)
   336  									}
   337  									var (
   338  										done bool
   339  										err  error
   340  									)
   341  									for !done && err == nil {
   342  										err = colexecerror.CatchVectorizedRuntimeError(func() { b := inbox.Next(inboxCtx); done = b.Length() == 0 })
   343  									}
   344  									errCh <- err
   345  								}()
   346  								return errCh
   347  							},
   348  						},
   349  						{
   350  							name: "DrainMeta",
   351  							asyncOperation: func() chan error {
   352  								errCh := make(chan error)
   353  								go func() {
   354  									defer func() {
   355  										inboxCancel()
   356  										close(errCh)
   357  									}()
   358  									// Sleep before checking for whether to run a drain meta
   359  									// goroutine or not, because we want to insert a potential delay
   360  									// before canceling the inbox context in any case.
   361  									if drainMetaSleep != 0 {
   362  										time.Sleep(drainMetaSleep)
   363  									}
   364  									if !runDrainMetaGoroutine {
   365  										return
   366  									}
   367  									_ = inbox.DrainMeta(inboxCtx)
   368  								}()
   369  								return errCh
   370  							},
   371  						},
   372  					}
   373  
   374  					// goroutineIndices will be shuffled around to randomly change the order in
   375  					// which the goroutines are spawned.
   376  					goroutineIndices := make([]int, len(goroutines))
   377  					for i := range goroutineIndices {
   378  						goroutineIndices[i] = i
   379  					}
   380  					rng.Shuffle(len(goroutineIndices), func(i, j int) { goroutineIndices[i], goroutineIndices[j] = goroutineIndices[j], goroutineIndices[i] })
   381  					errChans := make([]chan error, 0, len(goroutines))
   382  					for _, i := range goroutineIndices {
   383  						errChans = append(errChans, goroutines[i].asyncOperation())
   384  					}
   385  
   386  					for i, errCh := range errChans {
   387  						for err := <-errCh; err != nil; err = <-errCh {
   388  							if !testutils.IsError(err, "context canceled|artificial timeout") {
   389  								// Error to keep on draining errors but mark this test as failed.
   390  								t.Errorf("unexpected error %v from %s goroutine", err, goroutines[goroutineIndices[i]].name)
   391  							}
   392  						}
   393  					}
   394  				})
   395  			}
   396  		}
   397  	}
   398  }