github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colflow/colrpc/inbox_test.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package colrpc 12 13 import ( 14 "bytes" 15 "context" 16 "fmt" 17 "io" 18 "sync" 19 "sync/atomic" 20 "testing" 21 "time" 22 23 "github.com/cockroachdb/cockroach/pkg/col/coldata" 24 "github.com/cockroachdb/cockroach/pkg/col/coldatatestutils" 25 "github.com/cockroachdb/cockroach/pkg/col/colserde" 26 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror" 27 "github.com/cockroachdb/cockroach/pkg/sql/colmem" 28 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 29 "github.com/cockroachdb/cockroach/pkg/sql/types" 30 "github.com/cockroachdb/cockroach/pkg/testutils" 31 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 32 "github.com/cockroachdb/cockroach/pkg/util/randutil" 33 "github.com/cockroachdb/errors" 34 "github.com/stretchr/testify/require" 35 ) 36 37 type callbackFlowStreamServer struct { 38 server flowStreamServer 39 sendCb func() 40 recvCb func() 41 } 42 43 func (s callbackFlowStreamServer) Send(cs *execinfrapb.ConsumerSignal) error { 44 if s.sendCb != nil { 45 s.sendCb() 46 } 47 return s.server.Send(cs) 48 } 49 50 func (s callbackFlowStreamServer) Recv() (*execinfrapb.ProducerMessage, error) { 51 if s.recvCb != nil { 52 s.recvCb() 53 } 54 return s.server.Recv() 55 } 56 57 var _ flowStreamServer = callbackFlowStreamServer{} 58 59 func TestInboxCancellation(t *testing.T) { 60 defer leaktest.AfterTest(t)() 61 62 typs := []*types.T{types.Int} 63 t.Run("ReaderWaitingForStreamHandler", func(t *testing.T) { 64 inbox, err := NewInbox(testAllocator, typs, execinfrapb.StreamID(0)) 65 require.NoError(t, err) 66 ctx, cancelFn := context.WithCancel(context.Background()) 67 // Cancel the context. 68 cancelFn() 69 // Next should not block if the context is canceled. 70 err = colexecerror.CatchVectorizedRuntimeError(func() { inbox.Next(ctx) }) 71 require.True(t, testutils.IsError(err, "context canceled"), err) 72 // Now, the remote stream arrives. 73 err = inbox.RunWithStream(context.Background(), mockFlowStreamServer{}) 74 require.True(t, testutils.IsError(err, "while waiting for stream"), err) 75 }) 76 77 t.Run("DuringRecv", func(t *testing.T) { 78 rpcLayer := makeMockFlowStreamRPCLayer() 79 inbox, err := NewInbox(testAllocator, typs, execinfrapb.StreamID(0)) 80 require.NoError(t, err) 81 ctx, cancelFn := context.WithCancel(context.Background()) 82 83 // Setup reader and stream. 84 go func() { 85 inbox.Next(ctx) 86 }() 87 recvCalled := make(chan struct{}) 88 streamHandlerErrCh := handleStream(context.Background(), inbox, callbackFlowStreamServer{ 89 server: rpcLayer.server, 90 recvCb: func() { 91 recvCalled <- struct{}{} 92 }, 93 }, func() { close(rpcLayer.server.csChan) }) 94 95 // Now wait for the Inbox to call Recv on the stream. 96 <-recvCalled 97 98 // Cancel the context. 99 cancelFn() 100 err = <-streamHandlerErrCh 101 require.True(t, testutils.IsError(err, "readerCtx in Inbox stream handler"), err) 102 103 // The mock RPC layer does not unblock the Recv for us on the server side, 104 // so manually send an io.EOF to the reader goroutine. 105 close(rpcLayer.server.pmChan) 106 }) 107 108 t.Run("StreamHandlerWaitingForReader", func(t *testing.T) { 109 rpcLayer := makeMockFlowStreamRPCLayer() 110 inbox, err := NewInbox(testAllocator, typs, execinfrapb.StreamID(0)) 111 require.NoError(t, err) 112 113 ctx, cancelFn := context.WithCancel(context.Background()) 114 115 cancelFn() 116 // A stream arrives but there is no reader. 117 err = <-handleStream(ctx, inbox, rpcLayer.server, func() { close(rpcLayer.client.csChan) }) 118 require.True(t, testutils.IsError(err, "while waiting for reader"), err) 119 }) 120 } 121 122 // TestInboxNextPanicDoesntLeakGoroutines verifies that goroutines that are 123 // spawned as part of an Inbox's normal operation are cleaned up even on a 124 // panic. 125 func TestInboxNextPanicDoesntLeakGoroutines(t *testing.T) { 126 defer leaktest.AfterTest(t)() 127 128 inbox, err := NewInbox(testAllocator, []*types.T{types.Int}, execinfrapb.StreamID(0)) 129 require.NoError(t, err) 130 131 rpcLayer := makeMockFlowStreamRPCLayer() 132 streamHandlerErrCh := handleStream(context.Background(), inbox, rpcLayer.server, func() { close(rpcLayer.client.csChan) }) 133 134 m := &execinfrapb.ProducerMessage{} 135 m.Data.RawBytes = []byte("garbage") 136 137 go func() { 138 _ = rpcLayer.client.Send(m) 139 }() 140 141 // inbox.Next should panic given that the deserializer will encounter garbage 142 // data. 143 require.Panics(t, func() { inbox.Next(context.Background()) }) 144 145 // We require no error from the stream handler as nothing was canceled. The 146 // panic is bubbled up through the Next chain on the Inbox's host. 147 require.NoError(t, <-streamHandlerErrCh) 148 } 149 150 func TestInboxTimeout(t *testing.T) { 151 defer leaktest.AfterTest(t)() 152 153 ctx := context.Background() 154 155 inbox, err := NewInbox(testAllocator, []*types.T{types.Int}, execinfrapb.StreamID(0)) 156 require.NoError(t, err) 157 158 var ( 159 readerErrCh = make(chan error) 160 rpcLayer = makeMockFlowStreamRPCLayer() 161 ) 162 go func() { 163 readerErrCh <- colexecerror.CatchVectorizedRuntimeError(func() { inbox.Next(ctx) }) 164 }() 165 166 // Timeout the inbox. 167 const timeoutErr = "timeout error" 168 inbox.Timeout(errors.New(timeoutErr)) 169 170 // Ensure that the reader gets the error. 171 readerErr := <-readerErrCh 172 require.True(t, testutils.IsError(readerErr, timeoutErr), readerErr) 173 174 // And now the stream arrives. 175 streamHandlerErrCh := handleStream(ctx, inbox, rpcLayer.server, nil /* doneFn */) 176 streamErr := <-streamHandlerErrCh 177 require.True(t, testutils.IsError(streamErr, "stream arrived too late"), streamErr) 178 } 179 180 // TestInboxShutdown is a random test that spawns a goroutine for handling a 181 // FlowStream RPC (setting up an inbound stream, or RunWithStream), a goroutine 182 // to read from an Inbox (Next goroutine), and a goroutine to drain the Inbox 183 // (DrainMeta goroutine). These goroutines race against each other and the 184 // desired state is that everything is cleaned up at the end. Examples of 185 // scenarios that are tested by this test include but are not limited to: 186 // - DrainMeta called before Next and before a stream arrives. 187 // - DrainMeta called concurrently with Next with an active stream. 188 // - A forceful cancellation of Next but no call to DrainMeta. 189 func TestInboxShutdown(t *testing.T) { 190 defer leaktest.AfterTest(t)() 191 192 var ( 193 rng, _ = randutil.NewPseudoRand() 194 // infiniteBatches will influence whether or not we're likely to test a 195 // graceful shutdown (since other shutdown mechanisms might happen before 196 // we reach the end of the data stream). If infiniteBatches is true, 197 // shutdown scenarios in the middle of data processing are always tested. If 198 // false, they sometimes will be. 199 infiniteBatches = rng.Float64() < 0.5 200 drainMetaSleep = time.Millisecond * time.Duration(rng.Intn(10)) 201 nextSleep = time.Millisecond * time.Duration(rng.Intn(10)) 202 runWithStreamSleep = time.Millisecond * time.Duration(rng.Intn(10)) 203 typs = []*types.T{types.Int} 204 batch = coldatatestutils.RandomBatch(testAllocator, rng, typs, coldata.BatchSize(), 0 /* length */, rng.Float64()) 205 ) 206 207 for _, runDrainMetaGoroutine := range []bool{false, true} { 208 for _, runNextGoroutine := range []bool{false, true} { 209 for _, runRunWithStreamGoroutine := range []bool{false, true} { 210 if runDrainMetaGoroutine == false && runNextGoroutine == false && runRunWithStreamGoroutine == true { 211 // This is sort of like a remote node connecting to the inbox, but the 212 // inbox will never be spawned. This is dealt with by another part of 213 // the code (the flow registry times out inbound RPCs if a consumer is 214 // not scheduled in time), so this case is skipped. 215 continue 216 } 217 rpcLayer := makeMockFlowStreamRPCLayer() 218 219 t.Run(fmt.Sprintf( 220 "drain=%t/next=%t/stream=%t/inf=%t", 221 runDrainMetaGoroutine, runNextGoroutine, runRunWithStreamGoroutine, infiniteBatches, 222 ), func(t *testing.T) { 223 inboxCtx, inboxCancel := context.WithCancel(context.Background()) 224 inboxMemAccount := testMemMonitor.MakeBoundAccount() 225 defer inboxMemAccount.Close(inboxCtx) 226 inbox, err := NewInbox( 227 colmem.NewAllocator(inboxCtx, &inboxMemAccount, coldata.StandardColumnFactory), 228 typs, execinfrapb.StreamID(0), 229 ) 230 require.NoError(t, err) 231 c, err := colserde.NewArrowBatchConverter(typs) 232 require.NoError(t, err) 233 r, err := colserde.NewRecordBatchSerializer(typs) 234 require.NoError(t, err) 235 236 goroutines := []struct { 237 name string 238 asyncOperation func() chan error 239 }{ 240 { 241 name: "RunWithStream", 242 asyncOperation: func() chan error { 243 errCh := make(chan error) 244 go func() { 245 var wg sync.WaitGroup 246 defer close(errCh) 247 if runWithStreamSleep != 0 { 248 time.Sleep(runWithStreamSleep) 249 } 250 if !runRunWithStreamGoroutine { 251 // The inbox needs to be timed out. This is called by the inbound 252 // stream code during normal operation. This timeout simulates a 253 // stream not arriving in time. 254 inbox.Timeout(errors.New("artificial timeout")) 255 return 256 } 257 quitSending := make(chan struct{}) 258 wg.Add(1) 259 go func() { 260 defer wg.Done() 261 arrowData, err := c.BatchToArrow(batch) 262 if err != nil { 263 errCh <- err 264 return 265 } 266 var buffer bytes.Buffer 267 _, _, err = r.Serialize(&buffer, arrowData) 268 if err != nil { 269 errCh <- err 270 return 271 } 272 var draining uint32 273 if runDrainMetaGoroutine { 274 // Listen for the drain signal. 275 wg.Add(1) 276 go func() { 277 defer wg.Done() 278 for { 279 cs, err := rpcLayer.client.Recv() 280 if cs != nil && cs.DrainRequest != nil { 281 atomic.StoreUint32(&draining, 1) 282 return 283 } 284 // TODO(asubiotto): Generate some metadata and test 285 // that it is received. 286 if err != nil { 287 if err == io.EOF { 288 return 289 } 290 errCh <- err 291 } 292 } 293 }() 294 } 295 msg := &execinfrapb.ProducerMessage{Data: execinfrapb.ProducerData{RawBytes: buffer.Bytes()}} 296 batchesToSend := rng.Intn(65536) 297 for i := 0; infiniteBatches || i < batchesToSend; i++ { 298 if atomic.LoadUint32(&draining) == 1 { 299 break 300 } 301 quitLoop := false 302 select { 303 case rpcLayer.client.pmChan <- msg: 304 case <-quitSending: 305 quitLoop = true 306 } 307 if quitLoop { 308 break 309 } 310 } 311 if err := rpcLayer.client.CloseSend(); err != nil { 312 errCh <- err 313 } 314 }() 315 // Use context.Background() because it's separate from the 316 // inbox context. 317 handleErr := <-handleStream(context.Background(), inbox, rpcLayer.server, func() { close(rpcLayer.server.csChan) }) 318 close(quitSending) 319 wg.Wait() 320 errCh <- handleErr 321 }() 322 return errCh 323 }, 324 }, 325 { 326 name: "Next", 327 asyncOperation: func() chan error { 328 errCh := make(chan error) 329 go func() { 330 defer close(errCh) 331 if !runNextGoroutine { 332 return 333 } 334 if nextSleep != 0 { 335 time.Sleep(nextSleep) 336 } 337 var ( 338 done bool 339 err error 340 ) 341 for !done && err == nil { 342 err = colexecerror.CatchVectorizedRuntimeError(func() { b := inbox.Next(inboxCtx); done = b.Length() == 0 }) 343 } 344 errCh <- err 345 }() 346 return errCh 347 }, 348 }, 349 { 350 name: "DrainMeta", 351 asyncOperation: func() chan error { 352 errCh := make(chan error) 353 go func() { 354 defer func() { 355 inboxCancel() 356 close(errCh) 357 }() 358 // Sleep before checking for whether to run a drain meta 359 // goroutine or not, because we want to insert a potential delay 360 // before canceling the inbox context in any case. 361 if drainMetaSleep != 0 { 362 time.Sleep(drainMetaSleep) 363 } 364 if !runDrainMetaGoroutine { 365 return 366 } 367 _ = inbox.DrainMeta(inboxCtx) 368 }() 369 return errCh 370 }, 371 }, 372 } 373 374 // goroutineIndices will be shuffled around to randomly change the order in 375 // which the goroutines are spawned. 376 goroutineIndices := make([]int, len(goroutines)) 377 for i := range goroutineIndices { 378 goroutineIndices[i] = i 379 } 380 rng.Shuffle(len(goroutineIndices), func(i, j int) { goroutineIndices[i], goroutineIndices[j] = goroutineIndices[j], goroutineIndices[i] }) 381 errChans := make([]chan error, 0, len(goroutines)) 382 for _, i := range goroutineIndices { 383 errChans = append(errChans, goroutines[i].asyncOperation()) 384 } 385 386 for i, errCh := range errChans { 387 for err := <-errCh; err != nil; err = <-errCh { 388 if !testutils.IsError(err, "context canceled|artificial timeout") { 389 // Error to keep on draining errors but mark this test as failed. 390 t.Errorf("unexpected error %v from %s goroutine", err, goroutines[goroutineIndices[i]].name) 391 } 392 } 393 } 394 }) 395 } 396 } 397 } 398 }