github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/flowinfra/outbox_test.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package flowinfra 12 13 import ( 14 "context" 15 "fmt" 16 "io" 17 "net" 18 "sync" 19 "testing" 20 "time" 21 22 "github.com/cockroachdb/cockroach/pkg/roachpb" 23 "github.com/cockroachdb/cockroach/pkg/rpc" 24 "github.com/cockroachdb/cockroach/pkg/rpc/nodedialer" 25 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 26 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 27 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 28 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 29 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 30 "github.com/cockroachdb/cockroach/pkg/sql/types" 31 "github.com/cockroachdb/cockroach/pkg/testutils" 32 "github.com/cockroachdb/cockroach/pkg/util/hlc" 33 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 34 "github.com/cockroachdb/cockroach/pkg/util/stop" 35 "github.com/cockroachdb/cockroach/pkg/util/uuid" 36 "github.com/cockroachdb/errors" 37 ) 38 39 // staticAddressResolver maps StaticNodeID to the given address. 40 func staticAddressResolver(addr net.Addr) nodedialer.AddressResolver { 41 return func(nodeID roachpb.NodeID) (net.Addr, error) { 42 if nodeID == execinfra.StaticNodeID { 43 return addr, nil 44 } 45 return nil, errors.Errorf("node %d not found", nodeID) 46 } 47 } 48 49 func TestOutbox(t *testing.T) { 50 defer leaktest.AfterTest(t)() 51 52 // Create a mock server that the outbox will connect and push rows to. 53 stopper := stop.NewStopper() 54 defer stopper.Stop(context.Background()) 55 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 56 clusterID, mockServer, addr, err := execinfrapb.StartMockDistSQLServer(clock, stopper, execinfra.StaticNodeID) 57 if err != nil { 58 t.Fatal(err) 59 } 60 st := cluster.MakeTestingClusterSettings() 61 evalCtx := tree.MakeTestingEvalContext(st) 62 defer evalCtx.Stop(context.Background()) 63 64 clientRPC := rpc.NewInsecureTestingContextWithClusterID(clock, stopper, clusterID) 65 flowCtx := execinfra.FlowCtx{ 66 EvalCtx: &evalCtx, 67 Cfg: &execinfra.ServerConfig{ 68 Settings: st, 69 Stopper: stopper, 70 NodeDialer: nodedialer.New(clientRPC, staticAddressResolver(addr)), 71 }, 72 } 73 flowID := execinfrapb.FlowID{UUID: uuid.MakeV4()} 74 streamID := execinfrapb.StreamID(42) 75 outbox := NewOutbox(&flowCtx, execinfra.StaticNodeID, flowID, streamID) 76 outbox.Init(sqlbase.OneIntCol) 77 var outboxWG sync.WaitGroup 78 ctx, cancel := context.WithCancel(context.Background()) 79 defer cancel() 80 // Start the outbox. This should cause the stream to connect, even though 81 // we're not sending any rows. 82 outbox.Start(ctx, &outboxWG, cancel) 83 84 // Start a producer. It will send one row 0, then send rows -1 until a drain 85 // request is observed, then send row 2 and some metadata. 86 producerC := make(chan error) 87 go func() { 88 producerC <- func() error { 89 row := sqlbase.EncDatumRow{ 90 sqlbase.DatumToEncDatum(types.Int, tree.NewDInt(tree.DInt(0))), 91 } 92 if consumerStatus := outbox.Push(row, nil /* meta */); consumerStatus != execinfra.NeedMoreRows { 93 return errors.Errorf("expected status: %d, got: %d", execinfra.NeedMoreRows, consumerStatus) 94 } 95 96 // Send rows until the drain request is observed. 97 for { 98 row = sqlbase.EncDatumRow{ 99 sqlbase.DatumToEncDatum(types.Int, tree.NewDInt(tree.DInt(-1))), 100 } 101 consumerStatus := outbox.Push(row, nil /* meta */) 102 if consumerStatus == execinfra.DrainRequested { 103 break 104 } 105 if consumerStatus == execinfra.ConsumerClosed { 106 return errors.Errorf("consumer closed prematurely") 107 } 108 } 109 110 // Now send another row that the outbox will discard. 111 row = sqlbase.EncDatumRow{sqlbase.DatumToEncDatum(types.Int, tree.NewDInt(tree.DInt(2)))} 112 if consumerStatus := outbox.Push(row, nil /* meta */); consumerStatus != execinfra.DrainRequested { 113 return errors.Errorf("expected status: %d, got: %d", execinfra.NeedMoreRows, consumerStatus) 114 } 115 116 // Send some metadata. 117 outbox.Push(nil /* row */, &execinfrapb.ProducerMetadata{Err: errors.Errorf("meta 0")}) 118 outbox.Push(nil /* row */, &execinfrapb.ProducerMetadata{Err: errors.Errorf("meta 1")}) 119 // Send the termination signal. 120 outbox.ProducerDone() 121 122 return nil 123 }() 124 }() 125 126 // Wait for the outbox to connect the stream. 127 streamNotification := <-mockServer.InboundStreams 128 serverStream := streamNotification.Stream 129 130 // Consume everything that the outbox sends on the stream. 131 var decoder StreamDecoder 132 var rows sqlbase.EncDatumRows 133 var metas []execinfrapb.ProducerMetadata 134 drainSignalSent := false 135 for { 136 msg, err := serverStream.Recv() 137 if err != nil { 138 if err == io.EOF { 139 break 140 } 141 t.Fatal(err) 142 } 143 err = decoder.AddMessage(context.Background(), msg) 144 if err != nil { 145 t.Fatal(err) 146 } 147 rows, metas = testGetDecodedRows(t, &decoder, rows, metas) 148 // Eliminate the "-1" rows, that were sent before the producer found out 149 // about the draining. 150 last := -1 151 for i := 0; i < len(rows); i++ { 152 if rows[i].String(sqlbase.OneIntCol) != "[-1]" { 153 last = i 154 continue 155 } 156 for j := i; j < len(rows); j++ { 157 if rows[j].String(sqlbase.OneIntCol) == "[-1]" { 158 continue 159 } 160 rows[i] = rows[j] 161 i = j 162 last = j 163 break 164 } 165 } 166 rows = rows[0 : last+1] 167 168 // After we receive one row, we're going to ask the producer to drain. 169 if !drainSignalSent && len(rows) > 0 { 170 sig := execinfrapb.ConsumerSignal{DrainRequest: &execinfrapb.DrainRequest{}} 171 if err := serverStream.Send(&sig); err != nil { 172 t.Fatal(err) 173 } 174 drainSignalSent = true 175 } 176 } 177 if err := <-producerC; err != nil { 178 t.Fatalf("%+v", err) 179 } 180 181 if len(metas) != 2 { 182 t.Fatalf("expected 2 metadata records, got: %d", len(metas)) 183 } 184 for i, m := range metas { 185 expectedStr := fmt.Sprintf("meta %d", i) 186 if !testutils.IsError(m.Err, expectedStr) { 187 t.Fatalf("expected: %q, got: %q", expectedStr, m.Err.Error()) 188 } 189 } 190 str := rows.String(sqlbase.OneIntCol) 191 expected := "[[0]]" 192 if str != expected { 193 t.Errorf("invalid results: %s, expected %s'", str, expected) 194 } 195 196 // The outbox should shut down since the producer closed. 197 outboxWG.Wait() 198 // Signal the server to shut down the stream. 199 streamNotification.Donec <- nil 200 } 201 202 // Test that an outbox connects its stream as soon as possible (i.e. before 203 // receiving any rows). This is important, since there's a timeout on waiting on 204 // the server-side for the streams to be connected. 205 func TestOutboxInitializesStreamBeforeReceivingAnyRows(t *testing.T) { 206 defer leaktest.AfterTest(t)() 207 208 stopper := stop.NewStopper() 209 defer stopper.Stop(context.Background()) 210 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 211 clusterID, mockServer, addr, err := execinfrapb.StartMockDistSQLServer(clock, stopper, execinfra.StaticNodeID) 212 if err != nil { 213 t.Fatal(err) 214 } 215 216 st := cluster.MakeTestingClusterSettings() 217 evalCtx := tree.MakeTestingEvalContext(st) 218 defer evalCtx.Stop(context.Background()) 219 220 clientRPC := rpc.NewInsecureTestingContextWithClusterID(clock, stopper, clusterID) 221 flowCtx := execinfra.FlowCtx{ 222 EvalCtx: &evalCtx, 223 Cfg: &execinfra.ServerConfig{ 224 Settings: st, 225 Stopper: stopper, 226 NodeDialer: nodedialer.New(clientRPC, staticAddressResolver(addr)), 227 }, 228 } 229 flowID := execinfrapb.FlowID{UUID: uuid.MakeV4()} 230 streamID := execinfrapb.StreamID(42) 231 outbox := NewOutbox(&flowCtx, execinfra.StaticNodeID, flowID, streamID) 232 233 var outboxWG sync.WaitGroup 234 ctx, cancel := context.WithCancel(context.Background()) 235 defer cancel() 236 outbox.Init(sqlbase.OneIntCol) 237 // Start the outbox. This should cause the stream to connect, even though 238 // we're not sending any rows. 239 outbox.Start(ctx, &outboxWG, cancel) 240 241 streamNotification := <-mockServer.InboundStreams 242 serverStream := streamNotification.Stream 243 producerMsg, err := serverStream.Recv() 244 if err != nil { 245 t.Fatal(err) 246 } 247 if producerMsg.Header == nil { 248 t.Fatal("missing header") 249 } 250 if producerMsg.Header.FlowID != flowID || producerMsg.Header.StreamID != streamID { 251 t.Fatalf("wrong header: %v", producerMsg) 252 } 253 254 // Signal the server to shut down the stream. This should also prompt the 255 // outbox (the client) to terminate its loop. 256 streamNotification.Donec <- nil 257 outboxWG.Wait() 258 } 259 260 // Test that the outbox responds to the consumer shutting down in an unexpected 261 // way by closing. 262 func TestOutboxClosesWhenConsumerCloses(t *testing.T) { 263 defer leaktest.AfterTest(t)() 264 265 testCases := []struct { 266 // When set, the outbox will establish the stream with a FlowRpc call. When 267 // not set, the consumer will establish the stream with RunSyncFlow. 268 outboxIsClient bool 269 // Only takes effect with outboxIsClient is set. When set, the consumer 270 // (i.e. the server) returns an error from RunSyncFlow. This error will be 271 // translated into a grpc error received by the client (i.e. the outbox) in 272 // its stream.Recv()) call. Otherwise, the client doesn't return an error 273 // (and the outbox should receive io.EOF). 274 serverReturnsError bool 275 }{ 276 {outboxIsClient: true, serverReturnsError: false}, 277 {outboxIsClient: true, serverReturnsError: true}, 278 {outboxIsClient: false}, 279 } 280 for _, tc := range testCases { 281 t.Run("", func(t *testing.T) { 282 stopper := stop.NewStopper() 283 defer stopper.Stop(context.Background()) 284 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 285 clusterID, mockServer, addr, err := execinfrapb.StartMockDistSQLServer(clock, stopper, execinfra.StaticNodeID) 286 if err != nil { 287 t.Fatal(err) 288 } 289 290 st := cluster.MakeTestingClusterSettings() 291 evalCtx := tree.MakeTestingEvalContext(st) 292 defer evalCtx.Stop(context.Background()) 293 294 clientRPC := rpc.NewInsecureTestingContextWithClusterID(clock, stopper, clusterID) 295 flowCtx := execinfra.FlowCtx{ 296 EvalCtx: &evalCtx, 297 Cfg: &execinfra.ServerConfig{ 298 Settings: st, 299 Stopper: stopper, 300 NodeDialer: nodedialer.New(clientRPC, staticAddressResolver(addr)), 301 }, 302 } 303 flowID := execinfrapb.FlowID{UUID: uuid.MakeV4()} 304 streamID := execinfrapb.StreamID(42) 305 var outbox *Outbox 306 var wg sync.WaitGroup 307 var expectedErr error 308 consumerReceivedMsg := make(chan struct{}) 309 ctx, cancel := context.WithCancel(context.Background()) 310 defer cancel() 311 if tc.outboxIsClient { 312 outbox = NewOutbox(&flowCtx, execinfra.StaticNodeID, flowID, streamID) 313 outbox.Init(sqlbase.OneIntCol) 314 outbox.Start(ctx, &wg, cancel) 315 316 // Wait for the outbox to connect the stream. 317 streamNotification := <-mockServer.InboundStreams 318 // Wait for the consumer to receive the header message that the outbox 319 // sends on start. If we don't wait, the consumer returning from the 320 // FlowStream() RPC races with the outbox sending the header msg and the 321 // send might get an io.EOF error. 322 if _, err := streamNotification.Stream.Recv(); err != nil { 323 t.Errorf("expected err: %q, got %v", expectedErr, err) 324 } 325 326 // Have the server return from the FlowStream call. This should prompt the 327 // outbox to finish. 328 if tc.serverReturnsError { 329 expectedErr = errors.Errorf("FlowStream server error") 330 } else { 331 expectedErr = nil 332 } 333 streamNotification.Donec <- expectedErr 334 } else { 335 // We're going to perform a RunSyncFlow call and then have the client 336 // cancel the call's context. 337 conn, err := flowCtx.Cfg.NodeDialer.Dial(ctx, execinfra.StaticNodeID, rpc.DefaultClass) 338 if err != nil { 339 t.Fatal(err) 340 } 341 client := execinfrapb.NewDistSQLClient(conn) 342 var outStream execinfrapb.DistSQL_RunSyncFlowClient 343 ctx, cancel := context.WithCancel(context.Background()) 344 defer cancel() 345 expectedErr = errors.Errorf("context canceled") 346 go func() { 347 outStream, err = client.RunSyncFlow(ctx) 348 if err != nil { 349 t.Error(err) 350 } 351 // Check that Recv() receives an error once the context is canceled. 352 // Perhaps this is not terribly important to test; one can argue that 353 // the client should either not be Recv()ing after it canceled the 354 // ctx or that it otherwise should otherwise be aware of the 355 // cancellation when processing the results, but I've put it here 356 // because bidi streams are confusing and this provides some 357 // information. 358 for { 359 _, err := outStream.Recv() 360 if err == nil { 361 consumerReceivedMsg <- struct{}{} 362 continue 363 } 364 if !testutils.IsError(err, expectedErr.Error()) { 365 t.Errorf("expected err: %q, got %v", expectedErr, err) 366 } 367 break 368 } 369 }() 370 // Wait for the consumer to connect. 371 call := <-mockServer.RunSyncFlowCalls 372 outbox = NewOutboxSyncFlowStream(call.Stream) 373 outbox.SetFlowCtx(&execinfra.FlowCtx{ 374 Cfg: &execinfra.ServerConfig{ 375 Settings: cluster.MakeTestingClusterSettings(), 376 Stopper: stopper, 377 }, 378 }) 379 outbox.Init(sqlbase.OneIntCol) 380 // In a RunSyncFlow call, the outbox runs under the call's context. 381 outbox.Start(call.Stream.Context(), &wg, cancel) 382 // Wait for the consumer to receive the header message that the outbox 383 // sends on start. If we don't wait, the context cancellation races with 384 // the outbox sending the header msg; if the cancellation makes it to 385 // the outbox right as the outbox is trying to send the header, the 386 // outbox might finish with a "the stream has been done" error instead 387 // of "context canceled". 388 <-consumerReceivedMsg 389 // cancel the RPC's context. This is how a streaming RPC client can inform 390 // the server that it's done. We expect the outbox to finish. 391 cancel() 392 defer func() { 393 // Allow the RunSyncFlow RPC to finish. 394 call.Donec <- nil 395 }() 396 } 397 398 wg.Wait() 399 if expectedErr == nil { 400 if outbox.err != nil { 401 t.Fatalf("unexpected outbox.err: %s", outbox.err) 402 } 403 } else { 404 // We use error string comparison because we actually expect a grpc 405 // error wrapping the expected error. 406 if !testutils.IsError(outbox.err, expectedErr.Error()) { 407 t.Fatalf("expected err: %q, got %v", expectedErr, outbox.err) 408 } 409 } 410 }) 411 } 412 } 413 414 // Test Outbox cancels flow context when FlowStream returns a non-nil error. 415 func TestOutboxCancelsFlowOnError(t *testing.T) { 416 defer leaktest.AfterTest(t)() 417 418 stopper := stop.NewStopper() 419 defer stopper.Stop(context.Background()) 420 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 421 clusterID, mockServer, addr, err := execinfrapb.StartMockDistSQLServer(clock, stopper, execinfra.StaticNodeID) 422 if err != nil { 423 t.Fatal(err) 424 } 425 426 st := cluster.MakeTestingClusterSettings() 427 evalCtx := tree.MakeTestingEvalContext(st) 428 defer evalCtx.Stop(context.Background()) 429 430 clientRPC := rpc.NewInsecureTestingContextWithClusterID(clock, stopper, clusterID) 431 flowCtx := execinfra.FlowCtx{ 432 EvalCtx: &evalCtx, 433 Cfg: &execinfra.ServerConfig{ 434 Settings: st, 435 Stopper: stopper, 436 NodeDialer: nodedialer.New(clientRPC, staticAddressResolver(addr)), 437 }, 438 } 439 flowID := execinfrapb.FlowID{UUID: uuid.MakeV4()} 440 streamID := execinfrapb.StreamID(42) 441 var outbox *Outbox 442 var wg sync.WaitGroup 443 ctx, cancel := context.WithCancel(context.Background()) 444 defer cancel() 445 446 // We could test this on ctx.cancel(), but this mock 447 // cancellation method is simpler. 448 ctxCanceled := false 449 mockCancel := func() { 450 ctxCanceled = true 451 } 452 453 outbox = NewOutbox(&flowCtx, execinfra.StaticNodeID, flowID, streamID) 454 outbox.Init(sqlbase.OneIntCol) 455 outbox.Start(ctx, &wg, mockCancel) 456 457 // Wait for the outbox to connect the stream. 458 streamNotification := <-mockServer.InboundStreams 459 if _, err := streamNotification.Stream.Recv(); err != nil { 460 t.Fatal(err) 461 } 462 463 streamNotification.Donec <- sqlbase.QueryCanceledError 464 465 wg.Wait() 466 if !ctxCanceled { 467 t.Fatal("flow ctx was not canceled") 468 } 469 } 470 471 // Test that the outbox unblocks its producers if it fails to connect during 472 // startup. 473 func TestOutboxUnblocksProducers(t *testing.T) { 474 defer leaktest.AfterTest(t)() 475 476 stopper := stop.NewStopper() 477 ctx := context.Background() 478 defer stopper.Stop(ctx) 479 480 st := cluster.MakeTestingClusterSettings() 481 evalCtx := tree.MakeTestingEvalContext(st) 482 defer evalCtx.Stop(ctx) 483 flowCtx := execinfra.FlowCtx{ 484 EvalCtx: &evalCtx, 485 Cfg: &execinfra.ServerConfig{ 486 Settings: st, 487 Stopper: stopper, 488 // a nil nodeDialer will always fail to connect. 489 NodeDialer: nil, 490 }, 491 } 492 flowID := execinfrapb.FlowID{UUID: uuid.MakeV4()} 493 streamID := execinfrapb.StreamID(42) 494 var outbox *Outbox 495 var wg sync.WaitGroup 496 ctx, cancel := context.WithCancel(ctx) 497 defer cancel() 498 499 outbox = NewOutbox(&flowCtx, execinfra.StaticNodeID, flowID, streamID) 500 outbox.Init(sqlbase.OneIntCol) 501 502 // Fill up the outbox. 503 for i := 0; i < outboxBufRows; i++ { 504 outbox.Push(nil, &execinfrapb.ProducerMetadata{}) 505 } 506 507 var blockedPusherWg sync.WaitGroup 508 blockedPusherWg.Add(1) 509 go func() { 510 // Push to the outbox one last time, which will block since the channel 511 // is full. 512 outbox.Push(nil, &execinfrapb.ProducerMetadata{}) 513 // We should become unblocked once outbox.Start fails. 514 blockedPusherWg.Done() 515 }() 516 517 // This outbox will fail to connect, because it has a nil nodeDialer. 518 outbox.Start(ctx, &wg, cancel) 519 520 wg.Wait() 521 // Also, make sure that pushing to the outbox after its failed shows that 522 // it's been correctly ConsumerClosed. 523 status := outbox.Push(nil, &execinfrapb.ProducerMetadata{}) 524 if status != execinfra.ConsumerClosed { 525 t.Fatalf("expected status=ConsumerClosed, got %s", status) 526 } 527 528 blockedPusherWg.Wait() 529 } 530 531 func BenchmarkOutbox(b *testing.B) { 532 defer leaktest.AfterTest(b)() 533 534 // Create a mock server that the outbox will connect and push rows to. 535 stopper := stop.NewStopper() 536 defer stopper.Stop(context.Background()) 537 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 538 clusterID, mockServer, addr, err := execinfrapb.StartMockDistSQLServer(clock, stopper, execinfra.StaticNodeID) 539 if err != nil { 540 b.Fatal(err) 541 } 542 st := cluster.MakeTestingClusterSettings() 543 for _, numCols := range []int{1, 2, 4, 8} { 544 row := sqlbase.EncDatumRow{} 545 for i := 0; i < numCols; i++ { 546 row = append(row, sqlbase.DatumToEncDatum(types.Int, tree.NewDInt(tree.DInt(2)))) 547 } 548 b.Run(fmt.Sprintf("numCols=%d", numCols), func(b *testing.B) { 549 flowID := execinfrapb.FlowID{UUID: uuid.MakeV4()} 550 streamID := execinfrapb.StreamID(42) 551 evalCtx := tree.MakeTestingEvalContext(st) 552 defer evalCtx.Stop(context.Background()) 553 554 clientRPC := rpc.NewInsecureTestingContextWithClusterID(clock, stopper, clusterID) 555 flowCtx := execinfra.FlowCtx{ 556 EvalCtx: &evalCtx, 557 Cfg: &execinfra.ServerConfig{ 558 Settings: st, 559 Stopper: stopper, 560 NodeDialer: nodedialer.New(clientRPC, staticAddressResolver(addr)), 561 }, 562 } 563 outbox := NewOutbox(&flowCtx, execinfra.StaticNodeID, flowID, streamID) 564 outbox.Init(sqlbase.MakeIntCols(numCols)) 565 var outboxWG sync.WaitGroup 566 ctx, cancel := context.WithCancel(context.Background()) 567 defer cancel() 568 // Start the outbox. This should cause the stream to connect, even though 569 // we're not sending any rows. 570 outbox.Start(ctx, &outboxWG, cancel) 571 572 // Wait for the outbox to connect the stream. 573 streamNotification := <-mockServer.InboundStreams 574 serverStream := streamNotification.Stream 575 go func() { 576 for { 577 _, err := serverStream.Recv() 578 if err != nil { 579 break 580 } 581 } 582 }() 583 584 b.SetBytes(int64(numCols * 8)) 585 for i := 0; i < b.N; i++ { 586 if err := outbox.addRow(ctx, row, nil); err != nil { 587 b.Fatal(err) 588 } 589 } 590 outbox.ProducerDone() 591 outboxWG.Wait() 592 streamNotification.Donec <- nil 593 }) 594 } 595 }