github.com/yous1230/fabric@v2.0.0-beta.0.20191224111736-74345bee6ac2+incompatible/orderer/common/cluster/comm_test.go

/*
Copyright IBM Corp. 2017 All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
*/

package cluster_test

import (
	"context"
	"crypto/rand"
	"crypto/x509"
	"fmt"
	"net"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/golang/protobuf/proto"
	"github.com/hyperledger/fabric-protos-go/common"
	"github.com/hyperledger/fabric-protos-go/orderer"
	"github.com/hyperledger/fabric/common/crypto/tlsgen"
	"github.com/hyperledger/fabric/common/flogging"
	"github.com/hyperledger/fabric/common/metrics"
	"github.com/hyperledger/fabric/common/metrics/disabled"
	"github.com/hyperledger/fabric/common/metrics/metricsfakes"
	comm_utils "github.com/hyperledger/fabric/core/comm"
	"github.com/hyperledger/fabric/orderer/common/cluster"
	"github.com/hyperledger/fabric/orderer/common/cluster/mocks"
	"github.com/onsi/gomega"
	"github.com/pkg/errors"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/mock"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
	"google.golang.org/grpc"
)

const (
	testChannel  = "test"
	testChannel2 = "test2"
	timeout      = time.Second * 10
)

var (
	// CA that generates TLS key-pairs.
	// We use only one CA because the authentication
	// is based on TLS pinning.
	ca = createCAOrPanic()

	lastNodeID uint64

	testSubReq = &orderer.SubmitRequest{
		Channel: "test",
	}

	testReq = &orderer.SubmitRequest{
		Channel: "test",
		Payload: &common.Envelope{
			Payload: []byte("test"),
		},
	}

	testReq2 = &orderer.SubmitRequest{
		Channel: testChannel2,
		Payload: &common.Envelope{
			Payload: []byte(testChannel2),
		},
	}

	testRes = &orderer.SubmitResponse{
		Info: "test",
	}

	fooReq = wrapSubmitReq(&orderer.SubmitRequest{
		Channel: "foo",
	})

	fooRes = &orderer.SubmitResponse{
		Info: "foo",
	}

	barReq = wrapSubmitReq(&orderer.SubmitRequest{
		Channel: "bar",
	})

	barRes = &orderer.SubmitResponse{
		Info: "bar",
	}

	testConsensusReq = &orderer.StepRequest{
		Payload: &orderer.StepRequest_ConsensusRequest{
			ConsensusRequest: &orderer.ConsensusRequest{
				Payload: []byte{1, 2, 3},
				Channel: testChannel,
			},
		},
	}

	channelExtractor = &mockChannelExtractor{}
)

func nextUnusedID() uint64 {
	return atomic.AddUint64(&lastNodeID, 1)
}

func createCAOrPanic() tlsgen.CA {
	ca, err := tlsgen.NewCA()
	if err != nil {
		panic(fmt.Sprintf("failed creating CA: %+v", err))
	}
	return ca
}

type mockChannelExtractor struct{}

func (*mockChannelExtractor) TargetChannel(msg proto.Message) string {
	switch req := msg.(type) {
	case *orderer.ConsensusRequest:
		return req.Channel
	case *orderer.SubmitRequest:
		return req.Channel
	default:
		return ""
	}
}

type clusterNode struct {
	lock         sync.Mutex
	frozen       bool
	freezeCond   sync.Cond
	dialer       *cluster.PredicateDialer
	handler      *mocks.Handler
	nodeInfo     cluster.RemoteNode
	srv          *comm_utils.GRPCServer
	bindAddress  string
	clientConfig comm_utils.ClientConfig
	serverConfig comm_utils.ServerConfig
	c            *cluster.Comm
}

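// Step is the server-side handler of the gRPC Cluster service for this test
// node. It receives a single request from the stream and routes it: a submit
// request is handed to DispatchSubmit, anything else is treated as a consensus
// request and handed to DispatchConsensus, after which an empty StepResponse
// is sent back. If the node is frozen, the handler blocks before reading from
// the stream until unfreeze() is called.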
func (cn *clusterNode) Step(stream orderer.Cluster_StepServer) error {
	cn.waitIfFrozen()
	req, err := stream.Recv()
	if err != nil {
		return err
	}
	if submitReq := req.GetSubmitRequest(); submitReq != nil {
		return cn.c.DispatchSubmit(stream.Context(), submitReq)
	}
	if err := cn.c.DispatchConsensus(stream.Context(), req.GetConsensusRequest()); err != nil {
		return err
	}
	return stream.Send(&orderer.StepResponse{})
}

func (cn *clusterNode) waitIfFrozen() {
	cn.lock.Lock()
	defer cn.lock.Unlock()
	// There is no freeze after an unfreeze, so a single
	// condition check (rather than a for loop) suffices.
	if cn.frozen {
		cn.freezeCond.Wait()
	}
}

func (cn *clusterNode) freeze() {
	cn.lock.Lock()
	defer cn.lock.Unlock()
	cn.frozen = true
}

func (cn *clusterNode) unfreeze() {
	cn.lock.Lock()
	cn.frozen = false
	cn.lock.Unlock()
	cn.freezeCond.Broadcast()
}

func (cn *clusterNode) resurrect() {
	gRPCServer, err := comm_utils.NewGRPCServer(cn.bindAddress, cn.serverConfig)
	if err != nil {
		panic(fmt.Errorf("failed starting gRPC server: %v", err))
	}
	cn.srv = gRPCServer
	orderer.RegisterClusterServer(gRPCServer.Server(), cn)
	go cn.srv.Start()
}

func (cn *clusterNode) stop() {
	cn.srv.Stop()
	cn.c.Shutdown()
}

func (cn *clusterNode) renewCertificates() {
	clientKeyPair, err := ca.NewClientCertKeyPair()
	if err != nil {
		panic(fmt.Errorf("failed creating client certificate %v", err))
	}
	serverKeyPair, err := ca.NewServerCertKeyPair("127.0.0.1")
	if err != nil {
		panic(fmt.Errorf("failed creating server certificate %v", err))
	}

	cn.nodeInfo.ClientTLSCert = clientKeyPair.TLSCert.Raw
	cn.nodeInfo.ServerTLSCert = serverKeyPair.TLSCert.Raw

	cn.serverConfig.SecOpts.Certificate = serverKeyPair.Cert
	cn.serverConfig.SecOpts.Key = serverKeyPair.Key

	cn.dialer.Config.SecOpts.Key = clientKeyPair.Key
	cn.dialer.Config.SecOpts.Certificate = clientKeyPair.Cert
}

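// newTestNodeWithMetrics creates a clusterNode listening on 127.0.0.1 with a
// real gRPC server, wires a cluster.Comm instance to the given metrics
// provider and TLS connection gauge, and registers the node as a Cluster
// service. The TLS key-pairs are issued by the single test CA, so nodes
// authenticate each other by pinning these certificates.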
func newTestNodeWithMetrics(t *testing.T, metrics cluster.MetricsProvider, tlsConnGauge metrics.Gauge) *clusterNode {
	serverKeyPair, err := ca.NewServerCertKeyPair("127.0.0.1")
	assert.NoError(t, err)

	clientKeyPair, _ := ca.NewClientCertKeyPair()

	handler := &mocks.Handler{}
	clientConfig := comm_utils.ClientConfig{
		AsyncConnect: true,
		Timeout:      time.Hour,
		SecOpts: comm_utils.SecureOptions{
			RequireClientCert: true,
			Key:               clientKeyPair.Key,
			Certificate:       clientKeyPair.Cert,
			ServerRootCAs:     [][]byte{ca.CertBytes()},
			UseTLS:            true,
			ClientRootCAs:     [][]byte{ca.CertBytes()},
		},
	}

	dialer := &cluster.PredicateDialer{
		Config: clientConfig,
	}

	srvConfig := comm_utils.ServerConfig{
		SecOpts: comm_utils.SecureOptions{
			Key:         serverKeyPair.Key,
			Certificate: serverKeyPair.Cert,
			UseTLS:      true,
		},
	}
	gRPCServer, err := comm_utils.NewGRPCServer("127.0.0.1:", srvConfig)
	assert.NoError(t, err)

	tstSrv := &clusterNode{
		dialer:       dialer,
		clientConfig: clientConfig,
		serverConfig: srvConfig,
		bindAddress:  gRPCServer.Address(),
		handler:      handler,
		nodeInfo: cluster.RemoteNode{
			Endpoint:      gRPCServer.Address(),
			ID:            nextUnusedID(),
			ServerTLSCert: serverKeyPair.TLSCert.Raw,
			ClientTLSCert: clientKeyPair.TLSCert.Raw,
		},
		srv: gRPCServer,
	}

	tstSrv.freezeCond.L = &tstSrv.lock

	tstSrv.c = &cluster.Comm{
		CertExpWarningThreshold: time.Hour,
		SendBufferSize:          1,
		Logger:                  flogging.MustGetLogger("test"),
		Chan2Members:            make(cluster.MembersByChannel),
		H:                       handler,
		ChanExt:                 channelExtractor,
		Connections:             cluster.NewConnectionStore(dialer, tlsConnGauge),
		Metrics:                 cluster.NewMetrics(metrics),
	}

	orderer.RegisterClusterServer(gRPCServer.Server(), tstSrv)
	go gRPCServer.Start()
	return tstSrv
}

func newTestNode(t *testing.T) *clusterNode {
	return newTestNodeWithMetrics(t, &disabled.Provider{}, &disabled.Gauge{})
}

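// TestSendBigMessage relies on freeze()/unfreeze(): receivers are frozen so
// their Step handlers do not call Recv until the sender has returned from
// Send, which is how the test shows that sending is buffered and does not
// block on the receivers, even for large payloads.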
func TestSendBigMessage(t *testing.T) {
	t.Parallel()

	// Scenario: Basic test that spawns 5 nodes and sends a big message
	// from one of the nodes to the others.
	// A receiver node's Step() server side method (which calls Recv)
	// is frozen until the sender node's Send method returns,
	// hence the sender node finishes calling Send
	// before a receiver node starts calling Recv.
	// This ensures that Send is non-blocking even with big messages.
	// In the test, we send a total of 8MB of random data (2MB to each node).
	// The randomness is used so gRPC compression won't compress it to a lower size.

	node1 := newTestNode(t)
	node2 := newTestNode(t)
	node3 := newTestNode(t)
	node4 := newTestNode(t)
	node5 := newTestNode(t)

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		node.c.SendBufferSize = 1
	}

	defer node1.stop()
	defer node2.stop()
	defer node3.stop()
	defer node4.stop()
	defer node5.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo, node3.nodeInfo, node4.nodeInfo, node5.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)
	node3.c.Configure(testChannel, config)
	node4.c.Configure(testChannel, config)
	node5.c.Configure(testChannel, config)

	var messageReceived sync.WaitGroup
	messageReceived.Add(4)

	msgSize := 1024 * 1024 * 2
	bigMsg := &orderer.ConsensusRequest{
		Channel: testChannel,
		Payload: make([]byte, msgSize),
	}

	_, err := rand.Read(bigMsg.Payload)
	assert.NoError(t, err)

	wrappedMsg := &orderer.StepRequest{
		Payload: &orderer.StepRequest_ConsensusRequest{
			ConsensusRequest: bigMsg,
		},
	}

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		node.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
			msg := args.Get(2).(*orderer.ConsensusRequest)
			assert.Len(t, msg.Payload, msgSize)
			messageReceived.Done()
		}).Return(nil)
	}

	streams := map[uint64]*cluster.Stream{}

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		// Freeze the node, in order to block its Recv
		node.freeze()
	}

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		rm, err := node1.c.Remote(testChannel, node.nodeInfo.ID)
		assert.NoError(t, err)

		stream := assertEventualEstablishStream(t, rm)
		streams[node.nodeInfo.ID] = stream
	}

	t0 := time.Now()
	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		stream := streams[node.nodeInfo.ID]

		t1 := time.Now()
		err = stream.Send(wrappedMsg)
		assert.NoError(t, err)
		t.Log("Sending took", time.Since(t1))
		t1 = time.Now()

		// Unfreeze the node. It can now call Recv, and signal the messageReceived waitGroup.
		node.unfreeze()
	}

	t.Log("Total sending time to all 4 nodes took:", time.Since(t0))

	messageReceived.Wait()
}

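// TestBlockingSend contrasts the two egress queue semantics: with a
// SendBufferSize of 1, a submit (transaction) send blocks once the buffer is
// full, whereas a consensus send is dropped with a "send queue overflown"
// error instead of blocking.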
func TestBlockingSend(t *testing.T) {
	t.Parallel()
	// Scenario: Basic test that spawns 2 nodes and sends three SubmitRequests,
	// or three consensus requests, from the first node to the second node.
	// SubmitRequests should block, but consensus requests should not.

	for _, testCase := range []struct {
		description        string
		messageToSend      *orderer.StepRequest
		streamUnblocks     bool
		elapsedGreaterThan time.Duration
		overflowErr        string
	}{
		{
			description:        "SubmitRequest",
			messageToSend:      wrapSubmitReq(testReq),
			streamUnblocks:     true,
			elapsedGreaterThan: time.Second / 2,
		},
		{
			description:   "ConsensusRequest",
			messageToSend: testConsensusReq,
			overflowErr:   "send queue overflown",
		},
	} {
		t.Run(testCase.description, func(t *testing.T) {
			node1 := newTestNode(t)
			node2 := newTestNode(t)

			node1.c.SendBufferSize = 1
			node2.c.SendBufferSize = 1

			defer node1.stop()
			defer node2.stop()

			config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
			node1.c.Configure(testChannel, config)
			node2.c.Configure(testChannel, config)

			rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
			assert.NoError(t, err)

			client := &mocks.ClusterClient{}
			fakeStream := &mocks.StepClient{}

			// Replace the real client with a mock client
			rm.Client = client
			rm.ProbeConn = func(_ *grpc.ClientConn) error {
				return nil
			}
			// Configure the client to return the mock stream
			fakeStream.On("Context", mock.Anything).Return(context.Background())
			client.On("Step", mock.Anything).Return(fakeStream, nil).Once()

			unBlock := make(chan struct{})
			var sendInvoked sync.WaitGroup
			sendInvoked.Add(1)
			var once sync.Once
			fakeStream.On("Send", mock.Anything).Run(func(_ mock.Arguments) {
				once.Do(sendInvoked.Done)
				<-unBlock
			}).Return(errors.New("oops"))

			stream, err := rm.NewStream(time.Hour)
			assert.NoError(t, err)

			// The first send doesn't block, even though the Send operation blocks.
			err = stream.Send(testCase.messageToSend)
			assert.NoError(t, err)

			// The second one doesn't either.
			// After this point, we have 1 goroutine which is blocked on Send(),
			// and one message in the buffer.
			sendInvoked.Wait()
			err = stream.Send(testCase.messageToSend)
			assert.NoError(t, err)

			// The third blocks, so we need to unblock it ourselves
			// in order for it to go through, unless the operation
			// is non-blocking.
			go func() {
				time.Sleep(time.Second)
				if testCase.streamUnblocks {
					close(unBlock)
				}
			}()

			t1 := time.Now()
			err = stream.Send(testCase.messageToSend)
			// The third send always overflows or blocks.
			// If we expect to receive an overflow error - assert it.
			if testCase.overflowErr != "" {
				assert.EqualError(t, err, testCase.overflowErr)
			}
			elapsed := time.Since(t1)
			t.Log("Elapsed time:", elapsed)
			assert.True(t, elapsed > testCase.elapsedGreaterThan)

			if !testCase.streamUnblocks {
				close(unBlock)
			}
		})
	}
}

func TestBasic(t *testing.T) {
	t.Parallel()
	// Scenario: Basic test that spawns 2 nodes that send each other
	// messages that are expected to be echoed back

	node1 := newTestNode(t)
	node2 := newTestNode(t)

	defer node1.stop()
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	assertBiDiCommunication(t, node1, node2, testReq)
}

func TestUnavailableHosts(t *testing.T) {
	t.Parallel()
	// Scenario: A node is configured to connect
	// to a host that is down
	node1 := newTestNode(t)

	clientConfig := node1.dialer.Config
	// The below timeout makes sure that connection establishment is done
	// asynchronously. Had it been synchronous, the Remote() call would be
	// blocked for an hour.
	clientConfig.Timeout = time.Hour
	defer node1.stop()

	node2 := newTestNode(t)
	node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	remote, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)
	assert.NotNil(t, remote)

	_, err = remote.NewStream(time.Millisecond * 100)
	assert.Contains(t, err.Error(), "connection")
}

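// TestStreamAbort verifies that reconfiguring a channel's membership aborts
// streams that are already open towards a node that was evicted or whose TLS
// certificate changed: the pending Recv() on the client side returns a
// cancellation error.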
func TestStreamAbort(t *testing.T) {
	t.Parallel()

	// Scenario: node 1 is connected to node 2 in 2 channels,
	// and the consumer of the communication calls receive.
	// Two sub-scenarios are exercised:
	// 1) The server certificate of node 2 changes in the first channel
	// 2) Node 2 is evicted from the membership of the first channel
	// In both of the scenarios, the Recv() call should be aborted

	node2 := newTestNode(t)
	defer node2.stop()

	invalidNodeInfo := cluster.RemoteNode{
		ID:            node2.nodeInfo.ID,
		ServerTLSCert: []byte{1, 2, 3},
		ClientTLSCert: []byte{1, 2, 3},
	}

	for _, tst := range []struct {
		testName      string
		membership    []cluster.RemoteNode
		expectedError string
	}{
		{
			testName:      "Evicted from membership",
			membership:    nil,
			expectedError: "rpc error: code = Canceled desc = context canceled",
		},
		{
			testName:      "Changed TLS certificate",
			membership:    []cluster.RemoteNode{invalidNodeInfo},
			expectedError: "rpc error: code = Canceled desc = context canceled",
		},
	} {
		t.Run(tst.testName, func(t *testing.T) {
			testStreamAbort(t, node2, tst.membership, tst.expectedError)
		})
	}
	node2.handler.AssertNumberOfCalls(t, "OnSubmit", 2)
}

func testStreamAbort(t *testing.T, node2 *clusterNode, newMembership []cluster.RemoteNode, expectedError string) {
	node1 := newTestNode(t)
	defer node1.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
	node1.c.Configure(testChannel2, []cluster.RemoteNode{node2.nodeInfo})
	node2.c.Configure(testChannel2, []cluster.RemoteNode{node1.nodeInfo})

	var streamCreated sync.WaitGroup
	streamCreated.Add(1)

	stopChan := make(chan struct{})

	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Once().Run(func(_ mock.Arguments) {
		// Notify the stream was created
		streamCreated.Done()
		// Wait for the test to finish
		<-stopChan
	}).Return(nil).Once()

	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)

	go func() {
		stream := assertEventualEstablishStream(t, rm1)
		// Signal the reconfiguration
		err = stream.Send(wrapSubmitReq(testReq))
		assert.NoError(t, err)
		_, err := stream.Recv()
		assert.Contains(t, err.Error(), expectedError)
		close(stopChan)
	}()

	go func() {
		// Wait for the stream reference to be obtained
		streamCreated.Wait()
		// Reconfigure the channel membership
		node1.c.Configure(testChannel, newMembership)
	}()

	<-stopChan
}

func TestDoubleReconfigure(t *testing.T) {
	t.Parallel()
	// Scenario: Basic test that spawns 2 nodes
	// and configures node 1 twice, and checks that
	// the remote stub for node 1 wasn't re-created in the second
	// configuration since it already existed

	node1 := newTestNode(t)
	node2 := newTestNode(t)

	defer node1.stop()
	defer node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	rm2, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)
	// Ensure the references are equal
	assert.True(t, rm1 == rm2)
}

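// TestInvalidChannel covers the channel extraction path: an empty
// SubmitRequest carries an empty channel name, so the dispatcher rejects it
// with "badly formatted message, cannot extract channel".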
func TestInvalidChannel(t *testing.T) {
	t.Parallel()
	// Scenario: node 1 is ordered to send a message on a channel
	// that doesn't exist, and also receives a message, but
	// the channel cannot be extracted from the message.

	t.Run("channel doesn't exist", func(t *testing.T) {
		t.Parallel()
		node1 := newTestNode(t)
		defer node1.stop()

		_, err := node1.c.Remote(testChannel, 0)
		assert.EqualError(t, err, "channel test doesn't exist")
	})

	t.Run("channel cannot be extracted", func(t *testing.T) {
		t.Parallel()
		node1 := newTestNode(t)
		defer node1.stop()

		node1.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
		gt := gomega.NewGomegaWithT(t)
		gt.Eventually(func() (bool, error) {
			_, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
			return true, err
		}, time.Minute).Should(gomega.BeTrue())

		stub, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
		assert.NoError(t, err)

		stream := assertEventualEstablishStream(t, stub)

		// An empty SubmitRequest has an empty channel which is invalid
		err = stream.Send(wrapSubmitReq(&orderer.SubmitRequest{}))
		assert.NoError(t, err)

		_, err = stream.Recv()
		assert.EqualError(t, err, "rpc error: code = Unknown desc = badly formatted message, cannot extract channel")

		// Test directly without going through the gRPC stream
		err = node1.c.DispatchSubmit(context.Background(), &orderer.SubmitRequest{})
		assert.EqualError(t, err, "badly formatted message, cannot extract channel")
	})
}

func TestAbortRPC(t *testing.T) {
	t.Parallel()
	// Scenarios:
	// (I) The node calls an RPC, and calls Abort() on the remote context
	// in parallel. The RPC should return even though the server-side call hasn't finished.
	// (II) The node calls an RPC, but the server-side processing takes too long,
	// and the RPC invocation returns prematurely.

	testCases := []struct {
		name        string
		abortFunc   func(*cluster.RemoteContext)
		rpcTimeout  time.Duration
		expectedErr string
	}{
		{
			name:        "Abort() called",
			expectedErr: "rpc error: code = Canceled desc = context canceled",
			rpcTimeout:  time.Hour,
			abortFunc: func(rc *cluster.RemoteContext) {
				rc.Abort()
			},
		},
		{
			name:        "RPC timeout",
			expectedErr: "rpc timeout expired",
			rpcTimeout:  time.Second,
			abortFunc:   func(*cluster.RemoteContext) {},
		},
	}

	for _, testCase := range testCases {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			testAbort(t, testCase.abortFunc, testCase.rpcTimeout, testCase.expectedErr)
		})
	}
}

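// testAbort runs a single abort scenario: node 2's OnSubmit handler is kept
// stuck, node 1 opens a stream and sends a request, and the supplied abortFunc
// (an explicit Abort(), or nothing at all when the RPC timeout is expected to
// expire) must cause the pending Recv to return the expected error.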
func testAbort(t *testing.T, abortFunc func(*cluster.RemoteContext), rpcTimeout time.Duration, expectedErr string) {
	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)
	var onStepCalled sync.WaitGroup
	onStepCalled.Add(1)

	// stuckCall ensures the OnSubmit() call is stuck throughout this test
	var stuckCall sync.WaitGroup
	stuckCall.Add(1)
	// At the end of the test, release the server-side resources
	defer stuckCall.Done()

	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(nil).Once().Run(func(_ mock.Arguments) {
		onStepCalled.Done()
		stuckCall.Wait()
	}).Once()

	rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)

	go func() {
		onStepCalled.Wait()
		abortFunc(rm)
	}()

	var stream *cluster.Stream
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		stream, err = rm.NewStream(rpcTimeout)
		return err
	}, time.Second*10, time.Millisecond*10).Should(gomega.Succeed())

	stream.Send(wrapSubmitReq(testSubReq))
	_, err = stream.Recv()

	assert.EqualError(t, err, expectedErr)

	node2.handler.AssertNumberOfCalls(t, "OnSubmit", 1)
}

func TestNoTLSCertificate(t *testing.T) {
	t.Parallel()
	// Scenario: The node is sent a message by another node that doesn't
	// connect with mutual TLS, thus doesn't provide a TLS certificate
	node1 := newTestNode(t)
	defer node1.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})

	clientConfig := comm_utils.ClientConfig{
		AsyncConnect: true,
		Timeout:      time.Millisecond * 100,
		SecOpts: comm_utils.SecureOptions{
			ServerRootCAs: [][]byte{ca.CertBytes()},
			UseTLS:        true,
		},
	}
	cl, err := comm_utils.NewGRPCClient(clientConfig)
	assert.NoError(t, err)

	var conn *grpc.ClientConn
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() (bool, error) {
		conn, err = cl.NewConnection(node1.srv.Address())
		return true, err
	}, time.Minute).Should(gomega.BeTrue())

	echoClient := orderer.NewClusterClient(conn)
	stream, err := echoClient.Step(context.Background())
	assert.NoError(t, err)

	err = stream.Send(wrapSubmitReq(testSubReq))
	assert.NoError(t, err)
	_, err = stream.Recv()
	assert.EqualError(t, err, "rpc error: code = Unknown desc = no TLS certificate sent")
}

func TestReconnect(t *testing.T) {
	t.Parallel()
	// Scenario: node 1 and node 2 are connected,
	// and node 2 is taken offline.
	// Node 1 tries to send a message to node 2 but fails,
	// and afterwards node 2 is brought back, after which
	// node 1 sends more messages, and it should eventually succeed
	// in sending a message to node 2.

	node1 := newTestNode(t)
	defer node1.stop()
	conf := node1.dialer.Config
	conf.Timeout = time.Hour

	node2 := newTestNode(t)
	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(nil)
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	// Take node 2 offline by shutting down its gRPC service
	node2.srv.Stop()
	// Obtain the stub for node 2.
	// Should succeed, because the connection was created at time of configuration
	stub, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)

	// Try to obtain a stream. Should not succeed.
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		_, err = stub.NewStream(time.Hour)
		return err
	}).Should(gomega.Not(gomega.Succeed()))

	// Wait for the port to be released
	for {
		lsnr, err := net.Listen("tcp", node2.nodeInfo.Endpoint)
		if err == nil {
			lsnr.Close()
			break
		}
	}

	// Resurrect node 2
	node2.resurrect()
	// Send a message from node 1 to node 2.
	// Should succeed eventually
	assertEventualSendMessage(t, stub, testReq)
}

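// TestRenewCertificates exercises certificate rotation: since authentication
// is pinned to the TLS certificates, both nodes must be reconfigured with the
// renewed certificates and have their gRPC servers restarted before they can
// communicate again.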
func TestRenewCertificates(t *testing.T) {
	t.Parallel()
	// Scenario: node 1 and node 2 are connected,
	// and the certificates are renewed for both nodes
	// at the same time.
	// They are expected to connect to one another
	// after the reconfiguration.

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node1.handler.On("OnStep", testChannel, node2.nodeInfo.ID, mock.Anything).Return(testRes, nil)
	node2.handler.On("OnStep", testChannel, node1.nodeInfo.ID, mock.Anything).Return(testRes, nil)

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	assertBiDiCommunication(t, node1, node2, testReq)

	// Now, renew the certificates of both nodes
	node1.renewCertificates()
	node2.renewCertificates()

	// Reconfigure them
	config = []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	// W.l.o.g., try to send a message from node1 to node2.
	// It should fail, because node2's server certificate has now changed,
	// so it closed the connection to the remote node
	info2 := node2.nodeInfo
	remote, err := node1.c.Remote(testChannel, info2.ID)
	assert.NoError(t, err)
	assert.NotNil(t, remote)

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() string {
		_, err = remote.NewStream(time.Hour)
		return err.Error()
	}, timeout).Should(gomega.ContainSubstring(info2.Endpoint))

	// Restart the gRPC service on both nodes, to load the new TLS certificates
	node1.srv.Stop()
	node1.resurrect()
	node2.srv.Stop()
	node2.resurrect()

	// Finally, check that the nodes can communicate once again
	assertBiDiCommunication(t, node1, node2, testReq)
}

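// TestMembershipReconfiguration verifies that authorization follows the
// configured membership: a node that is not (or no longer) part of a channel's
// membership can still open a TLS connection, but its requests are rejected as
// unauthorized.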
func TestMembershipReconfiguration(t *testing.T) {
	t.Parallel()
	// Scenario: node 1 and node 2 are started up
	// and node 2 is configured to know about node 1,
	// without node 1 knowing about node 2.
	// The communication between them should only work
	// after node 1 is configured to know about node 2.

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{})
	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})

	// Node 1 can't connect to node 2 because it doesn't know its TLS certificate yet
	_, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.EqualError(t, err, fmt.Sprintf("node %d doesn't exist in channel test's membership", node2.nodeInfo.ID))
	// Node 2 can connect to node 1, but it can't send messages to it because node 1 doesn't know node 2 yet.

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() (bool, error) {
		_, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
		return true, err
	}, time.Minute).Should(gomega.BeTrue())

	stub, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)

	stream := assertEventualEstablishStream(t, stub)
	err = stream.Send(wrapSubmitReq(testSubReq))
	assert.NoError(t, err)

	_, err = stream.Recv()
	assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")

	// Next, configure node 1 to know about node 2
	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})

	// Check that the communication works correctly between both nodes
	assertBiDiCommunication(t, node1, node2, testReq)
	assertBiDiCommunication(t, node2, node1, testReq)

	// Reconfigure node 2 to forget about node 1
	node2.c.Configure(testChannel, []cluster.RemoteNode{})
	// Node 1 can still connect to node 2
	stub, err = node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)
	// But it can't send a message because node 2 no longer authorizes node 1
	stream = assertEventualEstablishStream(t, stub)
	stream.Send(wrapSubmitReq(testSubReq))
	_, err = stream.Recv()
	assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
}

func TestShutdown(t *testing.T) {
	t.Parallel()
	// Scenario: node 1 is shut down and as a result, can't
	// send messages to anyone, nor can it be reconfigured

	node1 := newTestNode(t)
	defer node1.stop()

	node1.c.Shutdown()

	// Obtaining a RemoteContext cannot succeed because shutdown was called before
	_, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
	assert.EqualError(t, err, "communication has been shut down")

	node2 := newTestNode(t)
	defer node2.stop()

	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
	// Configuration of node 1 doesn't take place
	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		_, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
		return err
	}, time.Minute).Should(gomega.Succeed())

	stub, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)

	// Therefore, sending a message doesn't succeed because node 1 rejected the configuration change
	gt.Eventually(func() string {
		stream, err := stub.NewStream(time.Hour)
		if err != nil {
			return err.Error()
		}
		err = stream.Send(wrapSubmitReq(testSubReq))
		assert.NoError(t, err)

		_, err = stream.Recv()
		return err.Error()
	}, timeout).Should(gomega.ContainSubstring("channel test doesn't exist"))
}

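// TestMultiChannelConfig verifies per-channel routing: membership is
// configured independently for channels "foo" and "bar", and a message sent on
// a channel in which the sender is not a member is rejected as unauthorized.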
func TestMultiChannelConfig(t *testing.T) {
	t.Parallel()
	// Scenario: node 1 knows node 2 only in channel "foo"
	// and knows node 3 only in channel "bar".
	// Messages that are received are routed according to their corresponding channels,
	// and when node 2 sends a message for channel "bar" to node 1, it is rejected.
	// The same applies to node 3 when it sends a message to node 1 in channel "foo".

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node3 := newTestNode(t)
	defer node3.stop()

	node1.c.Configure("foo", []cluster.RemoteNode{node2.nodeInfo})
	node1.c.Configure("bar", []cluster.RemoteNode{node3.nodeInfo})
	node2.c.Configure("foo", []cluster.RemoteNode{node1.nodeInfo})
	node3.c.Configure("bar", []cluster.RemoteNode{node1.nodeInfo})

	t.Run("Correct channel", func(t *testing.T) {
		var fromNode2 sync.WaitGroup
		fromNode2.Add(1)
		node1.handler.On("OnSubmit", "foo", node2.nodeInfo.ID, mock.Anything).Return(nil).Run(func(_ mock.Arguments) {
			fromNode2.Done()
		}).Once()

		var fromNode3 sync.WaitGroup
		fromNode3.Add(1)
		node1.handler.On("OnSubmit", "bar", node3.nodeInfo.ID, mock.Anything).Return(nil).Run(func(_ mock.Arguments) {
			fromNode3.Done()
		}).Once()

		node2toNode1, err := node2.c.Remote("foo", node1.nodeInfo.ID)
		assert.NoError(t, err)
		node3toNode1, err := node3.c.Remote("bar", node1.nodeInfo.ID)
		assert.NoError(t, err)

		stream := assertEventualEstablishStream(t, node2toNode1)
		stream.Send(fooReq)

		fromNode2.Wait()
		node1.handler.AssertNumberOfCalls(t, "OnSubmit", 1)

		stream = assertEventualEstablishStream(t, node3toNode1)
		stream.Send(barReq)

		fromNode3.Wait()
		node1.handler.AssertNumberOfCalls(t, "OnSubmit", 2)
	})

	t.Run("Incorrect channel", func(t *testing.T) {
		node1.handler.On("OnSubmit", "foo", node2.nodeInfo.ID, mock.Anything).Return(nil)
		node1.handler.On("OnSubmit", "bar", node3.nodeInfo.ID, mock.Anything).Return(nil)

		node2toNode1, err := node2.c.Remote("foo", node1.nodeInfo.ID)
		assert.NoError(t, err)
		node3toNode1, err := node3.c.Remote("bar", node1.nodeInfo.ID)
		assert.NoError(t, err)

		assertEventualSendMessage(t, node2toNode1, &orderer.SubmitRequest{Channel: "foo"})
		stream, err := node2toNode1.NewStream(time.Hour)
		err = stream.Send(barReq)
		assert.NoError(t, err)
		_, err = stream.Recv()
		assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")

		assertEventualSendMessage(t, node3toNode1, &orderer.SubmitRequest{Channel: "bar"})
		stream, err = node3toNode1.NewStream(time.Hour)
		err = stream.Send(fooReq)
		assert.NoError(t, err)
		_, err = stream.Recv()
		assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
	})
}

func TestConnectionFailure(t *testing.T) {
	t.Parallel()
	// Scenario: node 1 fails to connect to node 2.

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	dialer := &mocks.SecureDialer{}
	dialer.On("Dial", mock.Anything, mock.Anything).Return(nil, errors.New("oops"))
	node1.c.Connections = cluster.NewConnectionStore(dialer, &disabled.Gauge{})
	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})

	_, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.EqualError(t, err, "oops")
}

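// testMetrics aggregates the fake gauges, counters and histograms handed out
// by the mocked MetricsProvider, so the test cases in TestMetrics can assert
// on the labels and values reported by the communication layer.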
type testMetrics struct {
	fakeProvider        *mocks.MetricsProvider
	egressQueueLength   metricsfakes.Gauge
	egressQueueCapacity metricsfakes.Gauge
	egressStreamCount   metricsfakes.Gauge
	egressTLSConnCount  metricsfakes.Gauge
	egressWorkerSize    metricsfakes.Gauge
	ingressStreamsCount metricsfakes.Gauge
	msgSendTime         metricsfakes.Histogram
	msgDropCount        metricsfakes.Counter
}

func (tm *testMetrics) initialize() {
	tm.egressQueueLength.WithReturns(&tm.egressQueueLength)
	tm.egressQueueCapacity.WithReturns(&tm.egressQueueCapacity)
	tm.egressStreamCount.WithReturns(&tm.egressStreamCount)
	tm.egressTLSConnCount.WithReturns(&tm.egressTLSConnCount)
	tm.egressWorkerSize.WithReturns(&tm.egressWorkerSize)
	tm.ingressStreamsCount.WithReturns(&tm.ingressStreamsCount)
	tm.msgSendTime.WithReturns(&tm.msgSendTime)
	tm.msgDropCount.WithReturns(&tm.msgDropCount)

	fakeProvider := tm.fakeProvider
	fakeProvider.On("NewGauge", cluster.IngressStreamsCountOpts).Return(&tm.ingressStreamsCount)
	fakeProvider.On("NewGauge", cluster.EgressQueueLengthOpts).Return(&tm.egressQueueLength)
	fakeProvider.On("NewGauge", cluster.EgressQueueCapacityOpts).Return(&tm.egressQueueCapacity)
	fakeProvider.On("NewGauge", cluster.EgressStreamsCountOpts).Return(&tm.egressStreamCount)
	fakeProvider.On("NewGauge", cluster.EgressTLSConnectionCountOpts).Return(&tm.egressTLSConnCount)
	fakeProvider.On("NewGauge", cluster.EgressWorkersOpts).Return(&tm.egressWorkerSize)
	fakeProvider.On("NewCounter", cluster.MessagesDroppedCountOpts).Return(&tm.msgDropCount)
	fakeProvider.On("NewHistogram", cluster.MessageSendTimeOpts).Return(&tm.msgSendTime)
}

func TestMetrics(t *testing.T) {
	t.Parallel()

	for _, testCase := range []struct {
		name        string
		runTest     func(node1, node2 *clusterNode, testMetrics *testMetrics)
		testMetrics *testMetrics
	}{
		{
			name: "EgressQueueOccupancy",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "msg_type", "transaction", "channel", testChannel},
					testMetrics.egressQueueLength.WithArgsForCall(0))
				assert.Equal(t, float64(0), testMetrics.egressQueueLength.SetArgsForCall(0))
				assert.Equal(t, float64(1), testMetrics.egressQueueCapacity.SetArgsForCall(0))

				var messageReceived sync.WaitGroup
				messageReceived.Add(1)
				node2.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
					messageReceived.Done()
				}).Return(nil)

				rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
				assert.NoError(t, err)

				stream := assertEventualEstablishStream(t, rm)
				stream.Send(testConsensusReq)
				messageReceived.Wait()

				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "msg_type", "consensus", "channel", testChannel},
					testMetrics.egressQueueLength.WithArgsForCall(1))
				assert.Equal(t, float64(0), testMetrics.egressQueueLength.SetArgsForCall(1))
				assert.Equal(t, float64(1), testMetrics.egressQueueCapacity.SetArgsForCall(1))
			},
		},
		{
			name: "EgressStreamsCount",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				assert.Equal(t, 1, testMetrics.egressStreamCount.SetCallCount())
				assert.Equal(t, 1, testMetrics.egressStreamCount.WithCallCount())
				assert.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))

				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
				assert.Equal(t, 2, testMetrics.egressStreamCount.SetCallCount())
				assert.Equal(t, 2, testMetrics.egressStreamCount.WithCallCount())
				assert.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))
			},
		},
		{
			name: "EgressTLSConnCount",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				assert.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))

				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
				assert.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))

				// A single TLS connection despite 2 streams
				assert.Equal(t, float64(1), testMetrics.egressTLSConnCount.SetArgsForCall(0))
				assert.Equal(t, 1, testMetrics.egressTLSConnCount.SetCallCount())
			},
		},
		{
			name: "EgressWorkerSize",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				assert.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))

				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
				assert.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))

				assert.Equal(t, float64(1), testMetrics.egressWorkerSize.SetArgsForCall(0))
				assert.Equal(t, float64(1), testMetrics.egressWorkerSize.SetArgsForCall(1))
			},
		},
		{
			name: "MgSendTime",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "channel", testChannel},
					testMetrics.msgSendTime.WithArgsForCall(0))

				assert.Equal(t, 1, testMetrics.msgSendTime.ObserveCallCount())
			},
		},
		{
			name: "MsgDropCount",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				blockRecv := make(chan struct{})
				wasReported := func() bool {
					select {
					case <-blockRecv:
						return true
					default:
						return false
					}
				}
				// When the drop count is reported, release the lock on the server-side receive operation.
				testMetrics.msgDropCount.AddStub = func(_ float64) {
					if !wasReported() {
						close(blockRecv)
					}
				}

				node2.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
					// Block until the message drop is reported
					<-blockRecv
				}).Return(nil)

				rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
				assert.NoError(t, err)

				stream := assertEventualEstablishStream(t, rm)
				// Send too many messages while the server side is not reading from the stream
				for {
					stream.Send(testConsensusReq)
					if wasReported() {
						break
					}
				}
				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "channel", testChannel},
					testMetrics.msgDropCount.WithArgsForCall(0))
				assert.Equal(t, 1, testMetrics.msgDropCount.AddCallCount())
			},
		},
	} {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			fakeProvider := &mocks.MetricsProvider{}
			testCase.testMetrics = &testMetrics{
				fakeProvider: fakeProvider,
			}

			testCase.testMetrics.initialize()

			node1 := newTestNodeWithMetrics(t, fakeProvider, &testCase.testMetrics.egressTLSConnCount)
			defer node1.stop()

			node2 := newTestNode(t)
			defer node2.stop()

			configForNode1 := []cluster.RemoteNode{node2.nodeInfo}
			configForNode2 := []cluster.RemoteNode{node1.nodeInfo}
			node1.c.Configure(testChannel, configForNode1)
			node2.c.Configure(testChannel, configForNode2)
			node1.c.Configure(testChannel2, configForNode1)
			node2.c.Configure(testChannel2, configForNode2)

			testCase.runTest(node1, node2, testCase.testMetrics)
		})
	}
}

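// TestCertExpirationWarningEgress drives the certificate expiration warning:
// the warning threshold is set above the certificate's remaining lifetime so
// that a send triggers a warning, and MinimumExpirationWarningInterval
// suppresses repeated warnings until the interval elapses.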
func TestCertExpirationWarningEgress(t *testing.T) {
	t.Parallel()
	// Scenario: Ensures that when certificates are due to expire,
	// a warning is logged.

	node1 := newTestNode(t)
	node2 := newTestNode(t)

	cert, err := x509.ParseCertificate(node2.nodeInfo.ServerTLSCert)
	assert.NoError(t, err)
	assert.NotNil(t, cert)

	// Let the NotAfter time of the certificate be T1, and the current time be T0.
	// Then time.Until returns (T1 - T0), which is the time left until expiration.
	// We want to trigger a warning, so we set the warning threshold to 20 seconds above
	// the time left, so that the time left is smaller than the threshold.
	node1.c.CertExpWarningThreshold = time.Until(cert.NotAfter) + time.Second*20
	// We only alert once every 3 seconds
	node1.c.MinimumExpirationWarningInterval = time.Second * 3

	defer node1.stop()
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	stub, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)

	mockgRPC := &mocks.StepClient{}
	mockgRPC.On("Send", mock.Anything).Return(nil)
	mockgRPC.On("Context").Return(context.Background())
	mockClient := &mocks.ClusterClient{}
	mockClient.On("Step", mock.Anything).Return(mockgRPC, nil)

	stub.Client = mockClient

	stream := assertEventualEstablishStream(t, stub)

	alerts := make(chan struct{}, 100)

	stream.Logger = stream.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "expires in less than") {
			alerts <- struct{}{}
		}
		return nil
	}))

	// Send a message to the node and expect an alert to be logged.
	stream.Send(wrapSubmitReq(testReq))
	select {
	case <-alerts:
	case <-time.After(time.Second * 5):
		t.Fatal("Should have logged an alert")
	}
	// Send another message, and ensure nothing is logged, because the
	// alerts should be suppressed until the minimum interval expires.
	stream.Send(wrapSubmitReq(testReq))
	select {
	case <-alerts:
		t.Fatal("Should not have logged an alert")
	case <-time.After(time.Millisecond * 500):
	}
	// Wait enough time for the alert interval to clear.
	time.Sleep(node1.c.MinimumExpirationWarningInterval + time.Second)
	// Send a message again; this time an alert should be logged again.
	stream.Send(wrapSubmitReq(testReq))
	select {
	case <-alerts:
	case <-time.After(time.Second * 5):
		t.Fatal("Should have logged an alert")
	}
}

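// assertBiDiCommunicationForChannel sends msgToSend in both directions
// (node1 -> node2 and node2 -> node1) on the given channel, and waits until
// each receiver's OnSubmit handler observes a message equal to the one sent.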
func assertBiDiCommunicationForChannel(t *testing.T, node1, node2 *clusterNode, msgToSend *orderer.SubmitRequest, channel string) {
	for _, tst := range []struct {
		label    string
		sender   *clusterNode
		receiver *clusterNode
		target   uint64
	}{
		{label: "1->2", sender: node1, target: node2.nodeInfo.ID, receiver: node2},
		{label: "2->1", sender: node2, target: node1.nodeInfo.ID, receiver: node1},
	} {
		t.Run(tst.label, func(t *testing.T) {
			stub, err := tst.sender.c.Remote(channel, tst.target)
			assert.NoError(t, err)

			stream := assertEventualEstablishStream(t, stub)

			var wg sync.WaitGroup
			wg.Add(1)
			tst.receiver.handler.On("OnSubmit", channel, tst.sender.nodeInfo.ID, mock.Anything).Return(nil).Once().Run(func(args mock.Arguments) {
				req := args.Get(2).(*orderer.SubmitRequest)
				assert.True(t, proto.Equal(req, msgToSend))
				wg.Done()
			})

			err = stream.Send(wrapSubmitReq(msgToSend))
			assert.NoError(t, err)

			wg.Wait()
		})
	}
}

func assertBiDiCommunication(t *testing.T, node1, node2 *clusterNode, msgToSend *orderer.SubmitRequest) {
	assertBiDiCommunicationForChannel(t, node1, node2, msgToSend, testChannel)
}

// assertEventualEstablishStream keeps trying to open a stream to the remote
// node until it succeeds or the timeout expires.
func assertEventualEstablishStream(t *testing.T, rpc *cluster.RemoteContext) *cluster.Stream {
	var res *cluster.Stream
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		stream, err := rpc.NewStream(time.Hour)
		res = stream
		return err
	}, timeout).Should(gomega.Succeed())
	return res
}

// assertEventualSendMessage keeps trying to open a stream and send the given
// submit request until the send succeeds or the timeout expires.
func assertEventualSendMessage(t *testing.T, rpc *cluster.RemoteContext, req *orderer.SubmitRequest) orderer.Cluster_StepClient {
	var res orderer.Cluster_StepClient
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		stream, err := rpc.NewStream(time.Hour)
		if err != nil {
			return err
		}
		res = stream
		return stream.Send(wrapSubmitReq(req))
	}, timeout).Should(gomega.Succeed())
	return res
}

// wrapSubmitReq wraps a SubmitRequest in a StepRequest envelope.
func wrapSubmitReq(req *orderer.SubmitRequest) *orderer.StepRequest {
	return &orderer.StepRequest{
		Payload: &orderer.StepRequest_SubmitRequest{
			SubmitRequest: req,
		},
	}
}