github.com/osdi23p228/fabric@v0.0.0-20221218062954-77808885f5db/orderer/common/cluster/comm_test.go

/*
Copyright IBM Corp. 2017 All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
*/

package cluster_test

import (
	"context"
	"crypto/rand"
	"crypto/x509"
	"fmt"
	"net"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/golang/protobuf/proto"
	"github.com/hyperledger/fabric-protos-go/common"
	"github.com/hyperledger/fabric-protos-go/orderer"
	"github.com/osdi23p228/fabric/common/crypto"
	"github.com/osdi23p228/fabric/common/crypto/tlsgen"
	"github.com/osdi23p228/fabric/common/flogging"
	"github.com/osdi23p228/fabric/common/metrics"
	"github.com/osdi23p228/fabric/common/metrics/disabled"
	"github.com/osdi23p228/fabric/common/metrics/metricsfakes"
	comm_utils "github.com/osdi23p228/fabric/internal/pkg/comm"
	"github.com/osdi23p228/fabric/orderer/common/cluster"
	"github.com/osdi23p228/fabric/orderer/common/cluster/mocks"
	"github.com/onsi/gomega"
	"github.com/pkg/errors"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/mock"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
	"google.golang.org/grpc"
)

const (
	testChannel  = "test"
	testChannel2 = "test2"
	timeout      = time.Second * 10
)

var (
	// CA that generates TLS key-pairs.
	// We use only one CA because the authentication
	// is based on TLS pinning.
	ca = createCAOrPanic()

	lastNodeID uint64

	testSubReq = &orderer.SubmitRequest{
		Channel: "test",
	}

	testReq = &orderer.SubmitRequest{
		Channel: "test",
		Payload: &common.Envelope{
			Payload: []byte("test"),
		},
	}

	testReq2 = &orderer.SubmitRequest{
		Channel: testChannel2,
		Payload: &common.Envelope{
			Payload: []byte(testChannel2),
		},
	}

	testRes = &orderer.SubmitResponse{
		Info: "test",
	}

	fooReq = wrapSubmitReq(&orderer.SubmitRequest{
		Channel: "foo",
	})

	fooRes = &orderer.SubmitResponse{
		Info: "foo",
	}

	barReq = wrapSubmitReq(&orderer.SubmitRequest{
		Channel: "bar",
	})

	barRes = &orderer.SubmitResponse{
		Info: "bar",
	}

	testConsensusReq = &orderer.StepRequest{
		Payload: &orderer.StepRequest_ConsensusRequest{
			ConsensusRequest: &orderer.ConsensusRequest{
				Payload: []byte{1, 2, 3},
				Channel: testChannel,
			},
		},
	}

	channelExtractor = &mockChannelExtractor{}
)

func nextUnusedID() uint64 {
	return atomic.AddUint64(&lastNodeID, 1)
}

func createCAOrPanic() tlsgen.CA {
	ca, err := tlsgen.NewCA()
	if err != nil {
		panic(fmt.Sprintf("failed creating CA: %+v", err))
	}
	return ca
}

type mockChannelExtractor struct{}

func (*mockChannelExtractor) TargetChannel(msg proto.Message) string {
	switch req := msg.(type) {
	case *orderer.ConsensusRequest:
		return req.Channel
	case *orderer.SubmitRequest:
		return req.Channel
	default:
		return ""
	}
}

type clusterNode struct {
	lock         sync.Mutex
	frozen       bool
	freezeCond   sync.Cond
	dialer       *cluster.PredicateDialer
	handler      *mocks.Handler
	nodeInfo     cluster.RemoteNode
	srv          *comm_utils.GRPCServer
	bindAddress  string
	clientConfig comm_utils.ClientConfig
	serverConfig comm_utils.ServerConfig
	c            *cluster.Comm
}

func (cn *clusterNode) Step(stream orderer.Cluster_StepServer) error {
	cn.waitIfFrozen()
	req, err := stream.Recv()
	if err != nil {
		return err
	}
	if submitReq := req.GetSubmitRequest(); submitReq != nil {
		return cn.c.DispatchSubmit(stream.Context(), submitReq)
	}
	if err := cn.c.DispatchConsensus(stream.Context(), req.GetConsensusRequest()); err != nil {
		return err
	}
	return stream.Send(&orderer.StepResponse{})
}

func (cn *clusterNode) waitIfFrozen() {
	cn.lock.Lock()
	// There is no freeze after an unfreeze so no need
	// for a for loop.
	if cn.frozen {
		cn.freezeCond.Wait()
		return
	}
	cn.lock.Unlock()
}

func (cn *clusterNode) freeze() {
	cn.lock.Lock()
	defer cn.lock.Unlock()
	cn.frozen = true
}

func (cn *clusterNode) unfreeze() {
	cn.lock.Lock()
	cn.frozen = false
	cn.lock.Unlock()
	cn.freezeCond.Broadcast()
}

func (cn *clusterNode) resurrect() {
	gRPCServer, err := comm_utils.NewGRPCServer(cn.bindAddress, cn.serverConfig)
	if err != nil {
		panic(fmt.Errorf("failed starting gRPC server: %v", err))
	}
	cn.srv = gRPCServer
	orderer.RegisterClusterServer(gRPCServer.Server(), cn)
	go cn.srv.Start()
}

func (cn *clusterNode) stop() {
	cn.srv.Stop()
	cn.c.Shutdown()
}

func (cn *clusterNode) renewCertificates() {
	clientKeyPair, err := ca.NewClientCertKeyPair()
	if err != nil {
		panic(fmt.Errorf("failed creating client certificate %v", err))
	}
	serverKeyPair, err := ca.NewServerCertKeyPair("127.0.0.1")
	if err != nil {
		panic(fmt.Errorf("failed creating server certificate %v", err))
	}

	cn.nodeInfo.ClientTLSCert = clientKeyPair.TLSCert.Raw
	cn.nodeInfo.ServerTLSCert = serverKeyPair.TLSCert.Raw

	cn.serverConfig.SecOpts.Certificate = serverKeyPair.Cert
	cn.serverConfig.SecOpts.Key = serverKeyPair.Key

	cn.dialer.Config.SecOpts.Key = clientKeyPair.Key
	cn.dialer.Config.SecOpts.Certificate = clientKeyPair.Cert
}

func newTestNodeWithMetrics(t *testing.T, metrics cluster.MetricsProvider, tlsConnGauge metrics.Gauge) *clusterNode {
	serverKeyPair, err := ca.NewServerCertKeyPair("127.0.0.1")
	assert.NoError(t, err)

	clientKeyPair, _ := ca.NewClientCertKeyPair()

	handler := &mocks.Handler{}
	clientConfig := comm_utils.ClientConfig{
		AsyncConnect: true,
		Timeout:      time.Hour,
		SecOpts: comm_utils.SecureOptions{
			RequireClientCert: true,
			Key:               clientKeyPair.Key,
			Certificate:       clientKeyPair.Cert,
			ServerRootCAs:     [][]byte{ca.CertBytes()},
			UseTLS:            true,
			ClientRootCAs:     [][]byte{ca.CertBytes()},
		},
	}

	dialer := &cluster.PredicateDialer{
		Config: clientConfig,
	}

	srvConfig := comm_utils.ServerConfig{
		SecOpts: comm_utils.SecureOptions{
			Key:         serverKeyPair.Key,
			Certificate: serverKeyPair.Cert,
			UseTLS:      true,
		},
	}
	gRPCServer, err := comm_utils.NewGRPCServer("127.0.0.1:", srvConfig)
	assert.NoError(t, err)

	tstSrv := &clusterNode{
		dialer:       dialer,
		clientConfig: clientConfig,
		serverConfig: srvConfig,
		bindAddress:  gRPCServer.Address(),
		handler:      handler,
		nodeInfo: cluster.RemoteNode{
			Endpoint:      gRPCServer.Address(),
			ID:            nextUnusedID(),
			ServerTLSCert: serverKeyPair.TLSCert.Raw,
			ClientTLSCert: clientKeyPair.TLSCert.Raw,
		},
		srv: gRPCServer,
	}

	tstSrv.freezeCond.L = &tstSrv.lock

	compareCert := cluster.CachePublicKeyComparisons(func(a, b []byte) bool {
		return crypto.CertificatesWithSamePublicKey(a, b) == nil
	})

	tstSrv.c = &cluster.Comm{
		CertExpWarningThreshold: time.Hour,
		SendBufferSize:          1,
		Logger:                  flogging.MustGetLogger("test"),
		Chan2Members:            make(cluster.MembersByChannel),
		H:                       handler,
		ChanExt:                 channelExtractor,
		Connections:             cluster.NewConnectionStore(dialer, tlsConnGauge),
		Metrics:                 cluster.NewMetrics(metrics),
		CompareCertificate:      compareCert,
	}

	orderer.RegisterClusterServer(gRPCServer.Server(), tstSrv)
	go gRPCServer.Start()
	return tstSrv
}

func newTestNode(t *testing.T) *clusterNode {
	return newTestNodeWithMetrics(t, &disabled.Provider{}, &disabled.Gauge{})
}

func TestSendBigMessage(t *testing.T) {
	// Scenario: Basic test that spawns 5 nodes and sends a big message
	// from one of the nodes to the others.
	// A receiver node's Step() server side method (which calls Recv)
	// is frozen until the sender node's Send method returns;
	// hence, the sender node finishes calling Send
	// before a receiver node starts calling Recv.
	// This ensures that Send is non-blocking even with big messages.
	// In the test, we send a total of 8MB of random data (2MB to each node).
	// The randomness is used so gRPC compression won't compress it to a lower size.

	node1 := newTestNode(t)
	node2 := newTestNode(t)
	node3 := newTestNode(t)
	node4 := newTestNode(t)
	node5 := newTestNode(t)

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		node.c.SendBufferSize = 1
	}

	defer node1.stop()
	defer node2.stop()
	defer node3.stop()
	defer node4.stop()
	defer node5.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo, node3.nodeInfo, node4.nodeInfo, node5.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)
	node3.c.Configure(testChannel, config)
	node4.c.Configure(testChannel, config)
	node5.c.Configure(testChannel, config)

	var messageReceived sync.WaitGroup
	messageReceived.Add(4)

	msgSize := 1024 * 1024 * 2
	bigMsg := &orderer.ConsensusRequest{
		Channel: testChannel,
		Payload: make([]byte, msgSize),
	}

	_, err := rand.Read(bigMsg.Payload)
	assert.NoError(t, err)

	wrappedMsg := &orderer.StepRequest{
		Payload: &orderer.StepRequest_ConsensusRequest{
			ConsensusRequest: bigMsg,
		},
	}

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		node.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
			msg := args.Get(2).(*orderer.ConsensusRequest)
			assert.Len(t, msg.Payload, msgSize)
			messageReceived.Done()
		}).Return(nil)
	}

	streams := map[uint64]*cluster.Stream{}

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		// Freeze the node, in order to block its Recv
		node.freeze()
	}

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		rm, err := node1.c.Remote(testChannel, node.nodeInfo.ID)
		assert.NoError(t, err)

		stream := assertEventualEstablishStream(t, rm)
		streams[node.nodeInfo.ID] = stream
	}

	t0 := time.Now()
	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		stream := streams[node.nodeInfo.ID]

		t1 := time.Now()
		err = stream.Send(wrappedMsg)
		assert.NoError(t, err)
		t.Log("Sending took", time.Since(t1))

		// Unfreeze the node. It can now call Recv, and signal the messageReceived waitGroup.
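		// At this point Send has already returned even though the receiver has not called
		// Recv yet, which is what demonstrates that Send does not block on the frozen receiver.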
		node.unfreeze()
	}

	t.Log("Total sending time to all 4 nodes took:", time.Since(t0))

	messageReceived.Wait()
}

func TestBlockingSend(t *testing.T) {
	// Scenario: Basic test that spawns 2 nodes and sends from the first node
	// to the second node, three SubmitRequests, or three consensus requests.
	// SubmitRequests should block, but consensus requests should not.

	for _, testCase := range []struct {
		description        string
		messageToSend      *orderer.StepRequest
		streamUnblocks     bool
		elapsedGreaterThan time.Duration
		overflowErr        string
	}{
		{
			description:        "SubmitRequest",
			messageToSend:      wrapSubmitReq(testReq),
			streamUnblocks:     true,
			elapsedGreaterThan: time.Second / 2,
		},
		{
			description:   "ConsensusRequest",
			messageToSend: testConsensusReq,
			overflowErr:   "send queue overflown",
		},
	} {
		t.Run(testCase.description, func(t *testing.T) {
			node1 := newTestNode(t)
			node2 := newTestNode(t)

			node1.c.SendBufferSize = 1
			node2.c.SendBufferSize = 1

			defer node1.stop()
			defer node2.stop()

			config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
			node1.c.Configure(testChannel, config)
			node2.c.Configure(testChannel, config)

			rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
			assert.NoError(t, err)

			client := &mocks.ClusterClient{}
			fakeStream := &mocks.StepClient{}

			// Replace real client with a mock client
			rm.Client = client
			rm.ProbeConn = func(_ *grpc.ClientConn) error {
				return nil
			}
			// Configure client to return the mock stream
			fakeStream.On("Context", mock.Anything).Return(context.Background())
			client.On("Step", mock.Anything).Return(fakeStream, nil).Once()

			unBlock := make(chan struct{})
			var sendInvoked sync.WaitGroup
			sendInvoked.Add(1)
			var once sync.Once
			fakeStream.On("Send", mock.Anything).Run(func(_ mock.Arguments) {
				once.Do(sendInvoked.Done)
				<-unBlock
			}).Return(errors.New("oops"))

			stream, err := rm.NewStream(time.Hour)
			assert.NoError(t, err)

			// The first send doesn't block, even though the Send operation blocks.
			err = stream.Send(testCase.messageToSend)
			assert.NoError(t, err)

			// The second one doesn't either.
			// After this point, we have 1 goroutine which is blocked on Send(),
			// and one message in the buffer.
			sendInvoked.Wait()
			err = stream.Send(testCase.messageToSend)
			assert.NoError(t, err)

			// The third blocks, so we need to unblock it ourselves
			// in order for it to go through, unless the operation
			// is non-blocking.
			go func() {
				time.Sleep(time.Second)
				if testCase.streamUnblocks {
					close(unBlock)
				}
			}()

			t1 := time.Now()
			err = stream.Send(testCase.messageToSend)
			// The third send always overflows or blocks.
			// If we expect to receive an overflow error - assert it.
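			// In the ConsensusRequest case the single-slot buffer is already full and consensus
			// sends never block, so an overflow error is expected to be returned immediately.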
			if testCase.overflowErr != "" {
				assert.EqualError(t, err, testCase.overflowErr)
			}
			elapsed := time.Since(t1)
			t.Log("Elapsed time:", elapsed)
			assert.True(t, elapsed > testCase.elapsedGreaterThan)

			if !testCase.streamUnblocks {
				close(unBlock)
			}
		})
	}
}

func TestBasic(t *testing.T) {
	// Scenario: Basic test that spawns 2 nodes and sends each other
	// messages that are expected to be echoed back

	node1 := newTestNode(t)
	node2 := newTestNode(t)

	defer node1.stop()
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	assertBiDiCommunication(t, node1, node2, testReq)
}

func TestUnavailableHosts(t *testing.T) {
	// Scenario: A node is configured to connect
	// to a host that is down
	node1 := newTestNode(t)

	clientConfig := node1.dialer.Config
	// The below timeout makes sure that connection establishment is done
	// asynchronously. Had it been synchronous, the Remote() call would be
	// blocked for an hour.
	clientConfig.Timeout = time.Hour
	defer node1.stop()

	node2 := newTestNode(t)
	node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	remote, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)
	assert.NotNil(t, remote)

	_, err = remote.NewStream(time.Millisecond * 100)
	assert.Contains(t, err.Error(), "connection")
}

func TestStreamAbort(t *testing.T) {
	// Scenarios: node 1 is connected to node 2 in 2 channels,
	// and the consumer of the communication calls receive.
	// Two sub-scenarios are exercised:
	// 1) The server certificate of node 2 changes in the first channel
	// 2) Node 2 is evicted from the membership of the first channel
	// In both of the scenarios, the Recv() call should be aborted

	node2 := newTestNode(t)
	defer node2.stop()

	invalidNodeInfo := cluster.RemoteNode{
		ID:            node2.nodeInfo.ID,
		ServerTLSCert: []byte{1, 2, 3},
		ClientTLSCert: []byte{1, 2, 3},
	}

	for _, tst := range []struct {
		testName      string
		membership    []cluster.RemoteNode
		expectedError string
	}{
		{
			testName:      "Evicted from membership",
			membership:    nil,
			expectedError: "rpc error: code = Canceled desc = context canceled",
		},
		{
			testName:      "Changed TLS certificate",
			membership:    []cluster.RemoteNode{invalidNodeInfo},
			expectedError: "rpc error: code = Canceled desc = context canceled",
		},
	} {
		t.Run(tst.testName, func(t *testing.T) {
			testStreamAbort(t, node2, tst.membership, tst.expectedError)
		})
	}
	node2.handler.AssertNumberOfCalls(t, "OnSubmit", 2)
}

func testStreamAbort(t *testing.T, node2 *clusterNode, newMembership []cluster.RemoteNode, expectedError string) {
	node1 := newTestNode(t)
	defer node1.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
	node1.c.Configure(testChannel2, []cluster.RemoteNode{node2.nodeInfo})
	node2.c.Configure(testChannel2, []cluster.RemoteNode{node1.nodeInfo})

	var streamCreated sync.WaitGroup
	streamCreated.Add(1)

	stopChan := make(chan struct{})

	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Once().Run(func(_ mock.Arguments) {
		// Notify the stream was created
		streamCreated.Done()
		// Wait for the test to finish
		<-stopChan
	}).Return(nil).Once()

	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)

	go func() {
		stream := assertEventualEstablishStream(t, rm1)
		// Signal the reconfiguration
		err = stream.Send(wrapSubmitReq(testReq))
		assert.NoError(t, err)
		_, err := stream.Recv()
		assert.Contains(t, err.Error(), expectedError)
		close(stopChan)
	}()

	go func() {
		// Wait for the stream reference to be obtained
		streamCreated.Wait()
		// Reconfigure the channel membership
		node1.c.Configure(testChannel, newMembership)
	}()

	<-stopChan
}

func TestDoubleReconfigure(t *testing.T) {
	// Scenario: Basic test that spawns 2 nodes
	// and configures node 1 twice, and checks that
	// the remote stub for node 1 wasn't re-created in the second
	// configuration since it already existed

	node1 := newTestNode(t)
	node2 := newTestNode(t)

	defer node1.stop()
	defer node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	rm2, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)
	// Ensure the references are equal
	assert.True(t, rm1 == rm2)
}

func TestInvalidChannel(t *testing.T) {
	// Scenario: node 1 is ordered to send a message on a channel
	// that doesn't exist, and also receives a message, but
	// the channel cannot be extracted from the message.

	t.Run("channel doesn't exist", func(t *testing.T) {
		node1 := newTestNode(t)
		defer node1.stop()

		_, err := node1.c.Remote(testChannel, 0)
		assert.EqualError(t, err, "channel test doesn't exist")
	})

	t.Run("channel cannot be extracted", func(t *testing.T) {
		node1 := newTestNode(t)
		defer node1.stop()

		node1.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
		gt := gomega.NewGomegaWithT(t)
		gt.Eventually(func() (bool, error) {
			_, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
			return true, err
		}, time.Minute).Should(gomega.BeTrue())

		stub, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
		assert.NoError(t, err)

		stream := assertEventualEstablishStream(t, stub)

		// An empty SubmitRequest has an empty channel which is invalid
		err = stream.Send(wrapSubmitReq(&orderer.SubmitRequest{}))
		assert.NoError(t, err)

		_, err = stream.Recv()
		assert.EqualError(t, err, "rpc error: code = Unknown desc = badly formatted message, cannot extract channel")

		// Test directly without going through the gRPC stream
		err = node1.c.DispatchSubmit(context.Background(), &orderer.SubmitRequest{})
		assert.EqualError(t, err, "badly formatted message, cannot extract channel")
	})
}

func TestAbortRPC(t *testing.T) {
	// Scenarios:
	// (I) The node calls an RPC, and calls Abort() on the remote context
	// in parallel. The RPC should return even though the server-side call hasn't finished.
	// (II) The node calls an RPC, but the server-side processing takes too long,
	// and the RPC invocation returns prematurely.
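	// The two cases below differ only in how the RPC terminates: an explicit Abort() with a
	// long timeout, versus no Abort() with a short timeout that expires on its own.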

	testCases := []struct {
		name        string
		abortFunc   func(*cluster.RemoteContext)
		rpcTimeout  time.Duration
		expectedErr string
	}{
		{
			name:        "Abort() called",
			expectedErr: "rpc error: code = Canceled desc = context canceled",
			rpcTimeout:  time.Hour,
			abortFunc: func(rc *cluster.RemoteContext) {
				rc.Abort()
			},
		},
		{
			name:        "RPC timeout",
			expectedErr: "rpc timeout expired",
			rpcTimeout:  time.Second,
			abortFunc:   func(*cluster.RemoteContext) {},
		},
	}

	for _, testCase := range testCases {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			testAbort(t, testCase.abortFunc, testCase.rpcTimeout, testCase.expectedErr)
		})
	}
}

func testAbort(t *testing.T, abortFunc func(*cluster.RemoteContext), rpcTimeout time.Duration, expectedErr string) {
	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)
	var onStepCalled sync.WaitGroup
	onStepCalled.Add(1)

	// stuckCall ensures the OnStep() call is stuck throughout this test
	var stuckCall sync.WaitGroup
	stuckCall.Add(1)
	// At the end of the test, release the server-side resources
	defer stuckCall.Done()

	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(nil).Once().Run(func(_ mock.Arguments) {
		onStepCalled.Done()
		stuckCall.Wait()
	}).Once()

	rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)

	go func() {
		onStepCalled.Wait()
		abortFunc(rm)
	}()

	var stream *cluster.Stream
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		stream, err = rm.NewStream(rpcTimeout)
		return err
	}, time.Second*10, time.Millisecond*10).Should(gomega.Succeed())

	stream.Send(wrapSubmitReq(testSubReq))
	_, err = stream.Recv()

	assert.EqualError(t, err, expectedErr)

	node2.handler.AssertNumberOfCalls(t, "OnSubmit", 1)
}

func TestNoTLSCertificate(t *testing.T) {
	// Scenario: The node is sent a message by another node that doesn't
	// connect with mutual TLS, thus doesn't provide a TLS certificate
	node1 := newTestNode(t)
	defer node1.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})

	clientConfig := comm_utils.ClientConfig{
		AsyncConnect: true,
		Timeout:      time.Millisecond * 100,
		SecOpts: comm_utils.SecureOptions{
			ServerRootCAs: [][]byte{ca.CertBytes()},
			UseTLS:        true,
		},
	}
	cl, err := comm_utils.NewGRPCClient(clientConfig)
	assert.NoError(t, err)

	var conn *grpc.ClientConn
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() (bool, error) {
		conn, err = cl.NewConnection(node1.srv.Address())
		return true, err
	}, time.Minute).Should(gomega.BeTrue())

	echoClient := orderer.NewClusterClient(conn)
	stream, err := echoClient.Step(context.Background())
	assert.NoError(t, err)

	err = stream.Send(wrapSubmitReq(testSubReq))
	assert.NoError(t, err)
	_, err = stream.Recv()
	assert.EqualError(t, err, "rpc error: code = Unknown desc = no TLS certificate sent")
}

func TestReconnect(t *testing.T) {
	// Scenario: node 1 and node 2 are connected,
	// and node 2 is taken offline.
	// Node 1 tries to send a message to node 2 but fails,
	// and afterwards node 2 is brought back, after which
	// node 1 sends more messages, and it should succeed
	// sending a message to node 2 eventually.

	node1 := newTestNode(t)
	defer node1.stop()
	conf := node1.dialer.Config
	conf.Timeout = time.Hour

	node2 := newTestNode(t)
	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(nil)
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	// Take node 2 offline by shutting down its gRPC service
	node2.srv.Stop()
	// Obtain the stub for node 2.
	// Should succeed, because the connection was created at time of configuration
	stub, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)

	// Try to obtain a stream. Should not succeed.
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		_, err = stub.NewStream(time.Hour)
		return err
	}).Should(gomega.Not(gomega.Succeed()))

	// Wait for the port to be released
	for {
		lsnr, err := net.Listen("tcp", node2.nodeInfo.Endpoint)
		if err == nil {
			lsnr.Close()
			break
		}
	}

	// Resurrect node 2
	node2.resurrect()
	// Send a message from node 1 to node 2.
	// Should succeed eventually
	assertEventualSendMessage(t, stub, testReq)
}

func TestRenewCertificates(t *testing.T) {
	// Scenario: node 1 and node 2 are connected,
	// and the certificates are renewed for both nodes
	// at the same time.
	// They are expected to connect to one another
	// after the reconfiguration.

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node1.handler.On("OnStep", testChannel, node2.nodeInfo.ID, mock.Anything).Return(testRes, nil)
	node2.handler.On("OnStep", testChannel, node1.nodeInfo.ID, mock.Anything).Return(testRes, nil)

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	assertBiDiCommunication(t, node1, node2, testReq)

	// Now, renew the certificates for both nodes
	node1.renewCertificates()
	node2.renewCertificates()

	// Reconfigure them
	config = []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	// W.L.O.G, try to send a message from node1 to node2.
	// It should fail, because node2's server certificate has now changed,
	// so it closed the connection to the remote node
	info2 := node2.nodeInfo
	remote, err := node1.c.Remote(testChannel, info2.ID)
	assert.NoError(t, err)
	assert.NotNil(t, remote)

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() string {
		_, err = remote.NewStream(time.Hour)
		return err.Error()
	}, timeout).Should(gomega.ContainSubstring(info2.Endpoint))

	// Restart the gRPC service on both nodes, to load the new TLS certificates
	node1.srv.Stop()
	node1.resurrect()
	node2.srv.Stop()
	node2.resurrect()

	// Finally, check that the nodes can communicate once again
	assertBiDiCommunication(t, node1, node2, testReq)
}

func TestMembershipReconfiguration(t *testing.T) {
	// Scenario: node 1 and node 2 are started up
	// and node 2 is configured to know about node 1,
	// without node1 knowing about node 2.
	// The communication between them should only work
	// after node 1 is configured to know about node 2.

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{})
	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})

	// Node 1 can't connect to node 2 because it doesn't know its TLS certificate yet
	_, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.EqualError(t, err, fmt.Sprintf("node %d doesn't exist in channel test's membership", node2.nodeInfo.ID))
	// Node 2 can connect to node 1, but it can't send it messages because node 1 doesn't know node 2 yet.
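	// Connection establishment is asynchronous, so retry until node 2 manages to obtain a stub for node 1.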

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() (bool, error) {
		_, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
		return true, err
	}, time.Minute).Should(gomega.BeTrue())

	stub, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
	assert.NoError(t, err)

	stream := assertEventualEstablishStream(t, stub)
	err = stream.Send(wrapSubmitReq(testSubReq))
	assert.NoError(t, err)

	_, err = stream.Recv()
	assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")

	// Next, configure node 1 to know about node 2
	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})

	// Check that the communication works correctly between both nodes
	assertBiDiCommunication(t, node1, node2, testReq)
	assertBiDiCommunication(t, node2, node1, testReq)

	// Reconfigure node 2 to forget about node 1
	node2.c.Configure(testChannel, []cluster.RemoteNode{})
	// Node 1 can still connect to node 2
	stub, err = node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)
	// But it can't send a message, because node 2 no longer authorizes node 1
	stream = assertEventualEstablishStream(t, stub)
	stream.Send(wrapSubmitReq(testSubReq))
	_, err = stream.Recv()
	assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
}

func TestShutdown(t *testing.T) {
	// Scenario: node 1 is shut down and as a result, can't
	// send messages to anyone, nor can it be reconfigured

	node1 := newTestNode(t)
	defer node1.stop()

	node1.c.Shutdown()

	// Obtaining a RemoteContext cannot succeed because shutdown was called before
	_, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
	assert.EqualError(t, err, "communication has been shut down")

	node2 := newTestNode(t)
	defer node2.stop()

	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
	// Configuration of node 1 doesn't take place
	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		_, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
		return err
	}, time.Minute).Should(gomega.Succeed())

	stub, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
	assert.NoError(t, err)

	// Therefore, sending a message doesn't succeed because node 1 rejected the configuration change
	gt.Eventually(func() string {
		stream, err := stub.NewStream(time.Hour)
		if err != nil {
			return err.Error()
		}
		err = stream.Send(wrapSubmitReq(testSubReq))
		assert.NoError(t, err)

		_, err = stream.Recv()
		return err.Error()
	}, timeout).Should(gomega.ContainSubstring("channel test doesn't exist"))
}

func TestMultiChannelConfig(t *testing.T) {
	// Scenario: node 1 knows node 2 only in channel "foo"
	// and knows node 3 only in channel "bar".
	// Messages that are received are routed according to their corresponding channels,
	// and when node 2 sends a message for channel "bar" to node 1, it is rejected.
	// The same applies to node 3, which sends a message to node 1 in channel "foo".

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node3 := newTestNode(t)
	defer node3.stop()

	node1.c.Configure("foo", []cluster.RemoteNode{node2.nodeInfo})
	node1.c.Configure("bar", []cluster.RemoteNode{node3.nodeInfo})
	node2.c.Configure("foo", []cluster.RemoteNode{node1.nodeInfo})
	node3.c.Configure("bar", []cluster.RemoteNode{node1.nodeInfo})

	t.Run("Correct channel", func(t *testing.T) {
		var fromNode2 sync.WaitGroup
		fromNode2.Add(1)
		node1.handler.On("OnSubmit", "foo", node2.nodeInfo.ID, mock.Anything).Return(nil).Run(func(_ mock.Arguments) {
			fromNode2.Done()
		}).Once()

		var fromNode3 sync.WaitGroup
		fromNode3.Add(1)
		node1.handler.On("OnSubmit", "bar", node3.nodeInfo.ID, mock.Anything).Return(nil).Run(func(_ mock.Arguments) {
			fromNode3.Done()
		}).Once()

		node2toNode1, err := node2.c.Remote("foo", node1.nodeInfo.ID)
		assert.NoError(t, err)
		node3toNode1, err := node3.c.Remote("bar", node1.nodeInfo.ID)
		assert.NoError(t, err)

		stream := assertEventualEstablishStream(t, node2toNode1)
		stream.Send(fooReq)

		fromNode2.Wait()
		node1.handler.AssertNumberOfCalls(t, "OnSubmit", 1)

		stream = assertEventualEstablishStream(t, node3toNode1)
		stream.Send(barReq)

		fromNode3.Wait()
		node1.handler.AssertNumberOfCalls(t, "OnSubmit", 2)
	})

	t.Run("Incorrect channel", func(t *testing.T) {
		node1.handler.On("OnSubmit", "foo", node2.nodeInfo.ID, mock.Anything).Return(nil)
		node1.handler.On("OnSubmit", "bar", node3.nodeInfo.ID, mock.Anything).Return(nil)

		node2toNode1, err := node2.c.Remote("foo", node1.nodeInfo.ID)
		assert.NoError(t, err)
		node3toNode1, err := node3.c.Remote("bar", node1.nodeInfo.ID)
		assert.NoError(t, err)

		assertEventualSendMessage(t, node2toNode1, &orderer.SubmitRequest{Channel: "foo"})
		assert.NoError(t, err)
		stream, err := node2toNode1.NewStream(time.Hour)
		assert.NoError(t, err)
		err = stream.Send(barReq)
		assert.NoError(t, err)
		_, err = stream.Recv()
		assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")

		assertEventualSendMessage(t, node3toNode1, &orderer.SubmitRequest{Channel: "bar"})
		stream, err = node3toNode1.NewStream(time.Hour)
		assert.NoError(t, err)
		err = stream.Send(fooReq)
		assert.NoError(t, err)
		_, err = stream.Recv()
		assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
	})
}

func TestConnectionFailure(t *testing.T) {
	// Scenario: node 1 fails to connect to node 2.
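	// The secure dialer is replaced below with a mock that always fails to dial,
	// so obtaining a remote stub surfaces the dial error.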

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	dialer := &mocks.SecureDialer{}
	dialer.On("Dial", mock.Anything, mock.Anything).Return(nil, errors.New("oops"))
	node1.c.Connections = cluster.NewConnectionStore(dialer, &disabled.Gauge{})
	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})

	_, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.EqualError(t, err, "oops")
}

type testMetrics struct {
	fakeProvider        *mocks.MetricsProvider
	egressQueueLength   metricsfakes.Gauge
	egressQueueCapacity metricsfakes.Gauge
	egressStreamCount   metricsfakes.Gauge
	egressTLSConnCount  metricsfakes.Gauge
	egressWorkerSize    metricsfakes.Gauge
	ingressStreamsCount metricsfakes.Gauge
	msgSendTime         metricsfakes.Histogram
	msgDropCount        metricsfakes.Counter
}

func (tm *testMetrics) initialize() {
	tm.egressQueueLength.WithReturns(&tm.egressQueueLength)
	tm.egressQueueCapacity.WithReturns(&tm.egressQueueCapacity)
	tm.egressStreamCount.WithReturns(&tm.egressStreamCount)
	tm.egressTLSConnCount.WithReturns(&tm.egressTLSConnCount)
	tm.egressWorkerSize.WithReturns(&tm.egressWorkerSize)
	tm.ingressStreamsCount.WithReturns(&tm.ingressStreamsCount)
	tm.msgSendTime.WithReturns(&tm.msgSendTime)
	tm.msgDropCount.WithReturns(&tm.msgDropCount)

	fakeProvider := tm.fakeProvider
	fakeProvider.On("NewGauge", cluster.IngressStreamsCountOpts).Return(&tm.ingressStreamsCount)
	fakeProvider.On("NewGauge", cluster.EgressQueueLengthOpts).Return(&tm.egressQueueLength)
	fakeProvider.On("NewGauge", cluster.EgressQueueCapacityOpts).Return(&tm.egressQueueCapacity)
	fakeProvider.On("NewGauge", cluster.EgressStreamsCountOpts).Return(&tm.egressStreamCount)
	fakeProvider.On("NewGauge", cluster.EgressTLSConnectionCountOpts).Return(&tm.egressTLSConnCount)
	fakeProvider.On("NewGauge", cluster.EgressWorkersOpts).Return(&tm.egressWorkerSize)
	fakeProvider.On("NewCounter", cluster.MessagesDroppedCountOpts).Return(&tm.msgDropCount)
	fakeProvider.On("NewHistogram", cluster.MessageSendTimeOpts).Return(&tm.msgSendTime)
}

func TestMetrics(t *testing.T) {
	for _, testCase := range []struct {
		name        string
		runTest     func(node1, node2 *clusterNode, testMetrics *testMetrics)
		testMetrics *testMetrics
	}{
		{
			name: "EgressQueueOccupancy",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "msg_type", "transaction", "channel", testChannel},
					testMetrics.egressQueueLength.WithArgsForCall(0))
				assert.Equal(t, float64(0), testMetrics.egressQueueLength.SetArgsForCall(0))
				assert.Equal(t, float64(1), testMetrics.egressQueueCapacity.SetArgsForCall(0))

				var messageReceived sync.WaitGroup
				messageReceived.Add(1)
				node2.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
					messageReceived.Done()
				}).Return(nil)

				rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
				assert.NoError(t, err)

				stream := assertEventualEstablishStream(t, rm)
				stream.Send(testConsensusReq)
				messageReceived.Wait()

				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "msg_type", "consensus", "channel", testChannel},
					testMetrics.egressQueueLength.WithArgsForCall(1))
				assert.Equal(t, float64(0), testMetrics.egressQueueLength.SetArgsForCall(1))
				assert.Equal(t, float64(1), testMetrics.egressQueueCapacity.SetArgsForCall(1))
			},
		},
		{
			name: "EgressStreamsCount",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				assert.Equal(t, 1, testMetrics.egressStreamCount.SetCallCount())
				assert.Equal(t, 1, testMetrics.egressStreamCount.WithCallCount())
				assert.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))

				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
				assert.Equal(t, 2, testMetrics.egressStreamCount.SetCallCount())
				assert.Equal(t, 2, testMetrics.egressStreamCount.WithCallCount())
				assert.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))
			},
		},
		{
			name: "EgressTLSConnCount",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				assert.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))

				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
				assert.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))

				// A single TLS connection despite 2 streams
				assert.Equal(t, float64(1), testMetrics.egressTLSConnCount.SetArgsForCall(0))
				assert.Equal(t, 1, testMetrics.egressTLSConnCount.SetCallCount())
			},
		},
		{
			name: "EgressWorkerSize",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				assert.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))

				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
				assert.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))

				assert.Equal(t, float64(1), testMetrics.egressWorkerSize.SetArgsForCall(0))
				assert.Equal(t, float64(1), testMetrics.egressWorkerSize.SetArgsForCall(1))
			},
		},
		{
			name: "MsgSendTime",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "channel", testChannel},
					testMetrics.msgSendTime.WithArgsForCall(0))

				assert.Equal(t, 1, testMetrics.msgSendTime.ObserveCallCount())
			},
		},
		{
			name: "MsgDropCount",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				blockRecv := make(chan struct{})
				wasReported := func() bool {
					select {
					case <-blockRecv:
						return true
					default:
						return false
					}
				}
				// When the drop count is reported, release the lock on the server side receive operation.
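				// AddStub runs whenever a dropped message is counted; closing blockRecv then
				// lets the blocked OnConsensus handler below return.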
				testMetrics.msgDropCount.AddStub = func(float642 float64) {
					if !wasReported() {
						close(blockRecv)
					}
				}

				node2.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
					// Block until the message drop is reported
					<-blockRecv
				}).Return(nil)

				rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
				assert.NoError(t, err)

				stream := assertEventualEstablishStream(t, rm)
				// Send too many messages while the server side is not reading from the stream
				for {
					stream.Send(testConsensusReq)
					if wasReported() {
						break
					}
				}
				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "channel", testChannel},
					testMetrics.msgDropCount.WithArgsForCall(0))
				assert.Equal(t, 1, testMetrics.msgDropCount.AddCallCount())
			},
		},
	} {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			fakeProvider := &mocks.MetricsProvider{}
			testCase.testMetrics = &testMetrics{
				fakeProvider: fakeProvider,
			}

			testCase.testMetrics.initialize()

			node1 := newTestNodeWithMetrics(t, fakeProvider, &testCase.testMetrics.egressTLSConnCount)
			defer node1.stop()

			node2 := newTestNode(t)
			defer node2.stop()

			configForNode1 := []cluster.RemoteNode{node2.nodeInfo}
			configForNode2 := []cluster.RemoteNode{node1.nodeInfo}
			node1.c.Configure(testChannel, configForNode1)
			node2.c.Configure(testChannel, configForNode2)
			node1.c.Configure(testChannel2, configForNode1)
			node2.c.Configure(testChannel2, configForNode2)

			testCase.runTest(node1, node2, testCase.testMetrics)
		})
	}
}

func TestCertExpirationWarningEgress(t *testing.T) {
	// Scenario: Ensures that when certificates are due to expire,
	// a warning is logged.

	node1 := newTestNode(t)
	node2 := newTestNode(t)

	cert, err := x509.ParseCertificate(node2.nodeInfo.ServerTLSCert)
	assert.NoError(t, err)
	assert.NotNil(t, cert)

	// Let the NotAfter time of the certificate be T1, and the current time be T0.
	// time.Until returns (T1 - T0), the time left until expiration.
	// We want to trigger a warning, so we set the warning threshold to be 20 seconds above
	// the time left, so the time left would be smaller than the threshold.
	node1.c.CertExpWarningThreshold = time.Until(cert.NotAfter) + time.Second*20
	// We only alert once in 3 seconds
	node1.c.MinimumExpirationWarningInterval = time.Second * 3

	defer node1.stop()
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	stub, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)

	mockgRPC := &mocks.StepClient{}
	mockgRPC.On("Send", mock.Anything).Return(nil)
	mockgRPC.On("Context").Return(context.Background())
	mockClient := &mocks.ClusterClient{}
	mockClient.On("Step", mock.Anything).Return(mockgRPC, nil)

	stub.Client = mockClient

	stream := assertEventualEstablishStream(t, stub)

	alerts := make(chan struct{}, 100)

	stream.Logger = stream.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "expires in less than") {
			alerts <- struct{}{}
		}
		return nil
	}))

	// Send a message to the node and expect an alert to be logged.
	stream.Send(wrapSubmitReq(testReq))
	select {
	case <-alerts:
	case <-time.After(time.Second * 5):
		t.Fatal("Should have logged an alert")
	}
	// Send another message, and ensure nothing is logged, because alerts
	// are suppressed until the minimum warning interval expires.
	stream.Send(wrapSubmitReq(testReq))
	select {
	case <-alerts:
		t.Fatal("Should not have logged an alert")
	case <-time.After(time.Millisecond * 500):
	}
	// Wait enough time for the alert interval to clear.
	time.Sleep(node1.c.MinimumExpirationWarningInterval + time.Second)
	// Send a message again; this time an alert should be logged again.
	stream.Send(wrapSubmitReq(testReq))
	select {
	case <-alerts:
	case <-time.After(time.Second * 5):
		t.Fatal("Should have logged an alert")
	}
}

func assertBiDiCommunicationForChannel(t *testing.T, node1, node2 *clusterNode, msgToSend *orderer.SubmitRequest, channel string) {
	for _, tst := range []struct {
		label    string
		sender   *clusterNode
		receiver *clusterNode
		target   uint64
	}{
		{label: "1->2", sender: node1, target: node2.nodeInfo.ID, receiver: node2},
		{label: "2->1", sender: node2, target: node1.nodeInfo.ID, receiver: node1},
	} {
		t.Run(tst.label, func(t *testing.T) {
			stub, err := tst.sender.c.Remote(channel, tst.target)
			assert.NoError(t, err)

			stream := assertEventualEstablishStream(t, stub)

			var wg sync.WaitGroup
			wg.Add(1)
			tst.receiver.handler.On("OnSubmit", channel, tst.sender.nodeInfo.ID, mock.Anything).Return(nil).Once().Run(func(args mock.Arguments) {
				req := args.Get(2).(*orderer.SubmitRequest)
				assert.True(t, proto.Equal(req, msgToSend))
				wg.Done()
			})

			err = stream.Send(wrapSubmitReq(msgToSend))
			assert.NoError(t, err)

			wg.Wait()
		})
	}
}

func assertBiDiCommunication(t *testing.T, node1, node2 *clusterNode, msgToSend *orderer.SubmitRequest) {
	assertBiDiCommunicationForChannel(t, node1, node2, msgToSend, testChannel)
}

func assertEventualEstablishStream(t *testing.T, rpc *cluster.RemoteContext) *cluster.Stream {
	var res *cluster.Stream
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		stream, err := rpc.NewStream(time.Hour)
		res = stream
		return err
	}, timeout).Should(gomega.Succeed())
	return res
}

func assertEventualSendMessage(t *testing.T, rpc *cluster.RemoteContext, req *orderer.SubmitRequest) orderer.Cluster_StepClient {
	var res orderer.Cluster_StepClient
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		stream, err := rpc.NewStream(time.Hour)
		if err != nil {
			return err
		}
		res = stream
		return stream.Send(wrapSubmitReq(req))
	}, timeout).Should(gomega.Succeed())
	return res
}

func wrapSubmitReq(req *orderer.SubmitRequest) *orderer.StepRequest {
	return &orderer.StepRequest{
		Payload: &orderer.StepRequest_SubmitRequest{
			SubmitRequest: req,
		},
	}
}