github.com/hechain20/hechain@v0.0.0-20220316014945-b544036ba106/orderer/common/cluster/comm_test.go

/*
Copyright hechain. 2017 All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
*/

package cluster_test

import (
	"context"
	"crypto/rand"
	"crypto/x509"
	"fmt"
	"net"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/golang/protobuf/proto"
	"github.com/hechain20/hechain/common/crypto"
	"github.com/hechain20/hechain/common/crypto/tlsgen"
	"github.com/hechain20/hechain/common/flogging"
	"github.com/hechain20/hechain/common/metrics"
	"github.com/hechain20/hechain/common/metrics/disabled"
	"github.com/hechain20/hechain/common/metrics/metricsfakes"
	comm_utils "github.com/hechain20/hechain/internal/pkg/comm"
	"github.com/hechain20/hechain/orderer/common/cluster"
	"github.com/hechain20/hechain/orderer/common/cluster/mocks"
	"github.com/hyperledger/fabric-protos-go/common"
	"github.com/hyperledger/fabric-protos-go/orderer"
	"github.com/onsi/gomega"
	"github.com/pkg/errors"
	"github.com/stretchr/testify/mock"
	"github.com/stretchr/testify/require"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
	"google.golang.org/grpc"
)

const (
	testChannel  = "test"
	testChannel2 = "test2"
	timeout      = time.Second * 10
)

var (
	// CA that generates TLS key-pairs.
	// We use only one CA because the authentication
	// is based on TLS pinning.
	ca = createCAOrPanic()

	lastNodeID uint64

	testSubReq = &orderer.SubmitRequest{
		Channel: "test",
	}

	testReq = &orderer.SubmitRequest{
		Channel: "test",
		Payload: &common.Envelope{
			Payload: []byte("test"),
		},
	}

	testReq2 = &orderer.SubmitRequest{
		Channel: testChannel2,
		Payload: &common.Envelope{
			Payload: []byte(testChannel2),
		},
	}

	testRes = &orderer.SubmitResponse{
		Info: "test",
	}

	fooReq = wrapSubmitReq(&orderer.SubmitRequest{
		Channel: "foo",
	})

	barReq = wrapSubmitReq(&orderer.SubmitRequest{
		Channel: "bar",
	})

	testConsensusReq = &orderer.StepRequest{
		Payload: &orderer.StepRequest_ConsensusRequest{
			ConsensusRequest: &orderer.ConsensusRequest{
				Payload: []byte{1, 2, 3},
				Channel: testChannel,
			},
		},
	}

	channelExtractor = &mockChannelExtractor{}
)

func nextUnusedID() uint64 {
	return atomic.AddUint64(&lastNodeID, 1)
}

func createCAOrPanic() tlsgen.CA {
	ca, err := tlsgen.NewCA()
	if err != nil {
		panic(fmt.Sprintf("failed creating CA: %+v", err))
	}
	return ca
}

type mockChannelExtractor struct{}

func (*mockChannelExtractor) TargetChannel(msg proto.Message) string {
	switch req := msg.(type) {
	case *orderer.ConsensusRequest:
		return req.Channel
	case *orderer.SubmitRequest:
		return req.Channel
	default:
		return ""
	}
}

type clusterNode struct {
	lock         sync.Mutex
	frozen       bool
	freezeCond   sync.Cond
	dialer       *cluster.PredicateDialer
	handler      *mocks.Handler
	nodeInfo     cluster.RemoteNode
	srv          *comm_utils.GRPCServer
	bindAddress  string
	clientConfig comm_utils.ClientConfig
	serverConfig comm_utils.ServerConfig
	c            *cluster.Comm
}

func (cn *clusterNode) Step(stream orderer.Cluster_StepServer) error {
	cn.waitIfFrozen()
	req, err := stream.Recv()
	if err != nil {
		return err
	}
	if submitReq := req.GetSubmitRequest(); submitReq != nil {
		return cn.c.DispatchSubmit(stream.Context(), submitReq)
	}
	if err := cn.c.DispatchConsensus(stream.Context(), req.GetConsensusRequest()); err != nil {
		return err
	}
	return stream.Send(&orderer.StepResponse{})
}

func (cn *clusterNode) waitIfFrozen() {
	cn.lock.Lock()
	// There is no freeze after an unfreeze, so there is no need
	// for a loop around the Wait.
	if cn.frozen {
		cn.freezeCond.Wait()
		return
	}
	cn.lock.Unlock()
}

func (cn *clusterNode) freeze() {
	cn.lock.Lock()
	defer cn.lock.Unlock()
	cn.frozen = true
}

func (cn *clusterNode) unfreeze() {
	cn.lock.Lock()
	cn.frozen = false
	cn.lock.Unlock()
	cn.freezeCond.Broadcast()
}

func (cn *clusterNode) resurrect() {
	gRPCServer, err := comm_utils.NewGRPCServer(cn.bindAddress, cn.serverConfig)
	if err != nil {
		panic(fmt.Errorf("failed starting gRPC server: %v", err))
	}
	cn.srv = gRPCServer
	orderer.RegisterClusterServer(gRPCServer.Server(), cn)
	go cn.srv.Start()
}

func (cn *clusterNode) stop() {
	cn.srv.Stop()
	cn.c.Shutdown()
}

func (cn *clusterNode) renewCertificates() {
	clientKeyPair, err := ca.NewClientCertKeyPair()
	if err != nil {
		panic(fmt.Errorf("failed creating client certificate %v", err))
	}
	serverKeyPair, err := ca.NewServerCertKeyPair("127.0.0.1")
	if err != nil {
		panic(fmt.Errorf("failed creating server certificate %v", err))
	}

	cn.nodeInfo.ClientTLSCert = clientKeyPair.TLSCert.Raw
	cn.nodeInfo.ServerTLSCert = serverKeyPair.TLSCert.Raw

	cn.serverConfig.SecOpts.Certificate = serverKeyPair.Cert
	cn.serverConfig.SecOpts.Key = serverKeyPair.Key

	cn.dialer.Config.SecOpts.Key = clientKeyPair.Key
	cn.dialer.Config.SecOpts.Certificate = clientKeyPair.Cert
}

// newTestNodeWithMetrics creates and starts a cluster node whose Comm instance
// reports to the given metrics provider and TLS connection gauge.
func newTestNodeWithMetrics(t *testing.T, metrics cluster.MetricsProvider, tlsConnGauge metrics.Gauge) *clusterNode {
	serverKeyPair, err := ca.NewServerCertKeyPair("127.0.0.1")
	require.NoError(t, err)

	clientKeyPair, _ := ca.NewClientCertKeyPair()

	handler := &mocks.Handler{}
	clientConfig := comm_utils.ClientConfig{
		AsyncConnect: true,
		DialTimeout:  time.Hour,
		SecOpts: comm_utils.SecureOptions{
			RequireClientCert: true,
			Key:               clientKeyPair.Key,
			Certificate:       clientKeyPair.Cert,
			ServerRootCAs:     [][]byte{ca.CertBytes()},
			UseTLS:            true,
			ClientRootCAs:     [][]byte{ca.CertBytes()},
		},
	}

	dialer := &cluster.PredicateDialer{
		Config: clientConfig,
	}

	srvConfig := comm_utils.ServerConfig{
		SecOpts: comm_utils.SecureOptions{
			Key:         serverKeyPair.Key,
			Certificate: serverKeyPair.Cert,
			UseTLS:      true,
		},
	}
	gRPCServer, err := comm_utils.NewGRPCServer("127.0.0.1:", srvConfig)
	require.NoError(t, err)

	tstSrv := &clusterNode{
		dialer:       dialer,
		clientConfig: clientConfig,
		serverConfig: srvConfig,
		bindAddress:  gRPCServer.Address(),
		handler:      handler,
		nodeInfo: cluster.RemoteNode{
			Endpoint:      gRPCServer.Address(),
			ID:            nextUnusedID(),
			ServerTLSCert: serverKeyPair.TLSCert.Raw,
			ClientTLSCert: clientKeyPair.TLSCert.Raw,
		},
		srv: gRPCServer,
	}

	tstSrv.freezeCond.L = &tstSrv.lock

	compareCert := cluster.CachePublicKeyComparisons(func(a, b []byte) bool {
		return crypto.CertificatesWithSamePublicKey(a, b) == nil
	})

	tstSrv.c = &cluster.Comm{
		CertExpWarningThreshold: time.Hour,
		SendBufferSize:          1,
		Logger:                  flogging.MustGetLogger("test"),
		Chan2Members:            make(cluster.MembersByChannel),
		H:                       handler,
		ChanExt:                 channelExtractor,
		Connections:             cluster.NewConnectionStore(dialer, tlsConnGauge),
		Metrics:                 cluster.NewMetrics(metrics),
		CompareCertificate:      compareCert,
	}

	orderer.RegisterClusterServer(gRPCServer.Server(), tstSrv)
	go gRPCServer.Start()
	return tstSrv
}

// newTestNode creates and starts a cluster node with metrics disabled.
func newTestNode(t *testing.T) *clusterNode {
	return newTestNodeWithMetrics(t, &disabled.Provider{}, &disabled.Gauge{})
}

func TestSendBigMessage(t *testing.T) {
	// Scenario: Basic test that spawns 5 nodes and sends a big message
	// from one of the nodes to the others.
	// A receiving node's Step() server-side method (which calls Recv)
	// is frozen until the sending node's Send method returns;
	// hence the sender finishes calling Send
	// before a receiver starts calling Recv.
	// This ensures that Send is non-blocking even with big messages.
	// In the test, we send a total of 8MB of random data (2MB to each node).
	// The randomness is used so gRPC compression won't compress it to a lower size.

	node1 := newTestNode(t)
	node2 := newTestNode(t)
	node3 := newTestNode(t)
	node4 := newTestNode(t)
	node5 := newTestNode(t)

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		node.c.SendBufferSize = 1
	}

	defer node1.stop()
	defer node2.stop()
	defer node3.stop()
	defer node4.stop()
	defer node5.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo, node3.nodeInfo, node4.nodeInfo, node5.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)
	node3.c.Configure(testChannel, config)
	node4.c.Configure(testChannel, config)
	node5.c.Configure(testChannel, config)

	var messageReceived sync.WaitGroup
	messageReceived.Add(4)

	msgSize := 1024 * 1024 * 2
	bigMsg := &orderer.ConsensusRequest{
		Channel: testChannel,
		Payload: make([]byte, msgSize),
	}

	_, err := rand.Read(bigMsg.Payload)
	require.NoError(t, err)

	wrappedMsg := &orderer.StepRequest{
		Payload: &orderer.StepRequest_ConsensusRequest{
			ConsensusRequest: bigMsg,
		},
	}

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		node.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
			msg := args.Get(2).(*orderer.ConsensusRequest)
			require.Len(t, msg.Payload, msgSize)
			messageReceived.Done()
		}).Return(nil)
	}

	streams := map[uint64]*cluster.Stream{}

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		// Freeze the node, in order to block its Recv
		node.freeze()
	}

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		rm, err := node1.c.Remote(testChannel, node.nodeInfo.ID)
		require.NoError(t, err)

		stream := assertEventualEstablishStream(t, rm)
		streams[node.nodeInfo.ID] = stream
	}

	t0 := time.Now()
	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		stream := streams[node.nodeInfo.ID]

		t1 := time.Now()
		err = stream.Send(wrappedMsg)
		require.NoError(t, err)
		t.Log("Sending took", time.Since(t1))

		// Unfreeze the node. It can now call Recv, and signal the messageReceived waitGroup.
		node.unfreeze()
	}

	t.Log("Total sending time to all 4 nodes took:", time.Since(t0))

	messageReceived.Wait()
}

func TestBlockingSend(t *testing.T) {
	// Scenario: Basic test that spawns 2 nodes and sends three SubmitRequests,
	// or three consensus requests, from the first node to the second node.
	// SubmitRequests should block, but consensus requests should not.

	for _, testCase := range []struct {
		description        string
		messageToSend      *orderer.StepRequest
		streamUnblocks     bool
		elapsedGreaterThan time.Duration
		overflowErr        string
	}{
		{
			description:        "SubmitRequest",
			messageToSend:      wrapSubmitReq(testReq),
			streamUnblocks:     true,
			elapsedGreaterThan: time.Second / 2,
		},
		{
			description:   "ConsensusRequest",
			messageToSend: testConsensusReq,
			overflowErr:   "send queue overflown",
		},
	} {
		t.Run(testCase.description, func(t *testing.T) {
			node1 := newTestNode(t)
			node2 := newTestNode(t)

			node1.c.SendBufferSize = 1
			node2.c.SendBufferSize = 1

			defer node1.stop()
			defer node2.stop()

			config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
			node1.c.Configure(testChannel, config)
			node2.c.Configure(testChannel, config)

			rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
			require.NoError(t, err)

			client := &mocks.ClusterClient{}
			fakeStream := &mocks.StepClient{}

			// Replace the real client with a mock client
			rm.Client = client
			rm.ProbeConn = func(_ *grpc.ClientConn) error {
				return nil
			}
			// Configure the client to return the mock stream
			fakeStream.On("Context", mock.Anything).Return(context.Background())
			client.On("Step", mock.Anything).Return(fakeStream, nil).Once()

			unBlock := make(chan struct{})
			var sendInvoked sync.WaitGroup
			sendInvoked.Add(1)
			var once sync.Once
			fakeStream.On("Send", mock.Anything).Run(func(_ mock.Arguments) {
				once.Do(sendInvoked.Done)
				<-unBlock
			}).Return(errors.New("oops"))

			stream, err := rm.NewStream(time.Hour)
			require.NoError(t, err)

			// The first send doesn't block, even though the Send operation blocks.
			err = stream.Send(testCase.messageToSend)
			require.NoError(t, err)

			// The second one doesn't block either.
			// After this point, we have one goroutine which is blocked on Send(),
			// and one message in the buffer.
			sendInvoked.Wait()
			err = stream.Send(testCase.messageToSend)
			require.NoError(t, err)

			// The third blocks, so we need to unblock it ourselves
			// in order for it to go through, unless the operation
			// is non-blocking.
			go func() {
				time.Sleep(time.Second)
				if testCase.streamUnblocks {
					close(unBlock)
				}
			}()

			t1 := time.Now()
			err = stream.Send(testCase.messageToSend)
			// The third send always overflows or blocks.
			// If we expect to receive an overflow error - assert it.
			if testCase.overflowErr != "" {
				require.EqualError(t, err, testCase.overflowErr)
			}
			elapsed := time.Since(t1)
			t.Log("Elapsed time:", elapsed)
			require.True(t, elapsed > testCase.elapsedGreaterThan)

			if !testCase.streamUnblocks {
				close(unBlock)
			}
		})
	}
}

func TestBasic(t *testing.T) {
	// Scenario: Basic test that spawns 2 nodes which send each other
	// messages that are expected to be echoed back.

	node1 := newTestNode(t)
	node2 := newTestNode(t)

	defer node1.stop()
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	assertBiDiCommunication(t, node1, node2, testReq)
}

func TestUnavailableHosts(t *testing.T) {
	// Scenario: A node is configured to connect
	// to a host that is down.
	node1 := newTestNode(t)

	clientConfig := node1.dialer.Config
	// The below timeout makes sure that connection establishment is done
	// asynchronously. Had it been synchronous, the Remote() call would
	// block for an hour.
	clientConfig.DialTimeout = time.Hour
	defer node1.stop()

	node2 := newTestNode(t)
	node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	remote, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)
	require.NotNil(t, remote)

	_, err = remote.NewStream(time.Millisecond * 100)
	require.Contains(t, err.Error(), "connection")
}

func TestStreamAbortReportCorrectError(t *testing.T) {
	// Scenario: node 1 acquires a stream to node 2, and then the stream
	// encounters an error and as a result is aborted.
	// We ensure the error reported is the first error, even after
	// multiple attempts to use the stream.
	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})

	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(errors.Errorf("whoops")).Once()

	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)
	var streamTerminated sync.WaitGroup
	streamTerminated.Add(1)

	stream := assertEventualEstablishStream(t, rm1)

	l, err := zap.NewDevelopment()
	require.NoError(t, err)
	stream.Logger = flogging.NewFabricLogger(l, zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "Stream 1 to") && strings.Contains(entry.Message, "terminated") {
			streamTerminated.Done()
		}
		return nil
	}))

	// Probe the stream for the first time
	err = stream.Send(wrapSubmitReq(testReq))
	require.NoError(t, err)

	// We should receive back the crafted error
	_, err = stream.Recv()
	require.Contains(t, err.Error(), "whoops")

	// Wait for the stream to be terminated from within the communication infrastructure
	streamTerminated.Wait()

	// We should still receive the original crafted error despite the stream being terminated
	err = stream.Send(wrapSubmitReq(testReq))
	require.Contains(t, err.Error(), "whoops")
}

func TestStreamAbort(t *testing.T) {
	// Scenario: node 1 is connected to node 2 in 2 channels,
	// and the consumer of the communication calls Recv().
	// Two sub-scenarios occur:
	// 1) The server certificate of node 2 changes in the first channel
	// 2) Node 2 is evicted from the membership of the first channel
	// In both scenarios, the Recv() call should be aborted.

	node2 := newTestNode(t)
	defer node2.stop()

	invalidNodeInfo := cluster.RemoteNode{
		ID:            node2.nodeInfo.ID,
		ServerTLSCert: []byte{1, 2, 3},
		ClientTLSCert: []byte{1, 2, 3},
	}

	for _, tst := range []struct {
		testName      string
		membership    []cluster.RemoteNode
		expectedError string
	}{
		{
			testName:      "Evicted from membership",
			membership:    nil,
			expectedError: "rpc error: code = Canceled desc = context canceled",
		},
		{
			testName:      "Changed TLS certificate",
			membership:    []cluster.RemoteNode{invalidNodeInfo},
			expectedError: "rpc error: code = Canceled desc = context canceled",
		},
	} {
		t.Run(tst.testName, func(t *testing.T) {
			testStreamAbort(t, node2, tst.membership, tst.expectedError)
		})
	}
	node2.handler.AssertNumberOfCalls(t, "OnSubmit", 2)
}

func testStreamAbort(t *testing.T, node2 *clusterNode, newMembership []cluster.RemoteNode, expectedError string) {
	node1 := newTestNode(t)
	defer node1.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
	node1.c.Configure(testChannel2, []cluster.RemoteNode{node2.nodeInfo})
	node2.c.Configure(testChannel2, []cluster.RemoteNode{node1.nodeInfo})

	var streamCreated sync.WaitGroup
	streamCreated.Add(1)

	stopChan := make(chan struct{})

	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Once().Run(func(_ mock.Arguments) {
		// Notify that the stream was created
		streamCreated.Done()
		// Wait for the test to finish
		<-stopChan
	}).Return(nil).Once()

	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)

	go func() {
		stream := assertEventualEstablishStream(t, rm1)
		// Signal the reconfiguration
		err = stream.Send(wrapSubmitReq(testReq))
		require.NoError(t, err)
		_, err := stream.Recv()
		require.Contains(t, err.Error(), expectedError)
		close(stopChan)
	}()

	go func() {
		// Wait for the stream reference to be obtained
		streamCreated.Wait()
		// Reconfigure the channel membership
		node1.c.Configure(testChannel, newMembership)
	}()

	<-stopChan
}

func TestDoubleReconfigure(t *testing.T) {
	// Scenario: Basic test that spawns 2 nodes,
	// configures node 1 twice, and checks that
	// the remote stub node 1 holds for node 2 wasn't re-created in the second
	// configuration since it already existed.

	node1 := newTestNode(t)
	node2 := newTestNode(t)

	defer node1.stop()
	defer node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	rm2, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)
	// Ensure the references are equal
	require.True(t, rm1 == rm2)
}

func TestInvalidChannel(t *testing.T) {
	// Scenario: node 1 is ordered to send a message on a channel
	// that doesn't exist, and also receives a message whose
	// channel cannot be extracted.

	t.Run("channel doesn't exist", func(t *testing.T) {
		node1 := newTestNode(t)
		defer node1.stop()

		_, err := node1.c.Remote(testChannel, 0)
		require.EqualError(t, err, "channel test doesn't exist")
	})

	t.Run("channel cannot be extracted", func(t *testing.T) {
		node1 := newTestNode(t)
		defer node1.stop()

		node1.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
		gt := gomega.NewGomegaWithT(t)
		gt.Eventually(func() (bool, error) {
			_, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
			return true, err
		}, time.Minute).Should(gomega.BeTrue())

		stub, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
		require.NoError(t, err)

		stream := assertEventualEstablishStream(t, stub)

		// An empty SubmitRequest has an empty channel, which is invalid
		err = stream.Send(wrapSubmitReq(&orderer.SubmitRequest{}))
		require.NoError(t, err)

		_, err = stream.Recv()
		require.EqualError(t, err, "rpc error: code = Unknown desc = badly formatted message, cannot extract channel")

		// Test directly without going through the gRPC stream
		err = node1.c.DispatchSubmit(context.Background(), &orderer.SubmitRequest{})
		require.EqualError(t, err, "badly formatted message, cannot extract channel")
	})
}

func TestAbortRPC(t *testing.T) {
	// Scenarios:
	// (I) The node calls an RPC, and calls Abort() on the remote context
	// in parallel. The RPC should return even though the server-side call hasn't finished.
	// (II) The node calls an RPC, but the server-side processing takes too long,
	// and the RPC invocation returns prematurely.
	testCases := []struct {
		name        string
		abortFunc   func(*cluster.RemoteContext)
		rpcTimeout  time.Duration
		expectedErr string
	}{
		{
			name:        "Abort() called",
			expectedErr: "rpc error: code = Canceled desc = context canceled",
			rpcTimeout:  time.Hour,
			abortFunc: func(rc *cluster.RemoteContext) {
				rc.Abort()
			},
		},
		{
			name:        "RPC timeout",
			expectedErr: "rpc timeout expired",
			rpcTimeout:  time.Second,
			abortFunc:   func(*cluster.RemoteContext) {},
		},
	}

	for _, testCase := range testCases {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			testAbort(t, testCase.abortFunc, testCase.rpcTimeout, testCase.expectedErr)
		})
	}
}

func testAbort(t *testing.T, abortFunc func(*cluster.RemoteContext), rpcTimeout time.Duration, expectedErr string) {
	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)
	var onStepCalled sync.WaitGroup
	onStepCalled.Add(1)

	// stuckCall ensures the OnStep() call is stuck throughout this test
	var stuckCall sync.WaitGroup
	stuckCall.Add(1)
	// At the end of the test, release the server-side resources
	defer stuckCall.Done()

	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(nil).Once().Run(func(_ mock.Arguments) {
		onStepCalled.Done()
		stuckCall.Wait()
	}).Once()

	rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)

	go func() {
		onStepCalled.Wait()
		abortFunc(rm)
	}()

	var stream *cluster.Stream
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		stream, err = rm.NewStream(rpcTimeout)
		return err
	}, time.Second*10, time.Millisecond*10).Should(gomega.Succeed())

	stream.Send(wrapSubmitReq(testSubReq))
	_, err = stream.Recv()

	require.EqualError(t, err, expectedErr)

	node2.handler.AssertNumberOfCalls(t, "OnSubmit", 1)
}

func TestNoTLSCertificate(t *testing.T) {
	// Scenario: The node is sent a message by another node that doesn't
	// connect with mutual TLS, thus doesn't provide a TLS certificate
	node1 := newTestNode(t)
	defer node1.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})

	clientConfig := comm_utils.ClientConfig{
		AsyncConnect: true,
		DialTimeout:  time.Millisecond * 100,
		SecOpts: comm_utils.SecureOptions{
			ServerRootCAs: [][]byte{ca.CertBytes()},
			UseTLS:        true,
		},
	}

	var conn *grpc.ClientConn
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() (bool, error) {
		var err error
		conn, err = clientConfig.Dial(node1.srv.Address())
		return true, err
	}, time.Minute).Should(gomega.BeTrue())

	echoClient := orderer.NewClusterClient(conn)
	stream, err := echoClient.Step(context.Background())
	require.NoError(t, err)

	err = stream.Send(wrapSubmitReq(testSubReq))
	require.NoError(t, err)
	_, err = stream.Recv()
	require.EqualError(t, err, "rpc error: code = Unknown desc = no TLS certificate sent")
}

func TestReconnect(t *testing.T) {
	// Scenario: node 1 and node 2 are connected,
	// and node 2 is taken offline.
	// Node 1 tries to send a message to node 2 but fails,
	// and afterwards node 2 is brought back, after which
	// node 1 sends more messages, and it should eventually
	// succeed in sending a message to node 2.

	node1 := newTestNode(t)
	defer node1.stop()
	conf := node1.dialer.Config
	conf.DialTimeout = time.Hour

	node2 := newTestNode(t)
	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(nil)
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	// Take node 2 offline by shutting down its gRPC service
	node2.srv.Stop()
	// Obtain the stub for node 2.
	// Should succeed, because the connection was created at configuration time
	stub, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)

	// Try to obtain a stream. Should not succeed.
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		_, err = stub.NewStream(time.Hour)
		return err
	}).Should(gomega.Not(gomega.Succeed()))

	// Wait for the port to be released
	for {
		lsnr, err := net.Listen("tcp", node2.nodeInfo.Endpoint)
		if err == nil {
			lsnr.Close()
			break
		}
	}

	// Resurrect node 2
	node2.resurrect()
	// Send a message from node 1 to node 2.
	// Should succeed eventually
	assertEventualSendMessage(t, stub, testReq)
}

func TestRenewCertificates(t *testing.T) {
	// Scenario: node 1 and node 2 are connected,
	// node 2's certificate is renewed, and
	// node 1 is reconfigured with the new
	// configuration without being restarted.

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node1.handler.On("OnStep", testChannel, node2.nodeInfo.ID, mock.Anything).Return(testRes, nil)
	node2.handler.On("OnStep", testChannel, node1.nodeInfo.ID, mock.Anything).Return(testRes, nil)

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	assertBiDiCommunication(t, node1, node2, testReq)

	// Close outgoing connections from node 2 to node 1
	node2.c.Configure(testChannel, nil)
	// Stop the gRPC service of node 2 to replace its certificate
	node2.srv.Stop()

	// Wait until node 1 detects this
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		remote, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
		if err != nil {
			return err
		}
		stream, err := remote.NewStream(time.Hour)
		if err != nil {
			return err
		}
		err = stream.Send(wrapSubmitReq(testSubReq))
		if err != nil {
			return err
		}
		return nil
	}).Should(gomega.Not(gomega.Succeed()))

	// Renew node 2's keys
	node2.renewCertificates()

	// Resurrect node 2 so it services connections again
	node2.resurrect()

	// W.L.O.G, try to send a message from node 1 to node 2.
	// It should fail, because node 2's server certificate has now changed,
	// so node 1 closed the connection to the remote node.
	info2 := node2.nodeInfo
	remote, err := node1.c.Remote(testChannel, info2.ID)
	require.NoError(t, err)
	require.NotNil(t, remote)
	_, err = remote.NewStream(time.Hour)
	require.Contains(t, err.Error(), info2.Endpoint)

	// Reconfigure both nodes with the updated keys
	config = []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	// Finally, check that the nodes can communicate once again
	assertBiDiCommunication(t, node1, node2, testReq)
}

func TestMembershipReconfiguration(t *testing.T) {
	// Scenario: node 1 and node 2 are started up
	// and node 2 is configured to know about node 1,
	// without node 1 knowing about node 2.
	// The communication between them should only work
	// after node 1 is configured to know about node 2.

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{})
	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})

	// Node 1 can't connect to node 2 because it doesn't know its TLS certificate yet
	_, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.EqualError(t, err, fmt.Sprintf("node %d doesn't exist in channel test's membership", node2.nodeInfo.ID))
	// Node 2 can connect to node 1, but it can't send it messages because node 1 doesn't know node 2 yet.

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() (bool, error) {
		_, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
		return true, err
	}, time.Minute).Should(gomega.BeTrue())

	stub, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
	require.NoError(t, err)

	stream := assertEventualEstablishStream(t, stub)
	err = stream.Send(wrapSubmitReq(testSubReq))
	require.NoError(t, err)

	_, err = stream.Recv()
	require.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")

	// Next, configure node 1 to know about node 2
	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})

	// Check that the communication works correctly between both nodes
	assertBiDiCommunication(t, node1, node2, testReq)
	assertBiDiCommunication(t, node2, node1, testReq)

	// Reconfigure node 2 to forget about node 1
	node2.c.Configure(testChannel, []cluster.RemoteNode{})
	// Node 1 can still connect to node 2
	stub, err = node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)
	// But it can't send a message because node 2 no longer authorizes node 1
	stream = assertEventualEstablishStream(t, stub)
	stream.Send(wrapSubmitReq(testSubReq))
	_, err = stream.Recv()
	require.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
}

func TestShutdown(t *testing.T) {
	// Scenario: node 1 is shut down and as a result, can't
	// send messages to anyone, nor can it be reconfigured.

	node1 := newTestNode(t)
	defer node1.stop()

	node1.c.Shutdown()

	// Obtaining a RemoteContext cannot succeed because shutdown was called before
	_, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
	require.EqualError(t, err, "communication has been shut down")

	node2 := newTestNode(t)
	defer node2.stop()

	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
	// Configuration of node 1 doesn't take place
	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		_, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
		return err
	}, time.Minute).Should(gomega.Succeed())

	stub, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
	require.NoError(t, err)

	// Therefore, sending a message doesn't succeed because node 1 rejected the configuration change
	gt.Eventually(func() string {
		stream, err := stub.NewStream(time.Hour)
		if err != nil {
			return err.Error()
		}
		err = stream.Send(wrapSubmitReq(testSubReq))
		require.NoError(t, err)

		_, err = stream.Recv()
		return err.Error()
	}, timeout).Should(gomega.ContainSubstring("channel test doesn't exist"))
}

func TestMultiChannelConfig(t *testing.T) {
	// Scenario: node 1 knows node 2 only in channel "foo"
	// and knows node 3 only in channel "bar".
	// Received messages are routed according to their corresponding channels,
	// and when node 2 sends a message for channel "bar" to node 1, it is rejected.
	// The same applies to node 3 sending a message to node 1 in channel "foo".

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node3 := newTestNode(t)
	defer node3.stop()

	node1.c.Configure("foo", []cluster.RemoteNode{node2.nodeInfo})
	node1.c.Configure("bar", []cluster.RemoteNode{node3.nodeInfo})
	node2.c.Configure("foo", []cluster.RemoteNode{node1.nodeInfo})
	node3.c.Configure("bar", []cluster.RemoteNode{node1.nodeInfo})

	t.Run("Correct channel", func(t *testing.T) {
		var fromNode2 sync.WaitGroup
		fromNode2.Add(1)
		node1.handler.On("OnSubmit", "foo", node2.nodeInfo.ID, mock.Anything).Return(nil).Run(func(_ mock.Arguments) {
			fromNode2.Done()
		}).Once()

		var fromNode3 sync.WaitGroup
		fromNode3.Add(1)
		node1.handler.On("OnSubmit", "bar", node3.nodeInfo.ID, mock.Anything).Return(nil).Run(func(_ mock.Arguments) {
			fromNode3.Done()
		}).Once()

		node2toNode1, err := node2.c.Remote("foo", node1.nodeInfo.ID)
		require.NoError(t, err)
		node3toNode1, err := node3.c.Remote("bar", node1.nodeInfo.ID)
		require.NoError(t, err)

		stream := assertEventualEstablishStream(t, node2toNode1)
		stream.Send(fooReq)

		fromNode2.Wait()
		node1.handler.AssertNumberOfCalls(t, "OnSubmit", 1)

		stream = assertEventualEstablishStream(t, node3toNode1)
		stream.Send(barReq)

		fromNode3.Wait()
		node1.handler.AssertNumberOfCalls(t, "OnSubmit", 2)
	})

	t.Run("Incorrect channel", func(t *testing.T) {
		node1.handler.On("OnSubmit", "foo", node2.nodeInfo.ID, mock.Anything).Return(nil)
		node1.handler.On("OnSubmit", "bar", node3.nodeInfo.ID, mock.Anything).Return(nil)

		node2toNode1, err := node2.c.Remote("foo", node1.nodeInfo.ID)
		require.NoError(t, err)
		node3toNode1, err := node3.c.Remote("bar", node1.nodeInfo.ID)
		require.NoError(t, err)

		assertEventualSendMessage(t, node2toNode1, &orderer.SubmitRequest{Channel: "foo"})
		require.NoError(t, err)
		stream, err := node2toNode1.NewStream(time.Hour)
		require.NoError(t, err)
		err = stream.Send(barReq)
		require.NoError(t, err)
		_, err = stream.Recv()
		require.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")

		assertEventualSendMessage(t, node3toNode1, &orderer.SubmitRequest{Channel: "bar"})
		stream, err = node3toNode1.NewStream(time.Hour)
		require.NoError(t, err)
		err = stream.Send(fooReq)
		require.NoError(t, err)
		_, err = stream.Recv()
		require.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
	})
}

func TestConnectionFailure(t *testing.T) {
	// Scenario: node 1 fails to connect to node 2.

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	dialer := &mocks.SecureDialer{}
	dialer.On("Dial", mock.Anything, mock.Anything).Return(nil, errors.New("oops"))
	node1.c.Connections = cluster.NewConnectionStore(dialer, &disabled.Gauge{})
	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})

	_, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.EqualError(t, err, "oops")
}

type testMetrics struct {
	fakeProvider        *mocks.MetricsProvider
	egressQueueLength   metricsfakes.Gauge
	egressQueueCapacity metricsfakes.Gauge
	egressStreamCount   metricsfakes.Gauge
	egressTLSConnCount  metricsfakes.Gauge
	egressWorkerSize    metricsfakes.Gauge
	ingressStreamsCount metricsfakes.Gauge
	msgSendTime         metricsfakes.Histogram
	msgDropCount        metricsfakes.Counter
}

func (tm *testMetrics) initialize() {
	tm.egressQueueLength.WithReturns(&tm.egressQueueLength)
	tm.egressQueueCapacity.WithReturns(&tm.egressQueueCapacity)
	tm.egressStreamCount.WithReturns(&tm.egressStreamCount)
	tm.egressTLSConnCount.WithReturns(&tm.egressTLSConnCount)
	tm.egressWorkerSize.WithReturns(&tm.egressWorkerSize)
	tm.ingressStreamsCount.WithReturns(&tm.ingressStreamsCount)
	tm.msgSendTime.WithReturns(&tm.msgSendTime)
	tm.msgDropCount.WithReturns(&tm.msgDropCount)

	fakeProvider := tm.fakeProvider
	fakeProvider.On("NewGauge", cluster.IngressStreamsCountOpts).Return(&tm.ingressStreamsCount)
	fakeProvider.On("NewGauge", cluster.EgressQueueLengthOpts).Return(&tm.egressQueueLength)
	fakeProvider.On("NewGauge", cluster.EgressQueueCapacityOpts).Return(&tm.egressQueueCapacity)
	fakeProvider.On("NewGauge", cluster.EgressStreamsCountOpts).Return(&tm.egressStreamCount)
	fakeProvider.On("NewGauge", cluster.EgressTLSConnectionCountOpts).Return(&tm.egressTLSConnCount)
	fakeProvider.On("NewGauge", cluster.EgressWorkersOpts).Return(&tm.egressWorkerSize)
	fakeProvider.On("NewCounter", cluster.MessagesDroppedCountOpts).Return(&tm.msgDropCount)
	fakeProvider.On("NewHistogram", cluster.MessageSendTimeOpts).Return(&tm.msgSendTime)
}

func TestMetrics(t *testing.T) {
	for _, testCase := range []struct {
		name        string
		runTest     func(t *testing.T, node1, node2 *clusterNode, testMetrics *testMetrics)
		testMetrics *testMetrics
	}{
		{
			name: "EgressQueueOccupancy",
			runTest: func(t *testing.T, node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				require.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "msg_type", "transaction", "channel", testChannel},
					testMetrics.egressQueueLength.WithArgsForCall(0))
				require.Equal(t, float64(0), testMetrics.egressQueueLength.SetArgsForCall(0))
				require.Equal(t, float64(1), testMetrics.egressQueueCapacity.SetArgsForCall(0))

				var messageReceived sync.WaitGroup
				messageReceived.Add(1)
				node2.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
					messageReceived.Done()
				}).Return(nil)

				rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
				require.NoError(t, err)

				stream := assertEventualEstablishStream(t, rm)
				stream.Send(testConsensusReq)
				messageReceived.Wait()

				require.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "msg_type", "consensus", "channel", testChannel},
					testMetrics.egressQueueLength.WithArgsForCall(1))
				require.Equal(t, float64(0), testMetrics.egressQueueLength.SetArgsForCall(1))
				require.Equal(t, float64(1), testMetrics.egressQueueCapacity.SetArgsForCall(1))
			},
		},
		{
			name: "EgressStreamsCount",
			runTest: func(t *testing.T, node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				require.Equal(t, 1, testMetrics.egressStreamCount.SetCallCount())
				require.Equal(t, 1, testMetrics.egressStreamCount.WithCallCount())
				require.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))

				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
				require.Equal(t, 2, testMetrics.egressStreamCount.SetCallCount())
				require.Equal(t, 2, testMetrics.egressStreamCount.WithCallCount())
				require.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))
			},
		},
		{
			name: "EgressTLSConnCount",
			runTest: func(t *testing.T, node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				require.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))

				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
				require.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))

				// A single TLS connection despite 2 streams
				require.Equal(t, float64(1), testMetrics.egressTLSConnCount.SetArgsForCall(0))
				require.Equal(t, 1, testMetrics.egressTLSConnCount.SetCallCount())
			},
		},
		{
			name: "EgressWorkerSize",
			runTest: func(t *testing.T, node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				require.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))

				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
				require.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))

				require.Equal(t, float64(1), testMetrics.egressWorkerSize.SetArgsForCall(0))
				require.Equal(t, float64(1), testMetrics.egressWorkerSize.SetArgsForCall(1))
			},
		},
		{
			name: "MsgSendTime",
			runTest: func(t *testing.T, node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				require.Eventually(t, func() bool { return testMetrics.msgSendTime.ObserveCallCount() > 0 }, time.Second, 10*time.Millisecond)
				require.Equal(t, 1, testMetrics.msgSendTime.ObserveCallCount())
				require.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "channel", testChannel}, testMetrics.msgSendTime.WithArgsForCall(0))
			},
		},
		{
			name: "MsgDropCount",
			runTest: func(t *testing.T, node1, node2 *clusterNode, testMetrics *testMetrics) {
				blockRecv := make(chan struct{})
				wasReported := func() bool {
					select {
					case <-blockRecv:
						return true
					default:
						return false
					}
				}
				// When the drop count is reported, release the blocked server-side receive operation.
				testMetrics.msgDropCount.AddStub = func(float642 float64) {
					if !wasReported() {
						close(blockRecv)
					}
				}

				node2.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
					// Block until the message drop is reported
					<-blockRecv
				}).Return(nil)

				rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
				require.NoError(t, err)

				stream := assertEventualEstablishStream(t, rm)
				// Send too many messages while the server side is not reading from the stream
				for {
					stream.Send(testConsensusReq)
					if wasReported() {
						break
					}
				}
				require.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "channel", testChannel},
					testMetrics.msgDropCount.WithArgsForCall(0))
				require.Equal(t, 1, testMetrics.msgDropCount.AddCallCount())
			},
		},
	} {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			fakeProvider := &mocks.MetricsProvider{}
			testCase.testMetrics = &testMetrics{
				fakeProvider: fakeProvider,
			}

			testCase.testMetrics.initialize()

			node1 := newTestNodeWithMetrics(t, fakeProvider, &testCase.testMetrics.egressTLSConnCount)
			defer node1.stop()

			node2 := newTestNode(t)
			defer node2.stop()

			configForNode1 := []cluster.RemoteNode{node2.nodeInfo}
			configForNode2 := []cluster.RemoteNode{node1.nodeInfo}
			node1.c.Configure(testChannel, configForNode1)
			node2.c.Configure(testChannel, configForNode2)
			node1.c.Configure(testChannel2, configForNode1)
			node2.c.Configure(testChannel2, configForNode2)

			testCase.runTest(t, node1, node2, testCase.testMetrics)
		})
	}
}

func TestCertExpirationWarningEgress(t *testing.T) {
	// Scenario: Ensures that when certificates are due to expire,
	// a warning is logged.

	node1 := newTestNode(t)
	node2 := newTestNode(t)

	cert, err := x509.ParseCertificate(node2.nodeInfo.ServerTLSCert)
	require.NoError(t, err)
	require.NotNil(t, cert)

	// Let the NotAfter time of the certificate be T1, and the current time be T0.
	// Then time.Until(cert.NotAfter) is (T1 - T0), which is how much time is left.
	// We want to trigger a warning, so we set the warning threshold to 20 seconds above
	// the time left, so the time left is smaller than the threshold.
	node1.c.CertExpWarningThreshold = time.Until(cert.NotAfter) + time.Second*20
	// We only alert once in 3 seconds
	node1.c.MinimumExpirationWarningInterval = time.Second * 3

	defer node1.stop()
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	stub, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)

	mockgRPC := &mocks.StepClient{}
	mockgRPC.On("Send", mock.Anything).Return(nil)
	mockgRPC.On("Context").Return(context.Background())
	mockClient := &mocks.ClusterClient{}
	mockClient.On("Step", mock.Anything).Return(mockgRPC, nil)

	stub.Client = mockClient

	stream := assertEventualEstablishStream(t, stub)

	alerts := make(chan struct{}, 100)

	stream.Logger = stream.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "expires in less than") {
			alerts <- struct{}{}
		}
		return nil
	}))

	// Send a message to the node and expect an alert to be logged.
	stream.Send(wrapSubmitReq(testReq))
	select {
	case <-alerts:
	case <-time.After(time.Second * 5):
		t.Fatal("Should have logged an alert")
	}
	// Send another message, and ensure nothing is logged, because the
	// alerts should be suppressed until the minimum warning interval expires.
	stream.Send(wrapSubmitReq(testReq))
	select {
	case <-alerts:
		t.Fatal("Should not have logged an alert")
	case <-time.After(time.Millisecond * 500):
	}
	// Wait enough time for the alert interval to clear.
	time.Sleep(node1.c.MinimumExpirationWarningInterval + time.Second)
	// Send a message again; this time an alert should be logged again.
	stream.Send(wrapSubmitReq(testReq))
	select {
	case <-alerts:
	case <-time.After(time.Second * 5):
		t.Fatal("Should have logged an alert")
	}
}

// assertBiDiCommunicationForChannel verifies that node1 and node2 can send the given
// SubmitRequest to each other, in both directions, over the given channel.
func assertBiDiCommunicationForChannel(t *testing.T, node1, node2 *clusterNode, msgToSend *orderer.SubmitRequest, channel string) {
	establish := []struct {
		label    string
		sender   *clusterNode
		receiver *clusterNode
		target   uint64
	}{
		{label: "1->2", sender: node1, target: node2.nodeInfo.ID, receiver: node2},
		{label: "2->1", sender: node2, target: node1.nodeInfo.ID, receiver: node1},
	}
	for _, estab := range establish {
		stub, err := estab.sender.c.Remote(channel, estab.target)
		require.NoError(t, err)

		stream := assertEventualEstablishStream(t, stub)

		var wg sync.WaitGroup
		wg.Add(1)
		estab.receiver.handler.On("OnSubmit", channel, estab.sender.nodeInfo.ID, mock.Anything).Return(nil).Once().Run(func(args mock.Arguments) {
			req := args.Get(2).(*orderer.SubmitRequest)
			require.True(t, proto.Equal(req, msgToSend))
			t.Log(estab.label)
			wg.Done()
		})

		err = stream.Send(wrapSubmitReq(msgToSend))
		require.NoError(t, err)

		wg.Wait()
	}
}

// assertBiDiCommunication is assertBiDiCommunicationForChannel over the default test channel.
func assertBiDiCommunication(t *testing.T, node1, node2 *clusterNode, msgToSend *orderer.SubmitRequest) {
	assertBiDiCommunicationForChannel(t, node1, node2, msgToSend, testChannel)
}

// assertEventualEstablishStream keeps trying to open a stream on the given remote
// context until it succeeds or the test timeout expires.
func assertEventualEstablishStream(t *testing.T, rpc *cluster.RemoteContext) *cluster.Stream {
	var res *cluster.Stream
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		stream, err := rpc.NewStream(time.Hour)
		res = stream
		return err
	}, timeout).Should(gomega.Succeed())
	return res
}

// assertEventualSendMessage keeps trying to open a stream and send the given request
// on it until the send succeeds or the test timeout expires.
func assertEventualSendMessage(t *testing.T, rpc *cluster.RemoteContext, req *orderer.SubmitRequest) orderer.Cluster_StepClient {
	var res orderer.Cluster_StepClient
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		stream, err := rpc.NewStream(time.Hour)
		if err != nil {
			return err
		}
		res = stream
		return stream.Send(wrapSubmitReq(req))
	}, timeout).Should(gomega.Succeed())
	return res
}

// wrapSubmitReq wraps a SubmitRequest in a StepRequest envelope.
func wrapSubmitReq(req *orderer.SubmitRequest) *orderer.StepRequest {
	return &orderer.StepRequest{
		Payload: &orderer.StepRequest_SubmitRequest{
			SubmitRequest: req,
		},
	}
}