github.com/anjalikarhana/fabric@v2.1.1+incompatible/orderer/common/cluster/comm_test.go

     1  /*
     2  Copyright IBM Corp. 2017 All Rights Reserved.
     3  
     4  SPDX-License-Identifier: Apache-2.0
     5  */
     6  
     7  package cluster_test
     8  
     9  import (
    10  	"context"
    11  	"crypto/rand"
    12  	"crypto/x509"
    13  	"fmt"
    14  	"net"
    15  	"strings"
    16  	"sync"
    17  	"sync/atomic"
    18  	"testing"
    19  	"time"
    20  
    21  	"github.com/golang/protobuf/proto"
    22  	"github.com/hyperledger/fabric-protos-go/common"
    23  	"github.com/hyperledger/fabric-protos-go/orderer"
    24  	"github.com/hyperledger/fabric/common/crypto/tlsgen"
    25  	"github.com/hyperledger/fabric/common/flogging"
    26  	"github.com/hyperledger/fabric/common/metrics"
    27  	"github.com/hyperledger/fabric/common/metrics/disabled"
    28  	"github.com/hyperledger/fabric/common/metrics/metricsfakes"
    29  	comm_utils "github.com/hyperledger/fabric/internal/pkg/comm"
    30  	"github.com/hyperledger/fabric/orderer/common/cluster"
    31  	"github.com/hyperledger/fabric/orderer/common/cluster/mocks"
    32  	"github.com/onsi/gomega"
    33  	"github.com/pkg/errors"
    34  	"github.com/stretchr/testify/assert"
    35  	"github.com/stretchr/testify/mock"
    36  	"go.uber.org/zap"
    37  	"go.uber.org/zap/zapcore"
    38  	"google.golang.org/grpc"
    39  )
    40  
    41  const (
    42  	testChannel  = "test"
    43  	testChannel2 = "test2"
    44  	timeout      = time.Second * 10
    45  )
    46  
    47  var (
    48  	// CA that generates TLS key-pairs.
    49  	// We use only one CA because the authentication
    50  	// is based on TLS pinning
    51  	ca = createCAOrPanic()
    52  
    53  	lastNodeID uint64
    54  
    55  	testSubReq = &orderer.SubmitRequest{
    56  		Channel: "test",
    57  	}
    58  
    59  	testReq = &orderer.SubmitRequest{
    60  		Channel: "test",
    61  		Payload: &common.Envelope{
    62  			Payload: []byte("test"),
    63  		},
    64  	}
    65  
    66  	testReq2 = &orderer.SubmitRequest{
    67  		Channel: testChannel2,
    68  		Payload: &common.Envelope{
    69  			Payload: []byte(testChannel2),
    70  		},
    71  	}
    72  
    73  	testRes = &orderer.SubmitResponse{
    74  		Info: "test",
    75  	}
    76  
    77  	fooReq = wrapSubmitReq(&orderer.SubmitRequest{
    78  		Channel: "foo",
    79  	})
    80  
    81  	fooRes = &orderer.SubmitResponse{
    82  		Info: "foo",
    83  	}
    84  
    85  	barReq = wrapSubmitReq(&orderer.SubmitRequest{
    86  		Channel: "bar",
    87  	})
    88  
    89  	barRes = &orderer.SubmitResponse{
    90  		Info: "bar",
    91  	}
    92  
    93  	testConsensusReq = &orderer.StepRequest{
    94  		Payload: &orderer.StepRequest_ConsensusRequest{
    95  			ConsensusRequest: &orderer.ConsensusRequest{
    96  				Payload: []byte{1, 2, 3},
    97  				Channel: testChannel,
    98  			},
    99  		},
   100  	}
   101  
   102  	channelExtractor = &mockChannelExtractor{}
   103  )
   104  
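         // nextUnusedID returns a process-wide unique ID for a newly created test node.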
   105  func nextUnusedID() uint64 {
   106  	return atomic.AddUint64(&lastNodeID, 1)
   107  }
   108  
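         // createCAOrPanic creates the single test CA used to issue all TLS key-pairs, and panics on failure.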
   109  func createCAOrPanic() tlsgen.CA {
   110  	ca, err := tlsgen.NewCA()
   111  	if err != nil {
   112  		panic(fmt.Sprintf("failed creating CA: %+v", err))
   113  	}
   114  	return ca
   115  }
   116  
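         // mockChannelExtractor extracts the target channel name from submit and consensus requests.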
   117  type mockChannelExtractor struct{}
   118  
   119  func (*mockChannelExtractor) TargetChannel(msg proto.Message) string {
   120  	switch req := msg.(type) {
   121  	case *orderer.ConsensusRequest:
   122  		return req.Channel
   123  	case *orderer.SubmitRequest:
   124  		return req.Channel
   125  	default:
   126  		return ""
   127  	}
   128  }
   129  
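         // clusterNode is an in-process orderer cluster member used by the tests:
         // it serves the Cluster gRPC service, backed by a mocked handler and a cluster.Comm instance.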
   130  type clusterNode struct {
   131  	lock         sync.Mutex
   132  	frozen       bool
   133  	freezeCond   sync.Cond
   134  	dialer       *cluster.PredicateDialer
   135  	handler      *mocks.Handler
   136  	nodeInfo     cluster.RemoteNode
   137  	srv          *comm_utils.GRPCServer
   138  	bindAddress  string
   139  	clientConfig comm_utils.ClientConfig
   140  	serverConfig comm_utils.ServerConfig
   141  	c            *cluster.Comm
   142  }
   143  
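         // Step serves the Cluster gRPC service by dispatching the incoming request to the node's Comm instance.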
   144  func (cn *clusterNode) Step(stream orderer.Cluster_StepServer) error {
   145  	cn.waitIfFrozen()
   146  	req, err := stream.Recv()
   147  	if err != nil {
   148  		return err
   149  	}
   150  	if submitReq := req.GetSubmitRequest(); submitReq != nil {
   151  		return cn.c.DispatchSubmit(stream.Context(), submitReq)
   152  	}
   153  	if err := cn.c.DispatchConsensus(stream.Context(), req.GetConsensusRequest()); err != nil {
   154  		return err
   155  	}
   156  	return stream.Send(&orderer.StepResponse{})
   157  }
   158  
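         // waitIfFrozen blocks the calling Step handler for as long as the node is frozen.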
    159  func (cn *clusterNode) waitIfFrozen() {
    160  	cn.lock.Lock()
    161  	defer cn.lock.Unlock()
    162  	// There is no freeze after an unfreeze, so there is
    163  	// no need for a for loop around the Wait call.
    164  	if cn.frozen {
    165  		// Wait re-acquires the lock before returning, so the deferred Unlock releases it.
    166  		cn.freezeCond.Wait()
    167  	}
    168  }
   169  
   170  func (cn *clusterNode) freeze() {
   171  	cn.lock.Lock()
   172  	defer cn.lock.Unlock()
   173  	cn.frozen = true
   174  }
   175  
   176  func (cn *clusterNode) unfreeze() {
   177  	cn.lock.Lock()
   178  	cn.frozen = false
   179  	cn.lock.Unlock()
   180  	cn.freezeCond.Broadcast()
   181  }
   182  
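         // resurrect restarts the node's gRPC server on its original bind address.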
   183  func (cn *clusterNode) resurrect() {
   184  	gRPCServer, err := comm_utils.NewGRPCServer(cn.bindAddress, cn.serverConfig)
   185  	if err != nil {
   186  		panic(fmt.Errorf("failed starting gRPC server: %v", err))
   187  	}
   188  	cn.srv = gRPCServer
   189  	orderer.RegisterClusterServer(gRPCServer.Server(), cn)
   190  	go cn.srv.Start()
   191  }
   192  
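         // stop shuts down the node's gRPC server and its Comm instance.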
   193  func (cn *clusterNode) stop() {
   194  	cn.srv.Stop()
   195  	cn.c.Shutdown()
   196  }
   197  
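         // renewCertificates issues fresh client and server TLS key-pairs for the node
         // and updates its node info, server config and dialer config accordingly.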
   198  func (cn *clusterNode) renewCertificates() {
   199  	clientKeyPair, err := ca.NewClientCertKeyPair()
   200  	if err != nil {
   201  		panic(fmt.Errorf("failed creating client certificate %v", err))
   202  	}
   203  	serverKeyPair, err := ca.NewServerCertKeyPair("127.0.0.1")
   204  	if err != nil {
   205  		panic(fmt.Errorf("failed creating server certificate %v", err))
   206  	}
   207  
   208  	cn.nodeInfo.ClientTLSCert = clientKeyPair.TLSCert.Raw
   209  	cn.nodeInfo.ServerTLSCert = serverKeyPair.TLSCert.Raw
   210  
   211  	cn.serverConfig.SecOpts.Certificate = serverKeyPair.Cert
   212  	cn.serverConfig.SecOpts.Key = serverKeyPair.Key
   213  
   214  	cn.dialer.Config.SecOpts.Key = clientKeyPair.Key
   215  	cn.dialer.Config.SecOpts.Certificate = clientKeyPair.Cert
   216  }
   217  
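         // newTestNodeWithMetrics creates and starts a cluster node that reports to the given
         // metrics provider and TLS connection gauge.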
   218  func newTestNodeWithMetrics(t *testing.T, metrics cluster.MetricsProvider, tlsConnGauge metrics.Gauge) *clusterNode {
   219  	serverKeyPair, err := ca.NewServerCertKeyPair("127.0.0.1")
   220  	assert.NoError(t, err)
   221  
   222  	clientKeyPair, _ := ca.NewClientCertKeyPair()
   223  
   224  	handler := &mocks.Handler{}
   225  	clientConfig := comm_utils.ClientConfig{
   226  		AsyncConnect: true,
   227  		Timeout:      time.Hour,
   228  		SecOpts: comm_utils.SecureOptions{
   229  			RequireClientCert: true,
   230  			Key:               clientKeyPair.Key,
   231  			Certificate:       clientKeyPair.Cert,
   232  			ServerRootCAs:     [][]byte{ca.CertBytes()},
   233  			UseTLS:            true,
   234  			ClientRootCAs:     [][]byte{ca.CertBytes()},
   235  		},
   236  	}
   237  
   238  	dialer := &cluster.PredicateDialer{
   239  		Config: clientConfig,
   240  	}
   241  
   242  	srvConfig := comm_utils.ServerConfig{
   243  		SecOpts: comm_utils.SecureOptions{
   244  			Key:         serverKeyPair.Key,
   245  			Certificate: serverKeyPair.Cert,
   246  			UseTLS:      true,
   247  		},
   248  	}
   249  	gRPCServer, err := comm_utils.NewGRPCServer("127.0.0.1:", srvConfig)
   250  	assert.NoError(t, err)
   251  
   252  	tstSrv := &clusterNode{
   253  		dialer:       dialer,
   254  		clientConfig: clientConfig,
   255  		serverConfig: srvConfig,
   256  		bindAddress:  gRPCServer.Address(),
   257  		handler:      handler,
   258  		nodeInfo: cluster.RemoteNode{
   259  			Endpoint:      gRPCServer.Address(),
   260  			ID:            nextUnusedID(),
   261  			ServerTLSCert: serverKeyPair.TLSCert.Raw,
   262  			ClientTLSCert: clientKeyPair.TLSCert.Raw,
   263  		},
   264  		srv: gRPCServer,
   265  	}
   266  
   267  	tstSrv.freezeCond.L = &tstSrv.lock
   268  
   269  	tstSrv.c = &cluster.Comm{
   270  		CertExpWarningThreshold: time.Hour,
   271  		SendBufferSize:          1,
   272  		Logger:                  flogging.MustGetLogger("test"),
   273  		Chan2Members:            make(cluster.MembersByChannel),
   274  		H:                       handler,
   275  		ChanExt:                 channelExtractor,
   276  		Connections:             cluster.NewConnectionStore(dialer, tlsConnGauge),
   277  		Metrics:                 cluster.NewMetrics(metrics),
   278  	}
   279  
   280  	orderer.RegisterClusterServer(gRPCServer.Server(), tstSrv)
   281  	go gRPCServer.Start()
   282  	return tstSrv
   283  }
   284  
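         // newTestNode creates and starts a cluster node with metrics collection disabled.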
   285  func newTestNode(t *testing.T) *clusterNode {
   286  	return newTestNodeWithMetrics(t, &disabled.Provider{}, &disabled.Gauge{})
   287  }
   288  
   289  func TestSendBigMessage(t *testing.T) {
   290  	// Scenario: Basic test that spawns 5 nodes and sends a big message
   291  	// from one of the nodes to the others.
   292  	// A receiver node's Step() server side method (which calls Recv)
    293  	// is frozen until the sender node's Send method returns,
    294  	// hence the sender node finishes calling Send
    295  	// before a receiver node starts calling Recv.
    296  	// This ensures that Send is non-blocking even with big messages.
   297  	// In the test, we send a total of 8MB of random data (2MB to each node).
   298  	// The randomness is used so gRPC compression won't compress it to a lower size.
   299  
   300  	node1 := newTestNode(t)
   301  	node2 := newTestNode(t)
   302  	node3 := newTestNode(t)
   303  	node4 := newTestNode(t)
   304  	node5 := newTestNode(t)
   305  
   306  	for _, node := range []*clusterNode{node2, node3, node4, node5} {
   307  		node.c.SendBufferSize = 1
   308  	}
   309  
   310  	defer node1.stop()
   311  	defer node2.stop()
   312  	defer node3.stop()
   313  	defer node4.stop()
   314  	defer node5.stop()
   315  
   316  	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo, node3.nodeInfo, node4.nodeInfo, node5.nodeInfo}
   317  	node1.c.Configure(testChannel, config)
   318  	node2.c.Configure(testChannel, config)
   319  	node3.c.Configure(testChannel, config)
   320  	node4.c.Configure(testChannel, config)
   321  	node5.c.Configure(testChannel, config)
   322  
   323  	var messageReceived sync.WaitGroup
   324  	messageReceived.Add(4)
   325  
   326  	msgSize := 1024 * 1024 * 2
   327  	bigMsg := &orderer.ConsensusRequest{
   328  		Channel: testChannel,
   329  		Payload: make([]byte, msgSize),
   330  	}
   331  
   332  	_, err := rand.Read(bigMsg.Payload)
   333  	assert.NoError(t, err)
   334  
   335  	wrappedMsg := &orderer.StepRequest{
   336  		Payload: &orderer.StepRequest_ConsensusRequest{
   337  			ConsensusRequest: bigMsg,
   338  		},
   339  	}
   340  
   341  	for _, node := range []*clusterNode{node2, node3, node4, node5} {
   342  		node.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
   343  			msg := args.Get(2).(*orderer.ConsensusRequest)
   344  			assert.Len(t, msg.Payload, msgSize)
   345  			messageReceived.Done()
   346  		}).Return(nil)
   347  	}
   348  
   349  	streams := map[uint64]*cluster.Stream{}
   350  
   351  	for _, node := range []*clusterNode{node2, node3, node4, node5} {
   352  		// Freeze the node, in order to block its Recv
   353  		node.freeze()
   354  	}
   355  
   356  	for _, node := range []*clusterNode{node2, node3, node4, node5} {
   357  		rm, err := node1.c.Remote(testChannel, node.nodeInfo.ID)
   358  		assert.NoError(t, err)
   359  
   360  		stream := assertEventualEstablishStream(t, rm)
   361  		streams[node.nodeInfo.ID] = stream
   362  	}
   363  
   364  	t0 := time.Now()
   365  	for _, node := range []*clusterNode{node2, node3, node4, node5} {
   366  		stream := streams[node.nodeInfo.ID]
   367  
   368  		t1 := time.Now()
   369  		err = stream.Send(wrappedMsg)
   370  		assert.NoError(t, err)
   371  		t.Log("Sending took", time.Since(t1))
   372  		t1 = time.Now()
   373  
   374  		// Unfreeze the node. It can now call Recv, and signal the messageReceived waitGroup.
   375  		node.unfreeze()
   376  	}
   377  
   378  	t.Log("Total sending time to all 4 nodes took:", time.Since(t0))
   379  
   380  	messageReceived.Wait()
   381  }
   382  
   383  func TestBlockingSend(t *testing.T) {
    384  	// Scenario: Basic test that spawns 2 nodes and sends three SubmitRequests
    385  	// or three consensus requests from the first node to the second node.
   386  	// SubmitRequests should block, but consensus requests should not.
   387  
   388  	for _, testCase := range []struct {
   389  		description        string
   390  		messageToSend      *orderer.StepRequest
   391  		streamUnblocks     bool
   392  		elapsedGreaterThan time.Duration
   393  		overflowErr        string
   394  	}{
   395  		{
   396  			description:        "SubmitRequest",
   397  			messageToSend:      wrapSubmitReq(testReq),
   398  			streamUnblocks:     true,
   399  			elapsedGreaterThan: time.Second / 2,
   400  		},
   401  		{
   402  			description:   "ConsensusRequest",
   403  			messageToSend: testConsensusReq,
   404  			overflowErr:   "send queue overflown",
   405  		},
   406  	} {
   407  		t.Run(testCase.description, func(t *testing.T) {
   408  			node1 := newTestNode(t)
   409  			node2 := newTestNode(t)
   410  
   411  			node1.c.SendBufferSize = 1
   412  			node2.c.SendBufferSize = 1
   413  
   414  			defer node1.stop()
   415  			defer node2.stop()
   416  
   417  			config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
   418  			node1.c.Configure(testChannel, config)
   419  			node2.c.Configure(testChannel, config)
   420  
   421  			rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   422  			assert.NoError(t, err)
   423  
   424  			client := &mocks.ClusterClient{}
   425  			fakeStream := &mocks.StepClient{}
   426  
   427  			// Replace real client with a mock client
   428  			rm.Client = client
   429  			rm.ProbeConn = func(_ *grpc.ClientConn) error {
   430  				return nil
   431  			}
   432  			// Configure client to return the mock stream
   433  			fakeStream.On("Context", mock.Anything).Return(context.Background())
   434  			client.On("Step", mock.Anything).Return(fakeStream, nil).Once()
   435  
   436  			unBlock := make(chan struct{})
   437  			var sendInvoked sync.WaitGroup
   438  			sendInvoked.Add(1)
   439  			var once sync.Once
   440  			fakeStream.On("Send", mock.Anything).Run(func(_ mock.Arguments) {
   441  				once.Do(sendInvoked.Done)
   442  				<-unBlock
   443  			}).Return(errors.New("oops"))
   444  
   445  			stream, err := rm.NewStream(time.Hour)
   446  			assert.NoError(t, err)
   447  
   448  			// The first send doesn't block, even though the Send operation blocks.
   449  			err = stream.Send(testCase.messageToSend)
   450  			assert.NoError(t, err)
   451  
    452  			// The second one doesn't block either.
   453  			// After this point, we have 1 goroutine which is blocked on Send(),
   454  			// and one message in the buffer.
   455  			sendInvoked.Wait()
   456  			err = stream.Send(testCase.messageToSend)
   457  			assert.NoError(t, err)
   458  
    459  			// The third send blocks, so we need to unblock it ourselves
    460  			// in order for it to go through, unless the operation
    461  			// is non-blocking.
   462  			go func() {
   463  				time.Sleep(time.Second)
   464  				if testCase.streamUnblocks {
   465  					close(unBlock)
   466  				}
   467  			}()
   468  
   469  			t1 := time.Now()
   470  			err = stream.Send(testCase.messageToSend)
   471  			// The third send always overflows or blocks.
   472  			// If we expect to receive an overflow error - assert it.
   473  			if testCase.overflowErr != "" {
   474  				assert.EqualError(t, err, testCase.overflowErr)
   475  			}
   476  			elapsed := time.Since(t1)
   477  			t.Log("Elapsed time:", elapsed)
   478  			assert.True(t, elapsed > testCase.elapsedGreaterThan)
   479  
   480  			if !testCase.streamUnblocks {
   481  				close(unBlock)
   482  			}
   483  		})
   484  	}
   485  }
   486  
   487  func TestBasic(t *testing.T) {
   488  	// Scenario: Basic test that spawns 2 nodes and sends each other
   489  	// messages that are expected to be echoed back
   490  
   491  	node1 := newTestNode(t)
   492  	node2 := newTestNode(t)
   493  
   494  	defer node1.stop()
   495  	defer node2.stop()
   496  
   497  	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
   498  	node1.c.Configure(testChannel, config)
   499  	node2.c.Configure(testChannel, config)
   500  
   501  	assertBiDiCommunication(t, node1, node2, testReq)
   502  }
   503  
   504  func TestUnavailableHosts(t *testing.T) {
   505  	// Scenario: A node is configured to connect
   506  	// to a host that is down
   507  	node1 := newTestNode(t)
   508  
   509  	clientConfig := node1.dialer.Config
    510  	// The timeout below makes sure that connection establishment is done
   511  	// asynchronously. Had it been synchronous, the Remote() call would be
   512  	// blocked for an hour.
   513  	clientConfig.Timeout = time.Hour
   514  	defer node1.stop()
   515  
   516  	node2 := newTestNode(t)
   517  	node2.stop()
   518  
   519  	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
   520  	remote, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   521  	assert.NoError(t, err)
   522  	assert.NotNil(t, remote)
   523  
   524  	_, err = remote.NewStream(time.Millisecond * 100)
   525  	assert.Contains(t, err.Error(), "connection")
   526  }
   527  
   528  func TestStreamAbort(t *testing.T) {
   529  	// Scenarios: node 1 is connected to node 2 in 2 channels,
   530  	// and the consumer of the communication calls receive.
    531  	// The following two sub-scenarios are tested:
   532  	// 1) The server certificate of node 2 changes in the first channel
   533  	// 2) Node 2 is evicted from the membership of the first channel
   534  	// In both of the scenarios, the Recv() call should be aborted
   535  
   536  	node2 := newTestNode(t)
   537  	defer node2.stop()
   538  
   539  	invalidNodeInfo := cluster.RemoteNode{
   540  		ID:            node2.nodeInfo.ID,
   541  		ServerTLSCert: []byte{1, 2, 3},
   542  		ClientTLSCert: []byte{1, 2, 3},
   543  	}
   544  
   545  	for _, tst := range []struct {
   546  		testName      string
   547  		membership    []cluster.RemoteNode
   548  		expectedError string
   549  	}{
   550  		{
   551  			testName:      "Evicted from membership",
   552  			membership:    nil,
   553  			expectedError: "rpc error: code = Canceled desc = context canceled",
   554  		},
   555  		{
   556  			testName:      "Changed TLS certificate",
   557  			membership:    []cluster.RemoteNode{invalidNodeInfo},
   558  			expectedError: "rpc error: code = Canceled desc = context canceled",
   559  		},
   560  	} {
   561  		t.Run(tst.testName, func(t *testing.T) {
   562  			testStreamAbort(t, node2, tst.membership, tst.expectedError)
   563  		})
   564  	}
   565  	node2.handler.AssertNumberOfCalls(t, "OnSubmit", 2)
   566  }
   567  
   568  func testStreamAbort(t *testing.T, node2 *clusterNode, newMembership []cluster.RemoteNode, expectedError string) {
   569  	node1 := newTestNode(t)
   570  	defer node1.stop()
   571  
   572  	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
   573  	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
   574  	node1.c.Configure(testChannel2, []cluster.RemoteNode{node2.nodeInfo})
   575  	node2.c.Configure(testChannel2, []cluster.RemoteNode{node1.nodeInfo})
   576  
   577  	var streamCreated sync.WaitGroup
   578  	streamCreated.Add(1)
   579  
   580  	stopChan := make(chan struct{})
   581  
   582  	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Once().Run(func(_ mock.Arguments) {
   583  		// Notify the stream was created
   584  		streamCreated.Done()
   585  		// Wait for the test to finish
   586  		<-stopChan
   587  	}).Return(nil).Once()
   588  
   589  	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   590  	assert.NoError(t, err)
   591  
   592  	go func() {
   593  		stream := assertEventualEstablishStream(t, rm1)
   594  		// Signal the reconfiguration
   595  		err = stream.Send(wrapSubmitReq(testReq))
   596  		assert.NoError(t, err)
   597  		_, err := stream.Recv()
   598  		assert.Contains(t, err.Error(), expectedError)
   599  		close(stopChan)
   600  	}()
   601  
   602  	go func() {
   603  		// Wait for the stream reference to be obtained
   604  		streamCreated.Wait()
   605  		// Reconfigure the channel membership
   606  		node1.c.Configure(testChannel, newMembership)
   607  	}()
   608  
   609  	<-stopChan
   610  }
   611  
   612  func TestDoubleReconfigure(t *testing.T) {
   613  	// Scenario: Basic test that spawns 2 nodes
   614  	// and configures node 1 twice, and checks that
    615  	// the remote stub towards node 2 wasn't re-created in the second
   616  	// configuration since it already existed
   617  
   618  	node1 := newTestNode(t)
   619  	node2 := newTestNode(t)
   620  
   621  	defer node1.stop()
   622  	defer node2.stop()
   623  
   624  	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
   625  	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   626  	assert.NoError(t, err)
   627  
   628  	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
   629  	rm2, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   630  	assert.NoError(t, err)
   631  	// Ensure the references are equal
   632  	assert.True(t, rm1 == rm2)
   633  }
   634  
   635  func TestInvalidChannel(t *testing.T) {
    636  	// Scenario: node 1 is ordered to send a message on a channel
   637  	// that doesn't exist, and also receives a message, but
   638  	// the channel cannot be extracted from the message.
   639  
   640  	t.Run("channel doesn't exist", func(t *testing.T) {
   641  		node1 := newTestNode(t)
   642  		defer node1.stop()
   643  
   644  		_, err := node1.c.Remote(testChannel, 0)
   645  		assert.EqualError(t, err, "channel test doesn't exist")
   646  	})
   647  
   648  	t.Run("channel cannot be extracted", func(t *testing.T) {
   649  		node1 := newTestNode(t)
   650  		defer node1.stop()
   651  
   652  		node1.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
   653  		gt := gomega.NewGomegaWithT(t)
   654  		gt.Eventually(func() (bool, error) {
   655  			_, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
   656  			return true, err
   657  		}, time.Minute).Should(gomega.BeTrue())
   658  
   659  		stub, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
   660  		assert.NoError(t, err)
   661  
   662  		stream := assertEventualEstablishStream(t, stub)
   663  
   664  		// An empty SubmitRequest has an empty channel which is invalid
   665  		err = stream.Send(wrapSubmitReq(&orderer.SubmitRequest{}))
   666  		assert.NoError(t, err)
   667  
   668  		_, err = stream.Recv()
   669  		assert.EqualError(t, err, "rpc error: code = Unknown desc = badly formatted message, cannot extract channel")
   670  
   671  		// Test directly without going through the gRPC stream
   672  		err = node1.c.DispatchSubmit(context.Background(), &orderer.SubmitRequest{})
   673  		assert.EqualError(t, err, "badly formatted message, cannot extract channel")
   674  	})
   675  }
   676  
   677  func TestAbortRPC(t *testing.T) {
   678  	// Scenarios:
   679  	// (I) The node calls an RPC, and calls Abort() on the remote context
   680  	//  in parallel. The RPC should return even though the server-side call hasn't finished.
   681  	// (II) The node calls an RPC, but the server-side processing takes too long,
   682  	// and the RPC invocation returns prematurely.
   683  
   684  	testCases := []struct {
   685  		name        string
   686  		abortFunc   func(*cluster.RemoteContext)
   687  		rpcTimeout  time.Duration
   688  		expectedErr string
   689  	}{
   690  		{
   691  			name:        "Abort() called",
   692  			expectedErr: "rpc error: code = Canceled desc = context canceled",
   693  			rpcTimeout:  time.Hour,
   694  			abortFunc: func(rc *cluster.RemoteContext) {
   695  				rc.Abort()
   696  			},
   697  		},
   698  		{
   699  			name:        "RPC timeout",
   700  			expectedErr: "rpc timeout expired",
   701  			rpcTimeout:  time.Second,
   702  			abortFunc:   func(*cluster.RemoteContext) {},
   703  		},
   704  	}
   705  
   706  	for _, testCase := range testCases {
   707  		testCase := testCase
   708  		t.Run(testCase.name, func(t *testing.T) {
   709  			testAbort(t, testCase.abortFunc, testCase.rpcTimeout, testCase.expectedErr)
   710  		})
   711  	}
   712  }
   713  
   714  func testAbort(t *testing.T, abortFunc func(*cluster.RemoteContext), rpcTimeout time.Duration, expectedErr string) {
   715  	node1 := newTestNode(t)
   716  	defer node1.stop()
   717  
   718  	node2 := newTestNode(t)
   719  	defer node2.stop()
   720  
   721  	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
   722  	node1.c.Configure(testChannel, config)
   723  	node2.c.Configure(testChannel, config)
   724  	var onStepCalled sync.WaitGroup
   725  	onStepCalled.Add(1)
   726  
    727  	// stuckCall ensures the OnSubmit() call is stuck throughout this test
   728  	var stuckCall sync.WaitGroup
   729  	stuckCall.Add(1)
   730  	// At the end of the test, release the server-side resources
   731  	defer stuckCall.Done()
   732  
   733  	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(nil).Once().Run(func(_ mock.Arguments) {
   734  		onStepCalled.Done()
   735  		stuckCall.Wait()
   736  	}).Once()
   737  
   738  	rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   739  	assert.NoError(t, err)
   740  
   741  	go func() {
   742  		onStepCalled.Wait()
   743  		abortFunc(rm)
   744  	}()
   745  
   746  	var stream *cluster.Stream
   747  	gt := gomega.NewGomegaWithT(t)
   748  	gt.Eventually(func() error {
   749  		stream, err = rm.NewStream(rpcTimeout)
   750  		return err
   751  	}, time.Second*10, time.Millisecond*10).Should(gomega.Succeed())
   752  
   753  	stream.Send(wrapSubmitReq(testSubReq))
   754  	_, err = stream.Recv()
   755  
   756  	assert.EqualError(t, err, expectedErr)
   757  
   758  	node2.handler.AssertNumberOfCalls(t, "OnSubmit", 1)
   759  }
   760  
   761  func TestNoTLSCertificate(t *testing.T) {
   762  	// Scenario: The node is sent a message by another node that doesn't
   763  	// connect with mutual TLS, thus doesn't provide a TLS certificate
   764  	node1 := newTestNode(t)
   765  	defer node1.stop()
   766  
   767  	node1.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
   768  
   769  	clientConfig := comm_utils.ClientConfig{
   770  		AsyncConnect: true,
   771  		Timeout:      time.Millisecond * 100,
   772  		SecOpts: comm_utils.SecureOptions{
   773  			ServerRootCAs: [][]byte{ca.CertBytes()},
   774  			UseTLS:        true,
   775  		},
   776  	}
   777  	cl, err := comm_utils.NewGRPCClient(clientConfig)
   778  	assert.NoError(t, err)
   779  
   780  	var conn *grpc.ClientConn
   781  	gt := gomega.NewGomegaWithT(t)
   782  	gt.Eventually(func() (bool, error) {
   783  		conn, err = cl.NewConnection(node1.srv.Address())
   784  		return true, err
   785  	}, time.Minute).Should(gomega.BeTrue())
   786  
   787  	echoClient := orderer.NewClusterClient(conn)
   788  	stream, err := echoClient.Step(context.Background())
   789  	assert.NoError(t, err)
   790  
   791  	err = stream.Send(wrapSubmitReq(testSubReq))
   792  	assert.NoError(t, err)
   793  	_, err = stream.Recv()
   794  	assert.EqualError(t, err, "rpc error: code = Unknown desc = no TLS certificate sent")
   795  }
   796  
   797  func TestReconnect(t *testing.T) {
   798  	// Scenario: node 1 and node 2 are connected,
   799  	// and node 2 is taken offline.
   800  	// Node 1 tries to send a message to node 2 but fails,
   801  	// and afterwards node 2 is brought back, after which
   802  	// node 1 sends more messages, and it should succeed
   803  	// sending a message to node 2 eventually.
   804  
   805  	node1 := newTestNode(t)
   806  	defer node1.stop()
   807  	conf := node1.dialer.Config
   808  	conf.Timeout = time.Hour
   809  
   810  	node2 := newTestNode(t)
   811  	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(nil)
   812  	defer node2.stop()
   813  
   814  	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
   815  	node1.c.Configure(testChannel, config)
   816  	node2.c.Configure(testChannel, config)
   817  
    818  	// Take node 2 offline by shutting down its gRPC service
   819  	node2.srv.Stop()
   820  	// Obtain the stub for node 2.
   821  	// Should succeed, because the connection was created at time of configuration
   822  	stub, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   823  	assert.NoError(t, err)
   824  
    825  	// Try to obtain a stream. It should not succeed.
   826  	gt := gomega.NewGomegaWithT(t)
   827  	gt.Eventually(func() error {
   828  		_, err = stub.NewStream(time.Hour)
   829  		return err
   830  	}).Should(gomega.Not(gomega.Succeed()))
   831  
   832  	// Wait for the port to be released
   833  	for {
   834  		lsnr, err := net.Listen("tcp", node2.nodeInfo.Endpoint)
   835  		if err == nil {
   836  			lsnr.Close()
   837  			break
   838  		}
   839  	}
   840  
   841  	// Resurrect node 2
   842  	node2.resurrect()
   843  	// Send a message from node 1 to node 2.
   844  	// Should succeed eventually
   845  	assertEventualSendMessage(t, stub, testReq)
   846  }
   847  
   848  func TestRenewCertificates(t *testing.T) {
   849  	// Scenario: node 1 and node 2 are connected,
   850  	// and the certificates are renewed for both nodes
   851  	// at the same time.
   852  	// They are expected to connect to one another
   853  	// after the reconfiguration.
   854  
   855  	node1 := newTestNode(t)
   856  	defer node1.stop()
   857  
   858  	node2 := newTestNode(t)
   859  	defer node2.stop()
   860  
   861  	node1.handler.On("OnStep", testChannel, node2.nodeInfo.ID, mock.Anything).Return(testRes, nil)
   862  	node2.handler.On("OnStep", testChannel, node1.nodeInfo.ID, mock.Anything).Return(testRes, nil)
   863  
   864  	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
   865  	node1.c.Configure(testChannel, config)
   866  	node2.c.Configure(testChannel, config)
   867  
   868  	assertBiDiCommunication(t, node1, node2, testReq)
   869  
    870  	// Now, renew the certificates of both nodes
   871  	node1.renewCertificates()
   872  	node2.renewCertificates()
   873  
   874  	// Reconfigure them
   875  	config = []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
   876  	node1.c.Configure(testChannel, config)
   877  	node2.c.Configure(testChannel, config)
   878  
    879  	// W.l.o.g., try to send a message from node1 to node2.
    880  	// It should fail, because node2's server certificate has now changed,
    881  	// so the connection to the remote node was closed.
   882  	info2 := node2.nodeInfo
   883  	remote, err := node1.c.Remote(testChannel, info2.ID)
   884  	assert.NoError(t, err)
   885  	assert.NotNil(t, remote)
   886  
   887  	gt := gomega.NewGomegaWithT(t)
   888  	gt.Eventually(func() string {
   889  		_, err = remote.NewStream(time.Hour)
   890  		return err.Error()
   891  	}, timeout).Should(gomega.ContainSubstring(info2.Endpoint))
   892  
   893  	// Restart the gRPC service on both nodes, to load the new TLS certificates
   894  	node1.srv.Stop()
   895  	node1.resurrect()
   896  	node2.srv.Stop()
   897  	node2.resurrect()
   898  
   899  	// Finally, check that the nodes can communicate once again
   900  	assertBiDiCommunication(t, node1, node2, testReq)
   901  }
   902  
   903  func TestMembershipReconfiguration(t *testing.T) {
   904  	// Scenario: node 1 and node 2 are started up
   905  	// and node 2 is configured to know about node 1,
   906  	// without node1 knowing about node 2.
   907  	// The communication between them should only work
   908  	// after node 1 is configured to know about node 2.
   909  
   910  	node1 := newTestNode(t)
   911  	defer node1.stop()
   912  
   913  	node2 := newTestNode(t)
   914  	defer node2.stop()
   915  
   916  	node1.c.Configure(testChannel, []cluster.RemoteNode{})
   917  	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
   918  
   919  	// Node 1 can't connect to node 2 because it doesn't know its TLS certificate yet
   920  	_, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   921  	assert.EqualError(t, err, fmt.Sprintf("node %d doesn't exist in channel test's membership", node2.nodeInfo.ID))
    922  	// Node 2 can connect to node 1, but it can't send messages to it because node 1 doesn't know node 2 yet.
   923  
   924  	gt := gomega.NewGomegaWithT(t)
   925  	gt.Eventually(func() (bool, error) {
   926  		_, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
   927  		return true, err
   928  	}, time.Minute).Should(gomega.BeTrue())
   929  
   930  	stub, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
   931  
   932  	stream := assertEventualEstablishStream(t, stub)
   933  	err = stream.Send(wrapSubmitReq(testSubReq))
   934  	assert.NoError(t, err)
   935  
   936  	_, err = stream.Recv()
   937  	assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
   938  
   939  	// Next, configure node 1 to know about node 2
   940  	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
   941  
   942  	// Check that the communication works correctly between both nodes
   943  	assertBiDiCommunication(t, node1, node2, testReq)
   944  	assertBiDiCommunication(t, node2, node1, testReq)
   945  
   946  	// Reconfigure node 2 to forget about node 1
   947  	node2.c.Configure(testChannel, []cluster.RemoteNode{})
   948  	// Node 1 can still connect to node 2
   949  	stub, err = node1.c.Remote(testChannel, node2.nodeInfo.ID)
   950  	assert.NoError(t, err)
    951  	// But it can't send a message because node 2 no longer authorizes node 1
   952  	stream = assertEventualEstablishStream(t, stub)
   953  	stream.Send(wrapSubmitReq(testSubReq))
   954  	_, err = stream.Recv()
   955  	assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
   956  }
   957  
   958  func TestShutdown(t *testing.T) {
   959  	// Scenario: node 1 is shut down and as a result, can't
   960  	// send messages to anyone, nor can it be reconfigured
   961  
   962  	node1 := newTestNode(t)
   963  	defer node1.stop()
   964  
   965  	node1.c.Shutdown()
   966  
   967  	// Obtaining a RemoteContext cannot succeed because shutdown was called before
   968  	_, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
   969  	assert.EqualError(t, err, "communication has been shut down")
   970  
   971  	node2 := newTestNode(t)
   972  	defer node2.stop()
   973  
   974  	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
    975  	// Configuration of node 1 doesn't take place
   976  	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
   977  
   978  	gt := gomega.NewGomegaWithT(t)
   979  	gt.Eventually(func() error {
   980  		_, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
   981  		return err
   982  	}, time.Minute).Should(gomega.Succeed())
   983  
   984  	stub, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
   985  
   986  	// Therefore, sending a message doesn't succeed because node 1 rejected the configuration change
   987  	gt.Eventually(func() string {
   988  		stream, err := stub.NewStream(time.Hour)
   989  		if err != nil {
   990  			return err.Error()
   991  		}
   992  		err = stream.Send(wrapSubmitReq(testSubReq))
   993  		assert.NoError(t, err)
   994  
   995  		_, err = stream.Recv()
   996  		return err.Error()
   997  	}, timeout).Should(gomega.ContainSubstring("channel test doesn't exist"))
   998  }
   999  
  1000  func TestMultiChannelConfig(t *testing.T) {
   1001  	// Scenario: node 1 knows node 2 only in channel "foo"
   1002  	// and knows node 3 only in channel "bar".
   1003  	// Messages that are received are routed according to their corresponding channels,
   1004  	// and when node 2 sends a message for channel "bar" to node 1, it is rejected.
   1005  	// The same applies to node 3 when it sends a message to node 1 in channel "foo".
  1006  
  1007  	node1 := newTestNode(t)
  1008  	defer node1.stop()
  1009  
  1010  	node2 := newTestNode(t)
  1011  	defer node2.stop()
  1012  
  1013  	node3 := newTestNode(t)
  1014  	defer node3.stop()
  1015  
  1016  	node1.c.Configure("foo", []cluster.RemoteNode{node2.nodeInfo})
  1017  	node1.c.Configure("bar", []cluster.RemoteNode{node3.nodeInfo})
  1018  	node2.c.Configure("foo", []cluster.RemoteNode{node1.nodeInfo})
  1019  	node3.c.Configure("bar", []cluster.RemoteNode{node1.nodeInfo})
  1020  
  1021  	t.Run("Correct channel", func(t *testing.T) {
  1022  		var fromNode2 sync.WaitGroup
  1023  		fromNode2.Add(1)
  1024  		node1.handler.On("OnSubmit", "foo", node2.nodeInfo.ID, mock.Anything).Return(nil).Run(func(_ mock.Arguments) {
  1025  			fromNode2.Done()
  1026  		}).Once()
  1027  
  1028  		var fromNode3 sync.WaitGroup
  1029  		fromNode3.Add(1)
  1030  		node1.handler.On("OnSubmit", "bar", node3.nodeInfo.ID, mock.Anything).Return(nil).Run(func(_ mock.Arguments) {
  1031  			fromNode3.Done()
  1032  		}).Once()
  1033  
  1034  		node2toNode1, err := node2.c.Remote("foo", node1.nodeInfo.ID)
  1035  		assert.NoError(t, err)
  1036  		node3toNode1, err := node3.c.Remote("bar", node1.nodeInfo.ID)
  1037  		assert.NoError(t, err)
  1038  
  1039  		stream := assertEventualEstablishStream(t, node2toNode1)
  1040  		stream.Send(fooReq)
  1041  
  1042  		fromNode2.Wait()
  1043  		node1.handler.AssertNumberOfCalls(t, "OnSubmit", 1)
  1044  
  1045  		stream = assertEventualEstablishStream(t, node3toNode1)
  1046  		stream.Send(barReq)
  1047  
  1048  		fromNode3.Wait()
  1049  		node1.handler.AssertNumberOfCalls(t, "OnSubmit", 2)
  1050  	})
  1051  
  1052  	t.Run("Incorrect channel", func(t *testing.T) {
  1053  		node1.handler.On("OnSubmit", "foo", node2.nodeInfo.ID, mock.Anything).Return(nil)
  1054  		node1.handler.On("OnSubmit", "bar", node3.nodeInfo.ID, mock.Anything).Return(nil)
  1055  
  1056  		node2toNode1, err := node2.c.Remote("foo", node1.nodeInfo.ID)
  1057  		assert.NoError(t, err)
  1058  		node3toNode1, err := node3.c.Remote("bar", node1.nodeInfo.ID)
  1059  		assert.NoError(t, err)
  1060  
  1061  		assertEventualSendMessage(t, node2toNode1, &orderer.SubmitRequest{Channel: "foo"})
  1062  		stream, err := node2toNode1.NewStream(time.Hour)
  1063  		err = stream.Send(barReq)
  1064  		assert.NoError(t, err)
  1065  		_, err = stream.Recv()
  1066  		assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
  1067  
  1068  		assertEventualSendMessage(t, node3toNode1, &orderer.SubmitRequest{Channel: "bar"})
  1069  		stream, err = node3toNode1.NewStream(time.Hour)
  1070  		err = stream.Send(fooReq)
  1071  		assert.NoError(t, err)
  1072  		_, err = stream.Recv()
  1073  		assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
  1074  	})
  1075  }
  1076  
  1077  func TestConnectionFailure(t *testing.T) {
  1078  	// Scenario: node 1 fails to connect to node 2.
  1079  
  1080  	node1 := newTestNode(t)
  1081  	defer node1.stop()
  1082  
  1083  	node2 := newTestNode(t)
  1084  	defer node2.stop()
  1085  
  1086  	dialer := &mocks.SecureDialer{}
  1087  	dialer.On("Dial", mock.Anything, mock.Anything).Return(nil, errors.New("oops"))
  1088  	node1.c.Connections = cluster.NewConnectionStore(dialer, &disabled.Gauge{})
  1089  	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
  1090  
  1091  	_, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
  1092  	assert.EqualError(t, err, "oops")
  1093  }
  1094  
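         // testMetrics aggregates the fake provider and the fake gauges, counter and histogram that back the cluster metrics under test.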
  1095  type testMetrics struct {
  1096  	fakeProvider        *mocks.MetricsProvider
  1097  	egressQueueLength   metricsfakes.Gauge
  1098  	egressQueueCapacity metricsfakes.Gauge
  1099  	egressStreamCount   metricsfakes.Gauge
  1100  	egressTLSConnCount  metricsfakes.Gauge
  1101  	egressWorkerSize    metricsfakes.Gauge
  1102  	ingressStreamsCount metricsfakes.Gauge
  1103  	msgSendTime         metricsfakes.Histogram
  1104  	msgDropCount        metricsfakes.Counter
  1105  }
  1106  
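         // initialize wires each fake metric into the fake provider, so that cluster.NewMetrics picks them up.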
  1107  func (tm *testMetrics) initialize() {
  1108  	tm.egressQueueLength.WithReturns(&tm.egressQueueLength)
  1109  	tm.egressQueueCapacity.WithReturns(&tm.egressQueueCapacity)
  1110  	tm.egressStreamCount.WithReturns(&tm.egressStreamCount)
  1111  	tm.egressTLSConnCount.WithReturns(&tm.egressTLSConnCount)
  1112  	tm.egressWorkerSize.WithReturns(&tm.egressWorkerSize)
  1113  	tm.ingressStreamsCount.WithReturns(&tm.ingressStreamsCount)
  1114  	tm.msgSendTime.WithReturns(&tm.msgSendTime)
  1115  	tm.msgDropCount.WithReturns(&tm.msgDropCount)
  1116  
  1117  	fakeProvider := tm.fakeProvider
  1118  	fakeProvider.On("NewGauge", cluster.IngressStreamsCountOpts).Return(&tm.ingressStreamsCount)
  1119  	fakeProvider.On("NewGauge", cluster.EgressQueueLengthOpts).Return(&tm.egressQueueLength)
  1120  	fakeProvider.On("NewGauge", cluster.EgressQueueCapacityOpts).Return(&tm.egressQueueCapacity)
  1121  	fakeProvider.On("NewGauge", cluster.EgressStreamsCountOpts).Return(&tm.egressStreamCount)
  1122  	fakeProvider.On("NewGauge", cluster.EgressTLSConnectionCountOpts).Return(&tm.egressTLSConnCount)
  1123  	fakeProvider.On("NewGauge", cluster.EgressWorkersOpts).Return(&tm.egressWorkerSize)
  1124  	fakeProvider.On("NewCounter", cluster.MessagesDroppedCountOpts).Return(&tm.msgDropCount)
  1125  	fakeProvider.On("NewHistogram", cluster.MessageSendTimeOpts).Return(&tm.msgSendTime)
  1126  }
  1127  
  1128  func TestMetrics(t *testing.T) {
  1129  	for _, testCase := range []struct {
  1130  		name        string
  1131  		runTest     func(node1, node2 *clusterNode, testMetrics *testMetrics)
  1132  		testMetrics *testMetrics
  1133  	}{
  1134  		{
  1135  			name: "EgressQueueOccupancy",
  1136  			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
  1137  				assertBiDiCommunication(t, node1, node2, testReq)
  1138  				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "msg_type", "transaction", "channel", testChannel},
  1139  					testMetrics.egressQueueLength.WithArgsForCall(0))
  1140  				assert.Equal(t, float64(0), testMetrics.egressQueueLength.SetArgsForCall(0))
  1141  				assert.Equal(t, float64(1), testMetrics.egressQueueCapacity.SetArgsForCall(0))
  1142  
  1143  				var messageReceived sync.WaitGroup
  1144  				messageReceived.Add(1)
  1145  				node2.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
  1146  					messageReceived.Done()
  1147  				}).Return(nil)
  1148  
  1149  				rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
  1150  				assert.NoError(t, err)
  1151  
  1152  				stream := assertEventualEstablishStream(t, rm)
  1153  				stream.Send(testConsensusReq)
  1154  				messageReceived.Wait()
  1155  
  1156  				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "msg_type", "consensus", "channel", testChannel},
  1157  					testMetrics.egressQueueLength.WithArgsForCall(1))
  1158  				assert.Equal(t, float64(0), testMetrics.egressQueueLength.SetArgsForCall(1))
  1159  				assert.Equal(t, float64(1), testMetrics.egressQueueCapacity.SetArgsForCall(1))
  1160  			},
  1161  		},
  1162  		{
  1163  			name: "EgressStreamsCount",
  1164  			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
  1165  				assertBiDiCommunication(t, node1, node2, testReq)
  1166  				assert.Equal(t, 1, testMetrics.egressStreamCount.SetCallCount())
  1167  				assert.Equal(t, 1, testMetrics.egressStreamCount.WithCallCount())
  1168  				assert.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))
  1169  
  1170  				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
  1171  				assert.Equal(t, 2, testMetrics.egressStreamCount.SetCallCount())
  1172  				assert.Equal(t, 2, testMetrics.egressStreamCount.WithCallCount())
  1173  				assert.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))
  1174  			},
  1175  		},
  1176  		{
  1177  			name: "EgressTLSConnCount",
  1178  			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
  1179  				assertBiDiCommunication(t, node1, node2, testReq)
  1180  				assert.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))
  1181  
  1182  				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
  1183  				assert.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))
  1184  
  1185  				// A single TLS connection despite 2 streams
  1186  				assert.Equal(t, float64(1), testMetrics.egressTLSConnCount.SetArgsForCall(0))
  1187  				assert.Equal(t, 1, testMetrics.egressTLSConnCount.SetCallCount())
  1188  			},
  1189  		},
  1190  		{
  1191  			name: "EgressWorkerSize",
  1192  			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
  1193  				assertBiDiCommunication(t, node1, node2, testReq)
  1194  				assert.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))
  1195  
  1196  				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
  1197  				assert.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))
  1198  
  1199  				assert.Equal(t, float64(1), testMetrics.egressWorkerSize.SetArgsForCall(0))
  1200  				assert.Equal(t, float64(1), testMetrics.egressWorkerSize.SetArgsForCall(1))
  1201  			},
  1202  		},
  1203  		{
   1204  			name: "MsgSendTime",
  1205  			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
  1206  				assertBiDiCommunication(t, node1, node2, testReq)
  1207  				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "channel", testChannel},
  1208  					testMetrics.msgSendTime.WithArgsForCall(0))
  1209  
  1210  				assert.Equal(t, 1, testMetrics.msgSendTime.ObserveCallCount())
  1211  			},
  1212  		},
  1213  		{
  1214  			name: "MsgDropCount",
  1215  			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
  1216  				blockRecv := make(chan struct{})
  1217  				wasReported := func() bool {
  1218  					select {
  1219  					case <-blockRecv:
  1220  						return true
  1221  					default:
  1222  						return false
  1223  					}
  1224  				}
   1225  				// When the drop count is reported, unblock the server-side receive operation.
  1226  				testMetrics.msgDropCount.AddStub = func(float642 float64) {
  1227  					if !wasReported() {
  1228  						close(blockRecv)
  1229  					}
  1230  				}
  1231  
  1232  				node2.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
  1233  					// Block until the message drop is reported
  1234  					<-blockRecv
  1235  				}).Return(nil)
  1236  
  1237  				rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
  1238  				assert.NoError(t, err)
  1239  
  1240  				stream := assertEventualEstablishStream(t, rm)
  1241  				// Send too many messages while the server side is not reading from the stream
  1242  				for {
  1243  					stream.Send(testConsensusReq)
  1244  					if wasReported() {
  1245  						break
  1246  					}
  1247  				}
  1248  				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "channel", testChannel},
  1249  					testMetrics.msgDropCount.WithArgsForCall(0))
  1250  				assert.Equal(t, 1, testMetrics.msgDropCount.AddCallCount())
  1251  			},
  1252  		},
  1253  	} {
  1254  		testCase := testCase
  1255  		t.Run(testCase.name, func(t *testing.T) {
  1256  			fakeProvider := &mocks.MetricsProvider{}
  1257  			testCase.testMetrics = &testMetrics{
  1258  				fakeProvider: fakeProvider,
  1259  			}
  1260  
  1261  			testCase.testMetrics.initialize()
  1262  
  1263  			node1 := newTestNodeWithMetrics(t, fakeProvider, &testCase.testMetrics.egressTLSConnCount)
  1264  			defer node1.stop()
  1265  
  1266  			node2 := newTestNode(t)
  1267  			defer node2.stop()
  1268  
  1269  			configForNode1 := []cluster.RemoteNode{node2.nodeInfo}
  1270  			configForNode2 := []cluster.RemoteNode{node1.nodeInfo}
  1271  			node1.c.Configure(testChannel, configForNode1)
  1272  			node2.c.Configure(testChannel, configForNode2)
  1273  			node1.c.Configure(testChannel2, configForNode1)
  1274  			node2.c.Configure(testChannel2, configForNode2)
  1275  
  1276  			testCase.runTest(node1, node2, testCase.testMetrics)
  1277  		})
  1278  	}
  1279  }
  1280  
  1281  func TestCertExpirationWarningEgress(t *testing.T) {
  1282  	// Scenario: Ensures that when certificates are due to expire,
   1283  	// a warning is logged.
  1284  
  1285  	node1 := newTestNode(t)
  1286  	node2 := newTestNode(t)
  1287  
  1288  	cert, err := x509.ParseCertificate(node2.nodeInfo.ServerTLSCert)
  1289  	assert.NoError(t, err)
  1290  	assert.NotNil(t, cert)
  1291  
  1292  	// Let the NotAfter time of the certificate be T1, the current time be T0.
  1293  	// So time.Until is (T1 - T0), which means we have (T1 - T0) time left.
  1294  	// We want to trigger a warning, so we set the warning threshold to be 20 seconds above
  1295  	// the time left, so the time left would be smaller than the threshold.
  1296  	node1.c.CertExpWarningThreshold = time.Until(cert.NotAfter) + time.Second*20
   1297  	// We only alert once every 3 seconds
  1298  	node1.c.MinimumExpirationWarningInterval = time.Second * 3
  1299  
  1300  	defer node1.stop()
  1301  	defer node2.stop()
  1302  
  1303  	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
  1304  	node1.c.Configure(testChannel, config)
  1305  	node2.c.Configure(testChannel, config)
  1306  
  1307  	stub, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
  1308  	assert.NoError(t, err)
  1309  
  1310  	mockgRPC := &mocks.StepClient{}
  1311  	mockgRPC.On("Send", mock.Anything).Return(nil)
  1312  	mockgRPC.On("Context").Return(context.Background())
  1313  	mockClient := &mocks.ClusterClient{}
  1314  	mockClient.On("Step", mock.Anything).Return(mockgRPC, nil)
  1315  
  1316  	stub.Client = mockClient
  1317  
  1318  	stream := assertEventualEstablishStream(t, stub)
  1319  
  1320  	alerts := make(chan struct{}, 100)
  1321  
  1322  	stream.Logger = stream.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
  1323  		if strings.Contains(entry.Message, "expires in less than") {
  1324  			alerts <- struct{}{}
  1325  		}
  1326  		return nil
  1327  	}))
  1328  
   1329  	// Send a message to the node and expect an alert to be logged.
  1330  	stream.Send(wrapSubmitReq(testReq))
  1331  	select {
  1332  	case <-alerts:
  1333  	case <-time.After(time.Second * 5):
  1334  		t.Fatal("Should have logged an alert")
  1335  	}
   1336  	// Send another message, and ensure nothing is logged, because the
   1337  	// alerts should be suppressed until the minimum warning interval expires.
  1338  	stream.Send(wrapSubmitReq(testReq))
  1339  	select {
  1340  	case <-alerts:
  1341  		t.Fatal("Should not have logged an alert")
  1342  	case <-time.After(time.Millisecond * 500):
  1343  	}
  1344  	// Wait enough time for the alert interval to clear.
  1345  	time.Sleep(node1.c.MinimumExpirationWarningInterval + time.Second)
   1346  	// Send a message again; this time an alert should be logged.
  1347  	stream.Send(wrapSubmitReq(testReq))
  1348  	select {
  1349  	case <-alerts:
  1350  	case <-time.After(time.Second * 5):
  1351  		t.Fatal("Should have logged an alert")
  1352  	}
  1353  }
  1354  
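         // assertBiDiCommunicationForChannel verifies that msgToSend is delivered in both directions
         // between node1 and node2 over the given channel.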
  1355  func assertBiDiCommunicationForChannel(t *testing.T, node1, node2 *clusterNode, msgToSend *orderer.SubmitRequest, channel string) {
  1356  	for _, tst := range []struct {
  1357  		label    string
  1358  		sender   *clusterNode
  1359  		receiver *clusterNode
  1360  		target   uint64
  1361  	}{
  1362  		{label: "1->2", sender: node1, target: node2.nodeInfo.ID, receiver: node2},
  1363  		{label: "2->1", sender: node2, target: node1.nodeInfo.ID, receiver: node1},
  1364  	} {
  1365  		t.Run(tst.label, func(t *testing.T) {
  1366  			stub, err := tst.sender.c.Remote(channel, tst.target)
  1367  			assert.NoError(t, err)
  1368  
  1369  			stream := assertEventualEstablishStream(t, stub)
  1370  
  1371  			var wg sync.WaitGroup
  1372  			wg.Add(1)
  1373  			tst.receiver.handler.On("OnSubmit", channel, tst.sender.nodeInfo.ID, mock.Anything).Return(nil).Once().Run(func(args mock.Arguments) {
  1374  				req := args.Get(2).(*orderer.SubmitRequest)
  1375  				assert.True(t, proto.Equal(req, msgToSend))
  1376  				wg.Done()
  1377  			})
  1378  
  1379  			err = stream.Send(wrapSubmitReq(msgToSend))
  1380  			assert.NoError(t, err)
  1381  
  1382  			wg.Wait()
  1383  		})
  1384  	}
  1385  }
  1386  
  1387  func assertBiDiCommunication(t *testing.T, node1, node2 *clusterNode, msgToSend *orderer.SubmitRequest) {
  1388  	assertBiDiCommunicationForChannel(t, node1, node2, msgToSend, testChannel)
  1389  }
  1390  
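         // assertEventualEstablishStream retries NewStream until a stream is established or the test timeout expires.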
  1391  func assertEventualEstablishStream(t *testing.T, rpc *cluster.RemoteContext) *cluster.Stream {
  1392  	var res *cluster.Stream
  1393  	gt := gomega.NewGomegaWithT(t)
  1394  	gt.Eventually(func() error {
  1395  		stream, err := rpc.NewStream(time.Hour)
  1396  		res = stream
  1397  		return err
  1398  	}, timeout).Should(gomega.Succeed())
  1399  	return res
  1400  }
  1401  
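         // assertEventualSendMessage retries establishing a stream and sending req until it succeeds or the test timeout expires.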
  1402  func assertEventualSendMessage(t *testing.T, rpc *cluster.RemoteContext, req *orderer.SubmitRequest) orderer.Cluster_StepClient {
  1403  	var res orderer.Cluster_StepClient
  1404  	gt := gomega.NewGomegaWithT(t)
  1405  	gt.Eventually(func() error {
  1406  		stream, err := rpc.NewStream(time.Hour)
  1407  		if err != nil {
  1408  			return err
  1409  		}
  1410  		res = stream
  1411  		return stream.Send(wrapSubmitReq(req))
  1412  	}, timeout).Should(gomega.Succeed())
  1413  	return res
  1414  }
  1415  
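         // wrapSubmitReq wraps a SubmitRequest in a StepRequest so it can be sent over a cluster stream.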
  1416  func wrapSubmitReq(req *orderer.SubmitRequest) *orderer.StepRequest {
  1417  	return &orderer.StepRequest{
  1418  		Payload: &orderer.StepRequest_SubmitRequest{
  1419  			SubmitRequest: req,
  1420  		},
  1421  	}
  1422  }