github.com/yous1230/fabric@v2.0.0-beta.0.20191224111736-74345bee6ac2+incompatible/orderer/common/cluster/comm_test.go

     1  /*
     2  Copyright IBM Corp. 2017 All Rights Reserved.
     3  
     4  SPDX-License-Identifier: Apache-2.0
     5  */
     6  
     7  package cluster_test
     8  
     9  import (
    10  	"context"
    11  	"crypto/rand"
    12  	"crypto/x509"
    13  	"fmt"
    14  	"net"
    15  	"strings"
    16  	"sync"
    17  	"sync/atomic"
    18  	"testing"
    19  	"time"
    20  
    21  	"github.com/golang/protobuf/proto"
    22  	"github.com/hyperledger/fabric-protos-go/common"
    23  	"github.com/hyperledger/fabric-protos-go/orderer"
    24  	"github.com/hyperledger/fabric/common/crypto/tlsgen"
    25  	"github.com/hyperledger/fabric/common/flogging"
    26  	"github.com/hyperledger/fabric/common/metrics"
    27  	"github.com/hyperledger/fabric/common/metrics/disabled"
    28  	"github.com/hyperledger/fabric/common/metrics/metricsfakes"
    29  	comm_utils "github.com/hyperledger/fabric/core/comm"
    30  	"github.com/hyperledger/fabric/orderer/common/cluster"
    31  	"github.com/hyperledger/fabric/orderer/common/cluster/mocks"
    32  	"github.com/onsi/gomega"
    33  	"github.com/pkg/errors"
    34  	"github.com/stretchr/testify/assert"
    35  	"github.com/stretchr/testify/mock"
    36  	"go.uber.org/zap"
    37  	"go.uber.org/zap/zapcore"
    38  	"google.golang.org/grpc"
    39  )
    40  
    41  const (
    42  	testChannel  = "test"
    43  	testChannel2 = "test2"
    44  	timeout      = time.Second * 10
    45  )
    46  
    47  var (
    48  	// CA that generates TLS key-pairs.
    49  	// We use only one CA because the authentication
    50  	// is based on TLS pinning
    51  	ca = createCAOrPanic()
    52  
    53  	lastNodeID uint64
    54  
    55  	testSubReq = &orderer.SubmitRequest{
    56  		Channel: "test",
    57  	}
    58  
    59  	testReq = &orderer.SubmitRequest{
    60  		Channel: "test",
    61  		Payload: &common.Envelope{
    62  			Payload: []byte("test"),
    63  		},
    64  	}
    65  
    66  	testReq2 = &orderer.SubmitRequest{
    67  		Channel: testChannel2,
    68  		Payload: &common.Envelope{
    69  			Payload: []byte(testChannel2),
    70  		},
    71  	}
    72  
    73  	testRes = &orderer.SubmitResponse{
    74  		Info: "test",
    75  	}
    76  
    77  	fooReq = wrapSubmitReq(&orderer.SubmitRequest{
    78  		Channel: "foo",
    79  	})
    80  
    81  	fooRes = &orderer.SubmitResponse{
    82  		Info: "foo",
    83  	}
    84  
    85  	barReq = wrapSubmitReq(&orderer.SubmitRequest{
    86  		Channel: "bar",
    87  	})
    88  
    89  	barRes = &orderer.SubmitResponse{
    90  		Info: "bar",
    91  	}
    92  
    93  	testConsensusReq = &orderer.StepRequest{
    94  		Payload: &orderer.StepRequest_ConsensusRequest{
    95  			ConsensusRequest: &orderer.ConsensusRequest{
    96  				Payload: []byte{1, 2, 3},
    97  				Channel: testChannel,
    98  			},
    99  		},
   100  	}
   101  
   102  	channelExtractor = &mockChannelExtractor{}
   103  )
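         // A typical test below wires two nodes roughly as follows (a sketch based on the
         // helpers in this file; see TestBasic and the assert* helpers at the bottom of the
         // file for the real thing):
         //
         //	node1, node2 := newTestNode(t), newTestNode(t)
         //	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
         //	node1.c.Configure(testChannel, config)
         //	node2.c.Configure(testChannel, config)
         //	rm, _ := node1.c.Remote(testChannel, node2.nodeInfo.ID)
         //	stream, _ := rm.NewStream(timeout)
         //	_ = stream.Send(wrapSubmitReq(testReq))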
   104  
   105  func nextUnusedID() uint64 {
   106  	return atomic.AddUint64(&lastNodeID, 1)
   107  }
   108  
   109  func createCAOrPanic() tlsgen.CA {
   110  	ca, err := tlsgen.NewCA()
   111  	if err != nil {
   112  		panic(fmt.Sprintf("failed creating CA: %+v", err))
   113  	}
   114  	return ca
   115  }
   116  
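         // mockChannelExtractor extracts the target channel from submit and consensus
         // requests; it is wired into every Comm instance below as its ChanExt.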
   117  type mockChannelExtractor struct{}
   118  
   119  func (*mockChannelExtractor) TargetChannel(msg proto.Message) string {
   120  	switch req := msg.(type) {
   121  	case *orderer.ConsensusRequest:
   122  		return req.Channel
   123  	case *orderer.SubmitRequest:
   124  		return req.Channel
   125  	default:
   126  		return ""
   127  	}
   128  }
   129  
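         // clusterNode bundles a cluster.Comm instance with an in-process gRPC server that
         // serves the Cluster service, so each test node acts as both client and server.
         // The freeze/unfreeze fields let tests block the server-side Step handler on demand.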
   130  type clusterNode struct {
   131  	lock         sync.Mutex
   132  	frozen       bool
   133  	freezeCond   sync.Cond
   134  	dialer       *cluster.PredicateDialer
   135  	handler      *mocks.Handler
   136  	nodeInfo     cluster.RemoteNode
   137  	srv          *comm_utils.GRPCServer
   138  	bindAddress  string
   139  	clientConfig comm_utils.ClientConfig
   140  	serverConfig comm_utils.ServerConfig
   141  	c            *cluster.Comm
   142  }
   143  
   144  func (cn *clusterNode) Step(stream orderer.Cluster_StepServer) error {
   145  	cn.waitIfFrozen()
   146  	req, err := stream.Recv()
   147  	if err != nil {
   148  		return err
   149  	}
   150  	if submitReq := req.GetSubmitRequest(); submitReq != nil {
   151  		return cn.c.DispatchSubmit(stream.Context(), submitReq)
   152  	}
   153  	if err := cn.c.DispatchConsensus(stream.Context(), req.GetConsensusRequest()); err != nil {
   154  		return err
   155  	}
   156  	return stream.Send(&orderer.StepResponse{})
   157  }
   158  
    159  func (cn *clusterNode) waitIfFrozen() {
    160  	cn.lock.Lock()
    161  	// Wait re-acquires the lock before returning, so the deferred
    162  	// Unlock below releases it on every path. There is no freeze
    163  	// after an unfreeze, so no loop around Wait() is needed.
    164  	defer cn.lock.Unlock()
    165  	if cn.frozen {
    166  		cn.freezeCond.Wait()
    167  	}
    168  }
   169  
   170  func (cn *clusterNode) freeze() {
   171  	cn.lock.Lock()
   172  	defer cn.lock.Unlock()
   173  	cn.frozen = true
   174  }
   175  
   176  func (cn *clusterNode) unfreeze() {
   177  	cn.lock.Lock()
   178  	cn.frozen = false
   179  	cn.lock.Unlock()
   180  	cn.freezeCond.Broadcast()
   181  }
   182  
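         // resurrect restarts the node's gRPC Cluster service on its original bind address,
         // reusing the server TLS configuration the node was created (or renewed) with.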
   183  func (cn *clusterNode) resurrect() {
   184  	gRPCServer, err := comm_utils.NewGRPCServer(cn.bindAddress, cn.serverConfig)
   185  	if err != nil {
   186  		panic(fmt.Errorf("failed starting gRPC server: %v", err))
   187  	}
   188  	cn.srv = gRPCServer
   189  	orderer.RegisterClusterServer(gRPCServer.Server(), cn)
   190  	go cn.srv.Start()
   191  }
   192  
   193  func (cn *clusterNode) stop() {
   194  	cn.srv.Stop()
   195  	cn.c.Shutdown()
   196  }
   197  
   198  func (cn *clusterNode) renewCertificates() {
   199  	clientKeyPair, err := ca.NewClientCertKeyPair()
   200  	if err != nil {
   201  		panic(fmt.Errorf("failed creating client certificate %v", err))
   202  	}
   203  	serverKeyPair, err := ca.NewServerCertKeyPair("127.0.0.1")
   204  	if err != nil {
   205  		panic(fmt.Errorf("failed creating server certificate %v", err))
   206  	}
   207  
   208  	cn.nodeInfo.ClientTLSCert = clientKeyPair.TLSCert.Raw
   209  	cn.nodeInfo.ServerTLSCert = serverKeyPair.TLSCert.Raw
   210  
   211  	cn.serverConfig.SecOpts.Certificate = serverKeyPair.Cert
   212  	cn.serverConfig.SecOpts.Key = serverKeyPair.Key
   213  
   214  	cn.dialer.Config.SecOpts.Key = clientKeyPair.Key
   215  	cn.dialer.Config.SecOpts.Certificate = clientKeyPair.Cert
   216  }
   217  
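         // newTestNodeWithMetrics starts a TLS-enabled Cluster gRPC service on an ephemeral
         // port and returns a clusterNode whose Comm uses the given metrics provider and
         // TLS connection gauge.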
   218  func newTestNodeWithMetrics(t *testing.T, metrics cluster.MetricsProvider, tlsConnGauge metrics.Gauge) *clusterNode {
   219  	serverKeyPair, err := ca.NewServerCertKeyPair("127.0.0.1")
   220  	assert.NoError(t, err)
   221  
   222  	clientKeyPair, _ := ca.NewClientCertKeyPair()
   223  
   224  	handler := &mocks.Handler{}
   225  	clientConfig := comm_utils.ClientConfig{
   226  		AsyncConnect: true,
   227  		Timeout:      time.Hour,
   228  		SecOpts: comm_utils.SecureOptions{
   229  			RequireClientCert: true,
   230  			Key:               clientKeyPair.Key,
   231  			Certificate:       clientKeyPair.Cert,
   232  			ServerRootCAs:     [][]byte{ca.CertBytes()},
   233  			UseTLS:            true,
   234  			ClientRootCAs:     [][]byte{ca.CertBytes()},
   235  		},
   236  	}
   237  
   238  	dialer := &cluster.PredicateDialer{
   239  		Config: clientConfig,
   240  	}
   241  
   242  	srvConfig := comm_utils.ServerConfig{
   243  		SecOpts: comm_utils.SecureOptions{
   244  			Key:         serverKeyPair.Key,
   245  			Certificate: serverKeyPair.Cert,
   246  			UseTLS:      true,
   247  		},
   248  	}
   249  	gRPCServer, err := comm_utils.NewGRPCServer("127.0.0.1:", srvConfig)
   250  	assert.NoError(t, err)
   251  
   252  	tstSrv := &clusterNode{
   253  		dialer:       dialer,
   254  		clientConfig: clientConfig,
   255  		serverConfig: srvConfig,
   256  		bindAddress:  gRPCServer.Address(),
   257  		handler:      handler,
   258  		nodeInfo: cluster.RemoteNode{
   259  			Endpoint:      gRPCServer.Address(),
   260  			ID:            nextUnusedID(),
   261  			ServerTLSCert: serverKeyPair.TLSCert.Raw,
   262  			ClientTLSCert: clientKeyPair.TLSCert.Raw,
   263  		},
   264  		srv: gRPCServer,
   265  	}
   266  
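         	// The freeze condition variable shares the node's mutex, so waitIfFrozen
         	// can check the frozen flag and wait on it atomically.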
   267  	tstSrv.freezeCond.L = &tstSrv.lock
   268  
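         	// SendBufferSize is 1, i.e. a single-message egress queue; tests such as
         	// TestBlockingSend rely on a tiny buffer to trigger the overflow path with
         	// only a couple of messages.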
   269  	tstSrv.c = &cluster.Comm{
   270  		CertExpWarningThreshold: time.Hour,
   271  		SendBufferSize:          1,
   272  		Logger:                  flogging.MustGetLogger("test"),
   273  		Chan2Members:            make(cluster.MembersByChannel),
   274  		H:                       handler,
   275  		ChanExt:                 channelExtractor,
   276  		Connections:             cluster.NewConnectionStore(dialer, tlsConnGauge),
   277  		Metrics:                 cluster.NewMetrics(metrics),
   278  	}
   279  
   280  	orderer.RegisterClusterServer(gRPCServer.Server(), tstSrv)
   281  	go gRPCServer.Start()
   282  	return tstSrv
   283  }
   284  
   285  func newTestNode(t *testing.T) *clusterNode {
   286  	return newTestNodeWithMetrics(t, &disabled.Provider{}, &disabled.Gauge{})
   287  }
   288  
   289  func TestSendBigMessage(t *testing.T) {
   290  	t.Parallel()
   291  
   292  	// Scenario: Basic test that spawns 5 nodes and sends a big message
   293  	// from one of the nodes to the others.
    294  	// A receiver node's Step() server-side method (which calls Recv)
    295  	// is frozen until the sender node's Send method returns;
    296  	// hence, the sender node finishes calling Send
    297  	// before a receiver node starts calling Recv.
    298  	// This ensures that Send is non-blocking even with big messages.
   299  	// In the test, we send a total of 8MB of random data (2MB to each node).
   300  	// The randomness is used so gRPC compression won't compress it to a lower size.
   301  
   302  	node1 := newTestNode(t)
   303  	node2 := newTestNode(t)
   304  	node3 := newTestNode(t)
   305  	node4 := newTestNode(t)
   306  	node5 := newTestNode(t)
   307  
   308  	for _, node := range []*clusterNode{node2, node3, node4, node5} {
   309  		node.c.SendBufferSize = 1
   310  	}
   311  
   312  	defer node1.stop()
   313  	defer node2.stop()
   314  	defer node3.stop()
   315  	defer node4.stop()
   316  	defer node5.stop()
   317  
   318  	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo, node3.nodeInfo, node4.nodeInfo, node5.nodeInfo}
   319  	node1.c.Configure(testChannel, config)
   320  	node2.c.Configure(testChannel, config)
   321  	node3.c.Configure(testChannel, config)
   322  	node4.c.Configure(testChannel, config)
   323  	node5.c.Configure(testChannel, config)
   324  
   325  	var messageReceived sync.WaitGroup
   326  	messageReceived.Add(4)
   327  
   328  	msgSize := 1024 * 1024 * 2
   329  	bigMsg := &orderer.ConsensusRequest{
   330  		Channel: testChannel,
   331  		Payload: make([]byte, msgSize),
   332  	}
   333  
   334  	_, err := rand.Read(bigMsg.Payload)
   335  	assert.NoError(t, err)
   336  
   337  	wrappedMsg := &orderer.StepRequest{
   338  		Payload: &orderer.StepRequest_ConsensusRequest{
   339  			ConsensusRequest: bigMsg,
   340  		},
   341  	}
   342  
   343  	for _, node := range []*clusterNode{node2, node3, node4, node5} {
   344  		node.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
   345  			msg := args.Get(2).(*orderer.ConsensusRequest)
   346  			assert.Len(t, msg.Payload, msgSize)
   347  			messageReceived.Done()
   348  		}).Return(nil)
   349  	}
   350  
   351  	streams := map[uint64]*cluster.Stream{}
   352  
   353  	for _, node := range []*clusterNode{node2, node3, node4, node5} {
   354  		// Freeze the node, in order to block its Recv
   355  		node.freeze()
   356  	}
   357  
   358  	for _, node := range []*clusterNode{node2, node3, node4, node5} {
   359  		rm, err := node1.c.Remote(testChannel, node.nodeInfo.ID)
   360  		assert.NoError(t, err)
   361  
   362  		stream := assertEventualEstablishStream(t, rm)
   363  		streams[node.nodeInfo.ID] = stream
   364  	}
   365  
   366  	t0 := time.Now()
   367  	for _, node := range []*clusterNode{node2, node3, node4, node5} {
   368  		stream := streams[node.nodeInfo.ID]
   369  
   370  		t1 := time.Now()
   371  		err = stream.Send(wrappedMsg)
   372  		assert.NoError(t, err)
   373  		t.Log("Sending took", time.Since(t1))
   374  		t1 = time.Now()
   375  
   376  		// Unfreeze the node. It can now call Recv, and signal the messageReceived waitGroup.
   377  		node.unfreeze()
   378  	}
   379  
   380  	t.Log("Total sending time to all 4 nodes took:", time.Since(t0))
   381  
   382  	messageReceived.Wait()
   383  }
   384  
   385  func TestBlockingSend(t *testing.T) {
   386  	t.Parallel()
    387  	// Scenario: Basic test that spawns 2 nodes and sends three SubmitRequests,
    388  	// or three ConsensusRequests, from the first node to the second node.
    389  	// SubmitRequests should block, but ConsensusRequests should not.
   390  
   391  	for _, testCase := range []struct {
   392  		description        string
   393  		messageToSend      *orderer.StepRequest
   394  		streamUnblocks     bool
   395  		elapsedGreaterThan time.Duration
   396  		overflowErr        string
   397  	}{
   398  		{
   399  			description:        "SubmitRequest",
   400  			messageToSend:      wrapSubmitReq(testReq),
   401  			streamUnblocks:     true,
   402  			elapsedGreaterThan: time.Second / 2,
   403  		},
   404  		{
   405  			description:   "ConsensusRequest",
   406  			messageToSend: testConsensusReq,
   407  			overflowErr:   "send queue overflown",
   408  		},
   409  	} {
   410  		t.Run(testCase.description, func(t *testing.T) {
   411  			node1 := newTestNode(t)
   412  			node2 := newTestNode(t)
   413  
   414  			node1.c.SendBufferSize = 1
   415  			node2.c.SendBufferSize = 1
   416  
   417  			defer node1.stop()
   418  			defer node2.stop()
   419  
   420  			config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
   421  			node1.c.Configure(testChannel, config)
   422  			node2.c.Configure(testChannel, config)
   423  
   424  			rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   425  			assert.NoError(t, err)
   426  
   427  			client := &mocks.ClusterClient{}
   428  			fakeStream := &mocks.StepClient{}
   429  
   430  			// Replace real client with a mock client
   431  			rm.Client = client
   432  			rm.ProbeConn = func(_ *grpc.ClientConn) error {
   433  				return nil
   434  			}
   435  			// Configure client to return the mock stream
   436  			fakeStream.On("Context", mock.Anything).Return(context.Background())
   437  			client.On("Step", mock.Anything).Return(fakeStream, nil).Once()
   438  
   439  			unBlock := make(chan struct{})
   440  			var sendInvoked sync.WaitGroup
   441  			sendInvoked.Add(1)
   442  			var once sync.Once
   443  			fakeStream.On("Send", mock.Anything).Run(func(_ mock.Arguments) {
   444  				once.Do(sendInvoked.Done)
   445  				<-unBlock
   446  			}).Return(errors.New("oops"))
   447  
   448  			stream, err := rm.NewStream(time.Hour)
   449  			assert.NoError(t, err)
   450  
   451  			// The first send doesn't block, even though the Send operation blocks.
   452  			err = stream.Send(testCase.messageToSend)
   453  			assert.NoError(t, err)
   454  
    455  			// The second send doesn't block either.
   456  			// After this point, we have 1 goroutine which is blocked on Send(),
   457  			// and one message in the buffer.
   458  			sendInvoked.Wait()
   459  			err = stream.Send(testCase.messageToSend)
   460  			assert.NoError(t, err)
   461  
    462  			// The third send blocks, so we need to unblock it ourselves
    463  			// in order for it to go through, unless the operation
    464  			// is non-blocking.
   465  			go func() {
   466  				time.Sleep(time.Second)
   467  				if testCase.streamUnblocks {
   468  					close(unBlock)
   469  				}
   470  			}()
   471  
   472  			t1 := time.Now()
   473  			err = stream.Send(testCase.messageToSend)
   474  			// The third send always overflows or blocks.
   475  			// If we expect to receive an overflow error - assert it.
   476  			if testCase.overflowErr != "" {
   477  				assert.EqualError(t, err, testCase.overflowErr)
   478  			}
   479  			elapsed := time.Since(t1)
   480  			t.Log("Elapsed time:", elapsed)
   481  			assert.True(t, elapsed > testCase.elapsedGreaterThan)
   482  
   483  			if !testCase.streamUnblocks {
   484  				close(unBlock)
   485  			}
   486  		})
   487  	}
   488  }
   489  
   490  func TestBasic(t *testing.T) {
   491  	t.Parallel()
   492  	// Scenario: Basic test that spawns 2 nodes and sends each other
   493  	// messages that are expected to be echoed back
   494  
   495  	node1 := newTestNode(t)
   496  	node2 := newTestNode(t)
   497  
   498  	defer node1.stop()
   499  	defer node2.stop()
   500  
   501  	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
   502  	node1.c.Configure(testChannel, config)
   503  	node2.c.Configure(testChannel, config)
   504  
   505  	assertBiDiCommunication(t, node1, node2, testReq)
   506  }
   507  
   508  func TestUnavailableHosts(t *testing.T) {
   509  	t.Parallel()
   510  	// Scenario: A node is configured to connect
   511  	// to a host that is down
   512  	node1 := newTestNode(t)
   513  
   514  	clientConfig := node1.dialer.Config
   515  	// The below timeout makes sure that connection establishment is done
   516  	// asynchronously. Had it been synchronous, the Remote() call would be
   517  	// blocked for an hour.
   518  	clientConfig.Timeout = time.Hour
   519  	defer node1.stop()
   520  
   521  	node2 := newTestNode(t)
   522  	node2.stop()
   523  
   524  	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
   525  	remote, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   526  	assert.NoError(t, err)
   527  	assert.NotNil(t, remote)
   528  
   529  	_, err = remote.NewStream(time.Millisecond * 100)
   530  	assert.Contains(t, err.Error(), "connection")
   531  }
   532  
   533  func TestStreamAbort(t *testing.T) {
   534  	t.Parallel()
   535  
   536  	// Scenarios: node 1 is connected to node 2 in 2 channels,
   537  	// and the consumer of the communication calls receive.
    538  	// Two sub-scenarios are exercised:
   539  	// 1) The server certificate of node 2 changes in the first channel
   540  	// 2) Node 2 is evicted from the membership of the first channel
    541  	// In both scenarios, the Recv() call should be aborted.
   542  
   543  	node2 := newTestNode(t)
   544  	defer node2.stop()
   545  
   546  	invalidNodeInfo := cluster.RemoteNode{
   547  		ID:            node2.nodeInfo.ID,
   548  		ServerTLSCert: []byte{1, 2, 3},
   549  		ClientTLSCert: []byte{1, 2, 3},
   550  	}
   551  
   552  	for _, tst := range []struct {
   553  		testName      string
   554  		membership    []cluster.RemoteNode
   555  		expectedError string
   556  	}{
   557  		{
   558  			testName:      "Evicted from membership",
   559  			membership:    nil,
   560  			expectedError: "rpc error: code = Canceled desc = context canceled",
   561  		},
   562  		{
   563  			testName:      "Changed TLS certificate",
   564  			membership:    []cluster.RemoteNode{invalidNodeInfo},
   565  			expectedError: "rpc error: code = Canceled desc = context canceled",
   566  		},
   567  	} {
   568  		t.Run(tst.testName, func(t *testing.T) {
   569  			testStreamAbort(t, node2, tst.membership, tst.expectedError)
   570  		})
   571  	}
   572  	node2.handler.AssertNumberOfCalls(t, "OnSubmit", 2)
   573  }
   574  
   575  func testStreamAbort(t *testing.T, node2 *clusterNode, newMembership []cluster.RemoteNode, expectedError string) {
   576  	node1 := newTestNode(t)
   577  	defer node1.stop()
   578  
   579  	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
   580  	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
   581  	node1.c.Configure(testChannel2, []cluster.RemoteNode{node2.nodeInfo})
   582  	node2.c.Configure(testChannel2, []cluster.RemoteNode{node1.nodeInfo})
   583  
   584  	var streamCreated sync.WaitGroup
   585  	streamCreated.Add(1)
   586  
   587  	stopChan := make(chan struct{})
   588  
   589  	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Once().Run(func(_ mock.Arguments) {
   590  		// Notify the stream was created
   591  		streamCreated.Done()
   592  		// Wait for the test to finish
   593  		<-stopChan
   594  	}).Return(nil).Once()
   595  
   596  	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   597  	assert.NoError(t, err)
   598  
   599  	go func() {
   600  		stream := assertEventualEstablishStream(t, rm1)
   601  		// Signal the reconfiguration
   602  		err = stream.Send(wrapSubmitReq(testReq))
   603  		assert.NoError(t, err)
   604  		_, err := stream.Recv()
   605  		assert.Contains(t, err.Error(), expectedError)
   606  		close(stopChan)
   607  	}()
   608  
   609  	go func() {
   610  		// Wait for the stream reference to be obtained
   611  		streamCreated.Wait()
   612  		// Reconfigure the channel membership
   613  		node1.c.Configure(testChannel, newMembership)
   614  	}()
   615  
   616  	<-stopChan
   617  }
   618  
   619  func TestDoubleReconfigure(t *testing.T) {
   620  	t.Parallel()
    621  	// Scenario: Basic test that spawns 2 nodes
    622  	// and configures node 1 twice, and checks that
    623  	// node 1's remote stub for node 2 wasn't re-created in the second
    624  	// configuration since it already existed
   625  
   626  	node1 := newTestNode(t)
   627  	node2 := newTestNode(t)
   628  
   629  	defer node1.stop()
   630  	defer node2.stop()
   631  
   632  	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
   633  	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   634  	assert.NoError(t, err)
   635  
   636  	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
   637  	rm2, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   638  	assert.NoError(t, err)
   639  	// Ensure the references are equal
   640  	assert.True(t, rm1 == rm2)
   641  }
   642  
   643  func TestInvalidChannel(t *testing.T) {
   644  	t.Parallel()
    645  	// Scenario: node 1 is ordered to send a message on a channel
   646  	// that doesn't exist, and also receives a message, but
   647  	// the channel cannot be extracted from the message.
   648  
   649  	t.Run("channel doesn't exist", func(t *testing.T) {
   650  		t.Parallel()
   651  		node1 := newTestNode(t)
   652  		defer node1.stop()
   653  
   654  		_, err := node1.c.Remote(testChannel, 0)
   655  		assert.EqualError(t, err, "channel test doesn't exist")
   656  	})
   657  
   658  	t.Run("channel cannot be extracted", func(t *testing.T) {
   659  		t.Parallel()
   660  		node1 := newTestNode(t)
   661  		defer node1.stop()
   662  
   663  		node1.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
   664  		gt := gomega.NewGomegaWithT(t)
   665  		gt.Eventually(func() (bool, error) {
   666  			_, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
   667  			return true, err
   668  		}, time.Minute).Should(gomega.BeTrue())
   669  
   670  		stub, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
   671  		assert.NoError(t, err)
   672  
   673  		stream := assertEventualEstablishStream(t, stub)
   674  
   675  		// An empty SubmitRequest has an empty channel which is invalid
   676  		err = stream.Send(wrapSubmitReq(&orderer.SubmitRequest{}))
   677  		assert.NoError(t, err)
   678  
   679  		_, err = stream.Recv()
   680  		assert.EqualError(t, err, "rpc error: code = Unknown desc = badly formatted message, cannot extract channel")
   681  
   682  		// Test directly without going through the gRPC stream
   683  		err = node1.c.DispatchSubmit(context.Background(), &orderer.SubmitRequest{})
   684  		assert.EqualError(t, err, "badly formatted message, cannot extract channel")
   685  	})
   686  }
   687  
   688  func TestAbortRPC(t *testing.T) {
   689  	t.Parallel()
   690  	// Scenarios:
   691  	// (I) The node calls an RPC, and calls Abort() on the remote context
   692  	//  in parallel. The RPC should return even though the server-side call hasn't finished.
   693  	// (II) The node calls an RPC, but the server-side processing takes too long,
   694  	// and the RPC invocation returns prematurely.
   695  
   696  	testCases := []struct {
   697  		name        string
   698  		abortFunc   func(*cluster.RemoteContext)
   699  		rpcTimeout  time.Duration
   700  		expectedErr string
   701  	}{
   702  		{
   703  			name:        "Abort() called",
   704  			expectedErr: "rpc error: code = Canceled desc = context canceled",
   705  			rpcTimeout:  time.Hour,
   706  			abortFunc: func(rc *cluster.RemoteContext) {
   707  				rc.Abort()
   708  			},
   709  		},
   710  		{
   711  			name:        "RPC timeout",
   712  			expectedErr: "rpc timeout expired",
   713  			rpcTimeout:  time.Second,
   714  			abortFunc:   func(*cluster.RemoteContext) {},
   715  		},
   716  	}
   717  
   718  	for _, testCase := range testCases {
   719  		testCase := testCase
   720  		t.Run(testCase.name, func(t *testing.T) {
   721  			testAbort(t, testCase.abortFunc, testCase.rpcTimeout, testCase.expectedErr)
   722  		})
   723  	}
   724  }
   725  
   726  func testAbort(t *testing.T, abortFunc func(*cluster.RemoteContext), rpcTimeout time.Duration, expectedErr string) {
   727  	node1 := newTestNode(t)
   728  	defer node1.stop()
   729  
   730  	node2 := newTestNode(t)
   731  	defer node2.stop()
   732  
   733  	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
   734  	node1.c.Configure(testChannel, config)
   735  	node2.c.Configure(testChannel, config)
   736  	var onStepCalled sync.WaitGroup
   737  	onStepCalled.Add(1)
   738  
    739  	// stuckCall ensures the OnSubmit() call is stuck throughout this test
   740  	var stuckCall sync.WaitGroup
   741  	stuckCall.Add(1)
   742  	// At the end of the test, release the server-side resources
   743  	defer stuckCall.Done()
   744  
   745  	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(nil).Once().Run(func(_ mock.Arguments) {
   746  		onStepCalled.Done()
   747  		stuckCall.Wait()
   748  	}).Once()
   749  
   750  	rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   751  	assert.NoError(t, err)
   752  
   753  	go func() {
   754  		onStepCalled.Wait()
   755  		abortFunc(rm)
   756  	}()
   757  
   758  	var stream *cluster.Stream
   759  	gt := gomega.NewGomegaWithT(t)
   760  	gt.Eventually(func() error {
   761  		stream, err = rm.NewStream(rpcTimeout)
   762  		return err
   763  	}, time.Second*10, time.Millisecond*10).Should(gomega.Succeed())
   764  
   765  	stream.Send(wrapSubmitReq(testSubReq))
   766  	_, err = stream.Recv()
   767  
   768  	assert.EqualError(t, err, expectedErr)
   769  
   770  	node2.handler.AssertNumberOfCalls(t, "OnSubmit", 1)
   771  }
   772  
   773  func TestNoTLSCertificate(t *testing.T) {
   774  	t.Parallel()
   775  	// Scenario: The node is sent a message by another node that doesn't
   776  	// connect with mutual TLS, thus doesn't provide a TLS certificate
   777  	node1 := newTestNode(t)
   778  	defer node1.stop()
   779  
   780  	node1.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
   781  
   782  	clientConfig := comm_utils.ClientConfig{
   783  		AsyncConnect: true,
   784  		Timeout:      time.Millisecond * 100,
   785  		SecOpts: comm_utils.SecureOptions{
   786  			ServerRootCAs: [][]byte{ca.CertBytes()},
   787  			UseTLS:        true,
   788  		},
   789  	}
   790  	cl, err := comm_utils.NewGRPCClient(clientConfig)
   791  	assert.NoError(t, err)
   792  
   793  	var conn *grpc.ClientConn
   794  	gt := gomega.NewGomegaWithT(t)
   795  	gt.Eventually(func() (bool, error) {
   796  		conn, err = cl.NewConnection(node1.srv.Address())
   797  		return true, err
   798  	}, time.Minute).Should(gomega.BeTrue())
   799  
   800  	echoClient := orderer.NewClusterClient(conn)
   801  	stream, err := echoClient.Step(context.Background())
   802  	assert.NoError(t, err)
   803  
   804  	err = stream.Send(wrapSubmitReq(testSubReq))
   805  	assert.NoError(t, err)
   806  	_, err = stream.Recv()
   807  	assert.EqualError(t, err, "rpc error: code = Unknown desc = no TLS certificate sent")
   808  }
   809  
   810  func TestReconnect(t *testing.T) {
   811  	t.Parallel()
   812  	// Scenario: node 1 and node 2 are connected,
   813  	// and node 2 is taken offline.
   814  	// Node 1 tries to send a message to node 2 but fails,
   815  	// and afterwards node 2 is brought back, after which
   816  	// node 1 sends more messages, and it should succeed
   817  	// sending a message to node 2 eventually.
   818  
   819  	node1 := newTestNode(t)
   820  	defer node1.stop()
   821  	conf := node1.dialer.Config
   822  	conf.Timeout = time.Hour
   823  
   824  	node2 := newTestNode(t)
   825  	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(nil)
   826  	defer node2.stop()
   827  
   828  	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
   829  	node1.c.Configure(testChannel, config)
   830  	node2.c.Configure(testChannel, config)
   831  
    832  	// Take node 2 offline by shutting down its gRPC service
   833  	node2.srv.Stop()
   834  	// Obtain the stub for node 2.
   835  	// Should succeed, because the connection was created at time of configuration
   836  	stub, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   837  	assert.NoError(t, err)
   838  
    839  	// Try to obtain a stream. Should not succeed.
   840  	gt := gomega.NewGomegaWithT(t)
   841  	gt.Eventually(func() error {
   842  		_, err = stub.NewStream(time.Hour)
   843  		return err
   844  	}).Should(gomega.Not(gomega.Succeed()))
   845  
   846  	// Wait for the port to be released
   847  	for {
   848  		lsnr, err := net.Listen("tcp", node2.nodeInfo.Endpoint)
   849  		if err == nil {
   850  			lsnr.Close()
   851  			break
   852  		}
   853  	}
   854  
   855  	// Resurrect node 2
   856  	node2.resurrect()
   857  	// Send a message from node 1 to node 2.
   858  	// Should succeed eventually
   859  	assertEventualSendMessage(t, stub, testReq)
   860  }
   861  
   862  func TestRenewCertificates(t *testing.T) {
   863  	t.Parallel()
   864  	// Scenario: node 1 and node 2 are connected,
   865  	// and the certificates are renewed for both nodes
   866  	// at the same time.
   867  	// They are expected to connect to one another
   868  	// after the reconfiguration.
   869  
   870  	node1 := newTestNode(t)
   871  	defer node1.stop()
   872  
   873  	node2 := newTestNode(t)
   874  	defer node2.stop()
   875  
   876  	node1.handler.On("OnStep", testChannel, node2.nodeInfo.ID, mock.Anything).Return(testRes, nil)
   877  	node2.handler.On("OnStep", testChannel, node1.nodeInfo.ID, mock.Anything).Return(testRes, nil)
   878  
   879  	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
   880  	node1.c.Configure(testChannel, config)
   881  	node2.c.Configure(testChannel, config)
   882  
   883  	assertBiDiCommunication(t, node1, node2, testReq)
   884  
    885  	// Now, renew the certificates of both nodes
   886  	node1.renewCertificates()
   887  	node2.renewCertificates()
   888  
   889  	// Reconfigure them
   890  	config = []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
   891  	node1.c.Configure(testChannel, config)
   892  	node2.c.Configure(testChannel, config)
   893  
    894  	// W.L.O.G., try to send a message from node1 to node2.
    895  	// It should fail, because node2's server certificate has now changed,
    896  	// so the connection to the remote node was closed.
   897  	info2 := node2.nodeInfo
   898  	remote, err := node1.c.Remote(testChannel, info2.ID)
   899  	assert.NoError(t, err)
   900  	assert.NotNil(t, remote)
   901  
   902  	gt := gomega.NewGomegaWithT(t)
   903  	gt.Eventually(func() string {
   904  		_, err = remote.NewStream(time.Hour)
   905  		return err.Error()
   906  	}, timeout).Should(gomega.ContainSubstring(info2.Endpoint))
   907  
   908  	// Restart the gRPC service on both nodes, to load the new TLS certificates
   909  	node1.srv.Stop()
   910  	node1.resurrect()
   911  	node2.srv.Stop()
   912  	node2.resurrect()
   913  
   914  	// Finally, check that the nodes can communicate once again
   915  	assertBiDiCommunication(t, node1, node2, testReq)
   916  }
   917  
   918  func TestMembershipReconfiguration(t *testing.T) {
   919  	t.Parallel()
   920  	// Scenario: node 1 and node 2 are started up
   921  	// and node 2 is configured to know about node 1,
   922  	// without node1 knowing about node 2.
   923  	// The communication between them should only work
   924  	// after node 1 is configured to know about node 2.
   925  
   926  	node1 := newTestNode(t)
   927  	defer node1.stop()
   928  
   929  	node2 := newTestNode(t)
   930  	defer node2.stop()
   931  
   932  	node1.c.Configure(testChannel, []cluster.RemoteNode{})
   933  	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
   934  
   935  	// Node 1 can't connect to node 2 because it doesn't know its TLS certificate yet
   936  	_, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
   937  	assert.EqualError(t, err, fmt.Sprintf("node %d doesn't exist in channel test's membership", node2.nodeInfo.ID))
   938  	// Node 2 can connect to node 1, but it can't send it messages because node 1 doesn't know node 2 yet.
   939  
   940  	gt := gomega.NewGomegaWithT(t)
   941  	gt.Eventually(func() (bool, error) {
   942  		_, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
   943  		return true, err
   944  	}, time.Minute).Should(gomega.BeTrue())
   945  
   946  	stub, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
   947  
   948  	stream := assertEventualEstablishStream(t, stub)
   949  	err = stream.Send(wrapSubmitReq(testSubReq))
   950  	assert.NoError(t, err)
   951  
   952  	_, err = stream.Recv()
   953  	assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
   954  
   955  	// Next, configure node 1 to know about node 2
   956  	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
   957  
   958  	// Check that the communication works correctly between both nodes
   959  	assertBiDiCommunication(t, node1, node2, testReq)
   960  	assertBiDiCommunication(t, node2, node1, testReq)
   961  
   962  	// Reconfigure node 2 to forget about node 1
   963  	node2.c.Configure(testChannel, []cluster.RemoteNode{})
   964  	// Node 1 can still connect to node 2
   965  	stub, err = node1.c.Remote(testChannel, node2.nodeInfo.ID)
   966  	assert.NoError(t, err)
    967  	// But it can't send a message because node 2 no longer authorizes node 1
   968  	stream = assertEventualEstablishStream(t, stub)
   969  	stream.Send(wrapSubmitReq(testSubReq))
   970  	_, err = stream.Recv()
   971  	assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
   972  }
   973  
   974  func TestShutdown(t *testing.T) {
   975  	t.Parallel()
   976  	// Scenario: node 1 is shut down and as a result, can't
   977  	// send messages to anyone, nor can it be reconfigured
   978  
   979  	node1 := newTestNode(t)
   980  	defer node1.stop()
   981  
   982  	node1.c.Shutdown()
   983  
   984  	// Obtaining a RemoteContext cannot succeed because shutdown was called before
   985  	_, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
   986  	assert.EqualError(t, err, "communication has been shut down")
   987  
   988  	node2 := newTestNode(t)
   989  	defer node2.stop()
   990  
   991  	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
    992  	// Configuration of node 1 doesn't take place
   993  	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
   994  
   995  	gt := gomega.NewGomegaWithT(t)
   996  	gt.Eventually(func() error {
   997  		_, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
   998  		return err
   999  	}, time.Minute).Should(gomega.Succeed())
  1000  
  1001  	stub, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
  1002  
  1003  	// Therefore, sending a message doesn't succeed because node 1 rejected the configuration change
  1004  	gt.Eventually(func() string {
  1005  		stream, err := stub.NewStream(time.Hour)
  1006  		if err != nil {
  1007  			return err.Error()
  1008  		}
  1009  		err = stream.Send(wrapSubmitReq(testSubReq))
  1010  		assert.NoError(t, err)
  1011  
  1012  		_, err = stream.Recv()
  1013  		return err.Error()
  1014  	}, timeout).Should(gomega.ContainSubstring("channel test doesn't exist"))
  1015  }
  1016  
  1017  func TestMultiChannelConfig(t *testing.T) {
  1018  	t.Parallel()
   1019  	// Scenario: node 1 knows node 2 only in channel "foo"
   1020  	// and knows node 3 only in channel "bar".
   1021  	// Received messages are routed according to their corresponding channels,
   1022  	// and when node 2 sends a message for channel "bar" to node 1, it is rejected.
   1023  	// The same applies to node 3 when it sends a message to node 1 in channel "foo".
  1024  
  1025  	node1 := newTestNode(t)
  1026  	defer node1.stop()
  1027  
  1028  	node2 := newTestNode(t)
  1029  	defer node2.stop()
  1030  
  1031  	node3 := newTestNode(t)
  1032  	defer node3.stop()
  1033  
  1034  	node1.c.Configure("foo", []cluster.RemoteNode{node2.nodeInfo})
  1035  	node1.c.Configure("bar", []cluster.RemoteNode{node3.nodeInfo})
  1036  	node2.c.Configure("foo", []cluster.RemoteNode{node1.nodeInfo})
  1037  	node3.c.Configure("bar", []cluster.RemoteNode{node1.nodeInfo})
  1038  
  1039  	t.Run("Correct channel", func(t *testing.T) {
  1040  		var fromNode2 sync.WaitGroup
  1041  		fromNode2.Add(1)
  1042  		node1.handler.On("OnSubmit", "foo", node2.nodeInfo.ID, mock.Anything).Return(nil).Run(func(_ mock.Arguments) {
  1043  			fromNode2.Done()
  1044  		}).Once()
  1045  
  1046  		var fromNode3 sync.WaitGroup
  1047  		fromNode3.Add(1)
  1048  		node1.handler.On("OnSubmit", "bar", node3.nodeInfo.ID, mock.Anything).Return(nil).Run(func(_ mock.Arguments) {
  1049  			fromNode3.Done()
  1050  		}).Once()
  1051  
  1052  		node2toNode1, err := node2.c.Remote("foo", node1.nodeInfo.ID)
  1053  		assert.NoError(t, err)
  1054  		node3toNode1, err := node3.c.Remote("bar", node1.nodeInfo.ID)
  1055  		assert.NoError(t, err)
  1056  
  1057  		stream := assertEventualEstablishStream(t, node2toNode1)
  1058  		stream.Send(fooReq)
  1059  
  1060  		fromNode2.Wait()
  1061  		node1.handler.AssertNumberOfCalls(t, "OnSubmit", 1)
  1062  
  1063  		stream = assertEventualEstablishStream(t, node3toNode1)
  1064  		stream.Send(barReq)
  1065  
  1066  		fromNode3.Wait()
  1067  		node1.handler.AssertNumberOfCalls(t, "OnSubmit", 2)
  1068  	})
  1069  
  1070  	t.Run("Incorrect channel", func(t *testing.T) {
  1071  		node1.handler.On("OnSubmit", "foo", node2.nodeInfo.ID, mock.Anything).Return(nil)
  1072  		node1.handler.On("OnSubmit", "bar", node3.nodeInfo.ID, mock.Anything).Return(nil)
  1073  
  1074  		node2toNode1, err := node2.c.Remote("foo", node1.nodeInfo.ID)
  1075  		assert.NoError(t, err)
  1076  		node3toNode1, err := node3.c.Remote("bar", node1.nodeInfo.ID)
  1077  		assert.NoError(t, err)
  1078  
  1079  		assertEventualSendMessage(t, node2toNode1, &orderer.SubmitRequest{Channel: "foo"})
  1080  		stream, err := node2toNode1.NewStream(time.Hour)
  1081  		err = stream.Send(barReq)
  1082  		assert.NoError(t, err)
  1083  		_, err = stream.Recv()
  1084  		assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
  1085  
  1086  		assertEventualSendMessage(t, node3toNode1, &orderer.SubmitRequest{Channel: "bar"})
  1087  		stream, err = node3toNode1.NewStream(time.Hour)
  1088  		err = stream.Send(fooReq)
  1089  		assert.NoError(t, err)
  1090  		_, err = stream.Recv()
  1091  		assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
  1092  	})
  1093  }
  1094  
  1095  func TestConnectionFailure(t *testing.T) {
  1096  	t.Parallel()
  1097  	// Scenario: node 1 fails to connect to node 2.
  1098  
  1099  	node1 := newTestNode(t)
  1100  	defer node1.stop()
  1101  
  1102  	node2 := newTestNode(t)
  1103  	defer node2.stop()
  1104  
  1105  	dialer := &mocks.SecureDialer{}
  1106  	dialer.On("Dial", mock.Anything, mock.Anything).Return(nil, errors.New("oops"))
  1107  	node1.c.Connections = cluster.NewConnectionStore(dialer, &disabled.Gauge{})
  1108  	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
  1109  
  1110  	_, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
  1111  	assert.EqualError(t, err, "oops")
  1112  }
  1113  
  1114  type testMetrics struct {
  1115  	fakeProvider        *mocks.MetricsProvider
  1116  	egressQueueLength   metricsfakes.Gauge
  1117  	egressQueueCapacity metricsfakes.Gauge
  1118  	egressStreamCount   metricsfakes.Gauge
  1119  	egressTLSConnCount  metricsfakes.Gauge
  1120  	egressWorkerSize    metricsfakes.Gauge
  1121  	ingressStreamsCount metricsfakes.Gauge
  1122  	msgSendTime         metricsfakes.Histogram
  1123  	msgDropCount        metricsfakes.Counter
  1124  }
  1125  
  1126  func (tm *testMetrics) initialize() {
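         	// Each fake returns itself from With(...), so both the label pairs passed to
         	// With and the values later passed to Set/Observe/Add can be inspected on the
         	// same fake instance by the assertions in TestMetrics.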
  1127  	tm.egressQueueLength.WithReturns(&tm.egressQueueLength)
  1128  	tm.egressQueueCapacity.WithReturns(&tm.egressQueueCapacity)
  1129  	tm.egressStreamCount.WithReturns(&tm.egressStreamCount)
  1130  	tm.egressTLSConnCount.WithReturns(&tm.egressTLSConnCount)
  1131  	tm.egressWorkerSize.WithReturns(&tm.egressWorkerSize)
  1132  	tm.ingressStreamsCount.WithReturns(&tm.ingressStreamsCount)
  1133  	tm.msgSendTime.WithReturns(&tm.msgSendTime)
  1134  	tm.msgDropCount.WithReturns(&tm.msgDropCount)
  1135  
  1136  	fakeProvider := tm.fakeProvider
  1137  	fakeProvider.On("NewGauge", cluster.IngressStreamsCountOpts).Return(&tm.ingressStreamsCount)
  1138  	fakeProvider.On("NewGauge", cluster.EgressQueueLengthOpts).Return(&tm.egressQueueLength)
  1139  	fakeProvider.On("NewGauge", cluster.EgressQueueCapacityOpts).Return(&tm.egressQueueCapacity)
  1140  	fakeProvider.On("NewGauge", cluster.EgressStreamsCountOpts).Return(&tm.egressStreamCount)
  1141  	fakeProvider.On("NewGauge", cluster.EgressTLSConnectionCountOpts).Return(&tm.egressTLSConnCount)
  1142  	fakeProvider.On("NewGauge", cluster.EgressWorkersOpts).Return(&tm.egressWorkerSize)
  1143  	fakeProvider.On("NewCounter", cluster.MessagesDroppedCountOpts).Return(&tm.msgDropCount)
  1144  	fakeProvider.On("NewHistogram", cluster.MessageSendTimeOpts).Return(&tm.msgSendTime)
  1145  }
  1146  
  1147  func TestMetrics(t *testing.T) {
  1148  	t.Parallel()
  1149  
  1150  	for _, testCase := range []struct {
  1151  		name        string
  1152  		runTest     func(node1, node2 *clusterNode, testMetrics *testMetrics)
  1153  		testMetrics *testMetrics
  1154  	}{
  1155  		{
  1156  			name: "EgressQueueOccupancy",
  1157  			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
  1158  				assertBiDiCommunication(t, node1, node2, testReq)
  1159  				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "msg_type", "transaction", "channel", testChannel},
  1160  					testMetrics.egressQueueLength.WithArgsForCall(0))
  1161  				assert.Equal(t, float64(0), testMetrics.egressQueueLength.SetArgsForCall(0))
  1162  				assert.Equal(t, float64(1), testMetrics.egressQueueCapacity.SetArgsForCall(0))
  1163  
  1164  				var messageReceived sync.WaitGroup
  1165  				messageReceived.Add(1)
  1166  				node2.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
  1167  					messageReceived.Done()
  1168  				}).Return(nil)
  1169  
  1170  				rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
  1171  				assert.NoError(t, err)
  1172  
  1173  				stream := assertEventualEstablishStream(t, rm)
  1174  				stream.Send(testConsensusReq)
  1175  				messageReceived.Wait()
  1176  
  1177  				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "msg_type", "consensus", "channel", testChannel},
  1178  					testMetrics.egressQueueLength.WithArgsForCall(1))
  1179  				assert.Equal(t, float64(0), testMetrics.egressQueueLength.SetArgsForCall(1))
  1180  				assert.Equal(t, float64(1), testMetrics.egressQueueCapacity.SetArgsForCall(1))
  1181  			},
  1182  		},
  1183  		{
  1184  			name: "EgressStreamsCount",
  1185  			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
  1186  				assertBiDiCommunication(t, node1, node2, testReq)
  1187  				assert.Equal(t, 1, testMetrics.egressStreamCount.SetCallCount())
  1188  				assert.Equal(t, 1, testMetrics.egressStreamCount.WithCallCount())
  1189  				assert.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))
  1190  
  1191  				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
  1192  				assert.Equal(t, 2, testMetrics.egressStreamCount.SetCallCount())
  1193  				assert.Equal(t, 2, testMetrics.egressStreamCount.WithCallCount())
  1194  				assert.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))
  1195  			},
  1196  		},
  1197  		{
  1198  			name: "EgressTLSConnCount",
  1199  			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
  1200  				assertBiDiCommunication(t, node1, node2, testReq)
  1201  				assert.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))
  1202  
  1203  				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
  1204  				assert.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))
  1205  
  1206  				// A single TLS connection despite 2 streams
  1207  				assert.Equal(t, float64(1), testMetrics.egressTLSConnCount.SetArgsForCall(0))
  1208  				assert.Equal(t, 1, testMetrics.egressTLSConnCount.SetCallCount())
  1209  			},
  1210  		},
  1211  		{
  1212  			name: "EgressWorkerSize",
  1213  			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
  1214  				assertBiDiCommunication(t, node1, node2, testReq)
  1215  				assert.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))
  1216  
  1217  				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
  1218  				assert.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))
  1219  
  1220  				assert.Equal(t, float64(1), testMetrics.egressWorkerSize.SetArgsForCall(0))
  1221  				assert.Equal(t, float64(1), testMetrics.egressWorkerSize.SetArgsForCall(1))
  1222  			},
  1223  		},
  1224  		{
   1225  			name: "MsgSendTime",
  1226  			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
  1227  				assertBiDiCommunication(t, node1, node2, testReq)
  1228  				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "channel", testChannel},
  1229  					testMetrics.msgSendTime.WithArgsForCall(0))
  1230  
  1231  				assert.Equal(t, 1, testMetrics.msgSendTime.ObserveCallCount())
  1232  			},
  1233  		},
  1234  		{
  1235  			name: "MsgDropCount",
  1236  			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
  1237  				blockRecv := make(chan struct{})
  1238  				wasReported := func() bool {
  1239  					select {
  1240  					case <-blockRecv:
  1241  						return true
  1242  					default:
  1243  						return false
  1244  					}
  1245  				}
   1246  				// When the drop count is reported, unblock the server-side receive operation.
   1247  				testMetrics.msgDropCount.AddStub = func(_ float64) {
  1248  					if !wasReported() {
  1249  						close(blockRecv)
  1250  					}
  1251  				}
  1252  
  1253  				node2.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
  1254  					// Block until the message drop is reported
  1255  					<-blockRecv
  1256  				}).Return(nil)
  1257  
  1258  				rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
  1259  				assert.NoError(t, err)
  1260  
  1261  				stream := assertEventualEstablishStream(t, rm)
  1262  				// Send too many messages while the server side is not reading from the stream
  1263  				for {
  1264  					stream.Send(testConsensusReq)
  1265  					if wasReported() {
  1266  						break
  1267  					}
  1268  				}
  1269  				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "channel", testChannel},
  1270  					testMetrics.msgDropCount.WithArgsForCall(0))
  1271  				assert.Equal(t, 1, testMetrics.msgDropCount.AddCallCount())
  1272  			},
  1273  		},
  1274  	} {
  1275  		testCase := testCase
  1276  		t.Run(testCase.name, func(t *testing.T) {
  1277  			fakeProvider := &mocks.MetricsProvider{}
  1278  			testCase.testMetrics = &testMetrics{
  1279  				fakeProvider: fakeProvider,
  1280  			}
  1281  
  1282  			testCase.testMetrics.initialize()
  1283  
  1284  			node1 := newTestNodeWithMetrics(t, fakeProvider, &testCase.testMetrics.egressTLSConnCount)
  1285  			defer node1.stop()
  1286  
  1287  			node2 := newTestNode(t)
  1288  			defer node2.stop()
  1289  
  1290  			configForNode1 := []cluster.RemoteNode{node2.nodeInfo}
  1291  			configForNode2 := []cluster.RemoteNode{node1.nodeInfo}
  1292  			node1.c.Configure(testChannel, configForNode1)
  1293  			node2.c.Configure(testChannel, configForNode2)
  1294  			node1.c.Configure(testChannel2, configForNode1)
  1295  			node2.c.Configure(testChannel2, configForNode2)
  1296  
  1297  			testCase.runTest(node1, node2, testCase.testMetrics)
  1298  		})
  1299  	}
  1300  }
  1301  
  1302  func TestCertExpirationWarningEgress(t *testing.T) {
  1303  	t.Parallel()
  1304  	// Scenario: Ensures that when certificates are due to expire,
   1305  	// a warning is logged.
  1306  
  1307  	node1 := newTestNode(t)
  1308  	node2 := newTestNode(t)
  1309  
  1310  	cert, err := x509.ParseCertificate(node2.nodeInfo.ServerTLSCert)
  1311  	assert.NoError(t, err)
  1312  	assert.NotNil(t, cert)
  1313  
  1314  	// Let the NotAfter time of the certificate be T1, the current time be T0.
  1315  	// So time.Until is (T1 - T0), which means we have (T1 - T0) time left.
  1316  	// We want to trigger a warning, so we set the warning threshold to be 20 seconds above
  1317  	// the time left, so the time left would be smaller than the threshold.
  1318  	node1.c.CertExpWarningThreshold = time.Until(cert.NotAfter) + time.Second*20
  1319  	// We only alert once in 3 seconds
  1320  	node1.c.MinimumExpirationWarningInterval = time.Second * 3
  1321  
  1322  	defer node1.stop()
  1323  	defer node2.stop()
  1324  
  1325  	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
  1326  	node1.c.Configure(testChannel, config)
  1327  	node2.c.Configure(testChannel, config)
  1328  
  1329  	stub, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
  1330  	assert.NoError(t, err)
  1331  
  1332  	mockgRPC := &mocks.StepClient{}
  1333  	mockgRPC.On("Send", mock.Anything).Return(nil)
  1334  	mockgRPC.On("Context").Return(context.Background())
  1335  	mockClient := &mocks.ClusterClient{}
  1336  	mockClient.On("Step", mock.Anything).Return(mockgRPC, nil)
  1337  
  1338  	stub.Client = mockClient
  1339  
  1340  	stream := assertEventualEstablishStream(t, stub)
  1341  
  1342  	alerts := make(chan struct{}, 100)
  1343  
  1344  	stream.Logger = stream.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
  1345  		if strings.Contains(entry.Message, "expires in less than") {
  1346  			alerts <- struct{}{}
  1347  		}
  1348  		return nil
  1349  	}))
  1350  
   1351  	// Send a message to the node and expect an alert to be logged.
  1352  	stream.Send(wrapSubmitReq(testReq))
  1353  	select {
  1354  	case <-alerts:
  1355  	case <-time.After(time.Second * 5):
  1356  		t.Fatal("Should have logged an alert")
  1357  	}
   1358  	// Send another message, and ensure no alert is logged, because
   1359  	// alerts are suppressed until the minimum warning interval elapses.
  1360  	stream.Send(wrapSubmitReq(testReq))
  1361  	select {
  1362  	case <-alerts:
  1363  		t.Fatal("Should not have logged an alert")
  1364  	case <-time.After(time.Millisecond * 500):
  1365  	}
  1366  	// Wait enough time for the alert interval to clear.
  1367  	time.Sleep(node1.c.MinimumExpirationWarningInterval + time.Second)
   1368  	// Send a message again; this time an alert should be logged.
  1369  	stream.Send(wrapSubmitReq(testReq))
  1370  	select {
  1371  	case <-alerts:
  1372  	case <-time.After(time.Second * 5):
  1373  		t.Fatal("Should have logged an alert")
  1374  	}
  1375  }
  1376  
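         // assertBiDiCommunicationForChannel verifies that each of the two nodes can open a
         // stream to the other on the given channel, and that the submitted request reaches
         // the peer's handler intact.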
  1377  func assertBiDiCommunicationForChannel(t *testing.T, node1, node2 *clusterNode, msgToSend *orderer.SubmitRequest, channel string) {
  1378  	for _, tst := range []struct {
  1379  		label    string
  1380  		sender   *clusterNode
  1381  		receiver *clusterNode
  1382  		target   uint64
  1383  	}{
  1384  		{label: "1->2", sender: node1, target: node2.nodeInfo.ID, receiver: node2},
  1385  		{label: "2->1", sender: node2, target: node1.nodeInfo.ID, receiver: node1},
  1386  	} {
  1387  		t.Run(tst.label, func(t *testing.T) {
  1388  			stub, err := tst.sender.c.Remote(channel, tst.target)
  1389  			assert.NoError(t, err)
  1390  
  1391  			stream := assertEventualEstablishStream(t, stub)
  1392  
  1393  			var wg sync.WaitGroup
  1394  			wg.Add(1)
  1395  			tst.receiver.handler.On("OnSubmit", channel, tst.sender.nodeInfo.ID, mock.Anything).Return(nil).Once().Run(func(args mock.Arguments) {
  1396  				req := args.Get(2).(*orderer.SubmitRequest)
  1397  				assert.True(t, proto.Equal(req, msgToSend))
  1398  				wg.Done()
  1399  			})
  1400  
  1401  			err = stream.Send(wrapSubmitReq(msgToSend))
  1402  			assert.NoError(t, err)
  1403  
  1404  			wg.Wait()
  1405  		})
  1406  	}
  1407  }
  1408  
  1409  func assertBiDiCommunication(t *testing.T, node1, node2 *clusterNode, msgToSend *orderer.SubmitRequest) {
  1410  	assertBiDiCommunicationForChannel(t, node1, node2, msgToSend, testChannel)
  1411  }
  1412  
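         // assertEventualEstablishStream retries NewStream until the (asynchronously
         // established) connection is usable, failing the test if it doesn't succeed
         // within the timeout.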
  1413  func assertEventualEstablishStream(t *testing.T, rpc *cluster.RemoteContext) *cluster.Stream {
  1414  	var res *cluster.Stream
  1415  	gt := gomega.NewGomegaWithT(t)
  1416  	gt.Eventually(func() error {
  1417  		stream, err := rpc.NewStream(time.Hour)
  1418  		res = stream
  1419  		return err
  1420  	}, timeout).Should(gomega.Succeed())
  1421  	return res
  1422  }
  1423  
  1424  func assertEventualSendMessage(t *testing.T, rpc *cluster.RemoteContext, req *orderer.SubmitRequest) orderer.Cluster_StepClient {
  1425  	var res orderer.Cluster_StepClient
  1426  	gt := gomega.NewGomegaWithT(t)
  1427  	gt.Eventually(func() error {
  1428  		stream, err := rpc.NewStream(time.Hour)
  1429  		if err != nil {
  1430  			return err
  1431  		}
  1432  		res = stream
  1433  		return stream.Send(wrapSubmitReq(req))
  1434  	}, timeout).Should(gomega.Succeed())
  1435  	return res
  1436  }
  1437  
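         // wrapSubmitReq wraps a SubmitRequest in the StepRequest envelope expected by the
         // Step RPC.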
  1438  func wrapSubmitReq(req *orderer.SubmitRequest) *orderer.StepRequest {
  1439  	return &orderer.StepRequest{
  1440  		Payload: &orderer.StepRequest_SubmitRequest{
  1441  			SubmitRequest: req,
  1442  		},
  1443  	}
  1444  }