github.com/osdi23p228/fabric@v0.0.0-20221218062954-77808885f5db/orderer/common/cluster/comm_test.go

/*
Copyright IBM Corp. 2017 All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
*/

package cluster_test

import (
	"context"
	"crypto/rand"
	"crypto/x509"
	"fmt"
	"net"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/golang/protobuf/proto"
	"github.com/hyperledger/fabric-protos-go/common"
	"github.com/hyperledger/fabric-protos-go/orderer"
	"github.com/onsi/gomega"
	"github.com/osdi23p228/fabric/common/crypto"
	"github.com/osdi23p228/fabric/common/crypto/tlsgen"
	"github.com/osdi23p228/fabric/common/flogging"
	"github.com/osdi23p228/fabric/common/metrics"
	"github.com/osdi23p228/fabric/common/metrics/disabled"
	"github.com/osdi23p228/fabric/common/metrics/metricsfakes"
	comm_utils "github.com/osdi23p228/fabric/internal/pkg/comm"
	"github.com/osdi23p228/fabric/orderer/common/cluster"
	"github.com/osdi23p228/fabric/orderer/common/cluster/mocks"
	"github.com/pkg/errors"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/mock"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
	"google.golang.org/grpc"
)

const (
	testChannel  = "test"
	testChannel2 = "test2"
	timeout      = time.Second * 10
)

var (
	// CA that generates TLS key-pairs.
	// We use only one CA because the authentication
	// is based on TLS pinning
	ca = createCAOrPanic()

	lastNodeID uint64

	testSubReq = &orderer.SubmitRequest{
		Channel: "test",
	}

	testReq = &orderer.SubmitRequest{
		Channel: "test",
		Payload: &common.Envelope{
			Payload: []byte("test"),
		},
	}

	testReq2 = &orderer.SubmitRequest{
		Channel: testChannel2,
		Payload: &common.Envelope{
			Payload: []byte(testChannel2),
		},
	}

	testRes = &orderer.SubmitResponse{
		Info: "test",
	}

	fooReq = wrapSubmitReq(&orderer.SubmitRequest{
		Channel: "foo",
	})

	fooRes = &orderer.SubmitResponse{
		Info: "foo",
	}

	barReq = wrapSubmitReq(&orderer.SubmitRequest{
		Channel: "bar",
	})

	barRes = &orderer.SubmitResponse{
		Info: "bar",
	}

	testConsensusReq = &orderer.StepRequest{
		Payload: &orderer.StepRequest_ConsensusRequest{
			ConsensusRequest: &orderer.ConsensusRequest{
				Payload: []byte{1, 2, 3},
				Channel: testChannel,
			},
		},
	}

	channelExtractor = &mockChannelExtractor{}
)

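// nextUnusedID atomically increments the package-level counter, so every
// test node created in this file gets a distinct node ID.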
func nextUnusedID() uint64 {
	return atomic.AddUint64(&lastNodeID, 1)
}

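// createCAOrPanic builds the shared test CA; the tests cannot run without
// it, so any failure panics at package initialization.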
func createCAOrPanic() tlsgen.CA {
	ca, err := tlsgen.NewCA()
	if err != nil {
		panic(fmt.Sprintf("failed creating CA: %+v", err))
	}
	return ca
}

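// mockChannelExtractor performs the channel extraction that Comm relies on
// for routing: it reads the Channel field of consensus and submit requests,
// and returns the empty string for any other message type.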
type mockChannelExtractor struct{}

func (*mockChannelExtractor) TargetChannel(msg proto.Message) string {
	switch req := msg.(type) {
	case *orderer.ConsensusRequest:
		return req.Channel
	case *orderer.SubmitRequest:
		return req.Channel
	default:
		return ""
	}
}

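// clusterNode is a single cluster member under test: a Comm instance along
// with its gRPC server, the dialer and TLS material used to reach it, and a
// mock handler that records the requests dispatched to it.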
type clusterNode struct {
	lock         sync.Mutex
	frozen       bool
	freezeCond   sync.Cond
	dialer       *cluster.PredicateDialer
	handler      *mocks.Handler
	nodeInfo     cluster.RemoteNode
	srv          *comm_utils.GRPCServer
	bindAddress  string
	clientConfig comm_utils.ClientConfig
	serverConfig comm_utils.ServerConfig
	c            *cluster.Comm
}

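// Step implements orderer.ClusterServer. Submit requests are dispatched
// without a reply, while consensus requests are acknowledged with an empty
// StepResponse.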
func (cn *clusterNode) Step(stream orderer.Cluster_StepServer) error {
	cn.waitIfFrozen()
	req, err := stream.Recv()
	if err != nil {
		return err
	}
	if submitReq := req.GetSubmitRequest(); submitReq != nil {
		return cn.c.DispatchSubmit(stream.Context(), submitReq)
	}
	if err := cn.c.DispatchConsensus(stream.Context(), req.GetConsensusRequest()); err != nil {
		return err
	}
	return stream.Send(&orderer.StepResponse{})
}

func (cn *clusterNode) waitIfFrozen() {
	cn.lock.Lock()
	// There is no freeze after an unfreeze, so there is
	// no need for a loop around the Wait.
	if cn.frozen {
		cn.freezeCond.Wait()
		// Wait re-acquires the lock before returning, so release it here.
		cn.lock.Unlock()
		return
	}
	cn.lock.Unlock()
}

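// freeze makes subsequent Step invocations block in waitIfFrozen until
// unfreeze is called.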
func (cn *clusterNode) freeze() {
	cn.lock.Lock()
	defer cn.lock.Unlock()
	cn.frozen = true
}

func (cn *clusterNode) unfreeze() {
	cn.lock.Lock()
	cn.frozen = false
	cn.lock.Unlock()
	cn.freezeCond.Broadcast()
}

func (cn *clusterNode) resurrect() {
	gRPCServer, err := comm_utils.NewGRPCServer(cn.bindAddress, cn.serverConfig)
	if err != nil {
		panic(fmt.Errorf("failed starting gRPC server: %v", err))
	}
	cn.srv = gRPCServer
	orderer.RegisterClusterServer(gRPCServer.Server(), cn)
	go cn.srv.Start()
}

func (cn *clusterNode) stop() {
	cn.srv.Stop()
	cn.c.Shutdown()
}

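// renewCertificates swaps in freshly issued client and server key-pairs from
// the shared CA. The gRPC server keeps serving with its old certificate
// until it is restarted via resurrect().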
func (cn *clusterNode) renewCertificates() {
	clientKeyPair, err := ca.NewClientCertKeyPair()
	if err != nil {
		panic(fmt.Errorf("failed creating client certificate %v", err))
	}
	serverKeyPair, err := ca.NewServerCertKeyPair("127.0.0.1")
	if err != nil {
		panic(fmt.Errorf("failed creating server certificate %v", err))
	}

	cn.nodeInfo.ClientTLSCert = clientKeyPair.TLSCert.Raw
	cn.nodeInfo.ServerTLSCert = serverKeyPair.TLSCert.Raw

	cn.serverConfig.SecOpts.Certificate = serverKeyPair.Cert
	cn.serverConfig.SecOpts.Key = serverKeyPair.Key

	cn.dialer.Config.SecOpts.Key = clientKeyPair.Key
	cn.dialer.Config.SecOpts.Certificate = clientKeyPair.Cert
}

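// newTestNodeWithMetrics spins up a clusterNode on an ephemeral 127.0.0.1
// port. Authentication is mutual TLS pinned to the shared test CA, and the
// node's Comm is created with a send buffer of size 1.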
func newTestNodeWithMetrics(t *testing.T, metrics cluster.MetricsProvider, tlsConnGauge metrics.Gauge) *clusterNode {
	serverKeyPair, err := ca.NewServerCertKeyPair("127.0.0.1")
	assert.NoError(t, err)

	clientKeyPair, err := ca.NewClientCertKeyPair()
	assert.NoError(t, err)

	handler := &mocks.Handler{}
	clientConfig := comm_utils.ClientConfig{
		AsyncConnect: true,
		Timeout:      time.Hour,
		SecOpts: comm_utils.SecureOptions{
			RequireClientCert: true,
			Key:               clientKeyPair.Key,
			Certificate:       clientKeyPair.Cert,
			ServerRootCAs:     [][]byte{ca.CertBytes()},
			UseTLS:            true,
			ClientRootCAs:     [][]byte{ca.CertBytes()},
		},
	}

	dialer := &cluster.PredicateDialer{
		Config: clientConfig,
	}

	srvConfig := comm_utils.ServerConfig{
		SecOpts: comm_utils.SecureOptions{
			Key:         serverKeyPair.Key,
			Certificate: serverKeyPair.Cert,
			UseTLS:      true,
		},
	}
	gRPCServer, err := comm_utils.NewGRPCServer("127.0.0.1:", srvConfig)
	assert.NoError(t, err)

	tstSrv := &clusterNode{
		dialer:       dialer,
		clientConfig: clientConfig,
		serverConfig: srvConfig,
		bindAddress:  gRPCServer.Address(),
		handler:      handler,
		nodeInfo: cluster.RemoteNode{
			Endpoint:      gRPCServer.Address(),
			ID:            nextUnusedID(),
			ServerTLSCert: serverKeyPair.TLSCert.Raw,
			ClientTLSCert: clientKeyPair.TLSCert.Raw,
		},
		srv: gRPCServer,
	}

	tstSrv.freezeCond.L = &tstSrv.lock

	compareCert := cluster.CachePublicKeyComparisons(func(a, b []byte) bool {
		return crypto.CertificatesWithSamePublicKey(a, b) == nil
	})

	tstSrv.c = &cluster.Comm{
		CertExpWarningThreshold: time.Hour,
		SendBufferSize:          1,
		Logger:                  flogging.MustGetLogger("test"),
		Chan2Members:            make(cluster.MembersByChannel),
		H:                       handler,
		ChanExt:                 channelExtractor,
		Connections:             cluster.NewConnectionStore(dialer, tlsConnGauge),
		Metrics:                 cluster.NewMetrics(metrics),
		CompareCertificate:      compareCert,
	}

	orderer.RegisterClusterServer(gRPCServer.Server(), tstSrv)
	go gRPCServer.Start()
	return tstSrv
}

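// newTestNode is newTestNodeWithMetrics with metrics collection disabled.
//
// A minimal usage sketch (illustrative only, mirroring TestBasic below):
//
//	node1, node2 := newTestNode(t), newTestNode(t)
//	defer node1.stop()
//	defer node2.stop()
//	members := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
//	node1.c.Configure(testChannel, members)
//	node2.c.Configure(testChannel, members)
//	rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)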
func newTestNode(t *testing.T) *clusterNode {
	return newTestNodeWithMetrics(t, &disabled.Provider{}, &disabled.Gauge{})
}

func TestSendBigMessage(t *testing.T) {
	// Scenario: Basic test that spawns 5 nodes and sends a big message
	// from one of the nodes to the others.
	// A receiver node's Step() server side method (which calls Recv)
	// is frozen until the sending node's Send method returns;
	// hence the sender finishes calling Send
	// before a receiver node starts calling Recv.
	// This ensures that Send is non-blocking even with big messages.
	// In the test, we send a total of 8MB of random data (2MB to each node).
	// The randomness is used so gRPC compression won't compress it to a lower size.

	node1 := newTestNode(t)
	node2 := newTestNode(t)
	node3 := newTestNode(t)
	node4 := newTestNode(t)
	node5 := newTestNode(t)

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		node.c.SendBufferSize = 1
	}

	defer node1.stop()
	defer node2.stop()
	defer node3.stop()
	defer node4.stop()
	defer node5.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo, node3.nodeInfo, node4.nodeInfo, node5.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)
	node3.c.Configure(testChannel, config)
	node4.c.Configure(testChannel, config)
	node5.c.Configure(testChannel, config)

	var messageReceived sync.WaitGroup
	messageReceived.Add(4)

	msgSize := 1024 * 1024 * 2
	bigMsg := &orderer.ConsensusRequest{
		Channel: testChannel,
		Payload: make([]byte, msgSize),
	}

	_, err := rand.Read(bigMsg.Payload)
	assert.NoError(t, err)

	wrappedMsg := &orderer.StepRequest{
		Payload: &orderer.StepRequest_ConsensusRequest{
			ConsensusRequest: bigMsg,
		},
	}

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		node.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
			msg := args.Get(2).(*orderer.ConsensusRequest)
			assert.Len(t, msg.Payload, msgSize)
			messageReceived.Done()
		}).Return(nil)
	}

	streams := map[uint64]*cluster.Stream{}

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		// Freeze the node, in order to block its Recv
		node.freeze()
	}

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		rm, err := node1.c.Remote(testChannel, node.nodeInfo.ID)
		assert.NoError(t, err)

		stream := assertEventualEstablishStream(t, rm)
		streams[node.nodeInfo.ID] = stream
	}

	t0 := time.Now()
	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		stream := streams[node.nodeInfo.ID]

		t1 := time.Now()
		err = stream.Send(wrappedMsg)
		assert.NoError(t, err)
		t.Log("Sending took", time.Since(t1))

		// Unfreeze the node. It can now call Recv, and signal the messageReceived waitGroup.
		node.unfreeze()
	}

	t.Log("Total sending time to all 4 nodes took:", time.Since(t0))

	messageReceived.Wait()
}

func TestBlockingSend(t *testing.T) {
	// Scenario: Basic test that spawns 2 nodes and sends three SubmitRequests,
	// or three consensus requests, from the first node to the second.
	// SubmitRequests should block, but consensus requests should not.

	for _, testCase := range []struct {
		description        string
		messageToSend      *orderer.StepRequest
		streamUnblocks     bool
		elapsedGreaterThan time.Duration
		overflowErr        string
	}{
		{
			description:        "SubmitRequest",
			messageToSend:      wrapSubmitReq(testReq),
			streamUnblocks:     true,
			elapsedGreaterThan: time.Second / 2,
		},
		{
			description:   "ConsensusRequest",
			messageToSend: testConsensusReq,
			overflowErr:   "send queue overflown",
		},
	} {
		t.Run(testCase.description, func(t *testing.T) {
			node1 := newTestNode(t)
			node2 := newTestNode(t)

			node1.c.SendBufferSize = 1
			node2.c.SendBufferSize = 1

			defer node1.stop()
			defer node2.stop()

			config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
			node1.c.Configure(testChannel, config)
			node2.c.Configure(testChannel, config)

			rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
			assert.NoError(t, err)

			client := &mocks.ClusterClient{}
			fakeStream := &mocks.StepClient{}

			// Replace real client with a mock client
			rm.Client = client
			rm.ProbeConn = func(_ *grpc.ClientConn) error {
				return nil
			}
			// Configure client to return the mock stream
			fakeStream.On("Context", mock.Anything).Return(context.Background())
			client.On("Step", mock.Anything).Return(fakeStream, nil).Once()

			unBlock := make(chan struct{})
			var sendInvoked sync.WaitGroup
			sendInvoked.Add(1)
			var once sync.Once
			fakeStream.On("Send", mock.Anything).Run(func(_ mock.Arguments) {
				once.Do(sendInvoked.Done)
				<-unBlock
			}).Return(errors.New("oops"))

			stream, err := rm.NewStream(time.Hour)
			assert.NoError(t, err)

			// The first send doesn't block, even though the Send operation blocks.
			err = stream.Send(testCase.messageToSend)
			assert.NoError(t, err)

			// The second one doesn't block either.
			// After this point, we have 1 goroutine which is blocked on Send(),
			// and one message in the buffer.
			sendInvoked.Wait()
			err = stream.Send(testCase.messageToSend)
			assert.NoError(t, err)

			// The third blocks, so we need to unblock it ourselves
			// in order for it to go through, unless the operation
			// is non-blocking.
			go func() {
				time.Sleep(time.Second)
				if testCase.streamUnblocks {
					close(unBlock)
				}
			}()

			t1 := time.Now()
			err = stream.Send(testCase.messageToSend)
			// The third send always overflows or blocks.
			// If we expect to receive an overflow error - assert it.
			if testCase.overflowErr != "" {
				assert.EqualError(t, err, testCase.overflowErr)
			}
			elapsed := time.Since(t1)
			t.Log("Elapsed time:", elapsed)
			assert.True(t, elapsed > testCase.elapsedGreaterThan)

			if !testCase.streamUnblocks {
				close(unBlock)
			}
		})
	}
}

func TestBasic(t *testing.T) {
	// Scenario: Basic test that spawns 2 nodes which send each other
	// messages that are expected to be echoed back

	node1 := newTestNode(t)
	node2 := newTestNode(t)

	defer node1.stop()
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	assertBiDiCommunication(t, node1, node2, testReq)
}

func TestUnavailableHosts(t *testing.T) {
	// Scenario: A node is configured to connect
	// to a host that is down
	node1 := newTestNode(t)

	clientConfig := node1.dialer.Config
	// The below timeout makes sure that connection establishment is done
	// asynchronously. Had it been synchronous, the Remote() call would be
	// blocked for an hour.
	clientConfig.Timeout = time.Hour
	defer node1.stop()

	node2 := newTestNode(t)
	node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	remote, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)
	assert.NotNil(t, remote)

	_, err = remote.NewStream(time.Millisecond * 100)
	assert.Contains(t, err.Error(), "connection")
}

func TestStreamAbort(t *testing.T) {
	// Scenario: node 1 is connected to node 2 in 2 channels,
	// and the consumer of the communication calls receive.
	// Two sub-scenarios are exercised:
	// 1) The server certificate of node 2 changes in the first channel
	// 2) Node 2 is evicted from the membership of the first channel
	// In both scenarios, the Recv() call should be aborted

	node2 := newTestNode(t)
	defer node2.stop()

	invalidNodeInfo := cluster.RemoteNode{
		ID:            node2.nodeInfo.ID,
		ServerTLSCert: []byte{1, 2, 3},
		ClientTLSCert: []byte{1, 2, 3},
	}

	for _, tst := range []struct {
		testName      string
		membership    []cluster.RemoteNode
		expectedError string
	}{
		{
			testName:      "Evicted from membership",
			membership:    nil,
			expectedError: "rpc error: code = Canceled desc = context canceled",
		},
		{
			testName:      "Changed TLS certificate",
			membership:    []cluster.RemoteNode{invalidNodeInfo},
			expectedError: "rpc error: code = Canceled desc = context canceled",
		},
	} {
		t.Run(tst.testName, func(t *testing.T) {
			testStreamAbort(t, node2, tst.membership, tst.expectedError)
		})
	}
	node2.handler.AssertNumberOfCalls(t, "OnSubmit", 2)
}

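// testStreamAbort sends a submit request from a fresh node 1 to node2,
// reconfigures node 1's membership for testChannel while the server-side
// handler is still blocked, and asserts that the client stream's Recv
// aborts with expectedError.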
func testStreamAbort(t *testing.T, node2 *clusterNode, newMembership []cluster.RemoteNode, expectedError string) {
	node1 := newTestNode(t)
	defer node1.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
	node1.c.Configure(testChannel2, []cluster.RemoteNode{node2.nodeInfo})
	node2.c.Configure(testChannel2, []cluster.RemoteNode{node1.nodeInfo})

	var streamCreated sync.WaitGroup
	streamCreated.Add(1)

	stopChan := make(chan struct{})

	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(_ mock.Arguments) {
		// Notify the stream was created
		streamCreated.Done()
		// Wait for the test to finish
		<-stopChan
	}).Return(nil).Once()

	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)

	go func() {
		stream := assertEventualEstablishStream(t, rm1)
		// Signal the reconfiguration
		err = stream.Send(wrapSubmitReq(testReq))
		assert.NoError(t, err)
		_, err := stream.Recv()
		assert.Contains(t, err.Error(), expectedError)
		close(stopChan)
	}()

	go func() {
		// Wait for the stream reference to be obtained
		streamCreated.Wait()
		// Reconfigure the channel membership
		node1.c.Configure(testChannel, newMembership)
	}()

	<-stopChan
}

func TestDoubleReconfigure(t *testing.T) {
	// Scenario: Basic test that spawns 2 nodes
	// and configures node 1 twice, and checks that
	// the remote stub to node 2 wasn't re-created in the second
	// configuration since it already existed

	node1 := newTestNode(t)
	node2 := newTestNode(t)

	defer node1.stop()
	defer node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	rm2, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)
	// Ensure the references are equal
	assert.True(t, rm1 == rm2)
}

func TestInvalidChannel(t *testing.T) {
	// Scenario: node 1 is ordered to send a message on a channel
	// that doesn't exist, and also receives a message, but
	// the channel cannot be extracted from the message.

	t.Run("channel doesn't exist", func(t *testing.T) {
		node1 := newTestNode(t)
		defer node1.stop()

		_, err := node1.c.Remote(testChannel, 0)
		assert.EqualError(t, err, "channel test doesn't exist")
	})

	t.Run("channel cannot be extracted", func(t *testing.T) {
		node1 := newTestNode(t)
		defer node1.stop()

		node1.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
		gt := gomega.NewGomegaWithT(t)
		gt.Eventually(func() (bool, error) {
			_, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
			return true, err
		}, time.Minute).Should(gomega.BeTrue())

		stub, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
		assert.NoError(t, err)

		stream := assertEventualEstablishStream(t, stub)

		// An empty SubmitRequest has an empty channel which is invalid
		err = stream.Send(wrapSubmitReq(&orderer.SubmitRequest{}))
		assert.NoError(t, err)

		_, err = stream.Recv()
		assert.EqualError(t, err, "rpc error: code = Unknown desc = badly formatted message, cannot extract channel")

		// Test directly without going through the gRPC stream
		err = node1.c.DispatchSubmit(context.Background(), &orderer.SubmitRequest{})
		assert.EqualError(t, err, "badly formatted message, cannot extract channel")
	})
}

func TestAbortRPC(t *testing.T) {
	// Scenarios:
	// (I) The node calls an RPC, and calls Abort() on the remote context
	// in parallel. The RPC should return even though the server-side call hasn't finished.
	// (II) The node calls an RPC, but the server-side processing takes too long,
	// and the RPC invocation returns prematurely.

	testCases := []struct {
		name        string
		abortFunc   func(*cluster.RemoteContext)
		rpcTimeout  time.Duration
		expectedErr string
	}{
		{
			name:        "Abort() called",
			expectedErr: "rpc error: code = Canceled desc = context canceled",
			rpcTimeout:  time.Hour,
			abortFunc: func(rc *cluster.RemoteContext) {
				rc.Abort()
			},
		},
		{
			name:        "RPC timeout",
			expectedErr: "rpc timeout expired",
			rpcTimeout:  time.Second,
			abortFunc:   func(*cluster.RemoteContext) {},
		},
	}

	for _, testCase := range testCases {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			testAbort(t, testCase.abortFunc, testCase.rpcTimeout, testCase.expectedErr)
		})
	}
}

func testAbort(t *testing.T, abortFunc func(*cluster.RemoteContext), rpcTimeout time.Duration, expectedErr string) {
	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)
	var onStepCalled sync.WaitGroup
	onStepCalled.Add(1)

	// stuckCall ensures the OnStep() call is stuck throughout this test
	var stuckCall sync.WaitGroup
	stuckCall.Add(1)
	// At the end of the test, release the server-side resources
	defer stuckCall.Done()

	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(nil).Once().Run(func(_ mock.Arguments) {
		onStepCalled.Done()
		stuckCall.Wait()
	})

	rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)

	go func() {
		onStepCalled.Wait()
		abortFunc(rm)
	}()

	var stream *cluster.Stream
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		stream, err = rm.NewStream(rpcTimeout)
		return err
	}, time.Second*10, time.Millisecond*10).Should(gomega.Succeed())

	stream.Send(wrapSubmitReq(testSubReq))
	_, err = stream.Recv()

	assert.EqualError(t, err, expectedErr)

	node2.handler.AssertNumberOfCalls(t, "OnSubmit", 1)
}

func TestNoTLSCertificate(t *testing.T) {
	// Scenario: The node is sent a message by another node that doesn't
	// connect with mutual TLS, thus doesn't provide a TLS certificate
	node1 := newTestNode(t)
	defer node1.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})

	clientConfig := comm_utils.ClientConfig{
		AsyncConnect: true,
		Timeout:      time.Millisecond * 100,
		SecOpts: comm_utils.SecureOptions{
			ServerRootCAs: [][]byte{ca.CertBytes()},
			UseTLS:        true,
		},
	}
	cl, err := comm_utils.NewGRPCClient(clientConfig)
	assert.NoError(t, err)

	var conn *grpc.ClientConn
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() (bool, error) {
		conn, err = cl.NewConnection(node1.srv.Address())
		return true, err
	}, time.Minute).Should(gomega.BeTrue())

	echoClient := orderer.NewClusterClient(conn)
	stream, err := echoClient.Step(context.Background())
	assert.NoError(t, err)

	err = stream.Send(wrapSubmitReq(testSubReq))
	assert.NoError(t, err)
	_, err = stream.Recv()
	assert.EqualError(t, err, "rpc error: code = Unknown desc = no TLS certificate sent")
}

func TestReconnect(t *testing.T) {
	// Scenario: node 1 and node 2 are connected,
	// and node 2 is taken offline.
	// Node 1 tries to send a message to node 2 but fails;
	// afterwards node 2 is brought back, after which
	// node 1 sends more messages, and sending a message
	// to node 2 should eventually succeed.

	node1 := newTestNode(t)
	defer node1.stop()
	conf := node1.dialer.Config
	conf.Timeout = time.Hour

	node2 := newTestNode(t)
	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(nil)
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	// Make node 2 be offline by shutting down its gRPC service
	node2.srv.Stop()
	// Obtain the stub for node 2.
	// Should succeed, because the connection was created at time of configuration
	stub, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)

	// Try to obtain a stream. Should not succeed.
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		_, err = stub.NewStream(time.Hour)
		return err
	}).Should(gomega.Not(gomega.Succeed()))

	// Wait for the port to be released
	for {
		lsnr, err := net.Listen("tcp", node2.nodeInfo.Endpoint)
		if err == nil {
			lsnr.Close()
			break
		}
		// Back off briefly instead of spinning while the OS releases the port.
		time.Sleep(time.Millisecond * 10)
	}

	// Resurrect node 2
	node2.resurrect()
	// Send a message from node 1 to node 2.
	// Should succeed eventually
	assertEventualSendMessage(t, stub, testReq)
}

func TestRenewCertificates(t *testing.T) {
	// Scenario: node 1 and node 2 are connected,
	// and the certificates are renewed for both nodes
	// at the same time.
	// They are expected to connect to one another
	// after the reconfiguration.

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node1.handler.On("OnStep", testChannel, node2.nodeInfo.ID, mock.Anything).Return(testRes, nil)
	node2.handler.On("OnStep", testChannel, node1.nodeInfo.ID, mock.Anything).Return(testRes, nil)

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	assertBiDiCommunication(t, node1, node2, testReq)

	// Now, renew the certificates for both nodes
	node1.renewCertificates()
	node2.renewCertificates()

	// Reconfigure them
	config = []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	// W.L.O.G, try to send a message from node1 to node2
	// It should fail, because node2's server certificate has now changed,
	// so it closed the connection to the remote node
	info2 := node2.nodeInfo
	remote, err := node1.c.Remote(testChannel, info2.ID)
	assert.NoError(t, err)
	assert.NotNil(t, remote)

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() string {
		_, err = remote.NewStream(time.Hour)
		return err.Error()
	}, timeout).Should(gomega.ContainSubstring(info2.Endpoint))

	// Restart the gRPC service on both nodes, to load the new TLS certificates
	node1.srv.Stop()
	node1.resurrect()
	node2.srv.Stop()
	node2.resurrect()

	// Finally, check that the nodes can communicate once again
	assertBiDiCommunication(t, node1, node2, testReq)
}

func TestMembershipReconfiguration(t *testing.T) {
	// Scenario: node 1 and node 2 are started up
	// and node 2 is configured to know about node 1,
	// without node 1 knowing about node 2.
	// The communication between them should only work
	// after node 1 is configured to know about node 2.

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{})
	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})

	// Node 1 can't connect to node 2 because it doesn't know its TLS certificate yet
	_, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.EqualError(t, err, fmt.Sprintf("node %d doesn't exist in channel test's membership", node2.nodeInfo.ID))
	// Node 2 can connect to node 1, but it can't send it messages because node 1 doesn't know node 2 yet.

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() (bool, error) {
		_, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
		return true, err
	}, time.Minute).Should(gomega.BeTrue())

	stub, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
	assert.NoError(t, err)

	stream := assertEventualEstablishStream(t, stub)
	err = stream.Send(wrapSubmitReq(testSubReq))
	assert.NoError(t, err)

	_, err = stream.Recv()
	assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")

	// Next, configure node 1 to know about node 2
	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})

	// Check that the communication works correctly between both nodes
	assertBiDiCommunication(t, node1, node2, testReq)
	assertBiDiCommunication(t, node2, node1, testReq)

	// Reconfigure node 2 to forget about node 1
	node2.c.Configure(testChannel, []cluster.RemoteNode{})
	// Node 1 can still connect to node 2
	stub, err = node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)
	// But it can't send a message, because node 2 no longer authorizes node 1
	stream = assertEventualEstablishStream(t, stub)
	stream.Send(wrapSubmitReq(testSubReq))
	_, err = stream.Recv()
	assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
}

func TestShutdown(t *testing.T) {
	// Scenario: node 1 is shut down and as a result, can't
	// send messages to anyone, nor can it be reconfigured

	node1 := newTestNode(t)
	defer node1.stop()

	node1.c.Shutdown()

	// Obtaining a RemoteContext cannot succeed because shutdown was called before
	_, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
	assert.EqualError(t, err, "communication has been shut down")

	node2 := newTestNode(t)
	defer node2.stop()

	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
	// Configuration of node 1 doesn't take place, because it was already shut down
	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		_, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
		return err
	}, time.Minute).Should(gomega.Succeed())

	stub, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
	assert.NoError(t, err)

	// Therefore, sending a message doesn't succeed because node 1 rejected the configuration change
	gt.Eventually(func() string {
		stream, err := stub.NewStream(time.Hour)
		if err != nil {
			return err.Error()
		}
		err = stream.Send(wrapSubmitReq(testSubReq))
		assert.NoError(t, err)

		_, err = stream.Recv()
		return err.Error()
	}, timeout).Should(gomega.ContainSubstring("channel test doesn't exist"))
}

func TestMultiChannelConfig(t *testing.T) {
	// Scenario: node 1 knows node 2 only in channel "foo"
	// and knows node 3 only in channel "bar".
	// Messages that are received are routed according to their corresponding channels,
	// and when node 2 sends a message for channel "bar" to node 1, it is rejected.
	// The same applies for node 3 sending a message to node 1 in channel "foo".

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node3 := newTestNode(t)
	defer node3.stop()

	node1.c.Configure("foo", []cluster.RemoteNode{node2.nodeInfo})
	node1.c.Configure("bar", []cluster.RemoteNode{node3.nodeInfo})
	node2.c.Configure("foo", []cluster.RemoteNode{node1.nodeInfo})
	node3.c.Configure("bar", []cluster.RemoteNode{node1.nodeInfo})

	t.Run("Correct channel", func(t *testing.T) {
		var fromNode2 sync.WaitGroup
		fromNode2.Add(1)
		node1.handler.On("OnSubmit", "foo", node2.nodeInfo.ID, mock.Anything).Return(nil).Run(func(_ mock.Arguments) {
			fromNode2.Done()
		}).Once()

		var fromNode3 sync.WaitGroup
		fromNode3.Add(1)
		node1.handler.On("OnSubmit", "bar", node3.nodeInfo.ID, mock.Anything).Return(nil).Run(func(_ mock.Arguments) {
			fromNode3.Done()
		}).Once()

		node2toNode1, err := node2.c.Remote("foo", node1.nodeInfo.ID)
		assert.NoError(t, err)
		node3toNode1, err := node3.c.Remote("bar", node1.nodeInfo.ID)
		assert.NoError(t, err)

		stream := assertEventualEstablishStream(t, node2toNode1)
		stream.Send(fooReq)

		fromNode2.Wait()
		node1.handler.AssertNumberOfCalls(t, "OnSubmit", 1)

		stream = assertEventualEstablishStream(t, node3toNode1)
		stream.Send(barReq)

		fromNode3.Wait()
		node1.handler.AssertNumberOfCalls(t, "OnSubmit", 2)
	})

	t.Run("Incorrect channel", func(t *testing.T) {
		node1.handler.On("OnSubmit", "foo", node2.nodeInfo.ID, mock.Anything).Return(nil)
		node1.handler.On("OnSubmit", "bar", node3.nodeInfo.ID, mock.Anything).Return(nil)

		node2toNode1, err := node2.c.Remote("foo", node1.nodeInfo.ID)
		assert.NoError(t, err)
		node3toNode1, err := node3.c.Remote("bar", node1.nodeInfo.ID)
		assert.NoError(t, err)

		assertEventualSendMessage(t, node2toNode1, &orderer.SubmitRequest{Channel: "foo"})
		stream, err := node2toNode1.NewStream(time.Hour)
		assert.NoError(t, err)
		err = stream.Send(barReq)
		assert.NoError(t, err)
		_, err = stream.Recv()
		assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")

		assertEventualSendMessage(t, node3toNode1, &orderer.SubmitRequest{Channel: "bar"})
		stream, err = node3toNode1.NewStream(time.Hour)
		assert.NoError(t, err)
		err = stream.Send(fooReq)
		assert.NoError(t, err)
		_, err = stream.Recv()
		assert.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
	})
}

func TestConnectionFailure(t *testing.T) {
	// Scenario: node 1 fails to connect to node 2.

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	dialer := &mocks.SecureDialer{}
	dialer.On("Dial", mock.Anything, mock.Anything).Return(nil, errors.New("oops"))
	node1.c.Connections = cluster.NewConnectionStore(dialer, &disabled.Gauge{})
	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})

	_, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.EqualError(t, err, "oops")
}

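// testMetrics collects the fakes handed out by the MetricsProvider mock, so
// tests can inspect the gauge, counter and histogram calls that Comm makes.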
type testMetrics struct {
	fakeProvider        *mocks.MetricsProvider
	egressQueueLength   metricsfakes.Gauge
	egressQueueCapacity metricsfakes.Gauge
	egressStreamCount   metricsfakes.Gauge
	egressTLSConnCount  metricsfakes.Gauge
	egressWorkerSize    metricsfakes.Gauge
	ingressStreamsCount metricsfakes.Gauge
	msgSendTime         metricsfakes.Histogram
	msgDropCount        metricsfakes.Counter
}

func (tm *testMetrics) initialize() {
	tm.egressQueueLength.WithReturns(&tm.egressQueueLength)
	tm.egressQueueCapacity.WithReturns(&tm.egressQueueCapacity)
	tm.egressStreamCount.WithReturns(&tm.egressStreamCount)
	tm.egressTLSConnCount.WithReturns(&tm.egressTLSConnCount)
	tm.egressWorkerSize.WithReturns(&tm.egressWorkerSize)
	tm.ingressStreamsCount.WithReturns(&tm.ingressStreamsCount)
	tm.msgSendTime.WithReturns(&tm.msgSendTime)
	tm.msgDropCount.WithReturns(&tm.msgDropCount)

	fakeProvider := tm.fakeProvider
	fakeProvider.On("NewGauge", cluster.IngressStreamsCountOpts).Return(&tm.ingressStreamsCount)
	fakeProvider.On("NewGauge", cluster.EgressQueueLengthOpts).Return(&tm.egressQueueLength)
	fakeProvider.On("NewGauge", cluster.EgressQueueCapacityOpts).Return(&tm.egressQueueCapacity)
	fakeProvider.On("NewGauge", cluster.EgressStreamsCountOpts).Return(&tm.egressStreamCount)
	fakeProvider.On("NewGauge", cluster.EgressTLSConnectionCountOpts).Return(&tm.egressTLSConnCount)
	fakeProvider.On("NewGauge", cluster.EgressWorkersOpts).Return(&tm.egressWorkerSize)
	fakeProvider.On("NewCounter", cluster.MessagesDroppedCountOpts).Return(&tm.msgDropCount)
	fakeProvider.On("NewHistogram", cluster.MessageSendTimeOpts).Return(&tm.msgSendTime)
}

func TestMetrics(t *testing.T) {
	for _, testCase := range []struct {
		name        string
		runTest     func(node1, node2 *clusterNode, testMetrics *testMetrics)
		testMetrics *testMetrics
	}{
		{
			name: "EgressQueueOccupancy",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "msg_type", "transaction", "channel", testChannel},
					testMetrics.egressQueueLength.WithArgsForCall(0))
				assert.Equal(t, float64(0), testMetrics.egressQueueLength.SetArgsForCall(0))
				assert.Equal(t, float64(1), testMetrics.egressQueueCapacity.SetArgsForCall(0))

				var messageReceived sync.WaitGroup
				messageReceived.Add(1)
				node2.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
					messageReceived.Done()
				}).Return(nil)

				rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
				assert.NoError(t, err)

				stream := assertEventualEstablishStream(t, rm)
				stream.Send(testConsensusReq)
				messageReceived.Wait()

				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "msg_type", "consensus", "channel", testChannel},
					testMetrics.egressQueueLength.WithArgsForCall(1))
				assert.Equal(t, float64(0), testMetrics.egressQueueLength.SetArgsForCall(1))
				assert.Equal(t, float64(1), testMetrics.egressQueueCapacity.SetArgsForCall(1))
			},
		},
		{
			name: "EgressStreamsCount",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				assert.Equal(t, 1, testMetrics.egressStreamCount.SetCallCount())
				assert.Equal(t, 1, testMetrics.egressStreamCount.WithCallCount())
				assert.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))

				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
				assert.Equal(t, 2, testMetrics.egressStreamCount.SetCallCount())
				assert.Equal(t, 2, testMetrics.egressStreamCount.WithCallCount())
				assert.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))
			},
		},
		{
			name: "EgressTLSConnCount",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				assert.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))

				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
				assert.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))

				// A single TLS connection despite 2 streams
				assert.Equal(t, float64(1), testMetrics.egressTLSConnCount.SetArgsForCall(0))
				assert.Equal(t, 1, testMetrics.egressTLSConnCount.SetCallCount())
			},
		},
		{
			name: "EgressWorkerSize",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				assert.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))

				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
				assert.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))

				assert.Equal(t, float64(1), testMetrics.egressWorkerSize.SetArgsForCall(0))
				assert.Equal(t, float64(1), testMetrics.egressWorkerSize.SetArgsForCall(1))
			},
		},
		{
			name: "MsgSendTime",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "channel", testChannel},
					testMetrics.msgSendTime.WithArgsForCall(0))

				assert.Equal(t, 1, testMetrics.msgSendTime.ObserveCallCount())
			},
		},
		{
			name: "MsgDropCount",
			runTest: func(node1, node2 *clusterNode, testMetrics *testMetrics) {
				blockRecv := make(chan struct{})
				wasReported := func() bool {
					select {
					case <-blockRecv:
						return true
					default:
						return false
					}
				}
				// When the drop count is reported, unblock the server-side receive operation.
				testMetrics.msgDropCount.AddStub = func(_ float64) {
					if !wasReported() {
						close(blockRecv)
					}
				}

				node2.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
					// Block until the message drop is reported
					<-blockRecv
				}).Return(nil)

				rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
				assert.NoError(t, err)

				stream := assertEventualEstablishStream(t, rm)
				// Send too many messages while the server side is not reading from the stream
				for {
					stream.Send(testConsensusReq)
					if wasReported() {
						break
					}
				}
				assert.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "channel", testChannel},
					testMetrics.msgDropCount.WithArgsForCall(0))
				assert.Equal(t, 1, testMetrics.msgDropCount.AddCallCount())
			},
		},
	} {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			fakeProvider := &mocks.MetricsProvider{}
			testCase.testMetrics = &testMetrics{
				fakeProvider: fakeProvider,
			}

			testCase.testMetrics.initialize()

			node1 := newTestNodeWithMetrics(t, fakeProvider, &testCase.testMetrics.egressTLSConnCount)
			defer node1.stop()

			node2 := newTestNode(t)
			defer node2.stop()

			configForNode1 := []cluster.RemoteNode{node2.nodeInfo}
			configForNode2 := []cluster.RemoteNode{node1.nodeInfo}
			node1.c.Configure(testChannel, configForNode1)
			node2.c.Configure(testChannel, configForNode2)
			node1.c.Configure(testChannel2, configForNode1)
			node2.c.Configure(testChannel2, configForNode2)

			testCase.runTest(node1, node2, testCase.testMetrics)
		})
	}
}

func TestCertExpirationWarningEgress(t *testing.T) {
	// Scenario: Ensures that when certificates are due to expire,
	// a warning is logged.

	node1 := newTestNode(t)
	node2 := newTestNode(t)

	cert, err := x509.ParseCertificate(node2.nodeInfo.ServerTLSCert)
	assert.NoError(t, err)
	assert.NotNil(t, cert)

	// Let the NotAfter time of the certificate be T1, the current time be T0.
	// So time.Until is (T1 - T0), which means we have (T1 - T0) time left.
	// We want to trigger a warning, so we set the warning threshold to be 20 seconds above
	// the time left, so the time left would be smaller than the threshold.
	node1.c.CertExpWarningThreshold = time.Until(cert.NotAfter) + time.Second*20
	// We only alert once in 3 seconds
	node1.c.MinimumExpirationWarningInterval = time.Second * 3

	defer node1.stop()
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	stub, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	assert.NoError(t, err)

	mockgRPC := &mocks.StepClient{}
	mockgRPC.On("Send", mock.Anything).Return(nil)
	mockgRPC.On("Context").Return(context.Background())
	mockClient := &mocks.ClusterClient{}
	mockClient.On("Step", mock.Anything).Return(mockgRPC, nil)

	stub.Client = mockClient

	stream := assertEventualEstablishStream(t, stub)

	alerts := make(chan struct{}, 100)

	stream.Logger = stream.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "expires in less than") {
			alerts <- struct{}{}
		}
		return nil
	}))

	// Send a message to the node and expect an alert to be logged.
	stream.Send(wrapSubmitReq(testReq))
	select {
	case <-alerts:
	case <-time.After(time.Second * 5):
		t.Fatal("Should have logged an alert")
	}
	// Send another message, and ensure nothing is logged, because the
	// alerts should be suppressed until the minimum interval timeout expires.
	stream.Send(wrapSubmitReq(testReq))
	select {
	case <-alerts:
		t.Fatal("Should not have logged an alert")
	case <-time.After(time.Millisecond * 500):
	}
	// Wait enough time for the alert interval to clear.
	time.Sleep(node1.c.MinimumExpirationWarningInterval + time.Second)
	// Send a message again; this time an alert should be logged again.
	stream.Send(wrapSubmitReq(testReq))
	select {
	case <-alerts:
	case <-time.After(time.Second * 5):
		t.Fatal("Should have logged an alert")
	}
}

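// assertBiDiCommunicationForChannel verifies that each of the two nodes can
// send msgToSend to the other over the given channel, and that the
// receiver's handler observes a message equal to the one sent.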
func assertBiDiCommunicationForChannel(t *testing.T, node1, node2 *clusterNode, msgToSend *orderer.SubmitRequest, channel string) {
	for _, tst := range []struct {
		label    string
		sender   *clusterNode
		receiver *clusterNode
		target   uint64
	}{
		{label: "1->2", sender: node1, target: node2.nodeInfo.ID, receiver: node2},
		{label: "2->1", sender: node2, target: node1.nodeInfo.ID, receiver: node1},
	} {
		t.Run(tst.label, func(t *testing.T) {
			stub, err := tst.sender.c.Remote(channel, tst.target)
			assert.NoError(t, err)

			stream := assertEventualEstablishStream(t, stub)

			var wg sync.WaitGroup
			wg.Add(1)
			tst.receiver.handler.On("OnSubmit", channel, tst.sender.nodeInfo.ID, mock.Anything).Return(nil).Once().Run(func(args mock.Arguments) {
				req := args.Get(2).(*orderer.SubmitRequest)
				assert.True(t, proto.Equal(req, msgToSend))
				wg.Done()
			})

			err = stream.Send(wrapSubmitReq(msgToSend))
			assert.NoError(t, err)

			wg.Wait()
		})
	}
}

func assertBiDiCommunication(t *testing.T, node1, node2 *clusterNode, msgToSend *orderer.SubmitRequest) {
	assertBiDiCommunicationForChannel(t, node1, node2, msgToSend, testChannel)
}

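// assertEventualEstablishStream retries NewStream until it succeeds, since
// connections are established asynchronously (AsyncConnect is true).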
func assertEventualEstablishStream(t *testing.T, rpc *cluster.RemoteContext) *cluster.Stream {
	var res *cluster.Stream
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		stream, err := rpc.NewStream(time.Hour)
		res = stream
		return err
	}, timeout).Should(gomega.Succeed())
	return res
}

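// assertEventualSendMessage keeps establishing a stream and sending req on
// it until a send succeeds, returning the stream that was used.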
func assertEventualSendMessage(t *testing.T, rpc *cluster.RemoteContext, req *orderer.SubmitRequest) orderer.Cluster_StepClient {
	var res orderer.Cluster_StepClient
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		stream, err := rpc.NewStream(time.Hour)
		if err != nil {
			return err
		}
		res = stream
		return stream.Send(wrapSubmitReq(req))
	}, timeout).Should(gomega.Succeed())
	return res
}

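// wrapSubmitReq wraps a SubmitRequest in the StepRequest envelope that the
// Step RPC expects, e.g. wrapSubmitReq(testSubReq) yields a StepRequest
// whose Payload is a StepRequest_SubmitRequest.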
func wrapSubmitReq(req *orderer.SubmitRequest) *orderer.StepRequest {
	return &orderer.StepRequest{
		Payload: &orderer.StepRequest_SubmitRequest{
			SubmitRequest: req,
		},
	}
}