github.com/hechain20/hechain@v0.0.0-20220316014945-b544036ba106/orderer/common/cluster/comm_test.go

/*
Copyright hechain. 2017 All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
*/

package cluster_test

import (
	"context"
	"crypto/rand"
	"crypto/x509"
	"fmt"
	"net"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/golang/protobuf/proto"
	"github.com/hechain20/hechain/common/crypto"
	"github.com/hechain20/hechain/common/crypto/tlsgen"
	"github.com/hechain20/hechain/common/flogging"
	"github.com/hechain20/hechain/common/metrics"
	"github.com/hechain20/hechain/common/metrics/disabled"
	"github.com/hechain20/hechain/common/metrics/metricsfakes"
	comm_utils "github.com/hechain20/hechain/internal/pkg/comm"
	"github.com/hechain20/hechain/orderer/common/cluster"
	"github.com/hechain20/hechain/orderer/common/cluster/mocks"
	"github.com/hyperledger/fabric-protos-go/common"
	"github.com/hyperledger/fabric-protos-go/orderer"
	"github.com/onsi/gomega"
	"github.com/pkg/errors"
	"github.com/stretchr/testify/mock"
	"github.com/stretchr/testify/require"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
	"google.golang.org/grpc"
)

const (
	testChannel  = "test"
	testChannel2 = "test2"
	timeout      = time.Second * 10
)

var (
	// CA that generates TLS key-pairs.
	// We use only one CA because the authentication
	// is based on TLS pinning
	ca = createCAOrPanic()

	lastNodeID uint64

	testSubReq = &orderer.SubmitRequest{
		Channel: "test",
	}

	testReq = &orderer.SubmitRequest{
		Channel: "test",
		Payload: &common.Envelope{
			Payload: []byte("test"),
		},
	}

	testReq2 = &orderer.SubmitRequest{
		Channel: testChannel2,
		Payload: &common.Envelope{
			Payload: []byte(testChannel2),
		},
	}

	testRes = &orderer.SubmitResponse{
		Info: "test",
	}

	fooReq = wrapSubmitReq(&orderer.SubmitRequest{
		Channel: "foo",
	})

	barReq = wrapSubmitReq(&orderer.SubmitRequest{
		Channel: "bar",
	})

	testConsensusReq = &orderer.StepRequest{
		Payload: &orderer.StepRequest_ConsensusRequest{
			ConsensusRequest: &orderer.ConsensusRequest{
				Payload: []byte{1, 2, 3},
				Channel: testChannel,
			},
		},
	}

	channelExtractor = &mockChannelExtractor{}
)
func nextUnusedID() uint64 {
	return atomic.AddUint64(&lastNodeID, 1)
}

func createCAOrPanic() tlsgen.CA {
	ca, err := tlsgen.NewCA()
	if err != nil {
		panic(fmt.Sprintf("failed creating CA: %+v", err))
	}
	return ca
}
type mockChannelExtractor struct{}

func (*mockChannelExtractor) TargetChannel(msg proto.Message) string {
	switch req := msg.(type) {
	case *orderer.ConsensusRequest:
		return req.Channel
	case *orderer.SubmitRequest:
		return req.Channel
	default:
		return ""
	}
}
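
// Illustrative note (not exercised directly here): the extractor is what lets
// a single Step service route traffic for many channels, e.g.
//
//	channelExtractor.TargetChannel(&orderer.SubmitRequest{Channel: "foo"}) // "foo"
//	channelExtractor.TargetChannel(&orderer.StepResponse{})                // "" => treated as invalid by the dispatcher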

type clusterNode struct {
	lock         sync.Mutex
	frozen       bool
	freezeCond   sync.Cond
	dialer       *cluster.PredicateDialer
	handler      *mocks.Handler
	nodeInfo     cluster.RemoteNode
	srv          *comm_utils.GRPCServer
	bindAddress  string
	clientConfig comm_utils.ClientConfig
	serverConfig comm_utils.ServerConfig
	c            *cluster.Comm
}
   136  
   137  func (cn *clusterNode) Step(stream orderer.Cluster_StepServer) error {
   138  	cn.waitIfFrozen()
   139  	req, err := stream.Recv()
   140  	if err != nil {
   141  		return err
   142  	}
   143  	if submitReq := req.GetSubmitRequest(); submitReq != nil {
   144  		return cn.c.DispatchSubmit(stream.Context(), submitReq)
   145  	}
   146  	if err := cn.c.DispatchConsensus(stream.Context(), req.GetConsensusRequest()); err != nil {
   147  		return err
   148  	}
   149  	return stream.Send(&orderer.StepResponse{})
   150  }
   151  
func (cn *clusterNode) waitIfFrozen() {
	cn.lock.Lock()
	defer cn.lock.Unlock()
	// There is no freeze after an unfreeze, so no loop around
	// the Wait is needed. Wait re-acquires the lock before
	// returning, and the deferred Unlock releases it.
	if cn.frozen {
		cn.freezeCond.Wait()
	}
}

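// freeze makes subsequent Step invocations block inside waitIfFrozen
// until unfreeze is called.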
func (cn *clusterNode) freeze() {
	cn.lock.Lock()
	defer cn.lock.Unlock()
	cn.frozen = true
}

func (cn *clusterNode) unfreeze() {
	cn.lock.Lock()
	cn.frozen = false
	cn.lock.Unlock()
	cn.freezeCond.Broadcast()
}

func (cn *clusterNode) resurrect() {
	gRPCServer, err := comm_utils.NewGRPCServer(cn.bindAddress, cn.serverConfig)
	if err != nil {
		panic(fmt.Errorf("failed starting gRPC server: %v", err))
	}
	cn.srv = gRPCServer
	orderer.RegisterClusterServer(gRPCServer.Server(), cn)
	go cn.srv.Start()
}

func (cn *clusterNode) stop() {
	cn.srv.Stop()
	cn.c.Shutdown()
}

func (cn *clusterNode) renewCertificates() {
	clientKeyPair, err := ca.NewClientCertKeyPair()
	if err != nil {
		panic(fmt.Errorf("failed creating client certificate: %v", err))
	}
	serverKeyPair, err := ca.NewServerCertKeyPair("127.0.0.1")
	if err != nil {
		panic(fmt.Errorf("failed creating server certificate: %v", err))
	}

	cn.nodeInfo.ClientTLSCert = clientKeyPair.TLSCert.Raw
	cn.nodeInfo.ServerTLSCert = serverKeyPair.TLSCert.Raw

	cn.serverConfig.SecOpts.Certificate = serverKeyPair.Cert
	cn.serverConfig.SecOpts.Key = serverKeyPair.Key

	cn.dialer.Config.SecOpts.Key = clientKeyPair.Key
	cn.dialer.Config.SecOpts.Certificate = clientKeyPair.Cert
}

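// newTestNodeWithMetrics spins up a cluster node bound to an ephemeral port on
// 127.0.0.1, with client and server TLS key-pairs issued by the shared test CA.
// Since authentication is based on TLS pinning, both certificates are also
// recorded in the node's RemoteNode info.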
func newTestNodeWithMetrics(t *testing.T, metrics cluster.MetricsProvider, tlsConnGauge metrics.Gauge) *clusterNode {
	serverKeyPair, err := ca.NewServerCertKeyPair("127.0.0.1")
	require.NoError(t, err)

	clientKeyPair, err := ca.NewClientCertKeyPair()
	require.NoError(t, err)

	handler := &mocks.Handler{}
	clientConfig := comm_utils.ClientConfig{
		AsyncConnect: true,
		DialTimeout:  time.Hour,
		SecOpts: comm_utils.SecureOptions{
			RequireClientCert: true,
			Key:               clientKeyPair.Key,
			Certificate:       clientKeyPair.Cert,
			ServerRootCAs:     [][]byte{ca.CertBytes()},
			UseTLS:            true,
			ClientRootCAs:     [][]byte{ca.CertBytes()},
		},
	}

	dialer := &cluster.PredicateDialer{
		Config: clientConfig,
	}

	srvConfig := comm_utils.ServerConfig{
		SecOpts: comm_utils.SecureOptions{
			Key:         serverKeyPair.Key,
			Certificate: serverKeyPair.Cert,
			UseTLS:      true,
		},
	}
	gRPCServer, err := comm_utils.NewGRPCServer("127.0.0.1:", srvConfig)
	require.NoError(t, err)

	tstSrv := &clusterNode{
		dialer:       dialer,
		clientConfig: clientConfig,
		serverConfig: srvConfig,
		bindAddress:  gRPCServer.Address(),
		handler:      handler,
		nodeInfo: cluster.RemoteNode{
			Endpoint:      gRPCServer.Address(),
			ID:            nextUnusedID(),
			ServerTLSCert: serverKeyPair.TLSCert.Raw,
			ClientTLSCert: clientKeyPair.TLSCert.Raw,
		},
		srv: gRPCServer,
	}

	tstSrv.freezeCond.L = &tstSrv.lock

	compareCert := cluster.CachePublicKeyComparisons(func(a, b []byte) bool {
		return crypto.CertificatesWithSamePublicKey(a, b) == nil
	})

	tstSrv.c = &cluster.Comm{
		CertExpWarningThreshold: time.Hour,
		SendBufferSize:          1,
		Logger:                  flogging.MustGetLogger("test"),
		Chan2Members:            make(cluster.MembersByChannel),
		H:                       handler,
		ChanExt:                 channelExtractor,
		Connections:             cluster.NewConnectionStore(dialer, tlsConnGauge),
		Metrics:                 cluster.NewMetrics(metrics),
		CompareCertificate:      compareCert,
	}

	orderer.RegisterClusterServer(gRPCServer.Server(), tstSrv)
	go gRPCServer.Start()
	return tstSrv
}

func newTestNode(t *testing.T) *clusterNode {
	return newTestNodeWithMetrics(t, &disabled.Provider{}, &disabled.Gauge{})
}
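
// Typical harness usage, mirrored by the tests below: create nodes, make them
// known to each other on a channel, then obtain a remote stub and a stream:
//
//	n1, n2 := newTestNode(t), newTestNode(t)
//	cfg := []cluster.RemoteNode{n1.nodeInfo, n2.nodeInfo}
//	n1.c.Configure(testChannel, cfg)
//	n2.c.Configure(testChannel, cfg)
//	rm, _ := n1.c.Remote(testChannel, n2.nodeInfo.ID)
//	stream := assertEventualEstablishStream(t, rm)
//	_ = stream.Send(wrapSubmitReq(testReq))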

func TestSendBigMessage(t *testing.T) {
	// Scenario: Basic test that spawns 5 nodes and sends a big message
	// from one of the nodes to the others.
	// A receiver node's Step() server-side method (which calls Recv)
	// is frozen until the sender node's Send method returns,
	// hence the sender node finishes calling Send
	// before a receiver node starts calling Recv.
	// This ensures that Send is non-blocking even with big messages.
	// In the test, we send a total of 8MB of random data (2MB to each node).
	// The randomness is used so that gRPC compression won't reduce the size.

	node1 := newTestNode(t)
	node2 := newTestNode(t)
	node3 := newTestNode(t)
	node4 := newTestNode(t)
	node5 := newTestNode(t)

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		node.c.SendBufferSize = 1
	}

	defer node1.stop()
	defer node2.stop()
	defer node3.stop()
	defer node4.stop()
	defer node5.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo, node3.nodeInfo, node4.nodeInfo, node5.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)
	node3.c.Configure(testChannel, config)
	node4.c.Configure(testChannel, config)
	node5.c.Configure(testChannel, config)

	var messageReceived sync.WaitGroup
	messageReceived.Add(4)

	msgSize := 1024 * 1024 * 2
	bigMsg := &orderer.ConsensusRequest{
		Channel: testChannel,
		Payload: make([]byte, msgSize),
	}

	_, err := rand.Read(bigMsg.Payload)
	require.NoError(t, err)

	wrappedMsg := &orderer.StepRequest{
		Payload: &orderer.StepRequest_ConsensusRequest{
			ConsensusRequest: bigMsg,
		},
	}

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		node.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
			msg := args.Get(2).(*orderer.ConsensusRequest)
			require.Len(t, msg.Payload, msgSize)
			messageReceived.Done()
		}).Return(nil)
	}

	streams := map[uint64]*cluster.Stream{}

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		// Freeze the node in order to block its Recv
		node.freeze()
	}

	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		rm, err := node1.c.Remote(testChannel, node.nodeInfo.ID)
		require.NoError(t, err)

		stream := assertEventualEstablishStream(t, rm)
		streams[node.nodeInfo.ID] = stream
	}

	t0 := time.Now()
	for _, node := range []*clusterNode{node2, node3, node4, node5} {
		stream := streams[node.nodeInfo.ID]

		t1 := time.Now()
		err = stream.Send(wrappedMsg)
		require.NoError(t, err)
		t.Log("Sending took", time.Since(t1))

		// Unfreeze the node. It can now call Recv, and signal the messageReceived waitGroup.
		node.unfreeze()
	}

	t.Log("Total sending time to all 4 nodes took:", time.Since(t0))

	messageReceived.Wait()
}

func TestBlockingSend(t *testing.T) {
	// Scenario: Basic test that spawns 2 nodes and sends three SubmitRequests,
	// or three ConsensusRequests, from the first node to the second.
	// SubmitRequests should block, but ConsensusRequests should not.
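	//
	// A sketch of the expected mechanics with SendBufferSize == 1 (set below):
	// the first Send is handed to the egress worker goroutine (which then
	// blocks inside the mocked stream), the second Send occupies the buffer,
	// and the third either blocks until the worker is released (submit
	// traffic) or is dropped with "send queue overflown" (consensus traffic).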

	for _, testCase := range []struct {
		description        string
		messageToSend      *orderer.StepRequest
		streamUnblocks     bool
		elapsedGreaterThan time.Duration
		overflowErr        string
	}{
		{
			description:        "SubmitRequest",
			messageToSend:      wrapSubmitReq(testReq),
			streamUnblocks:     true,
			elapsedGreaterThan: time.Second / 2,
		},
		{
			description:   "ConsensusRequest",
			messageToSend: testConsensusReq,
			overflowErr:   "send queue overflown",
		},
	} {
		t.Run(testCase.description, func(t *testing.T) {
			node1 := newTestNode(t)
			node2 := newTestNode(t)

			node1.c.SendBufferSize = 1
			node2.c.SendBufferSize = 1

			defer node1.stop()
			defer node2.stop()

			config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
			node1.c.Configure(testChannel, config)
			node2.c.Configure(testChannel, config)

			rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
			require.NoError(t, err)

			client := &mocks.ClusterClient{}
			fakeStream := &mocks.StepClient{}

			// Replace the real client with a mock client
			rm.Client = client
			rm.ProbeConn = func(_ *grpc.ClientConn) error {
				return nil
			}
			// Configure the client to return the mock stream
			fakeStream.On("Context", mock.Anything).Return(context.Background())
			client.On("Step", mock.Anything).Return(fakeStream, nil).Once()

			unBlock := make(chan struct{})
			var sendInvoked sync.WaitGroup
			sendInvoked.Add(1)
			var once sync.Once
			fakeStream.On("Send", mock.Anything).Run(func(_ mock.Arguments) {
				once.Do(sendInvoked.Done)
				<-unBlock
			}).Return(errors.New("oops"))

			stream, err := rm.NewStream(time.Hour)
			require.NoError(t, err)

			// The first send doesn't block, even though the Send operation blocks.
			err = stream.Send(testCase.messageToSend)
			require.NoError(t, err)

			// The second one doesn't either.
			// After this point, we have 1 goroutine which is blocked on Send(),
			// and one message in the buffer.
			sendInvoked.Wait()
			err = stream.Send(testCase.messageToSend)
			require.NoError(t, err)

			// The third blocks, so we need to unblock it ourselves
			// in order for it to go through, unless the operation
			// is non-blocking.
			go func() {
				time.Sleep(time.Second)
				if testCase.streamUnblocks {
					close(unBlock)
				}
			}()

			t1 := time.Now()
			err = stream.Send(testCase.messageToSend)
			// The third send always overflows or blocks.
			// If we expect to receive an overflow error - assert it.
			if testCase.overflowErr != "" {
				require.EqualError(t, err, testCase.overflowErr)
			}
			elapsed := time.Since(t1)
			t.Log("Elapsed time:", elapsed)
			require.True(t, elapsed > testCase.elapsedGreaterThan)

			if !testCase.streamUnblocks {
				close(unBlock)
			}
		})
	}
}

func TestBasic(t *testing.T) {
	// Scenario: Basic test that spawns 2 nodes that send each other
	// messages, which are expected to be echoed back

	node1 := newTestNode(t)
	node2 := newTestNode(t)

	defer node1.stop()
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	assertBiDiCommunication(t, node1, node2, testReq)
}

func TestUnavailableHosts(t *testing.T) {
	// Scenario: A node is configured to connect
	// to a host that is down
	node1 := newTestNode(t)

	// The below timeout makes sure that connection establishment is done
	// asynchronously. Had it been synchronous, the Remote() call would be
	// blocked for an hour.
	node1.dialer.Config.DialTimeout = time.Hour
	defer node1.stop()

	node2 := newTestNode(t)
	node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	remote, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)
	require.NotNil(t, remote)

	_, err = remote.NewStream(time.Millisecond * 100)
	require.Contains(t, err.Error(), "connection")
}

func TestStreamAbortReportCorrectError(t *testing.T) {
	// Scenario: node 1 acquires a stream to node 2, and then the stream
	// encounters an error; as a result, the stream is aborted.
	// We ensure the error reported is the first error, even after
	// multiple attempts to use the stream.

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})

	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(errors.Errorf("whoops")).Once()

	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)
	var streamTerminated sync.WaitGroup
	streamTerminated.Add(1)

	stream := assertEventualEstablishStream(t, rm1)

	l, err := zap.NewDevelopment()
	require.NoError(t, err)
	stream.Logger = flogging.NewFabricLogger(l, zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "Stream 1 to") && strings.Contains(entry.Message, "terminated") {
			streamTerminated.Done()
		}
		return nil
	}))

	// Probe the stream for the first time
	err = stream.Send(wrapSubmitReq(testReq))
	require.NoError(t, err)

	// We should receive back the crafted error
	_, err = stream.Recv()
	require.Contains(t, err.Error(), "whoops")

	// Wait for the stream to be terminated from within the communication infrastructure
	streamTerminated.Wait()

	// We should still receive the original crafted error despite the stream being terminated
	err = stream.Send(wrapSubmitReq(testReq))
	require.Contains(t, err.Error(), "whoops")
}

func TestStreamAbort(t *testing.T) {
	// Scenario: node 1 is connected to node 2 in 2 channels,
	// and the consumer of the communication calls receive.
	// Two sub-scenarios are exercised:
	// 1) The server certificate of node 2 changes in the first channel
	// 2) Node 2 is evicted from the membership of the first channel
	// In both scenarios, the Recv() call should be aborted

	node2 := newTestNode(t)
	defer node2.stop()

	invalidNodeInfo := cluster.RemoteNode{
		ID:            node2.nodeInfo.ID,
		ServerTLSCert: []byte{1, 2, 3},
		ClientTLSCert: []byte{1, 2, 3},
	}

	for _, tst := range []struct {
		testName      string
		membership    []cluster.RemoteNode
		expectedError string
	}{
		{
			testName:      "Evicted from membership",
			membership:    nil,
			expectedError: "rpc error: code = Canceled desc = context canceled",
		},
		{
			testName:      "Changed TLS certificate",
			membership:    []cluster.RemoteNode{invalidNodeInfo},
			expectedError: "rpc error: code = Canceled desc = context canceled",
		},
	} {
		t.Run(tst.testName, func(t *testing.T) {
			testStreamAbort(t, node2, tst.membership, tst.expectedError)
		})
	}
	node2.handler.AssertNumberOfCalls(t, "OnSubmit", 2)
}

func testStreamAbort(t *testing.T, node2 *clusterNode, newMembership []cluster.RemoteNode, expectedError string) {
	node1 := newTestNode(t)
	defer node1.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
	node1.c.Configure(testChannel2, []cluster.RemoteNode{node2.nodeInfo})
	node2.c.Configure(testChannel2, []cluster.RemoteNode{node1.nodeInfo})

	var streamCreated sync.WaitGroup
	streamCreated.Add(1)

	stopChan := make(chan struct{})

	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(_ mock.Arguments) {
		// Notify the stream was created
		streamCreated.Done()
		// Wait for the test to finish
		<-stopChan
	}).Return(nil).Once()

	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)

	go func() {
		stream := assertEventualEstablishStream(t, rm1)
		// Signal the reconfiguration
		err = stream.Send(wrapSubmitReq(testReq))
		require.NoError(t, err)
		_, err := stream.Recv()
		require.Contains(t, err.Error(), expectedError)
		close(stopChan)
	}()

	go func() {
		// Wait for the stream reference to be obtained
		streamCreated.Wait()
		// Reconfigure the channel membership
		node1.c.Configure(testChannel, newMembership)
	}()

	<-stopChan
}

func TestDoubleReconfigure(t *testing.T) {
	// Scenario: Basic test that spawns 2 nodes
	// and configures node 1 twice, and checks that
	// the remote stub to node 2 wasn't re-created in the second
	// configuration since it already existed

	node1 := newTestNode(t)
	node2 := newTestNode(t)

	defer node1.stop()
	defer node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	rm1, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)

	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})
	rm2, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)
	// Ensure the references are equal
	require.True(t, rm1 == rm2)
}

func TestInvalidChannel(t *testing.T) {
	// Scenario: node 1 is ordered to send a message on a channel
	// that doesn't exist, and also receives a message, but
	// the channel cannot be extracted from the message.

	t.Run("channel doesn't exist", func(t *testing.T) {
		node1 := newTestNode(t)
		defer node1.stop()

		_, err := node1.c.Remote(testChannel, 0)
		require.EqualError(t, err, "channel test doesn't exist")
	})

	t.Run("channel cannot be extracted", func(t *testing.T) {
		node1 := newTestNode(t)
		defer node1.stop()

		node1.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
		gt := gomega.NewGomegaWithT(t)
		gt.Eventually(func() (bool, error) {
			_, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
			return true, err
		}, time.Minute).Should(gomega.BeTrue())

		stub, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
		require.NoError(t, err)

		stream := assertEventualEstablishStream(t, stub)

		// An empty SubmitRequest has an empty channel, which is invalid
		err = stream.Send(wrapSubmitReq(&orderer.SubmitRequest{}))
		require.NoError(t, err)

		_, err = stream.Recv()
		require.EqualError(t, err, "rpc error: code = Unknown desc = badly formatted message, cannot extract channel")

		// Test directly without going through the gRPC stream
		err = node1.c.DispatchSubmit(context.Background(), &orderer.SubmitRequest{})
		require.EqualError(t, err, "badly formatted message, cannot extract channel")
	})
}

func TestAbortRPC(t *testing.T) {
	// Scenarios:
	// (I) The node calls an RPC, and calls Abort() on the remote context
	// in parallel. The RPC should return even though the server-side call hasn't finished.
	// (II) The node calls an RPC, but the server-side processing takes too long,
	// and the RPC invocation returns prematurely.

	testCases := []struct {
		name        string
		abortFunc   func(*cluster.RemoteContext)
		rpcTimeout  time.Duration
		expectedErr string
	}{
		{
			name:        "Abort() called",
			expectedErr: "rpc error: code = Canceled desc = context canceled",
			rpcTimeout:  time.Hour,
			abortFunc: func(rc *cluster.RemoteContext) {
				rc.Abort()
			},
		},
		{
			name:        "RPC timeout",
			expectedErr: "rpc timeout expired",
			rpcTimeout:  time.Second,
			abortFunc:   func(*cluster.RemoteContext) {},
		},
	}

	for _, testCase := range testCases {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			testAbort(t, testCase.abortFunc, testCase.rpcTimeout, testCase.expectedErr)
		})
	}
}

func testAbort(t *testing.T, abortFunc func(*cluster.RemoteContext), rpcTimeout time.Duration, expectedErr string) {
	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)
	var onStepCalled sync.WaitGroup
	onStepCalled.Add(1)

	// stuckCall ensures the OnSubmit() call is stuck throughout this test
	var stuckCall sync.WaitGroup
	stuckCall.Add(1)
	// At the end of the test, release the server-side resources
	defer stuckCall.Done()

	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(nil).Run(func(_ mock.Arguments) {
		onStepCalled.Done()
		stuckCall.Wait()
	}).Once()

	rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)

	go func() {
		onStepCalled.Wait()
		abortFunc(rm)
	}()

	var stream *cluster.Stream
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		stream, err = rm.NewStream(rpcTimeout)
		return err
	}, time.Second*10, time.Millisecond*10).Should(gomega.Succeed())

	stream.Send(wrapSubmitReq(testSubReq))
	_, err = stream.Recv()

	require.EqualError(t, err, expectedErr)

	node2.handler.AssertNumberOfCalls(t, "OnSubmit", 1)
}

func TestNoTLSCertificate(t *testing.T) {
	// Scenario: The node is sent a message by another node that doesn't
	// connect with mutual TLS, thus doesn't provide a TLS certificate
	node1 := newTestNode(t)
	defer node1.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})

	clientConfig := comm_utils.ClientConfig{
		AsyncConnect: true,
		DialTimeout:  time.Millisecond * 100,
		SecOpts: comm_utils.SecureOptions{
			ServerRootCAs: [][]byte{ca.CertBytes()},
			UseTLS:        true,
		},
	}

	var conn *grpc.ClientConn
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() (bool, error) {
		var err error
		conn, err = clientConfig.Dial(node1.srv.Address())
		return true, err
	}, time.Minute).Should(gomega.BeTrue())

	echoClient := orderer.NewClusterClient(conn)
	stream, err := echoClient.Step(context.Background())
	require.NoError(t, err)

	err = stream.Send(wrapSubmitReq(testSubReq))
	require.NoError(t, err)
	_, err = stream.Recv()
	require.EqualError(t, err, "rpc error: code = Unknown desc = no TLS certificate sent")
}

func TestReconnect(t *testing.T) {
	// Scenario: node 1 and node 2 are connected,
	// and node 2 is taken offline.
	// Node 1 tries to send a message to node 2 but fails,
	// and afterwards node 2 is brought back, after which
	// node 1 sends more messages, and it should succeed
	// sending a message to node 2 eventually.

	node1 := newTestNode(t)
	defer node1.stop()
	node1.dialer.Config.DialTimeout = time.Hour

	node2 := newTestNode(t)
	node2.handler.On("OnSubmit", testChannel, node1.nodeInfo.ID, mock.Anything).Return(nil)
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	// Take node 2 offline by shutting down its gRPC service
	node2.srv.Stop()
	// Obtain the stub for node 2.
	// Should succeed, because the connection was created at configuration time
	stub, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)

	// Try to obtain a stream. Should not succeed.
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		_, err = stub.NewStream(time.Hour)
		return err
	}).Should(gomega.Not(gomega.Succeed()))

	// Wait for the port to be released
	for {
		lsnr, err := net.Listen("tcp", node2.nodeInfo.Endpoint)
		if err == nil {
			lsnr.Close()
			break
		}
	}

	// Resurrect node 2
	node2.resurrect()
	// Send a message from node 1 to node 2.
	// Should succeed eventually
	assertEventualSendMessage(t, stub, testReq)
}

func TestRenewCertificates(t *testing.T) {
	// Scenario: node 1 and node 2 are connected,
	// then node 2's certificate is renewed, and
	// node 1 is reconfigured with the new
	// configuration without being restarted.

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node1.handler.On("OnStep", testChannel, node2.nodeInfo.ID, mock.Anything).Return(testRes, nil)
	node2.handler.On("OnStep", testChannel, node1.nodeInfo.ID, mock.Anything).Return(testRes, nil)

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	assertBiDiCommunication(t, node1, node2, testReq)

	// Close outgoing connections from node 2 to node 1
	node2.c.Configure(testChannel, nil)
	// Stop the gRPC service of node 2 to replace its certificate
	node2.srv.Stop()

	// Wait until node 1 detects this
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		remote, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
		if err != nil {
			return err
		}
		stream, err := remote.NewStream(time.Hour)
		if err != nil {
			return err
		}
		err = stream.Send(wrapSubmitReq(testSubReq))
		if err != nil {
			return err
		}
		return nil
	}).Should(gomega.Not(gomega.Succeed()))

	// Renew node 2's keys
	node2.renewCertificates()

	// Resurrect node 2 to make it service connections again
	node2.resurrect()

	// Without loss of generality, try to send a message from node 1 to node 2.
	// It should fail, because node 2's server certificate has now changed,
	// so node 1 closed the connection to the remote node
	info2 := node2.nodeInfo
	remote, err := node1.c.Remote(testChannel, info2.ID)
	require.NoError(t, err)
	require.NotNil(t, remote)
	_, err = remote.NewStream(time.Hour)
	require.Contains(t, err.Error(), info2.Endpoint)

	// Reconfigure both nodes with the updated keys
	config = []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	// Finally, check that the nodes can communicate once again
	assertBiDiCommunication(t, node1, node2, testReq)
}

func TestMembershipReconfiguration(t *testing.T) {
	// Scenario: node 1 and node 2 are started up,
	// and node 2 is configured to know about node 1,
	// without node 1 knowing about node 2.
	// The communication between them should only work
	// after node 1 is configured to know about node 2.

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node1.c.Configure(testChannel, []cluster.RemoteNode{})
	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})

	// Node 1 can't connect to node 2 because it doesn't know its TLS certificate yet
	_, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.EqualError(t, err, fmt.Sprintf("node %d doesn't exist in channel test's membership", node2.nodeInfo.ID))
	// Node 2 can connect to node 1, but it can't send messages to it because node 1 doesn't know node 2 yet.

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() (bool, error) {
		_, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
		return true, err
	}, time.Minute).Should(gomega.BeTrue())

	stub, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
	require.NoError(t, err)

	stream := assertEventualEstablishStream(t, stub)
	err = stream.Send(wrapSubmitReq(testSubReq))
	require.NoError(t, err)

	_, err = stream.Recv()
	require.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")

	// Next, configure node 1 to know about node 2
	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})

	// Check that the communication works correctly between both nodes
	assertBiDiCommunication(t, node1, node2, testReq)
	assertBiDiCommunication(t, node2, node1, testReq)

	// Reconfigure node 2 to forget about node 1
	node2.c.Configure(testChannel, []cluster.RemoteNode{})
	// Node 1 can still connect to node 2
	stub, err = node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)
	// But it can't send a message, because node 2 no longer authorizes node 1
	stream = assertEventualEstablishStream(t, stub)
	stream.Send(wrapSubmitReq(testSubReq))
	_, err = stream.Recv()
	require.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
}

func TestShutdown(t *testing.T) {
	// Scenario: node 1 is shut down, and as a result, can't
	// send messages to anyone, nor can it be reconfigured

	node1 := newTestNode(t)
	defer node1.stop()

	node1.c.Shutdown()

	// Obtaining a RemoteContext cannot succeed because shutdown was called before
	_, err := node1.c.Remote(testChannel, node1.nodeInfo.ID)
	require.EqualError(t, err, "communication has been shut down")

	node2 := newTestNode(t)
	defer node2.stop()

	node2.c.Configure(testChannel, []cluster.RemoteNode{node1.nodeInfo})
	// The configuration of node 1 doesn't take effect, since it was shut down
	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})

	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		_, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
		return err
	}, time.Minute).Should(gomega.Succeed())

	stub, err := node2.c.Remote(testChannel, node1.nodeInfo.ID)
	require.NoError(t, err)

	// Therefore, sending a message doesn't succeed, because node 1 rejected the configuration change
	gt.Eventually(func() string {
		stream, err := stub.NewStream(time.Hour)
		if err != nil {
			return err.Error()
		}
		err = stream.Send(wrapSubmitReq(testSubReq))
		require.NoError(t, err)

		_, err = stream.Recv()
		return err.Error()
	}, timeout).Should(gomega.ContainSubstring("channel test doesn't exist"))
}

func TestMultiChannelConfig(t *testing.T) {
	// Scenario: node 1 knows node 2 only in channel "foo"
	// and knows node 3 only in channel "bar".
	// Messages that are received are routed according to their corresponding channels,
	// and when node 2 sends a message for channel "bar" to node 1, it is rejected.
	// The same applies to node 3 sending a message to node 1 in channel "foo".

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	node3 := newTestNode(t)
	defer node3.stop()

	node1.c.Configure("foo", []cluster.RemoteNode{node2.nodeInfo})
	node1.c.Configure("bar", []cluster.RemoteNode{node3.nodeInfo})
	node2.c.Configure("foo", []cluster.RemoteNode{node1.nodeInfo})
	node3.c.Configure("bar", []cluster.RemoteNode{node1.nodeInfo})

	t.Run("Correct channel", func(t *testing.T) {
		var fromNode2 sync.WaitGroup
		fromNode2.Add(1)
		node1.handler.On("OnSubmit", "foo", node2.nodeInfo.ID, mock.Anything).Return(nil).Run(func(_ mock.Arguments) {
			fromNode2.Done()
		}).Once()

		var fromNode3 sync.WaitGroup
		fromNode3.Add(1)
		node1.handler.On("OnSubmit", "bar", node3.nodeInfo.ID, mock.Anything).Return(nil).Run(func(_ mock.Arguments) {
			fromNode3.Done()
		}).Once()

		node2toNode1, err := node2.c.Remote("foo", node1.nodeInfo.ID)
		require.NoError(t, err)
		node3toNode1, err := node3.c.Remote("bar", node1.nodeInfo.ID)
		require.NoError(t, err)

		stream := assertEventualEstablishStream(t, node2toNode1)
		stream.Send(fooReq)

		fromNode2.Wait()
		node1.handler.AssertNumberOfCalls(t, "OnSubmit", 1)

		stream = assertEventualEstablishStream(t, node3toNode1)
		stream.Send(barReq)

		fromNode3.Wait()
		node1.handler.AssertNumberOfCalls(t, "OnSubmit", 2)
	})

	t.Run("Incorrect channel", func(t *testing.T) {
		node1.handler.On("OnSubmit", "foo", node2.nodeInfo.ID, mock.Anything).Return(nil)
		node1.handler.On("OnSubmit", "bar", node3.nodeInfo.ID, mock.Anything).Return(nil)

		node2toNode1, err := node2.c.Remote("foo", node1.nodeInfo.ID)
		require.NoError(t, err)
		node3toNode1, err := node3.c.Remote("bar", node1.nodeInfo.ID)
		require.NoError(t, err)

		assertEventualSendMessage(t, node2toNode1, &orderer.SubmitRequest{Channel: "foo"})
		stream, err := node2toNode1.NewStream(time.Hour)
		require.NoError(t, err)
		err = stream.Send(barReq)
		require.NoError(t, err)
		_, err = stream.Recv()
		require.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")

		assertEventualSendMessage(t, node3toNode1, &orderer.SubmitRequest{Channel: "bar"})
		stream, err = node3toNode1.NewStream(time.Hour)
		require.NoError(t, err)
		err = stream.Send(fooReq)
		require.NoError(t, err)
		_, err = stream.Recv()
		require.EqualError(t, err, "rpc error: code = Unknown desc = certificate extracted from TLS connection isn't authorized")
	})
}

func TestConnectionFailure(t *testing.T) {
	// Scenario: node 1 fails to connect to node 2.

	node1 := newTestNode(t)
	defer node1.stop()

	node2 := newTestNode(t)
	defer node2.stop()

	dialer := &mocks.SecureDialer{}
	dialer.On("Dial", mock.Anything, mock.Anything).Return(nil, errors.New("oops"))
	node1.c.Connections = cluster.NewConnectionStore(dialer, &disabled.Gauge{})
	node1.c.Configure(testChannel, []cluster.RemoteNode{node2.nodeInfo})

	_, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.EqualError(t, err, "oops")
}

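// testMetrics bundles the metricsfakes counterparts of the metrics the cluster
// communication emits; initialize wires them into the fake provider so that
// each NewGauge/NewCounter/NewHistogram call returns the matching fake.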
type testMetrics struct {
	fakeProvider        *mocks.MetricsProvider
	egressQueueLength   metricsfakes.Gauge
	egressQueueCapacity metricsfakes.Gauge
	egressStreamCount   metricsfakes.Gauge
	egressTLSConnCount  metricsfakes.Gauge
	egressWorkerSize    metricsfakes.Gauge
	ingressStreamsCount metricsfakes.Gauge
	msgSendTime         metricsfakes.Histogram
	msgDropCount        metricsfakes.Counter
}

func (tm *testMetrics) initialize() {
	tm.egressQueueLength.WithReturns(&tm.egressQueueLength)
	tm.egressQueueCapacity.WithReturns(&tm.egressQueueCapacity)
	tm.egressStreamCount.WithReturns(&tm.egressStreamCount)
	tm.egressTLSConnCount.WithReturns(&tm.egressTLSConnCount)
	tm.egressWorkerSize.WithReturns(&tm.egressWorkerSize)
	tm.ingressStreamsCount.WithReturns(&tm.ingressStreamsCount)
	tm.msgSendTime.WithReturns(&tm.msgSendTime)
	tm.msgDropCount.WithReturns(&tm.msgDropCount)

	fakeProvider := tm.fakeProvider
	fakeProvider.On("NewGauge", cluster.IngressStreamsCountOpts).Return(&tm.ingressStreamsCount)
	fakeProvider.On("NewGauge", cluster.EgressQueueLengthOpts).Return(&tm.egressQueueLength)
	fakeProvider.On("NewGauge", cluster.EgressQueueCapacityOpts).Return(&tm.egressQueueCapacity)
	fakeProvider.On("NewGauge", cluster.EgressStreamsCountOpts).Return(&tm.egressStreamCount)
	fakeProvider.On("NewGauge", cluster.EgressTLSConnectionCountOpts).Return(&tm.egressTLSConnCount)
	fakeProvider.On("NewGauge", cluster.EgressWorkersOpts).Return(&tm.egressWorkerSize)
	fakeProvider.On("NewCounter", cluster.MessagesDroppedCountOpts).Return(&tm.msgDropCount)
	fakeProvider.On("NewHistogram", cluster.MessageSendTimeOpts).Return(&tm.msgSendTime)
}

func TestMetrics(t *testing.T) {
	for _, testCase := range []struct {
		name        string
		runTest     func(t *testing.T, node1, node2 *clusterNode, testMetrics *testMetrics)
		testMetrics *testMetrics
	}{
		{
			name: "EgressQueueOccupancy",
			runTest: func(t *testing.T, node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				require.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "msg_type", "transaction", "channel", testChannel},
					testMetrics.egressQueueLength.WithArgsForCall(0))
				require.Equal(t, float64(0), testMetrics.egressQueueLength.SetArgsForCall(0))
				require.Equal(t, float64(1), testMetrics.egressQueueCapacity.SetArgsForCall(0))

				var messageReceived sync.WaitGroup
				messageReceived.Add(1)
				node2.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
					messageReceived.Done()
				}).Return(nil)

				rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
				require.NoError(t, err)

				stream := assertEventualEstablishStream(t, rm)
				stream.Send(testConsensusReq)
				messageReceived.Wait()

				require.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "msg_type", "consensus", "channel", testChannel},
					testMetrics.egressQueueLength.WithArgsForCall(1))
				require.Equal(t, float64(0), testMetrics.egressQueueLength.SetArgsForCall(1))
				require.Equal(t, float64(1), testMetrics.egressQueueCapacity.SetArgsForCall(1))
			},
		},
		{
			name: "EgressStreamsCount",
			runTest: func(t *testing.T, node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				require.Equal(t, 1, testMetrics.egressStreamCount.SetCallCount())
				require.Equal(t, 1, testMetrics.egressStreamCount.WithCallCount())
				require.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))

				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
				require.Equal(t, 2, testMetrics.egressStreamCount.SetCallCount())
				require.Equal(t, 2, testMetrics.egressStreamCount.WithCallCount())
				require.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))
			},
		},
		{
			name: "EgressTLSConnCount",
			runTest: func(t *testing.T, node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				require.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))

				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
				require.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))

				// A single TLS connection despite 2 streams
				require.Equal(t, float64(1), testMetrics.egressTLSConnCount.SetArgsForCall(0))
				require.Equal(t, 1, testMetrics.egressTLSConnCount.SetCallCount())
			},
		},
		{
			name: "EgressWorkerSize",
			runTest: func(t *testing.T, node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				require.Equal(t, []string{"channel", testChannel}, testMetrics.egressStreamCount.WithArgsForCall(0))

				assertBiDiCommunicationForChannel(t, node1, node2, testReq2, testChannel2)
				require.Equal(t, []string{"channel", testChannel2}, testMetrics.egressStreamCount.WithArgsForCall(1))

				require.Equal(t, float64(1), testMetrics.egressWorkerSize.SetArgsForCall(0))
				require.Equal(t, float64(1), testMetrics.egressWorkerSize.SetArgsForCall(1))
			},
		},
		{
			name: "MsgSendTime",
			runTest: func(t *testing.T, node1, node2 *clusterNode, testMetrics *testMetrics) {
				assertBiDiCommunication(t, node1, node2, testReq)
				require.Eventually(t, func() bool { return testMetrics.msgSendTime.ObserveCallCount() > 0 }, time.Second, 10*time.Millisecond)
				require.Equal(t, 1, testMetrics.msgSendTime.ObserveCallCount())
				require.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "channel", testChannel}, testMetrics.msgSendTime.WithArgsForCall(0))
			},
		},
		{
			name: "MsgDropCount",
			runTest: func(t *testing.T, node1, node2 *clusterNode, testMetrics *testMetrics) {
				blockRecv := make(chan struct{})
				wasReported := func() bool {
					select {
					case <-blockRecv:
						return true
					default:
						return false
					}
				}
				// When the drop count is reported, release the server-side receive operation.
				testMetrics.msgDropCount.AddStub = func(_ float64) {
					if !wasReported() {
						close(blockRecv)
					}
				}

				node2.handler.On("OnConsensus", testChannel, node1.nodeInfo.ID, mock.Anything).Run(func(args mock.Arguments) {
					// Block until the message drop is reported
					<-blockRecv
				}).Return(nil)

				rm, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
				require.NoError(t, err)

				stream := assertEventualEstablishStream(t, rm)
				// Send too many messages while the server side is not reading from the stream
				for {
					stream.Send(testConsensusReq)
					if wasReported() {
						break
					}
				}
				require.Equal(t, []string{"host", node2.nodeInfo.Endpoint, "channel", testChannel},
					testMetrics.msgDropCount.WithArgsForCall(0))
				require.Equal(t, 1, testMetrics.msgDropCount.AddCallCount())
			},
		},
	} {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			fakeProvider := &mocks.MetricsProvider{}
			testCase.testMetrics = &testMetrics{
				fakeProvider: fakeProvider,
			}

			testCase.testMetrics.initialize()

			node1 := newTestNodeWithMetrics(t, fakeProvider, &testCase.testMetrics.egressTLSConnCount)
			defer node1.stop()

			node2 := newTestNode(t)
			defer node2.stop()

			configForNode1 := []cluster.RemoteNode{node2.nodeInfo}
			configForNode2 := []cluster.RemoteNode{node1.nodeInfo}
			node1.c.Configure(testChannel, configForNode1)
			node2.c.Configure(testChannel, configForNode2)
			node1.c.Configure(testChannel2, configForNode1)
			node2.c.Configure(testChannel2, configForNode2)

			testCase.runTest(t, node1, node2, testCase.testMetrics)
		})
	}
}

func TestCertExpirationWarningEgress(t *testing.T) {
	// Scenario: Ensures that when certificates are due to expire,
	// a warning is logged.

	node1 := newTestNode(t)
	node2 := newTestNode(t)

	cert, err := x509.ParseCertificate(node2.nodeInfo.ServerTLSCert)
	require.NoError(t, err)
	require.NotNil(t, cert)

	// Let the NotAfter time of the certificate be T1, and the current time be T0.
	// time.Until returns (T1 - T0), which is the time left until expiration.
	// We want to trigger a warning, so we set the warning threshold to 20 seconds above
	// the time left, so that the time left is smaller than the threshold.
	node1.c.CertExpWarningThreshold = time.Until(cert.NotAfter) + time.Second*20
	// We only alert once in 3 seconds
	node1.c.MinimumExpirationWarningInterval = time.Second * 3

	defer node1.stop()
	defer node2.stop()

	config := []cluster.RemoteNode{node1.nodeInfo, node2.nodeInfo}
	node1.c.Configure(testChannel, config)
	node2.c.Configure(testChannel, config)

	stub, err := node1.c.Remote(testChannel, node2.nodeInfo.ID)
	require.NoError(t, err)

	mockgRPC := &mocks.StepClient{}
	mockgRPC.On("Send", mock.Anything).Return(nil)
	mockgRPC.On("Context").Return(context.Background())
	mockClient := &mocks.ClusterClient{}
	mockClient.On("Step", mock.Anything).Return(mockgRPC, nil)

	stub.Client = mockClient

	stream := assertEventualEstablishStream(t, stub)

	alerts := make(chan struct{}, 100)

	stream.Logger = stream.Logger.WithOptions(zap.Hooks(func(entry zapcore.Entry) error {
		if strings.Contains(entry.Message, "expires in less than") {
			alerts <- struct{}{}
		}
		return nil
	}))

	// Send a message to the node and expect an alert to be logged.
	stream.Send(wrapSubmitReq(testReq))
	select {
	case <-alerts:
	case <-time.After(time.Second * 5):
		t.Fatal("Should have logged an alert")
	}
	// Send another message, and ensure no alert is logged, because
	// alerts are suppressed until the minimum warning interval elapses.
	stream.Send(wrapSubmitReq(testReq))
	select {
	case <-alerts:
		t.Fatal("Should not have logged an alert")
	case <-time.After(time.Millisecond * 500):
	}
	// Wait enough time for the alert interval to clear.
	time.Sleep(node1.c.MinimumExpirationWarningInterval + time.Second)
	// Send a message again; this time an alert should be logged again.
	stream.Send(wrapSubmitReq(testReq))
	select {
	case <-alerts:
	case <-time.After(time.Second * 5):
		t.Fatal("Should have logged an alert")
	}
}

func assertBiDiCommunicationForChannel(t *testing.T, node1, node2 *clusterNode, msgToSend *orderer.SubmitRequest, channel string) {
	establish := []struct {
		label    string
		sender   *clusterNode
		receiver *clusterNode
		target   uint64
	}{
		{label: "1->2", sender: node1, target: node2.nodeInfo.ID, receiver: node2},
		{label: "2->1", sender: node2, target: node1.nodeInfo.ID, receiver: node1},
	}
	for _, estab := range establish {
		stub, err := estab.sender.c.Remote(channel, estab.target)
		require.NoError(t, err)

		stream := assertEventualEstablishStream(t, stub)

		var wg sync.WaitGroup
		wg.Add(1)
		estab.receiver.handler.On("OnSubmit", channel, estab.sender.nodeInfo.ID, mock.Anything).Return(nil).Once().Run(func(args mock.Arguments) {
			req := args.Get(2).(*orderer.SubmitRequest)
			require.True(t, proto.Equal(req, msgToSend))
			t.Log(estab.label)
			wg.Done()
		})

		err = stream.Send(wrapSubmitReq(msgToSend))
		require.NoError(t, err)

		wg.Wait()
	}
}

func assertBiDiCommunication(t *testing.T, node1, node2 *clusterNode, msgToSend *orderer.SubmitRequest) {
	assertBiDiCommunicationForChannel(t, node1, node2, msgToSend, testChannel)
}

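// assertEventualEstablishStream retries NewStream until it succeeds; since the
// dialer is configured with AsyncConnect, early attempts may fail while the
// underlying connection is still being established.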
func assertEventualEstablishStream(t *testing.T, rpc *cluster.RemoteContext) *cluster.Stream {
	var res *cluster.Stream
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		stream, err := rpc.NewStream(time.Hour)
		res = stream
		return err
	}, timeout).Should(gomega.Succeed())
	return res
}

func assertEventualSendMessage(t *testing.T, rpc *cluster.RemoteContext, req *orderer.SubmitRequest) orderer.Cluster_StepClient {
	var res orderer.Cluster_StepClient
	gt := gomega.NewGomegaWithT(t)
	gt.Eventually(func() error {
		stream, err := rpc.NewStream(time.Hour)
		if err != nil {
			return err
		}
		res = stream
		return stream.Send(wrapSubmitReq(req))
	}, timeout).Should(gomega.Succeed())
	return res
}

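// wrapSubmitReq wraps a SubmitRequest in the StepRequest oneof; the consensus
// variant of the oneof is built analogously (see testConsensusReq above).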
func wrapSubmitReq(req *orderer.SubmitRequest) *orderer.StepRequest {
	return &orderer.StepRequest{
		Payload: &orderer.StepRequest_SubmitRequest{
			SubmitRequest: req,
		},
	}
}