github.com/hechain20/hechain@v0.0.0-20220316014945-b544036ba106/orderer/common/cluster/comm.go (about)

     1  /*
     2  Copyright hechain. 2017 All Rights Reserved.
     3  
     4  SPDX-License-Identifier: Apache-2.0
     5  */
     6  
     7  package cluster
     8  
     9  import (
    10  	"bytes"
    11  	"context"
    12  	"crypto/x509"
    13  	"encoding/pem"
    14  	"fmt"
    15  	"sync"
    16  	"sync/atomic"
    17  	"time"
    18  
    19  	"github.com/golang/protobuf/proto"
    20  	"github.com/hechain20/hechain/common/flogging"
    21  	"github.com/hechain20/hechain/common/util"
    22  	"github.com/hyperledger/fabric-protos-go/orderer"
    23  	"github.com/pkg/errors"
    24  	"go.uber.org/zap"
    25  	"google.golang.org/grpc"
    26  	"google.golang.org/grpc/connectivity"
    27  )
    28  
const (
	// MinimumExpirationWarningInterval is the default minimum time interval
	// (five minutes) between consecutive warnings about certificate expiration.
	MinimumExpirationWarningInterval = time.Minute * 5
)

// Sentinel errors used by streams in this file.
var (
	// errOverflow is the cancellation reason and return value used when a
	// droppable message finds the stream's send queue full.
	errOverflow = errors.New("send queue overflown")
	// errAborted is the cancellation reason used when a stream's servicing
	// goroutine exits or its RemoteContext is aborted.
	errAborted = errors.New("aborted")
	// errTimeout is reported and returned when a stream operation does not
	// complete within the stream's timeout.
	errTimeout = errors.New("rpc timeout expired")
)
    40  
// ChannelExtractor extracts the channel of a given message,
// or returns an empty string if that's not possible.
// Comm treats an empty result as a badly formatted message.
type ChannelExtractor interface {
	TargetChannel(message proto.Message) string
}
    46  
    47  //go:generate mockery -dir . -name Handler -case underscore -output ./mocks/
    48  
// Handler handles Step() and Submit() requests once Comm has identified
// the channel they target and the ID of the node that sent them.
// Each method returns an error if the request could not be handled.
type Handler interface {
	OnConsensus(channel string, sender uint64, req *orderer.ConsensusRequest) error
	OnSubmit(channel string, sender uint64, req *orderer.SubmitRequest) error
}
    54  
// RemoteNode represents a cluster member: its identifier, the endpoint
// it is reachable at, and the TLS certificates it uses.
type RemoteNode struct {
	// ID is unique among all members, and cannot be 0.
	ID uint64
	// Endpoint is the endpoint of the node, denoted in %s:%d format
	Endpoint string
	// ServerTLSCert is the DER encoded TLS server certificate of the node
	ServerTLSCert []byte
	// ClientTLSCert is the DER encoded TLS client certificate of the node
	ClientTLSCert []byte
}
    66  
    67  // String returns a string representation of this RemoteNode
    68  func (rm RemoteNode) String() string {
    69  	return fmt.Sprintf("ID: %d,\nEndpoint: %s,\nServerTLSCert:%s, ClientTLSCert:%s",
    70  		rm.ID, rm.Endpoint, DERtoPEM(rm.ServerTLSCert), DERtoPEM(rm.ClientTLSCert))
    71  }
    72  
    73  //go:generate mockery -dir . -name Communicator -case underscore -output ./mocks/
    74  
// Communicator defines communication for a consenter: resolving remote
// nodes per channel, applying membership changes, and shutting down.
type Communicator interface {
	// Remote returns a RemoteContext for the given RemoteNode ID in the context
	// of the given channel, or error if connection cannot be established, or
	// the channel wasn't configured
	Remote(channel string, id uint64) (*RemoteContext, error)
	// Configure configures the communication to connect to all
	// given members, and disconnect from any members not among the given
	// members.
	Configure(channel string, members []RemoteNode)
	// Shutdown shuts down the communicator
	Shutdown()
}
    88  
// MembersByChannel is a mapping from channel name
// to the MemberMapping holding that channel's members.
type MembersByChannel map[string]MemberMapping
    92  
// Comm implements Communicator.
//
// Lock guards Chan2Members, shutdown and shutdownSignal: Remote and the
// request dispatch path take it for reading, Configure and Shutdown take
// it for writing.
//
// shutdownSignal is created lazily and closed by the first Shutdown call;
// it is handed to every RemoteContext created by this instance.
// MinimumExpirationWarningInterval, CertExpWarningThreshold, SendBufferSize,
// Metrics and Logger are likewise copied into each RemoteContext.
// ChanExt extracts the target channel of incoming requests, H receives the
// dispatched requests, Connections supplies and tears down connections, and
// CompareCertificate is installed as the SamePublicKey comparator of every
// newly created MemberMapping.
type Comm struct {
	MinimumExpirationWarningInterval time.Duration
	CertExpWarningThreshold          time.Duration
	shutdownSignal                   chan struct{}
	shutdown                         bool
	SendBufferSize                   int
	Lock                             sync.RWMutex
	Logger                           *flogging.FabricLogger
	ChanExt                          ChannelExtractor
	H                                Handler
	Connections                      *ConnectionStore
	Chan2Members                     MembersByChannel
	Metrics                          *Metrics
	CompareCertificate               CertificateComparator
}
   109  
// requestContext carries what Comm resolved about an incoming request:
// the channel it targets and the ID of the member that sent it.
type requestContext struct {
	channel string
	sender  uint64
}
   114  
   115  // DispatchSubmit identifies the channel and sender of the submit request and passes it
   116  // to the underlying Handler
   117  func (c *Comm) DispatchSubmit(ctx context.Context, request *orderer.SubmitRequest) error {
   118  	reqCtx, err := c.requestContext(ctx, request)
   119  	if err != nil {
   120  		return err
   121  	}
   122  	return c.H.OnSubmit(reqCtx.channel, reqCtx.sender, request)
   123  }
   124  
   125  // DispatchConsensus identifies the channel and sender of the step request and passes it
   126  // to the underlying Handler
   127  func (c *Comm) DispatchConsensus(ctx context.Context, request *orderer.ConsensusRequest) error {
   128  	reqCtx, err := c.requestContext(ctx, request)
   129  	if err != nil {
   130  		return err
   131  	}
   132  	return c.H.OnConsensus(reqCtx.channel, reqCtx.sender, request)
   133  }
   134  
   135  // requestContext identifies the sender and channel of the request and returns
   136  // it wrapped in a requestContext
   137  func (c *Comm) requestContext(ctx context.Context, msg proto.Message) (*requestContext, error) {
   138  	channel := c.ChanExt.TargetChannel(msg)
   139  	if channel == "" {
   140  		return nil, errors.Errorf("badly formatted message, cannot extract channel")
   141  	}
   142  
   143  	c.Lock.RLock()
   144  	mapping, exists := c.Chan2Members[channel]
   145  	c.Lock.RUnlock()
   146  
   147  	if !exists {
   148  		return nil, errors.Errorf("channel %s doesn't exist", channel)
   149  	}
   150  
   151  	cert := util.ExtractRawCertificateFromContext(ctx)
   152  	if len(cert) == 0 {
   153  		return nil, errors.Errorf("no TLS certificate sent")
   154  	}
   155  
   156  	stub := mapping.LookupByClientCert(cert)
   157  	if stub == nil {
   158  		return nil, errors.Errorf("certificate extracted from TLS connection isn't authorized")
   159  	}
   160  	return &requestContext{
   161  		channel: channel,
   162  		sender:  stub.ID,
   163  	}, nil
   164  }
   165  
   166  // Remote obtains a RemoteContext linked to the destination node on the context
   167  // of a given channel
   168  func (c *Comm) Remote(channel string, id uint64) (*RemoteContext, error) {
   169  	c.Lock.RLock()
   170  	defer c.Lock.RUnlock()
   171  
   172  	if c.shutdown {
   173  		return nil, errors.New("communication has been shut down")
   174  	}
   175  
   176  	mapping, exists := c.Chan2Members[channel]
   177  	if !exists {
   178  		return nil, errors.Errorf("channel %s doesn't exist", channel)
   179  	}
   180  	stub := mapping.ByID(id)
   181  	if stub == nil {
   182  		return nil, errors.Errorf("node %d doesn't exist in channel %s's membership", id, channel)
   183  	}
   184  
   185  	if stub.Active() {
   186  		return stub.RemoteContext, nil
   187  	}
   188  
   189  	err := stub.Activate(c.createRemoteContext(stub, channel))
   190  	if err != nil {
   191  		return nil, errors.WithStack(err)
   192  	}
   193  	return stub.RemoteContext, nil
   194  }
   195  
   196  // Configure configures the channel with the given RemoteNodes
   197  func (c *Comm) Configure(channel string, newNodes []RemoteNode) {
   198  	c.Logger.Infof("Entering, channel: %s, nodes: %v", channel, newNodes)
   199  	defer c.Logger.Infof("Exiting")
   200  
   201  	c.Lock.Lock()
   202  	defer c.Lock.Unlock()
   203  
   204  	c.createShutdownSignalIfNeeded()
   205  
   206  	if c.shutdown {
   207  		return
   208  	}
   209  
   210  	beforeConfigChange := c.serverCertsInUse()
   211  	// Update the channel-scoped mapping with the new nodes
   212  	c.applyMembershipConfig(channel, newNodes)
   213  	// Close connections to nodes that are not present in the new membership
   214  	c.cleanUnusedConnections(beforeConfigChange)
   215  }
   216  
   217  func (c *Comm) createShutdownSignalIfNeeded() {
   218  	if c.shutdownSignal == nil {
   219  		c.shutdownSignal = make(chan struct{})
   220  	}
   221  }
   222  
   223  // Shutdown shuts down the instance
   224  func (c *Comm) Shutdown() {
   225  	c.Lock.Lock()
   226  	defer c.Lock.Unlock()
   227  
   228  	c.createShutdownSignalIfNeeded()
   229  	if !c.shutdown {
   230  		close(c.shutdownSignal)
   231  	}
   232  
   233  	c.shutdown = true
   234  	for _, members := range c.Chan2Members {
   235  		members.Foreach(func(id uint64, stub *Stub) {
   236  			c.Connections.Disconnect(stub.ServerTLSCert)
   237  		})
   238  	}
   239  }
   240  
   241  // cleanUnusedConnections disconnects all connections that are un-used
   242  // at the moment of the invocation
   243  func (c *Comm) cleanUnusedConnections(serverCertsBeforeConfig StringSet) {
   244  	// Scan all nodes after the reconfiguration
   245  	serverCertsAfterConfig := c.serverCertsInUse()
   246  	// Filter out the certificates that remained after the reconfiguration
   247  	serverCertsBeforeConfig.subtract(serverCertsAfterConfig)
   248  	// Close the connections to all these nodes as they shouldn't be in use now
   249  	for serverCertificate := range serverCertsBeforeConfig {
   250  		c.Connections.Disconnect([]byte(serverCertificate))
   251  	}
   252  }
   253  
   254  // serverCertsInUse returns the server certificates that are in use
   255  // represented as strings.
   256  func (c *Comm) serverCertsInUse() StringSet {
   257  	endpointsInUse := make(StringSet)
   258  	for _, mapping := range c.Chan2Members {
   259  		endpointsInUse.union(mapping.ServerCertificates())
   260  	}
   261  	return endpointsInUse
   262  }
   263  
   264  // applyMembershipConfig sets the given RemoteNodes for the given channel
   265  func (c *Comm) applyMembershipConfig(channel string, newNodes []RemoteNode) {
   266  	mapping := c.getOrCreateMapping(channel)
   267  	newNodeIDs := make(map[uint64]struct{})
   268  
   269  	for _, node := range newNodes {
   270  		newNodeIDs[node.ID] = struct{}{}
   271  		c.updateStubInMapping(channel, mapping, node)
   272  	}
   273  
   274  	// Remove all stubs without a corresponding node
   275  	// in the new nodes
   276  	mapping.Foreach(func(id uint64, stub *Stub) {
   277  		if _, exists := newNodeIDs[id]; exists {
   278  			c.Logger.Info(id, "exists in both old and new membership for channel", channel, ", skipping its deactivation")
   279  			return
   280  		}
   281  		c.Logger.Info("Deactivated node", id, "who's endpoint is", stub.Endpoint, "as it's removed from membership")
   282  		mapping.Remove(id)
   283  		stub.Deactivate()
   284  	})
   285  }
   286  
   287  // updateStubInMapping updates the given RemoteNode and adds it to the MemberMapping
   288  func (c *Comm) updateStubInMapping(channel string, mapping MemberMapping, node RemoteNode) {
   289  	stub := mapping.ByID(node.ID)
   290  	if stub == nil {
   291  		c.Logger.Info("Allocating a new stub for node", node.ID, "with endpoint of", node.Endpoint, "for channel", channel)
   292  		stub = &Stub{}
   293  	}
   294  
   295  	// Check if the TLS server certificate of the node is replaced
   296  	// and if so - then deactivate the stub, to trigger
   297  	// a re-creation of its gRPC connection
   298  	if !bytes.Equal(stub.ServerTLSCert, node.ServerTLSCert) {
   299  		c.Logger.Info("Deactivating node", node.ID, "in channel", channel,
   300  			"with endpoint of", node.Endpoint, "due to TLS certificate change")
   301  		stub.Deactivate()
   302  	}
   303  
   304  	// Overwrite the stub Node data with the new data
   305  	stub.RemoteNode = node
   306  
   307  	// Put the stub into the mapping
   308  	mapping.Put(stub)
   309  
   310  	// Check if the stub needs activation.
   311  	if stub.Active() {
   312  		return
   313  	}
   314  
   315  	// Activate the stub
   316  	stub.Activate(c.createRemoteContext(stub, channel))
   317  }
   318  
   319  // createRemoteStub returns a function that creates a RemoteContext.
   320  // It is used as a parameter to Stub.Activate() in order to activate
   321  // a stub atomically.
   322  func (c *Comm) createRemoteContext(stub *Stub, channel string) func() (*RemoteContext, error) {
   323  	return func() (*RemoteContext, error) {
   324  		cert, err := x509.ParseCertificate(stub.ServerTLSCert)
   325  		if err != nil {
   326  			pemString := string(pem.EncodeToMemory(&pem.Block{Bytes: stub.ServerTLSCert}))
   327  			c.Logger.Errorf("Invalid DER for channel %s, endpoint %s, ID %d: %v", channel, stub.Endpoint, stub.ID, pemString)
   328  			return nil, errors.Wrap(err, "invalid certificate DER")
   329  		}
   330  
   331  		c.Logger.Debug("Connecting to", stub.RemoteNode, "for channel", channel)
   332  
   333  		conn, err := c.Connections.Connection(stub.Endpoint, stub.ServerTLSCert)
   334  		if err != nil {
   335  			c.Logger.Warningf("Unable to obtain connection to %d(%s) (channel %s): %v", stub.ID, stub.Endpoint, channel, err)
   336  			return nil, err
   337  		}
   338  
   339  		probeConnection := func(conn *grpc.ClientConn) error {
   340  			connState := conn.GetState()
   341  			if connState == connectivity.Connecting {
   342  				return errors.Errorf("connection to %d(%s) is in state %s", stub.ID, stub.Endpoint, connState)
   343  			}
   344  			return nil
   345  		}
   346  
   347  		clusterClient := orderer.NewClusterClient(conn)
   348  
   349  		workerCountReporter := workerCountReporter{
   350  			channel: channel,
   351  		}
   352  
   353  		rc := &RemoteContext{
   354  			expiresAt:                        cert.NotAfter,
   355  			minimumExpirationWarningInterval: c.MinimumExpirationWarningInterval,
   356  			certExpWarningThreshold:          c.CertExpWarningThreshold,
   357  			workerCountReporter:              workerCountReporter,
   358  			Channel:                          channel,
   359  			Metrics:                          c.Metrics,
   360  			SendBuffSize:                     c.SendBufferSize,
   361  			shutdownSignal:                   c.shutdownSignal,
   362  			endpoint:                         stub.Endpoint,
   363  			Logger:                           c.Logger,
   364  			ProbeConn:                        probeConnection,
   365  			conn:                             conn,
   366  			Client:                           clusterClient,
   367  		}
   368  		return rc, nil
   369  	}
   370  }
   371  
   372  // getOrCreateMapping creates a MemberMapping for the given channel
   373  // or returns the existing one.
   374  func (c *Comm) getOrCreateMapping(channel string) MemberMapping {
   375  	// Lazily create a mapping if it doesn't already exist
   376  	mapping, exists := c.Chan2Members[channel]
   377  	if !exists {
   378  		mapping = MemberMapping{
   379  			id2stub:       make(map[uint64]*Stub),
   380  			SamePublicKey: c.CompareCertificate,
   381  		}
   382  		c.Chan2Members[channel] = mapping
   383  	}
   384  	return mapping
   385  }
   386  
// Stub holds all information about the remote node,
// including the RemoteContext for it, and serializes
// some operations on it.
//
// The embedded *RemoteContext is nil while the stub is inactive.
// Active, Activate and Deactivate take lock when reading or replacing it.
type Stub struct {
	lock sync.RWMutex
	RemoteNode
	*RemoteContext
}
   395  
   396  // Active returns whether the Stub
   397  // is active or not
   398  func (stub *Stub) Active() bool {
   399  	stub.lock.RLock()
   400  	defer stub.lock.RUnlock()
   401  	return stub.isActive()
   402  }
   403  
// isActive returns whether the Stub is active or not, i.e. whether it
// holds a non-nil RemoteContext. It performs no locking itself; the
// callers in this file hold stub.lock while calling it.
func (stub *Stub) isActive() bool {
	return stub.RemoteContext != nil
}
   409  
   410  // Deactivate deactivates the Stub and
   411  // ceases all communication operations
   412  // invoked on it.
   413  func (stub *Stub) Deactivate() {
   414  	stub.lock.Lock()
   415  	defer stub.lock.Unlock()
   416  	if !stub.isActive() {
   417  		return
   418  	}
   419  	stub.RemoteContext.Abort()
   420  	stub.RemoteContext = nil
   421  }
   422  
   423  // Activate creates a remote context with the given function callback
   424  // in an atomic manner - if two parallel invocations are invoked on this Stub,
   425  // only a single invocation of createRemoteStub takes place.
   426  func (stub *Stub) Activate(createRemoteContext func() (*RemoteContext, error)) error {
   427  	stub.lock.Lock()
   428  	defer stub.lock.Unlock()
   429  	// Check if the stub has already been activated while we were waiting for the lock
   430  	if stub.isActive() {
   431  		return nil
   432  	}
   433  	remoteStub, err := createRemoteContext()
   434  	if err != nil {
   435  		return errors.WithStack(err)
   436  	}
   437  
   438  	stub.RemoteContext = remoteStub
   439  	return nil
   440  }
   441  
// RemoteContext interacts with remote cluster
// nodes. Every call can be aborted via call to Abort()
//
// expiresAt, minimumExpirationWarningInterval and certExpWarningThreshold
// feed the certificate expiration check of each stream created from it.
// shutdownSignal is shared with the streams as their shutdown notification,
// and SendBuffSize is the capacity of each stream's send queue.
// ProbeConn is consulted with conn before a new stream is opened.
// nextStreamID is advanced atomically to number streams, and streamsByID
// tracks the streams that have not been cancelled yet.
//
// NOTE(review): nextStreamID is a uint64 updated with atomic.AddUint64 but is
// not the first field of the struct; sync/atomic only guarantees 64-bit
// alignment for the first word of an allocated struct on 32-bit platforms —
// confirm whether such platforms need to be supported.
type RemoteContext struct {
	expiresAt                        time.Time
	minimumExpirationWarningInterval time.Duration
	certExpWarningThreshold          time.Duration
	Metrics                          *Metrics
	Channel                          string
	SendBuffSize                     int
	shutdownSignal                   chan struct{}
	Logger                           *flogging.FabricLogger
	endpoint                         string
	Client                           orderer.ClusterClient
	ProbeConn                        func(conn *grpc.ClientConn) error
	conn                             *grpc.ClientConn
	nextStreamID                     uint64
	streamsByID                      streamsMapperReporter
	workerCountReporter              workerCountReporter
}
   461  
// Stream is used to send/receive messages to/from the remote cluster member.
//
// Outgoing requests are queued on sendBuff together with a report callback
// and sent by the goroutine running serviceStream. abortChan is closed when
// the stream is cancelled, and commShutdown is the shutdown signal of the
// RemoteContext that created the stream. Cancel cancels the stream with a
// reason; the reason's text is kept in abortReason and canceled is set to 1.
// Timeout bounds every Send and Recv operation, and expCheck is consulted
// before each send to warn about certificate expiration.
type Stream struct {
	abortChan <-chan struct{}
	sendBuff  chan struct {
		request *orderer.StepRequest
		report  func(error)
	}
	commShutdown chan struct{}
	abortReason  *atomic.Value
	metrics      *Metrics
	ID           uint64
	Channel      string
	NodeName     string
	Endpoint     string
	Logger       *flogging.FabricLogger
	Timeout      time.Duration
	orderer.Cluster_StepClient
	Cancel   func(error)
	canceled *uint32
	expCheck *certificateExpirationCheck
}
   483  
// StreamOperation denotes an operation done by a stream, such as Send or Receive.
// It returns the response of the operation, if any, and its error.
type StreamOperation func() (*orderer.StepResponse, error)
   486  
   487  // Canceled returns whether the stream was canceled.
   488  func (stream *Stream) Canceled() bool {
   489  	return atomic.LoadUint32(stream.canceled) == uint32(1)
   490  }
   491  
   492  // Send sends the given request to the remote cluster member.
   493  func (stream *Stream) Send(request *orderer.StepRequest) error {
   494  	return stream.SendWithReport(request, func(_ error) {})
   495  }
   496  
   497  // SendWithReport sends the given request to the remote cluster member and invokes report on the send result.
   498  func (stream *Stream) SendWithReport(request *orderer.StepRequest, report func(error)) error {
   499  	if stream.Canceled() {
   500  		return errors.New(stream.abortReason.Load().(string))
   501  	}
   502  	var allowDrop bool
   503  	// We want to drop consensus transactions if the remote node cannot keep up with us,
   504  	// otherwise we'll slow down the entire FSM.
   505  	if request.GetConsensusRequest() != nil {
   506  		allowDrop = true
   507  	}
   508  
   509  	return stream.sendOrDrop(request, allowDrop, report)
   510  }
   511  
   512  // sendOrDrop sends the given request to the remote cluster member, or drops it
   513  // if it is a consensus request and the queue is full.
   514  func (stream *Stream) sendOrDrop(request *orderer.StepRequest, allowDrop bool, report func(error)) error {
   515  	msgType := "transaction"
   516  	if allowDrop {
   517  		msgType = "consensus"
   518  	}
   519  
   520  	stream.metrics.reportQueueOccupancy(stream.Endpoint, msgType, stream.Channel, len(stream.sendBuff), cap(stream.sendBuff))
   521  
   522  	if allowDrop && len(stream.sendBuff) == cap(stream.sendBuff) {
   523  		stream.Cancel(errOverflow)
   524  		stream.metrics.reportMessagesDropped(stream.Endpoint, stream.Channel)
   525  		return errOverflow
   526  	}
   527  
   528  	select {
   529  	case <-stream.abortChan:
   530  		return errors.Errorf("stream %d aborted", stream.ID)
   531  	case stream.sendBuff <- struct {
   532  		request *orderer.StepRequest
   533  		report  func(error)
   534  	}{request: request, report: report}:
   535  		return nil
   536  	case <-stream.commShutdown:
   537  		return nil
   538  	}
   539  }
   540  
   541  // sendMessage sends the request down the stream
   542  func (stream *Stream) sendMessage(request *orderer.StepRequest, report func(error)) {
   543  	start := time.Now()
   544  	var err error
   545  	defer func() {
   546  		message := fmt.Sprintf("Send of %s to %s(%s) took %v",
   547  			requestAsString(request), stream.NodeName, stream.Endpoint, time.Since(start))
   548  		if err != nil {
   549  			stream.Logger.Warnf("%s but failed due to %s", message, err.Error())
   550  		} else {
   551  			stream.Logger.Debug(message)
   552  		}
   553  	}()
   554  
   555  	f := func() (*orderer.StepResponse, error) {
   556  		startSend := time.Now()
   557  		stream.expCheck.checkExpiration(startSend, stream.Channel)
   558  		err := stream.Cluster_StepClient.Send(request)
   559  		stream.metrics.reportMsgSendTime(stream.Endpoint, stream.Channel, time.Since(startSend))
   560  		return nil, err
   561  	}
   562  
   563  	_, err = stream.operateWithTimeout(f, report)
   564  }
   565  
   566  func (stream *Stream) serviceStream() {
   567  	streamStartTime := time.Now()
   568  	defer func() {
   569  		stream.Cancel(errAborted)
   570  		stream.Logger.Debugf("Stream %d to (%s) terminated with total lifetime of %s",
   571  			stream.ID, stream.Endpoint, time.Since(streamStartTime))
   572  	}()
   573  
   574  	for {
   575  		select {
   576  		case reqReport := <-stream.sendBuff:
   577  			stream.sendMessage(reqReport.request, reqReport.report)
   578  		case <-stream.abortChan:
   579  			return
   580  		case <-stream.commShutdown:
   581  			return
   582  		}
   583  	}
   584  }
   585  
   586  // Recv receives a message from a remote cluster member.
   587  func (stream *Stream) Recv() (*orderer.StepResponse, error) {
   588  	start := time.Now()
   589  	defer func() {
   590  		if !stream.Logger.IsEnabledFor(zap.DebugLevel) {
   591  			return
   592  		}
   593  		stream.Logger.Debugf("Receive from %s(%s) took %v", stream.NodeName, stream.Endpoint, time.Since(start))
   594  	}()
   595  
   596  	f := func() (*orderer.StepResponse, error) {
   597  		return stream.Cluster_StepClient.Recv()
   598  	}
   599  
   600  	return stream.operateWithTimeout(f, func(_ error) {})
   601  }
   602  
   603  // operateWithTimeout performs the given operation on the stream, and blocks until the timeout expires.
   604  func (stream *Stream) operateWithTimeout(invoke StreamOperation, report func(error)) (*orderer.StepResponse, error) {
   605  	timer := time.NewTimer(stream.Timeout)
   606  	defer timer.Stop()
   607  
   608  	var operationEnded sync.WaitGroup
   609  	operationEnded.Add(1)
   610  
   611  	responseChan := make(chan struct {
   612  		res *orderer.StepResponse
   613  		err error
   614  	}, 1)
   615  
   616  	go func() {
   617  		defer operationEnded.Done()
   618  		res, err := invoke()
   619  		responseChan <- struct {
   620  			res *orderer.StepResponse
   621  			err error
   622  		}{res: res, err: err}
   623  	}()
   624  
   625  	select {
   626  	case r := <-responseChan:
   627  		report(r.err)
   628  		if r.err != nil {
   629  			stream.Cancel(r.err)
   630  		}
   631  		return r.res, r.err
   632  	case <-timer.C:
   633  		report(errTimeout)
   634  		stream.Logger.Warningf("Stream %d to %s(%s) was forcibly terminated because timeout (%v) expired",
   635  			stream.ID, stream.NodeName, stream.Endpoint, stream.Timeout)
   636  		stream.Cancel(errTimeout)
   637  		// Wait for the operation goroutine to end
   638  		operationEnded.Wait()
   639  		return nil, errTimeout
   640  	}
   641  }
   642  
   643  func requestAsString(request *orderer.StepRequest) string {
   644  	switch t := request.GetPayload().(type) {
   645  	case *orderer.StepRequest_SubmitRequest:
   646  		if t.SubmitRequest == nil || t.SubmitRequest.Payload == nil {
   647  			return fmt.Sprintf("Empty SubmitRequest: %v", t.SubmitRequest)
   648  		}
   649  		return fmt.Sprintf("SubmitRequest for channel %s with payload of size %d",
   650  			t.SubmitRequest.Channel, len(t.SubmitRequest.Payload.Payload))
   651  	case *orderer.StepRequest_ConsensusRequest:
   652  		return fmt.Sprintf("ConsensusRequest for channel %s with payload of size %d",
   653  			t.ConsensusRequest.Channel, len(t.ConsensusRequest.Payload))
   654  	default:
   655  		return fmt.Sprintf("unknown type: %v", request)
   656  	}
   657  }
   658  
// NewStream creates a new stream.
// It is not thread safe, and Send() or Recv() block only until the timeout expires.
//
// The connection is probed via ProbeConn first, and the stream is refused if
// the probe fails. On success the stream is registered in the
// RemoteContext's stream table, the egress stream count metric is updated,
// and a goroutine is started to service the stream's send queue.
func (rc *RemoteContext) NewStream(timeout time.Duration) (*Stream, error) {
	if err := rc.ProbeConn(rc.conn); err != nil {
		return nil, err
	}

	// cancel tears down the context the gRPC stream is opened with.
	ctx, cancel := context.WithCancel(context.TODO())
	stream, err := rc.Client.Step(ctx)
	if err != nil {
		cancel()
		return nil, errors.WithStack(err)
	}

	streamID := atomic.AddUint64(&rc.nextStreamID, 1)
	nodeName := commonNameFromContext(stream.Context())

	var canceled uint32

	abortChan := make(chan struct{})
	abortReason := &atomic.Value{}

	// once guarantees the cancellation steps below run a single time, no
	// matter how many times or from how many goroutines Cancel is invoked.
	once := &sync.Once{}

	cancelWithReason := func(err error) {
		once.Do(func() {
			// The reason is stored before the canceled flag is raised, so a
			// caller that observes the flag can always load a reason.
			abortReason.Store(err.Error())
			cancel()
			rc.streamsByID.Delete(streamID)
			rc.Metrics.reportEgressStreamCount(rc.Channel, atomic.LoadUint32(&rc.streamsByID.size))
			rc.Logger.Debugf("Stream %d to %s(%s) is aborted", streamID, nodeName, rc.endpoint)
			atomic.StoreUint32(&canceled, 1)
			close(abortChan)
		})
	}

	logger := flogging.MustGetLogger("orderer.common.cluster.step")
	stepLogger := logger.WithOptions(zap.AddCallerSkip(1))

	s := &Stream{
		Channel:     rc.Channel,
		metrics:     rc.Metrics,
		abortReason: abortReason,
		abortChan:   abortChan,
		sendBuff: make(chan struct {
			request *orderer.StepRequest
			report  func(error)
		}, rc.SendBuffSize),
		commShutdown:       rc.shutdownSignal,
		NodeName:           nodeName,
		Logger:             stepLogger,
		ID:                 streamID,
		Endpoint:           rc.endpoint,
		Timeout:            timeout,
		Cluster_StepClient: stream,
		Cancel:             cancelWithReason,
		canceled:           &canceled,
	}

	// The expiration check is built after s because its alert callback
	// logs through the stream's own logger.
	s.expCheck = &certificateExpirationCheck{
		minimumExpirationWarningInterval: rc.minimumExpirationWarningInterval,
		expirationWarningThreshold:       rc.certExpWarningThreshold,
		expiresAt:                        rc.expiresAt,
		endpoint:                         s.Endpoint,
		nodeName:                         s.NodeName,
		alert: func(template string, args ...interface{}) {
			s.Logger.Warningf(template, args...)
		},
	}

	rc.Logger.Debugf("Created new stream to %s with ID of %d and buffer size of %d",
		rc.endpoint, streamID, cap(s.sendBuff))

	// Register the stream before its servicing goroutine starts, so that
	// Abort() can find and cancel it.
	rc.streamsByID.Store(streamID, s)
	rc.Metrics.reportEgressStreamCount(rc.Channel, atomic.LoadUint32(&rc.streamsByID.size))

	// The worker count metric covers the lifetime of the servicing goroutine.
	go func() {
		rc.workerCountReporter.increment(s.metrics)
		s.serviceStream()
		rc.workerCountReporter.decrement(s.metrics)
	}()

	return s, nil
}
   743  
   744  // Abort aborts the contexts the RemoteContext uses, thus effectively
   745  // causes all operations that use this RemoteContext to terminate.
   746  func (rc *RemoteContext) Abort() {
   747  	rc.streamsByID.Range(func(_, value interface{}) bool {
   748  		value.(*Stream).Cancel(errAborted)
   749  		return false
   750  	})
   751  }
   752  
   753  func commonNameFromContext(ctx context.Context) string {
   754  	cert := util.ExtractCertificateFromContext(ctx)
   755  	if cert == nil {
   756  		return "unidentified node"
   757  	}
   758  	return cert.Subject.CommonName
   759  }
   760  
   761  type streamsMapperReporter struct {
   762  	size uint32
   763  	sync.Map
   764  }
   765  
   766  func (smr *streamsMapperReporter) Delete(key interface{}) {
   767  	smr.Map.Delete(key)
   768  	atomic.AddUint32(&smr.size, ^uint32(0))
   769  }
   770  
   771  func (smr *streamsMapperReporter) Store(key, value interface{}) {
   772  	smr.Map.Store(key, value)
   773  	atomic.AddUint32(&smr.size, 1)
   774  }
   775  
// workerCountReporter keeps an atomically updated count of workers for a
// channel and reports the count to Metrics whenever it changes.
type workerCountReporter struct {
	channel     string
	workerCount uint32
}
   780  
   781  func (wcr *workerCountReporter) increment(m *Metrics) {
   782  	count := atomic.AddUint32(&wcr.workerCount, 1)
   783  	m.reportWorkerCount(wcr.channel, count)
   784  }
   785  
   786  func (wcr *workerCountReporter) decrement(m *Metrics) {
   787  	// ^0 flips all zeros to ones, which means
   788  	// 2^32 - 1, and then we add this number wcr.workerCount.
   789  	// It follows from commutativity of the unsigned integers group
   790  	// that wcr.workerCount + 2^32 - 1 = wcr.workerCount - 1 + 2^32
   791  	// which is just wcr.workerCount - 1.
   792  	count := atomic.AddUint32(&wcr.workerCount, ^uint32(0))
   793  	m.reportWorkerCount(wcr.channel, count)
   794  }