github.com/MetalBlockchain/subnet-evm@v0.4.9/peer/network.go (about)

     1  // (c) 2019-2022, Ava Labs, Inc. All rights reserved.
     2  // See the file LICENSE for licensing terms.
     3  
     4  package peer
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"sync"
    11  	"time"
    12  
    13  	"golang.org/x/sync/semaphore"
    14  
    15  	"github.com/ethereum/go-ethereum/log"
    16  
    17  	"github.com/MetalBlockchain/metalgo/codec"
    18  	"github.com/MetalBlockchain/metalgo/ids"
    19  	"github.com/MetalBlockchain/metalgo/snow/engine/common"
    20  	"github.com/MetalBlockchain/metalgo/snow/validators"
    21  	"github.com/MetalBlockchain/metalgo/utils/set"
    22  	"github.com/MetalBlockchain/metalgo/version"
    23  
    24  	"github.com/MetalBlockchain/subnet-evm/peer/stats"
    25  	"github.com/MetalBlockchain/subnet-evm/plugin/evm/message"
    26  )
    27  
    28  // Minimum amount of time to handle a request
    29  const minRequestHandlingDuration = 100 * time.Millisecond
    30  
    31  var (
    32  	errAcquiringSemaphore                      = errors.New("error acquiring semaphore")
    33  	errExpiredRequest                          = errors.New("expired request")
    34  	_                     Network              = &network{}
    35  	_                     validators.Connector = &network{}
    36  	_                     common.AppHandler    = &network{}
    37  )
    38  
    39  type Network interface {
    40  	validators.Connector
    41  	common.AppHandler
    42  
    43  	// SendAppRequestAny synchronously sends request to an arbitrary peer with a
    44  	// node version greater than or equal to minVersion.
    45  	// Returns the ID of the chosen peer, and an error if the request could not
    46  	// be sent to a peer with the desired [minVersion].
    47  	SendAppRequestAny(minVersion *version.Application, message []byte, handler message.ResponseHandler) (ids.NodeID, error)
    48  
    49  	// SendAppRequest sends message to given nodeID, notifying handler when there's a response or timeout
    50  	SendAppRequest(nodeID ids.NodeID, message []byte, handler message.ResponseHandler) error
    51  
    52  	// Gossip sends given gossip message to peers
    53  	Gossip(gossip []byte) error
    54  
    55  	// SendCrossChainRequest sends a message to given chainID notifying handler when there's a response or timeout
    56  	SendCrossChainRequest(chainID ids.ID, message []byte, handler message.ResponseHandler) error
    57  
    58  	// Shutdown stops all peer channel listeners and marks the node to have stopped
    59  	// n.Start() can be called again but the peers will have to be reconnected
    60  	// by calling OnPeerConnected for each peer
    61  	Shutdown()
    62  
    63  	// SetGossipHandler sets the provided gossip handler as the gossip handler
    64  	SetGossipHandler(handler message.GossipHandler)
    65  
    66  	// SetRequestHandler sets the provided request handler as the request handler
    67  	SetRequestHandler(handler message.RequestHandler)
    68  
    69  	// SetCrossChainHandler sets the provided cross chain request handler as the cross chain request handler
    70  	SetCrossChainRequestHandler(handler message.CrossChainRequestHandler)
    71  
    72  	// Size returns the size of the network in number of connected peers
    73  	Size() uint32
    74  
    75  	// TrackBandwidth should be called for each valid request with the bandwidth
    76  	// (length of response divided by request time), and with 0 if the response is invalid.
    77  	TrackBandwidth(nodeID ids.NodeID, bandwidth float64)
    78  }
    79  
// network is the implementation of Network.
//
// Outbound app requests and cross chain requests draw their request IDs from
// the same counter ([requestIDGen]) and register their response handlers in
// the same map ([outstandingRequestHandlers]); each kind is throttled by its
// own semaphore. [lock] is taken by the methods that read or modify the
// request bookkeeping, the peer tracker, or the handler fields via the setters.
type network struct {
	lock                       sync.RWMutex                       // guards the mutable state of this struct
	self                       ids.NodeID                         // NodeID of this node; never registered as a peer
	requestIDGen               uint32                             // next requestID to hand out for an outbound request
	outstandingRequestHandlers map[uint32]message.ResponseHandler // requestID => handler awaiting a response or failure
	activeAppRequests          *semaphore.Weighted                // bounds the number of in-flight outbound app requests
	activeCrossChainRequests   *semaphore.Weighted                // bounds the number of in-flight outbound cross chain requests
	appSender                  common.AppSender                   // sender used for all outbound messages
	codec                      codec.Manager                      // codec used to unmarshal inbound app requests and gossip
	crossChainCodec            codec.Manager                      // codec used to unmarshal inbound cross chain requests
	appRequestHandler          message.RequestHandler             // handles inbound app requests
	crossChainRequestHandler   message.CrossChainRequestHandler   // handles inbound cross chain requests
	gossipHandler              message.GossipHandler              // handles inbound gossip
	peers                      *peerTracker                       // tracking of peers & bandwidth
	appStats                   stats.RequestHandlerStats          // deadline metrics for inbound app requests
	crossChainStats            stats.RequestHandlerStats          // deadline metrics for inbound cross chain requests
}
    99  
   100  func NewNetwork(appSender common.AppSender, codec codec.Manager, crossChainCodec codec.Manager, self ids.NodeID, maxActiveAppRequests int64, maxActiveCrossChainRequests int64) Network {
   101  	return &network{
   102  		appSender:                  appSender,
   103  		codec:                      codec,
   104  		crossChainCodec:            crossChainCodec,
   105  		self:                       self,
   106  		outstandingRequestHandlers: make(map[uint32]message.ResponseHandler),
   107  		activeAppRequests:          semaphore.NewWeighted(maxActiveAppRequests),
   108  		activeCrossChainRequests:   semaphore.NewWeighted(maxActiveCrossChainRequests),
   109  		gossipHandler:              message.NoopMempoolGossipHandler{},
   110  		appRequestHandler:          message.NoopRequestHandler{},
   111  		crossChainRequestHandler:   message.NoopCrossChainRequestHandler{},
   112  		peers:                      NewPeerTracker(),
   113  		appStats:                   stats.NewRequestHandlerStats(),
   114  		crossChainStats:            stats.NewCrossChainRequestHandlerStats(),
   115  	}
   116  }
   117  
   118  // SendAppRequestAny synchronously sends request to an arbitrary peer with a
   119  // node version greater than or equal to minVersion. If minVersion is nil,
   120  // the request will be sent to any peer regardless of their version.
   121  // Returns the ID of the chosen peer, and an error if the request could not
   122  // be sent to a peer with the desired [minVersion].
   123  func (n *network) SendAppRequestAny(minVersion *version.Application, request []byte, handler message.ResponseHandler) (ids.NodeID, error) {
   124  	// Take a slot from total [activeAppRequests] and block until a slot becomes available.
   125  	if err := n.activeAppRequests.Acquire(context.Background(), 1); err != nil {
   126  		return ids.EmptyNodeID, errAcquiringSemaphore
   127  	}
   128  
   129  	n.lock.Lock()
   130  	defer n.lock.Unlock()
   131  	if nodeID, ok := n.peers.GetAnyPeer(minVersion); ok {
   132  		return nodeID, n.sendAppRequest(nodeID, request, handler)
   133  	}
   134  
   135  	n.activeAppRequests.Release(1)
   136  	return ids.EmptyNodeID, fmt.Errorf("no peers found matching version %s out of %d peers", minVersion, n.peers.Size())
   137  }
   138  
   139  // SendAppRequest sends request message bytes to specified nodeID, notifying the responseHandler on response or failure
   140  func (n *network) SendAppRequest(nodeID ids.NodeID, request []byte, responseHandler message.ResponseHandler) error {
   141  	if nodeID == ids.EmptyNodeID {
   142  		return fmt.Errorf("cannot send request to empty nodeID, nodeID=%s, requestLen=%d", nodeID, len(request))
   143  	}
   144  
   145  	// Take a slot from total [activeAppRequests] and block until a slot becomes available.
   146  	if err := n.activeAppRequests.Acquire(context.Background(), 1); err != nil {
   147  		return errAcquiringSemaphore
   148  	}
   149  
   150  	n.lock.Lock()
   151  	defer n.lock.Unlock()
   152  
   153  	return n.sendAppRequest(nodeID, request, responseHandler)
   154  }
   155  
   156  // sendAppRequest sends request message bytes to specified nodeID and adds [responseHandler] to [outstandingRequestHandlers]
   157  // so that it can be invoked when the network receives either a response or failure message.
   158  // Assumes [nodeID] is never [self] since we guarantee [self] will not be added to the [peers] map.
   159  // Releases active requests semaphore if there was an error in sending the request
   160  // Returns an error if [appSender] is unable to make the request.
   161  // Assumes write lock is held
   162  func (n *network) sendAppRequest(nodeID ids.NodeID, request []byte, responseHandler message.ResponseHandler) error {
   163  	log.Debug("sending request to peer", "nodeID", nodeID, "requestLen", len(request))
   164  	n.peers.TrackPeer(nodeID)
   165  
   166  	// generate requestID
   167  	requestID := n.requestIDGen
   168  	n.requestIDGen++
   169  
   170  	n.outstandingRequestHandlers[requestID] = responseHandler
   171  
   172  	nodeIDs := set.NewSet[ids.NodeID](1)
   173  	nodeIDs.Add(nodeID)
   174  
   175  	// Send app request to [nodeID].
   176  	// On failure, release the slot from [activeAppRequests] and delete request from [outstandingRequestHandlers]
   177  	if err := n.appSender.SendAppRequest(context.TODO(), nodeIDs, requestID, request); err != nil {
   178  		n.activeAppRequests.Release(1)
   179  		delete(n.outstandingRequestHandlers, requestID)
   180  		return err
   181  	}
   182  
   183  	log.Debug("sent request message to peer", "nodeID", nodeID, "requestID", requestID)
   184  	return nil
   185  }
   186  
   187  // SendCrossChainRequest sends request message bytes to specified chainID and adds [handler] to [outstandingRequestHandlers]
   188  // so that it can be invoked when the network receives either a response or failure message.
   189  // Returns an error if [appSender] is unable to make the request.
   190  func (n *network) SendCrossChainRequest(chainID ids.ID, request []byte, handler message.ResponseHandler) error {
   191  	// Take a slot from total [activeCrossChainRequests] and block until a slot becomes available.
   192  	if err := n.activeCrossChainRequests.Acquire(context.Background(), 1); err != nil {
   193  		return errAcquiringSemaphore
   194  	}
   195  
   196  	n.lock.Lock()
   197  	defer n.lock.Unlock()
   198  
   199  	// generate requestID
   200  	requestID := n.requestIDGen
   201  	n.requestIDGen++
   202  
   203  	n.outstandingRequestHandlers[requestID] = handler
   204  
   205  	// Send cross chain request to [chainID].
   206  	// On failure, release the slot from [activeCrossChainRequests] and delete request from [outstandingRequestHandlers].
   207  	if err := n.appSender.SendCrossChainAppRequest(context.TODO(), chainID, requestID, request); err != nil {
   208  		n.activeCrossChainRequests.Release(1)
   209  		delete(n.outstandingRequestHandlers, requestID)
   210  		return err
   211  	}
   212  
   213  	log.Debug("sent request message to chain", "chainID", chainID, "crossChainRequestID", requestID)
   214  	return nil
   215  }
   216  
   217  // CrossChainAppRequest notifies the VM when another chain in the network requests for data.
   218  // Send a CrossChainAppResponse to [chainID] in response to a valid message using the same
   219  // [requestID] before the deadline.
   220  func (n *network) CrossChainAppRequest(ctx context.Context, requestingChainID ids.ID, requestID uint32, deadline time.Time, request []byte) error {
   221  	log.Debug("received CrossChainAppRequest from chain", "requestingChainID", requestingChainID, "requestID", requestID, "requestLen", len(request))
   222  
   223  	var req message.CrossChainRequest
   224  	if _, err := n.crossChainCodec.Unmarshal(request, &req); err != nil {
   225  		log.Debug("failed to unmarshal CrossChainAppRequest", "requestingChainID", requestingChainID, "requestID", requestID, "requestLen", len(request), "err", err)
   226  		return nil
   227  	}
   228  
   229  	bufferedDeadline, err := calculateTimeUntilDeadline(deadline, n.crossChainStats)
   230  	if err != nil {
   231  		log.Debug("deadline to process CrossChainAppRequest has expired, skipping", "requestingChainID", requestingChainID, "requestID", requestID, "err", err)
   232  		return nil
   233  	}
   234  
   235  	log.Debug("processing incoming CrossChainAppRequest", "requestingChainID", requestingChainID, "requestID", requestID, "req", req)
   236  	handleCtx, cancel := context.WithDeadline(context.Background(), bufferedDeadline)
   237  	defer cancel()
   238  
   239  	responseBytes, err := req.Handle(handleCtx, requestingChainID, requestID, n.crossChainRequestHandler)
   240  	switch {
   241  	case err != nil && err != context.DeadlineExceeded:
   242  		return err // Return a fatal error
   243  	case responseBytes != nil:
   244  		return n.appSender.SendCrossChainAppResponse(ctx, requestingChainID, requestID, responseBytes) // Propagate fatal error
   245  	default:
   246  		return nil
   247  	}
   248  }
   249  
   250  // CrossChainAppRequestFailed can be called by the metalgo -> VM in following cases:
   251  // - respondingChain doesn't exist
   252  // - invalid CrossChainAppResponse from respondingChain
   253  // - invalid CrossChainRequest was sent to respondingChain
   254  // - request times out before a response is provided
   255  // If [requestID] is not known, this function will emit a log and return a nil error.
   256  // If the response handler returns an error it is propagated as a fatal error.
   257  func (n *network) CrossChainAppRequestFailed(ctx context.Context, respondingChainID ids.ID, requestID uint32) error {
   258  	n.lock.Lock()
   259  	defer n.lock.Unlock()
   260  
   261  	log.Debug("received CrossChainAppRequestFailed from chain", "respondingChainID", respondingChainID, "requestID", requestID)
   262  
   263  	handler, exists := n.markRequestFulfilled(requestID)
   264  	if !exists {
   265  		// Should never happen since the engine should be managing outstanding requests
   266  		log.Error("received CrossChainAppRequestFailed to unknown request", "respondingChainID", respondingChainID, "requestID", requestID)
   267  		return nil
   268  	}
   269  
   270  	// We must release the slot
   271  	n.activeCrossChainRequests.Release(1)
   272  
   273  	return handler.OnFailure()
   274  }
   275  
   276  // CrossChainAppResponse is invoked when there is a
   277  // response received from [respondingChainID] regarding a request the VM sent out
   278  // If [requestID] is not known, this function will emit a log and return a nil error.
   279  // If the response handler returns an error it is propagated as a fatal error.
   280  func (n *network) CrossChainAppResponse(ctx context.Context, respondingChainID ids.ID, requestID uint32, response []byte) error {
   281  	n.lock.Lock()
   282  	defer n.lock.Unlock()
   283  
   284  	log.Debug("received CrossChainAppResponse from responding chain", "respondingChainID", respondingChainID, "requestID", requestID)
   285  
   286  	handler, exists := n.markRequestFulfilled(requestID)
   287  	if !exists {
   288  		// Should never happen since the engine should be managing outstanding requests
   289  		log.Error("received CrossChainAppResponse to unknown request", "respondingChainID", respondingChainID, "requestID", requestID, "responseLen", len(response))
   290  		return nil
   291  	}
   292  
   293  	// We must release the slot
   294  	n.activeCrossChainRequests.Release(1)
   295  
   296  	return handler.OnResponse(response)
   297  }
   298  
   299  // AppRequest is called by metalgo -> VM when there is an incoming AppRequest from a peer
   300  // error returned by this function is expected to be treated as fatal by the engine
   301  // returns error if the requestHandler returns an error
   302  // sends a response back to the sender if length of response returned by the handler is >0
   303  // expects the deadline to not have been passed
   304  func (n *network) AppRequest(ctx context.Context, nodeID ids.NodeID, requestID uint32, deadline time.Time, request []byte) error {
   305  	log.Debug("received AppRequest from node", "nodeID", nodeID, "requestID", requestID, "requestLen", len(request))
   306  
   307  	var req message.Request
   308  	if _, err := n.codec.Unmarshal(request, &req); err != nil {
   309  		log.Debug("failed to unmarshal app request", "nodeID", nodeID, "requestID", requestID, "requestLen", len(request), "err", err)
   310  		return nil
   311  	}
   312  
   313  	bufferedDeadline, err := calculateTimeUntilDeadline(deadline, n.appStats)
   314  	if err != nil {
   315  		log.Debug("deadline to process AppRequest has expired, skipping", "nodeID", nodeID, "requestID", requestID, "err", err)
   316  		return nil
   317  	}
   318  
   319  	log.Debug("processing incoming request", "nodeID", nodeID, "requestID", requestID, "req", req)
   320  	// We make a new context here because we don't want to cancel the context
   321  	// passed into n.AppSender.SendAppResponse below
   322  	handleCtx, cancel := context.WithDeadline(context.Background(), bufferedDeadline)
   323  	defer cancel()
   324  
   325  	responseBytes, err := req.Handle(handleCtx, nodeID, requestID, n.appRequestHandler)
   326  	switch {
   327  	case err != nil && err != context.DeadlineExceeded:
   328  		return err // Return a fatal error
   329  	case responseBytes != nil:
   330  		return n.appSender.SendAppResponse(ctx, nodeID, requestID, responseBytes) // Propagate fatal error
   331  	default:
   332  		return nil
   333  	}
   334  }
   335  
   336  // AppResponse is invoked when there is a response received from a peer regarding a request
   337  // Error returned by this function is expected to be treated as fatal by the engine
   338  // If [requestID] is not known, this function will emit a log and return a nil error.
   339  // If the response handler returns an error it is propagated as a fatal error.
   340  func (n *network) AppResponse(_ context.Context, nodeID ids.NodeID, requestID uint32, response []byte) error {
   341  	n.lock.Lock()
   342  	defer n.lock.Unlock()
   343  
   344  	log.Debug("received AppResponse from peer", "nodeID", nodeID, "requestID", requestID)
   345  
   346  	handler, exists := n.markRequestFulfilled(requestID)
   347  	if !exists {
   348  		// Should never happen since the engine should be managing outstanding requests
   349  		log.Error("received AppResponse to unknown request", "nodeID", nodeID, "requestID", requestID, "responseLen", len(response))
   350  		return nil
   351  	}
   352  
   353  	// We must release the slot
   354  	n.activeAppRequests.Release(1)
   355  
   356  	return handler.OnResponse(response)
   357  }
   358  
   359  // AppRequestFailed can be called by the metalgo -> VM in following cases:
   360  // - node is benched
   361  // - failed to send message to [nodeID] due to a network issue
   362  // - request times out before a response is provided
   363  // error returned by this function is expected to be treated as fatal by the engine
   364  // returns error only when the response handler returns an error
   365  func (n *network) AppRequestFailed(_ context.Context, nodeID ids.NodeID, requestID uint32) error {
   366  	n.lock.Lock()
   367  	defer n.lock.Unlock()
   368  
   369  	log.Debug("received AppRequestFailed from peer", "nodeID", nodeID, "requestID", requestID)
   370  
   371  	handler, exists := n.markRequestFulfilled(requestID)
   372  	if !exists {
   373  		// Should never happen since the engine should be managing outstanding requests
   374  		log.Error("received AppRequestFailed to unknown request", "nodeID", nodeID, "requestID", requestID)
   375  		return nil
   376  	}
   377  
   378  	// We must release the slot
   379  	n.activeAppRequests.Release(1)
   380  
   381  	return handler.OnFailure()
   382  }
   383  
   384  // calculateTimeUntilDeadline calculates the time until deadline and drops it if we missed he deadline to response.
   385  // This function updates metrics for both app requests and cross chain requests.
   386  // This is called by either [AppRequest] or [CrossChainAppRequest].
   387  func calculateTimeUntilDeadline(deadline time.Time, stats stats.RequestHandlerStats) (time.Time, error) {
   388  	// calculate how much time is left until the deadline
   389  	timeTillDeadline := time.Until(deadline)
   390  	stats.UpdateTimeUntilDeadline(timeTillDeadline)
   391  
   392  	// bufferedDeadline is half the time till actual deadline so that the message has a reasonable chance
   393  	// of completing its processing and sending the response to the peer.
   394  	bufferedDeadline := time.Now().Add(timeTillDeadline / 2)
   395  
   396  	// check if we have enough time to handle this request
   397  	if time.Until(bufferedDeadline) < minRequestHandlingDuration {
   398  		// Drop the request if we already missed the deadline to respond.
   399  		stats.IncDeadlineDroppedRequest()
   400  		return time.Time{}, errExpiredRequest
   401  	}
   402  
   403  	return bufferedDeadline, nil
   404  }
   405  
   406  // markRequestFulfilled fetches the handler for [requestID] and marks the request with [requestID] as having been fulfilled.
   407  // This is called by either [AppResponse] or [AppRequestFailed].
   408  // Assumes that the write lock is held.
   409  func (n *network) markRequestFulfilled(requestID uint32) (message.ResponseHandler, bool) {
   410  	handler, exists := n.outstandingRequestHandlers[requestID]
   411  	if !exists {
   412  		return nil, false
   413  	}
   414  	// mark message as processed
   415  	delete(n.outstandingRequestHandlers, requestID)
   416  
   417  	return handler, true
   418  }
   419  
   420  // Gossip sends given gossip message to peers
   421  func (n *network) Gossip(gossip []byte) error {
   422  	return n.appSender.SendAppGossip(context.TODO(), gossip)
   423  }
   424  
   425  // AppGossip is called by metalgo -> VM when there is an incoming AppGossip from a peer
   426  // error returned by this function is expected to be treated as fatal by the engine
   427  // returns error if request could not be parsed as message.Request or when the requestHandler returns an error
   428  func (n *network) AppGossip(_ context.Context, nodeID ids.NodeID, gossipBytes []byte) error {
   429  	var gossipMsg message.GossipMessage
   430  	if _, err := n.codec.Unmarshal(gossipBytes, &gossipMsg); err != nil {
   431  		log.Debug("could not parse app gossip", "nodeID", nodeID, "gossipLen", len(gossipBytes), "err", err)
   432  		return nil
   433  	}
   434  
   435  	log.Debug("processing AppGossip from node", "nodeID", nodeID, "msg", gossipMsg)
   436  	return gossipMsg.Handle(n.gossipHandler, nodeID)
   437  }
   438  
   439  // Connected adds the given nodeID to the peer list so that it can receive messages
   440  func (n *network) Connected(_ context.Context, nodeID ids.NodeID, nodeVersion *version.Application) error {
   441  	log.Debug("adding new peer", "nodeID", nodeID)
   442  
   443  	n.lock.Lock()
   444  	defer n.lock.Unlock()
   445  
   446  	if nodeID == n.self {
   447  		log.Debug("skipping registering self as peer")
   448  		return nil
   449  	}
   450  
   451  	n.peers.Connected(nodeID, nodeVersion)
   452  	return nil
   453  }
   454  
   455  // Disconnected removes given [nodeID] from the peer list
   456  func (n *network) Disconnected(_ context.Context, nodeID ids.NodeID) error {
   457  	log.Debug("disconnecting peer", "nodeID", nodeID)
   458  	n.lock.Lock()
   459  	defer n.lock.Unlock()
   460  
   461  	n.peers.Disconnected(nodeID)
   462  	return nil
   463  }
   464  
   465  // Shutdown disconnects all peers
   466  func (n *network) Shutdown() {
   467  	n.lock.Lock()
   468  	defer n.lock.Unlock()
   469  
   470  	// reset peers
   471  	n.peers = NewPeerTracker()
   472  }
   473  
   474  func (n *network) SetGossipHandler(handler message.GossipHandler) {
   475  	n.lock.Lock()
   476  	defer n.lock.Unlock()
   477  
   478  	n.gossipHandler = handler
   479  }
   480  
   481  func (n *network) SetRequestHandler(handler message.RequestHandler) {
   482  	n.lock.Lock()
   483  	defer n.lock.Unlock()
   484  
   485  	n.appRequestHandler = handler
   486  }
   487  
   488  func (n *network) SetCrossChainRequestHandler(handler message.CrossChainRequestHandler) {
   489  	n.lock.Lock()
   490  	defer n.lock.Unlock()
   491  
   492  	n.crossChainRequestHandler = handler
   493  }
   494  
   495  func (n *network) Size() uint32 {
   496  	n.lock.RLock()
   497  	defer n.lock.RUnlock()
   498  
   499  	return uint32(n.peers.Size())
   500  }
   501  
   502  func (n *network) TrackBandwidth(nodeID ids.NodeID, bandwidth float64) {
   503  	n.lock.Lock()
   504  	defer n.lock.Unlock()
   505  
   506  	n.peers.TrackBandwidth(nodeID, bandwidth)
   507  }