github.com/MetalBlockchain/subnet-evm@v0.4.9/peer/network.go (about) 1 // (c) 2019-2022, Ava Labs, Inc. All rights reserved. 2 // See the file LICENSE for licensing terms. 3 4 package peer 5 6 import ( 7 "context" 8 "errors" 9 "fmt" 10 "sync" 11 "time" 12 13 "golang.org/x/sync/semaphore" 14 15 "github.com/ethereum/go-ethereum/log" 16 17 "github.com/MetalBlockchain/metalgo/codec" 18 "github.com/MetalBlockchain/metalgo/ids" 19 "github.com/MetalBlockchain/metalgo/snow/engine/common" 20 "github.com/MetalBlockchain/metalgo/snow/validators" 21 "github.com/MetalBlockchain/metalgo/utils/set" 22 "github.com/MetalBlockchain/metalgo/version" 23 24 "github.com/MetalBlockchain/subnet-evm/peer/stats" 25 "github.com/MetalBlockchain/subnet-evm/plugin/evm/message" 26 ) 27 28 // Minimum amount of time to handle a request 29 const minRequestHandlingDuration = 100 * time.Millisecond 30 31 var ( 32 errAcquiringSemaphore = errors.New("error acquiring semaphore") 33 errExpiredRequest = errors.New("expired request") 34 _ Network = &network{} 35 _ validators.Connector = &network{} 36 _ common.AppHandler = &network{} 37 ) 38 39 type Network interface { 40 validators.Connector 41 common.AppHandler 42 43 // SendAppRequestAny synchronously sends request to an arbitrary peer with a 44 // node version greater than or equal to minVersion. 45 // Returns the ID of the chosen peer, and an error if the request could not 46 // be sent to a peer with the desired [minVersion]. 47 SendAppRequestAny(minVersion *version.Application, message []byte, handler message.ResponseHandler) (ids.NodeID, error) 48 49 // SendAppRequest sends message to given nodeID, notifying handler when there's a response or timeout 50 SendAppRequest(nodeID ids.NodeID, message []byte, handler message.ResponseHandler) error 51 52 // Gossip sends given gossip message to peers 53 Gossip(gossip []byte) error 54 55 // SendCrossChainRequest sends a message to given chainID notifying handler when there's a response or timeout 56 SendCrossChainRequest(chainID ids.ID, message []byte, handler message.ResponseHandler) error 57 58 // Shutdown stops all peer channel listeners and marks the node to have stopped 59 // n.Start() can be called again but the peers will have to be reconnected 60 // by calling OnPeerConnected for each peer 61 Shutdown() 62 63 // SetGossipHandler sets the provided gossip handler as the gossip handler 64 SetGossipHandler(handler message.GossipHandler) 65 66 // SetRequestHandler sets the provided request handler as the request handler 67 SetRequestHandler(handler message.RequestHandler) 68 69 // SetCrossChainHandler sets the provided cross chain request handler as the cross chain request handler 70 SetCrossChainRequestHandler(handler message.CrossChainRequestHandler) 71 72 // Size returns the size of the network in number of connected peers 73 Size() uint32 74 75 // TrackBandwidth should be called for each valid request with the bandwidth 76 // (length of response divided by request time), and with 0 if the response is invalid. 77 TrackBandwidth(nodeID ids.NodeID, bandwidth float64) 78 } 79 80 // network is an implementation of Network that processes message requests for 81 // each peer in linear fashion 82 type network struct { 83 lock sync.RWMutex // lock for mutating state of this Network struct 84 self ids.NodeID // NodeID of this node 85 requestIDGen uint32 // requestID counter used to track outbound requests 86 outstandingRequestHandlers map[uint32]message.ResponseHandler // maps metalgo requestID => message.ResponseHandler 87 activeAppRequests *semaphore.Weighted // controls maximum number of active outbound requests 88 activeCrossChainRequests *semaphore.Weighted // controls maximum number of active outbound cross chain requests 89 appSender common.AppSender // metalgo AppSender for sending messages 90 codec codec.Manager // Codec used for parsing messages 91 crossChainCodec codec.Manager // Codec used for parsing cross chain messages 92 appRequestHandler message.RequestHandler // maps request type => handler 93 crossChainRequestHandler message.CrossChainRequestHandler // maps cross chain request type => handler 94 gossipHandler message.GossipHandler // maps gossip type => handler 95 peers *peerTracker // tracking of peers & bandwidth 96 appStats stats.RequestHandlerStats // Provide request handler metrics 97 crossChainStats stats.RequestHandlerStats // Provide cross chain request handler metrics 98 } 99 100 func NewNetwork(appSender common.AppSender, codec codec.Manager, crossChainCodec codec.Manager, self ids.NodeID, maxActiveAppRequests int64, maxActiveCrossChainRequests int64) Network { 101 return &network{ 102 appSender: appSender, 103 codec: codec, 104 crossChainCodec: crossChainCodec, 105 self: self, 106 outstandingRequestHandlers: make(map[uint32]message.ResponseHandler), 107 activeAppRequests: semaphore.NewWeighted(maxActiveAppRequests), 108 activeCrossChainRequests: semaphore.NewWeighted(maxActiveCrossChainRequests), 109 gossipHandler: message.NoopMempoolGossipHandler{}, 110 appRequestHandler: message.NoopRequestHandler{}, 111 crossChainRequestHandler: message.NoopCrossChainRequestHandler{}, 112 peers: NewPeerTracker(), 113 appStats: stats.NewRequestHandlerStats(), 114 crossChainStats: stats.NewCrossChainRequestHandlerStats(), 115 } 116 } 117 118 // SendAppRequestAny synchronously sends request to an arbitrary peer with a 119 // node version greater than or equal to minVersion. If minVersion is nil, 120 // the request will be sent to any peer regardless of their version. 121 // Returns the ID of the chosen peer, and an error if the request could not 122 // be sent to a peer with the desired [minVersion]. 123 func (n *network) SendAppRequestAny(minVersion *version.Application, request []byte, handler message.ResponseHandler) (ids.NodeID, error) { 124 // Take a slot from total [activeAppRequests] and block until a slot becomes available. 125 if err := n.activeAppRequests.Acquire(context.Background(), 1); err != nil { 126 return ids.EmptyNodeID, errAcquiringSemaphore 127 } 128 129 n.lock.Lock() 130 defer n.lock.Unlock() 131 if nodeID, ok := n.peers.GetAnyPeer(minVersion); ok { 132 return nodeID, n.sendAppRequest(nodeID, request, handler) 133 } 134 135 n.activeAppRequests.Release(1) 136 return ids.EmptyNodeID, fmt.Errorf("no peers found matching version %s out of %d peers", minVersion, n.peers.Size()) 137 } 138 139 // SendAppRequest sends request message bytes to specified nodeID, notifying the responseHandler on response or failure 140 func (n *network) SendAppRequest(nodeID ids.NodeID, request []byte, responseHandler message.ResponseHandler) error { 141 if nodeID == ids.EmptyNodeID { 142 return fmt.Errorf("cannot send request to empty nodeID, nodeID=%s, requestLen=%d", nodeID, len(request)) 143 } 144 145 // Take a slot from total [activeAppRequests] and block until a slot becomes available. 146 if err := n.activeAppRequests.Acquire(context.Background(), 1); err != nil { 147 return errAcquiringSemaphore 148 } 149 150 n.lock.Lock() 151 defer n.lock.Unlock() 152 153 return n.sendAppRequest(nodeID, request, responseHandler) 154 } 155 156 // sendAppRequest sends request message bytes to specified nodeID and adds [responseHandler] to [outstandingRequestHandlers] 157 // so that it can be invoked when the network receives either a response or failure message. 158 // Assumes [nodeID] is never [self] since we guarantee [self] will not be added to the [peers] map. 159 // Releases active requests semaphore if there was an error in sending the request 160 // Returns an error if [appSender] is unable to make the request. 161 // Assumes write lock is held 162 func (n *network) sendAppRequest(nodeID ids.NodeID, request []byte, responseHandler message.ResponseHandler) error { 163 log.Debug("sending request to peer", "nodeID", nodeID, "requestLen", len(request)) 164 n.peers.TrackPeer(nodeID) 165 166 // generate requestID 167 requestID := n.requestIDGen 168 n.requestIDGen++ 169 170 n.outstandingRequestHandlers[requestID] = responseHandler 171 172 nodeIDs := set.NewSet[ids.NodeID](1) 173 nodeIDs.Add(nodeID) 174 175 // Send app request to [nodeID]. 176 // On failure, release the slot from [activeAppRequests] and delete request from [outstandingRequestHandlers] 177 if err := n.appSender.SendAppRequest(context.TODO(), nodeIDs, requestID, request); err != nil { 178 n.activeAppRequests.Release(1) 179 delete(n.outstandingRequestHandlers, requestID) 180 return err 181 } 182 183 log.Debug("sent request message to peer", "nodeID", nodeID, "requestID", requestID) 184 return nil 185 } 186 187 // SendCrossChainRequest sends request message bytes to specified chainID and adds [handler] to [outstandingRequestHandlers] 188 // so that it can be invoked when the network receives either a response or failure message. 189 // Returns an error if [appSender] is unable to make the request. 190 func (n *network) SendCrossChainRequest(chainID ids.ID, request []byte, handler message.ResponseHandler) error { 191 // Take a slot from total [activeCrossChainRequests] and block until a slot becomes available. 192 if err := n.activeCrossChainRequests.Acquire(context.Background(), 1); err != nil { 193 return errAcquiringSemaphore 194 } 195 196 n.lock.Lock() 197 defer n.lock.Unlock() 198 199 // generate requestID 200 requestID := n.requestIDGen 201 n.requestIDGen++ 202 203 n.outstandingRequestHandlers[requestID] = handler 204 205 // Send cross chain request to [chainID]. 206 // On failure, release the slot from [activeCrossChainRequests] and delete request from [outstandingRequestHandlers]. 207 if err := n.appSender.SendCrossChainAppRequest(context.TODO(), chainID, requestID, request); err != nil { 208 n.activeCrossChainRequests.Release(1) 209 delete(n.outstandingRequestHandlers, requestID) 210 return err 211 } 212 213 log.Debug("sent request message to chain", "chainID", chainID, "crossChainRequestID", requestID) 214 return nil 215 } 216 217 // CrossChainAppRequest notifies the VM when another chain in the network requests for data. 218 // Send a CrossChainAppResponse to [chainID] in response to a valid message using the same 219 // [requestID] before the deadline. 220 func (n *network) CrossChainAppRequest(ctx context.Context, requestingChainID ids.ID, requestID uint32, deadline time.Time, request []byte) error { 221 log.Debug("received CrossChainAppRequest from chain", "requestingChainID", requestingChainID, "requestID", requestID, "requestLen", len(request)) 222 223 var req message.CrossChainRequest 224 if _, err := n.crossChainCodec.Unmarshal(request, &req); err != nil { 225 log.Debug("failed to unmarshal CrossChainAppRequest", "requestingChainID", requestingChainID, "requestID", requestID, "requestLen", len(request), "err", err) 226 return nil 227 } 228 229 bufferedDeadline, err := calculateTimeUntilDeadline(deadline, n.crossChainStats) 230 if err != nil { 231 log.Debug("deadline to process CrossChainAppRequest has expired, skipping", "requestingChainID", requestingChainID, "requestID", requestID, "err", err) 232 return nil 233 } 234 235 log.Debug("processing incoming CrossChainAppRequest", "requestingChainID", requestingChainID, "requestID", requestID, "req", req) 236 handleCtx, cancel := context.WithDeadline(context.Background(), bufferedDeadline) 237 defer cancel() 238 239 responseBytes, err := req.Handle(handleCtx, requestingChainID, requestID, n.crossChainRequestHandler) 240 switch { 241 case err != nil && err != context.DeadlineExceeded: 242 return err // Return a fatal error 243 case responseBytes != nil: 244 return n.appSender.SendCrossChainAppResponse(ctx, requestingChainID, requestID, responseBytes) // Propagate fatal error 245 default: 246 return nil 247 } 248 } 249 250 // CrossChainAppRequestFailed can be called by the metalgo -> VM in following cases: 251 // - respondingChain doesn't exist 252 // - invalid CrossChainAppResponse from respondingChain 253 // - invalid CrossChainRequest was sent to respondingChain 254 // - request times out before a response is provided 255 // If [requestID] is not known, this function will emit a log and return a nil error. 256 // If the response handler returns an error it is propagated as a fatal error. 257 func (n *network) CrossChainAppRequestFailed(ctx context.Context, respondingChainID ids.ID, requestID uint32) error { 258 n.lock.Lock() 259 defer n.lock.Unlock() 260 261 log.Debug("received CrossChainAppRequestFailed from chain", "respondingChainID", respondingChainID, "requestID", requestID) 262 263 handler, exists := n.markRequestFulfilled(requestID) 264 if !exists { 265 // Should never happen since the engine should be managing outstanding requests 266 log.Error("received CrossChainAppRequestFailed to unknown request", "respondingChainID", respondingChainID, "requestID", requestID) 267 return nil 268 } 269 270 // We must release the slot 271 n.activeCrossChainRequests.Release(1) 272 273 return handler.OnFailure() 274 } 275 276 // CrossChainAppResponse is invoked when there is a 277 // response received from [respondingChainID] regarding a request the VM sent out 278 // If [requestID] is not known, this function will emit a log and return a nil error. 279 // If the response handler returns an error it is propagated as a fatal error. 280 func (n *network) CrossChainAppResponse(ctx context.Context, respondingChainID ids.ID, requestID uint32, response []byte) error { 281 n.lock.Lock() 282 defer n.lock.Unlock() 283 284 log.Debug("received CrossChainAppResponse from responding chain", "respondingChainID", respondingChainID, "requestID", requestID) 285 286 handler, exists := n.markRequestFulfilled(requestID) 287 if !exists { 288 // Should never happen since the engine should be managing outstanding requests 289 log.Error("received CrossChainAppResponse to unknown request", "respondingChainID", respondingChainID, "requestID", requestID, "responseLen", len(response)) 290 return nil 291 } 292 293 // We must release the slot 294 n.activeCrossChainRequests.Release(1) 295 296 return handler.OnResponse(response) 297 } 298 299 // AppRequest is called by metalgo -> VM when there is an incoming AppRequest from a peer 300 // error returned by this function is expected to be treated as fatal by the engine 301 // returns error if the requestHandler returns an error 302 // sends a response back to the sender if length of response returned by the handler is >0 303 // expects the deadline to not have been passed 304 func (n *network) AppRequest(ctx context.Context, nodeID ids.NodeID, requestID uint32, deadline time.Time, request []byte) error { 305 log.Debug("received AppRequest from node", "nodeID", nodeID, "requestID", requestID, "requestLen", len(request)) 306 307 var req message.Request 308 if _, err := n.codec.Unmarshal(request, &req); err != nil { 309 log.Debug("failed to unmarshal app request", "nodeID", nodeID, "requestID", requestID, "requestLen", len(request), "err", err) 310 return nil 311 } 312 313 bufferedDeadline, err := calculateTimeUntilDeadline(deadline, n.appStats) 314 if err != nil { 315 log.Debug("deadline to process AppRequest has expired, skipping", "nodeID", nodeID, "requestID", requestID, "err", err) 316 return nil 317 } 318 319 log.Debug("processing incoming request", "nodeID", nodeID, "requestID", requestID, "req", req) 320 // We make a new context here because we don't want to cancel the context 321 // passed into n.AppSender.SendAppResponse below 322 handleCtx, cancel := context.WithDeadline(context.Background(), bufferedDeadline) 323 defer cancel() 324 325 responseBytes, err := req.Handle(handleCtx, nodeID, requestID, n.appRequestHandler) 326 switch { 327 case err != nil && err != context.DeadlineExceeded: 328 return err // Return a fatal error 329 case responseBytes != nil: 330 return n.appSender.SendAppResponse(ctx, nodeID, requestID, responseBytes) // Propagate fatal error 331 default: 332 return nil 333 } 334 } 335 336 // AppResponse is invoked when there is a response received from a peer regarding a request 337 // Error returned by this function is expected to be treated as fatal by the engine 338 // If [requestID] is not known, this function will emit a log and return a nil error. 339 // If the response handler returns an error it is propagated as a fatal error. 340 func (n *network) AppResponse(_ context.Context, nodeID ids.NodeID, requestID uint32, response []byte) error { 341 n.lock.Lock() 342 defer n.lock.Unlock() 343 344 log.Debug("received AppResponse from peer", "nodeID", nodeID, "requestID", requestID) 345 346 handler, exists := n.markRequestFulfilled(requestID) 347 if !exists { 348 // Should never happen since the engine should be managing outstanding requests 349 log.Error("received AppResponse to unknown request", "nodeID", nodeID, "requestID", requestID, "responseLen", len(response)) 350 return nil 351 } 352 353 // We must release the slot 354 n.activeAppRequests.Release(1) 355 356 return handler.OnResponse(response) 357 } 358 359 // AppRequestFailed can be called by the metalgo -> VM in following cases: 360 // - node is benched 361 // - failed to send message to [nodeID] due to a network issue 362 // - request times out before a response is provided 363 // error returned by this function is expected to be treated as fatal by the engine 364 // returns error only when the response handler returns an error 365 func (n *network) AppRequestFailed(_ context.Context, nodeID ids.NodeID, requestID uint32) error { 366 n.lock.Lock() 367 defer n.lock.Unlock() 368 369 log.Debug("received AppRequestFailed from peer", "nodeID", nodeID, "requestID", requestID) 370 371 handler, exists := n.markRequestFulfilled(requestID) 372 if !exists { 373 // Should never happen since the engine should be managing outstanding requests 374 log.Error("received AppRequestFailed to unknown request", "nodeID", nodeID, "requestID", requestID) 375 return nil 376 } 377 378 // We must release the slot 379 n.activeAppRequests.Release(1) 380 381 return handler.OnFailure() 382 } 383 384 // calculateTimeUntilDeadline calculates the time until deadline and drops it if we missed he deadline to response. 385 // This function updates metrics for both app requests and cross chain requests. 386 // This is called by either [AppRequest] or [CrossChainAppRequest]. 387 func calculateTimeUntilDeadline(deadline time.Time, stats stats.RequestHandlerStats) (time.Time, error) { 388 // calculate how much time is left until the deadline 389 timeTillDeadline := time.Until(deadline) 390 stats.UpdateTimeUntilDeadline(timeTillDeadline) 391 392 // bufferedDeadline is half the time till actual deadline so that the message has a reasonable chance 393 // of completing its processing and sending the response to the peer. 394 bufferedDeadline := time.Now().Add(timeTillDeadline / 2) 395 396 // check if we have enough time to handle this request 397 if time.Until(bufferedDeadline) < minRequestHandlingDuration { 398 // Drop the request if we already missed the deadline to respond. 399 stats.IncDeadlineDroppedRequest() 400 return time.Time{}, errExpiredRequest 401 } 402 403 return bufferedDeadline, nil 404 } 405 406 // markRequestFulfilled fetches the handler for [requestID] and marks the request with [requestID] as having been fulfilled. 407 // This is called by either [AppResponse] or [AppRequestFailed]. 408 // Assumes that the write lock is held. 409 func (n *network) markRequestFulfilled(requestID uint32) (message.ResponseHandler, bool) { 410 handler, exists := n.outstandingRequestHandlers[requestID] 411 if !exists { 412 return nil, false 413 } 414 // mark message as processed 415 delete(n.outstandingRequestHandlers, requestID) 416 417 return handler, true 418 } 419 420 // Gossip sends given gossip message to peers 421 func (n *network) Gossip(gossip []byte) error { 422 return n.appSender.SendAppGossip(context.TODO(), gossip) 423 } 424 425 // AppGossip is called by metalgo -> VM when there is an incoming AppGossip from a peer 426 // error returned by this function is expected to be treated as fatal by the engine 427 // returns error if request could not be parsed as message.Request or when the requestHandler returns an error 428 func (n *network) AppGossip(_ context.Context, nodeID ids.NodeID, gossipBytes []byte) error { 429 var gossipMsg message.GossipMessage 430 if _, err := n.codec.Unmarshal(gossipBytes, &gossipMsg); err != nil { 431 log.Debug("could not parse app gossip", "nodeID", nodeID, "gossipLen", len(gossipBytes), "err", err) 432 return nil 433 } 434 435 log.Debug("processing AppGossip from node", "nodeID", nodeID, "msg", gossipMsg) 436 return gossipMsg.Handle(n.gossipHandler, nodeID) 437 } 438 439 // Connected adds the given nodeID to the peer list so that it can receive messages 440 func (n *network) Connected(_ context.Context, nodeID ids.NodeID, nodeVersion *version.Application) error { 441 log.Debug("adding new peer", "nodeID", nodeID) 442 443 n.lock.Lock() 444 defer n.lock.Unlock() 445 446 if nodeID == n.self { 447 log.Debug("skipping registering self as peer") 448 return nil 449 } 450 451 n.peers.Connected(nodeID, nodeVersion) 452 return nil 453 } 454 455 // Disconnected removes given [nodeID] from the peer list 456 func (n *network) Disconnected(_ context.Context, nodeID ids.NodeID) error { 457 log.Debug("disconnecting peer", "nodeID", nodeID) 458 n.lock.Lock() 459 defer n.lock.Unlock() 460 461 n.peers.Disconnected(nodeID) 462 return nil 463 } 464 465 // Shutdown disconnects all peers 466 func (n *network) Shutdown() { 467 n.lock.Lock() 468 defer n.lock.Unlock() 469 470 // reset peers 471 n.peers = NewPeerTracker() 472 } 473 474 func (n *network) SetGossipHandler(handler message.GossipHandler) { 475 n.lock.Lock() 476 defer n.lock.Unlock() 477 478 n.gossipHandler = handler 479 } 480 481 func (n *network) SetRequestHandler(handler message.RequestHandler) { 482 n.lock.Lock() 483 defer n.lock.Unlock() 484 485 n.appRequestHandler = handler 486 } 487 488 func (n *network) SetCrossChainRequestHandler(handler message.CrossChainRequestHandler) { 489 n.lock.Lock() 490 defer n.lock.Unlock() 491 492 n.crossChainRequestHandler = handler 493 } 494 495 func (n *network) Size() uint32 { 496 n.lock.RLock() 497 defer n.lock.RUnlock() 498 499 return uint32(n.peers.Size()) 500 } 501 502 func (n *network) TrackBandwidth(nodeID ids.NodeID, bandwidth float64) { 503 n.lock.Lock() 504 defer n.lock.Unlock() 505 506 n.peers.TrackBandwidth(nodeID, bandwidth) 507 }