github.com/MetalBlockchain/metalgo@v1.11.9/x/sync/network_client.go (about) 1 // Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. 2 // See the file LICENSE for licensing terms. 3 4 package sync 5 6 import ( 7 "context" 8 "errors" 9 "fmt" 10 "sync" 11 "time" 12 13 "github.com/prometheus/client_golang/prometheus" 14 "go.uber.org/zap" 15 "golang.org/x/sync/semaphore" 16 17 "github.com/MetalBlockchain/metalgo/ids" 18 "github.com/MetalBlockchain/metalgo/network/p2p" 19 "github.com/MetalBlockchain/metalgo/snow/engine/common" 20 "github.com/MetalBlockchain/metalgo/utils/logging" 21 "github.com/MetalBlockchain/metalgo/utils/set" 22 "github.com/MetalBlockchain/metalgo/version" 23 ) 24 25 // Minimum amount of time to handle a request 26 const minRequestHandlingDuration = 100 * time.Millisecond 27 28 var ( 29 _ NetworkClient = (*networkClient)(nil) 30 31 errAcquiringSemaphore = errors.New("error acquiring semaphore") 32 errRequestFailed = errors.New("request failed") 33 errAppSendFailed = errors.New("failed to send app message") 34 ) 35 36 // NetworkClient defines ability to send request / response through the Network 37 type NetworkClient interface { 38 // RequestAny synchronously sends request to an arbitrary peer with a 39 // node version greater than or equal to minVersion. 40 // Returns response bytes, the ID of the chosen peer, and ErrRequestFailed if 41 // the request should be retried. 42 RequestAny( 43 ctx context.Context, 44 request []byte, 45 ) (ids.NodeID, []byte, error) 46 47 // Sends [request] to [nodeID] and returns the response. 48 // Blocks until the number of outstanding requests is 49 // below the limit before sending the request. 50 Request( 51 ctx context.Context, 52 nodeID ids.NodeID, 53 request []byte, 54 ) ([]byte, error) 55 56 // The following declarations allow this interface to be embedded in the VM 57 // to handle incoming responses from peers. 58 59 // Always returns nil because the engine considers errors 60 // returned from this function as fatal. 61 AppResponse(context.Context, ids.NodeID, uint32, []byte) error 62 63 // Always returns nil because the engine considers errors 64 // returned from this function as fatal. 65 AppRequestFailed(context.Context, ids.NodeID, uint32) error 66 67 // Adds the given [nodeID] to the peer 68 // list so that it can receive messages. 69 // If [nodeID] is this node's ID, this is a no-op. 70 Connected(context.Context, ids.NodeID, *version.Application) error 71 72 // Removes given [nodeID] from the peer list. 73 Disconnected(context.Context, ids.NodeID) error 74 } 75 76 type networkClient struct { 77 lock sync.Mutex 78 log logging.Logger 79 // requestID counter used to track outbound requests 80 requestID uint32 81 // requestID => handler for the response/failure 82 outstandingRequestHandlers map[uint32]ResponseHandler 83 // controls maximum number of active outbound requests 84 activeRequests *semaphore.Weighted 85 // tracking of peers & bandwidth usage 86 peers *p2p.PeerTracker 87 // For sending messages to peers 88 appSender common.AppSender 89 } 90 91 func NewNetworkClient( 92 appSender common.AppSender, 93 myNodeID ids.NodeID, 94 maxActiveRequests int64, 95 log logging.Logger, 96 metricsNamespace string, 97 registerer prometheus.Registerer, 98 minVersion *version.Application, 99 ) (NetworkClient, error) { 100 peerTracker, err := p2p.NewPeerTracker( 101 log, 102 metricsNamespace, 103 registerer, 104 set.Of(myNodeID), 105 minVersion, 106 ) 107 if err != nil { 108 return nil, fmt.Errorf("failed to create peer tracker: %w", err) 109 } 110 111 return &networkClient{ 112 appSender: appSender, 113 outstandingRequestHandlers: make(map[uint32]ResponseHandler), 114 activeRequests: semaphore.NewWeighted(maxActiveRequests), 115 peers: peerTracker, 116 log: log, 117 }, nil 118 } 119 120 func (c *networkClient) AppResponse( 121 _ context.Context, 122 nodeID ids.NodeID, 123 requestID uint32, 124 response []byte, 125 ) error { 126 c.lock.Lock() 127 defer c.lock.Unlock() 128 129 c.log.Info( 130 "received AppResponse from peer", 131 zap.Stringer("nodeID", nodeID), 132 zap.Uint32("requestID", requestID), 133 zap.Int("responseLen", len(response)), 134 ) 135 136 handler, exists := c.getRequestHandler(requestID) 137 if !exists { 138 // Should never happen since the engine 139 // should be managing outstanding requests 140 c.log.Warn( 141 "received response to unknown request", 142 zap.Stringer("nodeID", nodeID), 143 zap.Uint32("requestID", requestID), 144 zap.Int("responseLen", len(response)), 145 ) 146 return nil 147 } 148 handler.OnResponse(response) 149 return nil 150 } 151 152 func (c *networkClient) AppRequestFailed( 153 _ context.Context, 154 nodeID ids.NodeID, 155 requestID uint32, 156 ) error { 157 c.lock.Lock() 158 defer c.lock.Unlock() 159 160 c.log.Info( 161 "received AppRequestFailed from peer", 162 zap.Stringer("nodeID", nodeID), 163 zap.Uint32("requestID", requestID), 164 ) 165 166 handler, exists := c.getRequestHandler(requestID) 167 if !exists { 168 // Should never happen since the engine 169 // should be managing outstanding requests 170 c.log.Warn( 171 "received request failed to unknown request", 172 zap.Stringer("nodeID", nodeID), 173 zap.Uint32("requestID", requestID), 174 ) 175 return nil 176 } 177 handler.OnFailure() 178 return nil 179 } 180 181 // Returns the handler for [requestID] and marks the request as fulfilled. 182 // Returns false if there's no outstanding request with [requestID]. 183 // Assumes [c.lock] is held. 184 func (c *networkClient) getRequestHandler(requestID uint32) (ResponseHandler, bool) { 185 handler, exists := c.outstandingRequestHandlers[requestID] 186 if !exists { 187 return nil, false 188 } 189 // mark message as processed, release activeRequests slot 190 delete(c.outstandingRequestHandlers, requestID) 191 return handler, true 192 } 193 194 // If [errAppSendFailed] is returned this should be considered fatal. 195 func (c *networkClient) RequestAny( 196 ctx context.Context, 197 request []byte, 198 ) (ids.NodeID, []byte, error) { 199 // Take a slot from total [activeRequests] and block until a slot becomes available. 200 if err := c.activeRequests.Acquire(ctx, 1); err != nil { 201 return ids.EmptyNodeID, nil, errAcquiringSemaphore 202 } 203 defer c.activeRequests.Release(1) 204 205 nodeID, responseChan, err := c.sendRequestAny(ctx, request) 206 if err != nil { 207 return ids.EmptyNodeID, nil, err 208 } 209 210 response, err := c.awaitResponse(ctx, nodeID, responseChan) 211 return nodeID, response, err 212 } 213 214 func (c *networkClient) sendRequestAny( 215 ctx context.Context, 216 request []byte, 217 ) (ids.NodeID, chan []byte, error) { 218 c.lock.Lock() 219 defer c.lock.Unlock() 220 221 nodeID, ok := c.peers.SelectPeer() 222 if !ok { 223 numPeers := c.peers.Size() 224 return ids.EmptyNodeID, nil, fmt.Errorf("no peers found from %d peers", numPeers) 225 } 226 227 responseChan, err := c.sendRequestLocked(ctx, nodeID, request) 228 return nodeID, responseChan, err 229 } 230 231 // If [errAppSendFailed] is returned this should be considered fatal. 232 func (c *networkClient) Request( 233 ctx context.Context, 234 nodeID ids.NodeID, 235 request []byte, 236 ) ([]byte, error) { 237 // Take a slot from total [activeRequests] 238 // and block until a slot becomes available. 239 if err := c.activeRequests.Acquire(ctx, 1); err != nil { 240 return nil, errAcquiringSemaphore 241 } 242 defer c.activeRequests.Release(1) 243 244 responseChan, err := c.sendRequest(ctx, nodeID, request) 245 if err != nil { 246 return nil, err 247 } 248 249 return c.awaitResponse(ctx, nodeID, responseChan) 250 } 251 252 func (c *networkClient) sendRequest( 253 ctx context.Context, 254 nodeID ids.NodeID, 255 request []byte, 256 ) (chan []byte, error) { 257 c.lock.Lock() 258 defer c.lock.Unlock() 259 260 return c.sendRequestLocked(ctx, nodeID, request) 261 } 262 263 // Sends [request] to [nodeID] and returns a channel that will populate the 264 // response. 265 // 266 // If [errAppSendFailed] is returned this should be considered fatal. 267 // 268 // Assumes [nodeID] is never [c.myNodeID] since we guarantee [c.myNodeID] will 269 // not be added to [c.peers]. 270 // 271 // Assumes [c.lock] is held. 272 func (c *networkClient) sendRequestLocked( 273 ctx context.Context, 274 nodeID ids.NodeID, 275 request []byte, 276 ) (chan []byte, error) { 277 requestID := c.requestID 278 c.requestID++ 279 280 c.log.Debug("sending request to peer", 281 zap.Stringer("nodeID", nodeID), 282 zap.Uint32("requestID", requestID), 283 zap.Int("requestLen", len(request)), 284 ) 285 c.peers.RegisterRequest(nodeID) 286 287 // Send an app request to the peer. 288 nodeIDs := set.Of(nodeID) 289 // Cancellation is removed from this context to avoid erroring unexpectedly. 290 // SendAppRequest should be non-blocking and any error other than context 291 // cancellation is unexpected. 292 // 293 // This guarantees that the network should never receive an unexpected 294 // AppResponse. 295 ctxWithoutCancel := context.WithoutCancel(ctx) 296 if err := c.appSender.SendAppRequest(ctxWithoutCancel, nodeIDs, requestID, request); err != nil { 297 c.lock.Unlock() 298 c.log.Fatal("failed to send app request", 299 zap.Stringer("nodeID", nodeID), 300 zap.Uint32("requestID", requestID), 301 zap.Int("requestLen", len(request)), 302 zap.Error(err), 303 ) 304 return nil, fmt.Errorf("%w: %w", errAppSendFailed, err) 305 } 306 307 handler := newResponseHandler() 308 c.outstandingRequestHandlers[requestID] = handler 309 return handler.responseChan, nil 310 } 311 312 // awaitResponse from [nodeID] and returns the response. 313 // 314 // Returns an error if the request failed or [ctx] is canceled. 315 // 316 // Blocks until a response is received or the [ctx] is canceled fails. 317 // 318 // Assumes [nodeID] is never [c.myNodeID] since we guarantee [c.myNodeID] will 319 // not be added to [c.peers]. 320 // 321 // Assumes [c.lock] is not held. 322 func (c *networkClient) awaitResponse( 323 ctx context.Context, 324 nodeID ids.NodeID, 325 responseChan chan []byte, 326 ) ([]byte, error) { 327 var ( 328 response []byte 329 responded bool 330 startTime = time.Now() 331 ) 332 select { 333 case <-ctx.Done(): 334 c.peers.RegisterFailure(nodeID) 335 return nil, ctx.Err() 336 case response, responded = <-responseChan: 337 } 338 if !responded { 339 c.peers.RegisterFailure(nodeID) 340 return nil, errRequestFailed 341 } 342 343 elapsedSeconds := time.Since(startTime).Seconds() 344 bandwidth := float64(len(response)) / (elapsedSeconds + epsilon) 345 c.peers.RegisterResponse(nodeID, bandwidth) 346 347 c.log.Debug("received response from peer", 348 zap.Stringer("nodeID", nodeID), 349 zap.Int("responseLen", len(response)), 350 ) 351 return response, nil 352 } 353 354 func (c *networkClient) Connected( 355 _ context.Context, 356 nodeID ids.NodeID, 357 nodeVersion *version.Application, 358 ) error { 359 c.log.Debug("adding new peer", zap.Stringer("nodeID", nodeID)) 360 c.peers.Connected(nodeID, nodeVersion) 361 return nil 362 } 363 364 func (c *networkClient) Disconnected(_ context.Context, nodeID ids.NodeID) error { 365 c.log.Debug("disconnecting peer", zap.Stringer("nodeID", nodeID)) 366 c.peers.Disconnected(nodeID) 367 return nil 368 }