github.com/MetalBlockchain/metalgo@v1.11.9/x/sync/network_client.go (about)

     1  // Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
     2  // See the file LICENSE for licensing terms.
     3  
     4  package sync
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/prometheus/client_golang/prometheus"
    14  	"go.uber.org/zap"
    15  	"golang.org/x/sync/semaphore"
    16  
    17  	"github.com/MetalBlockchain/metalgo/ids"
    18  	"github.com/MetalBlockchain/metalgo/network/p2p"
    19  	"github.com/MetalBlockchain/metalgo/snow/engine/common"
    20  	"github.com/MetalBlockchain/metalgo/utils/logging"
    21  	"github.com/MetalBlockchain/metalgo/utils/set"
    22  	"github.com/MetalBlockchain/metalgo/version"
    23  )
    24  
    25  // Minimum amount of time to handle a request
    26  const minRequestHandlingDuration = 100 * time.Millisecond
    27  
    28  var (
    29  	_ NetworkClient = (*networkClient)(nil)
    30  
    31  	errAcquiringSemaphore = errors.New("error acquiring semaphore")
    32  	errRequestFailed      = errors.New("request failed")
    33  	errAppSendFailed      = errors.New("failed to send app message")
    34  )
    35  
    36  // NetworkClient defines ability to send request / response through the Network
    37  type NetworkClient interface {
    38  	// RequestAny synchronously sends request to an arbitrary peer with a
    39  	// node version greater than or equal to minVersion.
    40  	// Returns response bytes, the ID of the chosen peer, and ErrRequestFailed if
    41  	// the request should be retried.
    42  	RequestAny(
    43  		ctx context.Context,
    44  		request []byte,
    45  	) (ids.NodeID, []byte, error)
    46  
    47  	// Sends [request] to [nodeID] and returns the response.
    48  	// Blocks until the number of outstanding requests is
    49  	// below the limit before sending the request.
    50  	Request(
    51  		ctx context.Context,
    52  		nodeID ids.NodeID,
    53  		request []byte,
    54  	) ([]byte, error)
    55  
    56  	// The following declarations allow this interface to be embedded in the VM
    57  	// to handle incoming responses from peers.
    58  
    59  	// Always returns nil because the engine considers errors
    60  	// returned from this function as fatal.
    61  	AppResponse(context.Context, ids.NodeID, uint32, []byte) error
    62  
    63  	// Always returns nil because the engine considers errors
    64  	// returned from this function as fatal.
    65  	AppRequestFailed(context.Context, ids.NodeID, uint32) error
    66  
    67  	// Adds the given [nodeID] to the peer
    68  	// list so that it can receive messages.
    69  	// If [nodeID] is this node's ID, this is a no-op.
    70  	Connected(context.Context, ids.NodeID, *version.Application) error
    71  
    72  	// Removes given [nodeID] from the peer list.
    73  	Disconnected(context.Context, ids.NodeID) error
    74  }
    75  
    76  type networkClient struct {
    77  	lock sync.Mutex
    78  	log  logging.Logger
    79  	// requestID counter used to track outbound requests
    80  	requestID uint32
    81  	// requestID => handler for the response/failure
    82  	outstandingRequestHandlers map[uint32]ResponseHandler
    83  	// controls maximum number of active outbound requests
    84  	activeRequests *semaphore.Weighted
    85  	// tracking of peers & bandwidth usage
    86  	peers *p2p.PeerTracker
    87  	// For sending messages to peers
    88  	appSender common.AppSender
    89  }
    90  
    91  func NewNetworkClient(
    92  	appSender common.AppSender,
    93  	myNodeID ids.NodeID,
    94  	maxActiveRequests int64,
    95  	log logging.Logger,
    96  	metricsNamespace string,
    97  	registerer prometheus.Registerer,
    98  	minVersion *version.Application,
    99  ) (NetworkClient, error) {
   100  	peerTracker, err := p2p.NewPeerTracker(
   101  		log,
   102  		metricsNamespace,
   103  		registerer,
   104  		set.Of(myNodeID),
   105  		minVersion,
   106  	)
   107  	if err != nil {
   108  		return nil, fmt.Errorf("failed to create peer tracker: %w", err)
   109  	}
   110  
   111  	return &networkClient{
   112  		appSender:                  appSender,
   113  		outstandingRequestHandlers: make(map[uint32]ResponseHandler),
   114  		activeRequests:             semaphore.NewWeighted(maxActiveRequests),
   115  		peers:                      peerTracker,
   116  		log:                        log,
   117  	}, nil
   118  }
   119  
   120  func (c *networkClient) AppResponse(
   121  	_ context.Context,
   122  	nodeID ids.NodeID,
   123  	requestID uint32,
   124  	response []byte,
   125  ) error {
   126  	c.lock.Lock()
   127  	defer c.lock.Unlock()
   128  
   129  	c.log.Info(
   130  		"received AppResponse from peer",
   131  		zap.Stringer("nodeID", nodeID),
   132  		zap.Uint32("requestID", requestID),
   133  		zap.Int("responseLen", len(response)),
   134  	)
   135  
   136  	handler, exists := c.getRequestHandler(requestID)
   137  	if !exists {
   138  		// Should never happen since the engine
   139  		// should be managing outstanding requests
   140  		c.log.Warn(
   141  			"received response to unknown request",
   142  			zap.Stringer("nodeID", nodeID),
   143  			zap.Uint32("requestID", requestID),
   144  			zap.Int("responseLen", len(response)),
   145  		)
   146  		return nil
   147  	}
   148  	handler.OnResponse(response)
   149  	return nil
   150  }
   151  
   152  func (c *networkClient) AppRequestFailed(
   153  	_ context.Context,
   154  	nodeID ids.NodeID,
   155  	requestID uint32,
   156  ) error {
   157  	c.lock.Lock()
   158  	defer c.lock.Unlock()
   159  
   160  	c.log.Info(
   161  		"received AppRequestFailed from peer",
   162  		zap.Stringer("nodeID", nodeID),
   163  		zap.Uint32("requestID", requestID),
   164  	)
   165  
   166  	handler, exists := c.getRequestHandler(requestID)
   167  	if !exists {
   168  		// Should never happen since the engine
   169  		// should be managing outstanding requests
   170  		c.log.Warn(
   171  			"received request failed to unknown request",
   172  			zap.Stringer("nodeID", nodeID),
   173  			zap.Uint32("requestID", requestID),
   174  		)
   175  		return nil
   176  	}
   177  	handler.OnFailure()
   178  	return nil
   179  }
   180  
   181  // Returns the handler for [requestID] and marks the request as fulfilled.
   182  // Returns false if there's no outstanding request with [requestID].
   183  // Assumes [c.lock] is held.
   184  func (c *networkClient) getRequestHandler(requestID uint32) (ResponseHandler, bool) {
   185  	handler, exists := c.outstandingRequestHandlers[requestID]
   186  	if !exists {
   187  		return nil, false
   188  	}
   189  	// mark message as processed, release activeRequests slot
   190  	delete(c.outstandingRequestHandlers, requestID)
   191  	return handler, true
   192  }
   193  
   194  // If [errAppSendFailed] is returned this should be considered fatal.
   195  func (c *networkClient) RequestAny(
   196  	ctx context.Context,
   197  	request []byte,
   198  ) (ids.NodeID, []byte, error) {
   199  	// Take a slot from total [activeRequests] and block until a slot becomes available.
   200  	if err := c.activeRequests.Acquire(ctx, 1); err != nil {
   201  		return ids.EmptyNodeID, nil, errAcquiringSemaphore
   202  	}
   203  	defer c.activeRequests.Release(1)
   204  
   205  	nodeID, responseChan, err := c.sendRequestAny(ctx, request)
   206  	if err != nil {
   207  		return ids.EmptyNodeID, nil, err
   208  	}
   209  
   210  	response, err := c.awaitResponse(ctx, nodeID, responseChan)
   211  	return nodeID, response, err
   212  }
   213  
   214  func (c *networkClient) sendRequestAny(
   215  	ctx context.Context,
   216  	request []byte,
   217  ) (ids.NodeID, chan []byte, error) {
   218  	c.lock.Lock()
   219  	defer c.lock.Unlock()
   220  
   221  	nodeID, ok := c.peers.SelectPeer()
   222  	if !ok {
   223  		numPeers := c.peers.Size()
   224  		return ids.EmptyNodeID, nil, fmt.Errorf("no peers found from %d peers", numPeers)
   225  	}
   226  
   227  	responseChan, err := c.sendRequestLocked(ctx, nodeID, request)
   228  	return nodeID, responseChan, err
   229  }
   230  
   231  // If [errAppSendFailed] is returned this should be considered fatal.
   232  func (c *networkClient) Request(
   233  	ctx context.Context,
   234  	nodeID ids.NodeID,
   235  	request []byte,
   236  ) ([]byte, error) {
   237  	// Take a slot from total [activeRequests]
   238  	// and block until a slot becomes available.
   239  	if err := c.activeRequests.Acquire(ctx, 1); err != nil {
   240  		return nil, errAcquiringSemaphore
   241  	}
   242  	defer c.activeRequests.Release(1)
   243  
   244  	responseChan, err := c.sendRequest(ctx, nodeID, request)
   245  	if err != nil {
   246  		return nil, err
   247  	}
   248  
   249  	return c.awaitResponse(ctx, nodeID, responseChan)
   250  }
   251  
   252  func (c *networkClient) sendRequest(
   253  	ctx context.Context,
   254  	nodeID ids.NodeID,
   255  	request []byte,
   256  ) (chan []byte, error) {
   257  	c.lock.Lock()
   258  	defer c.lock.Unlock()
   259  
   260  	return c.sendRequestLocked(ctx, nodeID, request)
   261  }
   262  
   263  // Sends [request] to [nodeID] and returns a channel that will populate the
   264  // response.
   265  //
   266  // If [errAppSendFailed] is returned this should be considered fatal.
   267  //
   268  // Assumes [nodeID] is never [c.myNodeID] since we guarantee [c.myNodeID] will
   269  // not be added to [c.peers].
   270  //
   271  // Assumes [c.lock] is held.
   272  func (c *networkClient) sendRequestLocked(
   273  	ctx context.Context,
   274  	nodeID ids.NodeID,
   275  	request []byte,
   276  ) (chan []byte, error) {
   277  	requestID := c.requestID
   278  	c.requestID++
   279  
   280  	c.log.Debug("sending request to peer",
   281  		zap.Stringer("nodeID", nodeID),
   282  		zap.Uint32("requestID", requestID),
   283  		zap.Int("requestLen", len(request)),
   284  	)
   285  	c.peers.RegisterRequest(nodeID)
   286  
   287  	// Send an app request to the peer.
   288  	nodeIDs := set.Of(nodeID)
   289  	// Cancellation is removed from this context to avoid erroring unexpectedly.
   290  	// SendAppRequest should be non-blocking and any error other than context
   291  	// cancellation is unexpected.
   292  	//
   293  	// This guarantees that the network should never receive an unexpected
   294  	// AppResponse.
   295  	ctxWithoutCancel := context.WithoutCancel(ctx)
   296  	if err := c.appSender.SendAppRequest(ctxWithoutCancel, nodeIDs, requestID, request); err != nil {
   297  		c.lock.Unlock()
   298  		c.log.Fatal("failed to send app request",
   299  			zap.Stringer("nodeID", nodeID),
   300  			zap.Uint32("requestID", requestID),
   301  			zap.Int("requestLen", len(request)),
   302  			zap.Error(err),
   303  		)
   304  		return nil, fmt.Errorf("%w: %w", errAppSendFailed, err)
   305  	}
   306  
   307  	handler := newResponseHandler()
   308  	c.outstandingRequestHandlers[requestID] = handler
   309  	return handler.responseChan, nil
   310  }
   311  
   312  // awaitResponse from [nodeID] and returns the response.
   313  //
   314  // Returns an error if the request failed or [ctx] is canceled.
   315  //
   316  // Blocks until a response is received or the [ctx] is canceled fails.
   317  //
   318  // Assumes [nodeID] is never [c.myNodeID] since we guarantee [c.myNodeID] will
   319  // not be added to [c.peers].
   320  //
   321  // Assumes [c.lock] is not held.
   322  func (c *networkClient) awaitResponse(
   323  	ctx context.Context,
   324  	nodeID ids.NodeID,
   325  	responseChan chan []byte,
   326  ) ([]byte, error) {
   327  	var (
   328  		response  []byte
   329  		responded bool
   330  		startTime = time.Now()
   331  	)
   332  	select {
   333  	case <-ctx.Done():
   334  		c.peers.RegisterFailure(nodeID)
   335  		return nil, ctx.Err()
   336  	case response, responded = <-responseChan:
   337  	}
   338  	if !responded {
   339  		c.peers.RegisterFailure(nodeID)
   340  		return nil, errRequestFailed
   341  	}
   342  
   343  	elapsedSeconds := time.Since(startTime).Seconds()
   344  	bandwidth := float64(len(response)) / (elapsedSeconds + epsilon)
   345  	c.peers.RegisterResponse(nodeID, bandwidth)
   346  
   347  	c.log.Debug("received response from peer",
   348  		zap.Stringer("nodeID", nodeID),
   349  		zap.Int("responseLen", len(response)),
   350  	)
   351  	return response, nil
   352  }
   353  
   354  func (c *networkClient) Connected(
   355  	_ context.Context,
   356  	nodeID ids.NodeID,
   357  	nodeVersion *version.Application,
   358  ) error {
   359  	c.log.Debug("adding new peer", zap.Stringer("nodeID", nodeID))
   360  	c.peers.Connected(nodeID, nodeVersion)
   361  	return nil
   362  }
   363  
   364  func (c *networkClient) Disconnected(_ context.Context, nodeID ids.NodeID) error {
   365  	c.log.Debug("disconnecting peer", zap.Stringer("nodeID", nodeID))
   366  	c.peers.Disconnected(nodeID)
   367  	return nil
   368  }