github.com/ava-labs/avalanchego@v1.11.11/network/throttling/bandwidth_throttler.go (about)

     1  // Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
     2  // See the file LICENSE for licensing terms.
     3  
     4  package throttling
     5  
import (
	"context"
	"math"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"go.uber.org/zap"
	"golang.org/x/time/rate"

	"github.com/ava-labs/avalanchego/ids"
	"github.com/ava-labs/avalanchego/utils/logging"
	"github.com/ava-labs/avalanchego/utils/metric"
	"github.com/ava-labs/avalanchego/utils/wrappers"
)
    20  
// Compile-time assertion that *bandwidthThrottlerImpl satisfies the
// bandwidthThrottler interface.
var _ bandwidthThrottler = (*bandwidthThrottlerImpl)(nil)
    22  
// bandwidthThrottler rate-limits bandwidth usage per node using a token
// bucket model, where each token is 1 byte.
// See https://pkg.go.dev/golang.org/x/time/rate#Limiter
type bandwidthThrottler interface {
	// Acquire blocks until [nodeID] can read a message of size [msgSize].
	// AddNode([nodeID]) must have been called since
	// the last time RemoveNode([nodeID]) was called, if any.
	// It's safe for multiple goroutines to concurrently call Acquire.
	// Returns immediately if [ctx] is canceled.
	Acquire(ctx context.Context, msgSize uint64, nodeID ids.NodeID)

	// AddNode adds a new node to this throttler.
	// Must be called before Acquire(..., [nodeID]) is called.
	// RemoveNode([nodeID]) must have been called since the last time
	// AddNode([nodeID]) was called, if any.
	// The refill rate and max burst size of the node's bandwidth allocation
	// are not arguments of this method; the implementation in this file
	// takes them from BandwidthThrottlerConfig (RefillRate, MaxBurstSize).
	// The max burst size must be at least the maximum message size.
	// It's safe for multiple goroutines to concurrently call AddNode.
	AddNode(nodeID ids.NodeID)

	// RemoveNode removes a node from this throttler.
	// AddNode([nodeID]) must have been called since
	// the last time RemoveNode([nodeID]) was called, if any.
	// Must be called when we stop reading messages from [nodeID].
	// It's safe for multiple goroutines to concurrently call RemoveNode.
	RemoveNode(nodeID ids.NodeID)
}
    51  
// BandwidthThrottlerConfig holds the token bucket parameters that are applied
// to the rate limiter created for each node added to the throttler.
type BandwidthThrottlerConfig struct {
	// Rate at which the inbound bandwidth consumable by a peer replenishes.
	// Used as the limit of the peer's rate limiter (presumably bytes per
	// second, since each token is a byte — confirm against the limiter docs).
	RefillRate uint64 `json:"bandwidthRefillRate"`
	// Max amount of consumable bandwidth that can accumulate for a given peer.
	// Used as the burst size of the peer's rate limiter.
	// Note that the JSON key ends in "Rate", not "Size".
	MaxBurstSize uint64 `json:"bandwidthMaxBurstRate"`
}
    58  
    59  func newBandwidthThrottler(
    60  	log logging.Logger,
    61  	registerer prometheus.Registerer,
    62  	config BandwidthThrottlerConfig,
    63  ) (bandwidthThrottler, error) {
    64  	errs := wrappers.Errs{}
    65  	t := &bandwidthThrottlerImpl{
    66  		BandwidthThrottlerConfig: config,
    67  		log:                      log,
    68  		limiters:                 make(map[ids.NodeID]*rate.Limiter),
    69  		metrics: bandwidthThrottlerMetrics{
    70  			acquireLatency: metric.NewAveragerWithErrs(
    71  				"bandwidth_throttler_inbound_acquire_latency",
    72  				"average time (in ns) to acquire bytes from the inbound bandwidth throttler",
    73  				registerer,
    74  				&errs,
    75  			),
    76  			awaitingAcquire: prometheus.NewGauge(prometheus.GaugeOpts{
    77  				Name: "bandwidth_throttler_inbound_awaiting_acquire",
    78  				Help: "Number of inbound messages waiting to acquire bandwidth from the inbound bandwidth throttler",
    79  			}),
    80  		},
    81  	}
    82  	errs.Add(registerer.Register(t.metrics.awaitingAcquire))
    83  	return t, errs.Err
    84  }
    85  
// bandwidthThrottlerMetrics groups the metrics updated by Acquire.
type bandwidthThrottlerMetrics struct {
	// Observes the duration of each Acquire call, in nanoseconds.
	acquireLatency metric.Averager
	// Incremented when an Acquire call starts and decremented when it returns.
	awaitingAcquire prometheus.Gauge
}
    90  
// bandwidthThrottlerImpl implements bandwidthThrottler by keeping one
// rate.Limiter per registered node.
type bandwidthThrottlerImpl struct {
	// Supplies RefillRate and MaxBurstSize for every limiter that is created.
	BandwidthThrottlerConfig
	metrics bandwidthThrottlerMetrics
	log     logging.Logger
	// Guards [limiters]: read-locked for lookups in Acquire, write-locked
	// in AddNode and RemoveNode.
	lock sync.RWMutex
	// Node ID --> token bucket based rate limiter where each token
	// is a byte of bandwidth.
	limiters map[ids.NodeID]*rate.Limiter
}
   100  
   101  // See BandwidthThrottler.
   102  func (t *bandwidthThrottlerImpl) Acquire(
   103  	ctx context.Context,
   104  	msgSize uint64,
   105  	nodeID ids.NodeID,
   106  ) {
   107  	startTime := time.Now()
   108  	t.metrics.awaitingAcquire.Inc()
   109  	defer func() {
   110  		t.metrics.acquireLatency.Observe(float64(time.Since(startTime)))
   111  		t.metrics.awaitingAcquire.Dec()
   112  	}()
   113  
   114  	t.lock.RLock()
   115  	limiter, ok := t.limiters[nodeID]
   116  	t.lock.RUnlock()
   117  	if !ok {
   118  		// This should never happen. If it is, the caller is misusing this struct.
   119  		t.log.Debug("tried to acquire throttler but the node isn't registered",
   120  			zap.Uint64("messageSize", msgSize),
   121  			zap.Stringer("nodeID", nodeID),
   122  		)
   123  		return
   124  	}
   125  	if err := limiter.WaitN(ctx, int(msgSize)); err != nil {
   126  		// This should only happen on shutdown.
   127  		t.log.Debug("error while waiting for throttler",
   128  			zap.Uint64("messageSize", msgSize),
   129  			zap.Stringer("nodeID", nodeID),
   130  			zap.Error(err),
   131  		)
   132  	}
   133  }
   134  
   135  // See BandwidthThrottler.
   136  func (t *bandwidthThrottlerImpl) AddNode(nodeID ids.NodeID) {
   137  	t.lock.Lock()
   138  	defer t.lock.Unlock()
   139  
   140  	if _, ok := t.limiters[nodeID]; ok {
   141  		t.log.Debug("tried to add peer but it's already registered",
   142  			zap.Stringer("nodeID", nodeID),
   143  		)
   144  		return
   145  	}
   146  	t.limiters[nodeID] = rate.NewLimiter(rate.Limit(t.RefillRate), int(t.MaxBurstSize))
   147  }
   148  
   149  // See BandwidthThrottler.
   150  func (t *bandwidthThrottlerImpl) RemoveNode(nodeID ids.NodeID) {
   151  	t.lock.Lock()
   152  	defer t.lock.Unlock()
   153  
   154  	if _, ok := t.limiters[nodeID]; !ok {
   155  		t.log.Debug("tried to remove peer but it isn't registered",
   156  			zap.Stringer("nodeID", nodeID),
   157  		)
   158  		return
   159  	}
   160  	delete(t.limiters, nodeID)
   161  }