github.com/ava-labs/avalanchego@v1.11.11/network/throttling/inbound_msg_byte_throttler.go

// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package throttling

import (
	"context"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"go.uber.org/zap"

	"github.com/ava-labs/avalanchego/ids"
	"github.com/ava-labs/avalanchego/snow/validators"
	"github.com/ava-labs/avalanchego/utils/constants"
	"github.com/ava-labs/avalanchego/utils/linked"
	"github.com/ava-labs/avalanchego/utils/logging"
	"github.com/ava-labs/avalanchego/utils/metric"
	"github.com/ava-labs/avalanchego/utils/wrappers"
)

// See inbound_msg_throttler.go

func newInboundMsgByteThrottler(
	log logging.Logger,
	registerer prometheus.Registerer,
	vdrs validators.Manager,
	config MsgByteThrottlerConfig,
) (*inboundMsgByteThrottler, error) {
	t := &inboundMsgByteThrottler{
		commonMsgThrottler: commonMsgThrottler{
			log:                    log,
			vdrs:                   vdrs,
			maxVdrBytes:            config.VdrAllocSize,
			remainingVdrBytes:      config.VdrAllocSize,
			remainingAtLargeBytes:  config.AtLargeAllocSize,
			nodeMaxAtLargeBytes:    config.NodeMaxAtLargeBytes,
			nodeToVdrBytesUsed:     make(map[ids.NodeID]uint64),
			nodeToAtLargeBytesUsed: make(map[ids.NodeID]uint64),
		},
		waitingToAcquire:   linked.NewHashmap[uint64, *msgMetadata](),
		nodeToWaitingMsgID: make(map[ids.NodeID]uint64),
	}
	return t, t.metrics.initialize(registerer)
}
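
// Example usage (a minimal sketch; the sizes below are arbitrary and
// [vdrs] is assumed to be an existing validators.Manager):
//
//	throttler, err := newInboundMsgByteThrottler(
//		logging.NoLog{},
//		prometheus.NewRegistry(),
//		vdrs,
//		MsgByteThrottlerConfig{
//			VdrAllocSize:        32 * 1024 * 1024, // 32 MiB reserved for validators
//			AtLargeAllocSize:    32 * 1024 * 1024, // 32 MiB shared by all nodes
//			NodeMaxAtLargeBytes: 2 * 1024 * 1024,  // per-node cap on the at-large pool
//		},
//	)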

// Information about a message waiting to be read.
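// bytesNeeded starts equal to msgSize and is decremented as bytes are
// granted; once it reaches zero while the message is waiting,
// closeOnAcquireChan is closed and Acquire returns.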
type msgMetadata struct {
	// The number of additional bytes that must be acquired
	// before Acquire returns
	bytesNeeded uint64
	// The total number of bytes this message requested via Acquire
	msgSize uint64
	// The sender of this incoming message
	nodeID ids.NodeID
	// Closed when the message can be read.
	closeOnAcquireChan chan struct{}
}

// inboundMsgByteThrottler limits the number of inbound message bytes
// outstanding (acquired but not yet released) at any given time.
// It gives more space to validators with more stake.
// Messages are guaranteed to make progress toward
// acquiring enough bytes to be read.
type inboundMsgByteThrottler struct {
	commonMsgThrottler
	metrics   inboundMsgByteThrottlerMetrics
	nextMsgID uint64
	// Node ID --> Msg ID for a message this node is waiting to acquire
	nodeToWaitingMsgID map[ids.NodeID]uint64
	// Msg ID --> *msgMetadata
	waitingToAcquire *linked.Hashmap[uint64, *msgMetadata]
	// Invariant: A node is only waiting on a single message at a time
	//
	// Invariant: waitingToAcquire.Get(nodeToWaitingMsgID[nodeID])
	// is the info about the message from [nodeID] that is blocked
	// on reading.
	//
	// Invariant: len(nodeToWaitingMsgID) >= 1
	// implies waitingToAcquire.Len() >= 1, and vice versa.
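	//
	// For example, if [nodeID] is waiting on message ID 7, then
	// nodeToWaitingMsgID[nodeID] == 7 and waitingToAcquire.Get(7)
	// returns the metadata for that message.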
}

// Acquire returns when we can read a message of size [msgSize] from node
// [nodeID]. The returned ReleaseFunc must always be called, either when
// we're done with the message or when we give up trying to read it.
func (t *inboundMsgByteThrottler) Acquire(ctx context.Context, msgSize uint64, nodeID ids.NodeID) ReleaseFunc {
	startTime := time.Now()
	defer func() {
		t.metrics.awaitingRelease.Inc()
		t.metrics.acquireLatency.Observe(float64(time.Since(startTime)))
	}()
	metadata := &msgMetadata{
		bytesNeeded: msgSize,
		msgSize:     msgSize,
		nodeID:      nodeID,
	}

	t.lock.Lock()

	// If this node already has a message waiting, log the error and return
	if existingID, exists := t.nodeToWaitingMsgID[nodeID]; exists {
		t.log.Error("node already waiting on message",
			zap.Stringer("nodeID", nodeID),
			zap.Uint64("messageID", existingID),
		)
		t.lock.Unlock()
		return t.metrics.awaitingRelease.Dec
	}

	// Take as many bytes as we can from the at-large allocation.
	atLargeBytesUsed := min(
		// only give as many bytes as needed
		metadata.bytesNeeded,
		// don't exceed the per-node limit
		t.nodeMaxAtLargeBytes-t.nodeToAtLargeBytesUsed[nodeID],
		// don't give more bytes than are in the allocation
		t.remainingAtLargeBytes,
	)
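	// For example (illustrative numbers): with bytesNeeded = 5000,
	// nodeMaxAtLargeBytes = 4096 of which this node already uses 1024, and
	// remainingAtLargeBytes = 10000, we take min(5000, 3072, 10000) = 3072
	// bytes from the at-large allocation.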
	if atLargeBytesUsed > 0 {
		t.remainingAtLargeBytes -= atLargeBytesUsed
		t.metrics.remainingAtLargeBytes.Set(float64(t.remainingAtLargeBytes))
		metadata.bytesNeeded -= atLargeBytesUsed
		t.nodeToAtLargeBytesUsed[nodeID] += atLargeBytesUsed
		if metadata.bytesNeeded == 0 { // If we acquired enough bytes, return
			t.lock.Unlock()
			return func() {
				t.release(metadata, nodeID)
			}
		}
	}

	// Take as many bytes as we can from [nodeID]'s validator allocation.
	// Calculate [nodeID]'s validator allocation size based on its weight
	vdrAllocationSize := uint64(0)
	weight := t.vdrs.GetWeight(constants.PrimaryNetworkID, nodeID)
	if weight != 0 {
		totalWeight, err := t.vdrs.TotalWeight(constants.PrimaryNetworkID)
		if err != nil {
			t.log.Error("couldn't get total weight of primary network",
				zap.Error(err),
			)
		} else {
			vdrAllocationSize = uint64(float64(t.maxVdrBytes) * float64(weight) / float64(totalWeight))
		}
	}
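	// For example (illustrative numbers): with maxVdrBytes = 1048576 (1 MiB),
	// weight = 50, and totalWeight = 1000, this node's validator allocation
	// is uint64(1048576 * 50 / 1000) = 52428 bytes.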
	vdrBytesAlreadyUsed := t.nodeToVdrBytesUsed[nodeID]
	// [vdrBytesAllowed] is the number of bytes this node
	// may take from its validator allocation.
	vdrBytesAllowed := vdrAllocationSize
	if vdrBytesAlreadyUsed >= vdrAllocationSize {
		// We're already using all the bytes we can from the validator allocation
		vdrBytesAllowed = 0
	} else {
		vdrBytesAllowed -= vdrBytesAlreadyUsed
	}
	vdrBytesUsed := min(t.remainingVdrBytes, metadata.bytesNeeded, vdrBytesAllowed)
	if vdrBytesUsed > 0 {
		// Mark that [nodeID] used [vdrBytesUsed] from its validator allocation
		t.nodeToVdrBytesUsed[nodeID] += vdrBytesUsed
		t.remainingVdrBytes -= vdrBytesUsed
		t.metrics.remainingVdrBytes.Set(float64(t.remainingVdrBytes))
		metadata.bytesNeeded -= vdrBytesUsed
		if metadata.bytesNeeded == 0 { // If we acquired enough bytes, return
			t.lock.Unlock()
			return func() {
				t.release(metadata, nodeID)
			}
		}
	}

	// We still haven't acquired enough bytes to read the message.
	// Wait until more bytes are released.

	// [closeOnAcquireChan] is closed when [msgSize] bytes have
	// been acquired and the message can be read.
	metadata.closeOnAcquireChan = make(chan struct{})
	t.nextMsgID++
	msgID := t.nextMsgID
	t.waitingToAcquire.Put(
		msgID,
		metadata,
	)

	t.nodeToWaitingMsgID[nodeID] = msgID
	t.lock.Unlock()

	t.metrics.awaitingAcquire.Inc()
	defer t.metrics.awaitingAcquire.Dec()

	select {
	case <-metadata.closeOnAcquireChan:
	case <-ctx.Done():
		t.lock.Lock()
		t.waitingToAcquire.Delete(msgID)
		delete(t.nodeToWaitingMsgID, nodeID)
		t.lock.Unlock()
	}
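	// Note that if [ctx] was canceled, any bytes this message already
	// acquired are still held; they are returned when the caller invokes
	// the ReleaseFunc returned below.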

	return func() {
		t.release(metadata, nodeID)
	}
}
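
// Example usage (a minimal sketch; [throttler], [msgBytes], and [handle]
// are hypothetical):
//
//	release := throttler.Acquire(ctx, uint64(len(msgBytes)), nodeID)
//	defer release()
//	handle(msgBytes) // read/handle the message while its bytes are held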

// Must correspond to a previous call of Acquire([msgSize], [nodeID])
func (t *inboundMsgByteThrottler) release(metadata *msgMetadata, nodeID ids.NodeID) {
	t.lock.Lock()
	defer func() {
		t.metrics.remainingAtLargeBytes.Set(float64(t.remainingAtLargeBytes))
		t.metrics.remainingVdrBytes.Set(float64(t.remainingVdrBytes))
		t.metrics.awaitingRelease.Dec()
		t.lock.Unlock()
	}()

	// [vdrBytesToReturn] is the number of bytes from [msgSize]
	// that will be given back to [nodeID]'s validator allocation
	// or to messages from [nodeID] currently waiting to acquire bytes.
	vdrBytesUsed := t.nodeToVdrBytesUsed[nodeID]
	releasedBytes := metadata.msgSize - metadata.bytesNeeded
	vdrBytesToReturn := min(releasedBytes, vdrBytesUsed)

	// [atLargeBytesToReturn] is the number of bytes from [msgSize]
	// that will be given to the at-large allocation or to a message
	// from any node currently waiting to acquire bytes.
	atLargeBytesToReturn := releasedBytes - vdrBytesToReturn
	if atLargeBytesToReturn > 0 {
		// Mark that [nodeID] has released these bytes.
		t.remainingAtLargeBytes += atLargeBytesToReturn
		t.nodeToAtLargeBytesUsed[nodeID] -= atLargeBytesToReturn
		if t.nodeToAtLargeBytesUsed[nodeID] == 0 {
			delete(t.nodeToAtLargeBytesUsed, nodeID)
		}

		// Iterate over messages waiting to acquire bytes from oldest
		// (waiting the longest) to newest. Try to give bytes to the
		// oldest message, then the next oldest, etc. until there are no
		// waiting messages or we exhaust the bytes.
		iter := t.waitingToAcquire.NewIterator()
		for t.remainingAtLargeBytes > 0 && iter.Next() {
			msg := iter.Value()
			// From the at-large allocation, take the maximum number of bytes
			// without exceeding the per-node limit on taking from the at-large pool.
			atLargeBytesGiven := min(
				// don't give [msg] more bytes than it needs
				msg.bytesNeeded,
				// don't exceed the per-node limit
				t.nodeMaxAtLargeBytes-t.nodeToAtLargeBytesUsed[msg.nodeID],
				// don't give more bytes than are in the allocation
				t.remainingAtLargeBytes,
			)
			if atLargeBytesGiven > 0 {
				// Mark that we gave [atLargeBytesGiven] bytes to [msg]
				t.nodeToAtLargeBytesUsed[msg.nodeID] += atLargeBytesGiven
				t.remainingAtLargeBytes -= atLargeBytesGiven
				atLargeBytesToReturn -= atLargeBytesGiven
				msg.bytesNeeded -= atLargeBytesGiven
			}
			if msg.bytesNeeded == 0 {
				// [msg] has acquired enough bytes to be read.
				// Unblock the corresponding thread in Acquire
				close(msg.closeOnAcquireChan)
				// Mark that this message is no longer waiting to acquire bytes
				delete(t.nodeToWaitingMsgID, msg.nodeID)

				t.waitingToAcquire.Delete(iter.Key())
			}
		}
	}

	// Get the message from [nodeID], if any, waiting to acquire bytes
	msgID, ok := t.nodeToWaitingMsgID[nodeID]
	if vdrBytesToReturn > 0 && ok {
		msg, exists := t.waitingToAcquire.Get(msgID)
		if exists {
			// Give [msg] all the bytes we can
			bytesToGive := min(msg.bytesNeeded, vdrBytesToReturn)
			msg.bytesNeeded -= bytesToGive
			vdrBytesToReturn -= bytesToGive
			if msg.bytesNeeded == 0 {
				// Unblock the corresponding thread in Acquire
				close(msg.closeOnAcquireChan)
				delete(t.nodeToWaitingMsgID, nodeID)
				t.waitingToAcquire.Delete(msgID)
			}
		} else {
			// This should never happen
			t.log.Warn("couldn't find message",
				zap.Stringer("nodeID", nodeID),
				zap.Uint64("messageID", msgID),
			)
		}
	}
	if vdrBytesToReturn > 0 {
		// We gave back all the bytes we could to waiting messages from [nodeID],
		// but some bytes are still left over, so return them to the validator
		// allocation.
		t.nodeToVdrBytesUsed[nodeID] -= vdrBytesToReturn
		if t.nodeToVdrBytesUsed[nodeID] == 0 {
			delete(t.nodeToVdrBytesUsed, nodeID)
		}
		t.remainingVdrBytes += vdrBytesToReturn
	}
}
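
// For example (illustrative numbers): suppose a fully-acquired 1000-byte
// message from [nodeID] is released while t.nodeToVdrBytesUsed[nodeID] == 600.
// Then releasedBytes = 1000, vdrBytesToReturn = min(1000, 600) = 600, and
// atLargeBytesToReturn = 400: the validator allocation is credited up to the
// node's outstanding validator usage, and the remainder goes back to the
// at-large pool (or to waiting messages).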

type inboundMsgByteThrottlerMetrics struct {
	acquireLatency        metric.Averager
	remainingAtLargeBytes prometheus.Gauge
	remainingVdrBytes     prometheus.Gauge
	awaitingAcquire       prometheus.Gauge
	awaitingRelease       prometheus.Gauge
}

func (m *inboundMsgByteThrottlerMetrics) initialize(reg prometheus.Registerer) error {
	errs := wrappers.Errs{}
	m.acquireLatency = metric.NewAveragerWithErrs(
		"byte_throttler_inbound_acquire_latency",
		"average time (in ns) to get space on the inbound message byte buffer",
		reg,
		&errs,
	)
	m.remainingAtLargeBytes = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "byte_throttler_inbound_remaining_at_large_bytes",
		Help: "Bytes remaining in the at-large byte buffer",
	})
	m.remainingVdrBytes = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "byte_throttler_inbound_remaining_validator_bytes",
		Help: "Bytes remaining in the validator byte buffer",
	})
	m.awaitingAcquire = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "byte_throttler_inbound_awaiting_acquire",
		Help: "Number of inbound messages waiting to acquire space on the inbound message byte buffer",
	})
	m.awaitingRelease = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "byte_throttler_inbound_awaiting_release",
		Help: "Number of messages currently being read/handled",
	})
	errs.Add(
		reg.Register(m.remainingAtLargeBytes),
		reg.Register(m.remainingVdrBytes),
		reg.Register(m.awaitingAcquire),
		reg.Register(m.awaitingRelease),
	)
	return errs.Err
}
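
// Example (a minimal sketch, assuming a fresh registry):
//
//	reg := prometheus.NewRegistry()
//	var m inboundMsgByteThrottlerMetrics
//	if err := m.initialize(reg); err != nil {
//		// a metric failed to register, e.g. due to a name collision
//	}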