github.com/ava-labs/avalanchego@v1.11.11/network/throttling/inbound_msg_byte_throttler.go

// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package throttling

import (
	"context"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"go.uber.org/zap"

	"github.com/ava-labs/avalanchego/ids"
	"github.com/ava-labs/avalanchego/snow/validators"
	"github.com/ava-labs/avalanchego/utils/constants"
	"github.com/ava-labs/avalanchego/utils/linked"
	"github.com/ava-labs/avalanchego/utils/logging"
	"github.com/ava-labs/avalanchego/utils/metric"
	"github.com/ava-labs/avalanchego/utils/wrappers"
)

// See inbound_msg_throttler.go

func newInboundMsgByteThrottler(
	log logging.Logger,
	registerer prometheus.Registerer,
	vdrs validators.Manager,
	config MsgByteThrottlerConfig,
) (*inboundMsgByteThrottler, error) {
	t := &inboundMsgByteThrottler{
		commonMsgThrottler: commonMsgThrottler{
			log:                    log,
			vdrs:                   vdrs,
			maxVdrBytes:            config.VdrAllocSize,
			remainingVdrBytes:      config.VdrAllocSize,
			remainingAtLargeBytes:  config.AtLargeAllocSize,
			nodeMaxAtLargeBytes:    config.NodeMaxAtLargeBytes,
			nodeToVdrBytesUsed:     make(map[ids.NodeID]uint64),
			nodeToAtLargeBytesUsed: make(map[ids.NodeID]uint64),
		},
		waitingToAcquire:   linked.NewHashmap[uint64, *msgMetadata](),
		nodeToWaitingMsgID: make(map[ids.NodeID]uint64),
	}
	return t, t.metrics.initialize(registerer)
}

// Information about a message waiting to be read.
type msgMetadata struct {
	// Need this many more bytes before Acquire returns
	bytesNeeded uint64
	// The total number of bytes this message attempted to acquire
	msgSize uint64
	// The sender of this incoming message
	nodeID ids.NodeID
	// Closed when the message can be read.
	closeOnAcquireChan chan struct{}
}

// inboundMsgByteThrottler throttles inbound messages based on the number of
// bytes being read. It gives more space to validators with more stake.
// Messages are guaranteed to make progress toward
// acquiring enough bytes to be read.
type inboundMsgByteThrottler struct {
	commonMsgThrottler
	metrics   inboundMsgByteThrottlerMetrics
	nextMsgID uint64
	// Node ID --> Msg ID for the message this node is waiting to acquire
	nodeToWaitingMsgID map[ids.NodeID]uint64
	// Msg ID --> *msgMetadata
	waitingToAcquire *linked.Hashmap[uint64, *msgMetadata]
	// Invariant: A node is only waiting on a single message at a time.
	//
	// Invariant: waitingToAcquire.Get(nodeToWaitingMsgID[nodeID])
	// is the metadata for the message from [nodeID] that is blocking
	// on reading.
	//
	// Invariant: len(nodeToWaitingMsgID) >= 1
	// implies waitingToAcquire.Len() >= 1, and vice versa.
}
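// A minimal usage sketch (illustrative only; in practice this throttler is
// constructed and driven by the network layer, see inbound_msg_throttler.go,
// and [log], [reg], [vdrs], and [config] below are assumed to already exist):
//
//	throttler, err := newInboundMsgByteThrottler(log, reg, vdrs, config)
//	if err != nil {
//		return err
//	}
//	release := throttler.Acquire(ctx, msgSize, nodeID) // blocks until enough bytes are free
//	defer release()                                    // give the bytes back once the message is handled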
// Returns when we can read a message of size [msgSize] from node [nodeID].
// The returned ReleaseFunc must be called (!) when done with the message
// or when we give up trying to read the message, if applicable.
func (t *inboundMsgByteThrottler) Acquire(ctx context.Context, msgSize uint64, nodeID ids.NodeID) ReleaseFunc {
	startTime := time.Now()
	defer func() {
		t.metrics.awaitingRelease.Inc()
		t.metrics.acquireLatency.Observe(float64(time.Since(startTime)))
	}()
	metadata := &msgMetadata{
		bytesNeeded: msgSize,
		msgSize:     msgSize,
		nodeID:      nodeID,
	}

	t.lock.Lock()

	// If there is already a message waiting, log the error and return
	if existingID, exists := t.nodeToWaitingMsgID[nodeID]; exists {
		t.log.Error("node already waiting on message",
			zap.Stringer("nodeID", nodeID),
			zap.Uint64("messageID", existingID),
		)
		t.lock.Unlock()
		return t.metrics.awaitingRelease.Dec
	}

	// Take as many bytes as we can from the at-large allocation.
	atLargeBytesUsed := min(
		// only give as many bytes as needed
		metadata.bytesNeeded,
		// don't exceed per-node limit
		t.nodeMaxAtLargeBytes-t.nodeToAtLargeBytesUsed[nodeID],
		// don't give more bytes than are in the allocation
		t.remainingAtLargeBytes,
	)
	if atLargeBytesUsed > 0 {
		t.remainingAtLargeBytes -= atLargeBytesUsed
		t.metrics.remainingAtLargeBytes.Set(float64(t.remainingAtLargeBytes))
		metadata.bytesNeeded -= atLargeBytesUsed
		t.nodeToAtLargeBytesUsed[nodeID] += atLargeBytesUsed
		if metadata.bytesNeeded == 0 { // If we acquired enough bytes, return
			t.lock.Unlock()
			return func() {
				t.release(metadata, nodeID)
			}
		}
	}

	// Take as many bytes as we can from [nodeID]'s validator allocation.
	// Calculate [nodeID]'s validator allocation size based on its weight.
	vdrAllocationSize := uint64(0)
	weight := t.vdrs.GetWeight(constants.PrimaryNetworkID, nodeID)
	if weight != 0 {
		totalWeight, err := t.vdrs.TotalWeight(constants.PrimaryNetworkID)
		if err != nil {
			t.log.Error("couldn't get total weight of primary network",
				zap.Error(err),
			)
		} else {
			vdrAllocationSize = uint64(float64(t.maxVdrBytes) * float64(weight) / float64(totalWeight))
		}
	}
	vdrBytesAlreadyUsed := t.nodeToVdrBytesUsed[nodeID]
	// [vdrBytesAllowed] is the number of bytes this node
	// may take from its validator allocation.
	vdrBytesAllowed := vdrAllocationSize
	if vdrBytesAlreadyUsed >= vdrAllocationSize {
		// We're already using all the bytes we can from the validator allocation
		vdrBytesAllowed = 0
	} else {
		vdrBytesAllowed -= vdrBytesAlreadyUsed
	}
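	// Worked example (numbers assumed for illustration, not taken from any
	// real config): with maxVdrBytes = 100 MiB and a node holding 5% of the
	// primary network's total stake, vdrAllocationSize = 100 MiB * 0.05 = 5 MiB.
	// If that node has already used 2 MiB of its validator allocation,
	// vdrBytesAllowed = 5 MiB - 2 MiB = 3 MiB.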
	vdrBytesUsed := min(t.remainingVdrBytes, metadata.bytesNeeded, vdrBytesAllowed)
	if vdrBytesUsed > 0 {
		// Mark that [nodeID] used [vdrBytesUsed] from its validator allocation
		t.nodeToVdrBytesUsed[nodeID] += vdrBytesUsed
		t.remainingVdrBytes -= vdrBytesUsed
		t.metrics.remainingVdrBytes.Set(float64(t.remainingVdrBytes))
		metadata.bytesNeeded -= vdrBytesUsed
		if metadata.bytesNeeded == 0 { // If we acquired enough bytes, return
			t.lock.Unlock()
			return func() {
				t.release(metadata, nodeID)
			}
		}
	}

	// We still haven't acquired enough bytes to read the message.
	// Wait until more bytes are released.

	// [closeOnAcquireChan] is closed when [msgSize] bytes have
	// been acquired and the message can be read.
	metadata.closeOnAcquireChan = make(chan struct{})
	t.nextMsgID++
	msgID := t.nextMsgID
	t.waitingToAcquire.Put(
		msgID,
		metadata,
	)

	t.nodeToWaitingMsgID[nodeID] = msgID
	t.lock.Unlock()

	t.metrics.awaitingAcquire.Inc()
	defer t.metrics.awaitingAcquire.Dec()

	select {
	case <-metadata.closeOnAcquireChan:
	case <-ctx.Done():
		t.lock.Lock()
		t.waitingToAcquire.Delete(msgID)
		delete(t.nodeToWaitingMsgID, nodeID)
		t.lock.Unlock()
	}

	return func() {
		t.release(metadata, nodeID)
	}
}
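// Illustrative caller-side sketch (names assumed): Acquire honors context
// cancellation, and the returned ReleaseFunc must still be called so that
// any partially acquired bytes are returned to their allocations.
//
//	ctx, cancel := context.WithTimeout(ctx, readTimeout)
//	defer cancel()
//	release := throttler.Acquire(ctx, msgSize, nodeID)
//	defer release()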
// Must correspond to a previous call of Acquire([msgSize], [nodeID])
func (t *inboundMsgByteThrottler) release(metadata *msgMetadata, nodeID ids.NodeID) {
	t.lock.Lock()
	defer func() {
		t.metrics.remainingAtLargeBytes.Set(float64(t.remainingAtLargeBytes))
		t.metrics.remainingVdrBytes.Set(float64(t.remainingVdrBytes))
		t.metrics.awaitingRelease.Dec()
		t.lock.Unlock()
	}()

	// [vdrBytesToReturn] is the number of bytes from [msgSize]
	// that will be given back to [nodeID]'s validator allocation
	// or to messages from [nodeID] currently waiting to acquire bytes.
	vdrBytesUsed := t.nodeToVdrBytesUsed[nodeID]
	releasedBytes := metadata.msgSize - metadata.bytesNeeded
	vdrBytesToReturn := min(releasedBytes, vdrBytesUsed)

	// [atLargeBytesToReturn] is the number of bytes from [msgSize]
	// that will be given to the at-large allocation or to a message
	// from any node currently waiting to acquire bytes.
	atLargeBytesToReturn := releasedBytes - vdrBytesToReturn
	if atLargeBytesToReturn > 0 {
		// Mark that [nodeID] has released these bytes.
		t.remainingAtLargeBytes += atLargeBytesToReturn
		t.nodeToAtLargeBytesUsed[nodeID] -= atLargeBytesToReturn
		if t.nodeToAtLargeBytesUsed[nodeID] == 0 {
			delete(t.nodeToAtLargeBytesUsed, nodeID)
		}

		// Iterate over messages waiting to acquire bytes, from oldest
		// (waiting the longest) to newest. Give bytes to the oldest
		// message first, then the next oldest, and so on, until there
		// are no waiting messages or we exhaust the bytes.
		iter := t.waitingToAcquire.NewIterator()
		for t.remainingAtLargeBytes > 0 && iter.Next() {
			msg := iter.Value()
			// From the at-large allocation, take the maximum number of bytes
			// without exceeding the per-node limit on the at-large pool.
			atLargeBytesGiven := min(
				// don't give [msg] more bytes than it needs
				msg.bytesNeeded,
				// don't exceed per-node limit
				t.nodeMaxAtLargeBytes-t.nodeToAtLargeBytesUsed[msg.nodeID],
				// don't give more bytes than are in the allocation
				t.remainingAtLargeBytes,
			)
			if atLargeBytesGiven > 0 {
				// Mark that we gave [atLargeBytesGiven] to [msg]
				t.nodeToAtLargeBytesUsed[msg.nodeID] += atLargeBytesGiven
				t.remainingAtLargeBytes -= atLargeBytesGiven
				atLargeBytesToReturn -= atLargeBytesGiven
				msg.bytesNeeded -= atLargeBytesGiven
			}
			if msg.bytesNeeded == 0 {
				// [msg] has acquired enough bytes to be read.
				// Unblock the corresponding thread in Acquire
				close(msg.closeOnAcquireChan)
				// Mark that this message is no longer waiting to acquire bytes
				delete(t.nodeToWaitingMsgID, msg.nodeID)

				t.waitingToAcquire.Delete(iter.Key())
			}
		}
	}

	// Get the message from [nodeID], if any, waiting to acquire
	msgID, ok := t.nodeToWaitingMsgID[nodeID]
	if vdrBytesToReturn > 0 && ok {
		msg, exists := t.waitingToAcquire.Get(msgID)
		if exists {
			// Give [msg] all the bytes we can
			bytesToGive := min(msg.bytesNeeded, vdrBytesToReturn)
			msg.bytesNeeded -= bytesToGive
			vdrBytesToReturn -= bytesToGive
			if msg.bytesNeeded == 0 {
				// Unblock the corresponding thread in Acquire
				close(msg.closeOnAcquireChan)
				delete(t.nodeToWaitingMsgID, nodeID)
				t.waitingToAcquire.Delete(msgID)
			}
		} else {
			// This should never happen
			t.log.Warn("couldn't find message",
				zap.Stringer("nodeID", nodeID),
				zap.Uint64("messageID", msgID),
			)
		}
	}
	if vdrBytesToReturn > 0 {
		// We gave back all the bytes we could to waiting messages from
		// [nodeID], but some bytes are still left over. Return them to
		// the validator allocation.
		t.nodeToVdrBytesUsed[nodeID] -= vdrBytesToReturn
		if t.nodeToVdrBytesUsed[nodeID] == 0 {
			delete(t.nodeToVdrBytesUsed, nodeID)
		}
		t.remainingVdrBytes += vdrBytesToReturn
	}
}

type inboundMsgByteThrottlerMetrics struct {
	acquireLatency        metric.Averager
	remainingAtLargeBytes prometheus.Gauge
	remainingVdrBytes     prometheus.Gauge
	awaitingAcquire       prometheus.Gauge
	awaitingRelease       prometheus.Gauge
}

func (m *inboundMsgByteThrottlerMetrics) initialize(reg prometheus.Registerer) error {
	errs := wrappers.Errs{}
	m.acquireLatency = metric.NewAveragerWithErrs(
		"byte_throttler_inbound_acquire_latency",
		"average time (in ns) to get space on the inbound message byte buffer",
		reg,
		&errs,
	)
	m.remainingAtLargeBytes = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "byte_throttler_inbound_remaining_at_large_bytes",
		Help: "Bytes remaining in the at-large byte buffer",
	})
	m.remainingVdrBytes = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "byte_throttler_inbound_remaining_validator_bytes",
		Help: "Bytes remaining in the validator byte buffer",
	})
	m.awaitingAcquire = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "byte_throttler_inbound_awaiting_acquire",
		Help: "Number of inbound messages waiting to acquire space on the inbound message byte buffer",
	})
	m.awaitingRelease = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "byte_throttler_inbound_awaiting_release",
		Help: "Number of messages currently being read/handled",
	})
	errs.Add(
		reg.Register(m.remainingAtLargeBytes),
		reg.Register(m.remainingVdrBytes),
		reg.Register(m.awaitingAcquire),
		reg.Register(m.awaitingRelease),
	)
	return errs.Err
}
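// Illustrative, test-style sketch of initializing the metrics against a
// fresh registry (variable names here are assumptions, not part of this file):
//
//	reg := prometheus.NewRegistry()
//	var m inboundMsgByteThrottlerMetrics
//	if err := m.initialize(reg); err != nil {
//		return err
//	}
//	m.remainingAtLargeBytes.Set(1024) // the gauges are registered and usable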