github.com/MetalBlockchain/metalgo@v1.11.9/network/throttling/inbound_resource_throttler.go

// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package throttling

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"

	"github.com/MetalBlockchain/metalgo/ids"
	"github.com/MetalBlockchain/metalgo/snow/networking/tracker"
	"github.com/MetalBlockchain/metalgo/utils/timer/mockable"
)

const epsilon = time.Millisecond

var (
	_ SystemThrottler = (*systemThrottler)(nil)
	_ SystemThrottler = noSystemThrottler{}
)

// SystemThrottler rate-limits based on the system metrics usage caused by each
// peer. We will not read messages from peers whose messages cause excessive
// usage until the usage caused by the peer drops to an acceptable level.
type SystemThrottler interface {
	// Blocks until we can read a message from the given peer.
	// If [ctx] is canceled, returns immediately.
	Acquire(ctx context.Context, nodeID ids.NodeID)
}

// A system throttler that always immediately returns on [Acquire].
type noSystemThrottler struct{}

func (noSystemThrottler) Acquire(context.Context, ids.NodeID) {}

type SystemThrottlerConfig struct {
	Clock mockable.Clock `json:"-"`
	// The maximum amount of time we'll wait before re-checking whether a call
	// to [Acquire] can return.
	MaxRecheckDelay time.Duration `json:"maxRecheckDelay"`
}

type systemThrottler struct {
	SystemThrottlerConfig
	metrics *systemThrottlerMetrics
	// Tells us the target utilization of each node.
	targeter tracker.Targeter
	// Tells us the utilization of each node.
	tracker tracker.Tracker
	// Invariant: [timerPool] only returns timers that have been stopped and
	// drained.
	timerPool sync.Pool
}
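
// The helper below is an illustrative sketch only and is not part of the
// original file; it isolates the stop-and-drain discipline that the
// [timerPool] invariant above requires. Pooling a timer that has already
// fired without draining its channel would leave a stale value buffered in
// [timer.C], and a later Reset/wait on that timer could appear to fire
// immediately.
func stopAndDrainTimerSketch(timer *time.Timer) {
	if !timer.Stop() {
		// The timer already fired (or was stopped); drain the channel if a
		// value is still buffered. The default case avoids blocking when the
		// value was already received.
		select {
		case <-timer.C:
		default:
		}
	}
}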

type systemThrottlerMetrics struct {
	totalWaits      prometheus.Counter
	totalNoWaits    prometheus.Counter
	awaitingAcquire prometheus.Gauge
}

func newSystemThrottlerMetrics(namespace string, reg prometheus.Registerer) (*systemThrottlerMetrics, error) {
	m := &systemThrottlerMetrics{
		totalWaits: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Name:      "throttler_total_waits",
			Help:      "Number of times we've waited to read a message from a node because their usage was too high",
		}),
		totalNoWaits: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Name:      "throttler_total_no_waits",
			Help:      "Number of times we didn't wait to read a message from a node because their usage wasn't too high",
		}),
		awaitingAcquire: prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: namespace,
			Name:      "throttler_awaiting_acquire",
			Help:      "Number of nodes we're waiting to read a message from because their usage is too high",
		}),
	}
	err := errors.Join(
		reg.Register(m.totalWaits),
		reg.Register(m.totalNoWaits),
		reg.Register(m.awaitingAcquire),
	)
	return m, err
}

func NewSystemThrottler(
	namespace string,
	reg prometheus.Registerer,
	config SystemThrottlerConfig,
	tracker tracker.Tracker,
	targeter tracker.Targeter,
) (SystemThrottler, error) {
	metrics, err := newSystemThrottlerMetrics(namespace, reg)
	if err != nil {
		return nil, fmt.Errorf("couldn't initialize system throttler metrics: %w", err)
	}
	return &systemThrottler{
		metrics:               metrics,
		SystemThrottlerConfig: config,
		targeter:              targeter,
		tracker:               tracker,
		timerPool: sync.Pool{
			New: func() interface{} {
				// Satisfy invariant that timer is stopped and drained.
				timer := time.NewTimer(0)
				if !timer.Stop() {
					<-timer.C
				}
				return timer
			},
		},
	}, nil
}
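
// newCPUThrottlerSketch is an illustrative sketch, not part of the original
// file: it shows one way a caller might wire [NewSystemThrottler] to an
// existing CPU tracker and targeter. The "cpu" namespace and the 5-second
// re-check delay are arbitrary example values, not recommendations.
func newCPUThrottlerSketch(
	reg prometheus.Registerer,
	cpuTracker tracker.Tracker,
	cpuTargeter tracker.Targeter,
) (SystemThrottler, error) {
	return NewSystemThrottler(
		"cpu", // metrics namespace
		reg,
		SystemThrottlerConfig{
			MaxRecheckDelay: 5 * time.Second,
		},
		cpuTracker,
		cpuTargeter,
	)
}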

func (t *systemThrottler) Acquire(ctx context.Context, nodeID ids.NodeID) {
	// [timer] fires when we should re-check whether this node's
	// usage has fallen to an acceptable level.
	// Lazily initialize the timer only if we actually need to wait.
	var timer *time.Timer
	defer func() {
		if timer != nil { // We waited at least once for usage to fall.
			t.metrics.totalWaits.Inc()
			// Note that [t.metrics.awaitingAcquire.Inc()] was called exactly
			// once, when [timer] was first initialized below.
			t.metrics.awaitingAcquire.Dec()
		} else {
			t.metrics.totalNoWaits.Inc()
		}
	}()

	for {
		now := t.Clock.Time()
		// Get the target usage for this node.
		target := t.targeter.TargetUsage(nodeID)
		// Get the actual usage for this node.
		usage := t.tracker.Usage(nodeID, now)
		if usage <= target {
			return
		}
		// See how long it will take for actual usage to drop to the target,
		// assuming this node uses no more resources.
		waitDuration := t.tracker.TimeUntilUsage(nodeID, now, target)
		if waitDuration < epsilon {
			// If the amount of time until we reach the target is very small,
			// just return to avoid a situation where we excessively re-check.
			return
		}
		if waitDuration > t.MaxRecheckDelay {
			// Re-check at least every [t.MaxRecheckDelay] in case it will be a
			// very long time until usage reaches the target level.
			//
			// Note that not only can a node's usage decrease over time, but
			// its target usage may also increase. In that case, the node's
			// usage can drop to the target level sooner than [waitDuration]
			// because the target has increased. The minimum re-check
			// frequency accounts for that case by optimistically re-checking
			// whether the node's usage is now at an acceptable level.
			waitDuration = t.MaxRecheckDelay
		}

		// Reset [timer].
		if timer == nil {
			// Note this is called at most once.
			t.metrics.awaitingAcquire.Inc()

			timer = t.timerPool.Get().(*time.Timer)
			defer func() {
				// Satisfy the [t.timerPool] invariant.
				if !timer.Stop() {
					// The default case ensures we don't block forever if the
					// channel was already drained.
					select {
					case <-timer.C:
					default:
					}
				}
				t.timerPool.Put(timer)
			}()
		}
		timer.Reset(waitDuration)
		select {
		case <-ctx.Done():
			return
		case <-timer.C:
		}
	}
}
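
// peerReadLoopSketch is an illustrative sketch, not part of the original
// file: it shows where a caller would typically place [Acquire] in a
// per-peer read loop. readMsg and handleMsg are hypothetical stand-ins for
// the caller's own message reading and handling.
func peerReadLoopSketch(
	ctx context.Context,
	throttler SystemThrottler,
	nodeID ids.NodeID,
	readMsg func() ([]byte, error),
	handleMsg func([]byte),
) {
	for {
		// Block until this peer's resource usage is at an acceptable level,
		// or until [ctx] is canceled.
		throttler.Acquire(ctx, nodeID)
		if ctx.Err() != nil {
			// Acquire returned because the context was canceled, not because
			// usage dropped; stop reading from this peer.
			return
		}
		msg, err := readMsg()
		if err != nil {
			return
		}
		handleMsg(msg)
	}
}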