github.com/MetalBlockchain/metalgo@v1.11.9/snow/networking/benchlist/benchlist.go (about) 1 // Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. 2 // See the file LICENSE for licensing terms. 3 4 package benchlist 5 6 import ( 7 "errors" 8 "fmt" 9 "math/rand" 10 "sync" 11 "time" 12 13 "github.com/prometheus/client_golang/prometheus" 14 "go.uber.org/zap" 15 16 "github.com/MetalBlockchain/metalgo/ids" 17 "github.com/MetalBlockchain/metalgo/snow" 18 "github.com/MetalBlockchain/metalgo/snow/validators" 19 "github.com/MetalBlockchain/metalgo/utils/heap" 20 "github.com/MetalBlockchain/metalgo/utils/set" 21 "github.com/MetalBlockchain/metalgo/utils/timer/mockable" 22 23 safemath "github.com/MetalBlockchain/metalgo/utils/math" 24 ) 25 26 // If a peer consistently does not respond to queries, it will 27 // increase latencies on the network whenever that peer is polled. 28 // If we cannot terminate the poll early, then the poll will wait 29 // the full timeout before finalizing the poll and making progress. 30 // This can increase network latencies to an undesirable level. 31 32 // Therefore, nodes that consistently fail are "benched" such that 33 // queries to that node fail immediately to avoid waiting up to 34 // the full network timeout for a response. 35 type Benchlist interface { 36 // RegisterResponse registers the response to a query message 37 RegisterResponse(nodeID ids.NodeID) 38 // RegisterFailure registers that we didn't receive a response within the timeout 39 RegisterFailure(nodeID ids.NodeID) 40 // IsBenched returns true if messages to [validatorID] 41 // should not be sent over the network and should immediately fail. 42 IsBenched(nodeID ids.NodeID) bool 43 } 44 45 type failureStreak struct { 46 // Time of first consecutive timeout 47 firstFailure time.Time 48 // Number of consecutive message timeouts 49 consecutive int 50 } 51 52 type benchlist struct { 53 lock sync.RWMutex 54 // Context of the chain this is the benchlist for 55 ctx *snow.ConsensusContext 56 57 numBenched, weightBenched prometheus.Gauge 58 59 // Used to notify the timer that it should recalculate when it should fire 60 resetTimer chan struct{} 61 62 // Tells the time. Can be faked for testing. 63 clock mockable.Clock 64 65 // notified when a node is benched or unbenched 66 benchable Benchable 67 68 // Validator set of the network 69 vdrs validators.Manager 70 71 // Validator ID --> Consecutive failure information 72 // [streaklock] must be held when touching [failureStreaks] 73 streaklock sync.Mutex 74 failureStreaks map[ids.NodeID]failureStreak 75 76 // IDs of validators that are currently benched 77 benchlistSet set.Set[ids.NodeID] 78 79 // Min heap of benched validators ordered by when they can be unbenched 80 benchedHeap heap.Map[ids.NodeID, time.Time] 81 82 // A validator will be benched if [threshold] messages in a row 83 // to them time out and the first of those messages was more than 84 // [minimumFailingDuration] ago 85 threshold int 86 minimumFailingDuration time.Duration 87 88 // A benched validator will be benched for between [duration/2] and [duration] 89 duration time.Duration 90 91 // The maximum percentage of total network stake that may be benched 92 // Must be in [0,1) 93 maxPortion float64 94 } 95 96 // NewBenchlist returns a new Benchlist 97 func NewBenchlist( 98 ctx *snow.ConsensusContext, 99 benchable Benchable, 100 validators validators.Manager, 101 threshold int, 102 minimumFailingDuration, 103 duration time.Duration, 104 maxPortion float64, 105 reg prometheus.Registerer, 106 ) (Benchlist, error) { 107 if maxPortion < 0 || maxPortion >= 1 { 108 return nil, fmt.Errorf("max portion of benched stake must be in [0,1) but got %f", maxPortion) 109 } 110 111 benchlist := &benchlist{ 112 ctx: ctx, 113 numBenched: prometheus.NewGauge(prometheus.GaugeOpts{ 114 Name: "benched_num", 115 Help: "Number of currently benched validators", 116 }), 117 weightBenched: prometheus.NewGauge(prometheus.GaugeOpts{ 118 Name: "benched_weight", 119 Help: "Weight of currently benched validators", 120 }), 121 resetTimer: make(chan struct{}, 1), 122 failureStreaks: make(map[ids.NodeID]failureStreak), 123 benchlistSet: set.Set[ids.NodeID]{}, 124 benchable: benchable, 125 benchedHeap: heap.NewMap[ids.NodeID, time.Time](time.Time.Before), 126 vdrs: validators, 127 threshold: threshold, 128 minimumFailingDuration: minimumFailingDuration, 129 duration: duration, 130 maxPortion: maxPortion, 131 } 132 133 err := errors.Join( 134 reg.Register(benchlist.numBenched), 135 reg.Register(benchlist.weightBenched), 136 ) 137 if err != nil { 138 return nil, err 139 } 140 141 go benchlist.run() 142 return benchlist, nil 143 } 144 145 // TODO: Close this goroutine during node shutdown 146 func (b *benchlist) run() { 147 timer := time.NewTimer(0) 148 defer timer.Stop() 149 150 for { 151 // Invariant: The [timer] is not stopped. 152 select { 153 case <-timer.C: 154 case <-b.resetTimer: 155 if !timer.Stop() { 156 <-timer.C 157 } 158 } 159 160 b.waitForBenchedNodes() 161 162 b.removedExpiredNodes() 163 164 // Note: If there are no nodes to remove, [duration] will be 0 and we 165 // will immediately wait until there are benched nodes. 166 duration := b.durationToSleep() 167 timer.Reset(duration) 168 } 169 } 170 171 func (b *benchlist) waitForBenchedNodes() { 172 for { 173 b.lock.RLock() 174 _, _, ok := b.benchedHeap.Peek() 175 b.lock.RUnlock() 176 if ok { 177 return 178 } 179 180 // Invariant: Whenever a new node is benched we ensure that resetTimer 181 // has a pending message while the write lock is held. 182 <-b.resetTimer 183 } 184 } 185 186 func (b *benchlist) removedExpiredNodes() { 187 b.lock.Lock() 188 defer b.lock.Unlock() 189 190 now := b.clock.Time() 191 for { 192 _, next, ok := b.benchedHeap.Peek() 193 if !ok { 194 break 195 } 196 if now.Before(next) { 197 break 198 } 199 200 nodeID, _, _ := b.benchedHeap.Pop() 201 b.ctx.Log.Debug("removing node from benchlist", 202 zap.Stringer("nodeID", nodeID), 203 ) 204 b.benchlistSet.Remove(nodeID) 205 b.benchable.Unbenched(b.ctx.ChainID, nodeID) 206 } 207 208 b.numBenched.Set(float64(b.benchedHeap.Len())) 209 benchedStake, err := b.vdrs.SubsetWeight(b.ctx.SubnetID, b.benchlistSet) 210 if err != nil { 211 b.ctx.Log.Error("error calculating benched stake", 212 zap.Stringer("subnetID", b.ctx.SubnetID), 213 zap.Error(err), 214 ) 215 return 216 } 217 b.weightBenched.Set(float64(benchedStake)) 218 } 219 220 func (b *benchlist) durationToSleep() time.Duration { 221 b.lock.RLock() 222 defer b.lock.RUnlock() 223 224 _, next, ok := b.benchedHeap.Peek() 225 if !ok { 226 return 0 227 } 228 229 now := b.clock.Time() 230 return next.Sub(now) 231 } 232 233 // IsBenched returns true if messages to [nodeID] should not be sent over the 234 // network and should immediately fail. 235 func (b *benchlist) IsBenched(nodeID ids.NodeID) bool { 236 b.lock.RLock() 237 defer b.lock.RUnlock() 238 239 return b.benchlistSet.Contains(nodeID) 240 } 241 242 // RegisterResponse notes that we received a response from [nodeID] 243 func (b *benchlist) RegisterResponse(nodeID ids.NodeID) { 244 b.streaklock.Lock() 245 defer b.streaklock.Unlock() 246 247 delete(b.failureStreaks, nodeID) 248 } 249 250 // RegisterFailure notes that a request to [nodeID] timed out 251 func (b *benchlist) RegisterFailure(nodeID ids.NodeID) { 252 b.lock.Lock() 253 defer b.lock.Unlock() 254 255 if b.benchlistSet.Contains(nodeID) { 256 // This validator is benched. Ignore failures until they're not. 257 return 258 } 259 260 b.streaklock.Lock() 261 failureStreak := b.failureStreaks[nodeID] 262 // Increment consecutive failures 263 failureStreak.consecutive++ 264 now := b.clock.Time() 265 // Update first failure time 266 if failureStreak.firstFailure.IsZero() { 267 // This is the first consecutive failure 268 failureStreak.firstFailure = now 269 } 270 b.failureStreaks[nodeID] = failureStreak 271 b.streaklock.Unlock() 272 273 if failureStreak.consecutive >= b.threshold && now.After(failureStreak.firstFailure.Add(b.minimumFailingDuration)) { 274 b.bench(nodeID) 275 } 276 } 277 278 // Assumes [b.lock] is held 279 // Assumes [nodeID] is not already benched 280 func (b *benchlist) bench(nodeID ids.NodeID) { 281 validatorStake := b.vdrs.GetWeight(b.ctx.SubnetID, nodeID) 282 if validatorStake == 0 { 283 // We might want to bench a non-validator because they don't respond to 284 // my Get requests, but we choose to only bench validators. 285 return 286 } 287 288 benchedStake, err := b.vdrs.SubsetWeight(b.ctx.SubnetID, b.benchlistSet) 289 if err != nil { 290 b.ctx.Log.Error("error calculating benched stake", 291 zap.Stringer("subnetID", b.ctx.SubnetID), 292 zap.Error(err), 293 ) 294 return 295 } 296 297 newBenchedStake, err := safemath.Add64(benchedStake, validatorStake) 298 if err != nil { 299 // This should never happen 300 b.ctx.Log.Error("overflow calculating new benched stake", 301 zap.Stringer("nodeID", nodeID), 302 ) 303 return 304 } 305 306 totalStake, err := b.vdrs.TotalWeight(b.ctx.SubnetID) 307 if err != nil { 308 b.ctx.Log.Error("error calculating total stake", 309 zap.Stringer("subnetID", b.ctx.SubnetID), 310 zap.Error(err), 311 ) 312 return 313 } 314 315 maxBenchedStake := float64(totalStake) * b.maxPortion 316 317 if float64(newBenchedStake) > maxBenchedStake { 318 b.ctx.Log.Debug("not benching node", 319 zap.String("reason", "benched stake would exceed max"), 320 zap.Stringer("nodeID", nodeID), 321 zap.Float64("benchedStake", float64(newBenchedStake)), 322 zap.Float64("maxBenchedStake", maxBenchedStake), 323 ) 324 return 325 } 326 327 // Validator is benched for between [b.duration]/2 and [b.duration] 328 now := b.clock.Time() 329 minBenchDuration := b.duration / 2 330 minBenchedUntil := now.Add(minBenchDuration) 331 maxBenchedUntil := now.Add(b.duration) 332 diff := maxBenchedUntil.Sub(minBenchedUntil) 333 benchedUntil := minBenchedUntil.Add(time.Duration(rand.Float64() * float64(diff))) // #nosec G404 334 335 b.ctx.Log.Debug("benching validator after consecutive failed queries", 336 zap.Stringer("nodeID", nodeID), 337 zap.Duration("benchDuration", benchedUntil.Sub(now)), 338 zap.Int("numFailedQueries", b.threshold), 339 ) 340 341 // Add to benchlist times with randomized delay 342 b.benchlistSet.Add(nodeID) 343 b.benchable.Benched(b.ctx.ChainID, nodeID) 344 345 b.streaklock.Lock() 346 delete(b.failureStreaks, nodeID) 347 b.streaklock.Unlock() 348 349 b.benchedHeap.Push(nodeID, benchedUntil) 350 351 // Update the timer to account for the newly benched node. 352 select { 353 case b.resetTimer <- struct{}{}: 354 default: 355 } 356 357 // Update metrics 358 b.numBenched.Set(float64(b.benchedHeap.Len())) 359 b.weightBenched.Set(float64(newBenchedStake)) 360 }