github.1485827954.workers.dev/ethereum/go-ethereum@v1.14.3/p2p/msgrate/msgrate.go

// Copyright 2021 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package msgrate allows estimating the throughput of peers for more balanced syncs.
package msgrate

import (
	"context"
	"errors"
	"math"
	"sort"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/log"
)

// measurementImpact is the impact a single measurement has on a peer's final
// capacity value. A value closer to 0 reacts slower to sudden network changes,
// but it is also more stable against temporary hiccups. 0.1 worked well for
// most of Ethereum's existence, so might as well go with it.
const measurementImpact = 0.1

// capacityOverestimation is the ratio of items to over-estimate when retrieving
// a peer's capacity to avoid locking into a lower value due to never attempting
// to fetch more than some local stable value.
const capacityOverestimation = 1.01

// rttMinEstimate is the minimal round trip time to target requests for. Since
// every request entails a 2 way latency + bandwidth + serving database lookups,
// it should be generous enough to permit meaningful work to be done on top of
// the transmission costs.
const rttMinEstimate = 2 * time.Second

// rttMaxEstimate is the maximal round trip time to target requests for. Although
// the expectation is that a well connected node will never reach this, certain
// special connectivity ones might experience significant delays (e.g. satellite
// uplink with 3s RTT). This value should be low enough to forbid stalling the
// pipeline too long, but large enough to cover the worst of the worst links.
const rttMaxEstimate = 20 * time.Second

// rttPushdownFactor is a multiplier to attempt forcing quicker requests than
// what the message rate tracker estimates. The reason is that message rate
// tracking adapts queries to the RTT, but multiple RTT values can be perfectly
// valid, they just result in higher packet sizes. Since smaller packets almost
// always result in stabler download streams, this factor hones in on the lowest
// RTT from all the functional ones.
const rttPushdownFactor = 0.9

// rttMinConfidence is the minimum value the roundtrip confidence factor may drop
// to. Since the target timeouts are based on how confident the tracker is in the
// true roundtrip, it's important to not allow too huge fluctuations.
const rttMinConfidence = 0.1
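// As a rough illustration of how the constants above interact (an illustrative
// note, not part of the upstream code): for a peer whose measured capacity for
// some message kind is 1000 items/s and a request targeted at the 2s minimum
// RTT, the request would be sized at roughly
//
//	throughput := 1000.0 * 2.0             // items deliverable within the target RTT
//	request := 1 + 1.01*throughput         // ~2021 items after overestimation
//
// while every completed delivery is folded into the running estimate with a 10%
// weight:
//
//	capacity = 0.9*capacity + 0.1*measured // EWMA governed by measurementImpact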
// ttlScaling is the multiplier that converts the estimated roundtrip time to a
// timeout cap for network requests. The expectation is that peers' response time
// will fluctuate around the estimated roundtrip, but depending on their load at
// request time, it might be higher than anticipated. This scaling factor ensures
// that we allow remote connections some slack but at the same time do enforce a
// behavior similar to our median peers.
const ttlScaling = 3

// ttlLimit is the maximum timeout allowance to prevent reaching crazy numbers
// if some unforeseen network events happen. As much as we try to hone in on
// the most optimal values, it doesn't make any sense to go above a threshold,
// even if everything is slow and screwy.
const ttlLimit = time.Minute

// tuningConfidenceCap is the number of active peers above which to stop detuning
// the confidence number. The idea here is that once we hone in on the capacity
// of a meaningful number of peers, adding one more should not have a significant
// impact on things, so just run with the originals.
const tuningConfidenceCap = 10

// tuningImpact is the influence that a new tuning target has on the previously
// cached value. This number is mostly just an out-of-the-blue heuristic that
// prevents the estimates from jumping around. There's no particular reason for
// the current value.
const tuningImpact = 0.25

// Tracker estimates the throughput capacity of a peer with regard to each data
// type it can deliver. The goal is to dynamically adjust request sizes to max
// out network throughput without overloading either the peer or the local node.
//
// By tracking in real time the latencies and bandwidths peers exhibit for each
// packet type, it's possible to prevent overloading by detecting a slowdown on
// one type when another type is pushed too hard.
//
// Similarly, real time measurements also help avoid overloading the local net
// connection if our peers would otherwise be capable of delivering more, but
// the local link is saturated. In that case, the live measurements will force
// us to reduce request sizes until the throughput gets stable.
//
// Lastly, message rate measurements allow us to detect if a peer is unusually
// slow compared to other peers, in which case we can decide to keep it around
// or free up the slot for someone closer.
//
// Since throughput tracking and estimation adapt dynamically to live network
// conditions, it's fine to have multiple trackers locally track the same peer
// in different subsystems. The throughput will simply be distributed across the
// two trackers if both are highly active.
type Tracker struct {
	// capacity is the number of items retrievable per second of a given type.
	// It is analogous to bandwidth, but we deliberately avoided using bytes
	// as the unit, since serving nodes also spend a lot of time loading data
	// from disk, which is linear in the number of items, but mostly constant
	// in their sizes.
	//
	// Callers of course are free to use the item counter as a byte counter if
	// or when their protocol of choice is capped by bytes instead of items.
	// (eg. eth.getHeaders vs snap.getAccountRange).
	capacity map[uint64]float64

	// roundtrip is the latency with which a peer in general responds to data
	// requests. This number is not used inside the tracker, but is exposed to
	// compare peers to each other and filter out slow ones. Note however, it only
	// makes sense to compare RTTs if the caller caters request sizes for
	// each peer to target the same RTT. There's no need to make this number
	// the real networking RTT, we just need a number to compare peers with.
	roundtrip time.Duration

	lock sync.RWMutex
}

// NewTracker creates a new message rate tracker for a specific peer. An initial
// RTT is needed to avoid a peer getting marked as an outlier compared to others
// right after joining. It's suggested to use the median rtt across all peers to
// init a new peer tracker.
func NewTracker(caps map[uint64]float64, rtt time.Duration) *Tracker {
	if caps == nil {
		caps = make(map[uint64]float64)
	}
	return &Tracker{
		capacity:  caps,
		roundtrip: rtt,
	}
}

// Capacity calculates the number of items the peer is estimated to be able to
// retrieve within the allotted time slot. The method will round up any division
// errors and will add an additional overestimation ratio on top. The reason for
// overshooting the capacity is because certain message types might not increase
// the load proportionally to the requested items, so fetching a bit more might
// still take the same RTT. By forcefully overshooting by a small amount, we can
// avoid locking into a lower-than-real capacity.
func (t *Tracker) Capacity(kind uint64, targetRTT time.Duration) int {
	t.lock.RLock()
	defer t.lock.RUnlock()

	// Calculate the actual measured throughput
	throughput := t.capacity[kind] * float64(targetRTT) / float64(time.Second)

	// Return an overestimation to force the peer out of a stuck minima, adding
	// +1 in case the item count is too low for the overestimator to dent
	return roundCapacity(1 + capacityOverestimation*throughput)
}

// roundCapacity gives the integer value of a capacity.
// The result fits int32, and is guaranteed to be positive.
func roundCapacity(cap float64) int {
	const maxInt32 = float64(1<<31 - 1)
	return int(math.Min(maxInt32, math.Max(1, math.Ceil(cap))))
}

// Update modifies the peer's capacity values for a specific data type with a new
// measurement. If the delivery is zero, the peer is assumed to have either timed
// out or to not have the requested data, resulting in a slash to 0 capacity. This
// avoids assigning the peer retrievals that it won't be able to honour.
func (t *Tracker) Update(kind uint64, elapsed time.Duration, items int) {
	t.lock.Lock()
	defer t.lock.Unlock()

	// If nothing was delivered (timeout / unavailable data), reduce throughput
	// to minimum
	if items == 0 {
		t.capacity[kind] = 0
		return
	}
	// Otherwise update the throughput with a new measurement
	if elapsed <= 0 {
		elapsed = 1 // +1 (ns) to ensure non-zero divisor
	}
	measured := float64(items) / (float64(elapsed) / float64(time.Second))

	t.capacity[kind] = (1-measurementImpact)*(t.capacity[kind]) + measurementImpact*measured
	t.roundtrip = time.Duration((1-measurementImpact)*float64(t.roundtrip) + measurementImpact*float64(elapsed))
}
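// exampleTrackerUsage is an illustrative sketch only (not part of the upstream
// API): it shows how a caller might drive a standalone Tracker for a
// hypothetical message kind, sizing a request against a target RTT and folding
// the delivery time back into the estimates.
func exampleTrackerUsage() {
	const kindHeaders = 0x03 // hypothetical message kind identifier

	// Start a fresh tracker with no prior capacity data and a guessed RTT.
	tracker := NewTracker(nil, rttMaxEstimate)

	// Ask how many items to request so the reply arrives in roughly 2 seconds.
	items := tracker.Capacity(kindHeaders, 2*time.Second)
	_ = items // issue a request for `items` entries here

	// Pretend the peer delivered the full request in 1.5 seconds and fold the
	// measurement into the capacity and roundtrip estimates.
	tracker.Update(kindHeaders, 1500*time.Millisecond, items)
}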
// Trackers is a set of message rate trackers across a number of peers with the
// goal of aggregating certain measurements across the entire set for outlier
// filtering and newly joining initialization.
type Trackers struct {
	trackers map[string]*Tracker

	// roundtrip is the current best guess as to what is a stable round trip time
	// across the entire collection of connected peers. This is derived from the
	// various trackers added, but is used as a cache to avoid recomputing on each
	// network request. The value is updated once every RTT to avoid fluctuations
	// caused by hiccups or peer events.
	roundtrip time.Duration

	// confidence represents the probability that the estimated roundtrip value
	// is the real one across all our peers. The confidence value is used as an
	// impact factor of new measurements on old estimates. As our connectivity
	// stabilizes, this value gravitates towards 1, new measurements having
	// almost no impact. If there's a large peer churn and few peers, then new
	// measurements will impact it more. The confidence is increased with every
	// packet and dropped with every new connection.
	confidence float64

	// tuned is the time instant at which the tracker last recalculated its
	// cached roundtrip and confidence values. A cleaner way would be to have a
	// heartbeat goroutine do it regularly, but that requires a lot of
	// maintenance to just run every now and again.
	tuned time.Time

	// The fields below can be used to override certain default values. Their
	// purpose is to allow quicker tests. Don't use them in production.
	OverrideTTLLimit time.Duration

	log  log.Logger
	lock sync.RWMutex
}

// NewTrackers creates an empty set of trackers to be filled with peers.
func NewTrackers(log log.Logger) *Trackers {
	return &Trackers{
		trackers:         make(map[string]*Tracker),
		roundtrip:        rttMaxEstimate,
		confidence:       1,
		tuned:            time.Now(),
		OverrideTTLLimit: ttlLimit,
		log:              log,
	}
}

// Track inserts a new tracker into the set.
func (t *Trackers) Track(id string, tracker *Tracker) error {
	t.lock.Lock()
	defer t.lock.Unlock()

	if _, ok := t.trackers[id]; ok {
		return errors.New("already tracking")
	}
	t.trackers[id] = tracker
	t.detune()

	return nil
}

// Untrack stops tracking a previously added peer.
func (t *Trackers) Untrack(id string) error {
	t.lock.Lock()
	defer t.lock.Unlock()

	if _, ok := t.trackers[id]; !ok {
		return errors.New("not tracking")
	}
	delete(t.trackers, id)
	return nil
}

// MedianRoundTrip returns the median RTT across all known trackers. The purpose
// of the median RTT is to initialize a new peer with sane statistics that it will
// hopefully outperform. If it seriously underperforms, there's a risk of dropping
// the peer, but that is ok as we're aiming for a strong median.
func (t *Trackers) MedianRoundTrip() time.Duration {
	t.lock.RLock()
	defer t.lock.RUnlock()

	return t.medianRoundTrip()
}
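// examplePeerLifecycle is an illustrative sketch only (not part of the upstream
// API): it shows how a newly connected peer might be seeded from the aggregate
// statistics so it is not immediately flagged as an outlier, and how it is
// removed again on disconnect. The peer id is hypothetical.
func examplePeerLifecycle(set *Trackers) {
	const id = "peer-1"

	// Seed the new peer with the mean capacities and median RTT of the set.
	if err := set.Track(id, NewTracker(set.MeanCapacities(), set.MedianRoundTrip())); err != nil {
		return // already tracking this peer
	}
	// ... exchange messages, feeding measurements back via set.Update ...

	// On disconnect, stop tracking the peer.
	set.Untrack(id)
}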
// medianRoundTrip is the internal lockless version of MedianRoundTrip to be used
// by the QoS tuner.
func (t *Trackers) medianRoundTrip() time.Duration {
	// Gather all the currently measured round trip times
	rtts := make([]float64, 0, len(t.trackers))
	for _, tt := range t.trackers {
		tt.lock.RLock()
		rtts = append(rtts, float64(tt.roundtrip))
		tt.lock.RUnlock()
	}
	sort.Float64s(rtts)

	var median time.Duration
	switch len(rtts) {
	case 0:
		median = rttMaxEstimate
	case 1:
		median = time.Duration(rtts[0])
	default:
		idx := int(math.Sqrt(float64(len(rtts))))
		median = time.Duration(rtts[idx])
	}
	// Restrict the RTT into some QoS defaults, irrespective of the true RTT
	if median < rttMinEstimate {
		median = rttMinEstimate
	}
	if median > rttMaxEstimate {
		median = rttMaxEstimate
	}
	return median
}

// MeanCapacities returns the capacities averaged across all the added trackers.
// The purpose of the mean capacities is to initialize a new peer with some sane
// starting values that it will hopefully outperform. If the mean overshoots, the
// peer will be cut back to minimal capacity and given another chance.
func (t *Trackers) MeanCapacities() map[uint64]float64 {
	t.lock.RLock()
	defer t.lock.RUnlock()

	return t.meanCapacities()
}

// meanCapacities is the internal lockless version of MeanCapacities used for
// debug logging.
func (t *Trackers) meanCapacities() map[uint64]float64 {
	capacities := make(map[uint64]float64, len(t.trackers))
	for _, tt := range t.trackers {
		tt.lock.RLock()
		for key, val := range tt.capacity {
			capacities[key] += val
		}
		tt.lock.RUnlock()
	}
	for key, val := range capacities {
		capacities[key] = val / float64(len(t.trackers))
	}
	return capacities
}

// TargetRoundTrip returns the current target round trip time for a request to
// complete in. The returned RTT is slightly under the estimated RTT. The reason
// is that message rate estimation is a 2 dimensional problem which is solvable
// for any RTT. The goal is to gravitate towards smaller RTTs instead of large
// messages, to result in a stabler download stream.
func (t *Trackers) TargetRoundTrip() time.Duration {
	// Recalculate the internal caches if it's been a while
	t.tune()

	// Caches surely recent, return target roundtrip
	t.lock.RLock()
	defer t.lock.RUnlock()

	return time.Duration(float64(t.roundtrip) * rttPushdownFactor)
}

// TargetTimeout returns the timeout allowance for a single request to finish
// under. The timeout is proportional to the roundtrip, but also takes into
// consideration the tracker's confidence in said roundtrip and scales it
// accordingly. The final value is capped to avoid runaway requests.
func (t *Trackers) TargetTimeout() time.Duration {
	// Recalculate the internal caches if it's been a while
	t.tune()

	// Caches surely recent, return target timeout
	t.lock.RLock()
	defer t.lock.RUnlock()

	return t.targetTimeout()
}

// targetTimeout is the internal lockless version of TargetTimeout to be used
// during QoS tuning.
func (t *Trackers) targetTimeout() time.Duration {
	timeout := time.Duration(ttlScaling * float64(t.roundtrip) / t.confidence)
	if timeout > t.OverrideTTLLimit {
		timeout = t.OverrideTTLLimit
	}
	return timeout
}
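// exampleRequestSizing is an illustrative sketch only (not part of the upstream
// API): it shows how a request scheduler might combine the QoS estimates, sizing
// a request by the target roundtrip and bounding it by the confidence-scaled
// timeout. The message kind is hypothetical.
func exampleRequestSizing(set *Trackers, id string) {
	const kindAccounts = 0x21 // hypothetical message kind identifier

	// Size the request so the reply should arrive within the target roundtrip.
	items := set.Capacity(id, kindAccounts, set.TargetRoundTrip())

	// Arm a timer with the timeout allowance before sending the request.
	timeout := set.TargetTimeout()
	_ = timeout // e.g. time.AfterFunc(timeout, dropPeer) in a real scheduler

	// Once the reply arrives, report the measured delivery back.
	start := time.Now()
	// ... send the request for `items` entries and wait for the response ...
	set.Update(id, kindAccounts, time.Since(start), items)
}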
// tune gathers the individual tracker statistics and updates the estimated
// request round trip time.
func (t *Trackers) tune() {
	// Tune may be called concurrently all over the place, but we only want to
	// periodically update and even then only once. First check if it was updated
	// recently and abort if so.
	t.lock.RLock()
	dirty := time.Since(t.tuned) > t.roundtrip
	t.lock.RUnlock()
	if !dirty {
		return
	}
	// If an update is needed, obtain a write lock but make sure we don't update
	// it on all concurrent threads one by one.
	t.lock.Lock()
	defer t.lock.Unlock()

	if dirty := time.Since(t.tuned) > t.roundtrip; !dirty {
		return // A concurrent request beat us to the tuning
	}
	// First thread reaching the tuning point, update the estimates and return
	t.roundtrip = time.Duration((1-tuningImpact)*float64(t.roundtrip) + tuningImpact*float64(t.medianRoundTrip()))
	t.confidence = t.confidence + (1-t.confidence)/2

	t.tuned = time.Now()
	t.log.Debug("Recalculated msgrate QoS values", "rtt", t.roundtrip, "confidence", t.confidence, "ttl", t.targetTimeout(), "next", t.tuned.Add(t.roundtrip))
	if t.log.Enabled(context.Background(), log.LevelTrace) {
		t.log.Trace("Debug dump of mean capacities", "caps", t.meanCapacities())
	}
}

// detune reduces the tracker's confidence in order to make fresh measurements
// have a larger impact on the estimates. It is meant to be used during new peer
// connections so they can have a proper impact on the estimates.
func (t *Trackers) detune() {
	// If we have a single peer, confidence is always 1
	if len(t.trackers) == 1 {
		t.confidence = 1
		return
	}
	// If we have a ton of peers, don't drop the confidence since there's enough
	// remaining to retain the same throughput
	if len(t.trackers) >= tuningConfidenceCap {
		return
	}
	// Otherwise drop the confidence factor
	peers := float64(len(t.trackers))

	t.confidence = t.confidence * (peers - 1) / peers
	if t.confidence < rttMinConfidence {
		t.confidence = rttMinConfidence
	}
	t.log.Debug("Relaxed msgrate QoS values", "rtt", t.roundtrip, "confidence", t.confidence, "ttl", t.targetTimeout())
}

// Capacity is a helper function to access a specific tracker without having to
// track it explicitly outside.
func (t *Trackers) Capacity(id string, kind uint64, targetRTT time.Duration) int {
	t.lock.RLock()
	defer t.lock.RUnlock()

	tracker := t.trackers[id]
	if tracker == nil {
		return 1 // Unregister race, don't return 0, it's a dangerous number
	}
	return tracker.Capacity(kind, targetRTT)
}

// Update is a helper function to access a specific tracker without having to
// track it explicitly outside.
func (t *Trackers) Update(id string, kind uint64, elapsed time.Duration, items int) {
	t.lock.RLock()
	defer t.lock.RUnlock()

	if tracker := t.trackers[id]; tracker != nil {
		tracker.Update(kind, elapsed, items)
	}
}
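// exampleSlowestPeer is an illustrative sketch only (not part of the upstream
// API): it shows how a caller that keeps its own list of peer ids could use the
// capacity estimates to find the slowest peer for a given message kind, e.g. as
// a candidate to drop in favour of a better connected one.
func exampleSlowestPeer(set *Trackers, ids []string, kind uint64) string {
	var (
		slowest string
		lowest  = -1 // sentinel: Capacity always returns at least 1
	)
	target := set.TargetRoundTrip()
	for _, id := range ids {
		if items := set.Capacity(id, kind, target); lowest < 0 || items < lowest {
			slowest, lowest = id, items
		}
	}
	return slowest
}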