github.com/bcnmy/go-ethereum@v1.10.27/p2p/msgrate/msgrate.go (about) 1 // Copyright 2021 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 // Package msgrate allows estimating the throughput of peers for more balanced syncs. 18 package msgrate 19 20 import ( 21 "errors" 22 "math" 23 "sort" 24 "sync" 25 "time" 26 27 "github.com/ethereum/go-ethereum/log" 28 ) 29 30 // measurementImpact is the impact a single measurement has on a peer's final 31 // capacity value. A value closer to 0 reacts slower to sudden network changes, 32 // but it is also more stable against temporary hiccups. 0.1 worked well for 33 // most of Ethereum's existence, so might as well go with it. 34 const measurementImpact = 0.1 35 36 // capacityOverestimation is the ratio of items to over-estimate when retrieving 37 // a peer's capacity to avoid locking into a lower value due to never attempting 38 // to fetch more than some local stable value. 39 const capacityOverestimation = 1.01 40 41 // qosTuningPeers is the number of best peers to tune round trip times based on. 
// An Ethereum node doesn't need hundreds of connections to operate correctly,
// so instead of lowering our download speed to the median of potentially many
// bad nodes, we can target a smaller set of very good nodes. At worst this will
// result in fewer nodes to sync from, but that's still better than some hogging
// the pipeline.
const qosTuningPeers = 5

const (
	// rttMinEstimate is the minimal round trip time to target requests for.
	// Since every request entails a 2 way latency + bandwidth + serving database
	// lookups, it should be generous enough to permit meaningful work to be done
	// on top of the transmission costs.
	rttMinEstimate = 2 * time.Second

	// rttMaxEstimate is the maximal round trip time to target requests for.
	// Although the expectation is that a well connected node will never reach
	// this, certain special connectivity ones might experience significant
	// delays (e.g. satellite uplink with 3s RTT). This value should be low
	// enough to forbid stalling the pipeline too long, but large enough to
	// cover the worst of the worst links.
	rttMaxEstimate = 20 * time.Second

	// rttPushdownFactor is a multiplier to attempt forcing quicker requests
	// than what the message rate tracker estimates. The reason is that message
	// rate tracking adapts queries to the RTT, but multiple RTT values can be
	// perfectly valid, they just result in higher packet sizes. Since smaller
	// packets almost always result in stabler download streams, this factor
	// hones in on the lowest RTT from all the functional ones.
	rttPushdownFactor = 0.9

	// rttMinConfidence is the minimum value the roundtrip confidence factor may
	// drop to. Since the target timeouts are based on how confident the tracker
	// is in the true roundtrip, it's important to not allow too huge
	// fluctuations.
	rttMinConfidence = 0.1

	// ttlScaling is the multiplier that converts the estimated roundtrip time
	// to a timeout cap for network requests. The expectation is that peers'
	// response time will fluctuate around the estimated roundtrip, but
	// depending on their load at request time, it might be higher than
	// anticipated. This scaling factor ensures that we allow remote connections
	// some slack but at the same time do enforce a behavior similar to our
	// median peers.
	ttlScaling = 3

	// ttlLimit is the maximum timeout allowance to prevent reaching crazy
	// numbers if some unforeseen network events happen. As much as we try to
	// hone in on the most optimal values, it doesn't make any sense to go above
	// a threshold, even if everything is slow and screwy.
	ttlLimit = time.Minute

	// tuningConfidenceCap is the number of active peers above which to stop
	// detuning the confidence number. The idea here is that once we hone in on
	// the capacity of a meaningful number of peers, adding one more should not
	// have a significant impact on things, so just run with the originals.
	tuningConfidenceCap = 10

	// tuningImpact is the influence that a new tuning target has on the
	// previously cached value. This number is mostly just an out-of-the-blue
	// heuristic that prevents the estimates from jumping around. There's no
	// particular reason for the current value.
	tuningImpact = 0.25
)

// Tracker estimates the throughput capacity of a peer with regard to each data
// type it can deliver. The goal is to dynamically adjust request sizes to max
// out network throughput without overloading either the peer or the local node.
//
// By tracking in real time the latencies and bandwidths peers exhibit for each
// packet type, it's possible to prevent overloading by detecting a slowdown on
// one type when another type is pushed too hard.
108 // 109 // Similarly, real time measurements also help avoid overloading the local net 110 // connection if our peers would otherwise be capable to deliver more, but the 111 // local link is saturated. In that case, the live measurements will force us 112 // to reduce request sizes until the throughput gets stable. 113 // 114 // Lastly, message rate measurements allows us to detect if a peer is unusually 115 // slow compared to other peers, in which case we can decide to keep it around 116 // or free up the slot so someone closer. 117 // 118 // Since throughput tracking and estimation adapts dynamically to live network 119 // conditions, it's fine to have multiple trackers locally track the same peer 120 // in different subsystem. The throughput will simply be distributed across the 121 // two trackers if both are highly active. 122 type Tracker struct { 123 // capacity is the number of items retrievable per second of a given type. 124 // It is analogous to bandwidth, but we deliberately avoided using bytes 125 // as the unit, since serving nodes also spend a lot of time loading data 126 // from disk, which is linear in the number of items, but mostly constant 127 // in their sizes. 128 // 129 // Callers of course are free to use the item counter as a byte counter if 130 // or when their protocol of choice if capped by bytes instead of items. 131 // (eg. eth.getHeaders vs snap.getAccountRange). 132 capacity map[uint64]float64 133 134 // roundtrip is the latency a peer in general responds to data requests. 135 // This number is not used inside the tracker, but is exposed to compare 136 // peers to each other and filter out slow ones. Note however, it only 137 // makes sense to compare RTTs if the caller caters request sizes for 138 // each peer to target the same RTT. There's no need to make this number 139 // the real networking RTT, we just need a number to compare peers with. 
140 roundtrip time.Duration 141 142 lock sync.RWMutex 143 } 144 145 // NewTracker creates a new message rate tracker for a specific peer. An initial 146 // RTT is needed to avoid a peer getting marked as an outlier compared to others 147 // right after joining. It's suggested to use the median rtt across all peers to 148 // init a new peer tracker. 149 func NewTracker(caps map[uint64]float64, rtt time.Duration) *Tracker { 150 if caps == nil { 151 caps = make(map[uint64]float64) 152 } 153 return &Tracker{ 154 capacity: caps, 155 roundtrip: rtt, 156 } 157 } 158 159 // Capacity calculates the number of items the peer is estimated to be able to 160 // retrieve within the allotted time slot. The method will round up any division 161 // errors and will add an additional overestimation ratio on top. The reason for 162 // overshooting the capacity is because certain message types might not increase 163 // the load proportionally to the requested items, so fetching a bit more might 164 // still take the same RTT. By forcefully overshooting by a small amount, we can 165 // avoid locking into a lower-that-real capacity. 166 func (t *Tracker) Capacity(kind uint64, targetRTT time.Duration) int { 167 t.lock.RLock() 168 defer t.lock.RUnlock() 169 170 // Calculate the actual measured throughput 171 throughput := t.capacity[kind] * float64(targetRTT) / float64(time.Second) 172 173 // Return an overestimation to force the peer out of a stuck minima, adding 174 // +1 in case the item count is too low for the overestimator to dent 175 return roundCapacity(1 + capacityOverestimation*throughput) 176 } 177 178 // roundCapacity gives the integer value of a capacity. 179 // The result fits int32, and is guaranteed to be positive. 180 func roundCapacity(cap float64) int { 181 const maxInt32 = float64(1<<31 - 1) 182 return int(math.Min(maxInt32, math.Max(1, math.Ceil(cap)))) 183 } 184 185 // Update modifies the peer's capacity values for a specific data type with a new 186 // measurement. 
If the delivery is zero, the peer is assumed to have either timed 187 // out or to not have the requested data, resulting in a slash to 0 capacity. This 188 // avoids assigning the peer retrievals that it won't be able to honour. 189 func (t *Tracker) Update(kind uint64, elapsed time.Duration, items int) { 190 t.lock.Lock() 191 defer t.lock.Unlock() 192 193 // If nothing was delivered (timeout / unavailable data), reduce throughput 194 // to minimum 195 if items == 0 { 196 t.capacity[kind] = 0 197 return 198 } 199 // Otherwise update the throughput with a new measurement 200 if elapsed <= 0 { 201 elapsed = 1 // +1 (ns) to ensure non-zero divisor 202 } 203 measured := float64(items) / (float64(elapsed) / float64(time.Second)) 204 205 t.capacity[kind] = (1-measurementImpact)*(t.capacity[kind]) + measurementImpact*measured 206 t.roundtrip = time.Duration((1-measurementImpact)*float64(t.roundtrip) + measurementImpact*float64(elapsed)) 207 } 208 209 // Trackers is a set of message rate trackers across a number of peers with the 210 // goal of aggregating certain measurements across the entire set for outlier 211 // filtering and newly joining initialization. 212 type Trackers struct { 213 trackers map[string]*Tracker 214 215 // roundtrip is the current best guess as to what is a stable round trip time 216 // across the entire collection of connected peers. This is derived from the 217 // various trackers added, but is used as a cache to avoid recomputing on each 218 // network request. The value is updated once every RTT to avoid fluctuations 219 // caused by hiccups or peer events. 220 roundtrip time.Duration 221 222 // confidence represents the probability that the estimated roundtrip value 223 // is the real one across all our peers. The confidence value is used as an 224 // impact factor of new measurements on old estimates. As our connectivity 225 // stabilizes, this value gravitates towards 1, new measurements havinng 226 // almost no impact. 
If there's a large peer churn and few peers, then new 227 // measurements will impact it more. The confidence is increased with every 228 // packet and dropped with every new connection. 229 confidence float64 230 231 // tuned is the time instance the tracker recalculated its cached roundtrip 232 // value and confidence values. A cleaner way would be to have a heartbeat 233 // goroutine do it regularly, but that requires a lot of maintenance to just 234 // run every now and again. 235 tuned time.Time 236 237 // The fields below can be used to override certain default values. Their 238 // purpose is to allow quicker tests. Don't use them in production. 239 OverrideTTLLimit time.Duration 240 241 log log.Logger 242 lock sync.RWMutex 243 } 244 245 // NewTrackers creates an empty set of trackers to be filled with peers. 246 func NewTrackers(log log.Logger) *Trackers { 247 return &Trackers{ 248 trackers: make(map[string]*Tracker), 249 roundtrip: rttMaxEstimate, 250 confidence: 1, 251 tuned: time.Now(), 252 OverrideTTLLimit: ttlLimit, 253 log: log, 254 } 255 } 256 257 // Track inserts a new tracker into the set. 258 func (t *Trackers) Track(id string, tracker *Tracker) error { 259 t.lock.Lock() 260 defer t.lock.Unlock() 261 262 if _, ok := t.trackers[id]; ok { 263 return errors.New("already tracking") 264 } 265 t.trackers[id] = tracker 266 t.detune() 267 268 return nil 269 } 270 271 // Untrack stops tracking a previously added peer. 272 func (t *Trackers) Untrack(id string) error { 273 t.lock.Lock() 274 defer t.lock.Unlock() 275 276 if _, ok := t.trackers[id]; !ok { 277 return errors.New("not tracking") 278 } 279 delete(t.trackers, id) 280 return nil 281 } 282 283 // MedianRoundTrip returns the median RTT across all known trackers. The purpose 284 // of the median RTT is to initialize a new peer with sane statistics that it will 285 // hopefully outperform. 
If it seriously underperforms, there's a risk of dropping 286 // the peer, but that is ok as we're aiming for a strong median. 287 func (t *Trackers) MedianRoundTrip() time.Duration { 288 t.lock.RLock() 289 defer t.lock.RUnlock() 290 291 return t.medianRoundTrip() 292 } 293 294 // medianRoundTrip is the internal lockless version of MedianRoundTrip to be used 295 // by the QoS tuner. 296 func (t *Trackers) medianRoundTrip() time.Duration { 297 // Gather all the currently measured round trip times 298 rtts := make([]float64, 0, len(t.trackers)) 299 for _, tt := range t.trackers { 300 tt.lock.RLock() 301 rtts = append(rtts, float64(tt.roundtrip)) 302 tt.lock.RUnlock() 303 } 304 sort.Float64s(rtts) 305 306 median := rttMaxEstimate 307 if qosTuningPeers <= len(rtts) { 308 median = time.Duration(rtts[qosTuningPeers/2]) // Median of our best few peers 309 } else if len(rtts) > 0 { 310 median = time.Duration(rtts[len(rtts)/2]) // Median of all out connected peers 311 } 312 // Restrict the RTT into some QoS defaults, irrelevant of true RTT 313 if median < rttMinEstimate { 314 median = rttMinEstimate 315 } 316 if median > rttMaxEstimate { 317 median = rttMaxEstimate 318 } 319 return median 320 } 321 322 // MeanCapacities returns the capacities averaged across all the added trackers. 323 // The purpos of the mean capacities are to initialize a new peer with some sane 324 // starting values that it will hopefully outperform. If the mean overshoots, the 325 // peer will be cut back to minimal capacity and given another chance. 326 func (t *Trackers) MeanCapacities() map[uint64]float64 { 327 t.lock.RLock() 328 defer t.lock.RUnlock() 329 330 return t.meanCapacities() 331 } 332 333 // meanCapacities is the internal lockless version of MeanCapacities used for 334 // debug logging. 
335 func (t *Trackers) meanCapacities() map[uint64]float64 { 336 capacities := make(map[uint64]float64) 337 for _, tt := range t.trackers { 338 tt.lock.RLock() 339 for key, val := range tt.capacity { 340 capacities[key] += val 341 } 342 tt.lock.RUnlock() 343 } 344 for key, val := range capacities { 345 capacities[key] = val / float64(len(t.trackers)) 346 } 347 return capacities 348 } 349 350 // TargetRoundTrip returns the current target round trip time for a request to 351 // complete in.The returned RTT is slightly under the estimated RTT. The reason 352 // is that message rate estimation is a 2 dimensional problem which is solvable 353 // for any RTT. The goal is to gravitate towards smaller RTTs instead of large 354 // messages, to result in a stabler download stream. 355 func (t *Trackers) TargetRoundTrip() time.Duration { 356 // Recalculate the internal caches if it's been a while 357 t.tune() 358 359 // Caches surely recent, return target roundtrip 360 t.lock.RLock() 361 defer t.lock.RUnlock() 362 363 return time.Duration(float64(t.roundtrip) * rttPushdownFactor) 364 } 365 366 // TargetTimeout returns the timeout allowance for a single request to finish 367 // under. The timeout is proportional to the roundtrip, but also takes into 368 // consideration the tracker's confidence in said roundtrip and scales it 369 // accordingly. The final value is capped to avoid runaway requests. 370 func (t *Trackers) TargetTimeout() time.Duration { 371 // Recalculate the internal caches if it's been a while 372 t.tune() 373 374 // Caches surely recent, return target timeout 375 t.lock.RLock() 376 defer t.lock.RUnlock() 377 378 return t.targetTimeout() 379 } 380 381 // targetTimeout is the internal lockless version of TargetTimeout to be used 382 // during QoS tuning. 
383 func (t *Trackers) targetTimeout() time.Duration { 384 timeout := time.Duration(ttlScaling * float64(t.roundtrip) / t.confidence) 385 if timeout > t.OverrideTTLLimit { 386 timeout = t.OverrideTTLLimit 387 } 388 return timeout 389 } 390 391 // tune gathers the individual tracker statistics and updates the estimated 392 // request round trip time. 393 func (t *Trackers) tune() { 394 // Tune may be called concurrently all over the place, but we only want to 395 // periodically update and even then only once. First check if it was updated 396 // recently and abort if so. 397 t.lock.RLock() 398 dirty := time.Since(t.tuned) > t.roundtrip 399 t.lock.RUnlock() 400 if !dirty { 401 return 402 } 403 // If an update is needed, obtain a write lock but make sure we don't update 404 // it on all concurrent threads one by one. 405 t.lock.Lock() 406 defer t.lock.Unlock() 407 408 if dirty := time.Since(t.tuned) > t.roundtrip; !dirty { 409 return // A concurrent request beat us to the tuning 410 } 411 // First thread reaching the tuning point, update the estimates and return 412 t.roundtrip = time.Duration((1-tuningImpact)*float64(t.roundtrip) + tuningImpact*float64(t.medianRoundTrip())) 413 t.confidence = t.confidence + (1-t.confidence)/2 414 415 t.tuned = time.Now() 416 t.log.Debug("Recalculated msgrate QoS values", "rtt", t.roundtrip, "confidence", t.confidence, "ttl", t.targetTimeout(), "next", t.tuned.Add(t.roundtrip)) 417 t.log.Trace("Debug dump of mean capacities", "caps", log.Lazy{Fn: t.meanCapacities}) 418 } 419 420 // detune reduces the tracker's confidence in order to make fresh measurements 421 // have a larger impact on the estimates. It is meant to be used during new peer 422 // connections so they can have a proper impact on the estimates. 
423 func (t *Trackers) detune() { 424 // If we have a single peer, confidence is always 1 425 if len(t.trackers) == 1 { 426 t.confidence = 1 427 return 428 } 429 // If we have a ton of peers, don't drop the confidence since there's enough 430 // remaining to retain the same throughput 431 if len(t.trackers) >= tuningConfidenceCap { 432 return 433 } 434 // Otherwise drop the confidence factor 435 peers := float64(len(t.trackers)) 436 437 t.confidence = t.confidence * (peers - 1) / peers 438 if t.confidence < rttMinConfidence { 439 t.confidence = rttMinConfidence 440 } 441 t.log.Debug("Relaxed msgrate QoS values", "rtt", t.roundtrip, "confidence", t.confidence, "ttl", t.targetTimeout()) 442 } 443 444 // Capacity is a helper function to access a specific tracker without having to 445 // track it explicitly outside. 446 func (t *Trackers) Capacity(id string, kind uint64, targetRTT time.Duration) int { 447 t.lock.RLock() 448 defer t.lock.RUnlock() 449 450 tracker := t.trackers[id] 451 if tracker == nil { 452 return 1 // Unregister race, don't return 0, it's a dangerous number 453 } 454 return tracker.Capacity(kind, targetRTT) 455 } 456 457 // Update is a helper function to access a specific tracker without having to 458 // track it explicitly outside. 459 func (t *Trackers) Update(id string, kind uint64, elapsed time.Duration, items int) { 460 t.lock.RLock() 461 defer t.lock.RUnlock() 462 463 if tracker := t.trackers[id]; tracker != nil { 464 tracker.Update(kind, elapsed, items) 465 } 466 }