// Modifications Copyright 2022 The klaytn Authors
// Copyright 2021 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
//
// This file is derived from p2p/msgrate/msgrate.go (2022/06/29).
// Modified and improved for the klaytn development.

// Package msgrate allows estimating the throughput of peers for more balanced syncs.

package msgrate

import (
	"errors"
	"math"
	"sort"
	"sync"
	"time"

	"github.com/klaytn/klaytn/log"
)

// measurementImpact is the impact a single measurement has on a peer's final
// capacity value. A value closer to 0 reacts slower to sudden network changes,
// but it is also more stable against temporary hiccups. 0.1 worked well for
// most of Ethereum's existence, so might as well go with it.
const measurementImpact = 0.1

// capacityOverestimation is the ratio of items to over-estimate when retrieving
// a peer's capacity to avoid locking into a lower value due to never attempting
// to fetch more than some local stable value.
const capacityOverestimation = 1.01

// qosTuningPeers is the number of best peers to tune round trip times based on.
// An Ethereum node doesn't need hundreds of connections to operate correctly,
// so instead of lowering our download speed to the median of potentially many
// bad nodes, we can target a smaller set of very good nodes. At worst this will
// result in fewer nodes to sync from, but that's still better than some hogging
// the pipeline.
const qosTuningPeers = 5

// rttMinEstimate is the minimal round trip time to target requests for. Since
// every request entails a 2 way latency + bandwidth + serving database lookups,
// it should be generous enough to permit meaningful work to be done on top of
// the transmission costs.
const rttMinEstimate = 2 * time.Second

// rttMaxEstimate is the maximal round trip time to target requests for. Although
// the expectation is that a well connected node will never reach this, certain
// special connectivity ones might experience significant delays (e.g. satellite
// uplink with 3s RTT). This value should be low enough to forbid stalling the
// pipeline too long, but large enough to cover the worst of the worst links.
const rttMaxEstimate = 20 * time.Second

// rttPushdownFactor is a multiplier to attempt forcing quicker requests than
// what the message rate tracker estimates. The reason is that message rate
// tracking adapts queries to the RTT, but multiple RTT values can be perfectly
// valid, they just result in higher packet sizes. Since smaller packets almost
// always result in stabler download streams, this factor hones in on the lowest
// RTT from all the functional ones.
const rttPushdownFactor = 0.9

// rttMinConfidence is the minimum value the roundtrip confidence factor may drop
// to. Since the target timeouts are based on how confident the tracker is in the
// true roundtrip, it's important to not allow too huge fluctuations.
const rttMinConfidence = 0.1

// ttlScaling is the multiplier that converts the estimated roundtrip time to a
// timeout cap for network requests. The expectation is that peers' response time
// will fluctuate around the estimated roundtrip, but depending on their load at
// request time, it might be higher than anticipated. This scaling factor ensures
// that we allow remote connections some slack but at the same time do enforce a
// behavior similar to our median peers.
const ttlScaling = 3

// ttlLimit is the maximum timeout allowance to prevent reaching crazy numbers
// if some unforeseen network events happen. As much as we try to hone in on
// the most optimal values, it doesn't make any sense to go above a threshold,
// even if everything is slow and screwy.
const ttlLimit = time.Minute

// tuningConfidenceCap is the number of active peers above which to stop detuning
// the confidence number. The idea here is that once we hone in on the capacity
// of a meaningful number of peers, adding one more should not have a significant
// impact on things, so just run with the originals.
const tuningConfidenceCap = 10

// tuningImpact is the influence that a new tuning target has on the previously
// cached value. This number is mostly just an out-of-the-blue heuristic that
// prevents the estimates from jumping around. There's no particular reason for
// the current value.
const tuningImpact = 0.25

// Tracker estimates the throughput capacity of a peer with regard to each data
// type it can deliver. The goal is to dynamically adjust request sizes to max
// out network throughput without overloading either the peer or the local node.
109 // 110 // By tracking in real time the latencies and bandiwdths peers exhibit for each 111 // packet type, it's possible to prevent overloading by detecting a slowdown on 112 // one type when another type is pushed too hard. 113 // 114 // Similarly, real time measurements also help avoid overloading the local net 115 // connection if our peers would otherwise be capable to deliver more, but the 116 // local link is saturated. In that case, the live measurements will force us 117 // to reduce request sizes until the throughput gets stable. 118 // 119 // Lastly, message rate measurements allows us to detect if a peer is unsuaully 120 // slow compared to other peers, in which case we can decide to keep it around 121 // or free up the slot so someone closer. 122 // 123 // Since throughput tracking and estimation adapts dynamically to live network 124 // conditions, it's fine to have multiple trackers locally track the same peer 125 // in different subsystem. The throughput will simply be distributed across the 126 // two trackers if both are highly active. 127 type Tracker struct { 128 // capacity is the number of items retrievable per second of a given type. 129 // It is analogous to bandwidth, but we deliberately avoided using bytes 130 // as the unit, since serving nodes also spend a lot of time loading data 131 // from disk, which is linear in the number of items, but mostly constant 132 // in their sizes. 133 // 134 // Callers of course are free to use the item counter as a byte counter if 135 // or when their protocol of choise if capped by bytes instead of items. 136 // (eg. eth.getHeaders vs snap.getAccountRange). 137 capacity map[uint64]float64 138 139 // roundtrip is the latency a peer in general responds to data requests. 140 // This number is not used inside the tracker, but is exposed to compare 141 // peers to each other and filter out slow ones. 
Note however, it only 142 // makes sense to compare RTTs if the caller caters request sizes for 143 // each peer to target the same RTT. There's no need to make this number 144 // the real networking RTT, we just need a number to compare peers with. 145 roundtrip time.Duration 146 147 lock sync.RWMutex 148 } 149 150 // NewTracker creates a new message rate tracker for a specific peer. An initial 151 // RTT is needed to avoid a peer getting marked as an outlier compared to others 152 // right after joining. It's suggested to use the median rtt across all peers to 153 // init a new peer tracker. 154 func NewTracker(caps map[uint64]float64, rtt time.Duration) *Tracker { 155 if caps == nil { 156 caps = make(map[uint64]float64) 157 } 158 return &Tracker{ 159 capacity: caps, 160 roundtrip: rtt, 161 } 162 } 163 164 // Capacity calculates the number of items the peer is estimated to be able to 165 // retrieve within the alloted time slot. The method will round up any division 166 // errors and will add an additional overestimation ratio on top. The reason for 167 // overshooting the capacity is because certain message types might not increase 168 // the load proportionally to the requested items, so fetching a bit more might 169 // still take the same RTT. By forcefully overshooting by a small amount, we can 170 // avoid locking into a lower-that-real capacity. 171 func (t *Tracker) Capacity(kind uint64, targetRTT time.Duration) int { 172 t.lock.RLock() 173 defer t.lock.RUnlock() 174 175 // Calculate the actual measured throughput 176 throughput := t.capacity[kind] * float64(targetRTT) / float64(time.Second) 177 178 // Return an overestimation to force the peer out of a stuck minima, adding 179 // +1 in case the item count is too low for the overestimator to dent 180 return roundCapacity(1 + capacityOverestimation*throughput) 181 } 182 183 // roundCapacity gives the integer value of a capacity. 184 // The result fits int32, and is guaranteed to be positive. 
185 func roundCapacity(cap float64) int { 186 const maxInt32 = float64(1<<31 - 1) 187 return int(math.Min(maxInt32, math.Max(1, math.Ceil(cap)))) 188 } 189 190 // Update modifies the peer's capacity values for a specific data type with a new 191 // measurement. If the delivery is zero, the peer is assumed to have either timed 192 // out or to not have the requested data, resulting in a slash to 0 capacity. This 193 // avoids assigning the peer retrievals that it won't be able to honour. 194 func (t *Tracker) Update(kind uint64, elapsed time.Duration, items int) { 195 t.lock.Lock() 196 defer t.lock.Unlock() 197 198 // If nothing was delivered (timeout / unavailable data), reduce throughput 199 // to minimum 200 if items == 0 { 201 t.capacity[kind] = 0 202 return 203 } 204 // Otherwise update the throughput with a new measurement 205 if elapsed <= 0 { 206 elapsed = 1 // +1 (ns) to ensure non-zero divisor 207 } 208 measured := float64(items) / (float64(elapsed) / float64(time.Second)) 209 210 t.capacity[kind] = (1-measurementImpact)*(t.capacity[kind]) + measurementImpact*measured 211 t.roundtrip = time.Duration((1-measurementImpact)*float64(t.roundtrip) + measurementImpact*float64(elapsed)) 212 } 213 214 // Trackers is a set of message rate trackers across a number of peers with the 215 // goal of aggregating certain measurements across the entire set for outlier 216 // filtering and newly joining initialization. 217 type Trackers struct { 218 trackers map[string]*Tracker 219 220 // roundtrip is the current best guess as to what is a stable round trip time 221 // across the entire collection of connected peers. This is derived from the 222 // various trackers added, but is used as a cache to avoid recomputing on each 223 // network request. The value is updated once every RTT to avoid fluctuations 224 // caused by hiccups or peer events. 
225 roundtrip time.Duration 226 227 // confidence represents the probability that the estimated roundtrip value 228 // is the real one across all our peers. The confidence value is used as an 229 // impact factor of new measurements on old estimates. As our connectivity 230 // stabilizes, this value gravitates towards 1, new measurements havinng 231 // almost no impact. If there's a large peer churn and few peers, then new 232 // measurements will impact it more. The confidence is increased with every 233 // packet and dropped with every new connection. 234 confidence float64 235 236 // tuned is the time instance the tracker recalculated its cached roundtrip 237 // value and confidence values. A cleaner way would be to have a heartbeat 238 // goroutine do it regularly, but that requires a lot of maintenance to just 239 // run every now and again. 240 tuned time.Time 241 242 // The fields below can be used to override certain default values. Their 243 // purpose is to allow quicker tests. Don't use them in production. 244 OverrideTTLLimit time.Duration 245 246 log log.Logger 247 lock sync.RWMutex 248 } 249 250 // NewTrackers creates an empty set of trackers to be filled with peers. 251 func NewTrackers(log log.Logger) *Trackers { 252 return &Trackers{ 253 trackers: make(map[string]*Tracker), 254 roundtrip: rttMaxEstimate, 255 confidence: 1, 256 tuned: time.Now(), 257 OverrideTTLLimit: ttlLimit, 258 log: log, 259 } 260 } 261 262 // Track inserts a new tracker into the set. 263 func (t *Trackers) Track(id string, tracker *Tracker) error { 264 t.lock.Lock() 265 defer t.lock.Unlock() 266 267 if _, ok := t.trackers[id]; ok { 268 return errors.New("already tracking") 269 } 270 t.trackers[id] = tracker 271 t.detune() 272 273 return nil 274 } 275 276 // Untrack stops tracking a previously added peer. 
277 func (t *Trackers) Untrack(id string) error { 278 t.lock.Lock() 279 defer t.lock.Unlock() 280 281 if _, ok := t.trackers[id]; !ok { 282 return errors.New("not tracking") 283 } 284 delete(t.trackers, id) 285 return nil 286 } 287 288 // MedianRoundTrip returns the median RTT across all known trackers. The purpose 289 // of the median RTT is to initialize a new peer with sane statistics that it will 290 // hopefully outperform. If it seriously underperforms, there's a risk of dropping 291 // the peer, but that is ok as we're aiming for a strong median. 292 func (t *Trackers) MedianRoundTrip() time.Duration { 293 t.lock.RLock() 294 defer t.lock.RUnlock() 295 296 return t.medianRoundTrip() 297 } 298 299 // medianRoundTrip is the internal lockless version of MedianRoundTrip to be used 300 // by the QoS tuner. 301 func (t *Trackers) medianRoundTrip() time.Duration { 302 // Gather all the currently measured round trip times 303 rtts := make([]float64, 0, len(t.trackers)) 304 for _, tt := range t.trackers { 305 tt.lock.RLock() 306 rtts = append(rtts, float64(tt.roundtrip)) 307 tt.lock.RUnlock() 308 } 309 sort.Float64s(rtts) 310 311 median := rttMaxEstimate 312 if qosTuningPeers <= len(rtts) { 313 median = time.Duration(rtts[qosTuningPeers/2]) // Median of our best few peers 314 } else if len(rtts) > 0 { 315 median = time.Duration(rtts[len(rtts)/2]) // Median of all out connected peers 316 } 317 // Restrict the RTT into some QoS defaults, irrelevant of true RTT 318 if median < rttMinEstimate { 319 median = rttMinEstimate 320 } 321 if median > rttMaxEstimate { 322 median = rttMaxEstimate 323 } 324 return median 325 } 326 327 // MeanCapacities returns the capacities averaged across all the added trackers. 328 // The purpos of the mean capacities are to initialize a new peer with some sane 329 // starting values that it will hopefully outperform. If the mean overshoots, the 330 // peer will be cut back to minimal capacity and given another chance. 
331 func (t *Trackers) MeanCapacities() map[uint64]float64 { 332 t.lock.RLock() 333 defer t.lock.RUnlock() 334 335 return t.meanCapacities() 336 } 337 338 // meanCapacities is the internal lockless version of MeanCapacities used for 339 // debug logging. 340 func (t *Trackers) meanCapacities() map[uint64]float64 { 341 capacities := make(map[uint64]float64) 342 for _, tt := range t.trackers { 343 tt.lock.RLock() 344 for key, val := range tt.capacity { 345 capacities[key] += val 346 } 347 tt.lock.RUnlock() 348 } 349 for key, val := range capacities { 350 capacities[key] = val / float64(len(t.trackers)) 351 } 352 return capacities 353 } 354 355 // TargetRoundTrip returns the current target round trip time for a request to 356 // complete in.The returned RTT is slightly under the estimated RTT. The reason 357 // is that message rate estimation is a 2 dimensional problem which is solvable 358 // for any RTT. The goal is to gravitate towards smaller RTTs instead of large 359 // messages, to result in a stabler download stream. 360 func (t *Trackers) TargetRoundTrip() time.Duration { 361 // Recalculate the internal caches if it's been a while 362 t.tune() 363 364 // Caches surely recent, return target roundtrip 365 t.lock.RLock() 366 defer t.lock.RUnlock() 367 368 return time.Duration(float64(t.roundtrip) * rttPushdownFactor) 369 } 370 371 // TargetTimeout returns the timeout allowance for a single request to finish 372 // under. The timeout is proportional to the roundtrip, but also takes into 373 // consideration the tracker's confidence in said roundtrip and scales it 374 // accordingly. The final value is capped to avoid runaway requests. 
375 func (t *Trackers) TargetTimeout() time.Duration { 376 // Recalculate the internal caches if it's been a while 377 t.tune() 378 379 // Caches surely recent, return target timeout 380 t.lock.RLock() 381 defer t.lock.RUnlock() 382 383 return t.targetTimeout() 384 } 385 386 // targetTimeout is the internal lockless version of TargetTimeout to be used 387 // during QoS tuning. 388 func (t *Trackers) targetTimeout() time.Duration { 389 timeout := time.Duration(ttlScaling * float64(t.roundtrip) / t.confidence) 390 if timeout > t.OverrideTTLLimit { 391 timeout = t.OverrideTTLLimit 392 } 393 return timeout 394 } 395 396 // tune gathers the individual tracker statistics and updates the estimated 397 // request round trip time. 398 func (t *Trackers) tune() { 399 // Tune may be called concurrently all over the place, but we only want to 400 // periodically update and even then only once. First check if it was updated 401 // recently and abort if so. 402 t.lock.RLock() 403 dirty := time.Since(t.tuned) > t.roundtrip 404 t.lock.RUnlock() 405 if !dirty { 406 return 407 } 408 // If an update is needed, obtain a write lock but make sure we don't update 409 // it on all concurrent threads one by one. 
410 t.lock.Lock() 411 defer t.lock.Unlock() 412 413 if dirty := time.Since(t.tuned) > t.roundtrip; !dirty { 414 return // A concurrent request beat us to the tuning 415 } 416 // First thread reaching the tuning point, update the estimates and return 417 t.roundtrip = time.Duration((1-tuningImpact)*float64(t.roundtrip) + tuningImpact*float64(t.medianRoundTrip())) 418 t.confidence = t.confidence + (1-t.confidence)/2 419 420 t.tuned = time.Now() 421 t.log.Debug("Recalculated msgrate QoS values", "rtt", t.roundtrip, "confidence", t.confidence, "ttl", t.targetTimeout(), "next", t.tuned.Add(t.roundtrip)) 422 t.log.Trace("Debug dump of mean capacities", "caps", log.Lazy{Fn: t.meanCapacities}) 423 } 424 425 // detune reduces the tracker's confidence in order to make fresh measurements 426 // have a larger impact on the estimates. It is meant to be used during new peer 427 // connections so they can have a proper impact on the estimates. 428 func (t *Trackers) detune() { 429 // If we have a single peer, confidence is always 1 430 if len(t.trackers) == 1 { 431 t.confidence = 1 432 return 433 } 434 // If we have a ton of peers, don't drop the confidence since there's enough 435 // remaining to retain the same throughput 436 if len(t.trackers) >= tuningConfidenceCap { 437 return 438 } 439 // Otherwise drop the confidence factor 440 peers := float64(len(t.trackers)) 441 442 t.confidence = t.confidence * (peers - 1) / peers 443 if t.confidence < rttMinConfidence { 444 t.confidence = rttMinConfidence 445 } 446 t.log.Debug("Relaxed msgrate QoS values", "rtt", t.roundtrip, "confidence", t.confidence, "ttl", t.targetTimeout()) 447 } 448 449 // Capacity is a helper function to access a specific tracker without having to 450 // track it explicitly outside. 
451 func (t *Trackers) Capacity(id string, kind uint64, targetRTT time.Duration) int { 452 t.lock.RLock() 453 defer t.lock.RUnlock() 454 455 tracker := t.trackers[id] 456 if tracker == nil { 457 return 1 // Unregister race, don't return 0, it's a dangerous number 458 } 459 return tracker.Capacity(kind, targetRTT) 460 } 461 462 // Update is a helper function to access a specific tracker without having to 463 // track it explicitly outside. 464 func (t *Trackers) Update(id string, kind uint64, elapsed time.Duration, items int) { 465 t.lock.RLock() 466 defer t.lock.RUnlock() 467 468 if tracker := t.trackers[id]; tracker != nil { 469 tracker.Update(kind, elapsed, items) 470 } 471 }