github.com/dominant-strategies/go-quai@v0.28.2/p2p/msgrate/msgrate.go

// Copyright 2021 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package msgrate allows estimating the throughput of peers for more balanced syncs.
package msgrate

import (
	"encoding/json"
	"errors"
	"math"
	"sort"
	"sync"
	"time"

	"github.com/dominant-strategies/go-quai/log"
)

// measurementImpact is the impact a single measurement has on a peer's final
// capacity value. A value closer to 0 reacts slower to sudden network changes,
// but it is also more stable against temporary hiccups.
const measurementImpact = 0.1

// capacityOverestimation is the ratio of items to over-estimate when retrieving
// a peer's capacity to avoid locking into a lower value due to never attempting
// to fetch more than some local stable value.
const capacityOverestimation = 1.01

// qosTuningPeers is the number of best peers to tune round trip times based on.
// A Quai node doesn't need hundreds of connections to operate correctly, so
// instead of lowering our download speed to the median of potentially many bad
// nodes, we can target a smaller set of very good nodes. At worst this will
// result in fewer nodes to sync from, but that's still better than a few bad
// ones hogging the pipeline.
const qosTuningPeers = 5

// rttMinEstimate is the minimal round trip time to target requests for. Since
// every request entails a 2 way latency + bandwidth + serving database lookups,
// it should be generous enough to permit meaningful work to be done on top of
// the transmission costs.
const rttMinEstimate = 2 * time.Second

// rttMaxEstimate is the maximal round trip time to target requests for. Although
// the expectation is that a well connected node will never reach this, certain
// special connectivity ones might experience significant delays (e.g. satellite
// uplink with 3s RTT). This value should be low enough to forbid stalling the
// pipeline too long, but large enough to cover the worst of the worst links.
const rttMaxEstimate = 20 * time.Second

// rttPushdownFactor is a multiplier to attempt forcing quicker requests than
// what the message rate tracker estimates. The reason is that message rate
// tracking adapts queries to the RTT, but multiple RTT values can be perfectly
// valid, they just result in higher packet sizes. Since smaller packets almost
// always result in stabler download streams, this factor hones in on the lowest
// RTT from all the functional ones.
const rttPushdownFactor = 0.9
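// exampleSmooth is an illustrative sketch, not part of the original file: it
// spells out the exponential moving average that measurementImpact implies and
// that Tracker.Update below applies to both capacity and roundtrip. A single
// outlier measurement only moves the estimate by 10%, while a sustained change
// converges over a few tens of measurements.
func exampleSmooth(estimate, measured float64) float64 {
	return (1-measurementImpact)*estimate + measurementImpact*measured
}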
// rttMinConfidence is the minimum value the roundtrip confidence factor may
// drop to. Since the target timeouts are based on how confident the tracker is
// in the true roundtrip, it's important not to allow too large fluctuations.
const rttMinConfidence = 0.1

// ttlScaling is the multiplier that converts the estimated roundtrip time to a
// timeout cap for network requests. The expectation is that peers' response time
// will fluctuate around the estimated roundtrip, but depending on their load at
// request time, it might be higher than anticipated. This scaling factor ensures
// that we allow remote connections some slack but at the same time do enforce a
// behavior similar to our median peers.
const ttlScaling = 3

// ttlLimit is the maximum timeout allowance to prevent reaching crazy numbers
// if some unforeseen network events happen. As much as we try to hone in on
// the most optimal values, it doesn't make any sense to go above a threshold,
// even if everything is slow and screwy.
const ttlLimit = time.Minute

// tuningConfidenceCap is the number of active peers above which to stop detuning
// the confidence number. The idea here is that once we hone in on the capacity
// of a meaningful number of peers, adding one more should not have a significant
// impact on things, so just run with the originals.
const tuningConfidenceCap = 10

// tuningImpact is the influence that a new tuning target has on the previously
// cached value. This number is mostly just an out-of-the-blue heuristic that
// prevents the estimates from jumping around. There's no particular reason for
// the current value.
const tuningImpact = 0.25
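// exampleTimeout is an illustrative sketch, not part of the original file: it
// shows how the timeout constants above combine, mirroring targetTimeout further
// down. With full confidence the allowance is ttlScaling times the roundtrip
// estimate; as confidence decays towards rttMinConfidence the allowance grows,
// but is always clipped at ttlLimit (e.g. a 2s roundtrip at 0.1 confidence
// would suggest 60s, which is exactly the cap).
func exampleTimeout(rtt time.Duration, confidence float64) time.Duration {
	timeout := time.Duration(ttlScaling * float64(rtt) / confidence)
	if timeout > ttlLimit {
		timeout = ttlLimit
	}
	return timeout
}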
// Tracker estimates the throughput capacity of a peer with regard to each data
// type it can deliver. The goal is to dynamically adjust request sizes to max
// out network throughput without overloading either the peer or the local node.
//
// By tracking in real time the latencies and bandwidths peers exhibit for each
// packet type, it's possible to prevent overloading by detecting a slowdown on
// one type when another type is pushed too hard.
//
// Similarly, real time measurements also help avoid overloading the local net
// connection if our peers would otherwise be capable of delivering more, but the
// local link is saturated. In that case, the live measurements will force us
// to reduce request sizes until the throughput gets stable.
//
// Lastly, message rate measurements allow us to detect if a peer is unusually
// slow compared to other peers, in which case we can decide to keep it around
// or free up the slot for someone closer.
//
// Since throughput tracking and estimation adapts dynamically to live network
// conditions, it's fine to have multiple trackers locally track the same peer
// in different subsystems. The throughput will simply be distributed across the
// two trackers if both are highly active.
type Tracker struct {
	// capacity is the number of items retrievable per second of a given type.
	// It is analogous to bandwidth, but we deliberately avoided using bytes
	// as the unit, since serving nodes also spend a lot of time loading data
	// from disk, which is linear in the number of items, but mostly constant
	// in their sizes.
	//
	// Callers of course are free to use the item counter as a byte counter if
	// or when their protocol of choice is capped by bytes instead of items
	// (e.g. eth.getHeaders vs snap.getAccountRange).
	capacity map[uint64]float64

	// roundtrip is the latency with which a peer in general responds to data
	// requests. This number is not used inside the tracker, but is exposed to
	// compare peers to each other and filter out slow ones. Note however, it
	// only makes sense to compare RTTs if the caller caters request sizes for
	// each peer to target the same RTT. There's no need to make this number
	// the real networking RTT, we just need a number to compare peers with.
	roundtrip time.Duration

	lock sync.RWMutex
}

// NewTracker creates a new message rate tracker for a specific peer. An initial
// RTT is needed to avoid a peer getting marked as an outlier compared to others
// right after joining. It's suggested to use the median rtt across all peers to
// init a new peer tracker.
func NewTracker(caps map[uint64]float64, rtt time.Duration) *Tracker {
	if caps == nil {
		caps = make(map[uint64]float64)
	}
	return &Tracker{
		capacity:  caps,
		roundtrip: rtt,
	}
}

// Capacity calculates the number of items the peer is estimated to be able to
// retrieve within the allotted time slot. The method will round up any division
// errors and will add an additional overestimation ratio on top. The reason for
// overshooting the capacity is because certain message types might not increase
// the load proportionally to the requested items, so fetching a bit more might
// still take the same RTT. By forcefully overshooting by a small amount, we can
// avoid locking into a lower-than-real capacity.
func (t *Tracker) Capacity(kind uint64, targetRTT time.Duration) int {
	t.lock.RLock()
	defer t.lock.RUnlock()

	// Calculate the actual measured throughput
	throughput := t.capacity[kind] * float64(targetRTT) / float64(time.Second)

	// Return an overestimation to force the peer out of a stuck minima, adding
	// +1 in case the item count is too low for the overestimator to dent
	return roundCapacity(1 + capacityOverestimation*throughput)
}

// Roundtrip returns the estimated roundtrip time of the peer.
func (t *Tracker) Roundtrip() time.Duration {
	t.lock.RLock()
	defer t.lock.RUnlock()
	return t.roundtrip
}

// roundCapacity gives the integer value of a capacity.
// The result fits int32, and is guaranteed to be positive.
func roundCapacity(cap float64) int {
	const maxInt32 = float64(1<<31 - 1)
	return int(math.Min(maxInt32, math.Max(1, math.Ceil(cap))))
}
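// exampleRequestSize is an illustrative sketch, not part of the original file:
// a hypothetical per-peer request sizing step. It targets a slightly pushed
// down version of the peer's own tracked roundtrip (the aggregate Trackers set
// below does this across all peers via TargetRoundTrip) and converts that into
// an item count via Capacity.
func exampleRequestSize(t *Tracker, kind uint64) int {
	target := time.Duration(float64(t.Roundtrip()) * rttPushdownFactor)
	return t.Capacity(kind, target)
}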
// Update modifies the peer's capacity values for a specific data type with a new
// measurement. If the delivery is zero, the peer is assumed to have either timed
// out or to not have the requested data, resulting in a slash to 0 capacity. This
// avoids assigning the peer retrievals that it won't be able to honour.
func (t *Tracker) Update(kind uint64, elapsed time.Duration, items int) {
	t.lock.Lock()
	defer t.lock.Unlock()

	// If nothing was delivered (timeout / unavailable data), reduce throughput
	// to minimum
	if items == 0 {
		t.capacity[kind] = 0
		return
	}
	// Otherwise update the throughput with a new measurement
	if elapsed <= 0 {
		elapsed = 1 // +1 (ns) to ensure non-zero divisor
	}
	measured := float64(items) / (float64(elapsed) / float64(time.Second))

	t.capacity[kind] = (1-measurementImpact)*(t.capacity[kind]) + measurementImpact*measured
	t.roundtrip = time.Duration((1-measurementImpact)*float64(t.roundtrip) + measurementImpact*float64(elapsed))
}

// Trackers is a set of message rate trackers across a number of peers with the
// goal of aggregating certain measurements across the entire set for outlier
// filtering and newly joining initialization.
type Trackers struct {
	trackers map[string]*Tracker

	// roundtrip is the current best guess as to what is a stable round trip time
	// across the entire collection of connected peers. This is derived from the
	// various trackers added, but is used as a cache to avoid recomputing on each
	// network request. The value is updated once every RTT to avoid fluctuations
	// caused by hiccups or peer events.
	roundtrip time.Duration

	// confidence represents the probability that the estimated roundtrip value
	// is the real one across all our peers. The confidence value is used as an
	// impact factor of new measurements on old estimates. As our connectivity
	// stabilizes, this value gravitates towards 1, new measurements having
	// almost no impact. If there's a large peer churn and few peers, then new
	// measurements will impact it more. The confidence is increased with every
	// packet and dropped with every new connection.
	confidence float64

	// tuned is the time instance the tracker recalculated its cached roundtrip
	// value and confidence values. A cleaner way would be to have a heartbeat
	// goroutine do it regularly, but that requires a lot of maintenance to just
	// run every now and again.
	tuned time.Time

	// The fields below can be used to override certain default values. Their
	// purpose is to allow quicker tests. Don't use them in production.
	OverrideTTLLimit time.Duration

	log  *log.Logger
	lock sync.RWMutex
}

// NewTrackers creates an empty set of trackers to be filled with peers.
func NewTrackers(log *log.Logger) *Trackers {
	return &Trackers{
		trackers:         make(map[string]*Tracker),
		roundtrip:        rttMaxEstimate,
		confidence:       1,
		tuned:            time.Now(),
		OverrideTTLLimit: ttlLimit,
		log:              log,
	}
}

// Track inserts a new tracker into the set.
func (t *Trackers) Track(id string, tracker *Tracker) error {
	t.lock.Lock()
	defer t.lock.Unlock()

	if _, ok := t.trackers[id]; ok {
		return errors.New("already tracking")
	}
	t.trackers[id] = tracker
	t.detune()

	return nil
}

// Untrack stops tracking a previously added peer.
func (t *Trackers) Untrack(id string) error {
	t.lock.Lock()
	defer t.lock.Unlock()

	if _, ok := t.trackers[id]; !ok {
		return errors.New("not tracking")
	}
	delete(t.trackers, id)
	return nil
}
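// exampleRegisterPeer is an illustrative sketch, not part of the original file:
// it wires together the initialization path suggested by the NewTracker,
// MedianRoundTrip and MeanCapacities documentation, seeding a freshly joined
// peer with the aggregate statistics before tracking it.
func exampleRegisterPeer(set *Trackers, id string) error {
	tracker := NewTracker(set.MeanCapacities(), set.MedianRoundTrip())
	return set.Track(id, tracker)
}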
// MedianRoundTrip returns the median RTT across all known trackers. The purpose
// of the median RTT is to initialize a new peer with sane statistics that it will
// hopefully outperform. If it seriously underperforms, there's a risk of dropping
// the peer, but that is ok as we're aiming for a strong median.
func (t *Trackers) MedianRoundTrip() time.Duration {
	t.lock.RLock()
	defer t.lock.RUnlock()

	return t.medianRoundTrip()
}

// medianRoundTrip is the internal lockless version of MedianRoundTrip to be used
// by the QoS tuner.
func (t *Trackers) medianRoundTrip() time.Duration {
	// Gather all the currently measured round trip times
	rtts := make([]float64, 0, len(t.trackers))
	for _, tt := range t.trackers {
		tt.lock.RLock()
		rtts = append(rtts, float64(tt.roundtrip))
		tt.lock.RUnlock()
	}
	sort.Float64s(rtts)

	median := rttMaxEstimate
	if qosTuningPeers <= len(rtts) {
		median = time.Duration(rtts[qosTuningPeers/2]) // Median of our best few peers
	} else if len(rtts) > 0 {
		median = time.Duration(rtts[len(rtts)/2]) // Median of all our connected peers
	}
	// Restrict the RTT into some QoS defaults, irrelevant of true RTT
	if median < rttMinEstimate {
		median = rttMinEstimate
	}
	if median > rttMaxEstimate {
		median = rttMaxEstimate
	}
	return median
}

// MeanCapacities returns the capacities averaged across all the added trackers.
// The purpose of the mean capacities is to initialize a new peer with some sane
// starting values that it will hopefully outperform. If the mean overshoots, the
// peer will be cut back to minimal capacity and given another chance.
func (t *Trackers) MeanCapacities() map[uint64]float64 {
	t.lock.RLock()
	defer t.lock.RUnlock()

	return t.meanCapacities()
}

// meanCapacities is the internal lockless version of MeanCapacities used for
// debug logging.
func (t *Trackers) meanCapacities() map[uint64]float64 {
	capacities := make(map[uint64]float64)
	for _, tt := range t.trackers {
		tt.lock.RLock()
		for key, val := range tt.capacity {
			capacities[key] += val
		}
		tt.lock.RUnlock()
	}
	for key, val := range capacities {
		capacities[key] = val / float64(len(t.trackers))
	}
	return capacities
}

// TargetRoundTrip returns the current target round trip time for a request to
// complete in. The returned RTT is slightly under the estimated RTT. The reason
// is that message rate estimation is a 2 dimensional problem which is solvable
// for any RTT. The goal is to gravitate towards smaller RTTs instead of large
// messages, to result in a stabler download stream.
func (t *Trackers) TargetRoundTrip() time.Duration {
	// Recalculate the internal caches if it's been a while
	t.tune()

	// Caches surely recent, return target roundtrip
	t.lock.RLock()
	defer t.lock.RUnlock()

	return time.Duration(float64(t.roundtrip) * rttPushdownFactor)
}
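// exampleIsSlowPeer is an illustrative sketch, not part of the original file:
// the kind of outlier filtering the exposed Roundtrip and MedianRoundTrip
// values are meant for. The 2x threshold is an arbitrary assumption here, not
// something this package defines or recommends.
func exampleIsSlowPeer(peer *Tracker, set *Trackers) bool {
	return peer.Roundtrip() > 2*set.MedianRoundTrip()
}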
// TargetTimeout returns the timeout allowance for a single request to finish
// under. The timeout is proportional to the roundtrip, but also takes into
// consideration the tracker's confidence in said roundtrip and scales it
// accordingly. The final value is capped to avoid runaway requests.
func (t *Trackers) TargetTimeout() time.Duration {
	// Recalculate the internal caches if it's been a while
	t.tune()

	// Caches surely recent, return target timeout
	t.lock.RLock()
	defer t.lock.RUnlock()

	return t.targetTimeout()
}

// targetTimeout is the internal lockless version of TargetTimeout to be used
// during QoS tuning.
func (t *Trackers) targetTimeout() time.Duration {
	timeout := time.Duration(ttlScaling * float64(t.roundtrip) / t.confidence)
	if timeout > t.OverrideTTLLimit {
		timeout = t.OverrideTTLLimit
	}
	return timeout
}

// tune gathers the individual tracker statistics and updates the estimated
// request round trip time.
func (t *Trackers) tune() {
	// Tune may be called concurrently all over the place, but we only want to
	// periodically update and even then only once. First check if it was updated
	// recently and abort if so.
	t.lock.RLock()
	dirty := time.Since(t.tuned) > t.roundtrip
	t.lock.RUnlock()
	if !dirty {
		return
	}
	// If an update is needed, obtain a write lock but make sure we don't update
	// it on all concurrent threads one by one.
	t.lock.Lock()
	defer t.lock.Unlock()

	if dirty := time.Since(t.tuned) > t.roundtrip; !dirty {
		return // A concurrent request beat us to the tuning
	}
	// First thread reaching the tuning point, update the estimates and return
	t.roundtrip = time.Duration((1-tuningImpact)*float64(t.roundtrip) + tuningImpact*float64(t.medianRoundTrip()))
	t.confidence = t.confidence + (1-t.confidence)/2

	t.tuned = time.Now()
	log.Debug("Recalculated msgrate QoS values", "rtt", t.roundtrip, "confidence", t.confidence, "ttl", t.targetTimeout(), "next", t.tuned.Add(t.roundtrip))
	log.Lazy(func() string {
		b, _ := json.Marshal(t.meanCapacities())
		return string(b)
	}, "trace")
}

// detune reduces the tracker's confidence in order to make fresh measurements
// have a larger impact on the estimates. It is meant to be used during new peer
// connections so they can have a proper impact on the estimates.
func (t *Trackers) detune() {
	// If we have a single peer, confidence is always 1
	if len(t.trackers) == 1 {
		t.confidence = 1
		return
	}
	// If we have a ton of peers, don't drop the confidence since there's enough
	// remaining to retain the same throughput
	if len(t.trackers) >= tuningConfidenceCap {
		return
	}
	// Otherwise drop the confidence factor
	peers := float64(len(t.trackers))

	t.confidence = t.confidence * (peers - 1) / peers
	if t.confidence < rttMinConfidence {
		t.confidence = rttMinConfidence
	}
	log.Debug("Relaxed msgrate QoS values", "rtt", t.roundtrip, "confidence", t.confidence, "ttl", t.targetTimeout())
}

// Capacity is a helper function to access a specific tracker without having to
// track it explicitly outside.
func (t *Trackers) Capacity(id string, kind uint64, targetRTT time.Duration) int {
	t.lock.RLock()
	defer t.lock.RUnlock()

	tracker := t.trackers[id]
	if tracker == nil {
		return 1 // Unregister race, don't return 0, it's a dangerous number
	}
	return tracker.Capacity(kind, targetRTT)
}
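// exampleSchedule is an illustrative sketch, not part of the original file: how
// a hypothetical download scheduler might combine the aggregate helpers, asking
// a given peer for as many items as it is believed to deliver within the target
// roundtrip, and bounding the wait by the confidence-scaled timeout.
func exampleSchedule(set *Trackers, id string, kind uint64) (items int, timeout time.Duration) {
	return set.Capacity(id, kind, set.TargetRoundTrip()), set.TargetTimeout()
}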
// Update is a helper function to access a specific tracker without having to
// track it explicitly outside.
func (t *Trackers) Update(id string, kind uint64, elapsed time.Duration, items int) {
	t.lock.RLock()
	defer t.lock.RUnlock()

	if tracker := t.trackers[id]; tracker != nil {
		tracker.Update(kind, elapsed, items)
	}
}
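// exampleFeedback is an illustrative sketch, not part of the original file: the
// feedback loop this package is designed to drive. A request is sized from the
// current estimates, executed, and the measured delivery is reported back so
// the estimates adapt. The requestItems callback is a hypothetical stand-in for
// an actual network retrieval, not part of this package.
func exampleFeedback(set *Trackers, id string, kind uint64, requestItems func(count int) (int, error)) error {
	count := set.Capacity(id, kind, set.TargetRoundTrip())
	start := time.Now()

	delivered, err := requestItems(count)
	if err != nil {
		// Report a zero-item delivery: per Tracker.Update, this slashes the
		// peer's capacity for this kind so it isn't assigned work it cannot do.
		set.Update(id, kind, time.Since(start), 0)
		return err
	}
	set.Update(id, kind, time.Since(start), delivered)
	return nil
}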