// Modifications Copyright 2022 The klaytn Authors
// Copyright 2021 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
//
// This file is derived from p2p/msgrate/msgrate.go (2022/06/29).
// Modified and improved for the klaytn development.

// Package msgrate allows estimating the throughput of peers for more balanced syncs.

package msgrate

import (
	"errors"
	"math"
	"sort"
	"sync"
	"time"

	"github.com/klaytn/klaytn/log"
)

// measurementImpact is the impact a single measurement has on a peer's final
// capacity value. A value closer to 0 reacts slower to sudden network changes,
// but it is also more stable against temporary hiccups. 0.1 worked well for
// most of Ethereum's existence, so might as well go with it.
const measurementImpact = 0.1

// capacityOverestimation is the ratio of items to over-estimate when retrieving
// a peer's capacity to avoid locking into a lower value due to never attempting
// to fetch more than some local stable value.
const capacityOverestimation = 1.01

// qosTuningPeers is the number of best peers to tune round trip times based on.
// An Ethereum node doesn't need hundreds of connections to operate correctly,
// so instead of lowering our download speed to the median of potentially many
// bad nodes, we can target a smaller set of very good nodes. At worst this will
// result in fewer nodes to sync from, but that's still better than some hogging
// the pipeline.
const qosTuningPeers = 5

// rttMinEstimate is the minimal round trip time to target requests for. Since
// every request entails a 2 way latency + bandwidth + serving database lookups,
// it should be generous enough to permit meaningful work to be done on top of
// the transmission costs.
const rttMinEstimate = 2 * time.Second

// rttMaxEstimate is the maximal round trip time to target requests for. Although
// the expectation is that a well connected node will never reach this, certain
// special connectivity ones might experience significant delays (e.g. satellite
// uplink with 3s RTT). This value should be low enough to forbid stalling the
// pipeline too long, but large enough to cover the worst of the worst links.
const rttMaxEstimate = 20 * time.Second

// rttPushdownFactor is a multiplier to attempt forcing quicker requests than
// what the message rate tracker estimates. The reason is that message rate
// tracking adapts queries to the RTT, but multiple RTT values can be perfectly
// valid, they just result in higher packet sizes. Since smaller packets almost
// always result in stabler download streams, this factor hones in on the lowest
// RTT from all the functional ones.
const rttPushdownFactor = 0.9

// rttMinConfidence is the minimum value the roundtrip confidence factor may drop
// to. Since the target timeouts are based on how confident the tracker is in the
// true roundtrip, it's important to not allow too huge fluctuations.
const rttMinConfidence = 0.1

// ttlScaling is the multiplier that converts the estimated roundtrip time to a
// timeout cap for network requests. The expectation is that peers' response time
// will fluctuate around the estimated roundtrip, but depending on their load at
// request time, it might be higher than anticipated. This scaling factor ensures
// that we allow remote connections some slack but at the same time do enforce a
// behavior similar to our median peers.
const ttlScaling = 3

// ttlLimit is the maximum timeout allowance to prevent reaching crazy numbers
// if some unforeseen network events happen. As much as we try to hone in on
// the most optimal values, it doesn't make any sense to go above a threshold,
// even if everything is slow and screwy.
const ttlLimit = time.Minute

// tuningConfidenceCap is the number of active peers above which to stop detuning
// the confidence number. The idea here is that once we hone in on the capacity
// of a meaningful number of peers, adding one more should not have a significant
// impact on things, so just run with the originals.
const tuningConfidenceCap = 10

// tuningImpact is the influence that a new tuning target has on the previously
// cached value. This number is mostly just an out-of-the-blue heuristic that
// prevents the estimates from jumping around. There's no particular reason for
// the current value.
const tuningImpact = 0.25

// Tracker estimates the throughput capacity of a peer with regard to each data
// type it can deliver. The goal is to dynamically adjust request sizes to max
// out network throughput without overloading either the peer or the local node.
109 // 110 // By tracking in real time the latencies and bandiwdths peers exhibit for each 111 // packet type, it's possible to prevent overloading by detecting a slowdown on 112 // one type when another type is pushed too hard. 113 // 114 // Similarly, real time measurements also help avoid overloading the local net 115 // connection if our peers would otherwise be capable to deliver more, but the 116 // local link is saturated. In that case, the live measurements will force us 117 // to reduce request sizes until the throughput gets stable. 118 // 119 // Lastly, message rate measurements allows us to detect if a peer is unsuaully 120 // slow compared to other peers, in which case we can decide to keep it around 121 // or free up the slot so someone closer. 122 // 123 // Since throughput tracking and estimation adapts dynamically to live network 124 // conditions, it's fine to have multiple trackers locally track the same peer 125 // in different subsystem. The throughput will simply be distributed across the 126 // two trackers if both are highly active. 127 type Tracker struct { 128 // capacity is the number of items retrievable per second of a given type. 129 // It is analogous to bandwidth, but we deliberately avoided using bytes 130 // as the unit, since serving nodes also spend a lot of time loading data 131 // from disk, which is linear in the number of items, but mostly constant 132 // in their sizes. 133 // 134 // Callers of course are free to use the item counter as a byte counter if 135 // or when their protocol of choise if capped by bytes instead of items. 136 // (eg. eth.getHeaders vs snap.getAccountRange). 137 capacity map[uint64]float64 138 139 // roundtrip is the latency a peer in general responds to data requests. 140 // This number is not used inside the tracker, but is exposed to compare 141 // peers to each other and filter out slow ones. 
Note however, it only 142 // makes sense to compare RTTs if the caller caters request sizes for 143 // each peer to target the same RTT. There's no need to make this number 144 // the real networking RTT, we just need a number to compare peers with. 145 roundtrip time.Duration 146 147 lock sync.RWMutex 148 } 149 150 // NewTracker creates a new message rate tracker for a specific peer. An initial 151 // RTT is needed to avoid a peer getting marked as an outlier compared to others 152 // right after joining. It's suggested to use the median rtt across all peers to 153 // init a new peer tracker. 154 func NewTracker(caps map[uint64]float64, rtt time.Duration) *Tracker { 155 if caps == nil { 156 caps = make(map[uint64]float64) 157 } 158 return &Tracker{ 159 capacity: caps, 160 roundtrip: rtt, 161 } 162 } 163 164 // Capacity calculates the number of items the peer is estimated to be able to 165 // retrieve within the alloted time slot. The method will round up any division 166 // errors and will add an additional overestimation ratio on top. The reason for 167 // overshooting the capacity is because certain message types might not increase 168 // the load proportionally to the requested items, so fetching a bit more might 169 // still take the same RTT. By forcefully overshooting by a small amount, we can 170 // avoid locking into a lower-that-real capacity. 171 func (t *Tracker) Capacity(kind uint64, targetRTT time.Duration) int { 172 t.lock.RLock() 173 defer t.lock.RUnlock() 174 175 // Calculate the actual measured throughput 176 throughput := t.capacity[kind] * float64(targetRTT) / float64(time.Second) 177 178 // Return an overestimation to force the peer out of a stuck minima, adding 179 // +1 in case the item count is too low for the overestimator to dent 180 return roundCapacity(1 + capacityOverestimation*throughput) 181 } 182 183 // roundCapacity gives the integer value of a capacity. 184 // The result fits int32, and is guaranteed to be positive. 
185 func roundCapacity(cap float64) int { 186 const maxInt32 = float64(1<<31 - 1) 187 return int(math.Min(maxInt32, math.Max(1, math.Ceil(cap)))) 188 } 189 190 // Update modifies the peer's capacity values for a specific data type with a new 191 // measurement. If the delivery is zero, the peer is assumed to have either timed 192 // out or to not have the requested data, resulting in a slash to 0 capacity. This 193 // avoids assigning the peer retrievals that it won't be able to honour. 194 func (t *Tracker) Update(kind uint64, elapsed time.Duration, items int) { 195 t.lock.Lock() 196 defer t.lock.Unlock() 197 198 // If nothing was delivered (timeout / unavailable data), reduce throughput 199 // to minimum 200 if items == 0 { 201 t.capacity[kind] = 0 202 return 203 } 204 // Otherwise update the throughput with a new measurement 205 if elapsed <= 0 { 206 elapsed = 1 // +1 (ns) to ensure non-zero divisor 207 } 208 measured := float64(items) / (float64(elapsed) / float64(time.Second)) 209 210 t.capacity[kind] = (1-measurementImpact)*(t.capacity[kind]) + measurementImpact*measured 211 t.roundtrip = time.Duration((1-measurementImpact)*float64(t.roundtrip) + measurementImpact*float64(elapsed)) 212 } 213 214 // Trackers is a set of message rate trackers across a number of peers with the 215 // goal of aggregating certain measurements across the entire set for outlier 216 // filtering and newly joining initialization. 217 type Trackers struct { 218 trackers map[string]*Tracker 219 220 // roundtrip is the current best guess as to what is a stable round trip time 221 // across the entire collection of connected peers. This is derived from the 222 // various trackers added, but is used as a cache to avoid recomputing on each 223 // network request. The value is updated once every RTT to avoid fluctuations 224 // caused by hiccups or peer events. 
225 roundtrip time.Duration 226 227 // confidence represents the probability that the estimated roundtrip value 228 // is the real one across all our peers. The confidence value is used as an 229 // impact factor of new measurements on old estimates. As our connectivity 230 // stabilizes, this value gravitates towards 1, new measurements havinng 231 // almost no impact. If there's a large peer churn and few peers, then new 232 // measurements will impact it more. The confidence is increased with every 233 // packet and dropped with every new connection. 234 confidence float64 235 236 // tuned is the time instance the tracker recalculated its cached roundtrip 237 // value and confidence values. A cleaner way would be to have a heartbeat 238 // goroutine do it regularly, but that requires a lot of maintenance to just 239 // run every now and again. 240 tuned time.Time 241 242 // The fields below can be used to override certain default values. Their 243 // purpose is to allow quicker tests. Don't use them in production. 244 OverrideTTLLimit time.Duration 245 246 log log.Logger 247 lock sync.RWMutex 248 } 249 250 // NewTrackers creates an empty set of trackers to be filled with peers. 251 func NewTrackers(log log.Logger) *Trackers { 252 return &Trackers{ 253 trackers: make(map[string]*Tracker), 254 roundtrip: rttMaxEstimate, 255 confidence: 1, 256 tuned: time.Now(), 257 OverrideTTLLimit: ttlLimit, 258 log: log, 259 } 260 } 261 262 // Track inserts a new tracker into the set. 263 func (t *Trackers) Track(id string, tracker *Tracker) error { 264 t.lock.Lock() 265 defer t.lock.Unlock() 266 267 if _, ok := t.trackers[id]; ok { 268 return errors.New("already tracking") 269 } 270 t.trackers[id] = tracker 271 t.detune() 272 273 return nil 274 } 275 276 // Untrack stops tracking a previously added peer. 
277 func (t *Trackers) Untrack(id string) error { 278 t.lock.Lock() 279 defer t.lock.Unlock() 280 281 if _, ok := t.trackers[id]; !ok { 282 return errors.New("not tracking") 283 } 284 delete(t.trackers, id) 285 return nil 286 } 287 288 // MedianRoundTrip returns the median RTT across all known trackers. The purpose 289 // of the median RTT is to initialize a new peer with sane statistics that it will 290 // hopefully outperform. If it seriously underperforms, there's a risk of dropping 291 // the peer, but that is ok as we're aiming for a strong median. 292 func (t *Trackers) MedianRoundTrip() time.Duration { 293 t.lock.RLock() 294 defer t.lock.RUnlock() 295 296 return t.medianRoundTrip() 297 } 298 299 // medianRoundTrip is the internal lockless version of MedianRoundTrip to be used 300 // by the QoS tuner. 301 func (t *Trackers) medianRoundTrip() time.Duration { 302 // Gather all the currently measured round trip times 303 rtts := make([]float64, 0, len(t.trackers)) 304 for _, tt := range t.trackers { 305 tt.lock.RLock() 306 rtts = append(rtts, float64(tt.roundtrip)) 307 tt.lock.RUnlock() 308 } 309 sort.Float64s(rtts) 310 311 median := rttMaxEstimate 312 if qosTuningPeers <= len(rtts) { 313 median = time.Duration(rtts[qosTuningPeers/2]) // Median of our best few peers 314 } else if len(rtts) > 0 { 315 median = time.Duration(rtts[len(rtts)/2]) // Median of all out connected peers 316 } 317 // Restrict the RTT into some QoS defaults, irrelevant of true RTT 318 if median < rttMinEstimate { 319 median = rttMinEstimate 320 } 321 if median > rttMaxEstimate { 322 median = rttMaxEstimate 323 } 324 return median 325 } 326 327 // MeanCapacities returns the capacities averaged across all the added trackers. 328 // The purpos of the mean capacities are to initialize a new peer with some sane 329 // starting values that it will hopefully outperform. If the mean overshoots, the 330 // peer will be cut back to minimal capacity and given another chance. 
331 func (t *Trackers) MeanCapacities() map[uint64]float64 { 332 t.lock.RLock() 333 defer t.lock.RUnlock() 334 335 return t.meanCapacities() 336 } 337 338 // meanCapacities is the internal lockless version of MeanCapacities used for 339 // debug logging. 340 func (t *Trackers) meanCapacities() map[uint64]float64 { 341 capacities := make(map[uint64]float64) 342 for _, tt := range t.trackers { 343 tt.lock.RLock() 344 for key, val := range tt.capacity { 345 capacities[key] += val 346 } 347 tt.lock.RUnlock() 348 } 349 for key, val := range capacities { 350 capacities[key] = val / float64(len(t.trackers)) 351 } 352 return capacities 353 } 354 355 // TargetRoundTrip returns the current target round trip time for a request to 356 // complete in.The returned RTT is slightly under the estimated RTT. The reason 357 // is that message rate estimation is a 2 dimensional problem which is solvable 358 // for any RTT. The goal is to gravitate towards smaller RTTs instead of large 359 // messages, to result in a stabler download stream. 360 func (t *Trackers) TargetRoundTrip() time.Duration { 361 // Recalculate the internal caches if it's been a while 362 t.tune() 363 364 // Caches surely recent, return target roundtrip 365 t.lock.RLock() 366 defer t.lock.RUnlock() 367 368 return time.Duration(float64(t.roundtrip) * rttPushdownFactor) 369 } 370 371 // TargetTimeout returns the timeout allowance for a single request to finish 372 // under. The timeout is proportional to the roundtrip, but also takes into 373 // consideration the tracker's confidence in said roundtrip and scales it 374 // accordingly. The final value is capped to avoid runaway requests. 
375 func (t *Trackers) TargetTimeout() time.Duration { 376 // Recalculate the internal caches if it's been a while 377 t.tune() 378 379 // Caches surely recent, return target timeout 380 t.lock.RLock() 381 defer t.lock.RUnlock() 382 383 return t.targetTimeout() 384 } 385 386 // targetTimeout is the internal lockless version of TargetTimeout to be used 387 // during QoS tuning. 388 func (t *Trackers) targetTimeout() time.Duration { 389 timeout := time.Duration(ttlScaling * float64(t.roundtrip) / t.confidence) 390 if timeout > t.OverrideTTLLimit { 391 timeout = t.OverrideTTLLimit 392 } 393 return timeout 394 } 395 396 // tune gathers the individual tracker statistics and updates the estimated 397 // request round trip time. 398 func (t *Trackers) tune() { 399 // Tune may be called concurrently all over the place, but we only want to 400 // periodically update and even then only once. First check if it was updated 401 // recently and abort if so. 402 t.lock.RLock() 403 dirty := time.Since(t.tuned) > t.roundtrip 404 t.lock.RUnlock() 405 if !dirty { 406 return 407 } 408 // If an update is needed, obtain a write lock but make sure we don't update 409 // it on all concurrent threads one by one. 
410 t.lock.Lock() 411 defer t.lock.Unlock() 412 413 if dirty := time.Since(t.tuned) > t.roundtrip; !dirty { 414 return // A concurrent request beat us to the tuning 415 } 416 // First thread reaching the tuning point, update the estimates and return 417 t.roundtrip = time.Duration((1-tuningImpact)*float64(t.roundtrip) + tuningImpact*float64(t.medianRoundTrip())) 418 t.confidence = t.confidence + (1-t.confidence)/2 419 420 t.tuned = time.Now() 421 t.log.Debug("Recalculated msgrate QoS values", "rtt", t.roundtrip, "confidence", t.confidence, "ttl", t.targetTimeout(), "next", t.tuned.Add(t.roundtrip)) 422 t.log.Trace("Debug dump of mean capacities", "caps", log.Lazy{Fn: t.meanCapacities}) 423 } 424 425 // detune reduces the tracker's confidence in order to make fresh measurements 426 // have a larger impact on the estimates. It is meant to be used during new peer 427 // connections so they can have a proper impact on the estimates. 428 func (t *Trackers) detune() { 429 // If we have a single peer, confidence is always 1 430 if len(t.trackers) == 1 { 431 t.confidence = 1 432 return 433 } 434 // If we have a ton of peers, don't drop the confidence since there's enough 435 // remaining to retain the same throughput 436 if len(t.trackers) >= tuningConfidenceCap { 437 return 438 } 439 // Otherwise drop the confidence factor 440 peers := float64(len(t.trackers)) 441 442 t.confidence = t.confidence * (peers - 1) / peers 443 if t.confidence < rttMinConfidence { 444 t.confidence = rttMinConfidence 445 } 446 t.log.Debug("Relaxed msgrate QoS values", "rtt", t.roundtrip, "confidence", t.confidence, "ttl", t.targetTimeout()) 447 } 448 449 // Capacity is a helper function to access a specific tracker without having to 450 // track it explicitly outside. 
451 func (t *Trackers) Capacity(id string, kind uint64, targetRTT time.Duration) int { 452 t.lock.RLock() 453 defer t.lock.RUnlock() 454 455 tracker := t.trackers[id] 456 if tracker == nil { 457 return 1 // Unregister race, don't return 0, it's a dangerous number 458 } 459 return tracker.Capacity(kind, targetRTT) 460 } 461 462 // Update is a helper function to access a specific tracker without having to 463 // track it explicitly outside. 464 func (t *Trackers) Update(id string, kind uint64, elapsed time.Duration, items int) { 465 t.lock.RLock() 466 defer t.lock.RUnlock() 467 468 if tracker := t.trackers[id]; tracker != nil { 469 tracker.Update(kind, elapsed, items) 470 } 471 }