github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_stats.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver
    12  
    13  import (
    14  	"math"
    15  	"time"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    18  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    19  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    20  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    21  )
    22  
    23  const (
    24  	replStatsRotateInterval = 5 * time.Minute
    25  	decayFactor             = 0.8
    26  
    27  	// MinStatsDuration defines a lower bound on how long users of replica stats
    28  	// should wait before using those stats for anything. If the duration of a
    29  	// measurement has been less than MinStatsDuration, these methods could easily
    30  	// return outlier/anomalous data.
    31  	MinStatsDuration = 5 * time.Second
    32  )
    33  
    34  type localityOracle func(roachpb.NodeID) string
    35  
    36  // perLocalityCounts maps from the string representation of a locality to count.
    37  type perLocalityCounts map[string]float64
    38  
    39  // replicaStats maintains statistics about the work done by a replica. Its
    40  // initial use is tracking the number of requests received from each
    41  // cluster locality in order to inform lease transfer decisions.
    42  type replicaStats struct {
    43  	clock           *hlc.Clock
    44  	getNodeLocality localityOracle
    45  
    46  	// We use a set of time windows in order to age out old stats without having
    47  	// to do hard resets. The `requests` array is a circular buffer of the last
    48  	// N windows of stats. We rotate through the circular buffer every so often
    49  	// as determined by `replStatsRotateInterval`.
    50  	//
    51  	// We could alternatively use a forward decay approach here, but it would
    52  	// require more memory than this slightly less precise windowing method:
    53  	//   http://dimacs.rutgers.edu/~graham/pubs/papers/fwddecay.pdf
    54  	mu struct {
    55  		syncutil.Mutex
    56  		idx        int
    57  		requests   [6]perLocalityCounts
    58  		lastRotate time.Time
    59  		lastReset  time.Time
    60  	}
    61  }
    62  
    63  func newReplicaStats(clock *hlc.Clock, getNodeLocality localityOracle) *replicaStats {
    64  	rs := &replicaStats{
    65  		clock:           clock,
    66  		getNodeLocality: getNodeLocality,
    67  	}
    68  	rs.mu.requests[rs.mu.idx] = make(perLocalityCounts)
    69  	rs.mu.lastRotate = timeutil.Unix(0, rs.clock.PhysicalNow())
    70  	rs.mu.lastReset = rs.mu.lastRotate
    71  	return rs
    72  }
    73  
    74  // splitRequestCounts divides the current replicaStats object in two for the
    75  // purposes of splitting a range. It modifies itself to have half its requests
    76  // and the provided other to have the other half.
    77  //
    78  // Note that assuming a 50/50 split is optimistic, but it's much better than
    79  // resetting both sides upon a split.
    80  // TODO(a-robinson): Write test for this.
    81  func (rs *replicaStats) splitRequestCounts(other *replicaStats) {
    82  	other.mu.Lock()
    83  	defer other.mu.Unlock()
    84  	rs.mu.Lock()
    85  	defer rs.mu.Unlock()
    86  
    87  	other.mu.idx = rs.mu.idx
    88  	other.mu.lastRotate = rs.mu.lastRotate
    89  	other.mu.lastReset = rs.mu.lastReset
    90  
    91  	for i := range rs.mu.requests {
    92  		if rs.mu.requests[i] == nil {
    93  			other.mu.requests[i] = nil
    94  			continue
    95  		}
    96  		other.mu.requests[i] = make(perLocalityCounts)
    97  		for k := range rs.mu.requests[i] {
    98  			newVal := rs.mu.requests[i][k] / 2.0
    99  			rs.mu.requests[i][k] = newVal
   100  			other.mu.requests[i][k] = newVal
   101  		}
   102  	}
   103  }
   104  
   105  func (rs *replicaStats) record(nodeID roachpb.NodeID) {
   106  	rs.recordCount(1, nodeID)
   107  }
   108  
   109  func (rs *replicaStats) recordCount(count float64, nodeID roachpb.NodeID) {
   110  	var locality string
   111  	if rs.getNodeLocality != nil {
   112  		locality = rs.getNodeLocality(nodeID)
   113  	}
   114  	now := timeutil.Unix(0, rs.clock.PhysicalNow())
   115  
   116  	rs.mu.Lock()
   117  	defer rs.mu.Unlock()
   118  
   119  	rs.maybeRotateLocked(now)
   120  	rs.mu.requests[rs.mu.idx][locality] += count
   121  }
   122  
   123  func (rs *replicaStats) maybeRotateLocked(now time.Time) {
   124  	if now.Sub(rs.mu.lastRotate) >= replStatsRotateInterval {
   125  		rs.rotateLocked()
   126  		rs.mu.lastRotate = now
   127  	}
   128  }
   129  
   130  func (rs *replicaStats) rotateLocked() {
   131  	rs.mu.idx = (rs.mu.idx + 1) % len(rs.mu.requests)
   132  	rs.mu.requests[rs.mu.idx] = make(perLocalityCounts)
   133  }
   134  
   135  // perLocalityDecayingQPS returns the per-locality QPS and the amount of time
   136  // over which the stats were accumulated.
   137  // Note that the QPS stats are exponentially decayed such that newer requests
   138  // are weighted more heavily than older requests.
   139  func (rs *replicaStats) perLocalityDecayingQPS() (perLocalityCounts, time.Duration) {
   140  	now := timeutil.Unix(0, rs.clock.PhysicalNow())
   141  
   142  	rs.mu.Lock()
   143  	defer rs.mu.Unlock()
   144  
   145  	rs.maybeRotateLocked(now)
   146  
   147  	// Use the fraction of time since the last rotation as a smoothing factor to
   148  	// avoid jarring changes in request count immediately before/after a rotation.
   149  	timeSinceRotate := now.Sub(rs.mu.lastRotate)
   150  	fractionOfRotation := float64(timeSinceRotate) / float64(replStatsRotateInterval)
   151  
   152  	counts := make(perLocalityCounts)
   153  	var duration time.Duration
   154  	for i := range rs.mu.requests {
   155  		// We have to add len(rs.mu.requests) to the numerator to avoid getting a
   156  		// negative result from the modulus operation when rs.mu.idx is small.
   157  		requestsIdx := (rs.mu.idx + len(rs.mu.requests) - i) % len(rs.mu.requests)
   158  		if cur := rs.mu.requests[requestsIdx]; cur != nil {
   159  			decay := math.Pow(decayFactor, float64(i)+fractionOfRotation)
   160  			if i == 0 {
   161  				duration += time.Duration(float64(timeSinceRotate) * decay)
   162  			} else {
   163  				duration += time.Duration(float64(replStatsRotateInterval) * decay)
   164  			}
   165  			for k, v := range cur {
   166  				counts[k] += v * decay
   167  			}
   168  		}
   169  	}
   170  
   171  	if duration.Seconds() > 0 {
   172  		for k := range counts {
   173  			counts[k] = counts[k] / duration.Seconds()
   174  		}
   175  	}
   176  	return counts, now.Sub(rs.mu.lastReset)
   177  }
   178  
   179  // avgQPS returns the average requests-per-second and the amount of time
   180  // over which the stat was accumulated. Note that these averages are exact,
   181  // not exponentially decayed (there isn't a ton of justification for going
   182  // one way or the the other, but not decaying makes the average more stable,
   183  // which is probably better for avoiding rebalance thrashing).
   184  func (rs *replicaStats) avgQPS() (float64, time.Duration) {
   185  	now := timeutil.Unix(0, rs.clock.PhysicalNow())
   186  
   187  	rs.mu.Lock()
   188  	defer rs.mu.Unlock()
   189  
   190  	rs.maybeRotateLocked(now)
   191  
   192  	// First accumulate the counts, then divide by the total number of seconds.
   193  	var sum float64
   194  	var windowsUsed int
   195  	for i := range rs.mu.requests {
   196  		// We have to add len(rs.mu.requests) to the numerator to avoid getting a
   197  		// negative result from the modulus operation when rs.mu.idx is small.
   198  		requestsIdx := (rs.mu.idx + len(rs.mu.requests) - i) % len(rs.mu.requests)
   199  		if cur := rs.mu.requests[requestsIdx]; cur != nil {
   200  			windowsUsed++
   201  			for _, v := range cur {
   202  				sum += v
   203  			}
   204  		}
   205  	}
   206  	if windowsUsed <= 0 {
   207  		return 0, 0
   208  	}
   209  	duration := now.Sub(rs.mu.lastRotate) + time.Duration(windowsUsed-1)*replStatsRotateInterval
   210  	if duration == 0 {
   211  		return 0, 0
   212  	}
   213  	return sum / duration.Seconds(), duration
   214  }
   215  
   216  func (rs *replicaStats) resetRequestCounts() {
   217  	rs.mu.Lock()
   218  	defer rs.mu.Unlock()
   219  
   220  	for i := range rs.mu.requests {
   221  		rs.mu.requests[i] = nil
   222  	}
   223  	rs.mu.requests[rs.mu.idx] = make(perLocalityCounts)
   224  	rs.mu.lastRotate = timeutil.Unix(0, rs.clock.PhysicalNow())
   225  	rs.mu.lastReset = rs.mu.lastRotate
   226  }