github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_stats.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package kvserver 12 13 import ( 14 "math" 15 "time" 16 17 "github.com/cockroachdb/cockroach/pkg/roachpb" 18 "github.com/cockroachdb/cockroach/pkg/util/hlc" 19 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 20 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 21 ) 22 23 const ( 24 replStatsRotateInterval = 5 * time.Minute 25 decayFactor = 0.8 26 27 // MinStatsDuration defines a lower bound on how long users of replica stats 28 // should wait before using those stats for anything. If the duration of a 29 // measurement has been less than MinStatsDuration, these methods could easily 30 // return outlier/anomalous data. 31 MinStatsDuration = 5 * time.Second 32 ) 33 34 type localityOracle func(roachpb.NodeID) string 35 36 // perLocalityCounts maps from the string representation of a locality to count. 37 type perLocalityCounts map[string]float64 38 39 // replicaStats maintains statistics about the work done by a replica. Its 40 // initial use is tracking the number of requests received from each 41 // cluster locality in order to inform lease transfer decisions. 42 type replicaStats struct { 43 clock *hlc.Clock 44 getNodeLocality localityOracle 45 46 // We use a set of time windows in order to age out old stats without having 47 // to do hard resets. The `requests` array is a circular buffer of the last 48 // N windows of stats. We rotate through the circular buffer every so often 49 // as determined by `replStatsRotateInterval`. 50 // 51 // We could alternatively use a forward decay approach here, but it would 52 // require more memory than this slightly less precise windowing method: 53 // http://dimacs.rutgers.edu/~graham/pubs/papers/fwddecay.pdf 54 mu struct { 55 syncutil.Mutex 56 idx int 57 requests [6]perLocalityCounts 58 lastRotate time.Time 59 lastReset time.Time 60 } 61 } 62 63 func newReplicaStats(clock *hlc.Clock, getNodeLocality localityOracle) *replicaStats { 64 rs := &replicaStats{ 65 clock: clock, 66 getNodeLocality: getNodeLocality, 67 } 68 rs.mu.requests[rs.mu.idx] = make(perLocalityCounts) 69 rs.mu.lastRotate = timeutil.Unix(0, rs.clock.PhysicalNow()) 70 rs.mu.lastReset = rs.mu.lastRotate 71 return rs 72 } 73 74 // splitRequestCounts divides the current replicaStats object in two for the 75 // purposes of splitting a range. It modifies itself to have half its requests 76 // and the provided other to have the other half. 77 // 78 // Note that assuming a 50/50 split is optimistic, but it's much better than 79 // resetting both sides upon a split. 80 // TODO(a-robinson): Write test for this. 81 func (rs *replicaStats) splitRequestCounts(other *replicaStats) { 82 other.mu.Lock() 83 defer other.mu.Unlock() 84 rs.mu.Lock() 85 defer rs.mu.Unlock() 86 87 other.mu.idx = rs.mu.idx 88 other.mu.lastRotate = rs.mu.lastRotate 89 other.mu.lastReset = rs.mu.lastReset 90 91 for i := range rs.mu.requests { 92 if rs.mu.requests[i] == nil { 93 other.mu.requests[i] = nil 94 continue 95 } 96 other.mu.requests[i] = make(perLocalityCounts) 97 for k := range rs.mu.requests[i] { 98 newVal := rs.mu.requests[i][k] / 2.0 99 rs.mu.requests[i][k] = newVal 100 other.mu.requests[i][k] = newVal 101 } 102 } 103 } 104 105 func (rs *replicaStats) record(nodeID roachpb.NodeID) { 106 rs.recordCount(1, nodeID) 107 } 108 109 func (rs *replicaStats) recordCount(count float64, nodeID roachpb.NodeID) { 110 var locality string 111 if rs.getNodeLocality != nil { 112 locality = rs.getNodeLocality(nodeID) 113 } 114 now := timeutil.Unix(0, rs.clock.PhysicalNow()) 115 116 rs.mu.Lock() 117 defer rs.mu.Unlock() 118 119 rs.maybeRotateLocked(now) 120 rs.mu.requests[rs.mu.idx][locality] += count 121 } 122 123 func (rs *replicaStats) maybeRotateLocked(now time.Time) { 124 if now.Sub(rs.mu.lastRotate) >= replStatsRotateInterval { 125 rs.rotateLocked() 126 rs.mu.lastRotate = now 127 } 128 } 129 130 func (rs *replicaStats) rotateLocked() { 131 rs.mu.idx = (rs.mu.idx + 1) % len(rs.mu.requests) 132 rs.mu.requests[rs.mu.idx] = make(perLocalityCounts) 133 } 134 135 // perLocalityDecayingQPS returns the per-locality QPS and the amount of time 136 // over which the stats were accumulated. 137 // Note that the QPS stats are exponentially decayed such that newer requests 138 // are weighted more heavily than older requests. 139 func (rs *replicaStats) perLocalityDecayingQPS() (perLocalityCounts, time.Duration) { 140 now := timeutil.Unix(0, rs.clock.PhysicalNow()) 141 142 rs.mu.Lock() 143 defer rs.mu.Unlock() 144 145 rs.maybeRotateLocked(now) 146 147 // Use the fraction of time since the last rotation as a smoothing factor to 148 // avoid jarring changes in request count immediately before/after a rotation. 149 timeSinceRotate := now.Sub(rs.mu.lastRotate) 150 fractionOfRotation := float64(timeSinceRotate) / float64(replStatsRotateInterval) 151 152 counts := make(perLocalityCounts) 153 var duration time.Duration 154 for i := range rs.mu.requests { 155 // We have to add len(rs.mu.requests) to the numerator to avoid getting a 156 // negative result from the modulus operation when rs.mu.idx is small. 157 requestsIdx := (rs.mu.idx + len(rs.mu.requests) - i) % len(rs.mu.requests) 158 if cur := rs.mu.requests[requestsIdx]; cur != nil { 159 decay := math.Pow(decayFactor, float64(i)+fractionOfRotation) 160 if i == 0 { 161 duration += time.Duration(float64(timeSinceRotate) * decay) 162 } else { 163 duration += time.Duration(float64(replStatsRotateInterval) * decay) 164 } 165 for k, v := range cur { 166 counts[k] += v * decay 167 } 168 } 169 } 170 171 if duration.Seconds() > 0 { 172 for k := range counts { 173 counts[k] = counts[k] / duration.Seconds() 174 } 175 } 176 return counts, now.Sub(rs.mu.lastReset) 177 } 178 179 // avgQPS returns the average requests-per-second and the amount of time 180 // over which the stat was accumulated. Note that these averages are exact, 181 // not exponentially decayed (there isn't a ton of justification for going 182 // one way or the the other, but not decaying makes the average more stable, 183 // which is probably better for avoiding rebalance thrashing). 184 func (rs *replicaStats) avgQPS() (float64, time.Duration) { 185 now := timeutil.Unix(0, rs.clock.PhysicalNow()) 186 187 rs.mu.Lock() 188 defer rs.mu.Unlock() 189 190 rs.maybeRotateLocked(now) 191 192 // First accumulate the counts, then divide by the total number of seconds. 193 var sum float64 194 var windowsUsed int 195 for i := range rs.mu.requests { 196 // We have to add len(rs.mu.requests) to the numerator to avoid getting a 197 // negative result from the modulus operation when rs.mu.idx is small. 198 requestsIdx := (rs.mu.idx + len(rs.mu.requests) - i) % len(rs.mu.requests) 199 if cur := rs.mu.requests[requestsIdx]; cur != nil { 200 windowsUsed++ 201 for _, v := range cur { 202 sum += v 203 } 204 } 205 } 206 if windowsUsed <= 0 { 207 return 0, 0 208 } 209 duration := now.Sub(rs.mu.lastRotate) + time.Duration(windowsUsed-1)*replStatsRotateInterval 210 if duration == 0 { 211 return 0, 0 212 } 213 return sum / duration.Seconds(), duration 214 } 215 216 func (rs *replicaStats) resetRequestCounts() { 217 rs.mu.Lock() 218 defer rs.mu.Unlock() 219 220 for i := range rs.mu.requests { 221 rs.mu.requests[i] = nil 222 } 223 rs.mu.requests[rs.mu.idx] = make(perLocalityCounts) 224 rs.mu.lastRotate = timeutil.Unix(0, rs.clock.PhysicalNow()) 225 rs.mu.lastReset = rs.mu.lastRotate 226 }