github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/rpc/clock_offset.go (about) 1 // Copyright 2014 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package rpc 12 13 import ( 14 "context" 15 "math" 16 "time" 17 18 "github.com/VividCortex/ewma" 19 "github.com/cockroachdb/cockroach/pkg/util/hlc" 20 "github.com/cockroachdb/cockroach/pkg/util/log" 21 "github.com/cockroachdb/cockroach/pkg/util/metric" 22 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 23 "github.com/cockroachdb/errors" 24 "github.com/montanaflynn/stats" 25 ) 26 27 // RemoteClockMetrics is the collection of metrics for the clock monitor. 28 type RemoteClockMetrics struct { 29 ClockOffsetMeanNanos *metric.Gauge 30 ClockOffsetStdDevNanos *metric.Gauge 31 LatencyHistogramNanos *metric.Histogram 32 } 33 34 // avgLatencyMeasurementAge determines how to exponentially weight the 35 // moving average of latency measurements. This means that the weight 36 // will center around the 20th oldest measurement, such that for measurements 37 // that are made every 3 seconds, the average measurement will be about one 38 // minute old. 39 const avgLatencyMeasurementAge = 20.0 40 41 var ( 42 metaClockOffsetMeanNanos = metric.Metadata{ 43 Name: "clock-offset.meannanos", 44 Help: "Mean clock offset with other nodes", 45 Measurement: "Clock Offset", 46 Unit: metric.Unit_NANOSECONDS, 47 } 48 metaClockOffsetStdDevNanos = metric.Metadata{ 49 Name: "clock-offset.stddevnanos", 50 Help: "Stddev clock offset with other nodes", 51 Measurement: "Clock Offset", 52 Unit: metric.Unit_NANOSECONDS, 53 } 54 metaLatencyHistogramNanos = metric.Metadata{ 55 Name: "round-trip-latency", 56 Help: "Distribution of round-trip latencies with other nodes", 57 Measurement: "Roundtrip Latency", 58 Unit: metric.Unit_NANOSECONDS, 59 } 60 ) 61 62 // RemoteClockMonitor keeps track of the most recent measurements of remote 63 // offsets and round-trip latency from this node to connected nodes. 64 type RemoteClockMonitor struct { 65 clock *hlc.Clock 66 offsetTTL time.Duration 67 68 mu struct { 69 syncutil.RWMutex 70 offsets map[string]RemoteOffset 71 latenciesNanos map[string]ewma.MovingAverage 72 } 73 74 metrics RemoteClockMetrics 75 } 76 77 // newRemoteClockMonitor returns a monitor with the given server clock. 78 func newRemoteClockMonitor( 79 clock *hlc.Clock, offsetTTL time.Duration, histogramWindowInterval time.Duration, 80 ) *RemoteClockMonitor { 81 r := RemoteClockMonitor{ 82 clock: clock, 83 offsetTTL: offsetTTL, 84 } 85 r.mu.offsets = make(map[string]RemoteOffset) 86 r.mu.latenciesNanos = make(map[string]ewma.MovingAverage) 87 if histogramWindowInterval == 0 { 88 histogramWindowInterval = time.Duration(math.MaxInt64) 89 } 90 r.metrics = RemoteClockMetrics{ 91 ClockOffsetMeanNanos: metric.NewGauge(metaClockOffsetMeanNanos), 92 ClockOffsetStdDevNanos: metric.NewGauge(metaClockOffsetStdDevNanos), 93 LatencyHistogramNanos: metric.NewLatency(metaLatencyHistogramNanos, histogramWindowInterval), 94 } 95 return &r 96 } 97 98 // Metrics returns the metrics struct. Useful to examine individual metrics, 99 // or to add to the registry. 100 func (r *RemoteClockMonitor) Metrics() *RemoteClockMetrics { 101 return &r.metrics 102 } 103 104 // Latency returns the exponentially weighted moving average latency to the 105 // given node address. Returns true if the measurement is valid, or false if 106 // we don't have enough samples to compute a reliable average. 107 func (r *RemoteClockMonitor) Latency(addr string) (time.Duration, bool) { 108 r.mu.RLock() 109 defer r.mu.RUnlock() 110 if avg, ok := r.mu.latenciesNanos[addr]; ok && avg.Value() != 0.0 { 111 return time.Duration(int64(avg.Value())), true 112 } 113 return 0, false 114 } 115 116 // AllLatencies returns a map of all currently valid latency measurements. 117 func (r *RemoteClockMonitor) AllLatencies() map[string]time.Duration { 118 r.mu.RLock() 119 defer r.mu.RUnlock() 120 result := make(map[string]time.Duration) 121 for addr, avg := range r.mu.latenciesNanos { 122 if avg.Value() != 0.0 { 123 result[addr] = time.Duration(int64(avg.Value())) 124 } 125 } 126 return result 127 } 128 129 // UpdateOffset is a thread-safe way to update the remote clock and latency 130 // measurements. 131 // 132 // It only updates the offset for addr if one of the following cases holds: 133 // 1. There is no prior offset for that address. 134 // 2. The old offset for addr was measured long enough ago to be considered 135 // stale. 136 // 3. The new offset's error is smaller than the old offset's error. 137 // 138 // Pass a roundTripLatency of 0 or less to avoid recording the latency. 139 func (r *RemoteClockMonitor) UpdateOffset( 140 ctx context.Context, addr string, offset RemoteOffset, roundTripLatency time.Duration, 141 ) { 142 emptyOffset := offset == RemoteOffset{} 143 144 r.mu.Lock() 145 defer r.mu.Unlock() 146 147 if oldOffset, ok := r.mu.offsets[addr]; !ok { 148 // We don't have a measurement - if the incoming measurement is not empty, 149 // set it. 150 if !emptyOffset { 151 r.mu.offsets[addr] = offset 152 } 153 } else if oldOffset.isStale(r.offsetTTL, r.clock.PhysicalTime()) { 154 // We have a measurement but it's old - if the incoming measurement is not empty, 155 // set it, otherwise delete the old measurement. 156 if !emptyOffset { 157 r.mu.offsets[addr] = offset 158 } else { 159 delete(r.mu.offsets, addr) 160 } 161 } else if offset.Uncertainty < oldOffset.Uncertainty { 162 // We have a measurement but its uncertainty is greater than that of the 163 // incoming measurement - if the incoming measurement is not empty, set it. 164 if !emptyOffset { 165 r.mu.offsets[addr] = offset 166 } 167 } 168 169 if roundTripLatency > 0 { 170 latencyAvg, ok := r.mu.latenciesNanos[addr] 171 if !ok { 172 latencyAvg = ewma.NewMovingAverage(avgLatencyMeasurementAge) 173 r.mu.latenciesNanos[addr] = latencyAvg 174 } 175 latencyAvg.Add(float64(roundTripLatency.Nanoseconds())) 176 r.metrics.LatencyHistogramNanos.RecordValue(roundTripLatency.Nanoseconds()) 177 } 178 179 if log.V(2) { 180 log.Infof(ctx, "update offset: %s %v", addr, r.mu.offsets[addr]) 181 } 182 } 183 184 // VerifyClockOffset calculates the number of nodes to which the known offset 185 // is healthy (as defined by RemoteOffset.isHealthy). It returns nil iff more 186 // than half the known offsets are healthy, and an error otherwise. A non-nil 187 // return indicates that this node's clock is unreliable, and that the node 188 // should terminate. 189 func (r *RemoteClockMonitor) VerifyClockOffset(ctx context.Context) error { 190 // By the contract of the hlc, if the value is 0, then safety checking of 191 // the max offset is disabled. However we may still want to propagate the 192 // information to a status node. 193 // 194 // TODO(tschottdorf): disallow maxOffset == 0 but probably lots of tests to 195 // fix. 196 if maxOffset := r.clock.MaxOffset(); maxOffset != 0 { 197 now := r.clock.PhysicalTime() 198 199 healthyOffsetCount := 0 200 201 r.mu.Lock() 202 // Each measurement is recorded as its minimum and maximum value. 203 offsets := make(stats.Float64Data, 0, 2*len(r.mu.offsets)) 204 for addr, offset := range r.mu.offsets { 205 if offset.isStale(r.offsetTTL, now) { 206 delete(r.mu.offsets, addr) 207 continue 208 } 209 offsets = append(offsets, float64(offset.Offset+offset.Uncertainty)) 210 offsets = append(offsets, float64(offset.Offset-offset.Uncertainty)) 211 if offset.isHealthy(ctx, maxOffset) { 212 healthyOffsetCount++ 213 } 214 } 215 numClocks := len(r.mu.offsets) 216 r.mu.Unlock() 217 218 mean, err := offsets.Mean() 219 if err != nil && !errors.Is(err, stats.EmptyInput) { 220 return err 221 } 222 stdDev, err := offsets.StandardDeviation() 223 if err != nil && !errors.Is(err, stats.EmptyInput) { 224 return err 225 } 226 r.metrics.ClockOffsetMeanNanos.Update(int64(mean)) 227 r.metrics.ClockOffsetStdDevNanos.Update(int64(stdDev)) 228 229 if numClocks > 0 && healthyOffsetCount <= numClocks/2 { 230 return errors.Errorf( 231 "clock synchronization error: this node is more than %s away from at least half of the known nodes (%d of %d are within the offset)", 232 maxOffset, healthyOffsetCount, numClocks) 233 } 234 if log.V(1) { 235 log.Infof(ctx, "%d of %d nodes are within the maximum clock offset of %s", healthyOffsetCount, numClocks, maxOffset) 236 } 237 } 238 239 return nil 240 } 241 242 func (r RemoteOffset) isHealthy(ctx context.Context, maxOffset time.Duration) bool { 243 // Tolerate up to 80% of the maximum offset. 244 toleratedOffset := maxOffset * 4 / 5 245 246 // Offset may be negative, but Uncertainty is always positive. 247 absOffset := r.Offset 248 if absOffset < 0 { 249 absOffset = -absOffset 250 } 251 switch { 252 case time.Duration(absOffset-r.Uncertainty)*time.Nanosecond > toleratedOffset: 253 // The minimum possible true offset exceeds the maximum offset; definitely 254 // unhealthy. 255 return false 256 257 case time.Duration(absOffset+r.Uncertainty)*time.Nanosecond < toleratedOffset: 258 // The maximum possible true offset does not exceed the maximum offset; 259 // definitely healthy. 260 return true 261 262 default: 263 // The maximum offset is in the uncertainty window of the measured offset; 264 // health is ambiguous. For now, we err on the side of not spuriously 265 // killing nodes. 266 if log.V(1) { 267 log.Infof(ctx, "uncertain remote offset %s for maximum tolerated offset %s, treating as healthy", r, toleratedOffset) 268 } 269 return true 270 } 271 } 272 273 func (r RemoteOffset) isStale(ttl time.Duration, now time.Time) bool { 274 return r.measuredAt().Add(ttl).Before(now) 275 }