github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_metrics.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"go.etcd.io/etcd/raft"
)

// ReplicaMetrics contains details on the current status of the replica.
type ReplicaMetrics struct {
	Leader      bool
	LeaseValid  bool
	Leaseholder bool
	LeaseType   roachpb.LeaseType
	LeaseStatus kvserverpb.LeaseStatus

	// Quiescent indicates whether the replica believes itself to be quiesced.
	Quiescent bool
	// Ticking indicates whether the store is ticking the replica. It should be
	// the opposite of Quiescent.
	Ticking bool

	// Is this the replica which collects per-range metrics? This is done on
	// the first live replica in the range descriptor (see calcRangeCounter).
	RangeCounter    bool
	Unavailable     bool
	Underreplicated bool
	Overreplicated  bool
	BehindCount     int64
	LatchInfoLocal  kvserverpb.LatchManagerInfo
	LatchInfoGlobal kvserverpb.LatchManagerInfo
	RaftLogTooLarge bool
}

// Metrics returns the current metrics for the replica.
func (r *Replica) Metrics(
	ctx context.Context, now hlc.Timestamp, livenessMap IsLiveMap, clusterNodes int,
) ReplicaMetrics {
	r.mu.RLock()
	raftStatus := r.raftStatusRLocked()
	leaseStatus := r.leaseStatus(*r.mu.state.Lease, now, r.mu.minLeaseProposedTS)
	quiescent := r.mu.quiescent || r.mu.internalRaftGroup == nil
	desc := r.mu.state.Desc
	zone := r.mu.zone
	raftLogSize := r.mu.raftLogSize
	raftLogSizeTrusted := r.mu.raftLogSizeTrusted
	r.mu.RUnlock()

	r.store.unquiescedReplicas.Lock()
	_, ticking := r.store.unquiescedReplicas.m[r.RangeID]
	r.store.unquiescedReplicas.Unlock()

	latchInfoGlobal, latchInfoLocal := r.concMgr.LatchMetrics()

	return calcReplicaMetrics(
		ctx,
		now,
		&r.store.cfg.RaftConfig,
		zone,
		livenessMap,
		clusterNodes,
		desc,
		raftStatus,
		leaseStatus,
		r.store.StoreID(),
		quiescent,
		ticking,
		latchInfoLocal,
		latchInfoGlobal,
		raftLogSize,
		raftLogSizeTrusted,
	)
}
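// countProblemRanges is an illustrative sketch, not part of the upstream
// package (the name and signature are hypothetical): it shows how a caller
// that has gathered ReplicaMetrics for a set of replicas might derive
// cluster-level unavailability and replication gauges. Only the replica
// designated as the RangeCounter contributes, so each range is counted at
// most once, modulo the double-counting caveat described at calcRangeCounter
// below.
func countProblemRanges(ms []ReplicaMetrics) (unavailable, underreplicated, overreplicated int) {
	for _, m := range ms {
		// Skip replicas that are not responsible for range-level metrics.
		if !m.RangeCounter {
			continue
		}
		if m.Unavailable {
			unavailable++
		}
		if m.Underreplicated {
			underreplicated++
		}
		if m.Overreplicated {
			overreplicated++
		}
	}
	return unavailable, underreplicated, overreplicated
}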
func calcReplicaMetrics(
	_ context.Context,
	_ hlc.Timestamp,
	raftCfg *base.RaftConfig,
	zone *zonepb.ZoneConfig,
	livenessMap IsLiveMap,
	clusterNodes int,
	desc *roachpb.RangeDescriptor,
	raftStatus *raft.Status,
	leaseStatus kvserverpb.LeaseStatus,
	storeID roachpb.StoreID,
	quiescent bool,
	ticking bool,
	latchInfoLocal kvserverpb.LatchManagerInfo,
	latchInfoGlobal kvserverpb.LatchManagerInfo,
	raftLogSize int64,
	raftLogSizeTrusted bool,
) ReplicaMetrics {
	var m ReplicaMetrics

	var leaseOwner bool
	m.LeaseStatus = leaseStatus
	if leaseStatus.State == kvserverpb.LeaseState_VALID {
		m.LeaseValid = true
		leaseOwner = leaseStatus.Lease.OwnedBy(storeID)
		m.LeaseType = leaseStatus.Lease.Type()
	}
	m.Leaseholder = m.LeaseValid && leaseOwner
	m.Leader = isRaftLeader(raftStatus)
	m.Quiescent = quiescent
	m.Ticking = ticking

	m.RangeCounter, m.Unavailable, m.Underreplicated, m.Overreplicated =
		calcRangeCounter(storeID, desc, livenessMap, *zone.NumReplicas, clusterNodes)

	// The raft leader computes the number of raft entries that replicas are
	// behind.
	if m.Leader {
		m.BehindCount = calcBehindCount(raftStatus, desc, livenessMap)
	}

	m.LatchInfoLocal = latchInfoLocal
	m.LatchInfoGlobal = latchInfoGlobal

	const raftLogTooLargeMultiple = 4
	m.RaftLogTooLarge = raftLogSize > (raftLogTooLargeMultiple*raftCfg.RaftLogTruncationThreshold) &&
		raftLogSizeTrusted

	return m
}

// calcRangeCounter returns whether this replica is designated as the replica
// in the range responsible for range-level metrics, whether the range doesn't
// have a quorum of live replicas, and whether the range is currently
// under-replicated or over-replicated.
//
// Note: we compute an estimated range count across the cluster by counting the
// first live replica in each descriptor. Note that the first live replica is
// an arbitrary choice. We want to select one live replica to do the counting
// that all replicas can agree on.
//
// Note that this heuristic can double count. If the first live replica is on
// a node that is partitioned from the other replicas in the range, there may
// be multiple nodes which believe they are the first live replica. This
// scenario seems rare as it requires the partitioned node to be alive enough
// to be performing liveness heartbeats.
func calcRangeCounter(
	storeID roachpb.StoreID,
	desc *roachpb.RangeDescriptor,
	livenessMap IsLiveMap,
	numReplicas int32,
	clusterNodes int,
) (rangeCounter, unavailable, underreplicated, overreplicated bool) {
	// It seems unlikely that a learner replica would be the first live one, but
	// there's no particular reason to exclude them. Note that `All` returns the
	// voters first.
	for _, rd := range desc.Replicas().All() {
		if livenessMap[rd.NodeID].IsLive {
			rangeCounter = rd.StoreID == storeID
			break
		}
	}
	// We also compute per-range estimates of unavailability, under-replication,
	// and over-replication based on the liveness table.
	if rangeCounter {
		unavailable = !desc.Replicas().CanMakeProgress(func(rDesc roachpb.ReplicaDescriptor) bool {
			return livenessMap[rDesc.NodeID].IsLive
		})
		needed := GetNeededReplicas(numReplicas, clusterNodes)
		liveVoterReplicas := calcLiveVoterReplicas(desc, livenessMap)
		if needed > liveVoterReplicas {
			underreplicated = true
		} else if needed < liveVoterReplicas {
			overreplicated = true
		}
	}
	return
}

// calcLiveVoterReplicas returns a count of the live voter replicas; a live
// replica is determined by checking its node in the provided liveness map.
// This method is used when indicating under-replication, so only voter
// replicas are considered.
func calcLiveVoterReplicas(desc *roachpb.RangeDescriptor, livenessMap IsLiveMap) int {
	var live int
	for _, rd := range desc.Replicas().Voters() {
		if livenessMap[rd.NodeID].IsLive {
			live++
		}
	}
	return live
}
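// classifyReplication is a hedged restatement, not part of the upstream
// package (the name is hypothetical), of the comparison performed in
// calcRangeCounter above. For example, with a zone config asking for 3
// replicas in a cluster of 5 live nodes, GetNeededReplicas should return 3;
// two live voters then reads as under-replicated and four live voters as
// over-replicated.
func classifyReplication(neededReplicas, liveVoters int) (underreplicated, overreplicated bool) {
	return liveVoters < neededReplicas, liveVoters > neededReplicas
}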
// calcBehindCount returns a total count of log entries that follower replicas
// are behind. This can only be computed on the raft leader.
func calcBehindCount(
	raftStatus *raft.Status, desc *roachpb.RangeDescriptor, livenessMap IsLiveMap,
) int64 {
	var behindCount int64
	for _, rd := range desc.Replicas().All() {
		if progress, ok := raftStatus.Progress[uint64(rd.ReplicaID)]; ok {
			if progress.Match > 0 &&
				progress.Match < raftStatus.Commit {
				behindCount += int64(raftStatus.Commit) - int64(progress.Match)
			}
		}
	}

	return behindCount
}

// QueriesPerSecond returns the range's average QPS if it is the current
// leaseholder. If it isn't, this will return 0 because the replica does not
// know about the reads that the leaseholder is serving.
//
// A "Query" is a BatchRequest (regardless of its contents) arriving at the
// leaseholder with a gateway node set in the header (i.e. excluding requests
// that weren't sent through a DistSender, which in practice should be
// almost none).
func (r *Replica) QueriesPerSecond() float64 {
	qps, _ := r.leaseholderStats.avgQPS()
	return qps
}

// WritesPerSecond returns the range's average keys written per second. A
// "Write" is a mutation applied by Raft as measured by
// engine.RocksDBBatchCount(writeBatch). This corresponds roughly to the number
// of keys mutated by a write. For example, writing 12 intents would count as 24
// writes (12 for the metadata, 12 for the versions). A DeleteRange that
// ultimately only removes one key counts as one (or two if it's transactional).
func (r *Replica) WritesPerSecond() float64 {
	wps, _ := r.writeStats.avgQPS()
	return wps
}

func (r *Replica) needsSplitBySizeRLocked() bool {
	exceeded, _ := r.exceedsMultipleOfSplitSizeRLocked(1)
	return exceeded
}

func (r *Replica) needsMergeBySizeRLocked() bool {
	return r.mu.state.Stats.Total() < *r.mu.zone.RangeMinBytes
}

// exceedsMultipleOfSplitSizeRLocked returns whether the current size of the
// range exceeds the max size times mult. If so, the bytes overage is also
// returned. Note that the max size is either the current maximum dictated by
// the zone config or a larger previous maximum, which indicates that the max
// size changed relatively recently and the range should not be backpressured
// for being over it.
func (r *Replica) exceedsMultipleOfSplitSizeRLocked(mult float64) (exceeded bool, bytesOver int64) {
	maxBytes := *r.mu.zone.RangeMaxBytes
	if r.mu.largestPreviousMaxRangeSizeBytes > maxBytes {
		maxBytes = r.mu.largestPreviousMaxRangeSizeBytes
	}
	size := r.mu.state.Stats.Total()
	maxSize := int64(float64(maxBytes)*mult) + 1
	if maxBytes <= 0 || size <= maxSize {
		return false, 0
	}
	return true, size - maxSize
}
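// splitSizeOverage is an illustrative, lock-free sketch, not part of the
// upstream package (the name and signature are hypothetical), of the
// arithmetic in exceedsMultipleOfSplitSizeRLocked. With a 64 MiB
// zoneMaxBytes and mult=2, a 100 MiB range is not yet over the limit (the
// threshold is 128 MiB + 1), while a 200 MiB range is over by roughly 72 MiB.
// The largestPreviousMaxBytes argument models
// r.mu.largestPreviousMaxRangeSizeBytes, which keeps a range from being
// backpressured right after the configured maximum was lowered.
func splitSizeOverage(size, zoneMaxBytes, largestPreviousMaxBytes int64, mult float64) (exceeded bool, bytesOver int64) {
	// The effective ceiling is the larger of the zone's maximum and the
	// recently recorded previous maximum.
	maxBytes := zoneMaxBytes
	if largestPreviousMaxBytes > maxBytes {
		maxBytes = largestPreviousMaxBytes
	}
	maxSize := int64(float64(maxBytes)*mult) + 1
	if maxBytes <= 0 || size <= maxSize {
		return false, 0
	}
	return true, size - maxSize
}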