github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_metrics.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"go.etcd.io/etcd/raft"
)

// ReplicaMetrics contains details on the current status of the replica.
type ReplicaMetrics struct {
	Leader      bool
	LeaseValid  bool
	Leaseholder bool
	LeaseType   roachpb.LeaseType
	LeaseStatus kvserverpb.LeaseStatus

	// Quiescent indicates whether the replica believes itself to be quiesced.
	Quiescent bool
	// Ticking indicates whether the store is ticking the replica. It should be
	// the opposite of Quiescent.
	Ticking bool
    38  	// Is this the replica which collects per-range metrics? This is done either
    39  	// on the leader or, if there is no leader, on the largest live replica ID.
	RangeCounter    bool
	Unavailable     bool
	Underreplicated bool
	Overreplicated  bool
	BehindCount     int64
	LatchInfoLocal  kvserverpb.LatchManagerInfo
	LatchInfoGlobal kvserverpb.LatchManagerInfo
	RaftLogTooLarge bool
}

// Metrics returns the current metrics for the replica.
func (r *Replica) Metrics(
	ctx context.Context, now hlc.Timestamp, livenessMap IsLiveMap, clusterNodes int,
) ReplicaMetrics {
	r.mu.RLock()
	raftStatus := r.raftStatusRLocked()
	leaseStatus := r.leaseStatus(*r.mu.state.Lease, now, r.mu.minLeaseProposedTS)
	quiescent := r.mu.quiescent || r.mu.internalRaftGroup == nil
	desc := r.mu.state.Desc
	zone := r.mu.zone
	raftLogSize := r.mu.raftLogSize
	raftLogSizeTrusted := r.mu.raftLogSizeTrusted
	r.mu.RUnlock()

	r.store.unquiescedReplicas.Lock()
	_, ticking := r.store.unquiescedReplicas.m[r.RangeID]
	r.store.unquiescedReplicas.Unlock()

	latchInfoGlobal, latchInfoLocal := r.concMgr.LatchMetrics()

	return calcReplicaMetrics(
		ctx,
		now,
		&r.store.cfg.RaftConfig,
		zone,
		livenessMap,
		clusterNodes,
		desc,
		raftStatus,
		leaseStatus,
		r.store.StoreID(),
		quiescent,
		ticking,
		latchInfoLocal,
		latchInfoGlobal,
		raftLogSize,
		raftLogSizeTrusted,
	)
}
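
// A minimal sketch of how a caller might consume the result (hypothetical
// aggregation loop; the real store-level aggregation lives outside this
// file):
//
//	m := repl.Metrics(ctx, now, livenessMap, clusterNodes)
//	if m.RangeCounter {
//		rangeCount++
//		if m.Unavailable {
//			unavailableCount++
//		}
//		if m.Underreplicated {
//			underreplicatedCount++
//		}
//	}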

func calcReplicaMetrics(
	_ context.Context,
	_ hlc.Timestamp,
	raftCfg *base.RaftConfig,
	zone *zonepb.ZoneConfig,
	livenessMap IsLiveMap,
	clusterNodes int,
	desc *roachpb.RangeDescriptor,
	raftStatus *raft.Status,
	leaseStatus kvserverpb.LeaseStatus,
	storeID roachpb.StoreID,
	quiescent bool,
	ticking bool,
	latchInfoLocal kvserverpb.LatchManagerInfo,
	latchInfoGlobal kvserverpb.LatchManagerInfo,
	raftLogSize int64,
	raftLogSizeTrusted bool,
) ReplicaMetrics {
	var m ReplicaMetrics

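	// Lease-related fields are filled in only while the lease is valid; if
	// the lease is not valid, LeaseType and Leaseholder keep their zero
	// values.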
	var leaseOwner bool
	m.LeaseStatus = leaseStatus
	if leaseStatus.State == kvserverpb.LeaseState_VALID {
		m.LeaseValid = true
		leaseOwner = leaseStatus.Lease.OwnedBy(storeID)
		m.LeaseType = leaseStatus.Lease.Type()
	}
	m.Leaseholder = m.LeaseValid && leaseOwner
	m.Leader = isRaftLeader(raftStatus)
	m.Quiescent = quiescent
	m.Ticking = ticking

	m.RangeCounter, m.Unavailable, m.Underreplicated, m.Overreplicated =
		calcRangeCounter(storeID, desc, livenessMap, *zone.NumReplicas, clusterNodes)

	// The raft leader computes the number of raft entries that replicas are
	// behind.
	if m.Leader {
		m.BehindCount = calcBehindCount(raftStatus, desc, livenessMap)
	}

	m.LatchInfoLocal = latchInfoLocal
	m.LatchInfoGlobal = latchInfoGlobal

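	// Flag the raft log as too large once it exceeds a fixed multiple of the
	// configured truncation threshold (e.g. with a 4 MiB threshold this
	// fires at 16 MiB), and only when the measured size is trusted.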
	const raftLogTooLargeMultiple = 4
	m.RaftLogTooLarge = raftLogSize > (raftLogTooLargeMultiple*raftCfg.RaftLogTruncationThreshold) &&
		raftLogSizeTrusted

	return m
}

// calcRangeCounter returns whether this replica is designated as the
// replica in the range responsible for range-level metrics, whether the
// range doesn't have a quorum of live replicas, and whether the range is
// currently under- or over-replicated.
//
// Note: we compute an estimated range count across the cluster by counting
// the first live replica in each descriptor. The choice of the first live
// replica is arbitrary; what matters is selecting one live replica to do
// the counting that all replicas can agree on.
//
// Note that this heuristic can double count. If the first live replica is on
// a node that is partitioned from the other replicas in the range, there may
// be multiple nodes which believe they are the first live replica. This
// scenario seems rare as it requires the partitioned node to be alive enough
// to be performing liveness heartbeats.
func calcRangeCounter(
	storeID roachpb.StoreID,
	desc *roachpb.RangeDescriptor,
	livenessMap IsLiveMap,
	numReplicas int32,
	clusterNodes int,
) (rangeCounter, unavailable, underreplicated, overreplicated bool) {
	// It seems unlikely that a learner replica would be the first live one, but
	// there's no particular reason to exclude them. Note that `All` returns the
	// voters first.
	for _, rd := range desc.Replicas().All() {
		if livenessMap[rd.NodeID].IsLive {
			rangeCounter = rd.StoreID == storeID
			break
		}
	}
	// If this replica is the designated counter, also estimate whether the
	// range is unavailable, under-replicated, or over-replicated based on
	// the liveness map.
	if rangeCounter {
		unavailable = !desc.Replicas().CanMakeProgress(func(rDesc roachpb.ReplicaDescriptor) bool {
			return livenessMap[rDesc.NodeID].IsLive
		})
		needed := GetNeededReplicas(numReplicas, clusterNodes)
		liveVoterReplicas := calcLiveVoterReplicas(desc, livenessMap)
		if needed > liveVoterReplicas {
			underreplicated = true
		} else if needed < liveVoterReplicas {
			overreplicated = true
		}
	}
	return
}
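
// For illustration (hypothetical values): consider a range with replicas on
// (n1,s1), (n2,s2), and (n3,s3), where n1 is dead and n2 and n3 are live.
// The replica on s2 is the first live one in the descriptor, so only the
// store holding s2 reports rangeCounter=true; summing that flag across all
// stores yields the estimated cluster-wide range count.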

// calcLiveVoterReplicas returns a count of the live voter replicas; a
// replica counts as live if its node is live in the provided liveness map.
// This function is used when reporting under-replication, which is why only
// voter replicas are considered.
func calcLiveVoterReplicas(desc *roachpb.RangeDescriptor, livenessMap IsLiveMap) int {
	var live int
	for _, rd := range desc.Replicas().Voters() {
		if livenessMap[rd.NodeID].IsLive {
			live++
		}
	}
	return live
}

// calcBehindCount returns the total number of raft log entries that follower
// replicas are behind, summed across all followers. This can only be
// computed on the raft leader.
func calcBehindCount(
	raftStatus *raft.Status, desc *roachpb.RangeDescriptor, livenessMap IsLiveMap,
) int64 {
	var behindCount int64
	for _, rd := range desc.Replicas().All() {
		if progress, ok := raftStatus.Progress[uint64(rd.ReplicaID)]; ok {
			if progress.Match > 0 &&
				progress.Match < raftStatus.Commit {
				behindCount += int64(raftStatus.Commit) - int64(progress.Match)
			}
		}
	}

	return behindCount
}
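
// For example (hypothetical values): with Commit=100 and two followers whose
// Match indexes are 90 and 95, the result is (100-90) + (100-95) = 15.
// Followers with Match == 0 have not reported a position yet and are
// skipped rather than counted as maximally behind.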

// QueriesPerSecond returns the range's average QPS if it is the current
// leaseholder. If it isn't, this will return 0 because the replica does not
// know about the reads that the leaseholder is serving.
//
// A "Query" is a BatchRequest (regardless of its contents) arriving at the
// leaseholder with a gateway node set in the header (i.e. excluding requests
// that weren't sent through a DistSender, which in practice should be
// essentially none).
func (r *Replica) QueriesPerSecond() float64 {
	qps, _ := r.leaseholderStats.avgQPS()
	return qps
}

// WritesPerSecond returns the range's average keys written per second. A
// "Write" is a mutation applied by Raft as measured by
// engine.RocksDBBatchCount(writeBatch). This corresponds roughly to the number
// of keys mutated by a write. For example, writing 12 intents would count as 24
// writes (12 for the metadata, 12 for the versions). A DeleteRange that
// ultimately only removes one key counts as one (or two if it's transactional).
func (r *Replica) WritesPerSecond() float64 {
	wps, _ := r.writeStats.avgQPS()
	return wps
}

func (r *Replica) needsSplitBySizeRLocked() bool {
	exceeded, _ := r.exceedsMultipleOfSplitSizeRLocked(1)
	return exceeded
}

func (r *Replica) needsMergeBySizeRLocked() bool {
	return r.mu.state.Stats.Total() < *r.mu.zone.RangeMinBytes
}

// exceedsMultipleOfSplitSizeRLocked returns whether the current size of the
// range exceeds the max size times mult. If so, the bytes of overage are also
// returned. Note that the max size is determined either by the current
// maximum size as dictated by the zone config or by a previously recorded,
// larger max size; the latter indicates that the max size changed relatively
// recently, in which case we should not backpressure writes merely for
// exceeding the new, smaller limit.
func (r *Replica) exceedsMultipleOfSplitSizeRLocked(mult float64) (exceeded bool, bytesOver int64) {
	maxBytes := *r.mu.zone.RangeMaxBytes
	if r.mu.largestPreviousMaxRangeSizeBytes > maxBytes {
		maxBytes = r.mu.largestPreviousMaxRangeSizeBytes
	}
	size := r.mu.state.Stats.Total()
	maxSize := int64(float64(maxBytes)*mult) + 1
	if maxBytes <= 0 || size <= maxSize {
		return false, 0
	}
	return true, size - maxSize
}
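
// For example (hypothetical numbers): with an effective maxBytes of 64 MiB
// and mult=1, an 80 MiB range returns (true, 80 MiB - (64 MiB + 1)), while a
// 60 MiB range returns (false, 0). If a zone-config change recently lowered
// RangeMaxBytes, largestPreviousMaxRangeSizeBytes keeps the old, larger
// limit in effect so the range is not immediately flagged as over-sized.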