github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/rpc/metrics.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package rpc
    12  
    13  import "github.com/cockroachdb/cockroach/pkg/util/metric"
    14  
// We want to have a way to track the number of connections,
// but we also want to have a way to know the health of those connections.
    17  //
    18  // For this we're going to add a variety of metrics.
    19  // One will be a gauge of how many heartbeat loops are in which state
    20  // and another will be a counter for heartbeat failures.
    21  
    22  var (
    23  	// The below gauges store the current state of running heartbeat loops.
    24  	// Gauges are useful for examing the current state of a system but can hide
    25  	// information is the face of rapidly changing values. The context
    26  	// additionally keeps counters for the number of heartbeat loops started
    27  	// and completed as well as a counter for the number of heartbeat failures.
    28  	// Together these metrics should provide a picture of the state of current
    29  	// connections.
    30  
    31  	metaHeartbeatsInitializing = metric.Metadata{
    32  		Name:        "rpc.heartbeats.initializing",
    33  		Help:        "Gauge of current connections in the initializing state",
    34  		Measurement: "Connections",
    35  		Unit:        metric.Unit_COUNT,
    36  	}
    37  	metaHeartbeatsNominal = metric.Metadata{
    38  		Name:        "rpc.heartbeats.nominal",
    39  		Help:        "Gauge of current connections in the nominal state",
    40  		Measurement: "Connections",
    41  		Unit:        metric.Unit_COUNT,
    42  	}
    43  	metaHeartbeatsFailed = metric.Metadata{
    44  		Name:        "rpc.heartbeats.failed",
    45  		Help:        "Gauge of current connections in the failed state",
    46  		Measurement: "Connections",
    47  		Unit:        metric.Unit_COUNT,
    48  	}
    49  
    50  	metaHeartbeatLoopsStarted = metric.Metadata{
    51  		Name: "rpc.heartbeats.loops.started",
    52  		Help: "Counter of the number of connection heartbeat loops which " +
    53  			"have been started",
    54  		Measurement: "Connections",
    55  		Unit:        metric.Unit_COUNT,
    56  	}
    57  	metaHeartbeatLoopsExited = metric.Metadata{
    58  		Name: "rpc.heartbeats.loops.exited",
    59  		Help: "Counter of the number of connection heartbeat loops which " +
    60  			"have exited with an error",
    61  		Measurement: "Connections",
    62  		Unit:        metric.Unit_COUNT,
    63  	}
    64  )
    65  
    66  type heartbeatState int
    67  
    68  const (
    69  	heartbeatNotRunning heartbeatState = iota
    70  	heartbeatInitializing
    71  	heartbeatNominal
    72  	heartbeatFailed
    73  )
    74  
    75  func makeMetrics() Metrics {
    76  	return Metrics{
    77  		HeartbeatLoopsStarted:  metric.NewCounter(metaHeartbeatLoopsStarted),
    78  		HeartbeatLoopsExited:   metric.NewCounter(metaHeartbeatLoopsExited),
    79  		HeartbeatsInitializing: metric.NewGauge(metaHeartbeatsInitializing),
    80  		HeartbeatsNominal:      metric.NewGauge(metaHeartbeatsNominal),
    81  		HeartbeatsFailed:       metric.NewGauge(metaHeartbeatsFailed),
    82  	}
    83  }
    84  
// Metrics is a metrics struct for Context metrics. It holds two counters for
// heartbeat loop lifecycle events and one gauge per heartbeat state.
type Metrics struct {

	// HeartbeatLoopsStarted is a counter which tracks the number of heartbeat
	// loops which have been started.
	HeartbeatLoopsStarted *metric.Counter

	// HeartbeatLoopsExited is a counter which tracks the number of heartbeat
	// loops which have exited with an error. The only time a heartbeat loop
	// exits without an error is during server shutdown.
	HeartbeatLoopsExited *metric.Counter

	// HeartbeatsInitializing tracks the current number of heartbeat loops
	// which have not yet ever succeeded.
	HeartbeatsInitializing *metric.Gauge
	// HeartbeatsNominal tracks the current number of heartbeat loops which
	// succeeded on their previous attempt.
	HeartbeatsNominal *metric.Gauge
	// HeartbeatsFailed tracks the current number of heartbeat loops which
	// are in the failed state.
	// NOTE(review): presumably these are loops whose previous attempt
	// failed; confirm against the heartbeat loop implementation.
	HeartbeatsFailed *metric.Gauge
}
   107  
   108  // updateHeartbeatState decrements the gauge for the current state and
   109  // increments the gauge for the new state, returning the new state.
   110  func updateHeartbeatState(m *Metrics, old, new heartbeatState) heartbeatState {
   111  	if old == new {
   112  		return new
   113  	}
   114  	if g := heartbeatGauge(m, new); g != nil {
   115  		g.Inc(1)
   116  	}
   117  	if g := heartbeatGauge(m, old); g != nil {
   118  		g.Dec(1)
   119  	}
   120  	return new
   121  }
   122  
   123  // heartbeatGauge returns the appropriate gauge for the given heartbeatState.
   124  func heartbeatGauge(m *Metrics, s heartbeatState) (g *metric.Gauge) {
   125  	switch s {
   126  	case heartbeatInitializing:
   127  		g = m.HeartbeatsInitializing
   128  	case heartbeatNominal:
   129  		g = m.HeartbeatsNominal
   130  	case heartbeatFailed:
   131  		g = m.HeartbeatsFailed
   132  	}
   133  	return g
   134  }