github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/apiserver/apiservermetrics.go (about)

     1  // Copyright 2017 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package apiserver
     5  
     6  import (
     7  	"fmt"
     8  	"runtime"
     9  
    10  	"github.com/prometheus/client_golang/prometheus"
    11  
    12  	"github.com/juju/juju/apiserver/observer/metricobserver"
    13  	"github.com/juju/juju/version"
    14  )
    15  
// apiserverMetricsNamespace and apiserverSubsystemNamespace are the
// Namespace and Subsystem values set in the options of every metric built
// by NewMetricsCollector.
const (
	apiserverMetricsNamespace   = "juju"
	apiserverSubsystemNamespace = "apiserver"
)
    20  
// Label names used by the apiserver metrics.
const (
	// MetricLabelEndpoint is the endpoint label. It is listed in
	// MetricAPIConnectionsLabelNames and MetricPingFailureLabelNames.
	MetricLabelEndpoint = "endpoint"

	// MetricLabelModelUUID is the model UUID label. It is listed in the
	// ping failure, log and outbound request label sets.
	// Note: Prometheus label names may not contain hyphens, only
	// underscores, hence "model_uuid".
	MetricLabelModelUUID = "model_uuid"

	// MetricLabelState is the state label, listed in MetricLogLabelNames.
	MetricLabelState = "state"

	// MetricLabelHost is the host label, listed in both outbound request
	// label sets.
	MetricLabelHost = "host"

	// MetricLabelStatus is the status label, listed only in
	// MetricTotalRequestsWithStatusLabelNames.
	MetricLabelStatus = "status"

	// MetricLabelVersion is the label carrying the Juju version of the
	// controller on the build_info metric.
	MetricLabelVersion = "version"
)
    43  
    44  // MetricAPIConnectionsLabelNames defines a series of labels for the
    45  // APIConnections metric.
    46  var MetricAPIConnectionsLabelNames = []string{
    47  	MetricLabelEndpoint,
    48  }
    49  
    50  // MetricPingFailureLabelNames defines a series of labels for the PingFailure
    51  // metric.
    52  var MetricPingFailureLabelNames = []string{
    53  	MetricLabelModelUUID,
    54  	MetricLabelEndpoint,
    55  }
    56  
    57  // MetricLogLabelNames defines a series of labels for the LogWrite and LogRead
    58  // metric
    59  var MetricLogLabelNames = []string{
    60  	MetricLabelModelUUID,
    61  	MetricLabelState,
    62  }
    63  
    64  // MetricTotalRequestsWithStatusLabelNames defines a series of labels for the
    65  // TotalRequests metric.
    66  var MetricTotalRequestsWithStatusLabelNames = []string{
    67  	MetricLabelModelUUID,
    68  	MetricLabelHost,
    69  	MetricLabelStatus,
    70  }
    71  
    72  // MetricTotalRequestsLabelNames defines a series of labels for the
    73  // TotalRequests metric.
    74  var MetricTotalRequestsLabelNames = []string{
    75  	MetricLabelModelUUID,
    76  	MetricLabelHost,
    77  }
    78  
// Collector is a prometheus.Collector that collects metrics based
// on apiserver status. Its Describe and Collect methods forward to every
// metric field declared here; NewMetricsCollector populates all of them.
type Collector struct {
	// BuildInfo is the build_info gauge, whose constant labels describe the
	// build. TotalConnections is the connections_total counter.
	BuildInfo        prometheus.Gauge
	TotalConnections prometheus.Counter

	// LoginAttempts is the active_login_attempts gauge, APIConnections the
	// connections gauge vector, and APIRequestDuration the
	// request_duration_seconds summary vector.
	LoginAttempts      prometheus.Gauge
	APIConnections     *prometheus.GaugeVec
	APIRequestDuration *prometheus.SummaryVec

	// PingFailureCount is the ping_failure_count counter vector.
	PingFailureCount *prometheus.CounterVec

	// LogWriteCount and LogReadCount are the log_write_count and
	// log_read_count counter vectors; both use MetricLogLabelNames.
	LogWriteCount *prometheus.CounterVec
	LogReadCount  *prometheus.CounterVec

	// TotalRequests, TotalRequestErrors and TotalRequestsDuration are the
	// outbound_requests_total, outbound_request_errors_total and
	// outbound_request_duration_seconds metrics for outbound API requests.
	TotalRequests         *prometheus.CounterVec
	TotalRequestErrors    *prometheus.CounterVec
	TotalRequestsDuration *prometheus.SummaryVec
}
    98  
    99  // NewMetricsCollector returns a new Collector.
   100  func NewMetricsCollector() *Collector {
   101  	// BuildInfo is a bit special as a 'metric'. It is following the guidance of
   102  	// https://prometheus.io/docs/instrumenting/writing_exporters/#target-labels-not-static-scraped-labels
   103  	// and https://www.robustperception.io/how-to-have-labels-for-machine-roles/
   104  	// you essentially have one const metric with a value of 1 and the labels are the strings
   105  	// that describe the instance.
   106  	buildInfo := prometheus.NewGauge(prometheus.GaugeOpts{
   107  		Namespace: apiserverMetricsNamespace,
   108  		Subsystem: apiserverSubsystemNamespace,
   109  		Name:      "build_info",
   110  		Help:      "The versions of various parts of the Juju Controller",
   111  		ConstLabels: prometheus.Labels{
   112  			MetricLabelVersion: version.Current.String(),
   113  			"official_build":   fmt.Sprint(version.OfficialBuild),
   114  			"go_compiler":      runtime.Compiler,
   115  			"go_version":       runtime.Version(),
   116  			"git_commit":       version.GitCommit,
   117  			"git_commit_state": version.GitTreeState,
   118  			"go_build_tags":    version.GoBuildTags,
   119  		},
   120  	})
   121  	buildInfo.Set(1.0)
   122  	return &Collector{
   123  		TotalConnections: prometheus.NewCounter(prometheus.CounterOpts{
   124  			Namespace: apiserverMetricsNamespace,
   125  			Subsystem: apiserverSubsystemNamespace,
   126  			Name:      "connections_total",
   127  			Help:      "Total number of apiserver connections ever made",
   128  		}),
   129  
   130  		APIConnections: prometheus.NewGaugeVec(prometheus.GaugeOpts{
   131  			Namespace: apiserverMetricsNamespace,
   132  			Subsystem: apiserverSubsystemNamespace,
   133  			Name:      "connections",
   134  			Help:      "Current number of active apiserver connections for api handlers",
   135  		}, MetricAPIConnectionsLabelNames),
   136  		LoginAttempts: prometheus.NewGauge(prometheus.GaugeOpts{
   137  			Namespace: apiserverMetricsNamespace,
   138  			Subsystem: apiserverSubsystemNamespace,
   139  			Name:      "active_login_attempts",
   140  			Help:      "Current number of active agent login attempts",
   141  		}),
   142  		APIRequestDuration: prometheus.NewSummaryVec(prometheus.SummaryOpts{
   143  			Namespace: apiserverMetricsNamespace,
   144  			Subsystem: apiserverSubsystemNamespace,
   145  			Name:      "request_duration_seconds",
   146  			Help:      "Latency of Juju API requests in seconds.",
   147  			Objectives: map[float64]float64{
   148  				0.5:  0.05,
   149  				0.9:  0.01,
   150  				0.99: 0.001,
   151  			},
   152  		}, metricobserver.MetricLabelNames),
   153  
   154  		PingFailureCount: prometheus.NewCounterVec(prometheus.CounterOpts{
   155  			Namespace: apiserverMetricsNamespace,
   156  			Subsystem: apiserverSubsystemNamespace,
   157  			Name:      "ping_failure_count",
   158  			Help:      "Current number of ping failures",
   159  		}, MetricPingFailureLabelNames),
   160  
   161  		LogWriteCount: prometheus.NewCounterVec(prometheus.CounterOpts{
   162  			Namespace: apiserverMetricsNamespace,
   163  			Subsystem: apiserverSubsystemNamespace,
   164  			Name:      "log_write_count",
   165  			Help:      "Current number of log writes",
   166  		}, MetricLogLabelNames),
   167  		LogReadCount: prometheus.NewCounterVec(prometheus.CounterOpts{
   168  			Namespace: apiserverMetricsNamespace,
   169  			Subsystem: apiserverSubsystemNamespace,
   170  			Name:      "log_read_count",
   171  			Help:      "Current number of log reads",
   172  		}, MetricLogLabelNames),
   173  
   174  		TotalRequests: prometheus.NewCounterVec(prometheus.CounterOpts{
   175  			Namespace: apiserverMetricsNamespace,
   176  			Subsystem: apiserverSubsystemNamespace,
   177  			Name:      "outbound_requests_total",
   178  			Help:      "Total number of http requests to outbound APIs",
   179  		}, MetricTotalRequestsWithStatusLabelNames),
   180  		TotalRequestErrors: prometheus.NewCounterVec(prometheus.CounterOpts{
   181  			Namespace: apiserverMetricsNamespace,
   182  			Subsystem: apiserverSubsystemNamespace,
   183  			Name:      "outbound_request_errors_total",
   184  			Help:      "Total number of http request errors to outbound APIs",
   185  		}, MetricTotalRequestsLabelNames),
   186  		TotalRequestsDuration: prometheus.NewSummaryVec(prometheus.SummaryOpts{
   187  			Namespace: apiserverMetricsNamespace,
   188  			Subsystem: apiserverSubsystemNamespace,
   189  			Name:      "outbound_request_duration_seconds",
   190  			Help:      "Latency of outbound API requests in seconds.",
   191  			Objectives: map[float64]float64{
   192  				0.5:  0.05,
   193  				0.9:  0.01,
   194  				0.99: 0.001,
   195  			},
   196  		}, MetricTotalRequestsLabelNames),
   197  		BuildInfo: buildInfo,
   198  	}
   199  }
   200  
   201  // Describe is part of the prometheus.Collector interface.
   202  func (c *Collector) Describe(ch chan<- *prometheus.Desc) {
   203  	c.TotalConnections.Describe(ch)
   204  	c.APIConnections.Describe(ch)
   205  	c.LoginAttempts.Describe(ch)
   206  	c.APIRequestDuration.Describe(ch)
   207  	c.PingFailureCount.Describe(ch)
   208  	c.LogWriteCount.Describe(ch)
   209  	c.LogReadCount.Describe(ch)
   210  	c.TotalRequests.Describe(ch)
   211  	c.TotalRequestErrors.Describe(ch)
   212  	c.TotalRequestsDuration.Describe(ch)
   213  	c.BuildInfo.Describe(ch)
   214  }
   215  
   216  // Collect is part of the prometheus.Collector interface.
   217  func (c *Collector) Collect(ch chan<- prometheus.Metric) {
   218  	c.TotalConnections.Collect(ch)
   219  	c.APIConnections.Collect(ch)
   220  	c.LoginAttempts.Collect(ch)
   221  	c.APIRequestDuration.Collect(ch)
   222  	c.PingFailureCount.Collect(ch)
   223  	c.LogWriteCount.Collect(ch)
   224  	c.LogReadCount.Collect(ch)
   225  	c.TotalRequests.Collect(ch)
   226  	c.TotalRequestErrors.Collect(ch)
   227  	c.TotalRequestsDuration.Collect(ch)
   228  	c.BuildInfo.Collect(ch)
   229  }