github.com/argoproj/argo-cd/v3@v3.2.1/util/metrics/kubectl/kubectl_metrics.go (about)

     1  package kubectl
     2  
     3  import (
     4  	"context"
     5  	"net/url"
     6  	"strconv"
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/prometheus/client_golang/prometheus"
    11  	"k8s.io/client-go/tools/metrics"
    12  )
    13  
    14  // The label names are meant to match this: https://github.com/kubernetes/component-base/blob/264c1fd30132a3b36b7588e50ac54eb0ff75f26a/metrics/prometheus/restclient/metrics.go
    15  // Even in cases where the label name doesn't align well with Argo CD's other labels, we use the Kubernetes labels to
    16  // make it easier to copy/paste dashboards/alerts/etc. designed for Kubernetes.
    17  const (
    18  	// LabelCallStatus represents the status of the exec plugin call, indicating whether it was successful or failed.
    19  	// These are the possible values, as of the current client-go version:
    20  	// no_error, plugin_execution_error, plugin_not_found_error, client_internal_error
    21  	LabelCallStatus = "call_status"
    22  	// LabelCode represents either the HTTP status code returned by the request or the exit code of the command run.
    23  	LabelCode = "code"
    24  	// LabelHost represents the hostname of the server to which the request was made.
    25  	LabelHost = "host"
    26  	// LabelMethod represents the HTTP method used for the request (e.g., GET, POST).
    27  	LabelMethod = "method"
    28  	// LabelResult represents an attempt to get a transport from the transport cache.
    29  	// These are the possible values, as of the current client-go version: hit, miss, unreachable
    30  	// `unreachable` indicates that the cache was not usable for a given REST config because, for example, TLS files
    31  	// couldn't be loaded, or a proxy is being used.
    32  	LabelResult = "result"
    33  	// LabelVerb represents the Kubernetes API verb used in the request (e.g., list, get, create).
    34  	LabelVerb = "verb"
    35  )
    36  
    37  // All metric names below match https://github.com/kubernetes/component-base/blob/264c1fd30132a3b36b7588e50ac54eb0ff75f26a/metrics/prometheus/restclient/metrics.go
    38  // except rest_client_ is replaced with argocd_kubectl_.
    39  //
    40  // We use similar histogram bucket ranges, but reduce cardinality.
    41  //
    42  // We try to use similar labels, but we adjust to more closely match other Argo CD metrics.
    43  //
    44  // The idea is that if we stay close to the Kubernetes metrics, then people can take more advantage of copy/pasting
    45  // dashboards/alerts/etc. designed for Kubernetes.
    46  var (
    47  	clientCertRotationAgeGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
    48  		Name: "argocd_kubectl_client_cert_rotation_age_seconds",
    49  		Help: "Age of a certificate that has just been rotated",
    50  	}, []string{})
    51  
    52  	requestLatencyHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{
    53  		Name:    "argocd_kubectl_request_duration_seconds",
    54  		Help:    "Request latency in seconds",
    55  		Buckets: []float64{0.005, 0.1, 0.5, 2.0, 8.0, 30.0},
    56  	}, []string{LabelHost, LabelVerb})
    57  
    58  	resolverLatencyHistogram = prometheus.NewHistogramVec(
    59  		prometheus.HistogramOpts{
    60  			Name:    "argocd_kubectl_dns_resolution_duration_seconds",
    61  			Help:    "Kubectl resolver latency",
    62  			Buckets: []float64{0.005, 0.1, 0.5, 2.0, 8.0, 30.0},
    63  		},
    64  		[]string{LabelHost},
    65  	)
    66  
    67  	requestSizeHistogram = prometheus.NewHistogramVec(
    68  		prometheus.HistogramOpts{
    69  			Name: "argocd_kubectl_request_size_bytes",
    70  			Help: "Size of kubectl requests",
    71  			// 64 bytes to 16MB
    72  			Buckets: []float64{64, 512, 4096, 65536, 1048576, 16777216},
    73  		},
    74  		[]string{LabelHost, LabelMethod},
    75  	)
    76  
    77  	responseSizeHistogram = prometheus.NewHistogramVec(
    78  		prometheus.HistogramOpts{
    79  			Name: "argocd_kubectl_response_size_bytes",
    80  			Help: "Size of kubectl responses",
    81  			// 64 bytes to 16MB
    82  			Buckets: []float64{64, 512, 4096, 65536, 1048576, 16777216},
    83  		},
    84  		[]string{LabelHost, LabelMethod},
    85  	)
    86  
    87  	rateLimiterLatencyHistogram = prometheus.NewHistogramVec(
    88  		prometheus.HistogramOpts{
    89  			Name:    "argocd_kubectl_rate_limiter_duration_seconds",
    90  			Help:    "Kubectl rate limiter latency",
    91  			Buckets: []float64{0.005, 0.1, 0.5, 2.0, 8.0, 30.0},
    92  		},
    93  		[]string{LabelHost, LabelVerb},
    94  	)
    95  
    96  	requestResultCounter = prometheus.NewCounterVec(
    97  		prometheus.CounterOpts{
    98  			Name: "argocd_kubectl_requests_total",
    99  			Help: "Number of kubectl request results",
   100  		},
   101  		[]string{LabelHost, LabelMethod, LabelCode},
   102  	)
   103  
   104  	execPluginCallsCounter = prometheus.NewCounterVec(
   105  		prometheus.CounterOpts{
   106  			Name: "argocd_kubectl_exec_plugin_call_total",
   107  			Help: "Number of kubectl exec plugin calls",
   108  		},
   109  		[]string{LabelCode, LabelCallStatus},
   110  	)
   111  
   112  	requestRetryCounter = prometheus.NewCounterVec(
   113  		prometheus.CounterOpts{
   114  			Name: "argocd_kubectl_request_retries_total",
   115  			Help: "Number of kubectl request retries",
   116  		},
   117  		[]string{LabelHost, LabelMethod, LabelCode},
   118  	)
   119  
   120  	transportCacheEntriesGauge = prometheus.NewGaugeVec(
   121  		prometheus.GaugeOpts{
   122  			Name: "argocd_kubectl_transport_cache_entries",
   123  			Help: "Number of kubectl transport cache entries",
   124  		},
   125  		[]string{},
   126  	)
   127  
   128  	transportCreateCallsCounter = prometheus.NewCounterVec(
   129  		prometheus.CounterOpts{
   130  			Name: "argocd_kubectl_transport_create_calls_total",
   131  			Help: "Number of kubectl transport create calls",
   132  		},
   133  		[]string{LabelResult},
   134  	)
   135  )
   136  
   137  // RegisterWithPrometheus registers the kubectl metrics with the given prometheus registry.
   138  func RegisterWithPrometheus(registry prometheus.Registerer) {
   139  	registry.MustRegister(clientCertRotationAgeGauge)
   140  	registry.MustRegister(requestLatencyHistogram)
   141  	registry.MustRegister(resolverLatencyHistogram)
   142  	registry.MustRegister(requestSizeHistogram)
   143  	registry.MustRegister(responseSizeHistogram)
   144  	registry.MustRegister(rateLimiterLatencyHistogram)
   145  	registry.MustRegister(requestResultCounter)
   146  	registry.MustRegister(execPluginCallsCounter)
   147  	registry.MustRegister(requestRetryCounter)
   148  	registry.MustRegister(transportCacheEntriesGauge)
   149  	registry.MustRegister(transportCreateCallsCounter)
   150  }
   151  
   152  // ResetAll resets all kubectl metrics
   153  func ResetAll() {
   154  	clientCertRotationAgeGauge.Reset()
   155  	requestLatencyHistogram.Reset()
   156  	resolverLatencyHistogram.Reset()
   157  	requestSizeHistogram.Reset()
   158  	responseSizeHistogram.Reset()
   159  	rateLimiterLatencyHistogram.Reset()
   160  	requestResultCounter.Reset()
   161  	execPluginCallsCounter.Reset()
   162  	requestRetryCounter.Reset()
   163  	transportCacheEntriesGauge.Reset()
   164  	transportCreateCallsCounter.Reset()
   165  }
   166  
   167  var newKubectlMetricsOnce sync.Once
   168  
   169  // RegisterWithClientGo sets the metrics handlers for the go-client library. We do not use the metrics library's `RegisterWithClientGo` method,
   170  // because it is protected by a sync.Once. controller-runtime registers a single handler, which blocks our registration
   171  // of our own handlers. So we must rudely set them all directly.
   172  //
   173  // Since the metrics are global, this function only needs to be called once for a given Argo CD component.
   174  //
   175  // You must also call RegisterWithPrometheus to register the metrics with the metrics server's prometheus registry.
   176  func RegisterWithClientGo() {
   177  	// Do once to avoid races in unit tests that call this function.
   178  	newKubectlMetricsOnce.Do(func() {
   179  		metrics.ClientCertRotationAge = &kubectlClientCertRotationAgeMetric{}
   180  		metrics.RequestLatency = &kubectlRequestLatencyMetric{}
   181  		metrics.ResolverLatency = &kubectlResolverLatencyMetric{}
   182  		metrics.RequestSize = &kubectlRequestSizeMetric{}
   183  		metrics.ResponseSize = &kubectlResponseSizeMetric{}
   184  		metrics.RateLimiterLatency = &kubectlRateLimiterLatencyMetric{}
   185  		metrics.RequestResult = &kubectlRequestResultMetric{}
   186  		metrics.ExecPluginCalls = &kubectlExecPluginCallsMetric{}
   187  		metrics.RequestRetry = &kubectlRequestRetryMetric{}
   188  		metrics.TransportCacheEntries = &kubectlTransportCacheEntriesMetric{}
   189  		metrics.TransportCreateCalls = &kubectlTransportCreateCallsMetric{}
   190  	})
   191  }
   192  
   193  type kubectlClientCertRotationAgeMetric struct{}
   194  
   195  func (k *kubectlClientCertRotationAgeMetric) Observe(certDuration time.Duration) {
   196  	clientCertRotationAgeGauge.WithLabelValues().Set(certDuration.Seconds())
   197  }
   198  
   199  type kubectlRequestLatencyMetric struct{}
   200  
   201  func (k *kubectlRequestLatencyMetric) Observe(_ context.Context, verb string, u url.URL, latency time.Duration) {
   202  	k8sVerb := resolveK8sRequestVerb(u, verb)
   203  	requestLatencyHistogram.WithLabelValues(u.Host, k8sVerb).Observe(latency.Seconds())
   204  }
   205  
   206  type kubectlResolverLatencyMetric struct{}
   207  
   208  func (k *kubectlResolverLatencyMetric) Observe(_ context.Context, host string, latency time.Duration) {
   209  	resolverLatencyHistogram.WithLabelValues(host).Observe(latency.Seconds())
   210  }
   211  
   212  type kubectlRequestSizeMetric struct{}
   213  
   214  func (k *kubectlRequestSizeMetric) Observe(_ context.Context, verb string, host string, size float64) {
   215  	requestSizeHistogram.WithLabelValues(host, verb).Observe(size)
   216  }
   217  
   218  type kubectlResponseSizeMetric struct{}
   219  
   220  func (k *kubectlResponseSizeMetric) Observe(_ context.Context, verb string, host string, size float64) {
   221  	responseSizeHistogram.WithLabelValues(host, verb).Observe(size)
   222  }
   223  
   224  type kubectlRateLimiterLatencyMetric struct{}
   225  
   226  func (k *kubectlRateLimiterLatencyMetric) Observe(_ context.Context, verb string, u url.URL, latency time.Duration) {
   227  	k8sVerb := resolveK8sRequestVerb(u, verb)
   228  	rateLimiterLatencyHistogram.WithLabelValues(u.Host, k8sVerb).Observe(latency.Seconds())
   229  }
   230  
   231  type kubectlRequestResultMetric struct{}
   232  
   233  func (k *kubectlRequestResultMetric) Increment(_ context.Context, code string, method string, host string) {
   234  	requestResultCounter.WithLabelValues(host, method, code).Inc()
   235  }
   236  
   237  type kubectlExecPluginCallsMetric struct{}
   238  
   239  func (k *kubectlExecPluginCallsMetric) Increment(exitCode int, callStatus string) {
   240  	execPluginCallsCounter.WithLabelValues(strconv.Itoa(exitCode), callStatus).Inc()
   241  }
   242  
   243  type kubectlRequestRetryMetric struct{}
   244  
   245  func (k *kubectlRequestRetryMetric) IncrementRetry(_ context.Context, code string, method string, host string) {
   246  	requestRetryCounter.WithLabelValues(host, method, code).Inc()
   247  }
   248  
   249  type kubectlTransportCacheEntriesMetric struct{}
   250  
   251  func (k *kubectlTransportCacheEntriesMetric) Observe(value int) {
   252  	transportCacheEntriesGauge.WithLabelValues().Set(float64(value))
   253  }
   254  
   255  type kubectlTransportCreateCallsMetric struct{}
   256  
   257  func (k *kubectlTransportCreateCallsMetric) Increment(result string) {
   258  	transportCreateCallsCounter.WithLabelValues(result).Inc()
   259  }