github.com/argoproj/argo-cd/v3@v3.2.1/util/metrics/kubectl/kubectl_metrics.go (about) 1 package kubectl 2 3 import ( 4 "context" 5 "net/url" 6 "strconv" 7 "sync" 8 "time" 9 10 "github.com/prometheus/client_golang/prometheus" 11 "k8s.io/client-go/tools/metrics" 12 ) 13 14 // The label names are meant to match this: https://github.com/kubernetes/component-base/blob/264c1fd30132a3b36b7588e50ac54eb0ff75f26a/metrics/prometheus/restclient/metrics.go 15 // Even in cases where the label name doesn't align well with Argo CD's other labels, we use the Kubernetes labels to 16 // make it easier to copy/paste dashboards/alerts/etc. designed for Kubernetes. 17 const ( 18 // LabelCallStatus represents the status of the exec plugin call, indicating whether it was successful or failed. 19 // These are the possible values, as of the current client-go version: 20 // no_error, plugin_execution_error, plugin_not_found_error, client_internal_error 21 LabelCallStatus = "call_status" 22 // LabelCode represents either the HTTP status code returned by the request or the exit code of the command run. 23 LabelCode = "code" 24 // LabelHost represents the hostname of the server to which the request was made. 25 LabelHost = "host" 26 // LabelMethod represents the HTTP method used for the request (e.g., GET, POST). 27 LabelMethod = "method" 28 // LabelResult represents an attempt to get a transport from the transport cache. 29 // These are the possible values, as of the current client-go version: hit, miss, unreachable 30 // `unreachable` indicates that the cache was not usable for a given REST config because, for example, TLS files 31 // couldn't be loaded, or a proxy is being used. 32 LabelResult = "result" 33 // LabelVerb represents the Kubernetes API verb used in the request (e.g., list, get, create). 34 LabelVerb = "verb" 35 ) 36 37 // All metric names below match https://github.com/kubernetes/component-base/blob/264c1fd30132a3b36b7588e50ac54eb0ff75f26a/metrics/prometheus/restclient/metrics.go 38 // except rest_client_ is replaced with argocd_kubectl_. 39 // 40 // We use similar histogram bucket ranges, but reduce cardinality. 41 // 42 // We try to use similar labels, but we adjust to more closely match other Argo CD metrics. 43 // 44 // The idea is that if we stay close to the Kubernetes metrics, then people can take more advantage of copy/pasting 45 // dashboards/alerts/etc. designed for Kubernetes. 46 var ( 47 clientCertRotationAgeGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 48 Name: "argocd_kubectl_client_cert_rotation_age_seconds", 49 Help: "Age of a certificate that has just been rotated", 50 }, []string{}) 51 52 requestLatencyHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{ 53 Name: "argocd_kubectl_request_duration_seconds", 54 Help: "Request latency in seconds", 55 Buckets: []float64{0.005, 0.1, 0.5, 2.0, 8.0, 30.0}, 56 }, []string{LabelHost, LabelVerb}) 57 58 resolverLatencyHistogram = prometheus.NewHistogramVec( 59 prometheus.HistogramOpts{ 60 Name: "argocd_kubectl_dns_resolution_duration_seconds", 61 Help: "Kubectl resolver latency", 62 Buckets: []float64{0.005, 0.1, 0.5, 2.0, 8.0, 30.0}, 63 }, 64 []string{LabelHost}, 65 ) 66 67 requestSizeHistogram = prometheus.NewHistogramVec( 68 prometheus.HistogramOpts{ 69 Name: "argocd_kubectl_request_size_bytes", 70 Help: "Size of kubectl requests", 71 // 64 bytes to 16MB 72 Buckets: []float64{64, 512, 4096, 65536, 1048576, 16777216}, 73 }, 74 []string{LabelHost, LabelMethod}, 75 ) 76 77 responseSizeHistogram = prometheus.NewHistogramVec( 78 prometheus.HistogramOpts{ 79 Name: "argocd_kubectl_response_size_bytes", 80 Help: "Size of kubectl responses", 81 // 64 bytes to 16MB 82 Buckets: []float64{64, 512, 4096, 65536, 1048576, 16777216}, 83 }, 84 []string{LabelHost, LabelMethod}, 85 ) 86 87 rateLimiterLatencyHistogram = prometheus.NewHistogramVec( 88 prometheus.HistogramOpts{ 89 Name: "argocd_kubectl_rate_limiter_duration_seconds", 90 Help: "Kubectl rate limiter latency", 91 Buckets: []float64{0.005, 0.1, 0.5, 2.0, 8.0, 30.0}, 92 }, 93 []string{LabelHost, LabelVerb}, 94 ) 95 96 requestResultCounter = prometheus.NewCounterVec( 97 prometheus.CounterOpts{ 98 Name: "argocd_kubectl_requests_total", 99 Help: "Number of kubectl request results", 100 }, 101 []string{LabelHost, LabelMethod, LabelCode}, 102 ) 103 104 execPluginCallsCounter = prometheus.NewCounterVec( 105 prometheus.CounterOpts{ 106 Name: "argocd_kubectl_exec_plugin_call_total", 107 Help: "Number of kubectl exec plugin calls", 108 }, 109 []string{LabelCode, LabelCallStatus}, 110 ) 111 112 requestRetryCounter = prometheus.NewCounterVec( 113 prometheus.CounterOpts{ 114 Name: "argocd_kubectl_request_retries_total", 115 Help: "Number of kubectl request retries", 116 }, 117 []string{LabelHost, LabelMethod, LabelCode}, 118 ) 119 120 transportCacheEntriesGauge = prometheus.NewGaugeVec( 121 prometheus.GaugeOpts{ 122 Name: "argocd_kubectl_transport_cache_entries", 123 Help: "Number of kubectl transport cache entries", 124 }, 125 []string{}, 126 ) 127 128 transportCreateCallsCounter = prometheus.NewCounterVec( 129 prometheus.CounterOpts{ 130 Name: "argocd_kubectl_transport_create_calls_total", 131 Help: "Number of kubectl transport create calls", 132 }, 133 []string{LabelResult}, 134 ) 135 ) 136 137 // RegisterWithPrometheus registers the kubectl metrics with the given prometheus registry. 138 func RegisterWithPrometheus(registry prometheus.Registerer) { 139 registry.MustRegister(clientCertRotationAgeGauge) 140 registry.MustRegister(requestLatencyHistogram) 141 registry.MustRegister(resolverLatencyHistogram) 142 registry.MustRegister(requestSizeHistogram) 143 registry.MustRegister(responseSizeHistogram) 144 registry.MustRegister(rateLimiterLatencyHistogram) 145 registry.MustRegister(requestResultCounter) 146 registry.MustRegister(execPluginCallsCounter) 147 registry.MustRegister(requestRetryCounter) 148 registry.MustRegister(transportCacheEntriesGauge) 149 registry.MustRegister(transportCreateCallsCounter) 150 } 151 152 // ResetAll resets all kubectl metrics 153 func ResetAll() { 154 clientCertRotationAgeGauge.Reset() 155 requestLatencyHistogram.Reset() 156 resolverLatencyHistogram.Reset() 157 requestSizeHistogram.Reset() 158 responseSizeHistogram.Reset() 159 rateLimiterLatencyHistogram.Reset() 160 requestResultCounter.Reset() 161 execPluginCallsCounter.Reset() 162 requestRetryCounter.Reset() 163 transportCacheEntriesGauge.Reset() 164 transportCreateCallsCounter.Reset() 165 } 166 167 var newKubectlMetricsOnce sync.Once 168 169 // RegisterWithClientGo sets the metrics handlers for the go-client library. We do not use the metrics library's `RegisterWithClientGo` method, 170 // because it is protected by a sync.Once. controller-runtime registers a single handler, which blocks our registration 171 // of our own handlers. So we must rudely set them all directly. 172 // 173 // Since the metrics are global, this function only needs to be called once for a given Argo CD component. 174 // 175 // You must also call RegisterWithPrometheus to register the metrics with the metrics server's prometheus registry. 176 func RegisterWithClientGo() { 177 // Do once to avoid races in unit tests that call this function. 178 newKubectlMetricsOnce.Do(func() { 179 metrics.ClientCertRotationAge = &kubectlClientCertRotationAgeMetric{} 180 metrics.RequestLatency = &kubectlRequestLatencyMetric{} 181 metrics.ResolverLatency = &kubectlResolverLatencyMetric{} 182 metrics.RequestSize = &kubectlRequestSizeMetric{} 183 metrics.ResponseSize = &kubectlResponseSizeMetric{} 184 metrics.RateLimiterLatency = &kubectlRateLimiterLatencyMetric{} 185 metrics.RequestResult = &kubectlRequestResultMetric{} 186 metrics.ExecPluginCalls = &kubectlExecPluginCallsMetric{} 187 metrics.RequestRetry = &kubectlRequestRetryMetric{} 188 metrics.TransportCacheEntries = &kubectlTransportCacheEntriesMetric{} 189 metrics.TransportCreateCalls = &kubectlTransportCreateCallsMetric{} 190 }) 191 } 192 193 type kubectlClientCertRotationAgeMetric struct{} 194 195 func (k *kubectlClientCertRotationAgeMetric) Observe(certDuration time.Duration) { 196 clientCertRotationAgeGauge.WithLabelValues().Set(certDuration.Seconds()) 197 } 198 199 type kubectlRequestLatencyMetric struct{} 200 201 func (k *kubectlRequestLatencyMetric) Observe(_ context.Context, verb string, u url.URL, latency time.Duration) { 202 k8sVerb := resolveK8sRequestVerb(u, verb) 203 requestLatencyHistogram.WithLabelValues(u.Host, k8sVerb).Observe(latency.Seconds()) 204 } 205 206 type kubectlResolverLatencyMetric struct{} 207 208 func (k *kubectlResolverLatencyMetric) Observe(_ context.Context, host string, latency time.Duration) { 209 resolverLatencyHistogram.WithLabelValues(host).Observe(latency.Seconds()) 210 } 211 212 type kubectlRequestSizeMetric struct{} 213 214 func (k *kubectlRequestSizeMetric) Observe(_ context.Context, verb string, host string, size float64) { 215 requestSizeHistogram.WithLabelValues(host, verb).Observe(size) 216 } 217 218 type kubectlResponseSizeMetric struct{} 219 220 func (k *kubectlResponseSizeMetric) Observe(_ context.Context, verb string, host string, size float64) { 221 responseSizeHistogram.WithLabelValues(host, verb).Observe(size) 222 } 223 224 type kubectlRateLimiterLatencyMetric struct{} 225 226 func (k *kubectlRateLimiterLatencyMetric) Observe(_ context.Context, verb string, u url.URL, latency time.Duration) { 227 k8sVerb := resolveK8sRequestVerb(u, verb) 228 rateLimiterLatencyHistogram.WithLabelValues(u.Host, k8sVerb).Observe(latency.Seconds()) 229 } 230 231 type kubectlRequestResultMetric struct{} 232 233 func (k *kubectlRequestResultMetric) Increment(_ context.Context, code string, method string, host string) { 234 requestResultCounter.WithLabelValues(host, method, code).Inc() 235 } 236 237 type kubectlExecPluginCallsMetric struct{} 238 239 func (k *kubectlExecPluginCallsMetric) Increment(exitCode int, callStatus string) { 240 execPluginCallsCounter.WithLabelValues(strconv.Itoa(exitCode), callStatus).Inc() 241 } 242 243 type kubectlRequestRetryMetric struct{} 244 245 func (k *kubectlRequestRetryMetric) IncrementRetry(_ context.Context, code string, method string, host string) { 246 requestRetryCounter.WithLabelValues(host, method, code).Inc() 247 } 248 249 type kubectlTransportCacheEntriesMetric struct{} 250 251 func (k *kubectlTransportCacheEntriesMetric) Observe(value int) { 252 transportCacheEntriesGauge.WithLabelValues().Set(float64(value)) 253 } 254 255 type kubectlTransportCreateCallsMetric struct{} 256 257 func (k *kubectlTransportCreateCallsMetric) Increment(result string) { 258 transportCreateCallsCounter.WithLabelValues(result).Inc() 259 }