k8s.io/apiserver@v0.31.1/pkg/endpoints/metrics/metrics.go (about) 1 /* 2 Copyright 2015 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package metrics 18 19 import ( 20 "context" 21 "fmt" 22 "net/http" 23 "net/url" 24 "strconv" 25 "strings" 26 "sync" 27 "time" 28 29 restful "github.com/emicklei/go-restful/v3" 30 31 metainternalversion "k8s.io/apimachinery/pkg/apis/meta/internalversion" 32 "k8s.io/apimachinery/pkg/apis/meta/v1/validation" 33 "k8s.io/apimachinery/pkg/runtime/schema" 34 "k8s.io/apimachinery/pkg/types" 35 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 36 utilsets "k8s.io/apimachinery/pkg/util/sets" 37 "k8s.io/apiserver/pkg/audit" 38 "k8s.io/apiserver/pkg/authentication/user" 39 "k8s.io/apiserver/pkg/endpoints/request" 40 "k8s.io/apiserver/pkg/endpoints/responsewriter" 41 compbasemetrics "k8s.io/component-base/metrics" 42 "k8s.io/component-base/metrics/legacyregistry" 43 ) 44 45 // resettableCollector is the interface implemented by prometheus.MetricVec 46 // that can be used by Prometheus to collect metrics and reset their values. 
type resettableCollector interface {
	compbasemetrics.Registerable
	// Reset is invoked by the package-level Reset function on every entry of metrics.
	Reset()
}

const (
	// APIServerComponent is used as the Subsystem of most metrics declared below.
	APIServerComponent string = "apiserver"
	// OtherRequestMethod is the verb label returned by cleanVerb for any verb
	// that is not listed in validRequestMethods.
	OtherRequestMethod string = "other"
)

/*
 * By default, all the following metrics are defined as falling under
 * ALPHA stability level (https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes)
 *
 * Promoting the stability level of the metric is a responsibility of the component owner, since it
 * involves explicitly acknowledging support for the metric across multiple releases, in accordance with
 * the metric stability policy.
 */
var (
	// deprecatedRequestGauge is set to 1 by MonitorRequest for every request flagged as deprecated.
	deprecatedRequestGauge = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Subsystem:      APIServerComponent,
			Name:           "requested_deprecated_apis",
			Help:           "Gauge of deprecated APIs that have been requested, broken out by API group, version, resource, subresource, and removed_release.",
			StabilityLevel: compbasemetrics.STABLE,
		},
		[]string{"group", "version", "resource", "subresource", "removed_release"},
	)

	// TODO(a-robinson): Add unit tests for the handling of these metrics once
	// the upstream library supports it.
	requestCounter = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      APIServerComponent,
			Name:           "request_total",
			Help:           "Counter of apiserver requests broken out for each verb, dry run value, group, version, resource, scope, component, and HTTP response code.",
			StabilityLevel: compbasemetrics.STABLE,
		},
		[]string{"verb", "dry_run", "group", "version", "resource", "subresource", "scope", "component", "code"},
	)
	// longRunningRequestsGauge is incremented for the duration of the callback
	// passed to RecordLongRunning.
	longRunningRequestsGauge = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Subsystem:      APIServerComponent,
			Name:           "longrunning_requests",
			Help:           "Gauge of all active long-running apiserver requests broken out by verb, group, version, resource, scope and component. Not all requests are tracked this way.",
			StabilityLevel: compbasemetrics.STABLE,
		},
		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
	)
	requestLatencies = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem: APIServerComponent,
			Name:      "request_duration_seconds",
			Help:      "Response latency distribution in seconds for each verb, dry run value, group, version, resource, subresource, scope and component.",
			// This metric is used for verifying api call latencies SLO,
			// as well as tracking regressions in this aspects.
			// Thus we customize buckets significantly, to empower both usecases.
			Buckets: []float64{0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
				4, 5, 6, 8, 10, 15, 20, 30, 45, 60},
			StabilityLevel: compbasemetrics.STABLE,
		},
		[]string{"verb", "dry_run", "group", "version", "resource", "subresource", "scope", "component"},
	)
	// requestSloLatencies is marked deprecated (DeprecatedVersion below); MonitorRequest
	// records the same value into it and into requestSliLatencies.
	requestSloLatencies = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem: APIServerComponent,
			Name:      "request_slo_duration_seconds",
			Help:      "Response latency distribution (not counting webhook duration and priority & fairness queue wait times) in seconds for each verb, group, version, resource, subresource, scope and component.",
			// This metric is supplementary to the requestLatencies metric.
			// It measures request duration excluding webhooks as they are mostly
			// dependent on user configuration.
			Buckets: []float64{0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
				4, 5, 6, 8, 10, 15, 20, 30, 45, 60},
			StabilityLevel:    compbasemetrics.ALPHA,
			DeprecatedVersion: "1.27.0",
		},
		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
	)
	requestSliLatencies = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem: APIServerComponent,
			Name:      "request_sli_duration_seconds",
			Help:      "Response latency distribution (not counting webhook duration and priority & fairness queue wait times) in seconds for each verb, group, version, resource, subresource, scope and component.",
			// This metric is supplementary to the requestLatencies metric.
			// It measures request duration excluding webhooks as they are mostly
			// dependent on user configuration.
			Buckets: []float64{0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
				4, 5, 6, 8, 10, 15, 20, 30, 45, 60},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
	)
	// NOTE(review): unlike its neighbours, this metric sets no Subsystem, so its
	// name carries no subsystem prefix from these options — confirm this is intended.
	fieldValidationRequestLatencies = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Name: "field_validation_request_duration_seconds",
			Help: "Response latency distribution in seconds for each field validation value",
			// This metric is supplementary to the requestLatencies metric.
			// It measures request durations for the various field validation
			// values.
			Buckets: []float64{0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
				4, 5, 6, 8, 10, 15, 20, 30, 45, 60},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"field_validation"},
	)
	responseSizes = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem: APIServerComponent,
			Name:      "response_sizes",
			Help:      "Response size distribution in bytes for each group, version, verb, resource, subresource, scope and component.",
			// Use buckets ranging from 1000 bytes (1KB) to 10^9 bytes (1GB).
			Buckets:        compbasemetrics.ExponentialBuckets(1000, 10.0, 7),
			StabilityLevel: compbasemetrics.STABLE,
		},
		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
	)
	// TLSHandshakeErrors is a number of requests dropped with 'TLS handshake error from' error
	TLSHandshakeErrors = compbasemetrics.NewCounter(
		&compbasemetrics.CounterOpts{
			Subsystem:      APIServerComponent,
			Name:           "tls_handshake_errors_total",
			Help:           "Number of requests dropped with 'TLS handshake error from' error",
			StabilityLevel: compbasemetrics.ALPHA,
		},
	)
	// WatchEvents counts watch events, labelled by group, version and kind.
	WatchEvents = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      APIServerComponent,
			Name:           "watch_events_total",
			Help:           "Number of events sent in watch clients",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"group", "version", "kind"},
	)
	// WatchEventsSizes is the size distribution of watch events, labelled by group, version and kind.
	WatchEventsSizes = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem:      APIServerComponent,
			Name:           "watch_events_sizes",
			Help:           "Watch event size distribution in bytes",
			Buckets:        compbasemetrics.ExponentialBuckets(1024, 2.0, 8), // 1K, 2K, 4K, 8K, ..., 128K.
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"group", "version", "kind"},
	)
	// Because of volatility of the base metric this is pre-aggregated one. Instead of reporting current usage all the time
	// it reports maximal usage during the last second.
	currentInflightRequests = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Subsystem:      APIServerComponent,
			Name:           "current_inflight_requests",
			Help:           "Maximal number of currently used inflight request limit of this apiserver per request kind in last second.",
			StabilityLevel: compbasemetrics.STABLE,
		},
		[]string{"request_kind"},
	)
	currentInqueueRequests = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Subsystem:      APIServerComponent,
			Name:           "current_inqueue_requests",
			Help:           "Maximal number of queued requests in this apiserver per request kind in last second.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"request_kind"},
	)

	requestTerminationsTotal = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      APIServerComponent,
			Name:           "request_terminations_total",
			Help:           "Number of requests which apiserver terminated in self-defense.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component", "code"},
	)

	// apiSelfRequestCounter is incremented by MonitorRequest when the authenticated
	// user name equals user.APIServerUser.
	apiSelfRequestCounter = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      APIServerComponent,
			Name:           "selfrequest_total",
			Help:           "Counter of apiserver self-requests broken out for each verb, API resource and subresource.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"verb", "resource", "subresource"},
	)

	requestFilterDuration = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem:      APIServerComponent,
			Name:           "request_filter_duration_seconds",
			Help:           "Request filter latency distribution in seconds, for each filter type",
			Buckets:        []float64{0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 5.0, 10.0, 15.0, 30.0},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"filter"},
	)

	// requestAbortsTotal is a number of aborted requests with http.ErrAbortHandler
	requestAbortsTotal = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      APIServerComponent,
			Name:           "request_aborts_total",
			Help:           "Number of requests which apiserver aborted possibly due to a timeout, for each group, version, verb, resource, subresource and scope",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"verb", "group", "version", "resource", "subresource", "scope"},
	)

	// requestPostTimeoutTotal tracks the activity of the executing request handler after the associated request
	// has been timed out by the apiserver.
	// source: the name of the handler that is recording this metric. Currently, we have two:
	//  - timeout-handler: the "executing" handler returns after the timeout filter times out the request.
	//  - rest-handler: the "executing" handler returns after the rest layer times out the request.
	// status: whether the handler panicked or threw an error, possible values:
	//  - 'panic': the handler panicked
	//  - 'error': the handler returned an error
	//  - 'ok': the handler returned a result (no error and no panic)
	//  - 'pending': the handler is still running in the background and it did not return
	//    within the wait threshold.
	requestPostTimeoutTotal = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      APIServerComponent,
			Name:           "request_post_timeout_total",
			Help:           "Tracks the activity of the request handlers after the associated requests have been timed out by the apiserver",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"source", "status"},
	)

	requestTimestampComparisonDuration = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem:      APIServerComponent,
			Name:           "request_timestamp_comparison_time",
			Help:           "Time taken for comparison of old vs new objects in UPDATE or PATCH requests",
			Buckets:        []float64{0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 5.0},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		// Path the code takes to reach a conclusion:
		// i.e. unequalObjectsFast, unequalObjectsSlow, equalObjectsSlow
		[]string{"code_path"},
	)

	watchListLatencies = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem:      APIServerComponent,
			Name:           "watch_list_duration_seconds",
			Help:           "Response latency distribution in seconds for watch list requests broken by group, version, resource and scope.",
			Buckets:        []float64{0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2, 4, 6, 8, 10, 15, 20, 30, 45, 60},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"group", "version", "resource", "scope"},
	)

	// metrics lists every collector that Register registers and Reset resets.
	// A metric declared above but missing from this list is touched by neither.
	metrics = []resettableCollector{
		deprecatedRequestGauge,
		requestCounter,
		longRunningRequestsGauge,
		requestLatencies,
		requestSloLatencies,
		requestSliLatencies,
		fieldValidationRequestLatencies,
		responseSizes,
		TLSHandshakeErrors,
		WatchEvents,
		WatchEventsSizes,
		currentInflightRequests,
		currentInqueueRequests,
		requestTerminationsTotal,
		apiSelfRequestCounter,
		requestFilterDuration,
		requestAbortsTotal,
		requestPostTimeoutTotal,
		requestTimestampComparisonDuration,
		watchListLatencies,
	}

	// these are the valid request methods which we report in our metrics. Any other request methods
	// will be aggregated under 'other' (OtherRequestMethod), see cleanVerb.
	validRequestMethods = utilsets.NewString(
		"APPLY",
		"CONNECT",
		"CREATE",
		"DELETE",
		"DELETECOLLECTION",
		"GET",
		"LIST",
		"PATCH",
		"POST",
		"PROXY",
		"PUT",
		"UPDATE",
		"WATCH",
		"WATCHLIST")

	// These are the valid connect requests which we report in our metrics.
	// CleanVerb reports "CONNECT" for resource requests whose subresource is in this set.
	validConnectRequests = utilsets.NewString(
		"log",
		"exec",
		"portforward",
		"attach",
		"proxy")
)

const (
	// ReadOnlyKind is a string identifying read only request kind
	ReadOnlyKind = "readOnly"
	// MutatingKind is a string identifying mutating request kind
	MutatingKind = "mutating"

	// WaitingPhase is the phase value for a request waiting in a queue
	WaitingPhase = "waiting"
	// ExecutingPhase is the phase value for an executing request
	ExecutingPhase = "executing"
)

const (
	// deprecatedAnnotationKey is a key for an audit annotation set to
	// "true" on requests made to deprecated API versions
	deprecatedAnnotationKey = "k8s.io/deprecated"
	// removedReleaseAnnotationKey is a key for an audit annotation set to
	// the target removal release, in "<major>.<minor>" format,
	// on requests made to deprecated API versions with a target removal release
	removedReleaseAnnotationKey = "k8s.io/removed-release"
)

const (
	// The source that is recording the apiserver_request_post_timeout_total metric.
	// The "executing" request handler returns after the timeout filter times out the request.
	PostTimeoutSourceTimeoutHandler = "timeout-handler"

	// The source that is recording the apiserver_request_post_timeout_total metric.
	// The "executing" request handler returns after the rest layer times out the request.
	PostTimeoutSourceRestHandler = "rest-handler"
)

const (
	// The executing request handler panicked after the request had
	// been timed out by the apiserver.
	PostTimeoutHandlerPanic = "panic"

	// The executing request handler has returned an error to the post-timeout
	// receiver after the request had been timed out by the apiserver.
	PostTimeoutHandlerError = "error"

	// The executing request handler has returned a result to the post-timeout
	// receiver after the request had been timed out by the apiserver.
	PostTimeoutHandlerOK = "ok"

	// The executing request handler has not panicked or returned any error/result to
	// the post-timeout receiver yet after the request had been timed out by the apiserver.
	// The post-timeout receiver gives up after waiting for certain threshold and if the
	// executing request handler has not returned yet we use the following label.
	PostTimeoutHandlerPending = "pending"
)

// registerMetrics guards Register so the collectors are registered at most once.
var registerMetrics sync.Once

// Register all metrics.
// The registration runs inside a sync.Once, so repeated calls are no-ops.
func Register() {
	registerMetrics.Do(func() {
		for _, metric := range metrics {
			legacyregistry.MustRegister(metric)
		}
	})
}

// Reset all metrics.
// Unlike Register, this is not guarded by the sync.Once and runs on every call.
func Reset() {
	for _, metric := range metrics {
		metric.Reset()
	}
}

// UpdateInflightRequestMetrics reports concurrency metrics classified by
// mutating vs Readonly.
421 func UpdateInflightRequestMetrics(phase string, nonmutating, mutating int) { 422 for _, kc := range []struct { 423 kind string 424 count int 425 }{{ReadOnlyKind, nonmutating}, {MutatingKind, mutating}} { 426 if phase == ExecutingPhase { 427 currentInflightRequests.WithLabelValues(kc.kind).Set(float64(kc.count)) 428 } else { 429 currentInqueueRequests.WithLabelValues(kc.kind).Set(float64(kc.count)) 430 } 431 } 432 } 433 434 func RecordFilterLatency(ctx context.Context, name string, elapsed time.Duration) { 435 requestFilterDuration.WithContext(ctx).WithLabelValues(name).Observe(elapsed.Seconds()) 436 } 437 438 func RecordTimestampComparisonLatency(codePath string, elapsed time.Duration) { 439 requestTimestampComparisonDuration.WithLabelValues(codePath).Observe(elapsed.Seconds()) 440 } 441 442 func RecordRequestPostTimeout(source string, status string) { 443 requestPostTimeoutTotal.WithLabelValues(source, status).Inc() 444 } 445 446 // RecordRequestAbort records that the request was aborted possibly due to a timeout. 447 func RecordRequestAbort(req *http.Request, requestInfo *request.RequestInfo) { 448 if requestInfo == nil { 449 requestInfo = &request.RequestInfo{Verb: req.Method, Path: req.URL.Path} 450 } 451 452 scope := CleanScope(requestInfo) 453 reportedVerb := cleanVerb(CanonicalVerb(strings.ToUpper(req.Method), scope), "", req, requestInfo) 454 resource := requestInfo.Resource 455 subresource := requestInfo.Subresource 456 group := requestInfo.APIGroup 457 version := requestInfo.APIVersion 458 459 requestAbortsTotal.WithContext(req.Context()).WithLabelValues(reportedVerb, group, version, resource, subresource, scope).Inc() 460 } 461 462 // RecordDroppedRequest records that the request was rejected via http.TooManyRequests. 
463 func RecordDroppedRequest(req *http.Request, requestInfo *request.RequestInfo, component string, isMutatingRequest bool) { 464 if requestInfo == nil { 465 requestInfo = &request.RequestInfo{Verb: req.Method, Path: req.URL.Path} 466 } 467 scope := CleanScope(requestInfo) 468 dryRun := cleanDryRun(req.URL) 469 470 // We don't use verb from <requestInfo>, as this may be propagated from 471 // InstrumentRouteFunc which is registered in installer.go with predefined 472 // list of verbs (different than those translated to RequestInfo). 473 // However, we need to tweak it e.g. to differentiate GET from LIST. 474 reportedVerb := cleanVerb(CanonicalVerb(strings.ToUpper(req.Method), scope), "", req, requestInfo) 475 476 if requestInfo.IsResourceRequest { 477 requestCounter.WithContext(req.Context()).WithLabelValues(reportedVerb, dryRun, requestInfo.APIGroup, requestInfo.APIVersion, requestInfo.Resource, requestInfo.Subresource, scope, component, codeToString(http.StatusTooManyRequests)).Inc() 478 } else { 479 requestCounter.WithContext(req.Context()).WithLabelValues(reportedVerb, dryRun, "", "", "", requestInfo.Subresource, scope, component, codeToString(http.StatusTooManyRequests)).Inc() 480 } 481 } 482 483 // RecordRequestTermination records that the request was terminated early as part of a resource 484 // preservation or apiserver self-defense mechanism (e.g. timeouts, maxinflight throttling, 485 // proxyHandler errors). RecordRequestTermination should only be called zero or one times 486 // per request. 
487 func RecordRequestTermination(req *http.Request, requestInfo *request.RequestInfo, component string, code int) { 488 if requestInfo == nil { 489 requestInfo = &request.RequestInfo{Verb: req.Method, Path: req.URL.Path} 490 } 491 scope := CleanScope(requestInfo) 492 493 // We don't use verb from <requestInfo>, as this may be propagated from 494 // InstrumentRouteFunc which is registered in installer.go with predefined 495 // list of verbs (different than those translated to RequestInfo). 496 // However, we need to tweak it e.g. to differentiate GET from LIST. 497 reportedVerb := cleanVerb(CanonicalVerb(strings.ToUpper(req.Method), scope), "", req, requestInfo) 498 499 if requestInfo.IsResourceRequest { 500 requestTerminationsTotal.WithContext(req.Context()).WithLabelValues(reportedVerb, requestInfo.APIGroup, requestInfo.APIVersion, requestInfo.Resource, requestInfo.Subresource, scope, component, codeToString(code)).Inc() 501 } else { 502 requestTerminationsTotal.WithContext(req.Context()).WithLabelValues(reportedVerb, "", "", "", requestInfo.Path, scope, component, codeToString(code)).Inc() 503 } 504 } 505 506 // RecordLongRunning tracks the execution of a long running request against the API server. It provides an accurate count 507 // of the total number of open long running requests. requestInfo may be nil if the caller is not in the normal request flow. 508 func RecordLongRunning(req *http.Request, requestInfo *request.RequestInfo, component string, fn func()) { 509 if requestInfo == nil { 510 requestInfo = &request.RequestInfo{Verb: req.Method, Path: req.URL.Path} 511 } 512 var g compbasemetrics.GaugeMetric 513 scope := CleanScope(requestInfo) 514 515 // We don't use verb from <requestInfo>, as this may be propagated from 516 // InstrumentRouteFunc which is registered in installer.go with predefined 517 // list of verbs (different than those translated to RequestInfo). 518 // However, we need to tweak it e.g. to differentiate GET from LIST. 
519 reportedVerb := cleanVerb(CanonicalVerb(strings.ToUpper(req.Method), scope), "", req, requestInfo) 520 521 if requestInfo.IsResourceRequest { 522 g = longRunningRequestsGauge.WithContext(req.Context()).WithLabelValues(reportedVerb, requestInfo.APIGroup, requestInfo.APIVersion, requestInfo.Resource, requestInfo.Subresource, scope, component) 523 } else { 524 g = longRunningRequestsGauge.WithContext(req.Context()).WithLabelValues(reportedVerb, "", "", "", requestInfo.Path, scope, component) 525 } 526 g.Inc() 527 defer g.Dec() 528 fn() 529 } 530 531 // RecordWatchListLatency simply records response latency for watch list requests. 532 func RecordWatchListLatency(ctx context.Context, gvr schema.GroupVersionResource, metricsScope string) { 533 requestReceivedTimestamp, ok := request.ReceivedTimestampFrom(ctx) 534 if !ok { 535 utilruntime.HandleError(fmt.Errorf("unable to measure watchlist latency because no received ts found in the ctx, gvr: %s", gvr)) 536 return 537 } 538 elapsedSeconds := time.Since(requestReceivedTimestamp).Seconds() 539 540 watchListLatencies.WithContext(ctx).WithLabelValues(gvr.Group, gvr.Version, gvr.Resource, metricsScope).Observe(elapsedSeconds) 541 } 542 543 // MonitorRequest handles standard transformations for client and the reported verb and then invokes Monitor to record 544 // a request. verb must be uppercase to be backwards compatible with existing monitoring tooling. 
545 func MonitorRequest(req *http.Request, verb, group, version, resource, subresource, scope, component string, deprecated bool, removedRelease string, httpCode, respSize int, elapsed time.Duration) { 546 requestInfo, ok := request.RequestInfoFrom(req.Context()) 547 if !ok || requestInfo == nil { 548 requestInfo = &request.RequestInfo{Verb: req.Method, Path: req.URL.Path} 549 } 550 // We don't use verb from <requestInfo>, as this may be propagated from 551 // InstrumentRouteFunc which is registered in installer.go with predefined 552 // list of verbs (different than those translated to RequestInfo). 553 // However, we need to tweak it e.g. to differentiate GET from LIST. 554 reportedVerb := cleanVerb(CanonicalVerb(strings.ToUpper(req.Method), scope), verb, req, requestInfo) 555 556 dryRun := cleanDryRun(req.URL) 557 elapsedSeconds := elapsed.Seconds() 558 requestCounter.WithContext(req.Context()).WithLabelValues(reportedVerb, dryRun, group, version, resource, subresource, scope, component, codeToString(httpCode)).Inc() 559 // MonitorRequest happens after authentication, so we can trust the username given by the request 560 info, ok := request.UserFrom(req.Context()) 561 if ok && info.GetName() == user.APIServerUser { 562 apiSelfRequestCounter.WithContext(req.Context()).WithLabelValues(reportedVerb, resource, subresource).Inc() 563 } 564 if deprecated { 565 deprecatedRequestGauge.WithContext(req.Context()).WithLabelValues(group, version, resource, subresource, removedRelease).Set(1) 566 audit.AddAuditAnnotation(req.Context(), deprecatedAnnotationKey, "true") 567 if len(removedRelease) > 0 { 568 audit.AddAuditAnnotation(req.Context(), removedReleaseAnnotationKey, removedRelease) 569 } 570 } 571 requestLatencies.WithContext(req.Context()).WithLabelValues(reportedVerb, dryRun, group, version, resource, subresource, scope, component).Observe(elapsedSeconds) 572 fieldValidation := cleanFieldValidation(req.URL) 573 
fieldValidationRequestLatencies.WithContext(req.Context()).WithLabelValues(fieldValidation) 574 575 if wd, ok := request.LatencyTrackersFrom(req.Context()); ok { 576 sliLatency := elapsedSeconds - (wd.MutatingWebhookTracker.GetLatency() + wd.ValidatingWebhookTracker.GetLatency() + wd.APFQueueWaitTracker.GetLatency()).Seconds() 577 requestSloLatencies.WithContext(req.Context()).WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(sliLatency) 578 requestSliLatencies.WithContext(req.Context()).WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(sliLatency) 579 } 580 // We are only interested in response sizes of read requests. 581 if verb == "GET" || verb == "LIST" { 582 responseSizes.WithContext(req.Context()).WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(float64(respSize)) 583 } 584 } 585 586 // InstrumentRouteFunc works like Prometheus' InstrumentHandlerFunc but wraps 587 // the go-restful RouteFunction instead of a HandlerFunc plus some Kubernetes endpoint specific information. 
588 func InstrumentRouteFunc(verb, group, version, resource, subresource, scope, component string, deprecated bool, removedRelease string, routeFunc restful.RouteFunction) restful.RouteFunction { 589 return restful.RouteFunction(func(req *restful.Request, response *restful.Response) { 590 requestReceivedTimestamp, ok := request.ReceivedTimestampFrom(req.Request.Context()) 591 if !ok { 592 requestReceivedTimestamp = time.Now() 593 } 594 595 delegate := &ResponseWriterDelegator{ResponseWriter: response.ResponseWriter} 596 597 rw := responsewriter.WrapForHTTP1Or2(delegate) 598 response.ResponseWriter = rw 599 600 routeFunc(req, response) 601 602 MonitorRequest(req.Request, verb, group, version, resource, subresource, scope, component, deprecated, removedRelease, delegate.Status(), delegate.ContentLength(), time.Since(requestReceivedTimestamp)) 603 }) 604 } 605 606 // InstrumentHandlerFunc works like Prometheus' InstrumentHandlerFunc but adds some Kubernetes endpoint specific information. 607 func InstrumentHandlerFunc(verb, group, version, resource, subresource, scope, component string, deprecated bool, removedRelease string, handler http.HandlerFunc) http.HandlerFunc { 608 return func(w http.ResponseWriter, req *http.Request) { 609 requestReceivedTimestamp, ok := request.ReceivedTimestampFrom(req.Context()) 610 if !ok { 611 requestReceivedTimestamp = time.Now() 612 } 613 614 delegate := &ResponseWriterDelegator{ResponseWriter: w} 615 w = responsewriter.WrapForHTTP1Or2(delegate) 616 617 handler(w, req) 618 619 MonitorRequest(req, verb, group, version, resource, subresource, scope, component, deprecated, removedRelease, delegate.Status(), delegate.ContentLength(), time.Since(requestReceivedTimestamp)) 620 } 621 } 622 623 // NormalizedVerb returns normalized verb 624 func NormalizedVerb(req *http.Request) string { 625 verb := req.Method 626 requestInfo, ok := request.RequestInfoFrom(req.Context()) 627 if ok { 628 // If we can find a requestInfo, we can get a scope, and 
then 629 // we can convert GETs to LISTs when needed. 630 scope := CleanScope(requestInfo) 631 verb = CanonicalVerb(strings.ToUpper(verb), scope) 632 } 633 634 // mark APPLY requests, WATCH requests and CONNECT requests correctly. 635 return CleanVerb(verb, req, requestInfo) 636 } 637 638 // CleanScope returns the scope of the request. 639 func CleanScope(requestInfo *request.RequestInfo) string { 640 if requestInfo.Name != "" || requestInfo.Verb == "create" { 641 return "resource" 642 } 643 if requestInfo.Namespace != "" { 644 return "namespace" 645 } 646 if requestInfo.IsResourceRequest { 647 return "cluster" 648 } 649 // this is the empty scope 650 return "" 651 } 652 653 // CleanListScope computes the request scope for metrics. 654 // 655 // Note that normally we would use CleanScope for computation. 656 // But due to the same reasons mentioned in determineRequestNamespaceAndName we cannot. 657 func CleanListScope(ctx context.Context, opts *metainternalversion.ListOptions) string { 658 namespace, name := determineRequestNamespaceAndName(ctx, opts) 659 if len(name) > 0 { 660 return "resource" 661 } 662 if len(namespace) > 0 { 663 return "namespace" 664 } 665 if requestInfo, ok := request.RequestInfoFrom(ctx); ok { 666 if requestInfo.IsResourceRequest { 667 return "cluster" 668 } 669 } 670 return "" 671 } 672 673 // CanonicalVerb distinguishes LISTs from GETs (and HEADs). It assumes verb is 674 // UPPERCASE. 675 func CanonicalVerb(verb string, scope string) string { 676 switch verb { 677 case "GET", "HEAD": 678 if scope != "resource" && scope != "" { 679 return "LIST" 680 } 681 return "GET" 682 default: 683 return verb 684 } 685 } 686 687 // CleanVerb returns a normalized verb, so that it is easy to tell WATCH from 688 // LIST, APPLY from PATCH and CONNECT from others. 
689 func CleanVerb(verb string, request *http.Request, requestInfo *request.RequestInfo) string { 690 reportedVerb := verb 691 if suggestedVerb := getVerbIfWatch(request); suggestedVerb == "WATCH" { 692 reportedVerb = "WATCH" 693 } 694 // normalize the legacy WATCHLIST to WATCH to ensure users aren't surprised by metrics 695 if verb == "WATCHLIST" { 696 reportedVerb = "WATCH" 697 } 698 if verb == "PATCH" && request.Header.Get("Content-Type") == string(types.ApplyPatchType) { 699 reportedVerb = "APPLY" 700 } 701 if requestInfo != nil && requestInfo.IsResourceRequest && len(requestInfo.Subresource) > 0 && validConnectRequests.Has(requestInfo.Subresource) { 702 reportedVerb = "CONNECT" 703 } 704 return reportedVerb 705 } 706 707 // determineRequestNamespaceAndName computes name and namespace for the given requests 708 // 709 // note that the logic of this function was copy&pasted from cacher.go 710 // after an unsuccessful attempt of moving it to RequestInfo 711 // 712 // see: https://github.com/kubernetes/kubernetes/pull/120520 713 func determineRequestNamespaceAndName(ctx context.Context, opts *metainternalversion.ListOptions) (namespace, name string) { 714 if requestNamespace, ok := request.NamespaceFrom(ctx); ok && len(requestNamespace) > 0 { 715 namespace = requestNamespace 716 } else if opts != nil && opts.FieldSelector != nil { 717 if selectorNamespace, ok := opts.FieldSelector.RequiresExactMatch("metadata.namespace"); ok { 718 namespace = selectorNamespace 719 } 720 } 721 if requestInfo, ok := request.RequestInfoFrom(ctx); ok && requestInfo != nil && len(requestInfo.Name) > 0 { 722 name = requestInfo.Name 723 } else if opts != nil && opts.FieldSelector != nil { 724 if selectorName, ok := opts.FieldSelector.RequiresExactMatch("metadata.name"); ok { 725 name = selectorName 726 } 727 } 728 return 729 } 730 731 // cleanVerb additionally ensures that unknown verbs don't clog up the metrics. 
732 func cleanVerb(verb, suggestedVerb string, request *http.Request, requestInfo *request.RequestInfo) string { 733 // CanonicalVerb (being an input for this function) doesn't handle correctly the 734 // deprecated path pattern for watch of: 735 // GET /api/{version}/watch/{resource} 736 // We correct it manually based on the pass verb from the installer. 737 if suggestedVerb == "WATCH" || suggestedVerb == "WATCHLIST" { 738 return "WATCH" 739 } 740 reportedVerb := CleanVerb(verb, request, requestInfo) 741 if validRequestMethods.Has(reportedVerb) { 742 return reportedVerb 743 } 744 return OtherRequestMethod 745 } 746 747 // getVerbIfWatch additionally ensures that GET or List would be transformed to WATCH 748 func getVerbIfWatch(req *http.Request) string { 749 if strings.ToUpper(req.Method) == "GET" || strings.ToUpper(req.Method) == "LIST" { 750 // see apimachinery/pkg/runtime/conversion.go Convert_Slice_string_To_bool 751 if values := req.URL.Query()["watch"]; len(values) > 0 { 752 if value := strings.ToLower(values[0]); value != "0" && value != "false" { 753 return "WATCH" 754 } 755 } 756 } 757 return "" 758 } 759 760 func cleanDryRun(u *url.URL) string { 761 // avoid allocating when we don't see dryRun in the query 762 if !strings.Contains(u.RawQuery, "dryRun") { 763 return "" 764 } 765 dryRun := u.Query()["dryRun"] 766 if errs := validation.ValidateDryRun(nil, dryRun); len(errs) > 0 { 767 return "invalid" 768 } 769 // Since dryRun could be valid with any arbitrarily long length 770 // we have to dedup and sort the elements before joining them together 771 // TODO: this is a fairly large allocation for what it does, consider 772 // a sort and dedup in a single pass 773 return strings.Join(utilsets.NewString(dryRun...).List(), ",") 774 } 775 776 func cleanFieldValidation(u *url.URL) string { 777 // avoid allocating when we don't see dryRun in the query 778 if !strings.Contains(u.RawQuery, "fieldValidation") { 779 return "" 780 } 781 fieldValidation := 
u.Query()["fieldValidation"] 782 if len(fieldValidation) != 1 { 783 return "invalid" 784 } 785 if errs := validation.ValidateFieldValidation(nil, fieldValidation[0]); len(errs) > 0 { 786 return "invalid" 787 } 788 return fieldValidation[0] 789 } 790 791 var _ http.ResponseWriter = (*ResponseWriterDelegator)(nil) 792 var _ responsewriter.UserProvidedDecorator = (*ResponseWriterDelegator)(nil) 793 794 // ResponseWriterDelegator interface wraps http.ResponseWriter to additionally record content-length, status-code, etc. 795 type ResponseWriterDelegator struct { 796 http.ResponseWriter 797 798 status int 799 written int64 800 wroteHeader bool 801 } 802 803 func (r *ResponseWriterDelegator) Unwrap() http.ResponseWriter { 804 return r.ResponseWriter 805 } 806 807 func (r *ResponseWriterDelegator) WriteHeader(code int) { 808 r.status = code 809 r.wroteHeader = true 810 r.ResponseWriter.WriteHeader(code) 811 } 812 813 func (r *ResponseWriterDelegator) Write(b []byte) (int, error) { 814 if !r.wroteHeader { 815 r.WriteHeader(http.StatusOK) 816 } 817 n, err := r.ResponseWriter.Write(b) 818 r.written += int64(n) 819 return n, err 820 } 821 822 func (r *ResponseWriterDelegator) Status() int { 823 return r.status 824 } 825 826 func (r *ResponseWriterDelegator) ContentLength() int { 827 return int(r.written) 828 } 829 830 // Small optimization over Itoa 831 func codeToString(s int) string { 832 switch s { 833 case 100: 834 return "100" 835 case 101: 836 return "101" 837 838 case 200: 839 return "200" 840 case 201: 841 return "201" 842 case 202: 843 return "202" 844 case 203: 845 return "203" 846 case 204: 847 return "204" 848 case 205: 849 return "205" 850 case 206: 851 return "206" 852 853 case 300: 854 return "300" 855 case 301: 856 return "301" 857 case 302: 858 return "302" 859 case 304: 860 return "304" 861 case 305: 862 return "305" 863 case 307: 864 return "307" 865 866 case 400: 867 return "400" 868 case 401: 869 return "401" 870 case 402: 871 return "402" 872 case 403: 
873 return "403" 874 case 404: 875 return "404" 876 case 405: 877 return "405" 878 case 406: 879 return "406" 880 case 407: 881 return "407" 882 case 408: 883 return "408" 884 case 409: 885 return "409" 886 case 410: 887 return "410" 888 case 411: 889 return "411" 890 case 412: 891 return "412" 892 case 413: 893 return "413" 894 case 414: 895 return "414" 896 case 415: 897 return "415" 898 case 416: 899 return "416" 900 case 417: 901 return "417" 902 case 418: 903 return "418" 904 905 case 500: 906 return "500" 907 case 501: 908 return "501" 909 case 502: 910 return "502" 911 case 503: 912 return "503" 913 case 504: 914 return "504" 915 case 505: 916 return "505" 917 918 case 428: 919 return "428" 920 case 429: 921 return "429" 922 case 431: 923 return "431" 924 case 511: 925 return "511" 926 927 default: 928 return strconv.Itoa(s) 929 } 930 }