k8s.io/apiserver@v0.31.1/pkg/endpoints/metrics/metrics.go (about)

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package metrics
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"net/http"
    23  	"net/url"
    24  	"strconv"
    25  	"strings"
    26  	"sync"
    27  	"time"
    28  
    29  	restful "github.com/emicklei/go-restful/v3"
    30  
    31  	metainternalversion "k8s.io/apimachinery/pkg/apis/meta/internalversion"
    32  	"k8s.io/apimachinery/pkg/apis/meta/v1/validation"
    33  	"k8s.io/apimachinery/pkg/runtime/schema"
    34  	"k8s.io/apimachinery/pkg/types"
    35  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    36  	utilsets "k8s.io/apimachinery/pkg/util/sets"
    37  	"k8s.io/apiserver/pkg/audit"
    38  	"k8s.io/apiserver/pkg/authentication/user"
    39  	"k8s.io/apiserver/pkg/endpoints/request"
    40  	"k8s.io/apiserver/pkg/endpoints/responsewriter"
    41  	compbasemetrics "k8s.io/component-base/metrics"
    42  	"k8s.io/component-base/metrics/legacyregistry"
    43  )
    44  
    45  // resettableCollector is the interface implemented by prometheus.MetricVec
    46  // that can be used by Prometheus to collect metrics and reset their values.
    47  type resettableCollector interface {
    48  	compbasemetrics.Registerable
    49  	Reset()
    50  }
    51  
const (
	// APIServerComponent is the value used as the Subsystem of most metrics
	// defined in this file.
	APIServerComponent string = "apiserver"
	// OtherRequestMethod is presumably the catch-all verb label reported for
	// request methods outside validRequestMethods.
	// NOTE(review): its use is not visible in this part of the file — confirm.
	OtherRequestMethod string = "other"
)
    56  
/*
 * By default, all the following metrics are defined as falling under
 * ALPHA stability level (https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes).
 * Each definition below states its own StabilityLevel explicitly.
 *
 * Promoting the stability level of the metric is a responsibility of the component owner, since it
 * involves explicitly acknowledging support for the metric across multiple releases, in accordance with
 * the metric stability policy.
 */
var (
	// deprecatedRequestGauge is set to 1 by MonitorRequest for every
	// group/version/resource/subresource/removed_release combination that was
	// requested through a deprecated API.
	deprecatedRequestGauge = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Subsystem:      APIServerComponent,
			Name:           "requested_deprecated_apis",
			Help:           "Gauge of deprecated APIs that have been requested, broken out by API group, version, resource, subresource, and removed_release.",
			StabilityLevel: compbasemetrics.STABLE,
		},
		[]string{"group", "version", "resource", "subresource", "removed_release"},
	)

	// requestCounter counts apiserver requests. It is incremented by
	// MonitorRequest and by RecordDroppedRequest.
	//
	// TODO(a-robinson): Add unit tests for the handling of these metrics once
	// the upstream library supports it.
	requestCounter = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      APIServerComponent,
			Name:           "request_total",
			Help:           "Counter of apiserver requests broken out for each verb, dry run value, group, version, resource, scope, component, and HTTP response code.",
			StabilityLevel: compbasemetrics.STABLE,
		},
		[]string{"verb", "dry_run", "group", "version", "resource", "subresource", "scope", "component", "code"},
	)
	// longRunningRequestsGauge is incremented before and decremented after the
	// callback executed by RecordLongRunning.
	longRunningRequestsGauge = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Subsystem:      APIServerComponent,
			Name:           "longrunning_requests",
			Help:           "Gauge of all active long-running apiserver requests broken out by verb, group, version, resource, scope and component. Not all requests are tracked this way.",
			StabilityLevel: compbasemetrics.STABLE,
		},
		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
	)
	// requestLatencies records the total elapsed time of each request observed
	// by MonitorRequest.
	requestLatencies = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem: APIServerComponent,
			Name:      "request_duration_seconds",
			Help:      "Response latency distribution in seconds for each verb, dry run value, group, version, resource, subresource, scope and component.",
			// This metric is used for verifying API call latency SLOs,
			// as well as tracking regressions in this aspect.
			// Thus we customize buckets significantly, to empower both use cases.
			Buckets: []float64{0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
				4, 5, 6, 8, 10, 15, 20, 30, 45, 60},
			StabilityLevel: compbasemetrics.STABLE,
		},
		[]string{"verb", "dry_run", "group", "version", "resource", "subresource", "scope", "component"},
	)
	// requestSloLatencies is marked deprecated as of 1.27.0. MonitorRequest
	// observes the same value into it as into requestSliLatencies.
	requestSloLatencies = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem: APIServerComponent,
			Name:      "request_slo_duration_seconds",
			Help:      "Response latency distribution (not counting webhook duration and priority & fairness queue wait times) in seconds for each verb, group, version, resource, subresource, scope and component.",
			// This metric is supplementary to the requestLatencies metric.
			// It measures request duration excluding webhooks as they are mostly
			// dependent on user configuration.
			Buckets: []float64{0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
				4, 5, 6, 8, 10, 15, 20, 30, 45, 60},
			StabilityLevel:    compbasemetrics.ALPHA,
			DeprecatedVersion: "1.27.0",
		},
		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
	)
	// requestSliLatencies records the request duration minus the mutating
	// webhook, validating webhook and APF queue wait latencies, as computed in
	// MonitorRequest.
	requestSliLatencies = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem: APIServerComponent,
			Name:      "request_sli_duration_seconds",
			Help:      "Response latency distribution (not counting webhook duration and priority & fairness queue wait times) in seconds for each verb, group, version, resource, subresource, scope and component.",
			// This metric is supplementary to the requestLatencies metric.
			// It measures request duration excluding webhooks as they are mostly
			// dependent on user configuration.
			Buckets: []float64{0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
				4, 5, 6, 8, 10, 15, 20, 30, 45, 60},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
	)
	// fieldValidationRequestLatencies is keyed by the request's field
	// validation value. Unlike the other metrics in this block, it sets no
	// Subsystem.
	fieldValidationRequestLatencies = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Name: "field_validation_request_duration_seconds",
			Help: "Response latency distribution in seconds for each field validation value",
			// This metric is supplementary to the requestLatencies metric.
			// It measures request durations for the various field validation
			// values.
			Buckets: []float64{0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
				4, 5, 6, 8, 10, 15, 20, 30, 45, 60},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"field_validation"},
	)
	// responseSizes records response sizes; MonitorRequest only observes it
	// when the verb it was given is GET or LIST.
	responseSizes = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem: APIServerComponent,
			Name:      "response_sizes",
			Help:      "Response size distribution in bytes for each group, version, verb, resource, subresource, scope and component.",
			// Use buckets ranging from 1000 bytes (1KB) to 10^9 bytes (1GB).
			Buckets:        compbasemetrics.ExponentialBuckets(1000, 10.0, 7),
			StabilityLevel: compbasemetrics.STABLE,
		},
		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
	)
	// TLSHandshakeErrors is the number of requests dropped with a 'TLS handshake error from' error.
	TLSHandshakeErrors = compbasemetrics.NewCounter(
		&compbasemetrics.CounterOpts{
			Subsystem:      APIServerComponent,
			Name:           "tls_handshake_errors_total",
			Help:           "Number of requests dropped with 'TLS handshake error from' error",
			StabilityLevel: compbasemetrics.ALPHA,
		},
	)
	// WatchEvents counts watch events, labeled by group, version and kind.
	WatchEvents = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      APIServerComponent,
			Name:           "watch_events_total",
			Help:           "Number of events sent in watch clients",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"group", "version", "kind"},
	)
	// WatchEventsSizes records the size in bytes of watch events, labeled by
	// group, version and kind.
	WatchEventsSizes = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem:      APIServerComponent,
			Name:           "watch_events_sizes",
			Help:           "Watch event size distribution in bytes",
			Buckets:        compbasemetrics.ExponentialBuckets(1024, 2.0, 8), // 1K, 2K, 4K, 8K, ..., 128K.
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"group", "version", "kind"},
	)
	// Because of the volatility of the base metric, this is a pre-aggregated one. Instead of reporting
	// current usage all the time, it reports maximal usage during the last second.
	// It is set by UpdateInflightRequestMetrics when the phase is ExecutingPhase.
	currentInflightRequests = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Subsystem:      APIServerComponent,
			Name:           "current_inflight_requests",
			Help:           "Maximal number of currently used inflight request limit of this apiserver per request kind in last second.",
			StabilityLevel: compbasemetrics.STABLE,
		},
		[]string{"request_kind"},
	)
	// currentInqueueRequests is set by UpdateInflightRequestMetrics for any
	// phase other than ExecutingPhase.
	currentInqueueRequests = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Subsystem:      APIServerComponent,
			Name:           "current_inqueue_requests",
			Help:           "Maximal number of queued requests in this apiserver per request kind in last second.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"request_kind"},
	)

	// requestTerminationsTotal is incremented by RecordRequestTermination.
	requestTerminationsTotal = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      APIServerComponent,
			Name:           "request_terminations_total",
			Help:           "Number of requests which apiserver terminated in self-defense.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component", "code"},
	)

	// apiSelfRequestCounter is incremented by MonitorRequest when the
	// authenticated user name equals user.APIServerUser.
	apiSelfRequestCounter = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      APIServerComponent,
			Name:           "selfrequest_total",
			Help:           "Counter of apiserver self-requests broken out for each verb, API resource and subresource.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"verb", "resource", "subresource"},
	)

	// requestFilterDuration is observed by RecordFilterLatency, labeled with
	// the filter name.
	requestFilterDuration = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem:      APIServerComponent,
			Name:           "request_filter_duration_seconds",
			Help:           "Request filter latency distribution in seconds, for each filter type",
			Buckets:        []float64{0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 5.0, 10.0, 15.0, 30.0},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"filter"},
	)

	// requestAbortsTotal is the number of requests aborted with http.ErrAbortHandler.
	requestAbortsTotal = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      APIServerComponent,
			Name:           "request_aborts_total",
			Help:           "Number of requests which apiserver aborted possibly due to a timeout, for each group, version, verb, resource, subresource and scope",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"verb", "group", "version", "resource", "subresource", "scope"},
	)

	// requestPostTimeoutTotal tracks the activity of the executing request handler after the associated request
	// has been timed out by the apiserver.
	// source: the name of the handler that is recording this metric. Currently, we have two:
	//  - timeout-handler: the "executing" handler returns after the timeout filter times out the request.
	//  - rest-handler: the "executing" handler returns after the rest layer times out the request.
	// status: whether the handler panicked or threw an error, possible values:
	//  - 'panic': the handler panicked
	//  - 'error': the handler returned an error
	//  - 'ok': the handler returned a result (no error and no panic)
	//  - 'pending': the handler is still running in the background and it did not return
	//    within the wait threshold.
	requestPostTimeoutTotal = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      APIServerComponent,
			Name:           "request_post_timeout_total",
			Help:           "Tracks the activity of the request handlers after the associated requests have been timed out by the apiserver",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"source", "status"},
	)

	// requestTimestampComparisonDuration is observed by
	// RecordTimestampComparisonLatency.
	requestTimestampComparisonDuration = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem:      APIServerComponent,
			Name:           "request_timestamp_comparison_time",
			Help:           "Time taken for comparison of old vs new objects in UPDATE or PATCH requests",
			Buckets:        []float64{0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 5.0},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		// Path the code takes to reach a conclusion:
		// i.e. unequalObjectsFast, unequalObjectsSlow, equalObjectsSlow
		[]string{"code_path"},
	)

	// watchListLatencies is observed by RecordWatchListLatency.
	watchListLatencies = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem:      APIServerComponent,
			Name:           "watch_list_duration_seconds",
			Help:           "Response latency distribution in seconds for watch list requests broken by group, version, resource and scope.",
			Buckets:        []float64{0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2, 4, 6, 8, 10, 15, 20, 30, 45, 60},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"group", "version", "resource", "scope"},
	)

	// metrics lists every collector defined above. Register registers each
	// entry and Reset resets each entry, so a new metric must be added here to
	// be covered by either.
	metrics = []resettableCollector{
		deprecatedRequestGauge,
		requestCounter,
		longRunningRequestsGauge,
		requestLatencies,
		requestSloLatencies,
		requestSliLatencies,
		fieldValidationRequestLatencies,
		responseSizes,
		TLSHandshakeErrors,
		WatchEvents,
		WatchEventsSizes,
		currentInflightRequests,
		currentInqueueRequests,
		requestTerminationsTotal,
		apiSelfRequestCounter,
		requestFilterDuration,
		requestAbortsTotal,
		requestPostTimeoutTotal,
		requestTimestampComparisonDuration,
		watchListLatencies,
	}

	// These are the valid request methods which we report in our metrics. Any other request method
	// is aggregated under a single catch-all value.
	// NOTE(review): this comment used to name that value 'unknown', but the only catch-all
	// constant visible in this file is OtherRequestMethod ("other") — confirm which one is emitted.
	validRequestMethods = utilsets.NewString(
		"APPLY",
		"CONNECT",
		"CREATE",
		"DELETE",
		"DELETECOLLECTION",
		"GET",
		"LIST",
		"PATCH",
		"POST",
		"PROXY",
		"PUT",
		"UPDATE",
		"WATCH",
		"WATCHLIST")

	// These are the valid connect requests which we report in our metrics.
	validConnectRequests = utilsets.NewString(
		"log",
		"exec",
		"portforward",
		"attach",
		"proxy")
)
   348  
// Label values used by UpdateInflightRequestMetrics: the *Kind constants are
// the "request_kind" label values, and the *Phase constants select which gauge
// is updated.
const (
	// ReadOnlyKind is a string identifying read only request kind
	ReadOnlyKind = "readOnly"
	// MutatingKind is a string identifying mutating request kind
	MutatingKind = "mutating"

	// WaitingPhase is the phase value for a request waiting in a queue
	WaitingPhase = "waiting"
	// ExecutingPhase is the phase value for an executing request
	ExecutingPhase = "executing"
)
   360  
// Audit annotation keys that MonitorRequest attaches to requests made to
// deprecated APIs.
const (
	// deprecatedAnnotationKey is a key for an audit annotation set to
	// "true" on requests made to deprecated API versions
	deprecatedAnnotationKey = "k8s.io/deprecated"
	// removedReleaseAnnotationKey is a key for an audit annotation set to
	// the target removal release, in "<major>.<minor>" format,
	// on requests made to deprecated API versions with a target removal release
	removedReleaseAnnotationKey = "k8s.io/removed-release"
)
   370  
// Values for the "source" label of the apiserver_request_post_timeout_total
// metric; they identify which handler is recording the metric.
const (
	// PostTimeoutSourceTimeoutHandler is the source used when the "executing"
	// request handler returns after the timeout filter times out the request.
	PostTimeoutSourceTimeoutHandler = "timeout-handler"

	// PostTimeoutSourceRestHandler is the source used when the "executing"
	// request handler returns after the rest layer times out the request.
	PostTimeoutSourceRestHandler = "rest-handler"
)
   380  
// Values for the "status" label of the apiserver_request_post_timeout_total
// metric; they describe what the executing handler did after its request had
// been timed out by the apiserver.
const (
	// PostTimeoutHandlerPanic indicates that the executing request handler
	// panicked after the request had been timed out by the apiserver.
	PostTimeoutHandlerPanic = "panic"

	// PostTimeoutHandlerError indicates that the executing request handler
	// returned an error to the post-timeout receiver after the request had
	// been timed out by the apiserver.
	PostTimeoutHandlerError = "error"

	// PostTimeoutHandlerOK indicates that the executing request handler
	// returned a result to the post-timeout receiver after the request had
	// been timed out by the apiserver.
	PostTimeoutHandlerOK = "ok"

	// PostTimeoutHandlerPending indicates that the executing request handler
	// has neither panicked nor returned any error/result to the post-timeout
	// receiver yet. The post-timeout receiver gives up after waiting for a
	// certain threshold; if the handler has not returned by then, this label
	// is used.
	PostTimeoutHandlerPending = "pending"
)
   400  
   401  var registerMetrics sync.Once
   402  
   403  // Register all metrics.
   404  func Register() {
   405  	registerMetrics.Do(func() {
   406  		for _, metric := range metrics {
   407  			legacyregistry.MustRegister(metric)
   408  		}
   409  	})
   410  }
   411  
   412  // Reset all metrics.
   413  func Reset() {
   414  	for _, metric := range metrics {
   415  		metric.Reset()
   416  	}
   417  }
   418  
   419  // UpdateInflightRequestMetrics reports concurrency metrics classified by
   420  // mutating vs Readonly.
   421  func UpdateInflightRequestMetrics(phase string, nonmutating, mutating int) {
   422  	for _, kc := range []struct {
   423  		kind  string
   424  		count int
   425  	}{{ReadOnlyKind, nonmutating}, {MutatingKind, mutating}} {
   426  		if phase == ExecutingPhase {
   427  			currentInflightRequests.WithLabelValues(kc.kind).Set(float64(kc.count))
   428  		} else {
   429  			currentInqueueRequests.WithLabelValues(kc.kind).Set(float64(kc.count))
   430  		}
   431  	}
   432  }
   433  
   434  func RecordFilterLatency(ctx context.Context, name string, elapsed time.Duration) {
   435  	requestFilterDuration.WithContext(ctx).WithLabelValues(name).Observe(elapsed.Seconds())
   436  }
   437  
   438  func RecordTimestampComparisonLatency(codePath string, elapsed time.Duration) {
   439  	requestTimestampComparisonDuration.WithLabelValues(codePath).Observe(elapsed.Seconds())
   440  }
   441  
   442  func RecordRequestPostTimeout(source string, status string) {
   443  	requestPostTimeoutTotal.WithLabelValues(source, status).Inc()
   444  }
   445  
   446  // RecordRequestAbort records that the request was aborted possibly due to a timeout.
   447  func RecordRequestAbort(req *http.Request, requestInfo *request.RequestInfo) {
   448  	if requestInfo == nil {
   449  		requestInfo = &request.RequestInfo{Verb: req.Method, Path: req.URL.Path}
   450  	}
   451  
   452  	scope := CleanScope(requestInfo)
   453  	reportedVerb := cleanVerb(CanonicalVerb(strings.ToUpper(req.Method), scope), "", req, requestInfo)
   454  	resource := requestInfo.Resource
   455  	subresource := requestInfo.Subresource
   456  	group := requestInfo.APIGroup
   457  	version := requestInfo.APIVersion
   458  
   459  	requestAbortsTotal.WithContext(req.Context()).WithLabelValues(reportedVerb, group, version, resource, subresource, scope).Inc()
   460  }
   461  
   462  // RecordDroppedRequest records that the request was rejected via http.TooManyRequests.
   463  func RecordDroppedRequest(req *http.Request, requestInfo *request.RequestInfo, component string, isMutatingRequest bool) {
   464  	if requestInfo == nil {
   465  		requestInfo = &request.RequestInfo{Verb: req.Method, Path: req.URL.Path}
   466  	}
   467  	scope := CleanScope(requestInfo)
   468  	dryRun := cleanDryRun(req.URL)
   469  
   470  	// We don't use verb from <requestInfo>, as this may be propagated from
   471  	// InstrumentRouteFunc which is registered in installer.go with predefined
   472  	// list of verbs (different than those translated to RequestInfo).
   473  	// However, we need to tweak it e.g. to differentiate GET from LIST.
   474  	reportedVerb := cleanVerb(CanonicalVerb(strings.ToUpper(req.Method), scope), "", req, requestInfo)
   475  
   476  	if requestInfo.IsResourceRequest {
   477  		requestCounter.WithContext(req.Context()).WithLabelValues(reportedVerb, dryRun, requestInfo.APIGroup, requestInfo.APIVersion, requestInfo.Resource, requestInfo.Subresource, scope, component, codeToString(http.StatusTooManyRequests)).Inc()
   478  	} else {
   479  		requestCounter.WithContext(req.Context()).WithLabelValues(reportedVerb, dryRun, "", "", "", requestInfo.Subresource, scope, component, codeToString(http.StatusTooManyRequests)).Inc()
   480  	}
   481  }
   482  
   483  // RecordRequestTermination records that the request was terminated early as part of a resource
   484  // preservation or apiserver self-defense mechanism (e.g. timeouts, maxinflight throttling,
   485  // proxyHandler errors). RecordRequestTermination should only be called zero or one times
   486  // per request.
   487  func RecordRequestTermination(req *http.Request, requestInfo *request.RequestInfo, component string, code int) {
   488  	if requestInfo == nil {
   489  		requestInfo = &request.RequestInfo{Verb: req.Method, Path: req.URL.Path}
   490  	}
   491  	scope := CleanScope(requestInfo)
   492  
   493  	// We don't use verb from <requestInfo>, as this may be propagated from
   494  	// InstrumentRouteFunc which is registered in installer.go with predefined
   495  	// list of verbs (different than those translated to RequestInfo).
   496  	// However, we need to tweak it e.g. to differentiate GET from LIST.
   497  	reportedVerb := cleanVerb(CanonicalVerb(strings.ToUpper(req.Method), scope), "", req, requestInfo)
   498  
   499  	if requestInfo.IsResourceRequest {
   500  		requestTerminationsTotal.WithContext(req.Context()).WithLabelValues(reportedVerb, requestInfo.APIGroup, requestInfo.APIVersion, requestInfo.Resource, requestInfo.Subresource, scope, component, codeToString(code)).Inc()
   501  	} else {
   502  		requestTerminationsTotal.WithContext(req.Context()).WithLabelValues(reportedVerb, "", "", "", requestInfo.Path, scope, component, codeToString(code)).Inc()
   503  	}
   504  }
   505  
   506  // RecordLongRunning tracks the execution of a long running request against the API server. It provides an accurate count
   507  // of the total number of open long running requests. requestInfo may be nil if the caller is not in the normal request flow.
   508  func RecordLongRunning(req *http.Request, requestInfo *request.RequestInfo, component string, fn func()) {
   509  	if requestInfo == nil {
   510  		requestInfo = &request.RequestInfo{Verb: req.Method, Path: req.URL.Path}
   511  	}
   512  	var g compbasemetrics.GaugeMetric
   513  	scope := CleanScope(requestInfo)
   514  
   515  	// We don't use verb from <requestInfo>, as this may be propagated from
   516  	// InstrumentRouteFunc which is registered in installer.go with predefined
   517  	// list of verbs (different than those translated to RequestInfo).
   518  	// However, we need to tweak it e.g. to differentiate GET from LIST.
   519  	reportedVerb := cleanVerb(CanonicalVerb(strings.ToUpper(req.Method), scope), "", req, requestInfo)
   520  
   521  	if requestInfo.IsResourceRequest {
   522  		g = longRunningRequestsGauge.WithContext(req.Context()).WithLabelValues(reportedVerb, requestInfo.APIGroup, requestInfo.APIVersion, requestInfo.Resource, requestInfo.Subresource, scope, component)
   523  	} else {
   524  		g = longRunningRequestsGauge.WithContext(req.Context()).WithLabelValues(reportedVerb, "", "", "", requestInfo.Path, scope, component)
   525  	}
   526  	g.Inc()
   527  	defer g.Dec()
   528  	fn()
   529  }
   530  
   531  // RecordWatchListLatency simply records response latency for watch list requests.
   532  func RecordWatchListLatency(ctx context.Context, gvr schema.GroupVersionResource, metricsScope string) {
   533  	requestReceivedTimestamp, ok := request.ReceivedTimestampFrom(ctx)
   534  	if !ok {
   535  		utilruntime.HandleError(fmt.Errorf("unable to measure watchlist latency because no received ts found in the ctx, gvr: %s", gvr))
   536  		return
   537  	}
   538  	elapsedSeconds := time.Since(requestReceivedTimestamp).Seconds()
   539  
   540  	watchListLatencies.WithContext(ctx).WithLabelValues(gvr.Group, gvr.Version, gvr.Resource, metricsScope).Observe(elapsedSeconds)
   541  }
   542  
   543  // MonitorRequest handles standard transformations for client and the reported verb and then invokes Monitor to record
   544  // a request. verb must be uppercase to be backwards compatible with existing monitoring tooling.
   545  func MonitorRequest(req *http.Request, verb, group, version, resource, subresource, scope, component string, deprecated bool, removedRelease string, httpCode, respSize int, elapsed time.Duration) {
   546  	requestInfo, ok := request.RequestInfoFrom(req.Context())
   547  	if !ok || requestInfo == nil {
   548  		requestInfo = &request.RequestInfo{Verb: req.Method, Path: req.URL.Path}
   549  	}
   550  	// We don't use verb from <requestInfo>, as this may be propagated from
   551  	// InstrumentRouteFunc which is registered in installer.go with predefined
   552  	// list of verbs (different than those translated to RequestInfo).
   553  	// However, we need to tweak it e.g. to differentiate GET from LIST.
   554  	reportedVerb := cleanVerb(CanonicalVerb(strings.ToUpper(req.Method), scope), verb, req, requestInfo)
   555  
   556  	dryRun := cleanDryRun(req.URL)
   557  	elapsedSeconds := elapsed.Seconds()
   558  	requestCounter.WithContext(req.Context()).WithLabelValues(reportedVerb, dryRun, group, version, resource, subresource, scope, component, codeToString(httpCode)).Inc()
   559  	// MonitorRequest happens after authentication, so we can trust the username given by the request
   560  	info, ok := request.UserFrom(req.Context())
   561  	if ok && info.GetName() == user.APIServerUser {
   562  		apiSelfRequestCounter.WithContext(req.Context()).WithLabelValues(reportedVerb, resource, subresource).Inc()
   563  	}
   564  	if deprecated {
   565  		deprecatedRequestGauge.WithContext(req.Context()).WithLabelValues(group, version, resource, subresource, removedRelease).Set(1)
   566  		audit.AddAuditAnnotation(req.Context(), deprecatedAnnotationKey, "true")
   567  		if len(removedRelease) > 0 {
   568  			audit.AddAuditAnnotation(req.Context(), removedReleaseAnnotationKey, removedRelease)
   569  		}
   570  	}
   571  	requestLatencies.WithContext(req.Context()).WithLabelValues(reportedVerb, dryRun, group, version, resource, subresource, scope, component).Observe(elapsedSeconds)
   572  	fieldValidation := cleanFieldValidation(req.URL)
   573  	fieldValidationRequestLatencies.WithContext(req.Context()).WithLabelValues(fieldValidation)
   574  
   575  	if wd, ok := request.LatencyTrackersFrom(req.Context()); ok {
   576  		sliLatency := elapsedSeconds - (wd.MutatingWebhookTracker.GetLatency() + wd.ValidatingWebhookTracker.GetLatency() + wd.APFQueueWaitTracker.GetLatency()).Seconds()
   577  		requestSloLatencies.WithContext(req.Context()).WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(sliLatency)
   578  		requestSliLatencies.WithContext(req.Context()).WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(sliLatency)
   579  	}
   580  	// We are only interested in response sizes of read requests.
   581  	if verb == "GET" || verb == "LIST" {
   582  		responseSizes.WithContext(req.Context()).WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(float64(respSize))
   583  	}
   584  }
   585  
   586  // InstrumentRouteFunc works like Prometheus' InstrumentHandlerFunc but wraps
   587  // the go-restful RouteFunction instead of a HandlerFunc plus some Kubernetes endpoint specific information.
   588  func InstrumentRouteFunc(verb, group, version, resource, subresource, scope, component string, deprecated bool, removedRelease string, routeFunc restful.RouteFunction) restful.RouteFunction {
   589  	return restful.RouteFunction(func(req *restful.Request, response *restful.Response) {
   590  		requestReceivedTimestamp, ok := request.ReceivedTimestampFrom(req.Request.Context())
   591  		if !ok {
   592  			requestReceivedTimestamp = time.Now()
   593  		}
   594  
   595  		delegate := &ResponseWriterDelegator{ResponseWriter: response.ResponseWriter}
   596  
   597  		rw := responsewriter.WrapForHTTP1Or2(delegate)
   598  		response.ResponseWriter = rw
   599  
   600  		routeFunc(req, response)
   601  
   602  		MonitorRequest(req.Request, verb, group, version, resource, subresource, scope, component, deprecated, removedRelease, delegate.Status(), delegate.ContentLength(), time.Since(requestReceivedTimestamp))
   603  	})
   604  }
   605  
   606  // InstrumentHandlerFunc works like Prometheus' InstrumentHandlerFunc but adds some Kubernetes endpoint specific information.
   607  func InstrumentHandlerFunc(verb, group, version, resource, subresource, scope, component string, deprecated bool, removedRelease string, handler http.HandlerFunc) http.HandlerFunc {
   608  	return func(w http.ResponseWriter, req *http.Request) {
   609  		requestReceivedTimestamp, ok := request.ReceivedTimestampFrom(req.Context())
   610  		if !ok {
   611  			requestReceivedTimestamp = time.Now()
   612  		}
   613  
   614  		delegate := &ResponseWriterDelegator{ResponseWriter: w}
   615  		w = responsewriter.WrapForHTTP1Or2(delegate)
   616  
   617  		handler(w, req)
   618  
   619  		MonitorRequest(req, verb, group, version, resource, subresource, scope, component, deprecated, removedRelease, delegate.Status(), delegate.ContentLength(), time.Since(requestReceivedTimestamp))
   620  	}
   621  }
   622  
   623  // NormalizedVerb returns normalized verb
   624  func NormalizedVerb(req *http.Request) string {
   625  	verb := req.Method
   626  	requestInfo, ok := request.RequestInfoFrom(req.Context())
   627  	if ok {
   628  		// If we can find a requestInfo, we can get a scope, and then
   629  		// we can convert GETs to LISTs when needed.
   630  		scope := CleanScope(requestInfo)
   631  		verb = CanonicalVerb(strings.ToUpper(verb), scope)
   632  	}
   633  
   634  	// mark APPLY requests, WATCH requests and CONNECT requests correctly.
   635  	return CleanVerb(verb, req, requestInfo)
   636  }
   637  
   638  // CleanScope returns the scope of the request.
   639  func CleanScope(requestInfo *request.RequestInfo) string {
   640  	if requestInfo.Name != "" || requestInfo.Verb == "create" {
   641  		return "resource"
   642  	}
   643  	if requestInfo.Namespace != "" {
   644  		return "namespace"
   645  	}
   646  	if requestInfo.IsResourceRequest {
   647  		return "cluster"
   648  	}
   649  	// this is the empty scope
   650  	return ""
   651  }
   652  
   653  // CleanListScope computes the request scope for metrics.
   654  //
   655  // Note that normally we would use CleanScope for computation.
   656  // But due to the same reasons mentioned in determineRequestNamespaceAndName we cannot.
   657  func CleanListScope(ctx context.Context, opts *metainternalversion.ListOptions) string {
   658  	namespace, name := determineRequestNamespaceAndName(ctx, opts)
   659  	if len(name) > 0 {
   660  		return "resource"
   661  	}
   662  	if len(namespace) > 0 {
   663  		return "namespace"
   664  	}
   665  	if requestInfo, ok := request.RequestInfoFrom(ctx); ok {
   666  		if requestInfo.IsResourceRequest {
   667  			return "cluster"
   668  		}
   669  	}
   670  	return ""
   671  }
   672  
   673  // CanonicalVerb distinguishes LISTs from GETs (and HEADs). It assumes verb is
   674  // UPPERCASE.
   675  func CanonicalVerb(verb string, scope string) string {
   676  	switch verb {
   677  	case "GET", "HEAD":
   678  		if scope != "resource" && scope != "" {
   679  			return "LIST"
   680  		}
   681  		return "GET"
   682  	default:
   683  		return verb
   684  	}
   685  }
   686  
   687  // CleanVerb returns a normalized verb, so that it is easy to tell WATCH from
   688  // LIST, APPLY from PATCH and CONNECT from others.
   689  func CleanVerb(verb string, request *http.Request, requestInfo *request.RequestInfo) string {
   690  	reportedVerb := verb
   691  	if suggestedVerb := getVerbIfWatch(request); suggestedVerb == "WATCH" {
   692  		reportedVerb = "WATCH"
   693  	}
   694  	// normalize the legacy WATCHLIST to WATCH to ensure users aren't surprised by metrics
   695  	if verb == "WATCHLIST" {
   696  		reportedVerb = "WATCH"
   697  	}
   698  	if verb == "PATCH" && request.Header.Get("Content-Type") == string(types.ApplyPatchType) {
   699  		reportedVerb = "APPLY"
   700  	}
   701  	if requestInfo != nil && requestInfo.IsResourceRequest && len(requestInfo.Subresource) > 0 && validConnectRequests.Has(requestInfo.Subresource) {
   702  		reportedVerb = "CONNECT"
   703  	}
   704  	return reportedVerb
   705  }
   706  
   707  // determineRequestNamespaceAndName computes name and namespace for the given requests
   708  //
   709  // note that the logic of this function was copy&pasted from cacher.go
   710  // after an unsuccessful attempt of moving it to RequestInfo
   711  //
   712  // see: https://github.com/kubernetes/kubernetes/pull/120520
   713  func determineRequestNamespaceAndName(ctx context.Context, opts *metainternalversion.ListOptions) (namespace, name string) {
   714  	if requestNamespace, ok := request.NamespaceFrom(ctx); ok && len(requestNamespace) > 0 {
   715  		namespace = requestNamespace
   716  	} else if opts != nil && opts.FieldSelector != nil {
   717  		if selectorNamespace, ok := opts.FieldSelector.RequiresExactMatch("metadata.namespace"); ok {
   718  			namespace = selectorNamespace
   719  		}
   720  	}
   721  	if requestInfo, ok := request.RequestInfoFrom(ctx); ok && requestInfo != nil && len(requestInfo.Name) > 0 {
   722  		name = requestInfo.Name
   723  	} else if opts != nil && opts.FieldSelector != nil {
   724  		if selectorName, ok := opts.FieldSelector.RequiresExactMatch("metadata.name"); ok {
   725  			name = selectorName
   726  		}
   727  	}
   728  	return
   729  }
   730  
   731  // cleanVerb additionally ensures that unknown verbs don't clog up the metrics.
   732  func cleanVerb(verb, suggestedVerb string, request *http.Request, requestInfo *request.RequestInfo) string {
   733  	// CanonicalVerb (being an input for this function) doesn't handle correctly the
   734  	// deprecated path pattern for watch of:
   735  	//   GET /api/{version}/watch/{resource}
   736  	// We correct it manually based on the pass verb from the installer.
   737  	if suggestedVerb == "WATCH" || suggestedVerb == "WATCHLIST" {
   738  		return "WATCH"
   739  	}
   740  	reportedVerb := CleanVerb(verb, request, requestInfo)
   741  	if validRequestMethods.Has(reportedVerb) {
   742  		return reportedVerb
   743  	}
   744  	return OtherRequestMethod
   745  }
   746  
   747  // getVerbIfWatch additionally ensures that GET or List would be transformed to WATCH
   748  func getVerbIfWatch(req *http.Request) string {
   749  	if strings.ToUpper(req.Method) == "GET" || strings.ToUpper(req.Method) == "LIST" {
   750  		// see apimachinery/pkg/runtime/conversion.go Convert_Slice_string_To_bool
   751  		if values := req.URL.Query()["watch"]; len(values) > 0 {
   752  			if value := strings.ToLower(values[0]); value != "0" && value != "false" {
   753  				return "WATCH"
   754  			}
   755  		}
   756  	}
   757  	return ""
   758  }
   759  
   760  func cleanDryRun(u *url.URL) string {
   761  	// avoid allocating when we don't see dryRun in the query
   762  	if !strings.Contains(u.RawQuery, "dryRun") {
   763  		return ""
   764  	}
   765  	dryRun := u.Query()["dryRun"]
   766  	if errs := validation.ValidateDryRun(nil, dryRun); len(errs) > 0 {
   767  		return "invalid"
   768  	}
   769  	// Since dryRun could be valid with any arbitrarily long length
   770  	// we have to dedup and sort the elements before joining them together
   771  	// TODO: this is a fairly large allocation for what it does, consider
   772  	//   a sort and dedup in a single pass
   773  	return strings.Join(utilsets.NewString(dryRun...).List(), ",")
   774  }
   775  
   776  func cleanFieldValidation(u *url.URL) string {
   777  	// avoid allocating when we don't see dryRun in the query
   778  	if !strings.Contains(u.RawQuery, "fieldValidation") {
   779  		return ""
   780  	}
   781  	fieldValidation := u.Query()["fieldValidation"]
   782  	if len(fieldValidation) != 1 {
   783  		return "invalid"
   784  	}
   785  	if errs := validation.ValidateFieldValidation(nil, fieldValidation[0]); len(errs) > 0 {
   786  		return "invalid"
   787  	}
   788  	return fieldValidation[0]
   789  }
   790  
   791  var _ http.ResponseWriter = (*ResponseWriterDelegator)(nil)
   792  var _ responsewriter.UserProvidedDecorator = (*ResponseWriterDelegator)(nil)
   793  
   794  // ResponseWriterDelegator interface wraps http.ResponseWriter to additionally record content-length, status-code, etc.
   795  type ResponseWriterDelegator struct {
   796  	http.ResponseWriter
   797  
   798  	status      int
   799  	written     int64
   800  	wroteHeader bool
   801  }
   802  
   803  func (r *ResponseWriterDelegator) Unwrap() http.ResponseWriter {
   804  	return r.ResponseWriter
   805  }
   806  
   807  func (r *ResponseWriterDelegator) WriteHeader(code int) {
   808  	r.status = code
   809  	r.wroteHeader = true
   810  	r.ResponseWriter.WriteHeader(code)
   811  }
   812  
   813  func (r *ResponseWriterDelegator) Write(b []byte) (int, error) {
   814  	if !r.wroteHeader {
   815  		r.WriteHeader(http.StatusOK)
   816  	}
   817  	n, err := r.ResponseWriter.Write(b)
   818  	r.written += int64(n)
   819  	return n, err
   820  }
   821  
   822  func (r *ResponseWriterDelegator) Status() int {
   823  	return r.status
   824  }
   825  
   826  func (r *ResponseWriterDelegator) ContentLength() int {
   827  	return int(r.written)
   828  }
   829  
   830  // Small optimization over Itoa
   831  func codeToString(s int) string {
   832  	switch s {
   833  	case 100:
   834  		return "100"
   835  	case 101:
   836  		return "101"
   837  
   838  	case 200:
   839  		return "200"
   840  	case 201:
   841  		return "201"
   842  	case 202:
   843  		return "202"
   844  	case 203:
   845  		return "203"
   846  	case 204:
   847  		return "204"
   848  	case 205:
   849  		return "205"
   850  	case 206:
   851  		return "206"
   852  
   853  	case 300:
   854  		return "300"
   855  	case 301:
   856  		return "301"
   857  	case 302:
   858  		return "302"
   859  	case 304:
   860  		return "304"
   861  	case 305:
   862  		return "305"
   863  	case 307:
   864  		return "307"
   865  
   866  	case 400:
   867  		return "400"
   868  	case 401:
   869  		return "401"
   870  	case 402:
   871  		return "402"
   872  	case 403:
   873  		return "403"
   874  	case 404:
   875  		return "404"
   876  	case 405:
   877  		return "405"
   878  	case 406:
   879  		return "406"
   880  	case 407:
   881  		return "407"
   882  	case 408:
   883  		return "408"
   884  	case 409:
   885  		return "409"
   886  	case 410:
   887  		return "410"
   888  	case 411:
   889  		return "411"
   890  	case 412:
   891  		return "412"
   892  	case 413:
   893  		return "413"
   894  	case 414:
   895  		return "414"
   896  	case 415:
   897  		return "415"
   898  	case 416:
   899  		return "416"
   900  	case 417:
   901  		return "417"
   902  	case 418:
   903  		return "418"
   904  
   905  	case 500:
   906  		return "500"
   907  	case 501:
   908  		return "501"
   909  	case 502:
   910  		return "502"
   911  	case 503:
   912  		return "503"
   913  	case 504:
   914  		return "504"
   915  	case 505:
   916  		return "505"
   917  
   918  	case 428:
   919  		return "428"
   920  	case 429:
   921  		return "429"
   922  	case 431:
   923  		return "431"
   924  	case 511:
   925  		return "511"
   926  
   927  	default:
   928  		return strconv.Itoa(s)
   929  	}
   930  }