k8s.io/apiserver@v0.31.1/pkg/admission/plugin/policy/validating/metrics/metrics.go (about)

     1  /*
     2  Copyright 2022 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package cel
    18  
    19  import (
    20  	"context"
    21  	"time"
    22  
    23  	"k8s.io/component-base/metrics"
    24  	"k8s.io/component-base/metrics/legacyregistry"
    25  )
    26  
    27  const (
    28  	metricsNamespace = "apiserver"
    29  	metricsSubsystem = "validating_admission_policy"
    30  )
    31  
    32  // ValidationErrorType defines different error types that happen to a validation expression
    33  type ValidationErrorType string
    34  
    35  const (
    36  	// ValidationCompileError indicates that the expression fails to compile.
    37  	ValidationCompileError ValidationErrorType = "compile_error"
    38  	// ValidatingInvalidError indicates that the expression fails due to internal
    39  	// errors that are out of the control of the user.
    40  	ValidatingInvalidError ValidationErrorType = "invalid_error"
    41  	// ValidatingOutOfBudget indicates that the expression fails due to running
    42  	// out of cost budget, or the budget cannot be obtained.
    43  	ValidatingOutOfBudget ValidationErrorType = "out_of_budget"
    44  	// ValidationNoError indicates that the expression returns without an error.
    45  	ValidationNoError ValidationErrorType = "no_error"
    46  )
    47  
    48  var (
    49  	// Metrics provides access to validation admission metrics.
    50  	Metrics = newValidationAdmissionMetrics()
    51  )
    52  
    53  // ValidatingAdmissionPolicyMetrics aggregates Prometheus metrics related to validation admission control.
    54  type ValidatingAdmissionPolicyMetrics struct {
    55  	policyCheck   *metrics.CounterVec
    56  	policyLatency *metrics.HistogramVec
    57  }
    58  
    59  func newValidationAdmissionMetrics() *ValidatingAdmissionPolicyMetrics {
    60  	check := metrics.NewCounterVec(
    61  		&metrics.CounterOpts{
    62  			Namespace:      metricsNamespace,
    63  			Subsystem:      metricsSubsystem,
    64  			Name:           "check_total",
    65  			Help:           "Validation admission policy check total, labeled by policy and further identified by binding and enforcement action taken.",
    66  			StabilityLevel: metrics.BETA,
    67  		},
    68  		[]string{"policy", "policy_binding", "error_type", "enforcement_action"},
    69  	)
    70  	latency := metrics.NewHistogramVec(&metrics.HistogramOpts{
    71  		Namespace: metricsNamespace,
    72  		Subsystem: metricsSubsystem,
    73  		Name:      "check_duration_seconds",
    74  		Help:      "Validation admission latency for individual validation expressions in seconds, labeled by policy and further including binding and enforcement action taken.",
    75  		// the bucket distribution here is based oo the benchmark suite at
    76  		// github.com/DangerOnTheRanger/cel-benchmark performed on 16-core Intel Xeon
    77  		// the lowest bucket was based around the 180ns/op figure for BenchmarkAccess,
    78  		// plus some additional leeway to account for the apiserver doing other things
    79  		// the largest bucket was chosen based on the fact that benchmarks indicate the
    80  		// same Xeon running a CEL expression close to the estimated cost limit takes
    81  		// around 760ms, so that bucket should only ever have the slowest CEL expressions
    82  		// in it
    83  		Buckets:        []float64{0.0000005, 0.001, 0.01, 0.1, 1.0},
    84  		StabilityLevel: metrics.BETA,
    85  	},
    86  		[]string{"policy", "policy_binding", "error_type", "enforcement_action"},
    87  	)
    88  
    89  	legacyregistry.MustRegister(check)
    90  	legacyregistry.MustRegister(latency)
    91  	return &ValidatingAdmissionPolicyMetrics{policyCheck: check, policyLatency: latency}
    92  }
    93  
    94  // Reset resets all validation admission-related Prometheus metrics.
    95  func (m *ValidatingAdmissionPolicyMetrics) Reset() {
    96  	m.policyCheck.Reset()
    97  	m.policyLatency.Reset()
    98  }
    99  
   100  // ObserveAdmission observes a policy validation, with an optional error to indicate the error that may occur but ignored.
   101  func (m *ValidatingAdmissionPolicyMetrics) ObserveAdmission(ctx context.Context, elapsed time.Duration, policy, binding string, errorType ValidationErrorType) {
   102  	m.policyCheck.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "allow").Inc()
   103  	m.policyLatency.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "allow").Observe(elapsed.Seconds())
   104  }
   105  
   106  // ObserveRejection observes a policy validation error that was at least one of the reasons for a deny.
   107  func (m *ValidatingAdmissionPolicyMetrics) ObserveRejection(ctx context.Context, elapsed time.Duration, policy, binding string, errorType ValidationErrorType) {
   108  	m.policyCheck.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "deny").Inc()
   109  	m.policyLatency.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "deny").Observe(elapsed.Seconds())
   110  }
   111  
   112  // ObserveAudit observes a policy validation audit annotation was published for a validation failure.
   113  func (m *ValidatingAdmissionPolicyMetrics) ObserveAudit(ctx context.Context, elapsed time.Duration, policy, binding string, errorType ValidationErrorType) {
   114  	m.policyCheck.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "audit").Inc()
   115  	m.policyLatency.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "audit").Observe(elapsed.Seconds())
   116  }
   117  
   118  // ObserveWarn observes a policy validation warning was published for a validation failure.
   119  func (m *ValidatingAdmissionPolicyMetrics) ObserveWarn(ctx context.Context, elapsed time.Duration, policy, binding string, errorType ValidationErrorType) {
   120  	m.policyCheck.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "warn").Inc()
   121  	m.policyLatency.WithContext(ctx).WithLabelValues(policy, binding, string(errorType), "warn").Observe(elapsed.Seconds())
   122  }