k8s.io/apiserver@v0.31.1/pkg/audit/metrics.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package audit
    18  
import (
	"context"
	"fmt"
	"strings"

	auditinternal "k8s.io/apiserver/pkg/apis/audit"
	"k8s.io/component-base/metrics"
	"k8s.io/component-base/metrics/legacyregistry"
	"k8s.io/klog/v2"
)
    28  
    29  const (
    30  	subsystem = "apiserver_audit"
    31  )
    32  
/*
 * By default, all the following metrics are defined as falling under
 * the ALPHA stability level (https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes).
 *
 * Promoting the stability level of the metric is a responsibility of the component owner, since it
 * involves explicitly acknowledging support for the metric across multiple releases, in accordance with
 * the metric stability policy.
 */
    41  var (
    42  	eventCounter = metrics.NewCounter(
    43  		&metrics.CounterOpts{
    44  			Subsystem:      subsystem,
    45  			Name:           "event_total",
    46  			Help:           "Counter of audit events generated and sent to the audit backend.",
    47  			StabilityLevel: metrics.ALPHA,
    48  		})
    49  	errorCounter = metrics.NewCounterVec(
    50  		&metrics.CounterOpts{
    51  			Subsystem: subsystem,
    52  			Name:      "error_total",
    53  			Help: "Counter of audit events that failed to be audited properly. " +
    54  				"Plugin identifies the plugin affected by the error.",
    55  			StabilityLevel: metrics.ALPHA,
    56  		},
    57  		[]string{"plugin"},
    58  	)
    59  	levelCounter = metrics.NewCounterVec(
    60  		&metrics.CounterOpts{
    61  			Subsystem:      subsystem,
    62  			Name:           "level_total",
    63  			Help:           "Counter of policy levels for audit events (1 per request).",
    64  			StabilityLevel: metrics.ALPHA,
    65  		},
    66  		[]string{"level"},
    67  	)
    68  
    69  	ApiserverAuditDroppedCounter = metrics.NewCounter(
    70  		&metrics.CounterOpts{
    71  			Subsystem: subsystem,
    72  			Name:      "requests_rejected_total",
    73  			Help: "Counter of apiserver requests rejected due to an error " +
    74  				"in audit logging backend.",
    75  			StabilityLevel: metrics.ALPHA,
    76  		},
    77  	)
    78  )
    79  
    80  func init() {
    81  	legacyregistry.MustRegister(eventCounter)
    82  	legacyregistry.MustRegister(errorCounter)
    83  	legacyregistry.MustRegister(levelCounter)
    84  	legacyregistry.MustRegister(ApiserverAuditDroppedCounter)
    85  }
    86  
    87  // ObserveEvent updates the relevant prometheus metrics for the generated audit event.
    88  func ObserveEvent(ctx context.Context) {
    89  	eventCounter.WithContext(ctx).Inc()
    90  }
    91  
    92  // ObservePolicyLevel updates the relevant prometheus metrics with the audit level for a request.
    93  func ObservePolicyLevel(ctx context.Context, level auditinternal.Level) {
    94  	levelCounter.WithContext(ctx).WithLabelValues(string(level)).Inc()
    95  }
    96  
    97  // HandlePluginError handles an error that occurred in an audit plugin. This method should only be
    98  // used if the error may have prevented the audit event from being properly recorded. The events are
    99  // logged to the debug log.
   100  func HandlePluginError(plugin string, err error, impacted ...*auditinternal.Event) {
   101  	// Count the error.
   102  	errorCounter.WithLabelValues(plugin).Add(float64(len(impacted)))
   103  
   104  	// Log the audit events to the debug log.
   105  	msg := fmt.Sprintf("Error in audit plugin '%s' affecting %d audit events: %v\nImpacted events:\n",
   106  		plugin, len(impacted), err)
   107  	for _, ev := range impacted {
   108  		msg = msg + EventString(ev) + "\n"
   109  	}
   110  	klog.Error(msg)
   111  }