k8s.io/apiserver@v0.31.1/pkg/storage/value/metrics.go (about)

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package value
    18  
    19  import (
    20  	"errors"
    21  	"sync"
    22  	"time"
    23  
    24  	"google.golang.org/grpc/codes"
    25  	"google.golang.org/grpc/status"
    26  
    27  	"k8s.io/component-base/metrics"
    28  	"k8s.io/component-base/metrics/legacyregistry"
    29  )
    30  
    31  const (
    32  	namespace = "apiserver"
    33  	subsystem = "storage"
    34  )
    35  
/*
 * By default, all the following metrics are defined as falling under the
 * ALPHA stability level (see https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes).
 *
 * Promoting the stability level of a metric is the responsibility of the component owner, since it
 * involves explicitly acknowledging support for the metric across multiple releases, in accordance with
 * the metric stability policy.
 */
    44  var (
    45  	transformerLatencies = metrics.NewHistogramVec(
    46  		&metrics.HistogramOpts{
    47  			Namespace: namespace,
    48  			Subsystem: subsystem,
    49  			Name:      "transformation_duration_seconds",
    50  			Help:      "Latencies in seconds of value transformation operations.",
    51  			// In-process transformations (ex. AES CBC) complete on the order of 20 microseconds. However, when
    52  			// external KMS is involved latencies may climb into hundreds of milliseconds.
    53  			Buckets:        metrics.ExponentialBuckets(5e-6, 2, 25),
    54  			StabilityLevel: metrics.ALPHA,
    55  		},
    56  		[]string{"transformation_type", "transformer_prefix"},
    57  	)
    58  
    59  	transformerOperationsTotal = metrics.NewCounterVec(
    60  		&metrics.CounterOpts{
    61  			Namespace:      namespace,
    62  			Subsystem:      subsystem,
    63  			Name:           "transformation_operations_total",
    64  			Help:           "Total number of transformations. Successful transformation will have a status 'OK' and a varied status string when the transformation fails. This status and transformation_type fields may be used for alerting on encryption/decryption failure using transformation_type from_storage for decryption and to_storage for encryption",
    65  			StabilityLevel: metrics.ALPHA,
    66  		},
    67  		[]string{"transformation_type", "transformer_prefix", "status"},
    68  	)
    69  
    70  	envelopeTransformationCacheMissTotal = metrics.NewCounter(
    71  		&metrics.CounterOpts{
    72  			Namespace:      namespace,
    73  			Subsystem:      subsystem,
    74  			Name:           "envelope_transformation_cache_misses_total",
    75  			Help:           "Total number of cache misses while accessing key decryption key(KEK).",
    76  			StabilityLevel: metrics.ALPHA,
    77  		},
    78  	)
    79  
    80  	dataKeyGenerationLatencies = metrics.NewHistogram(
    81  		&metrics.HistogramOpts{
    82  			Namespace:      namespace,
    83  			Subsystem:      subsystem,
    84  			Name:           "data_key_generation_duration_seconds",
    85  			Help:           "Latencies in seconds of data encryption key(DEK) generation operations.",
    86  			Buckets:        metrics.ExponentialBuckets(5e-6, 2, 14),
    87  			StabilityLevel: metrics.ALPHA,
    88  		},
    89  	)
    90  
    91  	dataKeyGenerationFailuresTotal = metrics.NewCounter(
    92  		&metrics.CounterOpts{
    93  			Namespace:      namespace,
    94  			Subsystem:      subsystem,
    95  			Name:           "data_key_generation_failures_total",
    96  			Help:           "Total number of failed data encryption key(DEK) generation operations.",
    97  			StabilityLevel: metrics.ALPHA,
    98  		},
    99  	)
   100  )
   101  
   102  var registerMetrics sync.Once
   103  
   104  func RegisterMetrics() {
   105  	registerMetrics.Do(func() {
   106  		legacyregistry.MustRegister(transformerLatencies)
   107  		legacyregistry.MustRegister(transformerOperationsTotal)
   108  		legacyregistry.MustRegister(envelopeTransformationCacheMissTotal)
   109  		legacyregistry.MustRegister(dataKeyGenerationLatencies)
   110  		legacyregistry.MustRegister(dataKeyGenerationFailuresTotal)
   111  	})
   112  }
   113  
   114  // RecordTransformation records latencies and count of TransformFromStorage and TransformToStorage operations.
   115  // Note that transformation_failures_total metric is deprecated, use transformation_operations_total instead.
   116  func RecordTransformation(transformationType, transformerPrefix string, elapsed time.Duration, err error) {
   117  	transformerOperationsTotal.WithLabelValues(transformationType, transformerPrefix, getErrorCode(err)).Inc()
   118  
   119  	if err == nil {
   120  		transformerLatencies.WithLabelValues(transformationType, transformerPrefix).Observe(elapsed.Seconds())
   121  	}
   122  }
   123  
// RecordCacheMiss records a miss on Key Encryption Key(KEK) - call to KMS was required to decrypt KEK.
// It increments the envelope_transformation_cache_misses_total counter by one.
func RecordCacheMiss() {
	envelopeTransformationCacheMissTotal.Inc()
}
   128  
   129  // RecordDataKeyGeneration records latencies and count of Data Encryption Key generation operations.
   130  func RecordDataKeyGeneration(start time.Time, err error) {
   131  	if err != nil {
   132  		dataKeyGenerationFailuresTotal.Inc()
   133  		return
   134  	}
   135  
   136  	dataKeyGenerationLatencies.Observe(sinceInSeconds(start))
   137  }
   138  
   139  // sinceInSeconds gets the time since the specified start in seconds.
   140  func sinceInSeconds(start time.Time) float64 {
   141  	return time.Since(start).Seconds()
   142  }
   143  
// gRPCError is satisfied by errors that expose a gRPC status through a
// GRPCStatus method. getErrorCode uses it as the errors.As target when looking
// for a status code in an error chain.
type gRPCError interface {
	GRPCStatus() *status.Status
}
   147  
   148  func getErrorCode(err error) string {
   149  	if err == nil {
   150  		return codes.OK.String()
   151  	}
   152  
   153  	// handle errors wrapped with fmt.Errorf and similar
   154  	var s gRPCError
   155  	if errors.As(err, &s) {
   156  		return s.GRPCStatus().Code().String()
   157  	}
   158  
   159  	// This is not gRPC error. The operation must have failed before gRPC
   160  	// method was called, otherwise we would get gRPC error.
   161  	return "unknown-non-grpc"
   162  }