k8s.io/kubernetes@v1.29.3/pkg/volume/util/metrics.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package util 18 19 import ( 20 "fmt" 21 "strconv" 22 "time" 23 24 "google.golang.org/grpc/codes" 25 "google.golang.org/grpc/status" 26 "k8s.io/component-base/metrics" 27 "k8s.io/component-base/metrics/legacyregistry" 28 "k8s.io/kubernetes/pkg/volume" 29 "k8s.io/kubernetes/pkg/volume/util/types" 30 ) 31 32 const ( 33 statusSuccess = "success" 34 statusFailUnknown = "fail-unknown" 35 ) 36 37 /* 38 * By default, all the following metrics are defined as falling under 39 * ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes) 40 * 41 * Promoting the stability level of the metric is a responsibility of the component owner, since it 42 * involves explicitly acknowledging support for the metric across multiple releases, in accordance with 43 * the metric stability policy. 44 */ 45 46 var StorageOperationMetric = metrics.NewHistogramVec( 47 &metrics.HistogramOpts{ 48 Name: "storage_operation_duration_seconds", 49 Help: "Storage operation duration", 50 Buckets: []float64{.1, .25, .5, 1, 2.5, 5, 10, 15, 25, 50, 120, 300, 600}, 51 StabilityLevel: metrics.ALPHA, 52 }, 53 []string{"volume_plugin", "operation_name", "status", "migrated"}, 54 ) 55 56 var storageOperationEndToEndLatencyMetric = metrics.NewHistogramVec( 57 &metrics.HistogramOpts{ 58 Name: "volume_operation_total_seconds", 59 Help: "Storage operation end to end duration in seconds", 60 Buckets: []float64{.1, .25, .5, 1, 2.5, 5, 10, 15, 25, 50, 120, 300, 600}, 61 StabilityLevel: metrics.ALPHA, 62 }, 63 []string{"plugin_name", "operation_name"}, 64 ) 65 66 var csiOperationsLatencyMetric = metrics.NewHistogramVec( 67 &metrics.HistogramOpts{ 68 Subsystem: "csi", 69 Name: "operations_seconds", 70 Help: "Container Storage Interface operation duration with gRPC error code status total", 71 Buckets: []float64{.1, .25, .5, 1, 2.5, 5, 10, 15, 25, 50, 120, 300, 600}, 72 StabilityLevel: metrics.ALPHA, 73 }, 74 []string{"driver_name", "method_name", "grpc_status_code", "migrated"}, 75 ) 76 77 func init() { 78 registerMetrics() 79 } 80 81 func registerMetrics() { 82 // legacyregistry is the internal k8s wrapper around the prometheus 83 // global registry, used specifically for metric stability enforcement 84 legacyregistry.MustRegister(StorageOperationMetric) 85 legacyregistry.MustRegister(storageOperationEndToEndLatencyMetric) 86 legacyregistry.MustRegister(csiOperationsLatencyMetric) 87 } 88 89 // OperationCompleteHook returns a hook to call when an operation is completed 90 func OperationCompleteHook(plugin, operationName string) func(types.CompleteFuncParam) { 91 requestTime := time.Now() 92 opComplete := func(c types.CompleteFuncParam) { 93 timeTaken := time.Since(requestTime).Seconds() 94 // Create metric with operation name and plugin name 95 status := statusSuccess 96 if *c.Err != nil { 97 // TODO: Establish well-known error codes to be able to distinguish 98 // user configuration errors from system errors. 99 status = statusFailUnknown 100 } 101 migrated := false 102 if c.Migrated != nil { 103 migrated = *c.Migrated 104 } 105 StorageOperationMetric.WithLabelValues(plugin, operationName, status, strconv.FormatBool(migrated)).Observe(timeTaken) 106 } 107 return opComplete 108 } 109 110 // FSGroupCompleteHook returns a hook to call when volume recursive permission is changed 111 func FSGroupCompleteHook(plugin volume.VolumePlugin, spec *volume.Spec) func(types.CompleteFuncParam) { 112 return OperationCompleteHook(GetFullQualifiedPluginNameForVolume(plugin.GetPluginName(), spec), "volume_apply_access_control") 113 } 114 115 // GetFullQualifiedPluginNameForVolume returns full qualified plugin name for 116 // given volume. For CSI plugin, it appends plugin driver name at the end of 117 // plugin name, e.g. kubernetes.io/csi:csi-hostpath. It helps to distinguish 118 // between metrics emitted for CSI volumes which may be handled by different 119 // CSI plugin drivers. 120 func GetFullQualifiedPluginNameForVolume(pluginName string, spec *volume.Spec) string { 121 if spec != nil { 122 if spec.Volume != nil && spec.Volume.CSI != nil { 123 return fmt.Sprintf("%s:%s", pluginName, spec.Volume.CSI.Driver) 124 } 125 if spec.PersistentVolume != nil && spec.PersistentVolume.Spec.CSI != nil { 126 return fmt.Sprintf("%s:%s", pluginName, spec.PersistentVolume.Spec.CSI.Driver) 127 } 128 } 129 return pluginName 130 } 131 132 // RecordOperationLatencyMetric records the end to end latency for certain operation 133 // into metric volume_operation_total_seconds 134 func RecordOperationLatencyMetric(plugin, operationName string, secondsTaken float64) { 135 storageOperationEndToEndLatencyMetric.WithLabelValues(plugin, operationName).Observe(secondsTaken) 136 } 137 138 // RecordCSIOperationLatencyMetrics records the CSI operation latency and grpc status 139 // into metric csi_kubelet_operations_seconds 140 func RecordCSIOperationLatencyMetrics(driverName string, 141 operationName string, 142 operationErr error, 143 operationDuration time.Duration, 144 migrated string) { 145 csiOperationsLatencyMetric.WithLabelValues(driverName, operationName, getErrorCode(operationErr), migrated).Observe(operationDuration.Seconds()) 146 } 147 148 func getErrorCode(err error) string { 149 if err == nil { 150 return codes.OK.String() 151 } 152 153 st, ok := status.FromError(err) 154 if !ok { 155 // This is not gRPC error. The operation must have failed before gRPC 156 // method was called, otherwise we would get gRPC error. 157 return "unknown-non-grpc" 158 } 159 160 return st.Code().String() 161 }