k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/volume/persistentvolume/metrics/metrics.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package metrics
    18  
    19  import (
    20  	"sync"
    21  	"time"
    22  
    23  	v1 "k8s.io/api/core/v1"
    24  	"k8s.io/component-base/metrics"
    25  	"k8s.io/component-base/metrics/legacyregistry"
    26  	"k8s.io/kubernetes/pkg/volume"
    27  	metricutil "k8s.io/kubernetes/pkg/volume/util"
    28  )
    29  
    30  const (
    31  	// Subsystem names.
    32  	pvControllerSubsystem = "pv_collector"
    33  
    34  	// Metric names.
    35  	totalPVKey    = "total_pv_count"
    36  	boundPVKey    = "bound_pv_count"
    37  	unboundPVKey  = "unbound_pv_count"
    38  	boundPVCKey   = "bound_pvc_count"
    39  	unboundPVCKey = "unbound_pvc_count"
    40  
    41  	// Label names.
    42  	namespaceLabel    = "namespace"
    43  	storageClassLabel = "storage_class"
    44  	pluginNameLabel   = "plugin_name"
    45  	volumeModeLabel   = "volume_mode"
    46  
    47  	// String to use when plugin name cannot be determined
    48  	pluginNameNotAvailable = "N/A"
    49  )
    50  
// registerMetrics guards the registration performed by Register so that it
// runs at most once.
var registerMetrics sync.Once
    52  
// PVLister is used to list persistent volumes. The collector expects the
// returned elements to be *v1.PersistentVolume; elements of any other type
// are skipped when counting.
type PVLister interface {
	List() []interface{}
}

// PVCLister is used to list persistent volume claims. The collector expects
// the returned elements to be *v1.PersistentVolumeClaim; elements of any
// other type are skipped when counting.
type PVCLister interface {
	List() []interface{}
}
    62  
    63  // Register all metrics for pv controller.
    64  func Register(pvLister PVLister, pvcLister PVCLister, pluginMgr *volume.VolumePluginMgr) {
    65  	registerMetrics.Do(func() {
    66  		legacyregistry.CustomMustRegister(newPVAndPVCCountCollector(pvLister, pvcLister, pluginMgr))
    67  		legacyregistry.MustRegister(volumeOperationErrorsMetric)
    68  		legacyregistry.MustRegister(retroactiveStorageClassMetric)
    69  		legacyregistry.MustRegister(retroactiveStorageClassErrorMetric)
    70  	})
    71  }
    72  
    73  func newPVAndPVCCountCollector(pvLister PVLister, pvcLister PVCLister, pluginMgr *volume.VolumePluginMgr) *pvAndPVCCountCollector {
    74  	return &pvAndPVCCountCollector{pvLister: pvLister, pvcLister: pvcLister, pluginMgr: pluginMgr}
    75  }
    76  
    77  // Custom collector for current pod and container counts.
    78  type pvAndPVCCountCollector struct {
    79  	metrics.BaseStableCollector
    80  
    81  	// Cache for accessing information about PersistentVolumes.
    82  	pvLister PVLister
    83  	// Cache for accessing information about PersistentVolumeClaims.
    84  	pvcLister PVCLister
    85  	// Volume plugin manager
    86  	pluginMgr *volume.VolumePluginMgr
    87  }
    88  
    89  // Check if our collector implements necessary collector interface
    90  var _ metrics.StableCollector = &pvAndPVCCountCollector{}
    91  
    92  var (
    93  	totalPVCountDesc = metrics.NewDesc(
    94  		metrics.BuildFQName("", pvControllerSubsystem, totalPVKey),
    95  		"Gauge measuring total number of persistent volumes",
    96  		[]string{pluginNameLabel, volumeModeLabel}, nil,
    97  		metrics.ALPHA, "")
    98  	boundPVCountDesc = metrics.NewDesc(
    99  		metrics.BuildFQName("", pvControllerSubsystem, boundPVKey),
   100  		"Gauge measuring number of persistent volume currently bound",
   101  		[]string{storageClassLabel}, nil,
   102  		metrics.ALPHA, "")
   103  	unboundPVCountDesc = metrics.NewDesc(
   104  		metrics.BuildFQName("", pvControllerSubsystem, unboundPVKey),
   105  		"Gauge measuring number of persistent volume currently unbound",
   106  		[]string{storageClassLabel}, nil,
   107  		metrics.ALPHA, "")
   108  
   109  	boundPVCCountDesc = metrics.NewDesc(
   110  		metrics.BuildFQName("", pvControllerSubsystem, boundPVCKey),
   111  		"Gauge measuring number of persistent volume claim currently bound",
   112  		[]string{namespaceLabel}, nil,
   113  		metrics.ALPHA, "")
   114  	unboundPVCCountDesc = metrics.NewDesc(
   115  		metrics.BuildFQName("", pvControllerSubsystem, unboundPVCKey),
   116  		"Gauge measuring number of persistent volume claim currently unbound",
   117  		[]string{namespaceLabel}, nil,
   118  		metrics.ALPHA, "")
   119  
   120  	volumeOperationErrorsMetric = metrics.NewCounterVec(
   121  		&metrics.CounterOpts{
   122  			Name:           "volume_operation_total_errors",
   123  			Help:           "Total volume operation errors",
   124  			StabilityLevel: metrics.ALPHA,
   125  		},
   126  		[]string{"plugin_name", "operation_name"})
   127  
   128  	retroactiveStorageClassMetric = metrics.NewCounter(
   129  		&metrics.CounterOpts{
   130  			Name:           "retroactive_storageclass_total",
   131  			Help:           "Total number of retroactive StorageClass assignments to persistent volume claim",
   132  			StabilityLevel: metrics.ALPHA,
   133  		})
   134  
   135  	retroactiveStorageClassErrorMetric = metrics.NewCounter(
   136  		&metrics.CounterOpts{
   137  			Name:           "retroactive_storageclass_errors_total",
   138  			Help:           "Total number of failed retroactive StorageClass assignments to persistent volume claim",
   139  			StabilityLevel: metrics.ALPHA,
   140  		})
   141  )
   142  
   143  // volumeCount counts by PluginName and VolumeMode.
   144  type volumeCount map[string]map[string]int
   145  
   146  func (v volumeCount) add(pluginName string, volumeMode string) {
   147  	count, ok := v[pluginName]
   148  	if !ok {
   149  		count = map[string]int{}
   150  	}
   151  	count[volumeMode]++
   152  	v[pluginName] = count
   153  }
   154  
   155  func (collector *pvAndPVCCountCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
   156  	ch <- totalPVCountDesc
   157  	ch <- boundPVCountDesc
   158  	ch <- unboundPVCountDesc
   159  	ch <- boundPVCCountDesc
   160  	ch <- unboundPVCCountDesc
   161  }
   162  
// CollectWithStability sends the current PV count metrics followed by the
// current PVC count metrics to ch.
func (collector *pvAndPVCCountCollector) CollectWithStability(ch chan<- metrics.Metric) {
	collector.pvCollect(ch)
	collector.pvcCollect(ch)
}
   167  
   168  func (collector *pvAndPVCCountCollector) getPVPluginName(pv *v1.PersistentVolume) string {
   169  	spec := volume.NewSpecFromPersistentVolume(pv, true)
   170  	fullPluginName := pluginNameNotAvailable
   171  	if plugin, err := collector.pluginMgr.FindPluginBySpec(spec); err == nil {
   172  		fullPluginName = metricutil.GetFullQualifiedPluginNameForVolume(plugin.GetPluginName(), spec)
   173  	}
   174  	return fullPluginName
   175  }
   176  
   177  func (collector *pvAndPVCCountCollector) pvCollect(ch chan<- metrics.Metric) {
   178  	boundNumberByStorageClass := make(map[string]int)
   179  	unboundNumberByStorageClass := make(map[string]int)
   180  	totalCount := make(volumeCount)
   181  	for _, pvObj := range collector.pvLister.List() {
   182  		pv, ok := pvObj.(*v1.PersistentVolume)
   183  		if !ok {
   184  			continue
   185  		}
   186  		pluginName := collector.getPVPluginName(pv)
   187  		totalCount.add(pluginName, string(*pv.Spec.VolumeMode))
   188  		if pv.Status.Phase == v1.VolumeBound {
   189  			boundNumberByStorageClass[pv.Spec.StorageClassName]++
   190  		} else {
   191  			unboundNumberByStorageClass[pv.Spec.StorageClassName]++
   192  		}
   193  	}
   194  	for storageClassName, number := range boundNumberByStorageClass {
   195  		ch <- metrics.NewLazyConstMetric(
   196  			boundPVCountDesc,
   197  			metrics.GaugeValue,
   198  			float64(number),
   199  			storageClassName)
   200  	}
   201  	for storageClassName, number := range unboundNumberByStorageClass {
   202  		ch <- metrics.NewLazyConstMetric(
   203  			unboundPVCountDesc,
   204  			metrics.GaugeValue,
   205  			float64(number),
   206  			storageClassName)
   207  	}
   208  	for pluginName, volumeModeCount := range totalCount {
   209  		for volumeMode, number := range volumeModeCount {
   210  			ch <- metrics.NewLazyConstMetric(
   211  				totalPVCountDesc,
   212  				metrics.GaugeValue,
   213  				float64(number),
   214  				pluginName,
   215  				volumeMode)
   216  		}
   217  	}
   218  }
   219  
   220  func (collector *pvAndPVCCountCollector) pvcCollect(ch chan<- metrics.Metric) {
   221  	boundNumberByNamespace := make(map[string]int)
   222  	unboundNumberByNamespace := make(map[string]int)
   223  	for _, pvcObj := range collector.pvcLister.List() {
   224  		pvc, ok := pvcObj.(*v1.PersistentVolumeClaim)
   225  		if !ok {
   226  			continue
   227  		}
   228  		if pvc.Status.Phase == v1.ClaimBound {
   229  			boundNumberByNamespace[pvc.Namespace]++
   230  		} else {
   231  			unboundNumberByNamespace[pvc.Namespace]++
   232  		}
   233  	}
   234  	for namespace, number := range boundNumberByNamespace {
   235  		ch <- metrics.NewLazyConstMetric(
   236  			boundPVCCountDesc,
   237  			metrics.GaugeValue,
   238  			float64(number),
   239  			namespace)
   240  	}
   241  	for namespace, number := range unboundNumberByNamespace {
   242  		ch <- metrics.NewLazyConstMetric(
   243  			unboundPVCCountDesc,
   244  			metrics.GaugeValue,
   245  			float64(number),
   246  			namespace)
   247  	}
   248  }
   249  
   250  // RecordRetroactiveStorageClassMetric increments only retroactive_storageclass_total
   251  // metric or both retroactive_storageclass_total and retroactive_storageclass_errors_total
   252  // if success is false.
   253  func RecordRetroactiveStorageClassMetric(success bool) {
   254  	if !success {
   255  		retroactiveStorageClassMetric.Inc()
   256  		retroactiveStorageClassErrorMetric.Inc()
   257  	} else {
   258  		retroactiveStorageClassMetric.Inc()
   259  	}
   260  }
   261  
   262  // RecordVolumeOperationErrorMetric records error count into metric
   263  // volume_operation_total_errors for provisioning/deletion operations
   264  func RecordVolumeOperationErrorMetric(pluginName, opName string) {
   265  	if pluginName == "" {
   266  		pluginName = "N/A"
   267  	}
   268  	volumeOperationErrorsMetric.WithLabelValues(pluginName, opName).Inc()
   269  }
   270  
   271  // operationTimestamp stores the start time of an operation by a plugin
   272  type operationTimestamp struct {
   273  	pluginName string
   274  	operation  string
   275  	startTs    time.Time
   276  }
   277  
   278  func newOperationTimestamp(pluginName, operationName string) *operationTimestamp {
   279  	return &operationTimestamp{
   280  		pluginName: pluginName,
   281  		operation:  operationName,
   282  		startTs:    time.Now(),
   283  	}
   284  }
   285  
   286  // OperationStartTimeCache concurrent safe cache for operation start timestamps
   287  type OperationStartTimeCache struct {
   288  	cache sync.Map // [string]operationTimestamp
   289  }
   290  
   291  // NewOperationStartTimeCache creates a operation timestamp cache
   292  func NewOperationStartTimeCache() OperationStartTimeCache {
   293  	return OperationStartTimeCache{
   294  		cache: sync.Map{}, // [string]operationTimestamp {}
   295  	}
   296  }
   297  
   298  // AddIfNotExist returns directly if there exists an entry with the key. Otherwise, it
   299  // creates a new operation timestamp using operationName, pluginName, and current timestamp
   300  // and stores the operation timestamp with the key
   301  func (c *OperationStartTimeCache) AddIfNotExist(key, pluginName, operationName string) {
   302  	ts := newOperationTimestamp(pluginName, operationName)
   303  	c.cache.LoadOrStore(key, ts)
   304  }
   305  
   306  // Delete deletes a value for a key.
   307  func (c *OperationStartTimeCache) Delete(key string) {
   308  	c.cache.Delete(key)
   309  }
   310  
   311  // Has returns a bool value indicates the existence of a key in the cache
   312  func (c *OperationStartTimeCache) Has(key string) bool {
   313  	_, exists := c.cache.Load(key)
   314  	return exists
   315  }
   316  
   317  // RecordMetric records either an error count metric or a latency metric if there
   318  // exists a start timestamp entry in the cache. For a successful operation, i.e.,
   319  // err == nil, the corresponding timestamp entry will be removed from cache
   320  func RecordMetric(key string, c *OperationStartTimeCache, err error) {
   321  	obj, exists := c.cache.Load(key)
   322  	if !exists {
   323  		return
   324  	}
   325  	ts, ok := obj.(*operationTimestamp)
   326  	if !ok {
   327  		return
   328  	}
   329  	if err != nil {
   330  		RecordVolumeOperationErrorMetric(ts.pluginName, ts.operation)
   331  	} else {
   332  		timeTaken := time.Since(ts.startTs).Seconds()
   333  		metricutil.RecordOperationLatencyMetric(ts.pluginName, ts.operation, timeTaken)
   334  		// end of this operation, remove the timestamp entry from cache
   335  		c.Delete(key)
   336  	}
   337  }