k8s.io/apiserver@v0.29.3/pkg/storage/etcd3/metrics/metrics.go (about)

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package metrics
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"sync"
    23  	"time"
    24  
    25  	compbasemetrics "k8s.io/component-base/metrics"
    26  	"k8s.io/component-base/metrics/legacyregistry"
    27  	"k8s.io/klog/v2"
    28  )
    29  
/*
 * By default, all the following metrics are defined as falling under
 * ALPHA stability level (https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes).
 *
 * Promoting the stability level of the metric is a responsibility of the component owner, since it
 * involves explicitly acknowledging support for the metric across multiple releases, in accordance with
 * the metric stability policy.
 */
var (
	// etcdRequestLatency is a histogram of etcd request durations in seconds,
	// labeled by operation and object type. It is fed by RecordEtcdRequest and
	// cleared by Reset.
	etcdRequestLatency = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Name: "etcd_request_duration_seconds",
			Help: "Etcd request latency in seconds for each operation and object type.",
			// Etcd request latency in seconds for each operation and object type.
			// This metric is used for verifying etcd api call latencies SLO
			// keep consistent with apiserver metric 'requestLatencies' in
			// staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go
			Buckets: []float64{0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
				4, 5, 6, 8, 10, 15, 20, 30, 45, 60},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"operation", "type"},
	)
	// etcdRequestCounts counts every etcd request, labeled by operation and
	// object type. It is incremented by RecordEtcdRequest.
	etcdRequestCounts = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Name:           "etcd_requests_total",
			Help:           "Etcd request counts for each operation and object type.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"operation", "type"},
	)
	// etcdRequestErrorCounts counts etcd requests that returned an error,
	// labeled by operation and object type. It is incremented by
	// RecordEtcdRequest when its err argument is non-nil.
	etcdRequestErrorCounts = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Name:           "etcd_request_errors_total",
			Help:           "Etcd failed request counts for each operation and object type.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"operation", "type"},
	)
	// objectCounts is a gauge of the number of stored objects per resource,
	// set by UpdateObjectCount. Unlike the other metrics in this block it is
	// declared STABLE.
	objectCounts = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Name:           "apiserver_storage_objects",
			Help:           "Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1.",
			StabilityLevel: compbasemetrics.STABLE,
		},
		[]string{"resource"},
	)
	// dbTotalSize is a gauge of the storage database size per endpoint, set by
	// UpdateEtcdDbSize. It is marked deprecated as of version 1.28.0.
	dbTotalSize = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Subsystem:         "apiserver",
			Name:              "storage_db_total_size_in_bytes",
			Help:              "Total size of the storage database file physically allocated in bytes.",
			StabilityLevel:    compbasemetrics.ALPHA,
			DeprecatedVersion: "1.28.0",
		},
		[]string{"endpoint"},
	)
	// storageSizeDescription describes the apiserver_storage_size_bytes metric
	// (label "cluster") that storageMonitor emits on each collection.
	// storageMonitor is the custom collector behind that metric; its default
	// getter returns no monitors until SetStorageMonitorGetter replaces it.
	// etcdEventsReceivedCounts counts etcd events received per resource and is
	// incremented by RecordEtcdEvent.
	storageSizeDescription   = compbasemetrics.NewDesc("apiserver_storage_size_bytes", "Size of the storage database file physically allocated in bytes.", []string{"cluster"}, nil, compbasemetrics.ALPHA, "")
	storageMonitor           = &monitorCollector{monitorGetter: func() ([]Monitor, error) { return nil, nil }}
	etcdEventsReceivedCounts = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      "apiserver",
			Name:           "storage_events_received_total",
			Help:           "Number of etcd events received split by kind.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)
	// etcdBookmarkCounts tracks etcd bookmarks (progress notify events) per
	// resource. It is declared as a gauge, and RecordEtcdBookmark only ever
	// increments it.
	etcdBookmarkCounts = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Name:           "etcd_bookmark_counts",
			Help:           "Number of etcd bookmarks (progress notify events) split by kind.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)
	// etcdLeaseObjectCounts is an unlabeled histogram of how many objects are
	// attached to a single etcd lease, observed by UpdateLeaseObjectCount.
	etcdLeaseObjectCounts = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Name:           "etcd_lease_object_counts",
			Help:           "Number of objects attached to a single etcd lease.",
			Buckets:        []float64{10, 50, 100, 500, 1000, 2500, 5000},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{},
	)
	// listStorageCount counts LIST requests served from storage per resource.
	// It is incremented once per RecordStorageListMetrics call.
	listStorageCount = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Name:           "apiserver_storage_list_total",
			Help:           "Number of LIST requests served from storage",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)
	// listStorageNumFetched accumulates the numFetched argument of
	// RecordStorageListMetrics per resource.
	listStorageNumFetched = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Name:           "apiserver_storage_list_fetched_objects_total",
			Help:           "Number of objects read from storage in the course of serving a LIST request",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)
	// listStorageNumSelectorEvals accumulates the numEvald argument of
	// RecordStorageListMetrics per resource.
	listStorageNumSelectorEvals = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Name:           "apiserver_storage_list_evaluated_objects_total",
			Help:           "Number of objects tested in the course of serving a LIST request from storage",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)
	// listStorageNumReturned accumulates the numReturned argument of
	// RecordStorageListMetrics per resource.
	listStorageNumReturned = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Name:           "apiserver_storage_list_returned_objects_total",
			Help:           "Number of objects returned for a LIST request from storage",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)
	// decodeErrorCounts counts stored-object decode errors per resource and is
	// incremented by RecordDecodeError. Note that it sets Namespace where the
	// other "apiserver" metrics in this block set Subsystem.
	decodeErrorCounts = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Namespace:      "apiserver",
			Name:           "storage_decode_errors_total",
			Help:           "Number of stored object decode errors split by object type",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"resource"},
	)
)
   157  
   158  var registerMetrics sync.Once
   159  
   160  // Register all metrics.
   161  func Register() {
   162  	// Register the metrics.
   163  	registerMetrics.Do(func() {
   164  		legacyregistry.MustRegister(etcdRequestLatency)
   165  		legacyregistry.MustRegister(etcdRequestCounts)
   166  		legacyregistry.MustRegister(etcdRequestErrorCounts)
   167  		legacyregistry.MustRegister(objectCounts)
   168  		legacyregistry.MustRegister(dbTotalSize)
   169  		legacyregistry.CustomMustRegister(storageMonitor)
   170  		legacyregistry.MustRegister(etcdBookmarkCounts)
   171  		legacyregistry.MustRegister(etcdLeaseObjectCounts)
   172  		legacyregistry.MustRegister(listStorageCount)
   173  		legacyregistry.MustRegister(listStorageNumFetched)
   174  		legacyregistry.MustRegister(listStorageNumSelectorEvals)
   175  		legacyregistry.MustRegister(listStorageNumReturned)
   176  		legacyregistry.MustRegister(decodeErrorCounts)
   177  	})
   178  }
   179  
   180  // UpdateObjectCount sets the apiserver_storage_object_counts metric.
   181  func UpdateObjectCount(resourcePrefix string, count int64) {
   182  	objectCounts.WithLabelValues(resourcePrefix).Set(float64(count))
   183  }
   184  
   185  // RecordEtcdRequest updates and sets the etcd_request_duration_seconds,
   186  // etcd_request_total, etcd_request_errors_total metrics.
   187  func RecordEtcdRequest(verb, resource string, err error, startTime time.Time) {
   188  	v := []string{verb, resource}
   189  	etcdRequestLatency.WithLabelValues(v...).Observe(sinceInSeconds(startTime))
   190  	etcdRequestCounts.WithLabelValues(v...).Inc()
   191  	if err != nil {
   192  		etcdRequestErrorCounts.WithLabelValues(v...).Inc()
   193  	}
   194  }
   195  
   196  // RecordEtcdEvent updated the etcd_events_received_total metric.
   197  func RecordEtcdEvent(resource string) {
   198  	etcdEventsReceivedCounts.WithLabelValues(resource).Inc()
   199  }
   200  
   201  // RecordEtcdBookmark updates the etcd_bookmark_counts metric.
   202  func RecordEtcdBookmark(resource string) {
   203  	etcdBookmarkCounts.WithLabelValues(resource).Inc()
   204  }
   205  
   206  // RecordDecodeError sets the storage_decode_errors metrics.
   207  func RecordDecodeError(resource string) {
   208  	decodeErrorCounts.WithLabelValues(resource).Inc()
   209  }
   210  
   211  // Reset resets the etcd_request_duration_seconds metric.
   212  func Reset() {
   213  	etcdRequestLatency.Reset()
   214  }
   215  
   216  // sinceInSeconds gets the time since the specified start in seconds.
   217  //
   218  // This is a variable to facilitate testing.
   219  var sinceInSeconds = func(start time.Time) float64 {
   220  	return time.Since(start).Seconds()
   221  }
   222  
   223  // UpdateEtcdDbSize sets the etcd_db_total_size_in_bytes metric.
   224  // Deprecated: Metric etcd_db_total_size_in_bytes will be replaced with apiserver_storage_size_bytes
   225  func UpdateEtcdDbSize(ep string, size int64) {
   226  	dbTotalSize.WithLabelValues(ep).Set(float64(size))
   227  }
   228  
   229  // SetStorageMonitorGetter sets monitor getter to allow monitoring etcd stats.
   230  func SetStorageMonitorGetter(getter func() ([]Monitor, error)) {
   231  	storageMonitor.setGetter(getter)
   232  }
   233  
   234  // UpdateLeaseObjectCount sets the etcd_lease_object_counts metric.
   235  func UpdateLeaseObjectCount(count int64) {
   236  	// Currently we only store one previous lease, since all the events have the same ttl.
   237  	// See pkg/storage/etcd3/lease_manager.go
   238  	etcdLeaseObjectCounts.WithLabelValues().Observe(float64(count))
   239  }
   240  
   241  // RecordListEtcd3Metrics notes various metrics of the cost to serve a LIST request
   242  func RecordStorageListMetrics(resource string, numFetched, numEvald, numReturned int) {
   243  	listStorageCount.WithLabelValues(resource).Inc()
   244  	listStorageNumFetched.WithLabelValues(resource).Add(float64(numFetched))
   245  	listStorageNumSelectorEvals.WithLabelValues(resource).Add(float64(numEvald))
   246  	listStorageNumReturned.WithLabelValues(resource).Add(float64(numReturned))
   247  }
   248  
// Monitor is a source of storage metrics. The collector in this file obtains
// Monitors from the configured getter on every collection, calls Monitor once
// on each of them and then calls Close.
type Monitor interface {
	// Monitor returns the current storage metrics, or an error if they could
	// not be obtained. The collector passes a context with a one-second timeout.
	Monitor(ctx context.Context) (StorageMetrics, error)
	// Close is called by the collector right after Monitor returns.
	// Presumably it releases resources held by the implementation — confirm
	// against the concrete implementations.
	Close() error
}
   253  
// StorageMetrics holds the values returned by a Monitor.
type StorageMetrics struct {
	// Size is reported as the value of the apiserver_storage_size_bytes metric,
	// which is described as the physically allocated size of the storage
	// database file in bytes.
	Size int64
}
   257  
// monitorCollector is a custom collector that emits the
// apiserver_storage_size_bytes metric from the Monitors returned by
// monitorGetter.
type monitorCollector struct {
	compbasemetrics.BaseStableCollector

	// mutex guards monitorGetter; setGetter and getGetter hold it while
	// writing and reading the field.
	mutex sync.Mutex
	// monitorGetter returns the monitors to query on each collection.
	monitorGetter func() ([]Monitor, error)
}
   264  
   265  func (m *monitorCollector) setGetter(monitorGetter func() ([]Monitor, error)) {
   266  	m.mutex.Lock()
   267  	defer m.mutex.Unlock()
   268  	m.monitorGetter = monitorGetter
   269  }
   270  
   271  func (m *monitorCollector) getGetter() func() ([]Monitor, error) {
   272  	m.mutex.Lock()
   273  	defer m.mutex.Unlock()
   274  	return m.monitorGetter
   275  }
   276  
   277  // DescribeWithStability implements compbasemetrics.StableColletor
   278  func (c *monitorCollector) DescribeWithStability(ch chan<- *compbasemetrics.Desc) {
   279  	ch <- storageSizeDescription
   280  }
   281  
   282  // CollectWithStability implements compbasemetrics.StableColletor
   283  func (c *monitorCollector) CollectWithStability(ch chan<- compbasemetrics.Metric) {
   284  	monitors, err := c.getGetter()()
   285  	if err != nil {
   286  		return
   287  	}
   288  
   289  	for i, m := range monitors {
   290  		cluster := fmt.Sprintf("etcd-%d", i)
   291  
   292  		klog.V(4).InfoS("Start collecting storage metrics", "cluster", cluster)
   293  		ctx, cancel := context.WithTimeout(context.Background(), time.Second)
   294  		metrics, err := m.Monitor(ctx)
   295  		cancel()
   296  		m.Close()
   297  		if err != nil {
   298  			klog.InfoS("Failed to get storage metrics", "cluster", cluster, "err", err)
   299  			continue
   300  		}
   301  
   302  		metric, err := compbasemetrics.NewConstMetric(storageSizeDescription, compbasemetrics.GaugeValue, float64(metrics.Size), cluster)
   303  		if err != nil {
   304  			klog.ErrorS(err, "Failed to create metric", "cluster", cluster)
   305  		}
   306  		ch <- metric
   307  	}
   308  }