k8s.io/apiserver@v0.31.1/pkg/storage/etcd3/metrics/metrics.go (about) 1 /* 2 Copyright 2015 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package metrics 18 19 import ( 20 "context" 21 "fmt" 22 "sync" 23 "time" 24 25 compbasemetrics "k8s.io/component-base/metrics" 26 "k8s.io/component-base/metrics/legacyregistry" 27 "k8s.io/klog/v2" 28 ) 29 30 /* 31 * By default, all the following metrics are defined as falling under 32 * ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes) 33 * 34 * Promoting the stability level of the metric is a responsibility of the component owner, since it 35 * involves explicitly acknowledging support for the metric across multiple releases, in accordance with 36 * the metric stability policy. 37 */ 38 var ( 39 etcdRequestLatency = compbasemetrics.NewHistogramVec( 40 &compbasemetrics.HistogramOpts{ 41 Name: "etcd_request_duration_seconds", 42 Help: "Etcd request latency in seconds for each operation and object type.", 43 // Etcd request latency in seconds for each operation and object type. 44 // This metric is used for verifying etcd api call latencies SLO 45 // keep consistent with apiserver metric 'requestLatencies' in 46 // staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go 47 Buckets: []float64{0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3, 48 4, 5, 6, 8, 10, 15, 20, 30, 45, 60}, 49 StabilityLevel: compbasemetrics.ALPHA, 50 }, 51 []string{"operation", "type"}, 52 ) 53 etcdRequestCounts = compbasemetrics.NewCounterVec( 54 &compbasemetrics.CounterOpts{ 55 Name: "etcd_requests_total", 56 Help: "Etcd request counts for each operation and object type.", 57 StabilityLevel: compbasemetrics.ALPHA, 58 }, 59 []string{"operation", "type"}, 60 ) 61 etcdRequestErrorCounts = compbasemetrics.NewCounterVec( 62 &compbasemetrics.CounterOpts{ 63 Name: "etcd_request_errors_total", 64 Help: "Etcd failed request counts for each operation and object type.", 65 StabilityLevel: compbasemetrics.ALPHA, 66 }, 67 []string{"operation", "type"}, 68 ) 69 objectCounts = compbasemetrics.NewGaugeVec( 70 &compbasemetrics.GaugeOpts{ 71 Name: "apiserver_storage_objects", 72 Help: "Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1.", 73 StabilityLevel: compbasemetrics.STABLE, 74 }, 75 []string{"resource"}, 76 ) 77 dbTotalSize = compbasemetrics.NewGaugeVec( 78 &compbasemetrics.GaugeOpts{ 79 Subsystem: "apiserver", 80 Name: "storage_db_total_size_in_bytes", 81 Help: "Total size of the storage database file physically allocated in bytes.", 82 StabilityLevel: compbasemetrics.ALPHA, 83 DeprecatedVersion: "1.28.0", 84 }, 85 []string{"endpoint"}, 86 ) 87 storageSizeDescription = compbasemetrics.NewDesc("apiserver_storage_size_bytes", "Size of the storage database file physically allocated in bytes.", []string{"storage_cluster_id"}, nil, compbasemetrics.STABLE, "") 88 storageMonitor = &monitorCollector{monitorGetter: func() ([]Monitor, error) { return nil, nil }} 89 etcdEventsReceivedCounts = compbasemetrics.NewCounterVec( 90 &compbasemetrics.CounterOpts{ 91 Subsystem: "apiserver", 92 Name: "storage_events_received_total", 93 Help: "Number of etcd events received split by kind.", 94 StabilityLevel: compbasemetrics.ALPHA, 95 }, 96 []string{"resource"}, 97 ) 98 etcdBookmarkCounts = compbasemetrics.NewGaugeVec( 99 &compbasemetrics.GaugeOpts{ 100 Name: "etcd_bookmark_counts", 101 Help: "Number of etcd bookmarks (progress notify events) split by kind.", 102 StabilityLevel: compbasemetrics.ALPHA, 103 }, 104 []string{"resource"}, 105 ) 106 etcdLeaseObjectCounts = compbasemetrics.NewHistogramVec( 107 &compbasemetrics.HistogramOpts{ 108 Name: "etcd_lease_object_counts", 109 Help: "Number of objects attached to a single etcd lease.", 110 Buckets: []float64{10, 50, 100, 500, 1000, 2500, 5000}, 111 StabilityLevel: compbasemetrics.ALPHA, 112 }, 113 []string{}, 114 ) 115 listStorageCount = compbasemetrics.NewCounterVec( 116 &compbasemetrics.CounterOpts{ 117 Name: "apiserver_storage_list_total", 118 Help: "Number of LIST requests served from storage", 119 StabilityLevel: compbasemetrics.ALPHA, 120 }, 121 []string{"resource"}, 122 ) 123 listStorageNumFetched = compbasemetrics.NewCounterVec( 124 &compbasemetrics.CounterOpts{ 125 Name: "apiserver_storage_list_fetched_objects_total", 126 Help: "Number of objects read from storage in the course of serving a LIST request", 127 StabilityLevel: compbasemetrics.ALPHA, 128 }, 129 []string{"resource"}, 130 ) 131 listStorageNumSelectorEvals = compbasemetrics.NewCounterVec( 132 &compbasemetrics.CounterOpts{ 133 Name: "apiserver_storage_list_evaluated_objects_total", 134 Help: "Number of objects tested in the course of serving a LIST request from storage", 135 StabilityLevel: compbasemetrics.ALPHA, 136 }, 137 []string{"resource"}, 138 ) 139 listStorageNumReturned = compbasemetrics.NewCounterVec( 140 &compbasemetrics.CounterOpts{ 141 Name: "apiserver_storage_list_returned_objects_total", 142 Help: "Number of objects returned for a LIST request from storage", 143 StabilityLevel: compbasemetrics.ALPHA, 144 }, 145 []string{"resource"}, 146 ) 147 decodeErrorCounts = compbasemetrics.NewCounterVec( 148 &compbasemetrics.CounterOpts{ 149 Namespace: "apiserver", 150 Name: "storage_decode_errors_total", 151 Help: "Number of stored object decode errors split by object type", 152 StabilityLevel: compbasemetrics.ALPHA, 153 }, 154 []string{"resource"}, 155 ) 156 ) 157 158 var registerMetrics sync.Once 159 160 // Register all metrics. 161 func Register() { 162 // Register the metrics. 163 registerMetrics.Do(func() { 164 legacyregistry.MustRegister(etcdRequestLatency) 165 legacyregistry.MustRegister(etcdRequestCounts) 166 legacyregistry.MustRegister(etcdRequestErrorCounts) 167 legacyregistry.MustRegister(objectCounts) 168 legacyregistry.MustRegister(dbTotalSize) 169 legacyregistry.CustomMustRegister(storageMonitor) 170 legacyregistry.MustRegister(etcdEventsReceivedCounts) 171 legacyregistry.MustRegister(etcdBookmarkCounts) 172 legacyregistry.MustRegister(etcdLeaseObjectCounts) 173 legacyregistry.MustRegister(listStorageCount) 174 legacyregistry.MustRegister(listStorageNumFetched) 175 legacyregistry.MustRegister(listStorageNumSelectorEvals) 176 legacyregistry.MustRegister(listStorageNumReturned) 177 legacyregistry.MustRegister(decodeErrorCounts) 178 }) 179 } 180 181 // UpdateObjectCount sets the apiserver_storage_object_counts metric. 182 func UpdateObjectCount(resourcePrefix string, count int64) { 183 objectCounts.WithLabelValues(resourcePrefix).Set(float64(count)) 184 } 185 186 // RecordEtcdRequest updates and sets the etcd_request_duration_seconds, 187 // etcd_request_total, etcd_request_errors_total metrics. 188 func RecordEtcdRequest(verb, resource string, err error, startTime time.Time) { 189 v := []string{verb, resource} 190 etcdRequestLatency.WithLabelValues(v...).Observe(sinceInSeconds(startTime)) 191 etcdRequestCounts.WithLabelValues(v...).Inc() 192 if err != nil { 193 etcdRequestErrorCounts.WithLabelValues(v...).Inc() 194 } 195 } 196 197 // RecordEtcdEvent updated the etcd_events_received_total metric. 198 func RecordEtcdEvent(resource string) { 199 etcdEventsReceivedCounts.WithLabelValues(resource).Inc() 200 } 201 202 // RecordEtcdBookmark updates the etcd_bookmark_counts metric. 203 func RecordEtcdBookmark(resource string) { 204 etcdBookmarkCounts.WithLabelValues(resource).Inc() 205 } 206 207 // RecordDecodeError sets the storage_decode_errors metrics. 208 func RecordDecodeError(resource string) { 209 decodeErrorCounts.WithLabelValues(resource).Inc() 210 } 211 212 // Reset resets the etcd_request_duration_seconds metric. 213 func Reset() { 214 etcdRequestLatency.Reset() 215 } 216 217 // sinceInSeconds gets the time since the specified start in seconds. 218 // 219 // This is a variable to facilitate testing. 220 var sinceInSeconds = func(start time.Time) float64 { 221 return time.Since(start).Seconds() 222 } 223 224 // UpdateEtcdDbSize sets the etcd_db_total_size_in_bytes metric. 225 // Deprecated: Metric etcd_db_total_size_in_bytes will be replaced with apiserver_storage_size_bytes 226 func UpdateEtcdDbSize(ep string, size int64) { 227 dbTotalSize.WithLabelValues(ep).Set(float64(size)) 228 } 229 230 // SetStorageMonitorGetter sets monitor getter to allow monitoring etcd stats. 231 func SetStorageMonitorGetter(getter func() ([]Monitor, error)) { 232 storageMonitor.setGetter(getter) 233 } 234 235 // UpdateLeaseObjectCount sets the etcd_lease_object_counts metric. 236 func UpdateLeaseObjectCount(count int64) { 237 // Currently we only store one previous lease, since all the events have the same ttl. 238 // See pkg/storage/etcd3/lease_manager.go 239 etcdLeaseObjectCounts.WithLabelValues().Observe(float64(count)) 240 } 241 242 // RecordListEtcd3Metrics notes various metrics of the cost to serve a LIST request 243 func RecordStorageListMetrics(resource string, numFetched, numEvald, numReturned int) { 244 listStorageCount.WithLabelValues(resource).Inc() 245 listStorageNumFetched.WithLabelValues(resource).Add(float64(numFetched)) 246 listStorageNumSelectorEvals.WithLabelValues(resource).Add(float64(numEvald)) 247 listStorageNumReturned.WithLabelValues(resource).Add(float64(numReturned)) 248 } 249 250 type Monitor interface { 251 Monitor(ctx context.Context) (StorageMetrics, error) 252 Close() error 253 } 254 255 type StorageMetrics struct { 256 Size int64 257 } 258 259 type monitorCollector struct { 260 compbasemetrics.BaseStableCollector 261 262 mutex sync.Mutex 263 monitorGetter func() ([]Monitor, error) 264 } 265 266 func (m *monitorCollector) setGetter(monitorGetter func() ([]Monitor, error)) { 267 m.mutex.Lock() 268 defer m.mutex.Unlock() 269 m.monitorGetter = monitorGetter 270 } 271 272 func (m *monitorCollector) getGetter() func() ([]Monitor, error) { 273 m.mutex.Lock() 274 defer m.mutex.Unlock() 275 return m.monitorGetter 276 } 277 278 // DescribeWithStability implements compbasemetrics.StableColletor 279 func (c *monitorCollector) DescribeWithStability(ch chan<- *compbasemetrics.Desc) { 280 ch <- storageSizeDescription 281 } 282 283 // CollectWithStability implements compbasemetrics.StableColletor 284 func (c *monitorCollector) CollectWithStability(ch chan<- compbasemetrics.Metric) { 285 monitors, err := c.getGetter()() 286 if err != nil { 287 return 288 } 289 290 for i, m := range monitors { 291 storageClusterID := fmt.Sprintf("etcd-%d", i) 292 293 klog.V(4).InfoS("Start collecting storage metrics", "storage_cluster_id", storageClusterID) 294 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 295 metrics, err := m.Monitor(ctx) 296 cancel() 297 m.Close() 298 if err != nil { 299 klog.InfoS("Failed to get storage metrics", "storage_cluster_id", storageClusterID, "err", err) 300 continue 301 } 302 303 metric, err := compbasemetrics.NewConstMetric(storageSizeDescription, compbasemetrics.GaugeValue, float64(metrics.Size), storageClusterID) 304 if err != nil { 305 klog.ErrorS(err, "Failed to create metric", "storage_cluster_id", storageClusterID) 306 } 307 ch <- metric 308 } 309 }