k8s.io/apiserver@v0.29.3/pkg/storage/etcd3/metrics/metrics.go (about) 1 /* 2 Copyright 2015 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package metrics 18 19 import ( 20 "context" 21 "fmt" 22 "sync" 23 "time" 24 25 compbasemetrics "k8s.io/component-base/metrics" 26 "k8s.io/component-base/metrics/legacyregistry" 27 "k8s.io/klog/v2" 28 ) 29 30 /* 31 * By default, all the following metrics are defined as falling under 32 * ALPHA stability level https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes) 33 * 34 * Promoting the stability level of the metric is a responsibility of the component owner, since it 35 * involves explicitly acknowledging support for the metric across multiple releases, in accordance with 36 * the metric stability policy. 37 */ 38 var ( 39 etcdRequestLatency = compbasemetrics.NewHistogramVec( 40 &compbasemetrics.HistogramOpts{ 41 Name: "etcd_request_duration_seconds", 42 Help: "Etcd request latency in seconds for each operation and object type.", 43 // Etcd request latency in seconds for each operation and object type. 44 // This metric is used for verifying etcd api call latencies SLO 45 // keep consistent with apiserver metric 'requestLatencies' in 46 // staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go 47 Buckets: []float64{0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3, 48 4, 5, 6, 8, 10, 15, 20, 30, 45, 60}, 49 StabilityLevel: compbasemetrics.ALPHA, 50 }, 51 []string{"operation", "type"}, 52 ) 53 etcdRequestCounts = compbasemetrics.NewCounterVec( 54 &compbasemetrics.CounterOpts{ 55 Name: "etcd_requests_total", 56 Help: "Etcd request counts for each operation and object type.", 57 StabilityLevel: compbasemetrics.ALPHA, 58 }, 59 []string{"operation", "type"}, 60 ) 61 etcdRequestErrorCounts = compbasemetrics.NewCounterVec( 62 &compbasemetrics.CounterOpts{ 63 Name: "etcd_request_errors_total", 64 Help: "Etcd failed request counts for each operation and object type.", 65 StabilityLevel: compbasemetrics.ALPHA, 66 }, 67 []string{"operation", "type"}, 68 ) 69 objectCounts = compbasemetrics.NewGaugeVec( 70 &compbasemetrics.GaugeOpts{ 71 Name: "apiserver_storage_objects", 72 Help: "Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1.", 73 StabilityLevel: compbasemetrics.STABLE, 74 }, 75 []string{"resource"}, 76 ) 77 dbTotalSize = compbasemetrics.NewGaugeVec( 78 &compbasemetrics.GaugeOpts{ 79 Subsystem: "apiserver", 80 Name: "storage_db_total_size_in_bytes", 81 Help: "Total size of the storage database file physically allocated in bytes.", 82 StabilityLevel: compbasemetrics.ALPHA, 83 DeprecatedVersion: "1.28.0", 84 }, 85 []string{"endpoint"}, 86 ) 87 storageSizeDescription = compbasemetrics.NewDesc("apiserver_storage_size_bytes", "Size of the storage database file physically allocated in bytes.", []string{"cluster"}, nil, compbasemetrics.ALPHA, "") 88 storageMonitor = &monitorCollector{monitorGetter: func() ([]Monitor, error) { return nil, nil }} 89 etcdEventsReceivedCounts = compbasemetrics.NewCounterVec( 90 &compbasemetrics.CounterOpts{ 91 Subsystem: "apiserver", 92 Name: "storage_events_received_total", 93 Help: "Number of etcd events received split by kind.", 94 StabilityLevel: compbasemetrics.ALPHA, 95 }, 96 []string{"resource"}, 97 ) 98 etcdBookmarkCounts = compbasemetrics.NewGaugeVec( 99 &compbasemetrics.GaugeOpts{ 100 Name: "etcd_bookmark_counts", 101 Help: "Number of etcd bookmarks (progress notify events) split by kind.", 102 StabilityLevel: compbasemetrics.ALPHA, 103 }, 104 []string{"resource"}, 105 ) 106 etcdLeaseObjectCounts = compbasemetrics.NewHistogramVec( 107 &compbasemetrics.HistogramOpts{ 108 Name: "etcd_lease_object_counts", 109 Help: "Number of objects attached to a single etcd lease.", 110 Buckets: []float64{10, 50, 100, 500, 1000, 2500, 5000}, 111 StabilityLevel: compbasemetrics.ALPHA, 112 }, 113 []string{}, 114 ) 115 listStorageCount = compbasemetrics.NewCounterVec( 116 &compbasemetrics.CounterOpts{ 117 Name: "apiserver_storage_list_total", 118 Help: "Number of LIST requests served from storage", 119 StabilityLevel: compbasemetrics.ALPHA, 120 }, 121 []string{"resource"}, 122 ) 123 listStorageNumFetched = compbasemetrics.NewCounterVec( 124 &compbasemetrics.CounterOpts{ 125 Name: "apiserver_storage_list_fetched_objects_total", 126 Help: "Number of objects read from storage in the course of serving a LIST request", 127 StabilityLevel: compbasemetrics.ALPHA, 128 }, 129 []string{"resource"}, 130 ) 131 listStorageNumSelectorEvals = compbasemetrics.NewCounterVec( 132 &compbasemetrics.CounterOpts{ 133 Name: "apiserver_storage_list_evaluated_objects_total", 134 Help: "Number of objects tested in the course of serving a LIST request from storage", 135 StabilityLevel: compbasemetrics.ALPHA, 136 }, 137 []string{"resource"}, 138 ) 139 listStorageNumReturned = compbasemetrics.NewCounterVec( 140 &compbasemetrics.CounterOpts{ 141 Name: "apiserver_storage_list_returned_objects_total", 142 Help: "Number of objects returned for a LIST request from storage", 143 StabilityLevel: compbasemetrics.ALPHA, 144 }, 145 []string{"resource"}, 146 ) 147 decodeErrorCounts = compbasemetrics.NewCounterVec( 148 &compbasemetrics.CounterOpts{ 149 Namespace: "apiserver", 150 Name: "storage_decode_errors_total", 151 Help: "Number of stored object decode errors split by object type", 152 StabilityLevel: compbasemetrics.ALPHA, 153 }, 154 []string{"resource"}, 155 ) 156 ) 157 158 var registerMetrics sync.Once 159 160 // Register all metrics. 161 func Register() { 162 // Register the metrics. 163 registerMetrics.Do(func() { 164 legacyregistry.MustRegister(etcdRequestLatency) 165 legacyregistry.MustRegister(etcdRequestCounts) 166 legacyregistry.MustRegister(etcdRequestErrorCounts) 167 legacyregistry.MustRegister(objectCounts) 168 legacyregistry.MustRegister(dbTotalSize) 169 legacyregistry.CustomMustRegister(storageMonitor) 170 legacyregistry.MustRegister(etcdBookmarkCounts) 171 legacyregistry.MustRegister(etcdLeaseObjectCounts) 172 legacyregistry.MustRegister(listStorageCount) 173 legacyregistry.MustRegister(listStorageNumFetched) 174 legacyregistry.MustRegister(listStorageNumSelectorEvals) 175 legacyregistry.MustRegister(listStorageNumReturned) 176 legacyregistry.MustRegister(decodeErrorCounts) 177 }) 178 } 179 180 // UpdateObjectCount sets the apiserver_storage_object_counts metric. 181 func UpdateObjectCount(resourcePrefix string, count int64) { 182 objectCounts.WithLabelValues(resourcePrefix).Set(float64(count)) 183 } 184 185 // RecordEtcdRequest updates and sets the etcd_request_duration_seconds, 186 // etcd_request_total, etcd_request_errors_total metrics. 187 func RecordEtcdRequest(verb, resource string, err error, startTime time.Time) { 188 v := []string{verb, resource} 189 etcdRequestLatency.WithLabelValues(v...).Observe(sinceInSeconds(startTime)) 190 etcdRequestCounts.WithLabelValues(v...).Inc() 191 if err != nil { 192 etcdRequestErrorCounts.WithLabelValues(v...).Inc() 193 } 194 } 195 196 // RecordEtcdEvent updated the etcd_events_received_total metric. 197 func RecordEtcdEvent(resource string) { 198 etcdEventsReceivedCounts.WithLabelValues(resource).Inc() 199 } 200 201 // RecordEtcdBookmark updates the etcd_bookmark_counts metric. 202 func RecordEtcdBookmark(resource string) { 203 etcdBookmarkCounts.WithLabelValues(resource).Inc() 204 } 205 206 // RecordDecodeError sets the storage_decode_errors metrics. 207 func RecordDecodeError(resource string) { 208 decodeErrorCounts.WithLabelValues(resource).Inc() 209 } 210 211 // Reset resets the etcd_request_duration_seconds metric. 212 func Reset() { 213 etcdRequestLatency.Reset() 214 } 215 216 // sinceInSeconds gets the time since the specified start in seconds. 217 // 218 // This is a variable to facilitate testing. 219 var sinceInSeconds = func(start time.Time) float64 { 220 return time.Since(start).Seconds() 221 } 222 223 // UpdateEtcdDbSize sets the etcd_db_total_size_in_bytes metric. 224 // Deprecated: Metric etcd_db_total_size_in_bytes will be replaced with apiserver_storage_size_bytes 225 func UpdateEtcdDbSize(ep string, size int64) { 226 dbTotalSize.WithLabelValues(ep).Set(float64(size)) 227 } 228 229 // SetStorageMonitorGetter sets monitor getter to allow monitoring etcd stats. 230 func SetStorageMonitorGetter(getter func() ([]Monitor, error)) { 231 storageMonitor.setGetter(getter) 232 } 233 234 // UpdateLeaseObjectCount sets the etcd_lease_object_counts metric. 235 func UpdateLeaseObjectCount(count int64) { 236 // Currently we only store one previous lease, since all the events have the same ttl. 237 // See pkg/storage/etcd3/lease_manager.go 238 etcdLeaseObjectCounts.WithLabelValues().Observe(float64(count)) 239 } 240 241 // RecordListEtcd3Metrics notes various metrics of the cost to serve a LIST request 242 func RecordStorageListMetrics(resource string, numFetched, numEvald, numReturned int) { 243 listStorageCount.WithLabelValues(resource).Inc() 244 listStorageNumFetched.WithLabelValues(resource).Add(float64(numFetched)) 245 listStorageNumSelectorEvals.WithLabelValues(resource).Add(float64(numEvald)) 246 listStorageNumReturned.WithLabelValues(resource).Add(float64(numReturned)) 247 } 248 249 type Monitor interface { 250 Monitor(ctx context.Context) (StorageMetrics, error) 251 Close() error 252 } 253 254 type StorageMetrics struct { 255 Size int64 256 } 257 258 type monitorCollector struct { 259 compbasemetrics.BaseStableCollector 260 261 mutex sync.Mutex 262 monitorGetter func() ([]Monitor, error) 263 } 264 265 func (m *monitorCollector) setGetter(monitorGetter func() ([]Monitor, error)) { 266 m.mutex.Lock() 267 defer m.mutex.Unlock() 268 m.monitorGetter = monitorGetter 269 } 270 271 func (m *monitorCollector) getGetter() func() ([]Monitor, error) { 272 m.mutex.Lock() 273 defer m.mutex.Unlock() 274 return m.monitorGetter 275 } 276 277 // DescribeWithStability implements compbasemetrics.StableColletor 278 func (c *monitorCollector) DescribeWithStability(ch chan<- *compbasemetrics.Desc) { 279 ch <- storageSizeDescription 280 } 281 282 // CollectWithStability implements compbasemetrics.StableColletor 283 func (c *monitorCollector) CollectWithStability(ch chan<- compbasemetrics.Metric) { 284 monitors, err := c.getGetter()() 285 if err != nil { 286 return 287 } 288 289 for i, m := range monitors { 290 cluster := fmt.Sprintf("etcd-%d", i) 291 292 klog.V(4).InfoS("Start collecting storage metrics", "cluster", cluster) 293 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 294 metrics, err := m.Monitor(ctx) 295 cancel() 296 m.Close() 297 if err != nil { 298 klog.InfoS("Failed to get storage metrics", "cluster", cluster, "err", err) 299 continue 300 } 301 302 metric, err := compbasemetrics.NewConstMetric(storageSizeDescription, compbasemetrics.GaugeValue, float64(metrics.Size), cluster) 303 if err != nil { 304 klog.ErrorS(err, "Failed to create metric", "cluster", cluster) 305 } 306 ch <- metric 307 } 308 }