k8s.io/kubernetes@v1.29.3/pkg/controller/nodelifecycle/metrics.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package nodelifecycle 18 19 import ( 20 "sync" 21 22 "k8s.io/component-base/metrics" 23 "k8s.io/component-base/metrics/legacyregistry" 24 ) 25 26 const ( 27 nodeControllerSubsystem = "node_collector" 28 zoneHealthStatisticKey = "zone_health" 29 zoneSizeKey = "zone_size" 30 zoneNoUnhealthyNodesKey = "unhealthy_nodes_in_zone" 31 evictionsTotalKey = "evictions_total" 32 33 updateNodeHealthKey = "update_node_health_duration_seconds" 34 updateAllNodesHealthKey = "update_all_nodes_health_duration_seconds" 35 ) 36 37 var ( 38 zoneHealth = metrics.NewGaugeVec( 39 &metrics.GaugeOpts{ 40 Subsystem: nodeControllerSubsystem, 41 Name: zoneHealthStatisticKey, 42 Help: "Gauge measuring percentage of healthy nodes per zone.", 43 StabilityLevel: metrics.ALPHA, 44 }, 45 []string{"zone"}, 46 ) 47 zoneSize = metrics.NewGaugeVec( 48 &metrics.GaugeOpts{ 49 Subsystem: nodeControllerSubsystem, 50 Name: zoneSizeKey, 51 Help: "Gauge measuring number of registered Nodes per zones.", 52 StabilityLevel: metrics.ALPHA, 53 }, 54 []string{"zone"}, 55 ) 56 unhealthyNodes = metrics.NewGaugeVec( 57 &metrics.GaugeOpts{ 58 Subsystem: nodeControllerSubsystem, 59 Name: zoneNoUnhealthyNodesKey, 60 Help: "Gauge measuring number of not Ready Nodes per zones.", 61 StabilityLevel: metrics.ALPHA, 62 }, 63 []string{"zone"}, 64 ) 65 evictionsTotal = metrics.NewCounterVec( 66 &metrics.CounterOpts{ 67 Subsystem: nodeControllerSubsystem, 68 Name: evictionsTotalKey, 69 Help: "Number of Node evictions that happened since current instance of NodeController started.", 70 StabilityLevel: metrics.STABLE, 71 }, 72 []string{"zone"}, 73 ) 74 75 updateNodeHealthDuration = metrics.NewHistogram( 76 &metrics.HistogramOpts{ 77 Subsystem: nodeControllerSubsystem, 78 Name: updateNodeHealthKey, 79 Help: "Duration in seconds for NodeController to update the health of a single node.", 80 Buckets: metrics.ExponentialBuckets(0.001, 4, 8), // 1ms -> ~15s 81 StabilityLevel: metrics.ALPHA, 82 }, 83 ) 84 updateAllNodesHealthDuration = metrics.NewHistogram( 85 &metrics.HistogramOpts{ 86 Subsystem: nodeControllerSubsystem, 87 Name: updateAllNodesHealthKey, 88 Help: "Duration in seconds for NodeController to update the health of all nodes.", 89 Buckets: metrics.ExponentialBuckets(0.01, 4, 8), // 10ms -> ~3m 90 StabilityLevel: metrics.ALPHA, 91 }, 92 ) 93 ) 94 95 var registerMetrics sync.Once 96 97 // Register the metrics that are to be monitored. 98 func Register() { 99 registerMetrics.Do(func() { 100 legacyregistry.MustRegister(zoneHealth) 101 legacyregistry.MustRegister(zoneSize) 102 legacyregistry.MustRegister(unhealthyNodes) 103 legacyregistry.MustRegister(evictionsTotal) 104 legacyregistry.MustRegister(updateNodeHealthDuration) 105 legacyregistry.MustRegister(updateAllNodesHealthDuration) 106 }) 107 }