k8s.io/kubernetes@v1.29.3/pkg/kubelet/metrics/collectors/resource_metrics.go (about) 1 /* 2 Copyright 2019 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package collectors 18 19 import ( 20 "context" 21 "time" 22 23 "k8s.io/component-base/metrics" 24 "k8s.io/klog/v2" 25 summary "k8s.io/kubelet/pkg/apis/stats/v1alpha1" 26 "k8s.io/kubernetes/pkg/kubelet/server/stats" 27 ) 28 29 var ( 30 nodeCPUUsageDesc = metrics.NewDesc("node_cpu_usage_seconds_total", 31 "Cumulative cpu time consumed by the node in core-seconds", 32 nil, 33 nil, 34 metrics.STABLE, 35 "") 36 37 nodeMemoryUsageDesc = metrics.NewDesc("node_memory_working_set_bytes", 38 "Current working set of the node in bytes", 39 nil, 40 nil, 41 metrics.STABLE, 42 "") 43 44 nodeSwapUsageDesc = metrics.NewDesc("node_swap_usage_bytes", 45 "Current swap usage of the node in bytes. Reported only on non-windows systems", 46 nil, 47 nil, 48 metrics.ALPHA, 49 "") 50 51 containerCPUUsageDesc = metrics.NewDesc("container_cpu_usage_seconds_total", 52 "Cumulative cpu time consumed by the container in core-seconds", 53 []string{"container", "pod", "namespace"}, 54 nil, 55 metrics.STABLE, 56 "") 57 58 containerMemoryUsageDesc = metrics.NewDesc("container_memory_working_set_bytes", 59 "Current working set of the container in bytes", 60 []string{"container", "pod", "namespace"}, 61 nil, 62 metrics.STABLE, 63 "") 64 65 containerSwapUsageDesc = metrics.NewDesc("container_swap_usage_bytes", 66 "Current amount of the container swap usage in bytes. Reported only on non-windows systems", 67 []string{"container", "pod", "namespace"}, 68 nil, 69 metrics.ALPHA, 70 "") 71 72 podCPUUsageDesc = metrics.NewDesc("pod_cpu_usage_seconds_total", 73 "Cumulative cpu time consumed by the pod in core-seconds", 74 []string{"pod", "namespace"}, 75 nil, 76 metrics.STABLE, 77 "") 78 79 podMemoryUsageDesc = metrics.NewDesc("pod_memory_working_set_bytes", 80 "Current working set of the pod in bytes", 81 []string{"pod", "namespace"}, 82 nil, 83 metrics.STABLE, 84 "") 85 86 podSwapUsageDesc = metrics.NewDesc("pod_swap_usage_bytes", 87 "Current amount of the pod swap usage in bytes. Reported only on non-windows systems", 88 []string{"pod", "namespace"}, 89 nil, 90 metrics.ALPHA, 91 "") 92 93 resourceScrapeResultDesc = metrics.NewDesc("scrape_error", 94 "1 if there was an error while getting container metrics, 0 otherwise", 95 nil, 96 nil, 97 metrics.ALPHA, 98 "1.29.0") 99 100 resourceScrapeErrorResultDesc = metrics.NewDesc("resource_scrape_error", 101 "1 if there was an error while getting container metrics, 0 otherwise", 102 nil, 103 nil, 104 metrics.STABLE, 105 "") 106 107 containerStartTimeDesc = metrics.NewDesc("container_start_time_seconds", 108 "Start time of the container since unix epoch in seconds", 109 []string{"container", "pod", "namespace"}, 110 nil, 111 metrics.STABLE, 112 "") 113 ) 114 115 // NewResourceMetricsCollector returns a metrics.StableCollector which exports resource metrics 116 func NewResourceMetricsCollector(provider stats.SummaryProvider) metrics.StableCollector { 117 return &resourceMetricsCollector{ 118 provider: provider, 119 } 120 } 121 122 type resourceMetricsCollector struct { 123 metrics.BaseStableCollector 124 125 provider stats.SummaryProvider 126 } 127 128 // Check if resourceMetricsCollector implements necessary interface 129 var _ metrics.StableCollector = &resourceMetricsCollector{} 130 131 // DescribeWithStability implements metrics.StableCollector 132 func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *metrics.Desc) { 133 ch <- nodeCPUUsageDesc 134 ch <- nodeMemoryUsageDesc 135 ch <- nodeSwapUsageDesc 136 ch <- containerStartTimeDesc 137 ch <- containerCPUUsageDesc 138 ch <- containerMemoryUsageDesc 139 ch <- containerSwapUsageDesc 140 ch <- podCPUUsageDesc 141 ch <- podMemoryUsageDesc 142 ch <- podSwapUsageDesc 143 ch <- resourceScrapeResultDesc 144 ch <- resourceScrapeErrorResultDesc 145 } 146 147 // CollectWithStability implements metrics.StableCollector 148 // Since new containers are frequently created and removed, using the Gauge would 149 // leak metric collectors for containers or pods that no longer exist. Instead, implement 150 // custom collector in a way that only collects metrics for active containers. 151 func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- metrics.Metric) { 152 ctx := context.Background() 153 var errorCount float64 154 defer func() { 155 ch <- metrics.NewLazyConstMetric(resourceScrapeResultDesc, metrics.GaugeValue, errorCount) 156 ch <- metrics.NewLazyConstMetric(resourceScrapeErrorResultDesc, metrics.GaugeValue, errorCount) 157 }() 158 statsSummary, err := rc.provider.GetCPUAndMemoryStats(ctx) 159 if err != nil { 160 errorCount = 1 161 klog.ErrorS(err, "Error getting summary for resourceMetric prometheus endpoint") 162 return 163 } 164 165 rc.collectNodeCPUMetrics(ch, statsSummary.Node) 166 rc.collectNodeMemoryMetrics(ch, statsSummary.Node) 167 rc.collectNodeSwapMetrics(ch, statsSummary.Node) 168 169 for _, pod := range statsSummary.Pods { 170 for _, container := range pod.Containers { 171 rc.collectContainerStartTime(ch, pod, container) 172 rc.collectContainerCPUMetrics(ch, pod, container) 173 rc.collectContainerMemoryMetrics(ch, pod, container) 174 rc.collectContainerSwapMetrics(ch, pod, container) 175 } 176 rc.collectPodCPUMetrics(ch, pod) 177 rc.collectPodMemoryMetrics(ch, pod) 178 rc.collectPodSwapMetrics(ch, pod) 179 } 180 } 181 182 func (rc *resourceMetricsCollector) collectNodeCPUMetrics(ch chan<- metrics.Metric, s summary.NodeStats) { 183 if s.CPU == nil || s.CPU.UsageCoreNanoSeconds == nil { 184 return 185 } 186 187 ch <- metrics.NewLazyMetricWithTimestamp(s.CPU.Time.Time, 188 metrics.NewLazyConstMetric(nodeCPUUsageDesc, metrics.CounterValue, float64(*s.CPU.UsageCoreNanoSeconds)/float64(time.Second))) 189 } 190 191 func (rc *resourceMetricsCollector) collectNodeMemoryMetrics(ch chan<- metrics.Metric, s summary.NodeStats) { 192 if s.Memory == nil || s.Memory.WorkingSetBytes == nil { 193 return 194 } 195 196 ch <- metrics.NewLazyMetricWithTimestamp(s.Memory.Time.Time, 197 metrics.NewLazyConstMetric(nodeMemoryUsageDesc, metrics.GaugeValue, float64(*s.Memory.WorkingSetBytes))) 198 } 199 200 func (rc *resourceMetricsCollector) collectNodeSwapMetrics(ch chan<- metrics.Metric, s summary.NodeStats) { 201 if s.Swap == nil || s.Swap.SwapUsageBytes == nil { 202 return 203 } 204 205 ch <- metrics.NewLazyMetricWithTimestamp(s.Memory.Time.Time, 206 metrics.NewLazyConstMetric(nodeSwapUsageDesc, metrics.GaugeValue, float64(*s.Swap.SwapUsageBytes))) 207 } 208 209 func (rc *resourceMetricsCollector) collectContainerStartTime(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) { 210 if s.StartTime.Unix() <= 0 { 211 return 212 } 213 214 ch <- metrics.NewLazyConstMetric(containerStartTimeDesc, metrics.GaugeValue, float64(s.StartTime.UnixNano())/float64(time.Second), s.Name, pod.PodRef.Name, pod.PodRef.Namespace) 215 } 216 217 func (rc *resourceMetricsCollector) collectContainerCPUMetrics(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) { 218 if s.CPU == nil || s.CPU.UsageCoreNanoSeconds == nil { 219 return 220 } 221 222 ch <- metrics.NewLazyMetricWithTimestamp(s.CPU.Time.Time, 223 metrics.NewLazyConstMetric(containerCPUUsageDesc, metrics.CounterValue, 224 float64(*s.CPU.UsageCoreNanoSeconds)/float64(time.Second), s.Name, pod.PodRef.Name, pod.PodRef.Namespace)) 225 } 226 227 func (rc *resourceMetricsCollector) collectContainerMemoryMetrics(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) { 228 if s.Memory == nil || s.Memory.WorkingSetBytes == nil { 229 return 230 } 231 232 ch <- metrics.NewLazyMetricWithTimestamp(s.Memory.Time.Time, 233 metrics.NewLazyConstMetric(containerMemoryUsageDesc, metrics.GaugeValue, 234 float64(*s.Memory.WorkingSetBytes), s.Name, pod.PodRef.Name, pod.PodRef.Namespace)) 235 } 236 237 func (rc *resourceMetricsCollector) collectContainerSwapMetrics(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) { 238 if s.Swap == nil || s.Swap.SwapUsageBytes == nil { 239 return 240 } 241 242 ch <- metrics.NewLazyMetricWithTimestamp(s.Swap.Time.Time, 243 metrics.NewLazyConstMetric(containerSwapUsageDesc, metrics.GaugeValue, 244 float64(*s.Swap.SwapUsageBytes), s.Name, pod.PodRef.Name, pod.PodRef.Namespace)) 245 } 246 247 func (rc *resourceMetricsCollector) collectPodCPUMetrics(ch chan<- metrics.Metric, pod summary.PodStats) { 248 if pod.CPU == nil || pod.CPU.UsageCoreNanoSeconds == nil { 249 return 250 } 251 252 ch <- metrics.NewLazyMetricWithTimestamp(pod.CPU.Time.Time, 253 metrics.NewLazyConstMetric(podCPUUsageDesc, metrics.CounterValue, 254 float64(*pod.CPU.UsageCoreNanoSeconds)/float64(time.Second), pod.PodRef.Name, pod.PodRef.Namespace)) 255 } 256 257 func (rc *resourceMetricsCollector) collectPodMemoryMetrics(ch chan<- metrics.Metric, pod summary.PodStats) { 258 if pod.Memory == nil || pod.Memory.WorkingSetBytes == nil { 259 return 260 } 261 262 ch <- metrics.NewLazyMetricWithTimestamp(pod.Memory.Time.Time, 263 metrics.NewLazyConstMetric(podMemoryUsageDesc, metrics.GaugeValue, 264 float64(*pod.Memory.WorkingSetBytes), pod.PodRef.Name, pod.PodRef.Namespace)) 265 } 266 267 func (rc *resourceMetricsCollector) collectPodSwapMetrics(ch chan<- metrics.Metric, pod summary.PodStats) { 268 if pod.Swap == nil || pod.Swap.SwapUsageBytes == nil { 269 return 270 } 271 272 ch <- metrics.NewLazyMetricWithTimestamp(pod.Swap.Time.Time, 273 metrics.NewLazyConstMetric(podSwapUsageDesc, metrics.GaugeValue, 274 float64(*pod.Swap.SwapUsageBytes), pod.PodRef.Name, pod.PodRef.Namespace)) 275 }