github.com/netdata/go.d.plugin@v0.58.1/modules/k8s_kubelet/collect.go (about) 1 // SPDX-License-Identifier: GPL-3.0-or-later 2 3 package k8s_kubelet 4 5 import ( 6 "math" 7 8 mtx "github.com/netdata/go.d.plugin/pkg/metrics" 9 "github.com/netdata/go.d.plugin/pkg/prometheus" 10 "github.com/netdata/go.d.plugin/pkg/stm" 11 12 "github.com/netdata/go.d.plugin/agent/module" 13 ) 14 15 func (k *Kubelet) collect() (map[string]int64, error) { 16 raw, err := k.prom.ScrapeSeries() 17 18 if err != nil { 19 return nil, err 20 } 21 22 mx := newMetrics() 23 24 k.collectToken(raw, mx) 25 k.collectRESTClientHTTPRequests(raw, mx) 26 k.collectAPIServer(raw, mx) 27 k.collectKubelet(raw, mx) 28 k.collectVolumeManager(raw, mx) 29 30 return stm.ToMap(mx), nil 31 } 32 33 func (k *Kubelet) collectLogsUsagePerPod(raw prometheus.Series, mx *metrics) { 34 chart := k.charts.Get("kubelet_pods_log_filesystem_used_bytes") 35 seen := make(map[string]bool) 36 37 for _, metric := range raw.FindByName("kubelet_container_log_filesystem_used_bytes") { 38 pod := metric.Labels.Get("pod") 39 namespace := metric.Labels.Get("namespace") 40 41 if pod == "" || namespace == "" { 42 continue 43 } 44 45 key := namespace + "_" + pod 46 dimID := "kubelet_log_file_system_usage_" + key 47 48 if !chart.HasDim(dimID) { 49 _ = chart.AddDim(&Dim{ID: dimID, Name: pod}) 50 chart.MarkNotCreated() 51 } 52 53 seen[dimID] = true 54 v := mx.Kubelet.PodLogFileSystemUsage[key] 55 v.Add(metric.Value) 56 mx.Kubelet.PodLogFileSystemUsage[key] = v 57 } 58 59 for _, dim := range chart.Dims { 60 if seen[dim.ID] { 61 continue 62 } 63 _ = chart.MarkDimRemove(dim.ID, false) 64 chart.MarkNotCreated() 65 } 66 } 67 68 func (k *Kubelet) collectVolumeManager(raw prometheus.Series, mx *metrics) { 69 vmPlugins := make(map[string]*volumeManagerPlugin) 70 71 for _, metric := range raw.FindByName("volume_manager_total_volumes") { 72 pluginName := metric.Labels.Get("plugin_name") 73 state := metric.Labels.Get("state") 74 75 if !k.collectedVMPlugins[pluginName] { 76 _ = k.charts.Add(newVolumeManagerChart(pluginName)) 77 k.collectedVMPlugins[pluginName] = true 78 } 79 if _, ok := vmPlugins[pluginName]; !ok { 80 vmPlugins[pluginName] = &volumeManagerPlugin{} 81 } 82 83 switch state { 84 case "actual_state_of_world": 85 vmPlugins[pluginName].State.Actual.Set(metric.Value) 86 case "desired_state_of_world": 87 vmPlugins[pluginName].State.Desired.Set(metric.Value) 88 } 89 } 90 91 mx.VolumeManager.Plugins = vmPlugins 92 } 93 94 func (k *Kubelet) collectKubelet(raw prometheus.Series, mx *metrics) { 95 value := raw.FindByName("kubelet_node_config_error").Max() 96 mx.Kubelet.NodeConfigError.Set(value) 97 98 /* 99 # HELP kubelet_running_containers [ALPHA] Number of containers currently running 100 # TYPE kubelet_running_containers gauge 101 kubelet_running_containers{container_state="created"} 1 102 kubelet_running_containers{container_state="exited"} 13 103 kubelet_running_containers{container_state="running"} 42 104 kubelet_running_containers{container_state="unknown"} 1 105 */ 106 107 ms := raw.FindByName("kubelet_running_container_count") 108 value = ms.Max() 109 if ms.Len() == 0 { 110 for _, m := range raw.FindByName("kubelet_running_containers") { 111 if m.Labels.Get("container_state") == "running" { 112 value = m.Value 113 break 114 } 115 } 116 } 117 mx.Kubelet.RunningContainerCount.Set(value) 118 119 /* 120 # HELP kubelet_running_pods [ALPHA] Number of pods currently running 121 # TYPE kubelet_running_pods gauge 122 kubelet_running_pods 37 123 */ 124 value = raw.FindByNames("kubelet_running_pod_count", "kubelet_running_pods").Max() 125 mx.Kubelet.RunningPodCount.Set(value) 126 127 k.collectRuntimeOperations(raw, mx) 128 k.collectRuntimeOperationsErrors(raw, mx) 129 k.collectDockerOperations(raw, mx) 130 k.collectDockerOperationsErrors(raw, mx) 131 k.collectPLEGRelisting(raw, mx) 132 k.collectLogsUsagePerPod(raw, mx) 133 } 134 135 func (k *Kubelet) collectAPIServer(raw prometheus.Series, mx *metrics) { 136 value := raw.FindByName("apiserver_audit_requests_rejected_total").Max() 137 mx.APIServer.Audit.Requests.Rejected.Set(value) 138 139 value = raw.FindByName("apiserver_storage_data_key_generation_failures_total").Max() 140 mx.APIServer.Storage.DataKeyGeneration.Failures.Set(value) 141 142 value = raw.FindByName("apiserver_storage_envelope_transformation_cache_misses_total").Max() 143 mx.APIServer.Storage.EnvelopeTransformation.CacheMisses.Set(value) 144 145 k.collectStorageDataKeyGenerationLatencies(raw, mx) 146 } 147 148 func (k *Kubelet) collectToken(raw prometheus.Series, mx *metrics) { 149 value := raw.FindByName("get_token_count").Max() 150 mx.Token.Count.Set(value) 151 152 value = raw.FindByName("get_token_fail_count").Max() 153 mx.Token.FailCount.Set(value) 154 } 155 156 func (k *Kubelet) collectPLEGRelisting(raw prometheus.Series, mx *metrics) { 157 // Summary 158 for _, metric := range raw.FindByName("kubelet_pleg_relist_interval_microseconds") { 159 if math.IsNaN(metric.Value) { 160 continue 161 } 162 quantile := metric.Labels.Get("quantile") 163 switch quantile { 164 case "0.5": 165 mx.Kubelet.PLEG.Relist.Interval.Quantile05.Set(metric.Value) 166 case "0.9": 167 mx.Kubelet.PLEG.Relist.Interval.Quantile09.Set(metric.Value) 168 case "0.99": 169 mx.Kubelet.PLEG.Relist.Interval.Quantile099.Set(metric.Value) 170 } 171 } 172 for _, metric := range raw.FindByName("kubelet_pleg_relist_latency_microseconds") { 173 if math.IsNaN(metric.Value) { 174 continue 175 } 176 quantile := metric.Labels.Get("quantile") 177 switch quantile { 178 case "0.5": 179 mx.Kubelet.PLEG.Relist.Latency.Quantile05.Set(metric.Value) 180 case "0.9": 181 mx.Kubelet.PLEG.Relist.Latency.Quantile09.Set(metric.Value) 182 case "0.99": 183 mx.Kubelet.PLEG.Relist.Latency.Quantile099.Set(metric.Value) 184 } 185 } 186 } 187 188 func (k *Kubelet) collectStorageDataKeyGenerationLatencies(raw prometheus.Series, mx *metrics) { 189 latencies := &mx.APIServer.Storage.DataKeyGeneration.Latencies 190 metricName := "apiserver_storage_data_key_generation_latencies_microseconds_bucket" 191 192 for _, metric := range raw.FindByName(metricName) { 193 value := metric.Value 194 bucket := metric.Labels.Get("le") 195 switch bucket { 196 case "5": 197 latencies.LE5.Set(value) 198 case "10": 199 latencies.LE10.Set(value) 200 case "20": 201 latencies.LE20.Set(value) 202 case "40": 203 latencies.LE40.Set(value) 204 case "80": 205 latencies.LE80.Set(value) 206 case "160": 207 latencies.LE160.Set(value) 208 case "320": 209 latencies.LE320.Set(value) 210 case "640": 211 latencies.LE640.Set(value) 212 case "1280": 213 latencies.LE1280.Set(value) 214 case "2560": 215 latencies.LE2560.Set(value) 216 case "5120": 217 latencies.LE5120.Set(value) 218 case "10240": 219 latencies.LE10240.Set(value) 220 case "20480": 221 latencies.LE20480.Set(value) 222 case "40960": 223 latencies.LE40960.Set(value) 224 case "+Inf": 225 latencies.LEInf.Set(value) 226 } 227 } 228 229 latencies.LEInf.Sub(latencies.LE40960.Value()) 230 latencies.LE40960.Sub(latencies.LE20480.Value()) 231 latencies.LE20480.Sub(latencies.LE10240.Value()) 232 latencies.LE10240.Sub(latencies.LE5120.Value()) 233 latencies.LE5120.Sub(latencies.LE2560.Value()) 234 latencies.LE2560.Sub(latencies.LE1280.Value()) 235 latencies.LE1280.Sub(latencies.LE640.Value()) 236 latencies.LE640.Sub(latencies.LE320.Value()) 237 latencies.LE320.Sub(latencies.LE160.Value()) 238 latencies.LE160.Sub(latencies.LE80.Value()) 239 latencies.LE80.Sub(latencies.LE40.Value()) 240 latencies.LE40.Sub(latencies.LE20.Value()) 241 latencies.LE20.Sub(latencies.LE10.Value()) 242 latencies.LE10.Sub(latencies.LE5.Value()) 243 } 244 245 func (k *Kubelet) collectRESTClientHTTPRequests(raw prometheus.Series, mx *metrics) { 246 metricName := "rest_client_requests_total" 247 chart := k.charts.Get("rest_client_requests_by_code") 248 249 for _, metric := range raw.FindByName(metricName) { 250 code := metric.Labels.Get("code") 251 if code == "" { 252 continue 253 } 254 dimID := "rest_client_requests_" + code 255 if !chart.HasDim(dimID) { 256 _ = chart.AddDim(&Dim{ID: dimID, Name: code, Algo: module.Incremental}) 257 chart.MarkNotCreated() 258 } 259 mx.RESTClient.Requests.ByStatusCode[code] = mtx.Gauge(metric.Value) 260 } 261 262 chart = k.charts.Get("rest_client_requests_by_method") 263 264 for _, metric := range raw.FindByName(metricName) { 265 method := metric.Labels.Get("method") 266 if method == "" { 267 continue 268 } 269 dimID := "rest_client_requests_" + method 270 if !chart.HasDim(dimID) { 271 _ = chart.AddDim(&Dim{ID: dimID, Name: method, Algo: module.Incremental}) 272 chart.MarkNotCreated() 273 } 274 mx.RESTClient.Requests.ByMethod[method] = mtx.Gauge(metric.Value) 275 } 276 } 277 278 func (k *Kubelet) collectRuntimeOperations(raw prometheus.Series, mx *metrics) { 279 chart := k.charts.Get("kubelet_runtime_operations") 280 281 // kubelet_runtime_operations_total 282 for _, metric := range raw.FindByNames("kubelet_runtime_operations", "kubelet_runtime_operations_total") { 283 opType := metric.Labels.Get("operation_type") 284 if opType == "" { 285 continue 286 } 287 dimID := "kubelet_runtime_operations_" + opType 288 if !chart.HasDim(dimID) { 289 _ = chart.AddDim(&Dim{ID: dimID, Name: opType, Algo: module.Incremental}) 290 chart.MarkNotCreated() 291 } 292 mx.Kubelet.Runtime.Operations[opType] = mtx.Gauge(metric.Value) 293 } 294 } 295 296 func (k *Kubelet) collectRuntimeOperationsErrors(raw prometheus.Series, mx *metrics) { 297 chart := k.charts.Get("kubelet_runtime_operations_errors") 298 299 // kubelet_runtime_operations_errors_total 300 for _, metric := range raw.FindByNames("kubelet_runtime_operations_errors", "kubelet_runtime_operations_errors_total") { 301 opType := metric.Labels.Get("operation_type") 302 if opType == "" { 303 continue 304 } 305 dimID := "kubelet_runtime_operations_errors_" + opType 306 if !chart.HasDim(dimID) { 307 _ = chart.AddDim(&Dim{ID: dimID, Name: opType, Algo: module.Incremental}) 308 chart.MarkNotCreated() 309 } 310 mx.Kubelet.Runtime.OperationsErrors[opType] = mtx.Gauge(metric.Value) 311 } 312 } 313 314 func (k *Kubelet) collectDockerOperations(raw prometheus.Series, mx *metrics) { 315 chart := k.charts.Get("kubelet_docker_operations") 316 317 // kubelet_docker_operations_total 318 for _, metric := range raw.FindByNames("kubelet_docker_operations", "kubelet_docker_operations_total") { 319 opType := metric.Labels.Get("operation_type") 320 if opType == "" { 321 continue 322 } 323 dimID := "kubelet_docker_operations_" + opType 324 if !chart.HasDim(dimID) { 325 _ = chart.AddDim(&Dim{ID: dimID, Name: opType, Algo: module.Incremental}) 326 chart.MarkNotCreated() 327 } 328 mx.Kubelet.Docker.Operations[opType] = mtx.Gauge(metric.Value) 329 } 330 } 331 332 func (k *Kubelet) collectDockerOperationsErrors(raw prometheus.Series, mx *metrics) { 333 chart := k.charts.Get("kubelet_docker_operations_errors") 334 335 // kubelet_docker_operations_errors_total 336 for _, metric := range raw.FindByNames("kubelet_docker_operations_errors", "kubelet_docker_operations_errors_total") { 337 opType := metric.Labels.Get("operation_type") 338 if opType == "" { 339 continue 340 } 341 dimID := "kubelet_docker_operations_errors_" + opType 342 if !chart.HasDim(dimID) { 343 _ = chart.AddDim(&Dim{ID: dimID, Name: opType, Algo: module.Incremental}) 344 chart.MarkNotCreated() 345 } 346 mx.Kubelet.Docker.OperationsErrors[opType] = mtx.Gauge(metric.Value) 347 } 348 }