github.com/netdata/go.d.plugin@v0.58.1/modules/k8s_kubelet/collect.go (about)

     1  // SPDX-License-Identifier: GPL-3.0-or-later
     2  
     3  package k8s_kubelet
     4  
     5  import (
     6  	"math"
     7  
     8  	mtx "github.com/netdata/go.d.plugin/pkg/metrics"
     9  	"github.com/netdata/go.d.plugin/pkg/prometheus"
    10  	"github.com/netdata/go.d.plugin/pkg/stm"
    11  
    12  	"github.com/netdata/go.d.plugin/agent/module"
    13  )
    14  
    15  func (k *Kubelet) collect() (map[string]int64, error) {
    16  	raw, err := k.prom.ScrapeSeries()
    17  
    18  	if err != nil {
    19  		return nil, err
    20  	}
    21  
    22  	mx := newMetrics()
    23  
    24  	k.collectToken(raw, mx)
    25  	k.collectRESTClientHTTPRequests(raw, mx)
    26  	k.collectAPIServer(raw, mx)
    27  	k.collectKubelet(raw, mx)
    28  	k.collectVolumeManager(raw, mx)
    29  
    30  	return stm.ToMap(mx), nil
    31  }
    32  
    33  func (k *Kubelet) collectLogsUsagePerPod(raw prometheus.Series, mx *metrics) {
    34  	chart := k.charts.Get("kubelet_pods_log_filesystem_used_bytes")
    35  	seen := make(map[string]bool)
    36  
    37  	for _, metric := range raw.FindByName("kubelet_container_log_filesystem_used_bytes") {
    38  		pod := metric.Labels.Get("pod")
    39  		namespace := metric.Labels.Get("namespace")
    40  
    41  		if pod == "" || namespace == "" {
    42  			continue
    43  		}
    44  
    45  		key := namespace + "_" + pod
    46  		dimID := "kubelet_log_file_system_usage_" + key
    47  
    48  		if !chart.HasDim(dimID) {
    49  			_ = chart.AddDim(&Dim{ID: dimID, Name: pod})
    50  			chart.MarkNotCreated()
    51  		}
    52  
    53  		seen[dimID] = true
    54  		v := mx.Kubelet.PodLogFileSystemUsage[key]
    55  		v.Add(metric.Value)
    56  		mx.Kubelet.PodLogFileSystemUsage[key] = v
    57  	}
    58  
    59  	for _, dim := range chart.Dims {
    60  		if seen[dim.ID] {
    61  			continue
    62  		}
    63  		_ = chart.MarkDimRemove(dim.ID, false)
    64  		chart.MarkNotCreated()
    65  	}
    66  }
    67  
    68  func (k *Kubelet) collectVolumeManager(raw prometheus.Series, mx *metrics) {
    69  	vmPlugins := make(map[string]*volumeManagerPlugin)
    70  
    71  	for _, metric := range raw.FindByName("volume_manager_total_volumes") {
    72  		pluginName := metric.Labels.Get("plugin_name")
    73  		state := metric.Labels.Get("state")
    74  
    75  		if !k.collectedVMPlugins[pluginName] {
    76  			_ = k.charts.Add(newVolumeManagerChart(pluginName))
    77  			k.collectedVMPlugins[pluginName] = true
    78  		}
    79  		if _, ok := vmPlugins[pluginName]; !ok {
    80  			vmPlugins[pluginName] = &volumeManagerPlugin{}
    81  		}
    82  
    83  		switch state {
    84  		case "actual_state_of_world":
    85  			vmPlugins[pluginName].State.Actual.Set(metric.Value)
    86  		case "desired_state_of_world":
    87  			vmPlugins[pluginName].State.Desired.Set(metric.Value)
    88  		}
    89  	}
    90  
    91  	mx.VolumeManager.Plugins = vmPlugins
    92  }
    93  
    94  func (k *Kubelet) collectKubelet(raw prometheus.Series, mx *metrics) {
    95  	value := raw.FindByName("kubelet_node_config_error").Max()
    96  	mx.Kubelet.NodeConfigError.Set(value)
    97  
    98  	/*
    99  		# HELP kubelet_running_containers [ALPHA] Number of containers currently running
   100  		# TYPE kubelet_running_containers gauge
   101  		kubelet_running_containers{container_state="created"} 1
   102  		kubelet_running_containers{container_state="exited"} 13
   103  		kubelet_running_containers{container_state="running"} 42
   104  		kubelet_running_containers{container_state="unknown"} 1
   105  	*/
   106  
   107  	ms := raw.FindByName("kubelet_running_container_count")
   108  	value = ms.Max()
   109  	if ms.Len() == 0 {
   110  		for _, m := range raw.FindByName("kubelet_running_containers") {
   111  			if m.Labels.Get("container_state") == "running" {
   112  				value = m.Value
   113  				break
   114  			}
   115  		}
   116  	}
   117  	mx.Kubelet.RunningContainerCount.Set(value)
   118  
   119  	/*
   120  		# HELP kubelet_running_pods [ALPHA] Number of pods currently running
   121  		# TYPE kubelet_running_pods gauge
   122  		kubelet_running_pods 37
   123  	*/
   124  	value = raw.FindByNames("kubelet_running_pod_count", "kubelet_running_pods").Max()
   125  	mx.Kubelet.RunningPodCount.Set(value)
   126  
   127  	k.collectRuntimeOperations(raw, mx)
   128  	k.collectRuntimeOperationsErrors(raw, mx)
   129  	k.collectDockerOperations(raw, mx)
   130  	k.collectDockerOperationsErrors(raw, mx)
   131  	k.collectPLEGRelisting(raw, mx)
   132  	k.collectLogsUsagePerPod(raw, mx)
   133  }
   134  
   135  func (k *Kubelet) collectAPIServer(raw prometheus.Series, mx *metrics) {
   136  	value := raw.FindByName("apiserver_audit_requests_rejected_total").Max()
   137  	mx.APIServer.Audit.Requests.Rejected.Set(value)
   138  
   139  	value = raw.FindByName("apiserver_storage_data_key_generation_failures_total").Max()
   140  	mx.APIServer.Storage.DataKeyGeneration.Failures.Set(value)
   141  
   142  	value = raw.FindByName("apiserver_storage_envelope_transformation_cache_misses_total").Max()
   143  	mx.APIServer.Storage.EnvelopeTransformation.CacheMisses.Set(value)
   144  
   145  	k.collectStorageDataKeyGenerationLatencies(raw, mx)
   146  }
   147  
   148  func (k *Kubelet) collectToken(raw prometheus.Series, mx *metrics) {
   149  	value := raw.FindByName("get_token_count").Max()
   150  	mx.Token.Count.Set(value)
   151  
   152  	value = raw.FindByName("get_token_fail_count").Max()
   153  	mx.Token.FailCount.Set(value)
   154  }
   155  
   156  func (k *Kubelet) collectPLEGRelisting(raw prometheus.Series, mx *metrics) {
   157  	// Summary
   158  	for _, metric := range raw.FindByName("kubelet_pleg_relist_interval_microseconds") {
   159  		if math.IsNaN(metric.Value) {
   160  			continue
   161  		}
   162  		quantile := metric.Labels.Get("quantile")
   163  		switch quantile {
   164  		case "0.5":
   165  			mx.Kubelet.PLEG.Relist.Interval.Quantile05.Set(metric.Value)
   166  		case "0.9":
   167  			mx.Kubelet.PLEG.Relist.Interval.Quantile09.Set(metric.Value)
   168  		case "0.99":
   169  			mx.Kubelet.PLEG.Relist.Interval.Quantile099.Set(metric.Value)
   170  		}
   171  	}
   172  	for _, metric := range raw.FindByName("kubelet_pleg_relist_latency_microseconds") {
   173  		if math.IsNaN(metric.Value) {
   174  			continue
   175  		}
   176  		quantile := metric.Labels.Get("quantile")
   177  		switch quantile {
   178  		case "0.5":
   179  			mx.Kubelet.PLEG.Relist.Latency.Quantile05.Set(metric.Value)
   180  		case "0.9":
   181  			mx.Kubelet.PLEG.Relist.Latency.Quantile09.Set(metric.Value)
   182  		case "0.99":
   183  			mx.Kubelet.PLEG.Relist.Latency.Quantile099.Set(metric.Value)
   184  		}
   185  	}
   186  }
   187  
// collectStorageDataKeyGenerationLatencies reads the data key generation
// latency histogram (microseconds) and converts its cumulative "le" buckets
// into per-bucket counts.
func (k *Kubelet) collectStorageDataKeyGenerationLatencies(raw prometheus.Series, mx *metrics) {
	latencies := &mx.APIServer.Storage.DataKeyGeneration.Latencies
	metricName := "apiserver_storage_data_key_generation_latencies_microseconds_bucket"

	// Record the raw cumulative count for each recognized bucket boundary.
	for _, metric := range raw.FindByName(metricName) {
		value := metric.Value
		bucket := metric.Labels.Get("le")
		switch bucket {
		case "5":
			latencies.LE5.Set(value)
		case "10":
			latencies.LE10.Set(value)
		case "20":
			latencies.LE20.Set(value)
		case "40":
			latencies.LE40.Set(value)
		case "80":
			latencies.LE80.Set(value)
		case "160":
			latencies.LE160.Set(value)
		case "320":
			latencies.LE320.Set(value)
		case "640":
			latencies.LE640.Set(value)
		case "1280":
			latencies.LE1280.Set(value)
		case "2560":
			latencies.LE2560.Set(value)
		case "5120":
			latencies.LE5120.Set(value)
		case "10240":
			latencies.LE10240.Set(value)
		case "20480":
			latencies.LE20480.Set(value)
		case "40960":
			latencies.LE40960.Set(value)
		case "+Inf":
			latencies.LEInf.Set(value)
		}
	}

	// Prometheus histogram buckets are cumulative (each le bucket includes
	// all smaller ones). Subtract neighbors from largest to smallest so each
	// gauge ends up holding only its own bucket's count. The descending
	// order is essential: each step must use the still-cumulative value of
	// the next-smaller bucket.
	latencies.LEInf.Sub(latencies.LE40960.Value())
	latencies.LE40960.Sub(latencies.LE20480.Value())
	latencies.LE20480.Sub(latencies.LE10240.Value())
	latencies.LE10240.Sub(latencies.LE5120.Value())
	latencies.LE5120.Sub(latencies.LE2560.Value())
	latencies.LE2560.Sub(latencies.LE1280.Value())
	latencies.LE1280.Sub(latencies.LE640.Value())
	latencies.LE640.Sub(latencies.LE320.Value())
	latencies.LE320.Sub(latencies.LE160.Value())
	latencies.LE160.Sub(latencies.LE80.Value())
	latencies.LE80.Sub(latencies.LE40.Value())
	latencies.LE40.Sub(latencies.LE20.Value())
	latencies.LE20.Sub(latencies.LE10.Value())
	latencies.LE10.Sub(latencies.LE5.Value())
}
   244  
   245  func (k *Kubelet) collectRESTClientHTTPRequests(raw prometheus.Series, mx *metrics) {
   246  	metricName := "rest_client_requests_total"
   247  	chart := k.charts.Get("rest_client_requests_by_code")
   248  
   249  	for _, metric := range raw.FindByName(metricName) {
   250  		code := metric.Labels.Get("code")
   251  		if code == "" {
   252  			continue
   253  		}
   254  		dimID := "rest_client_requests_" + code
   255  		if !chart.HasDim(dimID) {
   256  			_ = chart.AddDim(&Dim{ID: dimID, Name: code, Algo: module.Incremental})
   257  			chart.MarkNotCreated()
   258  		}
   259  		mx.RESTClient.Requests.ByStatusCode[code] = mtx.Gauge(metric.Value)
   260  	}
   261  
   262  	chart = k.charts.Get("rest_client_requests_by_method")
   263  
   264  	for _, metric := range raw.FindByName(metricName) {
   265  		method := metric.Labels.Get("method")
   266  		if method == "" {
   267  			continue
   268  		}
   269  		dimID := "rest_client_requests_" + method
   270  		if !chart.HasDim(dimID) {
   271  			_ = chart.AddDim(&Dim{ID: dimID, Name: method, Algo: module.Incremental})
   272  			chart.MarkNotCreated()
   273  		}
   274  		mx.RESTClient.Requests.ByMethod[method] = mtx.Gauge(metric.Value)
   275  	}
   276  }
   277  
   278  func (k *Kubelet) collectRuntimeOperations(raw prometheus.Series, mx *metrics) {
   279  	chart := k.charts.Get("kubelet_runtime_operations")
   280  
   281  	// kubelet_runtime_operations_total
   282  	for _, metric := range raw.FindByNames("kubelet_runtime_operations", "kubelet_runtime_operations_total") {
   283  		opType := metric.Labels.Get("operation_type")
   284  		if opType == "" {
   285  			continue
   286  		}
   287  		dimID := "kubelet_runtime_operations_" + opType
   288  		if !chart.HasDim(dimID) {
   289  			_ = chart.AddDim(&Dim{ID: dimID, Name: opType, Algo: module.Incremental})
   290  			chart.MarkNotCreated()
   291  		}
   292  		mx.Kubelet.Runtime.Operations[opType] = mtx.Gauge(metric.Value)
   293  	}
   294  }
   295  
   296  func (k *Kubelet) collectRuntimeOperationsErrors(raw prometheus.Series, mx *metrics) {
   297  	chart := k.charts.Get("kubelet_runtime_operations_errors")
   298  
   299  	// kubelet_runtime_operations_errors_total
   300  	for _, metric := range raw.FindByNames("kubelet_runtime_operations_errors", "kubelet_runtime_operations_errors_total") {
   301  		opType := metric.Labels.Get("operation_type")
   302  		if opType == "" {
   303  			continue
   304  		}
   305  		dimID := "kubelet_runtime_operations_errors_" + opType
   306  		if !chart.HasDim(dimID) {
   307  			_ = chart.AddDim(&Dim{ID: dimID, Name: opType, Algo: module.Incremental})
   308  			chart.MarkNotCreated()
   309  		}
   310  		mx.Kubelet.Runtime.OperationsErrors[opType] = mtx.Gauge(metric.Value)
   311  	}
   312  }
   313  
   314  func (k *Kubelet) collectDockerOperations(raw prometheus.Series, mx *metrics) {
   315  	chart := k.charts.Get("kubelet_docker_operations")
   316  
   317  	// kubelet_docker_operations_total
   318  	for _, metric := range raw.FindByNames("kubelet_docker_operations", "kubelet_docker_operations_total") {
   319  		opType := metric.Labels.Get("operation_type")
   320  		if opType == "" {
   321  			continue
   322  		}
   323  		dimID := "kubelet_docker_operations_" + opType
   324  		if !chart.HasDim(dimID) {
   325  			_ = chart.AddDim(&Dim{ID: dimID, Name: opType, Algo: module.Incremental})
   326  			chart.MarkNotCreated()
   327  		}
   328  		mx.Kubelet.Docker.Operations[opType] = mtx.Gauge(metric.Value)
   329  	}
   330  }
   331  
   332  func (k *Kubelet) collectDockerOperationsErrors(raw prometheus.Series, mx *metrics) {
   333  	chart := k.charts.Get("kubelet_docker_operations_errors")
   334  
   335  	// kubelet_docker_operations_errors_total
   336  	for _, metric := range raw.FindByNames("kubelet_docker_operations_errors", "kubelet_docker_operations_errors_total") {
   337  		opType := metric.Labels.Get("operation_type")
   338  		if opType == "" {
   339  			continue
   340  		}
   341  		dimID := "kubelet_docker_operations_errors_" + opType
   342  		if !chart.HasDim(dimID) {
   343  			_ = chart.AddDim(&Dim{ID: dimID, Name: opType, Algo: module.Incremental})
   344  			chart.MarkNotCreated()
   345  		}
   346  		mx.Kubelet.Docker.OperationsErrors[opType] = mtx.Gauge(metric.Value)
   347  	}
   348  }