github.com/netdata/go.d.plugin@v0.58.1/modules/k8s_state/collect.go (about)

     1  // SPDX-License-Identifier: GPL-3.0-or-later
     2  
     3  package k8s_state
     4  
     5  import (
     6  	"errors"
     7  	"fmt"
     8  	"strings"
     9  	"time"
    10  
    11  	"github.com/netdata/go.d.plugin/agent/module"
    12  
    13  	corev1 "k8s.io/api/core/v1"
    14  )
    15  
// precision is the fixed-point multiplier calcPercentage applies to its
// result, so percentages are reported as integers scaled by 1000.
const precision = 1000
    17  
    18  func (ks *KubeState) collect() (map[string]int64, error) {
    19  	if ks.discoverer == nil {
    20  		return nil, errors.New("nil discoverer")
    21  	}
    22  
    23  	ks.once.Do(func() {
    24  		ks.startTime = time.Now()
    25  		in := make(chan resource)
    26  
    27  		ks.wg.Add(1)
    28  		go func() { defer ks.wg.Done(); ks.runUpdateState(in) }()
    29  
    30  		ks.wg.Add(1)
    31  		go func() { defer ks.wg.Done(); ks.discoverer.run(ks.ctx, in) }()
    32  
    33  		ks.kubeClusterID = ks.getKubeClusterID()
    34  		ks.kubeClusterName = ks.getKubeClusterName()
    35  		if chart := ks.Charts().Get(discoveryStatusChart.ID); chart != nil {
    36  			chart.Labels = []module.Label{
    37  				{Key: labelKeyClusterID, Value: ks.kubeClusterID, Source: module.LabelSourceK8s},
    38  				{Key: labelKeyClusterName, Value: ks.kubeClusterName, Source: module.LabelSourceK8s},
    39  			}
    40  		}
    41  	})
    42  
    43  	mx := map[string]int64{
    44  		"discovery_node_discoverer_state": 1,
    45  		"discovery_pod_discoverer_state":  1,
    46  	}
    47  
    48  	if !ks.discoverer.ready() || time.Since(ks.startTime) < ks.initDelay {
    49  		return mx, nil
    50  	}
    51  
    52  	ks.state.Lock()
    53  	defer ks.state.Unlock()
    54  
    55  	ks.collectKubeState(mx)
    56  
    57  	return mx, nil
    58  }
    59  
    60  func (ks *KubeState) collectKubeState(mx map[string]int64) {
    61  	for _, ns := range ks.state.nodes {
    62  		ns.resetStats()
    63  	}
    64  	ks.collectPodsState(mx)
    65  	ks.collectNodesState(mx)
    66  }
    67  
    68  func (ks *KubeState) collectPodsState(mx map[string]int64) {
    69  	now := time.Now()
    70  	for _, ps := range ks.state.pods {
    71  		if ps.deleted {
    72  			delete(ks.state.pods, podSource(ps.namespace, ps.name))
    73  			ks.removePodCharts(ps)
    74  			continue
    75  		}
    76  		if ps.new {
    77  			ps.new = false
    78  			ks.addPodCharts(ps)
    79  			ps.unscheduled = ps.nodeName == ""
    80  		} else if ps.unscheduled && ps.nodeName != "" {
    81  			ps.unscheduled = false
    82  			ks.updatePodChartsNodeLabel(ps)
    83  		}
    84  
    85  		ns := ks.state.nodes[nodeSource(ps.nodeName)]
    86  		if ns != nil {
    87  			ns.stats.pods++
    88  			ns.stats.reqCPU += ps.reqCPU
    89  			ns.stats.limitCPU += ps.limitCPU
    90  			ns.stats.reqMem += ps.reqMem
    91  			ns.stats.limitMem += ps.limitMem
    92  			ns.stats.podsCondPodReady += condStatusToInt(ps.condPodReady)
    93  			ns.stats.podsCondPodScheduled += condStatusToInt(ps.condPodScheduled)
    94  			ns.stats.podsCondPodInitialized += condStatusToInt(ps.condPodInitialized)
    95  			ns.stats.podsCondContainersReady += condStatusToInt(ps.condContainersReady)
    96  			ns.stats.podsReadinessReady += boolToInt(ps.condPodReady == corev1.ConditionTrue)
    97  			ns.stats.podsReadinessUnready += boolToInt(ps.condPodReady != corev1.ConditionTrue)
    98  			ns.stats.podsPhasePending += boolToInt(ps.phase == corev1.PodPending)
    99  			ns.stats.podsPhaseRunning += boolToInt(ps.phase == corev1.PodRunning)
   100  			ns.stats.podsPhaseSucceeded += boolToInt(ps.phase == corev1.PodSucceeded)
   101  			ns.stats.podsPhaseFailed += boolToInt(ps.phase == corev1.PodFailed)
   102  			for _, cs := range ps.initContainers {
   103  				ns.stats.initContainers++
   104  				ns.stats.initContStateRunning += boolToInt(cs.stateRunning)
   105  				ns.stats.initContStateWaiting += boolToInt(cs.stateWaiting)
   106  				ns.stats.initContStateTerminated += boolToInt(cs.stateTerminated)
   107  			}
   108  			for _, cs := range ps.containers {
   109  				ns.stats.containers++
   110  				ns.stats.contStateRunning += boolToInt(cs.stateRunning)
   111  				ns.stats.contStateWaiting += boolToInt(cs.stateWaiting)
   112  				ns.stats.contStateTerminated += boolToInt(cs.stateTerminated)
   113  			}
   114  		}
   115  
   116  		px := fmt.Sprintf("pod_%s_", ps.id())
   117  
   118  		mx[px+"cond_podready"] = condStatusToInt(ps.condPodReady)
   119  		mx[px+"cond_podscheduled"] = condStatusToInt(ps.condPodScheduled)
   120  		mx[px+"cond_podinitialized"] = condStatusToInt(ps.condPodInitialized)
   121  		mx[px+"cond_containersready"] = condStatusToInt(ps.condContainersReady)
   122  		mx[px+"phase_running"] = boolToInt(ps.phase == corev1.PodRunning)
   123  		mx[px+"phase_failed"] = boolToInt(ps.phase == corev1.PodFailed)
   124  		mx[px+"phase_succeeded"] = boolToInt(ps.phase == corev1.PodSucceeded)
   125  		mx[px+"phase_pending"] = boolToInt(ps.phase == corev1.PodPending)
   126  		mx[px+"age"] = int64(now.Sub(ps.creationTime).Seconds())
   127  		mx[px+"cpu_requests_used"] = ps.reqCPU
   128  		mx[px+"cpu_limits_used"] = ps.limitCPU
   129  		mx[px+"mem_requests_used"] = ps.reqMem
   130  		mx[px+"mem_limits_used"] = ps.limitMem
   131  
   132  		mx[px+"init_containers"] = int64(len(ps.initContainers))
   133  		mx[px+"containers"] = int64(len(ps.containers))
   134  
   135  		mx[px+"init_containers_state_running"] = 0
   136  		mx[px+"init_containers_state_waiting"] = 0
   137  		mx[px+"init_containers_state_terminated"] = 0
   138  		for _, cs := range ps.initContainers {
   139  			mx[px+"init_containers_state_running"] += boolToInt(cs.stateRunning)
   140  			mx[px+"init_containers_state_waiting"] += boolToInt(cs.stateWaiting)
   141  			mx[px+"init_containers_state_terminated"] += boolToInt(cs.stateTerminated)
   142  		}
   143  		mx[px+"containers_state_running"] = 0
   144  		mx[px+"containers_state_waiting"] = 0
   145  		mx[px+"containers_state_terminated"] = 0
   146  		for _, cs := range ps.containers {
   147  			if cs.new {
   148  				cs.new = false
   149  				ks.addContainerCharts(ps, cs)
   150  			}
   151  			mx[px+"containers_state_running"] += boolToInt(cs.stateRunning)
   152  			mx[px+"containers_state_waiting"] += boolToInt(cs.stateWaiting)
   153  			mx[px+"containers_state_terminated"] += boolToInt(cs.stateTerminated)
   154  
   155  			ppx := fmt.Sprintf("%scontainer_%s_", px, cs.name)
   156  			mx[ppx+"state_running"] = boolToInt(cs.stateRunning)
   157  			mx[ppx+"state_waiting"] = boolToInt(cs.stateWaiting)
   158  			mx[ppx+"state_terminated"] = boolToInt(cs.stateTerminated)
   159  			mx[ppx+"readiness"] = boolToInt(cs.ready)
   160  			mx[ppx+"restarts"] = cs.restarts
   161  			for _, r := range cs.stateWaitingReasons {
   162  				if r.new {
   163  					r.new = false
   164  					ks.addContainerWaitingStateReasonToChart(ps, cs, r.reason)
   165  				}
   166  				mx[ppx+"state_waiting_reason_"+r.reason] = boolToInt(r.active)
   167  			}
   168  			for _, r := range cs.stateTerminatedReasons {
   169  				if r.new {
   170  					r.new = false
   171  					ks.addContainerTerminatedStateReasonToChart(ps, cs, r.reason)
   172  				}
   173  				mx[ppx+"state_terminated_reason_"+r.reason] = boolToInt(r.active)
   174  			}
   175  		}
   176  	}
   177  }
   178  
   179  func (ks *KubeState) collectNodesState(mx map[string]int64) {
   180  	now := time.Now()
   181  	for _, ns := range ks.state.nodes {
   182  		if ns.deleted {
   183  			delete(ks.state.nodes, nodeSource(ns.name))
   184  			ks.removeNodeCharts(ns)
   185  			continue
   186  		}
   187  		if ns.new {
   188  			ns.new = false
   189  			ks.addNodeCharts(ns)
   190  		}
   191  
   192  		px := fmt.Sprintf("node_%s_", ns.id())
   193  
   194  		for typ, cond := range ns.conditions {
   195  			if cond.new {
   196  				cond.new = false
   197  				ks.addNodeConditionToCharts(ns, typ)
   198  			}
   199  			mx[px+"cond_"+strings.ToLower(typ)] = condStatusToInt(cond.status)
   200  		}
   201  
   202  		mx[px+"age"] = int64(now.Sub(ns.creationTime).Seconds())
   203  		mx[px+"alloc_pods_util"] = calcPercentage(ns.stats.pods, ns.allocatablePods)
   204  		mx[px+"pods_readiness_ready"] = ns.stats.podsReadinessReady
   205  		mx[px+"pods_readiness_unready"] = ns.stats.podsReadinessUnready
   206  		mx[px+"pods_readiness"] = calcPercentage(ns.stats.podsReadinessReady, ns.stats.pods)
   207  		mx[px+"pods_phase_running"] = ns.stats.podsPhaseRunning
   208  		mx[px+"pods_phase_failed"] = ns.stats.podsPhaseFailed
   209  		mx[px+"pods_phase_succeeded"] = ns.stats.podsPhaseSucceeded
   210  		mx[px+"pods_phase_pending"] = ns.stats.podsPhasePending
   211  		mx[px+"pods_cond_podready"] = ns.stats.podsCondPodReady
   212  		mx[px+"pods_cond_podscheduled"] = ns.stats.podsCondPodScheduled
   213  		mx[px+"pods_cond_podinitialized"] = ns.stats.podsCondPodInitialized
   214  		mx[px+"pods_cond_containersready"] = ns.stats.podsCondContainersReady
   215  		mx[px+"pods_cond_containersready"] = ns.stats.podsCondContainersReady
   216  		mx[px+"schedulability_schedulable"] = boolToInt(!ns.unSchedulable)
   217  		mx[px+"schedulability_unschedulable"] = boolToInt(ns.unSchedulable)
   218  		mx[px+"alloc_pods_available"] = ns.allocatablePods - ns.stats.pods
   219  		mx[px+"alloc_pods_allocated"] = ns.stats.pods
   220  		mx[px+"alloc_cpu_requests_util"] = calcPercentage(ns.stats.reqCPU, ns.allocatableCPU)
   221  		mx[px+"alloc_cpu_limits_util"] = calcPercentage(ns.stats.limitCPU, ns.allocatableCPU)
   222  		mx[px+"alloc_mem_requests_util"] = calcPercentage(ns.stats.reqMem, ns.allocatableMem)
   223  		mx[px+"alloc_mem_limits_util"] = calcPercentage(ns.stats.limitMem, ns.allocatableMem)
   224  		mx[px+"alloc_cpu_requests_used"] = ns.stats.reqCPU
   225  		mx[px+"alloc_cpu_limits_used"] = ns.stats.limitCPU
   226  		mx[px+"alloc_mem_requests_used"] = ns.stats.reqMem
   227  		mx[px+"alloc_mem_limits_used"] = ns.stats.limitMem
   228  		mx[px+"init_containers"] = ns.stats.initContainers
   229  		mx[px+"containers"] = ns.stats.containers
   230  		mx[px+"containers_state_running"] = ns.stats.contStateRunning
   231  		mx[px+"containers_state_waiting"] = ns.stats.contStateWaiting
   232  		mx[px+"containers_state_terminated"] = ns.stats.contStateTerminated
   233  		mx[px+"init_containers_state_running"] = ns.stats.initContStateRunning
   234  		mx[px+"init_containers_state_waiting"] = ns.stats.initContStateWaiting
   235  		mx[px+"init_containers_state_terminated"] = ns.stats.initContStateTerminated
   236  	}
   237  }
   238  
   239  func boolToInt(v bool) int64 {
   240  	if v {
   241  		return 1
   242  	}
   243  	return 0
   244  }
   245  
   246  func condStatusToInt(cs corev1.ConditionStatus) int64 {
   247  	switch cs {
   248  	case corev1.ConditionFalse:
   249  		return 0
   250  	case corev1.ConditionTrue:
   251  		return 1
   252  	case corev1.ConditionUnknown:
   253  		return 0
   254  	default:
   255  		return 0
   256  	}
   257  }
   258  
   259  func calcPercentage(value, total int64) int64 {
   260  	if total == 0 {
   261  		return 0
   262  	}
   263  	return int64(float64(value) / float64(total) * 100 * precision)
   264  }