k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/scheduler/metrics/resources/resources.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // Package resources provides a metrics collector that reports the 18 // resource consumption (requests and limits) of the pods in the cluster 19 // as the scheduler and kubelet would interpret it. 20 package resources 21 22 import ( 23 "net/http" 24 "strconv" 25 26 v1 "k8s.io/api/core/v1" 27 "k8s.io/apimachinery/pkg/api/resource" 28 "k8s.io/apimachinery/pkg/labels" 29 corelisters "k8s.io/client-go/listers/core/v1" 30 "k8s.io/component-base/metrics" 31 32 v1resource "k8s.io/kubernetes/pkg/api/v1/resource" 33 v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" 34 ) 35 36 type resourceLifecycleDescriptors struct { 37 total *metrics.Desc 38 } 39 40 func (d resourceLifecycleDescriptors) Describe(ch chan<- *metrics.Desc) { 41 ch <- d.total 42 } 43 44 type resourceMetricsDescriptors struct { 45 requests resourceLifecycleDescriptors 46 limits resourceLifecycleDescriptors 47 } 48 49 func (d resourceMetricsDescriptors) Describe(ch chan<- *metrics.Desc) { 50 d.requests.Describe(ch) 51 d.limits.Describe(ch) 52 } 53 54 var podResourceDesc = resourceMetricsDescriptors{ 55 requests: resourceLifecycleDescriptors{ 56 total: metrics.NewDesc("kube_pod_resource_request", 57 "Resources requested by workloads on the cluster, broken down by pod. This shows the resource usage the scheduler and kubelet expect per pod for resources along with the unit for the resource if any.", 58 []string{"namespace", "pod", "node", "scheduler", "priority", "resource", "unit"}, 59 nil, 60 metrics.STABLE, 61 ""), 62 }, 63 limits: resourceLifecycleDescriptors{ 64 total: metrics.NewDesc("kube_pod_resource_limit", 65 "Resources limit for workloads on the cluster, broken down by pod. This shows the resource usage the scheduler and kubelet expect per pod for resources along with the unit for the resource if any.", 66 []string{"namespace", "pod", "node", "scheduler", "priority", "resource", "unit"}, 67 nil, 68 metrics.STABLE, 69 ""), 70 }, 71 } 72 73 // Handler creates a collector from the provided podLister and returns an http.Handler that 74 // will report the requested metrics in the prometheus format. It does not include any other 75 // metrics. 76 func Handler(podLister corelisters.PodLister) http.Handler { 77 collector := NewPodResourcesMetricsCollector(podLister) 78 registry := metrics.NewKubeRegistry() 79 registry.CustomMustRegister(collector) 80 return metrics.HandlerWithReset(registry, metrics.HandlerOpts{}) 81 } 82 83 // Check if resourceMetricsCollector implements necessary interface 84 var _ metrics.StableCollector = &podResourceCollector{} 85 86 // NewPodResourcesMetricsCollector registers a O(pods) cardinality metric that 87 // reports the current resources requested by all pods on the cluster within 88 // the Kubernetes resource model. Metrics are broken down by pod, node, resource, 89 // and phase of lifecycle. Each pod returns two series per resource - one for 90 // their aggregate usage (required to schedule) and one for their phase specific 91 // usage. This allows admins to assess the cost per resource at different phases 92 // of startup and compare to actual resource usage. 93 func NewPodResourcesMetricsCollector(podLister corelisters.PodLister) metrics.StableCollector { 94 return &podResourceCollector{ 95 lister: podLister, 96 } 97 } 98 99 type podResourceCollector struct { 100 metrics.BaseStableCollector 101 lister corelisters.PodLister 102 } 103 104 func (c *podResourceCollector) DescribeWithStability(ch chan<- *metrics.Desc) { 105 podResourceDesc.Describe(ch) 106 } 107 108 func (c *podResourceCollector) CollectWithStability(ch chan<- metrics.Metric) { 109 pods, err := c.lister.List(labels.Everything()) 110 if err != nil { 111 return 112 } 113 reuseReqs, reuseLimits := make(v1.ResourceList, 4), make(v1.ResourceList, 4) 114 for _, p := range pods { 115 reqs, limits, terminal := podRequestsAndLimitsByLifecycle(p, reuseReqs, reuseLimits) 116 if terminal { 117 // terminal pods are excluded from resource usage calculations 118 continue 119 } 120 for _, t := range []struct { 121 desc resourceLifecycleDescriptors 122 total v1.ResourceList 123 }{ 124 { 125 desc: podResourceDesc.requests, 126 total: reqs, 127 }, 128 { 129 desc: podResourceDesc.limits, 130 total: limits, 131 }, 132 } { 133 for resourceName, val := range t.total { 134 var unitName string 135 switch resourceName { 136 case v1.ResourceCPU: 137 unitName = "cores" 138 case v1.ResourceMemory: 139 unitName = "bytes" 140 case v1.ResourceStorage: 141 unitName = "bytes" 142 case v1.ResourceEphemeralStorage: 143 unitName = "bytes" 144 default: 145 switch { 146 case v1helper.IsHugePageResourceName(resourceName): 147 unitName = "bytes" 148 case v1helper.IsAttachableVolumeResourceName(resourceName): 149 unitName = "integer" 150 } 151 } 152 var priority string 153 if p.Spec.Priority != nil { 154 priority = strconv.FormatInt(int64(*p.Spec.Priority), 10) 155 } 156 recordMetricWithUnit(ch, t.desc.total, p.Namespace, p.Name, p.Spec.NodeName, p.Spec.SchedulerName, priority, resourceName, unitName, val) 157 } 158 } 159 } 160 } 161 162 func recordMetricWithUnit( 163 ch chan<- metrics.Metric, 164 desc *metrics.Desc, 165 namespace, name, nodeName, schedulerName, priority string, 166 resourceName v1.ResourceName, 167 unit string, 168 val resource.Quantity, 169 ) { 170 if val.IsZero() { 171 return 172 } 173 ch <- metrics.NewLazyConstMetric(desc, metrics.GaugeValue, 174 val.AsApproximateFloat64(), 175 namespace, name, nodeName, schedulerName, priority, string(resourceName), unit, 176 ) 177 } 178 179 // podRequestsAndLimitsByLifecycle returns a dictionary of all defined resources summed up for all 180 // containers of the pod. Pod overhead is added to the 181 // total container resource requests and to the total container limits which have a 182 // non-zero quantity. The caller may avoid allocations of resource lists by passing 183 // a requests and limits list to the function, which will be cleared before use. 184 // This method is the same as v1resource.PodRequestsAndLimits but avoids allocating in several 185 // scenarios for efficiency. 186 func podRequestsAndLimitsByLifecycle(pod *v1.Pod, reuseReqs, reuseLimits v1.ResourceList) (reqs, limits v1.ResourceList, terminal bool) { 187 switch { 188 case len(pod.Spec.NodeName) == 0: 189 // unscheduled pods cannot be terminal 190 case pod.Status.Phase == v1.PodSucceeded, pod.Status.Phase == v1.PodFailed: 191 terminal = true 192 // TODO: resolve https://github.com/kubernetes/kubernetes/issues/96515 and add a condition here 193 // for checking that terminal state 194 } 195 if terminal { 196 return 197 } 198 199 reqs = v1resource.PodRequests(pod, v1resource.PodResourcesOptions{Reuse: reuseReqs}) 200 limits = v1resource.PodLimits(pod, v1resource.PodResourcesOptions{Reuse: reuseLimits}) 201 return 202 }