k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/metrics/metrics_grabber.go (about) 1 /* 2 Copyright 2021 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package metrics 18 19 import ( 20 "context" 21 "fmt" 22 "strings" 23 "sync" 24 "time" 25 26 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 "k8s.io/apimachinery/pkg/fields" 28 "k8s.io/apimachinery/pkg/util/wait" 29 clientset "k8s.io/client-go/kubernetes" 30 "k8s.io/kubernetes/pkg/cluster/ports" 31 "k8s.io/kubernetes/test/e2e/framework" 32 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 33 34 "k8s.io/klog/v2" 35 ) 36 37 const ( 38 // kubeSchedulerPort is the default port for the scheduler status server. 39 kubeSchedulerPort = 10259 40 ) 41 42 // Collection is metrics collection of components 43 type Collection struct { 44 APIServerMetrics APIServerMetrics 45 ControllerManagerMetrics ControllerManagerMetrics 46 KubeletMetrics map[string]KubeletMetrics 47 SchedulerMetrics SchedulerMetrics 48 ClusterAutoscalerMetrics ClusterAutoscalerMetrics 49 } 50 51 // Grabber provides functions which grab metrics from components 52 type Grabber struct { 53 client clientset.Interface 54 externalClient clientset.Interface 55 grabFromAPIServer bool 56 grabFromControllerManager bool 57 grabFromKubelets bool 58 grabFromScheduler bool 59 grabFromClusterAutoscaler bool 60 masterName string 61 registeredMaster bool 62 waitForControllerManagerReadyOnce sync.Once 63 } 64 65 // deprecatedMightBeMasterNode returns true if given node is a registered master. 66 // This code must not be updated to use node role labels, since node role labels 67 // may not change behavior of the system. 68 // It has been copied from https://github.com/kubernetes/kubernetes/blob/9e991415386e4cf155a24b1da15becaa390438d8/test/e2e/system/system_utils.go#L27 69 // as it has been used in future k8s versions. 70 // TODO(mborsz): Remove dependency on this function. 71 func deprecatedMightBeMasterNode(nodeName string) bool { 72 // We are trying to capture "master(-...)?$" regexp. 73 // However, using regexp.MatchString() results even in more than 35% 74 // of all space allocations in ControllerManager spent in this function. 75 // That's why we are trying to be a bit smarter. 76 if strings.HasSuffix(nodeName, "master") { 77 return true 78 } 79 if len(nodeName) >= 10 { 80 return strings.HasSuffix(nodeName[:len(nodeName)-3], "master-") 81 } 82 return false 83 } 84 85 // NewMetricsGrabber returns new metrics which are initialized. 86 func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets bool, scheduler bool, controllers bool, apiServer bool, clusterAutoscaler bool) (*Grabber, error) { 87 registeredMaster := false 88 masterName := "" 89 nodeList, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) 90 if err != nil { 91 return nil, err 92 } 93 if len(nodeList.Items) < 1 { 94 klog.Warning("Can't find any Nodes in the API server to grab metrics from") 95 } 96 for _, node := range nodeList.Items { 97 if deprecatedMightBeMasterNode(node.Name) { 98 registeredMaster = true 99 masterName = node.Name 100 break 101 } 102 } 103 if !registeredMaster { 104 scheduler = false 105 controllers = false 106 clusterAutoscaler = ec != nil 107 if clusterAutoscaler { 108 klog.Warningf("Master node is not registered. Grabbing metrics from Scheduler, ControllerManager is disabled.") 109 } else { 110 klog.Warningf("Master node is not registered. Grabbing metrics from Scheduler, ControllerManager and ClusterAutoscaler is disabled.") 111 } 112 } 113 114 return &Grabber{ 115 client: c, 116 externalClient: ec, 117 grabFromAPIServer: apiServer, 118 grabFromControllerManager: controllers, 119 grabFromKubelets: kubelets, 120 grabFromScheduler: scheduler, 121 grabFromClusterAutoscaler: clusterAutoscaler, 122 masterName: masterName, 123 registeredMaster: registeredMaster, 124 }, nil 125 } 126 127 // HasRegisteredMaster returns if metrics grabber was able to find a master node 128 func (g *Grabber) HasRegisteredMaster() bool { 129 return g.registeredMaster 130 } 131 132 // GrabFromKubelet returns metrics from kubelet 133 func (g *Grabber) GrabFromKubelet(nodeName string) (KubeletMetrics, error) { 134 nodes, err := g.client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{FieldSelector: fields.Set{"metadata.name": nodeName}.AsSelector().String()}) 135 if err != nil { 136 return KubeletMetrics{}, err 137 } 138 if len(nodes.Items) != 1 { 139 return KubeletMetrics{}, fmt.Errorf("error listing nodes with name %v, got %v", nodeName, nodes.Items) 140 } 141 kubeletPort := nodes.Items[0].Status.DaemonEndpoints.KubeletEndpoint.Port 142 return g.grabFromKubeletInternal(nodeName, int(kubeletPort)) 143 } 144 145 func (g *Grabber) grabFromKubeletInternal(nodeName string, kubeletPort int) (KubeletMetrics, error) { 146 if kubeletPort <= 0 || kubeletPort > 65535 { 147 return KubeletMetrics{}, fmt.Errorf("invalid Kubelet port %v. Skipping Kubelet's metrics gathering", kubeletPort) 148 } 149 output, err := g.getMetricsFromNode(nodeName, int(kubeletPort)) 150 if err != nil { 151 return KubeletMetrics{}, err 152 } 153 return parseKubeletMetrics(output) 154 } 155 156 // GrabFromScheduler returns metrics from scheduler 157 func (g *Grabber) GrabFromScheduler() (SchedulerMetrics, error) { 158 if !g.registeredMaster { 159 return SchedulerMetrics{}, fmt.Errorf("master's Kubelet is not registered. Skipping Scheduler's metrics gathering") 160 } 161 output, err := g.getMetricsFromPod(g.client, fmt.Sprintf("%v-%v", "kube-scheduler", g.masterName), metav1.NamespaceSystem, kubeSchedulerPort, true) 162 if err != nil { 163 return SchedulerMetrics{}, err 164 } 165 return parseSchedulerMetrics(output) 166 } 167 168 // GrabFromClusterAutoscaler returns metrics from cluster autoscaler 169 func (g *Grabber) GrabFromClusterAutoscaler() (ClusterAutoscalerMetrics, error) { 170 if !g.registeredMaster && g.externalClient == nil { 171 return ClusterAutoscalerMetrics{}, fmt.Errorf("master's Kubelet is not registered. Skipping ClusterAutoscaler's metrics gathering") 172 } 173 var client clientset.Interface 174 var namespace string 175 if g.externalClient != nil { 176 client = g.externalClient 177 namespace = "kubemark" 178 } else { 179 client = g.client 180 namespace = metav1.NamespaceSystem 181 } 182 output, err := g.getMetricsFromPod(client, "cluster-autoscaler", namespace, 8085, false) 183 if err != nil { 184 return ClusterAutoscalerMetrics{}, err 185 } 186 return parseClusterAutoscalerMetrics(output) 187 } 188 189 // GrabFromControllerManager returns metrics from controller manager 190 func (g *Grabber) GrabFromControllerManager() (ControllerManagerMetrics, error) { 191 if !g.registeredMaster { 192 return ControllerManagerMetrics{}, fmt.Errorf("master's Kubelet is not registered. Skipping ControllerManager's metrics gathering") 193 } 194 195 var err error 196 podName := fmt.Sprintf("%v-%v", "kube-controller-manager", g.masterName) 197 g.waitForControllerManagerReadyOnce.Do(func() { 198 if readyErr := e2epod.WaitTimeoutForPodReadyInNamespace(g.client, podName, metav1.NamespaceSystem, framework.PodStartTimeout); readyErr != nil { 199 err = fmt.Errorf("error waiting for controller manager pod to be ready: %w", readyErr) 200 return 201 } 202 203 var lastMetricsFetchErr error 204 if metricsWaitErr := wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { 205 _, lastMetricsFetchErr = g.getMetricsFromPod(g.client, podName, metav1.NamespaceSystem, ports.KubeControllerManagerPort, true) 206 return lastMetricsFetchErr == nil, nil 207 }); metricsWaitErr != nil { 208 err = fmt.Errorf("error waiting for controller manager pod to expose metrics: %v; %v", metricsWaitErr, lastMetricsFetchErr) 209 return 210 } 211 }) 212 if err != nil { 213 return ControllerManagerMetrics{}, err 214 } 215 216 output, err := g.getMetricsFromPod(g.client, podName, metav1.NamespaceSystem, ports.KubeControllerManagerPort, true) 217 if err != nil { 218 return ControllerManagerMetrics{}, err 219 } 220 return parseControllerManagerMetrics(output) 221 } 222 223 // GrabFromAPIServer returns metrics from API server 224 func (g *Grabber) GrabFromAPIServer() (APIServerMetrics, error) { 225 output, err := g.getMetricsFromAPIServer() 226 if err != nil { 227 return APIServerMetrics{}, nil 228 } 229 return parseAPIServerMetrics(output) 230 } 231 232 // Grab returns metrics from corresponding component 233 func (g *Grabber) Grab() (Collection, error) { 234 result := Collection{} 235 var errs []error 236 if g.grabFromAPIServer { 237 metrics, err := g.GrabFromAPIServer() 238 if err != nil { 239 errs = append(errs, err) 240 } else { 241 result.APIServerMetrics = metrics 242 } 243 } 244 if g.grabFromScheduler { 245 metrics, err := g.GrabFromScheduler() 246 if err != nil { 247 errs = append(errs, err) 248 } else { 249 result.SchedulerMetrics = metrics 250 } 251 } 252 if g.grabFromControllerManager { 253 metrics, err := g.GrabFromControllerManager() 254 if err != nil { 255 errs = append(errs, err) 256 } else { 257 result.ControllerManagerMetrics = metrics 258 } 259 } 260 if g.grabFromClusterAutoscaler { 261 metrics, err := g.GrabFromClusterAutoscaler() 262 if err != nil { 263 errs = append(errs, err) 264 } else { 265 result.ClusterAutoscalerMetrics = metrics 266 } 267 } 268 if g.grabFromKubelets { 269 result.KubeletMetrics = make(map[string]KubeletMetrics) 270 nodes, err := g.client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) 271 if err != nil { 272 errs = append(errs, err) 273 } else { 274 for _, node := range nodes.Items { 275 kubeletPort := node.Status.DaemonEndpoints.KubeletEndpoint.Port 276 metrics, err := g.grabFromKubeletInternal(node.Name, int(kubeletPort)) 277 if err != nil { 278 errs = append(errs, err) 279 } 280 result.KubeletMetrics[node.Name] = metrics 281 } 282 } 283 } 284 if len(errs) > 0 { 285 return result, fmt.Errorf("errors while grabbing metrics: %v", errs) 286 } 287 return result, nil 288 } 289 290 func (g *Grabber) getMetricsFromPod(client clientset.Interface, podName string, namespace string, port int, enableHTTPS bool) (string, error) { 291 var name string 292 if enableHTTPS { 293 name = fmt.Sprintf("https:%s:%d", podName, port) 294 } else { 295 name = fmt.Sprintf("%s:%d", podName, port) 296 } 297 rawOutput, err := client.CoreV1().RESTClient().Get(). 298 Namespace(namespace). 299 Resource("pods"). 300 SubResource("proxy"). 301 Name(name). 302 Suffix("metrics"). 303 Do(context.TODO()).Raw() 304 if err != nil { 305 return "", err 306 } 307 return string(rawOutput), nil 308 }