k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/metrics/metrics_grabber.go (about)

     1  /*
     2  Copyright 2021 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package metrics
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"strings"
    23  	"sync"
    24  	"time"
    25  
    26  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    27  	"k8s.io/apimachinery/pkg/fields"
    28  	"k8s.io/apimachinery/pkg/util/wait"
    29  	clientset "k8s.io/client-go/kubernetes"
    30  	"k8s.io/kubernetes/pkg/cluster/ports"
    31  	"k8s.io/kubernetes/test/e2e/framework"
    32  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    33  
    34  	"k8s.io/klog/v2"
    35  )
    36  
    37  const (
    38  	// kubeSchedulerPort is the default port for the scheduler status server.
    39  	kubeSchedulerPort = 10259
    40  )
    41  
    42  // Collection is metrics collection of components
    43  type Collection struct {
    44  	APIServerMetrics         APIServerMetrics
    45  	ControllerManagerMetrics ControllerManagerMetrics
    46  	KubeletMetrics           map[string]KubeletMetrics
    47  	SchedulerMetrics         SchedulerMetrics
    48  	ClusterAutoscalerMetrics ClusterAutoscalerMetrics
    49  }
    50  
    51  // Grabber provides functions which grab metrics from components
    52  type Grabber struct {
    53  	client                            clientset.Interface
    54  	externalClient                    clientset.Interface
    55  	grabFromAPIServer                 bool
    56  	grabFromControllerManager         bool
    57  	grabFromKubelets                  bool
    58  	grabFromScheduler                 bool
    59  	grabFromClusterAutoscaler         bool
    60  	masterName                        string
    61  	registeredMaster                  bool
    62  	waitForControllerManagerReadyOnce sync.Once
    63  }
    64  
    65  // deprecatedMightBeMasterNode returns true if given node is a registered master.
    66  // This code must not be updated to use node role labels, since node role labels
    67  // may not change behavior of the system.
    68  // It has been copied from https://github.com/kubernetes/kubernetes/blob/9e991415386e4cf155a24b1da15becaa390438d8/test/e2e/system/system_utils.go#L27
    69  // as it has been used in future k8s versions.
    70  // TODO(mborsz): Remove dependency on this function.
    71  func deprecatedMightBeMasterNode(nodeName string) bool {
    72  	// We are trying to capture "master(-...)?$" regexp.
    73  	// However, using regexp.MatchString() results even in more than 35%
    74  	// of all space allocations in ControllerManager spent in this function.
    75  	// That's why we are trying to be a bit smarter.
    76  	if strings.HasSuffix(nodeName, "master") {
    77  		return true
    78  	}
    79  	if len(nodeName) >= 10 {
    80  		return strings.HasSuffix(nodeName[:len(nodeName)-3], "master-")
    81  	}
    82  	return false
    83  }
    84  
    85  // NewMetricsGrabber returns new metrics which are initialized.
    86  func NewMetricsGrabber(c clientset.Interface, ec clientset.Interface, kubelets bool, scheduler bool, controllers bool, apiServer bool, clusterAutoscaler bool) (*Grabber, error) {
    87  	registeredMaster := false
    88  	masterName := ""
    89  	nodeList, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
    90  	if err != nil {
    91  		return nil, err
    92  	}
    93  	if len(nodeList.Items) < 1 {
    94  		klog.Warning("Can't find any Nodes in the API server to grab metrics from")
    95  	}
    96  	for _, node := range nodeList.Items {
    97  		if deprecatedMightBeMasterNode(node.Name) {
    98  			registeredMaster = true
    99  			masterName = node.Name
   100  			break
   101  		}
   102  	}
   103  	if !registeredMaster {
   104  		scheduler = false
   105  		controllers = false
   106  		clusterAutoscaler = ec != nil
   107  		if clusterAutoscaler {
   108  			klog.Warningf("Master node is not registered. Grabbing metrics from Scheduler, ControllerManager is disabled.")
   109  		} else {
   110  			klog.Warningf("Master node is not registered. Grabbing metrics from Scheduler, ControllerManager and ClusterAutoscaler is disabled.")
   111  		}
   112  	}
   113  
   114  	return &Grabber{
   115  		client:                    c,
   116  		externalClient:            ec,
   117  		grabFromAPIServer:         apiServer,
   118  		grabFromControllerManager: controllers,
   119  		grabFromKubelets:          kubelets,
   120  		grabFromScheduler:         scheduler,
   121  		grabFromClusterAutoscaler: clusterAutoscaler,
   122  		masterName:                masterName,
   123  		registeredMaster:          registeredMaster,
   124  	}, nil
   125  }
   126  
   127  // HasRegisteredMaster returns if metrics grabber was able to find a master node
   128  func (g *Grabber) HasRegisteredMaster() bool {
   129  	return g.registeredMaster
   130  }
   131  
   132  // GrabFromKubelet returns metrics from kubelet
   133  func (g *Grabber) GrabFromKubelet(nodeName string) (KubeletMetrics, error) {
   134  	nodes, err := g.client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{FieldSelector: fields.Set{"metadata.name": nodeName}.AsSelector().String()})
   135  	if err != nil {
   136  		return KubeletMetrics{}, err
   137  	}
   138  	if len(nodes.Items) != 1 {
   139  		return KubeletMetrics{}, fmt.Errorf("error listing nodes with name %v, got %v", nodeName, nodes.Items)
   140  	}
   141  	kubeletPort := nodes.Items[0].Status.DaemonEndpoints.KubeletEndpoint.Port
   142  	return g.grabFromKubeletInternal(nodeName, int(kubeletPort))
   143  }
   144  
   145  func (g *Grabber) grabFromKubeletInternal(nodeName string, kubeletPort int) (KubeletMetrics, error) {
   146  	if kubeletPort <= 0 || kubeletPort > 65535 {
   147  		return KubeletMetrics{}, fmt.Errorf("invalid Kubelet port %v. Skipping Kubelet's metrics gathering", kubeletPort)
   148  	}
   149  	output, err := g.getMetricsFromNode(nodeName, int(kubeletPort))
   150  	if err != nil {
   151  		return KubeletMetrics{}, err
   152  	}
   153  	return parseKubeletMetrics(output)
   154  }
   155  
   156  // GrabFromScheduler returns metrics from scheduler
   157  func (g *Grabber) GrabFromScheduler() (SchedulerMetrics, error) {
   158  	if !g.registeredMaster {
   159  		return SchedulerMetrics{}, fmt.Errorf("master's Kubelet is not registered. Skipping Scheduler's metrics gathering")
   160  	}
   161  	output, err := g.getMetricsFromPod(g.client, fmt.Sprintf("%v-%v", "kube-scheduler", g.masterName), metav1.NamespaceSystem, kubeSchedulerPort, true)
   162  	if err != nil {
   163  		return SchedulerMetrics{}, err
   164  	}
   165  	return parseSchedulerMetrics(output)
   166  }
   167  
   168  // GrabFromClusterAutoscaler returns metrics from cluster autoscaler
   169  func (g *Grabber) GrabFromClusterAutoscaler() (ClusterAutoscalerMetrics, error) {
   170  	if !g.registeredMaster && g.externalClient == nil {
   171  		return ClusterAutoscalerMetrics{}, fmt.Errorf("master's Kubelet is not registered. Skipping ClusterAutoscaler's metrics gathering")
   172  	}
   173  	var client clientset.Interface
   174  	var namespace string
   175  	if g.externalClient != nil {
   176  		client = g.externalClient
   177  		namespace = "kubemark"
   178  	} else {
   179  		client = g.client
   180  		namespace = metav1.NamespaceSystem
   181  	}
   182  	output, err := g.getMetricsFromPod(client, "cluster-autoscaler", namespace, 8085, false)
   183  	if err != nil {
   184  		return ClusterAutoscalerMetrics{}, err
   185  	}
   186  	return parseClusterAutoscalerMetrics(output)
   187  }
   188  
   189  // GrabFromControllerManager returns metrics from controller manager
   190  func (g *Grabber) GrabFromControllerManager() (ControllerManagerMetrics, error) {
   191  	if !g.registeredMaster {
   192  		return ControllerManagerMetrics{}, fmt.Errorf("master's Kubelet is not registered. Skipping ControllerManager's metrics gathering")
   193  	}
   194  
   195  	var err error
   196  	podName := fmt.Sprintf("%v-%v", "kube-controller-manager", g.masterName)
   197  	g.waitForControllerManagerReadyOnce.Do(func() {
   198  		if readyErr := e2epod.WaitTimeoutForPodReadyInNamespace(g.client, podName, metav1.NamespaceSystem, framework.PodStartTimeout); readyErr != nil {
   199  			err = fmt.Errorf("error waiting for controller manager pod to be ready: %w", readyErr)
   200  			return
   201  		}
   202  
   203  		var lastMetricsFetchErr error
   204  		if metricsWaitErr := wait.PollImmediate(time.Second, time.Minute, func() (bool, error) {
   205  			_, lastMetricsFetchErr = g.getMetricsFromPod(g.client, podName, metav1.NamespaceSystem, ports.KubeControllerManagerPort, true)
   206  			return lastMetricsFetchErr == nil, nil
   207  		}); metricsWaitErr != nil {
   208  			err = fmt.Errorf("error waiting for controller manager pod to expose metrics: %v; %v", metricsWaitErr, lastMetricsFetchErr)
   209  			return
   210  		}
   211  	})
   212  	if err != nil {
   213  		return ControllerManagerMetrics{}, err
   214  	}
   215  
   216  	output, err := g.getMetricsFromPod(g.client, podName, metav1.NamespaceSystem, ports.KubeControllerManagerPort, true)
   217  	if err != nil {
   218  		return ControllerManagerMetrics{}, err
   219  	}
   220  	return parseControllerManagerMetrics(output)
   221  }
   222  
   223  // GrabFromAPIServer returns metrics from API server
   224  func (g *Grabber) GrabFromAPIServer() (APIServerMetrics, error) {
   225  	output, err := g.getMetricsFromAPIServer()
   226  	if err != nil {
   227  		return APIServerMetrics{}, nil
   228  	}
   229  	return parseAPIServerMetrics(output)
   230  }
   231  
   232  // Grab returns metrics from corresponding component
   233  func (g *Grabber) Grab() (Collection, error) {
   234  	result := Collection{}
   235  	var errs []error
   236  	if g.grabFromAPIServer {
   237  		metrics, err := g.GrabFromAPIServer()
   238  		if err != nil {
   239  			errs = append(errs, err)
   240  		} else {
   241  			result.APIServerMetrics = metrics
   242  		}
   243  	}
   244  	if g.grabFromScheduler {
   245  		metrics, err := g.GrabFromScheduler()
   246  		if err != nil {
   247  			errs = append(errs, err)
   248  		} else {
   249  			result.SchedulerMetrics = metrics
   250  		}
   251  	}
   252  	if g.grabFromControllerManager {
   253  		metrics, err := g.GrabFromControllerManager()
   254  		if err != nil {
   255  			errs = append(errs, err)
   256  		} else {
   257  			result.ControllerManagerMetrics = metrics
   258  		}
   259  	}
   260  	if g.grabFromClusterAutoscaler {
   261  		metrics, err := g.GrabFromClusterAutoscaler()
   262  		if err != nil {
   263  			errs = append(errs, err)
   264  		} else {
   265  			result.ClusterAutoscalerMetrics = metrics
   266  		}
   267  	}
   268  	if g.grabFromKubelets {
   269  		result.KubeletMetrics = make(map[string]KubeletMetrics)
   270  		nodes, err := g.client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
   271  		if err != nil {
   272  			errs = append(errs, err)
   273  		} else {
   274  			for _, node := range nodes.Items {
   275  				kubeletPort := node.Status.DaemonEndpoints.KubeletEndpoint.Port
   276  				metrics, err := g.grabFromKubeletInternal(node.Name, int(kubeletPort))
   277  				if err != nil {
   278  					errs = append(errs, err)
   279  				}
   280  				result.KubeletMetrics[node.Name] = metrics
   281  			}
   282  		}
   283  	}
   284  	if len(errs) > 0 {
   285  		return result, fmt.Errorf("errors while grabbing metrics: %v", errs)
   286  	}
   287  	return result, nil
   288  }
   289  
   290  func (g *Grabber) getMetricsFromPod(client clientset.Interface, podName string, namespace string, port int, enableHTTPS bool) (string, error) {
   291  	var name string
   292  	if enableHTTPS {
   293  		name = fmt.Sprintf("https:%s:%d", podName, port)
   294  	} else {
   295  		name = fmt.Sprintf("%s:%d", podName, port)
   296  	}
   297  	rawOutput, err := client.CoreV1().RESTClient().Get().
   298  		Namespace(namespace).
   299  		Resource("pods").
   300  		SubResource("proxy").
   301  		Name(name).
   302  		Suffix("metrics").
   303  		Do(context.TODO()).Raw()
   304  	if err != nil {
   305  		return "", err
   306  	}
   307  	return string(rawOutput), nil
   308  }