k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/metrics/kubelet_metrics.go (about)

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package metrics
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"io/ioutil"
    23  	"net/http"
    24  	"sort"
    25  	"strconv"
    26  	"strings"
    27  	"time"
    28  
    29  	"k8s.io/apimachinery/pkg/util/sets"
    30  	clientset "k8s.io/client-go/kubernetes"
    31  	"k8s.io/component-base/metrics/testutil"
    32  	dockermetrics "k8s.io/kubernetes/pkg/kubelet/dockershim/metrics"
    33  	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
    34  	e2elog "k8s.io/kubernetes/test/e2e/framework/log"
    35  )
    36  
    37  const (
    38  	proxyTimeout = 2 * time.Minute
    39  )
    40  
    41  // KubeletMetrics is metrics for kubelet
    42  type KubeletMetrics testutil.Metrics
    43  
    44  // Equal returns true if all metrics are the same as the arguments.
    45  func (m *KubeletMetrics) Equal(o KubeletMetrics) bool {
    46  	return (*testutil.Metrics)(m).Equal(testutil.Metrics(o))
    47  }
    48  
    49  // NewKubeletMetrics returns new metrics which are initialized.
    50  func NewKubeletMetrics() KubeletMetrics {
    51  	result := testutil.NewMetrics()
    52  	return KubeletMetrics(result)
    53  }
    54  
    55  // GrabKubeletMetricsWithoutProxy retrieve metrics from the kubelet on the given node using a simple GET over http.
    56  // Currently only used in integration tests.
    57  func GrabKubeletMetricsWithoutProxy(nodeName, path string) (KubeletMetrics, error) {
    58  	resp, err := http.Get(fmt.Sprintf("http://%s%s", nodeName, path))
    59  	if err != nil {
    60  		return KubeletMetrics{}, err
    61  	}
    62  	defer resp.Body.Close()
    63  	body, err := ioutil.ReadAll(resp.Body)
    64  	if err != nil {
    65  		return KubeletMetrics{}, err
    66  	}
    67  	return parseKubeletMetrics(string(body))
    68  }
    69  
    70  func parseKubeletMetrics(data string) (KubeletMetrics, error) {
    71  	result := NewKubeletMetrics()
    72  	if err := testutil.ParseMetrics(data, (*testutil.Metrics)(&result)); err != nil {
    73  		return KubeletMetrics{}, err
    74  	}
    75  	return result, nil
    76  }
    77  
    78  func (g *Grabber) getMetricsFromNode(nodeName string, kubeletPort int) (string, error) {
    79  	// There's a problem with timing out during proxy. Wrapping this in a goroutine to prevent deadlock.
    80  	finished := make(chan struct{}, 1)
    81  	var err error
    82  	var rawOutput []byte
    83  	go func() {
    84  		rawOutput, err = g.client.CoreV1().RESTClient().Get().
    85  			Resource("nodes").
    86  			SubResource("proxy").
    87  			Name(fmt.Sprintf("%v:%v", nodeName, kubeletPort)).
    88  			Suffix("metrics").
    89  			Do(context.TODO()).Raw()
    90  		finished <- struct{}{}
    91  	}()
    92  	select {
    93  	case <-time.After(proxyTimeout):
    94  		return "", fmt.Errorf("timed out when waiting for proxy to gather metrics from %v", nodeName)
    95  	case <-finished:
    96  		if err != nil {
    97  			return "", err
    98  		}
    99  		return string(rawOutput), nil
   100  	}
   101  }
   102  
   103  // KubeletLatencyMetric stores metrics scraped from the kubelet server's /metric endpoint.
   104  // TODO: Get some more structure around the metrics and this type
   105  type KubeletLatencyMetric struct {
   106  	// eg: list, info, create
   107  	Operation string
   108  	// eg: sync_pods, pod_worker
   109  	Method string
   110  	// 0 <= quantile <=1, e.g. 0.95 is 95%tile, 0.5 is median.
   111  	Quantile float64
   112  	Latency  time.Duration
   113  }
   114  
   115  // KubeletLatencyMetrics implements sort.Interface for []KubeletMetric based on
   116  // the latency field.
   117  type KubeletLatencyMetrics []KubeletLatencyMetric
   118  
   119  func (a KubeletLatencyMetrics) Len() int           { return len(a) }
   120  func (a KubeletLatencyMetrics) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
   121  func (a KubeletLatencyMetrics) Less(i, j int) bool { return a[i].Latency > a[j].Latency }
   122  
   123  // If a apiserver client is passed in, the function will try to get kubelet metrics from metrics grabber;
   124  // or else, the function will try to get kubelet metrics directly from the node.
   125  func getKubeletMetricsFromNode(c clientset.Interface, nodeName string) (KubeletMetrics, error) {
   126  	if c == nil {
   127  		return GrabKubeletMetricsWithoutProxy(nodeName, "/metrics")
   128  	}
   129  	grabber, err := NewMetricsGrabber(c, nil, true, false, false, false, false)
   130  	if err != nil {
   131  		return KubeletMetrics{}, err
   132  	}
   133  	return grabber.GrabFromKubelet(nodeName)
   134  }
   135  
   136  // GetKubeletMetrics gets all metrics in kubelet subsystem from specified node and trims
   137  // the subsystem prefix.
   138  func GetKubeletMetrics(c clientset.Interface, nodeName string) (KubeletMetrics, error) {
   139  	ms, err := getKubeletMetricsFromNode(c, nodeName)
   140  	if err != nil {
   141  		return KubeletMetrics{}, err
   142  	}
   143  
   144  	kubeletMetrics := make(KubeletMetrics)
   145  	for name, samples := range ms {
   146  		const prefix = kubeletmetrics.KubeletSubsystem + "_"
   147  		if !strings.HasPrefix(name, prefix) {
   148  			// Not a kubelet metric.
   149  			continue
   150  		}
   151  		method := strings.TrimPrefix(name, prefix)
   152  		kubeletMetrics[method] = samples
   153  	}
   154  	return kubeletMetrics, nil
   155  }
   156  
   157  // GetDefaultKubeletLatencyMetrics calls GetKubeletLatencyMetrics with a set of default metricNames
   158  // identifying common latency metrics.
   159  // Note that the KubeletMetrics passed in should not contain subsystem prefix.
   160  func GetDefaultKubeletLatencyMetrics(ms KubeletMetrics) KubeletLatencyMetrics {
   161  	latencyMetricNames := sets.NewString(
   162  		kubeletmetrics.PodWorkerDurationKey,
   163  		kubeletmetrics.PodWorkerStartDurationKey,
   164  		kubeletmetrics.PodStartDurationKey,
   165  		kubeletmetrics.CgroupManagerOperationsKey,
   166  		dockermetrics.DockerOperationsLatencyKey,
   167  		kubeletmetrics.PodWorkerStartDurationKey,
   168  		kubeletmetrics.PLEGRelistDurationKey,
   169  	)
   170  	return GetKubeletLatencyMetrics(ms, latencyMetricNames)
   171  }
   172  
   173  // GetKubeletLatencyMetrics filters ms to include only those contained in the metricNames set,
   174  // then constructs a KubeletLatencyMetrics list based on the samples associated with those metrics.
   175  func GetKubeletLatencyMetrics(ms KubeletMetrics, filterMetricNames sets.String) KubeletLatencyMetrics {
   176  	var latencyMetrics KubeletLatencyMetrics
   177  	for name, samples := range ms {
   178  		if !filterMetricNames.Has(name) {
   179  			continue
   180  		}
   181  		for _, sample := range samples {
   182  			latency := sample.Value
   183  			operation := string(sample.Metric["operation_type"])
   184  			var quantile float64
   185  			if val, ok := sample.Metric[testutil.QuantileLabel]; ok {
   186  				var err error
   187  				if quantile, err = strconv.ParseFloat(string(val), 64); err != nil {
   188  					continue
   189  				}
   190  			}
   191  
   192  			latencyMetrics = append(latencyMetrics, KubeletLatencyMetric{
   193  				Operation: operation,
   194  				Method:    name,
   195  				Quantile:  quantile,
   196  				Latency:   time.Duration(int64(latency)) * time.Microsecond,
   197  			})
   198  		}
   199  	}
   200  	return latencyMetrics
   201  }
   202  
   203  // HighLatencyKubeletOperations logs and counts the high latency metrics exported by the kubelet server via /metrics.
   204  func HighLatencyKubeletOperations(c clientset.Interface, threshold time.Duration, nodeName string, logFunc func(fmt string, args ...interface{})) (KubeletLatencyMetrics, error) {
   205  	ms, err := GetKubeletMetrics(c, nodeName)
   206  	if err != nil {
   207  		return KubeletLatencyMetrics{}, err
   208  	}
   209  	latencyMetrics := GetDefaultKubeletLatencyMetrics(ms)
   210  	sort.Sort(latencyMetrics)
   211  	var badMetrics KubeletLatencyMetrics
   212  	logFunc("\nLatency metrics for node %v", nodeName)
   213  	for _, m := range latencyMetrics {
   214  		if m.Latency > threshold {
   215  			badMetrics = append(badMetrics, m)
   216  			e2elog.Logf("%+v", m)
   217  		}
   218  	}
   219  	return badMetrics, nil
   220  }