k8s.io/kubernetes@v1.29.3/test/e2e/framework/metrics/kubelet_metrics.go (about) 1 /* 2 Copyright 2015 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package metrics 18 19 import ( 20 "context" 21 "fmt" 22 "io" 23 "net/http" 24 "sort" 25 "strconv" 26 "strings" 27 "time" 28 29 "k8s.io/apimachinery/pkg/util/sets" 30 clientset "k8s.io/client-go/kubernetes" 31 "k8s.io/component-base/metrics/testutil" 32 "k8s.io/kubernetes/test/e2e/framework" 33 ) 34 35 const ( 36 proxyTimeout = 2 * time.Minute 37 // dockerOperationsLatencyKey is the key for the operation latency metrics. 38 // Taken from k8s.io/kubernetes/pkg/kubelet/dockershim/metrics 39 dockerOperationsLatencyKey = "docker_operations_duration_seconds" 40 // Taken from k8s.io/kubernetes/pkg/kubelet/metrics 41 kubeletSubsystem = "kubelet" 42 // Taken from k8s.io/kubernetes/pkg/kubelet/metrics 43 podWorkerDurationKey = "pod_worker_duration_seconds" 44 // Taken from k8s.io/kubernetes/pkg/kubelet/metrics 45 podStartDurationKey = "pod_start_duration_seconds" 46 // Taken from k8s.io/kubernetes/pkg/kubelet/metrics 47 podStartSLIDurationKey = "pod_start_sli_duration_seconds" 48 // Taken from k8s.io/kubernetes/pkg/kubelet/metrics 49 cgroupManagerOperationsKey = "cgroup_manager_duration_seconds" 50 // Taken from k8s.io/kubernetes/pkg/kubelet/metrics 51 podWorkerStartDurationKey = "pod_worker_start_duration_seconds" 52 // Taken from k8s.io/kubernetes/pkg/kubelet/metrics 53 plegRelistDurationKey = "pleg_relist_duration_seconds" 54 ) 55 56 // KubeletMetrics is metrics for kubelet 57 type KubeletMetrics testutil.Metrics 58 59 // Equal returns true if all metrics are the same as the arguments. 60 func (m *KubeletMetrics) Equal(o KubeletMetrics) bool { 61 return (*testutil.Metrics)(m).Equal(testutil.Metrics(o)) 62 } 63 64 // NewKubeletMetrics returns new metrics which are initialized. 65 func NewKubeletMetrics() KubeletMetrics { 66 result := testutil.NewMetrics() 67 return KubeletMetrics(result) 68 } 69 70 // GrabKubeletMetricsWithoutProxy retrieve metrics from the kubelet on the given node using a simple GET over http. 71 func GrabKubeletMetricsWithoutProxy(ctx context.Context, nodeName, path string) (KubeletMetrics, error) { 72 req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("http://%s%s", nodeName, path), nil) 73 if err != nil { 74 return KubeletMetrics{}, err 75 } 76 resp, err := http.DefaultClient.Do(req) 77 if err != nil { 78 return KubeletMetrics{}, err 79 } 80 defer resp.Body.Close() 81 body, err := io.ReadAll(resp.Body) 82 if err != nil { 83 return KubeletMetrics{}, err 84 } 85 return parseKubeletMetrics(string(body)) 86 } 87 88 func parseKubeletMetrics(data string) (KubeletMetrics, error) { 89 result := NewKubeletMetrics() 90 if err := testutil.ParseMetrics(data, (*testutil.Metrics)(&result)); err != nil { 91 return KubeletMetrics{}, err 92 } 93 return result, nil 94 } 95 96 // KubeletLatencyMetric stores metrics scraped from the kubelet server's /metric endpoint. 97 // TODO: Get some more structure around the metrics and this type 98 type KubeletLatencyMetric struct { 99 // eg: list, info, create 100 Operation string 101 // eg: sync_pods, pod_worker 102 Method string 103 // 0 <= quantile <=1, e.g. 0.95 is 95%tile, 0.5 is median. 104 Quantile float64 105 Latency time.Duration 106 } 107 108 // KubeletLatencyMetrics implements sort.Interface for []KubeletMetric based on 109 // the latency field. 110 type KubeletLatencyMetrics []KubeletLatencyMetric 111 112 func (a KubeletLatencyMetrics) Len() int { return len(a) } 113 func (a KubeletLatencyMetrics) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 114 func (a KubeletLatencyMetrics) Less(i, j int) bool { return a[i].Latency > a[j].Latency } 115 116 // If a apiserver client is passed in, the function will try to get kubelet metrics from metrics grabber; 117 // or else, the function will try to get kubelet metrics directly from the node. 118 func getKubeletMetricsFromNode(ctx context.Context, c clientset.Interface, nodeName string) (KubeletMetrics, error) { 119 if c == nil { 120 return GrabKubeletMetricsWithoutProxy(ctx, nodeName, "/metrics") 121 } 122 grabber, err := NewMetricsGrabber(ctx, c, nil, nil, true, false, false, false, false, false) 123 if err != nil { 124 return KubeletMetrics{}, err 125 } 126 return grabber.GrabFromKubelet(ctx, nodeName) 127 } 128 129 // GetKubeletMetrics gets all metrics in kubelet subsystem from specified node and trims 130 // the subsystem prefix. 131 func GetKubeletMetrics(ctx context.Context, c clientset.Interface, nodeName string) (KubeletMetrics, error) { 132 ms, err := getKubeletMetricsFromNode(ctx, c, nodeName) 133 if err != nil { 134 return KubeletMetrics{}, err 135 } 136 137 kubeletMetrics := make(KubeletMetrics) 138 for name, samples := range ms { 139 const prefix = kubeletSubsystem + "_" 140 if !strings.HasPrefix(name, prefix) { 141 // Not a kubelet metric. 142 continue 143 } 144 method := strings.TrimPrefix(name, prefix) 145 kubeletMetrics[method] = samples 146 } 147 return kubeletMetrics, nil 148 } 149 150 // GetDefaultKubeletLatencyMetrics calls GetKubeletLatencyMetrics with a set of default metricNames 151 // identifying common latency metrics. 152 // Note that the KubeletMetrics passed in should not contain subsystem prefix. 153 func GetDefaultKubeletLatencyMetrics(ms KubeletMetrics) KubeletLatencyMetrics { 154 latencyMetricNames := sets.NewString( 155 podWorkerDurationKey, 156 podWorkerStartDurationKey, 157 podStartDurationKey, 158 podStartSLIDurationKey, 159 cgroupManagerOperationsKey, 160 dockerOperationsLatencyKey, 161 podWorkerStartDurationKey, 162 plegRelistDurationKey, 163 ) 164 return GetKubeletLatencyMetrics(ms, latencyMetricNames) 165 } 166 167 // GetKubeletLatencyMetrics filters ms to include only those contained in the metricNames set, 168 // then constructs a KubeletLatencyMetrics list based on the samples associated with those metrics. 169 func GetKubeletLatencyMetrics(ms KubeletMetrics, filterMetricNames sets.String) KubeletLatencyMetrics { 170 var latencyMetrics KubeletLatencyMetrics 171 for name, samples := range ms { 172 if !filterMetricNames.Has(name) { 173 continue 174 } 175 for _, sample := range samples { 176 latency := sample.Value 177 operation := string(sample.Metric["operation_type"]) 178 var quantile float64 179 if val, ok := sample.Metric[testutil.QuantileLabel]; ok { 180 var err error 181 if quantile, err = strconv.ParseFloat(string(val), 64); err != nil { 182 continue 183 } 184 } 185 186 latencyMetrics = append(latencyMetrics, KubeletLatencyMetric{ 187 Operation: operation, 188 Method: name, 189 Quantile: quantile, 190 Latency: time.Duration(int64(latency)) * time.Microsecond, 191 }) 192 } 193 } 194 return latencyMetrics 195 } 196 197 // HighLatencyKubeletOperations logs and counts the high latency metrics exported by the kubelet server via /metrics. 198 func HighLatencyKubeletOperations(ctx context.Context, c clientset.Interface, threshold time.Duration, nodeName string, logFunc func(fmt string, args ...interface{})) (KubeletLatencyMetrics, error) { 199 ms, err := GetKubeletMetrics(ctx, c, nodeName) 200 if err != nil { 201 return KubeletLatencyMetrics{}, err 202 } 203 latencyMetrics := GetDefaultKubeletLatencyMetrics(ms) 204 sort.Sort(latencyMetrics) 205 var badMetrics KubeletLatencyMetrics 206 logFunc("\nLatency metrics for node %v", nodeName) 207 for _, m := range latencyMetrics { 208 if m.Latency > threshold { 209 badMetrics = append(badMetrics, m) 210 framework.Logf("%+v", m) 211 } 212 } 213 return badMetrics, nil 214 }