k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/metrics/kubelet_metrics.go (about) 1 /* 2 Copyright 2015 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package metrics 18 19 import ( 20 "context" 21 "fmt" 22 "io/ioutil" 23 "net/http" 24 "sort" 25 "strconv" 26 "strings" 27 "time" 28 29 "k8s.io/apimachinery/pkg/util/sets" 30 clientset "k8s.io/client-go/kubernetes" 31 "k8s.io/component-base/metrics/testutil" 32 dockermetrics "k8s.io/kubernetes/pkg/kubelet/dockershim/metrics" 33 kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics" 34 e2elog "k8s.io/kubernetes/test/e2e/framework/log" 35 ) 36 37 const ( 38 proxyTimeout = 2 * time.Minute 39 ) 40 41 // KubeletMetrics is metrics for kubelet 42 type KubeletMetrics testutil.Metrics 43 44 // Equal returns true if all metrics are the same as the arguments. 45 func (m *KubeletMetrics) Equal(o KubeletMetrics) bool { 46 return (*testutil.Metrics)(m).Equal(testutil.Metrics(o)) 47 } 48 49 // NewKubeletMetrics returns new metrics which are initialized. 50 func NewKubeletMetrics() KubeletMetrics { 51 result := testutil.NewMetrics() 52 return KubeletMetrics(result) 53 } 54 55 // GrabKubeletMetricsWithoutProxy retrieve metrics from the kubelet on the given node using a simple GET over http. 56 // Currently only used in integration tests. 57 func GrabKubeletMetricsWithoutProxy(nodeName, path string) (KubeletMetrics, error) { 58 resp, err := http.Get(fmt.Sprintf("http://%s%s", nodeName, path)) 59 if err != nil { 60 return KubeletMetrics{}, err 61 } 62 defer resp.Body.Close() 63 body, err := ioutil.ReadAll(resp.Body) 64 if err != nil { 65 return KubeletMetrics{}, err 66 } 67 return parseKubeletMetrics(string(body)) 68 } 69 70 func parseKubeletMetrics(data string) (KubeletMetrics, error) { 71 result := NewKubeletMetrics() 72 if err := testutil.ParseMetrics(data, (*testutil.Metrics)(&result)); err != nil { 73 return KubeletMetrics{}, err 74 } 75 return result, nil 76 } 77 78 func (g *Grabber) getMetricsFromNode(nodeName string, kubeletPort int) (string, error) { 79 // There's a problem with timing out during proxy. Wrapping this in a goroutine to prevent deadlock. 80 finished := make(chan struct{}, 1) 81 var err error 82 var rawOutput []byte 83 go func() { 84 rawOutput, err = g.client.CoreV1().RESTClient().Get(). 85 Resource("nodes"). 86 SubResource("proxy"). 87 Name(fmt.Sprintf("%v:%v", nodeName, kubeletPort)). 88 Suffix("metrics"). 89 Do(context.TODO()).Raw() 90 finished <- struct{}{} 91 }() 92 select { 93 case <-time.After(proxyTimeout): 94 return "", fmt.Errorf("timed out when waiting for proxy to gather metrics from %v", nodeName) 95 case <-finished: 96 if err != nil { 97 return "", err 98 } 99 return string(rawOutput), nil 100 } 101 } 102 103 // KubeletLatencyMetric stores metrics scraped from the kubelet server's /metric endpoint. 104 // TODO: Get some more structure around the metrics and this type 105 type KubeletLatencyMetric struct { 106 // eg: list, info, create 107 Operation string 108 // eg: sync_pods, pod_worker 109 Method string 110 // 0 <= quantile <=1, e.g. 0.95 is 95%tile, 0.5 is median. 111 Quantile float64 112 Latency time.Duration 113 } 114 115 // KubeletLatencyMetrics implements sort.Interface for []KubeletMetric based on 116 // the latency field. 117 type KubeletLatencyMetrics []KubeletLatencyMetric 118 119 func (a KubeletLatencyMetrics) Len() int { return len(a) } 120 func (a KubeletLatencyMetrics) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 121 func (a KubeletLatencyMetrics) Less(i, j int) bool { return a[i].Latency > a[j].Latency } 122 123 // If a apiserver client is passed in, the function will try to get kubelet metrics from metrics grabber; 124 // or else, the function will try to get kubelet metrics directly from the node. 125 func getKubeletMetricsFromNode(c clientset.Interface, nodeName string) (KubeletMetrics, error) { 126 if c == nil { 127 return GrabKubeletMetricsWithoutProxy(nodeName, "/metrics") 128 } 129 grabber, err := NewMetricsGrabber(c, nil, true, false, false, false, false) 130 if err != nil { 131 return KubeletMetrics{}, err 132 } 133 return grabber.GrabFromKubelet(nodeName) 134 } 135 136 // GetKubeletMetrics gets all metrics in kubelet subsystem from specified node and trims 137 // the subsystem prefix. 138 func GetKubeletMetrics(c clientset.Interface, nodeName string) (KubeletMetrics, error) { 139 ms, err := getKubeletMetricsFromNode(c, nodeName) 140 if err != nil { 141 return KubeletMetrics{}, err 142 } 143 144 kubeletMetrics := make(KubeletMetrics) 145 for name, samples := range ms { 146 const prefix = kubeletmetrics.KubeletSubsystem + "_" 147 if !strings.HasPrefix(name, prefix) { 148 // Not a kubelet metric. 149 continue 150 } 151 method := strings.TrimPrefix(name, prefix) 152 kubeletMetrics[method] = samples 153 } 154 return kubeletMetrics, nil 155 } 156 157 // GetDefaultKubeletLatencyMetrics calls GetKubeletLatencyMetrics with a set of default metricNames 158 // identifying common latency metrics. 159 // Note that the KubeletMetrics passed in should not contain subsystem prefix. 160 func GetDefaultKubeletLatencyMetrics(ms KubeletMetrics) KubeletLatencyMetrics { 161 latencyMetricNames := sets.NewString( 162 kubeletmetrics.PodWorkerDurationKey, 163 kubeletmetrics.PodWorkerStartDurationKey, 164 kubeletmetrics.PodStartDurationKey, 165 kubeletmetrics.CgroupManagerOperationsKey, 166 dockermetrics.DockerOperationsLatencyKey, 167 kubeletmetrics.PodWorkerStartDurationKey, 168 kubeletmetrics.PLEGRelistDurationKey, 169 ) 170 return GetKubeletLatencyMetrics(ms, latencyMetricNames) 171 } 172 173 // GetKubeletLatencyMetrics filters ms to include only those contained in the metricNames set, 174 // then constructs a KubeletLatencyMetrics list based on the samples associated with those metrics. 175 func GetKubeletLatencyMetrics(ms KubeletMetrics, filterMetricNames sets.String) KubeletLatencyMetrics { 176 var latencyMetrics KubeletLatencyMetrics 177 for name, samples := range ms { 178 if !filterMetricNames.Has(name) { 179 continue 180 } 181 for _, sample := range samples { 182 latency := sample.Value 183 operation := string(sample.Metric["operation_type"]) 184 var quantile float64 185 if val, ok := sample.Metric[testutil.QuantileLabel]; ok { 186 var err error 187 if quantile, err = strconv.ParseFloat(string(val), 64); err != nil { 188 continue 189 } 190 } 191 192 latencyMetrics = append(latencyMetrics, KubeletLatencyMetric{ 193 Operation: operation, 194 Method: name, 195 Quantile: quantile, 196 Latency: time.Duration(int64(latency)) * time.Microsecond, 197 }) 198 } 199 } 200 return latencyMetrics 201 } 202 203 // HighLatencyKubeletOperations logs and counts the high latency metrics exported by the kubelet server via /metrics. 204 func HighLatencyKubeletOperations(c clientset.Interface, threshold time.Duration, nodeName string, logFunc func(fmt string, args ...interface{})) (KubeletLatencyMetrics, error) { 205 ms, err := GetKubeletMetrics(c, nodeName) 206 if err != nil { 207 return KubeletLatencyMetrics{}, err 208 } 209 latencyMetrics := GetDefaultKubeletLatencyMetrics(ms) 210 sort.Sort(latencyMetrics) 211 var badMetrics KubeletLatencyMetrics 212 logFunc("\nLatency metrics for node %v", nodeName) 213 for _, m := range latencyMetrics { 214 if m.Latency > threshold { 215 badMetrics = append(badMetrics, m) 216 e2elog.Logf("%+v", m) 217 } 218 } 219 return badMetrics, nil 220 }