k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/kube_state_metrics_measurement.go

/*
Copyright 2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package common

import (
	"context"
	"fmt"
	"sync"
	"time"

	"k8s.io/perf-tests/clusterloader2/pkg/measurement"
	measurementutil "k8s.io/perf-tests/clusterloader2/pkg/measurement/util"
	"k8s.io/perf-tests/clusterloader2/pkg/util"

	"github.com/prometheus/common/model"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/klog/v2"
)

const (
	ksmLatencyName               = "KubeStateMetricsLatency"
	ksmRequestDurationMetricName = model.LabelValue("http_request_duration_seconds_bucket")
	probeIntervalDefault         = 30 * time.Second
	ksmNamespace                 = "kube-state-metrics-perf-test"
	ksmServiceName               = "kube-state-metrics"
	ksmSelfPort                  = 8081
	ksmMetricsPort               = 8080
)

type ksmLatencyMeasurement struct {
	ctx            context.Context
	cancel         func()
	isRunning      bool
	namespace      string
	serviceName    string
	metricsPort    int
	selfPort       int
	initialLatency *measurementutil.Histogram
	wg             sync.WaitGroup
}

func init() {
	if err := measurement.Register(ksmLatencyName, CreateKSMLatencyMeasurement); err != nil {
		klog.Fatalf("Cannot register %s: %v", ksmLatencyName, err)
	}
}

// CreateKSMLatencyMeasurement creates a new Kube State Metrics measurement.
func CreateKSMLatencyMeasurement() measurement.Measurement {
	ctx, cancel := context.WithCancel(context.Background())
	return &ksmLatencyMeasurement{
		namespace:   ksmNamespace,
		serviceName: ksmServiceName,
		selfPort:    ksmSelfPort,
		metricsPort: ksmMetricsPort,
		ctx:         ctx,
		cancel:      cancel,
	}
}
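// The sketch below is illustrative only and not part of the upstream file. It assumes
// the clusterloader2 framework supplies suitably populated measurement.Config values
// (here hypothetically named startConfig with Params {"action": "start"} and
// gatherConfig with Params {"action": "gather"}) and shows how the measurement
// registered above is typically driven through its lifecycle:
//
//	m := CreateKSMLatencyMeasurement()
//	if _, err := m.Execute(startConfig); err != nil { // begins the periodic /metrics probing
//		klog.Errorf("start failed: %v", err)
//	}
//	// ... the test applies its load here ...
//	summaries, err := m.Execute(gatherConfig) // produces the KubeStateMetricsLatency JSON summary
//	m.Dispose()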
// Execute supports two actions:
// - start - starts a goroutine that queries /metrics every probeIntervalDefault;
//   it also collects the initial latency metrics.
// - gather - gathers the latency metrics and creates a latency summary.
func (m *ksmLatencyMeasurement) Execute(config *measurement.Config) ([]measurement.Summary, error) {
	if !config.CloudProvider.Features().SupportKubeStateMetrics {
		klog.Infof("not executing KSMLatencyMeasurement: unsupported for provider, %s", config.ClusterFramework.GetClusterConfig().Provider.Name())
		return nil, nil
	}
	action, err := util.GetString(config.Params, "action")
	if err != nil {
		return nil, err
	}
	client := config.ClusterFramework.GetClientSets().GetClient()
	switch action {
	case "start":
		if m.isRunning {
			klog.V(2).Infof("%s: measurement already running", m)
			return nil, nil
		}
		// Start calling the kube-state-metrics /metrics endpoint
		// every probeIntervalDefault until gather is called.
		// probeIntervalDefault equals the scraping interval we suggest.
		// If we cannot get metrics for two minutes we are already going over
		// the scrape interval, so we should cancel.
		m.startQuerying(m.ctx, client, probeIntervalDefault)
		// Retrieve the initial latency once the first call is done.
		m.initialLatency, err = m.retrieveKSMLatencyMetrics(m.ctx, client)
		return nil, err
	case "gather":
		defer m.cancel()
		return m.createKSMLatencySummary(m.ctx, client)
	default:
		return nil, fmt.Errorf("unknown action %v", action)
	}
}

func (m *ksmLatencyMeasurement) stop() error {
	if !m.isRunning {
		return fmt.Errorf("%s: measurement was not running", m)
	}
	m.cancel()
	m.wg.Wait()
	return nil
}

// createKSMLatencySummary gathers the latency one last time and builds the summary
// from the quantiles of the resulting histogram. Afterwards it creates the summary report.
func (m *ksmLatencyMeasurement) createKSMLatencySummary(ctx context.Context, client clientset.Interface) ([]measurement.Summary, error) {
	latestLatency, err := m.retrieveKSMLatencyMetrics(ctx, client)
	if err != nil {
		return nil, err
	}
	if err = m.stop(); err != nil {
		return nil, err
	}
	// Compute the delta accumulated during the measurement by subtracting the
	// initial histogram from the latest one.
	finalLatency := HistogramSub(latestLatency, m.initialLatency)
	// Pretty-print the report.
	result := &measurementutil.LatencyMetric{}
	if err = SetQuantileFromHistogram(result, finalLatency); err != nil {
		return nil, err
	}
	content, err := util.PrettyPrintJSON(result)
	if err != nil {
		return nil, err
	}
	// Create the summary.
	return []measurement.Summary{measurement.CreateSummary(ksmLatencyName, "json", content)}, nil
}

// startQuerying queries the /metrics endpoint of kube-state-metrics (the kube_* metrics)
// every interval and stops when stop is called.
func (m *ksmLatencyMeasurement) startQuerying(ctx context.Context, client clientset.Interface, interval time.Duration) {
	m.isRunning = true
	m.wg.Add(1)
	go m.queryLoop(ctx, client, interval)
}

func (m *ksmLatencyMeasurement) queryLoop(ctx context.Context, client clientset.Interface, interval time.Duration) {
	defer m.wg.Done()
	for {
		select {
		case <-ctx.Done():
			return
		case <-time.After(interval):
			var output string
			output, err := m.getMetricsFromService(ctx, client, m.metricsPort)
			if err != nil {
				klog.V(2).Infof("error while fetching metrics from service: %v", err)
			}
			if output == "" {
				klog.V(2).Infof("/metrics endpoint of kube-state-metrics returned no data in namespace: %s from service: %s port: %d", m.namespace, m.serviceName, m.metricsPort)
			}
		}
	}
}

// retrieveKSMLatencyMetrics scrapes the kube-state-metrics self-metrics port and folds
// all http_request_duration_seconds_bucket samples into a single histogram.
func (m *ksmLatencyMeasurement) retrieveKSMLatencyMetrics(ctx context.Context, c clientset.Interface) (*measurementutil.Histogram, error) {
	ksmHist := measurementutil.NewHistogram(nil)
	output, err := m.getMetricsFromService(ctx, c, m.selfPort)
	if err != nil {
		return ksmHist, err
	}
	samples, err := measurementutil.ExtractMetricSamples(output)
	if err != nil {
		return ksmHist, err
	}
	for _, sample := range samples {
		switch sample.Metric[model.MetricNameLabel] {
		case ksmRequestDurationMetricName:
			measurementutil.ConvertSampleToHistogram(sample, ksmHist)
		}
	}
	return ksmHist, nil
}
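// For reference, the self-metrics port exposes the request-duration histogram in the
// standard Prometheus text format. The lines below are an illustrative sketch (the
// label sets are assumptions, not copied from kube-state-metrics output) of the
// samples that retrieveKSMLatencyMetrics folds into the histogram, bucketed by "le":
//
//	http_request_duration_seconds_bucket{handler="metrics",method="get",le="0.1"} 4
//	http_request_duration_seconds_bucket{handler="metrics",method="get",le="0.5"} 9
//	http_request_duration_seconds_bucket{handler="metrics",method="get",le="+Inf"} 9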
// getMetricsFromService returns the raw /metrics payload of the kube-state-metrics
// service on the given port, fetched through the apiserver's service proxy.
func (m *ksmLatencyMeasurement) getMetricsFromService(ctx context.Context, client clientset.Interface, port int) (string, error) {
	ctx, cancel := context.WithTimeout(ctx, 2*time.Minute)
	defer cancel()
	out, err := client.CoreV1().RESTClient().Get().
		Resource("services").
		SubResource("proxy").
		Namespace(m.namespace).
		Name(fmt.Sprintf("%v:%v", m.serviceName, port)).
		Suffix("metrics").
		Do(ctx).Raw()
	return string(out), err
}

// Dispose cleans up after the measurement.
func (m *ksmLatencyMeasurement) Dispose() {
	if err := m.stop(); err != nil {
		klog.V(2).Infof("error during dispose call: %v", err)
	}
}

// String returns the string representation of this measurement.
func (m *ksmLatencyMeasurement) String() string {
	return ksmLatencyName
}
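// For reference, the proxied GET built by getMetricsFromService resolves to the
// apiserver service-proxy path
//
//	/api/v1/namespaces/kube-state-metrics-perf-test/services/kube-state-metrics:8081/proxy/metrics
//
// (with 8080 in place of 8081 for the kube_* metrics port). Assuming kubectl access
// to the test cluster, the same request can be reproduced manually with:
//
//	kubectl get --raw "/api/v1/namespaces/kube-state-metrics-perf-test/services/kube-state-metrics:8081/proxy/metrics"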