k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/slos/network_programming.go (about)

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package slos
    18  
    19  import (
    20  	"fmt"
    21  	"time"
    22  
    23  	"k8s.io/klog/v2"
    24  	"k8s.io/perf-tests/clusterloader2/pkg/errors"
    25  	"k8s.io/perf-tests/clusterloader2/pkg/measurement"
    26  	"k8s.io/perf-tests/clusterloader2/pkg/measurement/common"
    27  	measurementutil "k8s.io/perf-tests/clusterloader2/pkg/measurement/util"
    28  	"k8s.io/perf-tests/clusterloader2/pkg/util"
    29  )
    30  
const (
	// netProg is the name this measurement is registered under; it is also
	// used to label the produced perf data and summary.
	netProg = "NetworkProgrammingLatency"

	// metricVersion is written to the Version field of the emitted PerfData.
	metricVersion = "v1"

	// Query measuring the 99th percentile of the Xth percentiles (where X=50,90,99) of network programming latency over the last 5min.
	// %v should be replaced with the query window size (duration of the test).
	// This measurement assumes that there are no data points for the rest of the cluster-day.
	// Definition: https://github.com/kubernetes/community/blob/master/sig-scalability/slos/network_programming_latency.md
	query = "quantile_over_time(0.99, kubeproxy:kubeproxy_network_programming_duration:histogram_quantile{}[%v])"
)
    42  
    43  func init() {
    44  	create := func() measurement.Measurement { return common.CreatePrometheusMeasurement(&netProgGatherer{}) }
    45  	if err := measurement.Register(netProg, create); err != nil {
    46  		klog.Fatalf("Cannot register %s: %v", netProg, err)
    47  	}
    48  }
    49  
// netProgGatherer gathers the NetworkProgrammingLatency measurement. Its
// fields are populated by Configure.
type netProgGatherer struct {
	// enableViolations controls whether the gathered latency is verified
	// against threshold when the summary is created.
	enableViolations bool
	// threshold is the latency limit passed to VerifyThreshold when
	// enableViolations is set.
	threshold        time.Duration
}
    54  
    55  func (n *netProgGatherer) Configure(config *measurement.Config) error {
    56  	enableViolations, err := util.GetBoolOrDefault(config.Params, "enableViolations", false)
    57  	if err != nil {
    58  		return err
    59  	}
    60  	n.enableViolations = enableViolations
    61  
    62  	threshold, err := util.GetDuration(config.Params, "threshold")
    63  	if err != nil {
    64  		return err
    65  	}
    66  	n.threshold = threshold
    67  	return nil
    68  }
    69  
    70  func (n *netProgGatherer) IsEnabled(config *measurement.Config) bool {
    71  	// Disable NetworkProgrammingLatency measurement if scraping kube-proxy is disabled.
    72  	if !config.ClusterLoaderConfig.PrometheusConfig.ScrapeKubeProxy {
    73  		return false
    74  	}
    75  	// TODO(#1399): remove the dependency of provider name.
    76  	return config.CloudProvider.Name() != "kubemark"
    77  }
    78  
    79  func (n *netProgGatherer) Gather(executor common.QueryExecutor, startTime, endTime time.Time, config *measurement.Config) ([]measurement.Summary, error) {
    80  	latency, err := n.query(executor, startTime, endTime)
    81  	if err != nil {
    82  		return nil, err
    83  	}
    84  
    85  	klog.V(2).Infof("%s: got %v", netProg, latency)
    86  	summary, err := n.createSummary(latency)
    87  	return []measurement.Summary{summary}, err
    88  }
    89  
    90  func (n *netProgGatherer) String() string {
    91  	return netProg
    92  }
    93  
    94  func (n *netProgGatherer) query(executor common.QueryExecutor, startTime, endTime time.Time) (*measurementutil.LatencyMetric, error) {
    95  	duration := endTime.Sub(startTime)
    96  
    97  	boundedQuery := fmt.Sprintf(query, measurementutil.ToPrometheusTime(duration))
    98  
    99  	samples, err := executor.Query(boundedQuery, endTime)
   100  	if err != nil {
   101  		return nil, err
   102  	}
   103  	if len(samples) != 3 {
   104  		return nil, fmt.Errorf("got unexpected number of samples: %d", len(samples))
   105  	}
   106  	return measurementutil.NewLatencyMetricPrometheus(samples)
   107  }
   108  
   109  func (n *netProgGatherer) createSummary(latency *measurementutil.LatencyMetric) (measurement.Summary, error) {
   110  	content, err := util.PrettyPrintJSON(&measurementutil.PerfData{
   111  		Version:   metricVersion,
   112  		DataItems: []measurementutil.DataItem{latency.ToPerfData(netProg)},
   113  	})
   114  	if err != nil {
   115  		return nil, err
   116  	}
   117  	if n.enableViolations {
   118  		klog.V(2).Infof("%s: programming thresholds validation is enabled", n.enableViolations)
   119  		if slosErr := latency.VerifyThreshold(n.threshold); slosErr != nil {
   120  			err = errors.NewMetricViolationError("Network Programming Latency", slosErr.Error())
   121  			klog.Errorf("%s: %v", n, err)
   122  		}
   123  	}
   124  	return measurement.CreateSummary(netProg, "json", content), err
   125  }