k8s.io/kubernetes@v1.29.3/test/integration/scheduler_perf/util.go

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package benchmark
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"encoding/json"
    23  	"flag"
    24  	"fmt"
    25  	"math"
    26  	"os"
    27  	"path"
    28  	"sort"
    29  	"testing"
    30  	"time"
    31  
    32  	v1 "k8s.io/api/core/v1"
    33  	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
    34  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    35  	"k8s.io/apimachinery/pkg/labels"
    36  	"k8s.io/apimachinery/pkg/util/sets"
    37  	"k8s.io/client-go/dynamic"
    38  	"k8s.io/client-go/informers"
    39  	coreinformers "k8s.io/client-go/informers/core/v1"
    40  	clientset "k8s.io/client-go/kubernetes"
    41  	restclient "k8s.io/client-go/rest"
    42  	cliflag "k8s.io/component-base/cli/flag"
    43  	"k8s.io/component-base/featuregate"
    44  	"k8s.io/component-base/metrics/legacyregistry"
    45  	"k8s.io/component-base/metrics/testutil"
    46  	"k8s.io/klog/v2"
    47  	kubeschedulerconfigv1 "k8s.io/kube-scheduler/config/v1"
    48  	"k8s.io/kubernetes/cmd/kube-apiserver/app/options"
    49  	"k8s.io/kubernetes/pkg/features"
    50  	"k8s.io/kubernetes/pkg/scheduler/apis/config"
    51  	kubeschedulerscheme "k8s.io/kubernetes/pkg/scheduler/apis/config/scheme"
    52  	"k8s.io/kubernetes/test/integration/framework"
    53  	"k8s.io/kubernetes/test/integration/util"
    54  	testutils "k8s.io/kubernetes/test/utils"
    55  )
    56  
    57  const (
    58  	dateFormat               = "2006-01-02T15:04:05Z"
    59  	testNamespace            = "sched-test"
    60  	setupNamespace           = "sched-setup"
    61  	throughputSampleInterval = time.Second
    62  )
    63  
    64  var dataItemsDir = flag.String("data-items-dir", "", "destination directory for storing generated data items for perf dashboard")
    65  
    66  func newDefaultComponentConfig() (*config.KubeSchedulerConfiguration, error) {
    67  	gvk := kubeschedulerconfigv1.SchemeGroupVersion.WithKind("KubeSchedulerConfiguration")
    68  	cfg := config.KubeSchedulerConfiguration{}
    69  	_, _, err := kubeschedulerscheme.Codecs.UniversalDecoder().Decode(nil, &gvk, &cfg)
    70  	if err != nil {
    71  		return nil, err
    72  	}
    73  	return &cfg, nil
    74  }
    75  
    76  // mustSetupCluster starts the following components:
    77  // - k8s api server
    78  // - scheduler
    79  // - some of the kube-controller-manager controllers
    80  //
    81  // It returns the shared informer factory and the regular and dynamic clients. Cleanup is
    82  // registered via tb.Cleanup and removes the started components when the test finishes.
    83  // Notes on rate limiter:
    84  //   - client rate limit is set to 5000.
    85  func mustSetupCluster(ctx context.Context, tb testing.TB, config *config.KubeSchedulerConfiguration, enabledFeatures map[featuregate.Feature]bool) (informers.SharedInformerFactory, clientset.Interface, dynamic.Interface) {
    86  	// Run API server with minimal logging by default. Can be raised with -v.
    87  	framework.MinVerbosity = 0
    88  
    89  	_, kubeConfig, tearDownFn := framework.StartTestServer(ctx, tb, framework.TestServerSetup{
    90  		ModifyServerRunOptions: func(opts *options.ServerRunOptions) {
    91  			// Disable the ServiceAccount admission plugin as we don't have a serviceaccount controller running.
    92  			opts.Admission.GenericAdmission.DisablePlugins = []string{"ServiceAccount", "TaintNodesByCondition", "Priority"}
    93  
    94  			// Enable DRA API group.
    95  			if enabledFeatures[features.DynamicResourceAllocation] {
    96  				opts.APIEnablement.RuntimeConfig = cliflag.ConfigurationMap{
    97  					resourcev1alpha2.SchemeGroupVersion.String(): "true",
    98  				}
    99  			}
   100  		},
   101  	})
   102  	tb.Cleanup(tearDownFn)
   103  
   104  	// Cleanup will be in reverse order: first the clients get cancelled,
   105  	// then the apiserver is torn down.
   106  	ctx, cancel := context.WithCancel(ctx)
   107  	tb.Cleanup(cancel)
   108  
   109  	// TODO: client connection configuration such as QPS or Burst is configurable in theory and
   110  	// could be derived from `config`; support this once a test case depends on such configuration.
   111  	cfg := restclient.CopyConfig(kubeConfig)
   112  	cfg.QPS = 5000.0
   113  	cfg.Burst = 5000
   114  
   115  	// use default component config if config here is nil
   116  	if config == nil {
   117  		var err error
   118  		config, err = newDefaultComponentConfig()
   119  		if err != nil {
   120  			tb.Fatalf("Error creating default component config: %v", err)
   121  		}
   122  	}
   123  
   124  	client := clientset.NewForConfigOrDie(cfg)
   125  	dynClient := dynamic.NewForConfigOrDie(cfg)
   126  
   127  	// Not all config options take effect; only those most relevant to scheduler performance
   128  	// are applied when starting the scheduler. Most of them are defined in `scheduler.schedulerOptions`.
   129  	_, informerFactory := util.StartScheduler(ctx, client, cfg, config)
   130  	util.StartFakePVController(ctx, client, informerFactory)
   131  	runGC := util.CreateGCController(ctx, tb, *cfg, informerFactory)
   132  	runNS := util.CreateNamespaceController(ctx, tb, *cfg, informerFactory)
   133  
   134  	runResourceClaimController := func() {}
   135  	if enabledFeatures[features.DynamicResourceAllocation] {
   136  		// Testing of DRA with inline resource claims depends on this
   137  		// controller for creating and removing ResourceClaims.
   138  		runResourceClaimController = util.CreateResourceClaimController(ctx, tb, client, informerFactory)
   139  	}
   140  
   141  	informerFactory.Start(ctx.Done())
   142  	informerFactory.WaitForCacheSync(ctx.Done())
   143  	go runGC()
   144  	go runNS()
   145  	go runResourceClaimController()
   146  
   147  	return informerFactory, client, dynClient
   148  }
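        // Illustrative usage (a sketch, not part of the original file): a benchmark could
        // bring up the test cluster and obtain a pod informer roughly like this, assuming a
        // testing.B named b and a context ctx provided by the caller:
        //
        //	informerFactory, client, dynClient := mustSetupCluster(ctx, b, nil, nil)
        //	podInformer := informerFactory.Core().V1().Pods()
        //	scheduled, unscheduled, err := getScheduledPods(podInformer, testNamespace)
        //	_, _, _ = client, dynClient, err
        //	_, _ = scheduled, unscheduled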
   149  
   150  // getScheduledPods returns the lists of scheduled and unscheduled pods in the specified namespaces.
   151  // If no namespaces are specified, pods in all namespaces are included.
   152  func getScheduledPods(podInformer coreinformers.PodInformer, namespaces ...string) ([]*v1.Pod, []*v1.Pod, error) {
   153  	pods, err := podInformer.Lister().List(labels.Everything())
   154  	if err != nil {
   155  		return nil, nil, err
   156  	}
   157  
   158  	s := sets.New(namespaces...)
   159  	scheduled := make([]*v1.Pod, 0, len(pods))
   160  	unscheduled := make([]*v1.Pod, 0, len(pods))
   161  	for i := range pods {
   162  		pod := pods[i]
   163  		if len(s) == 0 || s.Has(pod.Namespace) {
   164  			if len(pod.Spec.NodeName) > 0 {
   165  				scheduled = append(scheduled, pod)
   166  			} else {
   167  				unscheduled = append(unscheduled, pod)
   168  			}
   169  		}
   170  	}
   171  	return scheduled, unscheduled, nil
   172  }
   173  
   174  // DataItem is the data point.
   175  type DataItem struct {
   176  	// Data is a map from bucket to real data point (e.g. "Perc90" -> 23.5). Notice
   177  	// that all data items with the same label combination should have the same buckets.
   178  	Data map[string]float64 `json:"data"`
   179  	// Unit is the data unit. Notice that all data items with the same label combination
   180  	// should have the same unit.
   181  	Unit string `json:"unit"`
   182  	// Labels is the labels of the data item.
   183  	Labels map[string]string `json:"labels,omitempty"`
   184  }
   185  
   186  // DataItems is the data point set. It is the struct that perf dashboard expects.
   187  type DataItems struct {
   188  	Version   string     `json:"version"`
   189  	DataItems []DataItem `json:"dataItems"`
   190  }
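        // Illustrative example (an assumption, not taken from the original file): once
        // marshalled by dataItems2JSONFile, a DataItems value might look roughly like:
        //
        //	{
        //	  "version": "v1",
        //	  "dataItems": [
        //	    {
        //	      "data": {"Average": 280.5, "Perc50": 270.0, "Perc90": 310.0, "Perc95": 320.0, "Perc99": 340.0},
        //	      "unit": "pods/s",
        //	      "labels": {"Metric": "SchedulingThroughput", "Name": "SchedulingBasic"}
        //	    }
        //	  ]
        //	}
        //
        // The field names follow the json tags above; the version string and label values are hypothetical.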
   191  
   192  // makeBasePod creates a Pod object to be used as a template.
   193  func makeBasePod() *v1.Pod {
   194  	basePod := &v1.Pod{
   195  		ObjectMeta: metav1.ObjectMeta{
   196  			GenerateName: "pod-",
   197  		},
   198  		Spec: testutils.MakePodSpec(),
   199  	}
   200  	return basePod
   201  }
   202  
   203  func dataItems2JSONFile(dataItems DataItems, namePrefix string) error {
   204  	// perfdash expects all data items to have the same set of labels.  It
   205  	// then renders drop-down buttons for each label with all values found
   206  	// for each label. If we were to store data items that don't have a
   207  	// certain label, then perfdash will never show those data items
   208  	// because it will only show data items that have the currently
   209  	// selected label value. To avoid that, we collect all labels used
   210  	// anywhere and then add missing labels with "not applicable" as value.
   211  	labels := sets.New[string]()
   212  	for _, item := range dataItems.DataItems {
   213  		for label := range item.Labels {
   214  			labels.Insert(label)
   215  		}
   216  	}
   217  	for _, item := range dataItems.DataItems {
   218  		for label := range labels {
   219  			if _, ok := item.Labels[label]; !ok {
   220  				item.Labels[label] = "not applicable"
   221  			}
   222  		}
   223  	}
   224  
   225  	b, err := json.Marshal(dataItems)
   226  	if err != nil {
   227  		return err
   228  	}
   229  
   230  	destFile := fmt.Sprintf("%v_%v.json", namePrefix, time.Now().Format(dateFormat))
   231  	if *dataItemsDir != "" {
   232  		// Ensure that the "dataItemsDir" path exists.
   233  		if err := os.MkdirAll(*dataItemsDir, 0750); err != nil {
   234  			return fmt.Errorf("dataItemsDir path %v does not exist and cannot be created: %v", *dataItemsDir, err)
   235  		}
   236  		destFile = path.Join(*dataItemsDir, destFile)
   237  	}
   238  	formatted := &bytes.Buffer{}
   239  	if err := json.Indent(formatted, b, "", "  "); err != nil {
   240  		return fmt.Errorf("indenting error: %v", err)
   241  	}
   242  	return os.WriteFile(destFile, formatted.Bytes(), 0644)
   243  }
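        // Illustrative example (hypothetical values): with -data-items-dir=/tmp/results and
        // namePrefix "BenchmarkPerfScheduling", the call above would write a file such as
        //
        //	/tmp/results/BenchmarkPerfScheduling_2024-06-01T10:30:00Z.json
        //
        // because dateFormat is Go's reference-time layout "2006-01-02T15:04:05Z".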
   244  
   245  type labelValues struct {
   246  	label  string
   247  	values []string
   248  }
   249  
   250  // metricsCollectorConfig is the config to be marshalled to a YAML config file.
   251  // NOTE: Only one label filter per metric is supported; a data item is collected for each value listed in `values`.
   252  type metricsCollectorConfig struct {
   253  	Metrics map[string]*labelValues
   254  }
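        // Illustrative example (a sketch; the metric and label names are assumptions, not
        // taken from this file): a config that aggregates one histogram as a whole and
        // splits another by a label could look like:
        //
        //	cfg := &metricsCollectorConfig{
        //		Metrics: map[string]*labelValues{
        //			// No filter: aggregate the whole metric family.
        //			"scheduler_scheduling_attempt_duration_seconds": nil,
        //			// One filter: collect a data item per listed label value.
        //			"scheduler_framework_extension_point_duration_seconds": {
        //				label:  "extension_point",
        //				values: []string{"Filter", "Score"},
        //			},
        //		},
        //	}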
   255  
   256  // metricsCollector collects metrics gathered by legacyregistry.DefaultGatherer.Gather().
   257  // Currently only Histogram metrics are supported.
   258  type metricsCollector struct {
   259  	*metricsCollectorConfig
   260  	labels map[string]string
   261  }
   262  
   263  func newMetricsCollector(config *metricsCollectorConfig, labels map[string]string) *metricsCollector {
   264  	return &metricsCollector{
   265  		metricsCollectorConfig: config,
   266  		labels:                 labels,
   267  	}
   268  }
   269  
   270  func (*metricsCollector) run(ctx context.Context) {
   271  	// metricsCollector doesn't need to start before the tests, so there is nothing to do here.
   272  }
   273  
   274  func (pc *metricsCollector) collect() []DataItem {
   275  	var dataItems []DataItem
   276  	for metric, labelVals := range pc.Metrics {
   277  		// No filter specified: aggregate all the metrics within the same metric family.
   278  		if labelVals == nil {
   279  			dataItem := collectHistogramVec(metric, pc.labels, nil)
   280  			if dataItem != nil {
   281  				dataItems = append(dataItems, *dataItem)
   282  			}
   283  		} else {
   284  			// Fetch the metrics from the metric family that match each label/value pair in lvMap.
   285  			for _, value := range labelVals.values {
   286  				lvMap := map[string]string{labelVals.label: value}
   287  				dataItem := collectHistogramVec(metric, pc.labels, lvMap)
   288  				if dataItem != nil {
   289  					dataItems = append(dataItems, *dataItem)
   290  				}
   291  			}
   292  		}
   293  	}
   294  	return dataItems
   295  }
   296  
   297  func collectHistogramVec(metric string, labels map[string]string, lvMap map[string]string) *DataItem {
   298  	vec, err := testutil.GetHistogramVecFromGatherer(legacyregistry.DefaultGatherer, metric, lvMap)
   299  	if err != nil {
   300  		klog.Error(err)
   301  		return nil
   302  	}
   303  
   304  	if err := vec.Validate(); err != nil {
   305  		klog.ErrorS(err, "Validation of the HistogramVec failed; the data for this metric won't be stored in a benchmark result file", "metric", metric, "labels", labels)
   306  		return nil
   307  	}
   308  
   309  	if vec.GetAggregatedSampleCount() == 0 {
   310  		klog.InfoS("This metric was not recorded, which may be expected; the data for this metric won't be stored in a benchmark result file", "metric", metric, "labels", labels)
   311  		return nil
   312  	}
   313  
   314  	q50 := vec.Quantile(0.50)
   315  	q90 := vec.Quantile(0.90)
   316  	q95 := vec.Quantile(0.95)
   317  	q99 := vec.Quantile(0.99)
   318  	avg := vec.Average()
   319  
   320  	msFactor := float64(time.Second) / float64(time.Millisecond)
   321  
   322  	// Copy labels and add "Metric" label for this metric.
   323  	labelMap := map[string]string{"Metric": metric}
   324  	for k, v := range labels {
   325  		labelMap[k] = v
   326  	}
   327  	for k, v := range lvMap {
   328  		labelMap[k] = v
   329  	}
   330  	return &DataItem{
   331  		Labels: labelMap,
   332  		Data: map[string]float64{
   333  			"Perc50":  q50 * msFactor,
   334  			"Perc90":  q90 * msFactor,
   335  			"Perc95":  q95 * msFactor,
   336  			"Perc99":  q99 * msFactor,
   337  			"Average": avg * msFactor,
   338  		},
   339  		Unit: "ms",
   340  	}
   341  }
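        // Worked example for collectHistogramVec (illustrative numbers): the histogram
        // quantiles above are in seconds, and msFactor = float64(time.Second)/float64(time.Millisecond)
        // = 1000, so a Perc90 of 0.25s is stored as 250 in the "ms"-unit data item.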
   342  
   343  type throughputCollector struct {
   344  	tb                    testing.TB
   345  	podInformer           coreinformers.PodInformer
   346  	schedulingThroughputs []float64
   347  	labels                map[string]string
   348  	namespaces            []string
   349  	errorMargin           float64
   350  }
   351  
   352  func newThroughputCollector(tb testing.TB, podInformer coreinformers.PodInformer, labels map[string]string, namespaces []string, errorMargin float64) *throughputCollector {
   353  	return &throughputCollector{
   354  		tb:          tb,
   355  		podInformer: podInformer,
   356  		labels:      labels,
   357  		namespaces:  namespaces,
   358  		errorMargin: errorMargin,
   359  	}
   360  }
   361  
   362  func (tc *throughputCollector) run(ctx context.Context) {
   363  	podsScheduled, _, err := getScheduledPods(tc.podInformer, tc.namespaces...)
   364  	if err != nil {
   365  		klog.Fatalf("%v", err)
   366  	}
   367  	lastScheduledCount := len(podsScheduled)
   368  	ticker := time.NewTicker(throughputSampleInterval)
   369  	defer ticker.Stop()
   370  	lastSampleTime := time.Now()
   371  	started := false
   372  	skipped := 0
   373  
   374  	for {
   375  		select {
   376  		case <-ctx.Done():
   377  			return
   378  		case <-ticker.C:
   379  			now := time.Now()
   380  			podsScheduled, _, err := getScheduledPods(tc.podInformer, tc.namespaces...)
   381  			if err != nil {
   382  				klog.Fatalf("%v", err)
   383  			}
   384  
   385  			scheduled := len(podsScheduled)
   386  			// Only do sampling if the number of scheduled pods is greater than zero.
   387  			if scheduled == 0 {
   388  				continue
   389  			}
   390  			if !started {
   391  				started = true
   392  				// Skip the initial sample. It's likely to be an outlier because
   393  				// sampling and creating pods get started independently.
   394  				lastScheduledCount = scheduled
   395  				lastSampleTime = now
   396  				continue
   397  			}
   398  
   399  			newScheduled := scheduled - lastScheduledCount
   400  			if newScheduled == 0 {
   401  				// Throughput would be zero for the interval.
   402  				// Instead of recording 0 pods/s, keep waiting
   403  				// until we see at least one additional pod
   404  				// being scheduled.
   405  				skipped++
   406  				continue
   407  			}
   408  
   409  			// This should be roughly equal to
   410  			// throughputSampleInterval * (skipped + 1), but we
   411  			// don't count on that because the goroutine might not
   412  			// be scheduled immediately when the timer
   413  			// triggers. Instead we track the actual time stamps.
   414  			duration := now.Sub(lastSampleTime)
   415  			durationInSeconds := duration.Seconds()
   416  			throughput := float64(newScheduled) / durationInSeconds
   417  			expectedDuration := throughputSampleInterval * time.Duration(skipped+1)
   418  			errorMargin := (duration - expectedDuration).Seconds() / expectedDuration.Seconds() * 100
   419  			if tc.errorMargin > 0 && math.Abs(errorMargin) > tc.errorMargin {
   420  				// This might affect the result, report it.
   421  				tc.tb.Errorf("ERROR: Expected throughput collector to sample at regular time intervals. The %d most recent intervals took %s instead of %s, a difference of %0.1f%%.", skipped+1, duration, expectedDuration, errorMargin)
   422  			}
   423  
   424  			// To keep percentiles accurate, we have to record multiple samples with the same
   425  			// throughput value if we skipped some intervals.
   426  			for i := 0; i <= skipped; i++ {
   427  				tc.schedulingThroughputs = append(tc.schedulingThroughputs, throughput)
   428  			}
   429  			lastScheduledCount = scheduled
   430  			klog.Infof("%d pods scheduled", lastScheduledCount)
   431  			skipped = 0
   432  			lastSampleTime = now
   433  		}
   434  	}
   435  }
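        // Worked example for the error-margin check in run (illustrative numbers): with
        // throughputSampleInterval = 1s and skipped = 2, expectedDuration is 3s. If the actual
        // duration between samples was 3.3s, errorMargin = (3.3s - 3s) / 3s * 100 = 10%, and
        // the collector only reports an error if tc.errorMargin is positive and smaller than 10.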
   436  
   437  func (tc *throughputCollector) collect() []DataItem {
   438  	throughputSummary := DataItem{Labels: tc.labels}
   439  	if length := len(tc.schedulingThroughputs); length > 0 {
   440  		sort.Float64s(tc.schedulingThroughputs)
   441  		sum := 0.0
   442  		for i := range tc.schedulingThroughputs {
   443  			sum += tc.schedulingThroughputs[i]
   444  		}
   445  
   446  		throughputSummary.Labels["Metric"] = "SchedulingThroughput"
   447  		throughputSummary.Data = map[string]float64{
   448  			"Average": sum / float64(length),
   449  			"Perc50":  tc.schedulingThroughputs[int(math.Ceil(float64(length*50)/100))-1],
   450  			"Perc90":  tc.schedulingThroughputs[int(math.Ceil(float64(length*90)/100))-1],
   451  			"Perc95":  tc.schedulingThroughputs[int(math.Ceil(float64(length*95)/100))-1],
   452  			"Perc99":  tc.schedulingThroughputs[int(math.Ceil(float64(length*99)/100))-1],
   453  		}
   454  		throughputSummary.Unit = "pods/s"
   455  	}
   456  
   457  	return []DataItem{throughputSummary}
   458  }
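        // Worked example for the percentile lookups in collect (illustrative numbers): with
        // 10 sorted samples, Perc90 reads index int(math.Ceil(float64(10*90)/100))-1 = 8,
        // i.e. the 9th smallest throughput sample, and Perc50 reads index 4, the 5th smallest.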