k8s.io/kubernetes@v1.29.3/test/e2e_node/density_test.go

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2015 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  package e2enode
    21  
    22  import (
    23  	"context"
    24  	"fmt"
    25  	"math"
    26  	"sort"
    27  	"strconv"
    28  	"sync"
    29  	"time"
    30  
    31  	"k8s.io/apimachinery/pkg/util/wait"
    32  
    33  	"github.com/onsi/ginkgo/v2"
    34  	"github.com/onsi/gomega"
    35  	v1 "k8s.io/api/core/v1"
    36  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    37  	"k8s.io/apimachinery/pkg/labels"
    38  	"k8s.io/apimachinery/pkg/runtime"
    39  	"k8s.io/apimachinery/pkg/watch"
    40  	"k8s.io/client-go/tools/cache"
    41  	kubeletstatsv1alpha1 "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
    42  	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
    43  	kubemetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
    44  	"k8s.io/kubernetes/test/e2e/framework"
    45  	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
    46  	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
    47  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    48  	imageutils "k8s.io/kubernetes/test/utils/image"
    49  	admissionapi "k8s.io/pod-security-admission/api"
    50  )
    51  
    52  const (
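        	// kubeletAddr is the kubelet's read-only HTTP endpoint on the local node;
        	// getPodStartLatency scrapes /metrics from it without going through the API server proxy.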
    53  	kubeletAddr = "localhost:10255"
    54  )
    55  
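        // These are serial node e2e tests. One common way to run them locally (assuming the
        // standard node e2e runner, invoked from the Kubernetes repository root) is:
        //
        //	make test-e2e-node FOCUS="Density" SKIP=""
        //
        // Adjust FOCUS/SKIP to your environment; the benchmark variants are labeled [Benchmark].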
    56  var _ = SIGDescribe("Density", framework.WithSerial(), framework.WithSlow(), func() {
    57  	const (
    58  		// The data collection times of the resource collector and the standalone
    59  		// cAdvisor are not synchronized, so the resource collector may miss data or
    60  		// collect duplicate data.
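        		// Polling every 500ms (twice per 1s cAdvisor housekeeping period) is
        		// presumably meant to keep such gaps small.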
    61  		containerStatsPollingPeriod = 500 * time.Millisecond
    62  	)
    63  
    64  	var (
    65  		rc *ResourceCollector
    66  	)
    67  
    68  	f := framework.NewDefaultFramework("density-test")
    69  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    70  
    71  	ginkgo.BeforeEach(func(ctx context.Context) {
    72  		// Start a standalone cAdvisor pod using 'CreateSync'; the pod is running when the call returns
    73  		e2epod.NewPodClient(f).CreateSync(ctx, getCadvisorPod())
    74  		// The resource collector monitors fine-grained CPU/memory usage via the standalone
    75  		// cAdvisor with a 1s housekeeping interval
    76  		rc = NewResourceCollector(containerStatsPollingPeriod)
    77  	})
    78  
    79  	ginkgo.Context("create a batch of pods", func() {
    80  		// TODO(coufon): the values are generous, set more precise limits with benchmark data
    81  		// and add more tests
    82  		dTests := []densityTest{
    83  			{
    84  				podsNr:   10,
    85  				interval: 0 * time.Millisecond,
    86  				cpuLimits: e2ekubelet.ContainersCPUSummary{
    87  					kubeletstatsv1alpha1.SystemContainerKubelet: {0.50: 0.30, 0.95: 0.50},
    88  					kubeletstatsv1alpha1.SystemContainerRuntime: {0.50: 0.40, 0.95: 0.60},
    89  				},
    90  				memLimits: e2ekubelet.ResourceUsagePerContainer{
    91  					kubeletstatsv1alpha1.SystemContainerKubelet: &e2ekubelet.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
    92  					kubeletstatsv1alpha1.SystemContainerRuntime: &e2ekubelet.ContainerResourceUsage{MemoryRSSInBytes: 500 * 1024 * 1024},
    93  				},
    94  				// percentile limits of single-pod startup latency
    95  				podStartupLimits: e2emetrics.LatencyMetric{
    96  					Perc50: 16 * time.Second,
    97  					Perc90: 18 * time.Second,
    98  					Perc99: 20 * time.Second,
    99  				},
   100  				// upper bound on the startup latency of a batch of pods
   101  				podBatchStartupLimit: 25 * time.Second,
   102  			},
   103  		}
   104  
   105  		for _, testArg := range dTests {
   106  			itArg := testArg
   107  			desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval", itArg.podsNr, itArg.interval)
   108  			ginkgo.It(desc, func(ctx context.Context) {
   109  				itArg.createMethod = "batch"
   110  				testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
   111  
   112  				batchLag, e2eLags := runDensityBatchTest(ctx, f, rc, itArg, testInfo, false)
   113  
   114  				ginkgo.By("Verifying latency")
   115  				logAndVerifyLatency(ctx, batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testInfo, true)
   116  
   117  				ginkgo.By("Verifying resource")
   118  				logAndVerifyResource(ctx, f, rc, itArg.cpuLimits, itArg.memLimits, testInfo, true)
   119  			})
   120  		}
   121  	})
   122  
   123  	ginkgo.Context("create a batch of pods", func() {
   124  		dTests := []densityTest{
   125  			{
   126  				podsNr:   10,
   127  				interval: 0 * time.Millisecond,
   128  			},
   129  			{
   130  				podsNr:   35,
   131  				interval: 0 * time.Millisecond,
   132  			},
   133  			{
   134  				podsNr:   90,
   135  				interval: 0 * time.Millisecond,
   136  			},
   137  			{
   138  				podsNr:   10,
   139  				interval: 100 * time.Millisecond,
   140  			},
   141  			{
   142  				podsNr:   35,
   143  				interval: 100 * time.Millisecond,
   144  			},
   145  			{
   146  				podsNr:   90,
   147  				interval: 100 * time.Millisecond,
   148  			},
   149  			{
   150  				podsNr:   10,
   151  				interval: 300 * time.Millisecond,
   152  			},
   153  			{
   154  				podsNr:   35,
   155  				interval: 300 * time.Millisecond,
   156  			},
   157  			{
   158  				podsNr:   90,
   159  				interval: 300 * time.Millisecond,
   160  			},
   161  		}
   162  
   163  		for _, testArg := range dTests {
   164  			itArg := testArg
   165  			desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval [Benchmark][NodeSpecialFeature:Benchmark]", itArg.podsNr, itArg.interval)
   166  			ginkgo.It(desc, func(ctx context.Context) {
   167  				itArg.createMethod = "batch"
   168  				testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
   169  
   170  				batchLag, e2eLags := runDensityBatchTest(ctx, f, rc, itArg, testInfo, true)
   171  
   172  				ginkgo.By("Verifying latency")
   173  				logAndVerifyLatency(ctx, batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testInfo, false)
   174  
   175  				ginkgo.By("Verifying resource")
   176  				logAndVerifyResource(ctx, f, rc, itArg.cpuLimits, itArg.memLimits, testInfo, false)
   177  			})
   178  		}
   179  	})
   180  
   181  	ginkgo.Context("create a batch of pods with higher API QPS", func() {
   182  		dTests := []densityTest{
   183  			{
   184  				podsNr:      90,
   185  				interval:    0 * time.Millisecond,
   186  				APIQPSLimit: 60,
   187  			},
   188  			{
   189  				podsNr:      90,
   190  				interval:    100 * time.Millisecond,
   191  				APIQPSLimit: 60,
   192  			},
   193  			{
   194  				podsNr:      90,
   195  				interval:    300 * time.Millisecond,
   196  				APIQPSLimit: 60,
   197  			},
   198  		}
   199  
   200  		for _, testArg := range dTests {
   201  			itArg := testArg
   202  			ginkgo.Context("", func() {
   203  				desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval (QPS %d) [Benchmark][NodeSpecialFeature:Benchmark]", itArg.podsNr, itArg.interval, itArg.APIQPSLimit)
   204  				// The latency caused by the API QPS limit takes a large portion (up to ~33%) of e2e latency.
   205  				// It makes the kubelet's pod startup latency (and creation throughput) underestimated.
   206  				// Here we raise the API QPS limit from the default 5 to 60 in order to measure real kubelet performance.
   207  				// Note that this causes higher resource usage.
   208  				tempSetCurrentKubeletConfig(f, func(ctx context.Context, cfg *kubeletconfig.KubeletConfiguration) {
   209  					framework.Logf("Old QPS limit is: %d", cfg.KubeAPIQPS)
   210  					// Set new API QPS limit
   211  					cfg.KubeAPIQPS = int32(itArg.APIQPSLimit)
   212  				})
   213  				ginkgo.It(desc, func(ctx context.Context) {
   214  					itArg.createMethod = "batch"
   215  					testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
   216  					batchLag, e2eLags := runDensityBatchTest(ctx, f, rc, itArg, testInfo, true)
   217  
   218  					ginkgo.By("Verifying latency")
   219  					logAndVerifyLatency(ctx, batchLag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testInfo, false)
   220  
   221  					ginkgo.By("Verifying resource")
   222  					logAndVerifyResource(ctx, f, rc, itArg.cpuLimits, itArg.memLimits, testInfo, false)
   223  				})
   224  			})
   225  		}
   226  	})
   227  
   228  	ginkgo.Context("create a sequence of pods", func() {
   229  		dTests := []densityTest{
   230  			{
   231  				podsNr:   10,
   232  				bgPodsNr: 50,
   233  				cpuLimits: e2ekubelet.ContainersCPUSummary{
   234  					kubeletstatsv1alpha1.SystemContainerKubelet: {0.50: 0.30, 0.95: 0.50},
   235  					kubeletstatsv1alpha1.SystemContainerRuntime: {0.50: 0.40, 0.95: 0.60},
   236  				},
   237  				memLimits: e2ekubelet.ResourceUsagePerContainer{
   238  					kubeletstatsv1alpha1.SystemContainerKubelet: &e2ekubelet.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
   239  					kubeletstatsv1alpha1.SystemContainerRuntime: &e2ekubelet.ContainerResourceUsage{MemoryRSSInBytes: 500 * 1024 * 1024},
   240  				},
   241  				podStartupLimits: e2emetrics.LatencyMetric{
   242  					Perc50: 5000 * time.Millisecond,
   243  					Perc90: 9000 * time.Millisecond,
   244  					Perc99: 10000 * time.Millisecond,
   245  				},
   246  			},
   247  		}
   248  
   249  		for _, testArg := range dTests {
   250  			itArg := testArg
   251  			desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods", itArg.podsNr, itArg.bgPodsNr)
   252  			ginkgo.It(desc, func(ctx context.Context) {
   253  				itArg.createMethod = "sequence"
   254  				testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
   255  				batchlag, e2eLags := runDensitySeqTest(ctx, f, rc, itArg, testInfo)
   256  
   257  				ginkgo.By("Verifying latency")
   258  				logAndVerifyLatency(ctx, batchlag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testInfo, true)
   259  
   260  				ginkgo.By("Verifying resource")
   261  				logAndVerifyResource(ctx, f, rc, itArg.cpuLimits, itArg.memLimits, testInfo, true)
   262  			})
   263  		}
   264  	})
   265  
   266  	ginkgo.Context("create a sequence of pods", func() {
   267  		dTests := []densityTest{
   268  			{
   269  				podsNr:   10,
   270  				bgPodsNr: 50,
   271  			},
   272  			{
   273  				podsNr:   30,
   274  				bgPodsNr: 50,
   275  			},
   276  			{
   277  				podsNr:   50,
   278  				bgPodsNr: 50,
   279  			},
   280  		}
   281  
   282  		for _, testArg := range dTests {
   283  			itArg := testArg
   284  			desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods [Benchmark][NodeSpecialFeature:Benchmark]", itArg.podsNr, itArg.bgPodsNr)
   285  			ginkgo.It(desc, func(ctx context.Context) {
   286  				itArg.createMethod = "sequence"
   287  				testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
   288  				batchlag, e2eLags := runDensitySeqTest(ctx, f, rc, itArg, testInfo)
   289  
   290  				ginkgo.By("Verifying latency")
   291  				logAndVerifyLatency(ctx, batchlag, e2eLags, itArg.podStartupLimits, itArg.podBatchStartupLimit, testInfo, false)
   292  
   293  				ginkgo.By("Verifying resource")
   294  				logAndVerifyResource(ctx, f, rc, itArg.cpuLimits, itArg.memLimits, testInfo, false)
   295  			})
   296  		}
   297  	})
   298  })
   299  
   300  type densityTest struct {
   301  	// number of pods
   302  	podsNr int
   303  	// number of background pods
   304  	bgPodsNr int
   305  	// interval between pod creations (rate control)
   306  	interval time.Duration
   307  	// create pods in 'batch' or 'sequence'
   308  	createMethod string
   309  	// API QPS limit
   310  	APIQPSLimit int
   311  	// performance limits
   312  	cpuLimits            e2ekubelet.ContainersCPUSummary
   313  	memLimits            e2ekubelet.ResourceUsagePerContainer
   314  	podStartupLimits     e2emetrics.LatencyMetric
   315  	podBatchStartupLimit time.Duration
   316  }
   317  
   318  func (dt *densityTest) getTestName() string {
   319  	// The current default API QPS limit is 5
   320  	// TODO(coufon): is there any way to avoid hard-coding this?
   321  	APIQPSLimit := 5
   322  	if dt.APIQPSLimit > 0 {
   323  		APIQPSLimit = dt.APIQPSLimit
   324  	}
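        	// For example, a batch test with 10 pods, no background pods, a 0ms interval and the
        	// default QPS limit yields "density_create_batch_10_0_0_5".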
   325  	return fmt.Sprintf("density_create_%s_%d_%d_%d_%d", dt.createMethod, dt.podsNr, dt.bgPodsNr,
   326  		dt.interval.Milliseconds(), APIQPSLimit)
   327  }
   328  
   329  // runDensityBatchTest runs the density batch pod creation test
   330  func runDensityBatchTest(ctx context.Context, f *framework.Framework, rc *ResourceCollector, testArg densityTest, testInfo map[string]string,
   331  	isLogTimeSeries bool) (time.Duration, []e2emetrics.PodLatencyData) {
   332  	const (
   333  		podType               = "density_test_pod"
   334  		sleepBeforeCreatePods = 30 * time.Second
   335  	)
   336  	var (
   337  		mutex      = &sync.Mutex{}
   338  		watchTimes = make(map[string]metav1.Time)
   339  		stopCh     = make(chan struct{})
   340  	)
   341  
   342  	// create test pod data structure
   343  	pods := newTestPods(testArg.podsNr, true, imageutils.GetPauseImageName(), podType)
   344  
   345  	// the controller watches changes to pod status
   346  	controller := newInformerWatchPod(ctx, f, mutex, watchTimes, podType)
   347  	go controller.Run(stopCh)
   348  	defer close(stopCh)
   349  
   350  	// TODO(coufon): in testing we found that kubelet starts while it is still busy with other work, so 'syncLoop'
   351  	// does not respond to pod creation immediately and creating the first pod is delayed by around 5s.
   352  	// The node status is already 'Ready', so waiting for and re-checking node readiness does not help here.
   353  	// Wait here for a grace period to let 'syncLoop' become ready.
   354  	time.Sleep(sleepBeforeCreatePods)
   355  
   356  	rc.Start()
   357  
   358  	ginkgo.By("Creating a batch of pods")
   359  	// createBatchPodWithRateControl returns a map from pod name to creation timestamp
   360  	createTimes := createBatchPodWithRateControl(ctx, f, pods, testArg.interval)
   361  
   362  	ginkgo.By("Waiting for all Pods to be observed by the watch...")
   363  
   364  	gomega.Eventually(ctx, func() bool {
        		// watchTimes is written concurrently by the informer callbacks, so take the lock.
        		mutex.Lock()
        		defer mutex.Unlock()
   365  		return len(watchTimes) == testArg.podsNr
   366  	}, 10*time.Minute, 10*time.Second).Should(gomega.BeTrue())
   367  
   368  	if len(watchTimes) < testArg.podsNr {
   369  		framework.Failf("Timeout reached waiting for all Pods to be observed by the watch.")
   370  	}
   371  
   372  	// Analyze results
   373  	var (
   374  		firstCreate metav1.Time
   375  		lastRunning metav1.Time
   376  		init        = true
   377  		e2eLags     = make([]e2emetrics.PodLatencyData, 0)
   378  	)
   379  
   380  	for name, create := range createTimes {
   381  		watch := watchTimes[name]
   382  		gomega.Expect(watchTimes).To(gomega.HaveKey(name))
   383  
   384  		e2eLags = append(e2eLags,
   385  			e2emetrics.PodLatencyData{Name: name, Latency: watch.Time.Sub(create.Time)})
   386  
   387  		if !init {
   388  			if firstCreate.Time.After(create.Time) {
   389  				firstCreate = create
   390  			}
   391  			if lastRunning.Time.Before(watch.Time) {
   392  				lastRunning = watch
   393  			}
   394  		} else {
   395  			init = false
   396  			firstCreate, lastRunning = create, watch
   397  		}
   398  	}
   399  
   400  	sort.Sort(e2emetrics.LatencySlice(e2eLags))
   401  	batchLag := lastRunning.Time.Sub(firstCreate.Time)
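        	// batchLag is the end-to-end time from the earliest pod creation to the last pod
        	// observed Running, i.e. the startup latency of the whole batch.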
   402  
   403  	rc.Stop()
   404  	deletePodsSync(ctx, f, pods)
   405  
   406  	// Log time series data.
   407  	if isLogTimeSeries {
   408  		logDensityTimeSeries(rc, createTimes, watchTimes, testInfo)
   409  	}
   410  	// Log throughput data.
   411  	logPodCreateThroughput(batchLag, e2eLags, testArg.podsNr, testInfo)
   412  
   413  	deletePodsSync(ctx, f, []*v1.Pod{getCadvisorPod()})
   414  
   415  	return batchLag, e2eLags
   416  }
   417  
   418  // runDensitySeqTest runs the density sequential pod creation test
   419  func runDensitySeqTest(ctx context.Context, f *framework.Framework, rc *ResourceCollector, testArg densityTest, testInfo map[string]string) (time.Duration, []e2emetrics.PodLatencyData) {
   420  	const (
   421  		podType               = "density_test_pod"
   422  		sleepBeforeCreatePods = 30 * time.Second
   423  	)
   424  	bgPods := newTestPods(testArg.bgPodsNr, true, imageutils.GetPauseImageName(), "background_pod")
   425  	testPods := newTestPods(testArg.podsNr, true, imageutils.GetPauseImageName(), podType)
   426  
   427  	ginkgo.By("Creating a batch of background pods")
   428  
   429  	// CreateBatch is synchronized; all pods are running when it returns
   430  	e2epod.NewPodClient(f).CreateBatch(ctx, bgPods)
   431  
   432  	time.Sleep(sleepBeforeCreatePods)
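        	// Same grace period as in runDensityBatchTest, presumably to let kubelet's 'syncLoop'
        	// settle after the background pods are created.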
   433  
   434  	rc.Start()
   435  
   436  	// Create pods sequentially (back-to-back). e2eLags have been sorted.
   437  	batchlag, e2eLags := createBatchPodSequential(ctx, f, testPods, podType)
   438  
   439  	rc.Stop()
   440  	deletePodsSync(ctx, f, append(bgPods, testPods...))
   441  
   442  	// Log throughput data.
   443  	logPodCreateThroughput(batchlag, e2eLags, testArg.podsNr, testInfo)
   444  
   445  	deletePodsSync(ctx, f, []*v1.Pod{getCadvisorPod()})
   446  
   447  	return batchlag, e2eLags
   448  }
   449  
   450  // createBatchPodWithRateControl creates a batch of pods concurrently, using one goroutine per creation;
   451  // between creations it sleeps for an interval to control throughput.
   452  func createBatchPodWithRateControl(ctx context.Context, f *framework.Framework, pods []*v1.Pod, interval time.Duration) map[string]metav1.Time {
   453  	createTimes := make(map[string]metav1.Time)
   454  	for i := range pods {
   455  		pod := pods[i]
   456  		createTimes[pod.ObjectMeta.Name] = metav1.Now()
   457  		go e2epod.NewPodClient(f).Create(ctx, pod)
   458  		time.Sleep(interval)
   459  	}
   460  	return createTimes
   461  }
   462  
   463  // getPodStartLatency gets the Prometheus 'pod start duration' metric from the kubelet
   464  func getPodStartLatency(ctx context.Context, node string) (e2emetrics.KubeletLatencyMetrics, error) {
   465  	latencyMetrics := e2emetrics.KubeletLatencyMetrics{}
   466  	ms, err := e2emetrics.GrabKubeletMetricsWithoutProxy(ctx, node, "/metrics")
   467  	framework.ExpectNoError(err, "Failed to get kubelet metrics without proxy in node %s", node)
   468  
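        	// Keep only the quantile samples of the kubelet's pod start duration summary.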
   469  	for _, samples := range ms {
   470  		for _, sample := range samples {
   471  			if sample.Metric["__name__"] == kubemetrics.KubeletSubsystem+"_"+kubemetrics.PodStartDurationKey {
   472  				quantile, _ := strconv.ParseFloat(string(sample.Metric["quantile"]), 64)
   473  				latencyMetrics = append(latencyMetrics,
   474  					e2emetrics.KubeletLatencyMetric{
   475  						Quantile: quantile,
   476  						Method:   kubemetrics.PodStartDurationKey,
   477  						Latency:  time.Duration(int(sample.Value)) * time.Microsecond})
   478  			}
   479  		}
   480  	}
   481  	return latencyMetrics, nil
   482  }
   483  
   484  // newInformerWatchPod creates an informer that records, for each pod of the given type, the time at which it is first observed Running.
   485  func newInformerWatchPod(ctx context.Context, f *framework.Framework, mutex *sync.Mutex, watchTimes map[string]metav1.Time, podType string) cache.Controller {
   486  	ns := f.Namespace.Name
   487  	checkPodRunning := func(p *v1.Pod) {
   488  		mutex.Lock()
   489  		defer mutex.Unlock()
   490  		defer ginkgo.GinkgoRecover()
   491  
   492  		if p.Status.Phase == v1.PodRunning {
   493  			if _, found := watchTimes[p.Name]; !found {
   494  				watchTimes[p.Name] = metav1.Now()
   495  			}
   496  		}
   497  	}
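        	// The Add/Update handlers below invoke checkPodRunning in separate goroutines,
        	// presumably so that recording the Running timestamp never blocks the informer's
        	// event processing.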
   498  
   499  	_, controller := cache.NewInformer(
   500  		&cache.ListWatch{
   501  			ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
   502  				options.LabelSelector = labels.SelectorFromSet(labels.Set{"type": podType}).String()
   503  				obj, err := f.ClientSet.CoreV1().Pods(ns).List(ctx, options)
   504  				return runtime.Object(obj), err
   505  			},
   506  			WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
   507  				options.LabelSelector = labels.SelectorFromSet(labels.Set{"type": podType}).String()
   508  				return f.ClientSet.CoreV1().Pods(ns).Watch(ctx, options)
   509  			},
   510  		},
   511  		&v1.Pod{},
   512  		0,
   513  		cache.ResourceEventHandlerFuncs{
   514  			AddFunc: func(obj interface{}) {
   515  				p, ok := obj.(*v1.Pod)
   516  				if !ok {
   517  					framework.Failf("Failed to cast object %T to Pod", obj)
   518  				}
   519  				go checkPodRunning(p)
   520  			},
   521  			UpdateFunc: func(oldObj, newObj interface{}) {
   522  				p, ok := newObj.(*v1.Pod)
   523  				if !ok {
   524  					framework.Failf("Failed to cast object %T to Pod", newObj)
   525  				}
   526  				go checkPodRunning(p)
   527  			},
   528  		},
   529  	)
   530  	return controller
   531  }
   532  
   533  // createBatchPodSequential creates pods back-to-back in sequence.
   534  func createBatchPodSequential(ctx context.Context, f *framework.Framework, pods []*v1.Pod, podType string) (time.Duration, []e2emetrics.PodLatencyData) {
   535  	var (
   536  		mutex       = &sync.Mutex{}
   537  		watchTimes  = make(map[string]metav1.Time)
   538  		stopCh      = make(chan struct{})
   539  		firstCreate metav1.Time
   540  		lastRunning metav1.Time
   541  		init        = true
   542  	)
   543  	// the controller watches changes to pod status
   544  	controller := newInformerWatchPod(ctx, f, mutex, watchTimes, podType)
   545  	go controller.Run(stopCh)
   546  	defer close(stopCh)
   547  
   548  	batchStartTime := metav1.Now()
   549  	e2eLags := make([]e2emetrics.PodLatencyData, 0)
   550  	createTimes := make(map[string]metav1.Time)
   551  	for _, pod := range pods {
   552  		create := metav1.Now()
   553  		createTimes[pod.Name] = create
   554  		p := e2epod.NewPodClient(f).Create(ctx, pod)
   555  		framework.ExpectNoError(wait.PollUntilContextTimeout(ctx, 2*time.Second, framework.PodStartTimeout, true, podWatchedRunning(watchTimes, p.Name)))
   556  		e2eLags = append(e2eLags,
   557  			e2emetrics.PodLatencyData{Name: pod.Name, Latency: watchTimes[pod.Name].Time.Sub(create.Time)})
   558  	}
   559  
   560  	for name, create := range createTimes {
   561  		watch := watchTimes[name]
   562  		gomega.Expect(watchTimes).To(gomega.HaveKey(name))
   563  		if !init {
   564  			if firstCreate.Time.After(create.Time) {
   565  				firstCreate = create
   566  			}
   567  			if lastRunning.Time.Before(watch.Time) {
   568  				lastRunning = watch
   569  			}
   570  		} else {
   571  			init = false
   572  			firstCreate, lastRunning = create, watch
   573  		}
   574  	}
   575  	batchLag := lastRunning.Time.Sub(batchStartTime.Time)
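        	// Note that, unlike the batch test, batchLag here is measured from batchStartTime
        	// (just before the first create call) rather than from the earliest recorded create time.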
   576  	sort.Sort(e2emetrics.LatencySlice(e2eLags))
   577  	return batchLag, e2eLags
   578  }
   579  
   580  // podWatchedRunning returns a condition that reports whether the pod has been observed Running, i.e. whether its watchTime has been recorded by the informer.
   581  func podWatchedRunning(watchTimes map[string]metav1.Time, podName string) wait.ConditionWithContextFunc {
   582  	return func(ctx context.Context) (done bool, err error) {
   583  		if _, found := watchTimes[podName]; found {
   584  			return true, nil
   585  		}
   586  		return false, nil
   587  	}
   588  }
   589  
   590  // verifyLatencyWithinThreshold verifies that the 50th, 90th and 99th percentiles of a latency metric are
   591  // within the expected thresholds.
   592  func verifyLatencyWithinThreshold(threshold, actual e2emetrics.LatencyMetric, metricName string) error {
   593  	if actual.Perc50 > threshold.Perc50 {
   594  		return fmt.Errorf("too high %v latency 50th percentile: %v", metricName, actual.Perc50)
   595  	}
   596  	if actual.Perc90 > threshold.Perc90 {
   597  		return fmt.Errorf("too high %v latency 90th percentile: %v", metricName, actual.Perc90)
   598  	}
   599  	if actual.Perc99 > threshold.Perc99 {
   600  		return fmt.Errorf("too high %v latency 99th percentile: %v", metricName, actual.Perc99)
   601  	}
   602  	return nil
   603  }
   604  
   605  // extractLatencyMetrics returns latency metrics for the 50th, 90th, 99th and 100th percentiles.
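        // The input is expected to be sorted in ascending order (callers sort via
        // e2emetrics.LatencySlice). For example, with 10 samples the 50th percentile is the
        // 5th element (index 4), the 90th percentile the 9th (index 8), and the 99th the 10th (index 9).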
   606  func extractLatencyMetrics(latencies []e2emetrics.PodLatencyData) e2emetrics.LatencyMetric {
   607  	length := len(latencies)
   608  	perc50 := latencies[int(math.Ceil(float64(length*50)/100))-1].Latency
   609  	perc90 := latencies[int(math.Ceil(float64(length*90)/100))-1].Latency
   610  	perc99 := latencies[int(math.Ceil(float64(length*99)/100))-1].Latency
   611  	perc100 := latencies[length-1].Latency
   612  	return e2emetrics.LatencyMetric{Perc50: perc50, Perc90: perc90, Perc99: perc99, Perc100: perc100}
   613  }
   614  
   615  // printLatencies logs the worst 10% of latencies and the percentile summary in a readable format.
   616  func printLatencies(latencies []e2emetrics.PodLatencyData, header string) {
   617  	metrics := extractLatencyMetrics(latencies)
   618  	framework.Logf("10%% %s: %v", header, latencies[(len(latencies)*9)/10:])
   619  	framework.Logf("perc50: %v, perc90: %v, perc99: %v", metrics.Perc50, metrics.Perc90, metrics.Perc99)
   620  }
   621  
   622  // logAndVerifyLatency logs pod creation latency and, when isVerify is true, verifies that it is within the limits.
   623  func logAndVerifyLatency(ctx context.Context, batchLag time.Duration, e2eLags []e2emetrics.PodLatencyData, podStartupLimits e2emetrics.LatencyMetric,
   624  	podBatchStartupLimit time.Duration, testInfo map[string]string, isVerify bool) {
   625  	printLatencies(e2eLags, "worst client e2e total latencies")
   626  
   627  	// TODO(coufon): do not trust 'kubelet' metrics since they are not reset!
   628  	latencyMetrics, _ := getPodStartLatency(ctx, kubeletAddr)
   629  	framework.Logf("Kubelet Prometheus metrics (not reset):\n%s", framework.PrettyPrintJSON(latencyMetrics))
   630  
   631  	podStartupLatency := extractLatencyMetrics(e2eLags)
   632  
   633  	// log latency perf data
   634  	logPerfData(getLatencyPerfData(podStartupLatency, testInfo), "latency")
   635  
   636  	if isVerify {
   637  		// check whether e2e pod startup time is acceptable.
   638  		framework.ExpectNoError(verifyLatencyWithinThreshold(podStartupLimits, podStartupLatency, "pod startup"))
   639  
   640  		// check batch pod creation latency
   641  		if podBatchStartupLimit > 0 {
   642  			if batchLag > podBatchStartupLimit {
   643  				framework.Failf("Batch creation startup time %v exceeds limit %v", batchLag, podBatchStartupLimit)
   644  			}
   645  		}
   646  	}
   647  }
   648  
   649  // logPodCreateThroughput calculates and logs pod creation throughput.
   650  func logPodCreateThroughput(batchLag time.Duration, e2eLags []e2emetrics.PodLatencyData, podsNr int, testInfo map[string]string) {
   651  	logPerfData(getThroughputPerfData(batchLag, e2eLags, podsNr, testInfo), "throughput")
   652  }