k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e_node/restart_test.go

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2015 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  package e2enode
    21  
    22  import (
    23  	"context"
    24  	"fmt"
    25  	"os/exec"
    26  	"time"
    27  
    28  	v1 "k8s.io/api/core/v1"
    29  	"k8s.io/apimachinery/pkg/api/resource"
    30  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    31  	"k8s.io/apimachinery/pkg/watch"
    32  	"k8s.io/client-go/tools/cache"
    33  	watchtools "k8s.io/client-go/tools/watch"
    34  	"k8s.io/kubernetes/test/e2e/framework"
    35  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    36  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    37  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    38  	testutils "k8s.io/kubernetes/test/utils"
    39  	imageutils "k8s.io/kubernetes/test/utils/image"
    40  	admissionapi "k8s.io/pod-security-admission/api"
    41  
    42  	"github.com/onsi/ginkgo/v2"
    43  	"github.com/onsi/gomega"
    44  	"k8s.io/apimachinery/pkg/util/uuid"
    45  )
    46  
    47  type podCondition func(pod *v1.Pod) (bool, error)
    48  
    49  // waitForPodsCondition waits up to `timeout` for at least `podCount` pods to match the given pod condition.
    50  // It returns the pods that matched the condition on the most recent poll, whether or not `podCount` was reached.
    51  func waitForPodsCondition(ctx context.Context, f *framework.Framework, podCount int, timeout time.Duration, condition podCondition) (runningPods []*v1.Pod) {
    52  	for start := time.Now(); time.Since(start) < timeout; time.Sleep(10 * time.Second) {
    53  		podList, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{})
    54  		if err != nil {
    55  			framework.Logf("Failed to list pods on node: %v", err)
    56  			continue
    57  		}
    58  
    59  		runningPods = []*v1.Pod{}
    60  		for i := range podList.Items {
    61  			pod := podList.Items[i]
    62  			if r, err := condition(&pod); err != nil || !r {
    63  				continue
    64  			}
    65  			runningPods = append(runningPods, &pod)
    66  		}
    67  		framework.Logf("Running pod count %d", len(runningPods))
    68  		if len(runningPods) >= podCount {
    69  			break
    70  		}
    71  	}
    72  	return runningPods
    73  }
    74  
    75  var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), framework.WithDisruptive(), func() {
    76  	const (
    77  		// Saturate the node. It is not necessary that all of these pods enter
    78  		// Running/Ready, because we don't know the number of cores on the
    79  		// test node or which default limits (if any) are applied. It is
    80  		// essential that no containers end up terminated. 100 was chosen
    81  		// because it is the max pods per node.
    82  		podCount            = 100
    83  		podCreationInterval = 100 * time.Millisecond
    84  		recoverTimeout      = 5 * time.Minute
    85  		startTimeout        = 3 * time.Minute
    86  		// restartCount is chosen so that, even with only minPods running, we
    87  		// exhaust the default allocation of a /24.
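        		// (A /24 provides 256 addresses; if each runtime restart leaked the IPs of the
        		// running pods, 50 pods * 6 restarts = 300 leaked addresses would exceed the pool.)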
    88  		minPods      = 50
    89  		restartCount = 6
    90  	)
    91  
    92  	f := framework.NewDefaultFramework("restart-test")
    93  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    94  	ginkgo.Context("Container Runtime", func() {
    95  		ginkgo.Context("Network", func() {
    96  			ginkgo.It("should recover from ip leak", func(ctx context.Context) {
    97  				pods := newTestPods(podCount, false, imageutils.GetPauseImageName(), "restart-container-runtime-test")
    98  				ginkgo.By(fmt.Sprintf("Trying to create %d pods on node", len(pods)))
    99  				createBatchPodWithRateControl(ctx, f, pods, podCreationInterval)
   100  				ginkgo.DeferCleanup(deletePodsSync, f, pods)
   101  
   102  				// Give the node some time to stabilize; assume that pods which become Running/Ready
   103  				// (or Succeeded) within startTimeout fit on the node and that the node is now saturated.
   104  				runningPods := waitForPodsCondition(ctx, f, podCount, startTimeout, testutils.PodRunningReadyOrSucceeded)
   105  				if len(runningPods) < minPods {
   106  					framework.Failf("Failed to start %d pods, cannot test that restarting container runtime doesn't leak IPs", minPods)
   107  				}
   108  
   109  				for i := 0; i < restartCount; i++ {
   110  					ginkgo.By(fmt.Sprintf("Killing container runtime iteration %d", i))
   111  					// Wait for container runtime to be running
   112  					var pid int
   113  					gomega.Eventually(ctx, func() error {
   114  						runtimePids, err := getPidsForProcess(framework.TestContext.ContainerRuntimeProcessName, framework.TestContext.ContainerRuntimePidFile)
   115  						if err != nil {
   116  							return err
   117  						}
   118  						if len(runtimePids) != 1 {
   119  							return fmt.Errorf("unexpected container runtime pid list: %+v", runtimePids)
   120  						}
   121  						// Make sure the container runtime is actually running; the pid read from the pid file may be stale.
   122  						pid = runtimePids[0]
   123  						if _, err := exec.Command("sudo", "ps", "-p", fmt.Sprintf("%d", pid)).CombinedOutput(); err != nil {
   124  							return err
   125  						}
   126  						return nil
   127  					}, 1*time.Minute, 2*time.Second).Should(gomega.BeNil())
   128  					if stdout, err := exec.Command("sudo", "kill", "-SIGKILL", fmt.Sprintf("%d", pid)).CombinedOutput(); err != nil {
   129  						framework.Failf("Failed to kill container runtime (pid=%d): %v, stdout: %q", pid, err, string(stdout))
   130  					}
   131  					// Assume that container runtime will be restarted by systemd/supervisord etc.
   132  					time.Sleep(20 * time.Second)
   133  				}
   134  
   135  				ginkgo.By("Checking currently Running/Ready pods")
   136  				postRestartRunningPods := waitForPodsCondition(ctx, f, len(runningPods), recoverTimeout, testutils.PodRunningReadyOrSucceeded)
   137  				if len(postRestartRunningPods) == 0 {
   138  					framework.Failf("Failed to start *any* pods after container runtime restart, this might indicate an IP leak")
   139  				}
   140  				ginkgo.By("Confirm no containers have terminated")
   141  				for _, pod := range postRestartRunningPods {
   142  					if c := testutils.TerminatedContainers(pod); len(c) != 0 {
   143  						framework.Failf("Pod %q has failed containers %+v after container runtime restart, this might indicate an IP leak", pod.Name, c)
   144  					}
   145  				}
   146  				ginkgo.By(fmt.Sprintf("Container runtime restart test passed with %d pods", len(postRestartRunningPods)))
   147  			})
   148  		})
   149  	})
   150  	ginkgo.Context("Kubelet", func() {
   151  		ginkgo.It("should correctly account for terminated pods after restart", func(ctx context.Context) {
   152  			node := getLocalNode(ctx, f)
   153  			cpus := node.Status.Allocatable[v1.ResourceCPU]
   154  			numCpus := int((&cpus).Value())
   155  			if numCpus < 1 {
   156  				e2eskipper.Skipf("insufficient CPU available for kubelet restart test")
   157  			}
   158  			if numCpus > 18 {
   159  				// 950m * 19 = 18050m (~18 CPU), leaving ~950m free -> not enough saturation to block the scheduling of another 950m pod
   160  				e2eskipper.Skipf("test will return false positives on a machine with >18 cores")
   161  			}
   162  
   163  			// Create as many restartNever pods as there are allocatable CPUs on the
   164  			// node; if they are not correctly accounted for as terminated later,
   165  			// they will fill up all of the node's capacity.
   166  			podCountRestartNever := numCpus
   167  			ginkgo.By(fmt.Sprintf("creating %d RestartNever pods on node", podCountRestartNever))
   168  			restartNeverPods := newTestPods(podCountRestartNever, false, imageutils.GetE2EImage(imageutils.BusyBox), "restart-kubelet-test")
   169  			for _, pod := range restartNeverPods {
   170  				pod.Spec.RestartPolicy = "Never"
   171  				pod.Spec.Containers[0].Command = []string{"echo", "hi"}
   172  				pod.Spec.Containers[0].Resources.Limits = v1.ResourceList{
   173  					v1.ResourceCPU: resource.MustParse("950m"), // leave a little room for other workloads
   174  				}
   175  			}
   176  			createBatchPodWithRateControl(ctx, f, restartNeverPods, podCreationInterval)
   177  			ginkgo.DeferCleanup(deletePodsSync, f, restartNeverPods)
   178  			completedPods := waitForPodsCondition(ctx, f, podCountRestartNever, startTimeout, testutils.PodSucceeded)
   179  
   180  			if len(completedPods) < podCountRestartNever {
   181  				framework.Failf("Failed to run sufficient restartNever pods, got %d but expected %d", len(completedPods), podCountRestartNever)
   182  			}
   183  
   184  			podCountRestartAlways := (numCpus / 2) + 1
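        			// At 1 CPU apiece, these pods need just over half of the node's allocatable CPU,
        			// which is only available if the completed restartNever pods (950m each, roughly
        			// 95% of the node) are correctly accounted for as terminated.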
   185  			ginkgo.By(fmt.Sprintf("creating %d RestartAlways pods on node", podCountRestartAlways))
   186  			restartAlwaysPods := newTestPods(podCountRestartAlways, false, imageutils.GetPauseImageName(), "restart-kubelet-test")
   187  			for _, pod := range restartAlwaysPods {
   188  				pod.Spec.Containers[0].Resources.Limits = v1.ResourceList{
   189  					v1.ResourceCPU: resource.MustParse("1"),
   190  				}
   191  			}
   192  			createBatchPodWithRateControl(ctx, f, restartAlwaysPods, podCreationInterval)
   193  			ginkgo.DeferCleanup(deletePodsSync, f, restartAlwaysPods)
   194  
   195  			numAllPods := podCountRestartNever + podCountRestartAlways
   196  			allPods := waitForPodsCondition(ctx, f, numAllPods, startTimeout, testutils.PodRunningReadyOrSucceeded)
   197  			if len(allPods) < numAllPods {
   198  				framework.Failf("Failed to run sufficient restartAlways pods, got %d but expected %d", len(allPods), numAllPods)
   199  			}
   200  
   201  			ginkgo.By("killing and restarting kubelet")
   202  			// We want to kill the kubelet rather than restart it gracefully
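        			// stopKubelet is a node e2e helper that stops the kubelet service and returns a
        			// closure that starts it again.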
   203  			startKubelet := stopKubelet()
   204  			startKubelet()
   205  
   206  			// If this test works correctly, each of these pods will exit
   207  			// with no issue. But if accounting breaks, the kubelet may think
   208  			// the old, terminated pods are still consuming CPU, and pods
   209  			// scheduled after the restart will fail with an OutOfCpu error.
   210  			ginkgo.By("verifying restartNever pods succeed and restartAlways pods stay running")
   211  			for start := time.Now(); time.Since(start) < startTimeout && ctx.Err() == nil; time.Sleep(10 * time.Second) {
   212  				postRestartRunningPods := waitForPodsCondition(ctx, f, numAllPods, recoverTimeout, testutils.PodRunningReadyOrSucceeded)
   213  				if len(postRestartRunningPods) < numAllPods {
   214  					framework.Failf("fewer pods are running after kubelet restart, got %d but expected %d", len(postRestartRunningPods), numAllPods)
   215  				}
   216  			}
   217  		})
   218  		// Regression test for https://issues.k8s.io/116925
   219  		ginkgo.It("should delete pods which are marked as terminal and have a deletion timestamp set after restart", func(ctx context.Context) {
   220  			podName := "terminal-restart-pod" + string(uuid.NewUUID())
   221  			gracePeriod := int64(30)
   222  			podSpec := e2epod.MustMixinRestrictedPodSecurity(&v1.Pod{
   223  				ObjectMeta: metav1.ObjectMeta{
   224  					Name: podName,
   225  				},
   226  				Spec: v1.PodSpec{
   227  					TerminationGracePeriodSeconds: &gracePeriod,
   228  					RestartPolicy:                 v1.RestartPolicyNever,
   229  					Containers: []v1.Container{
   230  						{
   231  							Name:    podName,
   232  							Image:   imageutils.GetE2EImage(imageutils.BusyBox),
   233  							Command: []string{"sh", "-c"},
   234  							Args: []string{`
   235  							sleep 9999999 &
   236  							PID=$!
   237  
   238  							_term () {
   239  							   kill $PID
   240  							   echo "Caught SIGTERM!"
   241  							}
   242  
   243  							trap _term SIGTERM
   244  							touch /tmp/trap-marker
   245  
   246  							wait $PID
   247  							trap - TERM
   248  
   249  							# Wait for the long running sleep to exit
   250  							wait $PID
   251  
   252  							exit 0
   253  							`,
   254  							},
   255  							ReadinessProbe: &v1.Probe{
   256  								PeriodSeconds: 1,
   257  								ProbeHandler: v1.ProbeHandler{
   258  									Exec: &v1.ExecAction{
   259  										Command: []string{"/bin/sh", "-c", "cat /tmp/trap-marker"},
   260  									},
   261  								},
   262  							},
   263  						},
   264  					},
   265  				},
   266  			})
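        			// The readiness probe succeeds only once /tmp/trap-marker exists, so waiting for the
        			// pod to become Ready below also guarantees the SIGTERM trap is registered; on deletion
        			// the handler lets the container exit 0 and the pod reach the Succeeded phase.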
   267  			ginkgo.By(fmt.Sprintf("Creating a pod (%v/%v) with restart policy: %v", f.Namespace.Name, podName, podSpec.Spec.RestartPolicy))
   268  			pod := e2epod.NewPodClient(f).Create(ctx, podSpec)
   269  
   270  			ginkgo.By(fmt.Sprintf("Waiting for the pod (%v/%v) to be running, and with the SIGTERM trap registered", f.Namespace.Name, pod.Name))
   271  			err := e2epod.WaitTimeoutForPodReadyInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name, f.Timeouts.PodStart)
   272  			framework.ExpectNoError(err, "Failed to wait for the pod to be running: (%v/%v)", f.Namespace.Name, pod.Name)
   273  
   274  			w := &cache.ListWatch{
   275  				WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
   276  					return f.ClientSet.CoreV1().Pods(f.Namespace.Name).Watch(ctx, options)
   277  				},
   278  			}
   279  
   280  			podsList, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(ctx, metav1.ListOptions{})
   281  			framework.ExpectNoError(err, "Failed to list pods in namespace: %s", f.Namespace.Name)
   282  
   283  			ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name))
   284  			err = e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod})
   285  			framework.ExpectNoError(err, "Failed to delete the pod: %q", pod.Name)
   286  
   287  			ctxUntil, cancel := context.WithTimeout(ctx, f.Timeouts.PodStart)
   288  			defer cancel()
   289  
   290  			ginkgo.By(fmt.Sprintf("Started watch for pod (%v/%v) to enter succeeded phase", pod.Namespace, pod.Name))
   291  			_, err = watchtools.Until(ctxUntil, podsList.ResourceVersion, w, func(event watch.Event) (bool, error) {
   292  				if pod, ok := event.Object.(*v1.Pod); ok {
   293  					found := pod.ObjectMeta.Name == podName &&
   294  						pod.ObjectMeta.Namespace == f.Namespace.Name &&
   295  						pod.Status.Phase == v1.PodSucceeded
   296  					if !found {
   297  						ginkgo.By(fmt.Sprintf("Observed Pod (%s/%s) in phase %v", pod.ObjectMeta.Namespace, pod.ObjectMeta.Name, pod.Status.Phase))
   298  						return false, nil
   299  					}
   300  					ginkgo.By(fmt.Sprintf("Found Pod (%s/%s) in phase %v", pod.ObjectMeta.Namespace, pod.ObjectMeta.Name, pod.Status.Phase))
   301  					return found, nil
   302  				}
   303  				ginkgo.By(fmt.Sprintf("Observed event: %+v", event.Object))
   304  				return false, nil
   305  			})
   306  			ginkgo.By("Ended watch for pod entering succeeded phase")
   307  			framework.ExpectNoError(err, "failed to see the pod (%s/%s) enter the Succeeded phase: %v", pod.Namespace, pod.Name, err)
   308  
   309  			// As soon as the pod enters the Succeeded phase (detected by the watch above), kill the kubelet.
   310  			// This is a bit racy, but the goal is to stop the kubelet before it is able to delete the pod from the API server, in order to reproduce https://issues.k8s.io/116925
   311  			ginkgo.By("Stopping the kubelet")
   312  			startKubelet := stopKubelet()
   313  			// wait until the kubelet health check fails
   314  			gomega.Eventually(ctx, func() bool {
   315  				return kubeletHealthCheck(kubeletHealthCheckURL)
   316  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())
   317  
   318  			ginkgo.By("Starting the kubelet")
   319  			startKubelet()
   320  
   321  			// wait until the kubelet health check succeeds
   322  			gomega.Eventually(ctx, func() bool {
   323  				return kubeletHealthCheck(kubeletHealthCheckURL)
   324  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())
   325  
   326  			// Wait for the Kubelet to be ready.
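        			// The node e2e suite runs against a single node, so exactly one Ready node is expected.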
   327  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   328  				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
   329  				framework.ExpectNoError(err)
   330  				return nodes == 1
   331  			}, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrue())
   332  
   333  			ginkgo.By(fmt.Sprintf("After the kubelet is restarted, verify the pod (%s/%s) is deleted by kubelet", pod.Namespace, pod.Name))
   334  			gomega.Eventually(ctx, func(ctx context.Context) error {
   335  				return checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace)
   336  			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
   337  		})
   338  		// Regression test for https://issues.k8s.io/118472
   339  		ginkgo.It("should force-delete non-admissible pods created and deleted during kubelet restart", func(ctx context.Context) {
   340  			podName := "rejected-deleted-pod" + string(uuid.NewUUID())
   341  			gracePeriod := int64(30)
   342  			nodeName := getNodeName(ctx, f)
   343  			podSpec := e2epod.MustMixinRestrictedPodSecurity(&v1.Pod{
   344  				ObjectMeta: metav1.ObjectMeta{
   345  					Name:      podName,
   346  					Namespace: f.Namespace.Name,
   347  				},
   348  				Spec: v1.PodSpec{
   349  					NodeName: nodeName,
   350  					NodeSelector: map[string]string{
   351  						"this-label": "does-not-exist-on-any-nodes",
   352  					},
   353  					TerminationGracePeriodSeconds: &gracePeriod,
   354  					RestartPolicy:                 v1.RestartPolicyNever,
   355  					Containers: []v1.Container{
   356  						{
   357  							Name:  podName,
   358  							Image: imageutils.GetPauseImageName(),
   359  						},
   360  					},
   361  				},
   362  			})
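        			// The pod is bound directly to the node via NodeName (bypassing the scheduler), but its
        			// NodeSelector matches no node, so the restarted kubelet will reject it at admission; the
        			// test then verifies that the already-deleted pod is force-deleted rather than left behind.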
   363  			ginkgo.By("Stopping the kubelet")
   364  			startKubelet := stopKubelet()
   365  
   366  			// wait until the kubelet health check fails
   367  			gomega.Eventually(ctx, func() bool {
   368  				return kubeletHealthCheck(kubeletHealthCheckURL)
   369  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())
   370  
   371  			// Create the pod bound to the node. It will remain in the Pending
   372  			// phase because the kubelet is down.
   373  			ginkgo.By(fmt.Sprintf("Creating a pod (%v/%v)", f.Namespace.Name, podName))
   374  			pod := e2epod.NewPodClient(f).Create(ctx, podSpec)
   375  
   376  			ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name))
   377  			err := e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod})
   378  			framework.ExpectNoError(err, "Failed to delete the pod: %q", pod.Name)
   379  
   380  			// Restart Kubelet so that it proceeds with deletion
   381  			ginkgo.By("Starting the kubelet")
   382  			startKubelet()
   383  
   384  			// wait until the kubelet health check succeeds
   385  			gomega.Eventually(ctx, func() bool {
   386  				return kubeletHealthCheck(kubeletHealthCheckURL)
   387  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())
   388  
   389  			// Wait for the Kubelet to be ready.
   390  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   391  				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
   392  				framework.ExpectNoError(err)
   393  				return nodes == 1
   394  			}, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrue())
   395  
   396  			ginkgo.By(fmt.Sprintf("After the kubelet is restarted, verify the pod (%v/%v) is deleted by kubelet", pod.Namespace, pod.Name))
   397  			gomega.Eventually(ctx, func(ctx context.Context) error {
   398  				return checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace)
   399  			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
   400  		})
   401  		// Regression test for an extended scenario for https://issues.k8s.io/118472
   402  		ginkgo.It("should force-delete non-admissible pods that were admitted and running before kubelet restart", func(ctx context.Context) {
   403  			nodeLabelKey := "custom-label-key-required"
   404  			nodeLabelValueRequired := "custom-label-value-required-for-admission"
   405  			podName := "rejected-deleted-run" + string(uuid.NewUUID())
   406  			gracePeriod := int64(30)
   407  			nodeName := getNodeName(ctx, f)
   408  			pod := e2epod.MustMixinRestrictedPodSecurity(&v1.Pod{
   409  				ObjectMeta: metav1.ObjectMeta{
   410  					Name:      podName,
   411  					Namespace: f.Namespace.Name,
   412  				},
   413  				Spec: v1.PodSpec{
   414  					NodeSelector: map[string]string{
   415  						nodeLabelKey: nodeLabelValueRequired,
   416  					},
   417  					NodeName:                      nodeName,
   418  					TerminationGracePeriodSeconds: &gracePeriod,
   419  					RestartPolicy:                 v1.RestartPolicyNever,
   420  					Containers: []v1.Container{
   421  						{
   422  							Name:  podName,
   423  							Image: imageutils.GetPauseImageName(),
   424  						},
   425  					},
   426  				},
   427  			})
   428  
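        			// The pod requires a custom node label to be admitted. The label is added so the pod is
        			// admitted and runs, then removed while the kubelet is down, so that re-admission fails
        			// after the restart and the kubelet must force-delete the already-deleted pod.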
   429  			ginkgo.By(fmt.Sprintf("Adding node label for node (%v) to allow admission of pod (%v/%v)", nodeName, f.Namespace.Name, podName))
   430  			e2enode.AddOrUpdateLabelOnNode(f.ClientSet, nodeName, nodeLabelKey, nodeLabelValueRequired)
   431  			ginkgo.DeferCleanup(func() { e2enode.RemoveLabelOffNode(f.ClientSet, nodeName, nodeLabelKey) })
   432  
   433  			// Create the pod bound to the node. It will start, but will be rejected after kubelet restart.
   434  			ginkgo.By(fmt.Sprintf("Creating a pod (%v/%v)", f.Namespace.Name, podName))
   435  			pod = e2epod.NewPodClient(f).Create(ctx, pod)
   436  
   437  			ginkgo.By(fmt.Sprintf("Waiting for the pod (%v/%v) to be running", f.Namespace.Name, pod.Name))
   438  			err := e2epod.WaitForPodNameRunningInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name)
   439  			framework.ExpectNoError(err, "Failed to wait for the pod to be running: (%v/%v)", f.Namespace.Name, pod.Name)
   440  
   441  			ginkgo.By("Stopping the kubelet")
   442  			startKubelet := stopKubelet()
   443  
   444  			// wait until the kubelet health check fails
   445  			gomega.Eventually(ctx, func() bool {
   446  				return kubeletHealthCheck(kubeletHealthCheckURL)
   447  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())
   448  
   449  			ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name))
   450  			err = e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod})
   451  			framework.ExpectNoError(err, "Failed to delete the pod: %q", pod.Name)
   452  
   453  			ginkgo.By(fmt.Sprintf("Removing node label for node (%v) to ensure the pod (%v/%v) is rejected after kubelet restart", nodeName, f.Namespace.Name, podName))
   454  			e2enode.RemoveLabelOffNode(f.ClientSet, nodeName, nodeLabelKey)
   455  
   456  			// Restart Kubelet so that it proceeds with deletion
   457  			ginkgo.By("Starting the kubelet")
   458  			startKubelet()
   459  
   460  			// wait until the kubelet health check succeeds
   461  			gomega.Eventually(ctx, func() bool {
   462  				return kubeletHealthCheck(kubeletHealthCheckURL)
   463  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())
   464  
   465  			// Wait for the Kubelet to be ready.
   466  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   467  				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
   468  				framework.ExpectNoError(err)
   469  				return nodes == 1
   470  			}, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrue())
   471  
   472  			ginkgo.By(fmt.Sprintf("Once Kubelet is restarted, verify the pod (%v/%v) is deleted by kubelet", pod.Namespace, pod.Name))
   473  			gomega.Eventually(ctx, func(ctx context.Context) error {
   474  				return checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace)
   475  			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
   476  		})
   477  	})
   478  
   479  })