k8s.io/kubernetes@v1.29.3/test/e2e_node/restart_test.go

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2015 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  package e2enode
    21  
    22  import (
    23  	"context"
    24  	"fmt"
    25  	"os/exec"
    26  	"time"
    27  
    28  	v1 "k8s.io/api/core/v1"
    29  	"k8s.io/apimachinery/pkg/api/resource"
    30  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    31  	"k8s.io/apimachinery/pkg/watch"
    32  	"k8s.io/client-go/tools/cache"
    33  	watchtools "k8s.io/client-go/tools/watch"
    34  	"k8s.io/kubernetes/test/e2e/framework"
    35  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    36  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    37  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    38  	testutils "k8s.io/kubernetes/test/utils"
    39  	imageutils "k8s.io/kubernetes/test/utils/image"
    40  	admissionapi "k8s.io/pod-security-admission/api"
    41  
    42  	"github.com/onsi/ginkgo/v2"
    43  	"github.com/onsi/gomega"
    44  	"k8s.io/apimachinery/pkg/util/uuid"
    45  )
    46  
    47  type podCondition func(pod *v1.Pod) (bool, error)
    48  
     49  // waitForPodsCondition waits up to `timeout` for at least `podCount` pods to match the given pod condition.
     50  // It returns the pods that matched the condition at the last poll, even if fewer than `podCount` were found before the timeout.
    51  func waitForPodsCondition(ctx context.Context, f *framework.Framework, podCount int, timeout time.Duration, condition podCondition) (runningPods []*v1.Pod) {
    52  	for start := time.Now(); time.Since(start) < timeout; time.Sleep(10 * time.Second) {
    53  		podList, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{})
    54  		if err != nil {
    55  			framework.Logf("Failed to list pods on node: %v", err)
    56  			continue
    57  		}
    58  
    59  		runningPods = []*v1.Pod{}
    60  		for i := range podList.Items {
    61  			pod := podList.Items[i]
    62  			if r, err := condition(&pod); err != nil || !r {
    63  				continue
    64  			}
    65  			runningPods = append(runningPods, &pod)
    66  		}
    67  		framework.Logf("Running pod count %d", len(runningPods))
    68  		if len(runningPods) >= podCount {
    69  			break
    70  		}
    71  	}
    72  	return runningPods
    73  }
    74  
    75  var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), framework.WithDisruptive(), func() {
    76  	const (
     77  		// Saturate the node. It's not necessary that all these pods enter
     78  		// Running/Ready, because we don't know the number of cores in the
     79  		// test node or the default limits applied (if any). It is essential,
     80  		// however, that no containers end up terminated. 100 was chosen because
     81  		// it's the max pods per node.
    82  		podCount            = 100
    83  		podCreationInterval = 100 * time.Millisecond
    84  		recoverTimeout      = 5 * time.Minute
    85  		startTimeout        = 3 * time.Minute
    86  		// restartCount is chosen so even with minPods we exhaust the default
    87  		// allocation of a /24.
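         		// (Worked out: minPods * restartCount = 50 * 6 = 300 potential pod IP allocations,
         		// which is more than the ~254 usable addresses in a /24, so a leak would surface as
         		// pods failing to obtain IPs.)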
    88  		minPods      = 50
    89  		restartCount = 6
    90  	)
    91  
    92  	f := framework.NewDefaultFramework("restart-test")
    93  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    94  	ginkgo.Context("Container Runtime", func() {
    95  		ginkgo.Context("Network", func() {
    96  			ginkgo.It("should recover from ip leak", func(ctx context.Context) {
    97  				pods := newTestPods(podCount, false, imageutils.GetPauseImageName(), "restart-container-runtime-test")
    98  				ginkgo.By(fmt.Sprintf("Trying to create %d pods on node", len(pods)))
    99  				createBatchPodWithRateControl(ctx, f, pods, podCreationInterval)
   100  				ginkgo.DeferCleanup(deletePodsSync, f, pods)
   101  
    102  				// Give the node some time to stabilize; assume that pods which enter Running/Ready within
    103  				// startTimeout fit on the node and that the node is now saturated.
   104  				runningPods := waitForPodsCondition(ctx, f, podCount, startTimeout, testutils.PodRunningReadyOrSucceeded)
   105  				if len(runningPods) < minPods {
   106  					framework.Failf("Failed to start %d pods, cannot test that restarting container runtime doesn't leak IPs", minPods)
   107  				}
   108  
   109  				for i := 0; i < restartCount; i++ {
   110  					ginkgo.By(fmt.Sprintf("Killing container runtime iteration %d", i))
   111  					// Wait for container runtime to be running
   112  					var pid int
   113  					gomega.Eventually(ctx, func() error {
   114  						runtimePids, err := getPidsForProcess(framework.TestContext.ContainerRuntimeProcessName, framework.TestContext.ContainerRuntimePidFile)
   115  						if err != nil {
   116  							return err
   117  						}
   118  						if len(runtimePids) != 1 {
   119  							return fmt.Errorf("unexpected container runtime pid list: %+v", runtimePids)
   120  						}
    121  						// Make sure the container runtime is actually running; the pid read from the pid file may belong to a process that is no longer running.
   122  						pid = runtimePids[0]
   123  						if _, err := exec.Command("sudo", "ps", "-p", fmt.Sprintf("%d", pid)).CombinedOutput(); err != nil {
   124  							return err
   125  						}
   126  						return nil
   127  					}, 1*time.Minute, 2*time.Second).Should(gomega.BeNil())
   128  					if stdout, err := exec.Command("sudo", "kill", "-SIGKILL", fmt.Sprintf("%d", pid)).CombinedOutput(); err != nil {
   129  						framework.Failf("Failed to kill container runtime (pid=%d): %v, stdout: %q", pid, err, string(stdout))
   130  					}
    131  					// Assume that the container runtime will be restarted by systemd/supervisord etc.
   132  					time.Sleep(20 * time.Second)
   133  				}
   134  
   135  				ginkgo.By("Checking currently Running/Ready pods")
   136  				postRestartRunningPods := waitForPodsCondition(ctx, f, len(runningPods), recoverTimeout, testutils.PodRunningReadyOrSucceeded)
   137  				if len(postRestartRunningPods) == 0 {
   138  					framework.Failf("Failed to start *any* pods after container runtime restart, this might indicate an IP leak")
   139  				}
   140  				ginkgo.By("Confirm no containers have terminated")
   141  				for _, pod := range postRestartRunningPods {
   142  					if c := testutils.TerminatedContainers(pod); len(c) != 0 {
   143  						framework.Failf("Pod %q has failed containers %+v after container runtime restart, this might indicate an IP leak", pod.Name, c)
   144  					}
   145  				}
   146  				ginkgo.By(fmt.Sprintf("Container runtime restart test passed with %d pods", len(postRestartRunningPods)))
   147  			})
   148  		})
   149  	})
   150  
   151  	ginkgo.Context("Dbus", func() {
   152  		ginkgo.It("should continue to run pods after a restart", func(ctx context.Context) {
    153  			// Allow dbus to be restarted on Ubuntu.
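         			// overlayDbusConfig and restoreDbusConfig are helpers defined elsewhere in this package;
         			// presumably they temporarily overlay a dbus/systemd unit configuration that permits dbus
         			// to be restarted for this test and then restore the original configuration afterwards.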
   154  			err := overlayDbusConfig()
   155  			framework.ExpectNoError(err)
   156  			defer func() {
   157  				err := restoreDbusConfig()
   158  				framework.ExpectNoError(err)
   159  			}()
   160  
   161  			preRestartPodCount := 2
   162  			ginkgo.By(fmt.Sprintf("creating %d RestartAlways pods on node", preRestartPodCount))
   163  			restartAlwaysPods := newTestPods(preRestartPodCount, false, imageutils.GetPauseImageName(), "restart-dbus-test")
   164  			createBatchPodWithRateControl(ctx, f, restartAlwaysPods, podCreationInterval)
   165  			ginkgo.DeferCleanup(deletePodsSync, f, restartAlwaysPods)
   166  
   167  			allPods := waitForPodsCondition(ctx, f, preRestartPodCount, startTimeout, testutils.PodRunningReadyOrSucceeded)
   168  			if len(allPods) < preRestartPodCount {
   169  				framework.Failf("Failed to run sufficient restartAlways pods, got %d but expected %d", len(allPods), preRestartPodCount)
   170  			}
   171  
   172  			ginkgo.By("restarting dbus and systemd", func() {
   173  				stdout, err := exec.Command("sudo", "systemctl", "reset-failed", "dbus").CombinedOutput()
   174  				framework.ExpectNoError(err, "Failed to reset dbus start-limit with systemctl: %v, %s", err, string(stdout))
   175  
   176  				stdout, err = exec.Command("sudo", "systemctl", "restart", "dbus").CombinedOutput()
   177  				framework.ExpectNoError(err, "Failed to restart dbus with systemctl: %v, %s", err, string(stdout))
   178  
   179  				stdout, err = exec.Command("sudo", "systemctl", "daemon-reexec").CombinedOutput()
   180  				framework.ExpectNoError(err, "Failed to restart systemd with systemctl: %v, %s", err, string(stdout))
   181  			})
   182  
   183  			ginkgo.By("verifying restartAlways pods stay running", func() {
   184  				for start := time.Now(); time.Since(start) < startTimeout && ctx.Err() == nil; time.Sleep(10 * time.Second) {
   185  					postRestartRunningPods := waitForPodsCondition(ctx, f, preRestartPodCount, recoverTimeout, testutils.PodRunningReadyOrSucceeded)
   186  					if len(postRestartRunningPods) < preRestartPodCount {
   187  						framework.Failf("fewer pods are running after systemd restart, got %d but expected %d", len(postRestartRunningPods), preRestartPodCount)
   188  					}
   189  				}
   190  			})
   191  
   192  			ginkgo.By("verifying new pods can be started after a dbus restart")
   193  			postRestartPodCount := 2
   194  			postRestartPods := newTestPods(postRestartPodCount, false, imageutils.GetPauseImageName(), "restart-dbus-test")
   195  			createBatchPodWithRateControl(ctx, f, postRestartPods, podCreationInterval)
   196  			ginkgo.DeferCleanup(deletePodsSync, f, postRestartPods)
   197  
   198  			allPods = waitForPodsCondition(ctx, f, preRestartPodCount+postRestartPodCount, startTimeout, testutils.PodRunningReadyOrSucceeded)
   199  			if len(allPods) < preRestartPodCount+postRestartPodCount {
   200  				framework.Failf("Failed to run pods after restarting dbus, got %d but expected %d", len(allPods), preRestartPodCount+postRestartPodCount)
   201  			}
   202  		})
   203  	})
   204  
   205  	ginkgo.Context("Kubelet", func() {
   206  		ginkgo.It("should correctly account for terminated pods after restart", func(ctx context.Context) {
   207  			node := getLocalNode(ctx, f)
   208  			cpus := node.Status.Allocatable[v1.ResourceCPU]
   209  			numCpus := int((&cpus).Value())
   210  			if numCpus < 1 {
   211  				e2eskipper.Skipf("insufficient CPU available for kubelet restart test")
   212  			}
   213  			if numCpus > 18 {
    214  				// 950m * 19 = 18050m (18.05 CPUs) -> not enough to block the scheduling of another 950m pod
   215  				e2eskipper.Skipf("test will return false positives on a machine with >18 cores")
   216  			}
   217  
    218  			// Create as many restartNever pods as there are allocatable CPUs
    219  			// on the node; if they are not correctly accounted for as terminated
    220  			// later, this will fill up all of the node's capacity.
   221  			podCountRestartNever := numCpus
   222  			ginkgo.By(fmt.Sprintf("creating %d RestartNever pods on node", podCountRestartNever))
   223  			restartNeverPods := newTestPods(podCountRestartNever, false, imageutils.GetE2EImage(imageutils.BusyBox), "restart-kubelet-test")
   224  			for _, pod := range restartNeverPods {
    225  				pod.Spec.RestartPolicy = v1.RestartPolicyNever
   226  				pod.Spec.Containers[0].Command = []string{"echo", "hi"}
   227  				pod.Spec.Containers[0].Resources.Limits = v1.ResourceList{
   228  					v1.ResourceCPU: resource.MustParse("950m"), // leave a little room for other workloads
   229  				}
   230  			}
   231  			createBatchPodWithRateControl(ctx, f, restartNeverPods, podCreationInterval)
   232  			ginkgo.DeferCleanup(deletePodsSync, f, restartNeverPods)
   233  			completedPods := waitForPodsCondition(ctx, f, podCountRestartNever, startTimeout, testutils.PodSucceeded)
   234  
   235  			if len(completedPods) < podCountRestartNever {
   236  				framework.Failf("Failed to run sufficient restartNever pods, got %d but expected %d", len(completedPods), podCountRestartNever)
   237  			}
   238  
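         			// Each restartAlways pod below requests a full CPU, so (numCpus/2)+1 of them can only all
         			// be admitted if the 950m requested by each completed restartNever pod above has been
         			// released; if the terminated pods were still counted, we would expect OutOfCpu rejections.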
   239  			podCountRestartAlways := (numCpus / 2) + 1
   240  			ginkgo.By(fmt.Sprintf("creating %d RestartAlways pods on node", podCountRestartAlways))
   241  			restartAlwaysPods := newTestPods(podCountRestartAlways, false, imageutils.GetPauseImageName(), "restart-kubelet-test")
   242  			for _, pod := range restartAlwaysPods {
   243  				pod.Spec.Containers[0].Resources.Limits = v1.ResourceList{
   244  					v1.ResourceCPU: resource.MustParse("1"),
   245  				}
   246  			}
   247  			createBatchPodWithRateControl(ctx, f, restartAlwaysPods, podCreationInterval)
   248  			ginkgo.DeferCleanup(deletePodsSync, f, restartAlwaysPods)
   249  
   250  			numAllPods := podCountRestartNever + podCountRestartAlways
   251  			allPods := waitForPodsCondition(ctx, f, numAllPods, startTimeout, testutils.PodRunningReadyOrSucceeded)
   252  			if len(allPods) < numAllPods {
   253  				framework.Failf("Failed to run sufficient restartAlways pods, got %d but expected %d", len(allPods), numAllPods)
   254  			}
   255  
   256  			ginkgo.By("killing and restarting kubelet")
    257  			// We want to kill the kubelet rather than trigger a graceful restart.
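         			// stopKubelet is a helper in this package that kills the running kubelet and returns a
         			// closure which starts it again; invoking the closure right away gives an abrupt
         			// kill-and-restart rather than a graceful restart.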
   258  			startKubelet := stopKubelet()
   259  			startKubelet()
   260  
    261  			// If this test works correctly, each of these pods will exit
    262  			// with no issue. But if accounting breaks, the kubelet may think
    263  			// these old pods are still consuming CPU, and pods scheduled after
    264  			// the restart will be rejected with an OutOfCpu error.
   265  			ginkgo.By("verifying restartNever pods succeed and restartAlways pods stay running")
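         			// Keep re-checking for the full startTimeout rather than checking once, so pods that are
         			// rejected or terminated some time after the kubelet comes back are still caught.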
   266  			for start := time.Now(); time.Since(start) < startTimeout && ctx.Err() == nil; time.Sleep(10 * time.Second) {
   267  				postRestartRunningPods := waitForPodsCondition(ctx, f, numAllPods, recoverTimeout, testutils.PodRunningReadyOrSucceeded)
   268  				if len(postRestartRunningPods) < numAllPods {
    269  					framework.Failf("fewer pods are running after node restart, got %d but expected %d", len(postRestartRunningPods), numAllPods)
   270  				}
   271  			}
   272  		})
   273  		// Regression test for https://issues.k8s.io/116925
   274  		ginkgo.It("should delete pods which are marked as terminal and have a deletion timestamp set after restart", func(ctx context.Context) {
   275  			podName := "terminal-restart-pod" + string(uuid.NewUUID())
   276  			gracePeriod := int64(30)
   277  			podSpec := e2epod.MustMixinRestrictedPodSecurity(&v1.Pod{
   278  				ObjectMeta: metav1.ObjectMeta{
   279  					Name: podName,
   280  				},
   281  				Spec: v1.PodSpec{
   282  					TerminationGracePeriodSeconds: &gracePeriod,
   283  					RestartPolicy:                 v1.RestartPolicyNever,
   284  					Containers: []v1.Container{
   285  						{
   286  							Name:    podName,
   287  							Image:   imageutils.GetE2EImage(imageutils.BusyBox),
   288  							Command: []string{"sh", "-c"},
   289  							Args: []string{`
   290  							sleep 9999999 &
   291  							PID=$!
   292  
   293  							_term () {
   294  							   kill $PID
   295  							   echo "Caught SIGTERM!"
   296  							}
   297  
   298  							trap _term SIGTERM
   299  							wait $PID
   300  							trap - TERM
   301  
   302  							# Wait for the long running sleep to exit
   303  							wait $PID
   304  
   305  							exit 0
   306  							`,
   307  							},
   308  						},
   309  					},
   310  				},
   311  			})
   312  			ginkgo.By(fmt.Sprintf("Creating a pod (%v/%v) with restart policy: %v", f.Namespace.Name, podName, podSpec.Spec.RestartPolicy))
   313  			pod := e2epod.NewPodClient(f).Create(ctx, podSpec)
   314  
   315  			ginkgo.By(fmt.Sprintf("Waiting for the pod (%v/%v) to be running", f.Namespace.Name, pod.Name))
   316  			err := e2epod.WaitForPodNameRunningInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name)
    317  			framework.ExpectNoError(err, "Failed to wait for the pod to be running: (%v/%v)", f.Namespace.Name, pod.Name)
   318  
   319  			w := &cache.ListWatch{
   320  				WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
   321  					return f.ClientSet.CoreV1().Pods(f.Namespace.Name).Watch(ctx, options)
   322  				},
   323  			}
   324  
   325  			podsList, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(ctx, metav1.ListOptions{})
   326  			framework.ExpectNoError(err, "Failed to list pods in namespace: %s", f.Namespace.Name)
   327  
   328  			ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name))
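         			// The short sleep likely gives the container's shell time to install the SIGTERM trap set
         			// up in the command above, so the graceful deletion lets the pod exit 0 and reach the
         			// Succeeded phase.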
   329  			time.Sleep(time.Second)
   330  			err = e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod})
   331  			framework.ExpectNoError(err, "Failed to delete the pod: %q", pod.Name)
   332  
   333  			ctxUntil, cancel := context.WithTimeout(ctx, f.Timeouts.PodStart)
   334  			defer cancel()
   335  
   336  			ginkgo.By(fmt.Sprintf("Started watch for pod (%v/%v) to enter succeeded phase", pod.Namespace, pod.Name))
   337  			_, err = watchtools.Until(ctxUntil, podsList.ResourceVersion, w, func(event watch.Event) (bool, error) {
   338  				if pod, ok := event.Object.(*v1.Pod); ok {
   339  					found := pod.ObjectMeta.Name == podName &&
   340  						pod.ObjectMeta.Namespace == f.Namespace.Name &&
   341  						pod.Status.Phase == v1.PodSucceeded
   342  					if !found {
   343  						ginkgo.By(fmt.Sprintf("Observed Pod (%s/%s) in phase %v", pod.ObjectMeta.Namespace, pod.ObjectMeta.Name, pod.Status.Phase))
   344  						return false, nil
   345  					}
   346  					ginkgo.By(fmt.Sprintf("Found Pod (%s/%s) in phase %v", pod.ObjectMeta.Namespace, pod.ObjectMeta.Name, pod.Status.Phase))
   347  					return found, nil
   348  				}
   349  				ginkgo.By(fmt.Sprintf("Observed event: %+v", event.Object))
   350  				return false, nil
   351  			})
   352  			ginkgo.By("Ended watch for pod entering succeeded phase")
    353  			framework.ExpectNoError(err, "failed to see event that pod (%s/%s) entered succeeded phase: %v", pod.Namespace, pod.Name, err)
   354  
   355  			// As soon as the pod enters succeeded phase (detected by the watch above); kill the kubelet.
    356  			// This is a bit racy, but the goal is to stop the kubelet before it is able to delete the pod from the API server, in order to reproduce https://issues.k8s.io/116925
   357  			ginkgo.By("Stopping the kubelet")
   358  			startKubelet := stopKubelet()
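         			// kubeletHealthCheck queries the kubelet's local healthz endpoint (kubeletHealthCheckURL,
         			// typically http://127.0.0.1:10248/healthz); waiting for it to fail confirms the kubelet
         			// is actually down before continuing.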
    359  			// Wait until the kubelet health check fails.
   360  			gomega.Eventually(ctx, func() bool {
   361  				return kubeletHealthCheck(kubeletHealthCheckURL)
   362  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())
   363  
   364  			ginkgo.By("Starting the kubelet")
   365  			startKubelet()
   366  
    367  			// Wait until the kubelet health check succeeds.
   368  			gomega.Eventually(ctx, func() bool {
   369  				return kubeletHealthCheck(kubeletHealthCheckURL)
   370  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())
   371  
   372  			// Wait for the Kubelet to be ready.
   373  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   374  				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
   375  				framework.ExpectNoError(err)
   376  				return nodes == 1
   377  			}, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrue())
   378  
   379  			ginkgo.By(fmt.Sprintf("After the kubelet is restarted, verify the pod (%s/%s) is deleted by kubelet", pod.Namespace, pod.Name))
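         			// checkMirrorPodDisappear, despite its name, appears to simply verify that the pod object
         			// no longer exists in the API server, which is what the restarted kubelet should ensure
         			// for this terminal pod that has a deletion timestamp.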
   380  			gomega.Eventually(ctx, func(ctx context.Context) error {
   381  				return checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace)
   382  			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
   383  		})
   384  		// Regression test for https://issues.k8s.io/118472
   385  		ginkgo.It("should force-delete non-admissible pods created and deleted during kubelet restart", func(ctx context.Context) {
   386  			podName := "rejected-deleted-pod" + string(uuid.NewUUID())
   387  			gracePeriod := int64(30)
   388  			nodeName := getNodeName(ctx, f)
   389  			podSpec := e2epod.MustMixinRestrictedPodSecurity(&v1.Pod{
   390  				ObjectMeta: metav1.ObjectMeta{
   391  					Name:      podName,
   392  					Namespace: f.Namespace.Name,
   393  				},
   394  				Spec: v1.PodSpec{
   395  					NodeName: nodeName,
   396  					NodeSelector: map[string]string{
   397  						"this-label": "does-not-exist-on-any-nodes",
   398  					},
   399  					TerminationGracePeriodSeconds: &gracePeriod,
   400  					RestartPolicy:                 v1.RestartPolicyNever,
   401  					Containers: []v1.Container{
   402  						{
   403  							Name:  podName,
   404  							Image: imageutils.GetPauseImageName(),
   405  						},
   406  					},
   407  				},
   408  			})
   409  			ginkgo.By("Stopping the kubelet")
   410  			startKubelet := stopKubelet()
   411  
    412  			// Wait until the kubelet health check fails.
   413  			gomega.Eventually(ctx, func() bool {
   414  				return kubeletHealthCheck(kubeletHealthCheckURL)
   415  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())
   416  
   417  			// Create the pod bound to the node. It will remain in the Pending
    418  			// phase as the kubelet is down.
   419  			ginkgo.By(fmt.Sprintf("Creating a pod (%v/%v)", f.Namespace.Name, podName))
   420  			pod := e2epod.NewPodClient(f).Create(ctx, podSpec)
   421  
   422  			ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name))
   423  			err := e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod})
   424  			framework.ExpectNoError(err, "Failed to delete the pod: %q", pod.Name)
   425  
   426  			// Restart Kubelet so that it proceeds with deletion
   427  			ginkgo.By("Starting the kubelet")
   428  			startKubelet()
   429  
    430  			// Wait until the kubelet health check succeeds.
   431  			gomega.Eventually(ctx, func() bool {
   432  				return kubeletHealthCheck(kubeletHealthCheckURL)
   433  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())
   434  
   435  			// Wait for the Kubelet to be ready.
   436  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   437  				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
   438  				framework.ExpectNoError(err)
   439  				return nodes == 1
   440  			}, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrue())
   441  
   442  			ginkgo.By(fmt.Sprintf("After the kubelet is restarted, verify the pod (%v/%v) is deleted by kubelet", pod.Namespace, pod.Name))
   443  			gomega.Eventually(ctx, func(ctx context.Context) error {
   444  				return checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace)
   445  			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
   446  		})
   447  		// Regression test for an extended scenario for https://issues.k8s.io/118472
    448  		ginkgo.It("should force-delete non-admissible pods that were admitted and running before kubelet restart", func(ctx context.Context) {
   449  			nodeLabelKey := "custom-label-key-required"
   450  			nodeLabelValueRequired := "custom-label-value-required-for-admission"
   451  			podName := "rejected-deleted-run" + string(uuid.NewUUID())
   452  			gracePeriod := int64(30)
   453  			nodeName := getNodeName(ctx, f)
   454  			pod := e2epod.MustMixinRestrictedPodSecurity(&v1.Pod{
   455  				ObjectMeta: metav1.ObjectMeta{
   456  					Name:      podName,
   457  					Namespace: f.Namespace.Name,
   458  				},
   459  				Spec: v1.PodSpec{
   460  					NodeSelector: map[string]string{
   461  						nodeLabelKey: nodeLabelValueRequired,
   462  					},
   463  					NodeName:                      nodeName,
   464  					TerminationGracePeriodSeconds: &gracePeriod,
   465  					RestartPolicy:                 v1.RestartPolicyNever,
   466  					Containers: []v1.Container{
   467  						{
   468  							Name:  podName,
   469  							Image: imageutils.GetPauseImageName(),
   470  						},
   471  					},
   472  				},
   473  			})
   474  
   475  			ginkgo.By(fmt.Sprintf("Adding node label for node (%v) to allow admission of pod (%v/%v)", nodeName, f.Namespace.Name, podName))
   476  			e2enode.AddOrUpdateLabelOnNode(f.ClientSet, nodeName, nodeLabelKey, nodeLabelValueRequired)
   477  			ginkgo.DeferCleanup(func() { e2enode.RemoveLabelOffNode(f.ClientSet, nodeName, nodeLabelKey) })
   478  
   479  			// Create the pod bound to the node. It will start, but will be rejected after kubelet restart.
   480  			ginkgo.By(fmt.Sprintf("Creating a pod (%v/%v)", f.Namespace.Name, podName))
   481  			pod = e2epod.NewPodClient(f).Create(ctx, pod)
   482  
   483  			ginkgo.By(fmt.Sprintf("Waiting for the pod (%v/%v) to be running", f.Namespace.Name, pod.Name))
   484  			err := e2epod.WaitForPodNameRunningInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name)
    485  			framework.ExpectNoError(err, "Failed to wait for the pod to be running: (%v/%v)", f.Namespace.Name, pod.Name)
   486  
   487  			ginkgo.By("Stopping the kubelet")
   488  			startKubelet := stopKubelet()
   489  
    490  			// Wait until the kubelet health check fails.
   491  			gomega.Eventually(ctx, func() bool {
   492  				return kubeletHealthCheck(kubeletHealthCheckURL)
   493  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())
   494  
   495  			ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name))
   496  			err = e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod})
   497  			framework.ExpectNoError(err, "Failed to delete the pod: %q", pod.Name)
   498  
   499  			ginkgo.By(fmt.Sprintf("Removing node label for node (%v) to ensure the pod (%v/%v) is rejected after kubelet restart", nodeName, f.Namespace.Name, podName))
   500  			e2enode.RemoveLabelOffNode(f.ClientSet, nodeName, nodeLabelKey)
   501  
   502  			// Restart Kubelet so that it proceeds with deletion
   503  			ginkgo.By("Starting the kubelet")
   504  			startKubelet()
   505  
    506  			// Wait until the kubelet health check succeeds.
   507  			gomega.Eventually(ctx, func() bool {
   508  				return kubeletHealthCheck(kubeletHealthCheckURL)
   509  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())
   510  
   511  			// Wait for the Kubelet to be ready.
   512  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   513  				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
   514  				framework.ExpectNoError(err)
   515  				return nodes == 1
   516  			}, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrue())
   517  
   518  			ginkgo.By(fmt.Sprintf("Once Kubelet is restarted, verify the pod (%v/%v) is deleted by kubelet", pod.Namespace, pod.Name))
   519  			gomega.Eventually(ctx, func(ctx context.Context) error {
   520  				return checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace)
   521  			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
   522  		})
   523  	})
   524  
   525  })