k8s.io/kubernetes@v1.29.3/test/e2e_node/restart_test.go (about) 1 //go:build linux 2 // +build linux 3 4 /* 5 Copyright 2015 The Kubernetes Authors. 6 7 Licensed under the Apache License, Version 2.0 (the "License"); 8 you may not use this file except in compliance with the License. 9 You may obtain a copy of the License at 10 11 http://www.apache.org/licenses/LICENSE-2.0 12 13 Unless required by applicable law or agreed to in writing, software 14 distributed under the License is distributed on an "AS IS" BASIS, 15 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 See the License for the specific language governing permissions and 17 limitations under the License. 18 */ 19 20 package e2enode 21 22 import ( 23 "context" 24 "fmt" 25 "os/exec" 26 "time" 27 28 v1 "k8s.io/api/core/v1" 29 "k8s.io/apimachinery/pkg/api/resource" 30 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 "k8s.io/apimachinery/pkg/watch" 32 "k8s.io/client-go/tools/cache" 33 watchtools "k8s.io/client-go/tools/watch" 34 "k8s.io/kubernetes/test/e2e/framework" 35 e2enode "k8s.io/kubernetes/test/e2e/framework/node" 36 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 37 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" 38 testutils "k8s.io/kubernetes/test/utils" 39 imageutils "k8s.io/kubernetes/test/utils/image" 40 admissionapi "k8s.io/pod-security-admission/api" 41 42 "github.com/onsi/ginkgo/v2" 43 "github.com/onsi/gomega" 44 "k8s.io/apimachinery/pkg/util/uuid" 45 ) 46 47 type podCondition func(pod *v1.Pod) (bool, error) 48 49 // waitForPodsCondition waits for `podCount` number of pods to match a specific pod condition within a timeout duration. 50 // If the timeout is hit, it returns the list of currently running pods. 51 func waitForPodsCondition(ctx context.Context, f *framework.Framework, podCount int, timeout time.Duration, condition podCondition) (runningPods []*v1.Pod) { 52 for start := time.Now(); time.Since(start) < timeout; time.Sleep(10 * time.Second) { 53 podList, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{}) 54 if err != nil { 55 framework.Logf("Failed to list pods on node: %v", err) 56 continue 57 } 58 59 runningPods = []*v1.Pod{} 60 for i := range podList.Items { 61 pod := podList.Items[i] 62 if r, err := condition(&pod); err != nil || !r { 63 continue 64 } 65 runningPods = append(runningPods, &pod) 66 } 67 framework.Logf("Running pod count %d", len(runningPods)) 68 if len(runningPods) >= podCount { 69 break 70 } 71 } 72 return runningPods 73 } 74 75 var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), framework.WithDisruptive(), func() { 76 const ( 77 // Saturate the node. It's not necessary that all these pods enter 78 // Running/Ready, because we don't know the number of cores in the 79 // test node or default limits applied (if any). It's is essential 80 // that no containers end up in terminated. 100 was chosen because 81 // it's the max pods per node. 82 podCount = 100 83 podCreationInterval = 100 * time.Millisecond 84 recoverTimeout = 5 * time.Minute 85 startTimeout = 3 * time.Minute 86 // restartCount is chosen so even with minPods we exhaust the default 87 // allocation of a /24. 88 minPods = 50 89 restartCount = 6 90 ) 91 92 f := framework.NewDefaultFramework("restart-test") 93 f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged 94 ginkgo.Context("Container Runtime", func() { 95 ginkgo.Context("Network", func() { 96 ginkgo.It("should recover from ip leak", func(ctx context.Context) { 97 pods := newTestPods(podCount, false, imageutils.GetPauseImageName(), "restart-container-runtime-test") 98 ginkgo.By(fmt.Sprintf("Trying to create %d pods on node", len(pods))) 99 createBatchPodWithRateControl(ctx, f, pods, podCreationInterval) 100 ginkgo.DeferCleanup(deletePodsSync, f, pods) 101 102 // Give the node some time to stabilize, assume pods that enter RunningReady within 103 // startTimeout fit on the node and the node is now saturated. 104 runningPods := waitForPodsCondition(ctx, f, podCount, startTimeout, testutils.PodRunningReadyOrSucceeded) 105 if len(runningPods) < minPods { 106 framework.Failf("Failed to start %d pods, cannot test that restarting container runtime doesn't leak IPs", minPods) 107 } 108 109 for i := 0; i < restartCount; i++ { 110 ginkgo.By(fmt.Sprintf("Killing container runtime iteration %d", i)) 111 // Wait for container runtime to be running 112 var pid int 113 gomega.Eventually(ctx, func() error { 114 runtimePids, err := getPidsForProcess(framework.TestContext.ContainerRuntimeProcessName, framework.TestContext.ContainerRuntimePidFile) 115 if err != nil { 116 return err 117 } 118 if len(runtimePids) != 1 { 119 return fmt.Errorf("unexpected container runtime pid list: %+v", runtimePids) 120 } 121 // Make sure the container runtime is running, pid got from pid file may not be running. 122 pid = runtimePids[0] 123 if _, err := exec.Command("sudo", "ps", "-p", fmt.Sprintf("%d", pid)).CombinedOutput(); err != nil { 124 return err 125 } 126 return nil 127 }, 1*time.Minute, 2*time.Second).Should(gomega.BeNil()) 128 if stdout, err := exec.Command("sudo", "kill", "-SIGKILL", fmt.Sprintf("%d", pid)).CombinedOutput(); err != nil { 129 framework.Failf("Failed to kill container runtime (pid=%d): %v, stdout: %q", pid, err, string(stdout)) 130 } 131 // Assume that container runtime will be restarted by systemd/supervisord etc. 132 time.Sleep(20 * time.Second) 133 } 134 135 ginkgo.By("Checking currently Running/Ready pods") 136 postRestartRunningPods := waitForPodsCondition(ctx, f, len(runningPods), recoverTimeout, testutils.PodRunningReadyOrSucceeded) 137 if len(postRestartRunningPods) == 0 { 138 framework.Failf("Failed to start *any* pods after container runtime restart, this might indicate an IP leak") 139 } 140 ginkgo.By("Confirm no containers have terminated") 141 for _, pod := range postRestartRunningPods { 142 if c := testutils.TerminatedContainers(pod); len(c) != 0 { 143 framework.Failf("Pod %q has failed containers %+v after container runtime restart, this might indicate an IP leak", pod.Name, c) 144 } 145 } 146 ginkgo.By(fmt.Sprintf("Container runtime restart test passed with %d pods", len(postRestartRunningPods))) 147 }) 148 }) 149 }) 150 151 ginkgo.Context("Dbus", func() { 152 ginkgo.It("should continue to run pods after a restart", func(ctx context.Context) { 153 // Allow dbus to be restarted on ubuntu 154 err := overlayDbusConfig() 155 framework.ExpectNoError(err) 156 defer func() { 157 err := restoreDbusConfig() 158 framework.ExpectNoError(err) 159 }() 160 161 preRestartPodCount := 2 162 ginkgo.By(fmt.Sprintf("creating %d RestartAlways pods on node", preRestartPodCount)) 163 restartAlwaysPods := newTestPods(preRestartPodCount, false, imageutils.GetPauseImageName(), "restart-dbus-test") 164 createBatchPodWithRateControl(ctx, f, restartAlwaysPods, podCreationInterval) 165 ginkgo.DeferCleanup(deletePodsSync, f, restartAlwaysPods) 166 167 allPods := waitForPodsCondition(ctx, f, preRestartPodCount, startTimeout, testutils.PodRunningReadyOrSucceeded) 168 if len(allPods) < preRestartPodCount { 169 framework.Failf("Failed to run sufficient restartAlways pods, got %d but expected %d", len(allPods), preRestartPodCount) 170 } 171 172 ginkgo.By("restarting dbus and systemd", func() { 173 stdout, err := exec.Command("sudo", "systemctl", "reset-failed", "dbus").CombinedOutput() 174 framework.ExpectNoError(err, "Failed to reset dbus start-limit with systemctl: %v, %s", err, string(stdout)) 175 176 stdout, err = exec.Command("sudo", "systemctl", "restart", "dbus").CombinedOutput() 177 framework.ExpectNoError(err, "Failed to restart dbus with systemctl: %v, %s", err, string(stdout)) 178 179 stdout, err = exec.Command("sudo", "systemctl", "daemon-reexec").CombinedOutput() 180 framework.ExpectNoError(err, "Failed to restart systemd with systemctl: %v, %s", err, string(stdout)) 181 }) 182 183 ginkgo.By("verifying restartAlways pods stay running", func() { 184 for start := time.Now(); time.Since(start) < startTimeout && ctx.Err() == nil; time.Sleep(10 * time.Second) { 185 postRestartRunningPods := waitForPodsCondition(ctx, f, preRestartPodCount, recoverTimeout, testutils.PodRunningReadyOrSucceeded) 186 if len(postRestartRunningPods) < preRestartPodCount { 187 framework.Failf("fewer pods are running after systemd restart, got %d but expected %d", len(postRestartRunningPods), preRestartPodCount) 188 } 189 } 190 }) 191 192 ginkgo.By("verifying new pods can be started after a dbus restart") 193 postRestartPodCount := 2 194 postRestartPods := newTestPods(postRestartPodCount, false, imageutils.GetPauseImageName(), "restart-dbus-test") 195 createBatchPodWithRateControl(ctx, f, postRestartPods, podCreationInterval) 196 ginkgo.DeferCleanup(deletePodsSync, f, postRestartPods) 197 198 allPods = waitForPodsCondition(ctx, f, preRestartPodCount+postRestartPodCount, startTimeout, testutils.PodRunningReadyOrSucceeded) 199 if len(allPods) < preRestartPodCount+postRestartPodCount { 200 framework.Failf("Failed to run pods after restarting dbus, got %d but expected %d", len(allPods), preRestartPodCount+postRestartPodCount) 201 } 202 }) 203 }) 204 205 ginkgo.Context("Kubelet", func() { 206 ginkgo.It("should correctly account for terminated pods after restart", func(ctx context.Context) { 207 node := getLocalNode(ctx, f) 208 cpus := node.Status.Allocatable[v1.ResourceCPU] 209 numCpus := int((&cpus).Value()) 210 if numCpus < 1 { 211 e2eskipper.Skipf("insufficient CPU available for kubelet restart test") 212 } 213 if numCpus > 18 { 214 // 950m * 19 = 1805 CPUs -> not enough to block the scheduling of another 950m pod 215 e2eskipper.Skipf("test will return false positives on a machine with >18 cores") 216 } 217 218 // create as many restartNever pods as there are allocatable CPU 219 // nodes; if they are not correctly accounted for as terminated 220 // later, this will fill up all node capacity 221 podCountRestartNever := numCpus 222 ginkgo.By(fmt.Sprintf("creating %d RestartNever pods on node", podCountRestartNever)) 223 restartNeverPods := newTestPods(podCountRestartNever, false, imageutils.GetE2EImage(imageutils.BusyBox), "restart-kubelet-test") 224 for _, pod := range restartNeverPods { 225 pod.Spec.RestartPolicy = "Never" 226 pod.Spec.Containers[0].Command = []string{"echo", "hi"} 227 pod.Spec.Containers[0].Resources.Limits = v1.ResourceList{ 228 v1.ResourceCPU: resource.MustParse("950m"), // leave a little room for other workloads 229 } 230 } 231 createBatchPodWithRateControl(ctx, f, restartNeverPods, podCreationInterval) 232 ginkgo.DeferCleanup(deletePodsSync, f, restartNeverPods) 233 completedPods := waitForPodsCondition(ctx, f, podCountRestartNever, startTimeout, testutils.PodSucceeded) 234 235 if len(completedPods) < podCountRestartNever { 236 framework.Failf("Failed to run sufficient restartNever pods, got %d but expected %d", len(completedPods), podCountRestartNever) 237 } 238 239 podCountRestartAlways := (numCpus / 2) + 1 240 ginkgo.By(fmt.Sprintf("creating %d RestartAlways pods on node", podCountRestartAlways)) 241 restartAlwaysPods := newTestPods(podCountRestartAlways, false, imageutils.GetPauseImageName(), "restart-kubelet-test") 242 for _, pod := range restartAlwaysPods { 243 pod.Spec.Containers[0].Resources.Limits = v1.ResourceList{ 244 v1.ResourceCPU: resource.MustParse("1"), 245 } 246 } 247 createBatchPodWithRateControl(ctx, f, restartAlwaysPods, podCreationInterval) 248 ginkgo.DeferCleanup(deletePodsSync, f, restartAlwaysPods) 249 250 numAllPods := podCountRestartNever + podCountRestartAlways 251 allPods := waitForPodsCondition(ctx, f, numAllPods, startTimeout, testutils.PodRunningReadyOrSucceeded) 252 if len(allPods) < numAllPods { 253 framework.Failf("Failed to run sufficient restartAlways pods, got %d but expected %d", len(allPods), numAllPods) 254 } 255 256 ginkgo.By("killing and restarting kubelet") 257 // We want to kill the kubelet rather than a graceful restart 258 startKubelet := stopKubelet() 259 startKubelet() 260 261 // If this test works correctly, each of these pods will exit 262 // with no issue. But if accounting breaks, pods scheduled after 263 // restart may think these old pods are consuming CPU and we 264 // will get an OutOfCpu error. 265 ginkgo.By("verifying restartNever pods succeed and restartAlways pods stay running") 266 for start := time.Now(); time.Since(start) < startTimeout && ctx.Err() == nil; time.Sleep(10 * time.Second) { 267 postRestartRunningPods := waitForPodsCondition(ctx, f, numAllPods, recoverTimeout, testutils.PodRunningReadyOrSucceeded) 268 if len(postRestartRunningPods) < numAllPods { 269 framework.Failf("less pods are running after node restart, got %d but expected %d", len(postRestartRunningPods), numAllPods) 270 } 271 } 272 }) 273 // Regression test for https://issues.k8s.io/116925 274 ginkgo.It("should delete pods which are marked as terminal and have a deletion timestamp set after restart", func(ctx context.Context) { 275 podName := "terminal-restart-pod" + string(uuid.NewUUID()) 276 gracePeriod := int64(30) 277 podSpec := e2epod.MustMixinRestrictedPodSecurity(&v1.Pod{ 278 ObjectMeta: metav1.ObjectMeta{ 279 Name: podName, 280 }, 281 Spec: v1.PodSpec{ 282 TerminationGracePeriodSeconds: &gracePeriod, 283 RestartPolicy: v1.RestartPolicyNever, 284 Containers: []v1.Container{ 285 { 286 Name: podName, 287 Image: imageutils.GetE2EImage(imageutils.BusyBox), 288 Command: []string{"sh", "-c"}, 289 Args: []string{` 290 sleep 9999999 & 291 PID=$! 292 293 _term () { 294 kill $PID 295 echo "Caught SIGTERM!" 296 } 297 298 trap _term SIGTERM 299 wait $PID 300 trap - TERM 301 302 # Wait for the long running sleep to exit 303 wait $PID 304 305 exit 0 306 `, 307 }, 308 }, 309 }, 310 }, 311 }) 312 ginkgo.By(fmt.Sprintf("Creating a pod (%v/%v) with restart policy: %v", f.Namespace.Name, podName, podSpec.Spec.RestartPolicy)) 313 pod := e2epod.NewPodClient(f).Create(ctx, podSpec) 314 315 ginkgo.By(fmt.Sprintf("Waiting for the pod (%v/%v) to be running", f.Namespace.Name, pod.Name)) 316 err := e2epod.WaitForPodNameRunningInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name) 317 framework.ExpectNoError(err, "Failed to await for the pod to be running: (%v/%v)", f.Namespace.Name, pod.Name) 318 319 w := &cache.ListWatch{ 320 WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { 321 return f.ClientSet.CoreV1().Pods(f.Namespace.Name).Watch(ctx, options) 322 }, 323 } 324 325 podsList, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(ctx, metav1.ListOptions{}) 326 framework.ExpectNoError(err, "Failed to list pods in namespace: %s", f.Namespace.Name) 327 328 ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name)) 329 time.Sleep(time.Second) 330 err = e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod}) 331 framework.ExpectNoError(err, "Failed to delete the pod: %q", pod.Name) 332 333 ctxUntil, cancel := context.WithTimeout(ctx, f.Timeouts.PodStart) 334 defer cancel() 335 336 ginkgo.By(fmt.Sprintf("Started watch for pod (%v/%v) to enter succeeded phase", pod.Namespace, pod.Name)) 337 _, err = watchtools.Until(ctxUntil, podsList.ResourceVersion, w, func(event watch.Event) (bool, error) { 338 if pod, ok := event.Object.(*v1.Pod); ok { 339 found := pod.ObjectMeta.Name == podName && 340 pod.ObjectMeta.Namespace == f.Namespace.Name && 341 pod.Status.Phase == v1.PodSucceeded 342 if !found { 343 ginkgo.By(fmt.Sprintf("Observed Pod (%s/%s) in phase %v", pod.ObjectMeta.Namespace, pod.ObjectMeta.Name, pod.Status.Phase)) 344 return false, nil 345 } 346 ginkgo.By(fmt.Sprintf("Found Pod (%s/%s) in phase %v", pod.ObjectMeta.Namespace, pod.ObjectMeta.Name, pod.Status.Phase)) 347 return found, nil 348 } 349 ginkgo.By(fmt.Sprintf("Observed event: %+v", event.Object)) 350 return false, nil 351 }) 352 ginkgo.By("Ended watch for pod entering succeeded phase") 353 framework.ExpectNoError(err, "failed to see event that pod (%s/%s) enter succeeded phase: %v", pod.Namespace, pod.Name, err) 354 355 // As soon as the pod enters succeeded phase (detected by the watch above); kill the kubelet. 356 // This is a bit racy, but the goal is to stop the kubelet before the kubelet is able to delete the pod from the API-sever in order to repro https://issues.k8s.io/116925 357 ginkgo.By("Stopping the kubelet") 358 startKubelet := stopKubelet() 359 // wait until the kubelet health check will fail 360 gomega.Eventually(ctx, func() bool { 361 return kubeletHealthCheck(kubeletHealthCheckURL) 362 }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse()) 363 364 ginkgo.By("Starting the kubelet") 365 startKubelet() 366 367 // wait until the kubelet health check will succeed 368 gomega.Eventually(ctx, func() bool { 369 return kubeletHealthCheck(kubeletHealthCheckURL) 370 }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue()) 371 372 // Wait for the Kubelet to be ready. 373 gomega.Eventually(ctx, func(ctx context.Context) bool { 374 nodes, err := e2enode.TotalReady(ctx, f.ClientSet) 375 framework.ExpectNoError(err) 376 return nodes == 1 377 }, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrue()) 378 379 ginkgo.By(fmt.Sprintf("After the kubelet is restarted, verify the pod (%s/%s) is deleted by kubelet", pod.Namespace, pod.Name)) 380 gomega.Eventually(ctx, func(ctx context.Context) error { 381 return checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace) 382 }, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil()) 383 }) 384 // Regression test for https://issues.k8s.io/118472 385 ginkgo.It("should force-delete non-admissible pods created and deleted during kubelet restart", func(ctx context.Context) { 386 podName := "rejected-deleted-pod" + string(uuid.NewUUID()) 387 gracePeriod := int64(30) 388 nodeName := getNodeName(ctx, f) 389 podSpec := e2epod.MustMixinRestrictedPodSecurity(&v1.Pod{ 390 ObjectMeta: metav1.ObjectMeta{ 391 Name: podName, 392 Namespace: f.Namespace.Name, 393 }, 394 Spec: v1.PodSpec{ 395 NodeName: nodeName, 396 NodeSelector: map[string]string{ 397 "this-label": "does-not-exist-on-any-nodes", 398 }, 399 TerminationGracePeriodSeconds: &gracePeriod, 400 RestartPolicy: v1.RestartPolicyNever, 401 Containers: []v1.Container{ 402 { 403 Name: podName, 404 Image: imageutils.GetPauseImageName(), 405 }, 406 }, 407 }, 408 }) 409 ginkgo.By("Stopping the kubelet") 410 startKubelet := stopKubelet() 411 412 // wait until the kubelet health check will fail 413 gomega.Eventually(ctx, func() bool { 414 return kubeletHealthCheck(kubeletHealthCheckURL) 415 }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse()) 416 417 // Create the pod bound to the node. It will remain in the Pending 418 // phase as Kubelet is down. 419 ginkgo.By(fmt.Sprintf("Creating a pod (%v/%v)", f.Namespace.Name, podName)) 420 pod := e2epod.NewPodClient(f).Create(ctx, podSpec) 421 422 ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name)) 423 err := e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod}) 424 framework.ExpectNoError(err, "Failed to delete the pod: %q", pod.Name) 425 426 // Restart Kubelet so that it proceeds with deletion 427 ginkgo.By("Starting the kubelet") 428 startKubelet() 429 430 // wait until the kubelet health check will succeed 431 gomega.Eventually(ctx, func() bool { 432 return kubeletHealthCheck(kubeletHealthCheckURL) 433 }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue()) 434 435 // Wait for the Kubelet to be ready. 436 gomega.Eventually(ctx, func(ctx context.Context) bool { 437 nodes, err := e2enode.TotalReady(ctx, f.ClientSet) 438 framework.ExpectNoError(err) 439 return nodes == 1 440 }, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrue()) 441 442 ginkgo.By(fmt.Sprintf("After the kubelet is restarted, verify the pod (%v/%v) is deleted by kubelet", pod.Namespace, pod.Name)) 443 gomega.Eventually(ctx, func(ctx context.Context) error { 444 return checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace) 445 }, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil()) 446 }) 447 // Regression test for an extended scenario for https://issues.k8s.io/118472 448 ginkgo.It("should force-delete non-admissible pods that was admitted and running before kubelet restart", func(ctx context.Context) { 449 nodeLabelKey := "custom-label-key-required" 450 nodeLabelValueRequired := "custom-label-value-required-for-admission" 451 podName := "rejected-deleted-run" + string(uuid.NewUUID()) 452 gracePeriod := int64(30) 453 nodeName := getNodeName(ctx, f) 454 pod := e2epod.MustMixinRestrictedPodSecurity(&v1.Pod{ 455 ObjectMeta: metav1.ObjectMeta{ 456 Name: podName, 457 Namespace: f.Namespace.Name, 458 }, 459 Spec: v1.PodSpec{ 460 NodeSelector: map[string]string{ 461 nodeLabelKey: nodeLabelValueRequired, 462 }, 463 NodeName: nodeName, 464 TerminationGracePeriodSeconds: &gracePeriod, 465 RestartPolicy: v1.RestartPolicyNever, 466 Containers: []v1.Container{ 467 { 468 Name: podName, 469 Image: imageutils.GetPauseImageName(), 470 }, 471 }, 472 }, 473 }) 474 475 ginkgo.By(fmt.Sprintf("Adding node label for node (%v) to allow admission of pod (%v/%v)", nodeName, f.Namespace.Name, podName)) 476 e2enode.AddOrUpdateLabelOnNode(f.ClientSet, nodeName, nodeLabelKey, nodeLabelValueRequired) 477 ginkgo.DeferCleanup(func() { e2enode.RemoveLabelOffNode(f.ClientSet, nodeName, nodeLabelKey) }) 478 479 // Create the pod bound to the node. It will start, but will be rejected after kubelet restart. 480 ginkgo.By(fmt.Sprintf("Creating a pod (%v/%v)", f.Namespace.Name, podName)) 481 pod = e2epod.NewPodClient(f).Create(ctx, pod) 482 483 ginkgo.By(fmt.Sprintf("Waiting for the pod (%v/%v) to be running", f.Namespace.Name, pod.Name)) 484 err := e2epod.WaitForPodNameRunningInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name) 485 framework.ExpectNoError(err, "Failed to await for the pod to be running: (%v/%v)", f.Namespace.Name, pod.Name) 486 487 ginkgo.By("Stopping the kubelet") 488 startKubelet := stopKubelet() 489 490 // wait until the kubelet health check will fail 491 gomega.Eventually(ctx, func() bool { 492 return kubeletHealthCheck(kubeletHealthCheckURL) 493 }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse()) 494 495 ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name)) 496 err = e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod}) 497 framework.ExpectNoError(err, "Failed to delete the pod: %q", pod.Name) 498 499 ginkgo.By(fmt.Sprintf("Removing node label for node (%v) to ensure the pod (%v/%v) is rejected after kubelet restart", nodeName, f.Namespace.Name, podName)) 500 e2enode.RemoveLabelOffNode(f.ClientSet, nodeName, nodeLabelKey) 501 502 // Restart Kubelet so that it proceeds with deletion 503 ginkgo.By("Starting the kubelet") 504 startKubelet() 505 506 // wait until the kubelet health check will succeed 507 gomega.Eventually(ctx, func() bool { 508 return kubeletHealthCheck(kubeletHealthCheckURL) 509 }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue()) 510 511 // Wait for the Kubelet to be ready. 512 gomega.Eventually(ctx, func(ctx context.Context) bool { 513 nodes, err := e2enode.TotalReady(ctx, f.ClientSet) 514 framework.ExpectNoError(err) 515 return nodes == 1 516 }, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrue()) 517 518 ginkgo.By(fmt.Sprintf("Once Kubelet is restarted, verify the pod (%v/%v) is deleted by kubelet", pod.Namespace, pod.Name)) 519 gomega.Eventually(ctx, func(ctx context.Context) error { 520 return checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace) 521 }, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil()) 522 }) 523 }) 524 525 })