k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e_node/node_shutdown_linux_test.go (about) 1 //go:build linux 2 // +build linux 3 4 /* 5 Copyright 2021 The Kubernetes Authors. 6 7 Licensed under the Apache License, Version 2.0 (the "License"); 8 you may not use this file except in compliance with the License. 9 You may obtain a copy of the License at 10 11 http://www.apache.org/licenses/LICENSE-2.0 12 13 Unless required by applicable law or agreed to in writing, software 14 distributed under the License is distributed on an "AS IS" BASIS, 15 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 See the License for the specific language governing permissions and 17 limitations under the License. 18 */ 19 20 package e2enode 21 22 import ( 23 "context" 24 "fmt" 25 "os" 26 "os/exec" 27 "regexp" 28 "strconv" 29 "time" 30 31 apierrors "k8s.io/apimachinery/pkg/api/errors" 32 "k8s.io/apimachinery/pkg/fields" 33 "k8s.io/apimachinery/pkg/watch" 34 "k8s.io/client-go/tools/cache" 35 watchtools "k8s.io/client-go/tools/watch" 36 "k8s.io/kubectl/pkg/util/podutils" 37 38 admissionapi "k8s.io/pod-security-admission/api" 39 40 "github.com/onsi/ginkgo/v2" 41 "github.com/onsi/gomega" 42 "k8s.io/kubernetes/pkg/apis/scheduling" 43 "k8s.io/kubernetes/test/e2e/framework" 44 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 45 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" 46 "k8s.io/kubernetes/test/e2e/nodefeature" 47 48 "github.com/godbus/dbus/v5" 49 v1 "k8s.io/api/core/v1" 50 schedulingv1 "k8s.io/api/scheduling/v1" 51 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 52 "k8s.io/apimachinery/pkg/util/uuid" 53 "k8s.io/apimachinery/pkg/util/wait" 54 "k8s.io/kubernetes/pkg/features" 55 kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" 56 kubelettypes "k8s.io/kubernetes/pkg/kubelet/types" 57 testutils "k8s.io/kubernetes/test/utils" 58 ) 59 60 var _ = SIGDescribe("GracefulNodeShutdown", framework.WithSerial(), nodefeature.GracefulNodeShutdown, nodefeature.GracefulNodeShutdownBasedOnPodPriority, func() { 61 f := framework.NewDefaultFramework("graceful-node-shutdown") 62 f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged 63 64 ginkgo.BeforeEach(func() { 65 if _, err := exec.LookPath("systemd-run"); err == nil { 66 if version, verr := exec.Command("systemd-run", "--version").Output(); verr == nil { 67 // sample output from $ systemd-run --version 68 // systemd 245 (245.4-4ubuntu3.13) 69 re := regexp.MustCompile(`systemd (\d+)`) 70 if match := re.FindSubmatch(version); len(match) > 1 { 71 systemdVersion, err := strconv.Atoi(string(match[1])) 72 if err != nil { 73 framework.Logf("failed to parse systemd version with error %v, 'systemd-run --version' output was [%s]", err, version) 74 } else { 75 // See comments in issue 107043, this is a known problem for a long time that this feature does not work on older systemd 76 // https://github.com/kubernetes/kubernetes/issues/107043#issuecomment-997546598 77 if systemdVersion < 245 { 78 e2eskipper.Skipf("skipping GracefulNodeShutdown tests as we are running on an old version of systemd : %d", systemdVersion) 79 } 80 } 81 } 82 } 83 } 84 }) 85 86 f.Context("graceful node shutdown when PodDisruptionConditions are enabled", nodefeature.PodDisruptionConditions, func() { 87 88 const ( 89 pollInterval = 1 * time.Second 90 podStatusUpdateTimeout = 30 * time.Second 91 nodeStatusUpdateTimeout = 30 * time.Second 92 nodeShutdownGracePeriod = 30 * time.Second 93 ) 94 95 tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) { 96 initialConfig.FeatureGates = map[string]bool{ 97 string(features.GracefulNodeShutdown): true, 98 string(features.PodDisruptionConditions): true, 99 string(features.GracefulNodeShutdownBasedOnPodPriority): false, 100 } 101 initialConfig.ShutdownGracePeriod = metav1.Duration{Duration: nodeShutdownGracePeriod} 102 }) 103 104 ginkgo.BeforeEach(func(ctx context.Context) { 105 ginkgo.By("Wait for the node to be ready") 106 waitForNodeReady(ctx) 107 }) 108 109 ginkgo.AfterEach(func() { 110 ginkgo.By("Emitting Shutdown false signal; cancelling the shutdown") 111 err := emitSignalPrepareForShutdown(false) 112 framework.ExpectNoError(err) 113 }) 114 115 ginkgo.It("should add the DisruptionTarget pod failure condition to the evicted pods", func(ctx context.Context) { 116 nodeName := getNodeName(ctx, f) 117 nodeSelector := fields.Set{ 118 "spec.nodeName": nodeName, 119 }.AsSelector().String() 120 121 // Define test pods 122 pods := []*v1.Pod{ 123 getGracePeriodOverrideTestPod("pod-to-evict-"+string(uuid.NewUUID()), nodeName, 5, ""), 124 } 125 126 ctx, cancel := context.WithCancel(context.Background()) 127 defer cancel() 128 129 ginkgo.By("reating batch pods") 130 e2epod.NewPodClient(f).CreateBatch(ctx, pods) 131 132 list, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{ 133 FieldSelector: nodeSelector, 134 }) 135 136 framework.ExpectNoError(err) 137 gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected") 138 139 list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{ 140 FieldSelector: nodeSelector, 141 }) 142 if err != nil { 143 framework.Failf("Failed to start batch pod: %q", err) 144 } 145 gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected") 146 147 for _, pod := range list.Items { 148 framework.Logf("Pod (%v/%v) status conditions: %q", pod.Namespace, pod.Name, &pod.Status.Conditions) 149 } 150 151 ginkgo.By("Verifying batch pods are running") 152 for _, pod := range list.Items { 153 if podReady, err := testutils.PodRunningReady(&pod); err != nil || !podReady { 154 framework.Failf("Failed to start batch pod: (%v/%v)", pod.Namespace, pod.Name) 155 } 156 } 157 158 ginkgo.By("Emitting shutdown signal") 159 err = emitSignalPrepareForShutdown(true) 160 framework.ExpectNoError(err) 161 162 ginkgo.By("Verifying that all pods are shutdown") 163 // All pod should be shutdown 164 gomega.Eventually(func() error { 165 list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{ 166 FieldSelector: nodeSelector, 167 }) 168 if err != nil { 169 return err 170 } 171 gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected") 172 173 for _, pod := range list.Items { 174 if !isPodShutdown(&pod) { 175 framework.Logf("Expecting pod to be shutdown, but it's not currently. Pod: (%v/%v), Pod Status Phase: %q, Pod Status Reason: %q", pod.Namespace, pod.Name, pod.Status.Phase, pod.Status.Reason) 176 return fmt.Errorf("pod should be shutdown, phase: %s", pod.Status.Phase) 177 } 178 podDisruptionCondition := e2epod.FindPodConditionByType(&pod.Status, v1.DisruptionTarget) 179 if podDisruptionCondition == nil { 180 framework.Failf("pod (%v/%v) should have the condition: %q, pod status: %v", pod.Namespace, pod.Name, v1.DisruptionTarget, pod.Status) 181 } 182 } 183 return nil 184 }, podStatusUpdateTimeout+(nodeShutdownGracePeriod), pollInterval).Should(gomega.BeNil()) 185 }) 186 }) 187 188 ginkgo.Context("when gracefully shutting down", func() { 189 190 const ( 191 pollInterval = 1 * time.Second 192 podStatusUpdateTimeout = 30 * time.Second 193 nodeStatusUpdateTimeout = 30 * time.Second 194 nodeShutdownGracePeriod = 20 * time.Second 195 nodeShutdownGracePeriodCriticalPods = 10 * time.Second 196 ) 197 198 tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) { 199 initialConfig.FeatureGates = map[string]bool{ 200 string(features.GracefulNodeShutdown): true, 201 string(features.GracefulNodeShutdownBasedOnPodPriority): false, 202 string(features.PodReadyToStartContainersCondition): true, 203 } 204 initialConfig.ShutdownGracePeriod = metav1.Duration{Duration: nodeShutdownGracePeriod} 205 initialConfig.ShutdownGracePeriodCriticalPods = metav1.Duration{Duration: nodeShutdownGracePeriodCriticalPods} 206 }) 207 208 ginkgo.BeforeEach(func(ctx context.Context) { 209 ginkgo.By("Wait for the node to be ready") 210 waitForNodeReady(ctx) 211 }) 212 213 ginkgo.AfterEach(func(ctx context.Context) { 214 ginkgo.By("Emitting Shutdown false signal; cancelling the shutdown") 215 err := emitSignalPrepareForShutdown(false) 216 framework.ExpectNoError(err) 217 }) 218 219 ginkgo.It("should be able to gracefully shutdown pods with various grace periods", func(ctx context.Context) { 220 nodeName := getNodeName(ctx, f) 221 nodeSelector := fields.Set{ 222 "spec.nodeName": nodeName, 223 }.AsSelector().String() 224 225 // Define test pods 226 pods := []*v1.Pod{ 227 getGracePeriodOverrideTestPod("period-120-"+string(uuid.NewUUID()), nodeName, 120, ""), 228 getGracePeriodOverrideTestPod("period-5-"+string(uuid.NewUUID()), nodeName, 5, ""), 229 getGracePeriodOverrideTestPod("period-critical-120-"+string(uuid.NewUUID()), nodeName, 120, scheduling.SystemNodeCritical), 230 getGracePeriodOverrideTestPod("period-critical-5-"+string(uuid.NewUUID()), nodeName, 5, scheduling.SystemNodeCritical), 231 } 232 233 ginkgo.By("Creating batch pods") 234 e2epod.NewPodClient(f).CreateBatch(ctx, pods) 235 236 list, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{ 237 FieldSelector: nodeSelector, 238 }) 239 framework.ExpectNoError(err) 240 gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected") 241 242 ctx, cancel := context.WithCancel(ctx) 243 defer cancel() 244 go func() { 245 defer ginkgo.GinkgoRecover() 246 w := &cache.ListWatch{ 247 WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { 248 return f.ClientSet.CoreV1().Pods(f.Namespace.Name).Watch(ctx, options) 249 }, 250 } 251 252 // Setup watch to continuously monitor any pod events and detect invalid pod status updates 253 _, err = watchtools.Until(ctx, list.ResourceVersion, w, func(event watch.Event) (bool, error) { 254 if pod, ok := event.Object.(*v1.Pod); ok { 255 if isPodStatusAffectedByIssue108594(pod) { 256 return false, fmt.Errorf("failing test due to detecting invalid pod status") 257 } 258 // Watch will never terminate (only when the test ends due to context cancellation) 259 return false, nil 260 } 261 return false, nil 262 }) 263 264 // Ignore timeout error since the context will be explicitly cancelled and the watch will never return true 265 if err != nil && err != wait.ErrWaitTimeout { 266 framework.Failf("watch for invalid pod status failed: %v", err.Error()) 267 } 268 }() 269 270 ginkgo.By("Verifying batch pods are running") 271 for _, pod := range list.Items { 272 if podReady, err := testutils.PodRunningReady(&pod); err != nil || !podReady { 273 framework.Failf("Failed to start batch pod: %v", pod.Name) 274 } 275 } 276 277 ginkgo.By("Emitting shutdown signal") 278 err = emitSignalPrepareForShutdown(true) 279 framework.ExpectNoError(err) 280 281 ginkgo.By("Verifying that non-critical pods are shutdown") 282 // Not critical pod should be shutdown 283 gomega.Eventually(ctx, func(ctx context.Context) error { 284 list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{ 285 FieldSelector: nodeSelector, 286 }) 287 if err != nil { 288 return err 289 } 290 gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected") 291 292 for _, pod := range list.Items { 293 if kubelettypes.IsCriticalPod(&pod) { 294 if isPodShutdown(&pod) { 295 framework.Logf("Expecting critical pod (%v/%v) to be running, but it's not currently. Pod Status %+v", pod.Namespace, pod.Name, pod.Status) 296 return fmt.Errorf("critical pod (%v/%v) should not be shutdown, phase: %s", pod.Namespace, pod.Name, pod.Status.Phase) 297 } 298 } else { 299 if !isPodShutdown(&pod) { 300 framework.Logf("Expecting non-critical pod (%v/%v) to be shutdown, but it's not currently. Pod Status %+v", pod.Namespace, pod.Name, pod.Status) 301 return fmt.Errorf("pod (%v/%v) should be shutdown, phase: %s", pod.Namespace, pod.Name, pod.Status.Phase) 302 } 303 } 304 } 305 return nil 306 }, podStatusUpdateTimeout, pollInterval).Should(gomega.Succeed()) 307 308 ginkgo.By("Verifying that all pods are shutdown") 309 // All pod should be shutdown 310 gomega.Eventually(ctx, func(ctx context.Context) error { 311 list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{ 312 FieldSelector: nodeSelector, 313 }) 314 if err != nil { 315 return err 316 } 317 gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected") 318 319 for _, pod := range list.Items { 320 if !isPodShutdown(&pod) { 321 framework.Logf("Expecting pod (%v/%v) to be shutdown, but it's not currently: Pod Status %+v", pod.Namespace, pod.Name, pod.Status) 322 return fmt.Errorf("pod (%v/%v) should be shutdown, phase: %s", pod.Namespace, pod.Name, pod.Status.Phase) 323 } 324 } 325 return nil 326 }, 327 // Critical pod starts shutdown after (nodeShutdownGracePeriod-nodeShutdownGracePeriodCriticalPods) 328 podStatusUpdateTimeout+(nodeShutdownGracePeriod-nodeShutdownGracePeriodCriticalPods), 329 pollInterval).Should(gomega.Succeed()) 330 331 ginkgo.By("Verify that all pod ready to start condition are set to false after terminating") 332 // All pod ready to start condition should set to false 333 gomega.Eventually(ctx, func(ctx context.Context) error { 334 list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{ 335 FieldSelector: nodeSelector, 336 }) 337 if err != nil { 338 return err 339 } 340 gomega.Expect(list.Items).To(gomega.HaveLen(len(pods))) 341 for _, pod := range list.Items { 342 if !isPodReadyToStartConditionSetToFalse(&pod) { 343 framework.Logf("Expecting pod (%v/%v) 's ready to start condition set to false, "+ 344 "but it's not currently: Pod Condition %+v", pod.Namespace, pod.Name, pod.Status.Conditions) 345 return fmt.Errorf("pod (%v/%v) 's ready to start condition should be false, condition: %s, phase: %s", 346 pod.Namespace, pod.Name, pod.Status.Conditions, pod.Status.Phase) 347 } 348 } 349 return nil 350 }, 351 ).Should(gomega.Succeed()) 352 }) 353 354 ginkgo.It("should be able to handle a cancelled shutdown", func(ctx context.Context) { 355 ginkgo.By("Emitting Shutdown signal") 356 err := emitSignalPrepareForShutdown(true) 357 framework.ExpectNoError(err) 358 gomega.Eventually(ctx, func(ctx context.Context) error { 359 isReady := getNodeReadyStatus(ctx, f) 360 if isReady { 361 return fmt.Errorf("node did not become shutdown as expected") 362 } 363 return nil 364 }, nodeStatusUpdateTimeout, pollInterval).Should(gomega.Succeed()) 365 366 ginkgo.By("Emitting Shutdown false signal; cancelling the shutdown") 367 err = emitSignalPrepareForShutdown(false) 368 framework.ExpectNoError(err) 369 gomega.Eventually(ctx, func(ctx context.Context) error { 370 isReady := getNodeReadyStatus(ctx, f) 371 if !isReady { 372 return fmt.Errorf("node did not recover as expected") 373 } 374 return nil 375 }, nodeStatusUpdateTimeout, pollInterval).Should(gomega.Succeed()) 376 }) 377 }) 378 379 framework.Context("when gracefully shutting down with Pod priority", framework.WithFlaky(), func() { 380 381 const ( 382 pollInterval = 1 * time.Second 383 podStatusUpdateTimeout = 30 * time.Second 384 priorityClassesCreateTimeout = 10 * time.Second 385 ) 386 387 var ( 388 customClassA = getPriorityClass("custom-class-a", 100000) 389 customClassB = getPriorityClass("custom-class-b", 10000) 390 customClassC = getPriorityClass("custom-class-c", 1000) 391 ) 392 393 tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) { 394 initialConfig.FeatureGates = map[string]bool{ 395 string(features.GracefulNodeShutdown): true, 396 string(features.GracefulNodeShutdownBasedOnPodPriority): true, 397 } 398 initialConfig.ShutdownGracePeriodByPodPriority = []kubeletconfig.ShutdownGracePeriodByPodPriority{ 399 { 400 Priority: scheduling.SystemCriticalPriority, 401 ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second), 402 }, 403 { 404 Priority: customClassA.Value, 405 ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second), 406 }, 407 { 408 Priority: customClassB.Value, 409 ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second), 410 }, 411 { 412 Priority: customClassC.Value, 413 ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second), 414 }, 415 { 416 Priority: scheduling.DefaultPriorityWhenNoDefaultClassExists, 417 ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second), 418 }, 419 } 420 421 }) 422 423 ginkgo.BeforeEach(func(ctx context.Context) { 424 ginkgo.By("Wait for the node to be ready") 425 waitForNodeReady(ctx) 426 customClasses := []*schedulingv1.PriorityClass{customClassA, customClassB, customClassC} 427 for _, customClass := range customClasses { 428 _, err := f.ClientSet.SchedulingV1().PriorityClasses().Create(ctx, customClass, metav1.CreateOptions{}) 429 if err != nil && !apierrors.IsAlreadyExists(err) { 430 framework.ExpectNoError(err) 431 } 432 } 433 gomega.Eventually(ctx, func(ctx context.Context) error { 434 for _, customClass := range customClasses { 435 _, err := f.ClientSet.SchedulingV1().PriorityClasses().Get(ctx, customClass.Name, metav1.GetOptions{}) 436 if err != nil { 437 return err 438 } 439 } 440 return nil 441 }, priorityClassesCreateTimeout, pollInterval).Should(gomega.Succeed()) 442 }) 443 444 ginkgo.AfterEach(func() { 445 ginkgo.By("Emitting Shutdown false signal; cancelling the shutdown") 446 err := emitSignalPrepareForShutdown(false) 447 framework.ExpectNoError(err) 448 }) 449 450 ginkgo.It("should be able to gracefully shutdown pods with various grace periods", func(ctx context.Context) { 451 nodeName := getNodeName(ctx, f) 452 nodeSelector := fields.Set{ 453 "spec.nodeName": nodeName, 454 }.AsSelector().String() 455 456 var ( 457 period5Name = "period-5-" + string(uuid.NewUUID()) 458 periodC5Name = "period-c-5-" + string(uuid.NewUUID()) 459 periodB5Name = "period-b-5-" + string(uuid.NewUUID()) 460 periodA5Name = "period-a-5-" + string(uuid.NewUUID()) 461 periodCritical5Name = "period-critical-5-" + string(uuid.NewUUID()) 462 ) 463 464 // Define test pods 465 pods := []*v1.Pod{ 466 getGracePeriodOverrideTestPod(period5Name, nodeName, 5, ""), 467 getGracePeriodOverrideTestPod(periodC5Name, nodeName, 5, customClassC.Name), 468 getGracePeriodOverrideTestPod(periodB5Name, nodeName, 5, customClassB.Name), 469 getGracePeriodOverrideTestPod(periodA5Name, nodeName, 5, customClassA.Name), 470 getGracePeriodOverrideTestPod(periodCritical5Name, nodeName, 5, scheduling.SystemNodeCritical), 471 } 472 473 // Expected down steps 474 downSteps := [][]string{ 475 { 476 period5Name, 477 }, 478 { 479 period5Name, 480 periodC5Name, 481 }, 482 { 483 484 period5Name, 485 periodC5Name, 486 periodB5Name, 487 }, 488 { 489 period5Name, 490 periodC5Name, 491 periodB5Name, 492 periodA5Name, 493 }, 494 { 495 period5Name, 496 periodC5Name, 497 periodB5Name, 498 periodA5Name, 499 periodCritical5Name, 500 }, 501 } 502 503 ginkgo.By("Creating batch pods") 504 e2epod.NewPodClient(f).CreateBatch(ctx, pods) 505 506 list, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{ 507 FieldSelector: nodeSelector, 508 }) 509 framework.ExpectNoError(err) 510 gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected") 511 512 ginkgo.By("Verifying batch pods are running") 513 for _, pod := range list.Items { 514 if podReady, err := testutils.PodRunningReady(&pod); err != nil || !podReady { 515 framework.Failf("Failed to start batch pod: (%v/%v)", pod.Namespace, pod.Name) 516 } 517 } 518 519 ginkgo.By("Emitting shutdown signal") 520 err = emitSignalPrepareForShutdown(true) 521 framework.ExpectNoError(err) 522 523 ginkgo.By("Verifying that pods are shutdown") 524 525 for _, step := range downSteps { 526 gomega.Eventually(ctx, func(ctx context.Context) error { 527 list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{ 528 FieldSelector: nodeSelector, 529 }) 530 if err != nil { 531 return err 532 } 533 gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected") 534 for _, pod := range list.Items { 535 shouldShutdown := false 536 for _, podName := range step { 537 if podName == pod.Name { 538 shouldShutdown = true 539 break 540 } 541 } 542 if !shouldShutdown { 543 if pod.Status.Phase != v1.PodRunning { 544 framework.Logf("Expecting pod to be running, but it's not currently. Pod: (%v/%v), Pod Status Phase: %q, Pod Status Reason: %q", pod.Namespace, pod.Name, pod.Status.Phase, pod.Status.Reason) 545 return fmt.Errorf("pod (%v/%v) should not be shutdown, phase: %s, reason: %s", pod.Namespace, pod.Name, pod.Status.Phase, pod.Status.Reason) 546 } 547 } else { 548 if pod.Status.Reason != podShutdownReason { 549 framework.Logf("Expecting pod to be shutdown, but it's not currently. Pod: (%v/%v), Pod Status Phase: %q, Pod Status Reason: %q", pod.Namespace, pod.Name, pod.Status.Phase, pod.Status.Reason) 550 for _, item := range list.Items { 551 framework.Logf("DEBUG %s, %s, %s", item.Name, item.Status.Phase, pod.Status.Reason) 552 } 553 return fmt.Errorf("pod (%v/%v) should be shutdown, reason: %s", pod.Namespace, pod.Name, pod.Status.Reason) 554 } 555 } 556 } 557 return nil 558 }, podStatusUpdateTimeout, pollInterval).Should(gomega.Succeed()) 559 } 560 561 ginkgo.By("should have state file") 562 stateFile := "/var/lib/kubelet/graceful_node_shutdown_state" 563 _, err = os.Stat(stateFile) 564 framework.ExpectNoError(err) 565 }) 566 }) 567 }) 568 569 func getPriorityClass(name string, value int32) *schedulingv1.PriorityClass { 570 priority := &schedulingv1.PriorityClass{ 571 TypeMeta: metav1.TypeMeta{ 572 Kind: "PriorityClass", 573 APIVersion: "scheduling.k8s.io/v1", 574 }, 575 ObjectMeta: metav1.ObjectMeta{ 576 Name: name, 577 }, 578 Value: value, 579 } 580 return priority 581 } 582 583 // getGracePeriodOverrideTestPod returns a new Pod object containing a container 584 // runs a shell script, hangs the process until a SIGTERM signal is received. 585 // The script waits for $PID to ensure that the process does not exist. 586 // If priorityClassName is scheduling.SystemNodeCritical, the Pod is marked as critical and a comment is added. 587 func getGracePeriodOverrideTestPod(name string, node string, gracePeriod int64, priorityClassName string) *v1.Pod { 588 pod := &v1.Pod{ 589 TypeMeta: metav1.TypeMeta{ 590 Kind: "Pod", 591 APIVersion: "v1", 592 }, 593 ObjectMeta: metav1.ObjectMeta{ 594 Name: name, 595 }, 596 Spec: v1.PodSpec{ 597 Containers: []v1.Container{ 598 { 599 Name: name, 600 Image: busyboxImage, 601 Command: []string{"sh", "-c"}, 602 Args: []string{` 603 sleep 9999999 & 604 PID=$! 605 _term() { 606 echo "Caught SIGTERM signal!" 607 wait $PID 608 } 609 610 trap _term SIGTERM 611 wait $PID 612 `}, 613 }, 614 }, 615 TerminationGracePeriodSeconds: &gracePeriod, 616 NodeName: node, 617 }, 618 } 619 if priorityClassName == scheduling.SystemNodeCritical { 620 pod.ObjectMeta.Annotations = map[string]string{ 621 kubelettypes.ConfigSourceAnnotationKey: kubelettypes.FileSource, 622 } 623 pod.Spec.PriorityClassName = priorityClassName 624 if !kubelettypes.IsCriticalPod(pod) { 625 framework.Failf("pod %q should be a critical pod", pod.Name) 626 } 627 } else { 628 pod.Spec.PriorityClassName = priorityClassName 629 if kubelettypes.IsCriticalPod(pod) { 630 framework.Failf("pod %q should not be a critical pod", pod.Name) 631 } 632 } 633 return pod 634 } 635 636 // Emits a fake PrepareForShutdown dbus message on system dbus. Will cause kubelet to react to an active shutdown event. 637 func emitSignalPrepareForShutdown(b bool) error { 638 conn, err := dbus.ConnectSystemBus() 639 if err != nil { 640 return err 641 } 642 defer conn.Close() 643 return conn.Emit("/org/freedesktop/login1", "org.freedesktop.login1.Manager.PrepareForShutdown", b) 644 } 645 646 func getNodeReadyStatus(ctx context.Context, f *framework.Framework) bool { 647 nodeList, err := f.ClientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) 648 framework.ExpectNoError(err) 649 // Assuming that there is only one node, because this is a node e2e test. 650 gomega.Expect(nodeList.Items).To(gomega.HaveLen(1), "the number of nodes is not as expected") 651 return isNodeReady(&nodeList.Items[0]) 652 } 653 654 const ( 655 // https://github.com/kubernetes/kubernetes/blob/1dd781ddcad454cc381806fbc6bd5eba8fa368d7/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go#L43-L44 656 podShutdownReason = "Terminated" 657 podShutdownMessage = "Pod was terminated in response to imminent node shutdown." 658 ) 659 660 func isPodShutdown(pod *v1.Pod) bool { 661 if pod == nil { 662 return false 663 } 664 665 hasContainersNotReadyCondition := false 666 for _, cond := range pod.Status.Conditions { 667 if cond.Type == v1.ContainersReady && cond.Status == v1.ConditionFalse { 668 hasContainersNotReadyCondition = true 669 } 670 } 671 672 return pod.Status.Message == podShutdownMessage && pod.Status.Reason == podShutdownReason && hasContainersNotReadyCondition && pod.Status.Phase == v1.PodFailed 673 } 674 675 // Pods should never report failed phase and have ready condition = true (https://github.com/kubernetes/kubernetes/issues/108594) 676 func isPodStatusAffectedByIssue108594(pod *v1.Pod) bool { 677 return pod.Status.Phase == v1.PodFailed && podutils.IsPodReady(pod) 678 } 679 680 func isPodReadyToStartConditionSetToFalse(pod *v1.Pod) bool { 681 if pod == nil { 682 return false 683 } 684 readyToStartConditionSetToFalse := false 685 for _, cond := range pod.Status.Conditions { 686 if cond.Status == v1.ConditionFalse { 687 readyToStartConditionSetToFalse = true 688 } 689 } 690 691 return readyToStartConditionSetToFalse 692 }