k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e_node/node_shutdown_linux_test.go

k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e_node/node_shutdown_linux_test.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2021 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  package e2enode
    21  
    22  import (
    23  	"context"
    24  	"fmt"
    25  	"os"
    26  	"os/exec"
    27  	"regexp"
    28  	"strconv"
    29  	"time"
    30  
    31  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    32  	"k8s.io/apimachinery/pkg/fields"
    33  	"k8s.io/apimachinery/pkg/watch"
    34  	"k8s.io/client-go/tools/cache"
    35  	watchtools "k8s.io/client-go/tools/watch"
    36  	"k8s.io/kubectl/pkg/util/podutils"
    37  
    38  	admissionapi "k8s.io/pod-security-admission/api"
    39  
    40  	"github.com/onsi/ginkgo/v2"
    41  	"github.com/onsi/gomega"
    42  	"k8s.io/kubernetes/pkg/apis/scheduling"
    43  	"k8s.io/kubernetes/test/e2e/framework"
    44  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    45  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    46  	"k8s.io/kubernetes/test/e2e/nodefeature"
    47  
    48  	"github.com/godbus/dbus/v5"
    49  	v1 "k8s.io/api/core/v1"
    50  	schedulingv1 "k8s.io/api/scheduling/v1"
    51  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    52  	"k8s.io/apimachinery/pkg/util/uuid"
    53  	"k8s.io/apimachinery/pkg/util/wait"
    54  	"k8s.io/kubernetes/pkg/features"
    55  	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
    56  	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
    57  	testutils "k8s.io/kubernetes/test/utils"
    58  )
    59  
    60  var _ = SIGDescribe("GracefulNodeShutdown", framework.WithSerial(), nodefeature.GracefulNodeShutdown, nodefeature.GracefulNodeShutdownBasedOnPodPriority, func() {
    61  	f := framework.NewDefaultFramework("graceful-node-shutdown")
    62  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    63  
    64  	ginkgo.BeforeEach(func() {
    65  		if _, err := exec.LookPath("systemd-run"); err == nil {
    66  			if version, verr := exec.Command("systemd-run", "--version").Output(); verr == nil {
    67  				// sample output from $ systemd-run --version
    68  				// systemd 245 (245.4-4ubuntu3.13)
    69  				re := regexp.MustCompile(`systemd (\d+)`)
    70  				if match := re.FindSubmatch(version); len(match) > 1 {
    71  					systemdVersion, err := strconv.Atoi(string(match[1]))
    72  					if err != nil {
    73  						framework.Logf("failed to parse systemd version with error %v, 'systemd-run --version' output was [%s]", err, version)
    74  					} else {
    75  						// See comments in issue 107043, this is a known problem for a long time that this feature does not work on older systemd
    76  						// https://github.com/kubernetes/kubernetes/issues/107043#issuecomment-997546598
    77  						if systemdVersion < 245 {
    78  							e2eskipper.Skipf("skipping GracefulNodeShutdown tests as we are running on an old version of systemd : %d", systemdVersion)
    79  						}
    80  					}
    81  				}
    82  			}
    83  		}
    84  	})
    85  
    86  	f.Context("graceful node shutdown when PodDisruptionConditions are enabled", nodefeature.PodDisruptionConditions, func() {
    87  
    88  		const (
    89  			pollInterval            = 1 * time.Second
    90  			podStatusUpdateTimeout  = 30 * time.Second
    91  			nodeStatusUpdateTimeout = 30 * time.Second
    92  			nodeShutdownGracePeriod = 30 * time.Second
    93  		)
    94  
    95  		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
    96  			initialConfig.FeatureGates = map[string]bool{
    97  				string(features.GracefulNodeShutdown):                   true,
    98  				string(features.PodDisruptionConditions):                true,
    99  				string(features.GracefulNodeShutdownBasedOnPodPriority): false,
   100  			}
   101  			initialConfig.ShutdownGracePeriod = metav1.Duration{Duration: nodeShutdownGracePeriod}
   102  		})
   103  
   104  		ginkgo.BeforeEach(func(ctx context.Context) {
   105  			ginkgo.By("Wait for the node to be ready")
   106  			waitForNodeReady(ctx)
   107  		})
   108  
   109  		ginkgo.AfterEach(func() {
   110  			ginkgo.By("Emitting Shutdown false signal; cancelling the shutdown")
   111  			err := emitSignalPrepareForShutdown(false)
   112  			framework.ExpectNoError(err)
   113  		})
   114  
   115  		ginkgo.It("should add the DisruptionTarget pod failure condition to the evicted pods", func(ctx context.Context) {
   116  			nodeName := getNodeName(ctx, f)
   117  			nodeSelector := fields.Set{
   118  				"spec.nodeName": nodeName,
   119  			}.AsSelector().String()
   120  
   121  			// Define test pods
   122  			pods := []*v1.Pod{
   123  				getGracePeriodOverrideTestPod("pod-to-evict-"+string(uuid.NewUUID()), nodeName, 5, ""),
   124  			}
   125  
   126  			ctx, cancel := context.WithCancel(context.Background())
   127  			defer cancel()
   128  
   129  			ginkgo.By("reating batch pods")
   130  			e2epod.NewPodClient(f).CreateBatch(ctx, pods)
   131  
   132  			list, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   133  				FieldSelector: nodeSelector,
   134  			})
   135  
   136  			framework.ExpectNoError(err)
   137  			gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   138  
   139  			list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   140  				FieldSelector: nodeSelector,
   141  			})
   142  			if err != nil {
   143  				framework.Failf("Failed to start batch pod: %q", err)
   144  			}
   145  			gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   146  
   147  			for _, pod := range list.Items {
   148  				framework.Logf("Pod (%v/%v) status conditions: %q", pod.Namespace, pod.Name, &pod.Status.Conditions)
   149  			}
   150  
   151  			ginkgo.By("Verifying batch pods are running")
   152  			for _, pod := range list.Items {
   153  				if podReady, err := testutils.PodRunningReady(&pod); err != nil || !podReady {
   154  					framework.Failf("Failed to start batch pod: (%v/%v)", pod.Namespace, pod.Name)
   155  				}
   156  			}
   157  
   158  			ginkgo.By("Emitting shutdown signal")
   159  			err = emitSignalPrepareForShutdown(true)
   160  			framework.ExpectNoError(err)
   161  
   162  			ginkgo.By("Verifying that all pods are shutdown")
   163  			// All pod should be shutdown
   164  			gomega.Eventually(func() error {
   165  				list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   166  					FieldSelector: nodeSelector,
   167  				})
   168  				if err != nil {
   169  					return err
   170  				}
   171  				gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   172  
   173  				for _, pod := range list.Items {
   174  					if !isPodShutdown(&pod) {
   175  						framework.Logf("Expecting pod to be shutdown, but it's not currently. Pod: (%v/%v), Pod Status Phase: %q, Pod Status Reason: %q", pod.Namespace, pod.Name, pod.Status.Phase, pod.Status.Reason)
   176  						return fmt.Errorf("pod should be shutdown, phase: %s", pod.Status.Phase)
   177  					}
   178  					podDisruptionCondition := e2epod.FindPodConditionByType(&pod.Status, v1.DisruptionTarget)
   179  					if podDisruptionCondition == nil {
   180  						framework.Failf("pod (%v/%v) should have the condition: %q, pod status: %v", pod.Namespace, pod.Name, v1.DisruptionTarget, pod.Status)
   181  					}
   182  				}
   183  				return nil
   184  			}, podStatusUpdateTimeout+(nodeShutdownGracePeriod), pollInterval).Should(gomega.BeNil())
   185  		})
   186  	})
   187  
   188  	ginkgo.Context("when gracefully shutting down", func() {
   189  
   190  		const (
   191  			pollInterval                        = 1 * time.Second
   192  			podStatusUpdateTimeout              = 30 * time.Second
   193  			nodeStatusUpdateTimeout             = 30 * time.Second
   194  			nodeShutdownGracePeriod             = 20 * time.Second
   195  			nodeShutdownGracePeriodCriticalPods = 10 * time.Second
   196  		)
   197  
   198  		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
   199  			initialConfig.FeatureGates = map[string]bool{
   200  				string(features.GracefulNodeShutdown):                   true,
   201  				string(features.GracefulNodeShutdownBasedOnPodPriority): false,
   202  				string(features.PodReadyToStartContainersCondition):     true,
   203  			}
   204  			initialConfig.ShutdownGracePeriod = metav1.Duration{Duration: nodeShutdownGracePeriod}
   205  			initialConfig.ShutdownGracePeriodCriticalPods = metav1.Duration{Duration: nodeShutdownGracePeriodCriticalPods}
   206  		})
   207  
   208  		ginkgo.BeforeEach(func(ctx context.Context) {
   209  			ginkgo.By("Wait for the node to be ready")
   210  			waitForNodeReady(ctx)
   211  		})
   212  
   213  		ginkgo.AfterEach(func(ctx context.Context) {
   214  			ginkgo.By("Emitting Shutdown false signal; cancelling the shutdown")
   215  			err := emitSignalPrepareForShutdown(false)
   216  			framework.ExpectNoError(err)
   217  		})
   218  
   219  		ginkgo.It("should be able to gracefully shutdown pods with various grace periods", func(ctx context.Context) {
   220  			nodeName := getNodeName(ctx, f)
   221  			nodeSelector := fields.Set{
   222  				"spec.nodeName": nodeName,
   223  			}.AsSelector().String()
   224  
   225  			// Define test pods
   226  			pods := []*v1.Pod{
   227  				getGracePeriodOverrideTestPod("period-120-"+string(uuid.NewUUID()), nodeName, 120, ""),
   228  				getGracePeriodOverrideTestPod("period-5-"+string(uuid.NewUUID()), nodeName, 5, ""),
   229  				getGracePeriodOverrideTestPod("period-critical-120-"+string(uuid.NewUUID()), nodeName, 120, scheduling.SystemNodeCritical),
   230  				getGracePeriodOverrideTestPod("period-critical-5-"+string(uuid.NewUUID()), nodeName, 5, scheduling.SystemNodeCritical),
   231  			}
   232  
   233  			ginkgo.By("Creating batch pods")
   234  			e2epod.NewPodClient(f).CreateBatch(ctx, pods)
   235  
   236  			list, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   237  				FieldSelector: nodeSelector,
   238  			})
   239  			framework.ExpectNoError(err)
   240  			gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   241  
   242  			ctx, cancel := context.WithCancel(ctx)
   243  			defer cancel()
   244  			go func() {
   245  				defer ginkgo.GinkgoRecover()
   246  				w := &cache.ListWatch{
   247  					WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
   248  						return f.ClientSet.CoreV1().Pods(f.Namespace.Name).Watch(ctx, options)
   249  					},
   250  				}
   251  
   252  				// Setup watch to continuously monitor any pod events and detect invalid pod status updates
   253  				_, err = watchtools.Until(ctx, list.ResourceVersion, w, func(event watch.Event) (bool, error) {
   254  					if pod, ok := event.Object.(*v1.Pod); ok {
   255  						if isPodStatusAffectedByIssue108594(pod) {
   256  							return false, fmt.Errorf("failing test due to detecting invalid pod status")
   257  						}
   258  						// Watch will never terminate (only when the test ends due to context cancellation)
   259  						return false, nil
   260  					}
   261  					return false, nil
   262  				})
   263  
   264  				// Ignore timeout error since the context will be explicitly cancelled and the watch will never return true
   265  				if err != nil && err != wait.ErrWaitTimeout {
   266  					framework.Failf("watch for invalid pod status failed: %v", err.Error())
   267  				}
   268  			}()
   269  
   270  			ginkgo.By("Verifying batch pods are running")
   271  			for _, pod := range list.Items {
   272  				if podReady, err := testutils.PodRunningReady(&pod); err != nil || !podReady {
   273  					framework.Failf("Failed to start batch pod: %v", pod.Name)
   274  				}
   275  			}
   276  
   277  			ginkgo.By("Emitting shutdown signal")
   278  			err = emitSignalPrepareForShutdown(true)
   279  			framework.ExpectNoError(err)
   280  
   281  			ginkgo.By("Verifying that non-critical pods are shutdown")
   282  			// Not critical pod should be shutdown
   283  			gomega.Eventually(ctx, func(ctx context.Context) error {
   284  				list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   285  					FieldSelector: nodeSelector,
   286  				})
   287  				if err != nil {
   288  					return err
   289  				}
   290  				gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   291  
   292  				for _, pod := range list.Items {
   293  					if kubelettypes.IsCriticalPod(&pod) {
   294  						if isPodShutdown(&pod) {
   295  							framework.Logf("Expecting critical pod (%v/%v) to be running, but it's not currently. Pod Status %+v", pod.Namespace, pod.Name, pod.Status)
   296  							return fmt.Errorf("critical pod (%v/%v) should not be shutdown, phase: %s", pod.Namespace, pod.Name, pod.Status.Phase)
   297  						}
   298  					} else {
   299  						if !isPodShutdown(&pod) {
   300  							framework.Logf("Expecting non-critical pod (%v/%v) to be shutdown, but it's not currently. Pod Status %+v", pod.Namespace, pod.Name, pod.Status)
   301  							return fmt.Errorf("pod (%v/%v) should be shutdown, phase: %s", pod.Namespace, pod.Name, pod.Status.Phase)
   302  						}
   303  					}
   304  				}
   305  				return nil
   306  			}, podStatusUpdateTimeout, pollInterval).Should(gomega.Succeed())
   307  
   308  			ginkgo.By("Verifying that all pods are shutdown")
   309  			// All pod should be shutdown
   310  			gomega.Eventually(ctx, func(ctx context.Context) error {
   311  				list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   312  					FieldSelector: nodeSelector,
   313  				})
   314  				if err != nil {
   315  					return err
   316  				}
   317  				gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   318  
   319  				for _, pod := range list.Items {
   320  					if !isPodShutdown(&pod) {
   321  						framework.Logf("Expecting pod (%v/%v) to be shutdown, but it's not currently: Pod Status %+v", pod.Namespace, pod.Name, pod.Status)
   322  						return fmt.Errorf("pod (%v/%v) should be shutdown, phase: %s", pod.Namespace, pod.Name, pod.Status.Phase)
   323  					}
   324  				}
   325  				return nil
   326  			},
   327  				// Critical pod starts shutdown after (nodeShutdownGracePeriod-nodeShutdownGracePeriodCriticalPods)
   328  				podStatusUpdateTimeout+(nodeShutdownGracePeriod-nodeShutdownGracePeriodCriticalPods),
   329  				pollInterval).Should(gomega.Succeed())
   330  
   331  			ginkgo.By("Verify that all pod ready to start condition are set to false after terminating")
   332  			// All pod ready to start condition should set to false
   333  			gomega.Eventually(ctx, func(ctx context.Context) error {
   334  				list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   335  					FieldSelector: nodeSelector,
   336  				})
   337  				if err != nil {
   338  					return err
   339  				}
   340  				gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)))
   341  				for _, pod := range list.Items {
   342  					if !isPodReadyToStartConditionSetToFalse(&pod) {
   343  						framework.Logf("Expecting pod (%v/%v) 's ready to start condition set to false, "+
   344  							"but it's not currently: Pod Condition %+v", pod.Namespace, pod.Name, pod.Status.Conditions)
   345  						return fmt.Errorf("pod (%v/%v) 's ready to start condition should be false, condition: %s, phase: %s",
   346  							pod.Namespace, pod.Name, pod.Status.Conditions, pod.Status.Phase)
   347  					}
   348  				}
   349  				return nil
   350  			},
   351  			).Should(gomega.Succeed())
   352  		})
   353  
   354  		ginkgo.It("should be able to handle a cancelled shutdown", func(ctx context.Context) {
   355  			ginkgo.By("Emitting Shutdown signal")
   356  			err := emitSignalPrepareForShutdown(true)
   357  			framework.ExpectNoError(err)
   358  			gomega.Eventually(ctx, func(ctx context.Context) error {
   359  				isReady := getNodeReadyStatus(ctx, f)
   360  				if isReady {
   361  					return fmt.Errorf("node did not become shutdown as expected")
   362  				}
   363  				return nil
   364  			}, nodeStatusUpdateTimeout, pollInterval).Should(gomega.Succeed())
   365  
   366  			ginkgo.By("Emitting Shutdown false signal; cancelling the shutdown")
   367  			err = emitSignalPrepareForShutdown(false)
   368  			framework.ExpectNoError(err)
   369  			gomega.Eventually(ctx, func(ctx context.Context) error {
   370  				isReady := getNodeReadyStatus(ctx, f)
   371  				if !isReady {
   372  					return fmt.Errorf("node did not recover as expected")
   373  				}
   374  				return nil
   375  			}, nodeStatusUpdateTimeout, pollInterval).Should(gomega.Succeed())
   376  		})
   377  	})
   378  
   379  	framework.Context("when gracefully shutting down with Pod priority", framework.WithFlaky(), func() {
   380  
   381  		const (
   382  			pollInterval                 = 1 * time.Second
   383  			podStatusUpdateTimeout       = 30 * time.Second
   384  			priorityClassesCreateTimeout = 10 * time.Second
   385  		)
   386  
   387  		var (
   388  			customClassA = getPriorityClass("custom-class-a", 100000)
   389  			customClassB = getPriorityClass("custom-class-b", 10000)
   390  			customClassC = getPriorityClass("custom-class-c", 1000)
   391  		)
   392  
   393  		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
   394  			initialConfig.FeatureGates = map[string]bool{
   395  				string(features.GracefulNodeShutdown):                   true,
   396  				string(features.GracefulNodeShutdownBasedOnPodPriority): true,
   397  			}
   398  			initialConfig.ShutdownGracePeriodByPodPriority = []kubeletconfig.ShutdownGracePeriodByPodPriority{
   399  				{
   400  					Priority:                   scheduling.SystemCriticalPriority,
   401  					ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second),
   402  				},
   403  				{
   404  					Priority:                   customClassA.Value,
   405  					ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second),
   406  				},
   407  				{
   408  					Priority:                   customClassB.Value,
   409  					ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second),
   410  				},
   411  				{
   412  					Priority:                   customClassC.Value,
   413  					ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second),
   414  				},
   415  				{
   416  					Priority:                   scheduling.DefaultPriorityWhenNoDefaultClassExists,
   417  					ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second),
   418  				},
   419  			}
   420  
   421  		})
   422  
   423  		ginkgo.BeforeEach(func(ctx context.Context) {
   424  			ginkgo.By("Wait for the node to be ready")
   425  			waitForNodeReady(ctx)
   426  			customClasses := []*schedulingv1.PriorityClass{customClassA, customClassB, customClassC}
   427  			for _, customClass := range customClasses {
   428  				_, err := f.ClientSet.SchedulingV1().PriorityClasses().Create(ctx, customClass, metav1.CreateOptions{})
   429  				if err != nil && !apierrors.IsAlreadyExists(err) {
   430  					framework.ExpectNoError(err)
   431  				}
   432  			}
   433  			gomega.Eventually(ctx, func(ctx context.Context) error {
   434  				for _, customClass := range customClasses {
   435  					_, err := f.ClientSet.SchedulingV1().PriorityClasses().Get(ctx, customClass.Name, metav1.GetOptions{})
   436  					if err != nil {
   437  						return err
   438  					}
   439  				}
   440  				return nil
   441  			}, priorityClassesCreateTimeout, pollInterval).Should(gomega.Succeed())
   442  		})
   443  
   444  		ginkgo.AfterEach(func() {
   445  			ginkgo.By("Emitting Shutdown false signal; cancelling the shutdown")
   446  			err := emitSignalPrepareForShutdown(false)
   447  			framework.ExpectNoError(err)
   448  		})
   449  
   450  		ginkgo.It("should be able to gracefully shutdown pods with various grace periods", func(ctx context.Context) {
   451  			nodeName := getNodeName(ctx, f)
   452  			nodeSelector := fields.Set{
   453  				"spec.nodeName": nodeName,
   454  			}.AsSelector().String()
   455  
   456  			var (
   457  				period5Name         = "period-5-" + string(uuid.NewUUID())
   458  				periodC5Name        = "period-c-5-" + string(uuid.NewUUID())
   459  				periodB5Name        = "period-b-5-" + string(uuid.NewUUID())
   460  				periodA5Name        = "period-a-5-" + string(uuid.NewUUID())
   461  				periodCritical5Name = "period-critical-5-" + string(uuid.NewUUID())
   462  			)
   463  
   464  			// Define test pods
   465  			pods := []*v1.Pod{
   466  				getGracePeriodOverrideTestPod(period5Name, nodeName, 5, ""),
   467  				getGracePeriodOverrideTestPod(periodC5Name, nodeName, 5, customClassC.Name),
   468  				getGracePeriodOverrideTestPod(periodB5Name, nodeName, 5, customClassB.Name),
   469  				getGracePeriodOverrideTestPod(periodA5Name, nodeName, 5, customClassA.Name),
   470  				getGracePeriodOverrideTestPod(periodCritical5Name, nodeName, 5, scheduling.SystemNodeCritical),
   471  			}
   472  
   473  			// Expected down steps
   474  			downSteps := [][]string{
   475  				{
   476  					period5Name,
   477  				},
   478  				{
   479  					period5Name,
   480  					periodC5Name,
   481  				},
   482  				{
   483  
   484  					period5Name,
   485  					periodC5Name,
   486  					periodB5Name,
   487  				},
   488  				{
   489  					period5Name,
   490  					periodC5Name,
   491  					periodB5Name,
   492  					periodA5Name,
   493  				},
   494  				{
   495  					period5Name,
   496  					periodC5Name,
   497  					periodB5Name,
   498  					periodA5Name,
   499  					periodCritical5Name,
   500  				},
   501  			}
   502  
   503  			ginkgo.By("Creating batch pods")
   504  			e2epod.NewPodClient(f).CreateBatch(ctx, pods)
   505  
   506  			list, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   507  				FieldSelector: nodeSelector,
   508  			})
   509  			framework.ExpectNoError(err)
   510  			gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   511  
   512  			ginkgo.By("Verifying batch pods are running")
   513  			for _, pod := range list.Items {
   514  				if podReady, err := testutils.PodRunningReady(&pod); err != nil || !podReady {
   515  					framework.Failf("Failed to start batch pod: (%v/%v)", pod.Namespace, pod.Name)
   516  				}
   517  			}
   518  
   519  			ginkgo.By("Emitting shutdown signal")
   520  			err = emitSignalPrepareForShutdown(true)
   521  			framework.ExpectNoError(err)
   522  
   523  			ginkgo.By("Verifying that pods are shutdown")
   524  
   525  			for _, step := range downSteps {
   526  				gomega.Eventually(ctx, func(ctx context.Context) error {
   527  					list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   528  						FieldSelector: nodeSelector,
   529  					})
   530  					if err != nil {
   531  						return err
   532  					}
   533  					gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   534  					for _, pod := range list.Items {
   535  						shouldShutdown := false
   536  						for _, podName := range step {
   537  							if podName == pod.Name {
   538  								shouldShutdown = true
   539  								break
   540  							}
   541  						}
   542  						if !shouldShutdown {
   543  							if pod.Status.Phase != v1.PodRunning {
   544  								framework.Logf("Expecting pod to be running, but it's not currently. Pod: (%v/%v), Pod Status Phase: %q, Pod Status Reason: %q", pod.Namespace, pod.Name, pod.Status.Phase, pod.Status.Reason)
   545  								return fmt.Errorf("pod (%v/%v) should not be shutdown, phase: %s, reason: %s", pod.Namespace, pod.Name, pod.Status.Phase, pod.Status.Reason)
   546  							}
   547  						} else {
   548  							if pod.Status.Reason != podShutdownReason {
   549  								framework.Logf("Expecting pod to be shutdown, but it's not currently. Pod: (%v/%v), Pod Status Phase: %q, Pod Status Reason: %q", pod.Namespace, pod.Name, pod.Status.Phase, pod.Status.Reason)
   550  								for _, item := range list.Items {
   551  									framework.Logf("DEBUG %s, %s, %s", item.Name, item.Status.Phase, pod.Status.Reason)
   552  								}
   553  								return fmt.Errorf("pod (%v/%v) should be shutdown, reason: %s", pod.Namespace, pod.Name, pod.Status.Reason)
   554  							}
   555  						}
   556  					}
   557  					return nil
   558  				}, podStatusUpdateTimeout, pollInterval).Should(gomega.Succeed())
   559  			}
   560  
   561  			ginkgo.By("should have state file")
   562  			stateFile := "/var/lib/kubelet/graceful_node_shutdown_state"
   563  			_, err = os.Stat(stateFile)
   564  			framework.ExpectNoError(err)
   565  		})
   566  	})
   567  })
   568  
   569  func getPriorityClass(name string, value int32) *schedulingv1.PriorityClass {
   570  	priority := &schedulingv1.PriorityClass{
   571  		TypeMeta: metav1.TypeMeta{
   572  			Kind:       "PriorityClass",
   573  			APIVersion: "scheduling.k8s.io/v1",
   574  		},
   575  		ObjectMeta: metav1.ObjectMeta{
   576  			Name: name,
   577  		},
   578  		Value: value,
   579  	}
   580  	return priority
   581  }
   582  
   583  // getGracePeriodOverrideTestPod returns a new Pod object containing a container
   584  // runs a shell script, hangs the process until a SIGTERM signal is received.
   585  // The script waits for $PID to ensure that the process does not exist.
   586  // If priorityClassName is scheduling.SystemNodeCritical, the Pod is marked as critical and a comment is added.
   587  func getGracePeriodOverrideTestPod(name string, node string, gracePeriod int64, priorityClassName string) *v1.Pod {
   588  	pod := &v1.Pod{
   589  		TypeMeta: metav1.TypeMeta{
   590  			Kind:       "Pod",
   591  			APIVersion: "v1",
   592  		},
   593  		ObjectMeta: metav1.ObjectMeta{
   594  			Name: name,
   595  		},
   596  		Spec: v1.PodSpec{
   597  			Containers: []v1.Container{
   598  				{
   599  					Name:    name,
   600  					Image:   busyboxImage,
   601  					Command: []string{"sh", "-c"},
   602  					Args: []string{`
   603  					sleep 9999999 &
   604  					PID=$!
   605  					_term() {
   606  						echo "Caught SIGTERM signal!"
   607  						wait $PID
   608  					}
   609  					
   610  					trap _term SIGTERM
   611  					wait $PID
   612  					`},
   613  				},
   614  			},
   615  			TerminationGracePeriodSeconds: &gracePeriod,
   616  			NodeName:                      node,
   617  		},
   618  	}
   619  	if priorityClassName == scheduling.SystemNodeCritical {
   620  		pod.ObjectMeta.Annotations = map[string]string{
   621  			kubelettypes.ConfigSourceAnnotationKey: kubelettypes.FileSource,
   622  		}
   623  		pod.Spec.PriorityClassName = priorityClassName
   624  		if !kubelettypes.IsCriticalPod(pod) {
   625  			framework.Failf("pod %q should be a critical pod", pod.Name)
   626  		}
   627  	} else {
   628  		pod.Spec.PriorityClassName = priorityClassName
   629  		if kubelettypes.IsCriticalPod(pod) {
   630  			framework.Failf("pod %q should not be a critical pod", pod.Name)
   631  		}
   632  	}
   633  	return pod
   634  }
   635  
   636  // Emits a fake PrepareForShutdown dbus message on system dbus. Will cause kubelet to react to an active shutdown event.
   637  func emitSignalPrepareForShutdown(b bool) error {
   638  	conn, err := dbus.ConnectSystemBus()
   639  	if err != nil {
   640  		return err
   641  	}
   642  	defer conn.Close()
   643  	return conn.Emit("/org/freedesktop/login1", "org.freedesktop.login1.Manager.PrepareForShutdown", b)
   644  }
   645  
   646  func getNodeReadyStatus(ctx context.Context, f *framework.Framework) bool {
   647  	nodeList, err := f.ClientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
   648  	framework.ExpectNoError(err)
   649  	// Assuming that there is only one node, because this is a node e2e test.
   650  	gomega.Expect(nodeList.Items).To(gomega.HaveLen(1), "the number of nodes is not as expected")
   651  	return isNodeReady(&nodeList.Items[0])
   652  }
   653  
   654  const (
   655  	// https://github.com/kubernetes/kubernetes/blob/1dd781ddcad454cc381806fbc6bd5eba8fa368d7/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go#L43-L44
   656  	podShutdownReason  = "Terminated"
   657  	podShutdownMessage = "Pod was terminated in response to imminent node shutdown."
   658  )
   659  
   660  func isPodShutdown(pod *v1.Pod) bool {
   661  	if pod == nil {
   662  		return false
   663  	}
   664  
   665  	hasContainersNotReadyCondition := false
   666  	for _, cond := range pod.Status.Conditions {
   667  		if cond.Type == v1.ContainersReady && cond.Status == v1.ConditionFalse {
   668  			hasContainersNotReadyCondition = true
   669  		}
   670  	}
   671  
   672  	return pod.Status.Message == podShutdownMessage && pod.Status.Reason == podShutdownReason && hasContainersNotReadyCondition && pod.Status.Phase == v1.PodFailed
   673  }
   674  
   675  // Pods should never report failed phase and have ready condition = true (https://github.com/kubernetes/kubernetes/issues/108594)
   676  func isPodStatusAffectedByIssue108594(pod *v1.Pod) bool {
   677  	return pod.Status.Phase == v1.PodFailed && podutils.IsPodReady(pod)
   678  }
   679  
   680  func isPodReadyToStartConditionSetToFalse(pod *v1.Pod) bool {
   681  	if pod == nil {
   682  		return false
   683  	}
   684  	readyToStartConditionSetToFalse := false
   685  	for _, cond := range pod.Status.Conditions {
   686  		if cond.Status == v1.ConditionFalse {
   687  			readyToStartConditionSetToFalse = true
   688  		}
   689  	}
   690  
   691  	return readyToStartConditionSetToFalse
   692  }