k8s.io/kubernetes@v1.29.3/test/e2e_node/node_shutdown_linux_test.go

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2021 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  package e2enode
    21  
    22  import (
    23  	"context"
    24  	"fmt"
    25  	"os"
    26  	"os/exec"
    27  	"path/filepath"
    28  	"regexp"
    29  	"strconv"
    30  	"time"
    31  
    32  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    33  	"k8s.io/apimachinery/pkg/fields"
    34  	"k8s.io/apimachinery/pkg/watch"
    35  	"k8s.io/client-go/tools/cache"
    36  	watchtools "k8s.io/client-go/tools/watch"
    37  	"k8s.io/kubectl/pkg/util/podutils"
    38  
    39  	admissionapi "k8s.io/pod-security-admission/api"
    40  
    41  	"github.com/onsi/ginkgo/v2"
    42  	"github.com/onsi/gomega"
    43  	"k8s.io/kubernetes/pkg/apis/scheduling"
    44  	"k8s.io/kubernetes/test/e2e/framework"
    45  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    46  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    47  	"k8s.io/kubernetes/test/e2e/nodefeature"
    48  
    49  	"github.com/godbus/dbus/v5"
    50  	v1 "k8s.io/api/core/v1"
    51  	schedulingv1 "k8s.io/api/scheduling/v1"
    52  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    53  	"k8s.io/apimachinery/pkg/util/uuid"
    54  	"k8s.io/apimachinery/pkg/util/wait"
    55  	"k8s.io/kubernetes/pkg/features"
    56  	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
    57  	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
    58  	testutils "k8s.io/kubernetes/test/utils"
    59  )
    60  
    61  var _ = SIGDescribe("GracefulNodeShutdown", framework.WithSerial(), nodefeature.GracefulNodeShutdown, nodefeature.GracefulNodeShutdownBasedOnPodPriority, func() {
    62  	f := framework.NewDefaultFramework("graceful-node-shutdown")
    63  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    64  
    65  	ginkgo.BeforeEach(func() {
    66  		if _, err := exec.LookPath("systemd-run"); err == nil {
    67  			if version, verr := exec.Command("systemd-run", "--version").Output(); verr == nil {
    68  				// sample output from $ systemd-run --version
    69  				// systemd 245 (245.4-4ubuntu3.13)
    70  				re := regexp.MustCompile(`systemd (\d+)`)
    71  				if match := re.FindSubmatch(version); len(match) > 1 {
    72  					systemdVersion, err := strconv.Atoi(string(match[1]))
    73  					if err != nil {
    74  						framework.Logf("failed to parse systemd version with error %v, 'systemd-run --version' output was [%s]", err, version)
    75  					} else {
    76  						// See the comments in issue 107043; it is a long-known problem that this feature does not work on older systemd versions.
    77  						// https://github.com/kubernetes/kubernetes/issues/107043#issuecomment-997546598
    78  						if systemdVersion < 245 {
    79  							e2eskipper.Skipf("skipping GracefulNodeShutdown tests as we are running on an old version of systemd : %d", systemdVersion)
    80  						}
    81  					}
    82  				}
    83  			}
    84  		}
    85  	})
    86  
    87  	f.Context("graceful node shutdown when PodDisruptionConditions are enabled", nodefeature.PodDisruptionConditions, func() {
    88  
    89  		const (
    90  			pollInterval            = 1 * time.Second
    91  			podStatusUpdateTimeout  = 30 * time.Second
    92  			nodeStatusUpdateTimeout = 30 * time.Second
    93  			nodeShutdownGracePeriod = 30 * time.Second
    94  		)
    95  
    96  		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
    97  			initialConfig.FeatureGates = map[string]bool{
    98  				string(features.GracefulNodeShutdown):                   true,
    99  				string(features.PodDisruptionConditions):                true,
   100  				string(features.GracefulNodeShutdownBasedOnPodPriority): false,
   101  			}
   102  			initialConfig.ShutdownGracePeriod = metav1.Duration{Duration: nodeShutdownGracePeriod}
   103  		})
   104  
   105  		ginkgo.BeforeEach(func(ctx context.Context) {
   106  			ginkgo.By("Wait for the node to be ready")
   107  			waitForNodeReady(ctx)
   108  		})
   109  
   110  		ginkgo.AfterEach(func() {
   111  			ginkgo.By("Emitting Shutdown false signal; cancelling the shutdown")
   112  			err := emitSignalPrepareForShutdown(false)
   113  			framework.ExpectNoError(err)
   114  		})
   115  
   116  		ginkgo.It("should add the DisruptionTarget pod failure condition to the evicted pods", func(ctx context.Context) {
   117  			nodeName := getNodeName(ctx, f)
   118  			nodeSelector := fields.Set{
   119  				"spec.nodeName": nodeName,
   120  			}.AsSelector().String()
   121  
   122  			// Define test pods
   123  			pods := []*v1.Pod{
   124  				getGracePeriodOverrideTestPod("pod-to-evict-"+string(uuid.NewUUID()), nodeName, 5, ""),
   125  			}
   126  
    127  			ctx, cancel := context.WithCancel(ctx)
   128  			defer cancel()
   129  
    130  			ginkgo.By("Creating batch pods")
   131  			e2epod.NewPodClient(f).CreateBatch(ctx, pods)
   132  
   133  			list, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   134  				FieldSelector: nodeSelector,
   135  			})
   136  
   137  			framework.ExpectNoError(err)
   138  			gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   139  
   140  			list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   141  				FieldSelector: nodeSelector,
   142  			})
   143  			if err != nil {
    144  				framework.Failf("Failed to list batch pods: %v", err)
   145  			}
   146  			gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   147  
   148  			for _, pod := range list.Items {
    149  				framework.Logf("Pod (%v/%v) status conditions: %v", pod.Namespace, pod.Name, pod.Status.Conditions)
   150  			}
   151  
   152  			ginkgo.By("Verifying batch pods are running")
   153  			for _, pod := range list.Items {
   154  				if podReady, err := testutils.PodRunningReady(&pod); err != nil || !podReady {
   155  					framework.Failf("Failed to start batch pod: (%v/%v)", pod.Namespace, pod.Name)
   156  				}
   157  			}
   158  
   159  			ginkgo.By("Emitting shutdown signal")
   160  			err = emitSignalPrepareForShutdown(true)
   161  			framework.ExpectNoError(err)
   162  
    163  			ginkgo.By("Verifying that all pods are shut down")
    164  			// All pods should be shut down
   165  			gomega.Eventually(func() error {
   166  				list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   167  					FieldSelector: nodeSelector,
   168  				})
   169  				if err != nil {
   170  					return err
   171  				}
   172  				gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   173  
   174  				for _, pod := range list.Items {
   175  					if !isPodShutdown(&pod) {
   176  						framework.Logf("Expecting pod to be shutdown, but it's not currently. Pod: (%v/%v), Pod Status Phase: %q, Pod Status Reason: %q", pod.Namespace, pod.Name, pod.Status.Phase, pod.Status.Reason)
   177  						return fmt.Errorf("pod should be shutdown, phase: %s", pod.Status.Phase)
   178  					}
   179  					podDisruptionCondition := e2epod.FindPodConditionByType(&pod.Status, v1.DisruptionTarget)
   180  					if podDisruptionCondition == nil {
   181  						framework.Failf("pod (%v/%v) should have the condition: %q, pod status: %v", pod.Namespace, pod.Name, v1.DisruptionTarget, pod.Status)
   182  					}
   183  				}
   184  				return nil
   185  			}, podStatusUpdateTimeout+(nodeShutdownGracePeriod), pollInterval).Should(gomega.BeNil())
   186  		})
   187  	})
   188  
   189  	ginkgo.Context("when gracefully shutting down", func() {
   190  
   191  		const (
   192  			pollInterval                        = 1 * time.Second
   193  			podStatusUpdateTimeout              = 30 * time.Second
   194  			nodeStatusUpdateTimeout             = 30 * time.Second
   195  			nodeShutdownGracePeriod             = 20 * time.Second
   196  			nodeShutdownGracePeriodCriticalPods = 10 * time.Second
   197  		)
   198  
   199  		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
   200  			initialConfig.FeatureGates = map[string]bool{
   201  				string(features.GracefulNodeShutdown):                   true,
   202  				string(features.GracefulNodeShutdownBasedOnPodPriority): false,
   203  				string(features.PodReadyToStartContainersCondition):     true,
   204  			}
   205  			initialConfig.ShutdownGracePeriod = metav1.Duration{Duration: nodeShutdownGracePeriod}
   206  			initialConfig.ShutdownGracePeriodCriticalPods = metav1.Duration{Duration: nodeShutdownGracePeriodCriticalPods}
   207  		})
   208  
   209  		ginkgo.BeforeEach(func(ctx context.Context) {
   210  			ginkgo.By("Wait for the node to be ready")
   211  			waitForNodeReady(ctx)
   212  		})
   213  
   214  		ginkgo.AfterEach(func(ctx context.Context) {
   215  			ginkgo.By("Emitting Shutdown false signal; cancelling the shutdown")
   216  			err := emitSignalPrepareForShutdown(false)
   217  			framework.ExpectNoError(err)
   218  		})
   219  
   220  		ginkgo.It("should be able to gracefully shutdown pods with various grace periods", func(ctx context.Context) {
   221  			nodeName := getNodeName(ctx, f)
   222  			nodeSelector := fields.Set{
   223  				"spec.nodeName": nodeName,
   224  			}.AsSelector().String()
   225  
   226  			// Define test pods
   227  			pods := []*v1.Pod{
   228  				getGracePeriodOverrideTestPod("period-120-"+string(uuid.NewUUID()), nodeName, 120, ""),
   229  				getGracePeriodOverrideTestPod("period-5-"+string(uuid.NewUUID()), nodeName, 5, ""),
   230  				getGracePeriodOverrideTestPod("period-critical-120-"+string(uuid.NewUUID()), nodeName, 120, scheduling.SystemNodeCritical),
   231  				getGracePeriodOverrideTestPod("period-critical-5-"+string(uuid.NewUUID()), nodeName, 5, scheduling.SystemNodeCritical),
   232  			}
   233  
   234  			ginkgo.By("Creating batch pods")
   235  			e2epod.NewPodClient(f).CreateBatch(ctx, pods)
   236  
   237  			list, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   238  				FieldSelector: nodeSelector,
   239  			})
   240  			framework.ExpectNoError(err)
   241  			gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   242  
   243  			ctx, cancel := context.WithCancel(ctx)
   244  			defer cancel()
   245  			go func() {
   246  				defer ginkgo.GinkgoRecover()
   247  				w := &cache.ListWatch{
   248  					WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
   249  						return f.ClientSet.CoreV1().Pods(f.Namespace.Name).Watch(ctx, options)
   250  					},
   251  				}
   252  
   253  				// Setup watch to continuously monitor any pod events and detect invalid pod status updates
   254  				_, err = watchtools.Until(ctx, list.ResourceVersion, w, func(event watch.Event) (bool, error) {
   255  					if pod, ok := event.Object.(*v1.Pod); ok {
   256  						if isPodStatusAffectedByIssue108594(pod) {
   257  							return false, fmt.Errorf("failing test due to detecting invalid pod status")
   258  						}
   259  						// Watch will never terminate (only when the test ends due to context cancellation)
   260  						return false, nil
   261  					}
   262  					return false, nil
   263  				})
   264  
   265  				// Ignore timeout error since the context will be explicitly cancelled and the watch will never return true
   266  				if err != nil && err != wait.ErrWaitTimeout {
   267  					framework.Failf("watch for invalid pod status failed: %v", err.Error())
   268  				}
   269  			}()
   270  
   271  			ginkgo.By("Verifying batch pods are running")
   272  			for _, pod := range list.Items {
   273  				if podReady, err := testutils.PodRunningReady(&pod); err != nil || !podReady {
   274  					framework.Failf("Failed to start batch pod: %v", pod.Name)
   275  				}
   276  			}
   277  
   278  			ginkgo.By("Emitting shutdown signal")
   279  			err = emitSignalPrepareForShutdown(true)
   280  			framework.ExpectNoError(err)
   281  
    282  			ginkgo.By("Verifying that non-critical pods are shut down")
    283  			// Non-critical pods should be shut down while critical pods keep running
   284  			gomega.Eventually(ctx, func(ctx context.Context) error {
   285  				list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   286  					FieldSelector: nodeSelector,
   287  				})
   288  				if err != nil {
   289  					return err
   290  				}
   291  				gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   292  
   293  				for _, pod := range list.Items {
   294  					if kubelettypes.IsCriticalPod(&pod) {
   295  						if isPodShutdown(&pod) {
   296  							framework.Logf("Expecting critical pod (%v/%v) to be running, but it's not currently. Pod Status %+v", pod.Namespace, pod.Name, pod.Status)
   297  							return fmt.Errorf("critical pod (%v/%v) should not be shutdown, phase: %s", pod.Namespace, pod.Name, pod.Status.Phase)
   298  						}
   299  					} else {
   300  						if !isPodShutdown(&pod) {
    301  							framework.Logf("Expecting non-critical pod (%v/%v) to be shut down, but it's not currently. Pod Status %+v", pod.Namespace, pod.Name, pod.Status)
   302  							return fmt.Errorf("pod (%v/%v) should be shutdown, phase: %s", pod.Namespace, pod.Name, pod.Status.Phase)
   303  						}
   304  					}
   305  				}
   306  				return nil
   307  			}, podStatusUpdateTimeout, pollInterval).Should(gomega.Succeed())
   308  
    309  			ginkgo.By("Verifying that all pods are shut down")
    310  			// All pods should be shut down
   311  			gomega.Eventually(ctx, func(ctx context.Context) error {
   312  				list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   313  					FieldSelector: nodeSelector,
   314  				})
   315  				if err != nil {
   316  					return err
   317  				}
   318  				gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   319  
   320  				for _, pod := range list.Items {
   321  					if !isPodShutdown(&pod) {
   322  						framework.Logf("Expecting pod (%v/%v) to be shutdown, but it's not currently: Pod Status %+v", pod.Namespace, pod.Name, pod.Status)
   323  						return fmt.Errorf("pod (%v/%v) should be shutdown, phase: %s", pod.Namespace, pod.Name, pod.Status.Phase)
   324  					}
   325  				}
   326  				return nil
   327  			},
   328  				// Critical pod starts shutdown after (nodeShutdownGracePeriod-nodeShutdownGracePeriodCriticalPods)
   329  				podStatusUpdateTimeout+(nodeShutdownGracePeriod-nodeShutdownGracePeriodCriticalPods),
   330  				pollInterval).Should(gomega.Succeed())
   331  
    332  			ginkgo.By("Verifying that each pod's ready-to-start condition is set to false after terminating")
    333  			// Every pod's ready-to-start condition should be set to false
   334  			gomega.Eventually(ctx, func(ctx context.Context) error {
   335  				list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   336  					FieldSelector: nodeSelector,
   337  				})
   338  				if err != nil {
   339  					return err
   340  				}
   341  				gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)))
   342  				for _, pod := range list.Items {
   343  					if !isPodReadyToStartConditionSetToFalse(&pod) {
    344  						framework.Logf("Expecting the ready-to-start condition of pod (%v/%v) to be set to false, "+
    345  							"but it's not currently: Pod Conditions %+v", pod.Namespace, pod.Name, pod.Status.Conditions)
    346  						return fmt.Errorf("the ready-to-start condition of pod (%v/%v) should be false, conditions: %v, phase: %s",
    347  							pod.Namespace, pod.Name, pod.Status.Conditions, pod.Status.Phase)
   348  					}
   349  				}
   350  				return nil
   351  			},
    352  				podStatusUpdateTimeout, pollInterval).Should(gomega.Succeed())
   353  		})
   354  
   355  		ginkgo.It("should be able to handle a cancelled shutdown", func(ctx context.Context) {
   356  			ginkgo.By("Emitting Shutdown signal")
   357  			err := emitSignalPrepareForShutdown(true)
   358  			framework.ExpectNoError(err)
   359  			gomega.Eventually(ctx, func(ctx context.Context) error {
   360  				isReady := getNodeReadyStatus(ctx, f)
   361  				if isReady {
    362  					return fmt.Errorf("node did not become not-ready during shutdown as expected")
   363  				}
   364  				return nil
   365  			}, nodeStatusUpdateTimeout, pollInterval).Should(gomega.Succeed())
   366  
   367  			ginkgo.By("Emitting Shutdown false signal; cancelling the shutdown")
   368  			err = emitSignalPrepareForShutdown(false)
   369  			framework.ExpectNoError(err)
   370  			gomega.Eventually(ctx, func(ctx context.Context) error {
   371  				isReady := getNodeReadyStatus(ctx, f)
   372  				if !isReady {
   373  					return fmt.Errorf("node did not recover as expected")
   374  				}
   375  				return nil
   376  			}, nodeStatusUpdateTimeout, pollInterval).Should(gomega.Succeed())
   377  		})
   378  	})
   379  
   380  	ginkgo.Context("when gracefully shutting down with Pod priority", func() {
   381  
   382  		const (
   383  			pollInterval                 = 1 * time.Second
   384  			podStatusUpdateTimeout       = 30 * time.Second
   385  			priorityClassesCreateTimeout = 10 * time.Second
   386  		)
   387  
   388  		var (
   389  			customClassA = getPriorityClass("custom-class-a", 100000)
   390  			customClassB = getPriorityClass("custom-class-b", 10000)
   391  			customClassC = getPriorityClass("custom-class-c", 1000)
   392  		)
   393  
   394  		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
   395  			initialConfig.FeatureGates = map[string]bool{
   396  				string(features.GracefulNodeShutdown):                   true,
   397  				string(features.GracefulNodeShutdownBasedOnPodPriority): true,
   398  			}
   399  			initialConfig.ShutdownGracePeriodByPodPriority = []kubeletconfig.ShutdownGracePeriodByPodPriority{
   400  				{
   401  					Priority:                   scheduling.SystemCriticalPriority,
   402  					ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second),
   403  				},
   404  				{
   405  					Priority:                   customClassA.Value,
   406  					ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second),
   407  				},
   408  				{
   409  					Priority:                   customClassB.Value,
   410  					ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second),
   411  				},
   412  				{
   413  					Priority:                   customClassC.Value,
   414  					ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second),
   415  				},
   416  				{
   417  					Priority:                   scheduling.DefaultPriorityWhenNoDefaultClassExists,
   418  					ShutdownGracePeriodSeconds: int64(podStatusUpdateTimeout / time.Second),
   419  				},
   420  			}
   421  
   422  		})
   423  
   424  		ginkgo.BeforeEach(func(ctx context.Context) {
   425  			ginkgo.By("Wait for the node to be ready")
   426  			waitForNodeReady(ctx)
   427  			customClasses := []*schedulingv1.PriorityClass{customClassA, customClassB, customClassC}
   428  			for _, customClass := range customClasses {
   429  				_, err := f.ClientSet.SchedulingV1().PriorityClasses().Create(ctx, customClass, metav1.CreateOptions{})
   430  				if err != nil && !apierrors.IsAlreadyExists(err) {
   431  					framework.ExpectNoError(err)
   432  				}
   433  			}
   434  			gomega.Eventually(ctx, func(ctx context.Context) error {
   435  				for _, customClass := range customClasses {
   436  					_, err := f.ClientSet.SchedulingV1().PriorityClasses().Get(ctx, customClass.Name, metav1.GetOptions{})
   437  					if err != nil {
   438  						return err
   439  					}
   440  				}
   441  				return nil
   442  			}, priorityClassesCreateTimeout, pollInterval).Should(gomega.Succeed())
   443  		})
   444  
   445  		ginkgo.AfterEach(func() {
   446  			ginkgo.By("Emitting Shutdown false signal; cancelling the shutdown")
   447  			err := emitSignalPrepareForShutdown(false)
   448  			framework.ExpectNoError(err)
   449  		})
   450  
   451  		ginkgo.It("should be able to gracefully shutdown pods with various grace periods", func(ctx context.Context) {
   452  			nodeName := getNodeName(ctx, f)
   453  			nodeSelector := fields.Set{
   454  				"spec.nodeName": nodeName,
   455  			}.AsSelector().String()
   456  
   457  			var (
   458  				period5Name         = "period-5-" + string(uuid.NewUUID())
   459  				periodC5Name        = "period-c-5-" + string(uuid.NewUUID())
   460  				periodB5Name        = "period-b-5-" + string(uuid.NewUUID())
   461  				periodA5Name        = "period-a-5-" + string(uuid.NewUUID())
   462  				periodCritical5Name = "period-critical-5-" + string(uuid.NewUUID())
   463  			)
   464  
   465  			// Define test pods
   466  			pods := []*v1.Pod{
   467  				getGracePeriodOverrideTestPod(period5Name, nodeName, 5, ""),
   468  				getGracePeriodOverrideTestPod(periodC5Name, nodeName, 5, customClassC.Name),
   469  				getGracePeriodOverrideTestPod(periodB5Name, nodeName, 5, customClassB.Name),
   470  				getGracePeriodOverrideTestPod(periodA5Name, nodeName, 5, customClassA.Name),
   471  				getGracePeriodOverrideTestPod(periodCritical5Name, nodeName, 5, scheduling.SystemNodeCritical),
   472  			}
   473  
   474  			// Expected down steps
   475  			downSteps := [][]string{
   476  				{
   477  					period5Name,
   478  				},
   479  				{
   480  					period5Name,
   481  					periodC5Name,
   482  				},
   483  				{
   484  
   485  					period5Name,
   486  					periodC5Name,
   487  					periodB5Name,
   488  				},
   489  				{
   490  					period5Name,
   491  					periodC5Name,
   492  					periodB5Name,
   493  					periodA5Name,
   494  				},
   495  				{
   496  					period5Name,
   497  					periodC5Name,
   498  					periodB5Name,
   499  					periodA5Name,
   500  					periodCritical5Name,
   501  				},
   502  			}
   503  
   504  			ginkgo.By("Creating batch pods")
   505  			e2epod.NewPodClient(f).CreateBatch(ctx, pods)
   506  
   507  			list, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   508  				FieldSelector: nodeSelector,
   509  			})
   510  			framework.ExpectNoError(err)
   511  			gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   512  
   513  			ginkgo.By("Verifying batch pods are running")
   514  			for _, pod := range list.Items {
   515  				if podReady, err := testutils.PodRunningReady(&pod); err != nil || !podReady {
   516  					framework.Failf("Failed to start batch pod: (%v/%v)", pod.Namespace, pod.Name)
   517  				}
   518  			}
   519  
   520  			ginkgo.By("Emitting shutdown signal")
   521  			err = emitSignalPrepareForShutdown(true)
   522  			framework.ExpectNoError(err)
   523  
   524  			ginkgo.By("Verifying that pods are shutdown")
   525  
   526  			for _, step := range downSteps {
   527  				gomega.Eventually(ctx, func(ctx context.Context) error {
   528  					list, err = e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{
   529  						FieldSelector: nodeSelector,
   530  					})
   531  					if err != nil {
   532  						return err
   533  					}
   534  					gomega.Expect(list.Items).To(gomega.HaveLen(len(pods)), "the number of pods is not as expected")
   535  					for _, pod := range list.Items {
   536  						shouldShutdown := false
   537  						for _, podName := range step {
   538  							if podName == pod.Name {
   539  								shouldShutdown = true
   540  								break
   541  							}
   542  						}
   543  						if !shouldShutdown {
   544  							if pod.Status.Phase != v1.PodRunning {
   545  								framework.Logf("Expecting pod to be running, but it's not currently. Pod: (%v/%v), Pod Status Phase: %q, Pod Status Reason: %q", pod.Namespace, pod.Name, pod.Status.Phase, pod.Status.Reason)
   546  								return fmt.Errorf("pod (%v/%v) should not be shutdown, phase: %s, reason: %s", pod.Namespace, pod.Name, pod.Status.Phase, pod.Status.Reason)
   547  							}
   548  						} else {
   549  							if pod.Status.Reason != podShutdownReason {
   550  								framework.Logf("Expecting pod to be shutdown, but it's not currently. Pod: (%v/%v), Pod Status Phase: %q, Pod Status Reason: %q", pod.Namespace, pod.Name, pod.Status.Phase, pod.Status.Reason)
   551  								for _, item := range list.Items {
    552  									framework.Logf("DEBUG %s, %s, %s", item.Name, item.Status.Phase, item.Status.Reason)
   553  								}
   554  								return fmt.Errorf("pod (%v/%v) should be shutdown, reason: %s", pod.Namespace, pod.Name, pod.Status.Reason)
   555  							}
   556  						}
   557  					}
   558  					return nil
   559  				}, podStatusUpdateTimeout, pollInterval).Should(gomega.Succeed())
   560  			}
   561  
    562  			ginkgo.By("Verifying the graceful node shutdown state file exists")
   563  			stateFile := "/var/lib/kubelet/graceful_node_shutdown_state"
   564  			_, err = os.Stat(stateFile)
   565  			framework.ExpectNoError(err)
   566  		})
   567  	})
   568  })
   569  
   570  func getPriorityClass(name string, value int32) *schedulingv1.PriorityClass {
   571  	priority := &schedulingv1.PriorityClass{
   572  		TypeMeta: metav1.TypeMeta{
   573  			Kind:       "PriorityClass",
   574  			APIVersion: "scheduling.k8s.io/v1",
   575  		},
   576  		ObjectMeta: metav1.ObjectMeta{
   577  			Name: name,
   578  		},
   579  		Value: value,
   580  	}
   581  	return priority
   582  }
   583  
    584  // getGracePeriodOverrideTestPod returns a new Pod object containing a container that
    585  // runs a shell script which blocks until a SIGTERM signal is received.
    586  // The script waits on $PID to ensure that the process does not exit prematurely.
    587  // If priorityClassName is scheduling.SystemNodeCritical, the Pod is given the file-source config annotation and marked as a critical pod.
   588  func getGracePeriodOverrideTestPod(name string, node string, gracePeriod int64, priorityClassName string) *v1.Pod {
   589  	pod := &v1.Pod{
   590  		TypeMeta: metav1.TypeMeta{
   591  			Kind:       "Pod",
   592  			APIVersion: "v1",
   593  		},
   594  		ObjectMeta: metav1.ObjectMeta{
   595  			Name: name,
   596  		},
   597  		Spec: v1.PodSpec{
   598  			Containers: []v1.Container{
   599  				{
   600  					Name:    name,
   601  					Image:   busyboxImage,
   602  					Command: []string{"sh", "-c"},
   603  					Args: []string{`
   604  					sleep 9999999 &
   605  					PID=$!
   606  					_term() {
   607  						echo "Caught SIGTERM signal!"
   608  						wait $PID
   609  					}
   610  					
   611  					trap _term SIGTERM
   612  					wait $PID
   613  					`},
   614  				},
   615  			},
   616  			TerminationGracePeriodSeconds: &gracePeriod,
   617  			NodeName:                      node,
   618  		},
   619  	}
   620  	if priorityClassName == scheduling.SystemNodeCritical {
   621  		pod.ObjectMeta.Annotations = map[string]string{
   622  			kubelettypes.ConfigSourceAnnotationKey: kubelettypes.FileSource,
   623  		}
   624  		pod.Spec.PriorityClassName = priorityClassName
   625  		if !kubelettypes.IsCriticalPod(pod) {
   626  			framework.Failf("pod %q should be a critical pod", pod.Name)
   627  		}
   628  	} else {
   629  		pod.Spec.PriorityClassName = priorityClassName
   630  		if kubelettypes.IsCriticalPod(pod) {
   631  			framework.Failf("pod %q should not be a critical pod", pod.Name)
   632  		}
   633  	}
   634  	return pod
   635  }
   636  
    637  // emitSignalPrepareForShutdown emits a fake PrepareForShutdown dbus signal on the system bus, causing the kubelet to react to an active shutdown event.
   638  func emitSignalPrepareForShutdown(b bool) error {
   639  	conn, err := dbus.ConnectSystemBus()
   640  	if err != nil {
   641  		return err
   642  	}
   643  	defer conn.Close()
   644  	return conn.Emit("/org/freedesktop/login1", "org.freedesktop.login1.Manager.PrepareForShutdown", b)
   645  }
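
// Illustrative sketch (not part of the upstream file): one way the fake signal emitted by
// emitSignalPrepareForShutdown above could be observed on the system bus with godbus, for
// example while debugging whether the emission reaches subscribers such as the kubelet.
// The helper name watchPrepareForShutdown is hypothetical; the match options mirror the
// object path and interface/member used in the emit call.
func watchPrepareForShutdown() (bool, error) {
	conn, err := dbus.ConnectSystemBus()
	if err != nil {
		return false, err
	}
	defer conn.Close()

	// Subscribe to PrepareForShutdown signals from logind's manager object.
	if err := conn.AddMatchSignal(
		dbus.WithMatchObjectPath("/org/freedesktop/login1"),
		dbus.WithMatchInterface("org.freedesktop.login1.Manager"),
		dbus.WithMatchMember("PrepareForShutdown"),
	); err != nil {
		return false, err
	}

	ch := make(chan *dbus.Signal, 1)
	conn.Signal(ch)

	// Block until one signal arrives; its single body element is the boolean
	// "shutdown starting" flag (true on shutdown, false on cancellation).
	sig := <-ch
	if len(sig.Body) != 1 {
		return false, fmt.Errorf("unexpected signal body: %v", sig.Body)
	}
	active, ok := sig.Body[0].(bool)
	if !ok {
		return false, fmt.Errorf("unexpected signal body type: %T", sig.Body[0])
	}
	return active, nil
}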
   646  
   647  func getNodeReadyStatus(ctx context.Context, f *framework.Framework) bool {
   648  	nodeList, err := f.ClientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
   649  	framework.ExpectNoError(err)
   650  	// Assuming that there is only one node, because this is a node e2e test.
   651  	gomega.Expect(nodeList.Items).To(gomega.HaveLen(1), "the number of nodes is not as expected")
   652  	return isNodeReady(&nodeList.Items[0])
   653  }
   654  
   655  func systemctlDaemonReload() error {
   656  	cmd := "systemctl daemon-reload"
   657  	_, err := runCommand("sh", "-c", cmd)
   658  	return err
   659  }
   660  
   661  var (
   662  	dbusConfPath = "/etc/systemd/system/dbus.service.d/k8s-graceful-node-shutdown-e2e.conf"
   663  	dbusConf     = `
   664  [Unit]
   665  RefuseManualStart=no
   666  RefuseManualStop=no
   667  [Service]
   668  KillMode=control-group
   669  ExecStop=
   670  `
   671  )
   672  
   673  func overlayDbusConfig() error {
   674  	err := os.MkdirAll(filepath.Dir(dbusConfPath), 0755)
   675  	if err != nil {
   676  		return err
   677  	}
   678  	err = os.WriteFile(dbusConfPath, []byte(dbusConf), 0644)
   679  	if err != nil {
   680  		return err
   681  	}
   682  	return systemctlDaemonReload()
   683  }
   684  
   685  func restoreDbusConfig() error {
   686  	err := os.Remove(dbusConfPath)
   687  	if err != nil {
   688  		return err
   689  	}
   690  	return systemctlDaemonReload()
   691  }
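
// Illustrative sketch (not part of the upstream file): a hypothetical helper showing the
// intended pairing of overlayDbusConfig with restoreDbusConfig, bracketing the emission of
// the fake shutdown signal so the dbus drop-in never outlives the test run.
func exampleDbusOverlayUsage() error {
	if err := overlayDbusConfig(); err != nil {
		return err
	}
	defer func() {
		// Best-effort cleanup; an error here only means the drop-in file is left behind.
		_ = restoreDbusConfig()
	}()
	return emitSignalPrepareForShutdown(true)
}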
   692  
   693  const (
   694  	// https://github.com/kubernetes/kubernetes/blob/1dd781ddcad454cc381806fbc6bd5eba8fa368d7/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go#L43-L44
   695  	podShutdownReason  = "Terminated"
   696  	podShutdownMessage = "Pod was terminated in response to imminent node shutdown."
   697  )
   698  
   699  func isPodShutdown(pod *v1.Pod) bool {
   700  	if pod == nil {
   701  		return false
   702  	}
   703  
   704  	hasContainersNotReadyCondition := false
   705  	for _, cond := range pod.Status.Conditions {
   706  		if cond.Type == v1.ContainersReady && cond.Status == v1.ConditionFalse {
   707  			hasContainersNotReadyCondition = true
   708  		}
   709  	}
   710  
   711  	return pod.Status.Message == podShutdownMessage && pod.Status.Reason == podShutdownReason && hasContainersNotReadyCondition && pod.Status.Phase == v1.PodFailed
   712  }
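
// Illustrative sketch (not part of the upstream file): examplePodShutdownStatus is a
// hypothetical constructor for the minimal pod status accepted by isPodShutdown, assembled
// from the constants above. All four checks must hold: PodFailed phase, the shutdown reason
// and message, and a ContainersReady condition set to False.
// isPodShutdown(examplePodShutdownStatus()) evaluates to true.
func examplePodShutdownStatus() *v1.Pod {
	return &v1.Pod{
		Status: v1.PodStatus{
			Phase:   v1.PodFailed,
			Reason:  podShutdownReason,
			Message: podShutdownMessage,
			Conditions: []v1.PodCondition{
				{Type: v1.ContainersReady, Status: v1.ConditionFalse},
			},
		},
	}
}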
   713  
   714  // Pods should never report failed phase and have ready condition = true (https://github.com/kubernetes/kubernetes/issues/108594)
   715  func isPodStatusAffectedByIssue108594(pod *v1.Pod) bool {
   716  	return pod.Status.Phase == v1.PodFailed && podutils.IsPodReady(pod)
   717  }
   718  
   719  func isPodReadyToStartConditionSetToFalse(pod *v1.Pod) bool {
   720  	if pod == nil {
   721  		return false
   722  	}
   723  	readyToStartConditionSetToFalse := false
   724  	for _, cond := range pod.Status.Conditions {
   725  		if cond.Status == v1.ConditionFalse {
   726  			readyToStartConditionSetToFalse = true
   727  		}
   728  	}
   729  
   730  	return readyToStartConditionSetToFalse
   731  }