k8s.io/kubernetes@v1.29.3/test/e2e/storage/non_graceful_node_shutdown.go

/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package storage

import (
	"context"
	"fmt"

	"github.com/onsi/ginkgo/v2"
	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/labels"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2epv "k8s.io/kubernetes/test/e2e/framework/pv"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	"k8s.io/kubernetes/test/e2e/storage/drivers"
	storageframework "k8s.io/kubernetes/test/e2e/storage/framework"
	"k8s.io/kubernetes/test/e2e/storage/utils"
	imageutils "k8s.io/kubernetes/test/utils/image"
	admissionapi "k8s.io/pod-security-admission/api"
)

/*
This test assumes the following:
- The infrastructure is GCP.
- The NodeOutOfServiceVolumeDetach feature is enabled.

This test performs the following:
- Deploys the gce-pd CSI driver.
- Creates a gce-pd CSI storage class.
- Creates a PVC using the created storage class.
- Creates an app deployment with replica count 1 that uses the created PVC for its volume.
- Shuts down the kubelet of the node on which the app pod is scheduled.
  This is a non-graceful shutdown because the kubelet's shutdown grace period defaults to 0.
- Adds the `out-of-service` taint to the node that was shut down.
- Verifies that the pod is immediately rescheduled to a different node and reaches the Running and Ready state.
- Restarts the kubelet.
- Removes the `out-of-service` taint from the node.
*/
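// For reference, the out-of-service taint applied programmatically below is
// roughly equivalent to running the following command by hand (illustrative
// sketch; <node-name> is a placeholder for the shut-down node):
//
//	kubectl taint nodes <node-name> node.kubernetes.io/out-of-service=:NoExecute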

var _ = utils.SIGDescribe(feature.NodeOutOfServiceVolumeDetach, framework.WithDisruptive(), "[LinuxOnly] NonGracefulNodeShutdown", func() {
	var (
		c  clientset.Interface
		ns string
	)
	f := framework.NewDefaultFramework("non-graceful-shutdown")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged

	ginkgo.BeforeEach(func(ctx context.Context) {
		c = f.ClientSet
		ns = f.Namespace.Name
		e2eskipper.SkipUnlessProviderIs("gce")
		nodeList, err := e2enode.GetReadySchedulableNodes(ctx, c)
		framework.ExpectNoError(err, "failed to list ready schedulable nodes")
		if len(nodeList.Items) < 2 {
			ginkgo.Skip("At least 2 nodes are required to run the test")
		}
	})

	ginkgo.Describe("[NonGracefulNodeShutdown] pod that uses a persistent volume via gce pd driver", func() {
		ginkgo.It("should get immediately rescheduled to a different node after non-graceful node shutdown", func(ctx context.Context) {
			// Install gce pd csi driver
			ginkgo.By("deploying csi gce-pd driver")
			driver := drivers.InitGcePDCSIDriver()
			config := driver.PrepareTest(ctx, f)
			dDriver, ok := driver.(storageframework.DynamicPVTestDriver)
			if !ok {
				e2eskipper.Skipf("csi driver %v does not implement DynamicPVTestDriver", driver)
			}
			ginkgo.By("Creating a gce-pd storage class")
			sc := dDriver.GetDynamicProvisionStorageClass(ctx, config, "")
			_, err := c.StorageV1().StorageClasses().Create(ctx, sc, metav1.CreateOptions{})
			framework.ExpectNoError(err, "failed to create a storage class")
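			// e2epv.PersistentVolumeClaimConfig takes the storage class name as
			// a *string, so keep a pointer to the generated class name.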
			scName := &sc.Name

			deploymentName := "sts-pod-gcepd"
			podLabels := map[string]string{"app": deploymentName}
			pod := createAndVerifyStatefulDeployment(ctx, scName, deploymentName, ns, podLabels, c)
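			// Remember which node the pod landed on; this is the node that will
			// be shut down and tainted.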
			oldNodeName := pod.Spec.NodeName

			ginkgo.By("Stopping the kubelet non-gracefully for pod " + pod.Name)
			utils.KubeletCommand(ctx, utils.KStop, c, pod)
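			// Make sure the kubelet is restarted even if the test fails before
			// reaching the explicit KStart below.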
			ginkgo.DeferCleanup(utils.KubeletCommand, utils.KStart, c, pod)

			ginkgo.By("Adding out of service taint on node " + oldNodeName)
			// Taint the node as out of service so that its pods can be
			// forcefully deleted and their volumes detached.
			taint := v1.Taint{
				Key:    v1.TaintNodeOutOfService,
				Effect: v1.TaintEffectNoExecute,
			}
			e2enode.AddOrUpdateTaintOnNode(ctx, c, oldNodeName, taint)
			ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, c, oldNodeName, taint)

			ginkgo.By(fmt.Sprintf("Checking if the pod %s got rescheduled to a new node", pod.Name))
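			// Match pods that carry the app label but are scheduled on any node
			// other than the one that was shut down.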
			labelSelectorStr := labels.SelectorFromSet(podLabels).String()
			podListOpts := metav1.ListOptions{
				LabelSelector: labelSelectorStr,
				FieldSelector: fields.OneTermNotEqualSelector("spec.nodeName", oldNodeName).String(),
			}
			_, err = e2epod.WaitForPods(ctx, c, ns, podListOpts, e2epod.Range{MinMatching: 1}, framework.PodStartTimeout, "be running and ready", e2epod.RunningReady)
			framework.ExpectNoError(err)

			// Bring the node back online and remove the taint
			utils.KubeletCommand(ctx, utils.KStart, c, pod)
			e2enode.RemoveTaintOffNode(ctx, c, oldNodeName, taint)

			// Verify that a pod gets scheduled to the old node, which was
			// terminated non-gracefully and is now back online.
			newDeploymentName := "sts-pod-gcepd-new"
			newPodLabels := map[string]string{"app": newDeploymentName}
			createAndVerifyStatefulDeployment(ctx, scName, newDeploymentName, ns, newPodLabels, c)
		})
	})
})

// createAndVerifyStatefulDeployment
// i) creates a PVC using the provided storage class,
// ii) creates a deployment with replica count 1 that uses the PVC, and
// iii) verifies that the deployment's pod is running and ready, returning that pod.
func createAndVerifyStatefulDeployment(ctx context.Context, scName *string, name, ns string, podLabels map[string]string,
	c clientset.Interface) *v1.Pod {
	ginkgo.By("Creating a pvc using the storage class " + *scName)
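	// Only the storage class is set explicitly; the e2e helper fills in
	// sensible defaults for the rest of the claim spec.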
	pvc := e2epv.MakePersistentVolumeClaim(e2epv.PersistentVolumeClaimConfig{
		StorageClassName: scName,
	}, ns)
	gotPVC, err := c.CoreV1().PersistentVolumeClaims(ns).Create(ctx, pvc, metav1.CreateOptions{})
	framework.ExpectNoError(err, "failed to create a persistent volume claim")

	ginkgo.By("Creating a deployment using the pvc " + pvc.Name)
	dep := makeDeployment(ns, name, gotPVC.Name, podLabels)
	_, err = c.AppsV1().Deployments(ns).Create(ctx, dep, metav1.CreateOptions{})
	framework.ExpectNoError(err, "failed to create the deployment")

	ginkgo.By(fmt.Sprintf("Ensuring that the pod of deployment %s is running and ready", dep.Name))
	labelSelector := labels.SelectorFromSet(labels.Set(podLabels))
	podList, err := e2epod.WaitForPodsWithLabelRunningReady(ctx, c, ns, labelSelector, 1, framework.PodStartTimeout)
	framework.ExpectNoError(err)
	pod := &podList.Items[0]
	return pod
}

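// makeDeployment returns a single-replica Deployment whose pod appends a
// timestamp to a file on the PVC-backed volume every second, keeping the
// volume actively written while the node is shut down.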
func makeDeployment(ns, name, pvcName string, labels map[string]string) *appsv1.Deployment {
	replicas := int32(1)
	return &appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: ns,
		},
		Spec: appsv1.DeploymentSpec{
			Replicas: &replicas,
			Selector: &metav1.LabelSelector{
				MatchLabels: labels,
			},
			Template: v1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Labels: labels,
				},
				Spec: v1.PodSpec{
					Containers: []v1.Container{
						{
							Name:  "sts-pod-nginx",
							Image: imageutils.GetE2EImage(imageutils.Agnhost),
							Command: []string{
								"/bin/sh",
								"-c",
								"while true; do echo $(date) >> /mnt/managed/outfile; sleep 1; done",
							},
							VolumeMounts: []v1.VolumeMount{
								{
									Name:      "managed",
									MountPath: "/mnt/managed",
								},
							},
						},
					},
					Volumes: []v1.Volume{
						{
							Name: "managed",
							VolumeSource: v1.VolumeSource{
								PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
									ClaimName: pvcName,
								},
							},
						},
					},
				},
			},
		},
	}
}