k8s.io/kubernetes@v1.29.3/test/e2e/storage/non_graceful_node_shutdown.go (about) 1 /* 2 Copyright 2022 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package storage 18 19 import ( 20 "context" 21 "fmt" 22 23 "github.com/onsi/ginkgo/v2" 24 appsv1 "k8s.io/api/apps/v1" 25 v1 "k8s.io/api/core/v1" 26 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 "k8s.io/apimachinery/pkg/fields" 28 "k8s.io/apimachinery/pkg/labels" 29 clientset "k8s.io/client-go/kubernetes" 30 "k8s.io/kubernetes/test/e2e/feature" 31 "k8s.io/kubernetes/test/e2e/framework" 32 e2enode "k8s.io/kubernetes/test/e2e/framework/node" 33 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 34 e2epv "k8s.io/kubernetes/test/e2e/framework/pv" 35 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" 36 "k8s.io/kubernetes/test/e2e/storage/drivers" 37 storageframework "k8s.io/kubernetes/test/e2e/storage/framework" 38 "k8s.io/kubernetes/test/e2e/storage/utils" 39 imageutils "k8s.io/kubernetes/test/utils/image" 40 admissionapi "k8s.io/pod-security-admission/api" 41 ) 42 43 /* 44 This test assumes the following: 45 - The infra is GCP. 46 - NodeOutOfServiceVolumeDetach feature is enabled. 47 48 This test performs the following: 49 - Deploys a gce-pd csi driver 50 - Creates a gce-pd csi storage class 51 - Creates a pvc using the created gce-pd storage class 52 - Creates an app deployment with replica count 1 and uses the created pvc for volume 53 - Shutdowns the kubelet of node on which the app pod is scheduled. 54 This shutdown is a non graceful shutdown as by default the grace period is 0 on Kubelet. 55 - Adds `out-of-service` taint on the node which is shut down. 56 - Verifies that pod gets immediately scheduled to a different node and gets into running and ready state. 57 - Starts the kubelet back. 58 - Removes the `out-of-service` taint from the node. 59 */ 60 61 var _ = utils.SIGDescribe(feature.NodeOutOfServiceVolumeDetach, framework.WithDisruptive(), "[LinuxOnly] NonGracefulNodeShutdown", func() { 62 var ( 63 c clientset.Interface 64 ns string 65 ) 66 f := framework.NewDefaultFramework("non-graceful-shutdown") 67 f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged 68 69 ginkgo.BeforeEach(func(ctx context.Context) { 70 c = f.ClientSet 71 ns = f.Namespace.Name 72 e2eskipper.SkipUnlessProviderIs("gce") 73 nodeList, err := e2enode.GetReadySchedulableNodes(ctx, c) 74 if err != nil { 75 framework.Logf("Failed to list node: %v", err) 76 } 77 if len(nodeList.Items) < 2 { 78 ginkgo.Skip("At least 2 nodes are required to run the test") 79 } 80 }) 81 82 ginkgo.Describe("[NonGracefulNodeShutdown] pod that uses a persistent volume via gce pd driver", func() { 83 ginkgo.It("should get immediately rescheduled to a different node after non graceful node shutdown ", func(ctx context.Context) { 84 // Install gce pd csi driver 85 ginkgo.By("deploying csi gce-pd driver") 86 driver := drivers.InitGcePDCSIDriver() 87 config := driver.PrepareTest(ctx, f) 88 dDriver, ok := driver.(storageframework.DynamicPVTestDriver) 89 if !ok { 90 e2eskipper.Skipf("csi driver expected DynamicPVTestDriver but got %v", driver) 91 } 92 ginkgo.By("Creating a gce-pd storage class") 93 sc := dDriver.GetDynamicProvisionStorageClass(ctx, config, "") 94 _, err := c.StorageV1().StorageClasses().Create(ctx, sc, metav1.CreateOptions{}) 95 framework.ExpectNoError(err, "failed to create a storageclass") 96 scName := &sc.Name 97 98 deploymentName := "sts-pod-gcepd" 99 podLabels := map[string]string{"app": deploymentName} 100 pod := createAndVerifyStatefulDeployment(ctx, scName, deploymentName, ns, podLabels, c) 101 oldNodeName := pod.Spec.NodeName 102 103 ginkgo.By("Stopping the kubelet non gracefully for pod" + pod.Name) 104 utils.KubeletCommand(ctx, utils.KStop, c, pod) 105 ginkgo.DeferCleanup(utils.KubeletCommand, utils.KStart, c, pod) 106 107 ginkgo.By("Adding out of service taint on node " + oldNodeName) 108 // taint this node as out-of-service node 109 taint := v1.Taint{ 110 Key: v1.TaintNodeOutOfService, 111 Effect: v1.TaintEffectNoExecute, 112 } 113 e2enode.AddOrUpdateTaintOnNode(ctx, c, oldNodeName, taint) 114 ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, c, oldNodeName, taint) 115 116 ginkgo.By(fmt.Sprintf("Checking if the pod %s got rescheduled to a new node", pod.Name)) 117 labelSelectorStr := labels.SelectorFromSet(podLabels).String() 118 podListOpts := metav1.ListOptions{ 119 LabelSelector: labelSelectorStr, 120 FieldSelector: fields.OneTermNotEqualSelector("spec.nodeName", oldNodeName).String(), 121 } 122 _, err = e2epod.WaitForPods(ctx, c, ns, podListOpts, e2epod.Range{MinMatching: 1}, framework.PodStartTimeout, "be running and ready", e2epod.RunningReady) 123 framework.ExpectNoError(err) 124 125 // Bring the node back online and remove the taint 126 utils.KubeletCommand(ctx, utils.KStart, c, pod) 127 e2enode.RemoveTaintOffNode(ctx, c, oldNodeName, taint) 128 129 // Verify that a pod gets scheduled to the older node that was terminated non gracefully and now 130 // is back online 131 newDeploymentName := "sts-pod-gcepd-new" 132 newPodLabels := map[string]string{"app": newDeploymentName} 133 createAndVerifyStatefulDeployment(ctx, scName, newDeploymentName, ns, newPodLabels, c) 134 }) 135 }) 136 }) 137 138 // createAndVerifyStatefulDeployment creates: 139 // i) a pvc using the provided storage class 140 // ii) creates a deployment with replica count 1 using the created pvc 141 // iii) finally verifies if the pod is running and ready and returns the pod object 142 func createAndVerifyStatefulDeployment(ctx context.Context, scName *string, name, ns string, podLabels map[string]string, 143 c clientset.Interface) *v1.Pod { 144 ginkgo.By("Creating a pvc using the storage class " + *scName) 145 pvc := e2epv.MakePersistentVolumeClaim(e2epv.PersistentVolumeClaimConfig{ 146 StorageClassName: scName, 147 }, ns) 148 gotPVC, err := c.CoreV1().PersistentVolumeClaims(ns).Create(ctx, pvc, metav1.CreateOptions{}) 149 framework.ExpectNoError(err, "failed to create a persistent volume claim") 150 151 ginkgo.By("Creating a deployment using the pvc " + pvc.Name) 152 dep := makeDeployment(ns, name, gotPVC.Name, podLabels) 153 _, err = c.AppsV1().Deployments(ns).Create(ctx, dep, metav1.CreateOptions{}) 154 framework.ExpectNoError(err, "failed to created the deployment") 155 156 ginkgo.By(fmt.Sprintf("Ensuring that the pod of deployment %s is running and ready", dep.Name)) 157 labelSelector := labels.SelectorFromSet(labels.Set(podLabels)) 158 podList, err := e2epod.WaitForPodsWithLabelRunningReady(ctx, c, ns, labelSelector, 1, framework.PodStartTimeout) 159 framework.ExpectNoError(err) 160 pod := &podList.Items[0] 161 return pod 162 } 163 164 func makeDeployment(ns, name, pvcName string, labels map[string]string) *appsv1.Deployment { 165 ssReplicas := int32(1) 166 return &appsv1.Deployment{ 167 ObjectMeta: metav1.ObjectMeta{ 168 Name: name, 169 Namespace: ns, 170 }, 171 Spec: appsv1.DeploymentSpec{ 172 Replicas: &ssReplicas, 173 Selector: &metav1.LabelSelector{ 174 MatchLabels: labels, 175 }, 176 Template: v1.PodTemplateSpec{ 177 ObjectMeta: metav1.ObjectMeta{ 178 Labels: labels, 179 }, 180 Spec: v1.PodSpec{ 181 Containers: []v1.Container{ 182 { 183 Name: "sts-pod-nginx", 184 Image: imageutils.GetE2EImage(imageutils.Agnhost), 185 Command: []string{ 186 "/bin/sh", 187 "-c", 188 "while true; do echo $(date) >> /mnt/managed/outfile; sleep 1; done", 189 }, 190 VolumeMounts: []v1.VolumeMount{ 191 { 192 Name: "managed", 193 MountPath: "/mnt/managed", 194 }, 195 }, 196 }, 197 }, 198 Volumes: []v1.Volume{ 199 { 200 Name: "managed", 201 VolumeSource: v1.VolumeSource{ 202 PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ 203 ClaimName: pvcName, 204 }, 205 }, 206 }, 207 }, 208 }, 209 }, 210 }, 211 } 212 }