k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e/storage/nfs_persistent_volume-disruptive.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package storage

import (
	"context"
	"fmt"
	"net"
	"time"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/kubernetes/test/e2e/framework"
	e2ekubesystem "k8s.io/kubernetes/test/e2e/framework/kubesystem"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2epv "k8s.io/kubernetes/test/e2e/framework/pv"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
	e2evolume "k8s.io/kubernetes/test/e2e/framework/volume"
	"k8s.io/kubernetes/test/e2e/storage/utils"
	admissionapi "k8s.io/pod-security-admission/api"
)

type testBody func(ctx context.Context, c clientset.Interface, f *framework.Framework, clientPod *v1.Pod, volumePath string)
type disruptiveTest struct {
	testItStmt string
	runTest    testBody
}

// checkForControllerManagerHealthy checks that the controller manager does not crash within "duration".
func checkForControllerManagerHealthy(ctx context.Context, duration time.Duration) error {
	var PID string
	cmd := "pidof kube-controller-manager"
	for start := time.Now(); time.Since(start) < duration && ctx.Err() == nil; time.Sleep(5 * time.Second) {
		result, err := e2essh.SSH(ctx, cmd, net.JoinHostPort(framework.APIAddress(), e2essh.SSHPort), framework.TestContext.Provider)
		if err != nil {
			// We don't necessarily know that it crashed; the pipe could just be broken.
			e2essh.LogResult(result)
			return fmt.Errorf("master unreachable after %v", time.Since(start))
		} else if result.Code != 0 {
			e2essh.LogResult(result)
			return fmt.Errorf("SSH result code not 0. actually: %v after %v", result.Code, time.Since(start))
		} else if result.Stdout != PID {
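			// `pidof` prints the controller manager's PID; a different value on a
			// later poll means the original process exited and a new one was started.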
			if PID == "" {
				PID = result.Stdout
			} else {
				// It's dead: the PID changed, so the controller manager restarted.
				return fmt.Errorf("controller manager crashed, old PID: %s, new PID: %s", PID, result.Stdout)
			}
		} else {
			framework.Logf("kube-controller-manager still healthy after %v", time.Since(start))
		}
	}
	return nil
}

var _ = utils.SIGDescribe("NFSPersistentVolumes", framework.WithDisruptive(), framework.WithFlaky(), func() {

	f := framework.NewDefaultFramework("disruptive-pv")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	var (
		c                           clientset.Interface
		ns                          string
		nfsServerPod                *v1.Pod
		nfsPVconfig                 e2epv.PersistentVolumeConfig
		pvcConfig                   e2epv.PersistentVolumeClaimConfig
		nfsServerHost, clientNodeIP string
		clientNode                  *v1.Node
		volLabel                    labels.Set
		selector                    *metav1.LabelSelector
	)

	ginkgo.BeforeEach(func(ctx context.Context) {
		// To protect the NFS volume pod from the kubelet restart, we isolate it on its own node.
		e2eskipper.SkipUnlessNodeCountIsAtLeast(minNodes)
		e2eskipper.SkipIfProviderIs("local")

		c = f.ClientSet
		ns = f.Namespace.Name
		volLabel = labels.Set{e2epv.VolumeSelectorKey: ns}
		selector = metav1.SetAsLabelSelector(volLabel)
		// Start the NFS server pod.
		_, nfsServerPod, nfsServerHost = e2evolume.NewNFSServer(ctx, c, ns, []string{"-G", "777", "/exports"})
		ginkgo.DeferCleanup(e2epod.DeletePodWithWait, c, nfsServerPod)
		nfsPVconfig = e2epv.PersistentVolumeConfig{
			NamePrefix: "nfs-",
			Labels:     volLabel,
			PVSource: v1.PersistentVolumeSource{
				NFS: &v1.NFSVolumeSource{
					Server:   nfsServerHost,
					Path:     "/exports",
					ReadOnly: false,
				},
			},
		}
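		// Note: the explicitly empty StorageClassName below keeps the claim from being
		// assigned the cluster's default StorageClass, so each claim binds to the
		// statically provisioned PV created for it rather than a dynamically
		// provisioned volume.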
		emptyStorageClass := ""
		pvcConfig = e2epv.PersistentVolumeClaimConfig{
			Selector:         selector,
			StorageClassName: &emptyStorageClass,
		}
		// Get the first ready node IP that is not hosting the NFS pod.
		if clientNodeIP == "" {
			framework.Logf("Designating test node")
			nodes, err := e2enode.GetReadySchedulableNodes(ctx, c)
			framework.ExpectNoError(err)
			for _, node := range nodes.Items {
				if node.Name != nfsServerPod.Spec.NodeName {
					clientNode = &node
					clientNodeIP, err = e2enode.GetSSHExternalIP(clientNode)
					framework.ExpectNoError(err)
					break
				}
			}
			gomega.Expect(clientNodeIP).NotTo(gomega.BeEmpty())
		}
	})

	ginkgo.Context("when kube-controller-manager restarts", func() {
		var (
			diskName1, diskName2 string
			err                  error
			pvConfig1, pvConfig2 e2epv.PersistentVolumeConfig
			pv1, pv2             *v1.PersistentVolume
			pvSource1, pvSource2 *v1.PersistentVolumeSource
			pvc1, pvc2           *v1.PersistentVolumeClaim
			clientPod            *v1.Pod
		)

		ginkgo.BeforeEach(func(ctx context.Context) {
			e2eskipper.SkipUnlessProviderIs("gce")
			e2eskipper.SkipUnlessSSHKeyPresent()

			ginkgo.By("Initializing first PD with PVPVC binding")
			pvSource1, diskName1 = createGCEVolume(ctx)
			framework.ExpectNoError(err)
			pvConfig1 = e2epv.PersistentVolumeConfig{
				NamePrefix: "gce-",
				Labels:     volLabel,
				PVSource:   *pvSource1,
				Prebind:    nil,
			}
			pv1, pvc1, err = e2epv.CreatePVPVC(ctx, c, f.Timeouts, pvConfig1, pvcConfig, ns, false)
			framework.ExpectNoError(err)
			framework.ExpectNoError(e2epv.WaitOnPVandPVC(ctx, c, f.Timeouts, ns, pv1, pvc1))

			ginkgo.By("Initializing second PD with PVPVC binding")
			pvSource2, diskName2 = createGCEVolume(ctx)
			framework.ExpectNoError(err)
			pvConfig2 = e2epv.PersistentVolumeConfig{
				NamePrefix: "gce-",
				Labels:     volLabel,
				PVSource:   *pvSource2,
				Prebind:    nil,
			}
			pv2, pvc2, err = e2epv.CreatePVPVC(ctx, c, f.Timeouts, pvConfig2, pvcConfig, ns, false)
			framework.ExpectNoError(err)
			framework.ExpectNoError(e2epv.WaitOnPVandPVC(ctx, c, f.Timeouts, ns, pv2, pvc2))

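			// Both claims are mounted by one pod so that the test below can delete a
			// claim that is still in use before restarting the kube-controller-manager.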
			ginkgo.By("Attaching both PVCs to a single pod")
			clientPod, err = e2epod.CreatePod(ctx, c, ns, nil, []*v1.PersistentVolumeClaim{pvc1, pvc2}, f.NamespacePodSecurityLevel, "")
			framework.ExpectNoError(err)
		})

		ginkgo.AfterEach(func(ctx context.Context) {
			// Delete client/user pod first
			framework.ExpectNoError(e2epod.DeletePodWithWait(ctx, c, clientPod))

			// Delete PV and PVCs
			if errs := e2epv.PVPVCCleanup(ctx, c, ns, pv1, pvc1); len(errs) > 0 {
				framework.Failf("AfterEach: Failed to delete PVC and/or PV. Errors: %v", utilerrors.NewAggregate(errs))
			}
			pv1, pvc1 = nil, nil
			if errs := e2epv.PVPVCCleanup(ctx, c, ns, pv2, pvc2); len(errs) > 0 {
				framework.Failf("AfterEach: Failed to delete PVC and/or PV. Errors: %v", utilerrors.NewAggregate(errs))
			}
			pv2, pvc2 = nil, nil

			// Delete the actual disks
			if diskName1 != "" {
				framework.ExpectNoError(e2epv.DeletePDWithRetry(ctx, diskName1))
			}
			if diskName2 != "" {
				framework.ExpectNoError(e2epv.DeletePDWithRetry(ctx, diskName2))
			}
		})

		ginkgo.It("should delete a bound PVC from a clientPod, restart the kube-controller-manager, and ensure the kube-controller-manager does not crash", func(ctx context.Context) {
			e2eskipper.SkipUnlessSSHKeyPresent()

			ginkgo.By("Deleting PVC for volume 2")
			err = e2epv.DeletePersistentVolumeClaim(ctx, c, pvc2.Name, ns)
			framework.ExpectNoError(err)
			pvc2 = nil

			ginkgo.By("Restarting the kube-controller-manager")
			err = e2ekubesystem.RestartControllerManager(ctx)
			framework.ExpectNoError(err)
			err = e2ekubesystem.WaitForControllerManagerUp(ctx)
			framework.ExpectNoError(err)
			framework.Logf("kube-controller-manager restarted")

			ginkgo.By("Observing the kube-controller-manager healthy for at least 2 minutes")
			// Continue checking for 2 minutes to make sure kube-controller-manager is healthy
			err = checkForControllerManagerHealthy(ctx, 2*time.Minute)
			framework.ExpectNoError(err)
		})

	})

	ginkgo.Context("when kubelet restarts", func() {
		var (
			clientPod *v1.Pod
			pv        *v1.PersistentVolume
			pvc       *v1.PersistentVolumeClaim
		)

		ginkgo.BeforeEach(func(ctx context.Context) {
			framework.Logf("Initializing test spec")
			clientPod, pv, pvc = initTestCase(ctx, f, c, nfsPVconfig, pvcConfig, ns, clientNode.Name)
		})

		ginkgo.AfterEach(func(ctx context.Context) {
			framework.Logf("Tearing down test spec")
			tearDownTestCase(ctx, c, f, ns, clientPod, pvc, pv, true /* force PV delete */)
			pv, pvc, clientPod = nil, nil, nil
		})

		// Test table housing the ginkgo.It() title string and test spec. runTest is of type testBody,
		// defined at the start of this file. To add tests, define a function mirroring the testBody
		// signature and assign it to runTest; a commented sketch of a new entry follows the table.
		disruptiveTestTable := []disruptiveTest{
			{
				testItStmt: "Should test that a file written to the mount before kubelet restart is readable after restart.",
				runTest:    utils.TestKubeletRestartsAndRestoresMount,
			},
			{
				testItStmt: "Should test that a volume mounted to a pod that is deleted while the kubelet is down unmounts when the kubelet returns.",
				runTest:    utils.TestVolumeUnmountsFromDeletedPod,
			},
			{
				testItStmt: "Should test that a volume mounted to a pod that is force deleted while the kubelet is down unmounts when the kubelet returns.",
				runTest:    utils.TestVolumeUnmountsFromForceDeletedPod,
			},
		}
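		// Sketch of a hypothetical additional entry (runTest may be any function that
		// matches the testBody signature, including an inline closure):
		//
		//	{
		//		testItStmt: "Should test that <behavior> survives a kubelet restart.",
		//		runTest: func(ctx context.Context, c clientset.Interface, f *framework.Framework, clientPod *v1.Pod, volumePath string) {
		//			// exercise clientPod's mount at volumePath here
		//		},
		//	},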

		// Test loop executes each disruptiveTest iteratively.
		for _, test := range disruptiveTestTable {
			func(t disruptiveTest) {
				ginkgo.It(t.testItStmt, func(ctx context.Context) {
					e2eskipper.SkipUnlessSSHKeyPresent()
					ginkgo.By("Executing Spec")
					t.runTest(ctx, c, f, clientPod, e2epod.VolumeMountPath1)
				})
			}(test)
		}
	})
})

// createGCEVolume creates a GCE PD and returns its PersistentVolumeSource and disk name.
func createGCEVolume(ctx context.Context) (*v1.PersistentVolumeSource, string) {
	diskName, err := e2epv.CreatePDWithRetry(ctx)
	framework.ExpectNoError(err)
	return &v1.PersistentVolumeSource{
		GCEPersistentDisk: &v1.GCEPersistentDiskVolumeSource{
			PDName:   diskName,
			FSType:   "ext3",
			ReadOnly: false,
		},
	}, diskName
}

// initTestCase initializes spec resources (pv, pvc, and pod) and returns pointers to be consumed
// by the test.
func initTestCase(ctx context.Context, f *framework.Framework, c clientset.Interface, pvConfig e2epv.PersistentVolumeConfig, pvcConfig e2epv.PersistentVolumeClaimConfig, ns, nodeName string) (*v1.Pod, *v1.PersistentVolume, *v1.PersistentVolumeClaim) {
	pv, pvc, err := e2epv.CreatePVPVC(ctx, c, f.Timeouts, pvConfig, pvcConfig, ns, false)
	defer func() {
		if err != nil {
			ginkgo.DeferCleanup(e2epv.DeletePersistentVolumeClaim, c, pvc.Name, ns)
			ginkgo.DeferCleanup(e2epv.DeletePersistentVolume, c, pv.Name)
		}
	}()
	framework.ExpectNoError(err)
	pod := e2epod.MakePod(ns, nil, []*v1.PersistentVolumeClaim{pvc}, f.NamespacePodSecurityLevel, "")
	pod.Spec.NodeName = nodeName
	framework.Logf("Creating NFS client pod.")
	pod, err = c.CoreV1().Pods(ns).Create(ctx, pod, metav1.CreateOptions{})
	framework.Logf("NFS client Pod %q created on Node %q", pod.Name, nodeName)
	framework.ExpectNoError(err)
	defer func() {
		if err != nil {
			ginkgo.DeferCleanup(e2epod.DeletePodWithWait, c, pod)
		}
	}()
	err = e2epod.WaitTimeoutForPodRunningInNamespace(ctx, c, pod.Name, pod.Namespace, f.Timeouts.PodStart)
	framework.ExpectNoError(err, fmt.Sprintf("Pod %q timed out waiting for phase: Running", pod.Name))
	// Return created api objects
	pod, err = c.CoreV1().Pods(ns).Get(ctx, pod.Name, metav1.GetOptions{})
	framework.ExpectNoError(err)
	pvc, err = c.CoreV1().PersistentVolumeClaims(ns).Get(ctx, pvc.Name, metav1.GetOptions{})
	framework.ExpectNoError(err)
	pv, err = c.CoreV1().PersistentVolumes().Get(ctx, pv.Name, metav1.GetOptions{})
	framework.ExpectNoError(err)
	return pod, pv, pvc
}

// tearDownTestCase destroys the resources created by initTestCase.
func tearDownTestCase(ctx context.Context, c clientset.Interface, f *framework.Framework, ns string, client *v1.Pod, pvc *v1.PersistentVolumeClaim, pv *v1.PersistentVolume, forceDeletePV bool) {
	// Ignore deletion errors. Failing on them will interrupt test cleanup.
	e2epod.DeletePodWithWait(ctx, c, client)
	e2epv.DeletePersistentVolumeClaim(ctx, c, pvc.Name, ns)
	if forceDeletePV && pv != nil {
		e2epv.DeletePersistentVolume(ctx, c, pv.Name)
		return
	}
	err := e2epv.WaitForPersistentVolumeDeleted(ctx, c, pv.Name, 5*time.Second, 5*time.Minute)
	framework.ExpectNoError(err, "Persistent Volume %v not deleted by dynamic provisioner", pv.Name)
}
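
// Usage sketch for the helpers above (mirrors the "when kubelet restarts" BeforeEach/AfterEach;
// nfsPVconfig, pvcConfig, ns, and clientNode are assumed to be set up as in that spec):
//
//	pod, pv, pvc := initTestCase(ctx, f, c, nfsPVconfig, pvcConfig, ns, clientNode.Name)
//	defer tearDownTestCase(ctx, c, f, ns, pod, pvc, pv, true /* force PV delete */)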