k8s.io/kubernetes@v1.29.3/test/e2e/storage/testsuites/snapshottable.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package testsuites 18 19 import ( 20 "context" 21 "fmt" 22 "strings" 23 "time" 24 25 "github.com/onsi/ginkgo/v2" 26 "github.com/onsi/gomega" 27 28 v1 "k8s.io/api/core/v1" 29 storagev1 "k8s.io/api/storage/v1" 30 apierrors "k8s.io/apimachinery/pkg/api/errors" 31 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 32 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 33 "k8s.io/client-go/dynamic" 34 clientset "k8s.io/client-go/kubernetes" 35 "k8s.io/component-helpers/storage/ephemeral" 36 "k8s.io/kubernetes/test/e2e/feature" 37 "k8s.io/kubernetes/test/e2e/framework" 38 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 39 e2eoutput "k8s.io/kubernetes/test/e2e/framework/pod/output" 40 e2epv "k8s.io/kubernetes/test/e2e/framework/pv" 41 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" 42 e2evolume "k8s.io/kubernetes/test/e2e/framework/volume" 43 storageframework "k8s.io/kubernetes/test/e2e/storage/framework" 44 storageutils "k8s.io/kubernetes/test/e2e/storage/utils" 45 admissionapi "k8s.io/pod-security-admission/api" 46 ) 47 48 // data file name 49 const datapath = "/mnt/test/data" 50 51 type snapshottableTestSuite struct { 52 tsInfo storageframework.TestSuiteInfo 53 } 54 55 var ( 56 sDriver storageframework.SnapshottableTestDriver 57 dDriver storageframework.DynamicPVTestDriver 58 ) 59 60 // InitCustomSnapshottableTestSuite returns snapshottableTestSuite that implements TestSuite interface 61 // using custom test patterns 62 func InitCustomSnapshottableTestSuite(patterns []storageframework.TestPattern) storageframework.TestSuite { 63 return &snapshottableTestSuite{ 64 tsInfo: storageframework.TestSuiteInfo{ 65 Name: "snapshottable", 66 TestPatterns: patterns, 67 SupportedSizeRange: e2evolume.SizeRange{ 68 Min: "1Mi", 69 }, 70 TestTags: []interface{}{feature.VolumeSnapshotDataSource}, 71 }, 72 } 73 } 74 75 // InitSnapshottableTestSuite returns snapshottableTestSuite that implements TestSuite interface 76 // using testsuite default patterns 77 func InitSnapshottableTestSuite() storageframework.TestSuite { 78 patterns := []storageframework.TestPattern{ 79 storageframework.DynamicSnapshotDelete, 80 storageframework.DynamicSnapshotRetain, 81 storageframework.EphemeralSnapshotDelete, 82 storageframework.EphemeralSnapshotRetain, 83 storageframework.PreprovisionedSnapshotDelete, 84 storageframework.PreprovisionedSnapshotRetain, 85 } 86 return InitCustomSnapshottableTestSuite(patterns) 87 } 88 89 func (s *snapshottableTestSuite) GetTestSuiteInfo() storageframework.TestSuiteInfo { 90 return s.tsInfo 91 } 92 93 func (s *snapshottableTestSuite) SkipUnsupportedTests(driver storageframework.TestDriver, pattern storageframework.TestPattern) { 94 // Check preconditions. 95 dInfo := driver.GetDriverInfo() 96 ok := false 97 _, ok = driver.(storageframework.SnapshottableTestDriver) 98 if !dInfo.Capabilities[storageframework.CapSnapshotDataSource] || !ok { 99 e2eskipper.Skipf("Driver %q does not support snapshots - skipping", dInfo.Name) 100 } 101 _, ok = driver.(storageframework.DynamicPVTestDriver) 102 if !ok { 103 e2eskipper.Skipf("Driver %q does not support dynamic provisioning - skipping", driver.GetDriverInfo().Name) 104 } 105 } 106 107 func (s *snapshottableTestSuite) DefineTests(driver storageframework.TestDriver, pattern storageframework.TestPattern) { 108 109 // Beware that it also registers an AfterEach which renders f unusable. Any code using 110 // f must run inside an It or Context callback. 111 f := framework.NewDefaultFramework("snapshotting") 112 f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged 113 114 ginkgo.Describe("volume snapshot controller", func() { 115 var ( 116 err error 117 config *storageframework.PerTestConfig 118 119 cs clientset.Interface 120 dc dynamic.Interface 121 pvc *v1.PersistentVolumeClaim 122 sc *storagev1.StorageClass 123 volumeResource *storageframework.VolumeResource 124 pod *v1.Pod 125 claimSize string 126 originalMntTestData string 127 ) 128 init := func(ctx context.Context) { 129 sDriver, _ = driver.(storageframework.SnapshottableTestDriver) 130 dDriver, _ = driver.(storageframework.DynamicPVTestDriver) 131 // init snap class, create a source PV, PVC, Pod 132 cs = f.ClientSet 133 dc = f.DynamicClient 134 135 // Now do the more expensive test initialization. 136 config = driver.PrepareTest(ctx, f) 137 138 volumeResource = storageframework.CreateVolumeResource(ctx, dDriver, config, pattern, s.GetTestSuiteInfo().SupportedSizeRange) 139 ginkgo.DeferCleanup(volumeResource.CleanupResource) 140 141 ginkgo.By("[init] starting a pod to use the claim") 142 originalMntTestData = fmt.Sprintf("hello from %s namespace", f.Namespace.Name) 143 // After writing data to a file `sync` flushes the data from memory to disk. 144 // sync is available in the Linux and Windows versions of agnhost. 145 command := fmt.Sprintf("echo '%s' > %s; sync", originalMntTestData, datapath) 146 147 pod = StartInPodWithVolumeSource(ctx, cs, *volumeResource.VolSource, f.Namespace.Name, "pvc-snapshottable-tester", command, config.ClientNodeSelection) 148 149 // At this point a pod is created with a PVC. How to proceed depends on which test is running. 150 } 151 152 ginkgo.Context("", func() { 153 ginkgo.It("should check snapshot fields, check restore correctly works, check deletion (ephemeral)", func(ctx context.Context) { 154 if pattern.VolType != storageframework.GenericEphemeralVolume { 155 e2eskipper.Skipf("volume type %q is not ephemeral", pattern.VolType) 156 } 157 init(ctx) 158 159 // delete the pod at the end of the test 160 ginkgo.DeferCleanup(e2epod.DeletePodWithWait, cs, pod) 161 162 // We can test snapshotting of generic 163 // ephemeral volumes by creating the snapshot 164 // while the pod is running (online). We cannot do it after pod deletion, 165 // because then the PVC also gets marked and snapshotting no longer works 166 // (even when a finalizer prevents actual removal of the PVC). 167 // 168 // Because data consistency cannot be 169 // guaranteed, this flavor of the test doesn't 170 // check the content of the snapshot. 171 172 framework.ExpectNoError(e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, cs, pod.Name, pod.Namespace, f.Timeouts.PodStartSlow)) 173 pod, err = cs.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{}) 174 framework.ExpectNoError(err, "check pod after it terminated") 175 176 // Get new copy of the claim 177 ginkgo.By("[init] checking the claim") 178 pvcName := ephemeral.VolumeClaimName(pod, &pod.Spec.Volumes[0]) 179 pvcNamespace := pod.Namespace 180 181 parameters := map[string]string{} 182 sr := storageframework.CreateSnapshotResource(ctx, sDriver, config, pattern, pvcName, pvcNamespace, f.Timeouts, parameters) 183 ginkgo.DeferCleanup(sr.CleanupResource, f.Timeouts) 184 185 err = e2epv.WaitForPersistentVolumeClaimPhase(ctx, v1.ClaimBound, cs, pvcNamespace, pvcName, framework.Poll, f.Timeouts.ClaimProvision) 186 framework.ExpectNoError(err) 187 188 pvc, err = cs.CoreV1().PersistentVolumeClaims(pvcNamespace).Get(ctx, pvcName, metav1.GetOptions{}) 189 framework.ExpectNoError(err, "get PVC") 190 claimSize = pvc.Spec.Resources.Requests.Storage().String() 191 sc = volumeResource.Sc 192 193 // Get the bound PV 194 ginkgo.By("[init] checking the PV") 195 _, err := cs.CoreV1().PersistentVolumes().Get(ctx, pvc.Spec.VolumeName, metav1.GetOptions{}) 196 framework.ExpectNoError(err) 197 198 vs := sr.Vs 199 // get the snapshot and check SnapshotContent properties 200 vscontent := checkSnapshot(ctx, dc, sr, pattern) 201 202 var restoredPVC *v1.PersistentVolumeClaim 203 var restoredPod *v1.Pod 204 205 ginkgo.By("creating a pvc from the snapshot") 206 restoredPVC = e2epv.MakePersistentVolumeClaim(e2epv.PersistentVolumeClaimConfig{ 207 ClaimSize: claimSize, 208 StorageClassName: &(sc.Name), 209 }, config.Framework.Namespace.Name) 210 211 group := "snapshot.storage.k8s.io" 212 213 restoredPVC.Spec.DataSource = &v1.TypedLocalObjectReference{ 214 APIGroup: &group, 215 Kind: "VolumeSnapshot", 216 Name: vs.GetName(), 217 } 218 219 ginkgo.By("starting a pod to use the snapshot") 220 volSrc := v1.VolumeSource{ 221 Ephemeral: &v1.EphemeralVolumeSource{ 222 VolumeClaimTemplate: &v1.PersistentVolumeClaimTemplate{ 223 Spec: restoredPVC.Spec, 224 }, 225 }, 226 } 227 228 restoredPod = StartInPodWithVolumeSource(ctx, cs, volSrc, restoredPVC.Namespace, "restored-pvc-tester", "sleep 300", config.ClientNodeSelection) 229 ginkgo.DeferCleanup(e2epod.DeletePodWithWait, cs, restoredPod) 230 231 framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, cs, restoredPod.Name, restoredPod.Namespace, f.Timeouts.PodStartSlow)) 232 if pattern.VolType != storageframework.GenericEphemeralVolume { 233 commands := e2evolume.GenerateReadFileCmd(datapath) 234 _, err = e2eoutput.LookForStringInPodExec(restoredPod.Namespace, restoredPod.Name, commands, originalMntTestData, time.Minute) 235 framework.ExpectNoError(err) 236 } 237 238 ginkgo.By("should delete the VolumeSnapshotContent according to its deletion policy") 239 // Delete both Snapshot and restored Pod/PVC at the same time because different storage systems 240 // have different ordering of deletion. Some may require delete the restored PVC first before 241 // Snapshot deletion and some are opposite. 242 err = storageutils.DeleteSnapshotWithoutWaiting(ctx, dc, vs.GetNamespace(), vs.GetName()) 243 framework.ExpectNoError(err) 244 framework.Logf("deleting restored pod %q/%q", restoredPod.Namespace, restoredPod.Name) 245 err = cs.CoreV1().Pods(restoredPod.Namespace).Delete(context.TODO(), restoredPod.Name, metav1.DeleteOptions{}) 246 framework.ExpectNoError(err) 247 deleteVolumeSnapshot(ctx, f, dc, sr, pattern, vscontent) 248 }) 249 250 ginkgo.It("should check snapshot fields, check restore correctly works after modifying source data, check deletion (persistent)", func(ctx context.Context) { 251 if pattern.VolType == storageframework.GenericEphemeralVolume { 252 e2eskipper.Skipf("volume type %q is ephemeral", pattern.VolType) 253 } 254 init(ctx) 255 256 pvc = volumeResource.Pvc 257 sc = volumeResource.Sc 258 259 // The pod should be in the Success state. 260 ginkgo.By("[init] check pod success") 261 pod, err = cs.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{}) 262 framework.ExpectNoError(err, "Failed to fetch pod: %v", err) 263 framework.ExpectNoError(e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, cs, pod.Name, pod.Namespace, f.Timeouts.PodStartSlow)) 264 // Sync the pod to know additional fields. 265 pod, err = cs.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{}) 266 framework.ExpectNoError(err, "Failed to fetch pod: %v", err) 267 268 ginkgo.By("[init] checking the claim") 269 err = e2epv.WaitForPersistentVolumeClaimPhase(ctx, v1.ClaimBound, cs, pvc.Namespace, pvc.Name, framework.Poll, f.Timeouts.ClaimProvision) 270 framework.ExpectNoError(err) 271 // Get new copy of the claim. 272 pvc, err = cs.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(ctx, pvc.Name, metav1.GetOptions{}) 273 framework.ExpectNoError(err) 274 275 // Get the bound PV. 276 ginkgo.By("[init] checking the PV") 277 pv, err := cs.CoreV1().PersistentVolumes().Get(ctx, pvc.Spec.VolumeName, metav1.GetOptions{}) 278 framework.ExpectNoError(err) 279 280 // Delete the pod to force NodeUnpublishVolume (unlike the ephemeral case where the pod is deleted at the end of the test). 281 ginkgo.By("[init] deleting the pod") 282 StopPod(ctx, cs, pod) 283 284 // At this point we know that: 285 // - a pod was created with a PV that's supposed to have data 286 // 287 // However there's a caching issue that @jinxu97 explained and it's related with the pod & volume 288 // lifecycle, to understand it we first analyze what the volumemanager does: 289 // - when a pod is delete the volumemanager will try to cleanup the volume mounts 290 // - NodeUnpublishVolume: unbinds the bind mount from the container 291 // - Linux: the bind mount is removed, which does not flush any cache 292 // - Windows: we delete a symlink, data's not flushed yet to disk 293 // - NodeUnstageVolume: unmount the global mount 294 // - Linux: disk is unmounted and all caches flushed. 295 // - Windows: data is flushed to disk and the disk is detached 296 // 297 // Pod deletion might not guarantee a data flush to disk, however NodeUnstageVolume adds the logic 298 // to flush the data to disk (see #81690 for details). We need to wait for NodeUnstageVolume, as 299 // NodeUnpublishVolume only removes the bind mount, which doesn't force the caches to flush. 300 // It's possible to create empty snapshots if we don't wait (see #101279 for details). 301 // 302 // In the following code by checking if the PV is not in the node.Status.VolumesInUse field we 303 // ensure that the volume is not used by the node anymore (an indicator that NodeUnstageVolume has 304 // already finished) 305 nodeName := pod.Spec.NodeName 306 gomega.Expect(nodeName).NotTo(gomega.BeEmpty(), "pod.Spec.NodeName must not be empty") 307 308 // Snapshot tests are only executed for CSI drivers. When CSI drivers 309 // are attached to the node they use VolumeHandle instead of the pv.Name. 310 volumeName := pv.Spec.PersistentVolumeSource.CSI.VolumeHandle 311 312 ginkgo.By(fmt.Sprintf("[init] waiting until the node=%s is not using the volume=%s", nodeName, volumeName)) 313 success := storageutils.WaitUntil(framework.Poll, f.Timeouts.PVDelete, func() bool { 314 node, err := cs.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) 315 framework.ExpectNoError(err) 316 volumesInUse := node.Status.VolumesInUse 317 framework.Logf("current volumes in use: %+v", volumesInUse) 318 for i := 0; i < len(volumesInUse); i++ { 319 if strings.HasSuffix(string(volumesInUse[i]), volumeName) { 320 return false 321 } 322 } 323 return true 324 }) 325 if !success { 326 framework.Failf("timed out waiting for node=%s to not use the volume=%s", nodeName, volumeName) 327 } 328 329 // Take the snapshot. 330 parameters := map[string]string{} 331 sr := storageframework.CreateSnapshotResource(ctx, sDriver, config, pattern, pvc.Name, pvc.Namespace, f.Timeouts, parameters) 332 ginkgo.DeferCleanup(sr.CleanupResource, f.Timeouts) 333 vs := sr.Vs 334 // get the snapshot and check SnapshotContent properties 335 vscontent := checkSnapshot(ctx, dc, sr, pattern) 336 337 ginkgo.By("Modifying source data test") 338 var restoredPVC *v1.PersistentVolumeClaim 339 var restoredPod *v1.Pod 340 modifiedMntTestData := fmt.Sprintf("modified data from %s namespace", pvc.GetNamespace()) 341 342 ginkgo.By("modifying the data in the source PVC") 343 344 // After writing data to a file `sync` flushes the data from memory to disk. 345 // sync is available in the Linux and Windows versions of agnhost. 346 command := fmt.Sprintf("echo '%s' > %s; sync", modifiedMntTestData, datapath) 347 RunInPodWithVolume(ctx, cs, f.Timeouts, pvc.Namespace, pvc.Name, "pvc-snapshottable-data-tester", command, config.ClientNodeSelection) 348 349 ginkgo.By("creating a pvc from the snapshot") 350 claimSize = pvc.Spec.Resources.Requests.Storage().String() 351 restoredPVC = e2epv.MakePersistentVolumeClaim(e2epv.PersistentVolumeClaimConfig{ 352 ClaimSize: claimSize, 353 StorageClassName: &(sc.Name), 354 }, config.Framework.Namespace.Name) 355 356 group := "snapshot.storage.k8s.io" 357 358 restoredPVC.Spec.DataSource = &v1.TypedLocalObjectReference{ 359 APIGroup: &group, 360 Kind: "VolumeSnapshot", 361 Name: vs.GetName(), 362 } 363 364 restoredPVC, err = cs.CoreV1().PersistentVolumeClaims(restoredPVC.Namespace).Create(ctx, restoredPVC, metav1.CreateOptions{}) 365 framework.ExpectNoError(err) 366 ginkgo.DeferCleanup(func(ctx context.Context) { 367 framework.Logf("deleting claim %q/%q", restoredPVC.Namespace, restoredPVC.Name) 368 // typically this claim has already been deleted 369 err = cs.CoreV1().PersistentVolumeClaims(restoredPVC.Namespace).Delete(ctx, restoredPVC.Name, metav1.DeleteOptions{}) 370 if err != nil && !apierrors.IsNotFound(err) { 371 framework.Failf("Error deleting claim %q. Error: %v", restoredPVC.Name, err) 372 } 373 }) 374 375 ginkgo.By("starting a pod to use the snapshot") 376 restoredPod = StartInPodWithVolume(ctx, cs, restoredPVC.Namespace, restoredPVC.Name, "restored-pvc-tester", "sleep 300", config.ClientNodeSelection) 377 ginkgo.DeferCleanup(StopPod, cs, restoredPod) 378 framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, cs, restoredPod.Name, restoredPod.Namespace, f.Timeouts.PodStartSlow)) 379 commands := e2evolume.GenerateReadFileCmd(datapath) 380 _, err = e2eoutput.LookForStringInPodExec(restoredPod.Namespace, restoredPod.Name, commands, originalMntTestData, time.Minute) 381 framework.ExpectNoError(err) 382 383 ginkgo.By("should delete the VolumeSnapshotContent according to its deletion policy") 384 385 // Delete both Snapshot and restored Pod/PVC at the same time because different storage systems 386 // have different ordering of deletion. Some may require delete the restored PVC first before 387 // Snapshot deletion and some are opposite. 388 err = storageutils.DeleteSnapshotWithoutWaiting(ctx, dc, vs.GetNamespace(), vs.GetName()) 389 framework.ExpectNoError(err) 390 framework.Logf("deleting restored pod %q/%q", restoredPod.Namespace, restoredPod.Name) 391 err = cs.CoreV1().Pods(restoredPod.Namespace).Delete(ctx, restoredPod.Name, metav1.DeleteOptions{}) 392 framework.ExpectNoError(err) 393 framework.Logf("deleting restored PVC %q/%q", restoredPVC.Namespace, restoredPVC.Name) 394 err = cs.CoreV1().PersistentVolumeClaims(restoredPVC.Namespace).Delete(ctx, restoredPVC.Name, metav1.DeleteOptions{}) 395 framework.ExpectNoError(err) 396 397 deleteVolumeSnapshot(ctx, f, dc, sr, pattern, vscontent) 398 }) 399 }) 400 }) 401 } 402 403 func deleteVolumeSnapshot(ctx context.Context, f *framework.Framework, dc dynamic.Interface, sr *storageframework.SnapshotResource, pattern storageframework.TestPattern, vscontent *unstructured.Unstructured) { 404 vs := sr.Vs 405 406 // Wait for the Snapshot to be actually deleted from API server 407 err := storageutils.WaitForNamespacedGVRDeletion(ctx, dc, storageutils.SnapshotGVR, vs.GetNamespace(), vs.GetNamespace(), framework.Poll, f.Timeouts.SnapshotDelete) 408 framework.ExpectNoError(err) 409 410 switch pattern.SnapshotDeletionPolicy { 411 case storageframework.DeleteSnapshot: 412 ginkgo.By("checking the SnapshotContent has been deleted") 413 err = storageutils.WaitForGVRDeletion(ctx, dc, storageutils.SnapshotContentGVR, vscontent.GetName(), framework.Poll, f.Timeouts.SnapshotDelete) 414 framework.ExpectNoError(err) 415 case storageframework.RetainSnapshot: 416 ginkgo.By("checking the SnapshotContent has not been deleted") 417 err = storageutils.WaitForGVRDeletion(ctx, dc, storageutils.SnapshotContentGVR, vscontent.GetName(), 1*time.Second /* poll */, 30*time.Second /* timeout */) 418 framework.ExpectError(err) 419 } 420 } 421 422 func checkSnapshot(ctx context.Context, dc dynamic.Interface, sr *storageframework.SnapshotResource, pattern storageframework.TestPattern) *unstructured.Unstructured { 423 vs := sr.Vs 424 vsc := sr.Vsclass 425 426 // Get new copy of the snapshot 427 ginkgo.By("checking the snapshot") 428 vs, err := dc.Resource(storageutils.SnapshotGVR).Namespace(vs.GetNamespace()).Get(ctx, vs.GetName(), metav1.GetOptions{}) 429 framework.ExpectNoError(err) 430 431 // Get the bound snapshotContent 432 snapshotStatus := vs.Object["status"].(map[string]interface{}) 433 snapshotContentName := snapshotStatus["boundVolumeSnapshotContentName"].(string) 434 vscontent, err := dc.Resource(storageutils.SnapshotContentGVR).Get(ctx, snapshotContentName, metav1.GetOptions{}) 435 framework.ExpectNoError(err) 436 437 snapshotContentSpec := vscontent.Object["spec"].(map[string]interface{}) 438 volumeSnapshotRef := snapshotContentSpec["volumeSnapshotRef"].(map[string]interface{}) 439 440 // Check SnapshotContent properties 441 ginkgo.By("checking the SnapshotContent") 442 // PreprovisionedCreatedSnapshot do not need to set volume snapshot class name 443 if pattern.SnapshotType != storageframework.PreprovisionedCreatedSnapshot { 444 gomega.Expect(snapshotContentSpec["volumeSnapshotClassName"]).To(gomega.Equal(vsc.GetName())) 445 } 446 gomega.Expect(volumeSnapshotRef).To(gomega.HaveKeyWithValue("name", vs.GetName())) 447 gomega.Expect(volumeSnapshotRef).To(gomega.HaveKeyWithValue("namespace", vs.GetNamespace())) 448 return vscontent 449 }