k8s.io/kubernetes@v1.29.3/test/e2e/storage/pd.go

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package storage
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"math/rand"
    23  	"strings"
    24  	"time"
    25  
    26  	"google.golang.org/api/googleapi"
    27  
    28  	"github.com/onsi/ginkgo/v2"
    29  	"github.com/onsi/gomega"
    30  	v1 "k8s.io/api/core/v1"
    31  	policyv1 "k8s.io/api/policy/v1"
    32  	"k8s.io/apimachinery/pkg/api/resource"
    33  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    34  	"k8s.io/apimachinery/pkg/types"
    35  	"k8s.io/apimachinery/pkg/util/uuid"
    36  	"k8s.io/apimachinery/pkg/util/wait"
    37  	clientset "k8s.io/client-go/kubernetes"
    38  	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
    39  	"k8s.io/kubernetes/test/e2e/feature"
    40  	"k8s.io/kubernetes/test/e2e/framework"
    41  	e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl"
    42  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    43  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    44  	"k8s.io/kubernetes/test/e2e/framework/providers/gce"
    45  	e2epv "k8s.io/kubernetes/test/e2e/framework/pv"
    46  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    47  	"k8s.io/kubernetes/test/e2e/storage/utils"
    48  	imageutils "k8s.io/kubernetes/test/utils/image"
    49  	admissionapi "k8s.io/pod-security-admission/api"
    50  )
    51  
    52  const (
    53  	gcePDDetachTimeout  = 10 * time.Minute
    54  	gcePDDetachPollTime = 10 * time.Second
    55  	nodeStatusTimeout   = 10 * time.Minute
    56  	nodeStatusPollTime  = 1 * time.Second
    57  	podEvictTimeout     = 2 * time.Minute
    58  	minNodes            = 2
    59  )
    60  
    61  var _ = utils.SIGDescribe("Pod Disks", feature.StorageProvider, func() {
    62  	var (
    63  		ns         string
    64  		cs         clientset.Interface
    65  		podClient  v1core.PodInterface
    66  		nodeClient v1core.NodeInterface
    67  		host0Name  types.NodeName
    68  		host1Name  types.NodeName
    69  		nodes      *v1.NodeList
    70  	)
    71  	f := framework.NewDefaultFramework("pod-disks")
    72  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    73  
    74  	ginkgo.BeforeEach(func(ctx context.Context) {
    75  		e2eskipper.SkipUnlessNodeCountIsAtLeast(minNodes)
    76  		cs = f.ClientSet
    77  		ns = f.Namespace.Name
    78  
    79  		e2eskipper.SkipIfMultizone(ctx, cs)
    80  
    81  		podClient = cs.CoreV1().Pods(ns)
    82  		nodeClient = cs.CoreV1().Nodes()
    83  		var err error
    84  		nodes, err = e2enode.GetReadySchedulableNodes(ctx, cs)
    85  		framework.ExpectNoError(err)
    86  		if len(nodes.Items) < minNodes {
    87  			e2eskipper.Skipf("The test requires %d schedulable nodes, got only %d", minNodes, len(nodes.Items))
    88  		}
    89  		host0Name = types.NodeName(nodes.Items[0].ObjectMeta.Name)
    90  		host1Name = types.NodeName(nodes.Items[1].ObjectMeta.Name)
    91  	})
    92  
    93  	f.Context("schedule pods each with a PD, delete pod and verify detach", f.WithSlow(), func() {
    94  		const (
    95  			podDefaultGrace   = "default (30s)"
    96  			podImmediateGrace = "immediate (0s)"
    97  		)
    98  		var readOnlyMap = map[bool]string{
    99  			true:  "read-only",
   100  			false: "RW",
   101  		}
   102  		type testT struct {
    103  			descr     string               // ginkgo.It() description
   104  			readOnly  bool                 // true means pd is read-only
   105  			deleteOpt metav1.DeleteOptions // pod delete option
   106  		}
   107  		tests := []testT{
   108  			{
   109  				descr:     podImmediateGrace,
   110  				readOnly:  false,
   111  				deleteOpt: *metav1.NewDeleteOptions(0),
   112  			},
   113  			{
   114  				descr:     podDefaultGrace,
   115  				readOnly:  false,
   116  				deleteOpt: metav1.DeleteOptions{},
   117  			},
   118  			{
   119  				descr:     podImmediateGrace,
   120  				readOnly:  true,
   121  				deleteOpt: *metav1.NewDeleteOptions(0),
   122  			},
   123  			{
   124  				descr:     podDefaultGrace,
   125  				readOnly:  true,
   126  				deleteOpt: metav1.DeleteOptions{},
   127  			},
   128  		}
   129  
   130  		for _, t := range tests {
   131  			podDelOpt := t.deleteOpt
   132  			readOnly := t.readOnly
   133  			readOnlyTxt := readOnlyMap[readOnly]
   134  
   135  			ginkgo.It(fmt.Sprintf("for %s PD with pod delete grace period of %q", readOnlyTxt, t.descr), func(ctx context.Context) {
   136  				e2eskipper.SkipUnlessProviderIs("gce", "gke", "aws")
   137  				if readOnly {
   138  					e2eskipper.SkipIfProviderIs("aws")
   139  				}
   140  
   141  				ginkgo.By("creating PD")
   142  				diskName, err := e2epv.CreatePDWithRetry(ctx)
   143  				framework.ExpectNoError(err, "Error creating PD")
   144  
   145  				var fmtPod *v1.Pod
   146  				if readOnly {
    147  					// if all test pods are read-only, a RW pod is needed first to format the PD
   148  					ginkgo.By("creating RW fmt Pod to ensure PD is formatted")
   149  					fmtPod = testPDPod([]string{diskName}, host0Name, false, 1)
   150  					_, err = podClient.Create(ctx, fmtPod, metav1.CreateOptions{})
   151  					framework.ExpectNoError(err, "Failed to create fmtPod")
   152  					framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, f.ClientSet, fmtPod.Name, f.Namespace.Name, f.Timeouts.PodStartSlow))
   153  
   154  					ginkgo.By("deleting the fmtPod")
   155  					framework.ExpectNoError(podClient.Delete(ctx, fmtPod.Name, *metav1.NewDeleteOptions(0)), "Failed to delete fmtPod")
   156  					framework.Logf("deleted fmtPod %q", fmtPod.Name)
   157  					ginkgo.By("waiting for PD to detach")
   158  					framework.ExpectNoError(waitForPDDetach(diskName, host0Name))
   159  				}
   160  
   161  				// prepare to create two test pods on separate nodes
   162  				host0Pod := testPDPod([]string{diskName}, host0Name, readOnly, 1)
   163  				host1Pod := testPDPod([]string{diskName}, host1Name, readOnly, 1)
   164  
   165  				defer func() {
   166  					// Teardown should do nothing unless test failed
   167  					ginkgo.By("defer: cleaning up PD-RW test environment")
   168  					framework.Logf("defer cleanup errors can usually be ignored")
   169  					if fmtPod != nil {
   170  						podClient.Delete(ctx, fmtPod.Name, podDelOpt)
   171  					}
   172  					podClient.Delete(ctx, host0Pod.Name, podDelOpt)
   173  					podClient.Delete(ctx, host1Pod.Name, podDelOpt)
   174  					detachAndDeletePDs(ctx, diskName, []types.NodeName{host0Name, host1Name})
   175  				}()
   176  
   177  				ginkgo.By("creating host0Pod on node0")
   178  				_, err = podClient.Create(ctx, host0Pod, metav1.CreateOptions{})
   179  				framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0Pod: %v", err))
   180  				framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, f.ClientSet, host0Pod.Name, f.Namespace.Name, f.Timeouts.PodStartSlow))
   181  				framework.Logf("host0Pod: %q, node0: %q", host0Pod.Name, host0Name)
   182  
   183  				var containerName, testFile, testFileContents string
   184  				if !readOnly {
   185  					ginkgo.By("writing content to host0Pod on node0")
   186  					containerName = "mycontainer"
   187  					testFile = "/testpd1/tracker"
   188  					testFileContents = fmt.Sprintf("%v", rand.Int())
   189  					tk := e2ekubectl.NewTestKubeconfig(framework.TestContext.CertDir, framework.TestContext.Host, framework.TestContext.KubeConfig, framework.TestContext.KubeContext, framework.TestContext.KubectlPath, ns)
   190  					framework.ExpectNoError(tk.WriteFileViaContainer(host0Pod.Name, containerName, testFile, testFileContents))
   191  					framework.Logf("wrote %q to file %q in pod %q on node %q", testFileContents, testFile, host0Pod.Name, host0Name)
   192  					ginkgo.By("verifying PD is present in node0's VolumeInUse list")
   193  					framework.ExpectNoError(waitForPDInVolumesInUse(ctx, nodeClient, diskName, host0Name, nodeStatusTimeout, true /* shouldExist */))
   194  					ginkgo.By("deleting host0Pod") // delete this pod before creating next pod
   195  					framework.ExpectNoError(podClient.Delete(ctx, host0Pod.Name, podDelOpt), "Failed to delete host0Pod")
   196  					framework.Logf("deleted host0Pod %q", host0Pod.Name)
    197  					framework.ExpectNoError(e2epod.WaitForPodNotFoundInNamespace(ctx, cs, host0Pod.Name, host0Pod.Namespace, f.Timeouts.PodDelete), "Failed to wait for host0Pod to disappear")
    198  					framework.Logf("deleted host0Pod %q has disappeared", host0Pod.Name)
   199  				}
   200  
   201  				ginkgo.By("creating host1Pod on node1")
   202  				_, err = podClient.Create(ctx, host1Pod, metav1.CreateOptions{})
   203  				framework.ExpectNoError(err, "Failed to create host1Pod")
   204  				framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, f.ClientSet, host1Pod.Name, f.Namespace.Name, f.Timeouts.PodStartSlow))
   205  				framework.Logf("host1Pod: %q, node1: %q", host1Pod.Name, host1Name)
   206  
   207  				if readOnly {
   208  					ginkgo.By("deleting host0Pod")
   209  					framework.ExpectNoError(podClient.Delete(ctx, host0Pod.Name, podDelOpt), "Failed to delete host0Pod")
   210  					framework.Logf("deleted host0Pod %q", host0Pod.Name)
   211  				} else {
   212  					ginkgo.By("verifying PD contents in host1Pod")
   213  					verifyPDContentsViaContainer(ns, f, host1Pod.Name, containerName, map[string]string{testFile: testFileContents})
   214  					framework.Logf("verified PD contents in pod %q", host1Pod.Name)
   215  					ginkgo.By("verifying PD is removed from node0")
   216  					framework.ExpectNoError(waitForPDInVolumesInUse(ctx, nodeClient, diskName, host0Name, nodeStatusTimeout, false /* shouldExist */))
    217  					framework.Logf("PD %q removed from node %q's VolumeInUse list", diskName, host0Name)
   218  				}
   219  
   220  				ginkgo.By("deleting host1Pod")
   221  				framework.ExpectNoError(podClient.Delete(ctx, host1Pod.Name, podDelOpt), "Failed to delete host1Pod")
   222  				framework.Logf("deleted host1Pod %q", host1Pod.Name)
   223  
   224  				ginkgo.By("Test completed successfully, waiting for PD to detach from both nodes")
   225  				waitForPDDetach(diskName, host0Name)
   226  				waitForPDDetach(diskName, host1Name)
   227  			})
   228  		}
   229  	})
   230  
   231  	f.Context("schedule a pod w/ RW PD(s) mounted to 1 or more containers, write to PD, verify content, delete pod, and repeat in rapid succession", f.WithSlow(), func() {
   232  		type testT struct {
   233  			numContainers int
   234  			numPDs        int
   235  			repeatCnt     int
   236  		}
   237  		tests := []testT{
   238  			{
   239  				numContainers: 4,
   240  				numPDs:        1,
   241  				repeatCnt:     3,
   242  			},
   243  			{
   244  				numContainers: 1,
   245  				numPDs:        2,
   246  				repeatCnt:     3,
   247  			},
   248  		}
   249  
   250  		for _, t := range tests {
   251  			numPDs := t.numPDs
   252  			numContainers := t.numContainers
   253  			t := t
   254  
   255  			ginkgo.It(fmt.Sprintf("using %d containers and %d PDs", numContainers, numPDs), func(ctx context.Context) {
   256  				e2eskipper.SkipUnlessProviderIs("gce", "gke", "aws")
   257  				var host0Pod *v1.Pod
   258  				var err error
   259  				fileAndContentToVerify := make(map[string]string)
   260  				diskNames := make([]string, 0, numPDs)
   261  
   262  				ginkgo.By(fmt.Sprintf("creating %d PD(s)", numPDs))
   263  				for i := 0; i < numPDs; i++ {
   264  					name, err := e2epv.CreatePDWithRetry(ctx)
   265  					framework.ExpectNoError(err, fmt.Sprintf("Error creating PD %d", i))
   266  					diskNames = append(diskNames, name)
   267  				}
   268  
   269  				defer func() {
   270  					// Teardown should do nothing unless test failed.
   271  					ginkgo.By("defer: cleaning up PD-RW test environment")
   272  					framework.Logf("defer cleanup errors can usually be ignored")
   273  					if host0Pod != nil {
   274  						podClient.Delete(ctx, host0Pod.Name, *metav1.NewDeleteOptions(0))
   275  					}
   276  					for _, diskName := range diskNames {
   277  						detachAndDeletePDs(ctx, diskName, []types.NodeName{host0Name})
   278  					}
   279  				}()
   280  
   281  				for i := 0; i < t.repeatCnt; i++ { // "rapid" repeat loop
   282  					framework.Logf("PD Read/Writer Iteration #%v", i)
   283  					ginkgo.By(fmt.Sprintf("creating host0Pod with %d containers on node0", numContainers))
   284  					host0Pod = testPDPod(diskNames, host0Name, false /* readOnly */, numContainers)
   285  					_, err = podClient.Create(ctx, host0Pod, metav1.CreateOptions{})
   286  					framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0Pod: %v", err))
   287  					framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, f.ClientSet, host0Pod.Name, f.Namespace.Name, f.Timeouts.PodStartSlow))
   288  
   289  					ginkgo.By(fmt.Sprintf("writing %d file(s) via a container", numPDs))
   290  					containerName := "mycontainer"
   291  					if numContainers > 1 {
   292  						containerName = fmt.Sprintf("mycontainer%v", rand.Intn(numContainers)+1)
   293  					}
   294  					for x := 1; x <= numPDs; x++ {
   295  						testFile := fmt.Sprintf("/testpd%d/tracker%d", x, i)
   296  						testFileContents := fmt.Sprintf("%v", rand.Int())
   297  						fileAndContentToVerify[testFile] = testFileContents
   298  						tk := e2ekubectl.NewTestKubeconfig(framework.TestContext.CertDir, framework.TestContext.Host, framework.TestContext.KubeConfig, framework.TestContext.KubeContext, framework.TestContext.KubectlPath, ns)
   299  						framework.ExpectNoError(tk.WriteFileViaContainer(host0Pod.Name, containerName, testFile, testFileContents))
   300  						framework.Logf("wrote %q to file %q in pod %q (container %q) on node %q", testFileContents, testFile, host0Pod.Name, containerName, host0Name)
   301  					}
   302  
   303  					ginkgo.By("verifying PD contents via a container")
   304  					if numContainers > 1 {
   305  						containerName = fmt.Sprintf("mycontainer%v", rand.Intn(numContainers)+1)
   306  					}
   307  					verifyPDContentsViaContainer(ns, f, host0Pod.Name, containerName, fileAndContentToVerify)
   308  
   309  					ginkgo.By("deleting host0Pod")
   310  					framework.ExpectNoError(podClient.Delete(ctx, host0Pod.Name, *metav1.NewDeleteOptions(0)), "Failed to delete host0Pod")
   311  				}
   312  				ginkgo.By(fmt.Sprintf("Test completed successfully, waiting for %d PD(s) to detach from node0", numPDs))
   313  				for _, diskName := range diskNames {
   314  					waitForPDDetach(diskName, host0Name)
   315  				}
   316  			})
   317  		}
   318  	})
   319  
   320  	f.Context("detach in a disrupted environment", f.WithSlow(), f.WithDisruptive(), func() {
   321  		const (
   322  			deleteNode    = 1 // delete physical node
   323  			deleteNodeObj = 2 // delete node's api object only
   324  			evictPod      = 3 // evict host0Pod on node0
   325  		)
   326  		type testT struct {
    327  			descr     string // ginkgo.It() description
   328  			disruptOp int    // disruptive operation performed on target node
   329  		}
   330  		tests := []testT{
   331  			// https://github.com/kubernetes/kubernetes/issues/85972
   332  			// This test case is flawed. Disabling for now.
   333  			// {
   334  			//		descr:     "node is deleted",
   335  			//		disruptOp: deleteNode,
   336  			// },
   337  			{
   338  				descr:     "node's API object is deleted",
   339  				disruptOp: deleteNodeObj,
   340  			},
   341  			{
   342  				descr:     "pod is evicted",
   343  				disruptOp: evictPod,
   344  			},
   345  		}
   346  
   347  		for _, t := range tests {
   348  			disruptOp := t.disruptOp
   349  			ginkgo.It(fmt.Sprintf("when %s", t.descr), func(ctx context.Context) {
   350  				e2eskipper.SkipUnlessProviderIs("gce")
    351  				origNodeCnt := len(nodes.Items) // healthy nodes running kubelet
   352  
   353  				ginkgo.By("creating a pd")
   354  				diskName, err := e2epv.CreatePDWithRetry(ctx)
   355  				framework.ExpectNoError(err, "Error creating a pd")
   356  
   357  				targetNode := &nodes.Items[0] // for node delete ops
   358  				host0Pod := testPDPod([]string{diskName}, host0Name, false, 1)
   359  				containerName := "mycontainer"
   360  
   361  				ginkgo.DeferCleanup(func(ctx context.Context) {
   362  					ginkgo.By("defer: cleaning up PD-RW test env")
   363  					framework.Logf("defer cleanup errors can usually be ignored")
   364  					ginkgo.By("defer: delete host0Pod")
   365  					podClient.Delete(ctx, host0Pod.Name, *metav1.NewDeleteOptions(0))
   366  					ginkgo.By("defer: detach and delete PDs")
   367  					detachAndDeletePDs(ctx, diskName, []types.NodeName{host0Name})
   368  					if disruptOp == deleteNode || disruptOp == deleteNodeObj {
   369  						if disruptOp == deleteNodeObj {
   370  							targetNode.ObjectMeta.SetResourceVersion("0")
   371  							// need to set the resource version or else the Create() fails
   372  							ginkgo.By("defer: re-create host0 node object")
   373  							_, err := nodeClient.Create(ctx, targetNode, metav1.CreateOptions{})
   374  							framework.ExpectNoError(err, fmt.Sprintf("defer: Unable to re-create the deleted node object %q", targetNode.Name))
   375  						}
   376  						ginkgo.By("defer: verify the number of ready nodes")
   377  						numNodes := countReadyNodes(ctx, cs, host0Name)
   378  						// if this defer is reached due to an Expect then nested
   379  						// Expects are lost, so use Failf here
   380  						if numNodes != origNodeCnt {
   381  							framework.Failf("defer: Requires current node count (%d) to return to original node count (%d)", numNodes, origNodeCnt)
   382  						}
   383  					}
   384  				})
   385  
   386  				ginkgo.By("creating host0Pod on node0")
   387  				_, err = podClient.Create(ctx, host0Pod, metav1.CreateOptions{})
   388  				framework.ExpectNoError(err, fmt.Sprintf("Failed to create host0Pod: %v", err))
   389  				ginkgo.By("waiting for host0Pod to be running")
   390  				framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, f.ClientSet, host0Pod.Name, f.Namespace.Name, f.Timeouts.PodStartSlow))
   391  
   392  				ginkgo.By("writing content to host0Pod")
   393  				testFile := "/testpd1/tracker"
   394  				testFileContents := fmt.Sprintf("%v", rand.Int())
   395  				tk := e2ekubectl.NewTestKubeconfig(framework.TestContext.CertDir, framework.TestContext.Host, framework.TestContext.KubeConfig, framework.TestContext.KubeContext, framework.TestContext.KubectlPath, ns)
   396  				framework.ExpectNoError(tk.WriteFileViaContainer(host0Pod.Name, containerName, testFile, testFileContents))
   397  				framework.Logf("wrote %q to file %q in pod %q on node %q", testFileContents, testFile, host0Pod.Name, host0Name)
   398  
   399  				ginkgo.By("verifying PD is present in node0's VolumeInUse list")
    400  				framework.ExpectNoError(waitForPDInVolumesInUse(ctx, nodeClient, diskName, host0Name, nodeStatusTimeout, true /* shouldExist */))
   401  
   402  				if disruptOp == deleteNode {
   403  					ginkgo.By("getting gce instances")
   404  					gceCloud, err := gce.GetGCECloud()
    405  					framework.ExpectNoError(err, fmt.Sprintf("Unable to create GCE cloud client err=%v", err))
   406  					output, err := gceCloud.ListInstanceNames(framework.TestContext.CloudConfig.ProjectID, framework.TestContext.CloudConfig.Zone)
   407  					framework.ExpectNoError(err, fmt.Sprintf("Unable to get list of node instances err=%v output=%s", err, output))
   408  					gomega.Expect(string(output)).Should(gomega.ContainSubstring(string(host0Name)))
   409  
   410  					ginkgo.By("deleting host0")
   411  					err = gceCloud.DeleteInstance(framework.TestContext.CloudConfig.ProjectID, framework.TestContext.CloudConfig.Zone, string(host0Name))
    412  					framework.ExpectNoError(err, fmt.Sprintf("Failed to delete host0 instance: err=%v", err))
   413  					ginkgo.By("expecting host0 node to be re-created")
   414  					numNodes := countReadyNodes(ctx, cs, host0Name)
   415  					gomega.Expect(numNodes).To(gomega.Equal(origNodeCnt), fmt.Sprintf("Requires current node count (%d) to return to original node count (%d)", numNodes, origNodeCnt))
   416  					output, err = gceCloud.ListInstanceNames(framework.TestContext.CloudConfig.ProjectID, framework.TestContext.CloudConfig.Zone)
   417  					framework.ExpectNoError(err, fmt.Sprintf("Unable to get list of node instances err=%v output=%s", err, output))
   418  					gomega.Expect(string(output)).Should(gomega.ContainSubstring(string(host0Name)))
   419  
   420  				} else if disruptOp == deleteNodeObj {
   421  					ginkgo.By("deleting host0's node api object")
   422  					framework.ExpectNoError(nodeClient.Delete(ctx, string(host0Name), *metav1.NewDeleteOptions(0)), "Unable to delete host0's node object")
   423  					ginkgo.By("deleting host0Pod")
   424  					framework.ExpectNoError(podClient.Delete(ctx, host0Pod.Name, *metav1.NewDeleteOptions(0)), "Unable to delete host0Pod")
   425  
   426  				} else if disruptOp == evictPod {
   427  					evictTarget := &policyv1.Eviction{
   428  						ObjectMeta: metav1.ObjectMeta{
   429  							Name:      host0Pod.Name,
   430  							Namespace: ns,
   431  						},
   432  					}
   433  					ginkgo.By("evicting host0Pod")
   434  					err = wait.PollImmediate(framework.Poll, podEvictTimeout, func() (bool, error) {
   435  						if err := cs.CoreV1().Pods(ns).EvictV1(ctx, evictTarget); err != nil {
   436  							framework.Logf("Failed to evict host0Pod, ignoring error: %v", err)
   437  							return false, nil
   438  						}
   439  						return true, nil
   440  					})
   441  					framework.ExpectNoError(err, "failed to evict host0Pod after %v", podEvictTimeout)
   442  				}
   443  
   444  				ginkgo.By("waiting for pd to detach from host0")
   445  				waitForPDDetach(diskName, host0Name)
   446  			})
   447  		}
   448  	})
   449  
   450  	ginkgo.It("should be able to delete a non-existent PD without error", func(ctx context.Context) {
   451  		e2eskipper.SkipUnlessProviderIs("gce")
   452  
   453  		ginkgo.By("delete a PD")
   454  		framework.ExpectNoError(e2epv.DeletePDWithRetry(ctx, "non-exist"))
   455  	})
   456  
    457  	// This test is marked to run as serial so that device selection on AWS does not
    458  	// conflict with other concurrent attach operations.
   459  	f.It(f.WithSerial(), "attach on previously attached volumes should work", func(ctx context.Context) {
   460  		e2eskipper.SkipUnlessProviderIs("gce", "gke", "aws")
   461  		ginkgo.By("creating PD")
   462  		diskName, err := e2epv.CreatePDWithRetry(ctx)
   463  		framework.ExpectNoError(err, "Error creating PD")
   464  
   465  		// this should be safe to do because if attach fails then detach will be considered
   466  		// successful and we will delete the volume.
   467  		ginkgo.DeferCleanup(detachAndDeletePDs, diskName, []types.NodeName{host0Name})
   468  
   469  		ginkgo.By("Attaching volume to a node")
   470  		err = attachPD(host0Name, diskName)
   471  		framework.ExpectNoError(err, "Error attaching PD")
   472  
    473  		pod := testPDPod([]string{diskName}, host0Name, false /* readOnly */, 1)
   474  		ginkgo.By("Creating test pod with same volume")
   475  		_, err = podClient.Create(ctx, pod, metav1.CreateOptions{})
   476  		framework.ExpectNoError(err, "Failed to create pod")
   477  		framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name, f.Timeouts.PodStartSlow))
   478  
   479  		ginkgo.By("deleting the pod")
   480  		framework.ExpectNoError(podClient.Delete(ctx, pod.Name, *metav1.NewDeleteOptions(0)), "Failed to delete pod")
   481  		framework.Logf("deleted pod %q", pod.Name)
   482  		ginkgo.By("waiting for PD to detach")
   483  		framework.ExpectNoError(waitForPDDetach(diskName, host0Name))
   484  	})
   485  })
   486  
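         // countReadyNodes waits for the given node to be Ready and for all nodes to be
         // schedulable again, then returns the number of ready, schedulable nodes.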
   487  func countReadyNodes(ctx context.Context, c clientset.Interface, hostName types.NodeName) int {
   488  	e2enode.WaitForNodeToBeReady(ctx, c, string(hostName), nodeStatusTimeout)
   489  	e2enode.WaitForAllNodesSchedulable(ctx, c, nodeStatusTimeout)
   490  	nodes, err := e2enode.GetReadySchedulableNodes(ctx, c)
   491  	framework.ExpectNoError(err)
   492  	return len(nodes.Items)
   493  }
   494  
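         // verifyPDContentsViaContainer reads each file in fileAndContentToVerify from inside the
         // given container via kubectl and fails the test if the contents do not match.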
   495  func verifyPDContentsViaContainer(namespace string, f *framework.Framework, podName, containerName string, fileAndContentToVerify map[string]string) {
   496  	for filePath, expectedContents := range fileAndContentToVerify {
    497  		// No retry loop as there should not be transient failures
   498  		tk := e2ekubectl.NewTestKubeconfig(framework.TestContext.CertDir, framework.TestContext.Host, framework.TestContext.KubeConfig, framework.TestContext.KubeContext, framework.TestContext.KubectlPath, namespace)
   499  		v, err := tk.ReadFileViaContainer(podName, containerName, filePath)
   500  		framework.ExpectNoError(err, "Error reading file %s via container %s", filePath, containerName)
   501  		framework.Logf("Read file %q with content: %v", filePath, v)
   502  		if strings.TrimSpace(v) != strings.TrimSpace(expectedContents) {
    503  			framework.Failf("Read content <%q> does not match expected content <%q>.", v, expectedContents)
   504  		}
   505  	}
   506  }
   507  
    508  // TODO: move detachPD to standard cloudprovider functions so that these tests can run on other cloud providers too
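         // detachPD detaches the named PD from the given node via the GCE API; on other providers
         // it returns an error. A GCE "Invalid value for field 'disk'" error is treated as
         // already detached and ignored.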
   509  func detachPD(nodeName types.NodeName, pdName string) error {
   510  	if framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" {
   511  		gceCloud, err := gce.GetGCECloud()
   512  		if err != nil {
   513  			return err
   514  		}
   515  		err = gceCloud.DetachDisk(pdName, nodeName)
   516  		if err != nil {
   517  			if gerr, ok := err.(*googleapi.Error); ok && strings.Contains(gerr.Message, "Invalid value for field 'disk'") {
   518  				// PD already detached, ignore error.
   519  				return nil
   520  			}
   521  			framework.Logf("Error detaching PD %q: %v", pdName, err)
   522  		}
   523  		return err
   524  
   525  	} else {
   526  		return fmt.Errorf("Provider does not support volume detaching")
   527  	}
   528  }
   529  
    530  // TODO: move attachPD to standard cloudprovider functions so that these tests can run on other cloud providers too
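         // attachPD attaches the named PD to the given node read-write (non-regional) via the
         // GCE API; on other providers it returns an error.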
   531  func attachPD(nodeName types.NodeName, pdName string) error {
   532  	if framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" {
   533  		gceCloud, err := gce.GetGCECloud()
   534  		if err != nil {
   535  			return err
   536  		}
   537  		err = gceCloud.AttachDisk(pdName, nodeName, false /*readOnly*/, false /*regional*/)
   538  		if err != nil {
   539  			framework.Logf("Error attaching PD %q: %v", pdName, err)
   540  		}
   541  		return err
   542  
   543  	} else {
   544  		return fmt.Errorf("Provider does not support volume attaching")
   545  	}
   546  }
   547  
    548  // testPDPod returns a pod spec suitable for an api Create call. It handles the gce, gke and
    549  // aws providers only, and fails the test if a different provider is supplied.
    550  // When numContainers > 1, containers are named "mycontainer<number>" where <number> is
    551  // 1..numContainers; with a single container the name is simply "mycontainer".
    552  // Each container's volumeMounts are hard-coded to "/testpd<number>" where <number> is
    553  // 1..len(diskNames).
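         // For example, with illustrative arguments, testPDPod([]string{"my-pd"}, "node-1", false, 2)
         // yields a pod with containers "mycontainer1" and "mycontainer2", each mounting a volume
         // named "testpd1" at "/testpd1" backed by PD "my-pd".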
   554  func testPDPod(diskNames []string, targetNode types.NodeName, readOnly bool, numContainers int) *v1.Pod {
   555  	// escape if not a supported provider
   556  	if !(framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" ||
   557  		framework.TestContext.Provider == "aws") {
    558  		framework.Failf("func `testPDPod` only supports gce, gke, and aws providers, not %v", framework.TestContext.Provider)
   559  	}
   560  
   561  	containers := make([]v1.Container, numContainers)
   562  	for i := range containers {
   563  		containers[i].Name = "mycontainer"
   564  		if numContainers > 1 {
   565  			containers[i].Name = fmt.Sprintf("mycontainer%v", i+1)
   566  		}
   567  		containers[i].Image = e2epod.GetTestImage(imageutils.BusyBox)
   568  		containers[i].Command = []string{"sleep", "6000"}
   569  		containers[i].VolumeMounts = make([]v1.VolumeMount, len(diskNames))
   570  		for k := range diskNames {
   571  			containers[i].VolumeMounts[k].Name = fmt.Sprintf("testpd%v", k+1)
   572  			containers[i].VolumeMounts[k].MountPath = fmt.Sprintf("/testpd%v", k+1)
   573  		}
   574  		containers[i].Resources.Limits = v1.ResourceList{}
   575  		containers[i].Resources.Limits[v1.ResourceCPU] = *resource.NewQuantity(int64(0), resource.DecimalSI)
   576  	}
   577  
   578  	pod := &v1.Pod{
   579  		TypeMeta: metav1.TypeMeta{
   580  			Kind:       "Pod",
   581  			APIVersion: "v1",
   582  		},
   583  		ObjectMeta: metav1.ObjectMeta{
   584  			Name: "pd-test-" + string(uuid.NewUUID()),
   585  		},
   586  		Spec: v1.PodSpec{
   587  			Containers: containers,
   588  			NodeName:   string(targetNode),
   589  		},
   590  	}
   591  
   592  	pod.Spec.Volumes = make([]v1.Volume, len(diskNames))
   593  	for k, diskName := range diskNames {
   594  		pod.Spec.Volumes[k].Name = fmt.Sprintf("testpd%v", k+1)
   595  		if framework.TestContext.Provider == "aws" {
   596  			pod.Spec.Volumes[k].VolumeSource = v1.VolumeSource{
   597  				AWSElasticBlockStore: &v1.AWSElasticBlockStoreVolumeSource{
   598  					VolumeID: diskName,
   599  					FSType:   "ext4",
   600  					ReadOnly: readOnly,
   601  				},
   602  			}
   603  		} else { // "gce" or "gke"
   604  			pod.Spec.Volumes[k].VolumeSource = v1.VolumeSource{
   605  				GCEPersistentDisk: &v1.GCEPersistentDiskVolumeSource{
   606  					PDName:   diskName,
   607  					FSType:   e2epv.GetDefaultFSType(),
   608  					ReadOnly: readOnly,
   609  				},
   610  			}
   611  		}
   612  	}
   613  	return pod
   614  }
   615  
   616  // Waits for specified PD to detach from specified hostName
   617  func waitForPDDetach(diskName string, nodeName types.NodeName) error {
   618  	if framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" {
   619  		framework.Logf("Waiting for GCE PD %q to detach from node %q.", diskName, nodeName)
   620  		gceCloud, err := gce.GetGCECloud()
   621  		if err != nil {
   622  			return err
   623  		}
   624  		for start := time.Now(); time.Since(start) < gcePDDetachTimeout; time.Sleep(gcePDDetachPollTime) {
   625  			diskAttached, err := gceCloud.DiskIsAttached(diskName, nodeName)
   626  			if err != nil {
   627  				framework.Logf("Error waiting for PD %q to detach from node %q. 'DiskIsAttached(...)' failed with %v", diskName, nodeName, err)
   628  				return err
   629  			}
   630  			if !diskAttached {
   631  				// Specified disk does not appear to be attached to specified node
   632  				framework.Logf("GCE PD %q appears to have successfully detached from %q.", diskName, nodeName)
   633  				return nil
   634  			}
   635  			framework.Logf("Waiting for GCE PD %q to detach from %q.", diskName, nodeName)
   636  		}
   637  		return fmt.Errorf("Gave up waiting for GCE PD %q to detach from %q after %v", diskName, nodeName, gcePDDetachTimeout)
   638  	}
   639  	return nil
   640  }
   641  
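         // detachAndDeletePDs detaches the given PD from each listed node, waits for every detach
         // to complete, and then deletes the PD.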
   642  func detachAndDeletePDs(ctx context.Context, diskName string, hosts []types.NodeName) {
   643  	for _, host := range hosts {
   644  		framework.Logf("Detaching GCE PD %q from node %q.", diskName, host)
   645  		detachPD(host, diskName)
   646  		ginkgo.By(fmt.Sprintf("Waiting for PD %q to detach from %q", diskName, host))
   647  		waitForPDDetach(diskName, host)
   648  	}
   649  	ginkgo.By(fmt.Sprintf("Deleting PD %q", diskName))
   650  	framework.ExpectNoError(e2epv.DeletePDWithRetry(ctx, diskName))
   651  }
   652  
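         // waitForPDInVolumesInUse polls the node's Status.VolumesInUse until it contains (or, when
         // shouldExist is false, no longer contains) the given disk name, or the timeout expires.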
   653  func waitForPDInVolumesInUse(
   654  	ctx context.Context,
   655  	nodeClient v1core.NodeInterface,
   656  	diskName string,
   657  	nodeName types.NodeName,
   658  	timeout time.Duration,
   659  	shouldExist bool) error {
   660  	logStr := "to contain"
   661  	if !shouldExist {
   662  		logStr = "to NOT contain"
   663  	}
   664  	framework.Logf("Waiting for node %s's VolumesInUse Status %s PD %q", nodeName, logStr, diskName)
   665  	for start := time.Now(); time.Since(start) < timeout; time.Sleep(nodeStatusPollTime) {
   666  		nodeObj, err := nodeClient.Get(ctx, string(nodeName), metav1.GetOptions{})
   667  		if err != nil || nodeObj == nil {
   668  			framework.Logf("Failed to fetch node object %q from API server. err=%v", nodeName, err)
   669  			continue
   670  		}
   671  		exists := false
   672  		for _, volumeInUse := range nodeObj.Status.VolumesInUse {
   673  			volumeInUseStr := string(volumeInUse)
   674  			if strings.Contains(volumeInUseStr, diskName) {
   675  				if shouldExist {
   676  					framework.Logf("Found PD %q in node %q's VolumesInUse Status: %q", diskName, nodeName, volumeInUseStr)
   677  					return nil
   678  				}
   679  				exists = true
   680  			}
   681  		}
   682  		if !shouldExist && !exists {
   683  			framework.Logf("Verified PD %q does not exist in node %q's VolumesInUse Status.", diskName, nodeName)
   684  			return nil
   685  		}
   686  	}
    687  	return fmt.Errorf("Timed out waiting for node %s's VolumesInUse Status %s PD %q", nodeName, logStr, diskName)
   688  }