k8s.io/kubernetes@v1.29.3/test/e2e/storage/vsphere/vsphere_volume_node_poweroff.go

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package vsphere

import (
	"context"
	"fmt"
	"time"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
	"github.com/vmware/govmomi/object"
	vimtypes "github.com/vmware/govmomi/vim25/types"

	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2edeployment "k8s.io/kubernetes/test/e2e/framework/deployment"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2epv "k8s.io/kubernetes/test/e2e/framework/pv"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	"k8s.io/kubernetes/test/e2e/storage/utils"
	admissionapi "k8s.io/pod-security-admission/api"
)

/*
Test to verify volume status after node power off:
1. Verify the pod got provisioned on a different node with the volume attached to it
2. Verify the volume is detached from the powered-off node
*/
var _ = utils.SIGDescribe("Node Poweroff", feature.Vsphere, framework.WithSlow(), framework.WithDisruptive(), func() {
	f := framework.NewDefaultFramework("node-poweroff")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	var (
		client    clientset.Interface
		namespace string
	)

	ginkgo.BeforeEach(func(ctx context.Context) {
		e2eskipper.SkipUnlessProviderIs("vsphere")
		Bootstrap(f)
		client = f.ClientSet
		namespace = f.Namespace.Name
		framework.ExpectNoError(e2enode.WaitForAllNodesSchedulable(ctx, client, f.Timeouts.NodeSchedulable))
		nodeList, err := e2enode.GetReadySchedulableNodes(ctx, f.ClientSet)
		framework.ExpectNoError(err)
		if len(nodeList.Items) < 2 {
			framework.Failf("At least 2 nodes are required for this test, got: %d", len(nodeList.Items))
		}
	})

	/*
		Steps:
		1. Create a StorageClass
		2. Create a PVC with the StorageClass
		3. Create a Deployment with 1 replica, using the PVC
		4. Verify the pod got provisioned on a node
		5. Verify the volume is attached to the node
		6. Power off the node where the pod got provisioned
		7. Verify the pod got provisioned on a different node
		8. Verify the volume is attached to the new node
		9. Verify the volume is detached from the old node
		10. Delete the Deployment and wait for the volume to be detached
		11. Delete the PVC
		12. Delete the StorageClass
	*/
	ginkgo.It("verify volume status after node power off", func(ctx context.Context) {
		ginkgo.By("Creating a Storage Class")
		storageClassSpec := getVSphereStorageClassSpec("test-sc", nil, nil, "")
		storageclass, err := client.StorageV1().StorageClasses().Create(ctx, storageClassSpec, metav1.CreateOptions{})
		framework.ExpectNoError(err, fmt.Sprintf("Failed to create storage class with err: %v", err))
		ginkgo.DeferCleanup(framework.IgnoreNotFound(client.StorageV1().StorageClasses().Delete), storageclass.Name, metav1.DeleteOptions{})
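		// Cleanup is registered via DeferCleanup as soon as each resource is
		// created, so the StorageClass, PVC, and Deployment are removed even if
		// a later step fails.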

		ginkgo.By("Creating PVC using the Storage Class")
		pvclaimSpec := getVSphereClaimSpecWithStorageClass(namespace, "1Gi", storageclass)
		pvclaim, err := e2epv.CreatePVC(ctx, client, namespace, pvclaimSpec)
		framework.ExpectNoError(err, fmt.Sprintf("Failed to create PVC with err: %v", err))
		ginkgo.DeferCleanup(e2epv.DeletePersistentVolumeClaim, client, pvclaim.Name, namespace)

		ginkgo.By("Waiting for PVC to be in bound phase")
		pvclaims := []*v1.PersistentVolumeClaim{pvclaim}
		pvs, err := e2epv.WaitForPVClaimBoundPhase(ctx, client, pvclaims, f.Timeouts.ClaimProvision)
		framework.ExpectNoError(err, fmt.Sprintf("Failed to wait until PVC phase set to bound: %v", err))
		volumePath := pvs[0].Spec.VsphereVolume.VolumePath
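		// volumePath is the vSphere volume (VMDK) path backing the bound PV; it
		// is used below to verify attach/detach status on the nodes.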

		ginkgo.By("Creating a Deployment")
		deployment, err := e2edeployment.CreateDeployment(ctx, client, int32(1), map[string]string{"test": "app"}, nil, namespace, pvclaims, admissionapi.LevelRestricted, "")
		framework.ExpectNoError(err, fmt.Sprintf("Failed to create Deployment with err: %v", err))
		ginkgo.DeferCleanup(framework.IgnoreNotFound(client.AppsV1().Deployments(namespace).Delete), deployment.Name, metav1.DeleteOptions{})

		ginkgo.By("Get pod from the deployment")
		podList, err := e2edeployment.GetPodsForDeployment(ctx, client, deployment)
		framework.ExpectNoError(err, fmt.Sprintf("Failed to get pod from the deployment with err: %v", err))
		gomega.Expect(podList.Items).NotTo(gomega.BeEmpty())
		pod := podList.Items[0]
		node1 := pod.Spec.NodeName

		ginkgo.By(fmt.Sprintf("Verify disk is attached to the node: %v", node1))
		isAttached, err := diskIsAttached(ctx, volumePath, node1)
		framework.ExpectNoError(err)
		if !isAttached {
			framework.Failf("Volume: %s is not attached to the node: %v", volumePath, node1)
		}

		ginkgo.By(fmt.Sprintf("Power off the node: %v", node1))

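		// Map the Kubernetes node to its backing vSphere VM via the test
		// context's NodeMapper, so the VM can be powered off through govmomi.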
		nodeInfo := TestContext.NodeMapper.GetNodeInfo(node1)
		vm := object.NewVirtualMachine(nodeInfo.VSphere.Client.Client, nodeInfo.VirtualMachineRef)
		_, err = vm.PowerOff(ctx)
		framework.ExpectNoError(err)
		ginkgo.DeferCleanup(vm.PowerOn)
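		// The deferred PowerOn ensures the VM is powered back on even if the
		// test fails before the explicit power-on at the end of this spec.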

		err = vm.WaitForPowerState(ctx, vimtypes.VirtualMachinePowerStatePoweredOff)
		framework.ExpectNoError(err, "Unable to power off the node")

		// Wait for the pod to fail over to a different node
		node2, err := waitForPodToFailover(ctx, client, deployment, node1)
		framework.ExpectNoError(err, "Pod did not fail over to a different node")

		ginkgo.By(fmt.Sprintf("Waiting for disk to be attached to the new node: %v", node2))
		err = waitForVSphereDiskToAttach(ctx, volumePath, node2)
		framework.ExpectNoError(err, "Disk is not attached to the node")

		ginkgo.By(fmt.Sprintf("Waiting for disk to be detached from the previous node: %v", node1))
		err = waitForVSphereDiskToDetach(ctx, volumePath, node1)
		framework.ExpectNoError(err, "Disk is not detached from the node")
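		// Steps 8 and 9 of the outline above: the volume must follow the pod to
		// the new node and be released from the powered-off node.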

		ginkgo.By(fmt.Sprintf("Power on the previous node: %v", node1))
		_, err = vm.PowerOn(ctx)
		framework.ExpectNoError(err, "Unable to power on the node")
		err = vm.WaitForPowerState(ctx, vimtypes.VirtualMachinePowerStatePoweredOn)
		framework.ExpectNoError(err, "Node did not reach the powered-on state")
	})
})

// waitForPodToFailover waits until the Deployment's pod has failed over to a
// node other than oldNode, or times out after 3 minutes.
func waitForPodToFailover(ctx context.Context, client clientset.Interface, deployment *appsv1.Deployment, oldNode string) (string, error) {
	var (
		timeout  = 3 * time.Minute
		pollTime = 10 * time.Second
	)

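	// An API error stops the poll immediately (return true, err); observing the
	// pod on a node other than oldNode ends the poll successfully.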
	waitErr := wait.PollWithContext(ctx, pollTime, timeout, func(ctx context.Context) (bool, error) {
		currentNode, err := getNodeForDeployment(ctx, client, deployment)
		if err != nil {
			return true, err
		}

		if currentNode != oldNode {
			framework.Logf("The pod has failed over from %q to %q", oldNode, currentNode)
			return true, nil
		}

		framework.Logf("Waiting for the pod to fail over from %q", oldNode)
		return false, nil
	})

	if waitErr != nil {
		if wait.Interrupted(waitErr) {
			return "", fmt.Errorf("pod has not failed over after %v: %v", timeout, waitErr)
		}
		return "", fmt.Errorf("pod did not fail over from %q: %v", oldNode, waitErr)
	}

	return getNodeForDeployment(ctx, client, deployment)
}

// getNodeForDeployment returns the name of the node running the Deployment's pod.
func getNodeForDeployment(ctx context.Context, client clientset.Interface, deployment *appsv1.Deployment) (string, error) {
	podList, err := e2edeployment.GetPodsForDeployment(ctx, client, deployment)
	if err != nil {
		return "", err
	}
	if len(podList.Items) == 0 {
		return "", fmt.Errorf("no pods found for deployment %q", deployment.Name)
	}
	return podList.Items[0].Spec.NodeName, nil
}