k8s.io/kubernetes@v1.29.3/test/e2e/cloud/gcp/node_lease.go

/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package gcp

import (
	"context"
	"fmt"
	"strings"
	"time"

	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/kubernetes/test/e2e/framework"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	admissionapi "k8s.io/pod-security-admission/api"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
)

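// This suite is disruptive by design: it resizes the cluster's node instance
// group to trigger node deletion, then relies on the AfterEach below to
// restore the original group size and wait for the cluster to become healthy
// again.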
var _ = SIGDescribe(framework.WithDisruptive(), "NodeLease", func() {
	f := framework.NewDefaultFramework("node-lease-test")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	var systemPodsNo int32
	var c clientset.Interface
	var ns string
	var group string

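	// Capture the baseline cluster state (system pod count, instance group
	// name) so the AfterEach below can restore it and verify recovery.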
	ginkgo.BeforeEach(func(ctx context.Context) {
		c = f.ClientSet
		ns = f.Namespace.Name
		// Count the pods currently in kube-system so AfterEach can wait for at
		// least that many to be running and ready again after the resize.
		systemPods, err := e2epod.GetPodsInNamespace(ctx, c, metav1.NamespaceSystem, map[string]string{})
		framework.ExpectNoError(err)
		systemPodsNo = int32(len(systemPods))
		if strings.Contains(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
			framework.Failf("Test does not support cluster setup with more than one MIG: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
		} else {
			group = framework.TestContext.CloudConfig.NodeInstanceGroup
		}
	})

	ginkgo.Describe("NodeLease deletion", func() {
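		// skipped tracks whether BeforeEach skipped this spec, so that
		// AfterEach does not try to resize an instance group it never touched.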
		var skipped bool

		ginkgo.BeforeEach(func() {
			skipped = true
			e2eskipper.SkipUnlessProviderIs("gce", "gke", "aws")
			e2eskipper.SkipUnlessNodeCountIsAtLeast(2)
			skipped = false
		})

		ginkgo.AfterEach(func(ctx context.Context) {
			if skipped {
				return
			}

			ginkgo.By("restoring the original node instance group size")
			if err := framework.ResizeGroup(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
				framework.Failf("Couldn't restore the original node instance group size: %v", err)
			}
			// In GKE, our current tunneling setup has the potential to hold on to a broken tunnel (from a
			// rebooted/deleted node) for up to 5 minutes before all tunnels are dropped and recreated.
			// Most tests make use of some proxy feature to verify functionality. So, if a reboot test runs
			// right before a test that tries to get logs, for example, we may get unlucky and try to use a
			// closed tunnel to a node that was recently rebooted. There's no good way to poll for proxies
			// being closed, so we sleep.
			//
			// TODO(cjcullen) reduce this sleep (#19314)
			if framework.ProviderIs("gke") {
				ginkgo.By("waiting 5 minutes for all dead tunnels to be dropped")
				time.Sleep(5 * time.Minute)
			}
			if err := framework.WaitForGroupSize(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
				framework.Failf("Couldn't restore the original node instance group size: %v", err)
			}

			if err := e2enode.WaitForReadyNodes(ctx, c, framework.TestContext.CloudConfig.NumNodes, 10*time.Minute); err != nil {
				framework.Failf("Couldn't restore the original cluster size: %v", err)
			}
			// Many e2e tests assume that the cluster is fully healthy before they start. Wait until
			// the cluster is restored to health.
			ginkgo.By("waiting for system pods to successfully restart")
			err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout)
			framework.ExpectNoError(err)
		})

		ginkgo.It("node lease should be deleted when corresponding node is deleted", func(ctx context.Context) {
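			// Each kubelet maintains a heartbeat Lease object for its node in
			// the dedicated kube-node-lease namespace, renewing it roughly
			// every 10 seconds by default.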
			leaseClient := c.CoordinationV1().Leases(v1.NamespaceNodeLease)
			err := e2enode.WaitForReadyNodes(ctx, c, framework.TestContext.CloudConfig.NumNodes, 10*time.Minute)
			framework.ExpectNoError(err)

			ginkgo.By("verify node lease exists for every node")
			originalNodes, err := e2enode.GetReadySchedulableNodes(ctx, c)
			framework.ExpectNoError(err)
			gomega.Expect(originalNodes.Items).To(gomega.HaveLen(framework.TestContext.CloudConfig.NumNodes))

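			// Lease creation can lag slightly behind node registration, so
			// poll until a Lease object exists for every ready node.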
			gomega.Eventually(ctx, func() error {
				pass := true
				for _, node := range originalNodes.Items {
					if _, err := leaseClient.Get(ctx, node.ObjectMeta.Name, metav1.GetOptions{}); err != nil {
						framework.Logf("Tried to get lease of node %s, but got error: %v", node.ObjectMeta.Name, err)
						pass = false
					}
				}
				if pass {
					return nil
				}
				return fmt.Errorf("not all node leases are ready yet")
			}, 1*time.Minute, 5*time.Second).Should(gomega.BeNil())

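			// Shrinking the instance group by one deletes a VM; the cloud
			// controller manager is then expected to remove the corresponding
			// Node object from the API server.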
			targetNumNodes := int32(framework.TestContext.CloudConfig.NumNodes - 1)
			ginkgo.By(fmt.Sprintf("decreasing cluster size to %d", targetNumNodes))
			err = framework.ResizeGroup(group, targetNumNodes)
			framework.ExpectNoError(err)
			err = framework.WaitForGroupSize(group, targetNumNodes)
			framework.ExpectNoError(err)
			err = e2enode.WaitForReadyNodes(ctx, c, framework.TestContext.CloudConfig.NumNodes-1, 10*time.Minute)
			framework.ExpectNoError(err)
			targetNodes, err := e2enode.GetReadySchedulableNodes(ctx, c)
			framework.ExpectNoError(err)
			gomega.Expect(targetNodes.Items).To(gomega.HaveLen(int(targetNumNodes)))

			ginkgo.By("verify node lease is deleted for the deleted node")
			var deletedNodeName string
			for _, originalNode := range originalNodes.Items {
				originalNodeName := originalNode.ObjectMeta.Name
				found := false
				for _, targetNode := range targetNodes.Items {
					if originalNodeName == targetNode.ObjectMeta.Name {
						found = true
						break
					}
				}
				// The original node that no longer appears in the ready set is
				// the one removed by the resize.
				if !found {
					deletedNodeName = originalNodeName
					break
				}
			}
			framework.ExpectNotEqual(deletedNodeName, "")
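			// Each lease carries an OwnerReference to its Node, so once the
			// Node object is deleted the garbage collector should remove the
			// lease as well.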
			gomega.Eventually(ctx, func() error {
				_, err := leaseClient.Get(ctx, deletedNodeName, metav1.GetOptions{})
				if err == nil {
					return fmt.Errorf("node lease is not deleted yet for node %q", deletedNodeName)
				}
				// Only a NotFound error proves deletion; anything else (e.g. a
				// transient API error) should be retried.
				if !apierrors.IsNotFound(err) {
					return err
				}
				return nil
			}, 1*time.Minute, 5*time.Second).Should(gomega.BeNil())

			ginkgo.By("verify node leases still exist for remaining nodes")
			gomega.Eventually(ctx, func() error {
				for _, node := range targetNodes.Items {
					if _, err := leaseClient.Get(ctx, node.ObjectMeta.Name, metav1.GetOptions{}); err != nil {
						return err
					}
				}
				return nil
			}, 1*time.Minute, 5*time.Second).Should(gomega.BeNil())
		})
	})
})