k8s.io/kubernetes@v1.29.3/test/e2e/cloud/gcp/resize_nodes.go

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package gcp

import (
	"context"
	"fmt"
	"strings"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/kubernetes/test/e2e/common"
	"k8s.io/kubernetes/test/e2e/framework"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	admissionapi "k8s.io/pod-security-admission/api"

	"github.com/onsi/ginkgo/v2"
)

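// resizeRC fetches the ReplicationController name in namespace ns and updates
// its replica count to replicas. A hypothetical invocation might look like:
//
//	err := resizeRC(ctx, c, "my-namespace", "my-hostname-add-node", 4)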
func resizeRC(ctx context.Context, c clientset.Interface, ns, name string, replicas int32) error {
	rc, err := c.CoreV1().ReplicationControllers(ns).Get(ctx, name, metav1.GetOptions{})
	if err != nil {
		return err
	}
	*(rc.Spec.Replicas) = replicas
	_, err = c.CoreV1().ReplicationControllers(rc.Namespace).Update(ctx, rc, metav1.UpdateOptions{})
	return err
}

var _ = SIGDescribe("Nodes", framework.WithDisruptive(), func() {
	f := framework.NewDefaultFramework("resize-nodes")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	var systemPodsNo int32
	var c clientset.Interface
	var ns string
	var group string

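	// Capture a baseline pod count and the node instance group used for
	// resizing; the suite supports only a single managed instance group (MIG).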
	ginkgo.BeforeEach(func(ctx context.Context) {
		c = f.ClientSet
		ns = f.Namespace.Name
		systemPods, err := e2epod.GetPodsInNamespace(ctx, c, ns, map[string]string{})
		framework.ExpectNoError(err)
		systemPodsNo = int32(len(systemPods))
		if strings.Contains(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
			framework.Failf("Test does not support cluster setup with more than one MIG: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
		} else {
			group = framework.TestContext.CloudConfig.NodeInstanceGroup
		}
	})

	// Slow issue #13323 (8 min)
	f.Describe("Resize", framework.WithSlow(), func() {
		var originalNodeCount int32

		ginkgo.BeforeEach(func() {
			e2eskipper.SkipUnlessProviderIs("gce", "gke")
			e2eskipper.SkipUnlessNodeCountIsAtLeast(2)
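			// Restore the original instance group size after each test and wait
			// for the cluster to become healthy again before the next test runs.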
			ginkgo.DeferCleanup(func(ctx context.Context) {
				ginkgo.By("restoring the original node instance group size")
				if err := framework.ResizeGroup(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
					framework.Failf("Couldn't restore the original node instance group size: %v", err)
				}
				// In GKE, our current tunneling setup has the potential to hold on to a broken tunnel (from a
				// rebooted/deleted node) for up to 5 minutes before all tunnels are dropped and recreated.
				// Most tests make use of some proxy feature to verify functionality. So, if a reboot test runs
				// right before a test that tries to get logs, for example, we may get unlucky and try to use a
				// closed tunnel to a node that was recently rebooted. There's no good way to framework.Poll for proxies
				// being closed, so we sleep.
				//
				// TODO(cjcullen) reduce this sleep (#19314)
				if framework.ProviderIs("gke") {
					ginkgo.By("waiting 5 minutes for all dead tunnels to be dropped")
					time.Sleep(5 * time.Minute)
				}
				if err := framework.WaitForGroupSize(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
					framework.Failf("Couldn't restore the original node instance group size: %v", err)
				}

				if err := e2enode.WaitForReadyNodes(ctx, c, int(originalNodeCount), 10*time.Minute); err != nil {
					framework.Failf("Couldn't restore the original cluster size: %v", err)
				}
				// Many e2e tests assume that the cluster is fully healthy before they start.  Wait until
				// the cluster is restored to health.
				ginkgo.By("waiting for system pods to successfully restart")
				err := e2epod.WaitForPodsRunningReady(ctx, c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout)
				framework.ExpectNoError(err)
			})
		})

		ginkgo.It("should be able to delete nodes", func(ctx context.Context) {
			// Create a replication controller for a service that serves its hostname.
			// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
			name := "my-hostname-delete-node"
			numNodes, err := e2enode.TotalRegistered(ctx, c)
			framework.ExpectNoError(err)
			originalNodeCount = int32(numNodes)
			common.NewRCByName(c, ns, name, originalNodeCount, nil, nil)
			err = e2epod.VerifyPods(ctx, c, ns, name, true, originalNodeCount)
			framework.ExpectNoError(err)

			targetNumNodes := int32(framework.TestContext.CloudConfig.NumNodes - 1)
			ginkgo.By(fmt.Sprintf("decreasing cluster size to %d", targetNumNodes))
			err = framework.ResizeGroup(group, targetNumNodes)
			framework.ExpectNoError(err)
			err = framework.WaitForGroupSize(group, targetNumNodes)
			framework.ExpectNoError(err)
			err = e2enode.WaitForReadyNodes(ctx, c, int(originalNodeCount-1), 10*time.Minute)
			framework.ExpectNoError(err)

			ginkgo.By("waiting 2 minutes for the watch in the podGC to catch up, remove any pods scheduled on " +
				"the now non-existent node, and for the RC to recreate them")
			time.Sleep(f.Timeouts.PodStartShort)

			ginkgo.By("verifying whether the pods from the removed node are recreated")
			err = e2epod.VerifyPods(ctx, c, ns, name, true, originalNodeCount)
			framework.ExpectNoError(err)
		})

		// TODO: Bug here - testName is not correct
		ginkgo.It("should be able to add nodes", func(ctx context.Context) {
			// Create a replication controller for a service that serves its hostname.
			// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
			name := "my-hostname-add-node"
			common.NewSVCByName(c, ns, name)
			numNodes, err := e2enode.TotalRegistered(ctx, c)
			framework.ExpectNoError(err)
			originalNodeCount = int32(numNodes)
			common.NewRCByName(c, ns, name, originalNodeCount, nil, nil)
			err = e2epod.VerifyPods(ctx, c, ns, name, true, originalNodeCount)
			framework.ExpectNoError(err)

			targetNumNodes := int32(framework.TestContext.CloudConfig.NumNodes + 1)
			ginkgo.By(fmt.Sprintf("increasing cluster size to %d", targetNumNodes))
			err = framework.ResizeGroup(group, targetNumNodes)
			framework.ExpectNoError(err)
			err = framework.WaitForGroupSize(group, targetNumNodes)
			framework.ExpectNoError(err)
			err = e2enode.WaitForReadyNodes(ctx, c, int(originalNodeCount+1), 10*time.Minute)
			framework.ExpectNoError(err)

			ginkgo.By(fmt.Sprintf("increasing size of the replication controller to %d and verifying all pods are running", originalNodeCount+1))
			err = resizeRC(ctx, c, ns, name, originalNodeCount+1)
			framework.ExpectNoError(err)
			err = e2epod.VerifyPods(ctx, c, ns, name, true, originalNodeCount+1)
			framework.ExpectNoError(err)
		})
	})
})