k8s.io/kubernetes@v1.29.3/test/e2e/autoscaling/autoscaling_timer.go

k8s.io/kubernetes@v1.29.3/test/e2e/autoscaling/autoscaling_timer.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package autoscaling
    18  
    19  import (
    20  	"context"
    21  	"strings"
    22  	"time"
    23  
    24  	v1 "k8s.io/api/core/v1"
    25  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    26  	"k8s.io/kubernetes/test/e2e/feature"
    27  	"k8s.io/kubernetes/test/e2e/framework"
    28  	e2eautoscaling "k8s.io/kubernetes/test/e2e/framework/autoscaling"
    29  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    30  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    31  	admissionapi "k8s.io/pod-security-admission/api"
    32  
    33  	"github.com/onsi/ginkgo/v2"
    34  	"github.com/onsi/gomega"
    35  	"github.com/onsi/gomega/gmeasure"
    36  )
    37  
    38  var _ = SIGDescribe(feature.ClusterSizeAutoscalingScaleUp, framework.WithSlow(), "Autoscaling", func() {
    39  	f := framework.NewDefaultFramework("autoscaling")
    40  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    41  	var experiment *gmeasure.Experiment
    42  
    43  	ginkgo.Describe("Autoscaling a service", func() {
    44  		ginkgo.BeforeEach(func(ctx context.Context) {
    45  			// Check if Cloud Autoscaler is enabled by trying to get its ConfigMap.
    46  			_, err := f.ClientSet.CoreV1().ConfigMaps("kube-system").Get(ctx, "cluster-autoscaler-status", metav1.GetOptions{})
    47  			if err != nil {
    48  				e2eskipper.Skipf("test expects Cluster Autoscaler to be enabled")
    49  			}
    50  			experiment = gmeasure.NewExperiment("Autoscaling a service")
    51  			ginkgo.AddReportEntry(experiment.Name, experiment)
    52  		})
    53  
    54  		ginkgo.Context("from 1 pod and 3 nodes to 8 pods and >=4 nodes", func() {
    55  			const nodesNum = 3       // Expect there to be 3 nodes before and after the test.
    56  			var nodeGroupName string // Set by BeforeEach, used by AfterEach to scale this node group down after the test.
    57  			var nodes *v1.NodeList   // Set by BeforeEach, used by Measure to calculate CPU request based on node's sizes.
    58  
    59  			ginkgo.BeforeEach(func(ctx context.Context) {
    60  				// Make sure there is only 1 node group, otherwise this test becomes useless.
    61  				nodeGroups := strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",")
    62  				if len(nodeGroups) != 1 {
    63  					e2eskipper.Skipf("test expects 1 node group, found %d", len(nodeGroups))
    64  				}
    65  				nodeGroupName = nodeGroups[0]
    66  
    67  				// Make sure the node group has exactly 'nodesNum' nodes, otherwise this test becomes useless.
    68  				nodeGroupSize, err := framework.GroupSize(nodeGroupName)
    69  				framework.ExpectNoError(err)
    70  				if nodeGroupSize != nodesNum {
    71  					e2eskipper.Skipf("test expects %d nodes, found %d", nodesNum, nodeGroupSize)
    72  				}
    73  
    74  				// Make sure all nodes are schedulable, otherwise we are in some kind of a problem state.
    75  				nodes, err = e2enode.GetReadySchedulableNodes(ctx, f.ClientSet)
    76  				framework.ExpectNoError(err)
    77  				gomega.Expect(nodes.Items).To(gomega.HaveLen(nodeGroupSize), "not all nodes are schedulable")
    78  			})
    79  
    80  			ginkgo.AfterEach(func(ctx context.Context) {
    81  				// Attempt cleanup only if a node group was targeted for scale up.
    82  				// Otherwise the test was probably skipped and we'll get a gcloud error due to invalid parameters.
    83  				if len(nodeGroupName) > 0 {
    84  					// Scale down back to only 'nodesNum' nodes, as expected at the start of the test.
    85  					framework.ExpectNoError(framework.ResizeGroup(nodeGroupName, nodesNum))
    86  					framework.ExpectNoError(e2enode.WaitForReadyNodes(ctx, f.ClientSet, nodesNum, 15*time.Minute))
    87  				}
    88  			})
    89  
    90  			ginkgo.It("takes less than 15 minutes", func(ctx context.Context) {
    91  				// Measured over multiple samples, scaling takes 10 +/- 2 minutes, so 15 minutes should be fully sufficient.
    92  				const timeToWait = 15 * time.Minute
    93  
    94  				// Calculate the CPU request of the service.
    95  				// This test expects that 8 pods will not fit in 'nodesNum' nodes, but will fit in >='nodesNum'+1 nodes.
    96  				// Make it so that 'nodesNum' pods fit perfectly per node.
    97  				nodeCpus := nodes.Items[0].Status.Allocatable[v1.ResourceCPU]
    98  				nodeCPUMillis := (&nodeCpus).MilliValue()
    99  				cpuRequestMillis := int64(nodeCPUMillis / nodesNum)
   100  
   101  				// Start the service we want to scale and wait for it to be up and running.
   102  				nodeMemoryBytes := nodes.Items[0].Status.Allocatable[v1.ResourceMemory]
   103  				nodeMemoryMB := (&nodeMemoryBytes).Value() / 1024 / 1024
   104  				memRequestMB := nodeMemoryMB / 10 // Ensure each pod takes not more than 10% of node's allocatable memory.
   105  				replicas := 1
   106  				resourceConsumer := e2eautoscaling.NewDynamicResourceConsumer(ctx, "resource-consumer", f.Namespace.Name, e2eautoscaling.KindDeployment, replicas, 0, 0, 0, cpuRequestMillis, memRequestMB, f.ClientSet, f.ScalesGetter, e2eautoscaling.Disable, e2eautoscaling.Idle)
   107  				ginkgo.DeferCleanup(resourceConsumer.CleanUp)
   108  				resourceConsumer.WaitForReplicas(ctx, replicas, 1*time.Minute) // Should finish ~immediately, so 1 minute is more than enough.
   109  
   110  				// Enable Horizontal Pod Autoscaler with 50% target utilization and
   111  				// scale up the CPU usage to trigger autoscaling to 8 pods for target to be satisfied.
   112  				targetCPUUtilizationPercent := int32(50)
   113  				hpa := e2eautoscaling.CreateCPUResourceHorizontalPodAutoscaler(ctx, resourceConsumer, targetCPUUtilizationPercent, 1, 10)
   114  				ginkgo.DeferCleanup(e2eautoscaling.DeleteHorizontalPodAutoscaler, resourceConsumer, hpa.Name)
   115  				cpuLoad := 8 * cpuRequestMillis * int64(targetCPUUtilizationPercent) / 100 // 8 pods utilized to the target level
   116  				resourceConsumer.ConsumeCPU(int(cpuLoad))
   117  
   118  				// Measure the time it takes for the service to scale to 8 pods with 50% CPU utilization each.
   119  				experiment.SampleDuration("total scale-up time", func(idx int) {
   120  					resourceConsumer.WaitForReplicas(ctx, 8, timeToWait)
   121  				}, gmeasure.SamplingConfig{N: 1})
   122  			}) // Increase to run the test more than once.
   123  		})
   124  	})
   125  })