k8s.io/kubernetes@v1.29.3/test/e2e/autoscaling/dns_autoscaling.go (about)

     1  /*
     2  Copyright 2016 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package autoscaling
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"math"
    23  	"strings"
    24  	"time"
    25  
    26  	v1 "k8s.io/api/core/v1"
    27  	"k8s.io/apimachinery/pkg/api/resource"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	"k8s.io/apimachinery/pkg/labels"
    30  	"k8s.io/apimachinery/pkg/util/wait"
    31  	clientset "k8s.io/client-go/kubernetes"
    32  	"k8s.io/kubernetes/test/e2e/framework"
    33  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    34  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    35  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    36  	admissionapi "k8s.io/pod-security-admission/api"
    37  
    38  	"github.com/onsi/ginkgo/v2"
    39  )
    40  
// Constants used in dns-autoscaling test.
const (
	// DNSdefaultTimeout is the timeout this file passes to its wait helpers
	// (replica convergence, ConfigMap re-creation, pod readiness).
	DNSdefaultTimeout      = 5 * time.Minute
	// ClusterAddonLabelKey is the label key used to select the DNS deployment,
	// the DNS pods and the autoscaler pod in the kube-system namespace.
	ClusterAddonLabelKey   = "k8s-app"
	// DNSLabelName is the ClusterAddonLabelKey value selecting kube-dns.
	DNSLabelName           = "kube-dns"
	// DNSAutoscalerLabelName is the ClusterAddonLabelKey value selecting the
	// autoscaler pod; the same string is used as the name of the autoscaler's
	// ConfigMap.
	DNSAutoscalerLabelName = "kube-dns-autoscaler"
)
    48  
// DNS horizontal autoscaling suite: rewrites the kube-dns-autoscaler
// ConfigMap with test parameters and checks that the kube-dns Deployment's
// replica count follows the value computed by getExpectReplicasFuncLinear.
// The suite is skipped unless the provider is gce or gke.
var _ = SIGDescribe("DNS horizontal autoscaling", func() {
	f := framework.NewDefaultFramework("dns-autoscaling")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	// State shared between BeforeEach and the specs below.
	var c clientset.Interface
	// previousParams is the ConfigMap data captured before a spec runs; the
	// specs write it back in their deferred cleanup.
	var previousParams map[string]string
	// originDNSReplicasCount is the kube-dns replica count before a spec runs.
	var originDNSReplicasCount int
	// Three parameter sets the specs switch between; filled in by BeforeEach
	// depending on cluster size.
	var DNSParams1 DNSParamsLinear
	var DNSParams2 DNSParamsLinear
	var DNSParams3 DNSParamsLinear

	ginkgo.BeforeEach(func(ctx context.Context) {
		e2eskipper.SkipUnlessProviderIs("gce", "gke")
		c = f.ClientSet

		nodes, err := e2enode.GetReadySchedulableNodes(ctx, c)
		framework.ExpectNoError(err)
		nodeCount := len(nodes.Items)

		ginkgo.By("Collecting original replicas count and DNS scaling params")
		originDNSReplicasCount, err = getDNSReplicas(ctx, c)
		framework.ExpectNoError(err)

		pcm, err := fetchDNSScalingConfigMap(ctx, c)
		framework.ExpectNoError(err)
		previousParams = pcm.Data

		if nodeCount <= 500 {
			// Small clusters: low nodes-per-replica values so that each
			// parameter set yields a different replica count.
			DNSParams1 = DNSParamsLinear{
				nodesPerReplica: 1,
			}
			DNSParams2 = DNSParamsLinear{
				nodesPerReplica: 2,
			}
			DNSParams3 = DNSParamsLinear{
				nodesPerReplica: 3,
				coresPerReplica: 3,
			}
		} else {
			// In large clusters, avoid creating/deleting too many DNS pods,
			// it is supposed to be correctness test, not performance one.
			// The default setup is: 256 cores/replica, 16 nodes/replica.
			// With nodeCount > 500, nodes/13, nodes/14, nodes/15 and nodes/16
			// are different numbers.
			DNSParams1 = DNSParamsLinear{
				nodesPerReplica: 13,
			}
			DNSParams2 = DNSParamsLinear{
				nodesPerReplica: 14,
			}
			DNSParams3 = DNSParamsLinear{
				nodesPerReplica: 15,
				coresPerReplica: 15,
			}
		}
	})

	// This test is separated because it is slow and needs to run serially.
	// Will take around 5 minutes to run on a 4 nodes cluster.
	// TODO(upodroid) This test will be removed in 1.33 when kubeup is removed
	f.It(f.WithSerial(), f.WithSlow(), f.WithLabel("KubeUp"), "kube-dns-autoscaler should scale kube-dns pods when cluster size changed", func(ctx context.Context) {
		numNodes, err := e2enode.TotalRegistered(ctx, c)
		framework.ExpectNoError(err)

		ginkgo.By("Replace the dns autoscaling parameters with testing parameters")
		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(packLinearParams(&DNSParams1)))
		framework.ExpectNoError(err)
		// Cleanup: write the captured ConfigMap data back and wait until the
		// original number of kube-dns pods is running and ready again.
		defer func() {
			ginkgo.By("Restoring initial dns autoscaling parameters")
			err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(previousParams))
			framework.ExpectNoError(err)

			ginkgo.By("Wait for number of running and ready kube-dns pods recover")
			label := labels.SelectorFromSet(labels.Set(map[string]string{ClusterAddonLabelKey: DNSLabelName}))
			// Note: this := declares a new err local to the deferred func.
			_, err := e2epod.WaitForPodsWithLabelRunningReady(ctx, c, metav1.NamespaceSystem, label, originDNSReplicasCount, DNSdefaultTimeout)
			framework.ExpectNoError(err)
		}()
		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear := getExpectReplicasFuncLinear(ctx, c, &DNSParams1)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		// Record the current size of every node instance group listed in the
		// (comma-separated) cloud config so it can be restored later.
		originalSizes := make(map[string]int)
		for _, mig := range strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
			size, err := framework.GroupSize(mig)
			framework.ExpectNoError(err)
			ginkgo.By(fmt.Sprintf("Initial size of %s: %d", mig, size))
			originalSizes[mig] = size
		}

		ginkgo.By("Manually increase cluster size")
		// Grow every instance group by one node.
		increasedSizes := make(map[string]int)
		for key, val := range originalSizes {
			increasedSizes[key] = val + 1
		}
		setMigSizes(increasedSizes)
		// One extra node per group, hence numNodes+len(originalSizes).
		err = WaitForClusterSizeFunc(ctx, c,
			func(size int) bool { return size == numNodes+len(originalSizes) }, scaleUpTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams1)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters")
		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(packLinearParams(&DNSParams3)))
		framework.ExpectNoError(err)

		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams3)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("Restoring cluster size")
		setMigSizes(originalSizes)
		err = e2enode.WaitForReadyNodes(ctx, c, numNodes, scaleDownTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("Wait for kube-dns scaled to expected number")
		// getExpectReplicasLinear still refers to the DNSParams3 function
		// assigned above; it recomputes from the current node list.
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)
	})

	ginkgo.It("kube-dns-autoscaler should scale kube-dns pods in both nonfaulty and faulty scenarios", func(ctx context.Context) {

		// Baseline: apply DNSParams1 and wait for the replica count to match.
		ginkgo.By("Replace the dns autoscaling parameters with testing parameters")
		err := updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(packLinearParams(&DNSParams1)))
		framework.ExpectNoError(err)
		// Cleanup: write the captured ConfigMap data back when the spec ends.
		defer func() {
			ginkgo.By("Restoring initial dns autoscaling parameters")
			err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(previousParams))
			framework.ExpectNoError(err)
		}()
		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear := getExpectReplicasFuncLinear(ctx, c, &DNSParams1)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("--- Scenario: should scale kube-dns based on changed parameters ---")
		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters")
		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(packLinearParams(&DNSParams3)))
		framework.ExpectNoError(err)
		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams3)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("--- Scenario: should re-create scaling parameters with default value when parameters got deleted ---")
		ginkgo.By("Delete the ConfigMap for autoscaler")
		err = deleteDNSScalingConfigMap(ctx, c)
		framework.ExpectNoError(err)

		ginkgo.By("Wait for the ConfigMap got re-created")
		_, err = waitForDNSConfigMapCreated(ctx, c, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		// Only the ConfigMap's reappearance is checked above; its contents
		// are overwritten with DNSParams2 before replicas are verified.
		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters")
		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(packLinearParams(&DNSParams2)))
		framework.ExpectNoError(err)
		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams2)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("--- Scenario: should recover after autoscaler pod got deleted ---")
		ginkgo.By("Delete the autoscaler pod for kube-dns")
		err = deleteDNSAutoscalerPod(ctx, c)
		framework.ExpectNoError(err)

		// Change parameters right after deleting the pod; the replica count
		// converging to the DNSParams1 value shows autoscaling resumed.
		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters")
		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(packLinearParams(&DNSParams1)))
		framework.ExpectNoError(err)
		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams1)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)
	})
})
   227  
// DNSParamsLinear holds the scaling parameters this test writes under the
// "linear" key of the DNS autoscaler ConfigMap (see packLinearParams).
type DNSParamsLinear struct {
	// nodesPerReplica is the divisor applied to the ready node count when the
	// test computes the expected replicas; values <= 0 disable that term.
	nodesPerReplica float64
	// coresPerReplica is the divisor applied to the schedulable core count
	// when the test computes the expected replicas; values <= 0 disable that
	// term.
	coresPerReplica float64
	// min and max are serialized into the ConfigMap but are not consulted by
	// getExpectReplicasFuncLinear; the specs in this file leave them at zero.
	min             int
	max             int
}
   235  
// getExpectReplicasFunc returns the number of kube-dns replicas the test
// expects, computed from cluster state read through the given client.
type getExpectReplicasFunc func(c clientset.Interface) int
   237  
   238  func getExpectReplicasFuncLinear(ctx context.Context, c clientset.Interface, params *DNSParamsLinear) getExpectReplicasFunc {
   239  	return func(c clientset.Interface) int {
   240  		var replicasFromNodes float64
   241  		var replicasFromCores float64
   242  		nodes, err := e2enode.GetReadyNodesIncludingTainted(ctx, c)
   243  		framework.ExpectNoError(err)
   244  		if params.nodesPerReplica > 0 {
   245  			replicasFromNodes = math.Ceil(float64(len(nodes.Items)) / params.nodesPerReplica)
   246  		}
   247  		if params.coresPerReplica > 0 {
   248  			replicasFromCores = math.Ceil(float64(getSchedulableCores(nodes.Items)) / params.coresPerReplica)
   249  		}
   250  		return int(math.Max(1.0, math.Max(replicasFromNodes, replicasFromCores)))
   251  	}
   252  }
   253  
   254  func getSchedulableCores(nodes []v1.Node) int64 {
   255  	var sc resource.Quantity
   256  	for _, node := range nodes {
   257  		if !node.Spec.Unschedulable {
   258  			sc.Add(node.Status.Allocatable[v1.ResourceCPU])
   259  		}
   260  	}
   261  	return sc.Value()
   262  }
   263  
   264  func fetchDNSScalingConfigMap(ctx context.Context, c clientset.Interface) (*v1.ConfigMap, error) {
   265  	cm, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(ctx, DNSAutoscalerLabelName, metav1.GetOptions{})
   266  	if err != nil {
   267  		return nil, err
   268  	}
   269  	return cm, nil
   270  }
   271  
   272  func deleteDNSScalingConfigMap(ctx context.Context, c clientset.Interface) error {
   273  	if err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Delete(ctx, DNSAutoscalerLabelName, metav1.DeleteOptions{}); err != nil {
   274  		return err
   275  	}
   276  	framework.Logf("DNS autoscaling ConfigMap deleted.")
   277  	return nil
   278  }
   279  
   280  func packLinearParams(params *DNSParamsLinear) map[string]string {
   281  	paramsMap := make(map[string]string)
   282  	paramsMap["linear"] = fmt.Sprintf("{\"nodesPerReplica\": %v,\"coresPerReplica\": %v,\"min\": %v,\"max\": %v}",
   283  		params.nodesPerReplica,
   284  		params.coresPerReplica,
   285  		params.min,
   286  		params.max)
   287  	return paramsMap
   288  }
   289  
   290  func packDNSScalingConfigMap(params map[string]string) *v1.ConfigMap {
   291  	configMap := v1.ConfigMap{}
   292  	configMap.ObjectMeta.Name = DNSAutoscalerLabelName
   293  	configMap.ObjectMeta.Namespace = metav1.NamespaceSystem
   294  	configMap.Data = params
   295  	return &configMap
   296  }
   297  
   298  func updateDNSScalingConfigMap(ctx context.Context, c clientset.Interface, configMap *v1.ConfigMap) error {
   299  	_, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Update(ctx, configMap, metav1.UpdateOptions{})
   300  	if err != nil {
   301  		return err
   302  	}
   303  	framework.Logf("DNS autoscaling ConfigMap updated.")
   304  	return nil
   305  }
   306  
   307  func getDNSReplicas(ctx context.Context, c clientset.Interface) (int, error) {
   308  	label := labels.SelectorFromSet(labels.Set(map[string]string{ClusterAddonLabelKey: DNSLabelName}))
   309  	listOpts := metav1.ListOptions{LabelSelector: label.String()}
   310  	deployments, err := c.AppsV1().Deployments(metav1.NamespaceSystem).List(ctx, listOpts)
   311  	if err != nil {
   312  		return 0, err
   313  	}
   314  	if len(deployments.Items) != 1 {
   315  		return 0, fmt.Errorf("expected 1 DNS deployment, got %v", len(deployments.Items))
   316  	}
   317  
   318  	deployment := deployments.Items[0]
   319  	return int(*(deployment.Spec.Replicas)), nil
   320  }
   321  
   322  func deleteDNSAutoscalerPod(ctx context.Context, c clientset.Interface) error {
   323  	label := labels.SelectorFromSet(labels.Set(map[string]string{ClusterAddonLabelKey: DNSAutoscalerLabelName}))
   324  	listOpts := metav1.ListOptions{LabelSelector: label.String()}
   325  	pods, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(ctx, listOpts)
   326  	if err != nil {
   327  		return err
   328  	}
   329  	if len(pods.Items) != 1 {
   330  		return fmt.Errorf("expected 1 autoscaler pod, got %v", len(pods.Items))
   331  	}
   332  
   333  	podName := pods.Items[0].Name
   334  	if err := c.CoreV1().Pods(metav1.NamespaceSystem).Delete(ctx, podName, metav1.DeleteOptions{}); err != nil {
   335  		return err
   336  	}
   337  	framework.Logf("DNS autoscaling pod %v deleted.", podName)
   338  	return nil
   339  }
   340  
   341  func waitForDNSReplicasSatisfied(ctx context.Context, c clientset.Interface, getExpected getExpectReplicasFunc, timeout time.Duration) (err error) {
   342  	var current int
   343  	var expected int
   344  	framework.Logf("Waiting up to %v for kube-dns to reach expected replicas", timeout)
   345  	condition := func() (bool, error) {
   346  		current, err = getDNSReplicas(ctx, c)
   347  		if err != nil {
   348  			return false, err
   349  		}
   350  		expected = getExpected(c)
   351  		if current != expected {
   352  			framework.Logf("Replicas not as expected: got %v, expected %v", current, expected)
   353  			return false, nil
   354  		}
   355  		return true, nil
   356  	}
   357  
   358  	if err = wait.Poll(2*time.Second, timeout, condition); err != nil {
   359  		return fmt.Errorf("err waiting for DNS replicas to satisfy %v, got %v: %w", expected, current, err)
   360  	}
   361  	framework.Logf("kube-dns reaches expected replicas: %v", expected)
   362  	return nil
   363  }
   364  
   365  func waitForDNSConfigMapCreated(ctx context.Context, c clientset.Interface, timeout time.Duration) (configMap *v1.ConfigMap, err error) {
   366  	framework.Logf("Waiting up to %v for DNS autoscaling ConfigMap got re-created", timeout)
   367  	condition := func() (bool, error) {
   368  		configMap, err = fetchDNSScalingConfigMap(ctx, c)
   369  		if err != nil {
   370  			return false, nil
   371  		}
   372  		return true, nil
   373  	}
   374  
   375  	if err = wait.Poll(time.Second, timeout, condition); err != nil {
   376  		return nil, fmt.Errorf("err waiting for DNS autoscaling ConfigMap got re-created: %w", err)
   377  	}
   378  	return configMap, nil
   379  }