k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e/autoscaling/dns_autoscaling.go (about) 1 /* 2 Copyright 2016 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package autoscaling 18 19 import ( 20 "context" 21 "fmt" 22 "math" 23 "strings" 24 "time" 25 26 v1 "k8s.io/api/core/v1" 27 "k8s.io/apimachinery/pkg/api/resource" 28 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 "k8s.io/apimachinery/pkg/labels" 30 "k8s.io/apimachinery/pkg/util/wait" 31 clientset "k8s.io/client-go/kubernetes" 32 "k8s.io/kubernetes/test/e2e/framework" 33 e2enode "k8s.io/kubernetes/test/e2e/framework/node" 34 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 35 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" 36 admissionapi "k8s.io/pod-security-admission/api" 37 38 "github.com/onsi/ginkgo/v2" 39 ) 40 41 // This test requires coredns to be installed on the cluster with autoscaling enabled. 42 // Compare your coredns manifest against the command below 43 // helm template coredns -n kube-system coredns/coredns --set k8sAppLabelOverride=kube-dns --set fullnameOverride=coredns --set autoscaler.enabled=true 44 45 // Constants used in dns-autoscaling test. 46 const ( 47 DNSdefaultTimeout = 5 * time.Minute 48 ClusterAddonLabelKey = "k8s-app" 49 DNSLabelName = "kube-dns" 50 ) 51 52 var _ = SIGDescribe("DNS horizontal autoscaling", func() { 53 f := framework.NewDefaultFramework("dns-autoscaling") 54 f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged 55 var c clientset.Interface 56 var previousParams map[string]string 57 var configMapNames map[string]string 58 var originDNSReplicasCount int 59 var DNSParams1 DNSParamsLinear 60 var DNSParams2 DNSParamsLinear 61 var DNSParams3 DNSParamsLinear 62 63 ginkgo.BeforeEach(func(ctx context.Context) { 64 e2eskipper.SkipUnlessProviderIs("gce", "gke") 65 c = f.ClientSet 66 67 nodes, err := e2enode.GetReadySchedulableNodes(ctx, c) 68 framework.ExpectNoError(err) 69 nodeCount := len(nodes.Items) 70 71 ginkgo.By("Collecting original replicas count and DNS scaling params") 72 73 // Check if we are running coredns or kube-dns, the only difference is the name of the autoscaling CM. 74 // The test should be have identically on both dns providers 75 provider, err := detectDNSProvider(ctx, c) 76 framework.ExpectNoError(err) 77 78 originDNSReplicasCount, err = getDNSReplicas(ctx, c) 79 framework.ExpectNoError(err) 80 configMapNames = map[string]string{ 81 "kube-dns": "kube-dns-autoscaler", 82 "coredns": "coredns-autoscaler", 83 } 84 85 pcm, err := fetchDNSScalingConfigMap(ctx, c, configMapNames[provider]) 86 framework.Logf("original DNS scaling params: %v", pcm) 87 framework.ExpectNoError(err) 88 previousParams = pcm.Data 89 90 if nodeCount <= 500 { 91 DNSParams1 = DNSParamsLinear{ 92 nodesPerReplica: 1, 93 } 94 DNSParams2 = DNSParamsLinear{ 95 nodesPerReplica: 2, 96 } 97 DNSParams3 = DNSParamsLinear{ 98 nodesPerReplica: 3, 99 coresPerReplica: 3, 100 } 101 } else { 102 // In large clusters, avoid creating/deleting too many DNS pods, 103 // it is supposed to be correctness test, not performance one. 104 // The default setup is: 256 cores/replica, 16 nodes/replica. 105 // With nodeCount > 500, nodes/13, nodes/14, nodes/15 and nodes/16 106 // are different numbers. 107 DNSParams1 = DNSParamsLinear{ 108 nodesPerReplica: 13, 109 } 110 DNSParams2 = DNSParamsLinear{ 111 nodesPerReplica: 14, 112 } 113 DNSParams3 = DNSParamsLinear{ 114 nodesPerReplica: 15, 115 coresPerReplica: 15, 116 } 117 } 118 }) 119 120 // This test is separated because it is slow and need to run serially. 121 // Will take around 5 minutes to run on a 4 nodes cluster. 122 // TODO(upodroid) This test will be removed in 1.33 when kubeup is removed 123 f.It(f.WithSerial(), f.WithSlow(), f.WithLabel("KubeUp"), "kube-dns-autoscaler should scale kube-dns pods when cluster size changed", func(ctx context.Context) { 124 numNodes, err := e2enode.TotalRegistered(ctx, c) 125 framework.ExpectNoError(err) 126 127 configMapNames = map[string]string{ 128 "kube-dns": "kube-dns-autoscaler", 129 "coredns": "coredns-autoscaler", 130 } 131 provider, err := detectDNSProvider(ctx, c) 132 framework.ExpectNoError(err) 133 134 ginkgo.By("Replace the dns autoscaling parameters with testing parameters") 135 err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams1))) 136 framework.ExpectNoError(err) 137 defer func() { 138 ginkgo.By("Restoring initial dns autoscaling parameters") 139 err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], previousParams)) 140 framework.ExpectNoError(err) 141 142 ginkgo.By("Wait for number of running and ready kube-dns pods recover") 143 label := labels.SelectorFromSet(labels.Set(map[string]string{ClusterAddonLabelKey: DNSLabelName})) 144 _, err := e2epod.WaitForPodsWithLabelRunningReady(ctx, c, metav1.NamespaceSystem, label, originDNSReplicasCount, DNSdefaultTimeout) 145 framework.ExpectNoError(err) 146 }() 147 ginkgo.By("Wait for kube-dns scaled to expected number") 148 getExpectReplicasLinear := getExpectReplicasFuncLinear(ctx, c, &DNSParams1) 149 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 150 framework.ExpectNoError(err) 151 152 originalSizes := make(map[string]int) 153 for _, mig := range strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") { 154 size, err := framework.GroupSize(mig) 155 framework.ExpectNoError(err) 156 ginkgo.By(fmt.Sprintf("Initial size of %s: %d", mig, size)) 157 originalSizes[mig] = size 158 } 159 160 ginkgo.By("Manually increase cluster size") 161 increasedSizes := make(map[string]int) 162 for key, val := range originalSizes { 163 increasedSizes[key] = val + 1 164 } 165 setMigSizes(increasedSizes) 166 err = WaitForClusterSizeFunc(ctx, c, 167 func(size int) bool { return size == numNodes+len(originalSizes) }, scaleUpTimeout) 168 framework.ExpectNoError(err) 169 170 ginkgo.By("Wait for kube-dns scaled to expected number") 171 getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams1) 172 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 173 framework.ExpectNoError(err) 174 175 ginkgo.By("Replace the dns autoscaling parameters with another testing parameters") 176 err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams3))) 177 framework.ExpectNoError(err) 178 179 ginkgo.By("Wait for kube-dns scaled to expected number") 180 getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams3) 181 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 182 framework.ExpectNoError(err) 183 184 ginkgo.By("Restoring cluster size") 185 setMigSizes(originalSizes) 186 err = e2enode.WaitForReadyNodes(ctx, c, numNodes, scaleDownTimeout) 187 framework.ExpectNoError(err) 188 189 ginkgo.By("Wait for kube-dns scaled to expected number") 190 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 191 framework.ExpectNoError(err) 192 }) 193 194 ginkgo.It("kube-dns-autoscaler should scale kube-dns pods in both nonfaulty and faulty scenarios", func(ctx context.Context) { 195 196 configMapNames = map[string]string{ 197 "kube-dns": "kube-dns-autoscaler", 198 "coredns": "coredns-autoscaler", 199 } 200 provider, err := detectDNSProvider(ctx, c) 201 framework.ExpectNoError(err) 202 203 ginkgo.By("Replace the dns autoscaling parameters with testing parameters") 204 cm := packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams1)) 205 framework.Logf("Updating the following cm: %v", cm) 206 err = updateDNSScalingConfigMap(ctx, c, cm) 207 framework.ExpectNoError(err) 208 defer func() { 209 ginkgo.By("Restoring initial dns autoscaling parameters") 210 err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], previousParams)) 211 framework.ExpectNoError(err) 212 }() 213 ginkgo.By("Wait for kube-dns scaled to expected number") 214 getExpectReplicasLinear := getExpectReplicasFuncLinear(ctx, c, &DNSParams1) 215 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 216 framework.ExpectNoError(err) 217 218 ginkgo.By("--- Scenario: should scale kube-dns based on changed parameters ---") 219 ginkgo.By("Replace the dns autoscaling parameters with another testing parameters") 220 err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams3))) 221 framework.ExpectNoError(err) 222 ginkgo.By("Wait for kube-dns scaled to expected number") 223 getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams3) 224 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 225 framework.ExpectNoError(err) 226 227 ginkgo.By("--- Scenario: should re-create scaling parameters with default value when parameters got deleted ---") 228 ginkgo.By("Delete the ConfigMap for autoscaler") 229 err = deleteDNSScalingConfigMap(ctx, c, configMapNames[provider]) 230 framework.ExpectNoError(err) 231 232 ginkgo.By("Wait for the ConfigMap got re-created") 233 _, err = waitForDNSConfigMapCreated(ctx, c, DNSdefaultTimeout, configMapNames[provider]) 234 framework.ExpectNoError(err) 235 236 ginkgo.By("Replace the dns autoscaling parameters with another testing parameters") 237 err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams2))) 238 framework.ExpectNoError(err) 239 ginkgo.By("Wait for kube-dns/coredns scaled to expected number") 240 getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams2) 241 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 242 framework.ExpectNoError(err) 243 244 ginkgo.By("--- Scenario: should recover after autoscaler pod got deleted ---") 245 ginkgo.By("Delete the autoscaler pod for kube-dns/coredns") 246 err = deleteDNSAutoscalerPod(ctx, c) 247 framework.ExpectNoError(err) 248 249 ginkgo.By("Replace the dns autoscaling parameters with another testing parameters") 250 err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams1))) 251 framework.ExpectNoError(err) 252 ginkgo.By("Wait for kube-dns/coredns scaled to expected number") 253 getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams1) 254 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 255 framework.ExpectNoError(err) 256 }) 257 }) 258 259 // DNSParamsLinear is a struct for number of DNS pods. 260 type DNSParamsLinear struct { 261 nodesPerReplica float64 262 coresPerReplica float64 263 min int 264 max int 265 } 266 267 type getExpectReplicasFunc func(c clientset.Interface) int 268 269 func getExpectReplicasFuncLinear(ctx context.Context, c clientset.Interface, params *DNSParamsLinear) getExpectReplicasFunc { 270 return func(c clientset.Interface) int { 271 var replicasFromNodes float64 272 var replicasFromCores float64 273 nodes, err := e2enode.GetReadyNodesIncludingTainted(ctx, c) 274 framework.ExpectNoError(err) 275 if params.nodesPerReplica > 0 { 276 replicasFromNodes = math.Ceil(float64(len(nodes.Items)) / params.nodesPerReplica) 277 } 278 if params.coresPerReplica > 0 { 279 replicasFromCores = math.Ceil(float64(getSchedulableCores(nodes.Items)) / params.coresPerReplica) 280 } 281 return int(math.Max(1.0, math.Max(replicasFromNodes, replicasFromCores))) 282 } 283 } 284 285 func getSchedulableCores(nodes []v1.Node) int64 { 286 var sc resource.Quantity 287 for _, node := range nodes { 288 if !node.Spec.Unschedulable { 289 sc.Add(node.Status.Allocatable[v1.ResourceCPU]) 290 } 291 } 292 return sc.Value() 293 } 294 295 func detectDNSProvider(ctx context.Context, c clientset.Interface) (string, error) { 296 cm, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(ctx, "coredns-autoscaler", metav1.GetOptions{}) 297 if cm != nil && err == nil { 298 return "coredns", nil 299 } 300 301 cm, err = c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(ctx, "kube-dns-autoscaler", metav1.GetOptions{}) 302 if cm != nil && err == nil { 303 return "kube-dns", nil 304 } 305 306 return "", fmt.Errorf("the cluster doesn't have kube-dns or coredns autoscaling configured") 307 } 308 309 func fetchDNSScalingConfigMap(ctx context.Context, c clientset.Interface, configMapName string) (*v1.ConfigMap, error) { 310 cm, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(ctx, configMapName, metav1.GetOptions{}) 311 if err != nil { 312 return nil, err 313 } 314 return cm, nil 315 } 316 317 func deleteDNSScalingConfigMap(ctx context.Context, c clientset.Interface, configMapName string) error { 318 if err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Delete(ctx, configMapName, metav1.DeleteOptions{}); err != nil { 319 return err 320 } 321 framework.Logf("DNS autoscaling ConfigMap deleted.") 322 return nil 323 } 324 325 func packLinearParams(params *DNSParamsLinear) map[string]string { 326 paramsMap := make(map[string]string) 327 paramsMap["linear"] = fmt.Sprintf("{\"nodesPerReplica\": %v,\"coresPerReplica\": %v,\"min\": %v,\"max\": %v}", 328 params.nodesPerReplica, 329 params.coresPerReplica, 330 params.min, 331 params.max) 332 return paramsMap 333 } 334 335 func packDNSScalingConfigMap(configMapName string, params map[string]string) *v1.ConfigMap { 336 configMap := v1.ConfigMap{} 337 configMap.ObjectMeta.Name = configMapName 338 configMap.ObjectMeta.Namespace = metav1.NamespaceSystem 339 configMap.Data = params 340 return &configMap 341 } 342 343 func updateDNSScalingConfigMap(ctx context.Context, c clientset.Interface, configMap *v1.ConfigMap) error { 344 _, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Update(ctx, configMap, metav1.UpdateOptions{}) 345 if err != nil { 346 return err 347 } 348 framework.Logf("DNS autoscaling ConfigMap updated.") 349 return nil 350 } 351 352 func getDNSReplicas(ctx context.Context, c clientset.Interface) (int, error) { 353 label := labels.SelectorFromSet(labels.Set(map[string]string{ClusterAddonLabelKey: DNSLabelName})) 354 listOpts := metav1.ListOptions{LabelSelector: label.String()} 355 deployments, err := c.AppsV1().Deployments(metav1.NamespaceSystem).List(ctx, listOpts) 356 if err != nil { 357 return 0, err 358 } 359 if len(deployments.Items) != 1 { 360 return 0, fmt.Errorf("expected 1 DNS deployment, got %v", len(deployments.Items)) 361 } 362 363 deployment := deployments.Items[0] 364 return int(*(deployment.Spec.Replicas)), nil 365 } 366 367 func deleteDNSAutoscalerPod(ctx context.Context, c clientset.Interface) error { 368 selector, _ := labels.Parse(fmt.Sprintf("%s in (kube-dns-autoscaler, coredns-autoscaler)", ClusterAddonLabelKey)) 369 listOpts := metav1.ListOptions{LabelSelector: selector.String()} 370 pods, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(ctx, listOpts) 371 if err != nil { 372 return err 373 } 374 if len(pods.Items) != 1 { 375 return fmt.Errorf("expected 1 autoscaler pod, got %v", len(pods.Items)) 376 } 377 378 podName := pods.Items[0].Name 379 if err := c.CoreV1().Pods(metav1.NamespaceSystem).Delete(ctx, podName, metav1.DeleteOptions{}); err != nil { 380 return err 381 } 382 framework.Logf("DNS autoscaling pod %v deleted.", podName) 383 return nil 384 } 385 386 func waitForDNSReplicasSatisfied(ctx context.Context, c clientset.Interface, getExpected getExpectReplicasFunc, timeout time.Duration) (err error) { 387 var current int 388 var expected int 389 framework.Logf("Waiting up to %v for kube-dns to reach expected replicas", timeout) 390 condition := func(ctx context.Context) (bool, error) { 391 current, err = getDNSReplicas(ctx, c) 392 if err != nil { 393 return false, err 394 } 395 expected = getExpected(c) 396 if current != expected { 397 framework.Logf("Replicas not as expected: got %v, expected %v", current, expected) 398 return false, nil 399 } 400 return true, nil 401 } 402 403 if err = wait.PollUntilContextTimeout(ctx, 2*time.Second, timeout, false, condition); err != nil { 404 return fmt.Errorf("err waiting for DNS replicas to satisfy %v, got %v: %w", expected, current, err) 405 } 406 framework.Logf("kube-dns reaches expected replicas: %v", expected) 407 return nil 408 } 409 410 func waitForDNSConfigMapCreated(ctx context.Context, c clientset.Interface, timeout time.Duration, configMapName string) (configMap *v1.ConfigMap, err error) { 411 framework.Logf("Waiting up to %v for DNS autoscaling ConfigMap to be re-created", timeout) 412 condition := func(ctx context.Context) (bool, error) { 413 configMap, err = fetchDNSScalingConfigMap(ctx, c, configMapName) 414 if err != nil { 415 return false, nil 416 } 417 return true, nil 418 } 419 420 if err = wait.PollUntilContextTimeout(ctx, time.Second, timeout, false, condition); err != nil { 421 return nil, fmt.Errorf("err waiting for DNS autoscaling ConfigMap got re-created: %w", err) 422 } 423 return configMap, nil 424 }