k8s.io/kubernetes@v1.29.3/test/e2e/autoscaling/dns_autoscaling.go (about) 1 /* 2 Copyright 2016 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package autoscaling 18 19 import ( 20 "context" 21 "fmt" 22 "math" 23 "strings" 24 "time" 25 26 v1 "k8s.io/api/core/v1" 27 "k8s.io/apimachinery/pkg/api/resource" 28 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 "k8s.io/apimachinery/pkg/labels" 30 "k8s.io/apimachinery/pkg/util/wait" 31 clientset "k8s.io/client-go/kubernetes" 32 "k8s.io/kubernetes/test/e2e/framework" 33 e2enode "k8s.io/kubernetes/test/e2e/framework/node" 34 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 35 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" 36 admissionapi "k8s.io/pod-security-admission/api" 37 38 "github.com/onsi/ginkgo/v2" 39 ) 40 41 // Constants used in dns-autoscaling test. 42 const ( 43 DNSdefaultTimeout = 5 * time.Minute 44 ClusterAddonLabelKey = "k8s-app" 45 DNSLabelName = "kube-dns" 46 DNSAutoscalerLabelName = "kube-dns-autoscaler" 47 ) 48 49 var _ = SIGDescribe("DNS horizontal autoscaling", func() { 50 f := framework.NewDefaultFramework("dns-autoscaling") 51 f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged 52 var c clientset.Interface 53 var previousParams map[string]string 54 var originDNSReplicasCount int 55 var DNSParams1 DNSParamsLinear 56 var DNSParams2 DNSParamsLinear 57 var DNSParams3 DNSParamsLinear 58 59 ginkgo.BeforeEach(func(ctx context.Context) { 60 e2eskipper.SkipUnlessProviderIs("gce", "gke") 61 c = f.ClientSet 62 63 nodes, err := e2enode.GetReadySchedulableNodes(ctx, c) 64 framework.ExpectNoError(err) 65 nodeCount := len(nodes.Items) 66 67 ginkgo.By("Collecting original replicas count and DNS scaling params") 68 originDNSReplicasCount, err = getDNSReplicas(ctx, c) 69 framework.ExpectNoError(err) 70 71 pcm, err := fetchDNSScalingConfigMap(ctx, c) 72 framework.ExpectNoError(err) 73 previousParams = pcm.Data 74 75 if nodeCount <= 500 { 76 DNSParams1 = DNSParamsLinear{ 77 nodesPerReplica: 1, 78 } 79 DNSParams2 = DNSParamsLinear{ 80 nodesPerReplica: 2, 81 } 82 DNSParams3 = DNSParamsLinear{ 83 nodesPerReplica: 3, 84 coresPerReplica: 3, 85 } 86 } else { 87 // In large clusters, avoid creating/deleting too many DNS pods, 88 // it is supposed to be correctness test, not performance one. 89 // The default setup is: 256 cores/replica, 16 nodes/replica. 90 // With nodeCount > 500, nodes/13, nodes/14, nodes/15 and nodes/16 91 // are different numbers. 92 DNSParams1 = DNSParamsLinear{ 93 nodesPerReplica: 13, 94 } 95 DNSParams2 = DNSParamsLinear{ 96 nodesPerReplica: 14, 97 } 98 DNSParams3 = DNSParamsLinear{ 99 nodesPerReplica: 15, 100 coresPerReplica: 15, 101 } 102 } 103 }) 104 105 // This test is separated because it is slow and need to run serially. 106 // Will take around 5 minutes to run on a 4 nodes cluster. 107 // TODO(upodroid) This test will be removed in 1.33 when kubeup is removed 108 f.It(f.WithSerial(), f.WithSlow(), f.WithLabel("KubeUp"), "kube-dns-autoscaler should scale kube-dns pods when cluster size changed", func(ctx context.Context) { 109 numNodes, err := e2enode.TotalRegistered(ctx, c) 110 framework.ExpectNoError(err) 111 112 ginkgo.By("Replace the dns autoscaling parameters with testing parameters") 113 err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(packLinearParams(&DNSParams1))) 114 framework.ExpectNoError(err) 115 defer func() { 116 ginkgo.By("Restoring initial dns autoscaling parameters") 117 err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(previousParams)) 118 framework.ExpectNoError(err) 119 120 ginkgo.By("Wait for number of running and ready kube-dns pods recover") 121 label := labels.SelectorFromSet(labels.Set(map[string]string{ClusterAddonLabelKey: DNSLabelName})) 122 _, err := e2epod.WaitForPodsWithLabelRunningReady(ctx, c, metav1.NamespaceSystem, label, originDNSReplicasCount, DNSdefaultTimeout) 123 framework.ExpectNoError(err) 124 }() 125 ginkgo.By("Wait for kube-dns scaled to expected number") 126 getExpectReplicasLinear := getExpectReplicasFuncLinear(ctx, c, &DNSParams1) 127 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 128 framework.ExpectNoError(err) 129 130 originalSizes := make(map[string]int) 131 for _, mig := range strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") { 132 size, err := framework.GroupSize(mig) 133 framework.ExpectNoError(err) 134 ginkgo.By(fmt.Sprintf("Initial size of %s: %d", mig, size)) 135 originalSizes[mig] = size 136 } 137 138 ginkgo.By("Manually increase cluster size") 139 increasedSizes := make(map[string]int) 140 for key, val := range originalSizes { 141 increasedSizes[key] = val + 1 142 } 143 setMigSizes(increasedSizes) 144 err = WaitForClusterSizeFunc(ctx, c, 145 func(size int) bool { return size == numNodes+len(originalSizes) }, scaleUpTimeout) 146 framework.ExpectNoError(err) 147 148 ginkgo.By("Wait for kube-dns scaled to expected number") 149 getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams1) 150 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 151 framework.ExpectNoError(err) 152 153 ginkgo.By("Replace the dns autoscaling parameters with another testing parameters") 154 err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(packLinearParams(&DNSParams3))) 155 framework.ExpectNoError(err) 156 157 ginkgo.By("Wait for kube-dns scaled to expected number") 158 getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams3) 159 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 160 framework.ExpectNoError(err) 161 162 ginkgo.By("Restoring cluster size") 163 setMigSizes(originalSizes) 164 err = e2enode.WaitForReadyNodes(ctx, c, numNodes, scaleDownTimeout) 165 framework.ExpectNoError(err) 166 167 ginkgo.By("Wait for kube-dns scaled to expected number") 168 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 169 framework.ExpectNoError(err) 170 }) 171 172 ginkgo.It("kube-dns-autoscaler should scale kube-dns pods in both nonfaulty and faulty scenarios", func(ctx context.Context) { 173 174 ginkgo.By("Replace the dns autoscaling parameters with testing parameters") 175 err := updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(packLinearParams(&DNSParams1))) 176 framework.ExpectNoError(err) 177 defer func() { 178 ginkgo.By("Restoring initial dns autoscaling parameters") 179 err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(previousParams)) 180 framework.ExpectNoError(err) 181 }() 182 ginkgo.By("Wait for kube-dns scaled to expected number") 183 getExpectReplicasLinear := getExpectReplicasFuncLinear(ctx, c, &DNSParams1) 184 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 185 framework.ExpectNoError(err) 186 187 ginkgo.By("--- Scenario: should scale kube-dns based on changed parameters ---") 188 ginkgo.By("Replace the dns autoscaling parameters with another testing parameters") 189 err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(packLinearParams(&DNSParams3))) 190 framework.ExpectNoError(err) 191 ginkgo.By("Wait for kube-dns scaled to expected number") 192 getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams3) 193 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 194 framework.ExpectNoError(err) 195 196 ginkgo.By("--- Scenario: should re-create scaling parameters with default value when parameters got deleted ---") 197 ginkgo.By("Delete the ConfigMap for autoscaler") 198 err = deleteDNSScalingConfigMap(ctx, c) 199 framework.ExpectNoError(err) 200 201 ginkgo.By("Wait for the ConfigMap got re-created") 202 _, err = waitForDNSConfigMapCreated(ctx, c, DNSdefaultTimeout) 203 framework.ExpectNoError(err) 204 205 ginkgo.By("Replace the dns autoscaling parameters with another testing parameters") 206 err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(packLinearParams(&DNSParams2))) 207 framework.ExpectNoError(err) 208 ginkgo.By("Wait for kube-dns scaled to expected number") 209 getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams2) 210 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 211 framework.ExpectNoError(err) 212 213 ginkgo.By("--- Scenario: should recover after autoscaler pod got deleted ---") 214 ginkgo.By("Delete the autoscaler pod for kube-dns") 215 err = deleteDNSAutoscalerPod(ctx, c) 216 framework.ExpectNoError(err) 217 218 ginkgo.By("Replace the dns autoscaling parameters with another testing parameters") 219 err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(packLinearParams(&DNSParams1))) 220 framework.ExpectNoError(err) 221 ginkgo.By("Wait for kube-dns scaled to expected number") 222 getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams1) 223 err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout) 224 framework.ExpectNoError(err) 225 }) 226 }) 227 228 // DNSParamsLinear is a struct for number of DNS pods. 229 type DNSParamsLinear struct { 230 nodesPerReplica float64 231 coresPerReplica float64 232 min int 233 max int 234 } 235 236 type getExpectReplicasFunc func(c clientset.Interface) int 237 238 func getExpectReplicasFuncLinear(ctx context.Context, c clientset.Interface, params *DNSParamsLinear) getExpectReplicasFunc { 239 return func(c clientset.Interface) int { 240 var replicasFromNodes float64 241 var replicasFromCores float64 242 nodes, err := e2enode.GetReadyNodesIncludingTainted(ctx, c) 243 framework.ExpectNoError(err) 244 if params.nodesPerReplica > 0 { 245 replicasFromNodes = math.Ceil(float64(len(nodes.Items)) / params.nodesPerReplica) 246 } 247 if params.coresPerReplica > 0 { 248 replicasFromCores = math.Ceil(float64(getSchedulableCores(nodes.Items)) / params.coresPerReplica) 249 } 250 return int(math.Max(1.0, math.Max(replicasFromNodes, replicasFromCores))) 251 } 252 } 253 254 func getSchedulableCores(nodes []v1.Node) int64 { 255 var sc resource.Quantity 256 for _, node := range nodes { 257 if !node.Spec.Unschedulable { 258 sc.Add(node.Status.Allocatable[v1.ResourceCPU]) 259 } 260 } 261 return sc.Value() 262 } 263 264 func fetchDNSScalingConfigMap(ctx context.Context, c clientset.Interface) (*v1.ConfigMap, error) { 265 cm, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(ctx, DNSAutoscalerLabelName, metav1.GetOptions{}) 266 if err != nil { 267 return nil, err 268 } 269 return cm, nil 270 } 271 272 func deleteDNSScalingConfigMap(ctx context.Context, c clientset.Interface) error { 273 if err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Delete(ctx, DNSAutoscalerLabelName, metav1.DeleteOptions{}); err != nil { 274 return err 275 } 276 framework.Logf("DNS autoscaling ConfigMap deleted.") 277 return nil 278 } 279 280 func packLinearParams(params *DNSParamsLinear) map[string]string { 281 paramsMap := make(map[string]string) 282 paramsMap["linear"] = fmt.Sprintf("{\"nodesPerReplica\": %v,\"coresPerReplica\": %v,\"min\": %v,\"max\": %v}", 283 params.nodesPerReplica, 284 params.coresPerReplica, 285 params.min, 286 params.max) 287 return paramsMap 288 } 289 290 func packDNSScalingConfigMap(params map[string]string) *v1.ConfigMap { 291 configMap := v1.ConfigMap{} 292 configMap.ObjectMeta.Name = DNSAutoscalerLabelName 293 configMap.ObjectMeta.Namespace = metav1.NamespaceSystem 294 configMap.Data = params 295 return &configMap 296 } 297 298 func updateDNSScalingConfigMap(ctx context.Context, c clientset.Interface, configMap *v1.ConfigMap) error { 299 _, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Update(ctx, configMap, metav1.UpdateOptions{}) 300 if err != nil { 301 return err 302 } 303 framework.Logf("DNS autoscaling ConfigMap updated.") 304 return nil 305 } 306 307 func getDNSReplicas(ctx context.Context, c clientset.Interface) (int, error) { 308 label := labels.SelectorFromSet(labels.Set(map[string]string{ClusterAddonLabelKey: DNSLabelName})) 309 listOpts := metav1.ListOptions{LabelSelector: label.String()} 310 deployments, err := c.AppsV1().Deployments(metav1.NamespaceSystem).List(ctx, listOpts) 311 if err != nil { 312 return 0, err 313 } 314 if len(deployments.Items) != 1 { 315 return 0, fmt.Errorf("expected 1 DNS deployment, got %v", len(deployments.Items)) 316 } 317 318 deployment := deployments.Items[0] 319 return int(*(deployment.Spec.Replicas)), nil 320 } 321 322 func deleteDNSAutoscalerPod(ctx context.Context, c clientset.Interface) error { 323 label := labels.SelectorFromSet(labels.Set(map[string]string{ClusterAddonLabelKey: DNSAutoscalerLabelName})) 324 listOpts := metav1.ListOptions{LabelSelector: label.String()} 325 pods, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(ctx, listOpts) 326 if err != nil { 327 return err 328 } 329 if len(pods.Items) != 1 { 330 return fmt.Errorf("expected 1 autoscaler pod, got %v", len(pods.Items)) 331 } 332 333 podName := pods.Items[0].Name 334 if err := c.CoreV1().Pods(metav1.NamespaceSystem).Delete(ctx, podName, metav1.DeleteOptions{}); err != nil { 335 return err 336 } 337 framework.Logf("DNS autoscaling pod %v deleted.", podName) 338 return nil 339 } 340 341 func waitForDNSReplicasSatisfied(ctx context.Context, c clientset.Interface, getExpected getExpectReplicasFunc, timeout time.Duration) (err error) { 342 var current int 343 var expected int 344 framework.Logf("Waiting up to %v for kube-dns to reach expected replicas", timeout) 345 condition := func() (bool, error) { 346 current, err = getDNSReplicas(ctx, c) 347 if err != nil { 348 return false, err 349 } 350 expected = getExpected(c) 351 if current != expected { 352 framework.Logf("Replicas not as expected: got %v, expected %v", current, expected) 353 return false, nil 354 } 355 return true, nil 356 } 357 358 if err = wait.Poll(2*time.Second, timeout, condition); err != nil { 359 return fmt.Errorf("err waiting for DNS replicas to satisfy %v, got %v: %w", expected, current, err) 360 } 361 framework.Logf("kube-dns reaches expected replicas: %v", expected) 362 return nil 363 } 364 365 func waitForDNSConfigMapCreated(ctx context.Context, c clientset.Interface, timeout time.Duration) (configMap *v1.ConfigMap, err error) { 366 framework.Logf("Waiting up to %v for DNS autoscaling ConfigMap got re-created", timeout) 367 condition := func() (bool, error) { 368 configMap, err = fetchDNSScalingConfigMap(ctx, c) 369 if err != nil { 370 return false, nil 371 } 372 return true, nil 373 } 374 375 if err = wait.Poll(time.Second, timeout, condition); err != nil { 376 return nil, fmt.Errorf("err waiting for DNS autoscaling ConfigMap got re-created: %w", err) 377 } 378 return configMap, nil 379 }