// k8s.io/kubernetes@v1.29.3/test/e2e_node/cpu_manager_test.go

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
	"context"
	"fmt"
	"os/exec"
	"regexp"
	"strconv"
	"strings"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	"k8s.io/kubelet/pkg/types"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
	admissionapi "k8s.io/pod-security-admission/api"
	"k8s.io/utils/cpuset"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
)

// Helper for makeCPUManagerPod().
type ctnAttribute struct {
	ctnName       string
	cpuRequest    string
	cpuLimit      string
	restartPolicy *v1.ContainerRestartPolicy
}

// makeCPUManagerPod returns a pod with the provided ctnAttributes.
func makeCPUManagerPod(podName string, ctnAttributes []ctnAttribute) *v1.Pod {
	var containers []v1.Container
	for _, ctnAttr := range ctnAttributes {
		cpusetCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2 && sleep 1d"
		ctn := v1.Container{
			Name:  ctnAttr.ctnName,
			Image: busyboxImage,
			Resources: v1.ResourceRequirements{
				Requests: v1.ResourceList{
					v1.ResourceCPU:    resource.MustParse(ctnAttr.cpuRequest),
					v1.ResourceMemory: resource.MustParse("100Mi"),
				},
				Limits: v1.ResourceList{
					v1.ResourceCPU:    resource.MustParse(ctnAttr.cpuLimit),
					v1.ResourceMemory: resource.MustParse("100Mi"),
				},
			},
			Command: []string{"sh", "-c", cpusetCmd},
		}
		containers = append(containers, ctn)
	}

	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: podName,
		},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers:    containers,
		},
	}
}

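// Example (informal, mirrors runGuPodTest below): a single-container
// Guaranteed pod is built by passing equal CPU request and limit:
//
//	pod := makeCPUManagerPod("gu-pod", []ctnAttribute{
//		{ctnName: "gu-container", cpuRequest: "1000m", cpuLimit: "1000m"},
//	})
//
// The container prints its Cpus_allowed_list and then sleeps, so the pod
// stays Running while the test reads the cpuset back from its logs.
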
// makeCPUManagerInitContainersPod returns a pod with init containers with the
// provided ctnAttributes.
func makeCPUManagerInitContainersPod(podName string, ctnAttributes []ctnAttribute) *v1.Pod {
	var containers []v1.Container
	cpusetCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2"
	cpusetAndSleepCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2 && sleep 1d"
	for _, ctnAttr := range ctnAttributes {
		ctn := v1.Container{
			Name:  ctnAttr.ctnName,
			Image: busyboxImage,
			Resources: v1.ResourceRequirements{
				Requests: v1.ResourceList{
					v1.ResourceCPU:    resource.MustParse(ctnAttr.cpuRequest),
					v1.ResourceMemory: resource.MustParse("100Mi"),
				},
				Limits: v1.ResourceList{
					v1.ResourceCPU:    resource.MustParse(ctnAttr.cpuLimit),
					v1.ResourceMemory: resource.MustParse("100Mi"),
				},
			},
			Command:       []string{"sh", "-c", cpusetCmd},
			RestartPolicy: ctnAttr.restartPolicy,
		}
		if ctnAttr.restartPolicy != nil && *ctnAttr.restartPolicy == v1.ContainerRestartPolicyAlways {
			ctn.Command = []string{"sh", "-c", cpusetAndSleepCmd}
		}
		containers = append(containers, ctn)
	}

	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: podName,
		},
		Spec: v1.PodSpec{
			RestartPolicy:  v1.RestartPolicyNever,
			InitContainers: containers,
			Containers: []v1.Container{
				{
					Name:  "regular",
					Image: busyboxImage,
					Resources: v1.ResourceRequirements{
						Requests: v1.ResourceList{
							v1.ResourceCPU:    resource.MustParse("1000m"),
							v1.ResourceMemory: resource.MustParse("100Mi"),
						},
						Limits: v1.ResourceList{
							v1.ResourceCPU:    resource.MustParse("1000m"),
							v1.ResourceMemory: resource.MustParse("100Mi"),
						},
					},
					Command: []string{"sh", "-c", cpusetAndSleepCmd},
				},
			},
		},
	}
}

func deletePodSyncByName(ctx context.Context, f *framework.Framework, podName string) {
	gp := int64(0)
	delOpts := metav1.DeleteOptions{
		GracePeriodSeconds: &gp,
	}
	e2epod.NewPodClient(f).DeleteSync(ctx, podName, delOpts, e2epod.DefaultPodDeletionTimeout)
}

func deletePods(ctx context.Context, f *framework.Framework, podNames []string) {
	for _, podName := range podNames {
		deletePodSyncByName(ctx, f, podName)
	}
}

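// Worked example (informal): on a node with capacity 8 CPUs and allocatable
// 7.5 CPUs, the reserved amount is 0.5 CPU; RoundUp(0) bumps that to one
// full core, so the function reports cap=8, alloc=7, reserved=1. The tests
// below therefore reason only in whole, exclusively allocatable CPUs.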
func getLocalNodeCPUDetails(ctx context.Context, f *framework.Framework) (cpuCapVal int64, cpuAllocVal int64, cpuResVal int64) {
	localNodeCap := getLocalNode(ctx, f).Status.Capacity
	cpuCap := localNodeCap[v1.ResourceCPU]
	localNodeAlloc := getLocalNode(ctx, f).Status.Allocatable
	cpuAlloc := localNodeAlloc[v1.ResourceCPU]
	cpuRes := cpuCap.DeepCopy()
	cpuRes.Sub(cpuAlloc)

	// RoundUp reserved CPUs to get only integer cores.
	cpuRes.RoundUp(0)

	return cpuCap.Value(), cpuCap.Value() - cpuRes.Value(), cpuRes.Value()
}

func waitForContainerRemoval(ctx context.Context, containerName, podName, podNS string) {
	rs, _, err := getCRIClient()
	framework.ExpectNoError(err)
	gomega.Eventually(ctx, func(ctx context.Context) bool {
		containers, err := rs.ListContainers(ctx, &runtimeapi.ContainerFilter{
			LabelSelector: map[string]string{
				types.KubernetesPodNameLabel:       podName,
				types.KubernetesPodNamespaceLabel:  podNS,
				types.KubernetesContainerNameLabel: containerName,
			},
		})
		if err != nil {
			return false
		}
		return len(containers) == 0
	}, 2*time.Minute, 1*time.Second).Should(gomega.BeTrue())
}

func isHTEnabled() bool {
	outData, err := exec.Command("/bin/sh", "-c", "lscpu | grep \"Thread(s) per core:\" | cut -d \":\" -f 2").Output()
	framework.ExpectNoError(err)

	threadsPerCore, err := strconv.Atoi(strings.TrimSpace(string(outData)))
	framework.ExpectNoError(err)

	return threadsPerCore > 1
}

func isMultiNUMA() bool {
	outData, err := exec.Command("/bin/sh", "-c", "lscpu | grep \"NUMA node(s):\" | cut -d \":\" -f 2").Output()
	framework.ExpectNoError(err)

	numaNodes, err := strconv.Atoi(strings.TrimSpace(string(outData)))
	framework.ExpectNoError(err)

	return numaNodes > 1
}

func getSMTLevel() int {
	cpuID := 0 // this is just the most likely CPU to be present in a random system. No special meaning besides this.
	out, err := exec.Command("/bin/sh", "-c", fmt.Sprintf("cat /sys/devices/system/cpu/cpu%d/topology/thread_siblings_list | tr -d \"\n\r\"", cpuID)).Output()
	framework.ExpectNoError(err)
	// the number of thread siblings is the SMT level
	// example: 2-way SMT means 2 sibling threads for each thread
	cpus, err := cpuset.Parse(strings.TrimSpace(string(out)))
	framework.ExpectNoError(err)
	return cpus.Size()
}

func getCPUSiblingList(cpuRes int64) string {
	out, err := exec.Command("/bin/sh", "-c", fmt.Sprintf("cat /sys/devices/system/cpu/cpu%d/topology/thread_siblings_list | tr -d \"\n\r\"", cpuRes)).Output()
	framework.ExpectNoError(err)
	return string(out)
}

func getCoreSiblingList(cpuRes int64) string {
	out, err := exec.Command("/bin/sh", "-c", fmt.Sprintf("cat /sys/devices/system/cpu/cpu%d/topology/core_siblings_list | tr -d \"\n\r\"", cpuRes)).Output()
	framework.ExpectNoError(err)
	return string(out)
}

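// Example (informal, mirrors the SMT-alignment test below): enabling the
// static policy with CPU 0 reserved and the full-pcpus-only option:
//
//	newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
//		policyName:              string(cpumanager.PolicyStatic),
//		reservedSystemCPUs:      cpuset.New(0),
//		enableCPUManagerOptions: true,
//		options:                 map[string]string{cpumanager.FullPCPUsOnlyOption: "true"},
//	})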
type cpuManagerKubeletArguments struct {
	policyName              string
	enableCPUManagerOptions bool
	reservedSystemCPUs      cpuset.CPUSet
	options                 map[string]string
}

func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, kubeletArguments *cpuManagerKubeletArguments) *kubeletconfig.KubeletConfiguration {
	newCfg := oldCfg.DeepCopy()
	if newCfg.FeatureGates == nil {
		newCfg.FeatureGates = make(map[string]bool)
	}

	newCfg.FeatureGates["CPUManagerPolicyOptions"] = kubeletArguments.enableCPUManagerOptions
	newCfg.FeatureGates["CPUManagerPolicyBetaOptions"] = kubeletArguments.enableCPUManagerOptions
	newCfg.FeatureGates["CPUManagerPolicyAlphaOptions"] = kubeletArguments.enableCPUManagerOptions

	newCfg.CPUManagerPolicy = kubeletArguments.policyName
	newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second}

	if kubeletArguments.options != nil {
		newCfg.CPUManagerPolicyOptions = kubeletArguments.options
	}

	if kubeletArguments.reservedSystemCPUs.Size() > 0 {
		cpus := kubeletArguments.reservedSystemCPUs.String()
		framework.Logf("configureCPUManagerInKubelet: using reservedSystemCPUs=%q", cpus)
		newCfg.ReservedSystemCPUs = cpus
	} else {
		// The Kubelet panics if either kube-reserved or system-reserved is not set
		// when CPU Manager is enabled. Set cpu in kube-reserved > 0 so that
		// kubelet doesn't panic.
		if newCfg.KubeReserved == nil {
			newCfg.KubeReserved = map[string]string{}
		}

		if _, ok := newCfg.KubeReserved["cpu"]; !ok {
			newCfg.KubeReserved["cpu"] = "200m"
		}
	}

	return newCfg
}

func runGuPodTest(ctx context.Context, f *framework.Framework, cpuCount int) {
	var pod *v1.Pod

	ctnAttrs := []ctnAttribute{
		{
			ctnName:    "gu-container",
			cpuRequest: fmt.Sprintf("%dm", 1000*cpuCount),
			cpuLimit:   fmt.Sprintf("%dm", 1000*cpuCount),
		},
	}
	pod = makeCPUManagerPod("gu-pod", ctnAttrs)
	pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

	ginkgo.By("checking if the expected cpuset was assigned")
	// any full CPU is fine - we cannot, nor should we, predict which one
	for _, cnt := range pod.Spec.Containers {
		ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))

		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
		framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)

		framework.Logf("got pod logs: %v", logs)
		cpus, err := cpuset.Parse(strings.TrimSpace(logs))
		framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cnt.Name, pod.Name)

		gomega.Expect(cpus.Size()).To(gomega.Equal(cpuCount), "expected cpu set size == %d, got %q", cpuCount, cpus.String())
	}

	ginkgo.By("deleting the pods and waiting for container removal")
	deletePods(ctx, f, []string{pod.Name})
	waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
}

func runNonGuPodTest(ctx context.Context, f *framework.Framework, cpuCap int64) {
	var ctnAttrs []ctnAttribute
	var err error
	var pod *v1.Pod
	var expAllowedCPUsListRegex string

	ctnAttrs = []ctnAttribute{
		{
			ctnName:    "non-gu-container",
			cpuRequest: "100m",
			cpuLimit:   "200m",
		},
	}
	pod = makeCPUManagerPod("non-gu-pod", ctnAttrs)
	pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

	ginkgo.By("checking if the expected cpuset was assigned")
	expAllowedCPUsListRegex = fmt.Sprintf("^0-%d\n$", cpuCap-1)
	// on the single CPU node the only possible value is 0
	if cpuCap == 1 {
		expAllowedCPUsListRegex = "^0\n$"
	}
	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod.Spec.Containers[0].Name, pod.Name)

	ginkgo.By("deleting the pods and waiting for container removal")
	deletePods(ctx, f, []string{pod.Name})
	waitForContainerRemoval(ctx, pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
}

func mustParseCPUSet(s string) cpuset.CPUSet {
	res, err := cpuset.Parse(s)
	framework.ExpectNoError(err)
	return res
}

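// Note (informal): in the tests below the expected exclusive CPU is derived
// from sysfs topology rather than hardcoded. On hyperthreaded systems the
// static policy tends to hand out CPU 0's thread sibling first (CPU 0 itself
// stays in the shared pool), so the tests look it up via thread_siblings_list.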
func runMultipleGuNonGuPods(ctx context.Context, f *framework.Framework, cpuCap int64, cpuAlloc int64) {
	var cpuListString, expAllowedCPUsListRegex string
	var cpuList []int
	var cpu1 int
	var cset cpuset.CPUSet
	var err error
	var ctnAttrs []ctnAttribute
	var pod1, pod2 *v1.Pod

	ctnAttrs = []ctnAttribute{
		{
			ctnName:    "gu-container",
			cpuRequest: "1000m",
			cpuLimit:   "1000m",
		},
	}
	pod1 = makeCPUManagerPod("gu-pod", ctnAttrs)
	pod1 = e2epod.NewPodClient(f).CreateSync(ctx, pod1)

	ctnAttrs = []ctnAttribute{
		{
			ctnName:    "non-gu-container",
			cpuRequest: "200m",
			cpuLimit:   "300m",
		},
	}
	pod2 = makeCPUManagerPod("non-gu-pod", ctnAttrs)
	pod2 = e2epod.NewPodClient(f).CreateSync(ctx, pod2)

	ginkgo.By("checking if the expected cpuset was assigned")
	cpu1 = 1
	if isHTEnabled() {
		cpuList = mustParseCPUSet(getCPUSiblingList(0)).List()
		cpu1 = cpuList[1]
	} else if isMultiNUMA() {
		cpuList = mustParseCPUSet(getCoreSiblingList(0)).List()
		if len(cpuList) > 1 {
			cpu1 = cpuList[1]
		}
	}
	expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu1)
	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod1.Name, pod1.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod1.Spec.Containers[0].Name, pod1.Name)

	cpuListString = "0"
	if cpuAlloc > 2 {
		cset = mustParseCPUSet(fmt.Sprintf("0-%d", cpuCap-1))
		cpuListString = cset.Difference(cpuset.New(cpu1)).String()
	}
	expAllowedCPUsListRegex = fmt.Sprintf("^%s\n$", cpuListString)
	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod2.Name, pod2.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod2.Spec.Containers[0].Name, pod2.Name)
	ginkgo.By("deleting the pods and waiting for container removal")
	deletePods(ctx, f, []string{pod1.Name, pod2.Name})
	waitForContainerRemoval(ctx, pod1.Spec.Containers[0].Name, pod1.Name, pod1.Namespace)
	waitForContainerRemoval(ctx, pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace)
}

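// Example expectation (informal): a container requesting 2000m under the
// static policy should get a full physical core on hyperthreaded systems,
// i.e. both thread siblings (e.g. "2-3" when CPUs 0 and 1 share core 0),
// and two adjacent cores (e.g. "1-2") otherwise.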
func runMultipleCPUGuPod(ctx context.Context, f *framework.Framework) {
	var cpuListString, expAllowedCPUsListRegex string
	var cpuList []int
	var cset cpuset.CPUSet
	var err error
	var ctnAttrs []ctnAttribute
	var pod *v1.Pod

	ctnAttrs = []ctnAttribute{
		{
			ctnName:    "gu-container",
			cpuRequest: "2000m",
			cpuLimit:   "2000m",
		},
	}
	pod = makeCPUManagerPod("gu-pod", ctnAttrs)
	pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

	ginkgo.By("checking if the expected cpuset was assigned")
	cpuListString = "1-2"
	if isMultiNUMA() {
		cpuList = mustParseCPUSet(getCoreSiblingList(0)).List()
		if len(cpuList) > 1 {
			cset = mustParseCPUSet(getCPUSiblingList(int64(cpuList[1])))
			if !isHTEnabled() && len(cpuList) > 2 {
				cset = mustParseCPUSet(fmt.Sprintf("%d-%d", cpuList[1], cpuList[2]))
			}
			cpuListString = cset.String()
		}
	} else if isHTEnabled() {
		cpuListString = "2-3"
		cpuList = mustParseCPUSet(getCPUSiblingList(0)).List()
		if cpuList[1] != 1 {
			cset = mustParseCPUSet(getCPUSiblingList(1))
			cpuListString = cset.String()
		}
	}
	expAllowedCPUsListRegex = fmt.Sprintf("^%s\n$", cpuListString)
	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod.Spec.Containers[0].Name, pod.Name)

	ginkgo.By("deleting the pods and waiting for container removal")
	deletePods(ctx, f, []string{pod.Name})
	waitForContainerRemoval(ctx, pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
}

func runMultipleCPUContainersGuPod(ctx context.Context, f *framework.Framework) {
	var expAllowedCPUsListRegex string
	var cpuList []int
	var cpu1, cpu2 int
	var err error
	var ctnAttrs []ctnAttribute
	var pod *v1.Pod
	ctnAttrs = []ctnAttribute{
		{
			ctnName:    "gu-container1",
			cpuRequest: "1000m",
			cpuLimit:   "1000m",
		},
		{
			ctnName:    "gu-container2",
			cpuRequest: "1000m",
			cpuLimit:   "1000m",
		},
	}
	pod = makeCPUManagerPod("gu-pod", ctnAttrs)
	pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

	ginkgo.By("checking if the expected cpuset was assigned")
	cpu1, cpu2 = 1, 2
	if isHTEnabled() {
		cpuList = mustParseCPUSet(getCPUSiblingList(0)).List()
		if cpuList[1] != 1 {
			cpu1, cpu2 = cpuList[1], 1
		}
		if isMultiNUMA() {
			cpuList = mustParseCPUSet(getCoreSiblingList(0)).List()
			if len(cpuList) > 1 {
				cpu2 = cpuList[1]
			}
		}
	} else if isMultiNUMA() {
		cpuList = mustParseCPUSet(getCoreSiblingList(0)).List()
		if len(cpuList) > 2 {
			cpu1, cpu2 = cpuList[1], cpuList[2]
		}
	}
	// group the alternation so the anchors apply to the whole line:
	// each container must get exactly one of the two expected CPUs.
	expAllowedCPUsListRegex = fmt.Sprintf("^(%d|%d)\n$", cpu1, cpu2)
	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod.Spec.Containers[0].Name, pod.Name)

	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod.Name, pod.Spec.Containers[1].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod.Spec.Containers[1].Name, pod.Name)

	ginkgo.By("deleting the pods and waiting for container removal")
	deletePods(ctx, f, []string{pod.Name})
	waitForContainerRemoval(ctx, pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
	waitForContainerRemoval(ctx, pod.Spec.Containers[1].Name, pod.Name, pod.Namespace)
}

func runMultipleGuPods(ctx context.Context, f *framework.Framework) {
	var expAllowedCPUsListRegex string
	var cpuList []int
	var cpu1, cpu2 int
	var err error
	var ctnAttrs []ctnAttribute
	var pod1, pod2 *v1.Pod

	ctnAttrs = []ctnAttribute{
		{
			ctnName:    "gu-container1",
			cpuRequest: "1000m",
			cpuLimit:   "1000m",
		},
	}
	pod1 = makeCPUManagerPod("gu-pod1", ctnAttrs)
	pod1 = e2epod.NewPodClient(f).CreateSync(ctx, pod1)

	ctnAttrs = []ctnAttribute{
		{
			ctnName:    "gu-container2",
			cpuRequest: "1000m",
			cpuLimit:   "1000m",
		},
	}
	pod2 = makeCPUManagerPod("gu-pod2", ctnAttrs)
	pod2 = e2epod.NewPodClient(f).CreateSync(ctx, pod2)

	ginkgo.By("checking if the expected cpuset was assigned")
	cpu1, cpu2 = 1, 2
	if isHTEnabled() {
		cpuList = mustParseCPUSet(getCPUSiblingList(0)).List()
		if cpuList[1] != 1 {
			cpu1, cpu2 = cpuList[1], 1
		}
		if isMultiNUMA() {
			cpuList = mustParseCPUSet(getCoreSiblingList(0)).List()
			if len(cpuList) > 1 {
				cpu2 = cpuList[1]
			}
		}
	} else if isMultiNUMA() {
		cpuList = mustParseCPUSet(getCoreSiblingList(0)).List()
		if len(cpuList) > 2 {
			cpu1, cpu2 = cpuList[1], cpuList[2]
		}
	}
	expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu1)
	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod1.Name, pod1.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod1.Spec.Containers[0].Name, pod1.Name)

	expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu2)
	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod2.Name, pod2.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod2.Spec.Containers[0].Name, pod2.Name)
	ginkgo.By("deleting the pods and waiting for container removal")
	deletePods(ctx, f, []string{pod1.Name, pod2.Name})
	waitForContainerRemoval(ctx, pod1.Spec.Containers[0].Name, pod1.Name, pod1.Namespace)
	waitForContainerRemoval(ctx, pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace)
}

func runCPUManagerTests(f *framework.Framework) {
	var cpuCap, cpuAlloc int64
	var oldCfg *kubeletconfig.KubeletConfiguration
	var expAllowedCPUsListRegex string
	var cpuList []int
	var cpu1 int
	var err error
	var ctnAttrs []ctnAttribute
	var pod *v1.Pod

	ginkgo.BeforeEach(func(ctx context.Context) {
		var err error
		if oldCfg == nil {
			oldCfg, err = getCurrentKubeletConfig(ctx)
			framework.ExpectNoError(err)
		}
	})

	ginkgo.It("should assign CPUs as expected based on the Pod spec", func(ctx context.Context) {
		cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f)

		// Skip CPU Manager tests altogether if the CPU capacity < 2.
		if cpuCap < 2 {
			e2eskipper.Skipf("Skipping CPU Manager tests since the CPU capacity < 2")
		}

		// Enable CPU Manager in the kubelet.
		newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
			policyName:         string(cpumanager.PolicyStatic),
			reservedSystemCPUs: cpuset.CPUSet{},
		})
		updateKubeletConfig(ctx, f, newCfg, true)

		ginkgo.By("running a non-Gu pod")
		runNonGuPodTest(ctx, f, cpuCap)

		ginkgo.By("running a Gu pod")
		runGuPodTest(ctx, f, 1)

		ginkgo.By("running multiple Gu and non-Gu pods")
		runMultipleGuNonGuPods(ctx, f, cpuCap, cpuAlloc)

		// Skip rest of the tests if CPU capacity < 3.
		if cpuCap < 3 {
			e2eskipper.Skipf("Skipping rest of the CPU Manager tests since CPU capacity < 3")
		}

		ginkgo.By("running a Gu pod requesting multiple CPUs")
		runMultipleCPUGuPod(ctx, f)

		ginkgo.By("running a Gu pod with multiple containers requesting integer CPUs")
		runMultipleCPUContainersGuPod(ctx, f)

		ginkgo.By("running multiple Gu pods")
		runMultipleGuPods(ctx, f)

		ginkgo.By("test for automatic removal of inactive pods from the cpumanager state file")
		// First run a Gu Pod,
		// second, disable the cpu manager in the kubelet,
		// then delete the Gu Pod,
		// then re-enable the cpu manager in the kubelet,
		// and finally wait for the reconcile process to clean up the state file: if the assignments map is empty,
		// it proves that the automatic cleanup in the reconcile process is in effect.
		ginkgo.By("running a Gu pod for test remove")
		ctnAttrs = []ctnAttribute{
			{
				ctnName:    "gu-container-testremove",
				cpuRequest: "1000m",
				cpuLimit:   "1000m",
			},
		}
		pod = makeCPUManagerPod("gu-pod-testremove", ctnAttrs)
		pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

		ginkgo.By("checking if the expected cpuset was assigned")
		cpu1 = 1
		if isHTEnabled() {
			cpuList = mustParseCPUSet(getCPUSiblingList(0)).List()
			cpu1 = cpuList[1]
		} else if isMultiNUMA() {
			cpuList = mustParseCPUSet(getCoreSiblingList(0)).List()
			if len(cpuList) > 1 {
				cpu1 = cpuList[1]
			}
		}
		expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu1)
		err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
		framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
			pod.Spec.Containers[0].Name, pod.Name)

		deletePodSyncByName(ctx, f, pod.Name)
		// we need to wait for all containers to really be gone so the cpumanager reconcile loop will not rewrite the cpu_manager_state.
		// this is in turn needed because we will have an unavoidable (in the current framework) race with the
		// reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire
		waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
	})

	ginkgo.It("should assign CPUs as expected with enhanced policy based on strict SMT alignment", func(ctx context.Context) {
		fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption)
		_, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f)
		smtLevel := getSMTLevel()

		// strict SMT alignment is trivially verified and granted on non-SMT systems
		if smtLevel < 2 {
			e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt)
		}

		// our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus
		if cpuAlloc < int64(smtLevel*2) {
			e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < 4", fullCPUsOnlyOpt)
		}

		framework.Logf("SMT level %d", smtLevel)

		// TODO: we assume the first available CPUID is 0, which is pretty fair, but we should probably
		// check what we do have in the node.
		cpuPolicyOptions := map[string]string{
			cpumanager.FullPCPUsOnlyOption: "true",
		}
		newCfg := configureCPUManagerInKubelet(oldCfg,
			&cpuManagerKubeletArguments{
				policyName:              string(cpumanager.PolicyStatic),
				reservedSystemCPUs:      cpuset.New(0),
				enableCPUManagerOptions: true,
				options:                 cpuPolicyOptions,
			},
		)
		updateKubeletConfig(ctx, f, newCfg, true)

		// the order between negative and positive doesn't really matter
		runSMTAlignmentNegativeTests(ctx, f)
		runSMTAlignmentPositiveTests(ctx, f, smtLevel)
	})

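	// Restartable init containers (sidecar containers, restartPolicy: Always)
	// keep running for the whole pod lifetime, so - unlike with regular init
	// containers - the CPU manager must not hand their exclusive CPUs to the
	// containers that start after them. (Informal summary of the behavior
	// exercised below.)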
	ginkgo.It("should not reuse CPUs of restartable init containers [NodeAlphaFeature:SidecarContainers]", func(ctx context.Context) {
		cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f)

		// Skip rest of the tests if CPU capacity < 3.
		if cpuCap < 3 {
			e2eskipper.Skipf("Skipping rest of the CPU Manager tests since CPU capacity < 3, got %d", cpuCap)
		}

		// Enable CPU Manager in the kubelet.
		newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
			policyName:         string(cpumanager.PolicyStatic),
			reservedSystemCPUs: cpuset.CPUSet{},
		})
		updateKubeletConfig(ctx, f, newCfg, true)

		ginkgo.By("running a Gu pod with a regular init container and a restartable init container")
		ctrAttrs := []ctnAttribute{
			{
				ctnName:    "gu-init-container1",
				cpuRequest: "1000m",
				cpuLimit:   "1000m",
			},
			{
				ctnName:       "gu-restartable-init-container2",
				cpuRequest:    "1000m",
				cpuLimit:      "1000m",
				restartPolicy: &containerRestartPolicyAlways,
			},
		}
		pod := makeCPUManagerInitContainersPod("gu-pod", ctrAttrs)
		pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

		ginkgo.By("checking if the expected cpuset was assigned")
		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.InitContainers[0].Name)
		framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", pod.Spec.InitContainers[0].Name, pod.Name)

		framework.Logf("got pod logs: %v", logs)
		reusableCPUs, err := cpuset.Parse(strings.TrimSpace(logs))
		framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", pod.Spec.InitContainers[0].Name, pod.Name)

		gomega.Expect(reusableCPUs.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", reusableCPUs.String())

		logs, err = e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.InitContainers[1].Name)
		framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", pod.Spec.InitContainers[1].Name, pod.Name)

		framework.Logf("got pod logs: %v", logs)
		nonReusableCPUs, err := cpuset.Parse(strings.TrimSpace(logs))
		framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", pod.Spec.InitContainers[1].Name, pod.Name)

		gomega.Expect(nonReusableCPUs.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", nonReusableCPUs.String())

		logs, err = e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.Containers[0].Name)
		framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod.Spec.Containers[0].Name, pod.Name)

		framework.Logf("got pod logs: %v", logs)
		cpus, err := cpuset.Parse(strings.TrimSpace(logs))
		framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", pod.Spec.Containers[0].Name, pod.Name)

		gomega.Expect(cpus.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", cpus.String())

		gomega.Expect(reusableCPUs.Equals(nonReusableCPUs)).To(gomega.BeTrue(), "expected reusable cpuset [%s] to be equal to non-reusable cpuset [%s]", reusableCPUs.String(), nonReusableCPUs.String())
		gomega.Expect(nonReusableCPUs.Intersection(cpus).IsEmpty()).To(gomega.BeTrue(), "expected non-reusable cpuset [%s] to be disjoint from cpuset [%s]", nonReusableCPUs.String(), cpus.String())

		ginkgo.By("deleting the pods and waiting for container removal")
		deletePods(ctx, f, []string{pod.Name})
		waitForContainerRemoval(ctx, pod.Spec.InitContainers[0].Name, pod.Name, pod.Namespace)
		waitForContainerRemoval(ctx, pod.Spec.InitContainers[1].Name, pod.Name, pod.Namespace)
		waitForContainerRemoval(ctx, pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
	})

	ginkgo.AfterEach(func(ctx context.Context) {
		updateKubeletConfig(ctx, f, oldCfg, true)
	})
}

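// With full-pcpus-only enabled, the static policy rejects at admission any
// container whose CPU request is not a multiple of the SMT level; the pod is
// failed with a reason matching SMTAlignmentError (see isSMTAlignmentError).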
func runSMTAlignmentNegativeTests(ctx context.Context, f *framework.Framework) {
	// negative test: try to run a container whose requests aren't a multiple of SMT level, expect a rejection
	ctnAttrs := []ctnAttribute{
		{
			ctnName:    "gu-container-neg",
			cpuRequest: "1000m",
			cpuLimit:   "1000m",
		},
	}
	pod := makeCPUManagerPod("gu-pod", ctnAttrs)
	// CreateSync would wait for the pod to become Ready - which will never happen if the production code works as intended!
	pod = e2epod.NewPodClient(f).Create(ctx, pod)

	err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Failed", 30*time.Second, func(pod *v1.Pod) (bool, error) {
		if pod.Status.Phase != v1.PodPending {
			return true, nil
		}
		return false, nil
	})
	framework.ExpectNoError(err)
	pod, err = e2epod.NewPodClient(f).Get(ctx, pod.Name, metav1.GetOptions{})
	framework.ExpectNoError(err)

	if pod.Status.Phase != v1.PodFailed {
		framework.Failf("pod %s not failed: %v", pod.Name, pod.Status)
	}
	if !isSMTAlignmentError(pod) {
		framework.Failf("pod %s failed for wrong reason: %q", pod.Name, pod.Status.Reason)
	}

	deletePodSyncByName(ctx, f, pod.Name)
	// we need to wait for all containers to really be gone so the cpumanager reconcile loop will not rewrite the cpu_manager_state.
	// this is in turn needed because we will have an unavoidable (in the current framework) race with the
	// reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire
	waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
}

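// Sysfs note (informal) for the positive checks below: thread_siblings_list
// is a cpulist string, so a 2-way SMT sibling pair can read either "0-1"
// (adjacent numbering) or "0,64" (split numbering); cpuset.Parse handles both.
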
func runSMTAlignmentPositiveTests(ctx context.Context, f *framework.Framework, smtLevel int) {
	// positive test: try to run a container whose requests are a multiple of SMT level, check that the allocated cores
	// 1. are core siblings
	// 2. take a full core
	// WARNING: this assumes 2-way SMT systems - we don't know how to access other SMT levels.
	// this means that on more-than-2-way SMT systems this test will prove nothing
	ctnAttrs := []ctnAttribute{
		{
			ctnName:    "gu-container-pos",
			cpuRequest: "2000m",
			cpuLimit:   "2000m",
		},
	}
	pod := makeCPUManagerPod("gu-pod", ctnAttrs)
	pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

	for _, cnt := range pod.Spec.Containers {
		ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))

		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
		framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)

		framework.Logf("got pod logs: %v", logs)
		cpus, err := cpuset.Parse(strings.TrimSpace(logs))
		framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cnt.Name, pod.Name)

		validateSMTAlignment(cpus, smtLevel, pod, &cnt)
	}

	deletePodSyncByName(ctx, f, pod.Name)
	// we need to wait for all containers to really be gone so the cpumanager reconcile loop will not rewrite the cpu_manager_state.
	// this is in turn needed because we will have an unavoidable (in the current framework) race with the
	// reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire
	waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
}

func validateSMTAlignment(cpus cpuset.CPUSet, smtLevel int, pod *v1.Pod, cnt *v1.Container) {
	framework.Logf("validating cpus: %v", cpus)

	if cpus.Size()%smtLevel != 0 {
		framework.Failf("pod %q cnt %q received non-smt-multiple cpuset %v (SMT level %d)", pod.Name, cnt.Name, cpus, smtLevel)
	}

	// now check that all the given cpus are thread siblings.
	// the easiest way to do so is to rebuild the expected set of siblings from all the cpus we got.
	// if the expected set matches the given set, the given set was good.
	siblingsCPUs := cpuset.New()
	for _, cpuID := range cpus.UnsortedList() {
		threadSiblings, err := cpuset.Parse(strings.TrimSpace(getCPUSiblingList(int64(cpuID))))
		framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cnt.Name, pod.Name)
		siblingsCPUs = siblingsCPUs.Union(threadSiblings)
	}

	framework.Logf("siblings cpus: %v", siblingsCPUs)
	if !siblingsCPUs.Equals(cpus) {
		framework.Failf("pod %q cnt %q received non-smt-aligned cpuset %v (expected %v)", pod.Name, cnt.Name, cpus, siblingsCPUs)
	}
}

func isSMTAlignmentError(pod *v1.Pod) bool {
	re := regexp.MustCompile(`SMT.*Alignment.*Error`)
	return re.MatchString(pod.Status.Reason)
}

// Serial because the test updates kubelet configuration.
var _ = SIGDescribe("CPU Manager", framework.WithSerial(), feature.CPUManager, func() {
	f := framework.NewDefaultFramework("cpu-manager-test")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged

	ginkgo.Context("With kubelet config updated with static CPU Manager policy run the CPU Manager tests", func() {
		runCPUManagerTests(f)
	})
})
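
// Running locally (informal note): node e2e suites like this one are
// typically invoked from the kubernetes repo root with something like
//
//	make test-e2e-node FOCUS="CPU Manager"
//
// Exact flags (remote vs. local mode, SKIP patterns) depend on the
// environment; see the test/e2e_node documentation.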