k8s.io/kubernetes@v1.29.3/test/e2e_node/hugepages_test.go

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"strconv"
	"strings"
	"time"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/kubernetes/pkg/kubelet/cm"
	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	admissionapi "k8s.io/pod-security-admission/api"
)

const (
	hugepagesSize2M          = 2048
	hugepagesSize1G          = 1048576
	hugepagesDirPrefix       = "/sys/kernel/mm/hugepages/hugepages"
	hugepagesCapacityFile    = "nr_hugepages"
	hugepagesResourceName2Mi = "hugepages-2Mi"
	hugepagesResourceName1Gi = "hugepages-1Gi"
	hugepagesCgroup2MB       = "hugetlb.2MB"
	hugepagesCgroup1GB       = "hugetlb.1GB"
	mediumHugepages          = "HugePages"
	mediumHugepages2Mi       = "HugePages-2Mi"
	mediumHugepages1Gi       = "HugePages-1Gi"
)

var (
	resourceToSize = map[string]int{
		hugepagesResourceName2Mi: hugepagesSize2M,
		hugepagesResourceName1Gi: hugepagesSize1G,
	}
	resourceToCgroup = map[string]string{
		hugepagesResourceName2Mi: hugepagesCgroup2MB,
		hugepagesResourceName1Gi: hugepagesCgroup1GB,
	}
)

// makePodToVerifyHugePages returns a pod that verifies the hugetlb limit of the specified cgroup
func makePodToVerifyHugePages(baseName string, hugePagesLimit resource.Quantity, hugepagesCgroup string) *v1.Pod {
	// convert the cgroup name to its literal form
	cgroupName := cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup, baseName)
	cgroupFsName := ""
	if kubeletCfg.CgroupDriver == "systemd" {
		cgroupFsName = cgroupName.ToSystemd()
	} else {
		cgroupFsName = cgroupName.ToCgroupfs()
	}

	hugetlbLimitFile := ""
	// the command below compares the expected value against the actual value read from the pod cgroup's hugetlb limit file
	if IsCgroup2UnifiedMode() {
		hugetlbLimitFile = fmt.Sprintf("/tmp/%s/%s.max", cgroupFsName, hugepagesCgroup)
	} else {
		hugetlbLimitFile = fmt.Sprintf("/tmp/hugetlb/%s/%s.limit_in_bytes", cgroupFsName, hugepagesCgroup)
	}

	command := fmt.Sprintf("expected=%v; actual=$(cat %v); if [ \"$expected\" -ne \"$actual\" ]; then exit 1; fi; ", hugePagesLimit.Value(), hugetlbLimitFile)
	framework.Logf("Pod to run command: %v", command)
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: "pod" + string(uuid.NewUUID()),
		},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{
				{
					Image:   busyboxImage,
					Name:    "container" + string(uuid.NewUUID()),
					Command: []string{"sh", "-c", command},
					VolumeMounts: []v1.VolumeMount{
						{
							Name:      "sysfscgroup",
							MountPath: "/tmp",
						},
					},
				},
			},
			Volumes: []v1.Volume{
				{
					Name: "sysfscgroup",
					VolumeSource: v1.VolumeSource{
						HostPath: &v1.HostPathVolumeSource{Path: "/sys/fs/cgroup"},
					},
				},
			},
		},
	}
	return pod
}

// configureHugePages attempts to reserve the given number of hugepages of the specified size,
// optionally on a specific NUMA node
func configureHugePages(hugepagesSize int, hugepagesCount int, numaNodeID *int) error {
	// Compact memory to make bigger contiguous blocks of memory available
	// before allocating huge pages.
	// https://www.kernel.org/doc/Documentation/sysctl/vm.txt
	if _, err := os.Stat("/proc/sys/vm/compact_memory"); err == nil {
		if err := exec.Command("/bin/sh", "-c", "echo 1 > /proc/sys/vm/compact_memory").Run(); err != nil {
			return err
		}
	}

	// e.g. hugepages/hugepages-2048kB/nr_hugepages
	hugepagesSuffix := fmt.Sprintf("hugepages/hugepages-%dkB/%s", hugepagesSize, hugepagesCapacityFile)

	// e.g. /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
	hugepagesFile := fmt.Sprintf("/sys/kernel/mm/%s", hugepagesSuffix)
	if numaNodeID != nil {
		// e.g. /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
		hugepagesFile = fmt.Sprintf("/sys/devices/system/node/node%d/%s", *numaNodeID, hugepagesSuffix)
	}

	// Reserve the requested number of hugepages
	// e.g. /bin/sh -c "echo 5 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
	command := fmt.Sprintf("echo %d > %s", hugepagesCount, hugepagesFile)
	if err := exec.Command("/bin/sh", "-c", command).Run(); err != nil {
		return err
	}

	// verify that the number of hugepages was updated
	// e.g. /bin/sh -c "cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
	command = fmt.Sprintf("cat %s", hugepagesFile)
	outData, err := exec.Command("/bin/sh", "-c", command).Output()
	if err != nil {
		return err
	}

	numHugePages, err := strconv.Atoi(strings.TrimSpace(string(outData)))
	if err != nil {
		return err
	}

	framework.Logf("Hugepages total is set to %v", numHugePages)
	if numHugePages == hugepagesCount {
		return nil
	}

	return fmt.Errorf("expected hugepages %v, but found %v", hugepagesCount, numHugePages)
}

// isHugePageAvailable returns true if hugepages of the specified size are available on the host
func isHugePageAvailable(hugepagesSize int) bool {
	path := fmt.Sprintf("%s-%dkB/%s", hugepagesDirPrefix, hugepagesSize, hugepagesCapacityFile)
	if _, err := os.Stat(path); err != nil {
		return false
	}
	return true
}

// getHugepagesTestPod returns a pod that requests the given resource limits and mounts the provided hugepages volumes
func getHugepagesTestPod(f *framework.Framework, limits v1.ResourceList, mounts []v1.VolumeMount, volumes []v1.Volume) *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: "hugepages-",
			Namespace:    f.Namespace.Name,
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:  "container" + string(uuid.NewUUID()),
					Image: busyboxImage,
					Resources: v1.ResourceRequirements{
						Limits: limits,
					},
					Command:      []string{"sleep", "3600"},
					VolumeMounts: mounts,
				},
			},
			Volumes: volumes,
		},
	}
}

// Serial because the test updates kubelet configuration.
var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, "[NodeSpecialFeature:HugePages]", func() {
	f := framework.NewDefaultFramework("hugepages-test")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged

	ginkgo.It("should remove resources for huge page sizes no longer supported", func(ctx context.Context) {
		ginkgo.By("mimicking support for 9Mi of 3Mi huge page memory by patching the node status")
		patch := []byte(`[{"op": "add", "path": "/status/capacity/hugepages-3Mi", "value": "9Mi"}, {"op": "add", "path": "/status/allocatable/hugepages-3Mi", "value": "9Mi"}]`)
		result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(ctx)
		framework.ExpectNoError(result.Error(), "while patching")

		node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
		framework.ExpectNoError(err, "while getting node status")

		ginkgo.By("Verifying that the node now supports huge pages with size 3Mi")
		value, ok := node.Status.Capacity["hugepages-3Mi"]
		if !ok {
			framework.Failf("capacity should contain resource hugepages-3Mi: %v", node.Status.Capacity)
		}
		gomega.Expect(value.String()).To(gomega.Equal("9Mi"), "huge pages with size 3Mi should be supported")

		ginkgo.By("restarting the kubelet and verifying that huge pages with size 3Mi are not supported")
		restartKubelet(true)

		ginkgo.By("verifying that the hugepages-3Mi resource is no longer present")
		gomega.Eventually(ctx, func() bool {
			node, err = f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
			framework.ExpectNoError(err, "while getting node status")
			_, isPresent := node.Status.Capacity["hugepages-3Mi"]
			return isPresent
		}, 30*time.Second, framework.Poll).Should(gomega.BeFalse())
	})

	ginkgo.It("should add resources for new huge page sizes on kubelet restart", func(ctx context.Context) {
		ginkgo.By("Stopping kubelet")
		startKubelet := stopKubelet()
		ginkgo.By(`Patching away support for hugepage resource "hugepages-2Mi"`)
		patch := []byte(`[{"op": "remove", "path": "/status/capacity/hugepages-2Mi"}, {"op": "remove", "path": "/status/allocatable/hugepages-2Mi"}]`)
		result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(ctx)
		framework.ExpectNoError(result.Error(), "while patching")

		ginkgo.By("Starting kubelet again")
		startKubelet()

		ginkgo.By("verifying that the hugepages-2Mi resource is present")
		gomega.Eventually(ctx, func() bool {
			node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
			framework.ExpectNoError(err, "while getting node status")
			_, isPresent := node.Status.Capacity["hugepages-2Mi"]
			return isPresent
		}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
	})

	ginkgo.When("start the pod", func() {
		var (
			testpod   *v1.Pod
			limits    v1.ResourceList
			mounts    []v1.VolumeMount
			volumes   []v1.Volume
			hugepages map[string]int
		)

		setHugepages := func(ctx context.Context) {
			for hugepagesResource, count := range hugepages {
				size := resourceToSize[hugepagesResource]
				ginkgo.By(fmt.Sprintf("Verifying hugepages %d are supported", size))
				if !isHugePageAvailable(size) {
					e2eskipper.Skipf("skipping test because hugepages of size %d not supported", size)
					return
				}

				ginkgo.By(fmt.Sprintf("Configuring the host to reserve %d of pre-allocated hugepages of size %d", count, size))
				gomega.Eventually(ctx, func() error {
					if err := configureHugePages(size, count, nil); err != nil {
						return err
					}
					return nil
				}, 30*time.Second, framework.Poll).Should(gomega.BeNil())
			}
		}

		waitForHugepages := func(ctx context.Context) {
			ginkgo.By("Waiting for hugepages resource to become available on the local node")
			gomega.Eventually(ctx, func(ctx context.Context) error {
				node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
				if err != nil {
					return err
				}

				for hugepagesResource, count := range hugepages {
					capacity, ok := node.Status.Capacity[v1.ResourceName(hugepagesResource)]
					if !ok {
						return fmt.Errorf("the node does not have the resource %s", hugepagesResource)
					}

					size, succeed := capacity.AsInt64()
					if !succeed {
						return fmt.Errorf("failed to convert quantity to int64")
					}

					expectedSize := count * resourceToSize[hugepagesResource] * 1024
					if size != int64(expectedSize) {
						return fmt.Errorf("the actual size %d is different from the expected one %d", size, expectedSize)
					}
				}
				return nil
			}, time.Minute, framework.Poll).Should(gomega.BeNil())
		}

		releaseHugepages := func(ctx context.Context) {
			ginkgo.By("Releasing hugepages")
			gomega.Eventually(ctx, func() error {
				for hugepagesResource := range hugepages {
					command := fmt.Sprintf("echo 0 > %s-%dkB/%s", hugepagesDirPrefix, resourceToSize[hugepagesResource], hugepagesCapacityFile)
					if err := exec.Command("/bin/sh", "-c", command).Run(); err != nil {
						return err
					}
				}
				return nil
			}, 30*time.Second, framework.Poll).Should(gomega.BeNil())
		}

		runHugePagesTests := func() {
			ginkgo.It("should set correct hugetlb mount and limit under the container cgroup", func(ctx context.Context) {
				ginkgo.By("getting mounts for the test pod")
				command := []string{"mount"}
				out := e2epod.ExecCommandInContainer(f, testpod.Name, testpod.Spec.Containers[0].Name, command...)

				for _, mount := range mounts {
					ginkgo.By(fmt.Sprintf("checking that the hugetlb mount %s exists under the container", mount.MountPath))
					gomega.Expect(out).To(gomega.ContainSubstring(mount.MountPath))
				}

				for resourceName := range hugepages {
					verifyPod := makePodToVerifyHugePages(
						"pod"+string(testpod.UID),
						testpod.Spec.Containers[0].Resources.Limits[v1.ResourceName(resourceName)],
						resourceToCgroup[resourceName],
					)
					ginkgo.By("checking if the expected hugetlb settings were applied")
					e2epod.NewPodClient(f).Create(ctx, verifyPod)
					err := e2epod.WaitForPodSuccessInNamespace(ctx, f.ClientSet, verifyPod.Name, f.Namespace.Name)
					framework.ExpectNoError(err)
				}
			})
		}

		// setup
		ginkgo.JustBeforeEach(func(ctx context.Context) {
			setHugepages(ctx)

			ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
			restartKubelet(true)

			waitForHugepages(ctx)

			pod := getHugepagesTestPod(f, limits, mounts, volumes)

			ginkgo.By("running a test pod that requests hugepages")
			testpod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
		})

		// use JustAfterEach because the framework tears down the client in its AfterEach method
		ginkgo.JustAfterEach(func(ctx context.Context) {
			ginkgo.By(fmt.Sprintf("deleting test pod %s", testpod.Name))
			e2epod.NewPodClient(f).DeleteSync(ctx, testpod.Name, metav1.DeleteOptions{}, 2*time.Minute)

			releaseHugepages(ctx)

			ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
			restartKubelet(true)

			waitForHugepages(ctx)
		})

		ginkgo.Context("with the resources requests that contain only one hugepages resource ", func() {
			ginkgo.Context("with the backward compatible API", func() {
				ginkgo.BeforeEach(func() {
					limits = v1.ResourceList{
						v1.ResourceCPU:           resource.MustParse("10m"),
						v1.ResourceMemory:        resource.MustParse("100Mi"),
						hugepagesResourceName2Mi: resource.MustParse("6Mi"),
					}
					mounts = []v1.VolumeMount{
						{
							Name:      "hugepages",
							MountPath: "/hugepages",
						},
					}
					volumes = []v1.Volume{
						{
							Name: "hugepages",
							VolumeSource: v1.VolumeSource{
								EmptyDir: &v1.EmptyDirVolumeSource{
									Medium: mediumHugepages,
								},
							},
						},
					}
					hugepages = map[string]int{hugepagesResourceName2Mi: 5}
				})
				// run tests
				runHugePagesTests()
			})

			ginkgo.Context("with the new API", func() {
				ginkgo.BeforeEach(func() {
					limits = v1.ResourceList{
						v1.ResourceCPU:           resource.MustParse("10m"),
						v1.ResourceMemory:        resource.MustParse("100Mi"),
						hugepagesResourceName2Mi: resource.MustParse("6Mi"),
					}
					mounts = []v1.VolumeMount{
						{
							Name:      "hugepages-2mi",
							MountPath: "/hugepages-2Mi",
						},
					}
					volumes = []v1.Volume{
						{
							Name: "hugepages-2mi",
							VolumeSource: v1.VolumeSource{
								EmptyDir: &v1.EmptyDirVolumeSource{
									Medium: mediumHugepages2Mi,
								},
							},
						},
					}
					hugepages = map[string]int{hugepagesResourceName2Mi: 5}
				})

				runHugePagesTests()
			})

			ginkgo.JustAfterEach(func() {
				hugepages = map[string]int{hugepagesResourceName2Mi: 0}
			})
		})

		ginkgo.Context("with the resources requests that contain multiple hugepages resources ", func() {
			ginkgo.BeforeEach(func() {
				hugepages = map[string]int{
					hugepagesResourceName2Mi: 5,
					hugepagesResourceName1Gi: 1,
				}
				limits = v1.ResourceList{
					v1.ResourceCPU:           resource.MustParse("10m"),
					v1.ResourceMemory:        resource.MustParse("100Mi"),
					hugepagesResourceName2Mi: resource.MustParse("6Mi"),
					hugepagesResourceName1Gi: resource.MustParse("1Gi"),
				}
				mounts = []v1.VolumeMount{
					{
						Name:      "hugepages-2mi",
						MountPath: "/hugepages-2Mi",
					},
					{
						Name:      "hugepages-1gi",
						MountPath: "/hugepages-1Gi",
					},
				}
				volumes = []v1.Volume{
					{
						Name: "hugepages-2mi",
						VolumeSource: v1.VolumeSource{
							EmptyDir: &v1.EmptyDirVolumeSource{
								Medium: mediumHugepages2Mi,
							},
						},
					},
					{
						Name: "hugepages-1gi",
						VolumeSource: v1.VolumeSource{
							EmptyDir: &v1.EmptyDirVolumeSource{
								Medium: mediumHugepages1Gi,
							},
						},
					},
				}
			})

			runHugePagesTests()

			ginkgo.JustAfterEach(func() {
				hugepages = map[string]int{
					hugepagesResourceName2Mi: 0,
					hugepagesResourceName1Gi: 0,
				}
			})
		})
	})
})