k8s.io/kubernetes@v1.29.3/test/e2e_node/hugepages_test.go

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package e2enode
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"os"
    23  	"os/exec"
    24  	"strconv"
    25  	"strings"
    26  	"time"
    27  
    28  	"github.com/onsi/ginkgo/v2"
    29  	"github.com/onsi/gomega"
    30  
    31  	v1 "k8s.io/api/core/v1"
    32  	"k8s.io/apimachinery/pkg/api/resource"
    33  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    34  	"k8s.io/apimachinery/pkg/types"
    35  	"k8s.io/apimachinery/pkg/util/uuid"
    36  	"k8s.io/kubernetes/pkg/kubelet/cm"
    37  	"k8s.io/kubernetes/test/e2e/feature"
    38  	"k8s.io/kubernetes/test/e2e/framework"
    39  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    40  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    41  	admissionapi "k8s.io/pod-security-admission/api"
    42  )
    43  
    44  const (
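         	// hugepage sizes are expressed in kB so they match the sysfs hugepages directory names (e.g. hugepages-2048kB)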
    45  	hugepagesSize2M          = 2048
    46  	hugepagesSize1G          = 1048576
    47  	hugepagesDirPrefix       = "/sys/kernel/mm/hugepages/hugepages"
    48  	hugepagesCapacityFile    = "nr_hugepages"
    49  	hugepagesResourceName2Mi = "hugepages-2Mi"
    50  	hugepagesResourceName1Gi = "hugepages-1Gi"
    51  	hugepagesCgroup2MB       = "hugetlb.2MB"
    52  	hugepagesCgroup1GB       = "hugetlb.1GB"
    53  	mediumHugepages          = "HugePages"
    54  	mediumHugepages2Mi       = "HugePages-2Mi"
    55  	mediumHugepages1Gi       = "HugePages-1Gi"
    56  )
    57  
    58  var (
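         	// resourceToSize and resourceToCgroup map an extended hugepages resource name to its page size in kB and to the hugetlb cgroup file prefix used for that size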
    59  	resourceToSize = map[string]int{
    60  		hugepagesResourceName2Mi: hugepagesSize2M,
    61  		hugepagesResourceName1Gi: hugepagesSize1G,
    62  	}
    63  	resourceToCgroup = map[string]string{
    64  		hugepagesResourceName2Mi: hugepagesCgroup2MB,
    65  		hugepagesResourceName1Gi: hugepagesCgroup1GB,
    66  	}
    67  )
    68  
     69  // makePodToVerifyHugePages returns a pod that verifies that the given pod cgroup has the expected hugetlb limit
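         // As an illustrative sketch (the cgroup path is a placeholder, not taken from a real run), a 6Mi limit checked
         // against the 2Mi hugetlb cgroup on cgroup v1 makes the container run roughly:
         //   expected=6291456; actual=$(cat /tmp/hugetlb/<pod cgroup>/hugetlb.2MB.limit_in_bytes); if [ "$expected" -ne "$actual" ]; then exit 1; fi;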
    70  func makePodToVerifyHugePages(baseName string, hugePagesLimit resource.Quantity, hugepagesCgroup string) *v1.Pod {
    71  	// convert the cgroup name to its literal form
    72  	cgroupName := cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup, baseName)
    73  	cgroupFsName := ""
    74  	if kubeletCfg.CgroupDriver == "systemd" {
    75  		cgroupFsName = cgroupName.ToSystemd()
    76  	} else {
    77  		cgroupFsName = cgroupName.ToCgroupfs()
    78  	}
    79  
    80  	hugetlbLimitFile := ""
     81  	// the verification command compares the expected limit against the actual value in the pod cgroup's hugetlb limit file (hugetlb.<size>.limit_in_bytes on cgroup v1, hugetlb.<size>.max on cgroup v2)
    82  	if IsCgroup2UnifiedMode() {
    83  		hugetlbLimitFile = fmt.Sprintf("/tmp/%s/%s.max", cgroupFsName, hugepagesCgroup)
    84  	} else {
    85  		hugetlbLimitFile = fmt.Sprintf("/tmp/hugetlb/%s/%s.limit_in_bytes", cgroupFsName, hugepagesCgroup)
    86  	}
    87  
    88  	command := fmt.Sprintf("expected=%v; actual=$(cat %v); if [ \"$expected\" -ne \"$actual\" ]; then exit 1; fi; ", hugePagesLimit.Value(), hugetlbLimitFile)
    89  	framework.Logf("Pod to run command: %v", command)
    90  	pod := &v1.Pod{
    91  		ObjectMeta: metav1.ObjectMeta{
    92  			Name: "pod" + string(uuid.NewUUID()),
    93  		},
    94  		Spec: v1.PodSpec{
    95  			RestartPolicy: v1.RestartPolicyNever,
    96  			Containers: []v1.Container{
    97  				{
    98  					Image:   busyboxImage,
    99  					Name:    "container" + string(uuid.NewUUID()),
   100  					Command: []string{"sh", "-c", command},
   101  					VolumeMounts: []v1.VolumeMount{
   102  						{
   103  							Name:      "sysfscgroup",
   104  							MountPath: "/tmp",
   105  						},
   106  					},
   107  				},
   108  			},
   109  			Volumes: []v1.Volume{
   110  				{
   111  					Name: "sysfscgroup",
   112  					VolumeSource: v1.VolumeSource{
   113  						HostPath: &v1.HostPathVolumeSource{Path: "/sys/fs/cgroup"},
   114  					},
   115  				},
   116  			},
   117  		},
   118  	}
   119  	return pod
   120  }
   121  
    122  // configureHugePages attempts to reserve hugepagesCount hugepages of the specified size (in kB), optionally on a single NUMA node
   123  func configureHugePages(hugepagesSize int, hugepagesCount int, numaNodeID *int) error {
   124  	// Compact memory to make bigger contiguous blocks of memory available
   125  	// before allocating huge pages.
   126  	// https://www.kernel.org/doc/Documentation/sysctl/vm.txt
   127  	if _, err := os.Stat("/proc/sys/vm/compact_memory"); err == nil {
   128  		if err := exec.Command("/bin/sh", "-c", "echo 1 > /proc/sys/vm/compact_memory").Run(); err != nil {
   129  			return err
   130  		}
   131  	}
   132  
   133  	// e.g. hugepages/hugepages-2048kB/nr_hugepages
   134  	hugepagesSuffix := fmt.Sprintf("hugepages/hugepages-%dkB/%s", hugepagesSize, hugepagesCapacityFile)
   135  
   136  	// e.g. /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
   137  	hugepagesFile := fmt.Sprintf("/sys/kernel/mm/%s", hugepagesSuffix)
   138  	if numaNodeID != nil {
   139  		// e.g. /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
   140  		hugepagesFile = fmt.Sprintf("/sys/devices/system/node/node%d/%s", *numaNodeID, hugepagesSuffix)
   141  	}
   142  
    143  	// Reserve the requested number of hugepages
   144  	// e.g. /bin/sh -c "echo 5 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
   145  	command := fmt.Sprintf("echo %d > %s", hugepagesCount, hugepagesFile)
   146  	if err := exec.Command("/bin/sh", "-c", command).Run(); err != nil {
   147  		return err
   148  	}
   149  
   150  	// verify that the number of hugepages was updated
    151  	// e.g. /bin/sh -c "cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
   152  	command = fmt.Sprintf("cat %s", hugepagesFile)
   153  	outData, err := exec.Command("/bin/sh", "-c", command).Output()
   154  	if err != nil {
   155  		return err
   156  	}
   157  
   158  	numHugePages, err := strconv.Atoi(strings.TrimSpace(string(outData)))
   159  	if err != nil {
   160  		return err
   161  	}
   162  
   163  	framework.Logf("Hugepages total is set to %v", numHugePages)
   164  	if numHugePages == hugepagesCount {
   165  		return nil
   166  	}
   167  
   168  	return fmt.Errorf("expected hugepages %v, but found %v", hugepagesCount, numHugePages)
   169  }
   170  
    171  // isHugePageAvailable returns true if hugepages of the specified size (in kB) are available on the host
   172  func isHugePageAvailable(hugepagesSize int) bool {
   173  	path := fmt.Sprintf("%s-%dkB/%s", hugepagesDirPrefix, hugepagesSize, hugepagesCapacityFile)
   174  	if _, err := os.Stat(path); err != nil {
   175  		return false
   176  	}
   177  	return true
   178  }
   179  
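         // getHugepagesTestPod returns a pod spec that requests the given resource limits and mounts the provided
         // hugepages volumes into a single busybox container.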
   180  func getHugepagesTestPod(f *framework.Framework, limits v1.ResourceList, mounts []v1.VolumeMount, volumes []v1.Volume) *v1.Pod {
   181  	return &v1.Pod{
   182  		ObjectMeta: metav1.ObjectMeta{
   183  			GenerateName: "hugepages-",
   184  			Namespace:    f.Namespace.Name,
   185  		},
   186  		Spec: v1.PodSpec{
   187  			Containers: []v1.Container{
   188  				{
   189  					Name:  "container" + string(uuid.NewUUID()),
   190  					Image: busyboxImage,
   191  					Resources: v1.ResourceRequirements{
   192  						Limits: limits,
   193  					},
   194  					Command:      []string{"sleep", "3600"},
   195  					VolumeMounts: mounts,
   196  				},
   197  			},
   198  			Volumes: volumes,
   199  		},
   200  	}
   201  }
   202  
   203  // Serial because the test updates kubelet configuration.
   204  var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, "[NodeSpecialFeature:HugePages]", func() {
   205  	f := framework.NewDefaultFramework("hugepages-test")
   206  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
   207  
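         	// The kubelet reports hugepages capacity only for sizes actually present on the node, so a bogus
         	// hugepages-3Mi resource patched into the node status should disappear after a kubelet restart.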
   208  	ginkgo.It("should remove resources for huge page sizes no longer supported", func(ctx context.Context) {
   209  		ginkgo.By("mimicking support for 9Mi of 3Mi huge page memory by patching the node status")
   210  		patch := []byte(`[{"op": "add", "path": "/status/capacity/hugepages-3Mi", "value": "9Mi"}, {"op": "add", "path": "/status/allocatable/hugepages-3Mi", "value": "9Mi"}]`)
   211  		result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(ctx)
   212  		framework.ExpectNoError(result.Error(), "while patching")
   213  
   214  		node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
   215  		framework.ExpectNoError(err, "while getting node status")
   216  
   217  		ginkgo.By("Verifying that the node now supports huge pages with size 3Mi")
   218  		value, ok := node.Status.Capacity["hugepages-3Mi"]
   219  		if !ok {
   220  			framework.Failf("capacity should contain resource hugepages-3Mi: %v", node.Status.Capacity)
   221  		}
   222  		gomega.Expect(value.String()).To(gomega.Equal("9Mi"), "huge pages with size 3Mi should be supported")
   223  
    224  		ginkgo.By("restarting the kubelet and verifying that huge pages with size 3Mi are not supported")
   225  		restartKubelet(true)
   226  
    227  		ginkgo.By("verifying that the hugepages-3Mi resource is no longer present")
   228  		gomega.Eventually(ctx, func() bool {
   229  			node, err = f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
   230  			framework.ExpectNoError(err, "while getting node status")
   231  			_, isPresent := node.Status.Capacity["hugepages-3Mi"]
   232  			return isPresent
   233  		}, 30*time.Second, framework.Poll).Should(gomega.BeFalse())
   234  	})
   235  
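         	// Conversely, removing a supported size from the node status while the kubelet is stopped should be
         	// undone once the kubelet starts again and re-reports its hugepages capacity.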
   236  	ginkgo.It("should add resources for new huge page sizes on kubelet restart", func(ctx context.Context) {
   237  		ginkgo.By("Stopping kubelet")
   238  		startKubelet := stopKubelet()
   239  		ginkgo.By(`Patching away support for hugepage resource "hugepages-2Mi"`)
   240  		patch := []byte(`[{"op": "remove", "path": "/status/capacity/hugepages-2Mi"}, {"op": "remove", "path": "/status/allocatable/hugepages-2Mi"}]`)
   241  		result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(ctx)
   242  		framework.ExpectNoError(result.Error(), "while patching")
   243  
   244  		ginkgo.By("Starting kubelet again")
   245  		startKubelet()
   246  
   247  		ginkgo.By("verifying that the hugepages-2Mi resource is present")
   248  		gomega.Eventually(ctx, func() bool {
   249  			node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
   250  			framework.ExpectNoError(err, "while getting node status")
   251  			_, isPresent := node.Status.Capacity["hugepages-2Mi"]
   252  			return isPresent
   253  		}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
   254  	})
   255  
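         	// The specs below pre-allocate hugepages on the host, restart the kubelet so it picks them up, run a
         	// pod that requests them, and verify the hugetlb mounts and cgroup limits of that pod.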
    256  	ginkgo.When("starting a pod that requests hugepages", func() {
   257  		var (
   258  			testpod   *v1.Pod
   259  			limits    v1.ResourceList
   260  			mounts    []v1.VolumeMount
   261  			volumes   []v1.Volume
   262  			hugepages map[string]int
   263  		)
   264  
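         		// setHugepages pre-allocates the requested number of hugepages for every size in the hugepages map,
         		// skipping the test when the host does not support a given size.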
   265  		setHugepages := func(ctx context.Context) {
   266  			for hugepagesResource, count := range hugepages {
   267  				size := resourceToSize[hugepagesResource]
    268  				ginkgo.By(fmt.Sprintf("Verifying that hugepages of size %dkB are supported", size))
   269  				if !isHugePageAvailable(size) {
    270  					e2eskipper.Skipf("skipping test because hugepages of size %dkB are not supported", size)
   271  					return
   272  				}
   273  
    274  				ginkgo.By(fmt.Sprintf("Configuring the host to pre-allocate %d hugepages of size %dkB", count, size))
   275  				gomega.Eventually(ctx, func() error {
   276  					if err := configureHugePages(size, count, nil); err != nil {
   277  						return err
   278  					}
   279  					return nil
   280  				}, 30*time.Second, framework.Poll).Should(gomega.BeNil())
   281  			}
   282  		}
   283  
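         		// waitForHugepages polls the node status until the reported capacity matches the pre-allocated
         		// hugepages for every configured size.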
   284  		waitForHugepages := func(ctx context.Context) {
   285  			ginkgo.By("Waiting for hugepages resource to become available on the local node")
   286  			gomega.Eventually(ctx, func(ctx context.Context) error {
   287  				node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
   288  				if err != nil {
   289  					return err
   290  				}
   291  
   292  				for hugepagesResource, count := range hugepages {
   293  					capacity, ok := node.Status.Capacity[v1.ResourceName(hugepagesResource)]
   294  					if !ok {
   295  						return fmt.Errorf("the node does not have the resource %s", hugepagesResource)
   296  					}
   297  
   298  					size, succeed := capacity.AsInt64()
   299  					if !succeed {
   300  						return fmt.Errorf("failed to convert quantity to int64")
   301  					}
   302  
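         					// resourceToSize is in kB, so multiply by 1024 to get the expected capacity in bytes
         					// (e.g. 5 pages * 2048kB * 1024 = 10485760 bytes).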
   303  					expectedSize := count * resourceToSize[hugepagesResource] * 1024
   304  					if size != int64(expectedSize) {
   305  						return fmt.Errorf("the actual size %d is different from the expected one %d", size, expectedSize)
   306  					}
   307  				}
   308  				return nil
   309  			}, time.Minute, framework.Poll).Should(gomega.BeNil())
   310  		}
   311  
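         		// releaseHugepages hands the pre-allocated hugepages back to the kernel by writing 0 to the
         		// corresponding nr_hugepages files under /sys/kernel/mm/hugepages.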
   312  		releaseHugepages := func(ctx context.Context) {
   313  			ginkgo.By("Releasing hugepages")
   314  			gomega.Eventually(ctx, func() error {
   315  				for hugepagesResource := range hugepages {
   316  					command := fmt.Sprintf("echo 0 > %s-%dkB/%s", hugepagesDirPrefix, resourceToSize[hugepagesResource], hugepagesCapacityFile)
   317  					if err := exec.Command("/bin/sh", "-c", command).Run(); err != nil {
   318  						return err
   319  					}
   320  				}
   321  				return nil
   322  			}, 30*time.Second, framework.Poll).Should(gomega.BeNil())
   323  		}
   324  
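         		// runHugePagesTests registers the shared spec that checks the hugetlb mounts and cgroup limits of the
         		// pod created in JustBeforeEach.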
   325  		runHugePagesTests := func() {
   326  			ginkgo.It("should set correct hugetlb mount and limit under the container cgroup", func(ctx context.Context) {
   327  				ginkgo.By("getting mounts for the test pod")
   328  				command := []string{"mount"}
   329  				out := e2epod.ExecCommandInContainer(f, testpod.Name, testpod.Spec.Containers[0].Name, command...)
   330  
   331  				for _, mount := range mounts {
   332  					ginkgo.By(fmt.Sprintf("checking that the hugetlb mount %s exists under the container", mount.MountPath))
   333  					gomega.Expect(out).To(gomega.ContainSubstring(mount.MountPath))
   334  				}
   335  
   336  				for resourceName := range hugepages {
   337  					verifyPod := makePodToVerifyHugePages(
   338  						"pod"+string(testpod.UID),
   339  						testpod.Spec.Containers[0].Resources.Limits[v1.ResourceName(resourceName)],
   340  						resourceToCgroup[resourceName],
   341  					)
   342  					ginkgo.By("checking if the expected hugetlb settings were applied")
   343  					e2epod.NewPodClient(f).Create(ctx, verifyPod)
   344  					err := e2epod.WaitForPodSuccessInNamespace(ctx, f.ClientSet, verifyPod.Name, f.Namespace.Name)
   345  					framework.ExpectNoError(err)
   346  				}
   347  			})
   348  		}
   349  
   350  		// setup
   351  		ginkgo.JustBeforeEach(func(ctx context.Context) {
   352  			setHugepages(ctx)
   353  
   354  			ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
   355  			restartKubelet(true)
   356  
   357  			waitForHugepages(ctx)
   358  
   359  			pod := getHugepagesTestPod(f, limits, mounts, volumes)
   360  
    361  			ginkgo.By("running a test pod that requests hugepages")
   362  			testpod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
   363  		})
   364  
    365  		// we should use JustAfterEach because the framework tears down the client in its AfterEach method
   366  		ginkgo.JustAfterEach(func(ctx context.Context) {
   367  			ginkgo.By(fmt.Sprintf("deleting test pod %s", testpod.Name))
   368  			e2epod.NewPodClient(f).DeleteSync(ctx, testpod.Name, metav1.DeleteOptions{}, 2*time.Minute)
   369  
   370  			releaseHugepages(ctx)
   371  
   372  			ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
   373  			restartKubelet(true)
   374  
   375  			waitForHugepages(ctx)
   376  		})
   377  
    378  		ginkgo.Context("with resource requests that contain only one hugepages resource", func() {
   379  			ginkgo.Context("with the backward compatible API", func() {
   380  				ginkgo.BeforeEach(func() {
   381  					limits = v1.ResourceList{
   382  						v1.ResourceCPU:           resource.MustParse("10m"),
   383  						v1.ResourceMemory:        resource.MustParse("100Mi"),
   384  						hugepagesResourceName2Mi: resource.MustParse("6Mi"),
   385  					}
   386  					mounts = []v1.VolumeMount{
   387  						{
   388  							Name:      "hugepages",
   389  							MountPath: "/hugepages",
   390  						},
   391  					}
   392  					volumes = []v1.Volume{
   393  						{
   394  							Name: "hugepages",
   395  							VolumeSource: v1.VolumeSource{
   396  								EmptyDir: &v1.EmptyDirVolumeSource{
   397  									Medium: mediumHugepages,
   398  								},
   399  							},
   400  						},
   401  					}
   402  					hugepages = map[string]int{hugepagesResourceName2Mi: 5}
   403  				})
   404  				// run tests
   405  				runHugePagesTests()
   406  			})
   407  
   408  			ginkgo.Context("with the new API", func() {
   409  				ginkgo.BeforeEach(func() {
   410  					limits = v1.ResourceList{
   411  						v1.ResourceCPU:           resource.MustParse("10m"),
   412  						v1.ResourceMemory:        resource.MustParse("100Mi"),
   413  						hugepagesResourceName2Mi: resource.MustParse("6Mi"),
   414  					}
   415  					mounts = []v1.VolumeMount{
   416  						{
   417  							Name:      "hugepages-2mi",
   418  							MountPath: "/hugepages-2Mi",
   419  						},
   420  					}
   421  					volumes = []v1.Volume{
   422  						{
   423  							Name: "hugepages-2mi",
   424  							VolumeSource: v1.VolumeSource{
   425  								EmptyDir: &v1.EmptyDirVolumeSource{
   426  									Medium: mediumHugepages2Mi,
   427  								},
   428  							},
   429  						},
   430  					}
   431  					hugepages = map[string]int{hugepagesResourceName2Mi: 5}
   432  				})
   433  
   434  				runHugePagesTests()
   435  			})
   436  
   437  			ginkgo.JustAfterEach(func() {
   438  				hugepages = map[string]int{hugepagesResourceName2Mi: 0}
   439  			})
   440  		})
   441  
    442  		ginkgo.Context("with resource requests that contain multiple hugepages resources", func() {
   443  			ginkgo.BeforeEach(func() {
   444  				hugepages = map[string]int{
   445  					hugepagesResourceName2Mi: 5,
   446  					hugepagesResourceName1Gi: 1,
   447  				}
   448  				limits = v1.ResourceList{
   449  					v1.ResourceCPU:           resource.MustParse("10m"),
   450  					v1.ResourceMemory:        resource.MustParse("100Mi"),
   451  					hugepagesResourceName2Mi: resource.MustParse("6Mi"),
   452  					hugepagesResourceName1Gi: resource.MustParse("1Gi"),
   453  				}
   454  				mounts = []v1.VolumeMount{
   455  					{
   456  						Name:      "hugepages-2mi",
   457  						MountPath: "/hugepages-2Mi",
   458  					},
   459  					{
   460  						Name:      "hugepages-1gi",
   461  						MountPath: "/hugepages-1Gi",
   462  					},
   463  				}
   464  				volumes = []v1.Volume{
   465  					{
   466  						Name: "hugepages-2mi",
   467  						VolumeSource: v1.VolumeSource{
   468  							EmptyDir: &v1.EmptyDirVolumeSource{
   469  								Medium: mediumHugepages2Mi,
   470  							},
   471  						},
   472  					},
   473  					{
   474  						Name: "hugepages-1gi",
   475  						VolumeSource: v1.VolumeSource{
   476  							EmptyDir: &v1.EmptyDirVolumeSource{
   477  								Medium: mediumHugepages1Gi,
   478  							},
   479  						},
   480  					},
   481  				}
   482  			})
   483  
   484  			runHugePagesTests()
   485  
   486  			ginkgo.JustAfterEach(func() {
   487  				hugepages = map[string]int{
   488  					hugepagesResourceName2Mi: 0,
   489  					hugepagesResourceName1Gi: 0,
   490  				}
   491  			})
   492  		})
   493  	})
   494  })