k8s.io/kubernetes@v1.29.3/test/e2e_node/node_container_manager_test.go

//go:build linux
// +build linux

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	"k8s.io/kubernetes/pkg/kubelet/cm"
	"k8s.io/kubernetes/pkg/kubelet/stats/pidlimit"
	admissionapi "k8s.io/pod-security-admission/api"

	"k8s.io/kubernetes/test/e2e/framework"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	"k8s.io/kubernetes/test/e2e/nodefeature"
	e2enodekubelet "k8s.io/kubernetes/test/e2e_node/kubeletconfig"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
)

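// setDesiredConfiguration configures the kubelet to enforce Node Allocatable on the
// pods, kube-reserved, and system-reserved cgroups, with explicit CPU, memory, and PID
// reservations and a hard memory eviction threshold.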
func setDesiredConfiguration(initialConfig *kubeletconfig.KubeletConfiguration) {
	initialConfig.EnforceNodeAllocatable = []string{"pods", kubeReservedCgroup, systemReservedCgroup}
	initialConfig.SystemReserved = map[string]string{
		string(v1.ResourceCPU):    "100m",
		string(v1.ResourceMemory): "100Mi",
		string(pidlimit.PIDs):     "1000",
	}
	initialConfig.KubeReserved = map[string]string{
		string(v1.ResourceCPU):    "100m",
		string(v1.ResourceMemory): "100Mi",
		string(pidlimit.PIDs):     "738",
	}
	initialConfig.EvictionHard = map[string]string{"memory.available": "100Mi"}
	// Necessary for allocatable cgroup creation.
	initialConfig.CgroupsPerQOS = true
	initialConfig.KubeReservedCgroup = kubeReservedCgroup
	initialConfig.SystemReservedCgroup = systemReservedCgroup
}

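// This Serial test reconfigures the kubelet on the node under test and verifies the
// resulting Node Allocatable cgroup settings; see runTest below for the details.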
var _ = SIGDescribe("Node Container Manager", framework.WithSerial(), func() {
	f := framework.NewDefaultFramework("node-container-manager")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	f.Describe("Validate Node Allocatable", nodefeature.NodeAllocatable, func() {
		ginkgo.It("sets up the node and runs the test", func(ctx context.Context) {
			framework.ExpectNoError(runTest(ctx, f))
		})
	})
})

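// expectFileValToEqual reads an integer value from the file at filePath and returns an
// error unless it is within +/- delta of expectedValue.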
func expectFileValToEqual(filePath string, expectedValue, delta int64) error {
	out, err := os.ReadFile(filePath)
	if err != nil {
		return fmt.Errorf("failed to read file %q: %w", filePath, err)
	}
	actual, err := strconv.ParseInt(strings.TrimSpace(string(out)), 10, 64)
	if err != nil {
		return fmt.Errorf("failed to parse output: %w", err)
	}

	// Ensure that values are within a delta range to work around rounding errors.
	if (actual < (expectedValue - delta)) || (actual > (expectedValue + delta)) {
		return fmt.Errorf("Expected value at %q to be between %d and %d. Got %d", filePath, (expectedValue - delta), (expectedValue + delta), actual)
	}
	return nil
}

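// getAllocatableLimits subtracts the given CPU, memory, and PID reservations from node
// capacity and returns the expected allocatable quantity for each resource.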
func getAllocatableLimits(cpu, memory, pids string, capacity v1.ResourceList) (*resource.Quantity, *resource.Quantity, *resource.Quantity) {
	var allocatableCPU, allocatableMemory, allocatablePIDs *resource.Quantity
	// Total cpu reservation is 200m.
	for k, v := range capacity {
		if k == v1.ResourceCPU {
			c := v.DeepCopy()
			allocatableCPU = &c
			allocatableCPU.Sub(resource.MustParse(cpu))
		}
		if k == v1.ResourceMemory {
			c := v.DeepCopy()
			allocatableMemory = &c
			allocatableMemory.Sub(resource.MustParse(memory))
		}
	}
	// Process IDs are not a node allocatable, so we have to do this ad hoc
	pidlimits, err := pidlimit.Stats()
	if err == nil && pidlimits != nil && pidlimits.MaxPID != nil {
		allocatablePIDs = resource.NewQuantity(int64(*pidlimits.MaxPID), resource.DecimalSI)
		allocatablePIDs.Sub(resource.MustParse(pids))
	}
	return allocatableCPU, allocatableMemory, allocatablePIDs
}

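// Names of the temporary top-level cgroups that back the kube-reserved and
// system-reserved reservations in this test.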
const (
	kubeReservedCgroup   = "kube-reserved"
	systemReservedCgroup = "system-reserved"
)

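// createIfNotExists creates the given cgroup if it does not already exist.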
func createIfNotExists(cm cm.CgroupManager, cgroupConfig *cm.CgroupConfig) error {
	if !cm.Exists(cgroupConfig.Name) {
		if err := cm.Create(cgroupConfig); err != nil {
			return err
		}
	}
	return nil
}

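// createTemporaryCgroupsForReservation creates the kube-reserved and system-reserved
// cgroups under the root cgroup so the kubelet can enforce reservations against them.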
func createTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error {
	// Create kube reserved cgroup
	cgroupConfig := &cm.CgroupConfig{
		Name: cm.NewCgroupName(cm.RootCgroupName, kubeReservedCgroup),
	}
	if err := createIfNotExists(cgroupManager, cgroupConfig); err != nil {
		return err
	}
	// Create system reserved cgroup
	cgroupConfig.Name = cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup)

	return createIfNotExists(cgroupManager, cgroupConfig)
}

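// destroyTemporaryCgroupsForReservation removes the kube-reserved and system-reserved
// cgroups created by createTemporaryCgroupsForReservation.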
func destroyTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error {
	// Destroy kube reserved cgroup
	cgroupConfig := &cm.CgroupConfig{
		Name: cm.NewCgroupName(cm.RootCgroupName, kubeReservedCgroup),
	}
	if err := cgroupManager.Destroy(cgroupConfig); err != nil {
		return err
	}
	// Destroy system reserved cgroup
	cgroupConfig.Name = cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup)
	return cgroupManager.Destroy(cgroupConfig)
}

// convertSharesToWeight converts from cgroup v1 cpu.shares to cgroup v2 cpu.weight.
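// The formula maps the cgroup v1 shares range [2, 262144] onto the cgroup v2 weight
// range [1, 10000]; for example, shares=2 -> weight=1, shares=1024 -> weight=39, and
// shares=262144 -> weight=10000.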
func convertSharesToWeight(shares int64) int64 {
	return 1 + ((shares-2)*9999)/262142
}

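// runTest reconfigures the kubelet with setDesiredConfiguration, restarts it, and then
// verifies that the node allocatable, kube-reserved, and system-reserved cgroups carry
// the expected CPU, memory, and PID limits, and that the allocatable values reported in
// the node status account for the reservations and eviction thresholds.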
func runTest(ctx context.Context, f *framework.Framework) error {
	var oldCfg *kubeletconfig.KubeletConfiguration
	subsystems, err := cm.GetCgroupSubsystems()
	if err != nil {
		return err
	}
	// Get current kubelet configuration
	oldCfg, err = getCurrentKubeletConfig(ctx)
	if err != nil {
		return err
	}

	// Test needs to be updated to make it run properly on systemd.
	// In its current state it will result in kubelet error since
	// kubeReservedCgroup and systemReservedCgroup are not configured
	// correctly for systemd.
	// See: https://github.com/kubernetes/kubernetes/issues/102394
	if oldCfg.CgroupDriver == "systemd" {
		e2eskipper.Skipf("unable to run test when using systemd as cgroup driver")
	}

	// Create a cgroup manager object for manipulating cgroups.
	cgroupManager := cm.NewCgroupManager(subsystems, oldCfg.CgroupDriver)

	ginkgo.DeferCleanup(destroyTemporaryCgroupsForReservation, cgroupManager)
	ginkgo.DeferCleanup(func(ctx context.Context) {
		if oldCfg != nil {
			// Restore the original kubelet configuration.
			ginkgo.By("Stopping the kubelet")
			startKubelet := stopKubelet()

			// wait until the kubelet health check fails
			gomega.Eventually(ctx, func() bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}, time.Minute, time.Second).Should(gomega.BeFalse())

			framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg))

			ginkgo.By("Starting the kubelet")
			startKubelet()

			// wait until the kubelet health check succeeds
			gomega.Eventually(ctx, func(ctx context.Context) bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue())
		}
	})
	if err := createTemporaryCgroupsForReservation(cgroupManager); err != nil {
		return err
	}
	newCfg := oldCfg.DeepCopy()
	// Modify the existing kubelet configuration.
	setDesiredConfiguration(newCfg)
	// Apply the new kubelet configuration by restarting the kubelet.
	ginkgo.By("Stopping the kubelet")
	startKubelet := stopKubelet()

	// wait until the kubelet health check fails
	gomega.Eventually(ctx, func() bool {
		return kubeletHealthCheck(kubeletHealthCheckURL)
	}, time.Minute, time.Second).Should(gomega.BeFalse())

	framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg))

	ginkgo.By("Starting the kubelet")
	startKubelet()

	// wait until the kubelet health check succeeds
	gomega.Eventually(ctx, func() bool {
		return kubeletHealthCheck(kubeletHealthCheckURL)
	}, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue())

	// Use the newly applied configuration for the remaining checks.
	currentConfig := newCfg

	expectedNAPodCgroup := cm.ParseCgroupfsToCgroupName(currentConfig.CgroupRoot)
	expectedNAPodCgroup = cm.NewCgroupName(expectedNAPodCgroup, "kubepods")
	if !cgroupManager.Exists(expectedNAPodCgroup) {
		return fmt.Errorf("Expected Node Allocatable Cgroup %q does not exist", expectedNAPodCgroup)
	}

	memoryLimitFile := "memory.limit_in_bytes"
	if IsCgroup2UnifiedMode() {
		memoryLimitFile = "memory.max"
	}

	// TODO: Update cgroupManager to expose a Status interface to get current Cgroup Settings.
	// The node may not have updated capacity and allocatable yet, so check that it happens eventually.
	gomega.Eventually(ctx, func(ctx context.Context) error {
		nodeList, err := f.ClientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
		if err != nil {
			return err
		}
		if len(nodeList.Items) != 1 {
			return fmt.Errorf("Unexpected number of node objects for node e2e. Expected exactly one node: %+v", nodeList)
		}
		cgroupName := "kubepods"
		if currentConfig.CgroupDriver == "systemd" {
			cgroupName = "kubepods.slice"
		}

		node := nodeList.Items[0]
		capacity := node.Status.Capacity
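		// The reservations configured in setDesiredConfiguration total 200m CPU
		// (100m kube-reserved + 100m system-reserved), 200Mi memory (100Mi + 100Mi),
		// and 1738 PIDs (738 + 1000).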
		allocatableCPU, allocatableMemory, allocatablePIDs := getAllocatableLimits("200m", "200Mi", "1738", capacity)
		// Total Memory reservation is 200Mi excluding eviction thresholds.
		// Expect CPU shares on node allocatable cgroup to equal allocatable.
		shares := int64(cm.MilliCPUToShares(allocatableCPU.MilliValue()))
		if IsCgroup2UnifiedMode() {
			// convert to the cgroup v2 cpu.weight value
			if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupName, "cpu.weight"), convertSharesToWeight(shares), 10); err != nil {
				return err
			}
		} else {
			if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupName, "cpu.shares"), shares, 10); err != nil {
				return err
			}
		}
		// Expect Memory limit on node allocatable cgroup to equal allocatable.
		if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], cgroupName, memoryLimitFile), allocatableMemory.Value(), 0); err != nil {
			return err
		}
		// Expect PID limit on node allocatable cgroup to equal allocatable.
		if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["pids"], cgroupName, "pids.max"), allocatablePIDs.Value(), 0); err != nil {
			return err
		}

		// Check that Allocatable reported to scheduler includes eviction thresholds.
		schedulerAllocatable := node.Status.Allocatable
		// Memory allocatable should take into account eviction thresholds:
		// 200Mi reserved plus the 100Mi hard eviction threshold gives 300Mi.
		// Process IDs are not a scheduler resource and as such cannot be tested here.
		allocatableCPU, allocatableMemory, _ = getAllocatableLimits("200m", "300Mi", "1738", capacity)
		// Expect allocatable to include all resources in capacity.
		if len(schedulerAllocatable) != len(capacity) {
			return fmt.Errorf("Expected all resources in capacity to be found in allocatable")
		}
		// CPU based evictions are not supported.
		if allocatableCPU.Cmp(schedulerAllocatable[v1.ResourceCPU]) != 0 {
			return fmt.Errorf("Unexpected cpu allocatable value exposed by the node. Expected: %v, got: %v, capacity: %v", allocatableCPU, schedulerAllocatable[v1.ResourceCPU], capacity[v1.ResourceCPU])
		}
		if allocatableMemory.Cmp(schedulerAllocatable[v1.ResourceMemory]) != 0 {
			return fmt.Errorf("Unexpected memory allocatable value exposed by the node. Expected: %v, got: %v, capacity: %v", allocatableMemory, schedulerAllocatable[v1.ResourceMemory], capacity[v1.ResourceMemory])
		}
		return nil
	}, time.Minute, 5*time.Second).Should(gomega.Succeed())

	cgroupPath := ""
	if currentConfig.CgroupDriver == "systemd" {
		cgroupPath = cm.ParseSystemdToCgroupName(kubeReservedCgroup).ToSystemd()
	} else {
		cgroupPath = cgroupManager.Name(cm.NewCgroupName(cm.RootCgroupName, kubeReservedCgroup))
	}
	// Expect CPU shares on kube reserved cgroup to equal its reservation, which is `100m`.
	kubeReservedCPU := resource.MustParse(currentConfig.KubeReserved[string(v1.ResourceCPU)])
	shares := int64(cm.MilliCPUToShares(kubeReservedCPU.MilliValue()))
	if IsCgroup2UnifiedMode() {
		if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupPath, "cpu.weight"), convertSharesToWeight(shares), 10); err != nil {
			return err
		}
	} else {
		if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupPath, "cpu.shares"), shares, 10); err != nil {
			return err
		}
	}
	// Expect memory limit on kube reserved cgroup to equal the configured value `100Mi`.
	kubeReservedMemory := resource.MustParse(currentConfig.KubeReserved[string(v1.ResourceMemory)])
	if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], cgroupPath, memoryLimitFile), kubeReservedMemory.Value(), 0); err != nil {
		return err
	}
	// Expect process ID limit on kube reserved cgroup to equal the configured value `738`.
	kubeReservedPIDs := resource.MustParse(currentConfig.KubeReserved[string(pidlimit.PIDs)])
	if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["pids"], cgroupPath, "pids.max"), kubeReservedPIDs.Value(), 0); err != nil {
		return err
	}

	if currentConfig.CgroupDriver == "systemd" {
		cgroupPath = cm.ParseSystemdToCgroupName(systemReservedCgroup).ToSystemd()
	} else {
		cgroupPath = cgroupManager.Name(cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup))
	}

	// Expect CPU shares on system reserved cgroup to equal its reservation, which is `100m`.
	systemReservedCPU := resource.MustParse(currentConfig.SystemReserved[string(v1.ResourceCPU)])
	shares = int64(cm.MilliCPUToShares(systemReservedCPU.MilliValue()))
	if IsCgroup2UnifiedMode() {
		if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupPath, "cpu.weight"), convertSharesToWeight(shares), 10); err != nil {
			return err
		}
	} else {
		if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupPath, "cpu.shares"), shares, 10); err != nil {
			return err
		}
	}
	// Expect memory limit on system reserved cgroup to equal the configured value `100Mi`.
	systemReservedMemory := resource.MustParse(currentConfig.SystemReserved[string(v1.ResourceMemory)])
	if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], cgroupPath, memoryLimitFile), systemReservedMemory.Value(), 0); err != nil {
		return err
	}
	// Expect process ID limit on system reserved cgroup to equal the configured value `1000`.
	systemReservedPIDs := resource.MustParse(currentConfig.SystemReserved[string(pidlimit.PIDs)])
	if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["pids"], cgroupPath, "pids.max"), systemReservedPIDs.Value(), 0); err != nil {
		return err
	}
	return nil
}