k8s.io/kubernetes@v1.29.3/test/e2e_node/node_container_manager_test.go (about) 1 //go:build linux 2 // +build linux 3 4 /* 5 Copyright 2017 The Kubernetes Authors. 6 7 Licensed under the Apache License, Version 2.0 (the "License"); 8 you may not use this file except in compliance with the License. 9 You may obtain a copy of the License at 10 11 http://www.apache.org/licenses/LICENSE-2.0 12 13 Unless required by applicable law or agreed to in writing, software 14 distributed under the License is distributed on an "AS IS" BASIS, 15 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 See the License for the specific language governing permissions and 17 limitations under the License. 18 */ 19 20 package e2enode 21 22 import ( 23 "context" 24 "fmt" 25 "os" 26 "path/filepath" 27 "strconv" 28 "strings" 29 "time" 30 31 v1 "k8s.io/api/core/v1" 32 "k8s.io/apimachinery/pkg/api/resource" 33 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 34 kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" 35 "k8s.io/kubernetes/pkg/kubelet/cm" 36 "k8s.io/kubernetes/pkg/kubelet/stats/pidlimit" 37 admissionapi "k8s.io/pod-security-admission/api" 38 39 "k8s.io/kubernetes/test/e2e/framework" 40 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" 41 "k8s.io/kubernetes/test/e2e/nodefeature" 42 e2enodekubelet "k8s.io/kubernetes/test/e2e_node/kubeletconfig" 43 44 "github.com/onsi/ginkgo/v2" 45 "github.com/onsi/gomega" 46 ) 47 48 func setDesiredConfiguration(initialConfig *kubeletconfig.KubeletConfiguration) { 49 initialConfig.EnforceNodeAllocatable = []string{"pods", kubeReservedCgroup, systemReservedCgroup} 50 initialConfig.SystemReserved = map[string]string{ 51 string(v1.ResourceCPU): "100m", 52 string(v1.ResourceMemory): "100Mi", 53 string(pidlimit.PIDs): "1000", 54 } 55 initialConfig.KubeReserved = map[string]string{ 56 string(v1.ResourceCPU): "100m", 57 string(v1.ResourceMemory): "100Mi", 58 string(pidlimit.PIDs): "738", 59 } 60 initialConfig.EvictionHard = map[string]string{"memory.available": "100Mi"} 61 // Necessary for allocatable cgroup creation. 62 initialConfig.CgroupsPerQOS = true 63 initialConfig.KubeReservedCgroup = kubeReservedCgroup 64 initialConfig.SystemReservedCgroup = systemReservedCgroup 65 } 66 67 var _ = SIGDescribe("Node Container Manager", framework.WithSerial(), func() { 68 f := framework.NewDefaultFramework("node-container-manager") 69 f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged 70 f.Describe("Validate Node Allocatable", nodefeature.NodeAllocatable, func() { 71 ginkgo.It("sets up the node and runs the test", func(ctx context.Context) { 72 framework.ExpectNoError(runTest(ctx, f)) 73 }) 74 }) 75 }) 76 77 func expectFileValToEqual(filePath string, expectedValue, delta int64) error { 78 out, err := os.ReadFile(filePath) 79 if err != nil { 80 return fmt.Errorf("failed to read file %q", filePath) 81 } 82 actual, err := strconv.ParseInt(strings.TrimSpace(string(out)), 10, 64) 83 if err != nil { 84 return fmt.Errorf("failed to parse output %v", err) 85 } 86 87 // Ensure that values are within a delta range to work around rounding errors. 88 if (actual < (expectedValue - delta)) || (actual > (expectedValue + delta)) { 89 return fmt.Errorf("Expected value at %q to be between %d and %d. Got %d", filePath, (expectedValue - delta), (expectedValue + delta), actual) 90 } 91 return nil 92 } 93 94 func getAllocatableLimits(cpu, memory, pids string, capacity v1.ResourceList) (*resource.Quantity, *resource.Quantity, *resource.Quantity) { 95 var allocatableCPU, allocatableMemory, allocatablePIDs *resource.Quantity 96 // Total cpu reservation is 200m. 97 for k, v := range capacity { 98 if k == v1.ResourceCPU { 99 c := v.DeepCopy() 100 allocatableCPU = &c 101 allocatableCPU.Sub(resource.MustParse(cpu)) 102 } 103 if k == v1.ResourceMemory { 104 c := v.DeepCopy() 105 allocatableMemory = &c 106 allocatableMemory.Sub(resource.MustParse(memory)) 107 } 108 } 109 // Process IDs are not a node allocatable, so we have to do this ad hoc 110 pidlimits, err := pidlimit.Stats() 111 if err == nil && pidlimits != nil && pidlimits.MaxPID != nil { 112 allocatablePIDs = resource.NewQuantity(int64(*pidlimits.MaxPID), resource.DecimalSI) 113 allocatablePIDs.Sub(resource.MustParse(pids)) 114 } 115 return allocatableCPU, allocatableMemory, allocatablePIDs 116 } 117 118 const ( 119 kubeReservedCgroup = "kube-reserved" 120 systemReservedCgroup = "system-reserved" 121 ) 122 123 func createIfNotExists(cm cm.CgroupManager, cgroupConfig *cm.CgroupConfig) error { 124 if !cm.Exists(cgroupConfig.Name) { 125 if err := cm.Create(cgroupConfig); err != nil { 126 return err 127 } 128 } 129 return nil 130 } 131 132 func createTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error { 133 // Create kube reserved cgroup 134 cgroupConfig := &cm.CgroupConfig{ 135 Name: cm.NewCgroupName(cm.RootCgroupName, kubeReservedCgroup), 136 } 137 if err := createIfNotExists(cgroupManager, cgroupConfig); err != nil { 138 return err 139 } 140 // Create system reserved cgroup 141 cgroupConfig.Name = cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup) 142 143 return createIfNotExists(cgroupManager, cgroupConfig) 144 } 145 146 func destroyTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error { 147 // Create kube reserved cgroup 148 cgroupConfig := &cm.CgroupConfig{ 149 Name: cm.NewCgroupName(cm.RootCgroupName, kubeReservedCgroup), 150 } 151 if err := cgroupManager.Destroy(cgroupConfig); err != nil { 152 return err 153 } 154 cgroupConfig.Name = cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup) 155 return cgroupManager.Destroy(cgroupConfig) 156 } 157 158 // convertSharesToWeight converts from cgroup v1 cpu.shares to cgroup v2 cpu.weight 159 func convertSharesToWeight(shares int64) int64 { 160 return 1 + ((shares-2)*9999)/262142 161 } 162 163 func runTest(ctx context.Context, f *framework.Framework) error { 164 var oldCfg *kubeletconfig.KubeletConfiguration 165 subsystems, err := cm.GetCgroupSubsystems() 166 if err != nil { 167 return err 168 } 169 // Get current kubelet configuration 170 oldCfg, err = getCurrentKubeletConfig(ctx) 171 if err != nil { 172 return err 173 } 174 175 // Test needs to be updated to make it run properly on systemd. 176 // In its current state it will result in kubelet error since 177 // kubeReservedCgroup and systemReservedCgroup are not configured 178 // correctly for systemd. 179 // See: https://github.com/kubernetes/kubernetes/issues/102394 180 if oldCfg.CgroupDriver == "systemd" { 181 e2eskipper.Skipf("unable to run test when using systemd as cgroup driver") 182 } 183 184 // Create a cgroup manager object for manipulating cgroups. 185 cgroupManager := cm.NewCgroupManager(subsystems, oldCfg.CgroupDriver) 186 187 ginkgo.DeferCleanup(destroyTemporaryCgroupsForReservation, cgroupManager) 188 ginkgo.DeferCleanup(func(ctx context.Context) { 189 if oldCfg != nil { 190 // Update the Kubelet configuration. 191 ginkgo.By("Stopping the kubelet") 192 startKubelet := stopKubelet() 193 194 // wait until the kubelet health check will fail 195 gomega.Eventually(ctx, func() bool { 196 return kubeletHealthCheck(kubeletHealthCheckURL) 197 }, time.Minute, time.Second).Should(gomega.BeFalse()) 198 199 framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg)) 200 201 ginkgo.By("Starting the kubelet") 202 startKubelet() 203 204 // wait until the kubelet health check will succeed 205 gomega.Eventually(ctx, func(ctx context.Context) bool { 206 return kubeletHealthCheck(kubeletHealthCheckURL) 207 }, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue()) 208 } 209 }) 210 if err := createTemporaryCgroupsForReservation(cgroupManager); err != nil { 211 return err 212 } 213 newCfg := oldCfg.DeepCopy() 214 // Change existing kubelet configuration 215 setDesiredConfiguration(newCfg) 216 // Set the new kubelet configuration. 217 // Update the Kubelet configuration. 218 ginkgo.By("Stopping the kubelet") 219 startKubelet := stopKubelet() 220 221 // wait until the kubelet health check will fail 222 gomega.Eventually(ctx, func() bool { 223 return kubeletHealthCheck(kubeletHealthCheckURL) 224 }, time.Minute, time.Second).Should(gomega.BeFalse()) 225 226 framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg)) 227 228 ginkgo.By("Starting the kubelet") 229 startKubelet() 230 231 // wait until the kubelet health check will succeed 232 gomega.Eventually(ctx, func() bool { 233 return kubeletHealthCheck(kubeletHealthCheckURL) 234 }, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue()) 235 236 if err != nil { 237 return err 238 } 239 // Set new config and current config. 240 currentConfig := newCfg 241 242 expectedNAPodCgroup := cm.ParseCgroupfsToCgroupName(currentConfig.CgroupRoot) 243 expectedNAPodCgroup = cm.NewCgroupName(expectedNAPodCgroup, "kubepods") 244 if !cgroupManager.Exists(expectedNAPodCgroup) { 245 return fmt.Errorf("Expected Node Allocatable Cgroup %q does not exist", expectedNAPodCgroup) 246 } 247 248 memoryLimitFile := "memory.limit_in_bytes" 249 if IsCgroup2UnifiedMode() { 250 memoryLimitFile = "memory.max" 251 } 252 253 // TODO: Update cgroupManager to expose a Status interface to get current Cgroup Settings. 254 // The node may not have updated capacity and allocatable yet, so check that it happens eventually. 255 gomega.Eventually(ctx, func(ctx context.Context) error { 256 nodeList, err := f.ClientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) 257 if err != nil { 258 return err 259 } 260 if len(nodeList.Items) != 1 { 261 return fmt.Errorf("Unexpected number of node objects for node e2e. Expects only one node: %+v", nodeList) 262 } 263 cgroupName := "kubepods" 264 if currentConfig.CgroupDriver == "systemd" { 265 cgroupName = "kubepods.slice" 266 } 267 268 node := nodeList.Items[0] 269 capacity := node.Status.Capacity 270 allocatableCPU, allocatableMemory, allocatablePIDs := getAllocatableLimits("200m", "200Mi", "1738", capacity) 271 // Total Memory reservation is 200Mi excluding eviction thresholds. 272 // Expect CPU shares on node allocatable cgroup to equal allocatable. 273 shares := int64(cm.MilliCPUToShares(allocatableCPU.MilliValue())) 274 if IsCgroup2UnifiedMode() { 275 // convert to the cgroup v2 cpu.weight value 276 if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupName, "cpu.weight"), convertSharesToWeight(shares), 10); err != nil { 277 return err 278 } 279 } else { 280 if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupName, "cpu.shares"), shares, 10); err != nil { 281 return err 282 } 283 } 284 // Expect Memory limit on node allocatable cgroup to equal allocatable. 285 if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], cgroupName, memoryLimitFile), allocatableMemory.Value(), 0); err != nil { 286 return err 287 } 288 // Expect PID limit on node allocatable cgroup to equal allocatable. 289 if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["pids"], cgroupName, "pids.max"), allocatablePIDs.Value(), 0); err != nil { 290 return err 291 } 292 293 // Check that Allocatable reported to scheduler includes eviction thresholds. 294 schedulerAllocatable := node.Status.Allocatable 295 // Memory allocatable should take into account eviction thresholds. 296 // Process IDs are not a scheduler resource and as such cannot be tested here. 297 allocatableCPU, allocatableMemory, _ = getAllocatableLimits("200m", "300Mi", "1738", capacity) 298 // Expect allocatable to include all resources in capacity. 299 if len(schedulerAllocatable) != len(capacity) { 300 return fmt.Errorf("Expected all resources in capacity to be found in allocatable") 301 } 302 // CPU based evictions are not supported. 303 if allocatableCPU.Cmp(schedulerAllocatable[v1.ResourceCPU]) != 0 { 304 return fmt.Errorf("Unexpected cpu allocatable value exposed by the node. Expected: %v, got: %v, capacity: %v", allocatableCPU, schedulerAllocatable[v1.ResourceCPU], capacity[v1.ResourceCPU]) 305 } 306 if allocatableMemory.Cmp(schedulerAllocatable[v1.ResourceMemory]) != 0 { 307 return fmt.Errorf("Unexpected memory allocatable value exposed by the node. Expected: %v, got: %v, capacity: %v", allocatableMemory, schedulerAllocatable[v1.ResourceMemory], capacity[v1.ResourceMemory]) 308 } 309 return nil 310 }, time.Minute, 5*time.Second).Should(gomega.Succeed()) 311 312 cgroupPath := "" 313 if currentConfig.CgroupDriver == "systemd" { 314 cgroupPath = cm.ParseSystemdToCgroupName(kubeReservedCgroup).ToSystemd() 315 } else { 316 cgroupPath = cgroupManager.Name(cm.NewCgroupName(cm.RootCgroupName, kubeReservedCgroup)) 317 } 318 // Expect CPU shares on kube reserved cgroup to equal it's reservation which is `100m`. 319 kubeReservedCPU := resource.MustParse(currentConfig.KubeReserved[string(v1.ResourceCPU)]) 320 shares := int64(cm.MilliCPUToShares(kubeReservedCPU.MilliValue())) 321 if IsCgroup2UnifiedMode() { 322 if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupPath, "cpu.weight"), convertSharesToWeight(shares), 10); err != nil { 323 return err 324 } 325 } else { 326 if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupPath, "cpu.shares"), shares, 10); err != nil { 327 return err 328 } 329 } 330 // Expect Memory limit kube reserved cgroup to equal configured value `100Mi`. 331 kubeReservedMemory := resource.MustParse(currentConfig.KubeReserved[string(v1.ResourceMemory)]) 332 if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], cgroupPath, memoryLimitFile), kubeReservedMemory.Value(), 0); err != nil { 333 return err 334 } 335 // Expect process ID limit kube reserved cgroup to equal configured value `738`. 336 kubeReservedPIDs := resource.MustParse(currentConfig.KubeReserved[string(pidlimit.PIDs)]) 337 if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["pids"], cgroupPath, "pids.max"), kubeReservedPIDs.Value(), 0); err != nil { 338 return err 339 } 340 341 if currentConfig.CgroupDriver == "systemd" { 342 cgroupPath = cm.ParseSystemdToCgroupName(systemReservedCgroup).ToSystemd() 343 } else { 344 cgroupPath = cgroupManager.Name(cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup)) 345 } 346 347 // Expect CPU shares on system reserved cgroup to equal it's reservation which is `100m`. 348 systemReservedCPU := resource.MustParse(currentConfig.SystemReserved[string(v1.ResourceCPU)]) 349 shares = int64(cm.MilliCPUToShares(systemReservedCPU.MilliValue())) 350 if IsCgroup2UnifiedMode() { 351 if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupPath, "cpu.weight"), convertSharesToWeight(shares), 10); err != nil { 352 return err 353 } 354 } else { 355 if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupPath, "cpu.shares"), shares, 10); err != nil { 356 return err 357 } 358 } 359 // Expect Memory limit on node allocatable cgroup to equal allocatable. 360 systemReservedMemory := resource.MustParse(currentConfig.SystemReserved[string(v1.ResourceMemory)]) 361 if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], cgroupPath, memoryLimitFile), systemReservedMemory.Value(), 0); err != nil { 362 return err 363 } 364 // Expect process ID limit system reserved cgroup to equal configured value `1000`. 365 systemReservedPIDs := resource.MustParse(currentConfig.SystemReserved[string(pidlimit.PIDs)]) 366 if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["pids"], cgroupPath, "pids.max"), systemReservedPIDs.Value(), 0); err != nil { 367 return err 368 } 369 return nil 370 }