k8s.io/kubernetes@v1.29.3/test/e2e_node/eviction_test.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
    "context"
    "fmt"
    "path/filepath"
    "strconv"
    "strings"
    "time"

    v1 "k8s.io/api/core/v1"
    schedulingv1 "k8s.io/api/scheduling/v1"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/fields"
    kubeletstatsv1alpha1 "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
    "k8s.io/kubernetes/pkg/features"
    kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
    "k8s.io/kubernetes/pkg/kubelet/eviction"
    evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
    kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
    kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
    "k8s.io/kubernetes/test/e2e/feature"
    "k8s.io/kubernetes/test/e2e/framework"
    e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    "k8s.io/kubernetes/test/e2e/nodefeature"
    testutils "k8s.io/kubernetes/test/utils"
    imageutils "k8s.io/kubernetes/test/utils/image"
    admissionapi "k8s.io/pod-security-admission/api"

    "github.com/onsi/ginkgo/v2"
    "github.com/onsi/gomega"
)

// Eviction Policy is described here:
// https://github.com/kubernetes/design-proposals-archive/blob/main/node/kubelet-eviction.md

const (
    postTestConditionMonitoringPeriod = 1 * time.Minute
    evictionPollInterval              = 2 * time.Second
    pressureDisappearTimeout          = 10 * time.Minute
    // Pressure conditions often surface after evictions because the kubelet only updates
    // node conditions periodically. We wait this period after evictions to make sure that
    // delay has passed.
    pressureDelay     = 20 * time.Second
    testContextFmt    = "when we run containers that should cause %s"
    noPressure        = v1.NodeConditionType("NoPressure")
    lotsOfDisk        = 10240      // 10Gi in Mi
    lotsOfFiles       = 1000000000 // 1 billion
    resourceInodes    = v1.ResourceName("inodes")
    noStarvedResource = v1.ResourceName("none")
)
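// Most of the tests below follow the same recipe: read the node's current usage from the
// kubelet stats summary API (eventuallyGetSummary), then set an eviction threshold relative
// to that usage so the test workload reliably crosses it. As an illustration with made-up
// numbers: if the summary reports 1,500,000 free inodes and a test plans to consume 200,000,
// it sets the nodefs.inodesFree hard threshold to 1,300,000, so consuming the planned inodes
// pushes the node below the threshold and triggers eviction.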
// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
// Node disk pressure is induced by consuming all inodes on the node.
var _ = SIGDescribe("InodeEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), nodefeature.Eviction, func() {
    f := framework.NewDefaultFramework("inode-eviction-test")
    f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    expectedNodeCondition := v1.NodeDiskPressure
    expectedStarvedResource := resourceInodes
    pressureTimeout := 15 * time.Minute
    inodesConsumed := uint64(200000)
    ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
        tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
            // Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
            summary := eventuallyGetSummary(ctx)
            inodesFree := *summary.Node.Fs.InodesFree
            if inodesFree <= inodesConsumed {
                e2eskipper.Skipf("Too few inodes free on the host for the InodeEviction test to run")
            }
            initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed)}
            initialConfig.EvictionMinimumReclaim = map[string]string{}
        })
        runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logInodeMetrics, []podEvictSpec{
            {
                evictionPriority: 1,
                pod:              inodeConsumingPod("container-inode-hog", lotsOfFiles, nil),
            },
            {
                evictionPriority: 1,
                pod:              inodeConsumingPod("volume-inode-hog", lotsOfFiles, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}),
            },
            {
                evictionPriority: 0,
                pod:              innocentPod(),
            },
        })
    })
})

// ImageGCNoEviction tests that the node does not evict pods when inodes are consumed by images.
// Disk pressure is induced by pulling large images.
var _ = SIGDescribe("ImageGCNoEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), nodefeature.Eviction, func() {
    f := framework.NewDefaultFramework("image-gc-eviction-test")
    f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    pressureTimeout := 10 * time.Minute
    expectedNodeCondition := v1.NodeDiskPressure
    expectedStarvedResource := resourceInodes
    inodesConsumed := uint64(100000)
    ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
        tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
            // Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
            summary := eventuallyGetSummary(ctx)
            inodesFree := *summary.Node.Fs.InodesFree
            if inodesFree <= inodesConsumed {
                e2eskipper.Skipf("Too few inodes free on the host for the ImageGCNoEviction test to run")
            }
            initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed)}
            initialConfig.EvictionMinimumReclaim = map[string]string{}
        })
        // Consume enough inodes to induce disk pressure,
        // but expect that image garbage collection can reduce it enough to avoid an eviction.
        runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
            {
                evictionPriority: 0,
                pod:              inodeConsumingPod("container-inode", 110000, nil),
            },
        })
    })
})
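// Before evicting pods in response to inode pressure, the eviction manager first tries to
// reclaim node-level resources, notably by garbage-collecting unused container images. That
// is why the single pod above carries evictionPriority 0: image GC is expected to free enough
// inodes that no pod eviction is needed.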
// MemoryAllocatableEviction tests that the node responds to node memory pressure by evicting only responsible pods.
// Node memory pressure is only encountered because we reserve the majority of the node's capacity via kube-reserved.
var _ = SIGDescribe("MemoryAllocatableEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), nodefeature.Eviction, func() {
    f := framework.NewDefaultFramework("memory-allocatable-eviction-test")
    f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    expectedNodeCondition := v1.NodeMemoryPressure
    expectedStarvedResource := v1.ResourceMemory
    pressureTimeout := 10 * time.Minute
    ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
        tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
            // Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
            kubeReserved := getNodeCPUAndMemoryCapacity(ctx, f)[v1.ResourceMemory]
            // The default hard eviction threshold is 250Mi, so Allocatable = Capacity - Reserved - 250Mi.
            // We want Allocatable = 50Mi, so set Reserved = Capacity - Allocatable - 250Mi = Capacity - 300Mi.
            kubeReserved.Sub(resource.MustParse("300Mi"))
            initialConfig.KubeReserved = map[string]string{
                string(v1.ResourceMemory): kubeReserved.String(),
            }
            initialConfig.EnforceNodeAllocatable = []string{kubetypes.NodeAllocatableEnforcementKey}
            initialConfig.CgroupsPerQOS = true
        })
        runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logMemoryMetrics, []podEvictSpec{
            {
                evictionPriority: 1,
                pod:              getMemhogPod("memory-hog-pod", "memory-hog", v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 0,
                pod:              innocentPod(),
            },
        })
    })
})
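// To make the arithmetic above concrete (illustrative numbers only): on a node with 8Gi of
// memory, KubeReserved becomes 8Gi - 300Mi, so
//
//	Allocatable = Capacity - Reserved - 250Mi = 50Mi
//
// and any pod using more than ~50Mi drives the allocatable cgroup into memory pressure
// almost immediately, long before the node as a whole is low on memory.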
// LocalStorageEviction tests that the node responds to node disk pressure by evicting only responsible pods.
// Disk pressure is induced by running pods which consume disk space.
var _ = SIGDescribe("LocalStorageEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), nodefeature.Eviction, func() {
    f := framework.NewDefaultFramework("localstorage-eviction-test")
    f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    pressureTimeout := 15 * time.Minute
    expectedNodeCondition := v1.NodeDiskPressure
    expectedStarvedResource := v1.ResourceEphemeralStorage
    ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {

        tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
            summary := eventuallyGetSummary(ctx)

            diskConsumedByTest := resource.MustParse("4Gi")
            availableBytesOnSystem := *(summary.Node.Fs.AvailableBytes)

            if availableBytesOnSystem <= uint64(diskConsumedByTest.Value()) {
                e2eskipper.Skipf("Too little disk free on the host for the LocalStorageEviction test to run")
            }

            // Compute the threshold only after the skip check above, so the subtraction cannot wrap around.
            evictionThreshold := strconv.FormatUint(availableBytesOnSystem-uint64(diskConsumedByTest.Value()), 10)

            initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsAvailable): evictionThreshold}
            initialConfig.EvictionMinimumReclaim = map[string]string{}
        })

        runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
            {
                evictionPriority: 1,
                pod:              diskConsumingPod("container-disk-hog", lotsOfDisk, nil, v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 0,
                pod:              innocentPod(),
            },
        })
    })
})
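// The test above uses a hard threshold: once nodefs.available drops below it, the kubelet
// evicts immediately, with no grace period. The soft-eviction variant below exercises the
// other path, where the signal must stay past the threshold for a configured grace period
// before any pod is evicted.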
// LocalStorageSoftEviction tests that the node responds to node disk pressure by evicting only responsible pods.
// Disk pressure is induced by running pods which consume disk space in excess of the soft eviction threshold.
// Note: This test's purpose is to test soft evictions. Local storage was chosen since it is the least costly to run.
var _ = SIGDescribe("LocalStorageSoftEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), nodefeature.Eviction, func() {
    f := framework.NewDefaultFramework("localstorage-eviction-test")
    f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    pressureTimeout := 10 * time.Minute
    expectedNodeCondition := v1.NodeDiskPressure
    expectedStarvedResource := v1.ResourceEphemeralStorage
    ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
        tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
            diskConsumed := resource.MustParse("4Gi")
            summary := eventuallyGetSummary(ctx)
            availableBytes := *(summary.Node.Fs.AvailableBytes)
            if availableBytes <= uint64(diskConsumed.Value()) {
                e2eskipper.Skipf("Too little disk free on the host for the LocalStorageSoftEviction test to run")
            }
            initialConfig.EvictionSoft = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
            initialConfig.EvictionSoftGracePeriod = map[string]string{string(evictionapi.SignalNodeFsAvailable): "1m"}
            // Defer to the pod default grace period.
            initialConfig.EvictionMaxPodGracePeriod = 30
            initialConfig.EvictionMinimumReclaim = map[string]string{}
            // Ensure that pods are not evicted because of the eviction-hard threshold.
            // Setting a threshold to 0% disables it; a non-empty map overrides the default value (necessary due to omitempty).
            initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
        })
        runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
            {
                evictionPriority: 1,
                pod:              diskConsumingPod("container-disk-hog", lotsOfDisk, nil, v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 0,
                pod:              innocentPod(),
            },
        })
    })
})
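// With the configuration above, the expected sequence is: nodefs.available drops below the
// soft threshold, the kubelet waits out the 1m EvictionSoftGracePeriod while the signal
// persists, and only then evicts, giving the victim at most EvictionMaxPodGracePeriod (30s)
// to terminate cleanly.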
// This test validates that in-memory EmptyDir volumes are evicted when the kubelet does
// not have Sized Memory Volumes enabled. When sized memory volumes are enabled, it's
// not possible to exhaust the quota.
var _ = SIGDescribe("LocalStorageCapacityIsolationMemoryBackedVolumeEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.LocalStorageCapacityIsolation, nodefeature.Eviction, func() {
    f := framework.NewDefaultFramework("localstorage-eviction-test")
    f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    evictionTestTimeout := 7 * time.Minute
    ginkgo.Context(fmt.Sprintf(testContextFmt, "evictions due to pod local storage violations"), func() {
        tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
            // Setting a threshold to 0% disables it; a non-empty map overrides the default value (necessary due to omitempty).
            initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
            if initialConfig.FeatureGates == nil {
                initialConfig.FeatureGates = make(map[string]bool)
            }
            initialConfig.FeatureGates["SizeMemoryBackedVolumes"] = false
        })

        sizeLimit := resource.MustParse("100Mi")
        useOverLimit := 200 /* Mb */
        useUnderLimit := 80 /* Mb */
        containerLimit := v1.ResourceList{v1.ResourceEphemeralStorage: sizeLimit}

        runEvictionTest(f, evictionTestTimeout, noPressure, noStarvedResource, logDiskMetrics, []podEvictSpec{
            {
                evictionPriority: 1, // Should be evicted due to the volume size limit.
                pod: diskConsumingPod("emptydir-memory-over-volume-sizelimit", useOverLimit, &v1.VolumeSource{
                    EmptyDir: &v1.EmptyDirVolumeSource{Medium: "Memory", SizeLimit: &sizeLimit},
                }, v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 0, // Should not be evicted, as container limits do not account for memory-backed volumes.
                pod: diskConsumingPod("emptydir-memory-over-container-sizelimit", useOverLimit, &v1.VolumeSource{
                    EmptyDir: &v1.EmptyDirVolumeSource{Medium: "Memory"},
                }, v1.ResourceRequirements{Limits: containerLimit}),
            },
            {
                evictionPriority: 0,
                pod: diskConsumingPod("emptydir-memory-innocent", useUnderLimit, &v1.VolumeSource{
                    EmptyDir: &v1.EmptyDirVolumeSource{Medium: "Memory", SizeLimit: &sizeLimit},
                }, v1.ResourceRequirements{}),
            },
        })
    })
})
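// Note the feature-gate dependency: with SizeMemoryBackedVolumes disabled (as above), a
// memory-backed emptyDir can outgrow its sizeLimit, and the eviction manager has to evict the
// pod. With the gate enabled (the LocalStorageCapacityIsolationEviction test below), the
// tmpfs is sized to the limit, so the quota cannot be exhausted in the first place.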
// LocalStorageCapacityIsolationEviction tests that container and volume local storage limits are enforced through evictions.
var _ = SIGDescribe("LocalStorageCapacityIsolationEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.LocalStorageCapacityIsolation, nodefeature.Eviction, func() {
    f := framework.NewDefaultFramework("localstorage-eviction-test")
    f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    evictionTestTimeout := 10 * time.Minute
    ginkgo.Context(fmt.Sprintf(testContextFmt, "evictions due to pod local storage violations"), func() {
        tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
            // Setting a threshold to 0% disables it; a non-empty map overrides the default value (necessary due to omitempty).
            initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
        })
        sizeLimit := resource.MustParse("100Mi")
        useOverLimit := 101 /* Mb */
        useUnderLimit := 99 /* Mb */
        containerLimit := v1.ResourceList{v1.ResourceEphemeralStorage: sizeLimit}

        runEvictionTest(f, evictionTestTimeout, noPressure, noStarvedResource, logDiskMetrics, []podEvictSpec{
            {
                evictionPriority: 1, // This pod should be evicted because of its emptyDir (the default storage type) usage violation.
                pod: diskConsumingPod("emptydir-disk-sizelimit", useOverLimit, &v1.VolumeSource{
                    EmptyDir: &v1.EmptyDirVolumeSource{SizeLimit: &sizeLimit},
                }, v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 1, // This pod should cross the container limit by writing to its writable layer.
                pod:              diskConsumingPod("container-disk-limit", useOverLimit, nil, v1.ResourceRequirements{Limits: containerLimit}),
            },
            {
                evictionPriority: 1, // This pod should hit the container limit by writing to an emptydir.
                pod: diskConsumingPod("container-emptydir-disk-limit", useOverLimit, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}},
                    v1.ResourceRequirements{Limits: containerLimit}),
            },
            {
                evictionPriority: 0, // This pod should not be evicted because memory-backed volumes cannot use more space than is allocated to them, since SizeMemoryBackedVolumes is enabled.
                pod: diskConsumingPod("emptydir-memory-sizelimit", useOverLimit, &v1.VolumeSource{
                    EmptyDir: &v1.EmptyDirVolumeSource{Medium: "Memory", SizeLimit: &sizeLimit},
                }, v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 0, // This pod should not be evicted because it uses less than its limit.
                pod: diskConsumingPod("emptydir-disk-below-sizelimit", useUnderLimit, &v1.VolumeSource{
                    EmptyDir: &v1.EmptyDirVolumeSource{SizeLimit: &sizeLimit},
                }, v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 0, // This pod should not be evicted because it uses less than its limit.
                pod:              diskConsumingPod("container-disk-below-sizelimit", useUnderLimit, nil, v1.ResourceRequirements{Limits: containerLimit}),
            },
        })
    })
})
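// These evictions are limit-based rather than pressure-based: the test passes noPressure and
// noStarvedResource, and a pod is evicted as soon as its container or emptyDir usage exceeds
// the configured 100Mi limit, regardless of how much disk the node still has free.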
// PriorityMemoryEvictionOrdering tests that the node responds to node memory pressure by evicting pods.
// It verifies that the guaranteed pod is never evicted, and that the lower-priority pod is evicted before
// the higher-priority pod.
var _ = SIGDescribe("PriorityMemoryEvictionOrdering", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), nodefeature.Eviction, func() {
    f := framework.NewDefaultFramework("priority-memory-eviction-ordering-test")
    f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    expectedNodeCondition := v1.NodeMemoryPressure
    expectedStarvedResource := v1.ResourceMemory
    pressureTimeout := 10 * time.Minute

    highPriorityClassName := f.BaseName + "-high-priority"
    highPriority := int32(999999999)

    ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
        tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
            memoryConsumed := resource.MustParse("600Mi")
            summary := eventuallyGetSummary(ctx)
            availableBytes := *(summary.Node.Memory.AvailableBytes)
            if availableBytes <= uint64(memoryConsumed.Value()) {
                e2eskipper.Skipf("Too little memory free on the host for the PriorityMemoryEvictionOrdering test to run")
            }
            initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): fmt.Sprintf("%d", availableBytes-uint64(memoryConsumed.Value()))}
            initialConfig.EvictionMinimumReclaim = map[string]string{}
        })
        ginkgo.BeforeEach(func(ctx context.Context) {
            _, err := f.ClientSet.SchedulingV1().PriorityClasses().Create(ctx, &schedulingv1.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: highPriorityClassName}, Value: highPriority}, metav1.CreateOptions{})
            if err != nil && !apierrors.IsAlreadyExists(err) {
                framework.ExpectNoError(err, "failed to create priority class")
            }
        })
        ginkgo.AfterEach(func(ctx context.Context) {
            err := f.ClientSet.SchedulingV1().PriorityClasses().Delete(ctx, highPriorityClassName, metav1.DeleteOptions{})
            framework.ExpectNoError(err)
        })
        specs := []podEvictSpec{
            {
                evictionPriority: 2,
                pod:              getMemhogPod("memory-hog-pod", "memory-hog", v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 1,
                pod:              getMemhogPod("high-priority-memory-hog-pod", "high-priority-memory-hog", v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 0,
                pod: getMemhogPod("guaranteed-pod", "guaranteed-pod", v1.ResourceRequirements{
                    Requests: v1.ResourceList{
                        v1.ResourceMemory: resource.MustParse("300Mi"),
                    },
                    Limits: v1.ResourceList{
                        v1.ResourceMemory: resource.MustParse("300Mi"),
                    },
                }),
            },
        }
        specs[1].pod.Spec.PriorityClassName = highPriorityClassName
        runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logMemoryMetrics, specs)
    })
})
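// The expected ordering above tracks how the kubelet ranks pods under memory pressure:
// broadly, pods whose usage exceeds their requests are evicted before pods within their
// requests, with pod priority ordering pods inside those groups. Both memory hogs run with
// no requests, so any usage exceeds them; the low-priority hog should go first, then the
// high-priority one. The guaranteed pod is capped (via getMemhogPod) at ~80% of its 300Mi
// limit, so it stays within its request and should never be selected.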
// PriorityLocalStorageEvictionOrdering tests that the node responds to node disk pressure by evicting pods.
// It verifies that the guaranteed pod is never evicted, and that the lower-priority pod is evicted before
// the higher-priority pod.
var _ = SIGDescribe("PriorityLocalStorageEvictionOrdering", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), nodefeature.Eviction, func() {
    f := framework.NewDefaultFramework("priority-disk-eviction-ordering-test")
    f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    expectedNodeCondition := v1.NodeDiskPressure
    expectedStarvedResource := v1.ResourceEphemeralStorage
    pressureTimeout := 15 * time.Minute

    highPriorityClassName := f.BaseName + "-high-priority"
    highPriority := int32(999999999)

    ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
        tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
            diskConsumed := resource.MustParse("4Gi")
            summary := eventuallyGetSummary(ctx)
            availableBytes := *(summary.Node.Fs.AvailableBytes)
            if availableBytes <= uint64(diskConsumed.Value()) {
                e2eskipper.Skipf("Too little disk free on the host for the PriorityLocalStorageEvictionOrdering test to run")
            }
            initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
            initialConfig.EvictionMinimumReclaim = map[string]string{}
        })
        ginkgo.BeforeEach(func(ctx context.Context) {
            _, err := f.ClientSet.SchedulingV1().PriorityClasses().Create(ctx, &schedulingv1.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: highPriorityClassName}, Value: highPriority}, metav1.CreateOptions{})
            if err != nil && !apierrors.IsAlreadyExists(err) {
                framework.ExpectNoError(err, "failed to create priority class")
            }
        })
        ginkgo.AfterEach(func(ctx context.Context) {
            err := f.ClientSet.SchedulingV1().PriorityClasses().Delete(ctx, highPriorityClassName, metav1.DeleteOptions{})
            framework.ExpectNoError(err)
        })
        specs := []podEvictSpec{
            {
                evictionPriority: 2,
                pod:              diskConsumingPod("best-effort-disk", lotsOfDisk, nil, v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 1,
                pod:              diskConsumingPod("high-priority-disk", lotsOfDisk, nil, v1.ResourceRequirements{}),
            },
            {
                evictionPriority: 0,
                // Only require 99% accuracy (297/300 Mb) because on some OS distributions, the file itself (excluding contents) consumes disk space.
                pod: diskConsumingPod("guaranteed-disk", 297 /* Mb */, nil, v1.ResourceRequirements{
                    Requests: v1.ResourceList{
                        v1.ResourceEphemeralStorage: resource.MustParse("300Mi"),
                    },
                    Limits: v1.ResourceList{
                        v1.ResourceEphemeralStorage: resource.MustParse("300Mi"),
                    },
                }),
            },
        }
        specs[1].pod.Spec.PriorityClassName = highPriorityClassName
        runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, specs)
    })
})

// PriorityPidEvictionOrdering tests that the node emits pid pressure in response to a fork bomb, and evicts pods by priority
var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), nodefeature.Eviction, func() {
    f := framework.NewDefaultFramework("pidpressure-eviction-test")
    f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    pressureTimeout := 10 * time.Minute
    expectedNodeCondition := v1.NodePIDPressure
    expectedStarvedResource := noStarvedResource

    highPriorityClassName := f.BaseName + "-high-priority"
    highPriority := int32(999999999)

    ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
        tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
            pidsConsumed := int64(10000)
            summary := eventuallyGetSummary(ctx)
            availablePids := *(summary.Node.Rlimit.MaxPID) - *(summary.Node.Rlimit.NumOfRunningProcesses)
            initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalPIDAvailable): fmt.Sprintf("%d", availablePids-pidsConsumed)}
            initialConfig.EvictionMinimumReclaim = map[string]string{}
        })
        ginkgo.BeforeEach(func(ctx context.Context) {
            _, err := f.ClientSet.SchedulingV1().PriorityClasses().Create(ctx, &schedulingv1.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: highPriorityClassName}, Value: highPriority}, metav1.CreateOptions{})
            if err != nil && !apierrors.IsAlreadyExists(err) {
                framework.ExpectNoError(err, "failed to create priority class")
            }
        })
        ginkgo.AfterEach(func(ctx context.Context) {
            err := f.ClientSet.SchedulingV1().PriorityClasses().Delete(ctx, highPriorityClassName, metav1.DeleteOptions{})
            framework.ExpectNoError(err)
        })
        specs := []podEvictSpec{
            {
                evictionPriority: 2,
                pod:              pidConsumingPod("fork-bomb-container-with-low-priority", 12000),
            },
            {
                evictionPriority: 0,
                pod:              innocentPod(),
            },
            {
                evictionPriority: 1,
                pod:              pidConsumingPod("fork-bomb-container-with-high-priority", 12000),
            },
        }
        specs[1].pod.Spec.PriorityClassName = highPriorityClassName
        specs[2].pod.Spec.PriorityClassName = highPriorityClassName
        runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logPidMetrics, specs)
    })
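    // The variant below repeats the PID-pressure scenario with the PodDisruptionConditions
    // feature gate enabled and additionally asserts, via wantPodDisruptionCondition, that the
    // evicted pod is marked with a DisruptionTarget pod condition.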
    f.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition)+"; PodDisruptionConditions enabled", nodefeature.PodDisruptionConditions, func() {
        tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
            pidsConsumed := int64(10000)
            summary := eventuallyGetSummary(ctx)
            availablePids := *(summary.Node.Rlimit.MaxPID) - *(summary.Node.Rlimit.NumOfRunningProcesses)
            initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalPIDAvailable): fmt.Sprintf("%d", availablePids-pidsConsumed)}
            initialConfig.EvictionMinimumReclaim = map[string]string{}
            initialConfig.FeatureGates = map[string]bool{
                string(features.PodDisruptionConditions): true,
            }
        })
        disruptionTarget := v1.DisruptionTarget
        specs := []podEvictSpec{
            {
                evictionPriority:           1,
                pod:                        pidConsumingPod("fork-bomb-container", 30000),
                wantPodDisruptionCondition: &disruptionTarget,
            },
        }
        runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logPidMetrics, specs)
    })
})

// podEvictSpec is used by runEvictionTest; it specifies a pod, and when that pod should be evicted relative to the other pods.
type podEvictSpec struct {
    // P0 should never be evicted, P1 shouldn't evict before P2, etc.
    // If two are ranked at P1, either is permitted to fail before the other.
    // The test ends when all pods other than P0 have been evicted.
    evictionPriority           int
    pod                        *v1.Pod
    wantPodDisruptionCondition *v1.PodConditionType
}
// runEvictionTest sets up a testing environment given the provided pods, and checks a few things:
//
// It ensures that the desired expectedNodeCondition is actually triggered.
// It ensures that evictionPriority 0 pods are not evicted.
// It ensures that lower evictionPriority pods are always evicted before higher evictionPriority pods (2 evicted before 1, etc.).
// It ensures that all pods with non-zero evictionPriority are eventually evicted.
//
// runEvictionTest then cleans up the testing environment by deleting the provided pods, and ensures that expectedNodeCondition no longer exists.
func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expectedNodeCondition v1.NodeConditionType, expectedStarvedResource v1.ResourceName, logFunc func(ctx context.Context), testSpecs []podEvictSpec) {
    // Place the remainder of the test within a context so that the kubelet config is set before and after the test.
    ginkgo.Context("", func() {
        ginkgo.BeforeEach(func(ctx context.Context) {
            // Reduce memory usage in the allocatable cgroup to ensure we do not have MemoryPressure.
            reduceAllocatableMemoryUsageIfCgroupv1()
            // Nodes do not immediately report local storage capacity.
            // Sleep so that pods requesting local storage do not fail to schedule.
            time.Sleep(30 * time.Second)
            ginkgo.By("setting up pods to be used by tests")
            pods := []*v1.Pod{}
            for _, spec := range testSpecs {
                pods = append(pods, spec.pod)
            }
            e2epod.NewPodClient(f).CreateBatch(ctx, pods)
        })

        ginkgo.It("should eventually evict all of the correct pods", func(ctx context.Context) {
            ginkgo.By(fmt.Sprintf("Waiting for node to have NodeCondition: %s", expectedNodeCondition))
            gomega.Eventually(ctx, func(ctx context.Context) error {
                logFunc(ctx)
                if expectedNodeCondition == noPressure || hasNodeCondition(ctx, f, expectedNodeCondition) {
                    return nil
                }
                return fmt.Errorf("NodeCondition: %s not encountered", expectedNodeCondition)
            }, pressureTimeout, evictionPollInterval).Should(gomega.BeNil())

            ginkgo.By("Waiting for evictions to occur")
            gomega.Eventually(ctx, func(ctx context.Context) error {
                if expectedNodeCondition != noPressure {
                    if hasNodeCondition(ctx, f, expectedNodeCondition) {
                        framework.Logf("Node has %s", expectedNodeCondition)
                    } else {
                        framework.Logf("Node does NOT have %s", expectedNodeCondition)
                    }
                }
                logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
                logFunc(ctx)
                return verifyEvictionOrdering(ctx, f, testSpecs)
            }, pressureTimeout, evictionPollInterval).Should(gomega.Succeed())

            ginkgo.By("checking for the expected pod conditions for evicted pods")
            verifyPodConditions(ctx, f, testSpecs)
            // We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats.
            // This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
            // evicts a pod and when we observe the pressure by querying the API server. We sleep here to account for that delay.
            ginkgo.By("making sure pressure from test has surfaced before continuing")
            time.Sleep(pressureDelay)

            ginkgo.By(fmt.Sprintf("Waiting for NodeCondition: %s to no longer exist on the node", expectedNodeCondition))
            gomega.Eventually(ctx, func(ctx context.Context) error {
                logFunc(ctx)
                logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
                if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
                    return fmt.Errorf("Conditions haven't returned to normal, node still has %s", expectedNodeCondition)
                }
                return nil
            }, pressureDisappearTimeout, evictionPollInterval).Should(gomega.BeNil())

            ginkgo.By("checking for stable, pressure-free condition without unexpected pod failures")
            gomega.Consistently(ctx, func(ctx context.Context) error {
                if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
                    return fmt.Errorf("%s disappeared and then reappeared", expectedNodeCondition)
                }
                logFunc(ctx)
                logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
                return verifyEvictionOrdering(ctx, f, testSpecs)
            }, postTestConditionMonitoringPeriod, evictionPollInterval).Should(gomega.Succeed())

            ginkgo.By("checking for correctly formatted eviction events")
            verifyEvictionEvents(ctx, f, testSpecs, expectedStarvedResource)
        })

        ginkgo.AfterEach(func(ctx context.Context) {
            prePullImagesIfNecessary := func() {
                if expectedNodeCondition == v1.NodeDiskPressure && framework.TestContext.PrepullImages {
                    // The disk eviction test may cause the prepulled images to be evicted,
                    // so pull those images again to ensure this test does not affect subsequent tests.
                    PrePullAllImages()
                }
            }
            // Run the prepull via defer to make sure it executes even when the assertions below fail.
            defer prePullImagesIfNecessary()

            ginkgo.By("deleting pods")
            for _, spec := range testSpecs {
                ginkgo.By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
                e2epod.NewPodClient(f).DeleteSync(ctx, spec.pod.Name, metav1.DeleteOptions{}, 10*time.Minute)
            }

            // In case a test fails before verifying that the NodeCondition no longer exists on the node,
            // we should wait for the NodeCondition to disappear.
            ginkgo.By(fmt.Sprintf("making sure NodeCondition %s no longer exists on the node", expectedNodeCondition))
            gomega.Eventually(ctx, func(ctx context.Context) error {
                if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
                    return fmt.Errorf("Conditions haven't returned to normal, node still has %s", expectedNodeCondition)
                }
                return nil
            }, pressureDisappearTimeout, evictionPollInterval).Should(gomega.BeNil())

            reduceAllocatableMemoryUsageIfCgroupv1()
            ginkgo.By("making sure we have all the required images for testing")
            prePullImagesIfNecessary()

            // Ensure that the NodeCondition hasn't returned after pulling images.
            ginkgo.By(fmt.Sprintf("making sure NodeCondition %s doesn't exist again after pulling images", expectedNodeCondition))
            gomega.Eventually(ctx, func(ctx context.Context) error {
                if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
                    return fmt.Errorf("Conditions haven't returned to normal, node still has %s", expectedNodeCondition)
                }
                return nil
            }, pressureDisappearTimeout, evictionPollInterval).Should(gomega.BeNil())

            ginkgo.By("making sure we can start a new pod after the test")
            podName := "test-admit-pod"
            e2epod.NewPodClient(f).CreateSync(ctx, &v1.Pod{
                ObjectMeta: metav1.ObjectMeta{
                    Name: podName,
                },
                Spec: v1.PodSpec{
                    RestartPolicy: v1.RestartPolicyNever,
                    Containers: []v1.Container{
                        {
                            Image: imageutils.GetPauseImageName(),
                            Name:  podName,
                        },
                    },
                },
            })

            if ginkgo.CurrentSpecReport().Failed() {
                if framework.TestContext.DumpLogsOnFailure {
                    logPodEvents(ctx, f)
                    logNodeEvents(ctx, f)
                }
            }
        })
    })
}
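// Note the division of labor inside the polling loops above: verifyEvictionOrdering returns
// an error while evictions are still in progress, so gomega.Eventually keeps polling, but it
// asserts via gomega.Expect for invariants that must hold at every poll, such as a priority-0
// pod never failing; those abort the test immediately.
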
// verifyEvictionOrdering returns an error if all non-zero-priority pods have not been evicted, nil otherwise.
// This function panics (via Expect) if eviction ordering is violated, or if a priority-zero pod fails.
func verifyEvictionOrdering(ctx context.Context, f *framework.Framework, testSpecs []podEvictSpec) error {
    // Gather current information
    updatedPodList, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(ctx, metav1.ListOptions{})
    if err != nil {
        return err
    }
    updatedPods := updatedPodList.Items
    for _, p := range updatedPods {
        framework.Logf("fetching pod %s; phase=%v", p.Name, p.Status.Phase)
    }

    ginkgo.By("checking eviction ordering and ensuring important pods don't fail")
    done := true
    pendingPods := []string{}
    for _, priorityPodSpec := range testSpecs {
        var priorityPod v1.Pod
        for _, p := range updatedPods {
            if p.Name == priorityPodSpec.pod.Name {
                priorityPod = p
            }
        }
        // priorityPod is a struct value and can never be nil, so check that the lookup actually found it.
        gomega.Expect(priorityPod.Name).NotTo(gomega.BeEmpty(), "pod %s not found in the updated pod list", priorityPodSpec.pod.Name)
        gomega.Expect(priorityPod.Status.Phase).ToNot(gomega.Equal(v1.PodSucceeded),
            fmt.Sprintf("pod: %s succeeded unexpectedly", priorityPod.Name))

        // Check eviction ordering.
        // Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round,
        // but never alright for a priority 1 pod to fail while the priority 2 pod is still running.
        for _, lowPriorityPodSpec := range testSpecs {
            var lowPriorityPod v1.Pod
            for _, p := range updatedPods {
                if p.Name == lowPriorityPodSpec.pod.Name {
                    lowPriorityPod = p
                }
            }
            gomega.Expect(lowPriorityPod.Name).NotTo(gomega.BeEmpty(), "pod %s not found in the updated pod list", lowPriorityPodSpec.pod.Name)
            if priorityPodSpec.evictionPriority < lowPriorityPodSpec.evictionPriority && lowPriorityPod.Status.Phase == v1.PodRunning {
                gomega.Expect(priorityPod.Status.Phase).ToNot(gomega.Equal(v1.PodFailed),
                    fmt.Sprintf("priority %d pod: %s failed before priority %d pod: %s",
                        priorityPodSpec.evictionPriority, priorityPodSpec.pod.Name, lowPriorityPodSpec.evictionPriority, lowPriorityPodSpec.pod.Name))
            }
        }

        if priorityPod.Status.Phase == v1.PodFailed {
            gomega.Expect(priorityPod.Status.Reason).To(gomega.Equal(eviction.Reason), "pod %s failed; expected Status.Reason to be %s, but got %s",
                priorityPod.Name, eviction.Reason, priorityPod.Status.Reason)
        }

        // EvictionPriority 0 pods should not fail
        if priorityPodSpec.evictionPriority == 0 {
            gomega.Expect(priorityPod.Status.Phase).ToNot(gomega.Equal(v1.PodFailed),
                fmt.Sprintf("priority 0 pod: %s failed", priorityPod.Name))
        }

        // If a pod that is not evictionPriority 0 has not been evicted, we are not done
        if priorityPodSpec.evictionPriority != 0 && priorityPod.Status.Phase != v1.PodFailed {
            pendingPods = append(pendingPods, priorityPod.ObjectMeta.Name)
            done = false
        }
    }
    if done {
        return nil
    }
    return fmt.Errorf("pods that should be evicted are still running: %#v", pendingPods)
}

func verifyPodConditions(ctx context.Context, f *framework.Framework, testSpecs []podEvictSpec) {
    for _, spec := range testSpecs {
        if spec.wantPodDisruptionCondition != nil {
            pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(ctx, spec.pod.Name, metav1.GetOptions{})
            // Use spec.pod.Name here: on error, pod may be nil and must not be dereferenced.
            framework.ExpectNoError(err, "Failed to get the recent pod object for name: %q", spec.pod.Name)

            cType := *spec.wantPodDisruptionCondition
            podDisruptionCondition := e2epod.FindPodConditionByType(&pod.Status, cType)
            if podDisruptionCondition == nil {
                framework.Failf("pod %q should have the condition: %q, pod status: %v", pod.Name, cType, pod.Status)
            }
        }
    }
}
func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs []podEvictSpec, expectedStarvedResource v1.ResourceName) {
    for _, spec := range testSpecs {
        pod := spec.pod
        if spec.evictionPriority != 0 {
            selector := fields.Set{
                "involvedObject.kind":      "Pod",
                "involvedObject.name":      pod.Name,
                "involvedObject.namespace": f.Namespace.Name,
                "reason":                   eviction.Reason,
            }.AsSelector().String()
            podEvictEvents, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{FieldSelector: selector})
            framework.ExpectNoError(err, "getting events")
            gomega.Expect(podEvictEvents.Items).To(gomega.HaveLen(1), "Expected to find 1 eviction event for pod %s, got %d", pod.Name, len(podEvictEvents.Items))
            event := podEvictEvents.Items[0]

            if expectedStarvedResource != noStarvedResource {
                // Check the eviction.StarvedResourceKey
                starved, found := event.Annotations[eviction.StarvedResourceKey]
                if !found {
                    framework.Failf("Expected to find an annotation on the eviction event for pod %s containing the starved resource %s, but it was not found",
                        pod.Name, expectedStarvedResource)
                }
                starvedResource := v1.ResourceName(starved)
                gomega.Expect(starvedResource).To(gomega.Equal(expectedStarvedResource), "Expected the starved_resource annotation on pod %s to contain %s, but got %s instead",
                    pod.Name, expectedStarvedResource, starvedResource)

                // We only check these keys for memory, because ephemeral storage evictions may be due to volume usage, in which case these values are not present
                if expectedStarvedResource == v1.ResourceMemory {
                    // Check the eviction.OffendingContainersKey
                    offendersString, found := event.Annotations[eviction.OffendingContainersKey]
                    if !found {
                        framework.Failf("Expected to find an annotation on the eviction event for pod %s containing the offending containers, but it was not found",
                            pod.Name)
                    }
                    offendingContainers := strings.Split(offendersString, ",")
                    gomega.Expect(offendingContainers).To(gomega.HaveLen(1), "Expected to find the offending container's usage in the %s annotation, but no container was found",
                        eviction.OffendingContainersKey)
                    gomega.Expect(offendingContainers[0]).To(gomega.Equal(pod.Spec.Containers[0].Name), "Expected to find the offending container: %s's usage in the %s annotation, but found %s instead",
                        pod.Spec.Containers[0].Name, eviction.OffendingContainersKey, offendingContainers[0])

                    // Check the eviction.OffendingContainersUsageKey
                    offendingUsageString, found := event.Annotations[eviction.OffendingContainersUsageKey]
                    if !found {
                        framework.Failf("Expected to find an annotation on the eviction event for pod %s containing the offending containers' usage, but it was not found",
                            pod.Name)
                    }
                    offendingContainersUsage := strings.Split(offendingUsageString, ",")
                    gomega.Expect(offendingContainersUsage).To(gomega.HaveLen(1), "Expected to find the offending container's usage in the %s annotation, but found %+v",
                        eviction.OffendingContainersUsageKey, offendingContainersUsage)
                    usageQuantity, err := resource.ParseQuantity(offendingContainersUsage[0])
                    framework.ExpectNoError(err, "parsing pod %s's %s annotation as a quantity", pod.Name, eviction.OffendingContainersUsageKey)
                    request := pod.Spec.Containers[0].Resources.Requests[starvedResource]
                    gomega.Expect(usageQuantity.Cmp(request)).To(gomega.Equal(1), "Expected usage of offending container: %s in pod %s to exceed its request %s",
                        usageQuantity.String(), pod.Name, request.String())
                }
            }
        }
    }
}

// hasNodeCondition returns true if the node has the given node condition, false otherwise.
func hasNodeCondition(ctx context.Context, f *framework.Framework, expectedNodeCondition v1.NodeConditionType) bool {
    localNodeStatus := getLocalNode(ctx, f).Status
    _, actualNodeCondition := testutils.GetNodeCondition(&localNodeStatus, expectedNodeCondition)
    gomega.Expect(actualNodeCondition).NotTo(gomega.BeNil())
    return actualNodeCondition.Status == v1.ConditionTrue
}

func logInodeMetrics(ctx context.Context) {
    summary, err := getNodeSummary(ctx)
    if err != nil {
        framework.Logf("Error getting summary: %v", err)
        return
    }
    if summary.Node.Runtime != nil && summary.Node.Runtime.ImageFs != nil && summary.Node.Runtime.ImageFs.Inodes != nil && summary.Node.Runtime.ImageFs.InodesFree != nil {
        framework.Logf("imageFsInfo.Inodes: %d, imageFsInfo.InodesFree: %d", *summary.Node.Runtime.ImageFs.Inodes, *summary.Node.Runtime.ImageFs.InodesFree)
    }
    if summary.Node.Fs != nil && summary.Node.Fs.Inodes != nil && summary.Node.Fs.InodesFree != nil {
        framework.Logf("rootFsInfo.Inodes: %d, rootFsInfo.InodesFree: %d", *summary.Node.Fs.Inodes, *summary.Node.Fs.InodesFree)
    }
    for _, pod := range summary.Pods {
        framework.Logf("Pod: %s", pod.PodRef.Name)
        for _, container := range pod.Containers {
            if container.Rootfs != nil && container.Rootfs.InodesUsed != nil {
                framework.Logf("--- summary Container: %s inodeUsage: %d", container.Name, *container.Rootfs.InodesUsed)
            }
        }
        for _, volume := range pod.VolumeStats {
            if volume.FsStats.InodesUsed != nil {
                framework.Logf("--- summary Volume: %s inodeUsage: %d", volume.Name, *volume.FsStats.InodesUsed)
            }
        }
    }
}

func logDiskMetrics(ctx context.Context) {
    summary, err := getNodeSummary(ctx)
    if err != nil {
        framework.Logf("Error getting summary: %v", err)
        return
    }
    if summary.Node.Runtime != nil && summary.Node.Runtime.ImageFs != nil && summary.Node.Runtime.ImageFs.CapacityBytes != nil && summary.Node.Runtime.ImageFs.AvailableBytes != nil {
        framework.Logf("imageFsInfo.CapacityBytes: %d, imageFsInfo.AvailableBytes: %d", *summary.Node.Runtime.ImageFs.CapacityBytes, *summary.Node.Runtime.ImageFs.AvailableBytes)
    }
    if summary.Node.Fs != nil && summary.Node.Fs.CapacityBytes != nil && summary.Node.Fs.AvailableBytes != nil {
        framework.Logf("rootFsInfo.CapacityBytes: %d, rootFsInfo.AvailableBytes: %d", *summary.Node.Fs.CapacityBytes, *summary.Node.Fs.AvailableBytes)
    }
    for _, pod := range summary.Pods {
        framework.Logf("Pod: %s", pod.PodRef.Name)
        for _, container := range pod.Containers {
            if container.Rootfs != nil && container.Rootfs.UsedBytes != nil {
                framework.Logf("--- summary Container: %s UsedBytes: %d", container.Name, *container.Rootfs.UsedBytes)
            }
        }
        for _, volume := range pod.VolumeStats {
            // Check UsedBytes (not InodesUsed) for nil before dereferencing it.
            if volume.FsStats.UsedBytes != nil {
                framework.Logf("--- summary Volume: %s UsedBytes: %d", volume.Name, *volume.FsStats.UsedBytes)
            }
        }
    }
}
func logMemoryMetrics(ctx context.Context) {
    summary, err := getNodeSummary(ctx)
    if err != nil {
        framework.Logf("Error getting summary: %v", err)
        return
    }
    if summary.Node.Memory != nil && summary.Node.Memory.WorkingSetBytes != nil && summary.Node.Memory.AvailableBytes != nil {
        framework.Logf("Node.Memory.WorkingSetBytes: %d, Node.Memory.AvailableBytes: %d", *summary.Node.Memory.WorkingSetBytes, *summary.Node.Memory.AvailableBytes)
    }
    for _, sysContainer := range summary.Node.SystemContainers {
        if sysContainer.Name == kubeletstatsv1alpha1.SystemContainerPods && sysContainer.Memory != nil && sysContainer.Memory.WorkingSetBytes != nil && sysContainer.Memory.AvailableBytes != nil {
            framework.Logf("Allocatable.Memory.WorkingSetBytes: %d, Allocatable.Memory.AvailableBytes: %d", *sysContainer.Memory.WorkingSetBytes, *sysContainer.Memory.AvailableBytes)
        }
    }
    for _, pod := range summary.Pods {
        framework.Logf("Pod: %s", pod.PodRef.Name)
        for _, container := range pod.Containers {
            if container.Memory != nil && container.Memory.WorkingSetBytes != nil {
                framework.Logf("--- summary Container: %s WorkingSetBytes: %d", container.Name, *container.Memory.WorkingSetBytes)
            }
        }
    }
}

func logPidMetrics(ctx context.Context) {
    summary, err := getNodeSummary(ctx)
    if err != nil {
        framework.Logf("Error getting summary: %v", err)
        return
    }
    if summary.Node.Rlimit != nil && summary.Node.Rlimit.MaxPID != nil && summary.Node.Rlimit.NumOfRunningProcesses != nil {
        framework.Logf("Node.Rlimit.MaxPID: %d, Node.Rlimit.RunningProcesses: %d", *summary.Node.Rlimit.MaxPID, *summary.Node.Rlimit.NumOfRunningProcesses)
    }
}

func eventuallyGetSummary(ctx context.Context) (s *kubeletstatsv1alpha1.Summary) {
    gomega.Eventually(ctx, func() error {
        summary, err := getNodeSummary(ctx)
        if err != nil {
            return err
        }
        if summary == nil || summary.Node.Fs == nil || summary.Node.Fs.InodesFree == nil || summary.Node.Fs.AvailableBytes == nil {
            return fmt.Errorf("some part of data is nil")
        }
        s = summary
        return nil
    }, time.Minute, evictionPollInterval).Should(gomega.BeNil())
    return
}

// innocentPod returns a pod that does not use any resources.
func innocentPod() *v1.Pod {
    // Due to https://github.com/kubernetes/kubernetes/issues/115819,
    // when evictionHard is used, setting the grace period to 0 actually meant the default setting (30 seconds).
    // Using 1 second helps with flakiness, since SIGTERM is then sent right away.
    var gracePeriod int64 = 1
    return &v1.Pod{
        ObjectMeta: metav1.ObjectMeta{Name: "innocent-pod"},
        Spec: v1.PodSpec{
            RestartPolicy:                 v1.RestartPolicyNever,
            TerminationGracePeriodSeconds: &gracePeriod,
            Containers: []v1.Container{
                {
                    Image: busyboxImage,
                    Name:  "innocent-container",
                    Command: []string{
                        "sh",
                        "-c",
                        "while true; do sleep 5; done",
                    },
                },
            },
        },
    }
}

const (
    volumeMountPath = "/test-mnt"
    volumeName      = "test-volume"
)

func inodeConsumingPod(name string, numFiles int, volumeSource *v1.VolumeSource) *v1.Pod {
    path := ""
    if volumeSource != nil {
        path = volumeMountPath
    }
    // Each iteration creates an empty file.
    return podWithCommand(volumeSource, v1.ResourceRequirements{}, numFiles, name, fmt.Sprintf("touch %s${i}.txt; sleep 0.001;", filepath.Join(path, "file")))
}
func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod {
    path := ""
    if volumeSource != nil {
        path = volumeMountPath
    }
    // Each iteration writes 1 MB, so run diskConsumedMB iterations.
    return podWithCommand(volumeSource, resources, diskConsumedMB, name, fmt.Sprintf("dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null; sleep .1;", filepath.Join(path, "file")))
}

func pidConsumingPod(name string, numProcesses int) *v1.Pod {
    // Each iteration forks once, but creates two processes.
    return podWithCommand(nil, v1.ResourceRequirements{}, numProcesses/2, name, "(while true; do /bin/sleep 5; done)&")
}

// podWithCommand returns a pod with the provided volumeSource and resourceRequirements.
func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, iterations int, name, command string) *v1.Pod {
    // Due to https://github.com/kubernetes/kubernetes/issues/115819,
    // when evictionHard is used, setting the grace period to 0 actually meant the default setting (30 seconds).
    // Using 1 second helps with flakiness, since SIGTERM is then sent right away.
    var gracePeriod int64 = 1
    volumeMounts := []v1.VolumeMount{}
    volumes := []v1.Volume{}
    if volumeSource != nil {
        volumeMounts = []v1.VolumeMount{{MountPath: volumeMountPath, Name: volumeName}}
        volumes = []v1.Volume{{Name: volumeName, VolumeSource: *volumeSource}}
    }
    return &v1.Pod{
        ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("%s-pod", name)},
        Spec: v1.PodSpec{
            RestartPolicy:                 v1.RestartPolicyNever,
            TerminationGracePeriodSeconds: &gracePeriod,
            Containers: []v1.Container{
                {
                    Image: busyboxImage,
                    Name:  fmt.Sprintf("%s-container", name),
                    Command: []string{
                        "sh",
                        "-c",
                        fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s i=$(($i+1)); done; while true; do sleep 5; done", iterations, command),
                    },
                    Resources:    resources,
                    VolumeMounts: volumeMounts,
                },
            },
            Volumes: volumes,
        },
    }
}
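// As a concrete example of the command podWithCommand assembles: inodeConsumingPod("x", 3, nil)
// produces a busybox container that runs
//
//	i=0; while [ $i -lt 3 ]; do touch file${i}.txt; sleep 0.001; i=$(($i+1)); done; while true; do sleep 5; done
//
// i.e. it creates three empty files (three inodes) and then idles, so the pod stays Running
// and its phase only changes if the kubelet evicts it.
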
func getMemhogPod(podName string, ctnName string, res v1.ResourceRequirements) *v1.Pod {
    // Due to https://github.com/kubernetes/kubernetes/issues/115819,
    // when evictionHard is used, setting the grace period to 0 actually meant the default setting (30 seconds).
    // Using 1 second helps with flakiness, since SIGTERM is then sent right away.
    var gracePeriod int64 = 1
    env := []v1.EnvVar{
        {
            Name: "MEMORY_LIMIT",
            ValueFrom: &v1.EnvVarSource{
                ResourceFieldRef: &v1.ResourceFieldSelector{
                    Resource: "limits.memory",
                },
            },
        },
    }

    // If there is a limit specified, pass 80% of it for -mem-total, otherwise use the downward API
    // to pass limits.memory, which will be the total memory available.
    // This helps prevent a guaranteed pod from triggering an OOM kill due to its low memory limit,
    // which would cause the test to fail inappropriately.
    var memLimit string
    if limit, ok := res.Limits[v1.ResourceMemory]; ok {
        memLimit = strconv.Itoa(int(
            float64(limit.Value()) * 0.8))
    } else {
        memLimit = "$(MEMORY_LIMIT)"
    }

    return &v1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            Name: podName,
        },
        Spec: v1.PodSpec{
            RestartPolicy:                 v1.RestartPolicyNever,
            TerminationGracePeriodSeconds: &gracePeriod,
            Containers: []v1.Container{
                {
                    Name:            ctnName,
                    Image:           imageutils.GetE2EImage(imageutils.Agnhost),
                    ImagePullPolicy: "Always",
                    Env:             env,
                    // 60 min timeout * 60s / tick per 10s = 360 ticks before timeout => ~11.11Mi/tick
                    // to fill ~4Gi of memory, so initial ballpark 12Mi/tick.
                    // We might see flakes due to timeout if the total memory on the nodes increases.
                    Args:      []string{"stress", "--mem-alloc-size", "12Mi", "--mem-alloc-sleep", "10s", "--mem-total", memLimit},
                    Resources: res,
                },
            },
        },
    }
}