k8s.io/kubernetes@v1.29.3/test/e2e_node/container_manager_test.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2016 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  package e2enode
    21  
    22  import (
    23  	"context"
    24  	"fmt"
    25  	"os/exec"
    26  	"path"
    27  	"strconv"
    28  	"strings"
    29  	"time"
    30  
    31  	v1 "k8s.io/api/core/v1"
    32  	"k8s.io/apimachinery/pkg/api/resource"
    33  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    34  	"k8s.io/apimachinery/pkg/util/sets"
    35  	"k8s.io/apimachinery/pkg/util/uuid"
    36  	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
    37  	"k8s.io/kubernetes/test/e2e/framework"
    38  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    39  	"k8s.io/kubernetes/test/e2e/nodefeature"
    40  	imageutils "k8s.io/kubernetes/test/utils/image"
    41  	admissionapi "k8s.io/pod-security-admission/api"
    42  
    43  	"github.com/onsi/ginkgo/v2"
    44  	"github.com/onsi/gomega"
    45  )
    46  
    47  func getOOMScoreForPid(pid int) (int, error) {
    48  	procfsPath := path.Join("/proc", strconv.Itoa(pid), "oom_score_adj")
    49  	out, err := exec.Command("sudo", "cat", procfsPath).CombinedOutput()
    50  	if err != nil {
    51  		return 0, err
    52  	}
    53  	return strconv.Atoi(strings.TrimSpace(string(out)))
    54  }
    55  
    56  func validateOOMScoreAdjSetting(pid int, expectedOOMScoreAdj int) error {
    57  	oomScore, err := getOOMScoreForPid(pid)
    58  	if err != nil {
    59  		return fmt.Errorf("failed to get oom_score_adj for %d: %w", pid, err)
    60  	}
    61  	if expectedOOMScoreAdj != oomScore {
    62  		return fmt.Errorf("expected pid %d's oom_score_adj to be %d; found %d", pid, expectedOOMScoreAdj, oomScore)
    63  	}
    64  	return nil
    65  }
    66  
    67  func validateOOMScoreAdjSettingIsInRange(pid int, expectedMinOOMScoreAdj, expectedMaxOOMScoreAdj int) error {
    68  	oomScore, err := getOOMScoreForPid(pid)
    69  	if err != nil {
    70  		return fmt.Errorf("failed to get oom_score_adj for %d", pid)
    71  	}
    72  	if oomScore < expectedMinOOMScoreAdj {
    73  		return fmt.Errorf("expected pid %d's oom_score_adj to be >= %d; found %d", pid, expectedMinOOMScoreAdj, oomScore)
    74  	}
    75  	if oomScore >= expectedMaxOOMScoreAdj {
    76  		return fmt.Errorf("expected pid %d's oom_score_adj to be < %d; found %d", pid, expectedMaxOOMScoreAdj, oomScore)
    77  	}
    78  	return nil
    79  }
    80  
    81  var _ = SIGDescribe("Container Manager Misc", framework.WithSerial(), func() {
    82  	f := framework.NewDefaultFramework("kubelet-container-manager")
    83  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    84  	f.Describe("Validate OOM score adjustments", nodefeature.OOMScoreAdj, func() {
    85  		ginkgo.Context("once the node is setup", func() {
    86  			ginkgo.It("container runtime's oom-score-adj should be -999", func(ctx context.Context) {
    87  				runtimePids, err := getPidsForProcess(framework.TestContext.ContainerRuntimeProcessName, framework.TestContext.ContainerRuntimePidFile)
    88  				framework.ExpectNoError(err, "failed to get list of container runtime pids")
    89  				for _, pid := range runtimePids {
    90  					gomega.Eventually(ctx, func() error {
    91  						return validateOOMScoreAdjSetting(pid, -999)
    92  					}, 5*time.Minute, 30*time.Second).Should(gomega.BeNil())
    93  				}
    94  			})
    95  			ginkgo.It("Kubelet's oom-score-adj should be -999", func(ctx context.Context) {
    96  				kubeletPids, err := getPidsForProcess(kubeletProcessName, "")
    97  				framework.ExpectNoError(err, "failed to get list of kubelet pids")
    98  				gomega.Expect(kubeletPids).To(gomega.HaveLen(1), "expected only one kubelet process; found %d", len(kubeletPids))
    99  				gomega.Eventually(ctx, func() error {
   100  					return validateOOMScoreAdjSetting(kubeletPids[0], -999)
   101  				}, 5*time.Minute, 30*time.Second).Should(gomega.BeNil())
   102  			})
   103  			ginkgo.Context("", func() {
   104  				ginkgo.It("pod infra containers oom-score-adj should be -998 and best effort container's should be 1000", func(ctx context.Context) {
   105  					// Take a snapshot of existing pause processes. These were
   106  					// created before this test, and may not be infra
   107  					// containers. They should be excluded from the test.
   108  					existingPausePIDs, err := getPidsForProcess("pause", "")
   109  					framework.ExpectNoError(err, "failed to list all pause processes on the node")
   110  					existingPausePIDSet := sets.NewInt(existingPausePIDs...)
   111  
   112  					podClient := e2epod.NewPodClient(f)
   113  					podName := "besteffort" + string(uuid.NewUUID())
   114  					podClient.Create(ctx, &v1.Pod{
   115  						ObjectMeta: metav1.ObjectMeta{
   116  							Name: podName,
   117  						},
   118  						Spec: v1.PodSpec{
   119  							Containers: []v1.Container{
   120  								{
   121  									Image: framework.ServeHostnameImage,
   122  									Name:  podName,
   123  								},
   124  							},
   125  						},
   126  					})
   127  
   128  					var pausePids []int
   129  					ginkgo.By("checking infra container's oom-score-adj")
   130  					gomega.Eventually(ctx, func() error {
   131  						pausePids, err = getPidsForProcess("pause", "")
   132  						if err != nil {
   133  							return fmt.Errorf("failed to get list of pause pids: %w", err)
   134  						}
   135  						for _, pid := range pausePids {
   136  							if existingPausePIDSet.Has(pid) {
   137  								// Not created by this test. Ignore it.
   138  								continue
   139  							}
   140  							if err := validateOOMScoreAdjSetting(pid, -998); err != nil {
   141  								return err
   142  							}
   143  						}
   144  						return nil
   145  					}, 2*time.Minute, time.Second*4).Should(gomega.BeNil())
   146  					var shPids []int
   147  					ginkgo.By("checking besteffort container's oom-score-adj")
   148  					gomega.Eventually(ctx, func() error {
   149  						shPids, err = getPidsForProcess("agnhost", "")
   150  						if err != nil {
   151  							return fmt.Errorf("failed to get list of serve hostname process pids: %w", err)
   152  						}
   153  						if len(shPids) != 1 {
   154  							return fmt.Errorf("expected only one agnhost process; found %d", len(shPids))
   155  						}
   156  						return validateOOMScoreAdjSetting(shPids[0], 1000)
   157  					}, 2*time.Minute, time.Second*4).Should(gomega.BeNil())
   158  				})
   159  				// Log the running containers here to help debugging.
   160  				ginkgo.AfterEach(func() {
   161  					if ginkgo.CurrentSpecReport().Failed() {
   162  						ginkgo.By("Dump all running containers")
   163  						runtime, _, err := getCRIClient()
   164  						framework.ExpectNoError(err)
   165  						containers, err := runtime.ListContainers(context.Background(), &runtimeapi.ContainerFilter{
   166  							State: &runtimeapi.ContainerStateValue{
   167  								State: runtimeapi.ContainerState_CONTAINER_RUNNING,
   168  							},
   169  						})
   170  						framework.ExpectNoError(err)
   171  						framework.Logf("Running containers:")
   172  						for _, c := range containers {
   173  							framework.Logf("%+v", c)
   174  						}
   175  					}
   176  				})
   177  			})
   178  			ginkgo.It("guaranteed container's oom-score-adj should be -998", func(ctx context.Context) {
   179  				podClient := e2epod.NewPodClient(f)
   180  				podName := "guaranteed" + string(uuid.NewUUID())
   181  				podClient.Create(ctx, &v1.Pod{
   182  					ObjectMeta: metav1.ObjectMeta{
   183  						Name: podName,
   184  					},
   185  					Spec: v1.PodSpec{
   186  						Containers: []v1.Container{
   187  							{
   188  								Image: imageutils.GetE2EImage(imageutils.Nginx),
   189  								Name:  podName,
   190  								Resources: v1.ResourceRequirements{
   191  									Limits: v1.ResourceList{
   192  										v1.ResourceCPU:    resource.MustParse("100m"),
   193  										v1.ResourceMemory: resource.MustParse("50Mi"),
   194  									},
   195  								},
   196  							},
   197  						},
   198  					},
   199  				})
   200  				var (
   201  					ngPids []int
   202  					err    error
   203  				)
   204  				gomega.Eventually(ctx, func() error {
   205  					ngPids, err = getPidsForProcess("nginx", "")
   206  					if err != nil {
   207  						return fmt.Errorf("failed to get list of nginx process pids: %w", err)
   208  					}
   209  					for _, pid := range ngPids {
   210  						if err := validateOOMScoreAdjSetting(pid, -998); err != nil {
   211  							return err
   212  						}
   213  					}
   214  
   215  					return nil
   216  				}, 2*time.Minute, time.Second*4).Should(gomega.BeNil())
   217  
   218  			})
   219  			ginkgo.It("burstable container's oom-score-adj should be between [2, 1000)", func(ctx context.Context) {
   220  				podClient := e2epod.NewPodClient(f)
   221  				podName := "burstable" + string(uuid.NewUUID())
   222  				podClient.Create(ctx, &v1.Pod{
   223  					ObjectMeta: metav1.ObjectMeta{
   224  						Name: podName,
   225  					},
   226  					Spec: v1.PodSpec{
   227  						Containers: []v1.Container{
   228  							{
   229  								Image: imageutils.GetE2EImage(imageutils.Agnhost),
   230  								Args:  []string{"test-webserver"},
   231  								Name:  podName,
   232  								Resources: v1.ResourceRequirements{
   233  									Requests: v1.ResourceList{
   234  										v1.ResourceCPU:    resource.MustParse("100m"),
   235  										v1.ResourceMemory: resource.MustParse("50Mi"),
   236  									},
   237  								},
   238  							},
   239  						},
   240  					},
   241  				})
   242  				var (
   243  					wsPids []int
   244  					err    error
   245  				)
   246  				gomega.Eventually(ctx, func() error {
   247  					wsPids, err = getPidsForProcess("agnhost", "")
   248  					if err != nil {
   249  						return fmt.Errorf("failed to get list of test-webserver process pids: %w", err)
   250  					}
   251  					for _, pid := range wsPids {
   252  						if err := validateOOMScoreAdjSettingIsInRange(pid, 2, 1000); err != nil {
   253  							return err
   254  						}
   255  					}
   256  					return nil
   257  				}, 2*time.Minute, time.Second*4).Should(gomega.BeNil())
   258  
   259  				// TODO: Test the oom-score-adj logic for burstable more accurately.
   260  			})
   261  		})
   262  	})
   263  })