k8s.io/kubernetes@v1.29.3/test/e2e_node/cpu_manager_metrics_test.go

/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
	"context"
	"fmt"
	"time"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
	"github.com/onsi/gomega/gstruct"
	"github.com/onsi/gomega/types"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	admissionapi "k8s.io/pod-security-admission/api"
	"k8s.io/utils/cpuset"
)

var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUManager, func() {
	f := framework.NewDefaultFramework("cpumanager-metrics")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged

	ginkgo.Context("when querying /metrics", func() {
		var oldCfg *kubeletconfig.KubeletConfiguration
		var testPod *v1.Pod
		var smtLevel int

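		// Reconfigure the kubelet to run the static CPU manager policy with the
		// full-pcpus-only option, skipping the tests on hosts without SMT or with
		// too few allocatable CPUs to ever grant a whole physical core.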
		ginkgo.BeforeEach(func(ctx context.Context) {
			var err error
			if oldCfg == nil {
				oldCfg, err = getCurrentKubeletConfig(ctx)
				framework.ExpectNoError(err)
			}

			fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption)
			_, cpuAlloc, _ := getLocalNodeCPUDetails(ctx, f)
			smtLevel = getSMTLevel()

			// strict SMT alignment is trivially verified and granted on non-SMT systems
			if smtLevel < 2 {
				e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT is disabled", fullCPUsOnlyOpt)
			}

			// our tests want to allocate a whole physical core, so we need at least smtLevel*2 allocatable virtual cpus
			if cpuAlloc < int64(smtLevel*2) {
				e2eskipper.Skipf("Skipping CPU Manager %s tests since the allocatable CPU count (%d) is less than %d", fullCPUsOnlyOpt, cpuAlloc, smtLevel*2)
			}

			framework.Logf("SMT level %d", smtLevel)

			// TODO: we assume the first available CPU ID is 0, which is a fair assumption, but we should
			// probably check what the node actually has.
			cpuPolicyOptions := map[string]string{
				cpumanager.FullPCPUsOnlyOption: "true",
			}
			newCfg := configureCPUManagerInKubelet(oldCfg,
				&cpuManagerKubeletArguments{
					policyName:              string(cpumanager.PolicyStatic),
					reservedSystemCPUs:      cpuset.New(0),
					enableCPUManagerOptions: true,
					options:                 cpuPolicyOptions,
				},
			)
			updateKubeletConfig(ctx, f, newCfg, true)
		})

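		// Clean up the test pod, if any, and restore the original kubelet configuration.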
		ginkgo.AfterEach(func(ctx context.Context) {
			if testPod != nil {
				deletePodSyncByName(ctx, f, testPod.Name)
			}
			updateKubeletConfig(ctx, f, oldCfg, true)
		})

		ginkgo.It("should report zero pinning counters after a fresh restart", func(ctx context.Context) {
			// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
			// being [Serial], we can also assume no one else but us is running pods.
			ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, with no pods running")

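			// pinning_requests_total counts exclusive CPU allocation attempts and
			// pinning_errors_total counts the attempts that failed; right after a
			// restart with no pods running, both counters must be zero.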
			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
				"kubelet_cpu_manager_pinning_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(0),
				}),
				"kubelet_cpu_manager_pinning_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(0),
				}),
			})

			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
			gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
			ginkgo.By("Ensuring the metrics match the expectations a few more times")
			gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
		})

		ginkgo.It("should report pinning failures when the cpumanager allocation is known to fail", func(ctx context.Context) {
			ginkgo.By("Creating the test pod which will be rejected with SMTAlignmentError")
			testPod = e2epod.NewPodClient(f).Create(ctx, makeGuaranteedCPUExclusiveSleeperPod("smt-align-err", 1))

			// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
			// being [Serial], we can also assume no one else but us is running pods.
			ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, after the pod failed admission")

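			// the sleeper pod asks for a single exclusive CPU, which violates the
			// full-pcpus-only policy on an SMT machine: expect exactly one pinning
			// request and one pinning error.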
			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
				"kubelet_cpu_manager_pinning_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(1),
				}),
				"kubelet_cpu_manager_pinning_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(1),
				}),
			})

			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
			gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
			ginkgo.By("Ensuring the metrics match the expectations a few more times")
			gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
		})

		ginkgo.It("should not report any pinning failures when the cpumanager allocation is expected to succeed", func(ctx context.Context) {
			ginkgo.By("Creating the test pod")
			testPod = e2epod.NewPodClient(f).Create(ctx, makeGuaranteedCPUExclusiveSleeperPod("smt-align-ok", smtLevel))

			// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
			// being [Serial], we can also assume no one else but us is running pods.
			ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, with the pod expected to be admitted")

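			// the sleeper pod asks for a whole physical core (smtLevel exclusive CPUs),
			// so the allocation must succeed: expect one pinning request and no errors.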
			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
				"kubelet_cpu_manager_pinning_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(1),
				}),
				"kubelet_cpu_manager_pinning_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(0),
				}),
			})

			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
			gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
			ginkgo.By("Ensuring the metrics match the expectations a few more times")
			gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
		})
	})
})

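// getKubeletMetrics scrapes the /metrics endpoint on the kubelet read-only port (10255)
// of the local node, without going through the API server proxy.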
func getKubeletMetrics(ctx context.Context) (e2emetrics.KubeletMetrics, error) {
	ginkgo.By("getting Kubelet metrics from the metrics API")
	return e2emetrics.GrabKubeletMetricsWithoutProxy(ctx, nodeNameOrIP()+":10255", "/metrics")
}

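// makeGuaranteedCPUExclusiveSleeperPod returns a Guaranteed QoS pod (requests equal to limits,
// integer CPU count) whose single container just sleeps, so the static CPU manager policy will
// try to pin the requested number of exclusive CPUs to it.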
func makeGuaranteedCPUExclusiveSleeperPod(name string, cpus int) *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: name + "-pod",
		},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{
				{
					Name:  name + "-cnt",
					Image: busyboxImage,
					Resources: v1.ResourceRequirements{
						Requests: v1.ResourceList{
							v1.ResourceCPU:    resource.MustParse(fmt.Sprintf("%d", cpus)),
							v1.ResourceMemory: resource.MustParse("64Mi"),
						},
						Limits: v1.ResourceList{
							v1.ResourceCPU:    resource.MustParse(fmt.Sprintf("%d", cpus)),
							v1.ResourceMemory: resource.MustParse("64Mi"),
						},
					},
					// pass the whole command line to "sh -c" as a single argument,
					// so that "sleep" actually receives its "1d" operand
					Command: []string{"sh", "-c", "sleep 1d"},
				},
			},
		},
	}
}

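// timelessSample matches a metric sample by value only, ignoring the metric labels,
// the timestamp and any histogram fields.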
func timelessSample(value interface{}) types.GomegaMatcher {
	return gstruct.PointTo(gstruct.MatchAllFields(gstruct.Fields{
		// We already check Metric when matching the Id
		"Metric":    gstruct.Ignore(),
		"Value":     gomega.BeNumerically("==", value),
		"Timestamp": gstruct.Ignore(),
		"Histogram": gstruct.Ignore(),
	}))
}