k8s.io/kubernetes@v1.29.3/test/e2e_node/cpu_manager_metrics_test.go

/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
	"context"
	"fmt"
	"time"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
	"github.com/onsi/gomega/gstruct"
	"github.com/onsi/gomega/types"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	admissionapi "k8s.io/pod-security-admission/api"
	"k8s.io/utils/cpuset"
)

var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUManager, func() {
	f := framework.NewDefaultFramework("cpumanager-metrics")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged

	ginkgo.Context("when querying /metrics", func() {
		var oldCfg *kubeletconfig.KubeletConfiguration
		var testPod *v1.Pod
		var smtLevel int

		ginkgo.BeforeEach(func(ctx context.Context) {
			var err error
			if oldCfg == nil {
				oldCfg, err = getCurrentKubeletConfig(ctx)
				framework.ExpectNoError(err)
			}

			fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption)
			_, cpuAlloc, _ := getLocalNodeCPUDetails(ctx, f)
			smtLevel = getSMTLevel()

			// strict SMT alignment is trivially verified and granted on non-SMT systems
			if smtLevel < 2 {
				e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT is disabled", fullCPUsOnlyOpt)
			}

			// our tests want to allocate up to a full core, so we need at least 2*2=4 virtual cpus
			if cpuAlloc < int64(smtLevel*2) {
				e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, smtLevel*2)
			}

			framework.Logf("SMT level %d", smtLevel)

			// TODO: we assume the first available CPUID is 0, which is pretty fair, but we should probably
			// check what we do have in the node.
			cpuPolicyOptions := map[string]string{
				cpumanager.FullPCPUsOnlyOption: "true",
			}
			newCfg := configureCPUManagerInKubelet(oldCfg,
				&cpuManagerKubeletArguments{
					policyName:              string(cpumanager.PolicyStatic),
					reservedSystemCPUs:      cpuset.New(0),
					enableCPUManagerOptions: true,
					options:                 cpuPolicyOptions,
				},
			)
			updateKubeletConfig(ctx, f, newCfg, true)
		})

		ginkgo.AfterEach(func(ctx context.Context) {
			if testPod != nil {
				deletePodSyncByName(ctx, f, testPod.Name)
			}
			updateKubeletConfig(ctx, f, oldCfg, true)
		})

		ginkgo.It("should report zero pinning counters after a fresh restart", func(ctx context.Context) {
			// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
			// being [Serial], we can also assume no one else but us is running pods.
			ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, with no pods running")
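
			// a note on the matcher shape used throughout this file:
			// MatchKeys with IgnoreExtras tolerates the many unrelated metrics
			// the kubelet exposes, MatchAllElements keys each sample through the
			// nodeID helper, and timelessSample (defined below) compares only the
			// sample value. Here both pinning counters must be exactly zero,
			// since the restart re-registered them at zero and nothing ran yet.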
			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
				"kubelet_cpu_manager_pinning_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(0),
				}),
				"kubelet_cpu_manager_pinning_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(0),
				}),
			})

			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
			gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
			ginkgo.By("Ensuring the metrics match the expectations a few more times")
			gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
		})

		ginkgo.It("should report pinning failures when the cpumanager allocation is known to fail", func(ctx context.Context) {
			ginkgo.By("Creating the test pod which will be rejected for SMTAlignmentError")
			testPod = e2epod.NewPodClient(f).Create(ctx, makeGuaranteedCPUExclusiveSleeperPod("smt-align-err", 1))

			// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
			// being [Serial], we can also assume no one else but us is running pods.
			ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, with a pod that failed admission")

			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
				"kubelet_cpu_manager_pinning_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(1),
				}),
				"kubelet_cpu_manager_pinning_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(1),
				}),
			})

			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
			gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
			ginkgo.By("Ensuring the metrics match the expectations a few more times")
			gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
		})

		ginkgo.It("should not report any pinning failures when the cpumanager allocation is expected to succeed", func(ctx context.Context) {
			ginkgo.By("Creating the test pod")
			testPod = e2epod.NewPodClient(f).Create(ctx, makeGuaranteedCPUExclusiveSleeperPod("smt-align-ok", smtLevel))

			// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
			// being [Serial], we can also assume no one else but us is running pods.
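			// a pod requesting smtLevel exclusive CPUs consumes whole physical
			// cores, so with full-pcpus-only the admission must succeed: the
			// request counter moves to 1 while the error counter stays at 0.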
			ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, with a pod expected to be admitted")

			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
				"kubelet_cpu_manager_pinning_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(1),
				}),
				"kubelet_cpu_manager_pinning_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(0),
				}),
			})

			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
			gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
			ginkgo.By("Ensuring the metrics match the expectations a few more times")
			gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
		})
	})
})

// getKubeletMetrics scrapes the kubelet read-only endpoint directly,
// bypassing the API server proxy.
func getKubeletMetrics(ctx context.Context) (e2emetrics.KubeletMetrics, error) {
	ginkgo.By("getting Kubelet metrics from the metrics API")
	return e2emetrics.GrabKubeletMetricsWithoutProxy(ctx, nodeNameOrIP()+":10255", "/metrics")
}

// makeGuaranteedCPUExclusiveSleeperPod returns a Guaranteed-QoS pod whose
// single container requests the given number of exclusive CPUs and sleeps.
func makeGuaranteedCPUExclusiveSleeperPod(name string, cpus int) *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: name + "-pod",
		},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{
				{
					Name:  name + "-cnt",
					Image: busyboxImage,
					Resources: v1.ResourceRequirements{
						Requests: v1.ResourceList{
							v1.ResourceCPU:    resource.MustParse(fmt.Sprintf("%d", cpus)),
							v1.ResourceMemory: resource.MustParse("64Mi"),
						},
						Limits: v1.ResourceList{
							v1.ResourceCPU:    resource.MustParse(fmt.Sprintf("%d", cpus)),
							v1.ResourceMemory: resource.MustParse("64Mi"),
						},
					},
					// "sleep 1d" must be a single argument to "sh -c": passing
					// "sleep" and "1d" separately would make "1d" the shell's $0
					// and run sleep with no operand, so the container would exit.
					Command: []string{"sh", "-c", "sleep 1d"},
				},
			},
		},
	}
}

// timelessSample matches a single metric sample by value only, ignoring the
// metric labels (already matched via the element ID), the timestamp, and any
// histogram payload.
func timelessSample(value interface{}) types.GomegaMatcher {
	return gstruct.PointTo(gstruct.MatchAllFields(gstruct.Fields{
		// We already check Metric when matching the Id
		"Metric":    gstruct.Ignore(),
		"Value":     gomega.BeNumerically("==", value),
		"Timestamp": gstruct.Ignore(),
		"Histogram": gstruct.Ignore(),
	}))
}
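
// The helper below is an illustrative sketch, not part of the upstream file:
// it shows how getKubeletMetrics composes with a hand-rolled gstruct matcher
// when the exact counter value is unknown and only a lower bound matters.
// expectSomePinningRequests is a hypothetical name; the metric name and the
// nodeID keying mirror the tests above, everything else is an assumption.
func expectSomePinningRequests(ctx context.Context) {
	matcher := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
		"kubelet_cpu_manager_pinning_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
			// unlike timelessSample, assert only Value > 0 and ignore all
			// other sample fields via IgnoreExtras
			"": gstruct.PointTo(gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
				"Value": gomega.BeNumerically(">", 0),
			})),
		}),
	})
	gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matcher)
}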