k8s.io/kubernetes@v1.29.3/test/e2e_node/summary_test.go (about) 1 /* 2 Copyright 2016 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package e2enode 18 19 import ( 20 "context" 21 "fmt" 22 "os" 23 "strings" 24 "time" 25 26 v1 "k8s.io/api/core/v1" 27 "k8s.io/apimachinery/pkg/api/resource" 28 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 kubeletstatsv1alpha1 "k8s.io/kubelet/pkg/apis/stats/v1alpha1" 30 "k8s.io/kubernetes/test/e2e/framework" 31 e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl" 32 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 33 e2evolume "k8s.io/kubernetes/test/e2e/framework/volume" 34 admissionapi "k8s.io/pod-security-admission/api" 35 36 systemdutil "github.com/coreos/go-systemd/v22/util" 37 "github.com/onsi/ginkgo/v2" 38 "github.com/onsi/gomega" 39 "github.com/onsi/gomega/gstruct" 40 "github.com/onsi/gomega/types" 41 ) 42 43 var _ = SIGDescribe("Summary API", framework.WithNodeConformance(), func() { 44 f := framework.NewDefaultFramework("summary-test") 45 f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged 46 ginkgo.Context("when querying /stats/summary", func() { 47 ginkgo.AfterEach(func(ctx context.Context) { 48 if !ginkgo.CurrentSpecReport().Failed() { 49 return 50 } 51 if framework.TestContext.DumpLogsOnFailure { 52 e2ekubectl.LogFailedContainers(ctx, f.ClientSet, f.Namespace.Name, framework.Logf) 53 } 54 ginkgo.By("Recording processes in system cgroups") 55 recordSystemCgroupProcesses(ctx) 56 }) 57 ginkgo.It("should report resource usage through the stats api", func(ctx context.Context) { 58 const pod0 = "stats-busybox-0" 59 const pod1 = "stats-busybox-1" 60 61 ginkgo.By("Creating test pods") 62 numRestarts := int32(1) 63 pods := getSummaryTestPods(f, numRestarts, pod0, pod1) 64 e2epod.NewPodClient(f).CreateBatch(ctx, pods) 65 66 ginkgo.By("restarting the containers to ensure container metrics are still being gathered after a container is restarted") 67 gomega.Eventually(ctx, func() error { 68 for _, pod := range pods { 69 err := verifyPodRestartCount(ctx, f, pod.Name, len(pod.Spec.Containers), numRestarts) 70 if err != nil { 71 return err 72 } 73 } 74 return nil 75 }, time.Minute, 5*time.Second).Should(gomega.BeNil()) 76 77 ginkgo.By("Waiting 15 seconds for cAdvisor to collect 2 stats points") 78 time.Sleep(15 * time.Second) 79 80 // Setup expectations. 81 const ( 82 maxStartAge = time.Hour * 24 * 365 // 1 year 83 maxStatsAge = time.Minute 84 ) 85 ginkgo.By("Fetching node so we can match against an appropriate memory limit") 86 node := getLocalNode(ctx, f) 87 memoryCapacity := node.Status.Capacity["memory"] 88 memoryLimit := memoryCapacity.Value() 89 fsCapacityBounds := bounded(100*e2evolume.Mb, 10*e2evolume.Tb) 90 // Expectations for system containers. 91 sysContExpectations := func() types.GomegaMatcher { 92 return gstruct.MatchAllFields(gstruct.Fields{ 93 "Name": gstruct.Ignore(), 94 "StartTime": recent(maxStartAge), 95 "CPU": ptrMatchAllFields(gstruct.Fields{ 96 "Time": recent(maxStatsAge), 97 // CRI stats provider tries to estimate the value of UsageNanoCores. This value can be 98 // either 0 or between 10000 and 2e9. 99 // Please refer, https://github.com/kubernetes/kubernetes/pull/95345#discussion_r501630942 100 // for more information. 101 "UsageNanoCores": gomega.SatisfyAny(gstruct.PointTo(gomega.BeZero()), bounded(10000, 2e9)), 102 "UsageCoreNanoSeconds": bounded(10000000, 1e15), 103 }), 104 "Memory": ptrMatchAllFields(gstruct.Fields{ 105 "Time": recent(maxStatsAge), 106 // We don't limit system container memory. 107 "AvailableBytes": gomega.BeNil(), 108 "UsageBytes": bounded(1*e2evolume.Mb, memoryLimit), 109 "WorkingSetBytes": bounded(1*e2evolume.Mb, memoryLimit), 110 // this now returns /sys/fs/cgroup/memory.stat total_rss 111 "RSSBytes": bounded(1*e2evolume.Mb, memoryLimit), 112 "PageFaults": bounded(1000, 1e9), 113 "MajorPageFaults": bounded(0, 1e9), 114 }), 115 "Swap": swapExpectation(memoryLimit), 116 "Accelerators": gomega.BeEmpty(), 117 "Rootfs": gomega.BeNil(), 118 "Logs": gomega.BeNil(), 119 "UserDefinedMetrics": gomega.BeEmpty(), 120 }) 121 } 122 expectedPageFaultsUpperBound := 1000000 123 expectedMajorPageFaultsUpperBound := 1e9 124 if IsCgroup2UnifiedMode() { 125 // On cgroupv2 these stats are recursive, so make sure they are at least like the value set 126 // above for the container. 127 expectedPageFaultsUpperBound = 1e9 128 expectedMajorPageFaultsUpperBound = 1e9 129 } 130 131 podsContExpectations := sysContExpectations().(*gstruct.FieldsMatcher) 132 podsContExpectations.Fields["Memory"] = ptrMatchAllFields(gstruct.Fields{ 133 "Time": recent(maxStatsAge), 134 // Pods are limited by Node Allocatable 135 "AvailableBytes": bounded(1*e2evolume.Kb, memoryLimit), 136 "UsageBytes": bounded(10*e2evolume.Kb, memoryLimit), 137 "WorkingSetBytes": bounded(10*e2evolume.Kb, memoryLimit), 138 "RSSBytes": bounded(1*e2evolume.Kb, memoryLimit), 139 "PageFaults": bounded(0, expectedPageFaultsUpperBound), 140 "MajorPageFaults": bounded(0, expectedMajorPageFaultsUpperBound), 141 }) 142 runtimeContExpectations := sysContExpectations().(*gstruct.FieldsMatcher) 143 systemContainers := gstruct.Elements{ 144 "kubelet": sysContExpectations(), 145 "runtime": runtimeContExpectations, 146 "pods": podsContExpectations, 147 } 148 // The Kubelet only manages the 'misc' system container if the host is not running systemd. 149 if !systemdutil.IsRunningSystemd() { 150 framework.Logf("Host not running systemd; expecting 'misc' system container.") 151 miscContExpectations := sysContExpectations().(*gstruct.FieldsMatcher) 152 // Misc processes are system-dependent, so relax the memory constraints. 153 miscContExpectations.Fields["Memory"] = ptrMatchAllFields(gstruct.Fields{ 154 "Time": recent(maxStatsAge), 155 // We don't limit system container memory. 156 "AvailableBytes": gomega.BeNil(), 157 "UsageBytes": bounded(100*e2evolume.Kb, memoryLimit), 158 "WorkingSetBytes": bounded(100*e2evolume.Kb, memoryLimit), 159 "RSSBytes": bounded(100*e2evolume.Kb, memoryLimit), 160 "PageFaults": bounded(1000, 1e9), 161 "MajorPageFaults": bounded(0, 1e9), 162 }) 163 systemContainers["misc"] = miscContExpectations 164 } 165 // Expectations for pods. 166 podExpectations := gstruct.MatchAllFields(gstruct.Fields{ 167 "PodRef": gstruct.Ignore(), 168 "StartTime": recent(maxStartAge), 169 "Containers": gstruct.MatchAllElements(summaryObjectID, gstruct.Elements{ 170 "busybox-container": gstruct.MatchAllFields(gstruct.Fields{ 171 "Name": gomega.Equal("busybox-container"), 172 "StartTime": recent(maxStartAge), 173 "CPU": ptrMatchAllFields(gstruct.Fields{ 174 "Time": recent(maxStatsAge), 175 "UsageNanoCores": bounded(10000, 1e9), 176 "UsageCoreNanoSeconds": bounded(10000000, 1e11), 177 }), 178 "Memory": ptrMatchAllFields(gstruct.Fields{ 179 "Time": recent(maxStatsAge), 180 "AvailableBytes": bounded(1*e2evolume.Kb, 80*e2evolume.Mb), 181 "UsageBytes": bounded(10*e2evolume.Kb, 80*e2evolume.Mb), 182 "WorkingSetBytes": bounded(10*e2evolume.Kb, 80*e2evolume.Mb), 183 "RSSBytes": bounded(1*e2evolume.Kb, 80*e2evolume.Mb), 184 "PageFaults": bounded(100, expectedPageFaultsUpperBound), 185 "MajorPageFaults": bounded(0, expectedMajorPageFaultsUpperBound), 186 }), 187 "Swap": swapExpectation(memoryLimit), 188 "Accelerators": gomega.BeEmpty(), 189 "Rootfs": ptrMatchAllFields(gstruct.Fields{ 190 "Time": recent(maxStatsAge), 191 "AvailableBytes": fsCapacityBounds, 192 "CapacityBytes": fsCapacityBounds, 193 "UsedBytes": bounded(e2evolume.Kb, 10*e2evolume.Mb), 194 "InodesFree": bounded(1e4, 1e8), 195 "Inodes": bounded(1e4, 1e8), 196 "InodesUsed": bounded(0, 1e8), 197 }), 198 "Logs": ptrMatchAllFields(gstruct.Fields{ 199 "Time": recent(maxStatsAge), 200 "AvailableBytes": fsCapacityBounds, 201 "CapacityBytes": fsCapacityBounds, 202 "UsedBytes": bounded(e2evolume.Kb, 10*e2evolume.Mb), 203 "InodesFree": bounded(1e4, 1e8), 204 "Inodes": bounded(1e4, 1e8), 205 "InodesUsed": bounded(0, 1e8), 206 }), 207 "UserDefinedMetrics": gomega.BeEmpty(), 208 }), 209 }), 210 "Network": ptrMatchAllFields(gstruct.Fields{ 211 "Time": recent(maxStatsAge), 212 "InterfaceStats": gstruct.MatchAllFields(gstruct.Fields{ 213 "Name": gomega.Equal("eth0"), 214 "RxBytes": bounded(10, 10*e2evolume.Mb), 215 "RxErrors": bounded(0, 1000), 216 "TxBytes": bounded(10, 10*e2evolume.Mb), 217 "TxErrors": bounded(0, 1000), 218 }), 219 "Interfaces": gomega.Not(gomega.BeNil()), 220 }), 221 "CPU": ptrMatchAllFields(gstruct.Fields{ 222 "Time": recent(maxStatsAge), 223 "UsageNanoCores": bounded(10000, 1e9), 224 "UsageCoreNanoSeconds": bounded(10000000, 1e11), 225 }), 226 "Memory": ptrMatchAllFields(gstruct.Fields{ 227 "Time": recent(maxStatsAge), 228 "AvailableBytes": bounded(1*e2evolume.Kb, 80*e2evolume.Mb), 229 "UsageBytes": bounded(10*e2evolume.Kb, 80*e2evolume.Mb), 230 "WorkingSetBytes": bounded(10*e2evolume.Kb, 80*e2evolume.Mb), 231 "RSSBytes": bounded(1*e2evolume.Kb, 80*e2evolume.Mb), 232 "PageFaults": bounded(0, expectedPageFaultsUpperBound), 233 "MajorPageFaults": bounded(0, expectedMajorPageFaultsUpperBound), 234 }), 235 "Swap": swapExpectation(memoryLimit), 236 "VolumeStats": gstruct.MatchAllElements(summaryObjectID, gstruct.Elements{ 237 "test-empty-dir": gstruct.MatchAllFields(gstruct.Fields{ 238 "Name": gomega.Equal("test-empty-dir"), 239 "PVCRef": gomega.BeNil(), 240 "VolumeHealthStats": gomega.BeNil(), 241 "FsStats": gstruct.MatchAllFields(gstruct.Fields{ 242 "Time": recent(maxStatsAge), 243 "AvailableBytes": fsCapacityBounds, 244 "CapacityBytes": fsCapacityBounds, 245 "UsedBytes": bounded(e2evolume.Kb, 1*e2evolume.Mb), 246 "InodesFree": bounded(1e4, 1e8), 247 "Inodes": bounded(1e4, 1e8), 248 "InodesUsed": bounded(0, 1e8), 249 }), 250 }), 251 }), 252 "EphemeralStorage": ptrMatchAllFields(gstruct.Fields{ 253 "Time": recent(maxStatsAge), 254 "AvailableBytes": fsCapacityBounds, 255 "CapacityBytes": fsCapacityBounds, 256 "UsedBytes": bounded(e2evolume.Kb, 21*e2evolume.Mb), 257 "InodesFree": bounded(1e4, 1e8), 258 "Inodes": bounded(1e4, 1e8), 259 "InodesUsed": bounded(0, 1e8), 260 }), 261 "ProcessStats": ptrMatchAllFields(gstruct.Fields{ 262 "ProcessCount": bounded(0, 1e8), 263 }), 264 }) 265 266 matchExpectations := ptrMatchAllFields(gstruct.Fields{ 267 "Node": gstruct.MatchAllFields(gstruct.Fields{ 268 "NodeName": gomega.Equal(framework.TestContext.NodeName), 269 "StartTime": recent(maxStartAge), 270 "SystemContainers": gstruct.MatchAllElements(summaryObjectID, systemContainers), 271 "CPU": ptrMatchAllFields(gstruct.Fields{ 272 "Time": recent(maxStatsAge), 273 "UsageNanoCores": bounded(100e3, 2e9), 274 "UsageCoreNanoSeconds": bounded(1e9, 1e15), 275 }), 276 "Memory": ptrMatchAllFields(gstruct.Fields{ 277 "Time": recent(maxStatsAge), 278 "AvailableBytes": bounded(100*e2evolume.Mb, memoryLimit), 279 "UsageBytes": bounded(10*e2evolume.Mb, memoryLimit), 280 "WorkingSetBytes": bounded(10*e2evolume.Mb, memoryLimit), 281 // this now returns /sys/fs/cgroup/memory.stat total_rss 282 "RSSBytes": bounded(1*e2evolume.Kb, memoryLimit), 283 "PageFaults": bounded(1000, 1e9), 284 "MajorPageFaults": bounded(0, 1e9), 285 }), 286 "Swap": swapExpectation(memoryLimit), 287 // TODO(#28407): Handle non-eth0 network interface names. 288 "Network": ptrMatchAllFields(gstruct.Fields{ 289 "Time": recent(maxStatsAge), 290 "InterfaceStats": gstruct.MatchAllFields(gstruct.Fields{ 291 "Name": gomega.Or(gomega.BeEmpty(), gomega.Equal("eth0")), 292 "RxBytes": gomega.Or(gomega.BeNil(), bounded(1*e2evolume.Mb, 100*e2evolume.Gb)), 293 "RxErrors": gomega.Or(gomega.BeNil(), bounded(0, 100000)), 294 "TxBytes": gomega.Or(gomega.BeNil(), bounded(10*e2evolume.Kb, 10*e2evolume.Gb)), 295 "TxErrors": gomega.Or(gomega.BeNil(), bounded(0, 100000)), 296 }), 297 "Interfaces": gomega.Not(gomega.BeNil()), 298 }), 299 "Fs": ptrMatchAllFields(gstruct.Fields{ 300 "Time": recent(maxStatsAge), 301 "AvailableBytes": fsCapacityBounds, 302 "CapacityBytes": fsCapacityBounds, 303 // we assume we are not running tests on machines more than 10tb of disk 304 "UsedBytes": bounded(e2evolume.Kb, 10*e2evolume.Tb), 305 "InodesFree": bounded(1e4, 1e8), 306 "Inodes": bounded(1e4, 1e8), 307 "InodesUsed": bounded(0, 1e8), 308 }), 309 "Runtime": ptrMatchAllFields(gstruct.Fields{ 310 "ImageFs": ptrMatchAllFields(gstruct.Fields{ 311 "Time": recent(maxStatsAge), 312 "AvailableBytes": fsCapacityBounds, 313 "CapacityBytes": fsCapacityBounds, 314 // we assume we are not running tests on machines more than 10tb of disk 315 "UsedBytes": bounded(e2evolume.Kb, 10*e2evolume.Tb), 316 "InodesFree": bounded(1e4, 1e8), 317 "Inodes": bounded(1e4, 1e8), 318 "InodesUsed": bounded(0, 1e8), 319 }), 320 "ContainerFs": ptrMatchAllFields(gstruct.Fields{ 321 "Time": recent(maxStatsAge), 322 "AvailableBytes": fsCapacityBounds, 323 "CapacityBytes": fsCapacityBounds, 324 // we assume we are not running tests on machines more than 10tb of disk 325 "UsedBytes": bounded(e2evolume.Kb, 10*e2evolume.Tb), 326 "InodesFree": bounded(1e4, 1e8), 327 "Inodes": bounded(1e4, 1e8), 328 "InodesUsed": bounded(0, 1e8), 329 }), 330 }), 331 "Rlimit": ptrMatchAllFields(gstruct.Fields{ 332 "Time": recent(maxStatsAge), 333 "MaxPID": bounded(0, 1e8), 334 "NumOfRunningProcesses": bounded(0, 1e8), 335 }), 336 }), 337 // Ignore extra pods since the tests run in parallel. 338 "Pods": gstruct.MatchElements(summaryObjectID, gstruct.IgnoreExtras, gstruct.Elements{ 339 fmt.Sprintf("%s::%s", f.Namespace.Name, pod0): podExpectations, 340 fmt.Sprintf("%s::%s", f.Namespace.Name, pod1): podExpectations, 341 }), 342 }) 343 344 ginkgo.By("Validating /stats/summary") 345 // Give pods a minute to actually start up. 346 gomega.Eventually(ctx, getNodeSummary, 180*time.Second, 15*time.Second).Should(matchExpectations) 347 // Then the summary should match the expectations a few more times. 348 gomega.Consistently(ctx, getNodeSummary, 30*time.Second, 15*time.Second).Should(matchExpectations) 349 }) 350 }) 351 }) 352 353 func getSummaryTestPods(f *framework.Framework, numRestarts int32, names ...string) []*v1.Pod { 354 pods := make([]*v1.Pod, 0, len(names)) 355 for _, name := range names { 356 pods = append(pods, &v1.Pod{ 357 ObjectMeta: metav1.ObjectMeta{ 358 Name: name, 359 }, 360 Spec: v1.PodSpec{ 361 RestartPolicy: v1.RestartPolicyAlways, 362 Containers: []v1.Container{ 363 { 364 Name: "busybox-container", 365 Image: busyboxImage, 366 SecurityContext: &v1.SecurityContext{ 367 Capabilities: &v1.Capabilities{ 368 Add: []v1.Capability{"NET_RAW"}, 369 }, 370 }, 371 Command: getRestartingContainerCommand("/test-empty-dir-mnt", 0, numRestarts, "echo 'some bytes' >/outside_the_volume.txt; ping -c 1 google.com; echo 'hello world' >> /test-empty-dir-mnt/file;"), 372 Resources: v1.ResourceRequirements{ 373 Limits: v1.ResourceList{ 374 // Must set memory limit to get MemoryStats.AvailableBytes 375 v1.ResourceMemory: resource.MustParse("80M"), 376 }, 377 }, 378 VolumeMounts: []v1.VolumeMount{ 379 {MountPath: "/test-empty-dir-mnt", Name: "test-empty-dir"}, 380 }, 381 }, 382 }, 383 SecurityContext: &v1.PodSecurityContext{ 384 SELinuxOptions: &v1.SELinuxOptions{ 385 Level: "s0", 386 }, 387 }, 388 Volumes: []v1.Volume{ 389 // TODO(#28393): Test secret volumes 390 // TODO(#28394): Test hostpath volumes 391 {Name: "test-empty-dir", VolumeSource: v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}}, 392 }, 393 }, 394 }) 395 } 396 return pods 397 } 398 399 // Mapping function for gstruct.MatchAllElements 400 func summaryObjectID(element interface{}) string { 401 switch el := element.(type) { 402 case kubeletstatsv1alpha1.PodStats: 403 return fmt.Sprintf("%s::%s", el.PodRef.Namespace, el.PodRef.Name) 404 case kubeletstatsv1alpha1.ContainerStats: 405 return el.Name 406 case kubeletstatsv1alpha1.VolumeStats: 407 return el.Name 408 case kubeletstatsv1alpha1.UserDefinedMetric: 409 return el.Name 410 default: 411 framework.Failf("Unknown type: %T", el) 412 return "???" 413 } 414 } 415 416 // Convenience functions for common matcher combinations. 417 func ptrMatchAllFields(fields gstruct.Fields) types.GomegaMatcher { 418 return gstruct.PointTo(gstruct.MatchAllFields(fields)) 419 } 420 421 func bounded(lower, upper interface{}) types.GomegaMatcher { 422 return gstruct.PointTo(gomega.And( 423 gomega.BeNumerically(">=", lower), 424 gomega.BeNumerically("<=", upper))) 425 } 426 427 func swapExpectation(upper interface{}) types.GomegaMatcher { 428 // Size after which we consider memory to be "unlimited". This is not 429 // MaxInt64 due to rounding by the kernel. 430 const maxMemorySize = uint64(1 << 62) 431 432 swapBytesMatcher := gomega.Or( 433 gomega.BeNil(), 434 bounded(0, upper), 435 gstruct.PointTo(gomega.BeNumerically(">=", maxMemorySize)), 436 ) 437 438 return gomega.Or( 439 gomega.BeNil(), 440 ptrMatchAllFields(gstruct.Fields{ 441 "Time": recent(maxStatsAge), 442 "SwapUsageBytes": swapBytesMatcher, 443 "SwapAvailableBytes": swapBytesMatcher, 444 }), 445 ) 446 } 447 448 func recent(d time.Duration) types.GomegaMatcher { 449 return gomega.WithTransform(func(t metav1.Time) time.Time { 450 return t.Time 451 }, gomega.And( 452 gomega.BeTemporally(">=", time.Now().Add(-d)), 453 // Now() is the test start time, not the match time, so permit a few extra minutes. 454 gomega.BeTemporally("<", time.Now().Add(3*time.Minute)))) 455 } 456 457 func recordSystemCgroupProcesses(ctx context.Context) { 458 cfg, err := getCurrentKubeletConfig(ctx) 459 if err != nil { 460 framework.Logf("Failed to read kubelet config: %v", err) 461 return 462 } 463 cgroups := map[string]string{ 464 "kubelet": cfg.KubeletCgroups, 465 "misc": cfg.SystemCgroups, 466 } 467 for name, cgroup := range cgroups { 468 if cgroup == "" { 469 framework.Logf("Skipping unconfigured cgroup %s", name) 470 continue 471 } 472 473 filePattern := "/sys/fs/cgroup/cpu/%s/cgroup.procs" 474 if IsCgroup2UnifiedMode() { 475 filePattern = "/sys/fs/cgroup/%s/cgroup.procs" 476 } 477 pids, err := os.ReadFile(fmt.Sprintf(filePattern, cgroup)) 478 if err != nil { 479 framework.Logf("Failed to read processes in cgroup %s: %v", name, err) 480 continue 481 } 482 483 framework.Logf("Processes in %s cgroup (%s):", name, cgroup) 484 for _, pid := range strings.Fields(string(pids)) { 485 path := fmt.Sprintf("/proc/%s/cmdline", pid) 486 cmd, err := os.ReadFile(path) 487 if err != nil { 488 framework.Logf(" ginkgo.Failed to read %s: %v", path, err) 489 } else { 490 framework.Logf(" %s", cmd) 491 } 492 } 493 } 494 }