k8s.io/kubernetes@v1.29.3/test/e2e_node/resource_metrics_test.go (about)

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package e2enode
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"time"
    23  
    24  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    25  	"k8s.io/kubernetes/test/e2e/framework"
    26  	e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl"
    27  	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
    28  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    29  	e2evolume "k8s.io/kubernetes/test/e2e/framework/volume"
    30  	"k8s.io/kubernetes/test/e2e/nodefeature"
    31  	admissionapi "k8s.io/pod-security-admission/api"
    32  
    33  	"github.com/prometheus/common/model"
    34  
    35  	"github.com/onsi/ginkgo/v2"
    36  	"github.com/onsi/gomega"
    37  	"github.com/onsi/gomega/gstruct"
    38  	"github.com/onsi/gomega/types"
    39  )
    40  
const (
	// pod0 and pod1 are the names of the two test pods that the BeforeEach
	// below creates and the AfterEach deletes; they are also used to build
	// the per-pod and per-container keys the metric matchers look up.
	pod0        = "stats-busybox-0"
	pod1        = "stats-busybox-1"
	// maxStatsAge is subtracted from time.Now() to form the lower bound for
	// sample timestamps (boundedSample) and for container_start_time_seconds.
	maxStatsAge = time.Minute
)
    46  
    47  var _ = SIGDescribe("ResourceMetricsAPI", nodefeature.ResourceMetrics, func() {
    48  	f := framework.NewDefaultFramework("resource-metrics")
    49  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    50  	ginkgo.Context("when querying /resource/metrics", func() {
    51  		ginkgo.BeforeEach(func(ctx context.Context) {
    52  			ginkgo.By("Creating test pods to measure their resource usage")
    53  			numRestarts := int32(1)
    54  			pods := getSummaryTestPods(f, numRestarts, pod0, pod1)
    55  			e2epod.NewPodClient(f).CreateBatch(ctx, pods)
    56  
    57  			ginkgo.By("restarting the containers to ensure container metrics are still being gathered after a container is restarted")
    58  			gomega.Eventually(ctx, func(ctx context.Context) error {
    59  				for _, pod := range pods {
    60  					err := verifyPodRestartCount(ctx, f, pod.Name, len(pod.Spec.Containers), numRestarts)
    61  					if err != nil {
    62  						return err
    63  					}
    64  				}
    65  				return nil
    66  			}, time.Minute, 5*time.Second).Should(gomega.Succeed())
    67  
    68  			ginkgo.By("Waiting 15 seconds for cAdvisor to collect 2 stats points")
    69  			time.Sleep(15 * time.Second)
    70  		})
    71  		ginkgo.It("should report resource usage through the resource metrics api", func(ctx context.Context) {
    72  			ginkgo.By("Fetching node so we can match against an appropriate memory limit")
    73  			node := getLocalNode(ctx, f)
    74  			memoryCapacity := node.Status.Capacity["memory"]
    75  			memoryLimit := memoryCapacity.Value()
    76  
    77  			matchResourceMetrics := gomega.And(gstruct.MatchKeys(gstruct.IgnoreMissing, gstruct.Keys{
    78  				"resource_scrape_error": gstruct.Ignore(),
    79  				"node_cpu_usage_seconds_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
    80  					"": boundedSample(1, 1e6),
    81  				}),
    82  				"node_memory_working_set_bytes": gstruct.MatchAllElements(nodeID, gstruct.Elements{
    83  					"": boundedSample(10*e2evolume.Mb, memoryLimit),
    84  				}),
    85  
    86  				"container_cpu_usage_seconds_total": gstruct.MatchElements(containerID, gstruct.IgnoreExtras, gstruct.Elements{
    87  					fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod0, "busybox-container"): boundedSample(0, 100),
    88  					fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod1, "busybox-container"): boundedSample(0, 100),
    89  				}),
    90  
    91  				"container_memory_working_set_bytes": gstruct.MatchElements(containerID, gstruct.IgnoreExtras, gstruct.Elements{
    92  					fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod0, "busybox-container"): boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
    93  					fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod1, "busybox-container"): boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
    94  				}),
    95  
    96  				"container_start_time_seconds": gstruct.MatchElements(containerID, gstruct.IgnoreExtras, gstruct.Elements{
    97  					fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod0, "busybox-container"): boundedSample(time.Now().Add(-maxStatsAge).Unix(), time.Now().Add(2*time.Minute).Unix()),
    98  					fmt.Sprintf("%s::%s::%s", f.Namespace.Name, pod1, "busybox-container"): boundedSample(time.Now().Add(-maxStatsAge).Unix(), time.Now().Add(2*time.Minute).Unix()),
    99  				}),
   100  
   101  				"pod_cpu_usage_seconds_total": gstruct.MatchElements(podID, gstruct.IgnoreExtras, gstruct.Elements{
   102  					fmt.Sprintf("%s::%s", f.Namespace.Name, pod0): boundedSample(0, 100),
   103  					fmt.Sprintf("%s::%s", f.Namespace.Name, pod1): boundedSample(0, 100),
   104  				}),
   105  
   106  				"pod_memory_working_set_bytes": gstruct.MatchElements(podID, gstruct.IgnoreExtras, gstruct.Elements{
   107  					fmt.Sprintf("%s::%s", f.Namespace.Name, pod0): boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
   108  					fmt.Sprintf("%s::%s", f.Namespace.Name, pod1): boundedSample(10*e2evolume.Kb, 80*e2evolume.Mb),
   109  				}),
   110  
   111  				"pod_swap_usage_bytes": gstruct.MatchElements(podID, gstruct.IgnoreExtras, gstruct.Elements{
   112  					fmt.Sprintf("%s::%s", f.Namespace.Name, pod0): boundedSample(0*e2evolume.Kb, 80*e2evolume.Mb),
   113  					fmt.Sprintf("%s::%s", f.Namespace.Name, pod1): boundedSample(0*e2evolume.Kb, 80*e2evolume.Mb),
   114  				}),
   115  			}),
   116  				haveKeys("resource_scrape_error", "node_cpu_usage_seconds_total", "node_memory_working_set_bytes", "container_cpu_usage_seconds_total",
   117  					"container_memory_working_set_bytes", "container_start_time_seconds", "pod_cpu_usage_seconds_total", "pod_memory_working_set_bytes"),
   118  			)
   119  			ginkgo.By("Giving pods a minute to start up and produce metrics")
   120  			gomega.Eventually(ctx, getResourceMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
   121  			ginkgo.By("Ensuring the metrics match the expectations a few more times")
   122  			gomega.Consistently(ctx, getResourceMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
   123  		})
   124  		ginkgo.AfterEach(func(ctx context.Context) {
   125  			ginkgo.By("Deleting test pods")
   126  			var zero int64 = 0
   127  			e2epod.NewPodClient(f).DeleteSync(ctx, pod0, metav1.DeleteOptions{GracePeriodSeconds: &zero}, 10*time.Minute)
   128  			e2epod.NewPodClient(f).DeleteSync(ctx, pod1, metav1.DeleteOptions{GracePeriodSeconds: &zero}, 10*time.Minute)
   129  			if !ginkgo.CurrentSpecReport().Failed() {
   130  				return
   131  			}
   132  			if framework.TestContext.DumpLogsOnFailure {
   133  				e2ekubectl.LogFailedContainers(ctx, f.ClientSet, f.Namespace.Name, framework.Logf)
   134  			}
   135  			ginkgo.By("Recording processes in system cgroups")
   136  			recordSystemCgroupProcesses(ctx)
   137  		})
   138  	})
   139  })
   140  
   141  func getResourceMetrics(ctx context.Context) (e2emetrics.KubeletMetrics, error) {
   142  	ginkgo.By("getting stable resource metrics API")
   143  	return e2emetrics.GrabKubeletMetricsWithoutProxy(ctx, nodeNameOrIP()+":10255", "/metrics/resource")
   144  }
   145  
   146  func nodeID(element interface{}) string {
   147  	return ""
   148  }
   149  
   150  func podID(element interface{}) string {
   151  	el := element.(*model.Sample)
   152  	return fmt.Sprintf("%s::%s", el.Metric["namespace"], el.Metric["pod"])
   153  }
   154  
   155  func containerID(element interface{}) string {
   156  	el := element.(*model.Sample)
   157  	return fmt.Sprintf("%s::%s::%s", el.Metric["namespace"], el.Metric["pod"], el.Metric["container"])
   158  }
   159  
   160  func boundedSample(lower, upper interface{}) types.GomegaMatcher {
   161  	return gstruct.PointTo(gstruct.MatchAllFields(gstruct.Fields{
   162  		// We already check Metric when matching the Id
   163  		"Metric": gstruct.Ignore(),
   164  		"Value":  gomega.And(gomega.BeNumerically(">=", lower), gomega.BeNumerically("<=", upper)),
   165  		"Timestamp": gomega.WithTransform(func(t model.Time) time.Time {
   166  			if t.Unix() <= 0 {
   167  				return time.Now()
   168  			}
   169  
   170  			// model.Time is in Milliseconds since epoch
   171  			return time.Unix(0, int64(t)*int64(time.Millisecond))
   172  		},
   173  			gomega.And(
   174  				gomega.BeTemporally(">=", time.Now().Add(-maxStatsAge)),
   175  				// Now() is the test start time, not the match time, so permit a few extra minutes.
   176  				gomega.BeTemporally("<", time.Now().Add(2*time.Minute))),
   177  		),
   178  		"Histogram": gstruct.Ignore(),
   179  	}))
   180  }
   181  
   182  func haveKeys(keys ...string) types.GomegaMatcher {
   183  	gomega.ExpectWithOffset(1, keys).ToNot(gomega.BeEmpty())
   184  	matcher := gomega.HaveKey(keys[0])
   185  
   186  	if len(keys) == 1 {
   187  		return matcher
   188  	}
   189  
   190  	for _, key := range keys[1:] {
   191  		matcher = gomega.And(matcher, gomega.HaveKey(key))
   192  	}
   193  
   194  	return matcher
   195  }