k8s.io/kubernetes@v1.29.3/test/e2e_node/resource_collector.go

//go:build linux
// +build linux

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
	"bytes"
	"context"
	"fmt"
	"log"
	"sort"
	"strings"
	"sync"
	"text/tabwriter"
	"time"

	cadvisorclient "github.com/google/cadvisor/client/v2"
	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
	"github.com/opencontainers/runc/libcontainer/cgroups"
	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/apimachinery/pkg/util/wait"
	kubeletstatsv1alpha1 "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
	"k8s.io/kubernetes/test/e2e/framework"
	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	"k8s.io/kubernetes/test/e2e_node/perftype"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
)

const (
	// resource monitoring
	cadvisorImageName = "gcr.io/cadvisor/cadvisor:v0.47.2"
	cadvisorPodName   = "cadvisor"
	cadvisorPort      = 8090
	// housekeeping interval of Cadvisor (in seconds)
	houseKeepingInterval = 1
)

var (
	systemContainers map[string]string
)

// ResourceCollector is a collector object which collects
// resource usage periodically from Cadvisor.
type ResourceCollector struct {
	client  *cadvisorclient.Client
	request *cadvisorapiv2.RequestOptions

	pollingInterval time.Duration
	buffers         map[string][]*e2ekubelet.ContainerResourceUsage
	lock            sync.RWMutex
	stopCh          chan struct{}
}

// NewResourceCollector creates a resource collector object which collects
// resource usage periodically from Cadvisor.
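//
// A typical usage in a node e2e test looks roughly like the sketch below
// (the 10s polling interval is only an example):
//
//	rc := NewResourceCollector(10 * time.Second)
//	rc.Start()
//	defer rc.Stop()
//	// ... run the workload under test ...
//	framework.Logf("%s", formatCPUSummary(rc.GetCPUSummary()))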
func NewResourceCollector(interval time.Duration) *ResourceCollector {
	buffers := make(map[string][]*e2ekubelet.ContainerResourceUsage)
	return &ResourceCollector{
		pollingInterval: interval,
		buffers:         buffers,
	}
}

// Start starts the resource collector: it connects to the standalone Cadvisor pod
// and then repeatedly runs collectStats.
func (r *ResourceCollector) Start() {
	// Get the cgroup container names for kubelet and runtime
	kubeletContainer, err1 := getContainerNameForProcess(kubeletProcessName, "")
	runtimeContainer, err2 := getContainerNameForProcess(framework.TestContext.ContainerRuntimeProcessName, framework.TestContext.ContainerRuntimePidFile)
	if err1 == nil && err2 == nil && kubeletContainer != "" && runtimeContainer != "" {
		systemContainers = map[string]string{
			kubeletstatsv1alpha1.SystemContainerKubelet: kubeletContainer,
			kubeletstatsv1alpha1.SystemContainerRuntime: runtimeContainer,
		}
	} else {
		framework.Failf("Failed to get kubelet or runtime container name in test-e2e-node resource collector.")
	}

	wait.Poll(1*time.Second, 1*time.Minute, func() (bool, error) {
		var err error
		r.client, err = cadvisorclient.NewClient(fmt.Sprintf("http://localhost:%d/", cadvisorPort))
		if err == nil {
			return true, nil
		}
		return false, err
	})

	gomega.Expect(r.client).NotTo(gomega.BeNil(), "cadvisor client not ready")

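	// Ask cadvisor for the single most recent stats sample of each container,
	// addressed by cgroup name and without recursing into child cgroups.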
	r.request = &cadvisorapiv2.RequestOptions{IdType: "name", Count: 1, Recursive: false}
	r.stopCh = make(chan struct{})

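	// oldStatsMap remembers the previous sample per container so that
	// collectStats can compute usage deltas between consecutive polls.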
	oldStatsMap := make(map[string]*cadvisorapiv2.ContainerStats)
	go wait.Until(func() { r.collectStats(oldStatsMap) }, r.pollingInterval, r.stopCh)
}

// Stop stops the resource collector from collecting stats. It does not clear the buffers.
func (r *ResourceCollector) Stop() {
	close(r.stopCh)
}

// Reset clears the stats buffers of the resource collector.
func (r *ResourceCollector) Reset() {
	r.lock.Lock()
	defer r.lock.Unlock()
	for _, name := range systemContainers {
		r.buffers[name] = []*e2ekubelet.ContainerResourceUsage{}
	}
}

// GetCPUSummary gets the CPU usage percentiles for each system container.
func (r *ResourceCollector) GetCPUSummary() e2ekubelet.ContainersCPUSummary {
	result := make(e2ekubelet.ContainersCPUSummary)
	for key, name := range systemContainers {
		data := r.GetBasicCPUStats(name)
		result[key] = data
	}
	return result
}

// LogLatest logs the latest resource usage.
func (r *ResourceCollector) LogLatest() {
	summary, err := r.GetLatest()
	if err != nil {
		framework.Logf("%v", err)
	}
	framework.Logf("%s", formatResourceUsageStats(summary))
}

// collectStats collects resource usage from Cadvisor.
func (r *ResourceCollector) collectStats(oldStatsMap map[string]*cadvisorapiv2.ContainerStats) {
	for _, name := range systemContainers {
		ret, err := r.client.Stats(name, r.request)
		if err != nil {
			framework.Logf("Error getting container stats, err: %v", err)
			return
		}
		cStats, ok := ret[name]
		if !ok || len(cStats.Stats) == 0 {
			framework.Logf("Missing info/stats for container %q", name)
			return
		}

		newStats := cStats.Stats[0]

		if oldStats, ok := oldStatsMap[name]; ok && oldStats.Timestamp.Before(newStats.Timestamp) {
			// buffers is read concurrently by the Get* helpers, so guard the append.
			r.lock.Lock()
			r.buffers[name] = append(r.buffers[name], computeContainerResourceUsage(name, oldStats, newStats))
			r.lock.Unlock()
		}
		oldStatsMap[name] = newStats
	}
}

// computeContainerResourceUsage computes resource usage based on a new data sample.
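// CPU usage in cores is the delta of cumulative CPU time (in nanoseconds)
// divided by the elapsed wall-clock time (also in nanoseconds) between the
// old and new samples.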
func computeContainerResourceUsage(name string, oldStats, newStats *cadvisorapiv2.ContainerStats) *e2ekubelet.ContainerResourceUsage {
	return &e2ekubelet.ContainerResourceUsage{
		Name:                    name,
		Timestamp:               newStats.Timestamp,
		CPUUsageInCores:         float64(newStats.Cpu.Usage.Total-oldStats.Cpu.Usage.Total) / float64(newStats.Timestamp.Sub(oldStats.Timestamp).Nanoseconds()),
		MemoryUsageInBytes:      newStats.Memory.Usage,
		MemoryWorkingSetInBytes: newStats.Memory.WorkingSet,
		MemoryRSSInBytes:        newStats.Memory.RSS,
		CPUInterval:             newStats.Timestamp.Sub(oldStats.Timestamp),
	}
}

// GetLatest gets the latest resource usage from the stats buffers.
func (r *ResourceCollector) GetLatest() (e2ekubelet.ResourceUsagePerContainer, error) {
	r.lock.RLock()
	defer r.lock.RUnlock()
	resourceUsage := make(e2ekubelet.ResourceUsagePerContainer)
	for key, name := range systemContainers {
		contStats, ok := r.buffers[name]
		if !ok || len(contStats) == 0 {
			return nil, fmt.Errorf("no resource usage data for %s container (%s)", key, name)
		}
		resourceUsage[key] = contStats[len(contStats)-1]
	}
	return resourceUsage, nil
}

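// resourceUsageByCPU implements sort.Interface so that samples can be ordered
// by their CPU usage.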
type resourceUsageByCPU []*e2ekubelet.ContainerResourceUsage

func (r resourceUsageByCPU) Len() int           { return len(r) }
func (r resourceUsageByCPU) Swap(i, j int)      { r[i], r[j] = r[j], r[i] }
func (r resourceUsageByCPU) Less(i, j int) bool { return r[i].CPUUsageInCores < r[j].CPUUsageInCores }

// The percentiles to report.
var percentiles = [...]float64{0.50, 0.90, 0.95, 0.99, 1.00}

// GetBasicCPUStats returns the percentiles of the CPU usage in cores for
// containerName. This method examines all data currently in the buffer.
func (r *ResourceCollector) GetBasicCPUStats(containerName string) map[float64]float64 {
	r.lock.RLock()
	defer r.lock.RUnlock()
	result := make(map[float64]float64, len(percentiles))

	// We must make a copy of the slice, otherwise sorting would reorder the
	// original time series in the buffer.
	usages := make([]*e2ekubelet.ContainerResourceUsage, 0)
	usages = append(usages, r.buffers[containerName]...)

	sort.Sort(resourceUsageByCPU(usages))
	for _, q := range percentiles {
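		// Nearest-rank percentile: take the sample at index floor(n*q)-1 of the
		// sorted slice; a negative index means there are not enough samples yet.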
		index := int(float64(len(usages))*q) - 1
		if index < 0 {
			// We don't have enough data.
			result[q] = 0
			continue
		}
		result[q] = usages[index].CPUUsageInCores
	}
	return result
}

func formatResourceUsageStats(containerStats e2ekubelet.ResourceUsagePerContainer) string {
	// Example output:
	//
	// Resource usage:
	//container cpu(cores) memory_working_set(MB) memory_rss(MB)
	//"kubelet" 0.068      27.92                  15.43
	//"runtime" 0.664      89.88                  68.13

	buf := &bytes.Buffer{}
	w := tabwriter.NewWriter(buf, 1, 0, 1, ' ', 0)
	fmt.Fprintf(w, "container\tcpu(cores)\tmemory_working_set(MB)\tmemory_rss(MB)\n")
	for name, s := range containerStats {
		fmt.Fprintf(w, "%q\t%.3f\t%.2f\t%.2f\n", name, s.CPUUsageInCores, float64(s.MemoryWorkingSetInBytes)/(1024*1024), float64(s.MemoryRSSInBytes)/(1024*1024))
	}
	w.Flush()
	return fmt.Sprintf("Resource usage:\n%s", buf.String())
}

func formatCPUSummary(summary e2ekubelet.ContainersCPUSummary) string {
	// Example output for a node (the percentiles may differ):
	// CPU usage of containers:
	// container        5th%  50th% 90th% 95th%
	// "/"              0.051 0.159 0.387 0.455
	// "/runtime"       0.000 0.000 0.146 0.166
	// "/kubelet"       0.036 0.053 0.091 0.154
	// "/misc"          0.001 0.001 0.001 0.002
	var summaryStrings []string
	var header []string
	header = append(header, "container")
	for _, p := range percentiles {
		header = append(header, fmt.Sprintf("%.0fth%%", p*100))
	}

	buf := &bytes.Buffer{}
	w := tabwriter.NewWriter(buf, 1, 0, 1, ' ', 0)
	fmt.Fprintf(w, "%s\n", strings.Join(header, "\t"))

	for _, containerName := range e2ekubelet.TargetContainers() {
		var s []string
		s = append(s, fmt.Sprintf("%q", containerName))
		data, ok := summary[containerName]
		for _, p := range percentiles {
			value := "N/A"
			if ok {
				value = fmt.Sprintf("%.3f", data[p])
			}
			s = append(s, value)
		}
		fmt.Fprintf(w, "%s\n", strings.Join(s, "\t"))
	}
	w.Flush()
	summaryStrings = append(summaryStrings, fmt.Sprintf("CPU usage of containers:\n%s", buf.String()))

	return strings.Join(summaryStrings, "\n")
}

// getCadvisorPod returns a standalone cadvisor pod spec for fine-grained resource monitoring.
func getCadvisorPod() *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: cadvisorPodName,
		},
		Spec: v1.PodSpec{
			// It uses a host port for the tests to collect data.
			// Currently we cannot use port mapping in test-e2e-node.
			HostNetwork:     true,
			SecurityContext: &v1.PodSecurityContext{},
			Containers: []v1.Container{
				{
					Image: cadvisorImageName,
					Name:  cadvisorPodName,
					Ports: []v1.ContainerPort{
						{
							Name:          "http",
							HostPort:      cadvisorPort,
							ContainerPort: cadvisorPort,
							Protocol:      v1.ProtocolTCP,
						},
					},
					VolumeMounts: []v1.VolumeMount{
						{
							Name:      "sys",
							ReadOnly:  true,
							MountPath: "/sys",
						},
						{
							Name:      "var-run",
							ReadOnly:  false,
							MountPath: "/var/run",
						},
						{
							Name:      "docker",
							ReadOnly:  true,
							MountPath: "/var/lib/docker/",
						},
						{
							Name:      "rootfs",
							ReadOnly:  true,
							MountPath: "/rootfs",
						},
					},
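					// Keep housekeeping frequent (every houseKeepingInterval seconds) so the
					// collector sees fresh samples, and serve on the same host port that the
					// cadvisor client above dials (cadvisorPort).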
					Args: []string{
						"--profiling",
						fmt.Sprintf("--housekeeping_interval=%ds", houseKeepingInterval),
						fmt.Sprintf("--port=%d", cadvisorPort),
					},
				},
			},
			Volumes: []v1.Volume{
				{
					Name:         "rootfs",
					VolumeSource: v1.VolumeSource{HostPath: &v1.HostPathVolumeSource{Path: "/"}},
				},
				{
					Name:         "var-run",
					VolumeSource: v1.VolumeSource{HostPath: &v1.HostPathVolumeSource{Path: "/var/run"}},
				},
				{
					Name:         "sys",
					VolumeSource: v1.VolumeSource{HostPath: &v1.HostPathVolumeSource{Path: "/sys"}},
				},
				{
					Name:         "docker",
					VolumeSource: v1.VolumeSource{HostPath: &v1.HostPathVolumeSource{Path: "/var/lib/docker"}},
				},
			},
		},
	}
}

// deletePodsSync deletes a list of pods and blocks until they disappear.
func deletePodsSync(ctx context.Context, f *framework.Framework, pods []*v1.Pod) {
	var wg sync.WaitGroup
	for i := range pods {
		pod := pods[i]
		wg.Add(1)
		go func() {
			defer ginkgo.GinkgoRecover()
			defer wg.Done()

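			// Delete with a 30s grace period, then wait for the pod to actually be gone.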
			err := e2epod.NewPodClient(f).Delete(ctx, pod.ObjectMeta.Name, *metav1.NewDeleteOptions(30))
			if err != nil && !apierrors.IsNotFound(err) {
				framework.Failf("Unexpected error trying to delete pod %s: %v", pod.Name, err)
			}

			framework.ExpectNoError(e2epod.WaitForPodNotFoundInNamespace(ctx, f.ClientSet, pod.ObjectMeta.Name, f.Namespace.Name, 10*time.Minute))
		}()
	}
	wg.Wait()
}

// newTestPods creates a list of pod specifications for testing.
func newTestPods(numPods int, volume bool, imageName, podType string) []*v1.Pod {
	var pods []*v1.Pod
	for i := 0; i < numPods; i++ {
		podName := "test-" + string(uuid.NewUUID())
		labels := map[string]string{
			"type": podType,
			"name": podName,
		}
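		// When volume is requested, give the pod an emptyDir volume as well so the
		// test also exercises the volume setup and teardown paths.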
		if volume {
			pods = append(pods,
				&v1.Pod{
					ObjectMeta: metav1.ObjectMeta{
						Name:   podName,
						Labels: labels,
					},
					Spec: v1.PodSpec{
						// Restart policy is Always (the default).
						Containers: []v1.Container{
							{
								Image: imageName,
								Name:  podName,
								VolumeMounts: []v1.VolumeMount{
									{MountPath: "/test-volume-mnt", Name: podName + "-volume"},
								},
							},
						},
						Volumes: []v1.Volume{
							{Name: podName + "-volume", VolumeSource: v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}},
						},
					},
				})
		} else {
			pods = append(pods,
				&v1.Pod{
					ObjectMeta: metav1.ObjectMeta{
						Name:   podName,
						Labels: labels,
					},
					Spec: v1.PodSpec{
						// Restart policy is Always (the default).
						Containers: []v1.Container{
							{
								Image: imageName,
								Name:  podName,
							},
						},
					},
				})
		}

	}
	return pods
}

// GetResourceTimeSeries gets the time series of resource usage of each container.
func (r *ResourceCollector) GetResourceTimeSeries() map[string]*perftype.ResourceSeries {
	r.lock.RLock()
	defer r.lock.RUnlock()
	resourceSeries := make(map[string]*perftype.ResourceSeries)
	for key, name := range systemContainers {
		newSeries := &perftype.ResourceSeries{Units: map[string]string{
			"cpu":    "mCPU",
			"memory": "MB",
		}}
		resourceSeries[key] = newSeries
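		// Convert cores to millicores and bytes to megabytes; note that the memory
		// series below is derived from overall memory usage (MemoryUsageInBytes).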
		for _, usage := range r.buffers[name] {
			newSeries.Timestamp = append(newSeries.Timestamp, usage.Timestamp.UnixNano())
			newSeries.CPUUsageInMilliCores = append(newSeries.CPUUsageInMilliCores, int64(usage.CPUUsageInCores*1000))
			newSeries.MemoryRSSInMegaBytes = append(newSeries.MemoryRSSInMegaBytes, int64(float64(usage.MemoryUsageInBytes)/(1024*1024)))
		}
	}
	return resourceSeries
}

const kubeletProcessName = "kubelet"

func getContainerNameForProcess(name, pidFile string) (string, error) {
	pids, err := getPidsForProcess(name, pidFile)
	if err != nil {
		return "", fmt.Errorf("failed to detect process id for %q - %v", name, err)
	}
	if len(pids) == 0 {
		return "", nil
	}
	cont, err := getContainer(pids[0])
	if err != nil {
		return "", err
	}
	return cont, nil
}

// getContainer returns the cgroup associated with the specified pid.
// It enforces a unified hierarchy for memory and cpu cgroups.
// On systemd environments, it uses the name=systemd cgroup for the specified pid.
func getContainer(pid int) (string, error) {
	cgs, err := cgroups.ParseCgroupFile(fmt.Sprintf("/proc/%d/cgroup", pid))
	if err != nil {
		return "", err
	}

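	// On cgroup v2 all controllers share a single unified hierarchy, which
	// ParseCgroupFile reports under the empty ("") key.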
	if cgroups.IsCgroup2UnifiedMode() {
		unified, found := cgs[""]
		if !found {
			return "", cgroups.NewNotFoundError("unified")
		}
		return unified, nil
	}

	cpu, found := cgs["cpu"]
	if !found {
		return "", cgroups.NewNotFoundError("cpu")
	}
	memory, found := cgs["memory"]
	if !found {
		return "", cgroups.NewNotFoundError("memory")
	}

	// since we use this container for accounting, we need to ensure it is a unified hierarchy.
	if cpu != memory {
		return "", fmt.Errorf("cpu and memory cgroup hierarchy not unified.  cpu: %s, memory: %s", cpu, memory)
	}

	// on systemd, every pid is in a unified cgroup hierarchy (name=systemd as seen in systemd-cgls)
	// cpu and memory accounting is off by default, users may choose to enable it per unit or globally.
	// users could enable CPU and memory accounting globally via /etc/systemd/system.conf (DefaultCPUAccounting=true DefaultMemoryAccounting=true).
	// users could also enable CPU and memory accounting per unit via CPUAccounting=true and MemoryAccounting=true
	// we only warn if accounting is not enabled for CPU or memory so as to not break local development flows where kubelet is launched in a terminal.
	// for example, the cgroup for the user session will be something like /user.slice/user-X.slice/session-X.scope, but the cpu and memory
	// cgroup will be the closest ancestor where accounting is performed (most likely /) on systems that launch docker containers.
	// as a result, on those systems, you will not get cpu or memory accounting statistics for kubelet.
	// in addition, you would not get memory or cpu accounting for the runtime unless accounting was enabled on its unit (or globally).
	if systemd, found := cgs["name=systemd"]; found {
		if systemd != cpu {
			log.Printf("CPUAccounting not enabled for pid: %d", pid)
		}
		if systemd != memory {
			log.Printf("MemoryAccounting not enabled for pid: %d", pid)
		}
		return systemd, nil
	}

	return cpu, nil
}