k8s.io/kubernetes@v1.29.3/test/e2e_node/podresources_test.go

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package e2enode
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"os"
    24  	"strings"
    25  	"time"
    26  
    27  	v1 "k8s.io/api/core/v1"
    28  	"k8s.io/apimachinery/pkg/api/resource"
    29  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    30  	kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
    31  	kubeletpodresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
    32  	kubefeatures "k8s.io/kubernetes/pkg/features"
    33  	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
    34  	apisgrpc "k8s.io/kubernetes/pkg/kubelet/apis/grpc"
    35  	"k8s.io/kubernetes/pkg/kubelet/apis/podresources"
    36  	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
    37  	"k8s.io/kubernetes/pkg/kubelet/util"
    38  	testutils "k8s.io/kubernetes/test/utils"
    39  	admissionapi "k8s.io/pod-security-admission/api"
    40  	"k8s.io/utils/cpuset"
    41  
    42  	"github.com/onsi/ginkgo/v2"
    43  	"github.com/onsi/gomega"
    44  	"github.com/onsi/gomega/gstruct"
    45  	"github.com/onsi/gomega/types"
    46  	"k8s.io/kubernetes/test/e2e/feature"
    47  	"k8s.io/kubernetes/test/e2e/framework"
    48  	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
    49  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    50  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    51  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    52  	"k8s.io/kubernetes/test/e2e/nodefeature"
    53  )
    54  
    55  const (
    56  	defaultTopologyUnawareResourceName = "example.com/resource"
    57  )
    58  
    59  type podDesc struct {
    60  	podName        string
    61  	cntName        string
    62  	resourceName   string
    63  	resourceAmount int
    64  	cpuRequest     int // cpuRequest is in millicores
    65  }
    66  
    67  func (desc podDesc) CpuRequestQty() resource.Quantity {
    68  	qty := resource.NewMilliQuantity(int64(desc.cpuRequest), resource.DecimalSI)
    69  	return *qty
    70  }
    71  
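        // CpuRequestExclusive returns the number of exclusive CPUs implied by the CPU request: the full cores
        // for an integral request (e.g. 2000 millicores -> 2 exclusive CPUs) and 0 for a non-integral request
        // (e.g. 1500 millicores), since only integral requests can be granted exclusive CPUs.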
    72  func (desc podDesc) CpuRequestExclusive() int {
    73  	if (desc.cpuRequest % 1000) != 0 {
    74  		// exclusive cpus are requested only if the quantity is integral;
    75  		// hence, explicitly rule out non-integral requests
    76  		return 0
    77  	}
    78  	return desc.cpuRequest / 1000
    79  }
    80  
    81  func (desc podDesc) RequiresCPU() bool {
    82  	return desc.cpuRequest > 0
    83  }
    84  
    85  func (desc podDesc) RequiresDevices() bool {
    86  	return desc.resourceName != "" && desc.resourceAmount > 0
    87  }
    88  
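        // makePodResourcesTestPod builds a single-container pod from the given podDesc. When a CPU request is
        // set, CPU and memory requests are mirrored into limits so the pod lands in the Guaranteed QoS class;
        // device requests and limits are set the same way when a resource name is given.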
    89  func makePodResourcesTestPod(desc podDesc) *v1.Pod {
    90  	cnt := v1.Container{
    91  		Name:  desc.cntName,
    92  		Image: busyboxImage,
    93  		Resources: v1.ResourceRequirements{
    94  			Requests: v1.ResourceList{},
    95  			Limits:   v1.ResourceList{},
    96  		},
    97  		Command: []string{"sh", "-c", "sleep 1d"},
    98  	}
    99  	if desc.RequiresCPU() {
   100  		cpuRequestQty := desc.CpuRequestQty()
   101  		cnt.Resources.Requests[v1.ResourceCPU] = cpuRequestQty
   102  		cnt.Resources.Limits[v1.ResourceCPU] = cpuRequestQty
   103  		// the exact amount doesn't really matter; we only need the pod to be in the Guaranteed QoS class
   104  		cnt.Resources.Requests[v1.ResourceMemory] = resource.MustParse("100Mi")
   105  		cnt.Resources.Limits[v1.ResourceMemory] = resource.MustParse("100Mi")
   106  	}
   107  	if desc.RequiresDevices() {
   108  		cnt.Resources.Requests[v1.ResourceName(desc.resourceName)] = resource.MustParse(fmt.Sprintf("%d", desc.resourceAmount))
   109  		cnt.Resources.Limits[v1.ResourceName(desc.resourceName)] = resource.MustParse(fmt.Sprintf("%d", desc.resourceAmount))
   110  	}
   111  	return &v1.Pod{
   112  		ObjectMeta: metav1.ObjectMeta{
   113  			Name: desc.podName,
   114  		},
   115  		Spec: v1.PodSpec{
   116  			RestartPolicy: v1.RestartPolicyNever,
   117  			Containers: []v1.Container{
   118  				cnt,
   119  			},
   120  		},
   121  	}
   122  }
   123  
   124  func logPodResources(podIdx int, pr *kubeletpodresourcesv1.PodResources) {
   125  	ns := pr.GetNamespace()
   126  	cnts := pr.GetContainers()
   127  	if len(cnts) == 0 {
   128  		framework.Logf("#%02d/%02d/%02d - %s/%s/%s   No containers", podIdx, 0, 0, ns, pr.GetName(), "_")
   129  		return
   130  	}
   131  
   132  	for cntIdx, cnt := range cnts {
   133  		if len(cnt.Devices) == 0 {
   134  			framework.Logf("#%02d/%02d/%02d - %s/%s/%s   cpus -> %v   resources -> none", podIdx, cntIdx, 0, ns, pr.GetName(), cnt.Name, cnt.CpuIds)
   135  			continue
   136  		}
   137  
   138  		for devIdx, dev := range cnt.Devices {
   139  			framework.Logf("#%02d/%02d/%02d - %s/%s/%s   cpus -> %v   %s -> %s", podIdx, cntIdx, devIdx, ns, pr.GetName(), cnt.Name, cnt.CpuIds, dev.ResourceName, strings.Join(dev.DeviceIds, ", "))
   140  		}
   141  	}
   142  }
   143  
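        // podResMap indexes a List() response as pod name -> container name -> reported container resources.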
   144  type podResMap map[string]map[string]kubeletpodresourcesv1.ContainerResources
   145  
   146  func convertToMap(podsResources []*kubeletpodresourcesv1.PodResources) podResMap {
   147  	res := make(map[string]map[string]kubeletpodresourcesv1.ContainerResources)
   148  	for idx, podResource := range podsResources {
   149  		// to make troubleshooting easier
   150  		logPodResources(idx, podResource)
   151  
   152  		cnts := make(map[string]kubeletpodresourcesv1.ContainerResources)
   153  		for _, cnt := range podResource.GetContainers() {
   154  			cnts[cnt.GetName()] = *cnt
   155  		}
   156  		res[podResource.GetName()] = cnts
   157  	}
   158  	return res
   159  }
   160  
   161  func getPodResourcesValues(ctx context.Context, cli kubeletpodresourcesv1.PodResourcesListerClient) (podResMap, error) {
   162  	resp, err := cli.List(ctx, &kubeletpodresourcesv1.ListPodResourcesRequest{})
   163  	if err != nil {
   164  		return nil, err
   165  	}
   166  	return convertToMap(resp.GetPodResources()), nil
   167  }
   168  
   169  type testPodData struct {
   170  	PodMap map[string]*v1.Pod
   171  }
   172  
   173  func newTestPodData() *testPodData {
   174  	return &testPodData{
   175  		PodMap: make(map[string]*v1.Pod),
   176  	}
   177  }
   178  
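        // createPodsForTest creates one pod per podDesc and waits for each to be up and running; the created
        // pods are tracked in PodMap so they can be deleted later in the test.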
   179  func (tpd *testPodData) createPodsForTest(ctx context.Context, f *framework.Framework, podReqs []podDesc) {
   180  	for _, podReq := range podReqs {
   181  		pod := makePodResourcesTestPod(podReq)
   182  		pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
   183  
   184  		framework.Logf("created pod %s", podReq.podName)
   185  		tpd.PodMap[podReq.podName] = pod
   186  	}
   187  }
   188  
   189  /* deletePodsForTest cleans up all the pods run for a test case. Must ensure proper cleanup */
   190  func (tpd *testPodData) deletePodsForTest(ctx context.Context, f *framework.Framework) {
   191  	deletePodsAsync(ctx, f, tpd.PodMap)
   192  }
   193  
   194  /* deletePod removes a pod during a test. Should do a best-effort cleanup */
   195  func (tpd *testPodData) deletePod(ctx context.Context, f *framework.Framework, podName string) {
   196  	_, ok := tpd.PodMap[podName]
   197  	if !ok {
   198  		return
   199  	}
   200  	deletePodSyncByName(ctx, f, podName)
   201  	delete(tpd.PodMap, podName)
   202  }
   203  
   204  func findContainerDeviceByName(devs []*kubeletpodresourcesv1.ContainerDevices, resourceName string) *kubeletpodresourcesv1.ContainerDevices {
   205  	for _, dev := range devs {
   206  		if dev.ResourceName == resourceName {
   207  			return dev
   208  		}
   209  	}
   210  	return nil
   211  }
   212  
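        // matchPodDescWithResources checks that every expected podDesc appears in the List() response with the
        // expected exclusive CPUs and device allocations, returning an error on the first mismatch so callers
        // (e.g. expectPodResources) can retry until the kubelet reports a consistent view.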
   213  func matchPodDescWithResources(expected []podDesc, found podResMap) error {
   214  	for _, podReq := range expected {
   215  		framework.Logf("matching: %#v", podReq)
   216  
   217  		podInfo, ok := found[podReq.podName]
   218  		if !ok {
   219  			return fmt.Errorf("no pod resources for pod %q", podReq.podName)
   220  		}
   221  		cntInfo, ok := podInfo[podReq.cntName]
   222  		if !ok {
   223  			return fmt.Errorf("no container resources for pod %q container %q", podReq.podName, podReq.cntName)
   224  		}
   225  		if podReq.RequiresCPU() {
   226  			if exclusiveCpus := podReq.CpuRequestExclusive(); exclusiveCpus != len(cntInfo.CpuIds) {
   227  				if exclusiveCpus == 0 {
   228  					return fmt.Errorf("pod %q container %q requested %d millicores, expected CPUs from the shared pool, got exclusive CPUs %v", podReq.podName, podReq.cntName, podReq.cpuRequest, cntInfo.CpuIds)
   229  				}
   230  				return fmt.Errorf("pod %q container %q expected %d exclusive cpus, got %v", podReq.podName, podReq.cntName, exclusiveCpus, cntInfo.CpuIds)
   231  			}
   232  		}
   233  		if podReq.RequiresDevices() {
   234  			dev := findContainerDeviceByName(cntInfo.GetDevices(), podReq.resourceName)
   235  			if dev == nil {
   236  				return fmt.Errorf("pod %q container %q expected data for resource %q not found", podReq.podName, podReq.cntName, podReq.resourceName)
   237  			}
   238  			if len(dev.DeviceIds) != podReq.resourceAmount {
   239  				return fmt.Errorf("pod %q container %q resource %q expected %d items got %v", podReq.podName, podReq.cntName, podReq.resourceName, podReq.resourceAmount, dev.DeviceIds)
   240  			}
   241  		} else {
   242  			devs := cntInfo.GetDevices()
   243  			if len(devs) > 0 {
   244  				return fmt.Errorf("pod %q container %q expected no resources, got %v", podReq.podName, podReq.cntName, devs)
   245  			}
   246  		}
   247  		if cnts, ok := found[defaultTopologyUnawareResourceName]; ok {
   248  			for _, cnt := range cnts {
   249  				for _, cd := range cnt.GetDevices() {
   250  					if cd.ResourceName != defaultTopologyUnawareResourceName {
   251  						continue
   252  					}
   253  					if cd.Topology != nil {
   254  						// topology-unaware resources are expected to report nil topology
   255  						return fmt.Errorf("expected nil topology for resource %q, got %v", cd.ResourceName, cd.Topology)
   256  					}
   257  				}
   258  
   259  			}
   260  		}
   261  	}
   262  	return nil
   263  }
   264  
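        // expectPodResources polls the podresources List() endpoint until the reported resources match the
        // expected podDescs, retrying every 10 seconds for up to a minute.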
   265  func expectPodResources(ctx context.Context, offset int, cli kubeletpodresourcesv1.PodResourcesListerClient, expected []podDesc) {
   266  	gomega.EventuallyWithOffset(1+offset, ctx, func(ctx context.Context) error {
   267  		found, err := getPodResourcesValues(ctx, cli)
   268  		if err != nil {
   269  			return err
   270  		}
   271  		return matchPodDescWithResources(expected, found)
   272  	}, time.Minute, 10*time.Second).Should(gomega.Succeed())
   273  }
   274  
   275  func filterOutDesc(descs []podDesc, name string) []podDesc {
   276  	var ret []podDesc
   277  	for _, desc := range descs {
   278  		if desc.podName == name {
   279  			continue
   280  		}
   281  		ret = append(ret, desc)
   282  	}
   283  	return ret
   284  }
   285  
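        // podresourcesListTests exercises the List() endpoint across several pod mixes: no pods, pods without
        // resources, pods with devices and/or exclusive CPUs, pod deletion between calls, and non-integral CPU
        // requests. sd may be nil when no SRIOV device plugin is running.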
   286  func podresourcesListTests(ctx context.Context, f *framework.Framework, cli kubeletpodresourcesv1.PodResourcesListerClient, sd *sriovData) {
   287  	var tpd *testPodData
   288  
   289  	var found podResMap
   290  	var expected []podDesc
   291  	var extra podDesc
   292  
   293  	expectedBasePods := 0 /* nothing but pods we create */
   294  	if sd != nil {
   295  		expectedBasePods = 1 // sriovdp
   296  	}
   297  
   298  	var err error
   299  	ginkgo.By("checking the output when no pods are present")
   300  	found, err = getPodResourcesValues(ctx, cli)
   301  	framework.ExpectNoError(err, "getPodResourcesValues() failed err: %v", err)
   302  	gomega.ExpectWithOffset(1, found).To(gomega.HaveLen(expectedBasePods), "base pod expectation mismatch")
   303  
   304  	tpd = newTestPodData()
   305  	ginkgo.By("checking the output when only pods which don't require resources are present")
   306  	expected = []podDesc{
   307  		{
   308  			podName: "pod-00",
   309  			cntName: "cnt-00",
   310  		},
   311  		{
   312  			podName: "pod-01",
   313  			cntName: "cnt-00",
   314  		},
   315  	}
   316  	tpd.createPodsForTest(ctx, f, expected)
   317  	expectPodResources(ctx, 1, cli, expected)
   318  	tpd.deletePodsForTest(ctx, f)
   319  
   320  	tpd = newTestPodData()
   321  	ginkgo.By("checking the output when only a subset of pods require resources")
   322  	if sd != nil {
   323  		expected = []podDesc{
   324  			{
   325  				podName: "pod-00",
   326  				cntName: "cnt-00",
   327  			},
   328  			{
   329  				podName:        "pod-01",
   330  				cntName:        "cnt-00",
   331  				resourceName:   sd.resourceName,
   332  				resourceAmount: 1,
   333  				cpuRequest:     2000,
   334  			},
   335  			{
   336  				podName:    "pod-02",
   337  				cntName:    "cnt-00",
   338  				cpuRequest: 2000,
   339  			},
   340  			{
   341  				podName:        "pod-03",
   342  				cntName:        "cnt-00",
   343  				resourceName:   sd.resourceName,
   344  				resourceAmount: 1,
   345  				cpuRequest:     1000,
   346  			},
   347  		}
   348  	} else {
   349  		expected = []podDesc{
   350  			{
   351  				podName: "pod-00",
   352  				cntName: "cnt-00",
   353  			},
   354  			{
   355  				podName:    "pod-01",
   356  				cntName:    "cnt-00",
   357  				cpuRequest: 2000,
   358  			},
   359  			{
   360  				podName:    "pod-02",
   361  				cntName:    "cnt-00",
   362  				cpuRequest: 2000,
   363  			},
   364  			{
   365  				podName:    "pod-03",
   366  				cntName:    "cnt-00",
   367  				cpuRequest: 1000,
   368  			},
   369  		}
   370  
   371  	}
   372  	tpd.createPodsForTest(ctx, f, expected)
   373  	expectPodResources(ctx, 1, cli, expected)
   374  	tpd.deletePodsForTest(ctx, f)
   375  
   376  	tpd = newTestPodData()
   377  	ginkgo.By("checking the output when creating pods which require resources between calls")
   378  	if sd != nil {
   379  		expected = []podDesc{
   380  			{
   381  				podName: "pod-00",
   382  				cntName: "cnt-00",
   383  			},
   384  			{
   385  				podName:        "pod-01",
   386  				cntName:        "cnt-00",
   387  				resourceName:   sd.resourceName,
   388  				resourceAmount: 1,
   389  				cpuRequest:     2000,
   390  			},
   391  			{
   392  				podName:    "pod-02",
   393  				cntName:    "cnt-00",
   394  				cpuRequest: 2000,
   395  			},
   396  		}
   397  	} else {
   398  		expected = []podDesc{
   399  			{
   400  				podName: "pod-00",
   401  				cntName: "cnt-00",
   402  			},
   403  			{
   404  				podName:    "pod-01",
   405  				cntName:    "cnt-00",
   406  				cpuRequest: 2000,
   407  			},
   408  			{
   409  				podName:    "pod-02",
   410  				cntName:    "cnt-00",
   411  				cpuRequest: 2000,
   412  			},
   413  		}
   414  	}
   415  
   416  	tpd.createPodsForTest(ctx, f, expected)
   417  	expectPodResources(ctx, 1, cli, expected)
   418  
   419  	if sd != nil {
   420  		extra = podDesc{
   421  			podName:        "pod-03",
   422  			cntName:        "cnt-00",
   423  			resourceName:   sd.resourceName,
   424  			resourceAmount: 1,
   425  			cpuRequest:     1000,
   426  		}
   427  	} else {
   428  		extra = podDesc{
   429  			podName:    "pod-03",
   430  			cntName:    "cnt-00",
   431  			cpuRequest: 1000,
   432  		}
   433  
   434  	}
   435  
   436  	tpd.createPodsForTest(ctx, f, []podDesc{
   437  		extra,
   438  	})
   439  
   440  	expected = append(expected, extra)
   441  	expectPodResources(ctx, 1, cli, expected)
   442  	tpd.deletePodsForTest(ctx, f)
   443  
   444  	tpd = newTestPodData()
   445  	ginkgo.By("checking the output when deleting pods which require resources between calls")
   446  
   447  	if sd != nil {
   448  		expected = []podDesc{
   449  			{
   450  				podName:    "pod-00",
   451  				cntName:    "cnt-00",
   452  				cpuRequest: 1000,
   453  			},
   454  			{
   455  				podName:        "pod-01",
   456  				cntName:        "cnt-00",
   457  				resourceName:   sd.resourceName,
   458  				resourceAmount: 1,
   459  				cpuRequest:     2000,
   460  			},
   461  			{
   462  				podName: "pod-02",
   463  				cntName: "cnt-00",
   464  			},
   465  			{
   466  				podName:        "pod-03",
   467  				cntName:        "cnt-00",
   468  				resourceName:   sd.resourceName,
   469  				resourceAmount: 1,
   470  				cpuRequest:     1000,
   471  			},
   472  		}
   473  	} else {
   474  		expected = []podDesc{
   475  			{
   476  				podName:    "pod-00",
   477  				cntName:    "cnt-00",
   478  				cpuRequest: 1000,
   479  			},
   480  			{
   481  				podName:    "pod-01",
   482  				cntName:    "cnt-00",
   483  				cpuRequest: 1000,
   484  			},
   485  			{
   486  				podName: "pod-02",
   487  				cntName: "cnt-00",
   488  			},
   489  			{
   490  				podName:    "pod-03",
   491  				cntName:    "cnt-00",
   492  				cpuRequest: 1000,
   493  			},
   494  		}
   495  	}
   496  	tpd.createPodsForTest(ctx, f, expected)
   497  	expectPodResources(ctx, 1, cli, expected)
   498  
   499  	tpd.deletePod(ctx, f, "pod-01")
   500  	expectedPostDelete := filterOutDesc(expected, "pod-01")
   501  	expectPodResources(ctx, 1, cli, expectedPostDelete)
   502  	tpd.deletePodsForTest(ctx, f)
   503  
   504  	tpd = newTestPodData()
   505  	ginkgo.By("checking the output when pods request non integral CPUs")
   506  	if sd != nil {
   507  		expected = []podDesc{
   508  			{
   509  				podName:    "pod-00",
   510  				cntName:    "cnt-00",
   511  				cpuRequest: 1500,
   512  			},
   513  			{
   514  				podName:        "pod-01",
   515  				cntName:        "cnt-00",
   516  				resourceName:   sd.resourceName,
   517  				resourceAmount: 1,
   518  				cpuRequest:     1500,
   519  			},
   520  		}
   521  	} else {
   522  		expected = []podDesc{
   523  			{
   524  				podName:    "pod-00",
   525  				cntName:    "cnt-00",
   526  				cpuRequest: 1500,
   527  			},
   528  		}
   529  
   530  	}
   531  	tpd.createPodsForTest(ctx, f, expected)
   532  	expectPodResources(ctx, 1, cli, expected)
   533  	tpd.deletePodsForTest(ctx, f)
   534  
   535  }
   536  
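        // podresourcesGetAllocatableResourcesTests verifies GetAllocatableResources(): the reported CPU ids must
        // equal onlineCPUs minus reservedSystemCPUs, and devices must be reported only when an SRIOV device
        // plugin (sd) is present.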
   537  func podresourcesGetAllocatableResourcesTests(ctx context.Context, cli kubeletpodresourcesv1.PodResourcesListerClient, sd *sriovData, onlineCPUs, reservedSystemCPUs cpuset.CPUSet) {
   538  	ginkgo.By("checking the devices known to the kubelet")
   539  	resp, err := cli.GetAllocatableResources(ctx, &kubeletpodresourcesv1.AllocatableResourcesRequest{})
   540  	framework.ExpectNoErrorWithOffset(1, err)
   541  	devs := resp.GetDevices()
   542  	var cpus []int
   543  	for _, cpuid := range resp.GetCpuIds() {
   544  		cpus = append(cpus, int(cpuid))
   545  	}
   546  	allocatableCPUs := cpuset.New(cpus...)
   547  
   548  	if onlineCPUs.Size() == 0 {
   549  		ginkgo.By("expecting no CPUs reported")
   550  		gomega.ExpectWithOffset(1, onlineCPUs.Size()).To(gomega.Equal(reservedSystemCPUs.Size()), "with no online CPUs, no CPUs should be reserved")
   551  	} else {
   552  		ginkgo.By(fmt.Sprintf("expecting online CPUs reported - online=%v (%d) reserved=%v (%d)", onlineCPUs, onlineCPUs.Size(), reservedSystemCPUs, reservedSystemCPUs.Size()))
   553  		if reservedSystemCPUs.Size() > onlineCPUs.Size() {
   554  			ginkgo.Fail("more reserved CPUs than online")
   555  		}
   556  		expectedCPUs := onlineCPUs.Difference(reservedSystemCPUs)
   557  
   558  		ginkgo.By(fmt.Sprintf("expecting CPUs '%v'='%v'", allocatableCPUs, expectedCPUs))
   559  		gomega.ExpectWithOffset(1, allocatableCPUs.Equals(expectedCPUs)).To(gomega.BeTrue(), "allocatable CPU set does not match the expected one")
   560  	}
   561  
   562  	if sd == nil { // no devices in the environment, so expect no devices
   563  		ginkgo.By("expecting no devices reported")
   564  		gomega.ExpectWithOffset(1, devs).To(gomega.BeEmpty(), fmt.Sprintf("got unexpected devices %#v", devs))
   565  		return
   566  	}
   567  
   568  	ginkgo.By(fmt.Sprintf("expecting some %q devices reported", sd.resourceName))
   569  	gomega.ExpectWithOffset(1, devs).ToNot(gomega.BeEmpty())
   570  	for _, dev := range devs {
   571  		gomega.Expect(dev.ResourceName).To(gomega.Equal(sd.resourceName))
   572  		gomega.ExpectWithOffset(1, dev.DeviceIds).ToNot(gomega.BeEmpty())
   573  	}
   574  }
   575  
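        // podresourcesGetTests exercises the Get() endpoint: a request for a missing pod must fail, while
        // existing pods, with and without CPU requests, must be reported consistently with their podDesc.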
   576  func podresourcesGetTests(ctx context.Context, f *framework.Framework, cli kubeletpodresourcesv1.PodResourcesListerClient) {
   578  	ginkgo.By("checking the output when no pods are present")
   579  	expected := []podDesc{}
   580  	resp, err := cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "test", PodNamespace: f.Namespace.Name})
   581  	podResourceList := []*kubeletpodresourcesv1.PodResources{resp.GetPodResources()}
   582  	gomega.Expect(err).To(gomega.HaveOccurred(), "Get() is expected to fail for a pod which does not exist")
   583  	res := convertToMap(podResourceList)
   584  	err = matchPodDescWithResources(expected, res)
   585  	framework.ExpectNoError(err, "matchPodDescWithResources() failed err %v", err)
   586  
   587  	tpd := newTestPodData()
   588  	ginkgo.By("checking the output when only pods which don't require resources are present")
   589  	expected = []podDesc{
   590  		{
   591  			podName: "pod-00",
   592  			cntName: "cnt-00",
   593  		},
   594  	}
   595  	tpd.createPodsForTest(ctx, f, expected)
   596  	resp, err = cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "pod-00", PodNamespace: f.Namespace.Name})
   597  	framework.ExpectNoError(err, "Get() call failed for pod %s/%s", f.Namespace.Name, "pod-00")
   598  	podResourceList = []*kubeletpodresourcesv1.PodResources{resp.GetPodResources()}
   599  	res = convertToMap(podResourceList)
   600  	err = matchPodDescWithResources(expected, res)
   601  	framework.ExpectNoError(err, "matchPodDescWithResources() failed err %v", err)
   602  	tpd.deletePodsForTest(ctx, f)
   603  
   604  	tpd = newTestPodData()
   605  	ginkgo.By("checking the output when only a pod requiring CPUs is present")
   606  	expected = []podDesc{
   607  		{
   608  			podName:    "pod-01",
   609  			cntName:    "cnt-00",
   610  			cpuRequest: 2000,
   611  		},
   612  	}
   613  	tpd.createPodsForTest(ctx, f, expected)
   614  	resp, err = cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "pod-01", PodNamespace: f.Namespace.Name})
   615  	framework.ExpectNoError(err, "Get() call failed for pod %s/%s", f.Namespace.Name, "pod-01")
   616  	podResourceList = []*kubeletpodresourcesv1.PodResources{resp.GetPodResources()}
   617  	res = convertToMap(podResourceList)
   618  	err = matchPodDescWithResources(expected, res)
   619  	framework.ExpectNoError(err, "matchPodDescWithResources() failed err %v", err)
   620  	tpd.deletePodsForTest(ctx, f)
   621  }
   622  
   623  // Serial because the test updates kubelet configuration.
   624  var _ = SIGDescribe("POD Resources", framework.WithSerial(), feature.PodResources, nodefeature.PodResources, func() {
   625  	f := framework.NewDefaultFramework("podresources-test")
   626  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
   627  
   628  	reservedSystemCPUs := cpuset.New(1)
   629  
   630  	ginkgo.Context("with SRIOV devices in the system", func() {
   631  		ginkgo.BeforeEach(func() {
   632  			requireSRIOVDevices()
   633  		})
   634  
   635  		ginkgo.Context("with CPU manager Static policy", func() {
   636  			ginkgo.BeforeEach(func(ctx context.Context) {
   637  				// this is a very rough check. We just want to rule out systems that do NOT have enough resources
   638  				_, cpuAlloc, _ := getLocalNodeCPUDetails(ctx, f)
   639  
   640  				if cpuAlloc < minCoreCount {
   641  					e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount)
   642  				}
   643  			})
   644  
   645  			// empty context to apply kubelet config changes
   646  			ginkgo.Context("", func() {
   647  				tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
   648  					// Set the CPU Manager policy to static.
   649  					initialConfig.CPUManagerPolicy = string(cpumanager.PolicyStatic)
   650  
   651  					// Set the CPU Manager reconcile period to 1 second.
   652  					initialConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second}
   653  
   654  					cpus := reservedSystemCPUs.String()
   655  					framework.Logf("configurePodResourcesInKubelet: using reservedSystemCPUs=%q", cpus)
   656  					initialConfig.ReservedSystemCPUs = cpus
   657  				})
   658  
   659  				ginkgo.It("should return the expected responses", func(ctx context.Context) {
   660  					onlineCPUs, err := getOnlineCPUs()
   661  					framework.ExpectNoError(err, "getOnlineCPUs() failed err: %v", err)
   662  
   663  					configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
   664  					sd := setupSRIOVConfigOrFail(ctx, f, configMap)
   665  					ginkgo.DeferCleanup(teardownSRIOVConfigOrFail, f, sd)
   666  
   667  					waitForSRIOVResources(ctx, f, sd)
   668  
   669  					endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
   670  					framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
   671  
   672  					cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
   673  					framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
   674  					defer conn.Close()
   675  
   676  					waitForSRIOVResources(ctx, f, sd)
   677  
   678  					ginkgo.By("checking List()")
   679  					podresourcesListTests(ctx, f, cli, sd)
   680  					ginkgo.By("checking GetAllocatableResources()")
   681  					podresourcesGetAllocatableResourcesTests(ctx, cli, sd, onlineCPUs, reservedSystemCPUs)
   682  				})
   683  			})
   684  		})
   685  
   686  		ginkgo.Context("with CPU manager None policy", func() {
   687  			ginkgo.It("should return the expected responses", func(ctx context.Context) {
   688  				// current default is "none" policy - no need to restart the kubelet
   689  
   690  				requireSRIOVDevices()
   691  
   692  				configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
   693  				sd := setupSRIOVConfigOrFail(ctx, f, configMap)
   694  				ginkgo.DeferCleanup(teardownSRIOVConfigOrFail, f, sd)
   695  
   696  				waitForSRIOVResources(ctx, f, sd)
   697  
   698  				endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
   699  				framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
   700  
   701  				cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
   702  				framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
   703  				defer conn.Close()
   704  
   705  				waitForSRIOVResources(ctx, f, sd)
   706  
   707  				// intentionally passing an empty cpuset instead of onlineCPUs because with the none policy
   708  				// we should get no allocatable CPUs - exclusive CPUs are only allocatable with the static policy
   709  				podresourcesGetAllocatableResourcesTests(ctx, cli, sd, cpuset.CPUSet{}, cpuset.CPUSet{})
   710  			})
   711  		})
   712  	})
   713  
   714  	ginkgo.Context("without SRIOV devices in the system", func() {
   715  		ginkgo.BeforeEach(func() {
   716  			requireLackOfSRIOVDevices()
   717  		})
   718  
   719  		ginkgo.Context("with CPU manager Static policy", func() {
   720  			ginkgo.BeforeEach(func(ctx context.Context) {
   721  				// this is a very rough check. We just want to rule out systems that do NOT have enough resources
   722  				_, cpuAlloc, _ := getLocalNodeCPUDetails(ctx, f)
   723  
   724  				if cpuAlloc < minCoreCount {
   725  					e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount)
   726  				}
   727  			})
   728  
   729  			// empty context to apply kubelet config changes
   730  			ginkgo.Context("", func() {
   731  				tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
   732  					// Set the CPU Manager policy to static.
   733  					initialConfig.CPUManagerPolicy = string(cpumanager.PolicyStatic)
   734  
   735  					// Set the CPU Manager reconcile period to 1 second.
   736  					initialConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second}
   737  
   738  					cpus := reservedSystemCPUs.String()
   739  					framework.Logf("configurePodResourcesInKubelet: using reservedSystemCPUs=%q", cpus)
   740  					initialConfig.ReservedSystemCPUs = cpus
   741  					if initialConfig.FeatureGates == nil {
   742  						initialConfig.FeatureGates = make(map[string]bool)
   743  					}
   744  					initialConfig.FeatureGates[string(kubefeatures.KubeletPodResourcesGet)] = true
   745  				})
   746  
   747  				ginkgo.It("should return the expected responses", func(ctx context.Context) {
   748  					onlineCPUs, err := getOnlineCPUs()
   749  					framework.ExpectNoError(err, "getOnlineCPUs() failed err: %v", err)
   750  
   751  					endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
   752  					framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
   753  
   754  					cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
   755  					framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
   756  					defer conn.Close()
   757  
   758  					podresourcesListTests(ctx, f, cli, nil)
   759  					podresourcesGetAllocatableResourcesTests(ctx, cli, nil, onlineCPUs, reservedSystemCPUs)
   760  					podresourcesGetTests(ctx, f, cli)
   761  				})
   762  				ginkgo.It("should account for resources of pods in terminal phase", func(ctx context.Context) {
   763  					pd := podDesc{
   764  						cntName:    "e2e-test-cnt",
   765  						podName:    "e2e-test-pod",
   766  						cpuRequest: 1000,
   767  					}
   768  					pod := makePodResourcesTestPod(pd)
   769  					pod.Spec.Containers[0].Command = []string{"sh", "-c", "/bin/true"}
   770  					pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
   771  					defer e2epod.NewPodClient(f).DeleteSync(ctx, pod.Name, metav1.DeleteOptions{}, time.Minute)
   772  					err := e2epod.WaitForPodCondition(ctx, f.ClientSet, pod.Namespace, pod.Name, "Pod Succeeded", time.Minute*2, testutils.PodSucceeded)
   773  					framework.ExpectNoError(err)
   774  					endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
   775  					framework.ExpectNoError(err)
   776  					cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
   777  					framework.ExpectNoError(err)
   778  					defer conn.Close()
   779  					// although the pod moved into a terminal state, the PodResources API still lists its CPUs
   780  					expectPodResources(ctx, 1, cli, []podDesc{pd})
   781  
   782  				})
   783  			})
   784  		})
   785  
   786  		ginkgo.Context("with CPU manager None policy", func() {
   787  			ginkgo.It("should return the expected responses", func(ctx context.Context) {
   788  				endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
   789  				framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
   790  
   791  				cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
   792  				framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
   793  				defer conn.Close()
   794  
   795  				// intentionally passing an empty cpuset instead of onlineCPUs because with the none policy
   796  				// we should get no allocatable CPUs - exclusive CPUs are only allocatable with the static policy
   797  				podresourcesGetAllocatableResourcesTests(ctx, cli, nil, cpuset.CPUSet{}, cpuset.CPUSet{})
   798  			})
   799  		})
   800  
   801  		ginkgo.Context("with disabled KubeletPodResourcesGet feature gate", func() {
   802  
   803  			ginkgo.It("should return the expected error with the feature gate disabled", func(ctx context.Context) {
   804  				endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
   805  				framework.ExpectNoError(err, "LocalEndpoint() failed err %v", err)
   806  
   807  				cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
   808  				framework.ExpectNoError(err, "GetV1Client() failed err %v", err)
   809  				defer conn.Close()
   810  
   811  				ginkgo.By("checking that Get() fails if the feature gate is not enabled")
   812  				getRes, err := cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "test", PodNamespace: f.Namespace.Name})
   813  				framework.Logf("Get result: %v, err: %v", getRes, err)
   814  				gomega.Expect(err).To(gomega.HaveOccurred(), "With feature gate disabled, the call must fail")
   815  			})
   816  		})
   817  	})
   818  
   819  	ginkgo.Context("with a topology-unaware device plugin, which reports resources w/o hardware topology", func() {
   820  		ginkgo.Context("with CPU manager Static policy", func() {
   821  			ginkgo.BeforeEach(func(ctx context.Context) {
   822  				// this is a very rough check. We just want to rule out systems that do NOT have enough resources
   823  				_, cpuAlloc, _ := getLocalNodeCPUDetails(ctx, f)
   824  
   825  				if cpuAlloc < minCoreCount {
   826  					e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount)
   827  				}
   828  			})
   829  
   830  			// empty context to apply kubelet config changes
   831  			ginkgo.Context("", func() {
   832  				tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
   833  					// Set the CPU Manager policy to static.
   834  					initialConfig.CPUManagerPolicy = string(cpumanager.PolicyStatic)
   835  
   836  					// Set the CPU Manager reconcile period to 1 second.
   837  					initialConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second}
   838  
   839  					cpus := reservedSystemCPUs.String()
   840  					framework.Logf("configurePodResourcesInKubelet: using reservedSystemCPUs=%q", cpus)
   841  					initialConfig.ReservedSystemCPUs = cpus
   842  				})
   843  
   844  				ginkgo.It("should return proper podresources the same as before the restart of kubelet", func(ctx context.Context) {
   845  					dpPod := setupSampleDevicePluginOrFail(ctx, f)
   846  					ginkgo.DeferCleanup(teardownSampleDevicePluginOrFail, f, dpPod)
   847  
   848  					waitForTopologyUnawareResources(ctx, f)
   849  
   850  					endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
   851  					framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
   852  
   853  					cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
   854  					framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
   855  					defer conn.Close()
   856  
   857  					ginkgo.By("checking that the topology-unaware resource is reported without topology information")
   858  
   859  					allocatableResponse, _ := cli.GetAllocatableResources(ctx, &kubeletpodresourcesv1.AllocatableResourcesRequest{})
   860  					for _, dev := range allocatableResponse.GetDevices() {
   861  						if dev.ResourceName != defaultTopologyUnawareResourceName {
   862  							continue
   863  						}
   864  						gomega.Expect(dev.Topology).To(gomega.BeNil(), "Topology is expected to be empty for topology unaware resources")
   865  					}
   866  
   867  					desc := podDesc{
   868  						podName:        "pod-01",
   869  						cntName:        "cnt-01",
   870  						resourceName:   defaultTopologyUnawareResourceName,
   871  						resourceAmount: 1,
   872  						cpuRequest:     1000,
   873  					}
   874  
   875  					tpd := newTestPodData()
   876  					tpd.createPodsForTest(ctx, f, []podDesc{
   877  						desc,
   878  					})
   879  
   880  					expectPodResources(ctx, 1, cli, []podDesc{desc})
   881  
   882  					ginkgo.By("Restarting Kubelet")
   883  					restartKubelet(true)
   884  
   885  					// we need to wait for the node to be reported ready before we can safely query
   886  					// the podresources endpoint again. Otherwise we will have false negatives.
   887  					ginkgo.By("Wait for node to be ready")
   888  					waitForTopologyUnawareResources(ctx, f)
   889  
   890  					expectPodResources(ctx, 1, cli, []podDesc{desc})
   891  					tpd.deletePodsForTest(ctx, f)
   892  				})
   893  			})
   894  		})
   895  	})
   896  
   897  	f.Context("when querying /metrics", f.WithNodeConformance(), func() {
   898  		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
   899  			if initialConfig.FeatureGates == nil {
   900  				initialConfig.FeatureGates = make(map[string]bool)
   901  			}
   902  			initialConfig.FeatureGates[string(kubefeatures.KubeletPodResourcesGet)] = true
   903  		})
   904  		ginkgo.BeforeEach(func(ctx context.Context) {
   905  			// ensure APIs have been called at least once
   906  			endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
   907  			framework.ExpectNoError(err, "LocalEndpoint() failed err %v", err)
   908  
   909  			cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
   910  			framework.ExpectNoError(err, "GetV1Client() failed err %v", err)
   911  			defer conn.Close()
   912  
   913  			_, err = cli.List(ctx, &kubeletpodresourcesv1.ListPodResourcesRequest{})
   914  			framework.ExpectNoError(err, "List() failed err %v", err)
   915  
   916  			_, err = cli.GetAllocatableResources(ctx, &kubeletpodresourcesv1.AllocatableResourcesRequest{})
   917  			framework.ExpectNoError(err, "GetAllocatableResources() failed err %v", err)
   918  
   919  			desc := podDesc{
   920  				podName: "pod-01",
   921  				cntName: "cnt-01",
   922  			}
   923  			tpd := newTestPodData()
   924  			tpd.createPodsForTest(ctx, f, []podDesc{
   925  				desc,
   926  			})
   927  			expectPodResources(ctx, 1, cli, []podDesc{desc})
   928  
   929  			expected := []podDesc{}
   930  			resp, err := cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "pod-01", PodNamespace: f.Namespace.Name})
   931  			framework.ExpectNoError(err, "Get() call failed for pod %s/%s", f.Namespace.Name, "pod-01")
   932  			podResourceList := []*kubeletpodresourcesv1.PodResources{resp.GetPodResources()}
   933  			res := convertToMap(podResourceList)
   934  			err = matchPodDescWithResources(expected, res)
   935  			framework.ExpectNoError(err, "matchPodDescWithResources() failed err %v", err)
   936  			tpd.deletePodsForTest(ctx, f)
   937  		})
   938  
   939  		ginkgo.It("should report the values for the podresources metrics", func(ctx context.Context) {
   940  			// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
   941  			// being [Serial], we can also assume no one else but us is running pods.
   942  			ginkgo.By("Checking the value of the podresources metrics")
   943  
   944  			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
   945  				"kubelet_pod_resources_endpoint_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
   946  					"": timelessSampleAtLeast(1),
   947  				}),
   948  				"kubelet_pod_resources_endpoint_requests_list": gstruct.MatchAllElements(nodeID, gstruct.Elements{
   949  					"": timelessSampleAtLeast(1),
   950  				}),
   951  				"kubelet_pod_resources_endpoint_requests_get_allocatable": gstruct.MatchAllElements(nodeID, gstruct.Elements{
   952  					"": timelessSampleAtLeast(1),
   953  				}),
   954  				"kubelet_pod_resources_endpoint_requests_get": gstruct.MatchAllElements(nodeID, gstruct.Elements{
   955  					"": timelessSampleAtLeast(1),
   956  				}),
   957  				// not checking errors: the calls don't have non-catastrophic (e.g. out of memory) error conditions yet.
   958  			})
   959  
   960  			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
   961  			gomega.Eventually(ctx, getPodResourcesMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
   962  			ginkgo.By("Ensuring the metrics match the expectations a few more times")
   963  			gomega.Consistently(ctx, getPodResourcesMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
   964  		})
   965  	})
   966  
   967  	ginkgo.Context("with the builtin rate limit values", func() {
   968  		ginkgo.It("should hit throttling when calling podresources List in a tight loop", func(ctx context.Context) {
   969  			// ensure APIs have been called at least once
   970  			endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
   971  			framework.ExpectNoError(err, "LocalEndpoint() failed err %v", err)
   972  
   973  			ginkgo.By("Connecting to the kubelet endpoint")
   974  			cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
   975  			framework.ExpectNoError(err, "GetV1Client() failed err %v", err)
   976  			defer conn.Close()
   977  
   978  			tries := podresources.DefaultQPS * 2 // This should also be greater than DefaultBurstTokens
   979  			errs := []error{}
   980  
   981  			ginkgo.By(fmt.Sprintf("Issuing %d List() calls in a tight loop", tries))
   982  			startTime := time.Now()
   983  			for try := 0; try < tries; try++ {
   984  				_, err = cli.List(ctx, &kubeletpodresourcesv1.ListPodResourcesRequest{})
   985  				errs = append(errs, err)
   986  			}
   987  			elapsed := time.Since(startTime)
   988  
   989  			ginkgo.By(fmt.Sprintf("Checking return codes for %d List() calls in %v", tries, elapsed))
   990  
   991  			framework.ExpectNoError(errs[0], "the first List() call unexpectedly failed with %v", errs[0])
   992  			// we would expect (burst) successes and then (tries-burst) errors on a clean test environment running with
   993  			// enough CPU power. CI is usually harsher. So we relax the constraint, expecting at least _a_ failure, while
   994  			// we are likely to get many more. But we can't yet predict how many more to expect, so we prefer to relax
   995  			// the constraint rather than risk flakes at this stage.
   996  			errLimitExceededCount := 0
   997  			for _, err := range errs[1:] {
   998  				if errors.Is(err, apisgrpc.ErrorLimitExceeded) {
   999  					errLimitExceededCount++
  1000  				}
  1001  			}
  1002  			gomega.Expect(errLimitExceededCount).ToNot(gomega.BeZero(), "never hit the rate limit trying %d calls in %v", tries, elapsed)
  1003  
  1004  			framework.Logf("got %d/%d rate limit errors, at least one needed, the more the better", errLimitExceededCount, tries)
  1005  
  1006  			// this is not needed for this test. We're done. But we need to play nice with *other* tests which may run just after,
  1007  			// and which need to query the API. If they run "too fast", they can still be throttled because the throttling period
  1008  			// is not exhausted yet, yielding false negatives and leading to flakes.
  1009  			// We can't reset the rate limit period, so we just wait "long enough" to make sure we absorb the burst
  1010  			// and other queries are not rejected because they happen too soon.
  1011  			ginkgo.By("Cooling down to reset the podresources API rate limit")
  1012  			time.Sleep(5 * time.Second)
  1013  		})
  1014  	})
  1015  })
  1016  
  1017  func requireLackOfSRIOVDevices() {
  1018  	if sriovdevCount, err := countSRIOVDevices(); err != nil || sriovdevCount > 0 {
  1019  		e2eskipper.Skipf("this test is meant to run on a system with no SRIOV VFs configured")
  1020  	}
  1021  }
  1022  
  1023  func getOnlineCPUs() (cpuset.CPUSet, error) {
  1024  	onlineCPUList, err := os.ReadFile("/sys/devices/system/cpu/online")
  1025  	if err != nil {
  1026  		return cpuset.CPUSet{}, err
  1027  	}
  1028  	return cpuset.Parse(strings.TrimSpace(string(onlineCPUList)))
  1029  }
  1030  
  1031  func setupSampleDevicePluginOrFail(ctx context.Context, f *framework.Framework) *v1.Pod {
  1032  	e2enode.WaitForNodeToBeReady(ctx, f.ClientSet, framework.TestContext.NodeName, 5*time.Minute)
  1033  
  1034  	dp := getSampleDevicePluginPod(kubeletdevicepluginv1beta1.DevicePluginPath)
  1035  	dp.Spec.NodeName = framework.TestContext.NodeName
  1036  
  1037  	ginkgo.By("Create the sample device plugin pod")
  1038  
  1039  	dpPod := e2epod.NewPodClient(f).CreateSync(ctx, dp)
  1040  
  1041  	err := e2epod.WaitForPodCondition(ctx, f.ClientSet, dpPod.Namespace, dpPod.Name, "Ready", 120*time.Second, testutils.PodRunningReady)
  1042  	if err != nil {
  1043  		framework.Logf("Sample Device Pod %v took too long to enter running/ready: %v", dp.Name, err)
  1044  	}
  1045  	framework.ExpectNoError(err, "WaitForPodCondition() failed err: %v", err)
  1046  
  1047  	return dpPod
  1048  }
  1049  
  1050  func teardownSampleDevicePluginOrFail(ctx context.Context, f *framework.Framework, pod *v1.Pod) {
  1051  	gp := int64(0)
  1052  	deleteOptions := metav1.DeleteOptions{
  1053  		GracePeriodSeconds: &gp,
  1054  	}
  1055  	ginkgo.By(fmt.Sprintf("Delete sample device plugin pod %s/%s", pod.Namespace, pod.Name))
  1056  	err := f.ClientSet.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, deleteOptions)
  1057  
  1058  	framework.ExpectNoError(err, "Failed to delete Pod %v in Namespace %v", pod.Name, pod.Namespace)
  1059  	waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
  1060  }
  1061  
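        // waitForTopologyUnawareResources waits until the local node advertises a non-zero allocatable amount
        // of the sample device plugin resource.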
  1062  func waitForTopologyUnawareResources(ctx context.Context, f *framework.Framework) {
  1063  	ginkgo.By(fmt.Sprintf("Waiting for %q resources to become available on the local node", defaultTopologyUnawareResourceName))
  1064  
  1065  	gomega.Eventually(ctx, func(ctx context.Context) bool {
  1066  		node := getLocalNode(ctx, f)
  1067  		resourceAmount := CountSampleDeviceAllocatable(node)
  1068  		return resourceAmount > 0
  1069  	}, 2*time.Minute, framework.Poll).Should(gomega.BeTrue())
  1070  }
  1071  
  1072  func getPodResourcesMetrics(ctx context.Context) (e2emetrics.KubeletMetrics, error) {
  1073  	// we are running out of good names, so we need to be unnecessarily specific to avoid clashes
  1074  	ginkgo.By("getting Pod Resources metrics from the metrics API")
  1075  	return e2emetrics.GrabKubeletMetricsWithoutProxy(ctx, nodeNameOrIP()+":10255", "/metrics")
  1076  }
  1077  
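        // timelessSampleAtLeast matches a metric sample whose value is at least lower, ignoring its timestamp.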
  1078  func timelessSampleAtLeast(lower interface{}) types.GomegaMatcher {
  1079  	return gstruct.PointTo(gstruct.MatchAllFields(gstruct.Fields{
  1080  		// We already check Metric when matching the Id
  1081  		"Metric":    gstruct.Ignore(),
  1082  		"Value":     gomega.BeNumerically(">=", lower),
  1083  		"Timestamp": gstruct.Ignore(),
  1084  		"Histogram": gstruct.Ignore(),
  1085  	}))
  1086  }