k8s.io/kubernetes@v1.29.3/test/e2e_node/device_manager_test.go

     1  /*
     2  Copyright 2021 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package e2enode
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"os"
    24  	"path/filepath"
    25  	"regexp"
    26  	"sort"
    27  	"strings"
    28  	"time"
    29  
    30  	v1 "k8s.io/api/core/v1"
    31  	"k8s.io/apimachinery/pkg/api/resource"
    32  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    33  	"k8s.io/apimachinery/pkg/util/uuid"
    34  	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
    35  	"k8s.io/klog/v2"
    36  	kubeletpodresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
    37  	"k8s.io/kubernetes/pkg/kubelet/apis/podresources"
    38  	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
    39  	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
    40  	"k8s.io/kubernetes/pkg/kubelet/util"
    41  	admissionapi "k8s.io/pod-security-admission/api"
    42  
    43  	"k8s.io/kubernetes/test/e2e/feature"
    44  	"k8s.io/kubernetes/test/e2e/framework"
    45  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    46  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    47  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    48  	e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
    49  	"k8s.io/kubernetes/test/e2e/nodefeature"
    50  	testutils "k8s.io/kubernetes/test/utils"
    51  
    52  	"github.com/onsi/ginkgo/v2"
    53  	"github.com/onsi/gomega"
    54  	"github.com/onsi/gomega/gcustom"
    55  	"github.com/onsi/gomega/types"
    56  )
    57  
    58  const (
    59  	devicePluginDir = "/var/lib/kubelet/device-plugins"
    60  	checkpointName  = "kubelet_internal_checkpoint"
    61  )
    62  
    63  // Serial because the test updates kubelet configuration.
    64  var _ = SIGDescribe("Device Manager", framework.WithSerial(), feature.DeviceManager, nodefeature.DeviceManager, func() {
    65  	checkpointFullPath := filepath.Join(devicePluginDir, checkpointName)
    66  	f := framework.NewDefaultFramework("devicemanager-test")
    67  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    68  
    69  	ginkgo.Context("With SRIOV devices in the system", func() {
    70  		// this test wants to reproduce what happened in https://github.com/kubernetes/kubernetes/issues/102880
    71  		ginkgo.It("should be able to recover V1 (aka pre-1.20) checkpoint data and reject pods before device re-registration", func(ctx context.Context) {
    72  			if sriovdevCount, err := countSRIOVDevices(); err != nil || sriovdevCount == 0 {
    73  				e2eskipper.Skipf("this test is meant to run on a system with at least one configured VF from an SRIOV device")
    74  			}
    75  
    76  			configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
    77  			sd := setupSRIOVConfigOrFail(ctx, f, configMap)
    78  
    79  			waitForSRIOVResources(ctx, f, sd)
    80  
    81  			cntName := "gu-container"
    82  			// we create and delete a pod to make sure the internal device manager state contains a pod allocation
    83  			ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with 1 core, 1 %s device", sd.resourceName))
    84  			var initCtnAttrs []tmCtnAttribute
    85  			ctnAttrs := []tmCtnAttribute{
    86  				{
    87  					ctnName:       cntName,
    88  					cpuRequest:    "1000m",
    89  					cpuLimit:      "1000m",
    90  					deviceName:    sd.resourceName,
    91  					deviceRequest: "1",
    92  					deviceLimit:   "1",
    93  				},
    94  			}
    95  
    96  			podName := "gu-pod-rec-pre-1"
    97  			framework.Logf("creating pod %s attrs %v", podName, ctnAttrs)
    98  			pod := makeTopologyManagerTestPod(podName, ctnAttrs, initCtnAttrs)
    99  			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
   100  
   101  			// now we need to simulate a node drain, so we remove all the pods, including the sriov device plugin.
   102  
   103  			ginkgo.By("deleting the pod")
   104  			// note we delete the pod now because the current devicemanager implementation will NOT
   105  			// clean up on pod deletion. When this changes, the deletion needs to move to after the test is done.
   106  			deletePodSyncByName(ctx, f, pod.Name)
   107  			waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
   108  
   109  			ginkgo.By("teardown the sriov device plugin")
   110  			// since we will NOT be recreating the plugin, we clean up everything now
   111  			teardownSRIOVConfigOrFail(ctx, f, sd)
   112  
   113  			ginkgo.By("stopping the kubelet")
   114  			killKubelet("SIGSTOP")
   115  
   116  			ginkgo.By("rewriting the kubelet checkpoint file as v1")
   117  			err := rewriteCheckpointAsV1(devicePluginDir, checkpointName)
   118  			// make sure we remove any leftovers
   119  			defer os.Remove(checkpointFullPath)
   120  			framework.ExpectNoError(err)
   121  
   122  			// this mimics a kubelet restart after the upgrade
   123  			// TODO: is SIGTERM (less brutal) good enough?
   124  			ginkgo.By("killing the kubelet")
   125  			killKubelet("SIGKILL")
   126  
   127  			ginkgo.By("waiting for the kubelet to be ready again")
   128  			// Wait for the Kubelet to be ready.
   129  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   130  				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
   131  				framework.ExpectNoError(err)
   132  				return nodes == 1
   133  			}, time.Minute, time.Second).Should(gomega.BeTrue())
   134  
   135  			// note we DO NOT start the sriov device plugin. This is intentional.
   136  			// issue #102880 reproduces because of a race on startup caused by corrupted device manager
   137  			// state, which leads to the v1.Node object not being updated on the apiserver.
   138  			// So to hit the issue we need to receive the pod *before* the device plugin registers itself.
   139  			// The simplest and safest way to reproduce it is simply not to run the device plugin again.
   140  
   141  			podName = "gu-pod-rec-post-2"
   142  			framework.Logf("creating pod %s attrs %v", podName, ctnAttrs)
   143  			pod = makeTopologyManagerTestPod(podName, ctnAttrs, initCtnAttrs)
   144  
   145  			pod = e2epod.NewPodClient(f).Create(ctx, pod)
   146  			err = e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Failed", 30*time.Second, func(pod *v1.Pod) (bool, error) {
   147  				if pod.Status.Phase != v1.PodPending {
   148  					return true, nil
   149  				}
   150  				return false, nil
   151  			})
   152  			framework.ExpectNoError(err)
   153  			pod, err = e2epod.NewPodClient(f).Get(ctx, pod.Name, metav1.GetOptions{})
   154  			framework.ExpectNoError(err)
   155  
   156  			if pod.Status.Phase != v1.PodFailed {
   157  				framework.Failf("pod %s not failed: %v", pod.Name, pod.Status)
   158  			}
   159  
   160  			framework.Logf("checking pod %s status reason (%s)", pod.Name, pod.Status.Reason)
   161  			if !isUnexpectedAdmissionError(pod) {
   162  				framework.Failf("pod %s failed for wrong reason: %q", pod.Name, pod.Status.Reason)
   163  			}
   164  
   165  			deletePodSyncByName(ctx, f, pod.Name)
   166  		})
   167  
   168  		ginkgo.It("should be able to recover V1 (aka pre-1.20) checkpoint data and update topology info on device re-registration", func(ctx context.Context) {
   169  			if sriovdevCount, err := countSRIOVDevices(); err != nil || sriovdevCount == 0 {
   170  				e2eskipper.Skipf("this test is meant to run on a system with at least one configured VF from an SRIOV device")
   171  			}
   172  
   173  			endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
   174  			framework.ExpectNoError(err)
   175  
   176  			configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
   177  
   178  			sd := setupSRIOVConfigOrFail(ctx, f, configMap)
   179  			waitForSRIOVResources(ctx, f, sd)
   180  
   181  			cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
   182  			framework.ExpectNoError(err)
   183  
   184  			resp, err := cli.GetAllocatableResources(ctx, &kubeletpodresourcesv1.AllocatableResourcesRequest{})
   185  			conn.Close()
   186  			framework.ExpectNoError(err)
   187  
   188  			suitableDevs := 0
   189  			for _, dev := range resp.GetDevices() {
   190  				for _, node := range dev.GetTopology().GetNodes() {
   191  					if node.GetID() != 0 {
   192  						suitableDevs++
   193  					}
   194  				}
   195  			}
   196  			if suitableDevs == 0 {
   197  				teardownSRIOVConfigOrFail(ctx, f, sd)
   198  				e2eskipper.Skipf("no devices found on NUMA cells other than cell 0")
   199  			}
   200  
   201  			cntName := "gu-container"
   202  			// we create and delete a pod to make sure the internal device manager state contains a pod allocation
   203  			ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with 1 core, 1 %s device", sd.resourceName))
   204  			var initCtnAttrs []tmCtnAttribute
   205  			ctnAttrs := []tmCtnAttribute{
   206  				{
   207  					ctnName:       cntName,
   208  					cpuRequest:    "1000m",
   209  					cpuLimit:      "1000m",
   210  					deviceName:    sd.resourceName,
   211  					deviceRequest: "1",
   212  					deviceLimit:   "1",
   213  				},
   214  			}
   215  
   216  			podName := "gu-pod-rec-pre-1"
   217  			framework.Logf("creating pod %s attrs %v", podName, ctnAttrs)
   218  			pod := makeTopologyManagerTestPod(podName, ctnAttrs, initCtnAttrs)
   219  			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
   220  
   221  			// now we need to simulate a node drain, so we remove all the pods, including the sriov device plugin.
   222  
   223  			ginkgo.By("deleting the pod")
   224  			// note we delete the pod now because the current devicemanager implementation will NOT
   225  			// clean up on pod deletion. When this changes, the deletion needs to move to after the test is done.
   226  			deletePodSyncByName(ctx, f, pod.Name)
   227  			waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
   228  
   229  			ginkgo.By("teardown the sriov device plugin")
   230  			// we only remove the plugin pod here and keep the config, so recreating the plugin later is faster
   231  			deleteSRIOVPodOrFail(ctx, f, sd)
   232  
   233  			ginkgo.By("stopping the kubelet")
   234  			killKubelet("SIGSTOP")
   235  
   236  			ginkgo.By("rewriting the kubelet checkpoint file as v1")
   237  			err = rewriteCheckpointAsV1(devicePluginDir, checkpointName)
   238  			// make sure we remove any leftovers
   239  			defer os.Remove(checkpointFullPath)
   240  			framework.ExpectNoError(err)
   241  
   242  			// this mimics a kubelet restart after the upgrade
   243  			// TODO: is SIGTERM (less brutal) good enough?
   244  			ginkgo.By("killing the kubelet")
   245  			killKubelet("SIGKILL")
   246  
   247  			ginkgo.By("waiting for the kubelet to be ready again")
   248  			// Wait for the Kubelet to be ready.
   249  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   250  				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
   251  				framework.ExpectNoError(err)
   252  				return nodes == 1
   253  			}, time.Minute, time.Second).Should(gomega.BeTrue())
   254  
   255  			sd2 := &sriovData{
   256  				configMap:      sd.configMap,
   257  				serviceAccount: sd.serviceAccount,
   258  			}
   259  			sd2.pod = createSRIOVPodOrFail(ctx, f)
   260  			ginkgo.DeferCleanup(teardownSRIOVConfigOrFail, f, sd2)
   261  			waitForSRIOVResources(ctx, f, sd2)
   262  
   263  			compareSRIOVResources(sd, sd2)
   264  
   265  			cli, conn, err = podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
   266  			framework.ExpectNoError(err)
   267  			defer conn.Close()
   268  
   269  			resp2, err := cli.GetAllocatableResources(ctx, &kubeletpodresourcesv1.AllocatableResourcesRequest{})
   270  			framework.ExpectNoError(err)
   271  
   272  			cntDevs := stringifyContainerDevices(resp.GetDevices())
   273  			cntDevs2 := stringifyContainerDevices(resp2.GetDevices())
   274  			if cntDevs != cntDevs2 {
   275  				framework.Failf("different allocatable resources expected %v got %v", cntDevs, cntDevs2)
   276  			}
   277  		})
   278  
   279  	})
   280  
   281  	/*
   282  		This end to end test simulates a scenario where, after a kubelet restart/node
   283  		reboot, application pods requesting devices appear before the device plugin
   284  		pod exposing those devices as resources.
   285  
   286  		The happy path is where, after a node reboot/kubelet restart, the device plugin pod
   287  		appears before the application pod. This e2e test
   288  		aims to tackle the scenario where the device plugin either does not appear first
   289  		or doesn't get the chance to re-register itself.
   290  
   291  		Since there is no way of controlling the order in which the pods appear after
   292  		kubelet restart/node reboot, we can't guarantee that the application pod
   293  		recovers before the device plugin pod (the scenario we want to exercise here).
   294  		If the device plugin pod is recovered before the test pod, we can still
   295  		meaningfully reproduce the scenario by NOT sending the registration command.
   296  		To do so, the sample device plugin is enhanced. For implementation details, refer to:
   297  		`test/images/sample-device-plugin/sampledeviceplugin.go`. This enhancement
   298  		allows auto-registration of the plugin to be controlled with the help of an environment
   299  		variable: REGISTER_CONTROL_FILE. By default this environment variable is not present
   300  		and the device plugin auto-registers with the kubelet. For this e2e test, we use the sample device
   301  		plugin spec with REGISTER_CONTROL_FILE=/var/lib/kubelet/device-plugins/sample/registration
   302  		so that plugin registration has to be triggered manually. This allows an application pod (requesting
   303  		devices) to run successfully on the node, followed by a kubelet restart after which the device plugin
   304  		doesn't re-register and the application pod fails with an admission error.
   305  
   306  		   Breakdown of the steps implemented as part of this e2e test is as follows:
   307  		   1. Create a file `registration` at path `/var/lib/kubelet/device-plugins/sample/`
   308  		   2. Create a sample device plugin with the environment variable
   309  		      `REGISTER_CONTROL_FILE=/var/lib/kubelet/device-plugins/sample/registration` set, so that it
   310  		      waits for a client to delete the control file.
   311  		   3. Trigger plugin registration by deleting the control file created above.
   312  		   4. Create a test pod requesting devices exposed by the device plugin.
   313  		   5. Stop kubelet.
   314  		   6. Remove pods using CRI to ensure new pods are created after kubelet restart.
   315  		   7. Restart kubelet.
   316  		   8. Wait for the sample device plugin pod to be running. In this case,
   317  		      the registration is not triggered.
   318  		   9. Ensure that resource capacity/allocatable exported by the device plugin is zero.
   319  		   10. The test pod should fail with `UnexpectedAdmissionError`
   320  		   11. Delete the test pod.
   321  		   12. Delete the sample device plugin pod.
   322  		   13. Remove `/var/lib/kubelet/device-plugins/sample/` (the directory created to control registration) and its content.
   323  	*/
   324  	f.Context("With sample device plugin", f.WithSerial(), f.WithDisruptive(), func() {
   325  		var deviceCount int = 2
   326  		var devicePluginPod *v1.Pod
   327  		var triggerPathFile, triggerPathDir string
   328  
   329  		// this test wants to reproduce what happened in https://github.com/kubernetes/kubernetes/issues/109595
   330  		ginkgo.BeforeEach(func(ctx context.Context) {
   331  			ginkgo.By("Wait for node to be ready")
   332  			gomega.Eventually(ctx, e2enode.TotalReady).
   333  				WithArguments(f.ClientSet).
   334  				WithTimeout(time.Minute).
   335  				Should(gomega.BeEquivalentTo(1))
   336  
   337  			ginkgo.By("Setting up the directory and file for controlling registration")
   338  			triggerPathDir = filepath.Join(devicePluginDir, "sample")
   339  			triggerPathFile = filepath.Join(triggerPathDir, "registration")
   340  			if _, err := os.Stat(triggerPathDir); errors.Is(err, os.ErrNotExist) {
   341  				err := os.Mkdir(triggerPathDir, os.ModePerm)
   342  				if err != nil {
   343  					klog.Errorf("Directory creation %s failed: %v", triggerPathDir, err)
   344  					panic(err)
   345  				}
   346  				klog.InfoS("Directory created successfully", "path", triggerPathDir)
   347  			}
   348  
   349  			if _, err := os.Stat(triggerPathFile); errors.Is(err, os.ErrNotExist) {
   350  				_, err = os.Create(triggerPathFile)
   351  				if err != nil {
   352  					klog.Errorf("File creation %s failed: %v", triggerPathFile, err)
   353  					panic(err)
   354  				}
   355  			}
   356  
   357  			ginkgo.By("Scheduling a sample device plugin pod")
   358  			data, err := e2etestfiles.Read(SampleDevicePluginControlRegistrationDSYAML)
   359  			if err != nil {
   360  				framework.Fail(err.Error())
   361  			}
   362  			ds := readDaemonSetV1OrDie(data)
   363  
   364  			dp := &v1.Pod{
   365  				ObjectMeta: metav1.ObjectMeta{
   366  					Name: SampleDevicePluginName,
   367  				},
   368  				Spec: ds.Spec.Template.Spec,
   369  			}
   370  
   371  			devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dp)
   372  
   373  			go func() {
   374  				// Since auto-registration is disabled for the device plugin (as the REGISTER_CONTROL_FILE
   375  				// environment variable is specified), device plugin registration needs to be triggered
   376  				// manually.
   377  				// This is done by deleting the control file at the following path:
   378  				// `/var/lib/kubelet/device-plugins/sample/registration`.
   379  
   380  				defer ginkgo.GinkgoRecover()
   381  				framework.Logf("Deleting the control file: %q to trigger registration", triggerPathFile)
   382  				err := os.Remove(triggerPathFile)
   383  				framework.ExpectNoError(err)
   384  			}()
   385  
   386  			ginkgo.By("Waiting for devices to become available on the local node")
   387  
   388  			gomega.Eventually(ctx, isNodeReadyWithSampleResources).
   389  				WithArguments(f).
   390  				WithTimeout(5 * time.Minute).
   391  				Should(BeReady())
   392  
   393  			framework.Logf("Successfully created device plugin pod")
   394  
   395  			devsLen := int64(deviceCount) // shortcut
   396  			ginkgo.By("Waiting for the resource exported by the sample device plugin to become available on the local node")
   397  
   398  			gomega.Eventually(ctx, isNodeReadyWithAllocatableSampleResources).
   399  				WithArguments(f, devsLen).
   400  				WithTimeout(5 * time.Minute).
   401  				Should(HaveAllocatableDevices())
   402  		})
   403  
   404  		ginkgo.It("should deploy pod consuming devices first but fail with admission error after kubelet restart in case device plugin hasn't re-registered", func(ctx context.Context) {
   405  			var err error
   406  			podCMD := "while true; do sleep 1000; done;"
   407  
   408  			ginkgo.By(fmt.Sprintf("creating a pod requiring %d %q", deviceCount, SampleDeviceResourceName))
   409  
   410  			pod := makeBusyboxDeviceRequiringPod(SampleDeviceResourceName, podCMD)
   411  			testPod := e2epod.NewPodClient(f).CreateSync(ctx, pod)
   412  
   413  			ginkgo.By("making sure all the pods are ready")
   414  
   415  			err = e2epod.WaitForPodCondition(ctx, f.ClientSet, testPod.Namespace, testPod.Name, "Ready", 120*time.Second, testutils.PodRunningReady)
   416  			framework.ExpectNoError(err, "pod %s/%s did not become running and ready", testPod.Namespace, testPod.Name)
   417  			framework.Logf("pod %s/%s running", testPod.Namespace, testPod.Name)
   418  
   419  			ginkgo.By("stopping the kubelet")
   420  			startKubelet := stopKubelet()
   421  
   422  			ginkgo.By("stopping all the local containers - using CRI")
   423  			rs, _, err := getCRIClient()
   424  			framework.ExpectNoError(err)
   425  			sandboxes, err := rs.ListPodSandbox(ctx, &runtimeapi.PodSandboxFilter{})
   426  			framework.ExpectNoError(err)
   427  			for _, sandbox := range sandboxes {
   428  				gomega.Expect(sandbox.Metadata).ToNot(gomega.BeNil())
   429  				ginkgo.By(fmt.Sprintf("deleting pod using CRI: %s/%s -> %s", sandbox.Metadata.Namespace, sandbox.Metadata.Name, sandbox.Id))
   430  
   431  				err := rs.RemovePodSandbox(ctx, sandbox.Id)
   432  				framework.ExpectNoError(err)
   433  			}
   434  
   435  			ginkgo.By("restarting the kubelet")
   436  			startKubelet()
   437  
   438  			ginkgo.By("waiting for the kubelet to be ready again")
   439  			// Wait for the Kubelet to be ready.
   440  
   441  			gomega.Eventually(ctx, e2enode.TotalReady).
   442  				WithArguments(f.ClientSet).
   443  				WithTimeout(2 * time.Minute).
   444  				Should(gomega.BeEquivalentTo(1))
   445  
   446  			ginkgo.By("making sure all the pods are ready after the recovery")
   447  
   448  			var devicePluginPodAfterRestart *v1.Pod
   449  
   450  			devicePluginPodAfterRestart, err = e2epod.NewPodClient(f).Get(ctx, devicePluginPod.Name, metav1.GetOptions{})
   451  			framework.ExpectNoError(err)
   452  
   453  			err = e2epod.WaitForPodCondition(ctx, f.ClientSet, devicePluginPodAfterRestart.Namespace, devicePluginPodAfterRestart.Name, "Ready", 120*time.Second, testutils.PodRunningReady)
   454  			framework.ExpectNoError(err, "pod %s/%s did not become running and ready", devicePluginPodAfterRestart.Namespace, devicePluginPodAfterRestart.Name)
   455  			framework.Logf("pod %s/%s running", devicePluginPodAfterRestart.Namespace, devicePluginPodAfterRestart.Name)
   456  
   457  			ginkgo.By("Waiting for the resource capacity/allocatable exported by the sample device plugin to become zero")
   458  
   459  			// The device plugin pod has restarted but has not re-registered to kubelet (since auto-registration
   460  			// is disabled via REGISTER_CONTROL_FILE) and registration wasn't triggered manually (by deleting
   461  			// the control file at `/var/lib/kubelet/device-plugins/sample/registration`). Because of this,
   462  			// the capacity and allocatable corresponding to the resource exposed by the device plugin should be zero.
   463  
   464  			gomega.Eventually(ctx, isNodeReadyWithAllocatableSampleResources).
   465  				WithArguments(f, int64(0)).
   466  				WithTimeout(5 * time.Minute).
   467  				Should(HaveAllocatableDevices())
   468  
   469  			ginkgo.By("Checking that pod requesting devices failed to start because of admission error")
   470  
   471  			// NOTE: The device plugin won't re-register again and this is intentional.
   472  			// Because of this, the test pod (requesting a device) should fail with an admission error.
   473  
   474  			gomega.Eventually(ctx, getPod).
   475  				WithArguments(f, testPod.Name).
   476  				WithTimeout(time.Minute).
   477  				Should(HaveFailedWithAdmissionError(),
   478  					"the pod started successfully when it should have failed with an admission error")
   479  
   480  			ginkgo.By("removing application pods")
   481  			e2epod.NewPodClient(f).DeleteSync(ctx, testPod.Name, metav1.DeleteOptions{}, 2*time.Minute)
   482  		})
   483  
   484  		ginkgo.AfterEach(func(ctx context.Context) {
   485  			ginkgo.By("Deleting the device plugin pod")
   486  			e2epod.NewPodClient(f).DeleteSync(ctx, devicePluginPod.Name, metav1.DeleteOptions{}, time.Minute)
   487  
   488  			ginkgo.By("Deleting the directory and file setup for controlling registration")
   489  			err := os.RemoveAll(triggerPathDir)
   490  			framework.ExpectNoError(err)
   491  
   492  			ginkgo.By("Deleting any Pods created by the test")
   493  			l, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{})
   494  			framework.ExpectNoError(err)
   495  			for _, p := range l.Items {
   496  				if p.Namespace != f.Namespace.Name {
   497  					continue
   498  				}
   499  
   500  				framework.Logf("Deleting pod: %s", p.Name)
   501  				e2epod.NewPodClient(f).DeleteSync(ctx, p.Name, metav1.DeleteOptions{}, 2*time.Minute)
   502  			}
   503  
   504  			ginkgo.By("Waiting for devices to become unavailable on the local node")
   505  			gomega.Eventually(ctx, isNodeReadyWithoutSampleResources).
   506  				WithArguments(f).
   507  				WithTimeout(5 * time.Minute).
   508  				Should(BeReady())
   509  		})
   510  
   511  	})
   512  
   513  })
   514  
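        // compareSRIOVResources fails the test if the SRIOV resource name or amount reported
        // before and after the kubelet restart differ.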
   515  func compareSRIOVResources(expected, got *sriovData) {
   516  	if expected.resourceName != got.resourceName {
   517  		framework.Failf("different SRIOV resource name: expected %q got %q", expected.resourceName, got.resourceName)
   518  	}
   519  	if expected.resourceAmount != got.resourceAmount {
   520  		framework.Failf("different SRIOV resource amount: expected %d got %d", expected.resourceAmount, got.resourceAmount)
   521  	}
   522  }
   523  
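        // isUnexpectedAdmissionError reports whether the pod status reason matches the
        // UnexpectedAdmissionError pattern.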
   524  func isUnexpectedAdmissionError(pod *v1.Pod) bool {
   525  	re := regexp.MustCompile(`Unexpected.*Admission.*Error`)
   526  	return re.MatchString(pod.Status.Reason)
   527  }
   528  
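        // rewriteCheckpointAsV1 reads the device manager checkpoint stored in dir, converts it to the
        // pre-1.20 V1 format and writes it back in place, so the subsequent kubelet restart exercises
        // the V1 checkpoint recovery path.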
   529  func rewriteCheckpointAsV1(dir, name string) error {
   530  	ginkgo.By(fmt.Sprintf("Creating temporary checkpoint manager (dir=%q)", dir))
   531  	checkpointManager, err := checkpointmanager.NewCheckpointManager(dir)
   532  	if err != nil {
   533  		return err
   534  	}
   535  	cp := checkpoint.New(make([]checkpoint.PodDevicesEntry, 0), make(map[string][]string))
   536  	err = checkpointManager.GetCheckpoint(name, cp)
   537  	if err != nil {
   538  		return err
   539  	}
   540  
   541  	ginkgo.By(fmt.Sprintf("Read checkpoint %q %#v", name, cp))
   542  
   543  	podDevices, registeredDevs := cp.GetDataInLatestFormat()
   544  	podDevicesV1 := convertPodDeviceEntriesToV1(podDevices)
   545  	cpV1 := checkpoint.NewV1(podDevicesV1, registeredDevs)
   546  
   547  	blob, err := cpV1.MarshalCheckpoint()
   548  	if err != nil {
   549  		return err
   550  	}
   551  
   552  	// TODO: why doesn't `checkpointManager.CreateCheckpoint(name, cpV1)` seem to work?
   553  	ckPath := filepath.Join(dir, name)
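        	// also keep a copy of the checkpoint under /tmp to aid debugging; the error of this best-effort write is ignored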
   554  	os.WriteFile(filepath.Join("/tmp", name), blob, 0600)
   555  	return os.WriteFile(ckPath, blob, 0600)
   556  }
   557  
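        // convertPodDeviceEntriesToV1 flattens the per-NUMA-node device IDs of the current checkpoint
        // format into the flat device ID list used by the V1 checkpoint format.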
   558  func convertPodDeviceEntriesToV1(entries []checkpoint.PodDevicesEntry) []checkpoint.PodDevicesEntryV1 {
   559  	entriesv1 := []checkpoint.PodDevicesEntryV1{}
   560  	for _, entry := range entries {
   561  		deviceIDs := []string{}
   562  		for _, perNUMANodeDevIDs := range entry.DeviceIDs {
   563  			deviceIDs = append(deviceIDs, perNUMANodeDevIDs...)
   564  		}
   565  		entriesv1 = append(entriesv1, checkpoint.PodDevicesEntryV1{
   566  			PodUID:        entry.PodUID,
   567  			ContainerName: entry.ContainerName,
   568  			ResourceName:  entry.ResourceName,
   569  			DeviceIDs:     deviceIDs,
   570  			AllocResp:     entry.AllocResp,
   571  		})
   572  	}
   573  	return entriesv1
   574  }
   575  
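        // stringifyContainerDevices renders the given container devices as a sorted, comma-separated
        // string of "resource[deviceID]@NUMA=node" entries, suitable for order-independent comparison.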
   576  func stringifyContainerDevices(devs []*kubeletpodresourcesv1.ContainerDevices) string {
   577  	entries := []string{}
   578  	for _, dev := range devs {
   579  		devIDs := dev.GetDeviceIds()
   580  		if devIDs != nil {
   581  			for _, devID := range dev.DeviceIds {
   582  				nodes := dev.GetTopology().GetNodes()
   583  				if nodes != nil {
   584  					for _, node := range nodes {
   585  						entries = append(entries, fmt.Sprintf("%s[%s]@NUMA=%d", dev.ResourceName, devID, node.GetID()))
   586  					}
   587  				} else {
   588  					entries = append(entries, fmt.Sprintf("%s[%s]@NUMA=none", dev.ResourceName, devID))
   589  				}
   590  			}
   591  		} else {
   592  			entries = append(entries, dev.ResourceName)
   593  		}
   594  	}
   595  	sort.Strings(entries)
   596  	return strings.Join(entries, ", ")
   597  }
   598  
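        // makeBusyboxDeviceRequiringPod returns a never-restarting busybox pod that runs cmd and
        // requests (and limits) two devices of the given resource name.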
   599  func makeBusyboxDeviceRequiringPod(resourceName, cmd string) *v1.Pod {
   600  	podName := "device-manager-test-" + string(uuid.NewUUID())
   601  	rl := v1.ResourceList{
   602  		v1.ResourceName(resourceName): *resource.NewQuantity(2, resource.DecimalSI),
   603  	}
   604  	return &v1.Pod{
   605  		ObjectMeta: metav1.ObjectMeta{
   606  			Name: podName,
   607  		},
   608  		Spec: v1.PodSpec{
   609  			RestartPolicy: v1.RestartPolicyNever,
   610  			Containers: []v1.Container{{
   611  				Image: busyboxImage,
   612  				Name:  podName,
   613  				// Runs the specified command in the test pod.
   614  				Command: []string{"sh", "-c", cmd},
   615  				Resources: v1.ResourceRequirements{
   616  					Limits:   rl,
   617  					Requests: rl,
   618  				},
   619  			}},
   620  		},
   621  	}
   622  }
   623  
   624  // BeReady verifies that a node is ready and devices have registered.
   625  func BeReady() types.GomegaMatcher {
   626  	return gomega.And(
   627  		// This additional matcher checks for the final error condition.
   628  		gcustom.MakeMatcher(func(ready bool) (bool, error) {
   629  			if !ready {
   630  				return false, fmt.Errorf("expected node to be ready=%t", ready)
   631  			}
   632  			return true, nil
   633  		}),
   634  		BeInReadyPhase(true),
   635  	)
   636  }
   637  
   638  // BeInReadyPhase matches if the node ready state equals the expected isReady value.
   639  func BeInReadyPhase(isReady bool) types.GomegaMatcher {
   640  	return gcustom.MakeMatcher(func(ready bool) (bool, error) {
   641  		return ready == isReady, nil
   642  	}).WithTemplate("expected Node Ready {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(isReady)
   643  }
   644  
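        // isNodeReadyWithSampleResources returns true when the local test node is ready and advertises
        // a non-zero capacity of the sample device resource.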
   645  func isNodeReadyWithSampleResources(ctx context.Context, f *framework.Framework) (bool, error) {
   646  	node, ready := getLocalTestNode(ctx, f)
   647  	if !ready {
   648  		return false, fmt.Errorf("expected node to be ready=%t", ready)
   649  	}
   650  
   651  	if CountSampleDeviceCapacity(node) <= 0 {
   652  		return false, fmt.Errorf("expected devices to be advertised")
   653  	}
   654  	return true, nil
   655  }
   656  
   657  // HaveAllocatableDevices verifies that a node has allocatable devices.
   658  func HaveAllocatableDevices() types.GomegaMatcher {
   659  	return gomega.And(
   660  		// This additional matcher checks for the final error condition.
   661  		gcustom.MakeMatcher(func(hasAllocatable bool) (bool, error) {
   662  			if !hasAllocatable {
   663  			if !hasAllocatable {
   664  				return false, fmt.Errorf("expected node to have allocatable devices=%t", hasAllocatable)
   664  			}
   665  			return true, nil
   666  		}),
   667  		hasAllocatable(true),
   668  	)
   669  }
   670  
   671  // hasAllocatable matches if the node's allocatable-devices state equals the expected hasAllocatable value.
   672  func hasAllocatable(hasAllocatable bool) types.GomegaMatcher {
   673  	return gcustom.MakeMatcher(func(hasAllocatableDevices bool) (bool, error) {
   674  		return hasAllocatableDevices == hasAllocatable, nil
   675  	}).WithTemplate("expected Node with allocatable {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(hasAllocatable)
   676  }
   677  
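        // isNodeReadyWithAllocatableSampleResources returns true when the local test node is ready and
        // both the capacity and the allocatable count of the sample device resource equal devCount.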
   678  func isNodeReadyWithAllocatableSampleResources(ctx context.Context, f *framework.Framework, devCount int64) (bool, error) {
   679  	node, ready := getLocalTestNode(ctx, f)
   680  	if !ready {
   681  		return false, fmt.Errorf("expected node to be ready=%t", ready)
   682  	}
   683  
   684  	if CountSampleDeviceCapacity(node) != devCount {
   685  		return false, fmt.Errorf("expected devices capacity to be: %d", devCount)
   686  	}
   687  
   688  	if CountSampleDeviceAllocatable(node) != devCount {
   689  		return false, fmt.Errorf("expected devices allocatable to be: %d", devCount)
   690  	}
   691  	return true, nil
   692  }
   693  
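        // isNodeReadyWithoutSampleResources returns true when the local test node is ready and no longer
        // advertises any capacity for the sample device resource.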
   694  func isNodeReadyWithoutSampleResources(ctx context.Context, f *framework.Framework) (bool, error) {
   695  	node, ready := getLocalTestNode(ctx, f)
   696  	if !ready {
   697  		return false, fmt.Errorf("expected node to be ready=%t", ready)
   698  	}
   699  
   700  	if CountSampleDeviceCapacity(node) > 0 {
   701  		return false, fmt.Errorf("expected devices to no longer be advertised")
   702  	}
   703  	return true, nil
   704  }
   705  
   706  // HaveFailedWithAdmissionError verifies that a pod fails at admission.
   707  func HaveFailedWithAdmissionError() types.GomegaMatcher {
   708  	return gomega.And(
   709  		gcustom.MakeMatcher(func(hasFailed bool) (bool, error) {
   710  			if !hasFailed {
   711  				return false, fmt.Errorf("expected pod to have failed=%t", hasFailed)
   712  			}
   713  			return true, nil
   714  		}),
   715  		hasFailed(true),
   716  	)
   717  }
   718  
   719  // hasFailed matches if pod has failed.
   720  func hasFailed(hasFailed bool) types.GomegaMatcher {
   721  	return gcustom.MakeMatcher(func(hasPodFailed bool) (bool, error) {
   722  		return hasPodFailed == hasFailed, nil
   723  	}).WithTemplate("expected Pod failed {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(hasFailed)
   724  }
   725  
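        // getPodByName fetches the named pod from the test namespace.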
   726  func getPodByName(ctx context.Context, f *framework.Framework, podName string) (*v1.Pod, error) {
   727  	return e2epod.NewPodClient(f).Get(ctx, podName, metav1.GetOptions{})
   728  }
   729  
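        // getPod returns true once the named pod has failed with the UnexpectedAdmissionError reason and
        // the expected allocation failure message; it is intended to be polled with gomega.Eventually.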
   730  func getPod(ctx context.Context, f *framework.Framework, podName string) (bool, error) {
   731  	pod, err := getPodByName(ctx, f, podName)
   732  	if err != nil {
   733  		return false, err
   734  	}
   735  
   736  	expectedStatusReason := "UnexpectedAdmissionError"
   737  	expectedStatusMessage := "Allocate failed due to no healthy devices present; cannot allocate unhealthy devices"
   738  
   739  	// This additional matcher checks for the final error condition.
   740  	if pod.Status.Phase != v1.PodFailed {
   741  		return false, fmt.Errorf("expected pod to reach phase %q, got final phase %q instead", v1.PodFailed, pod.Status.Phase)
   742  	}
   743  	if pod.Status.Reason != expectedStatusReason {
   744  		return false, fmt.Errorf("expected pod status reason to be %q, got %q instead", expectedStatusReason, pod.Status.Reason)
   745  	}
   746  	if !strings.Contains(pod.Status.Message, expectedStatusMessage) {
   747  		return false, fmt.Errorf("expected pod status message to contain %q, got %q instead", expectedStatusMessage, pod.Status.Message)
   748  	}
   749  	return true, nil
   750  }