k8s.io/kubernetes@v1.29.3/test/e2e_node/device_plugin_test.go

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package e2enode
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"os"
    24  	"path/filepath"
    25  	"regexp"
    26  	"strings"
    27  	"time"
    28  
    29  	"github.com/onsi/ginkgo/v2"
    30  	"github.com/onsi/gomega"
    31  	"github.com/onsi/gomega/gcustom"
    32  	"github.com/onsi/gomega/types"
    33  
    34  	appsv1 "k8s.io/api/apps/v1"
    35  	v1 "k8s.io/api/core/v1"
    36  	"k8s.io/apimachinery/pkg/runtime"
    37  	"k8s.io/apimachinery/pkg/runtime/serializer"
    38  	k8stypes "k8s.io/apimachinery/pkg/types"
    39  	"k8s.io/apimachinery/pkg/util/sets"
    40  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    41  	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
    42  	kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
    43  	admissionapi "k8s.io/pod-security-admission/api"
    44  
    45  	"k8s.io/apimachinery/pkg/api/resource"
    46  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    47  	"k8s.io/apimachinery/pkg/util/uuid"
    48  	"k8s.io/kubectl/pkg/util/podutils"
    49  	kubeletpodresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
    50  	kubeletpodresourcesv1alpha1 "k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
    51  	"k8s.io/kubernetes/pkg/features"
    52  	"k8s.io/kubernetes/test/e2e/feature"
    53  	"k8s.io/kubernetes/test/e2e/framework"
    54  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    55  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    56  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    57  	e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
    58  	"k8s.io/kubernetes/test/e2e/nodefeature"
    59  )
    60  
    61  var (
    62  	appsScheme = runtime.NewScheme()
    63  	appsCodecs = serializer.NewCodecFactory(appsScheme)
    64  )
    65  
    66  // Serial because the test restarts Kubelet
    67  var _ = SIGDescribe("Device Plugin", feature.DevicePluginProbe, nodefeature.DevicePluginProbe, framework.WithSerial(), func() {
    68  	f := framework.NewDefaultFramework("device-plugin-errors")
    69  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    70  	testDevicePlugin(f, kubeletdevicepluginv1beta1.DevicePluginPath)
    71  	testDevicePluginNodeReboot(f, kubeletdevicepluginv1beta1.DevicePluginPath)
    72  })
    73  
    74  // readDaemonSetV1OrDie reads a DaemonSet object from bytes. It panics on error.
    75  func readDaemonSetV1OrDie(objBytes []byte) *appsv1.DaemonSet {
    76  	appsv1.AddToScheme(appsScheme)
    77  	requiredObj, err := runtime.Decode(appsCodecs.UniversalDecoder(appsv1.SchemeGroupVersion), objBytes)
    78  	if err != nil {
    79  		panic(err)
    80  	}
    81  	return requiredObj.(*appsv1.DaemonSet)
    82  }
    83  
    84  const (
    85  	// TODO(vikasc): Instead of hard-coding the number of devices, provide the number of devices in the sample-device-plugin using a configmap
    86  	// and then use the same value here
    87  	expectedSampleDevsAmount int64 = 2
    88  
    89  	// This is the sleep interval specified in the command executed in the pod to ensure the container is running "forever" in the test timescale
    90  	sleepIntervalForever string = "24h"
    91  
    92  	// This is the sleep interval specified in the command executed in the pod so that the container is restarted within the expected test run time
    93  	sleepIntervalWithRestart string = "60s"
    94  )
    95  
    96  func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
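        	// filepath.Join with a single argument only cleans the path; the trailing slash is then
        	// re-appended, presumably so the value handed to the sample device plugin reads as an explicit directory path.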
    97  	pluginSockDir = filepath.Join(pluginSockDir) + "/"
    98  	f.Context("DevicePlugin", f.WithSerial(), f.WithDisruptive(), func() {
    99  		var devicePluginPod, dptemplate *v1.Pod
   100  		var v1alphaPodResources *kubeletpodresourcesv1alpha1.ListPodResourcesResponse
   101  		var v1PodResources *kubeletpodresourcesv1.ListPodResourcesResponse
   102  		var err error
   103  
   104  		ginkgo.BeforeEach(func(ctx context.Context) {
   105  			ginkgo.By("Wait for node to be ready")
   106  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   107  				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
   108  				framework.ExpectNoError(err)
   109  				return nodes == 1
   110  			}, time.Minute, time.Second).Should(gomega.BeTrue())
   111  
   112  			// Before we run the device plugin test, we need to ensure
   113  			// that the cluster is in a clean state and there are no
   114  			// pods running on this node.
   115  			// This is done in a gomega.Eventually with retries since a prior test in a different test suite could've run and the deletion of its resources may still be in progress.
   116  			// xref: https://issue.k8s.io/115381
   117  			gomega.Eventually(ctx, func(ctx context.Context) error {
   118  				v1alphaPodResources, err = getV1alpha1NodeDevices(ctx)
   119  				if err != nil {
   120  					return fmt.Errorf("failed to get node local podresources by accessing the (v1alpha) podresources API endpoint: %v", err)
   121  				}
   122  
   123  				v1PodResources, err = getV1NodeDevices(ctx)
   124  				if err != nil {
   125  					return fmt.Errorf("failed to get node local podresources by accessing the (v1) podresources API endpoint: %v", err)
   126  				}
   127  
   128  				if len(v1alphaPodResources.PodResources) > 0 {
   129  					return fmt.Errorf("expected v1alpha pod resources to be empty, but got non-empty resources: %+v", v1alphaPodResources.PodResources)
   130  				}
   131  
   132  				if len(v1PodResources.PodResources) > 0 {
   133  					return fmt.Errorf("expected v1 pod resources to be empty, but got non-empty resources: %+v", v1PodResources.PodResources)
   134  				}
   135  				return nil
   136  			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.Succeed())
   137  
   138  			ginkgo.By("Scheduling a sample device plugin pod")
   139  			dp := getSampleDevicePluginPod(pluginSockDir)
   140  			dptemplate = dp.DeepCopy()
   141  			devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dp)
   142  
   143  			ginkgo.By("Waiting for devices to become available on the local node")
   144  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   145  				node, ready := getLocalTestNode(ctx, f)
   146  				return ready && CountSampleDeviceCapacity(node) > 0
   147  			}, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())
   148  			framework.Logf("Successfully created device plugin pod")
   149  
   150  			ginkgo.By(fmt.Sprintf("Waiting for the resource exported by the sample device plugin to become available on the local node (instances: %d)", expectedSampleDevsAmount))
   151  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   152  				node, ready := getLocalTestNode(ctx, f)
   153  				return ready &&
   154  					CountSampleDeviceCapacity(node) == expectedSampleDevsAmount &&
   155  					CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
   156  			}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
   157  		})
   158  
   159  		ginkgo.AfterEach(func(ctx context.Context) {
   160  			ginkgo.By("Deleting the device plugin pod")
   161  			e2epod.NewPodClient(f).DeleteSync(ctx, devicePluginPod.Name, metav1.DeleteOptions{}, time.Minute)
   162  
   163  			ginkgo.By("Deleting any Pods created by the test")
   164  			l, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{})
   165  			framework.ExpectNoError(err)
   166  			for _, p := range l.Items {
   167  				if p.Namespace != f.Namespace.Name {
   168  					continue
   169  				}
   170  
   171  				framework.Logf("Deleting pod: %s", p.Name)
   172  				e2epod.NewPodClient(f).DeleteSync(ctx, p.Name, metav1.DeleteOptions{}, 2*time.Minute)
   173  			}
   174  
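        			// Restarting the kubelet here helps ensure the sample device resources are dropped
        			// from the node status, which the wait below relies on.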
   175  			restartKubelet(true)
   176  
   177  			ginkgo.By("Waiting for devices to become unavailable on the local node")
   178  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   179  				node, ready := getLocalTestNode(ctx, f)
   180  				return ready && CountSampleDeviceCapacity(node) <= 0
   181  			}, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())
   182  
   183  			ginkgo.By("devices now unavailable on the local node")
   184  		})
   185  
   186  		ginkgo.It("Can schedule a pod that requires a device", func(ctx context.Context) {
   187  			podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalWithRestart)
   188  			pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
   189  			deviceIDRE := "stub devices: (Dev-[0-9]+)"
   190  			devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
   191  			framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
   192  			gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")), "pod1 requested a device but started successfully without")
   193  
   194  			v1alphaPodResources, err = getV1alpha1NodeDevices(ctx)
   195  			framework.ExpectNoError(err)
   196  
   197  			v1PodResources, err = getV1NodeDevices(ctx)
   198  			framework.ExpectNoError(err)
   199  
   200  			framework.Logf("v1alphaPodResources.PodResources:%+v\n", v1alphaPodResources.PodResources)
   201  			framework.Logf("v1PodResources.PodResources:%+v\n", v1PodResources.PodResources)
   202  			framework.Logf("len(v1alphaPodResources.PodResources):%+v", len(v1alphaPodResources.PodResources))
   203  			framework.Logf("len(v1PodResources.PodResources):%+v", len(v1PodResources.PodResources))
   204  
   205  			gomega.Expect(v1alphaPodResources.PodResources).To(gomega.HaveLen(2))
   206  			gomega.Expect(v1PodResources.PodResources).To(gomega.HaveLen(2))
   207  
   208  			var v1alphaResourcesForOurPod *kubeletpodresourcesv1alpha1.PodResources
   209  			for _, res := range v1alphaPodResources.GetPodResources() {
   210  				if res.Name == pod1.Name {
   211  					v1alphaResourcesForOurPod = res
   212  				}
   213  			}
   214  
   215  			var v1ResourcesForOurPod *kubeletpodresourcesv1.PodResources
   216  			for _, res := range v1PodResources.GetPodResources() {
   217  				if res.Name == pod1.Name {
   218  					v1ResourcesForOurPod = res
   219  				}
   220  			}
   221  
   222  			gomega.Expect(v1alphaResourcesForOurPod).NotTo(gomega.BeNil())
   223  			gomega.Expect(v1ResourcesForOurPod).NotTo(gomega.BeNil())
   224  
   225  			gomega.Expect(v1alphaResourcesForOurPod.Name).To(gomega.Equal(pod1.Name))
   226  			gomega.Expect(v1ResourcesForOurPod.Name).To(gomega.Equal(pod1.Name))
   227  
   228  			gomega.Expect(v1alphaResourcesForOurPod.Namespace).To(gomega.Equal(pod1.Namespace))
   229  			gomega.Expect(v1ResourcesForOurPod.Namespace).To(gomega.Equal(pod1.Namespace))
   230  
   231  			gomega.Expect(v1alphaResourcesForOurPod.Containers).To(gomega.HaveLen(1))
   232  			gomega.Expect(v1ResourcesForOurPod.Containers).To(gomega.HaveLen(1))
   233  
   234  			gomega.Expect(v1alphaResourcesForOurPod.Containers[0].Name).To(gomega.Equal(pod1.Spec.Containers[0].Name))
   235  			gomega.Expect(v1ResourcesForOurPod.Containers[0].Name).To(gomega.Equal(pod1.Spec.Containers[0].Name))
   236  
   237  			gomega.Expect(v1alphaResourcesForOurPod.Containers[0].Devices).To(gomega.HaveLen(1))
   238  			gomega.Expect(v1ResourcesForOurPod.Containers[0].Devices).To(gomega.HaveLen(1))
   239  
   240  			gomega.Expect(v1alphaResourcesForOurPod.Containers[0].Devices[0].ResourceName).To(gomega.Equal(SampleDeviceResourceName))
   241  			gomega.Expect(v1ResourcesForOurPod.Containers[0].Devices[0].ResourceName).To(gomega.Equal(SampleDeviceResourceName))
   242  
   243  			gomega.Expect(v1alphaResourcesForOurPod.Containers[0].Devices[0].DeviceIds).To(gomega.HaveLen(1))
   244  			gomega.Expect(v1ResourcesForOurPod.Containers[0].Devices[0].DeviceIds).To(gomega.HaveLen(1))
   245  		})
   246  
   247  		ginkgo.It("[NodeSpecialFeature:CDI] can make a CDI device accessible in a container", func(ctx context.Context) {
   248  			e2eskipper.SkipUnlessFeatureGateEnabled(features.DevicePluginCDIDevices)
   249  			// check if CDI_DEVICE env variable is set
   250  			// and only one corresponding device node /tmp/<CDI_DEVICE> is available inside a container
   251  			podObj := makeBusyboxPod(SampleDeviceResourceName, "[ $(ls /tmp/CDI-Dev-[1,2] | wc -l) -eq 1 -a -b /tmp/$CDI_DEVICE ]")
   252  			podObj.Spec.RestartPolicy = v1.RestartPolicyNever
   253  			pod := e2epod.NewPodClient(f).Create(ctx, podObj)
   254  			framework.ExpectNoError(e2epod.WaitForPodSuccessInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace))
   255  		})
   256  
   257  		// simulate container restart, while all other involved components (kubelet, device plugin) stay stable. To do so, in the container
   258  		// entry point we sleep for a limited and short period of time. The device assignment should be kept and be stable across the container
   259  		// restarts. For the sake of brevity, however, we check just the first restart.
   260  		ginkgo.It("Keeps device plugin assignments across pod restarts (no kubelet restart, no device plugin restart)", func(ctx context.Context) {
   261  			podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalWithRestart)
   262  			pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
   263  			deviceIDRE := "stub devices: (Dev-[0-9]+)"
   264  			devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
   265  			framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
   266  			gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")), "pod1 requested a device but started successfully without")
   267  
   268  			pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
   269  			framework.ExpectNoError(err)
   270  
   271  			ginkgo.By("Waiting for container to restart")
   272  			ensurePodContainerRestart(ctx, f, pod1.Name, pod1.Name)
   273  
   274  			// check that the device assignment is preserved and stable from the perspective of the container
   275  			ginkgo.By("Confirming that after a container restart, fake-device assignment is kept")
   276  			devIDRestart1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
   277  			framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
   278  			gomega.Expect(devIDRestart1).To(gomega.Equal(devID1))
   279  
   280  			// crosscheck that the device assignment is preserved and stable from the perspective of the kubelet.
   281  			// It needs to match the container's perspective.
   282  			ginkgo.By("Verifying the device assignment after container restart using podresources API")
   283  			v1PodResources, err = getV1NodeDevices(ctx)
   284  			if err != nil {
   285  				framework.ExpectNoError(err, "getting pod resources assignment after pod restart")
   286  			}
   287  			err, _ = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
   288  			framework.ExpectNoError(err, "inconsistent device assignment after pod restart")
   289  
   290  			ginkgo.By("Creating another pod")
   291  			pod2 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
   292  			err = e2epod.WaitTimeoutForPodRunningInNamespace(ctx, f.ClientSet, pod2.Name, f.Namespace.Name, 1*time.Minute)
   293  			framework.ExpectNoError(err)
   294  
   295  			ginkgo.By("Checking that pod got a fake device")
   296  			devID2, err := parseLog(ctx, f, pod2.Name, pod2.Name, deviceIDRE)
   297  			framework.ExpectNoError(err, "getting logs for pod %q", pod2.Name)
   298  
   299  			gomega.Expect(devID2).To(gomega.Not(gomega.Equal("")), "pod2 requested a device but started successfully without")
   300  
   301  			ginkgo.By("Verifying the device assignment after extra container start using podresources API")
   302  			v1PodResources, err = getV1NodeDevices(ctx)
   303  			if err != nil {
   304  				framework.ExpectNoError(err, "getting pod resources assignment after pod restart")
   305  			}
   306  			err, _ = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
   307  			framework.ExpectNoError(err, "inconsistent device assignment after extra container restart - pod1")
   308  			err, _ = checkPodResourcesAssignment(v1PodResources, pod2.Namespace, pod2.Name, pod2.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID2})
   309  			framework.ExpectNoError(err, "inconsistent device assignment after extra container restart - pod2")
   310  		})
   311  
   312  		// simulate kubelet restart. A compliant device plugin is expected to re-register, while the pod and the container stay running.
   313  		// The flow with a buggy or slow device plugin is deferred to another test.
   314  		// The device assignment should be kept and be stable across the kubelet restart, because it's the kubelet which performs the device allocation,
   315  		// and both the device plugin and the actual consumer (container) are stable.
   316  		ginkgo.It("Keeps device plugin assignments across kubelet restarts (no pod restart, no device plugin restart)", func(ctx context.Context) {
   317  			podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever)
   318  			pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
   319  			deviceIDRE := "stub devices: (Dev-[0-9]+)"
   320  			devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
   321  			framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
   322  			gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")), "pod1 requested a device but started successfully without")
   323  
   324  			pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
   325  			framework.ExpectNoError(err)
   326  			framework.Logf("testing pod: pre-restart  UID=%s namespace=%s name=%s ready=%v", pod1.UID, pod1.Namespace, pod1.Name, podutils.IsPodReady(pod1))
   327  
   328  			ginkgo.By("Restarting Kubelet")
   329  			restartKubelet(true)
   330  
   331  			ginkgo.By("Wait for node to be ready again")
   332  			e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
   333  
   334  			ginkgo.By("Waiting for resource to become available on the local node after restart")
   335  			gomega.Eventually(ctx, func() bool {
   336  				node, ready := getLocalTestNode(ctx, f)
   337  				return ready &&
   338  					CountSampleDeviceCapacity(node) == expectedSampleDevsAmount &&
   339  					CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
   340  			}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
   341  
   342  			ginkgo.By("Checking the same instance of the pod is still running")
   343  			gomega.Eventually(ctx, getPodByName).
   344  				WithArguments(f, pod1.Name).
   345  				WithTimeout(time.Minute).
   346  				Should(BeTheSamePodStillRunning(pod1),
   347  					"the same pod instance not running across kubelet restarts, workload should not be perturbed by kubelet restarts")
   348  
   349  			pod2, err := e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
   350  			framework.ExpectNoError(err)
   351  			framework.Logf("testing pod: post-restart UID=%s namespace=%s name=%s ready=%v", pod2.UID, pod2.Namespace, pod2.Name, podutils.IsPodReady(pod2))
   352  
   353  			// crosscheck that the device assignment is preserved and stable from the perspective of the kubelet.
   354  			// note we don't check the container logs again: the check is done at startup, the container
   355  			// never restarted (runs "forever" from this test timescale perspective) hence re-doing this check
   356  			// is useless.
   357  			ginkgo.By("Verifying the device assignment after kubelet restart using podresources API")
   358  			gomega.Eventually(ctx, func() error {
   359  				v1PodResources, err = getV1NodeDevices(ctx)
   360  				return err
   361  			}, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after kubelet restart")
   362  
   363  			err, _ = checkPodResourcesAssignment(v1PodResources, pod2.Namespace, pod2.Name, pod2.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
   364  			framework.ExpectNoError(err, "inconsistent device assignment after pod restart")
   365  		})
   366  
   367  		// simulate kubelet and container restart, *but not* device plugin restart.
   368  		// The device assignment should be kept and be stable across the kubelet and container restart, because it's the kubelet which
   369  		// performs the device allocation, and the device plugin is stable.
   370  		ginkgo.It("Keeps device plugin assignments across pod and kubelet restarts (no device plugin restart)", func(ctx context.Context) {
   371  			podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalWithRestart)
   372  			pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
   373  			deviceIDRE := "stub devices: (Dev-[0-9]+)"
   374  			devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
   375  			framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
   376  
   377  			gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")), "pod1 requested a device but started successfully without")
   378  
   379  			pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
   380  			framework.ExpectNoError(err)
   381  
   382  			ginkgo.By("Wait for node to be ready again")
   383  			e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
   384  
   385  			ginkgo.By("Waiting for container to restart")
   386  			ensurePodContainerRestart(ctx, f, pod1.Name, pod1.Name)
   387  
   388  			ginkgo.By("Confirming that after a container restart, fake-device assignment is kept")
   389  			devIDRestart1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
   390  			framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
   391  			gomega.Expect(devIDRestart1).To(gomega.Equal(devID1))
   392  
   393  			ginkgo.By("Restarting Kubelet")
   394  			restartKubelet(true)
   395  
   396  			ginkgo.By("Wait for node to be ready again")
   397  			e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
   398  
   399  			ginkgo.By("Checking an instance of the pod is running")
   400  			gomega.Eventually(ctx, getPodByName).
   401  				WithArguments(f, pod1.Name).
   402  				// The kubelet restarts the pod with an exponential back-off delay, capped at 5 minutes.
   403  				// Allow 5 minutes and 10 seconds for the pod to start in a slow environment.
   404  				WithTimeout(5*time.Minute+10*time.Second).
   405  				Should(gomega.And(
   406  					BeAPodInPhase(v1.PodRunning),
   407  					BeAPodReady(),
   408  				),
   409  					"the pod should still be running, the workload should not be perturbed by kubelet restarts")
   410  
   411  			ginkgo.By("Verifying the device assignment after pod and kubelet restart using container logs")
   412  			var devID1Restarted string
   413  			gomega.Eventually(ctx, func() string {
   414  				devID1Restarted, err = parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
   415  				if err != nil {
   416  					framework.Logf("error getting logs for pod %q: %v", pod1.Name, err)
   417  					return ""
   418  				}
   419  				return devID1Restarted
   420  			}, 30*time.Second, framework.Poll).Should(gomega.Equal(devID1), "pod %s reports a different device after restarts: %s (expected %s)", pod1.Name, devID1Restarted, devID1)
   421  
   422  			ginkgo.By("Verifying the device assignment after pod and kubelet restart using podresources API")
   423  			gomega.Eventually(ctx, func() error {
   424  				v1PodResources, err = getV1NodeDevices(ctx)
   425  				return err
   426  			}, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after kubelet restart")
   427  
   428  			err, _ = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
   429  			framework.ExpectNoError(err, "inconsistent device assignment after pod restart")
   430  		})
   431  
   432  		// simulate device plugin re-registration, *but not* container and kubelet restart.
   433  		// After the device plugin has re-registered, the list of healthy devices is repopulated based on the devices discovered.
   434  		// The device assignment of the already-running pod should be preserved across the re-registration;
   435  		// this is crosschecked below through the podresources API.
   436  		ginkgo.It("Keeps device plugin assignments after the device plugin has restarted (no kubelet restart, pod restart)", func(ctx context.Context) {
   437  			podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever)
   438  			pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
   439  			deviceIDRE := "stub devices: (Dev-[0-9]+)"
   440  			devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
   441  			framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
   442  			gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")), "pod1 requested a device but started successfully without")
   443  
   444  			pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
   445  			framework.ExpectNoError(err)
   446  
   447  			ginkgo.By("Wait for node to be ready again")
   448  			e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
   449  
   450  			ginkgo.By("Re-Register resources and delete the plugin pod")
   451  			gp := int64(0)
   452  			deleteOptions := metav1.DeleteOptions{
   453  				GracePeriodSeconds: &gp,
   454  			}
   455  			e2epod.NewPodClient(f).DeleteSync(ctx, devicePluginPod.Name, deleteOptions, time.Minute)
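        			// Wait until the runtime has fully removed the plugin container before recreating
        			// the plugin pod under the same name below.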
   456  			waitForContainerRemoval(ctx, devicePluginPod.Spec.Containers[0].Name, devicePluginPod.Name, devicePluginPod.Namespace)
   457  
   458  			ginkgo.By("Recreating the plugin pod")
   459  			devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dptemplate)
   460  			err = e2epod.WaitTimeoutForPodRunningInNamespace(ctx, f.ClientSet, devicePluginPod.Name, devicePluginPod.Namespace, 1*time.Minute)
   461  			framework.ExpectNoError(err)
   462  
   463  			ginkgo.By("Waiting for resource to become available on the local node after re-registration")
   464  			gomega.Eventually(ctx, func() bool {
   465  				node, ready := getLocalTestNode(ctx, f)
   466  				return ready &&
   467  					CountSampleDeviceCapacity(node) == expectedSampleDevsAmount &&
   468  					CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
   469  			}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
   470  
   471  			// crosscheck that after device plugin restart the device assignment is preserved and
   472  			// stable from the kubelet's perspective.
   473  			// note we don't check the container logs again: the check is done at startup, the container
   474  			// never restarted (runs "forever" from this test timescale perspective) hence re-doing this check
   475  			// is useless.
   476  			ginkgo.By("Verifying the device assignment after device plugin restart using podresources API")
   477  			gomega.Eventually(ctx, func() error {
   478  				v1PodResources, err = getV1NodeDevices(ctx)
   479  				return err
   480  			}, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after kubelet restart")
   481  
   482  			err, _ = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
   483  			framework.ExpectNoError(err, "inconsistent device assignment after pod restart")
   484  		})
   485  
   486  		// simulate kubelet restart *and* device plugin restart, while the pod and the container stay running.
   487  		// The device assignment should be kept and be stable across the kubelet/device plugin restart, as both the aforementioned components
   488  		// orchestrate the device allocation: the actual consumer (container) is stable.
   489  		ginkgo.It("Keeps device plugin assignments after kubelet restart and device plugin restart (no pod restart)", func(ctx context.Context) {
   490  			podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever) // the pod has to run "forever" in the timescale of this test
   491  			pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
   492  			deviceIDRE := "stub devices: (Dev-[0-9]+)"
   493  			devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
   494  			framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
   495  
   496  			gomega.Expect(devID1).To(gomega.Not(gomega.BeEmpty()), "pod1 requested a device but started successfully without")
   497  
   498  			pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
   499  			framework.ExpectNoError(err)
   500  
   501  			ginkgo.By("Restarting Kubelet")
   502  			restartKubelet(true)
   503  
   504  			ginkgo.By("Wait for node to be ready again")
   505  			e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
   506  
   507  			ginkgo.By("Checking the same instance of the pod is still running after kubelet restart")
   508  			gomega.Eventually(ctx, getPodByName).
   509  				WithArguments(f, pod1.Name).
   510  				WithTimeout(time.Minute).
   511  				Should(BeTheSamePodStillRunning(pod1),
   512  					"the same pod instance not running across kubelet restarts, workload should not be perturbed by kubelet restarts")
   513  
   514  			// crosscheck that the device assignment is preserved and stable from the perspective of the kubelet.
   515  			// note we don't check the container logs again: the check is done at startup, the container
   516  			// never restarted (runs "forever" from this test timescale perspective) hence re-doing this check
   517  			// is useless.
   518  			ginkgo.By("Verifying the device assignment after kubelet restart using podresources API")
   519  			gomega.Eventually(ctx, func() error {
   520  				v1PodResources, err = getV1NodeDevices(ctx)
   521  				return err
   522  			}, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after kubelet restart")
   523  
   524  			err, _ = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
   525  			framework.ExpectNoError(err, "inconsistent device assignment after pod restart")
   526  
   527  			ginkgo.By("Re-Register resources by deleting the plugin pod")
   528  			gp := int64(0)
   529  			deleteOptions := metav1.DeleteOptions{
   530  				GracePeriodSeconds: &gp,
   531  			}
   532  			e2epod.NewPodClient(f).DeleteSync(ctx, devicePluginPod.Name, deleteOptions, time.Minute)
   533  			waitForContainerRemoval(ctx, devicePluginPod.Spec.Containers[0].Name, devicePluginPod.Name, devicePluginPod.Namespace)
   534  
   535  			ginkgo.By("Recreating the plugin pod")
   536  			devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dptemplate)
   537  
   538  			ginkgo.By("Waiting for resource to become available on the local node after restart")
   539  			gomega.Eventually(ctx, func() bool {
   540  				node, ready := getLocalTestNode(ctx, f)
   541  				return ready &&
   542  					CountSampleDeviceCapacity(node) == expectedSampleDevsAmount &&
   543  					CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
   544  			}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
   545  
   546  			ginkgo.By("Checking the same instance of the pod is still running after the device plugin restart")
   547  			gomega.Eventually(ctx, getPodByName).
   548  				WithArguments(f, pod1.Name).
   549  				WithTimeout(time.Minute).
   550  				Should(BeTheSamePodStillRunning(pod1),
   551  					"the same pod instance not running across kubelet restarts, workload should not be perturbed by device plugins restarts")
   552  		})
   553  
   554  		ginkgo.It("[OrphanedPods] Ensures pods consuming devices deleted while kubelet is down are cleaned up correctly", func(ctx context.Context) {
   555  			podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalWithRestart)
   556  			pod := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
   557  
   558  			deviceIDRE := "stub devices: (Dev-[0-9]+)"
   559  			devID, err := parseLog(ctx, f, pod.Name, pod.Name, deviceIDRE)
   560  			framework.ExpectNoError(err, "getting logs for pod %q", pod.Name)
   561  			gomega.Expect(devID).To(gomega.Not(gomega.BeEmpty()), "pod1 requested a device but started successfully without")
   562  
   563  			pod, err = e2epod.NewPodClient(f).Get(ctx, pod.Name, metav1.GetOptions{})
   564  			framework.ExpectNoError(err)
   565  
   566  			ginkgo.By("stopping the kubelet")
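        			// stopKubelet stops the kubelet and returns a function that starts it again,
        			// which is invoked below once the pod has been deleted out from under it.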
   567  			startKubelet := stopKubelet()
   568  
   569  			// wait until the kubelet health check fails
   570  			gomega.Eventually(ctx, func() bool {
   571  				ok := kubeletHealthCheck(kubeletHealthCheckURL)
   572  				framework.Logf("kubelet health check at %q value=%v", kubeletHealthCheckURL, ok)
   573  				return ok
   574  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())
   575  
   576  			framework.Logf("Delete the pod while the kubelet is not running")
   577  			// deletePodSyncByName force-deletes the pod, removing it from the kubelet's config
   578  			deletePodSyncByName(ctx, f, pod.Name)
   579  
   580  			framework.Logf("Starting the kubelet")
   581  			startKubelet()
   582  
   583  			// wait until the kubelet health check succeeds
   584  			gomega.Eventually(ctx, func() bool {
   585  				ok := kubeletHealthCheck(kubeletHealthCheckURL)
   586  				framework.Logf("kubelet health check at %q value=%v", kubeletHealthCheckURL, ok)
   587  				return ok
   588  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())
   589  
   590  			framework.Logf("wait for the pod %v to disappear", pod.Name)
   591  			gomega.Eventually(ctx, func(ctx context.Context) error {
   592  				err := checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace)
   593  				framework.Logf("pod %s/%s disappear check err=%v", pod.Namespace, pod.Name, err)
   594  				return err
   595  			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
   596  
   597  			waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
   598  
   599  			ginkgo.By("Verifying the device assignment after device plugin restart using podresources API")
   600  			gomega.Eventually(ctx, func() error {
   601  				v1PodResources, err = getV1NodeDevices(ctx)
   602  				return err
   603  			}, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after kubelet restart")
   604  			err, allocated := checkPodResourcesAssignment(v1PodResources, pod.Namespace, pod.Name, pod.Spec.Containers[0].Name, SampleDeviceResourceName, []string{})
   605  			if err == nil || allocated {
   606  				framework.Fail(fmt.Sprintf("stale device assignment after pod deletion while kubelet was down allocated=%v error=%v", allocated, err))
   607  			}
   608  		})
   609  
   610  		ginkgo.It("Can schedule a pod with a restartable init container [NodeAlphaFeature:SidecarContainers]", func(ctx context.Context) {
   611  			podRECMD := "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s"
   612  			sleepOneSecond := "1s"
   613  			rl := v1.ResourceList{v1.ResourceName(SampleDeviceResourceName): *resource.NewQuantity(1, resource.DecimalSI)}
   614  			pod := &v1.Pod{
   615  				ObjectMeta: metav1.ObjectMeta{Name: "device-plugin-test-" + string(uuid.NewUUID())},
   616  				Spec: v1.PodSpec{
   617  					RestartPolicy: v1.RestartPolicyAlways,
   618  					InitContainers: []v1.Container{
   619  						{
   620  							Image:   busyboxImage,
   621  							Name:    "init-1",
   622  							Command: []string{"sh", "-c", fmt.Sprintf(podRECMD, sleepOneSecond)},
   623  							Resources: v1.ResourceRequirements{
   624  								Limits:   rl,
   625  								Requests: rl,
   626  							},
   627  						},
   628  						{
   629  							Image:   busyboxImage,
   630  							Name:    "restartable-init-2",
   631  							Command: []string{"sh", "-c", fmt.Sprintf(podRECMD, sleepIntervalForever)},
   632  							Resources: v1.ResourceRequirements{
   633  								Limits:   rl,
   634  								Requests: rl,
   635  							},
   636  							RestartPolicy: &containerRestartPolicyAlways,
   637  						},
   638  					},
   639  					Containers: []v1.Container{{
   640  						Image:   busyboxImage,
   641  						Name:    "regular-1",
   642  						Command: []string{"sh", "-c", fmt.Sprintf(podRECMD, sleepIntervalForever)},
   643  						Resources: v1.ResourceRequirements{
   644  							Limits:   rl,
   645  							Requests: rl,
   646  						},
   647  					}},
   648  				},
   649  			}
   650  
   651  			pod1 := e2epod.NewPodClient(f).CreateSync(ctx, pod)
   652  			deviceIDRE := "stub devices: (Dev-[0-9]+)"
   653  
   654  			devID1, err := parseLog(ctx, f, pod1.Name, pod1.Spec.InitContainers[0].Name, deviceIDRE)
   655  			framework.ExpectNoError(err, "getting logs for pod %q/%q", pod1.Name, pod1.Spec.InitContainers[0].Name)
   656  			gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")), "pod1's init container requested a device but started successfully without")
   657  
   658  			devID2, err := parseLog(ctx, f, pod1.Name, pod1.Spec.InitContainers[1].Name, deviceIDRE)
   659  			framework.ExpectNoError(err, "getting logs for pod %q/%q", pod1.Name, pod1.Spec.InitContainers[1].Name)
   660  			gomega.Expect(devID2).To(gomega.Not(gomega.Equal("")), "pod1's restartable init container requested a device but started successfully without")
   661  
   662  			gomega.Expect(devID2).To(gomega.Equal(devID1), "pod1's init container and restartable init container should share the same device")
   663  
   664  			devID3, err := parseLog(ctx, f, pod1.Name, pod1.Spec.Containers[0].Name, deviceIDRE)
   665  			framework.ExpectNoError(err, "getting logs for pod %q/%q", pod1.Name, pod1.Spec.Containers[0].Name)
   666  			gomega.Expect(devID3).To(gomega.Not(gomega.Equal("")), "pod1's regular container requested a device but started successfully without")
   667  
   668  			gomega.Expect(devID3).NotTo(gomega.Equal(devID2), "pod1's restartable init container and regular container should not share the same device")
   669  
   670  			podResources, err := getV1NodeDevices(ctx)
   671  			framework.ExpectNoError(err)
   672  
   673  			framework.Logf("PodResources.PodResources:%+v\n", podResources.PodResources)
   674  			framework.Logf("len(PodResources.PodResources):%+v", len(podResources.PodResources))
   675  
   676  			gomega.Expect(podResources.PodResources).To(gomega.HaveLen(2))
   677  
   678  			var resourcesForOurPod *kubeletpodresourcesv1.PodResources
   679  			for _, res := range podResources.GetPodResources() {
   680  				if res.Name == pod1.Name {
   681  					resourcesForOurPod = res
   682  				}
   683  			}
   684  
   685  			gomega.Expect(resourcesForOurPod).NotTo(gomega.BeNil())
   686  
   687  			gomega.Expect(resourcesForOurPod.Name).To(gomega.Equal(pod1.Name))
   688  			gomega.Expect(resourcesForOurPod.Namespace).To(gomega.Equal(pod1.Namespace))
   689  
   690  			// Note that the kubelet does not report resources for restartable
   691  			// init containers for now.
   692  			// See https://github.com/kubernetes/kubernetes/issues/120501.
   693  			// TODO: Fix this test to check the resources allocated to the
   694  			// restartable init container once the kubelet reports resources
   695  			// for restartable init containers.
   696  			gomega.Expect(resourcesForOurPod.Containers).To(gomega.HaveLen(1))
   697  
   698  			gomega.Expect(resourcesForOurPod.Containers[0].Name).To(gomega.Equal(pod1.Spec.Containers[0].Name))
   699  
   700  			gomega.Expect(resourcesForOurPod.Containers[0].Devices).To(gomega.HaveLen(1))
   701  
   702  			gomega.Expect(resourcesForOurPod.Containers[0].Devices[0].ResourceName).To(gomega.Equal(SampleDeviceResourceName))
   703  
   704  			gomega.Expect(resourcesForOurPod.Containers[0].Devices[0].DeviceIds).To(gomega.HaveLen(1))
   705  		})
   706  	})
   707  }
   708  
   709  func testDevicePluginNodeReboot(f *framework.Framework, pluginSockDir string) {
   710  	f.Context("DevicePlugin", f.WithSerial(), f.WithDisruptive(), func() {
   711  		var devicePluginPod *v1.Pod
   712  		var v1PodResources *kubeletpodresourcesv1.ListPodResourcesResponse
   713  		var triggerPathFile, triggerPathDir string
   714  		var err error
   715  
   716  		ginkgo.BeforeEach(func(ctx context.Context) {
   717  			ginkgo.By("Wait for node to be ready")
   718  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   719  				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
   720  				framework.ExpectNoError(err)
   721  				return nodes == 1
   722  			}, time.Minute, time.Second).Should(gomega.BeTrue())
   723  
   724  			// Before we run the device plugin test, we need to ensure
   725  			// that the cluster is in a clean state and there are no
   726  			// pods running on this node.
   727  			// This is done in a gomega.Eventually with retries since a prior test in a different test suite could've run and the deletion of its resources may still be in progress.
   728  			// xref: https://issue.k8s.io/115381
   729  			gomega.Eventually(ctx, func(ctx context.Context) error {
   730  				v1PodResources, err = getV1NodeDevices(ctx)
   731  				if err != nil {
   732  					return fmt.Errorf("failed to get node local podresources by accessing the (v1) podresources API endpoint: %v", err)
   733  				}
   734  
   735  				if len(v1PodResources.PodResources) > 0 {
   736  					return fmt.Errorf("expected v1 pod resources to be empty, but got non-empty resources: %+v", v1PodResources.PodResources)
   737  				}
   738  				return nil
   739  			}, f.Timeouts.SystemDaemonsetStartup, f.Timeouts.Poll).Should(gomega.Succeed())
   740  
   741  			ginkgo.By("Setting up the directory for controlling registration")
   742  			triggerPathDir = filepath.Join(devicePluginDir, "sample")
   743  			if _, err := os.Stat(triggerPathDir); err != nil {
   744  				if errors.Is(err, os.ErrNotExist) {
   745  					if err := os.Mkdir(triggerPathDir, os.ModePerm); err != nil {
   746  						framework.Fail(fmt.Sprintf("registration control directory %q creation failed: %v ", triggerPathDir, err))
   747  					}
   748  					framework.Logf("registration control directory created successfully")
   749  				} else {
   750  					framework.Fail(fmt.Sprintf("unexpected error checking %q: %v", triggerPathDir, err))
   751  				}
   752  			} else {
   753  				framework.Logf("registration control directory %q already present", triggerPathDir)
   754  			}
   755  
   756  			ginkgo.By("Setting up the file trigger for controlling registration")
   757  			triggerPathFile = filepath.Join(triggerPathDir, "registration")
   758  			if _, err := os.Stat(triggerPathFile); err != nil {
   759  				if errors.Is(err, os.ErrNotExist) {
   760  					if _, err = os.Create(triggerPathFile); err != nil {
   761  						framework.Fail(fmt.Sprintf("registration control file %q creation failed: %v", triggerPathFile, err))
   762  					}
   763  					framework.Logf("registration control file created successfully")
   764  				} else {
   765  					framework.Fail(fmt.Sprintf("unexpected error checking %q: %v", triggerPathFile, err))
   766  				}
   767  			} else {
   768  				framework.Logf("registration control file %q already present", triggerPathFile)
   769  			}
   770  
   771  			ginkgo.By("Scheduling a sample device plugin pod")
   772  			data, err := e2etestfiles.Read(SampleDevicePluginControlRegistrationDSYAML)
   773  			if err != nil {
   774  				framework.Fail(fmt.Sprintf("error reading test data %q: %v", SampleDevicePluginControlRegistrationDSYAML, err))
   775  			}
   776  			ds := readDaemonSetV1OrDie(data)
   777  
   778  			dp := &v1.Pod{
   779  				ObjectMeta: metav1.ObjectMeta{
   780  					Name: SampleDevicePluginName,
   781  				},
   782  				Spec: ds.Spec.Template.Spec,
   783  			}
   784  
   785  			devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dp)
   786  
   787  			go func() {
   788  				// Since autoregistration is disabled for the device plugin (as the REGISTER_CONTROL_FILE
   789  				// environment variable is specified), device plugin registration needs to be triggered
   790  				// manually.
   791  				// This is done by deleting the control file at the following path:
   792  				// `/var/lib/kubelet/device-plugins/sample/registration`.
   793  
   794  				defer ginkgo.GinkgoRecover()
   795  				framework.Logf("Deleting the control file: %q to trigger registration", triggerPathFile)
   796  				err := os.Remove(triggerPathFile)
   797  				framework.ExpectNoError(err)
   798  			}()
   799  
   800  			ginkgo.By("Waiting for devices to become available on the local node")
   801  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   802  				node, ready := getLocalTestNode(ctx, f)
   803  				return ready && CountSampleDeviceCapacity(node) > 0
   804  			}, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())
   805  			framework.Logf("Successfully created device plugin pod")
   806  
   807  			ginkgo.By(fmt.Sprintf("Waiting for the resource exported by the sample device plugin to become available on the local node (instances: %d)", expectedSampleDevsAmount))
   808  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   809  				node, ready := getLocalTestNode(ctx, f)
   810  				return ready &&
   811  					CountSampleDeviceCapacity(node) == expectedSampleDevsAmount &&
   812  					CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
   813  			}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
   814  		})
   815  
   816  		ginkgo.AfterEach(func(ctx context.Context) {
   817  			ginkgo.By("Deleting the device plugin pod")
   818  			e2epod.NewPodClient(f).DeleteSync(ctx, devicePluginPod.Name, metav1.DeleteOptions{}, time.Minute)
   819  
   820  			ginkgo.By("Deleting any Pods created by the test")
   821  			l, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{})
   822  			framework.ExpectNoError(err)
   823  			for _, p := range l.Items {
   824  				if p.Namespace != f.Namespace.Name {
   825  					continue
   826  				}
   827  
   828  				framework.Logf("Deleting pod: %s", p.Name)
   829  				e2epod.NewPodClient(f).DeleteSync(ctx, p.Name, metav1.DeleteOptions{}, 2*time.Minute)
   830  			}
   831  
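        			// Clean up the registration control directory created in BeforeEach.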
   832  			err = os.Remove(triggerPathDir)
   833  			framework.ExpectNoError(err)
   834  
   835  			ginkgo.By("Waiting for devices to become unavailable on the local node")
   836  			gomega.Eventually(ctx, func(ctx context.Context) bool {
   837  				node, ready := getLocalTestNode(ctx, f)
   838  				return ready && CountSampleDeviceCapacity(node) <= 0
   839  			}, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())
   840  
   841  			ginkgo.By("devices now unavailable on the local node")
   842  		})
   843  
   844  		// simulate a node reboot scenario by removing pods using CRI before the kubelet is started. In addition to that,
   845  		// a scenario is intentionally created where, after the node reboot, application pods requesting devices appear before the device plugin pod
   846  		// exposing those devices as a resource has restarted. The expected behavior is that the application pod fails at admission time.
   847  		ginkgo.It("Keeps device plugin assignments across node reboots (no pod restart, no device plugin re-registration)", func(ctx context.Context) {
   848  			podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever)
   849  			pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
   850  			deviceIDRE := "stub devices: (Dev-[0-9]+)"
   851  			devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
   852  			framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
   853  
   854  			gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")))
   855  
   856  			pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
   857  			framework.ExpectNoError(err)
   858  
   859  			ginkgo.By("stopping the kubelet")
   860  			startKubelet := stopKubelet()
   861  
   862  			ginkgo.By("stopping all the local containers - using CRI")
   863  			rs, _, err := getCRIClient()
   864  			framework.ExpectNoError(err)
   865  			sandboxes, err := rs.ListPodSandbox(ctx, &runtimeapi.PodSandboxFilter{})
   866  			framework.ExpectNoError(err)
   867  			for _, sandbox := range sandboxes {
   868  				gomega.Expect(sandbox.Metadata).ToNot(gomega.BeNil())
   869  				ginkgo.By(fmt.Sprintf("deleting pod using CRI: %s/%s -> %s", sandbox.Metadata.Namespace, sandbox.Metadata.Name, sandbox.Id))
   870  
   871  				err := rs.RemovePodSandbox(ctx, sandbox.Id)
   872  				framework.ExpectNoError(err)
   873  			}
   874  
   875  			ginkgo.By("restarting the kubelet")
   876  			startKubelet()
   877  
   878  			ginkgo.By("Wait for node to be ready again")
   879  			e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)
   880  
   881  			ginkgo.By("Waiting for the pod to fail with admission error as device plugin hasn't re-registered yet")
   882  			gomega.Eventually(ctx, getPod).
   883  				WithArguments(f, pod1.Name).
   884  				WithTimeout(time.Minute).
   885  				Should(HaveFailedWithAdmissionError(),
   886  					"the pod started successfully, when it should have failed with the admission error")
   887  
   888  			// crosscheck that the device assignment is preserved and stable from the perspective of the kubelet.
   889  			// note we don't check the container logs again: the check is done at startup, the container
   890  			// never restarted (runs "forever" from this test timescale perspective) hence re-doing this check
   891  			// is useless.
   892  			ginkgo.By("Verifying the device assignment after kubelet restart using podresources API")
   893  			gomega.Eventually(ctx, func() error {
   894  				v1PodResources, err = getV1NodeDevices(ctx)
   895  				return err
   896  			}, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after kubelet restart")
   897  
   898  			err, _ = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
   899  			framework.ExpectNoError(err, "inconsistent device assignment after node reboot")
   900  
   901  		})
   902  	})
   903  }
   904  
   905  // makeBusyboxPod returns a simple Pod spec with a busybox container
   906  // that requests SampleDeviceResourceName and runs the specified command.
   907  func makeBusyboxPod(SampleDeviceResourceName, cmd string) *v1.Pod {
   908  	podName := "device-plugin-test-" + string(uuid.NewUUID())
   909  	rl := v1.ResourceList{v1.ResourceName(SampleDeviceResourceName): *resource.NewQuantity(1, resource.DecimalSI)}
   910  
   911  	return &v1.Pod{
   912  		ObjectMeta: metav1.ObjectMeta{Name: podName},
   913  		Spec: v1.PodSpec{
   914  			RestartPolicy: v1.RestartPolicyAlways,
   915  			Containers: []v1.Container{{
   916  				Image: busyboxImage,
   917  				Name:  podName,
   918  				// Runs the specified command in the test pod.
   919  				Command: []string{"sh", "-c", cmd},
   920  				Resources: v1.ResourceRequirements{
   921  					Limits:   rl,
   922  					Requests: rl,
   923  				},
   924  			}},
   925  		},
   926  	}
   927  }
   928  
   929  // ensurePodContainerRestart confirms that the pod's container has restarted at least once
   930  func ensurePodContainerRestart(ctx context.Context, f *framework.Framework, podName string, contName string) {
   931  	var initialCount int32
   932  	var currentCount int32
   933  	p, err := e2epod.NewPodClient(f).Get(ctx, podName, metav1.GetOptions{})
   934  	if err != nil || len(p.Status.ContainerStatuses) < 1 {
   935  		framework.Failf("ensurePodContainerRestart failed for pod %q: %v", podName, err)
   936  	}
   937  	initialCount = p.Status.ContainerStatuses[0].RestartCount
   938  	gomega.Eventually(ctx, func() bool {
   939  		p, err = e2epod.NewPodClient(f).Get(ctx, podName, metav1.GetOptions{})
   940  		if err != nil || len(p.Status.ContainerStatuses) < 1 {
   941  			return false
   942  		}
   943  		currentCount = p.Status.ContainerStatuses[0].RestartCount
   944  		framework.Logf("initial %v, current %v", initialCount, currentCount)
   945  		return currentCount > initialCount
   946  	}, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())
   947  }
   948  
   949  // parseLog returns the matching string for the specified regular expression parsed from the container logs.
   950  func parseLog(ctx context.Context, f *framework.Framework, podName string, contName string, re string) (string, error) {
   951  	logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, podName, contName)
   952  	if err != nil {
   953  		return "", err
   954  	}
   955  
   956  	framework.Logf("got pod logs: %v", logs)
   957  	regex := regexp.MustCompile(re)
   958  	matches := regex.FindStringSubmatch(logs)
   959  	if len(matches) < 2 {
   960  		return "", fmt.Errorf("no match found in logs: %q", logs)
   961  	}
   962  
   963  	return matches[1], nil
   964  }
   965  
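        // checkPodResourcesAssignment verifies, against a podresources API response, that the given
        // container was assigned exactly the expected device IDs for the given resource.
        // It returns a non-nil error on any mismatch (or if no entry is found); the boolean reports
        // whether a matching pod/container entry was present in the response at all.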
   966  func checkPodResourcesAssignment(v1PodRes *kubeletpodresourcesv1.ListPodResourcesResponse, podNamespace, podName, containerName, resourceName string, devs []string) (error, bool) {
   967  	for _, podRes := range v1PodRes.PodResources {
   968  		if podRes.Namespace != podNamespace || podRes.Name != podName {
   969  			continue
   970  		}
   971  		for _, contRes := range podRes.Containers {
   972  			if contRes.Name != containerName {
   973  				continue
   974  			}
   975  			return matchContainerDevices(podNamespace+"/"+podName+"/"+containerName, contRes.Devices, resourceName, devs)
   976  		}
   977  	}
   978  	err := fmt.Errorf("no resources found for %s/%s/%s", podNamespace, podName, containerName)
   979  	framework.Logf("%v", err)
   980  	return err, false
   981  }
   982  
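        // matchContainerDevices compares the device IDs reported for resourceName in contDevs against
        // the expected devs, returning an error that describes any mismatch. The boolean is always true
        // here, since the caller has already located the matching container entry.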
   983  func matchContainerDevices(ident string, contDevs []*kubeletpodresourcesv1.ContainerDevices, resourceName string, devs []string) (error, bool) {
   984  	expected := sets.New[string](devs...)
   985  	assigned := sets.New[string]()
   986  	for _, contDev := range contDevs {
   987  		if contDev.ResourceName != resourceName {
   988  			continue
   989  		}
   990  		assigned = assigned.Insert(contDev.DeviceIds...)
   991  	}
   992  	expectedStr := strings.Join(expected.UnsortedList(), ",")
   993  	assignedStr := strings.Join(assigned.UnsortedList(), ",")
   994  	framework.Logf("%s: devices expected %q assigned %q", ident, expectedStr, assignedStr)
   995  	if !assigned.Equal(expected) {
   996  		return fmt.Errorf("device allocation mismatch for %s expected %s assigned %s", ident, expectedStr, assignedStr), true
   997  	}
   998  	return nil, true
   999  }
  1000  
  1001  // getSampleDevicePluginPod returns the Sample Device Plugin pod to be used in e2e tests.
  1002  func getSampleDevicePluginPod(pluginSockDir string) *v1.Pod {
  1003  	data, err := e2etestfiles.Read(SampleDevicePluginDSYAML)
  1004  	if err != nil {
  1005  		framework.Fail(err.Error())
  1006  	}
  1007  
  1008  	ds := readDaemonSetV1OrDie(data)
  1009  	dp := &v1.Pod{
  1010  		ObjectMeta: metav1.ObjectMeta{
  1011  			Name: SampleDevicePluginName,
  1012  		},
  1013  		Spec: ds.Spec.Template.Spec,
  1014  	}
  1015  	for i := range dp.Spec.Containers[0].Env {
  1016  		if dp.Spec.Containers[0].Env[i].Name == SampleDeviceEnvVarNamePluginSockDir {
  1017  			dp.Spec.Containers[0].Env[i].Value = pluginSockDir
  1018  		}
  1019  	}
  1020  
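        	// When the DevicePluginCDIDevices feature gate is enabled, ask the sample device plugin
        	// to also expose its devices as CDI devices (exercised by the CDI test above).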
  1021  	if utilfeature.DefaultFeatureGate.Enabled(features.DevicePluginCDIDevices) {
  1022  		dp.Spec.Containers[0].Env = append(dp.Spec.Containers[0].Env, v1.EnvVar{Name: "CDI_ENABLED", Value: "1"})
  1023  	}
  1024  
  1025  	return dp
  1026  }
  1027  
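        // BeTheSamePodStillRunning matches if the pod has the expected UID, is in the Running phase and is ready.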
  1028  func BeTheSamePodStillRunning(expected *v1.Pod) types.GomegaMatcher {
  1029  	return gomega.And(
  1030  		BeTheSamePodAs(expected.UID),
  1031  		BeAPodInPhase(v1.PodRunning),
  1032  		BeAPodReady(),
  1033  	)
  1034  }
  1035  
  1036  // BeAPodReady matches if the pod is reported ready
  1037  func BeAPodReady() types.GomegaMatcher {
  1038  	return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
  1039  		return podutils.IsPodReady(actual), nil
  1040  	}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} not ready yet")
  1041  }
  1042  
  1043  // BeAPodInPhase matches if the pod is in the given phase
  1044  func BeAPodInPhase(phase v1.PodPhase) types.GomegaMatcher {
  1045  	return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
  1046  		return actual.Status.Phase == phase, nil
  1047  	}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} failed {{.To}} be in phase {{.Data}}; it is in phase {{.Actual.Status.Phase}} instead").WithTemplateData(phase)
  1048  }
  1049  
  1050  // BeTheSamePodAs matches if the pod has the given UID
  1051  func BeTheSamePodAs(podUID k8stypes.UID) types.GomegaMatcher {
  1052  	return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
  1053  		return actual.UID == podUID, nil
  1054  	}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} expected UID {{.Data}} but has UID {{.Actual.UID}}").WithTemplateData(podUID)
  1055  }