k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e_node/device_manager_test.go (about)

     1  /*
     2  Copyright 2021 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package e2enode
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"os"
    24  	"path/filepath"
    25  	"strings"
    26  	"time"
    27  
    28  	v1 "k8s.io/api/core/v1"
    29  	"k8s.io/apimachinery/pkg/api/resource"
    30  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    31  	"k8s.io/apimachinery/pkg/util/uuid"
    32  	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
    33  	"k8s.io/klog/v2"
    34  	admissionapi "k8s.io/pod-security-admission/api"
    35  
    36  	"k8s.io/kubernetes/test/e2e/feature"
    37  	"k8s.io/kubernetes/test/e2e/framework"
    38  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    39  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    40  	e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
    41  	"k8s.io/kubernetes/test/e2e/nodefeature"
    42  	testutils "k8s.io/kubernetes/test/utils"
    43  
    44  	"github.com/onsi/ginkgo/v2"
    45  	"github.com/onsi/gomega"
    46  	"github.com/onsi/gomega/gcustom"
    47  	"github.com/onsi/gomega/types"
    48  )
    49  
    50  const (
    51  	devicePluginDir = "/var/lib/kubelet/device-plugins"
    52  )
    53  
    54  // Serial because the test updates kubelet configuration.
    55  var _ = SIGDescribe("Device Manager", framework.WithSerial(), feature.DeviceManager, nodefeature.DeviceManager, func() {
    56  	f := framework.NewDefaultFramework("devicemanager-test")
    57  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    58  
    59  	/*
    60  		This end to end test is to simulate a scenario where after kubelet restart/node
    61  		reboot application pods requesting devices appear before the device plugin
    62  		pod exposing those devices as resources.
    63  
    64  		The happy path is where after node reboot/ kubelet restart, the device plugin pod
    65  		appears before the application pod. This PR and this e2e test
    66  		aim to tackle the scenario where the device plugin either does not appear first
    67  		or doesn't get the chance to re-register itself.
    68  
    69  		Since there is no way of controlling the order in which the pods appear after
    70  		kubelet restart/node reboot, we can't guarantee that the application pod
    71  		recovers before device plugin pod (the scenario we want to exercise here).
    72  		If the device plugin pod is recovered before the test pod, we still can
    73  		meaningfully reproduce the scenario by NOT sending the registration command.
    74  		To do so sample device plugin is enhanced. For implementation details, refer to:
    75  		`test/images/sample-device-plugin/sampledeviceplugin.go`. This enhancement
    76  		allows auto-registration of the plugin to be controlled with the help of an environment
    77  		variable: REGISTER_CONTROL_FILE. By default this environment variable is not present
    78  		and the device plugin autoregisters to kubelet. For this e2e test, we use sample device
    79  		plugin spec with REGISTER_CONTROL_FILE=/var/lib/kubelet/device-plugins/sample/registration
    80  		to allow manual registration of the plugin to allow an application pod (requesting devices)
    81  		to successfully run on the node followed by kubelet restart where device plugin doesn't
    82  		register and the application pod fails with admission error.
    83  
    84  		   Breakdown of the steps implemented as part of this e2e test is as follows:
    85  		   1. Create a file `registration` at path `/var/lib/kubelet/device-plugins/sample/`
    86  		   2. Create sample device plugin with an environment variable with
    87  		      `REGISTER_CONTROL_FILE=/var/lib/kubelet/device-plugins/sample/registration` that
    88  			  waits for a client to delete the control file.
    89  		   3. Trigger plugin registration by deleting the abovementioned control file.
    90  		   4. Create a test pod requesting devices exposed by the device plugin.
    91  		   5. Stop kubelet.
    92  		   6. Remove pods using CRI to ensure new pods are created after kubelet restart.
    93  		   7. Restart kubelet.
    94  		   8. Wait for the sample device plugin pod to be running. In this case,
    95  		      the registration is not triggered.
    96  		   9. Ensure that resource capacity/allocatable exported by the device plugin is zero.
    97  		   10. The test pod should fail with `UnexpectedAdmissionError`
    98  		   11. Delete the test pod.
    99  		   12. Delete the sample device plugin pod.
   100  		   13. Remove `/var/lib/kubelet/device-plugins/sample/` and its content, the directory created to control registration
   101  	*/
   102  	f.Context("With sample device plugin", f.WithSerial(), f.WithDisruptive(), func() {
   103  		var deviceCount int = 2
   104  		var devicePluginPod *v1.Pod
   105  		var triggerPathFile, triggerPathDir string
   106  
   107  		// this test wants to reproduce what happened in https://github.com/kubernetes/kubernetes/issues/109595
   108  		ginkgo.BeforeEach(func(ctx context.Context) {
   109  			ginkgo.By("Wait for node to be ready")
   110  			gomega.Eventually(ctx, e2enode.TotalReady).
   111  				WithArguments(f.ClientSet).
   112  				WithTimeout(time.Minute).
   113  				Should(gomega.BeEquivalentTo(1))
   114  
   115  			ginkgo.By("Setting up the directory and file for controlling registration")
   116  			triggerPathDir = filepath.Join(devicePluginDir, "sample")
   117  			if _, err := os.Stat(triggerPathDir); errors.Is(err, os.ErrNotExist) {
   118  				err := os.Mkdir(triggerPathDir, os.ModePerm)
   119  				if err != nil {
   120  					klog.Errorf("Directory creation %s failed: %v ", triggerPathDir, err)
   121  					panic(err)
   122  				}
   123  				klog.InfoS("Directory created successfully")
   124  
   125  				triggerPathFile = filepath.Join(triggerPathDir, "registration")
   126  				if _, err := os.Stat(triggerPathFile); errors.Is(err, os.ErrNotExist) {
   127  					_, err = os.Create(triggerPathFile)
   128  					if err != nil {
   129  						klog.Errorf("File creation %s failed: %v ", triggerPathFile, err)
   130  						panic(err)
   131  					}
   132  				}
   133  			}
   134  
   135  			ginkgo.By("Scheduling a sample device plugin pod")
   136  			data, err := e2etestfiles.Read(SampleDevicePluginControlRegistrationDSYAML)
   137  			if err != nil {
   138  				framework.Fail(err.Error())
   139  			}
   140  			ds := readDaemonSetV1OrDie(data)
   141  
   142  			dp := &v1.Pod{
   143  				ObjectMeta: metav1.ObjectMeta{
   144  					Name: SampleDevicePluginName,
   145  				},
   146  				Spec: ds.Spec.Template.Spec,
   147  			}
   148  
   149  			devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dp)
   150  
   151  			go func() {
   152  				// Since autoregistration is disabled for the device plugin (as REGISTER_CONTROL_FILE
   153  				// environment variable is specified), device plugin registration needs to be triggerred
   154  				// manually.
   155  				// This is done by deleting the control file at the following path:
   156  				// `/var/lib/kubelet/device-plugins/sample/registration`.
   157  
   158  				defer ginkgo.GinkgoRecover()
   159  				framework.Logf("Deleting the control file: %q to trigger registration", triggerPathFile)
   160  				err := os.Remove(triggerPathFile)
   161  				framework.ExpectNoError(err)
   162  			}()
   163  
   164  			ginkgo.By("Waiting for devices to become available on the local node")
   165  
   166  			gomega.Eventually(ctx, isNodeReadyWithSampleResources).
   167  				WithArguments(f).
   168  				WithTimeout(5 * time.Minute).
   169  				Should(BeReady())
   170  
   171  			framework.Logf("Successfully created device plugin pod")
   172  
   173  			devsLen := int64(deviceCount) // shortcut
   174  			ginkgo.By("Waiting for the resource exported by the sample device plugin to become available on the local node")
   175  
   176  			gomega.Eventually(ctx, isNodeReadyWithAllocatableSampleResources).
   177  				WithArguments(f, devsLen).
   178  				WithTimeout(5 * time.Minute).
   179  				Should(HaveAllocatableDevices())
   180  		})
   181  
   182  		framework.It("should deploy pod consuming devices first but fail with admission error after kubelet restart in case device plugin hasn't re-registered", framework.WithFlaky(), func(ctx context.Context) {
   183  			var err error
   184  			podCMD := "while true; do sleep 1000; done;"
   185  
   186  			ginkgo.By(fmt.Sprintf("creating a pods requiring %d %q", deviceCount, SampleDeviceResourceName))
   187  
   188  			pod := makeBusyboxDeviceRequiringPod(SampleDeviceResourceName, podCMD)
   189  			testPod := e2epod.NewPodClient(f).CreateSync(ctx, pod)
   190  
   191  			ginkgo.By("making sure all the pods are ready")
   192  
   193  			err = e2epod.WaitForPodCondition(ctx, f.ClientSet, testPod.Namespace, testPod.Name, "Ready", 120*time.Second, testutils.PodRunningReady)
   194  			framework.ExpectNoError(err, "pod %s/%s did not go running", testPod.Namespace, testPod.Name)
   195  			framework.Logf("pod %s/%s running", testPod.Namespace, testPod.Name)
   196  
   197  			ginkgo.By("stopping the kubelet")
   198  			startKubelet := stopKubelet()
   199  
   200  			ginkgo.By("stopping all the local containers - using CRI")
   201  			rs, _, err := getCRIClient()
   202  			framework.ExpectNoError(err)
   203  			sandboxes, err := rs.ListPodSandbox(ctx, &runtimeapi.PodSandboxFilter{})
   204  			framework.ExpectNoError(err)
   205  			for _, sandbox := range sandboxes {
   206  				gomega.Expect(sandbox.Metadata).ToNot(gomega.BeNil())
   207  				ginkgo.By(fmt.Sprintf("deleting pod using CRI: %s/%s -> %s", sandbox.Metadata.Namespace, sandbox.Metadata.Name, sandbox.Id))
   208  
   209  				err := rs.RemovePodSandbox(ctx, sandbox.Id)
   210  				framework.ExpectNoError(err)
   211  			}
   212  
   213  			ginkgo.By("restarting the kubelet")
   214  			startKubelet()
   215  
   216  			ginkgo.By("waiting for the kubelet to be ready again")
   217  			// Wait for the Kubelet to be ready.
   218  
   219  			gomega.Eventually(ctx, e2enode.TotalReady).
   220  				WithArguments(f.ClientSet).
   221  				WithTimeout(2 * time.Minute).
   222  				Should(gomega.BeEquivalentTo(1))
   223  
   224  			ginkgo.By("making sure all the pods are ready after the recovery")
   225  
   226  			var devicePluginPodAfterRestart *v1.Pod
   227  
   228  			devicePluginPodAfterRestart, err = e2epod.NewPodClient(f).Get(ctx, devicePluginPod.Name, metav1.GetOptions{})
   229  			framework.ExpectNoError(err)
   230  
   231  			err = e2epod.WaitForPodCondition(ctx, f.ClientSet, devicePluginPodAfterRestart.Namespace, devicePluginPodAfterRestart.Name, "Ready", 120*time.Second, testutils.PodRunningReady)
   232  			framework.ExpectNoError(err, "pod %s/%s did not go running", devicePluginPodAfterRestart.Namespace, devicePluginPodAfterRestart.Name)
   233  			framework.Logf("pod %s/%s running", devicePluginPodAfterRestart.Namespace, devicePluginPodAfterRestart.Name)
   234  
   235  			ginkgo.By("Waiting for the resource capacity/allocatable exported by the sample device plugin to become zero")
   236  
   237  			// The device plugin pod has restarted but has not re-registered to kubelet (as AUTO_REGISTER= false)
   238  			// and registration wasn't triggered manually (by writing to the unix socket exposed at
   239  			// `/var/lib/kubelet/device-plugins/registered`). Because of this, the capacity and allocatable corresponding
   240  			// to the resource exposed by the device plugin should be zero.
   241  
   242  			gomega.Eventually(ctx, isNodeReadyWithAllocatableSampleResources).
   243  				WithArguments(f, int64(0)).
   244  				WithTimeout(5 * time.Minute).
   245  				Should(HaveAllocatableDevices())
   246  
   247  			ginkgo.By("Checking that pod requesting devices failed to start because of admission error")
   248  
   249  			// NOTE: The device plugin won't re-register again and this is intentional.
   250  			// Because of this, the testpod (requesting a device) should fail with an admission error.
   251  
   252  			gomega.Eventually(ctx, getPod).
   253  				WithArguments(f, testPod.Name).
   254  				WithTimeout(time.Minute).
   255  				Should(HaveFailedWithAdmissionError(),
   256  					"the pod succeeded to start, when it should fail with the admission error")
   257  
   258  			ginkgo.By("removing application pods")
   259  			e2epod.NewPodClient(f).DeleteSync(ctx, testPod.Name, metav1.DeleteOptions{}, 2*time.Minute)
   260  		})
   261  
   262  		ginkgo.AfterEach(func(ctx context.Context) {
   263  			ginkgo.By("Deleting the device plugin pod")
   264  			e2epod.NewPodClient(f).DeleteSync(ctx, devicePluginPod.Name, metav1.DeleteOptions{}, time.Minute)
   265  
   266  			ginkgo.By("Deleting the directory and file setup for controlling registration")
   267  			err := os.RemoveAll(triggerPathDir)
   268  			framework.ExpectNoError(err)
   269  
   270  			ginkgo.By("Deleting any Pods created by the test")
   271  			l, err := e2epod.NewPodClient(f).List(context.TODO(), metav1.ListOptions{})
   272  			framework.ExpectNoError(err)
   273  			for _, p := range l.Items {
   274  				if p.Namespace != f.Namespace.Name {
   275  					continue
   276  				}
   277  
   278  				framework.Logf("Deleting pod: %s", p.Name)
   279  				e2epod.NewPodClient(f).DeleteSync(ctx, p.Name, metav1.DeleteOptions{}, 2*time.Minute)
   280  			}
   281  
   282  			ginkgo.By("Waiting for devices to become unavailable on the local node")
   283  			gomega.Eventually(ctx, isNodeReadyWithoutSampleResources).
   284  				WithArguments(f).
   285  				WithTimeout(5 * time.Minute).
   286  				Should(BeReady())
   287  		})
   288  
   289  	})
   290  
   291  })
   292  
   293  func makeBusyboxDeviceRequiringPod(resourceName, cmd string) *v1.Pod {
   294  	podName := "device-manager-test-" + string(uuid.NewUUID())
   295  	rl := v1.ResourceList{
   296  		v1.ResourceName(resourceName): *resource.NewQuantity(2, resource.DecimalSI),
   297  	}
   298  	return &v1.Pod{
   299  		ObjectMeta: metav1.ObjectMeta{
   300  			Name: podName,
   301  		},
   302  		Spec: v1.PodSpec{
   303  			RestartPolicy: v1.RestartPolicyNever,
   304  			Containers: []v1.Container{{
   305  				Image: busyboxImage,
   306  				Name:  podName,
   307  				// Runs the specified command in the test pod.
   308  				Command: []string{"sh", "-c", cmd},
   309  				Resources: v1.ResourceRequirements{
   310  					Limits:   rl,
   311  					Requests: rl,
   312  				},
   313  			}},
   314  		},
   315  	}
   316  }
   317  
   318  // BeReady verifies that a node is ready and devices have registered.
   319  func BeReady() types.GomegaMatcher {
   320  	return gomega.And(
   321  		// This additional matcher checks for the final error condition.
   322  		gcustom.MakeMatcher(func(ready bool) (bool, error) {
   323  			if !ready {
   324  				return false, fmt.Errorf("expected node to be ready=%t", ready)
   325  			}
   326  			return true, nil
   327  		}),
   328  		BeInReadyPhase(true),
   329  	)
   330  }
   331  
   332  // BeInReadyPhase matches if node is ready i.e. ready is true.
   333  func BeInReadyPhase(isReady bool) types.GomegaMatcher {
   334  	return gcustom.MakeMatcher(func(ready bool) (bool, error) {
   335  		return ready == isReady, nil
   336  	}).WithTemplate("expected Node Ready {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(isReady)
   337  }
   338  
   339  func isNodeReadyWithSampleResources(ctx context.Context, f *framework.Framework) (bool, error) {
   340  	node, ready := getLocalTestNode(ctx, f)
   341  	if !ready {
   342  		return false, fmt.Errorf("expected node to be ready=%t", ready)
   343  	}
   344  
   345  	if CountSampleDeviceCapacity(node) <= 0 {
   346  		return false, fmt.Errorf("expected devices to be advertised")
   347  	}
   348  	return true, nil
   349  }
   350  
   351  // HaveAllocatableDevices verifies that a node has allocatable devices.
   352  func HaveAllocatableDevices() types.GomegaMatcher {
   353  	return gomega.And(
   354  		// This additional matcher checks for the final error condition.
   355  		gcustom.MakeMatcher(func(hasAllocatable bool) (bool, error) {
   356  			if !hasAllocatable {
   357  				return false, fmt.Errorf("expected node to be have allocatable devices=%t", hasAllocatable)
   358  			}
   359  			return true, nil
   360  		}),
   361  		hasAllocatable(true),
   362  	)
   363  }
   364  
   365  // hasAllocatable matches if node is ready i.e. ready is true.
   366  func hasAllocatable(hasAllocatable bool) types.GomegaMatcher {
   367  	return gcustom.MakeMatcher(func(hasAllocatableDevices bool) (bool, error) {
   368  		return hasAllocatableDevices == hasAllocatable, nil
   369  	}).WithTemplate("expected Node with allocatable {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(hasAllocatable)
   370  }
   371  
   372  func isNodeReadyWithAllocatableSampleResources(ctx context.Context, f *framework.Framework, devCount int64) (bool, error) {
   373  	node, ready := getLocalTestNode(ctx, f)
   374  	if !ready {
   375  		return false, fmt.Errorf("expected node to be ready=%t", ready)
   376  	}
   377  
   378  	if CountSampleDeviceCapacity(node) != devCount {
   379  		return false, fmt.Errorf("expected devices capacity to be: %d", devCount)
   380  	}
   381  
   382  	if CountSampleDeviceAllocatable(node) != devCount {
   383  		return false, fmt.Errorf("expected devices allocatable to be: %d", devCount)
   384  	}
   385  	return true, nil
   386  }
   387  
   388  func isNodeReadyWithoutSampleResources(ctx context.Context, f *framework.Framework) (bool, error) {
   389  	node, ready := getLocalTestNode(ctx, f)
   390  	if !ready {
   391  		return false, fmt.Errorf("expected node to be ready=%t", ready)
   392  	}
   393  
   394  	if CountSampleDeviceCapacity(node) > 0 {
   395  		return false, fmt.Errorf("expected devices to be not present")
   396  	}
   397  	return true, nil
   398  }
   399  
   400  // HaveFailedWithAdmissionError verifies that a pod fails at admission.
   401  func HaveFailedWithAdmissionError() types.GomegaMatcher {
   402  	return gomega.And(
   403  		gcustom.MakeMatcher(func(hasFailed bool) (bool, error) {
   404  			if !hasFailed {
   405  				return false, fmt.Errorf("expected pod to have failed=%t", hasFailed)
   406  			}
   407  			return true, nil
   408  		}),
   409  		hasFailed(true),
   410  	)
   411  }
   412  
   413  // hasFailed matches if pod has failed.
   414  func hasFailed(hasFailed bool) types.GomegaMatcher {
   415  	return gcustom.MakeMatcher(func(hasPodFailed bool) (bool, error) {
   416  		return hasPodFailed == hasFailed, nil
   417  	}).WithTemplate("expected Pod failed {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(hasFailed)
   418  }
   419  
   420  func getPodByName(ctx context.Context, f *framework.Framework, podName string) (*v1.Pod, error) {
   421  	return e2epod.NewPodClient(f).Get(ctx, podName, metav1.GetOptions{})
   422  }
   423  
   424  func getPod(ctx context.Context, f *framework.Framework, podName string) (bool, error) {
   425  	pod, err := getPodByName(ctx, f, podName)
   426  	if err != nil {
   427  		return false, err
   428  	}
   429  
   430  	expectedStatusReason := "UnexpectedAdmissionError"
   431  	expectedStatusMessage := "Allocate failed due to no healthy devices present; cannot allocate unhealthy devices"
   432  
   433  	// This additional matcher checks for the final error condition.
   434  	if pod.Status.Phase != v1.PodFailed {
   435  		return false, fmt.Errorf("expected pod to reach phase %q, got final phase %q instead.", v1.PodFailed, pod.Status.Phase)
   436  	}
   437  	if pod.Status.Reason != expectedStatusReason {
   438  		return false, fmt.Errorf("expected pod status reason to be %q, got %q instead.", expectedStatusReason, pod.Status.Reason)
   439  	}
   440  	if !strings.Contains(pod.Status.Message, expectedStatusMessage) {
   441  		return false, fmt.Errorf("expected pod status reason to contain %q, got %q instead.", expectedStatusMessage, pod.Status.Message)
   442  	}
   443  	return true, nil
   444  }