k8s.io/kubernetes@v1.29.3/test/e2e_node/dra_test.go (about) 1 /* 2 Copyright 2023 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 /* 18 E2E Node test for DRA (Dynamic Resource Allocation) 19 This test covers node-specific aspects of DRA 20 The test can be run locally on Linux this way: 21 make test-e2e-node FOCUS='\[NodeAlphaFeature:DynamicResourceAllocation\]' SKIP='\[Flaky\]' PARALLELISM=1 \ 22 TEST_ARGS='--feature-gates="DynamicResourceAllocation=true" --service-feature-gates="DynamicResourceAllocation=true" --runtime-config=api/all=true' 23 */ 24 25 package e2enode 26 27 import ( 28 "context" 29 "os" 30 "path" 31 "path/filepath" 32 "time" 33 34 "github.com/onsi/ginkgo/v2" 35 "github.com/onsi/gomega" 36 37 v1 "k8s.io/api/core/v1" 38 resourcev1alpha2 "k8s.io/api/resource/v1alpha2" 39 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 40 "k8s.io/client-go/kubernetes" 41 "k8s.io/klog/v2" 42 dra "k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin" 43 admissionapi "k8s.io/pod-security-admission/api" 44 45 "k8s.io/kubernetes/test/e2e/feature" 46 "k8s.io/kubernetes/test/e2e/framework" 47 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 48 49 "k8s.io/dynamic-resource-allocation/kubeletplugin" 50 testdriver "k8s.io/kubernetes/test/e2e/dra/test-driver/app" 51 ) 52 53 const ( 54 driverName = "test-driver.cdi.k8s.io" 55 cdiDir = "/var/run/cdi" 56 endpoint = "/var/lib/kubelet/plugins/test-driver/dra.sock" 57 pluginRegistrationPath = "/var/lib/kubelet/plugins_registry" 58 draAddress = "/var/lib/kubelet/plugins/test-driver/dra.sock" 59 pluginRegistrationTimeout = time.Second * 60 // how long to wait for a node plugin to be registered 60 podInPendingStateTimeout = time.Second * 60 // how long to wait for a pod to stay in pending state 61 ) 62 63 var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation, "[NodeAlphaFeature:DynamicResourceAllocation]", func() { 64 f := framework.NewDefaultFramework("dra-node") 65 f.NamespacePodSecurityLevel = admissionapi.LevelBaseline 66 67 var kubeletPlugin *testdriver.ExamplePlugin 68 69 f.Context("Resource Kubelet Plugin", f.WithSerial(), func() { 70 ginkgo.BeforeEach(func(ctx context.Context) { 71 kubeletPlugin = newKubeletPlugin(getNodeName(ctx, f)) 72 }) 73 74 ginkgo.It("must register after Kubelet restart", func(ctx context.Context) { 75 oldCalls := kubeletPlugin.GetGRPCCalls() 76 getNewCalls := func() []testdriver.GRPCCall { 77 calls := kubeletPlugin.GetGRPCCalls() 78 return calls[len(oldCalls):] 79 } 80 81 ginkgo.By("restarting Kubelet") 82 restartKubelet(true) 83 84 ginkgo.By("wait for Kubelet plugin re-registration") 85 gomega.Eventually(getNewCalls).WithTimeout(pluginRegistrationTimeout).Should(testdriver.BeRegistered) 86 }) 87 88 ginkgo.It("must register after plugin restart", func(ctx context.Context) { 89 ginkgo.By("restart Kubelet Plugin") 90 kubeletPlugin.Stop() 91 kubeletPlugin = newKubeletPlugin(getNodeName(ctx, f)) 92 93 ginkgo.By("wait for Kubelet plugin re-registration") 94 gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(pluginRegistrationTimeout).Should(testdriver.BeRegistered) 95 }) 96 97 ginkgo.It("must process pod created when kubelet is not running", func(ctx context.Context) { 98 // Stop Kubelet 99 startKubelet := stopKubelet() 100 pod := createTestObjects(ctx, f.ClientSet, getNodeName(ctx, f), f.Namespace.Name, "draclass", "external-claim", "drapod") 101 // Pod must be in pending state 102 err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Pending", framework.PodStartShortTimeout, func(pod *v1.Pod) (bool, error) { 103 return pod.Status.Phase == v1.PodPending, nil 104 }) 105 framework.ExpectNoError(err) 106 // Start Kubelet 107 startKubelet() 108 // Pod should succeed 109 err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, pod.Name, f.Namespace.Name, framework.PodStartShortTimeout) 110 framework.ExpectNoError(err) 111 }) 112 113 ginkgo.It("must keep pod in pending state if NodePrepareResources times out", func(ctx context.Context) { 114 ginkgo.By("set delay for the NodePrepareResources call") 115 kubeletPlugin.Block() 116 pod := createTestObjects(ctx, f.ClientSet, getNodeName(ctx, f), f.Namespace.Name, "draclass", "external-claim", "drapod") 117 118 ginkgo.By("wait for pod to be in Pending state") 119 err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Pending", framework.PodStartShortTimeout, func(pod *v1.Pod) (bool, error) { 120 return pod.Status.Phase == v1.PodPending, nil 121 }) 122 framework.ExpectNoError(err) 123 124 ginkgo.By("wait for NodePrepareResources call") 125 gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(dra.PluginClientTimeout * 2).Should(testdriver.NodePrepareResourcesCalled) 126 127 // TODO: Check condition or event when implemented 128 // see https://github.com/kubernetes/kubernetes/issues/118468 for details 129 ginkgo.By("check that pod is consistently in Pending state") 130 gomega.Consistently(ctx, e2epod.Get(f.ClientSet, pod)).WithTimeout(podInPendingStateTimeout).Should(e2epod.BeInPhase(v1.PodPending), 131 "Pod should be in Pending state as resource preparation time outed") 132 }) 133 }) 134 }) 135 136 // Run Kubelet plugin and wait until it's registered 137 func newKubeletPlugin(nodeName string) *testdriver.ExamplePlugin { 138 ginkgo.By("start Kubelet plugin") 139 logger := klog.LoggerWithValues(klog.LoggerWithName(klog.Background(), "kubelet plugin"), "node", nodeName) 140 141 // Ensure that directories exist, creating them if necessary. We want 142 // to know early if there is a setup problem that would prevent 143 // creating those directories. 144 err := os.MkdirAll(cdiDir, os.FileMode(0750)) 145 framework.ExpectNoError(err, "create CDI directory") 146 err = os.MkdirAll(filepath.Dir(endpoint), 0750) 147 framework.ExpectNoError(err, "create socket directory") 148 149 plugin, err := testdriver.StartPlugin( 150 logger, 151 cdiDir, 152 driverName, 153 "", 154 testdriver.FileOperations{}, 155 kubeletplugin.PluginSocketPath(endpoint), 156 kubeletplugin.RegistrarSocketPath(path.Join(pluginRegistrationPath, driverName+"-reg.sock")), 157 kubeletplugin.KubeletPluginSocketPath(draAddress), 158 ) 159 framework.ExpectNoError(err) 160 161 gomega.Eventually(plugin.GetGRPCCalls).WithTimeout(pluginRegistrationTimeout).Should(testdriver.BeRegistered) 162 163 ginkgo.DeferCleanup(plugin.Stop) 164 165 return plugin 166 } 167 168 // createTestObjects creates objects required by the test 169 // NOTE: as scheduler and controller manager are not running by the Node e2e, 170 // the objects must contain all required data to be processed correctly by the API server 171 // and placed on the node without involving the scheduler and the DRA controller 172 func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, nodename, namespace, className, claimName, podName string) *v1.Pod { 173 // ResourceClass 174 class := &resourcev1alpha2.ResourceClass{ 175 ObjectMeta: metav1.ObjectMeta{ 176 Name: className, 177 }, 178 DriverName: driverName, 179 } 180 _, err := clientSet.ResourceV1alpha2().ResourceClasses().Create(ctx, class, metav1.CreateOptions{}) 181 framework.ExpectNoError(err) 182 183 ginkgo.DeferCleanup(clientSet.ResourceV1alpha2().ResourceClasses().Delete, className, metav1.DeleteOptions{}) 184 185 // ResourceClaim 186 podClaimName := "resource-claim" 187 claim := &resourcev1alpha2.ResourceClaim{ 188 ObjectMeta: metav1.ObjectMeta{ 189 Name: claimName, 190 }, 191 Spec: resourcev1alpha2.ResourceClaimSpec{ 192 ResourceClassName: className, 193 }, 194 } 195 createdClaim, err := clientSet.ResourceV1alpha2().ResourceClaims(namespace).Create(ctx, claim, metav1.CreateOptions{}) 196 framework.ExpectNoError(err) 197 198 ginkgo.DeferCleanup(clientSet.ResourceV1alpha2().ResourceClaims(namespace).Delete, claimName, metav1.DeleteOptions{}) 199 200 // Pod 201 containerName := "testcontainer" 202 pod := &v1.Pod{ 203 ObjectMeta: metav1.ObjectMeta{ 204 Name: podName, 205 Namespace: namespace, 206 }, 207 Spec: v1.PodSpec{ 208 NodeName: nodename, // Assign the node as the scheduler is not running 209 ResourceClaims: []v1.PodResourceClaim{ 210 { 211 Name: podClaimName, 212 Source: v1.ClaimSource{ 213 ResourceClaimName: &claimName, 214 }, 215 }, 216 }, 217 Containers: []v1.Container{ 218 { 219 Name: containerName, 220 Image: e2epod.GetDefaultTestImage(), 221 Resources: v1.ResourceRequirements{ 222 Claims: []v1.ResourceClaim{{Name: podClaimName}}, 223 }, 224 Command: []string{"/bin/sh", "-c", "env | grep DRA_PARAM1=PARAM1_VALUE"}, 225 }, 226 }, 227 RestartPolicy: v1.RestartPolicyNever, 228 }, 229 } 230 createdPod, err := clientSet.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{}) 231 framework.ExpectNoError(err) 232 233 ginkgo.DeferCleanup(clientSet.CoreV1().Pods(namespace).Delete, podName, metav1.DeleteOptions{}) 234 235 // Update claim status: set ReservedFor and AllocationResult 236 // NOTE: This is usually done by the DRA controller 237 createdClaim.Status = resourcev1alpha2.ResourceClaimStatus{ 238 DriverName: driverName, 239 ReservedFor: []resourcev1alpha2.ResourceClaimConsumerReference{ 240 {Resource: "pods", Name: podName, UID: createdPod.UID}, 241 }, 242 Allocation: &resourcev1alpha2.AllocationResult{ 243 ResourceHandles: []resourcev1alpha2.ResourceHandle{ 244 { 245 DriverName: driverName, 246 Data: "{\"EnvVars\":{\"DRA_PARAM1\":\"PARAM1_VALUE\"},\"NodeName\":\"\"}", 247 }, 248 }, 249 }, 250 } 251 _, err = clientSet.ResourceV1alpha2().ResourceClaims(namespace).UpdateStatus(ctx, createdClaim, metav1.UpdateOptions{}) 252 framework.ExpectNoError(err) 253 254 return pod 255 }