k8s.io/kubernetes@v1.29.3/test/e2e/dra/deploy.go (about) 1 /* 2 Copyright 2022 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package dra 18 19 import ( 20 "bytes" 21 "context" 22 "errors" 23 "fmt" 24 "net" 25 "path" 26 "sort" 27 "sync" 28 "time" 29 30 "github.com/onsi/ginkgo/v2" 31 "github.com/onsi/gomega" 32 "google.golang.org/grpc" 33 34 appsv1 "k8s.io/api/apps/v1" 35 v1 "k8s.io/api/core/v1" 36 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 37 "k8s.io/apimachinery/pkg/labels" 38 "k8s.io/apimachinery/pkg/selection" 39 "k8s.io/dynamic-resource-allocation/kubeletplugin" 40 "k8s.io/klog/v2" 41 "k8s.io/kubernetes/test/e2e/dra/test-driver/app" 42 "k8s.io/kubernetes/test/e2e/framework" 43 e2enode "k8s.io/kubernetes/test/e2e/framework/node" 44 e2ereplicaset "k8s.io/kubernetes/test/e2e/framework/replicaset" 45 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" 46 "k8s.io/kubernetes/test/e2e/storage/drivers/proxy" 47 "k8s.io/kubernetes/test/e2e/storage/utils" 48 ) 49 50 const ( 51 NodePrepareResourceMethod = "/v1alpha2.Node/NodePrepareResource" 52 NodePrepareResourcesMethod = "/v1alpha3.Node/NodePrepareResources" 53 NodeUnprepareResourceMethod = "/v1alpha2.Node/NodeUnprepareResource" 54 NodeUnprepareResourcesMethod = "/v1alpha3.Node/NodeUnprepareResources" 55 ) 56 57 type Nodes struct { 58 NodeNames []string 59 } 60 61 // NewNodes selects nodes to run the test on. 62 func NewNodes(f *framework.Framework, minNodes, maxNodes int) *Nodes { 63 nodes := &Nodes{} 64 ginkgo.BeforeEach(func(ctx context.Context) { 65 ginkgo.By("selecting nodes") 66 // The kubelet plugin is harder. We deploy the builtin manifest 67 // after patching in the driver name and all nodes on which we 68 // want the plugin to run. 69 // 70 // Only a subset of the nodes are picked to avoid causing 71 // unnecessary load on a big cluster. 72 nodeList, err := e2enode.GetBoundedReadySchedulableNodes(ctx, f.ClientSet, maxNodes) 73 framework.ExpectNoError(err, "get nodes") 74 numNodes := int32(len(nodeList.Items)) 75 if int(numNodes) < minNodes { 76 e2eskipper.Skipf("%d ready nodes required, only have %d", minNodes, numNodes) 77 } 78 nodes.NodeNames = nil 79 for _, node := range nodeList.Items { 80 nodes.NodeNames = append(nodes.NodeNames, node.Name) 81 } 82 framework.Logf("testing on nodes %v", nodes.NodeNames) 83 }) 84 return nodes 85 } 86 87 // NewDriver sets up controller (as client of the cluster) and 88 // kubelet plugin (via proxy) before the test runs. It cleans 89 // up after the test. 90 func NewDriver(f *framework.Framework, nodes *Nodes, configureResources func() app.Resources) *Driver { 91 d := &Driver{ 92 f: f, 93 fail: map[MethodInstance]bool{}, 94 callCounts: map[MethodInstance]int64{}, 95 NodeV1alpha2: true, 96 NodeV1alpha3: true, 97 } 98 99 ginkgo.BeforeEach(func() { 100 resources := configureResources() 101 if len(resources.Nodes) == 0 { 102 // This always has to be set because the driver might 103 // not run on all nodes. 104 resources.Nodes = nodes.NodeNames 105 } 106 d.SetUp(nodes, resources) 107 ginkgo.DeferCleanup(d.TearDown) 108 }) 109 return d 110 } 111 112 type MethodInstance struct { 113 Nodename string 114 FullMethod string 115 } 116 117 type Driver struct { 118 f *framework.Framework 119 ctx context.Context 120 cleanup []func() // executed first-in-first-out 121 wg sync.WaitGroup 122 123 NameSuffix string 124 Controller *app.ExampleController 125 Name string 126 Nodes map[string]*app.ExamplePlugin 127 128 NodeV1alpha2, NodeV1alpha3 bool 129 130 mutex sync.Mutex 131 fail map[MethodInstance]bool 132 callCounts map[MethodInstance]int64 133 } 134 135 func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) { 136 ginkgo.By(fmt.Sprintf("deploying driver on nodes %v", nodes.NodeNames)) 137 d.Nodes = map[string]*app.ExamplePlugin{} 138 d.Name = d.f.UniqueName + d.NameSuffix + ".k8s.io" 139 resources.DriverName = d.Name 140 141 ctx, cancel := context.WithCancel(context.Background()) 142 if d.NameSuffix != "" { 143 logger := klog.FromContext(ctx) 144 logger = klog.LoggerWithName(logger, "instance"+d.NameSuffix) 145 ctx = klog.NewContext(ctx, logger) 146 } 147 d.ctx = ctx 148 d.cleanup = append(d.cleanup, cancel) 149 150 // The controller is easy: we simply connect to the API server. 151 d.Controller = app.NewController(d.f.ClientSet, resources) 152 d.wg.Add(1) 153 go func() { 154 defer d.wg.Done() 155 d.Controller.Run(d.ctx, 5 /* workers */) 156 }() 157 158 manifests := []string{ 159 // The code below matches the content of this manifest (ports, 160 // container names, etc.). 161 "test/e2e/testing-manifests/dra/dra-test-driver-proxy.yaml", 162 } 163 instanceKey := "app.kubernetes.io/instance" 164 rsName := "" 165 draAddr := path.Join(framework.TestContext.KubeletRootDir, "plugins", d.Name+".sock") 166 numNodes := int32(len(nodes.NodeNames)) 167 err := utils.CreateFromManifests(ctx, d.f, d.f.Namespace, func(item interface{}) error { 168 switch item := item.(type) { 169 case *appsv1.ReplicaSet: 170 item.Name += d.NameSuffix 171 rsName = item.Name 172 item.Spec.Replicas = &numNodes 173 item.Spec.Selector.MatchLabels[instanceKey] = d.Name 174 item.Spec.Template.Labels[instanceKey] = d.Name 175 item.Spec.Template.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution[0].LabelSelector.MatchLabels[instanceKey] = d.Name 176 item.Spec.Template.Spec.Affinity.NodeAffinity = &v1.NodeAffinity{ 177 RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ 178 NodeSelectorTerms: []v1.NodeSelectorTerm{ 179 { 180 MatchExpressions: []v1.NodeSelectorRequirement{ 181 { 182 Key: "kubernetes.io/hostname", 183 Operator: v1.NodeSelectorOpIn, 184 Values: nodes.NodeNames, 185 }, 186 }, 187 }, 188 }, 189 }, 190 } 191 item.Spec.Template.Spec.Volumes[0].HostPath.Path = path.Join(framework.TestContext.KubeletRootDir, "plugins") 192 item.Spec.Template.Spec.Volumes[2].HostPath.Path = path.Join(framework.TestContext.KubeletRootDir, "plugins_registry") 193 item.Spec.Template.Spec.Containers[0].Args = append(item.Spec.Template.Spec.Containers[0].Args, "--endpoint=/plugins_registry/"+d.Name+"-reg.sock") 194 item.Spec.Template.Spec.Containers[1].Args = append(item.Spec.Template.Spec.Containers[1].Args, "--endpoint=/dra/"+d.Name+".sock") 195 } 196 return nil 197 }, manifests...) 198 framework.ExpectNoError(err, "deploy kubelet plugin replicaset") 199 200 rs, err := d.f.ClientSet.AppsV1().ReplicaSets(d.f.Namespace.Name).Get(ctx, rsName, metav1.GetOptions{}) 201 framework.ExpectNoError(err, "get replicaset") 202 203 // Wait for all pods to be running. 204 if err := e2ereplicaset.WaitForReplicaSetTargetAvailableReplicas(ctx, d.f.ClientSet, rs, numNodes); err != nil { 205 framework.ExpectNoError(err, "all kubelet plugin proxies running") 206 } 207 requirement, err := labels.NewRequirement(instanceKey, selection.Equals, []string{d.Name}) 208 framework.ExpectNoError(err, "create label selector requirement") 209 selector := labels.NewSelector().Add(*requirement) 210 pods, err := d.f.ClientSet.CoreV1().Pods(d.f.Namespace.Name).List(ctx, metav1.ListOptions{LabelSelector: selector.String()}) 211 framework.ExpectNoError(err, "list proxy pods") 212 gomega.Expect(numNodes).To(gomega.Equal(int32(len(pods.Items))), "number of proxy pods") 213 214 // Run registar and plugin for each of the pods. 215 for _, pod := range pods.Items { 216 // Need a local variable, not the loop variable, for the anonymous 217 // callback functions below. 218 pod := pod 219 nodename := pod.Spec.NodeName 220 logger := klog.LoggerWithValues(klog.LoggerWithName(klog.Background(), "kubelet plugin"), "node", pod.Spec.NodeName, "pod", klog.KObj(&pod)) 221 plugin, err := app.StartPlugin(logger, "/cdi", d.Name, nodename, 222 app.FileOperations{ 223 Create: func(name string, content []byte) error { 224 klog.Background().Info("creating CDI file", "node", nodename, "filename", name, "content", string(content)) 225 return d.createFile(&pod, name, content) 226 }, 227 Remove: func(name string) error { 228 klog.Background().Info("deleting CDI file", "node", nodename, "filename", name) 229 return d.removeFile(&pod, name) 230 }, 231 }, 232 kubeletplugin.GRPCVerbosity(0), 233 kubeletplugin.GRPCInterceptor(func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) { 234 return d.interceptor(nodename, ctx, req, info, handler) 235 }), 236 kubeletplugin.PluginListener(listen(ctx, d.f, pod.Name, "plugin", 9001)), 237 kubeletplugin.RegistrarListener(listen(ctx, d.f, pod.Name, "registrar", 9000)), 238 kubeletplugin.KubeletPluginSocketPath(draAddr), 239 kubeletplugin.NodeV1alpha2(d.NodeV1alpha2), 240 kubeletplugin.NodeV1alpha3(d.NodeV1alpha3), 241 ) 242 framework.ExpectNoError(err, "start kubelet plugin for node %s", pod.Spec.NodeName) 243 d.cleanup = append(d.cleanup, func() { 244 // Depends on cancel being called first. 245 plugin.Stop() 246 }) 247 d.Nodes[nodename] = plugin 248 } 249 250 // Wait for registration. 251 ginkgo.By("wait for plugin registration") 252 gomega.Eventually(func() map[string][]app.GRPCCall { 253 notRegistered := make(map[string][]app.GRPCCall) 254 for nodename, plugin := range d.Nodes { 255 calls := plugin.GetGRPCCalls() 256 if contains, err := app.BeRegistered.Match(calls); err != nil || !contains { 257 notRegistered[nodename] = calls 258 } 259 } 260 return notRegistered 261 }).WithTimeout(time.Minute).Should(gomega.BeEmpty(), "hosts where the plugin has not been registered yet") 262 } 263 264 func (d *Driver) createFile(pod *v1.Pod, name string, content []byte) error { 265 buffer := bytes.NewBuffer(content) 266 // Writing the content can be slow. Better create a temporary file and 267 // move it to the final destination once it is complete. 268 tmpName := name + ".tmp" 269 if err := d.podIO(pod).CreateFile(tmpName, buffer); err != nil { 270 _ = d.podIO(pod).RemoveAll(tmpName) 271 return err 272 } 273 return d.podIO(pod).Rename(tmpName, name) 274 } 275 276 func (d *Driver) removeFile(pod *v1.Pod, name string) error { 277 return d.podIO(pod).RemoveAll(name) 278 } 279 280 func (d *Driver) podIO(pod *v1.Pod) proxy.PodDirIO { 281 logger := klog.Background() 282 return proxy.PodDirIO{ 283 F: d.f, 284 Namespace: pod.Namespace, 285 PodName: pod.Name, 286 ContainerName: "plugin", 287 Logger: &logger, 288 } 289 } 290 291 func listen(ctx context.Context, f *framework.Framework, podName, containerName string, port int) net.Listener { 292 addr := proxy.Addr{ 293 Namespace: f.Namespace.Name, 294 PodName: podName, 295 ContainerName: containerName, 296 Port: port, 297 } 298 listener, err := proxy.Listen(ctx, f.ClientSet, f.ClientConfig(), addr) 299 framework.ExpectNoError(err, "listen for connections from %+v", addr) 300 return listener 301 } 302 303 func (d *Driver) TearDown() { 304 for _, c := range d.cleanup { 305 c() 306 } 307 d.cleanup = nil 308 d.wg.Wait() 309 } 310 311 func (d *Driver) interceptor(nodename string, ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) { 312 d.mutex.Lock() 313 defer d.mutex.Unlock() 314 315 m := MethodInstance{nodename, info.FullMethod} 316 d.callCounts[m]++ 317 if d.fail[m] { 318 return nil, errors.New("injected error") 319 } 320 321 return handler(ctx, req) 322 } 323 324 func (d *Driver) Fail(m MethodInstance, injectError bool) { 325 d.mutex.Lock() 326 defer d.mutex.Unlock() 327 328 d.fail[m] = injectError 329 } 330 331 func (d *Driver) CallCount(m MethodInstance) int64 { 332 d.mutex.Lock() 333 defer d.mutex.Unlock() 334 335 return d.callCounts[m] 336 } 337 338 func (d *Driver) Nodenames() (nodenames []string) { 339 for nodename := range d.Nodes { 340 nodenames = append(nodenames, nodename) 341 } 342 sort.Strings(nodenames) 343 return 344 }