k8s.io/kubernetes@v1.29.3/test/e2e/dra/deploy.go (about)

     1  /*
     2  Copyright 2022 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package dra
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"errors"
    23  	"fmt"
    24  	"net"
    25  	"path"
    26  	"sort"
    27  	"sync"
    28  	"time"
    29  
    30  	"github.com/onsi/ginkgo/v2"
    31  	"github.com/onsi/gomega"
    32  	"google.golang.org/grpc"
    33  
    34  	appsv1 "k8s.io/api/apps/v1"
    35  	v1 "k8s.io/api/core/v1"
    36  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    37  	"k8s.io/apimachinery/pkg/labels"
    38  	"k8s.io/apimachinery/pkg/selection"
    39  	"k8s.io/dynamic-resource-allocation/kubeletplugin"
    40  	"k8s.io/klog/v2"
    41  	"k8s.io/kubernetes/test/e2e/dra/test-driver/app"
    42  	"k8s.io/kubernetes/test/e2e/framework"
    43  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    44  	e2ereplicaset "k8s.io/kubernetes/test/e2e/framework/replicaset"
    45  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    46  	"k8s.io/kubernetes/test/e2e/storage/drivers/proxy"
    47  	"k8s.io/kubernetes/test/e2e/storage/utils"
    48  )
    49  
// Names of the kubelet plugin's node service methods for the v1alpha2 and
// v1alpha3 API versions. They are written the way method names show up in
// grpc.UnaryServerInfo.FullMethod, which is what the driver's interceptor
// uses as MethodInstance.FullMethod. That makes them suitable for Fail and
// CallCount.
const (
	NodePrepareResourceMethod    = "/v1alpha2.Node/NodePrepareResource"
	NodePrepareResourcesMethod   = "/v1alpha3.Node/NodePrepareResources"
	NodeUnprepareResourceMethod  = "/v1alpha2.Node/NodeUnprepareResource"
	NodeUnprepareResourcesMethod = "/v1alpha3.Node/NodeUnprepareResources"
)
    56  
// Nodes holds the names of the cluster nodes that a test runs on.
type Nodes struct {
	// NodeNames gets replaced before each test by the BeforeEach
	// callback that NewNodes registers.
	NodeNames []string
}
    60  
    61  // NewNodes selects nodes to run the test on.
    62  func NewNodes(f *framework.Framework, minNodes, maxNodes int) *Nodes {
    63  	nodes := &Nodes{}
    64  	ginkgo.BeforeEach(func(ctx context.Context) {
    65  		ginkgo.By("selecting nodes")
    66  		// The kubelet plugin is harder. We deploy the builtin manifest
    67  		// after patching in the driver name and all nodes on which we
    68  		// want the plugin to run.
    69  		//
    70  		// Only a subset of the nodes are picked to avoid causing
    71  		// unnecessary load on a big cluster.
    72  		nodeList, err := e2enode.GetBoundedReadySchedulableNodes(ctx, f.ClientSet, maxNodes)
    73  		framework.ExpectNoError(err, "get nodes")
    74  		numNodes := int32(len(nodeList.Items))
    75  		if int(numNodes) < minNodes {
    76  			e2eskipper.Skipf("%d ready nodes required, only have %d", minNodes, numNodes)
    77  		}
    78  		nodes.NodeNames = nil
    79  		for _, node := range nodeList.Items {
    80  			nodes.NodeNames = append(nodes.NodeNames, node.Name)
    81  		}
    82  		framework.Logf("testing on nodes %v", nodes.NodeNames)
    83  	})
    84  	return nodes
    85  }
    86  
    87  // NewDriver sets up controller (as client of the cluster) and
    88  // kubelet plugin (via proxy) before the test runs. It cleans
    89  // up after the test.
    90  func NewDriver(f *framework.Framework, nodes *Nodes, configureResources func() app.Resources) *Driver {
    91  	d := &Driver{
    92  		f:            f,
    93  		fail:         map[MethodInstance]bool{},
    94  		callCounts:   map[MethodInstance]int64{},
    95  		NodeV1alpha2: true,
    96  		NodeV1alpha3: true,
    97  	}
    98  
    99  	ginkgo.BeforeEach(func() {
   100  		resources := configureResources()
   101  		if len(resources.Nodes) == 0 {
   102  			// This always has to be set because the driver might
   103  			// not run on all nodes.
   104  			resources.Nodes = nodes.NodeNames
   105  		}
   106  		d.SetUp(nodes, resources)
   107  		ginkgo.DeferCleanup(d.TearDown)
   108  	})
   109  	return d
   110  }
   111  
// MethodInstance identifies one gRPC method of the kubelet plugin on one
// particular node: Nodename is the node for which the plugin was started,
// FullMethod is the method name as found in grpc.UnaryServerInfo.FullMethod.
// It is the key for error injection (Driver.Fail) and call counting
// (Driver.CallCount).
//
// The driver's interceptor constructs it with an unkeyed literal, so the
// field order must not change.
type MethodInstance struct {
	Nodename   string
	FullMethod string
}
   116  
// Driver is one instance of the test driver: a controller plus one kubelet
// plugin per node. NewDriver creates it, SetUp deploys it and TearDown
// removes it again.
type Driver struct {
	// f is set by NewDriver, ctx by SetUp. The first function that SetUp
	// adds to cleanup cancels ctx. wg tracks the controller goroutine.
	f       *framework.Framework
	ctx     context.Context
	cleanup []func() // executed first-in-first-out
	wg      sync.WaitGroup

	// NameSuffix is inserted between the framework's unique name and
	// ".k8s.io" when SetUp computes Name. Controller and Nodes (one
	// plugin per node name) are populated by SetUp.
	NameSuffix string
	Controller *app.ExampleController
	Name       string
	Nodes      map[string]*app.ExamplePlugin

	// NodeV1alpha2 and NodeV1alpha3 are handed to the kubelet plugin
	// options of the same name in SetUp. NewDriver sets both to true.
	NodeV1alpha2, NodeV1alpha3 bool

	// mutex is held while fail and callCounts are accessed.
	mutex      sync.Mutex
	fail       map[MethodInstance]bool
	callCounts map[MethodInstance]int64
}
   134  
// SetUp deploys the driver. In this order it:
//   - derives the driver name and stores it in d.Name and in the local
//     copy of resources,
//   - starts the controller in a goroutine tracked by d.wg,
//   - creates the proxy ReplicaSet from the manifest, patched to have one
//     replica per entry in nodes.NodeNames,
//   - starts one kubelet plugin per proxy pod and records it in d.Nodes,
//   - waits up to one minute until app.BeRegistered matches the gRPC calls
//     of every plugin.
//
// Failures are reported through framework.ExpectNoError and gomega.
// Functions which undo the setup are appended to d.cleanup; TearDown
// executes them.
func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
	ginkgo.By(fmt.Sprintf("deploying driver on nodes %v", nodes.NodeNames))
	d.Nodes = map[string]*app.ExamplePlugin{}
	d.Name = d.f.UniqueName + d.NameSuffix + ".k8s.io"
	resources.DriverName = d.Name

	// The context is derived from context.Background() and only gets
	// cancelled through d.cleanup. With a name suffix, its logger is
	// named after the instance.
	ctx, cancel := context.WithCancel(context.Background())
	if d.NameSuffix != "" {
		logger := klog.FromContext(ctx)
		logger = klog.LoggerWithName(logger, "instance"+d.NameSuffix)
		ctx = klog.NewContext(ctx, logger)
	}
	d.ctx = ctx
	// cancel must be the first cleanup entry: stopping the plugins
	// further down depends on it having been called.
	d.cleanup = append(d.cleanup, cancel)

	// The controller is easy: we simply connect to the API server.
	d.Controller = app.NewController(d.f.ClientSet, resources)
	d.wg.Add(1)
	go func() {
		defer d.wg.Done()
		d.Controller.Run(d.ctx, 5 /* workers */)
	}()

	manifests := []string{
		// The code below matches the content of this manifest (ports,
		// container names, etc.).
		"test/e2e/testing-manifests/dra/dra-test-driver-proxy.yaml",
	}
	instanceKey := "app.kubernetes.io/instance"
	rsName := ""
	draAddr := path.Join(framework.TestContext.KubeletRootDir, "plugins", d.Name+".sock")
	numNodes := int32(len(nodes.NodeNames))
	err := utils.CreateFromManifests(ctx, d.f, d.f.Namespace, func(item interface{}) error {
		switch item := item.(type) {
		case *appsv1.ReplicaSet:
			// Make name and labels specific to this driver instance and
			// remember the name for the lookup after creation.
			item.Name += d.NameSuffix
			rsName = item.Name
			item.Spec.Replicas = &numNodes
			item.Spec.Selector.MatchLabels[instanceKey] = d.Name
			item.Spec.Template.Labels[instanceKey] = d.Name
			item.Spec.Template.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution[0].LabelSelector.MatchLabels[instanceKey] = d.Name
			// Restrict scheduling to the selected nodes.
			item.Spec.Template.Spec.Affinity.NodeAffinity = &v1.NodeAffinity{
				RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
					NodeSelectorTerms: []v1.NodeSelectorTerm{
						{
							MatchExpressions: []v1.NodeSelectorRequirement{
								{
									Key:      "kubernetes.io/hostname",
									Operator: v1.NodeSelectorOpIn,
									Values:   nodes.NodeNames,
								},
							},
						},
					},
				},
			}
			// The volume and container indices below rely on the order
			// of entries in the manifest.
			item.Spec.Template.Spec.Volumes[0].HostPath.Path = path.Join(framework.TestContext.KubeletRootDir, "plugins")
			item.Spec.Template.Spec.Volumes[2].HostPath.Path = path.Join(framework.TestContext.KubeletRootDir, "plugins_registry")
			item.Spec.Template.Spec.Containers[0].Args = append(item.Spec.Template.Spec.Containers[0].Args, "--endpoint=/plugins_registry/"+d.Name+"-reg.sock")
			item.Spec.Template.Spec.Containers[1].Args = append(item.Spec.Template.Spec.Containers[1].Args, "--endpoint=/dra/"+d.Name+".sock")
		}
		return nil
	}, manifests...)
	framework.ExpectNoError(err, "deploy kubelet plugin replicaset")

	rs, err := d.f.ClientSet.AppsV1().ReplicaSets(d.f.Namespace.Name).Get(ctx, rsName, metav1.GetOptions{})
	framework.ExpectNoError(err, "get replicaset")

	// Wait for all pods to be running.
	if err := e2ereplicaset.WaitForReplicaSetTargetAvailableReplicas(ctx, d.f.ClientSet, rs, numNodes); err != nil {
		framework.ExpectNoError(err, "all kubelet plugin proxies running")
	}
	// Find the proxy pods through the instance label that was patched
	// into the pod template above.
	requirement, err := labels.NewRequirement(instanceKey, selection.Equals, []string{d.Name})
	framework.ExpectNoError(err, "create label selector requirement")
	selector := labels.NewSelector().Add(*requirement)
	pods, err := d.f.ClientSet.CoreV1().Pods(d.f.Namespace.Name).List(ctx, metav1.ListOptions{LabelSelector: selector.String()})
	framework.ExpectNoError(err, "list proxy pods")
	gomega.Expect(numNodes).To(gomega.Equal(int32(len(pods.Items))), "number of proxy pods")

	// Run registrar and plugin for each of the pods.
	for _, pod := range pods.Items {
		// Need a local variable, not the loop variable, for the anonymous
		// callback functions below.
		pod := pod
		nodename := pod.Spec.NodeName
		logger := klog.LoggerWithValues(klog.LoggerWithName(klog.Background(), "kubelet plugin"), "node", pod.Spec.NodeName, "pod", klog.KObj(&pod))
		plugin, err := app.StartPlugin(logger, "/cdi", d.Name, nodename,
			// CDI files are created and removed inside the proxy pod.
			app.FileOperations{
				Create: func(name string, content []byte) error {
					klog.Background().Info("creating CDI file", "node", nodename, "filename", name, "content", string(content))
					return d.createFile(&pod, name, content)
				},
				Remove: func(name string) error {
					klog.Background().Info("deleting CDI file", "node", nodename, "filename", name)
					return d.removeFile(&pod, name)
				},
			},
			kubeletplugin.GRPCVerbosity(0),
			// All unary calls go through d.interceptor, which counts them
			// and injects errors on request.
			kubeletplugin.GRPCInterceptor(func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) {
				return d.interceptor(nodename, ctx, req, info, handler)
			}),
			// Container names and ports have to match the manifest.
			kubeletplugin.PluginListener(listen(ctx, d.f, pod.Name, "plugin", 9001)),
			kubeletplugin.RegistrarListener(listen(ctx, d.f, pod.Name, "registrar", 9000)),
			kubeletplugin.KubeletPluginSocketPath(draAddr),
			kubeletplugin.NodeV1alpha2(d.NodeV1alpha2),
			kubeletplugin.NodeV1alpha3(d.NodeV1alpha3),
		)
		framework.ExpectNoError(err, "start kubelet plugin for node %s", pod.Spec.NodeName)
		d.cleanup = append(d.cleanup, func() {
			// Depends on cancel being called first.
			plugin.Stop()
		})
		d.Nodes[nodename] = plugin
	}

	// Wait for registration.
	ginkgo.By("wait for plugin registration")
	gomega.Eventually(func() map[string][]app.GRPCCall {
		// Collect the calls seen so far for each node whose plugin does
		// not match app.BeRegistered yet. They end up in the failure
		// message if the timeout is reached.
		notRegistered := make(map[string][]app.GRPCCall)
		for nodename, plugin := range d.Nodes {
			calls := plugin.GetGRPCCalls()
			if contains, err := app.BeRegistered.Match(calls); err != nil || !contains {
				notRegistered[nodename] = calls
			}
		}
		return notRegistered
	}).WithTimeout(time.Minute).Should(gomega.BeEmpty(), "hosts where the plugin has not been registered yet")
}
   263  
   264  func (d *Driver) createFile(pod *v1.Pod, name string, content []byte) error {
   265  	buffer := bytes.NewBuffer(content)
   266  	// Writing the content can be slow. Better create a temporary file and
   267  	// move it to the final destination once it is complete.
   268  	tmpName := name + ".tmp"
   269  	if err := d.podIO(pod).CreateFile(tmpName, buffer); err != nil {
   270  		_ = d.podIO(pod).RemoveAll(tmpName)
   271  		return err
   272  	}
   273  	return d.podIO(pod).Rename(tmpName, name)
   274  }
   275  
   276  func (d *Driver) removeFile(pod *v1.Pod, name string) error {
   277  	return d.podIO(pod).RemoveAll(name)
   278  }
   279  
   280  func (d *Driver) podIO(pod *v1.Pod) proxy.PodDirIO {
   281  	logger := klog.Background()
   282  	return proxy.PodDirIO{
   283  		F:             d.f,
   284  		Namespace:     pod.Namespace,
   285  		PodName:       pod.Name,
   286  		ContainerName: "plugin",
   287  		Logger:        &logger,
   288  	}
   289  }
   290  
   291  func listen(ctx context.Context, f *framework.Framework, podName, containerName string, port int) net.Listener {
   292  	addr := proxy.Addr{
   293  		Namespace:     f.Namespace.Name,
   294  		PodName:       podName,
   295  		ContainerName: containerName,
   296  		Port:          port,
   297  	}
   298  	listener, err := proxy.Listen(ctx, f.ClientSet, f.ClientConfig(), addr)
   299  	framework.ExpectNoError(err, "listen for connections from %+v", addr)
   300  	return listener
   301  }
   302  
   303  func (d *Driver) TearDown() {
   304  	for _, c := range d.cleanup {
   305  		c()
   306  	}
   307  	d.cleanup = nil
   308  	d.wg.Wait()
   309  }
   310  
   311  func (d *Driver) interceptor(nodename string, ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) {
   312  	d.mutex.Lock()
   313  	defer d.mutex.Unlock()
   314  
   315  	m := MethodInstance{nodename, info.FullMethod}
   316  	d.callCounts[m]++
   317  	if d.fail[m] {
   318  		return nil, errors.New("injected error")
   319  	}
   320  
   321  	return handler(ctx, req)
   322  }
   323  
   324  func (d *Driver) Fail(m MethodInstance, injectError bool) {
   325  	d.mutex.Lock()
   326  	defer d.mutex.Unlock()
   327  
   328  	d.fail[m] = injectError
   329  }
   330  
   331  func (d *Driver) CallCount(m MethodInstance) int64 {
   332  	d.mutex.Lock()
   333  	defer d.mutex.Unlock()
   334  
   335  	return d.callCounts[m]
   336  }
   337  
   338  func (d *Driver) Nodenames() (nodenames []string) {
   339  	for nodename := range d.Nodes {
   340  		nodenames = append(nodenames, nodename)
   341  	}
   342  	sort.Strings(nodenames)
   343  	return
   344  }