k8s.io/kubernetes@v1.29.3/test/e2e/dra/test-driver/app/kubeletplugin.go (about)

     1  /*
     2  Copyright 2022 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package app
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"fmt"
    23  	"os"
    24  	"path/filepath"
    25  	"sync"
    26  
    27  	"google.golang.org/grpc"
    28  
    29  	"k8s.io/dynamic-resource-allocation/kubeletplugin"
    30  	"k8s.io/klog/v2"
    31  	drapbv1alpha2 "k8s.io/kubelet/pkg/apis/dra/v1alpha2"
    32  	drapbv1alpha3 "k8s.io/kubelet/pkg/apis/dra/v1alpha3"
    33  )
    34  
// ExamplePlugin is the kubelet plugin side of the DRA test driver. It is
// created by StartPlugin, prepares claims by writing CDI files and unprepares
// them by removing those files again. It also records the gRPC calls that it
// receives.
type ExamplePlugin struct {
	// logger is the logger passed to StartPlugin, d the plugin handle
	// returned by kubeletplugin.Start and fileOps the callbacks that are
	// used to create and remove CDI files.
	logger  klog.Logger
	d       kubeletplugin.DRAPlugin
	fileOps FileOperations

	// cdiDir is the directory in which CDI files are placed. driverName
	// is part of the CDI file name and also used as CDI vendor. nodeName,
	// if non-empty, is checked against the node name stored in the
	// resource handle of a claim.
	cdiDir     string
	driverName string
	nodeName   string

	// mutex is held while accessing prepared and gRPCCalls. prepared
	// contains the claims for which NodePrepareResource succeeded and
	// which were not unprepared since then. gRPCCalls lists all calls
	// seen by recordGRPCCall in the order in which they started.
	mutex     sync.Mutex
	prepared  map[ClaimID]bool
	gRPCCalls []GRPCCall

	// block, when true, makes NodePrepareResource and
	// NodeUnprepareResource wait until the context of the call is done.
	// It gets set by Block.
	block bool
}
    50  
// GRPCCall describes one gRPC call that was intercepted by
// ExamplePlugin.recordGRPCCall. Instances are returned by GetGRPCCalls.
type GRPCCall struct {
	// FullMethod is the fully qualified method name, e.g. /package.service/method.
	FullMethod string

	// Request contains the parameters of the call.
	Request interface{}

	// Response contains the reply of the plugin. It is nil for calls that are in progress.
	Response interface{}

	// Err contains the error return value of the plugin. It is nil for calls that are in progress or succeeded.
	Err error
}
    64  
// ClaimID contains both claim name and UID to simplify debugging. The
// namespace is not included because it is random in E2E tests and the UID is
// sufficient to make the ClaimID unique.
//
// ClaimID is comparable and used as key of the map of prepared claims.
type ClaimID struct {
	// Name and UID identify the claim. NodePrepareResource and
	// NodeUnprepareResource fill them from the ClaimName and ClaimUid
	// fields of the request.
	Name string
	UID  string
}
    72  
// Compile-time check that *ExamplePlugin implements the v1alpha2 NodeServer
// interface.
var _ drapbv1alpha2.NodeServer = &ExamplePlugin{}
    74  
    75  // getJSONFilePath returns the absolute path where CDI file is/should be.
    76  func (ex *ExamplePlugin) getJSONFilePath(claimUID string) string {
    77  	return filepath.Join(ex.cdiDir, fmt.Sprintf("%s-%s.json", ex.driverName, claimUID))
    78  }
    79  
// FileOperations defines optional callbacks for handling CDI files.
// StartPlugin replaces each callback that is nil with a default that
// operates on the local file system via the os package.
type FileOperations struct {
	// Create must overwrite the file. It gets called with the path of
	// the CDI file and the JSON encoded CDI spec as content.
	Create func(name string, content []byte) error

	// Remove must remove the file. It must not return an error when the
	// file does not exist.
	Remove func(name string) error
}
    89  
    90  // StartPlugin sets up the servers that are necessary for a DRA kubelet plugin.
    91  func StartPlugin(logger klog.Logger, cdiDir, driverName string, nodeName string, fileOps FileOperations, opts ...kubeletplugin.Option) (*ExamplePlugin, error) {
    92  	if fileOps.Create == nil {
    93  		fileOps.Create = func(name string, content []byte) error {
    94  			return os.WriteFile(name, content, os.FileMode(0644))
    95  		}
    96  	}
    97  	if fileOps.Remove == nil {
    98  		fileOps.Remove = func(name string) error {
    99  			if err := os.Remove(name); err != nil && !os.IsNotExist(err) {
   100  				return err
   101  			}
   102  			return nil
   103  		}
   104  	}
   105  	ex := &ExamplePlugin{
   106  		logger:     logger,
   107  		fileOps:    fileOps,
   108  		cdiDir:     cdiDir,
   109  		driverName: driverName,
   110  		nodeName:   nodeName,
   111  		prepared:   make(map[ClaimID]bool),
   112  	}
   113  
   114  	opts = append(opts,
   115  		kubeletplugin.Logger(logger),
   116  		kubeletplugin.DriverName(driverName),
   117  		kubeletplugin.GRPCInterceptor(ex.recordGRPCCall),
   118  	)
   119  	d, err := kubeletplugin.Start(ex, opts...)
   120  	if err != nil {
   121  		return nil, fmt.Errorf("start kubelet plugin: %w", err)
   122  	}
   123  	ex.d = d
   124  
   125  	return ex, nil
   126  }
   127  
// Stop stops the kubelet plugin instance that was created by StartPlugin.
func (ex *ExamplePlugin) Stop() {
	ex.d.Stop()
}
   132  
   133  func (ex *ExamplePlugin) IsRegistered() bool {
   134  	status := ex.d.RegistrationStatus()
   135  	if status == nil {
   136  		return false
   137  	}
   138  	return status.PluginRegistered
   139  }
   140  
   141  // Block sets a flag to block Node[Un]PrepareResources
   142  // to emulate time consuming or stuck calls
   143  func (ex *ExamplePlugin) Block() {
   144  	ex.block = true
   145  }
   146  
   147  // NodePrepareResource ensures that the CDI file for the claim exists. It uses
   148  // a deterministic name to simplify NodeUnprepareResource (no need to remember
   149  // or discover the name) and idempotency (when called again, the file simply
   150  // gets written again).
   151  func (ex *ExamplePlugin) NodePrepareResource(ctx context.Context, req *drapbv1alpha2.NodePrepareResourceRequest) (*drapbv1alpha2.NodePrepareResourceResponse, error) {
   152  	logger := klog.FromContext(ctx)
   153  
   154  	// Block to emulate plugin stuckness or slowness.
   155  	// By default the call will not be blocked as ex.block = false.
   156  	if ex.block {
   157  		<-ctx.Done()
   158  		return nil, ctx.Err()
   159  	}
   160  
   161  	// Determine environment variables.
   162  	var p parameters
   163  	if err := json.Unmarshal([]byte(req.ResourceHandle), &p); err != nil {
   164  		return nil, fmt.Errorf("unmarshal resource handle: %w", err)
   165  	}
   166  
   167  	// Sanity check scheduling.
   168  	if p.NodeName != "" && ex.nodeName != "" && p.NodeName != ex.nodeName {
   169  		return nil, fmt.Errorf("claim was allocated for %q, cannot be prepared on %q", p.NodeName, ex.nodeName)
   170  	}
   171  
   172  	// CDI wants env variables as set of strings.
   173  	envs := []string{}
   174  	for key, val := range p.EnvVars {
   175  		envs = append(envs, key+"="+val)
   176  	}
   177  
   178  	deviceName := "claim-" + req.ClaimUid
   179  	vendor := ex.driverName
   180  	class := "test"
   181  	spec := &spec{
   182  		Version: "0.3.0", // This has to be a version accepted by the runtimes.
   183  		Kind:    vendor + "/" + class,
   184  		// At least one device is required and its entry must have more
   185  		// than just the name.
   186  		Devices: []device{
   187  			{
   188  				Name: deviceName,
   189  				ContainerEdits: containerEdits{
   190  					Env: envs,
   191  				},
   192  			},
   193  		},
   194  	}
   195  	filePath := ex.getJSONFilePath(req.ClaimUid)
   196  	buffer, err := json.Marshal(spec)
   197  	if err != nil {
   198  		return nil, fmt.Errorf("marshal spec: %w", err)
   199  	}
   200  	if err := ex.fileOps.Create(filePath, buffer); err != nil {
   201  		return nil, fmt.Errorf("failed to write CDI file %v", err)
   202  	}
   203  
   204  	dev := vendor + "/" + class + "=" + deviceName
   205  	resp := &drapbv1alpha2.NodePrepareResourceResponse{CdiDevices: []string{dev}}
   206  
   207  	ex.mutex.Lock()
   208  	defer ex.mutex.Unlock()
   209  	ex.prepared[ClaimID{Name: req.ClaimName, UID: req.ClaimUid}] = true
   210  
   211  	logger.V(3).Info("CDI file created", "path", filePath, "device", dev)
   212  	return resp, nil
   213  }
   214  
   215  func (ex *ExamplePlugin) NodePrepareResources(ctx context.Context, req *drapbv1alpha3.NodePrepareResourcesRequest) (*drapbv1alpha3.NodePrepareResourcesResponse, error) {
   216  	resp := &drapbv1alpha3.NodePrepareResourcesResponse{
   217  		Claims: make(map[string]*drapbv1alpha3.NodePrepareResourceResponse),
   218  	}
   219  	for _, claimReq := range req.Claims {
   220  		claimResp, err := ex.NodePrepareResource(ctx, &drapbv1alpha2.NodePrepareResourceRequest{
   221  			Namespace:      claimReq.Namespace,
   222  			ClaimName:      claimReq.Name,
   223  			ClaimUid:       claimReq.Uid,
   224  			ResourceHandle: claimReq.ResourceHandle,
   225  		})
   226  		if err != nil {
   227  			resp.Claims[claimReq.Uid] = &drapbv1alpha3.NodePrepareResourceResponse{
   228  				Error: err.Error(),
   229  			}
   230  		} else {
   231  			resp.Claims[claimReq.Uid] = &drapbv1alpha3.NodePrepareResourceResponse{
   232  				CDIDevices: claimResp.CdiDevices,
   233  			}
   234  		}
   235  	}
   236  	return resp, nil
   237  }
   238  
   239  // NodeUnprepareResource removes the CDI file created by
   240  // NodePrepareResource. It's idempotent, therefore it is not an error when that
   241  // file is already gone.
   242  func (ex *ExamplePlugin) NodeUnprepareResource(ctx context.Context, req *drapbv1alpha2.NodeUnprepareResourceRequest) (*drapbv1alpha2.NodeUnprepareResourceResponse, error) {
   243  	logger := klog.FromContext(ctx)
   244  
   245  	// Block to emulate plugin stuckness or slowness.
   246  	// By default the call will not be blocked as ex.block = false.
   247  	if ex.block {
   248  		<-ctx.Done()
   249  		return nil, ctx.Err()
   250  	}
   251  
   252  	filePath := ex.getJSONFilePath(req.ClaimUid)
   253  	if err := ex.fileOps.Remove(filePath); err != nil {
   254  		return nil, fmt.Errorf("error removing CDI file: %w", err)
   255  	}
   256  	logger.V(3).Info("CDI file removed", "path", filePath)
   257  
   258  	ex.mutex.Lock()
   259  	defer ex.mutex.Unlock()
   260  	delete(ex.prepared, ClaimID{Name: req.ClaimName, UID: req.ClaimUid})
   261  
   262  	return &drapbv1alpha2.NodeUnprepareResourceResponse{}, nil
   263  }
   264  
   265  func (ex *ExamplePlugin) NodeUnprepareResources(ctx context.Context, req *drapbv1alpha3.NodeUnprepareResourcesRequest) (*drapbv1alpha3.NodeUnprepareResourcesResponse, error) {
   266  	resp := &drapbv1alpha3.NodeUnprepareResourcesResponse{
   267  		Claims: make(map[string]*drapbv1alpha3.NodeUnprepareResourceResponse),
   268  	}
   269  	for _, claimReq := range req.Claims {
   270  		_, err := ex.NodeUnprepareResource(ctx, &drapbv1alpha2.NodeUnprepareResourceRequest{
   271  			Namespace:      claimReq.Namespace,
   272  			ClaimName:      claimReq.Name,
   273  			ClaimUid:       claimReq.Uid,
   274  			ResourceHandle: claimReq.ResourceHandle,
   275  		})
   276  		if err != nil {
   277  			resp.Claims[claimReq.Uid] = &drapbv1alpha3.NodeUnprepareResourceResponse{
   278  				Error: err.Error(),
   279  			}
   280  		} else {
   281  			resp.Claims[claimReq.Uid] = &drapbv1alpha3.NodeUnprepareResourceResponse{}
   282  		}
   283  	}
   284  	return resp, nil
   285  }
   286  
   287  func (ex *ExamplePlugin) GetPreparedResources() []ClaimID {
   288  	ex.mutex.Lock()
   289  	defer ex.mutex.Unlock()
   290  	var prepared []ClaimID
   291  	for claimID := range ex.prepared {
   292  		prepared = append(prepared, claimID)
   293  	}
   294  	return prepared
   295  }
   296  
   297  func (ex *ExamplePlugin) recordGRPCCall(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) {
   298  	call := GRPCCall{
   299  		FullMethod: info.FullMethod,
   300  		Request:    req,
   301  	}
   302  	ex.mutex.Lock()
   303  	ex.gRPCCalls = append(ex.gRPCCalls, call)
   304  	index := len(ex.gRPCCalls) - 1
   305  	ex.mutex.Unlock()
   306  
   307  	// We don't hold the mutex here to allow concurrent calls.
   308  	call.Response, call.Err = handler(ctx, req)
   309  
   310  	ex.mutex.Lock()
   311  	ex.gRPCCalls[index] = call
   312  	ex.mutex.Unlock()
   313  
   314  	return call.Response, call.Err
   315  }
   316  
   317  func (ex *ExamplePlugin) GetGRPCCalls() []GRPCCall {
   318  	ex.mutex.Lock()
   319  	defer ex.mutex.Unlock()
   320  
   321  	// We must return a new slice, otherwise adding new calls would become
   322  	// visible to the caller. We also need to copy the entries because
   323  	// they get mutated by recordGRPCCall.
   324  	calls := make([]GRPCCall, 0, len(ex.gRPCCalls))
   325  	calls = append(calls, ex.gRPCCalls...)
   326  	return calls
   327  }