k8s.io/kubernetes@v1.29.3/test/e2e/dra/test-driver/app/kubeletplugin.go (about) 1 /* 2 Copyright 2022 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package app 18 19 import ( 20 "context" 21 "encoding/json" 22 "fmt" 23 "os" 24 "path/filepath" 25 "sync" 26 27 "google.golang.org/grpc" 28 29 "k8s.io/dynamic-resource-allocation/kubeletplugin" 30 "k8s.io/klog/v2" 31 drapbv1alpha2 "k8s.io/kubelet/pkg/apis/dra/v1alpha2" 32 drapbv1alpha3 "k8s.io/kubelet/pkg/apis/dra/v1alpha3" 33 ) 34 35 type ExamplePlugin struct { 36 logger klog.Logger 37 d kubeletplugin.DRAPlugin 38 fileOps FileOperations 39 40 cdiDir string 41 driverName string 42 nodeName string 43 44 mutex sync.Mutex 45 prepared map[ClaimID]bool 46 gRPCCalls []GRPCCall 47 48 block bool 49 } 50 51 type GRPCCall struct { 52 // FullMethod is the fully qualified, e.g. /package.service/method. 53 FullMethod string 54 55 // Request contains the parameters of the call. 56 Request interface{} 57 58 // Response contains the reply of the plugin. It is nil for calls that are in progress. 59 Response interface{} 60 61 // Err contains the error return value of the plugin. It is nil for calls that are in progress or succeeded. 62 Err error 63 } 64 65 // ClaimID contains both claim name and UID to simplify debugging. The 66 // namespace is not included because it is random in E2E tests and the UID is 67 // sufficient to make the ClaimID unique. 68 type ClaimID struct { 69 Name string 70 UID string 71 } 72 73 var _ drapbv1alpha2.NodeServer = &ExamplePlugin{} 74 75 // getJSONFilePath returns the absolute path where CDI file is/should be. 76 func (ex *ExamplePlugin) getJSONFilePath(claimUID string) string { 77 return filepath.Join(ex.cdiDir, fmt.Sprintf("%s-%s.json", ex.driverName, claimUID)) 78 } 79 80 // FileOperations defines optional callbacks for handling CDI files. 81 type FileOperations struct { 82 // Create must overwrite the file. 83 Create func(name string, content []byte) error 84 85 // Remove must remove the file. It must not return an error when the 86 // file does not exist. 87 Remove func(name string) error 88 } 89 90 // StartPlugin sets up the servers that are necessary for a DRA kubelet plugin. 91 func StartPlugin(logger klog.Logger, cdiDir, driverName string, nodeName string, fileOps FileOperations, opts ...kubeletplugin.Option) (*ExamplePlugin, error) { 92 if fileOps.Create == nil { 93 fileOps.Create = func(name string, content []byte) error { 94 return os.WriteFile(name, content, os.FileMode(0644)) 95 } 96 } 97 if fileOps.Remove == nil { 98 fileOps.Remove = func(name string) error { 99 if err := os.Remove(name); err != nil && !os.IsNotExist(err) { 100 return err 101 } 102 return nil 103 } 104 } 105 ex := &ExamplePlugin{ 106 logger: logger, 107 fileOps: fileOps, 108 cdiDir: cdiDir, 109 driverName: driverName, 110 nodeName: nodeName, 111 prepared: make(map[ClaimID]bool), 112 } 113 114 opts = append(opts, 115 kubeletplugin.Logger(logger), 116 kubeletplugin.DriverName(driverName), 117 kubeletplugin.GRPCInterceptor(ex.recordGRPCCall), 118 ) 119 d, err := kubeletplugin.Start(ex, opts...) 120 if err != nil { 121 return nil, fmt.Errorf("start kubelet plugin: %w", err) 122 } 123 ex.d = d 124 125 return ex, nil 126 } 127 128 // stop ensures that all servers are stopped and resources freed. 129 func (ex *ExamplePlugin) Stop() { 130 ex.d.Stop() 131 } 132 133 func (ex *ExamplePlugin) IsRegistered() bool { 134 status := ex.d.RegistrationStatus() 135 if status == nil { 136 return false 137 } 138 return status.PluginRegistered 139 } 140 141 // Block sets a flag to block Node[Un]PrepareResources 142 // to emulate time consuming or stuck calls 143 func (ex *ExamplePlugin) Block() { 144 ex.block = true 145 } 146 147 // NodePrepareResource ensures that the CDI file for the claim exists. It uses 148 // a deterministic name to simplify NodeUnprepareResource (no need to remember 149 // or discover the name) and idempotency (when called again, the file simply 150 // gets written again). 151 func (ex *ExamplePlugin) NodePrepareResource(ctx context.Context, req *drapbv1alpha2.NodePrepareResourceRequest) (*drapbv1alpha2.NodePrepareResourceResponse, error) { 152 logger := klog.FromContext(ctx) 153 154 // Block to emulate plugin stuckness or slowness. 155 // By default the call will not be blocked as ex.block = false. 156 if ex.block { 157 <-ctx.Done() 158 return nil, ctx.Err() 159 } 160 161 // Determine environment variables. 162 var p parameters 163 if err := json.Unmarshal([]byte(req.ResourceHandle), &p); err != nil { 164 return nil, fmt.Errorf("unmarshal resource handle: %w", err) 165 } 166 167 // Sanity check scheduling. 168 if p.NodeName != "" && ex.nodeName != "" && p.NodeName != ex.nodeName { 169 return nil, fmt.Errorf("claim was allocated for %q, cannot be prepared on %q", p.NodeName, ex.nodeName) 170 } 171 172 // CDI wants env variables as set of strings. 173 envs := []string{} 174 for key, val := range p.EnvVars { 175 envs = append(envs, key+"="+val) 176 } 177 178 deviceName := "claim-" + req.ClaimUid 179 vendor := ex.driverName 180 class := "test" 181 spec := &spec{ 182 Version: "0.3.0", // This has to be a version accepted by the runtimes. 183 Kind: vendor + "/" + class, 184 // At least one device is required and its entry must have more 185 // than just the name. 186 Devices: []device{ 187 { 188 Name: deviceName, 189 ContainerEdits: containerEdits{ 190 Env: envs, 191 }, 192 }, 193 }, 194 } 195 filePath := ex.getJSONFilePath(req.ClaimUid) 196 buffer, err := json.Marshal(spec) 197 if err != nil { 198 return nil, fmt.Errorf("marshal spec: %w", err) 199 } 200 if err := ex.fileOps.Create(filePath, buffer); err != nil { 201 return nil, fmt.Errorf("failed to write CDI file %v", err) 202 } 203 204 dev := vendor + "/" + class + "=" + deviceName 205 resp := &drapbv1alpha2.NodePrepareResourceResponse{CdiDevices: []string{dev}} 206 207 ex.mutex.Lock() 208 defer ex.mutex.Unlock() 209 ex.prepared[ClaimID{Name: req.ClaimName, UID: req.ClaimUid}] = true 210 211 logger.V(3).Info("CDI file created", "path", filePath, "device", dev) 212 return resp, nil 213 } 214 215 func (ex *ExamplePlugin) NodePrepareResources(ctx context.Context, req *drapbv1alpha3.NodePrepareResourcesRequest) (*drapbv1alpha3.NodePrepareResourcesResponse, error) { 216 resp := &drapbv1alpha3.NodePrepareResourcesResponse{ 217 Claims: make(map[string]*drapbv1alpha3.NodePrepareResourceResponse), 218 } 219 for _, claimReq := range req.Claims { 220 claimResp, err := ex.NodePrepareResource(ctx, &drapbv1alpha2.NodePrepareResourceRequest{ 221 Namespace: claimReq.Namespace, 222 ClaimName: claimReq.Name, 223 ClaimUid: claimReq.Uid, 224 ResourceHandle: claimReq.ResourceHandle, 225 }) 226 if err != nil { 227 resp.Claims[claimReq.Uid] = &drapbv1alpha3.NodePrepareResourceResponse{ 228 Error: err.Error(), 229 } 230 } else { 231 resp.Claims[claimReq.Uid] = &drapbv1alpha3.NodePrepareResourceResponse{ 232 CDIDevices: claimResp.CdiDevices, 233 } 234 } 235 } 236 return resp, nil 237 } 238 239 // NodeUnprepareResource removes the CDI file created by 240 // NodePrepareResource. It's idempotent, therefore it is not an error when that 241 // file is already gone. 242 func (ex *ExamplePlugin) NodeUnprepareResource(ctx context.Context, req *drapbv1alpha2.NodeUnprepareResourceRequest) (*drapbv1alpha2.NodeUnprepareResourceResponse, error) { 243 logger := klog.FromContext(ctx) 244 245 // Block to emulate plugin stuckness or slowness. 246 // By default the call will not be blocked as ex.block = false. 247 if ex.block { 248 <-ctx.Done() 249 return nil, ctx.Err() 250 } 251 252 filePath := ex.getJSONFilePath(req.ClaimUid) 253 if err := ex.fileOps.Remove(filePath); err != nil { 254 return nil, fmt.Errorf("error removing CDI file: %w", err) 255 } 256 logger.V(3).Info("CDI file removed", "path", filePath) 257 258 ex.mutex.Lock() 259 defer ex.mutex.Unlock() 260 delete(ex.prepared, ClaimID{Name: req.ClaimName, UID: req.ClaimUid}) 261 262 return &drapbv1alpha2.NodeUnprepareResourceResponse{}, nil 263 } 264 265 func (ex *ExamplePlugin) NodeUnprepareResources(ctx context.Context, req *drapbv1alpha3.NodeUnprepareResourcesRequest) (*drapbv1alpha3.NodeUnprepareResourcesResponse, error) { 266 resp := &drapbv1alpha3.NodeUnprepareResourcesResponse{ 267 Claims: make(map[string]*drapbv1alpha3.NodeUnprepareResourceResponse), 268 } 269 for _, claimReq := range req.Claims { 270 _, err := ex.NodeUnprepareResource(ctx, &drapbv1alpha2.NodeUnprepareResourceRequest{ 271 Namespace: claimReq.Namespace, 272 ClaimName: claimReq.Name, 273 ClaimUid: claimReq.Uid, 274 ResourceHandle: claimReq.ResourceHandle, 275 }) 276 if err != nil { 277 resp.Claims[claimReq.Uid] = &drapbv1alpha3.NodeUnprepareResourceResponse{ 278 Error: err.Error(), 279 } 280 } else { 281 resp.Claims[claimReq.Uid] = &drapbv1alpha3.NodeUnprepareResourceResponse{} 282 } 283 } 284 return resp, nil 285 } 286 287 func (ex *ExamplePlugin) GetPreparedResources() []ClaimID { 288 ex.mutex.Lock() 289 defer ex.mutex.Unlock() 290 var prepared []ClaimID 291 for claimID := range ex.prepared { 292 prepared = append(prepared, claimID) 293 } 294 return prepared 295 } 296 297 func (ex *ExamplePlugin) recordGRPCCall(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) { 298 call := GRPCCall{ 299 FullMethod: info.FullMethod, 300 Request: req, 301 } 302 ex.mutex.Lock() 303 ex.gRPCCalls = append(ex.gRPCCalls, call) 304 index := len(ex.gRPCCalls) - 1 305 ex.mutex.Unlock() 306 307 // We don't hold the mutex here to allow concurrent calls. 308 call.Response, call.Err = handler(ctx, req) 309 310 ex.mutex.Lock() 311 ex.gRPCCalls[index] = call 312 ex.mutex.Unlock() 313 314 return call.Response, call.Err 315 } 316 317 func (ex *ExamplePlugin) GetGRPCCalls() []GRPCCall { 318 ex.mutex.Lock() 319 defer ex.mutex.Unlock() 320 321 // We must return a new slice, otherwise adding new calls would become 322 // visible to the caller. We also need to copy the entries because 323 // they get mutated by recordGRPCCall. 324 calls := make([]GRPCCall, 0, len(ex.gRPCCalls)) 325 calls = append(calls, ex.gRPCCalls...) 326 return calls 327 }