k8s.io/kubernetes@v1.29.3/pkg/kubelet/cm/devicemanager/manager.go

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package devicemanager

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"sync"
	"time"

	cadvisorapi "github.com/google/cadvisor/info/v1"
	"k8s.io/klog/v2"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	errorsutil "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/apimachinery/pkg/util/sets"
	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
	"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
	plugin "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1"
	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
	"k8s.io/kubernetes/pkg/kubelet/config"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
	"k8s.io/kubernetes/pkg/kubelet/types"
	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
)

const nodeWithoutTopology = -1

// ActivePodsFunc is a function that returns a list of pods to reconcile.
type ActivePodsFunc func() []*v1.Pod

// ManagerImpl is the structure in charge of managing Device Plugins.
type ManagerImpl struct {
	checkpointdir string

	endpoints map[string]endpointInfo // Key is ResourceName
	mutex     sync.Mutex

	server plugin.Server

	// activePods is a method for listing active pods on the node
	// so the amount of pluginResources requested by existing pods
	// could be counted when updating allocated devices
	activePods ActivePodsFunc

	// sourcesReady provides the readiness of kubelet configuration sources such as apiserver update readiness.
	// We use it to determine when we can purge inactive pods from checkpointed state.
	sourcesReady config.SourcesReady

	// allDevices holds all the devices currently registered to the device manager
	allDevices ResourceDeviceInstances

	// healthyDevices contains all of the registered healthy resourceNames and their exported device IDs.
	healthyDevices map[string]sets.Set[string]

	// unhealthyDevices contains all of the unhealthy devices and their exported device IDs.
	unhealthyDevices map[string]sets.Set[string]

	// allocatedDevices contains allocated deviceIds, keyed by resourceName.
	allocatedDevices map[string]sets.Set[string]

	// podDevices contains pod to allocated device mapping.
	podDevices        *podDevices
	checkpointManager checkpointmanager.CheckpointManager

	// List of NUMA Nodes available on the underlying machine
	numaNodes []int

	// Store of Topology Affinities that the Device Manager can query.
	topologyAffinityStore topologymanager.Store

	// devicesToReuse contains devices that can be reused as they have been allocated to
	// init containers.
	devicesToReuse PodReusableDevices

	// pendingAdmissionPod contains the pod during the admission phase
	pendingAdmissionPod *v1.Pod

	// containerMap provides a mapping from (pod, container) -> containerID
	// for all containers in a pod. Used to detect pods running across a restart
	containerMap containermap.ContainerMap

	// containerRunningSet identifies which container among those present in `containerMap`
	// was reported running by the container runtime when `containerMap` was computed.
	// Used to detect pods running across a restart
	containerRunningSet sets.Set[string]
}

type endpointInfo struct {
	e    endpoint
	opts *pluginapi.DevicePluginOptions
}

type sourcesReadyStub struct{}

// PodReusableDevices is a map by pod name of devices to reuse.
type PodReusableDevices map[string]map[string]sets.Set[string]

func (s *sourcesReadyStub) AddSource(source string) {}
func (s *sourcesReadyStub) AllReady() bool          { return true }

// NewManagerImpl creates a new manager.
func NewManagerImpl(topology []cadvisorapi.Node, topologyAffinityStore topologymanager.Store) (*ManagerImpl, error) {
	socketPath := pluginapi.KubeletSocket
	if runtime.GOOS == "windows" {
		socketPath = os.Getenv("SYSTEMDRIVE") + pluginapi.KubeletSocketWindows
	}
	return newManagerImpl(socketPath, topology, topologyAffinityStore)
}

func newManagerImpl(socketPath string, topology []cadvisorapi.Node, topologyAffinityStore topologymanager.Store) (*ManagerImpl, error) {
	klog.V(2).InfoS("Creating Device Plugin manager", "path", socketPath)

	var numaNodes []int
	for _, node := range topology {
		numaNodes = append(numaNodes, node.Id)
	}

	manager := &ManagerImpl{
		endpoints: make(map[string]endpointInfo),

		allDevices:            NewResourceDeviceInstances(),
		healthyDevices:        make(map[string]sets.Set[string]),
		unhealthyDevices:      make(map[string]sets.Set[string]),
		allocatedDevices:      make(map[string]sets.Set[string]),
		podDevices:            newPodDevices(),
		numaNodes:             numaNodes,
		topologyAffinityStore: topologyAffinityStore,
		devicesToReuse:        make(PodReusableDevices),
	}

	server, err := plugin.NewServer(socketPath, manager, manager)
	if err != nil {
		return nil, fmt.Errorf("failed to create plugin server: %v", err)
	}

	manager.server = server
	manager.checkpointdir, _ = filepath.Split(server.SocketPath())

	// The following structures are populated with real implementations in manager.Start()
	// Before that, initialize them to perform no-op operations.
	manager.activePods = func() []*v1.Pod { return []*v1.Pod{} }
	manager.sourcesReady = &sourcesReadyStub{}
	checkpointManager, err := checkpointmanager.NewCheckpointManager(manager.checkpointdir)
	if err != nil {
		return nil, fmt.Errorf("failed to initialize checkpoint manager: %v", err)
	}
	manager.checkpointManager = checkpointManager

	return manager, nil
}

// CleanupPluginDirectory is to remove all existing unix sockets
// from /var/lib/kubelet/device-plugins on Device Plugin Manager start
func (m *ManagerImpl) CleanupPluginDirectory(dir string) error {
	d, err := os.Open(dir)
	if err != nil {
		return err
	}
	defer d.Close()
	names, err := d.Readdirnames(-1)
	if err != nil {
		return err
	}
	var errs []error
	for _, name := range names {
		filePath := filepath.Join(dir, name)
		if filePath == m.checkpointFile() {
			continue
		}
		// TODO: Until the bug - https://github.com/golang/go/issues/33357 is fixed, os.stat wouldn't return the
		// right mode (socket) on windows. Hence deleting the file, without checking whether
		// it's a socket, on windows.
		stat, err := os.Lstat(filePath)
		if err != nil {
			klog.ErrorS(err, "Failed to stat file", "path", filePath)
			continue
		}
		if stat.IsDir() {
			continue
		}
		err = os.RemoveAll(filePath)
		if err != nil {
			errs = append(errs, err)
			klog.ErrorS(err, "Failed to remove file", "path", filePath)
			continue
		}
	}
	return errorsutil.NewAggregate(errs)
}

// PluginConnected is to connect a plugin to a new endpoint.
// This is done as part of device plugin registration.
func (m *ManagerImpl) PluginConnected(resourceName string, p plugin.DevicePlugin) error {
	options, err := p.API().GetDevicePluginOptions(context.Background(), &pluginapi.Empty{})
	if err != nil {
		return fmt.Errorf("failed to get device plugin options: %v", err)
	}

	e := newEndpointImpl(p)

	m.mutex.Lock()
	defer m.mutex.Unlock()
	m.endpoints[resourceName] = endpointInfo{e, options}

	klog.V(2).InfoS("Device plugin connected", "resourceName", resourceName)
	return nil
}

// PluginDisconnected is to disconnect a plugin from an endpoint.
// This is done as part of device plugin deregistration.
func (m *ManagerImpl) PluginDisconnected(resourceName string) {
	m.mutex.Lock()
	defer m.mutex.Unlock()

	if ep, exists := m.endpoints[resourceName]; exists {
		m.markResourceUnhealthy(resourceName)
		klog.V(2).InfoS("Endpoint became unhealthy", "resourceName", resourceName, "endpoint", ep)

		ep.e.setStopTime(time.Now())
	}
}
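
// NOTE (editorial sketch, not part of the upstream file): the registration
// lifecycle as seen from this manager, using a hypothetical resource name and
// plugin handle. PluginConnected caches the endpoint and its options, each
// ListAndWatch message is funneled into PluginListAndWatchReceiver, and a
// dropped connection ends up in PluginDisconnected, which marks every device
// of that resource unhealthy:
//
//	_ = m.PluginConnected("vendor.example.com/gpu", devicePlugin)
//	m.PluginListAndWatchReceiver("vendor.example.com/gpu", &pluginapi.ListAndWatchResponse{
//		Devices: []*pluginapi.Device{{ID: "gpu-0", Health: pluginapi.Healthy}},
//	})
//	m.PluginDisconnected("vendor.example.com/gpu")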

// PluginListAndWatchReceiver receives ListAndWatchResponse from a device plugin
// and ensures that an up-to-date state (e.g. number of devices and device health)
// is captured. Also, registered device and device to container allocation
// information is checkpointed to the disk.
func (m *ManagerImpl) PluginListAndWatchReceiver(resourceName string, resp *pluginapi.ListAndWatchResponse) {
	var devices []pluginapi.Device
	for _, d := range resp.Devices {
		devices = append(devices, *d)
	}
	m.genericDeviceUpdateCallback(resourceName, devices)
}

func (m *ManagerImpl) genericDeviceUpdateCallback(resourceName string, devices []pluginapi.Device) {
	healthyCount := 0
	m.mutex.Lock()
	m.healthyDevices[resourceName] = sets.New[string]()
	m.unhealthyDevices[resourceName] = sets.New[string]()
	m.allDevices[resourceName] = make(map[string]pluginapi.Device)
	for _, dev := range devices {
		m.allDevices[resourceName][dev.ID] = dev
		if dev.Health == pluginapi.Healthy {
			m.healthyDevices[resourceName].Insert(dev.ID)
			healthyCount++
		} else {
			m.unhealthyDevices[resourceName].Insert(dev.ID)
		}
	}
	m.mutex.Unlock()
	if err := m.writeCheckpoint(); err != nil {
		klog.ErrorS(err, "Writing checkpoint encountered")
	}
	klog.V(2).InfoS("Processed device updates for resource", "resourceName", resourceName, "totalCount", len(devices), "healthyCount", healthyCount)
}

// GetWatcherHandler returns the plugin handler
func (m *ManagerImpl) GetWatcherHandler() cache.PluginHandler {
	return m.server
}

// checkpointFile returns device plugin checkpoint file path.
func (m *ManagerImpl) checkpointFile() string {
	return filepath.Join(m.checkpointdir, kubeletDeviceManagerCheckpoint)
}

// Start starts the Device Plugin Manager and starts initialization of
// podDevices and allocatedDevices information from checkpointed state and
// starts device plugin registration service.
func (m *ManagerImpl) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, initialContainers containermap.ContainerMap, initialContainerRunningSet sets.Set[string]) error {
	klog.V(2).InfoS("Starting Device Plugin manager")

	m.activePods = activePods
	m.sourcesReady = sourcesReady
	m.containerMap = initialContainers
	m.containerRunningSet = initialContainerRunningSet

	// Loads in allocatedDevices information from disk.
	err := m.readCheckpoint()
	if err != nil {
		klog.InfoS("Continue after failing to read checkpoint file. Device allocation info may NOT be up-to-date", "err", err)
	}

	return m.server.Start()
}

// Stop is the function that can stop the plugin server.
// Can be called concurrently, more than once, and is safe to call
// without a prior Start.
func (m *ManagerImpl) Stop() error {
	return m.server.Stop()
}
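
// NOTE (editorial sketch, not part of the upstream file): how the kubelet's
// container manager is expected to wire this manager up at startup. The
// topology, callbacks and initial container state are supplied by the caller;
// the names below are hypothetical:
//
//	mgr, err := NewManagerImpl(machineInfo.Topology, topologyAffinityStore)
//	if err != nil { /* handle error */ }
//	if err := mgr.Start(activePodsFunc, sourcesReady, initialContainers, initialRunningSet); err != nil { /* handle error */ }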

// Allocate is the call that you can use to allocate a set of devices
// from the registered device plugins.
func (m *ManagerImpl) Allocate(pod *v1.Pod, container *v1.Container) error {
	// The pod is in the admission phase. We need to save the pod to avoid it
	// being cleaned up before admission has ended
	m.setPodPendingAdmission(pod)

	if _, ok := m.devicesToReuse[string(pod.UID)]; !ok {
		m.devicesToReuse[string(pod.UID)] = make(map[string]sets.Set[string])
	}
	// If entries other than the current pod exist in m.devicesToReuse, delete them.
	for podUID := range m.devicesToReuse {
		if podUID != string(pod.UID) {
			delete(m.devicesToReuse, podUID)
		}
	}
	// Allocate resources for init containers first as we know the caller always loops
	// through init containers before looping through app containers. Should the caller
	// ever change those semantics, this logic will need to be amended.
	for _, initContainer := range pod.Spec.InitContainers {
		if container.Name == initContainer.Name {
			if err := m.allocateContainerResources(pod, container, m.devicesToReuse[string(pod.UID)]); err != nil {
				return err
			}
			if !types.IsRestartableInitContainer(&initContainer) {
				m.podDevices.addContainerAllocatedResources(string(pod.UID), container.Name, m.devicesToReuse[string(pod.UID)])
			} else {
				// If the init container is restartable, we need to keep the
				// devices allocated. In other words, we should remove them
				// from the devicesToReuse.
				m.podDevices.removeContainerAllocatedResources(string(pod.UID), container.Name, m.devicesToReuse[string(pod.UID)])
			}
			return nil
		}
	}
	if err := m.allocateContainerResources(pod, container, m.devicesToReuse[string(pod.UID)]); err != nil {
		return err
	}
	m.podDevices.removeContainerAllocatedResources(string(pod.UID), container.Name, m.devicesToReuse[string(pod.UID)])
	return nil
}

// UpdatePluginResources updates node resources based on devices already allocated to pods.
func (m *ManagerImpl) UpdatePluginResources(node *schedulerframework.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
	pod := attrs.Pod

	// quick return if no pluginResources requested
	if !m.podDevices.hasPod(string(pod.UID)) {
		return nil
	}

	m.sanitizeNodeAllocatable(node)
	return nil
}

func (m *ManagerImpl) markResourceUnhealthy(resourceName string) {
	klog.V(2).InfoS("Mark all resources Unhealthy for resource", "resourceName", resourceName)
	healthyDevices := sets.New[string]()
	if _, ok := m.healthyDevices[resourceName]; ok {
		healthyDevices = m.healthyDevices[resourceName]
		m.healthyDevices[resourceName] = sets.New[string]()
	}
	if _, ok := m.unhealthyDevices[resourceName]; !ok {
		m.unhealthyDevices[resourceName] = sets.New[string]()
	}
	m.unhealthyDevices[resourceName] = m.unhealthyDevices[resourceName].Union(healthyDevices)
}

// GetCapacity is expected to be called when Kubelet updates its node status.
// The first returned variable contains the registered device plugin resource capacity.
// The second returned variable contains the registered device plugin resource allocatable.
// The third returned variable contains previously registered resources that are no longer active.
// Kubelet uses this information to update resource capacity/allocatable in its node status.
// After the call, device plugin can remove the inactive resources from its internal list as the
// change is already reflected in Kubelet node status.
// Note in the special case after Kubelet restarts, device plugin resource capacities can
// temporarily drop to zero till corresponding device plugins re-register. This is OK because
// cm.UpdatePluginResource() run during predicate Admit guarantees we adjust nodeinfo
// capacity for already allocated pods so that they can continue to run. However, new pods
// requiring device plugin resources will not be scheduled till device plugin re-registers.
func (m *ManagerImpl) GetCapacity() (v1.ResourceList, v1.ResourceList, []string) {
	needsUpdateCheckpoint := false
	var capacity = v1.ResourceList{}
	var allocatable = v1.ResourceList{}
	deletedResources := sets.New[string]()
	m.mutex.Lock()
	for resourceName, devices := range m.healthyDevices {
		eI, ok := m.endpoints[resourceName]
		if (ok && eI.e.stopGracePeriodExpired()) || !ok {
			// The resources contained in endpoints and (un)healthyDevices
			// should always be consistent. Otherwise, we run with the risk
			// of failing to garbage collect non-existing resources or devices.
			if !ok {
				klog.ErrorS(nil, "Unexpected: healthyDevices and endpoints are out of sync")
			}
			delete(m.endpoints, resourceName)
			delete(m.healthyDevices, resourceName)
			deletedResources.Insert(resourceName)
			needsUpdateCheckpoint = true
		} else {
			capacity[v1.ResourceName(resourceName)] = *resource.NewQuantity(int64(devices.Len()), resource.DecimalSI)
			allocatable[v1.ResourceName(resourceName)] = *resource.NewQuantity(int64(devices.Len()), resource.DecimalSI)
		}
	}
	for resourceName, devices := range m.unhealthyDevices {
		eI, ok := m.endpoints[resourceName]
		if (ok && eI.e.stopGracePeriodExpired()) || !ok {
			if !ok {
				klog.ErrorS(nil, "Unexpected: unhealthyDevices and endpoints are out of sync")
			}
			delete(m.endpoints, resourceName)
			delete(m.unhealthyDevices, resourceName)
			deletedResources.Insert(resourceName)
			needsUpdateCheckpoint = true
		} else {
			capacityCount := capacity[v1.ResourceName(resourceName)]
			unhealthyCount := *resource.NewQuantity(int64(devices.Len()), resource.DecimalSI)
			capacityCount.Add(unhealthyCount)
			capacity[v1.ResourceName(resourceName)] = capacityCount
		}
	}
	m.mutex.Unlock()
	if needsUpdateCheckpoint {
		if err := m.writeCheckpoint(); err != nil {
			klog.ErrorS(err, "Error on writing checkpoint")
		}
	}
	return capacity, allocatable, deletedResources.UnsortedList()
}

// Checkpoints device to container allocation information to disk.
func (m *ManagerImpl) writeCheckpoint() error {
	m.mutex.Lock()
	registeredDevs := make(map[string][]string)
	for resource, devices := range m.healthyDevices {
		registeredDevs[resource] = devices.UnsortedList()
	}
	data := checkpoint.New(m.podDevices.toCheckpointData(),
		registeredDevs)
	m.mutex.Unlock()
	err := m.checkpointManager.CreateCheckpoint(kubeletDeviceManagerCheckpoint, data)
	if err != nil {
		err2 := fmt.Errorf("failed to write checkpoint file %q: %v", kubeletDeviceManagerCheckpoint, err)
		klog.InfoS("Failed to write checkpoint file", "err", err)
		return err2
	}
	return nil
}

// Reads device to container allocation information from disk, and populates
// m.allocatedDevices accordingly.
func (m *ManagerImpl) readCheckpoint() error {
	// the vast majority of time we restore a compatible checkpoint, so we try
	// the current version first. Trying to restore older format checkpoints is
	// relevant only in the kubelet upgrade flow, which happens once in a
	// (long) while.
	cp, err := m.getCheckpointV2()
	if err != nil {
		if err == errors.ErrCheckpointNotFound {
			// no point in trying anything else
			klog.InfoS("Failed to read data from checkpoint", "checkpoint", kubeletDeviceManagerCheckpoint, "err", err)
			return nil
		}

		var errv1 error
		// one last try: maybe it's an old format checkpoint?
		cp, errv1 = m.getCheckpointV1()
		if errv1 != nil {
			klog.InfoS("Failed to read checkpoint V1 file", "err", errv1)
			// intentionally return the parent error. We expect to restore V1 checkpoints
			// a tiny fraction of time, so what matters most is the current checkpoint read error.
			return err
		}
		klog.InfoS("Read data from a V1 checkpoint", "checkpoint", kubeletDeviceManagerCheckpoint)
	}

	m.mutex.Lock()
	defer m.mutex.Unlock()
	podDevices, registeredDevs := cp.GetDataInLatestFormat()
	m.podDevices.fromCheckpointData(podDevices)
	m.allocatedDevices = m.podDevices.devices()
	for resource := range registeredDevs {
		// During start up, creates empty healthyDevices list so that the resource capacity
		// will stay zero till the corresponding device plugin re-registers.
		m.healthyDevices[resource] = sets.New[string]()
		m.unhealthyDevices[resource] = sets.New[string]()
		m.endpoints[resource] = endpointInfo{e: newStoppedEndpointImpl(resource), opts: nil}
	}
	return nil
}

func (m *ManagerImpl) getCheckpointV2() (checkpoint.DeviceManagerCheckpoint, error) {
	registeredDevs := make(map[string][]string)
	devEntries := make([]checkpoint.PodDevicesEntry, 0)
	cp := checkpoint.New(devEntries, registeredDevs)
	err := m.checkpointManager.GetCheckpoint(kubeletDeviceManagerCheckpoint, cp)
	return cp, err
}

func (m *ManagerImpl) getCheckpointV1() (checkpoint.DeviceManagerCheckpoint, error) {
	registeredDevs := make(map[string][]string)
	devEntries := make([]checkpoint.PodDevicesEntryV1, 0)
	cp := checkpoint.NewV1(devEntries, registeredDevs)
	err := m.checkpointManager.GetCheckpoint(kubeletDeviceManagerCheckpoint, cp)
	return cp, err
}

// UpdateAllocatedDevices frees any Devices that are bound to terminated pods.
func (m *ManagerImpl) UpdateAllocatedDevices() {
	if !m.sourcesReady.AllReady() {
		return
	}

	m.mutex.Lock()
	defer m.mutex.Unlock()

	activeAndAdmittedPods := m.activePods()
	if m.pendingAdmissionPod != nil {
		activeAndAdmittedPods = append(activeAndAdmittedPods, m.pendingAdmissionPod)
	}

	podsToBeRemoved := m.podDevices.pods()
	for _, pod := range activeAndAdmittedPods {
		podsToBeRemoved.Delete(string(pod.UID))
	}
	if len(podsToBeRemoved) <= 0 {
		return
	}
	klog.V(3).InfoS("Pods to be removed", "podUIDs", sets.List(podsToBeRemoved))
	m.podDevices.delete(sets.List(podsToBeRemoved))
	// Regenerate allocatedDevices after we update pod allocation information.
	m.allocatedDevices = m.podDevices.devices()
}
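
// NOTE (editorial): podDevices (checkpointed to disk) is the source of truth
// for per-container assignments, while allocatedDevices is a derived,
// per-resource index of device IDs in use. The index is rebuilt from podDevices
// after reading a checkpoint, after the garbage collection above, and when an
// Allocate RPC fails, so the two cannot drift apart for long.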

// Returns list of device Ids we need to allocate with Allocate rpc call.
// Returns empty list in case we don't need to issue the Allocate rpc call.
func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, required int, reusableDevices sets.Set[string]) (sets.Set[string], error) {
	m.mutex.Lock()
	defer m.mutex.Unlock()
	needed := required
	// Gets list of devices that have already been allocated.
	// This can happen if a container restarts for example.
	devices := m.podDevices.containerDevices(podUID, contName, resource)
	if devices != nil {
		klog.V(3).InfoS("Found pre-allocated devices for resource on pod", "resourceName", resource, "containerName", contName, "podUID", string(podUID), "devices", sets.List(devices))
		needed = needed - devices.Len()
		// A pod's resource is not expected to change once admitted by the API server,
		// so just fail loudly here. We can revisit this part if this no longer holds.
		if needed != 0 {
			return nil, fmt.Errorf("pod %q container %q changed request for resource %q from %d to %d", string(podUID), contName, resource, devices.Len(), required)
		}
	}

	// We have 3 major flows to handle:
	// 1. kubelet running, normal allocation (needed > 0, container being [re]created). Steady state and by far the most common case.
	// 2. kubelet restart. In this scenario every other component of the stack (device plugins, app container, runtime) is still running.
	// 3. node reboot. In this scenario device plugins may not be running yet when we try to allocate devices.
	//    note: if we get this far the runtime is surely running. This is usually enforced at OS level by startup system services dependencies.

	// First we take care of the exceptional flows (scenarios 2 and 3). In both flows, kubelet is reinitializing, and while kubelet is initializing, sources are NOT all ready.
	// Is this a simple kubelet restart (scenario 2)? To distinguish, we use the information we got from the runtime. If we are asked to allocate devices for containers reported
	// running, then it can only be a kubelet restart. On node reboot the runtime and the containers were also shut down. Then, if the container was running, it can only be
	// because it already has access to all the required devices, so we have nothing to do and we can bail out.
	if !m.sourcesReady.AllReady() && m.isContainerAlreadyRunning(podUID, contName) {
		klog.V(3).InfoS("container detected running, nothing to do", "deviceNumber", needed, "resourceName", resource, "podUID", string(podUID), "containerName", contName)
		return nil, nil
	}

	// We dealt with scenario 2. If we got this far it's either scenario 3 (node reboot) or scenario 1 (steady state, normal flow).
	klog.V(3).InfoS("Need devices to allocate for pod", "deviceNumber", needed, "resourceName", resource, "podUID", string(podUID), "containerName", contName)
	healthyDevices, hasRegistered := m.healthyDevices[resource]

	// The following checks are expected to fail only on scenario 3 (node reboot).
	// The kubelet is reinitializing and got a container from sources. But there's no ordering, so an app container may attempt allocation _before_ the device plugin was created,
	// has registered and reported the devices back to kubelet.
	// This can only happen on scenario 3 because at steady state (scenario 1) the scheduler prevents pods from being sent to nodes which don't report enough devices.
	// Note: we need to check the device health and registration status *before* we check how many devices are needed, doing otherwise caused issue #109595
	// Note: if the scheduler is bypassed, we fall back in scenario 1, so we still need these checks.
	if !hasRegistered {
		return nil, fmt.Errorf("cannot allocate unregistered device %s", resource)
	}

	// Check if registered resource has healthy devices
	if healthyDevices.Len() == 0 {
		return nil, fmt.Errorf("no healthy devices present; cannot allocate unhealthy devices %s", resource)
	}

	// Check if all the previously allocated devices are healthy
	if !healthyDevices.IsSuperset(devices) {
		return nil, fmt.Errorf("previously allocated devices are no longer healthy; cannot allocate unhealthy devices %s", resource)
	}

	// We handled the known error paths in scenario 3 (node reboot), so from now on we can fall back in a common path.
	// We cover container restart on kubelet steady state with the same flow.
	if needed == 0 {
		klog.V(3).InfoS("no devices needed, nothing to do", "deviceNumber", needed, "resourceName", resource, "podUID", string(podUID), "containerName", contName)
		// No change, no work.
		return nil, nil
	}

	// Declare the list of allocated devices.
	// This will be populated and returned below.
	allocated := sets.New[string]()

	// Create a closure to help with device allocation
	// Returns 'true' once no more devices need to be allocated.
	allocateRemainingFrom := func(devices sets.Set[string]) bool {
		for device := range devices.Difference(allocated) {
			m.allocatedDevices[resource].Insert(device)
			allocated.Insert(device)
			needed--
			if needed == 0 {
				return true
			}
		}
		return false
	}

	// Needs to allocate additional devices.
	if m.allocatedDevices[resource] == nil {
		m.allocatedDevices[resource] = sets.New[string]()
	}

	// Allocates from reusableDevices list first.
	if allocateRemainingFrom(reusableDevices) {
		return allocated, nil
	}

	// Gets Devices in use.
	devicesInUse := m.allocatedDevices[resource]
	// Gets Available devices.
	available := m.healthyDevices[resource].Difference(devicesInUse)
	if available.Len() < needed {
		return nil, fmt.Errorf("requested number of devices unavailable for %s. Requested: %d, Available: %d", resource, needed, available.Len())
	}

	// Filters available Devices based on NUMA affinity.
	aligned, unaligned, noAffinity := m.filterByAffinity(podUID, contName, resource, available)

	// If we can allocate all remaining devices from the set of aligned ones, then
	// give the plugin the chance to influence which ones to allocate from that set.
	if needed < aligned.Len() {
		// First allocate from the preferred devices list (if available).
		preferred, err := m.callGetPreferredAllocationIfAvailable(podUID, contName, resource, aligned.Union(allocated), allocated, required)
		if err != nil {
			return nil, err
		}
		if allocateRemainingFrom(preferred.Intersection(aligned)) {
			return allocated, nil
		}
		// Then fallback to allocate from the aligned set if no preferred list
		// is returned (or not enough devices are returned in that list).
		if allocateRemainingFrom(aligned) {
			return allocated, nil
		}

		return nil, fmt.Errorf("unexpectedly allocated less resources than required. Requested: %d, Got: %d", required, required-needed)
	}

	// If we can't allocate all remaining devices from the set of aligned ones,
	// then start by first allocating all of the aligned devices (to ensure
	// that the alignment guaranteed by the TopologyManager is honored).
	if allocateRemainingFrom(aligned) {
		return allocated, nil
	}

	// Then give the plugin the chance to influence the decision on any
	// remaining devices to allocate.
	preferred, err := m.callGetPreferredAllocationIfAvailable(podUID, contName, resource, available.Union(allocated), allocated, required)
	if err != nil {
		return nil, err
	}
	if allocateRemainingFrom(preferred.Intersection(available)) {
		return allocated, nil
	}

	// Finally, if the plugin did not return a preferred allocation (or didn't
	// return a large enough one), then fall back to allocating the remaining
	// devices from the 'unaligned' and 'noAffinity' sets.
	if allocateRemainingFrom(unaligned) {
		return allocated, nil
	}
	if allocateRemainingFrom(noAffinity) {
		return allocated, nil
	}

	return nil, fmt.Errorf("unexpectedly allocated less resources than required. Requested: %d, Got: %d", required, required-needed)
}

func (m *ManagerImpl) filterByAffinity(podUID, contName, resource string, available sets.Set[string]) (sets.Set[string], sets.Set[string], sets.Set[string]) {
	// If alignment information is not available, just pass the available list back.
	hint := m.topologyAffinityStore.GetAffinity(podUID, contName)
	if !m.deviceHasTopologyAlignment(resource) || hint.NUMANodeAffinity == nil {
		return sets.New[string](), sets.New[string](), available
	}

	// Build a map of NUMA Nodes to the devices associated with them. A
	// device may be associated to multiple NUMA nodes at the same time. If an
	// available device does not have any NUMA Nodes associated with it, add it
	// to a list of NUMA Nodes for the fake NUMANode -1.
	perNodeDevices := make(map[int]sets.Set[string])
	for d := range available {
		if m.allDevices[resource][d].Topology == nil || len(m.allDevices[resource][d].Topology.Nodes) == 0 {
			if _, ok := perNodeDevices[nodeWithoutTopology]; !ok {
				perNodeDevices[nodeWithoutTopology] = sets.New[string]()
			}
			perNodeDevices[nodeWithoutTopology].Insert(d)
			continue
		}

		for _, node := range m.allDevices[resource][d].Topology.Nodes {
			if _, ok := perNodeDevices[int(node.ID)]; !ok {
				perNodeDevices[int(node.ID)] = sets.New[string]()
			}
			perNodeDevices[int(node.ID)].Insert(d)
		}
	}

	// Get a flat list of all of the nodes associated with available devices.
	var nodes []int
	for node := range perNodeDevices {
		nodes = append(nodes, node)
	}

	// Sort the list of nodes by:
	// 1) Nodes contained in the 'hint's affinity set
	// 2) Nodes not contained in the 'hint's affinity set
	// 3) The fake NUMANode of -1 (assuming it is included in the list)
	// Within each of the groups above, sort the nodes by how many devices they contain
	sort.Slice(nodes, func(i, j int) bool {
		// If one or the other of nodes[i] or nodes[j] is in the 'hint's affinity set
		if hint.NUMANodeAffinity.IsSet(nodes[i]) && hint.NUMANodeAffinity.IsSet(nodes[j]) {
			return perNodeDevices[nodes[i]].Len() < perNodeDevices[nodes[j]].Len()
		}
		if hint.NUMANodeAffinity.IsSet(nodes[i]) {
			return true
		}
		if hint.NUMANodeAffinity.IsSet(nodes[j]) {
			return false
		}

		// If one or the other of nodes[i] or nodes[j] is the fake NUMA node -1 (they can't both be)
		if nodes[i] == nodeWithoutTopology {
			return false
		}
		if nodes[j] == nodeWithoutTopology {
			return true
		}

		// Otherwise both nodes[i] and nodes[j] are real NUMA nodes that are not in the 'hint's' affinity list.
		return perNodeDevices[nodes[i]].Len() < perNodeDevices[nodes[j]].Len()
	})

	// Generate three sorted lists of devices. Devices in the first list come
	// from valid NUMA Nodes contained in the affinity mask. Devices in the
	// second list come from valid NUMA Nodes not in the affinity mask. Devices
	// in the third list come from devices with no NUMA Node association (i.e.
	// those mapped to the fake NUMA Node -1). Because we loop through the
	// sorted list of NUMA nodes in order, within each list, devices are sorted
	// by their connection to NUMA Nodes with more devices on them.
	var fromAffinity []string
	var notFromAffinity []string
	var withoutTopology []string
	for d := range available {
		// Since the same device may be associated with multiple NUMA Nodes, we
		// need to be careful not to add each device to multiple lists. The
		// logic below ensures this by breaking after the first NUMA node that
		// has the device is encountered.
		for _, n := range nodes {
			if perNodeDevices[n].Has(d) {
				if n == nodeWithoutTopology {
					withoutTopology = append(withoutTopology, d)
				} else if hint.NUMANodeAffinity.IsSet(n) {
					fromAffinity = append(fromAffinity, d)
				} else {
					notFromAffinity = append(notFromAffinity, d)
				}
				break
			}
		}
	}

	// Return all three lists containing the full set of devices across them.
	return sets.New[string](fromAffinity...), sets.New[string](notFromAffinity...), sets.New[string](withoutTopology...)
}
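
// NOTE (editorial example, not part of the upstream file): with hypothetical
// device IDs, suppose "dev0" and "dev1" advertise NUMA node 0, "dev2"
// advertises NUMA node 1, "dev3" reports no topology, and the TopologyManager
// hint selects node 0. filterByAffinity then returns
//
//	aligned    = {dev0, dev1}  // on a NUMA node inside the hint's affinity mask
//	unaligned  = {dev2}        // on a real NUMA node outside the mask
//	noAffinity = {dev3}        // no topology information, i.e. fake node -1
//
// and devicesToAllocate consumes the three sets in that order of preference.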

// allocateContainerResources attempts to allocate all of required device
// plugin resources for the input container, issues an Allocate rpc request
// for each new device resource requirement, processes their AllocateResponses,
// and updates the cached containerDevices on success.
func (m *ManagerImpl) allocateContainerResources(pod *v1.Pod, container *v1.Container, devicesToReuse map[string]sets.Set[string]) error {
	podUID := string(pod.UID)
	contName := container.Name
	allocatedDevicesUpdated := false
	needsUpdateCheckpoint := false
	// Extended resources are not allowed to be overcommitted.
	// Since device plugins advertise extended resources,
	// Requests must be equal to Limits and iterating
	// over the Limits should be sufficient.
	for k, v := range container.Resources.Limits {
		resource := string(k)
		needed := int(v.Value())
		klog.V(3).InfoS("Looking for needed resources", "needed", needed, "resourceName", resource)
		if !m.isDevicePluginResource(resource) {
			continue
		}
		// Updates allocatedDevices to garbage collect any stranded resources
		// before doing the device plugin allocation.
		if !allocatedDevicesUpdated {
			m.UpdateAllocatedDevices()
			allocatedDevicesUpdated = true
		}
		allocDevices, err := m.devicesToAllocate(podUID, contName, resource, needed, devicesToReuse[resource])
		if err != nil {
			return err
		}
		if allocDevices == nil || len(allocDevices) <= 0 {
			continue
		}

		needsUpdateCheckpoint = true

		startRPCTime := time.Now()
		// Manager.Allocate involves RPC calls to device plugin, which
		// could be heavy-weight. Therefore we want to perform this operation outside
		// mutex lock. Note if Allocate call fails, we may leave container resources
		// partially allocated for the failed container. We rely on UpdateAllocatedDevices()
		// to garbage collect these resources later. Another side effect is that if
		// we have X resource A and Y resource B in total, and two containers, container1
		// and container2, both require X resource A and Y resource B, both allocation
		// requests may fail if we serve them in mixed order.
		// TODO: may revisit this part later if we see inefficient resource allocation
		// in real use as the result of this. Should also consider parallelizing device
		// plugin Allocate grpc calls if it becomes common that a container may require
		// resources from multiple device plugins.
		m.mutex.Lock()
		eI, ok := m.endpoints[resource]
		m.mutex.Unlock()
		if !ok {
			m.mutex.Lock()
			m.allocatedDevices = m.podDevices.devices()
			m.mutex.Unlock()
			return fmt.Errorf("unknown Device Plugin %s", resource)
		}

		devs := allocDevices.UnsortedList()
		// TODO: refactor this part of code to just append a ContainerAllocationRequest
		// in a passed in AllocateRequest pointer, and issue a single Allocate call per pod.
		klog.V(3).InfoS("Making allocation request for device plugin", "devices", devs, "resourceName", resource)
		resp, err := eI.e.allocate(devs)
		metrics.DevicePluginAllocationDuration.WithLabelValues(resource).Observe(metrics.SinceInSeconds(startRPCTime))
		if err != nil {
			// In case of allocation failure, we want to restore m.allocatedDevices
			// to the actual allocated state from m.podDevices.
			m.mutex.Lock()
			m.allocatedDevices = m.podDevices.devices()
			m.mutex.Unlock()
			return err
		}

		if len(resp.ContainerResponses) == 0 {
			return fmt.Errorf("no containers return in allocation response %v", resp)
		}

		allocDevicesWithNUMA := checkpoint.NewDevicesPerNUMA()
		// Update internal cached podDevices state.
		m.mutex.Lock()
		for dev := range allocDevices {
			if m.allDevices[resource][dev].Topology == nil || len(m.allDevices[resource][dev].Topology.Nodes) == 0 {
				allocDevicesWithNUMA[nodeWithoutTopology] = append(allocDevicesWithNUMA[nodeWithoutTopology], dev)
				continue
			}
			for idx := range m.allDevices[resource][dev].Topology.Nodes {
				node := m.allDevices[resource][dev].Topology.Nodes[idx]
				allocDevicesWithNUMA[node.ID] = append(allocDevicesWithNUMA[node.ID], dev)
			}
		}
		m.mutex.Unlock()
		m.podDevices.insert(podUID, contName, resource, allocDevicesWithNUMA, resp.ContainerResponses[0])
	}

	if needsUpdateCheckpoint {
		return m.writeCheckpoint()
	}

	return nil
}

// checkPodActive checks if the given pod is still in activePods list
func (m *ManagerImpl) checkPodActive(pod *v1.Pod) bool {
	activePods := m.activePods()
	for _, activePod := range activePods {
		if activePod.UID == pod.UID {
			return true
		}
	}

	return false
}

// GetDeviceRunContainerOptions checks whether we have cached containerDevices
// for the passed-in <pod, container> and returns its DeviceRunContainerOptions
// for the found one. An empty struct is returned in case no cached state is found.
func (m *ManagerImpl) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) (*DeviceRunContainerOptions, error) {
	podUID := string(pod.UID)
	contName := container.Name
	needsReAllocate := false
	for k, v := range container.Resources.Limits {
		resource := string(k)
		if !m.isDevicePluginResource(resource) || v.Value() == 0 {
			continue
		}
		err := m.callPreStartContainerIfNeeded(podUID, contName, resource)
		if err != nil {
			return nil, err
		}

		if !m.checkPodActive(pod) {
			klog.ErrorS(nil, "pod deleted from activePods, skip to reAllocate", "podUID", podUID)
			continue
		}

		// This is a device plugin resource yet we don't have cached
		// resource state. This is likely due to a race during node
		// restart. We re-issue allocate request to cover this race.
		if m.podDevices.containerDevices(podUID, contName, resource) == nil {
			needsReAllocate = true
		}
	}
	if needsReAllocate {
		klog.V(2).InfoS("Needs to re-allocate device plugin resources for pod", "pod", klog.KObj(pod), "containerName", container.Name)
		if err := m.Allocate(pod, container); err != nil {
			return nil, err
		}
	}
	return m.podDevices.deviceRunContainerOptions(string(pod.UID), container.Name), nil
}
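
// NOTE (editorial): the options returned above are assembled by podDevices
// (pod_devices.go in this package) from the cached AllocateResponses: the
// environment variables, mounts, device nodes and annotations requested by the
// plugins are merged per container. The Allocate call in the re-allocation
// branch covers the node-restart race where no cached state exists for a
// container that is about to start.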

// callPreStartContainerIfNeeded issues PreStartContainer grpc call for device plugin resource
// with PreStartRequired option set.
func (m *ManagerImpl) callPreStartContainerIfNeeded(podUID, contName, resource string) error {
	m.mutex.Lock()
	eI, ok := m.endpoints[resource]
	if !ok {
		m.mutex.Unlock()
		return fmt.Errorf("endpoint not found in cache for a registered resource: %s", resource)
	}

	if eI.opts == nil || !eI.opts.PreStartRequired {
		m.mutex.Unlock()
		klog.V(4).InfoS("Plugin options indicate to skip PreStartContainer for resource", "resourceName", resource)
		return nil
	}

	devices := m.podDevices.containerDevices(podUID, contName, resource)
	if devices == nil {
		m.mutex.Unlock()
		return fmt.Errorf("no devices found allocated in local cache for pod %s, container %s, resource %s", string(podUID), contName, resource)
	}

	m.mutex.Unlock()
	devs := devices.UnsortedList()
	klog.V(4).InfoS("Issuing a PreStartContainer call for container", "containerName", contName, "podUID", string(podUID))
	_, err := eI.e.preStartContainer(devs)
	if err != nil {
		return fmt.Errorf("device plugin PreStartContainer rpc failed with err: %v", err)
	}
	// TODO: Add metrics support for init RPC
	return nil
}

// callGetPreferredAllocationIfAvailable issues GetPreferredAllocation grpc
// call for device plugin resource with GetPreferredAllocationAvailable option set.
func (m *ManagerImpl) callGetPreferredAllocationIfAvailable(podUID, contName, resource string, available, mustInclude sets.Set[string], size int) (sets.Set[string], error) {
	eI, ok := m.endpoints[resource]
	if !ok {
		return nil, fmt.Errorf("endpoint not found in cache for a registered resource: %s", resource)
	}

	if eI.opts == nil || !eI.opts.GetPreferredAllocationAvailable {
		klog.V(4).InfoS("Plugin options indicate to skip GetPreferredAllocation for resource", "resourceName", resource)
		return nil, nil
	}

	m.mutex.Unlock()
	klog.V(4).InfoS("Issuing a GetPreferredAllocation call for container", "containerName", contName, "podUID", string(podUID))
	resp, err := eI.e.getPreferredAllocation(available.UnsortedList(), mustInclude.UnsortedList(), size)
	m.mutex.Lock()
	if err != nil {
		return nil, fmt.Errorf("device plugin GetPreferredAllocation rpc failed with err: %v", err)
	}
	if resp != nil && len(resp.ContainerResponses) > 0 {
		return sets.New[string](resp.ContainerResponses[0].DeviceIDs...), nil
	}
	return sets.New[string](), nil
}
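
// NOTE (editorial): unlike callPreStartContainerIfNeeded, the function above is
// invoked from devicesToAllocate with m.mutex already held; its Unlock/Lock
// pair releases the lock only for the duration of the (potentially slow)
// GetPreferredAllocation round trip.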

// sanitizeNodeAllocatable scans through allocatedDevices in the device manager
// and if necessary, updates allocatableResource in nodeInfo to be at least equal to
// the allocated capacity. This allows pods that have already been scheduled on
// the node to pass GeneralPredicates admission checking even upon device plugin failure.
func (m *ManagerImpl) sanitizeNodeAllocatable(node *schedulerframework.NodeInfo) {
	var newAllocatableResource *schedulerframework.Resource
	allocatableResource := node.Allocatable
	if allocatableResource.ScalarResources == nil {
		allocatableResource.ScalarResources = make(map[v1.ResourceName]int64)
	}

	m.mutex.Lock()
	defer m.mutex.Unlock()
	for resource, devices := range m.allocatedDevices {
		needed := devices.Len()
		quant, ok := allocatableResource.ScalarResources[v1.ResourceName(resource)]
		if ok && int(quant) >= needed {
			continue
		}
		// Needs to update nodeInfo.AllocatableResource to make sure
		// NodeInfo.allocatableResource is at least equal to the capacity already allocated.
		if newAllocatableResource == nil {
			newAllocatableResource = allocatableResource.Clone()
		}
		newAllocatableResource.ScalarResources[v1.ResourceName(resource)] = int64(needed)
	}
	if newAllocatableResource != nil {
		node.Allocatable = newAllocatableResource
	}
}

func (m *ManagerImpl) isDevicePluginResource(resource string) bool {
	m.mutex.Lock()
	defer m.mutex.Unlock()
	_, registeredResource := m.healthyDevices[resource]
	_, allocatedResource := m.allocatedDevices[resource]
	// Return true if this is either an active device plugin resource or
	// a resource we have previously allocated.
	if registeredResource || allocatedResource {
		return true
	}
	return false
}

// GetAllocatableDevices returns information about all the healthy devices known to the manager
func (m *ManagerImpl) GetAllocatableDevices() ResourceDeviceInstances {
	m.mutex.Lock()
	defer m.mutex.Unlock()
	resp := m.allDevices.Filter(m.healthyDevices)
	klog.V(4).InfoS("GetAllocatableDevices", "known", len(m.allDevices), "allocatable", len(resp))
	return resp
}

// GetDevices returns the devices used by the specified container
func (m *ManagerImpl) GetDevices(podUID, containerName string) ResourceDeviceInstances {
	return m.podDevices.getContainerDevices(podUID, containerName)
}

// ShouldResetExtendedResourceCapacity returns whether the extended resources should be zeroed or not,
// depending on whether the node has been recreated. Absence of the checkpoint file strongly indicates the node
// has been recreated.
func (m *ManagerImpl) ShouldResetExtendedResourceCapacity() bool {
	checkpoints, err := m.checkpointManager.ListCheckpoints()
	if err != nil {
		return false
	}
	return len(checkpoints) == 0
}

func (m *ManagerImpl) setPodPendingAdmission(pod *v1.Pod) {
	m.mutex.Lock()
	defer m.mutex.Unlock()

	m.pendingAdmissionPod = pod
}

func (m *ManagerImpl) isContainerAlreadyRunning(podUID, cntName string) bool {
	cntID, err := m.containerMap.GetContainerID(podUID, cntName)
	if err != nil {
		klog.V(4).InfoS("container not found in the initial map, assumed NOT running", "podUID", podUID, "containerName", cntName, "err", err)
		return false
	}

	// note that if container runtime is down when kubelet restarts, this set will be empty,
	// so on kubelet restart containers will again fail admission, hitting https://github.com/kubernetes/kubernetes/issues/118559 again.
	// This scenario should however be rare enough.
	if !m.containerRunningSet.Has(cntID) {
		klog.V(4).InfoS("container not present in the initial running set", "podUID", podUID, "containerName", cntName, "containerID", cntID)
		return false
	}

	// Once we make it here we know we have a running container.
	klog.V(4).InfoS("container found in the initial set, assumed running", "podUID", podUID, "containerName", cntName, "containerID", cntID)
	return true
}
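
// NOTE (editorial sketch, not part of the upstream file): the admission and
// container-start flow as seen by callers of this manager. The pod value is
// hypothetical and error handling is elided:
//
//	// At pod admission, allocate devices for each container in turn.
//	err := mgr.Allocate(pod, &pod.Spec.Containers[0])
//
//	// Node status updates pick up capacity and allocatable counts, plus
//	// resources that have disappeared since the last update.
//	capacity, allocatable, removed := mgr.GetCapacity()
//
//	// At container start, fetch the env vars, mounts, device nodes and
//	// annotations derived from the plugins' AllocateResponses.
//	opts, err := mgr.GetDeviceRunContainerOptions(pod, &pod.Spec.Containers[0])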