github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/orm/manager.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package orm 18 19 import ( 20 "context" 21 "fmt" 22 "math" 23 "net" 24 "os" 25 "path/filepath" 26 "sync" 27 "time" 28 29 "github.com/opencontainers/selinux/go-selinux" 30 "k8s.io/klog/v2" 31 32 "google.golang.org/grpc" 33 v1 "k8s.io/api/core/v1" 34 "k8s.io/apimachinery/pkg/util/wait" 35 "k8s.io/kubelet/pkg/apis/pluginregistration/v1" 36 pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" 37 "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" 38 maputil "k8s.io/kubernetes/pkg/util/maps" 39 40 "github.com/kubewharf/katalyst-core/pkg/agent/orm/deviceprovider/kubelet" 41 "github.com/kubewharf/katalyst-core/pkg/agent/orm/endpoint" 42 "github.com/kubewharf/katalyst-core/pkg/agent/orm/executor" 43 "github.com/kubewharf/katalyst-core/pkg/agent/orm/metamanager" 44 "github.com/kubewharf/katalyst-core/pkg/agent/orm/server" 45 "github.com/kubewharf/katalyst-core/pkg/agent/orm/server/podresources" 46 "github.com/kubewharf/katalyst-core/pkg/agent/orm/topology" 47 "github.com/kubewharf/katalyst-core/pkg/config" 48 "github.com/kubewharf/katalyst-core/pkg/config/generic" 49 "github.com/kubewharf/katalyst-core/pkg/metaserver" 50 "github.com/kubewharf/katalyst-core/pkg/metrics" 51 "github.com/kubewharf/katalyst-core/pkg/util/bitmask" 52 cgroupmgr "github.com/kubewharf/katalyst-core/pkg/util/cgroup/manager" 53 podresourcesutil "github.com/kubewharf/katalyst-core/pkg/util/kubelet/podresources" 54 "github.com/kubewharf/katalyst-core/pkg/util/native" 55 ) 56 57 type ManagerImpl struct { 58 ctx context.Context 59 60 socketname string 61 socketdir string 62 63 // resource to QRMPlugins and executors 64 mutex sync.RWMutex 65 endpoints map[string]endpoint.EndpointInfo 66 resourceExecutor executor.Executor 67 68 metaManager *metamanager.Manager 69 70 topologyManager topology.Manager 71 72 server *grpc.Server 73 wg sync.WaitGroup 74 75 podAddChan chan string 76 podDeleteChan chan string 77 78 podResources *podResourcesChk 79 checkpointManager checkpointmanager.CheckpointManager 80 81 emitter metrics.MetricEmitter 82 qosConfig *generic.QoSConfiguration 83 84 reconcilePeriod time.Duration 85 resourceNamesMap map[string]string 86 podResourceSocket string 87 88 devicesProvider podresources.DevicesProvider 89 } 90 91 func NewManager(socketPath string, emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer, config *config.Configuration) (*ManagerImpl, error) { 92 klog.V(2).Infof("new ORM..., socketPath: %v, resourceNameMap: %v, reconcilePeriod: %v", socketPath, config.ORMResourceNamesMap, config.ORMRconcilePeriod) 93 94 if socketPath == "" || !filepath.IsAbs(socketPath) { 95 return nil, fmt.Errorf(errBadSocket+" %s", socketPath) 96 } 97 dir, file := filepath.Split(socketPath) 98 99 checkpointManager, err := checkpointmanager.NewCheckpointManager(dir) 100 if err != nil { 101 return nil, fmt.Errorf("failed to initialize checkpoint manager: %v", err) 102 } 103 104 m := &ManagerImpl{ 105 socketdir: dir, 106 socketname: file, 107 108 endpoints: make(map[string]endpoint.EndpointInfo), 109 podResources: newPodResourcesChk(), 110 checkpointManager: checkpointManager, 111 112 resourceNamesMap: config.ORMResourceNamesMap, 113 reconcilePeriod: config.ORMRconcilePeriod, 114 115 podAddChan: make(chan string, config.ORMPodNotifyChanLen), 116 podDeleteChan: make(chan string, config.ORMPodNotifyChanLen), 117 emitter: emitter, 118 qosConfig: config.QoSConfiguration, 119 podResourceSocket: config.ORMPodResourcesSocket, 120 } 121 122 m.resourceExecutor = executor.NewExecutor(cgroupmgr.GetManager()) 123 124 metaManager := metamanager.NewManager(emitter, m.podResources.pods, metaServer) 125 m.metaManager = metaManager 126 127 topologyManager, err := topology.NewManager(metaServer.Topology, config.TopologyPolicyName, config.NumericAlignResources) 128 if err != nil { 129 klog.Error(err) 130 return nil, err 131 } 132 topologyManager.AddHintProvider(m) 133 m.topologyManager = topologyManager 134 135 m.initDeviceProvider(config) 136 137 if err := m.removeContents(m.socketdir); err != nil { 138 err = fmt.Errorf("[ORM] Fail to clean up stale contents under %s: %v", m.socketdir, err) 139 klog.Error(err) 140 return nil, err 141 } 142 klog.V(5).Infof("removeContents......") 143 144 return m, nil 145 } 146 147 func (m *ManagerImpl) Run(ctx context.Context) { 148 klog.V(2).Infof("[ORM] running...") 149 m.ctx = ctx 150 151 // read data from checkpoint 152 err := m.readCheckpoint() 153 if err != nil { 154 klog.Fatalf("[ORM] read checkpoint fail: %v", err) 155 } 156 157 if err = os.MkdirAll(m.socketdir, 0o750); err != nil { 158 klog.Fatalf("[ORM] Mkdir socketdir %v fail: %v", m.socketdir, err) 159 } 160 if selinux.GetEnabled() { 161 if err := selinux.SetFileLabel(m.socketdir, KubeletPluginsDirSELinuxLabel); err != nil { 162 klog.Warningf("[ORM] Unprivileged containerized plugins might not work. Could not set selinux context on %s: %v", m.socketdir, err) 163 } 164 } 165 166 socketPath := filepath.Join(m.socketdir, m.socketname) 167 s, err := net.Listen("unix", socketPath) 168 if err != nil { 169 klog.Fatalf(errListenSocket+" %v", err) 170 } 171 172 m.wg.Add(1) 173 m.server = grpc.NewServer([]grpc.ServerOption{}...) 174 175 pluginapi.RegisterRegistrationServer(m.server, m) 176 177 klog.V(2).Infof("[ORM] Serving resource plugin registration server on %q", socketPath) 178 go func() { 179 defer func() { 180 m.wg.Done() 181 182 if err := recover(); err != nil { 183 klog.Fatalf("[ORM] Start recover from err: %v", err) 184 } 185 s.Close() 186 }() 187 m.server.Serve(s) 188 }() 189 190 klog.V(5).Infof("[ORM] start serve socketPath %v", socketPath) 191 go func() { 192 m.process() 193 }() 194 195 go wait.Until(m.reconcile, m.reconcilePeriod, m.ctx.Done()) 196 197 m.metaManager.RegistPodAddedFunc(m.onPodAdd) 198 m.metaManager.RegistPodDeletedFunc(m.onPodDelete) 199 200 m.metaManager.Run(ctx, m.reconcilePeriod) 201 202 go server.ListenAndServePodResources(m.podResourceSocket, m.metaManager, m, m.devicesProvider, m.emitter) 203 } 204 205 func (m *ManagerImpl) GetHandlerType() string { 206 return pluginregistration.ResourcePlugin 207 } 208 209 func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topology.TopologyHint { 210 if pod == nil || container == nil { 211 klog.Errorf("[ORM] GetTopologyHints got nil pod: %v or container: %v", pod, container) 212 return nil 213 } 214 215 podUID := string(pod.UID) 216 contName := container.Name 217 containerType, containerIndex, err := GetContainerTypeAndIndex(pod, container) 218 if err != nil { 219 return nil 220 } 221 222 resourceHints := make(map[string][]topology.TopologyHint) 223 for resourceObj, requestedObj := range container.Resources.Requests { 224 requested := int(requestedObj.Value()) 225 resource, err := m.getMappedResourceName(string(resourceObj), container.Resources.Requests) 226 if err != nil { 227 klog.Errorf("resource %s getMappedResourceName fail: %v", string(resourceObj), err) 228 return nil 229 } 230 231 if requestedObj.IsZero() { 232 continue 233 } 234 235 allocationInfo := m.podResources.containerResource(podUID, contName, resource) 236 if allocationInfo != nil && allocationInfo.ResourceHints != nil && len(allocationInfo.ResourceHints.Hints) > 0 { 237 238 allocated := int(math.Ceil(allocationInfo.AllocatedQuantity)) 239 240 if allocationInfo.IsScalarResource && allocated >= requested { 241 resourceHints[resource] = ParseListOfTopologyHints(allocationInfo.ResourceHints) 242 klog.Warningf("[ORM] resource %s already allocated to (pod %s/%s, container %v) with larger number than request: requested: %d, allocated: %d; not to getTopologyHints", 243 resource, pod.GetNamespace(), pod.GetName(), container.Name, requested, allocated) 244 continue 245 } else { 246 klog.Warningf("[ORM] resource %s already allocated to (pod %s/%s, container %v) with smaller number than request: requested: %d, allocated: %d; continue to getTopologyHints", 247 resource, pod.GetNamespace(), pod.GetName(), container.Name, requested, int(math.Ceil(allocationInfo.AllocatedQuantity))) 248 } 249 } 250 251 m.mutex.Lock() 252 e, ok := m.endpoints[resource] 253 m.mutex.Unlock() 254 if !ok || e.Opts == nil || !e.Opts.WithTopologyAlignment { 255 klog.V(5).Infof("[ORM] GetTopologyHints resource %s not supported", resource) 256 continue 257 } 258 259 resourceReq := &pluginapi.ResourceRequest{ 260 PodUid: podUID, 261 PodNamespace: pod.GetNamespace(), 262 PodName: pod.GetName(), 263 ContainerName: container.Name, 264 ContainerType: containerType, 265 ContainerIndex: containerIndex, 266 PodRole: pod.Labels[pluginapi.PodRoleLabelKey], 267 PodType: pod.Annotations[pluginapi.PodTypeAnnotationKey], 268 Labels: maputil.CopySS(pod.Labels), 269 Annotations: maputil.CopySS(pod.Annotations), 270 // use mapped resource name in "ResourceName" to indicates which endpoint to request 271 ResourceName: resource, 272 // use original requested resource name in "ResourceRequests" in order to make plugin identity real requested resource name 273 ResourceRequests: map[string]float64{string(resourceObj): requestedObj.AsApproximateFloat64()}, 274 } 275 276 resp, err := e.E.GetTopologyHints(context.Background(), resourceReq) 277 if err != nil { 278 klog.Errorf("[ORM] call GetTopologyHints of %s resource plugin for pod: %s/%s, container: %s failed with error: %v", 279 resource, pod.GetNamespace(), pod.GetName(), contName, err) 280 281 resourceHints[resource] = []topology.TopologyHint{} 282 continue 283 } 284 285 resourceHints[resource] = ParseListOfTopologyHints(resp.ResourceHints[resource]) 286 287 klog.Infof("[ORM] GetTopologyHints for resource: %s, pod: %s/%s, container: %s, result: %+v", 288 resource, pod.Namespace, pod.Name, contName, resourceHints[resource]) 289 } 290 291 return resourceHints 292 } 293 294 func (m *ManagerImpl) GetPodTopologyHints(pod *v1.Pod) map[string][]topology.TopologyHint { 295 // [TODO]: implement pod scope get topologyHints for provider and resource plugins. 296 return nil 297 } 298 299 func (m *ManagerImpl) Allocate(pod *v1.Pod, container *v1.Container) error { 300 if pod == nil || container == nil { 301 return fmt.Errorf("Allocate got nil pod: %v or container: %v", pod, container) 302 } 303 304 err := m.addContainer(pod, container) 305 if err != nil { 306 return err 307 } 308 309 err = m.syncContainer(pod, container) 310 return err 311 } 312 313 func (m *ManagerImpl) initDeviceProvider(config *config.Configuration) { 314 switch config.ORMDevicesProvider { 315 case kubeletDevicesProvider: 316 p, err := kubelet.NewProvider(config.ORMKubeletPodResourcesEndpoints, podresourcesutil.GetV1Client) 317 if err != nil { 318 klog.Fatalf("new kubelet devices provider fail: %v", err) 319 } 320 m.devicesProvider = p 321 case NoneDevicesProvider: 322 m.devicesProvider = &podresources.DevicesProviderStub{} 323 default: 324 klog.Fatalf("Unknown ORMDevicesProvider: %s", config.ORMDevicesProvider) 325 } 326 } 327 328 func (m *ManagerImpl) onPodAdd(podUID string) { 329 klog.V(5).Infof("[ORM] onPodAdd: %v", podUID) 330 331 timeout, cancel := context.WithTimeout(m.ctx, 1*time.Second) 332 defer cancel() 333 334 select { 335 case m.podAddChan <- podUID: 336 337 case <-timeout.Done(): 338 klog.Errorf("[ORM] add pod timeout: %v", podUID) 339 _ = m.emitter.StoreInt64(MetricAddPodTimeout, 1, metrics.MetricTypeNameRaw) 340 } 341 } 342 343 func (m *ManagerImpl) onPodDelete(podUID string) { 344 klog.V(5).Infof("[ORM] onPodDelete: %v", podUID) 345 346 timeout, cancel := context.WithTimeout(m.ctx, 1*time.Second) 347 defer cancel() 348 349 select { 350 case m.podDeleteChan <- podUID: 351 352 case <-timeout.Done(): 353 klog.Errorf("[ORM] delete pod timeout: %v", podUID) 354 _ = m.emitter.StoreInt64(MetricDeletePodTImeout, 1, metrics.MetricTypeNameRaw) 355 } 356 } 357 358 func (m *ManagerImpl) process() { 359 klog.Infof("[ORM] start process...") 360 361 for { 362 select { 363 case podUID := <-m.podAddChan: 364 err := m.processAddPod(podUID) 365 if err != nil { 366 klog.Errorf("[ORM] processAddPod fail, podUID: %v, err: %v", podUID, err) 367 } 368 369 case podUID := <-m.podDeleteChan: 370 err := m.processDeletePod(podUID) 371 if err != nil { 372 klog.Errorf("[ORM] processDeletePod fail, podUID: %v, err: %v", podUID, err) 373 } 374 375 case <-m.ctx.Done(): 376 klog.Infof("[ORM] ctx done, exit") 377 return 378 } 379 } 380 } 381 382 func (m *ManagerImpl) processAddPod(podUID string) error { 383 pod, err := m.metaManager.MetaServer.GetPod(m.ctx, podUID) 384 if err != nil { 385 klog.Errorf("[ORM] processAddPod getPod fail, podUID: %v, err: %v", podUID, err) 386 return err 387 } 388 389 return m.topologyManager.Admit(pod) 390 } 391 392 func (m *ManagerImpl) processDeletePod(podUID string) error { 393 allSuccess := true 394 395 m.mutex.Lock() 396 for resourceName, endpoint := range m.endpoints { 397 _, err := endpoint.E.RemovePod(m.ctx, &pluginapi.RemovePodRequest{ 398 PodUid: podUID, 399 }) 400 if err != nil { 401 allSuccess = false 402 klog.Errorf("[ORM] plugin %v remove pod %v fail: %v", resourceName, podUID, err) 403 } 404 } 405 m.mutex.Unlock() 406 407 if allSuccess { 408 m.podResources.deletePod(podUID) 409 m.topologyManager.RemovePod(podUID) 410 } 411 412 return m.writeCheckpoint() 413 } 414 415 func (m *ManagerImpl) addContainer(pod *v1.Pod, container *v1.Container) error { 416 klog.V(5).Infof("[ORM] addContainer, pod: %v, container: %v", pod.Name, container.Name) 417 418 systemCores, err := isPodKatalystQoSLevelSystemCores(m.qosConfig, pod) 419 if err != nil { 420 klog.Errorf("[ORM] check pod %s qos level fail: %v", pod.Name, err) 421 return err 422 } 423 424 if native.CheckDaemonPod(pod) && !systemCores { 425 klog.Infof("[ORM] skip pod: %s/%s, container: %s resource allocation", 426 pod.Namespace, pod.Name, container.Name) 427 return nil 428 } 429 430 containerType, containerIndex, err := GetContainerTypeAndIndex(pod, container) 431 if err != nil { 432 return err 433 } 434 435 for k, v := range container.Resources.Requests { 436 needed := int(v.Value()) 437 resource, err := m.getMappedResourceName(string(k), container.Resources.Requests) 438 if err != nil { 439 klog.Errorf("resource %s getMappedResourceName fail: %v", string(k), err) 440 return err 441 } 442 443 allocationInfo := m.podResources.containerResource(string(pod.UID), container.Name, resource) 444 if allocationInfo != nil { 445 allocated := int(math.Ceil(allocationInfo.AllocatedQuantity)) 446 447 if allocationInfo.IsScalarResource && allocated >= needed { 448 klog.Infof("[ORM] resource %s already allocated to (pod %s/%s, container %v) with larger number than request: requested: %d, allocated: %d; not to allocate", 449 resource, pod.GetNamespace(), pod.GetName(), container.Name, needed, allocated) 450 continue 451 } else { 452 klog.Warningf("[ORM] resource %s already allocated to (pod %s/%s, container %v) with smaller number than request: requested: %d, allocated: %d; continue to allocate", 453 resource, pod.GetNamespace(), pod.GetName(), container.Name, needed, allocated) 454 } 455 } 456 457 m.mutex.Lock() 458 e, ok := m.endpoints[resource] 459 m.mutex.Unlock() 460 if !ok { 461 klog.V(5).Infof("[ORM] addContainer resource %s not supported", resource) 462 continue 463 } 464 465 resourceReq := &pluginapi.ResourceRequest{ 466 PodUid: string(pod.UID), 467 PodNamespace: pod.GetNamespace(), 468 PodName: pod.GetName(), 469 ContainerName: container.Name, 470 ContainerType: containerType, 471 ContainerIndex: containerIndex, 472 // PodRole and PodType should be identified by more general annotations 473 PodRole: pod.Labels[pluginapi.PodRoleLabelKey], 474 PodType: pod.Annotations[pluginapi.PodTypeAnnotationKey], 475 // use mapped resource name in "ResourceName" to indicates which endpoint to request 476 ResourceName: resource, 477 // use original requested resource name in "ResourceRequests" in order to make plugin identity real requested resource name 478 ResourceRequests: map[string]float64{resource: v.AsApproximateFloat64()}, 479 Labels: maputil.CopySS(pod.Labels), 480 Annotations: maputil.CopySS(pod.Annotations), 481 } 482 483 if e.Opts != nil && e.Opts.WithTopologyAlignment { 484 hint := m.topologyManager.GetAffinity(string(pod.UID), container.Name, resource) 485 486 if hint.NUMANodeAffinity == nil { 487 klog.Warningf("[ORM] pod: %s/%s; container: %s allocate resource: %s without numa nodes affinity", 488 pod.Namespace, pod.Name, container.Name, resource) 489 } else { 490 klog.Warningf("[ORM] pod: %s/%s; container: %s allocate resource: %s get hint: %v from store", 491 pod.Namespace, pod.Name, container.Name, resource, hint) 492 } 493 494 resourceReq.Hint = ParseTopologyManagerHint(hint) 495 } 496 497 response, err := e.E.Allocate(m.ctx, resourceReq) 498 if err != nil { 499 err = fmt.Errorf("[ORM] addContainer allocate fail, pod %v, container %v, err: %v", pod.Name, container.Name, err) 500 klog.Error(err) 501 return err 502 } 503 504 if response.AllocationResult == nil { 505 klog.Warningf("[ORM] allocate for pod %v container %v resource %v got nil allocation result", pod.Name, container.Name, resource) 506 continue 507 } 508 509 // update 510 m.UpdatePodResources(response.AllocationResult.ResourceAllocation, pod, container, resource) 511 } 512 513 // write checkpoint 514 return m.writeCheckpoint() 515 } 516 517 func (m *ManagerImpl) syncContainer(pod *v1.Pod, container *v1.Container) error { 518 klog.Infof("[ORM] syncContainer, pod: %v, container: %v", pod.Name, container.Name) 519 containerAllResources := m.podResources.containerAllResources(string(pod.UID), container.Name) 520 if containerAllResources == nil { 521 klog.V(5).Infof("got pod %v container %v resources nil", pod.Name, container.Name) 522 return nil 523 } 524 525 err := m.resourceExecutor.UpdateContainerResources(pod, container, containerAllResources) 526 if err != nil { 527 klog.Errorf("[ORM] UpdateContainerResources fail, pod: %v, container: %v, err: %v", pod.Name, container.Name, err) 528 return err 529 } 530 531 return nil 532 } 533 534 func (m *ManagerImpl) reconcile() { 535 klog.V(5).Infof("[ORM] reconcile...") 536 resourceAllocationResps := make(map[string]*pluginapi.GetResourcesAllocationResponse) 537 activePods, err := m.metaManager.MetaServer.GetPodList(m.ctx, native.PodIsActive) 538 if err != nil { 539 klog.Errorf("[ORM] getPodList fail: %v", err) 540 return 541 } 542 543 m.mutex.Lock() 544 for resourceName, e := range m.endpoints { 545 if e.E.IsStopped() { 546 klog.Warningf("[ORM] skip getResourceAllocation of resource: %s, because plugin stopped", resourceName) 547 continue 548 } else if !e.Opts.NeedReconcile { 549 klog.V(5).Infof("[ORM] skip getResourceAllocation of resource: %s, because plugin needn't reconciling", resourceName) 550 continue 551 } 552 resp, err := e.E.GetResourceAllocation(m.ctx, &pluginapi.GetResourcesAllocationRequest{}) 553 if err != nil { 554 klog.Errorf("[ORM] plugin %s getResourcesAllocation fail: %v", resourceName, err) 555 continue 556 } 557 558 resourceAllocationResps[resourceName] = resp 559 } 560 m.mutex.Unlock() 561 562 for _, pod := range activePods { 563 if pod == nil { 564 continue 565 } 566 systemCores, err := isPodKatalystQoSLevelSystemCores(m.qosConfig, pod) 567 if err != nil { 568 klog.Errorf("[ORM] check pod %s qos level fail: %v", pod.Name, err) 569 } 570 571 if native.CheckDaemonPod(pod) && !systemCores { 572 continue 573 } 574 for _, container := range pod.Spec.Containers { 575 576 needsReAllocate := false 577 for resourceName, resp := range resourceAllocationResps { 578 if resp == nil { 579 klog.Warningf("[ORM] resource: %s got nil resourceAllocationResp", resourceName) 580 continue 581 } 582 583 isRequested, err := m.IsContainerRequestResource(&container, resourceName) 584 if err != nil { 585 klog.Errorf("[ORM] IsContainerRequestResource fail, container %v, resourceName %v, err: %v", container.Name, resourceName, err) 586 continue 587 } 588 589 if isRequested { 590 if resp.PodResources[string(pod.UID)] != nil && resp.PodResources[string(pod.UID)].ContainerResources[container.Name] != nil { 591 resourceAllocations := resp.PodResources[string(pod.UID)].ContainerResources[container.Name] 592 m.UpdatePodResources(resourceAllocations.ResourceAllocation, pod, &container, resourceName) 593 } else { 594 needsReAllocate = true 595 m.podResources.deleteResourceAllocationInfo(string(pod.UID), container.Name, resourceName) 596 } 597 } 598 } 599 if needsReAllocate && !isSkippedContainer(pod, &container) { 600 klog.Infof("[ORM] needs re-allocate resource plugin resources for pod %s/%s, container %s during reconcileState", 601 pod.Namespace, pod.Name, container.Name) 602 err = m.addContainer(pod, &container) 603 if err != nil { 604 klog.Errorf("[ORM] re addContainer fail, pod %v container %v, err: %v", pod.Name, container.Name, err) 605 continue 606 } 607 } 608 609 _ = m.syncContainer(pod, &container) 610 } 611 } 612 613 err = m.writeCheckpoint() 614 if err != nil { 615 klog.Errorf("[ORM] writeCheckpoint: %v", err) 616 } 617 } 618 619 func (m *ManagerImpl) UpdatePodResources( 620 resourceAllocation map[string]*pluginapi.ResourceAllocationInfo, 621 pod *v1.Pod, container *v1.Container, resource string, 622 ) { 623 for accResourceName, allocationInfo := range resourceAllocation { 624 if allocationInfo == nil { 625 klog.Warningf("[ORM] allocation request for resources %s - accompanying resource: %s for pod: %s/%s, container: %s got nil allocation information", 626 resource, accResourceName, pod.Namespace, pod.Name, container.Name) 627 continue 628 } 629 630 klog.V(4).Infof("[ORM] allocation information for resources %s - accompanying resource: %s for pod: %s/%s, container: %s is %v", 631 resource, accResourceName, pod.Namespace, pod.Name, container.Name, *allocationInfo) 632 633 m.podResources.insert(string(pod.UID), container.Name, accResourceName, allocationInfo) 634 } 635 } 636 637 // getMappedResourceName returns mapped resource name of input "resourceName" in m.resourceNamesMap if there is the mapping entry, 638 // or it will return input "resourceName". 639 // If both the input "resourceName" and the mapped resource name are requested, it will return error. 640 func (m *ManagerImpl) getMappedResourceName(resourceName string, requests v1.ResourceList) (string, error) { 641 if _, found := m.resourceNamesMap[resourceName]; !found { 642 return resourceName, nil 643 } 644 645 mappedResourceName := m.resourceNamesMap[resourceName] 646 647 _, foundReq := requests[v1.ResourceName(resourceName)] 648 _, foundMappedReq := requests[v1.ResourceName(mappedResourceName)] 649 650 if foundReq && foundMappedReq { 651 return mappedResourceName, fmt.Errorf("both %s and mapped %s are requested", resourceName, mappedResourceName) 652 } 653 654 klog.V(5).Infof("[ORM] map resource name: %s to %s", resourceName, mappedResourceName) 655 656 return mappedResourceName, nil 657 } 658 659 func (m *ManagerImpl) IsContainerRequestResource(container *v1.Container, resourceName string) (bool, error) { 660 if container == nil { 661 return false, nil 662 } 663 664 for k := range container.Resources.Requests { 665 requestedResourceName, err := m.getMappedResourceName(string(k), container.Resources.Requests) 666 if err != nil { 667 return false, err 668 } 669 670 if requestedResourceName == resourceName { 671 return true, nil 672 } 673 } 674 675 return false, nil 676 } 677 678 func GetContainerTypeAndIndex(pod *v1.Pod, container *v1.Container) (containerType pluginapi.ContainerType, containerIndex uint64, err error) { 679 if pod == nil || container == nil { 680 err = fmt.Errorf("got nil pod: %v or container: %v", pod, container) 681 return 682 } 683 684 foundContainer := false 685 686 for i, initContainer := range pod.Spec.InitContainers { 687 if container.Name == initContainer.Name { 688 foundContainer = true 689 containerType = pluginapi.ContainerType_INIT 690 containerIndex = uint64(i) 691 break 692 } 693 } 694 695 if !foundContainer { 696 mainContainerName := pod.Annotations[MainContainerNameAnnotationKey] 697 698 if mainContainerName == "" && len(pod.Spec.Containers) > 0 { 699 mainContainerName = pod.Spec.Containers[0].Name 700 } 701 702 for i, appContainer := range pod.Spec.Containers { 703 if container.Name == appContainer.Name { 704 foundContainer = true 705 706 if container.Name == mainContainerName { 707 containerType = pluginapi.ContainerType_MAIN 708 } else { 709 containerType = pluginapi.ContainerType_SIDECAR 710 } 711 712 containerIndex = uint64(i) 713 break 714 } 715 } 716 } 717 718 if !foundContainer { 719 err = fmt.Errorf("GetContainerTypeAndIndex doesn't find container: %s in pod: %s/%s", container.Name, pod.Namespace, pod.Name) 720 } 721 722 return 723 } 724 725 func isSkippedContainer(pod *v1.Pod, container *v1.Container) bool { 726 containerType, _, err := GetContainerTypeAndIndex(pod, container) 727 if err != nil { 728 klog.Errorf("GetContainerTypeAndIndex failed with error: %v", err) 729 return false 730 } 731 732 return containerType == pluginapi.ContainerType_INIT 733 } 734 735 func isPodKatalystQoSLevelSystemCores(qosConfig *generic.QoSConfiguration, pod *v1.Pod) (bool, error) { 736 qosLevel, err := qosConfig.GetQoSLevelForPod(pod) 737 if err != nil { 738 return false, err 739 } 740 741 return qosLevel == pluginapi.KatalystQoSLevelSystemCores, nil 742 } 743 744 func ParseListOfTopologyHints(hintsList *pluginapi.ListOfTopologyHints) []topology.TopologyHint { 745 if hintsList == nil { 746 return nil 747 } 748 749 resultHints := make([]topology.TopologyHint, 0, len(hintsList.Hints)) 750 751 for _, hint := range hintsList.Hints { 752 if hint != nil { 753 754 mask := bitmask.NewEmptyBitMask() 755 756 for _, node := range hint.Nodes { 757 mask.Add(int(node)) 758 } 759 760 resultHints = append(resultHints, topology.TopologyHint{ 761 NUMANodeAffinity: mask, 762 Preferred: hint.Preferred, 763 }) 764 } 765 } 766 767 return resultHints 768 } 769 770 func ParseTopologyManagerHint(hint topology.TopologyHint) *pluginapi.TopologyHint { 771 var nodes []uint64 772 773 if hint.NUMANodeAffinity != nil { 774 bits := hint.NUMANodeAffinity.GetBits() 775 776 for _, node := range bits { 777 nodes = append(nodes, uint64(node)) 778 } 779 } 780 781 return &pluginapi.TopologyHint{ 782 Nodes: nodes, 783 Preferred: hint.Preferred, 784 } 785 }