github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/cpu/nativepolicy/policy.go (about)

/*
Copyright 2022 The Katalyst Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package nativepolicy

import (
    "context"
    "fmt"
    "math"
    "sync"
    "time"

    v1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/util/wait"
    pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"

    "github.com/kubewharf/katalyst-api/pkg/plugins/skeleton"
    "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent"
    cpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts"
    "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state"
    nativepolicyutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/nativepolicy/util"
    "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
    "github.com/kubewharf/katalyst-core/pkg/config"
    dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"
    "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/crd"
    "github.com/kubewharf/katalyst-core/pkg/metaserver"
    "github.com/kubewharf/katalyst-core/pkg/metrics"
    "github.com/kubewharf/katalyst-core/pkg/util/general"
    utilkubeconfig "github.com/kubewharf/katalyst-core/pkg/util/kubelet/config"
    "github.com/kubewharf/katalyst-core/pkg/util/machine"
    "github.com/kubewharf/katalyst-core/pkg/util/native"
)

const (
    // cpuPluginStateFileName is the name of the cpu plugin state file.
    cpuPluginStateFileName = "cpu_plugin_state"
)

const (
    stateCheckPeriod = 30 * time.Second
    maxResidualTime  = 5 * time.Minute
)

var (
    readonlyStateLock sync.RWMutex
    readonlyState     state.ReadonlyState
)

// GetReadonlyState returns state.ReadonlyState to provide a way
// to obtain the running states of the plugin.
func GetReadonlyState() (state.ReadonlyState, error) {
    readonlyStateLock.RLock()
    defer readonlyStateLock.RUnlock()

    if readonlyState == nil {
        return nil, fmt.Errorf("readonlyState isn't set")
    }
    return readonlyState, nil
}

// NativePolicy is a policy compatible with Kubernetes native semantics and is used in topology-aware scheduling scenarios.
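// It dispatches Guaranteed pods that request an integer number of CPUs to dedicated cores and
// routes all other workloads to the shared pool (see GetTopologyHints and Allocate below).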
type NativePolicy struct {
    sync.RWMutex
    name    string
    stopCh  chan struct{}
    started bool

    emitter     metrics.MetricEmitter
    metaServer  *metaserver.MetaServer
    machineInfo *machine.KatalystMachineInfo

    state          state.State
    residualHitMap map[string]int64
    // set of CPUs to reuse across allocations in a pod
    cpusToReuse map[string]machine.CPUSet

    // the fields below are parsed from configurations
    // todo: if we want to use dynamic configuration, we'd better not use self-defined conf
    reservedCPUs           machine.CPUSet
    cpuPluginSocketAbsPath string
    extraStateFileAbsPath  string
    dynamicConfig          *dynamicconfig.DynamicAgentConfiguration
    podDebugAnnoKeys       []string

    // enableFullPhysicalCPUsOnly is a flag to enable extra allocation restrictions that prevent
    // different containers from ending up on the same physical core.
    enableFullPhysicalCPUsOnly bool

    // cpuAllocationOption is the allocation option of cpu (packed/distributed).
    cpuAllocationOption string
}

// NewNativePolicy constructs the native cpu policy and wraps it as a registrable QRM plugin component.
func NewNativePolicy(agentCtx *agent.GenericContext, conf *config.Configuration,
    _ interface{}, agentName string,
) (bool, agent.Component, error) {
    general.Infof("new native policy")

    stateImpl, stateErr := state.NewCheckpointState(conf.GenericQRMPluginConfiguration.StateFileDirectory, cpuPluginStateFileName,
        cpuconsts.CPUResourcePluginPolicyNameNative, agentCtx.CPUTopology, conf.SkipCPUStateCorruption)
    if stateErr != nil {
        return false, agent.ComponentStub{}, fmt.Errorf("NewCheckpointState failed with error: %v", stateErr)
    }

    readonlyStateLock.Lock()
    readonlyState = stateImpl
    readonlyStateLock.Unlock()

    wrappedEmitter := agentCtx.EmitterPool.GetDefaultMetricsEmitter().WithTags(agentName, metrics.MetricTag{
        Key: util.QRMPluginPolicyTagName,
        Val: cpuconsts.CPUResourcePluginPolicyNameNative,
    })

    policyImplement := &NativePolicy{
        name:                       fmt.Sprintf("%s_%s", agentName, cpuconsts.CPUResourcePluginPolicyNameNative),
        stopCh:                     make(chan struct{}),
        machineInfo:                agentCtx.KatalystMachineInfo,
        emitter:                    wrappedEmitter,
        metaServer:                 agentCtx.MetaServer,
        residualHitMap:             make(map[string]int64),
        cpusToReuse:                make(map[string]machine.CPUSet),
        state:                      stateImpl,
        dynamicConfig:              conf.DynamicAgentConfiguration,
        cpuPluginSocketAbsPath:     conf.CPUPluginSocketAbsPath,
        extraStateFileAbsPath:      conf.ExtraStateFileAbsPath,
        podDebugAnnoKeys:           conf.PodDebugAnnoKeys,
        enableFullPhysicalCPUsOnly: conf.EnableFullPhysicalCPUsOnly,
        cpuAllocationOption:        conf.CPUAllocationOption,
    }

    if err := policyImplement.setReservedCPUs(agentCtx.CPUDetails.CPUs().Clone()); err != nil {
        return false, agent.ComponentStub{}, fmt.Errorf("native policy set reserved CPUs failed with error: %v", err)
    }

    state.SetContainerRequestedCores(policyImplement.getContainerRequestedCores)

    err := agentCtx.MetaServer.ConfigurationManager.AddConfigWatcher(crd.AdminQoSConfigurationGVR)
    if err != nil {
        return false, nil, err
    }

    pluginWrapper, err := skeleton.NewRegistrationPluginWrapper(policyImplement, conf.QRMPluginSocketDirs, nil)
    if err != nil {
        return false, agent.ComponentStub{}, fmt.Errorf("native policy new plugin wrapper failed with error: %v", err)
    }

    return true, &agent.PluginWrapper{GenericPlugin: pluginWrapper}, nil
}

func (p *NativePolicy) Name() string {
    return p.name
}

func (p *NativePolicy) ResourceName() string {
    return string(v1.ResourceCPU)
}

// Start creates the stop channel and launches the background heartbeat and residual-state cleanup loops.
func (p *NativePolicy) Start() (err error) {
    general.Infof("called")

    p.Lock()
    defer func() {
        if err == nil {
            p.started = true
        }
        p.Unlock()
    }()

    if p.started {
        general.Infof("is already started")
        return nil
    }
    p.stopCh = make(chan struct{})

    go wait.Until(func() {
        _ = p.emitter.StoreInt64(util.MetricNameHeartBeat, 1, metrics.MetricTypeNameRaw)
    }, time.Second*30, p.stopCh)
    go wait.Until(p.clearResidualState, stateCheckPeriod, p.stopCh)

    return nil
}

// Stop closes the stop channel to terminate the background loops.
func (p *NativePolicy) Stop() error {
    p.Lock()
    defer func() {
        p.started = false
        p.Unlock()
        general.Infof("stopped")
    }()

    if !p.started {
        general.Warningf("already stopped")
        return nil
    }
    close(p.stopCh)

    return nil
}

// GetResourcePluginOptions returns options to be communicated with Resource Manager
func (p *NativePolicy) GetResourcePluginOptions(context.Context,
    *pluginapi.Empty,
) (*pluginapi.ResourcePluginOptions, error) {
    general.Infof("called")
    return &pluginapi.ResourcePluginOptions{
        PreStartRequired:      false,
        WithTopologyAlignment: true,
        NeedReconcile:         true,
    }, nil
}

// GetTopologyHints returns hints of corresponding resources
func (p *NativePolicy) GetTopologyHints(ctx context.Context,
    req *pluginapi.ResourceRequest,
) (resp *pluginapi.ResourceHintsResponse, err error) {
    if req == nil {
        return nil, fmt.Errorf("GetTopologyHints got nil req")
    }

    // identify whether the pod is a debug pod and, if so, apply a specific strategy to it.
    // since the GetKatalystQoSLevelFromResourceReq function filters annotations,
    // this must be done before calling GetKatalystQoSLevelFromResourceReq.
    isDebugPod := util.IsDebugPod(req.Annotations, p.podDebugAnnoKeys)

    reqInt, _, err := util.GetQuantityFromResourceReq(req)
    if err != nil {
        return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err)
    }

    // only an integer CPU request is eligible for dedicated cores; fractional requests
    // fall through to the shared pool below.
    isInteger := float64(reqInt) == req.ResourceRequests[string(v1.ResourceCPU)]

    general.InfoS("called",
        "podNamespace", req.PodNamespace,
        "podName", req.PodName,
        "containerName", req.ContainerName,
        "podType", req.PodType,
        "podRole", req.PodRole,
        "containerType", req.ContainerType,
        "qosClass", req.NativeQosClass,
        "numCPUs", reqInt,
        "isDebugPod", isDebugPod,
        "isInteger", isInteger)

    if req.ContainerType == pluginapi.ContainerType_INIT || isDebugPod {
        general.Infof("there is no NUMA preference, return nil hint")
        return util.PackResourceHintsResponse(req, string(v1.ResourceCPU),
            map[string]*pluginapi.ListOfTopologyHints{
                string(v1.ResourceCPU): nil, // indicates that there is no numa preference
            })
    }

    p.RLock()
    defer func() {
        p.RUnlock()
        if err != nil {
            _ = p.emitter.StoreInt64(util.MetricNameGetTopologyHintsFailed, 1, metrics.MetricTypeNameRaw)
        }
    }()

    if req.NativeQosClass != string(v1.PodQOSGuaranteed) || !isInteger {
        return p.sharedPoolHintHandler(ctx, req)
    }
    return p.dedicatedCoresHintHandler(ctx, req)
}

// Allocate is called during pod admit so that the resource
// plugin can allocate the corresponding resource for the container
// according to the resource request
func (p *NativePolicy) Allocate(ctx context.Context,
    req *pluginapi.ResourceRequest,
) (resp *pluginapi.ResourceAllocationResponse, respErr error) {
    if req == nil {
        return nil, fmt.Errorf("allocate got nil req")
    }

    // identify whether the pod is a debug pod and, if so, apply a specific strategy to it.
    // since the GetKatalystQoSLevelFromResourceReq function filters annotations,
    // this must be done before calling GetKatalystQoSLevelFromResourceReq.
    isDebugPod := util.IsDebugPod(req.Annotations, p.podDebugAnnoKeys)

    reqInt, _, err := util.GetQuantityFromResourceReq(req)
    if err != nil {
        return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err)
    }

    isInteger := float64(reqInt) == req.ResourceRequests[string(v1.ResourceCPU)]

    general.InfoS("called",
        "podNamespace", req.PodNamespace,
        "podName", req.PodName,
        "containerName", req.ContainerName,
        "podType", req.PodType,
        "podRole", req.PodRole,
        "containerType", req.ContainerType,
        "qosClass", req.NativeQosClass,
        "numCPUs", reqInt,
        "isDebugPod", isDebugPod,
        "isInteger", isInteger)

    if req.ContainerType == pluginapi.ContainerType_INIT {
        return &pluginapi.ResourceAllocationResponse{
            PodUid:         req.PodUid,
            PodNamespace:   req.PodNamespace,
            PodName:        req.PodName,
            ContainerName:  req.ContainerName,
            ContainerType:  req.ContainerType,
            ContainerIndex: req.ContainerIndex,
            PodRole:        req.PodRole,
            PodType:        req.PodType,
            ResourceName:   string(v1.ResourceCPU),
            Labels:         general.DeepCopyMap(req.Labels),
            Annotations:    general.DeepCopyMap(req.Annotations),
            NativeQosClass: req.NativeQosClass,
        }, nil
    }

    if isDebugPod {
        return &pluginapi.ResourceAllocationResponse{
            PodUid:         req.PodUid,
            PodNamespace:   req.PodNamespace,
            PodName:        req.PodName,
            ContainerName:  req.ContainerName,
            ContainerType:  req.ContainerType,
            ContainerIndex: req.ContainerIndex,
            PodRole:        req.PodRole,
            PodType:        req.PodType,
            ResourceName:   string(v1.ResourceCPU),
            AllocationResult: &pluginapi.ResourceAllocation{
                ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{
                    string(v1.ResourceCPU): {
                        // return ResourceAllocation with empty OciPropertyName, AllocatedQuantity and AllocationResult
                        // for containers in a debug pod, so it won't influence the oci spec properties of the container
                        IsNodeResource:   false,
                        IsScalarResource: true,
                    },
                },
            },
            Labels:         general.DeepCopyMap(req.Labels),
            Annotations:    general.DeepCopyMap(req.Annotations),
            NativeQosClass: req.NativeQosClass,
        }, nil
    }

    p.Lock()
    defer func() {
        if respErr != nil {
            _ = p.removeContainer(req.PodUid, req.ContainerName)
            _ = p.emitter.StoreInt64(util.MetricNameAllocateFailed, 1, metrics.MetricTypeNameRaw)
        }

        p.Unlock()
        return
    }()

    allocationInfo := p.state.GetAllocationInfo(req.PodUid, req.ContainerName)
    if allocationInfo != nil && allocationInfo.OriginalAllocationResult.Size() >= reqInt {
        general.InfoS("already allocated and meets requirement",
            "podNamespace", req.PodNamespace,
            "podName", req.PodName,
            "containerName", req.ContainerName,
            "numCPUs", reqInt,
            "originalAllocationResult", allocationInfo.OriginalAllocationResult.String(),
            "currentResult", allocationInfo.AllocationResult.String())

        p.updateCPUsToReuse(req, allocationInfo.AllocationResult)

        return &pluginapi.ResourceAllocationResponse{
            PodUid:         req.PodUid,
            PodNamespace:   req.PodNamespace,
            PodName:        req.PodName,
            ContainerName:  req.ContainerName,
            ContainerType:  req.ContainerType,
            ContainerIndex: req.ContainerIndex,
            PodRole:        req.PodRole,
            PodType:        req.PodType,
            ResourceName:   string(v1.ResourceCPU),
            AllocationResult: &pluginapi.ResourceAllocation{
                ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{
                    string(v1.ResourceCPU): {
                        OciPropertyName:  util.OCIPropertyNameCPUSetCPUs,
                        IsNodeResource:    false,
                        IsScalarResource:  true,
                        AllocatedQuantity: float64(allocationInfo.AllocationResult.Size()),
                        AllocationResult:  allocationInfo.AllocationResult.String(),
                    },
                },
            },
            Labels:         general.DeepCopyMap(req.Labels),
            Annotations:    general.DeepCopyMap(req.Annotations),
            NativeQosClass: req.NativeQosClass,
        }, nil
    }

    if req.NativeQosClass != string(v1.PodQOSGuaranteed) || !isInteger {
        return p.sharedPoolAllocationHandler(ctx, req)
    }
    return p.dedicatedCoresAllocationHandler(ctx, req)
}

// GetResourcesAllocation returns allocation results of corresponding resources
func (p *NativePolicy) GetResourcesAllocation(_ context.Context,
    req *pluginapi.GetResourcesAllocationRequest,
) (*pluginapi.GetResourcesAllocationResponse, error) {
    if req == nil {
        return nil, fmt.Errorf("GetResourcesAllocation got nil req")
    }

    general.Infof("called")
    p.Lock()
    defer p.Unlock()

    defaultCPUSet := p.state.GetMachineState().GetDefaultCPUSet()
    defaultCPUSetTopologyAwareAssignments, err := machine.GetNumaAwareAssignments(p.machineInfo.CPUTopology, defaultCPUSet)
    if err != nil {
        return nil, fmt.Errorf("GetNumaAwareAssignments err: %v", err)
    }

    podResources := make(map[string]*pluginapi.ContainerResources)

    for podUID, containerEntries := range p.state.GetPodEntries() {
        if podResources[podUID] == nil {
            podResources[podUID] = &pluginapi.ContainerResources{}
        }

        for containerName, allocationInfo := range containerEntries {
            if allocationInfo == nil {
                continue
            }
            allocationInfo = allocationInfo.Clone()

            resultCPUSet := machine.NewCPUSet()
            switch allocationInfo.OwnerPoolName {
            case state.PoolNameDedicated:
                resultCPUSet = allocationInfo.AllocationResult
            case state.PoolNameShare:
                resultCPUSet = defaultCPUSet

                if !allocationInfo.AllocationResult.Equals(defaultCPUSet) {
                    clonedDefaultCPUSet := defaultCPUSet.Clone()
                    clonedDefaultCPUSetTopologyAwareAssignments := machine.DeepcopyCPUAssignment(defaultCPUSetTopologyAwareAssignments)

                    allocationInfo.AllocationResult = clonedDefaultCPUSet
                    allocationInfo.OriginalAllocationResult = clonedDefaultCPUSet
                    allocationInfo.TopologyAwareAssignments = clonedDefaultCPUSetTopologyAwareAssignments
                    allocationInfo.OriginalTopologyAwareAssignments = clonedDefaultCPUSetTopologyAwareAssignments

                    p.state.SetAllocationInfo(podUID, containerName, allocationInfo)
                }
            default:
                general.Errorf("skip container because the pool name is not supported, pod: %s, container: %s, cpuset: %s",
                    podUID, containerName, resultCPUSet.String())
                continue
            }

            if podResources[podUID].ContainerResources == nil {
                podResources[podUID].ContainerResources = make(map[string]*pluginapi.ResourceAllocation)
            }

            podResources[podUID].ContainerResources[containerName] = &pluginapi.ResourceAllocation{
                ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{
                    string(v1.ResourceCPU): {
                        OciPropertyName:   util.OCIPropertyNameCPUSetCPUs,
                        IsNodeResource:    false,
                        IsScalarResource:  true,
                        AllocatedQuantity: float64(resultCPUSet.Size()),
                        AllocationResult:  resultCPUSet.String(),
                    },
                },
            }
        }
    }

    return &pluginapi.GetResourcesAllocationResponse{
        PodResources: podResources,
    }, nil
}

// GetTopologyAwareResources returns allocation results of
// the corresponding resources in a topology-aware format
func (p *NativePolicy) GetTopologyAwareResources(_ context.Context,
    req *pluginapi.GetTopologyAwareResourcesRequest,
) (*pluginapi.GetTopologyAwareResourcesResponse, error) {
    if req == nil {
        return nil, fmt.Errorf("GetTopologyAwareResources got nil req")
    }

    general.Infof("called")
    p.RLock()
    defer p.RUnlock()

    allocationInfo := p.state.GetAllocationInfo(req.PodUid, req.ContainerName)
    if allocationInfo == nil {
        return nil, fmt.Errorf("pod: %s, container: %s does not show up in cpu plugin state", req.PodUid, req.ContainerName)
    }

    resp := &pluginapi.GetTopologyAwareResourcesResponse{
        PodUid:       allocationInfo.PodUid,
        PodName:      allocationInfo.PodName,
        PodNamespace: allocationInfo.PodNamespace,
        ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{
            ContainerName:      allocationInfo.ContainerName,
            AllocatedResources: make(map[string]*pluginapi.TopologyAwareResource),
        },
    }

    if allocationInfo.OwnerPoolName == state.PoolNameDedicated {
        resp.ContainerTopologyAwareResources.AllocatedResources[string(v1.ResourceCPU)] = &pluginapi.TopologyAwareResource{
            IsNodeResource:                    false,
            IsScalarResource:                  true,
            AggregatedQuantity:                float64(allocationInfo.AllocationResult.Size()),
            OriginalAggregatedQuantity:        float64(allocationInfo.OriginalAllocationResult.Size()),
            TopologyAwareQuantityList:         util.GetTopologyAwareQuantityFromAssignments(allocationInfo.TopologyAwareAssignments),
            OriginalTopologyAwareQuantityList: util.GetTopologyAwareQuantityFromAssignments(allocationInfo.OriginalTopologyAwareAssignments),
        }
    }

    return resp, nil
}

// GetTopologyAwareAllocatableResources returns the corresponding allocatable resources in a topology-aware format
func (p *NativePolicy) GetTopologyAwareAllocatableResources(_ context.Context,
    _ *pluginapi.GetTopologyAwareAllocatableResourcesRequest,
) (*pluginapi.GetTopologyAwareAllocatableResourcesResponse, error) {
    general.Infof("is called")

    numaNodes := p.machineInfo.CPUDetails.NUMANodes().ToSliceInt()
    topologyAwareAllocatableQuantityList := make([]*pluginapi.TopologyAwareQuantity, 0, len(numaNodes))
    topologyAwareCapacityQuantityList := make([]*pluginapi.TopologyAwareQuantity, 0, len(numaNodes))

    for _, numaNode := range numaNodes {
        numaNodeCPUs := p.machineInfo.CPUDetails.CPUsInNUMANodes(numaNode).Clone()
        topologyAwareAllocatableQuantityList = append(topologyAwareAllocatableQuantityList, &pluginapi.TopologyAwareQuantity{
            ResourceValue: float64(numaNodeCPUs.Difference(p.reservedCPUs).Size()),
            Node:          uint64(numaNode),
        })
        topologyAwareCapacityQuantityList = append(topologyAwareCapacityQuantityList, &pluginapi.TopologyAwareQuantity{
            ResourceValue: float64(numaNodeCPUs.Size()),
            Node:          uint64(numaNode),
        })
    }

    return &pluginapi.GetTopologyAwareAllocatableResourcesResponse{
        AllocatableResources: map[string]*pluginapi.AllocatableTopologyAwareResource{
            string(v1.ResourceCPU): {
                IsNodeResource:                       false,
                IsScalarResource:                     true,
                AggregatedAllocatableQuantity:        float64(p.machineInfo.NumCPUs - p.reservedCPUs.Size()),
                TopologyAwareAllocatableQuantityList: topologyAwareAllocatableQuantityList,
                AggregatedCapacityQuantity:           float64(p.machineInfo.NumCPUs),
                TopologyAwareCapacityQuantityList:    topologyAwareCapacityQuantityList,
            },
        },
    }, nil
}

// PreStartContainer is called, if indicated by the resource plugin during the registration phase,
// before each container start. Resource plugin can run resource-specific operations
// such as resetting the resource before making resources available to the container
func (p *NativePolicy) PreStartContainer(context.Context,
    *pluginapi.PreStartContainerRequest,
) (*pluginapi.PreStartContainerResponse, error) {
    return nil, nil
}

// RemovePod removes the pod's allocation records from the plugin state and regenerates the machine state.
func (p *NativePolicy) RemovePod(ctx context.Context,
    req *pluginapi.RemovePodRequest,
) (resp *pluginapi.RemovePodResponse, err error) {
    if req == nil {
        return nil, fmt.Errorf("RemovePod got nil req")
    }
    general.InfoS("is called", "podUID", req.PodUid)

    p.Lock()
    defer func() {
        p.Unlock()
        if err != nil {
            _ = p.emitter.StoreInt64(util.MetricNameRemovePodFailed, 1, metrics.MetricTypeNameRaw)
        }
    }()

    err = p.removePod(req.PodUid)
    if err != nil {
        general.ErrorS(err, "remove pod failed with error", "podUID", req.PodUid)
        return nil, err
    }

    return &pluginapi.RemovePodResponse{}, nil
}

func (p *NativePolicy) removePod(podUID string) error {
    podEntries := p.state.GetPodEntries()
    if len(podEntries[podUID]) == 0 {
        return nil
    }
    delete(podEntries, podUID)

    updatedMachineState, err := nativepolicyutil.GenerateMachineStateFromPodEntries(p.machineInfo.CPUTopology, podEntries)
    if err != nil {
        return fmt.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err)
    }

    p.state.SetPodEntries(podEntries)
    p.state.SetMachineState(updatedMachineState)
    return nil
}

func (p *NativePolicy) removeContainer(podUID, containerName string) error {
    podEntries := p.state.GetPodEntries()
    if podEntries[podUID][containerName] == nil {
        return nil
    }
    delete(podEntries[podUID], containerName)

    updatedMachineState, err := nativepolicyutil.GenerateMachineStateFromPodEntries(p.machineInfo.CPUTopology, podEntries)
    if err != nil {
        return fmt.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err)
    }

    p.state.SetPodEntries(podEntries)
    p.state.SetMachineState(updatedMachineState)
    return nil
}

// getContainerRequestedCores parses and returns the requested cores for the given container
func (p *NativePolicy) getContainerRequestedCores(allocationInfo *state.AllocationInfo) float64 {
    if allocationInfo == nil {
        general.Errorf("got nil allocationInfo")
        return 0
    }

    if allocationInfo.RequestQuantity == 0 {
        if p.metaServer == nil {
            general.Errorf("got nil metaServer")
            return 0
        }

        container, err := p.metaServer.GetContainerSpec(allocationInfo.PodUid, allocationInfo.ContainerName)
        if err != nil || container == nil {
            general.Errorf("get container failed with error: %v", err)
            return 0
        }

        cpuQuantity := native.CPUQuantityGetter()(container.Resources.Requests)
        allocationInfo.RequestQuantity = general.MaxFloat64(float64(cpuQuantity.MilliValue())/1000, 0)
        general.Infof("get cpu request quantity: %.3f for pod: %s/%s container: %s from podWatcher",
            allocationInfo.RequestQuantity, allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName)
    }
    return allocationInfo.RequestQuantity
}

// setReservedCPUs calculates and sets the reservedCPUs field
func (p *NativePolicy) setReservedCPUs(allCPUs machine.CPUSet) error {
    klConfig, err := p.metaServer.GetKubeletConfig(context.TODO())
    if err != nil {
        return fmt.Errorf("NewNativePolicy failed because get kubelet config failed with error: %v", err)
    }

    reservedQuantity, _, err := utilkubeconfig.GetReservedQuantity(klConfig, string(v1.ResourceCPU))
    if err != nil {
        return fmt.Errorf("getKubeletReservedQuantity failed because get kubelet reserved quantity failed with error: %v", err)
    } else if reservedQuantity.IsZero() {
        // The native policy requires this to be nonzero. Zero CPU reservation
        // would allow the shared pool to be completely exhausted. At that point
        // either we would violate our guarantee of exclusivity or need to evict
        // any pod that has at least one container that requires zero CPUs.
        // See the comments in policy_static.go for more details.
        return fmt.Errorf("the native policy requires systemreserved.cpu + kubereserved.cpu to be greater than zero")
    }

    // Take the ceiling of the reservation, since fractional CPUs cannot be
    // exclusively allocated.
    reservedCPUsFloat := float64(reservedQuantity.MilliValue()) / 1000
    numReservedCPUs := int(math.Ceil(reservedCPUsFloat))

    var reserved machine.CPUSet
    reservedCPUs, err := machine.Parse(klConfig.ReservedSystemCPUs)
    if err != nil {
        return fmt.Errorf("NewNativePolicy parse cpuset for reserved-cpus failed with error: %v", err)
    }
    if reservedCPUs.Size() > 0 {
        reserved = reservedCPUs
    } else {
        // takeByTopology allocates CPUs associated with low-numbered cores from
        // allCPUs.
        reserved, _ = p.takeByTopology(allCPUs, numReservedCPUs)
    }

    if reserved.Size() != numReservedCPUs {
        return fmt.Errorf("unable to reserve the required amount of CPUs (size of %s did not equal %d)", reserved, numReservedCPUs)
    }

    general.Infof("take reserved CPUs: %s by reservedCPUsNum: %d", reserved.String(), numReservedCPUs)

    p.reservedCPUs = reserved

    return nil
}