github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/cpu/nativepolicy/policy.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package nativepolicy
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"math"
    23  	"sync"
    24  	"time"
    25  
    26  	v1 "k8s.io/api/core/v1"
    27  	"k8s.io/apimachinery/pkg/util/wait"
    28  	pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
    29  
    30  	"github.com/kubewharf/katalyst-api/pkg/plugins/skeleton"
    31  	"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent"
    32  	cpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts"
    33  	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state"
    34  	nativepolicyutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/nativepolicy/util"
    35  	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
    36  	"github.com/kubewharf/katalyst-core/pkg/config"
    37  	dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"
    38  	"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/crd"
    39  	"github.com/kubewharf/katalyst-core/pkg/metaserver"
    40  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    41  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    42  	utilkubeconfig "github.com/kubewharf/katalyst-core/pkg/util/kubelet/config"
    43  	"github.com/kubewharf/katalyst-core/pkg/util/machine"
    44  	"github.com/kubewharf/katalyst-core/pkg/util/native"
    45  )
    46  
    47  const (
    48  	// cpuPluginStateFileName is the name of cpu plugin state file.
    49  	cpuPluginStateFileName = "cpu_plugin_state"
    50  )
    51  
    52  const (
    53  	stateCheckPeriod = 30 * time.Second
    54  	maxResidualTime  = 5 * time.Minute
    55  )
    56  
    57  var (
    58  	readonlyStateLock sync.RWMutex
    59  	readonlyState     state.ReadonlyState
    60  )
    61  
    62  // GetReadonlyState returns state.ReadonlyState to provides a way
    63  // to obtain the running states of the plugin
    64  func GetReadonlyState() (state.ReadonlyState, error) {
    65  	readonlyStateLock.RLock()
    66  	defer readonlyStateLock.RUnlock()
    67  
    68  	if readonlyState == nil {
    69  		return nil, fmt.Errorf("readonlyState isn't setted")
    70  	}
    71  	return readonlyState, nil
    72  }
    73  
// NativePolicy is a policy compatible with Kubernetes native semantics and is
// used in topology-aware scheduling scenarios.
//
// The embedded RWMutex serializes the plugin's request handlers: handlers that
// modify the state take the write lock, read-only handlers take the read lock.
type NativePolicy struct {
	sync.RWMutex
	// name is the value reported by Name().
	name string
	// stopCh is closed by Stop to terminate the goroutines launched by Start.
	stopCh chan struct{}
	// started records whether Start has run without a matching Stop.
	started bool

	emitter     metrics.MetricEmitter
	metaServer  *metaserver.MetaServer
	machineInfo *machine.KatalystMachineInfo

	// state holds the pod entries and machine state of the plugin.
	state state.State
	// residualHitMap is presumably used by the residual-state clean-up to count
	// consecutive hits per pod; its consumer is not visible in this file.
	residualHitMap map[string]int64
	// set of CPUs to reuse across allocations in a pod
	cpusToReuse map[string]machine.CPUSet

	// those are parsed from configurations
	// todo if we want to use dynamic configuration, we'd better not use self-defined conf
	reservedCPUs           machine.CPUSet
	cpuPluginSocketAbsPath string
	extraStateFileAbsPath  string
	dynamicConfig          *dynamicconfig.DynamicAgentConfiguration
	podDebugAnnoKeys       []string

	// enableFullPhysicalCPUsOnly is a flag to enable extra allocation restrictions to avoid
	// different containers to possibly end up on the same core.
	enableFullPhysicalCPUsOnly bool

	// cpuAllocationOption is the allocation option of cpu (packed/distributed).
	cpuAllocationOption string
}
   105  
   106  func NewNativePolicy(agentCtx *agent.GenericContext, conf *config.Configuration,
   107  	_ interface{}, agentName string,
   108  ) (bool, agent.Component, error) {
   109  	general.Infof("new native policy")
   110  
   111  	stateImpl, stateErr := state.NewCheckpointState(conf.GenericQRMPluginConfiguration.StateFileDirectory, cpuPluginStateFileName,
   112  		cpuconsts.CPUResourcePluginPolicyNameNative, agentCtx.CPUTopology, conf.SkipCPUStateCorruption)
   113  	if stateErr != nil {
   114  		return false, agent.ComponentStub{}, fmt.Errorf("NewCheckpointState failed with error: %v", stateErr)
   115  	}
   116  
   117  	readonlyStateLock.Lock()
   118  	readonlyState = stateImpl
   119  	readonlyStateLock.Unlock()
   120  
   121  	wrappedEmitter := agentCtx.EmitterPool.GetDefaultMetricsEmitter().WithTags(agentName, metrics.MetricTag{
   122  		Key: util.QRMPluginPolicyTagName,
   123  		Val: cpuconsts.CPUResourcePluginPolicyNameNative,
   124  	})
   125  
   126  	policyImplement := &NativePolicy{
   127  		name:                       fmt.Sprintf("%s_%s", agentName, cpuconsts.CPUResourcePluginPolicyNameNative),
   128  		stopCh:                     make(chan struct{}),
   129  		machineInfo:                agentCtx.KatalystMachineInfo,
   130  		emitter:                    wrappedEmitter,
   131  		metaServer:                 agentCtx.MetaServer,
   132  		residualHitMap:             make(map[string]int64),
   133  		cpusToReuse:                make(map[string]machine.CPUSet),
   134  		state:                      stateImpl,
   135  		dynamicConfig:              conf.DynamicAgentConfiguration,
   136  		cpuPluginSocketAbsPath:     conf.CPUPluginSocketAbsPath,
   137  		extraStateFileAbsPath:      conf.ExtraStateFileAbsPath,
   138  		podDebugAnnoKeys:           conf.PodDebugAnnoKeys,
   139  		enableFullPhysicalCPUsOnly: conf.EnableFullPhysicalCPUsOnly,
   140  		cpuAllocationOption:        conf.CPUAllocationOption,
   141  	}
   142  
   143  	if err := policyImplement.setReservedCPUs(agentCtx.CPUDetails.CPUs().Clone()); err != nil {
   144  		return false, agent.ComponentStub{}, fmt.Errorf("native policy set reserved CPUs failed with error: %v", err)
   145  	}
   146  
   147  	state.SetContainerRequestedCores(policyImplement.getContainerRequestedCores)
   148  
   149  	err := agentCtx.MetaServer.ConfigurationManager.AddConfigWatcher(crd.AdminQoSConfigurationGVR)
   150  	if err != nil {
   151  		return false, nil, err
   152  	}
   153  
   154  	pluginWrapper, err := skeleton.NewRegistrationPluginWrapper(policyImplement, conf.QRMPluginSocketDirs, nil)
   155  	if err != nil {
   156  		return false, agent.ComponentStub{}, fmt.Errorf("native policy new plugin wrapper failed with error: %v", err)
   157  	}
   158  
   159  	return true, &agent.PluginWrapper{GenericPlugin: pluginWrapper}, nil
   160  }
   161  
// Name returns the plugin name assigned when the policy was constructed.
func (p *NativePolicy) Name() string {
	return p.name
}
   165  
// ResourceName returns the name of the resource handled by this plugin,
// which is the Kubernetes cpu resource.
func (p *NativePolicy) ResourceName() string {
	return string(v1.ResourceCPU)
}
   169  
   170  func (p *NativePolicy) Start() (err error) {
   171  	general.Infof("called")
   172  
   173  	p.Lock()
   174  	defer func() {
   175  		if err == nil {
   176  			p.started = true
   177  		}
   178  		p.Unlock()
   179  	}()
   180  
   181  	if p.started {
   182  		general.Infof("is already started")
   183  		return nil
   184  	}
   185  	p.stopCh = make(chan struct{})
   186  
   187  	go wait.Until(func() {
   188  		_ = p.emitter.StoreInt64(util.MetricNameHeartBeat, 1, metrics.MetricTypeNameRaw)
   189  	}, time.Second*30, p.stopCh)
   190  	go wait.Until(p.clearResidualState, stateCheckPeriod, p.stopCh)
   191  
   192  	return nil
   193  }
   194  
   195  func (p *NativePolicy) Stop() error {
   196  	p.Lock()
   197  	defer func() {
   198  		p.started = false
   199  		p.Unlock()
   200  		general.Infof("stopped")
   201  	}()
   202  
   203  	if !p.started {
   204  		general.Warningf("already stopped")
   205  		return nil
   206  	}
   207  	close(p.stopCh)
   208  
   209  	return nil
   210  }
   211  
   212  // GetResourcePluginOptions returns options to be communicated with Resource Manager
   213  func (p *NativePolicy) GetResourcePluginOptions(context.Context,
   214  	*pluginapi.Empty,
   215  ) (*pluginapi.ResourcePluginOptions, error) {
   216  	general.Infof("called")
   217  	return &pluginapi.ResourcePluginOptions{
   218  		PreStartRequired:      false,
   219  		WithTopologyAlignment: true,
   220  		NeedReconcile:         true,
   221  	}, nil
   222  }
   223  
   224  // GetTopologyHints returns hints of corresponding resources
   225  func (p *NativePolicy) GetTopologyHints(ctx context.Context,
   226  	req *pluginapi.ResourceRequest,
   227  ) (resp *pluginapi.ResourceHintsResponse, err error) {
   228  	if req == nil {
   229  		return nil, fmt.Errorf("GetTopologyHints got nil req")
   230  	}
   231  
   232  	// identify if the pod is a debug pod,
   233  	// if so, apply specific strategy to it.
   234  	// since GetKatalystQoSLevelFromResourceReq function will filter annotations,
   235  	// we should do it before GetKatalystQoSLevelFromResourceReq.
   236  	isDebugPod := util.IsDebugPod(req.Annotations, p.podDebugAnnoKeys)
   237  
   238  	reqInt, _, err := util.GetQuantityFromResourceReq(req)
   239  	if err != nil {
   240  		return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err)
   241  	}
   242  
   243  	isInteger := float64(reqInt) == req.ResourceRequests[string(v1.ResourceCPU)]
   244  
   245  	general.InfoS("called",
   246  		"podNamespace", req.PodNamespace,
   247  		"podName", req.PodName,
   248  		"containerName", req.ContainerName,
   249  		"podType", req.PodType,
   250  		"podRole", req.PodRole,
   251  		"containerType", req.ContainerType,
   252  		"qosClass", req.NativeQosClass,
   253  		"numCPUs", reqInt,
   254  		"isDebugPod", isDebugPod,
   255  		"isInteger", isInteger)
   256  
   257  	if req.ContainerType == pluginapi.ContainerType_INIT || isDebugPod {
   258  		general.Infof("there is no NUMA preference, return nil hint")
   259  		return util.PackResourceHintsResponse(req, string(v1.ResourceCPU),
   260  			map[string]*pluginapi.ListOfTopologyHints{
   261  				string(v1.ResourceCPU): nil, // indicates that there is no numa preference
   262  			})
   263  	}
   264  
   265  	p.RLock()
   266  	defer func() {
   267  		p.RUnlock()
   268  		if err != nil {
   269  			_ = p.emitter.StoreInt64(util.MetricNameGetTopologyHintsFailed, 1, metrics.MetricTypeNameRaw)
   270  		}
   271  	}()
   272  
   273  	if req.NativeQosClass != string(v1.PodQOSGuaranteed) || !isInteger {
   274  		return p.sharedPoolHintHandler(ctx, req)
   275  	}
   276  	return p.dedicatedCoresHintHandler(ctx, req)
   277  }
   278  
   279  // Allocate is called during pod admit so that the resource
   280  // plugin can allocate corresponding resource for the container
   281  // according to resource request
   282  func (p *NativePolicy) Allocate(ctx context.Context,
   283  	req *pluginapi.ResourceRequest,
   284  ) (resp *pluginapi.ResourceAllocationResponse, respErr error) {
   285  	if req == nil {
   286  		return nil, fmt.Errorf("allocate got nil req")
   287  	}
   288  
   289  	// identify if the pod is a debug pod,
   290  	// if so, apply specific strategy to it.
   291  	// since GetKatalystQoSLevelFromResourceReq function will filter annotations,
   292  	// we should do it before GetKatalystQoSLevelFromResourceReq.
   293  	isDebugPod := util.IsDebugPod(req.Annotations, p.podDebugAnnoKeys)
   294  
   295  	reqInt, _, err := util.GetQuantityFromResourceReq(req)
   296  	if err != nil {
   297  		return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err)
   298  	}
   299  
   300  	isInteger := float64(reqInt) == req.ResourceRequests[string(v1.ResourceCPU)]
   301  
   302  	general.InfoS("called",
   303  		"podNamespace", req.PodNamespace,
   304  		"podName", req.PodName,
   305  		"containerName", req.ContainerName,
   306  		"podType", req.PodType,
   307  		"podRole", req.PodRole,
   308  		"containerType", req.ContainerType,
   309  		"qosClass", req.NativeQosClass,
   310  		"numCPUs", reqInt,
   311  		"isDebugPod", isDebugPod,
   312  		"isInteger", isInteger)
   313  
   314  	if req.ContainerType == pluginapi.ContainerType_INIT {
   315  		return &pluginapi.ResourceAllocationResponse{
   316  			PodUid:         req.PodUid,
   317  			PodNamespace:   req.PodNamespace,
   318  			PodName:        req.PodName,
   319  			ContainerName:  req.ContainerName,
   320  			ContainerType:  req.ContainerType,
   321  			ContainerIndex: req.ContainerIndex,
   322  			PodRole:        req.PodRole,
   323  			PodType:        req.PodType,
   324  			ResourceName:   string(v1.ResourceCPU),
   325  			Labels:         general.DeepCopyMap(req.Labels),
   326  			Annotations:    general.DeepCopyMap(req.Annotations),
   327  			NativeQosClass: req.NativeQosClass,
   328  		}, nil
   329  	}
   330  
   331  	if isDebugPod {
   332  		return &pluginapi.ResourceAllocationResponse{
   333  			PodUid:         req.PodUid,
   334  			PodNamespace:   req.PodNamespace,
   335  			PodName:        req.PodName,
   336  			ContainerName:  req.ContainerName,
   337  			ContainerType:  req.ContainerType,
   338  			ContainerIndex: req.ContainerIndex,
   339  			PodRole:        req.PodRole,
   340  			PodType:        req.PodType,
   341  			ResourceName:   string(v1.ResourceCPU),
   342  			AllocationResult: &pluginapi.ResourceAllocation{
   343  				ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{
   344  					string(v1.ResourceCPU): {
   345  						// return ResourceAllocation with empty OciPropertyName, AllocatedQuantity, AllocationResult for containers in debug pod,
   346  						// it won't influence oci spec properties of the container
   347  						IsNodeResource:   false,
   348  						IsScalarResource: true,
   349  					},
   350  				},
   351  			},
   352  			Labels:         general.DeepCopyMap(req.Labels),
   353  			Annotations:    general.DeepCopyMap(req.Annotations),
   354  			NativeQosClass: req.NativeQosClass,
   355  		}, nil
   356  	}
   357  
   358  	p.Lock()
   359  	defer func() {
   360  		if respErr != nil {
   361  			_ = p.removeContainer(req.PodUid, req.ContainerName)
   362  			_ = p.emitter.StoreInt64(util.MetricNameAllocateFailed, 1, metrics.MetricTypeNameRaw)
   363  		}
   364  
   365  		p.Unlock()
   366  		return
   367  	}()
   368  
   369  	allocationInfo := p.state.GetAllocationInfo(req.PodUid, req.ContainerName)
   370  	if allocationInfo != nil && allocationInfo.OriginalAllocationResult.Size() >= reqInt {
   371  		general.InfoS("already allocated and meet requirement",
   372  			"podNamespace", req.PodNamespace,
   373  			"podName", req.PodName,
   374  			"containerName", req.ContainerName,
   375  			"numCPUs", reqInt,
   376  			"originalAllocationResult", allocationInfo.OriginalAllocationResult.String(),
   377  			"currentResult", allocationInfo.AllocationResult.String())
   378  
   379  		p.updateCPUsToReuse(req, allocationInfo.AllocationResult)
   380  
   381  		return &pluginapi.ResourceAllocationResponse{
   382  			PodUid:         req.PodUid,
   383  			PodNamespace:   req.PodNamespace,
   384  			PodName:        req.PodName,
   385  			ContainerName:  req.ContainerName,
   386  			ContainerType:  req.ContainerType,
   387  			ContainerIndex: req.ContainerIndex,
   388  			PodRole:        req.PodRole,
   389  			PodType:        req.PodType,
   390  			ResourceName:   string(v1.ResourceCPU),
   391  			AllocationResult: &pluginapi.ResourceAllocation{
   392  				ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{
   393  					string(v1.ResourceCPU): {
   394  						OciPropertyName:   util.OCIPropertyNameCPUSetCPUs,
   395  						IsNodeResource:    false,
   396  						IsScalarResource:  true,
   397  						AllocatedQuantity: float64(allocationInfo.AllocationResult.Size()),
   398  						AllocationResult:  allocationInfo.AllocationResult.String(),
   399  					},
   400  				},
   401  			},
   402  			Labels:         general.DeepCopyMap(req.Labels),
   403  			Annotations:    general.DeepCopyMap(req.Annotations),
   404  			NativeQosClass: req.NativeQosClass,
   405  		}, nil
   406  	}
   407  
   408  	if req.NativeQosClass != string(v1.PodQOSGuaranteed) || !isInteger {
   409  		return p.sharedPoolAllocationHandler(ctx, req)
   410  	}
   411  	return p.dedicatedCoresAllocationHandler(ctx, req)
   412  }
   413  
   414  // GetResourcesAllocation returns allocation results of corresponding resources
   415  func (p *NativePolicy) GetResourcesAllocation(_ context.Context,
   416  	req *pluginapi.GetResourcesAllocationRequest,
   417  ) (*pluginapi.GetResourcesAllocationResponse, error) {
   418  	if req == nil {
   419  		return nil, fmt.Errorf("GetResourcesAllocation got nil req")
   420  	}
   421  
   422  	general.Infof("called")
   423  	p.Lock()
   424  	defer p.Unlock()
   425  
   426  	defaultCPUSet := p.state.GetMachineState().GetDefaultCPUSet()
   427  	defaultCPUSetTopologyAwareAssignments, err := machine.GetNumaAwareAssignments(p.machineInfo.CPUTopology, defaultCPUSet)
   428  	if err != nil {
   429  		return nil, fmt.Errorf("GetNumaAwareAssignments err: %v", err)
   430  	}
   431  
   432  	podResources := make(map[string]*pluginapi.ContainerResources)
   433  
   434  	for podUID, containerEntries := range p.state.GetPodEntries() {
   435  		if podResources[podUID] == nil {
   436  			podResources[podUID] = &pluginapi.ContainerResources{}
   437  		}
   438  
   439  		for containerName, allocationInfo := range containerEntries {
   440  			if allocationInfo == nil {
   441  				continue
   442  			}
   443  			allocationInfo = allocationInfo.Clone()
   444  
   445  			resultCPUSet := machine.NewCPUSet()
   446  			switch allocationInfo.OwnerPoolName {
   447  			case state.PoolNameDedicated:
   448  				resultCPUSet = allocationInfo.AllocationResult
   449  			case state.PoolNameShare:
   450  				resultCPUSet = defaultCPUSet
   451  
   452  				if !allocationInfo.AllocationResult.Equals(defaultCPUSet) {
   453  					clonedDefaultCPUSet := defaultCPUSet.Clone()
   454  					clonedDefaultCPUSetTopologyAwareAssignments := machine.DeepcopyCPUAssignment(defaultCPUSetTopologyAwareAssignments)
   455  
   456  					allocationInfo.AllocationResult = clonedDefaultCPUSet
   457  					allocationInfo.OriginalAllocationResult = clonedDefaultCPUSet
   458  					allocationInfo.TopologyAwareAssignments = clonedDefaultCPUSetTopologyAwareAssignments
   459  					allocationInfo.OriginalTopologyAwareAssignments = clonedDefaultCPUSetTopologyAwareAssignments
   460  
   461  					p.state.SetAllocationInfo(podUID, containerName, allocationInfo)
   462  				}
   463  			default:
   464  				general.Errorf("skip container because the pool name is not supported, pod: %s, container: %s, cpuset: %s",
   465  					podUID, containerName, resultCPUSet.String())
   466  				continue
   467  			}
   468  
   469  			if podResources[podUID].ContainerResources == nil {
   470  				podResources[podUID].ContainerResources = make(map[string]*pluginapi.ResourceAllocation)
   471  			}
   472  
   473  			podResources[podUID].ContainerResources[containerName] = &pluginapi.ResourceAllocation{
   474  				ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{
   475  					string(v1.ResourceCPU): {
   476  						OciPropertyName:   util.OCIPropertyNameCPUSetCPUs,
   477  						IsNodeResource:    false,
   478  						IsScalarResource:  true,
   479  						AllocatedQuantity: float64(resultCPUSet.Size()),
   480  						AllocationResult:  resultCPUSet.String(),
   481  					},
   482  				},
   483  			}
   484  		}
   485  	}
   486  
   487  	return &pluginapi.GetResourcesAllocationResponse{
   488  		PodResources: podResources,
   489  	}, nil
   490  }
   491  
   492  // GetTopologyAwareResources returns allocation results of corresponding resources as machineInfo aware format
   493  func (p *NativePolicy) GetTopologyAwareResources(_ context.Context,
   494  	req *pluginapi.GetTopologyAwareResourcesRequest,
   495  ) (*pluginapi.GetTopologyAwareResourcesResponse, error) {
   496  	if req == nil {
   497  		return nil, fmt.Errorf("GetTopologyAwareResources got nil req")
   498  	}
   499  
   500  	general.Infof("called")
   501  	p.RLock()
   502  	defer p.RUnlock()
   503  
   504  	allocationInfo := p.state.GetAllocationInfo(req.PodUid, req.ContainerName)
   505  	if allocationInfo == nil {
   506  		return nil, fmt.Errorf("pod: %s, container: %s is not show up in cpu plugin state", req.PodUid, req.ContainerName)
   507  	}
   508  
   509  	resp := &pluginapi.GetTopologyAwareResourcesResponse{
   510  		PodUid:       allocationInfo.PodUid,
   511  		PodName:      allocationInfo.PodName,
   512  		PodNamespace: allocationInfo.PodNamespace,
   513  		ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{
   514  			ContainerName:      allocationInfo.ContainerName,
   515  			AllocatedResources: make(map[string]*pluginapi.TopologyAwareResource),
   516  		},
   517  	}
   518  
   519  	if allocationInfo.OwnerPoolName == state.PoolNameDedicated {
   520  		resp.ContainerTopologyAwareResources.AllocatedResources[string(v1.ResourceCPU)] = &pluginapi.TopologyAwareResource{
   521  			IsNodeResource:                    false,
   522  			IsScalarResource:                  true,
   523  			AggregatedQuantity:                float64(allocationInfo.AllocationResult.Size()),
   524  			OriginalAggregatedQuantity:        float64(allocationInfo.OriginalAllocationResult.Size()),
   525  			TopologyAwareQuantityList:         util.GetTopologyAwareQuantityFromAssignments(allocationInfo.TopologyAwareAssignments),
   526  			OriginalTopologyAwareQuantityList: util.GetTopologyAwareQuantityFromAssignments(allocationInfo.OriginalTopologyAwareAssignments),
   527  		}
   528  	}
   529  
   530  	return resp, nil
   531  }
   532  
   533  // GetTopologyAwareAllocatableResources returns corresponding allocatable resources as machineInfo aware format
   534  func (p *NativePolicy) GetTopologyAwareAllocatableResources(_ context.Context,
   535  	_ *pluginapi.GetTopologyAwareAllocatableResourcesRequest,
   536  ) (*pluginapi.GetTopologyAwareAllocatableResourcesResponse, error) {
   537  	general.Infof("is called")
   538  
   539  	numaNodes := p.machineInfo.CPUDetails.NUMANodes().ToSliceInt()
   540  	topologyAwareAllocatableQuantityList := make([]*pluginapi.TopologyAwareQuantity, 0, len(numaNodes))
   541  	topologyAwareCapacityQuantityList := make([]*pluginapi.TopologyAwareQuantity, 0, len(numaNodes))
   542  
   543  	for _, numaNode := range numaNodes {
   544  		numaNodeCPUs := p.machineInfo.CPUDetails.CPUsInNUMANodes(numaNode).Clone()
   545  		topologyAwareAllocatableQuantityList = append(topologyAwareAllocatableQuantityList, &pluginapi.TopologyAwareQuantity{
   546  			ResourceValue: float64(numaNodeCPUs.Difference(p.reservedCPUs).Size()),
   547  			Node:          uint64(numaNode),
   548  		})
   549  		topologyAwareCapacityQuantityList = append(topologyAwareCapacityQuantityList, &pluginapi.TopologyAwareQuantity{
   550  			ResourceValue: float64(numaNodeCPUs.Size()),
   551  			Node:          uint64(numaNode),
   552  		})
   553  	}
   554  
   555  	return &pluginapi.GetTopologyAwareAllocatableResourcesResponse{
   556  		AllocatableResources: map[string]*pluginapi.AllocatableTopologyAwareResource{
   557  			string(v1.ResourceCPU): {
   558  				IsNodeResource:                       false,
   559  				IsScalarResource:                     true,
   560  				AggregatedAllocatableQuantity:        float64(p.machineInfo.NumCPUs - p.reservedCPUs.Size()),
   561  				TopologyAwareAllocatableQuantityList: topologyAwareAllocatableQuantityList,
   562  				AggregatedCapacityQuantity:           float64(p.machineInfo.NumCPUs),
   563  				TopologyAwareCapacityQuantityList:    topologyAwareCapacityQuantityList,
   564  			},
   565  		},
   566  	}, nil
   567  }
   568  
   569  // PreStartContainer is called, if indicated by resource plugin during registration phase,
   570  // before each container start. Resource plugin can run resource specific operations
   571  // such as resetting the resource before making resources available to the container
   572  func (p *NativePolicy) PreStartContainer(context.Context,
   573  	*pluginapi.PreStartContainerRequest,
   574  ) (*pluginapi.PreStartContainerResponse, error) {
   575  	return nil, nil
   576  }
   577  
   578  func (p *NativePolicy) RemovePod(ctx context.Context,
   579  	req *pluginapi.RemovePodRequest,
   580  ) (resp *pluginapi.RemovePodResponse, err error) {
   581  	if req == nil {
   582  		return nil, fmt.Errorf("RemovePod got nil req")
   583  	}
   584  	general.InfoS("is called", "podUID", req.PodUid)
   585  
   586  	p.Lock()
   587  	defer func() {
   588  		p.Unlock()
   589  		if err != nil {
   590  			_ = p.emitter.StoreInt64(util.MetricNameRemovePodFailed, 1, metrics.MetricTypeNameRaw)
   591  		}
   592  	}()
   593  
   594  	err = p.removePod(req.PodUid)
   595  	if err != nil {
   596  		general.ErrorS(err, "remove pod failed with error", "podUID", req.PodUid)
   597  		return nil, err
   598  	}
   599  
   600  	return &pluginapi.RemovePodResponse{}, nil
   601  }
   602  
   603  func (p *NativePolicy) removePod(podUID string) error {
   604  	podEntries := p.state.GetPodEntries()
   605  	if len(podEntries[podUID]) == 0 {
   606  		return nil
   607  	}
   608  	delete(podEntries, podUID)
   609  
   610  	updatedMachineState, err := nativepolicyutil.GenerateMachineStateFromPodEntries(p.machineInfo.CPUTopology, podEntries)
   611  	if err != nil {
   612  		return fmt.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err)
   613  	}
   614  
   615  	p.state.SetPodEntries(podEntries)
   616  	p.state.SetMachineState(updatedMachineState)
   617  	return nil
   618  }
   619  
   620  func (p *NativePolicy) removeContainer(podUID, containerName string) error {
   621  	podEntries := p.state.GetPodEntries()
   622  	if podEntries[podUID][containerName] == nil {
   623  		return nil
   624  	}
   625  	delete(podEntries[podUID], containerName)
   626  
   627  	updatedMachineState, err := nativepolicyutil.GenerateMachineStateFromPodEntries(p.machineInfo.CPUTopology, podEntries)
   628  	if err != nil {
   629  		return fmt.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err)
   630  	}
   631  
   632  	p.state.SetPodEntries(podEntries)
   633  	p.state.SetMachineState(updatedMachineState)
   634  	return nil
   635  }
   636  
   637  // getContainerRequestedCores parses and returns request cores for the given container
   638  func (p *NativePolicy) getContainerRequestedCores(allocationInfo *state.AllocationInfo) float64 {
   639  	if allocationInfo == nil {
   640  		general.Errorf("got nil allocationInfo")
   641  		return 0
   642  	}
   643  
   644  	if allocationInfo.RequestQuantity == 0 {
   645  		if p.metaServer == nil {
   646  			general.Errorf("got nil metaServer")
   647  			return 0
   648  		}
   649  
   650  		container, err := p.metaServer.GetContainerSpec(allocationInfo.PodUid, allocationInfo.ContainerName)
   651  		if err != nil || container == nil {
   652  			general.Errorf("get container failed with error: %v", err)
   653  			return 0
   654  		}
   655  
   656  		cpuQuantity := native.CPUQuantityGetter()(container.Resources.Requests)
   657  		allocationInfo.RequestQuantity = general.MaxFloat64(float64(cpuQuantity.MilliValue())/1000, 0)
   658  		general.Infof("get cpu request quantity: %.3f for pod: %s/%s container: %s from podWatcher",
   659  			allocationInfo.RequestQuantity, allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName)
   660  	}
   661  	return allocationInfo.RequestQuantity
   662  }
   663  
   664  // setReservedCPUs calculates and sets the reservedCPUs field
   665  func (p *NativePolicy) setReservedCPUs(allCPUs machine.CPUSet) error {
   666  	klConfig, err := p.metaServer.GetKubeletConfig(context.TODO())
   667  	if err != nil {
   668  		return fmt.Errorf("NewNativePolicy failed because get kubelet config failed with error: %v", err)
   669  	}
   670  
   671  	reservedQuantity, _, err := utilkubeconfig.GetReservedQuantity(klConfig, string(v1.ResourceCPU))
   672  	if err != nil {
   673  		return fmt.Errorf("getKubeletReservedQuantity failed because get kubelet reserved quantity failed with error: %v", err)
   674  	} else if reservedQuantity.IsZero() {
   675  		// The native policy requires this to be nonzero. Zero CPU reservation
   676  		// would allow the shared pool to be completely exhausted. At that point
   677  		// either we would violate our guarantee of exclusivity or need to evict
   678  		// any pod that has at least one container that requires zero CPUs.
   679  		// See the comments in policy_static.go for more details.
   680  		return fmt.Errorf("the native policy requires systemreserved.cpu + kubereserved.cpu to be greater than zero")
   681  	}
   682  
   683  	// Take the ceiling of the reservation, since fractional CPUs cannot be
   684  	// exclusively allocated.
   685  	reservedCPUsFloat := float64(reservedQuantity.MilliValue()) / 1000
   686  	numReservedCPUs := int(math.Ceil(reservedCPUsFloat))
   687  
   688  	var reserved machine.CPUSet
   689  	reservedCPUs, err := machine.Parse(klConfig.ReservedSystemCPUs)
   690  	if err != nil {
   691  		return fmt.Errorf("NewNativePolicy parse cpuset for reserved-cpus failed with error: %v", err)
   692  	}
   693  	if reservedCPUs.Size() > 0 {
   694  		reserved = reservedCPUs
   695  	} else {
   696  		// takeByTopology allocates CPUs associated with low-numbered cores from
   697  		// allCPUs.
   698  		reserved, _ = p.takeByTopology(allCPUs, numReservedCPUs)
   699  	}
   700  
   701  	if reserved.Size() != numReservedCPUs {
   702  		return fmt.Errorf("unable to reserve the required amount of CPUs (size of %s did not equal %d)", reserved, numReservedCPUs)
   703  	}
   704  
   705  	general.Infof("take reserved CPUs: %s by reservedCPUsNum: %d", reserved.String(), numReservedCPUs)
   706  
   707  	p.reservedCPUs = reserved
   708  
   709  	return nil
   710  }