github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/overcommit/node/node.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package node
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"reflect"
    23  	"strconv"
    24  	"time"
    25  
    26  	corev1 "k8s.io/api/core/v1"
    27  	"k8s.io/apimachinery/pkg/api/errors"
    28  	"k8s.io/apimachinery/pkg/api/resource"
    29  	"k8s.io/apimachinery/pkg/labels"
    30  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    31  	"k8s.io/apimachinery/pkg/util/wait"
    32  	v1 "k8s.io/client-go/listers/core/v1"
    33  	"k8s.io/client-go/tools/cache"
    34  	"k8s.io/client-go/util/workqueue"
    35  	"k8s.io/klog/v2"
    36  	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
    37  
    38  	configv1alpha1 "github.com/kubewharf/katalyst-api/pkg/apis/overcommit/v1alpha1"
    39  	nodev1alpha1 "github.com/kubewharf/katalyst-api/pkg/client/listers/node/v1alpha1"
    40  	"github.com/kubewharf/katalyst-api/pkg/client/listers/overcommit/v1alpha1"
    41  	"github.com/kubewharf/katalyst-api/pkg/consts"
    42  	katalyst_base "github.com/kubewharf/katalyst-core/cmd/base"
    43  	"github.com/kubewharf/katalyst-core/pkg/client/control"
    44  	"github.com/kubewharf/katalyst-core/pkg/config/controller"
    45  	"github.com/kubewharf/katalyst-core/pkg/config/generic"
    46  	"github.com/kubewharf/katalyst-core/pkg/controller/overcommit/node/matcher"
    47  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    48  	"github.com/kubewharf/katalyst-core/pkg/util/native"
    49  	overcommitutil "github.com/kubewharf/katalyst-core/pkg/util/overcommit"
    50  )
    51  
    52  const nodeOvercommitControllerName = "noc"
    53  
    54  var resourceAnnotationKey = map[corev1.ResourceName]string{
    55  	corev1.ResourceCPU:    consts.NodeAnnotationCPUOvercommitRatioKey,
    56  	corev1.ResourceMemory: consts.NodeAnnotationMemoryOvercommitRatioKey,
    57  }
    58  
    59  // NodeOvercommitController is responsible to update node overcommit annotation
    60  // according to NodeOvercommitConfig
    61  type NodeOvercommitController struct {
    62  	ctx context.Context
    63  
    64  	nodeLister           v1.NodeLister
    65  	nodeOvercommitLister v1alpha1.NodeOvercommitConfigLister
    66  	cnrLister            nodev1alpha1.CustomNodeResourceLister
    67  	nodeUpdater          control.NodeUpdater
    68  	nocUpdater           control.NocUpdater
    69  
    70  	syncedFunc []cache.InformerSynced
    71  
    72  	matcher         matcher.Matcher
    73  	nocSyncQueue    workqueue.RateLimitingInterface
    74  	nodeSyncQueue   workqueue.RateLimitingInterface
    75  	cnrSyncQueue    workqueue.RateLimitingInterface
    76  	workerCount     int
    77  	reconcilePeriod time.Duration
    78  	firstReconcile  bool
    79  
    80  	metricsEmitter metrics.MetricEmitter
    81  }
    82  
    83  func NewNodeOvercommitController(
    84  	ctx context.Context,
    85  	controlCtx *katalyst_base.GenericContext,
    86  	genericConf *generic.GenericConfiguration,
    87  	overcommitConf *controller.OvercommitConfig,
    88  ) (*NodeOvercommitController, error) {
    89  	nodeInformer := controlCtx.KubeInformerFactory.Core().V1().Nodes()
    90  	nodeOvercommitInformer := controlCtx.InternalInformerFactory.Overcommit().V1alpha1().NodeOvercommitConfigs()
    91  	err := nodeOvercommitInformer.Informer().AddIndexers(cache.Indexers{
    92  		matcher.LabelSelectorValIndex: func(obj interface{}) ([]string, error) {
    93  			noc, ok := obj.(*configv1alpha1.NodeOvercommitConfig)
    94  			if !ok {
    95  				return []string{}, nil
    96  			}
    97  			return []string{noc.Spec.NodeOvercommitSelectorVal}, nil
    98  		},
    99  	})
   100  	if err != nil {
   101  		return nil, err
   102  	}
   103  	genericClient := controlCtx.Client
   104  
   105  	cnrInformer := controlCtx.InternalInformerFactory.Node().V1alpha1().CustomNodeResources()
   106  
   107  	nodeOvercommitConfigController := &NodeOvercommitController{
   108  		ctx:                  ctx,
   109  		nodeLister:           nodeInformer.Lister(),
   110  		nodeOvercommitLister: nodeOvercommitInformer.Lister(),
   111  		cnrLister:            cnrInformer.Lister(),
   112  		nodeUpdater:          &control.DummyNodeUpdater{},
   113  		nocUpdater:           &control.DummyNocUpdater{},
   114  		nocSyncQueue:         workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "noc"),
   115  		nodeSyncQueue:        workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "node"),
   116  		cnrSyncQueue:         workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "cnr"),
   117  		workerCount:          overcommitConf.Node.SyncWorkers,
   118  		syncedFunc: []cache.InformerSynced{
   119  			nodeInformer.Informer().HasSynced,
   120  			nodeOvercommitInformer.Informer().HasSynced,
   121  			cnrInformer.Informer().HasSynced,
   122  		},
   123  		matcher:         &matcher.DummyMatcher{},
   124  		reconcilePeriod: overcommitConf.Node.ConfigReconcilePeriod,
   125  	}
   126  
   127  	nodeOvercommitConfigController.metricsEmitter = controlCtx.EmitterPool.GetDefaultMetricsEmitter()
   128  	if nodeOvercommitConfigController.metricsEmitter == nil {
   129  		nodeOvercommitConfigController.metricsEmitter = metrics.DummyMetrics{}
   130  	}
   131  
   132  	if !genericConf.DryRun {
   133  		nodeOvercommitConfigController.matcher = matcher.NewMatcher(nodeInformer.Lister(), nodeOvercommitInformer.Lister(), nodeOvercommitInformer.Informer().GetIndexer())
   134  		nodeOvercommitConfigController.nodeUpdater = control.NewRealNodeUpdater(genericClient.KubeClient)
   135  		nodeOvercommitConfigController.nocUpdater = control.NewRealNocUpdater(genericClient.InternalClient)
   136  	}
   137  
   138  	// add handlers
   139  	nodeOvercommitInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   140  		AddFunc:    nodeOvercommitConfigController.addNodeOvercommitConfig,
   141  		UpdateFunc: nodeOvercommitConfigController.updateNodeOvercommitConfig,
   142  		DeleteFunc: nodeOvercommitConfigController.deleteNodeOvercommitConfig,
   143  	})
   144  
   145  	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   146  		AddFunc:    nodeOvercommitConfigController.addNode,
   147  		UpdateFunc: nodeOvercommitConfigController.updateNode,
   148  	})
   149  
   150  	cnrInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   151  		AddFunc:    nodeOvercommitConfigController.addCNR,
   152  		UpdateFunc: nodeOvercommitConfigController.updateCNR,
   153  	})
   154  
   155  	return nodeOvercommitConfigController, nil
   156  }
   157  
   158  func (nc *NodeOvercommitController) Run() {
   159  	defer utilruntime.HandleCrash()
   160  	defer func() {
   161  		nc.nocSyncQueue.ShutDown()
   162  		nc.nodeSyncQueue.ShutDown()
   163  		nc.cnrSyncQueue.ShutDown()
   164  		klog.Infof("Shutting down %s controller", nodeOvercommitControllerName)
   165  	}()
   166  
   167  	if !cache.WaitForCacheSync(nc.ctx.Done(), nc.syncedFunc...) {
   168  		utilruntime.HandleError(fmt.Errorf("unable to sync caches for %s controller", nodeOvercommitControllerName))
   169  		return
   170  	}
   171  
   172  	klog.Infof("caches are synced for %s controller", nodeOvercommitControllerName)
   173  
   174  	err := nc.matcher.Reconcile()
   175  	if err != nil {
   176  		utilruntime.HandleError(fmt.Errorf("controller %s Reconcile fail: %v", nodeOvercommitControllerName, err))
   177  		return
   178  	}
   179  
   180  	klog.Infof("%s controller start process, workerCount: %v, reconcilePeriod: %v", nodeOvercommitControllerName, nc.workerCount, nc.reconcilePeriod)
   181  	for i := 0; i < nc.workerCount; i++ {
   182  		// config matching and node configs sorting are handled asynchronously in different worker
   183  		go wait.Until(nc.nodeWorker, time.Second, nc.ctx.Done())
   184  
   185  		go wait.Until(nc.worker, time.Second, nc.ctx.Done())
   186  
   187  		go wait.Until(nc.cnrWorker, time.Second, nc.ctx.Done())
   188  	}
   189  
   190  	nc.reconcile()
   191  
   192  	<-nc.ctx.Done()
   193  }
   194  
   195  func (nc *NodeOvercommitController) reconcile() {
   196  	go wait.Until(func() {
   197  		if nc.firstReconcile {
   198  			nc.firstReconcile = false
   199  			return
   200  		}
   201  		err := nc.matcher.Reconcile()
   202  		if err != nil {
   203  			klog.Error(err)
   204  			return
   205  		}
   206  
   207  		nodeList, err := nc.nodeLister.List(labels.Everything())
   208  		if err != nil {
   209  			klog.Error(err)
   210  			return
   211  		}
   212  		for _, node := range nodeList {
   213  			err = nc.setNodeOvercommitAnnotations(node.Name, nil)
   214  			if err != nil {
   215  				klog.Errorf("%s controller reconcile set node annotation fail: %v", nodeOvercommitControllerName, err)
   216  				continue
   217  			}
   218  		}
   219  
   220  		configList, err := nc.nodeOvercommitLister.List(labels.Everything())
   221  		if err != nil {
   222  			klog.Error(err)
   223  			return
   224  		}
   225  		for _, config := range configList {
   226  			err = nc.patchNodeOvercommitConfigStatus(config.Name)
   227  			if err != nil {
   228  				klog.Errorf("%s controller reconcile patch noc status fail: %v")
   229  				continue
   230  			}
   231  		}
   232  	}, nc.reconcilePeriod, nc.ctx.Done())
   233  }
   234  
   235  func (nc *NodeOvercommitController) worker() {
   236  	for nc.processNextEvent() {
   237  	}
   238  }
   239  
   240  func (nc *NodeOvercommitController) nodeWorker() {
   241  	for nc.processNextNode() {
   242  	}
   243  }
   244  
   245  func (nc *NodeOvercommitController) cnrWorker() {
   246  	for nc.processNextCNR() {
   247  	}
   248  }
   249  
   250  func (nc *NodeOvercommitController) processNextEvent() bool {
   251  	key, quit := nc.nocSyncQueue.Get()
   252  	if quit {
   253  		return false
   254  	}
   255  	defer nc.nocSyncQueue.Done(key)
   256  
   257  	var (
   258  		event = key.(nodeOvercommitEvent)
   259  		err   error
   260  	)
   261  	// both config change and node label change may cause the matching relationship to change,
   262  	// but they are handled in different ways
   263  	switch event.eventType {
   264  	case nodeEvent:
   265  		err = nc.syncNodeEvent(event.nodeKey)
   266  	case configEvent:
   267  		err = nc.syncConfigEvent(event.configKey)
   268  	default:
   269  		nc.nocSyncQueue.Forget(key)
   270  		klog.Errorf("unkonw event type: %s", event.eventType)
   271  		return true
   272  	}
   273  	if err == nil {
   274  		nc.nocSyncQueue.Forget(key)
   275  		return true
   276  	}
   277  
   278  	utilruntime.HandleError(fmt.Errorf("sync %q failed with %v", key, err))
   279  	nc.nocSyncQueue.AddRateLimited(key)
   280  
   281  	return true
   282  }
   283  
   284  func (nc *NodeOvercommitController) processNextNode() bool {
   285  	key, quit := nc.nodeSyncQueue.Get()
   286  	if quit {
   287  		return false
   288  	}
   289  	defer nc.nodeSyncQueue.Done(key)
   290  
   291  	err := nc.syncNode(key.(string))
   292  	if err == nil {
   293  		nc.nodeSyncQueue.Forget(key)
   294  		return true
   295  	}
   296  
   297  	utilruntime.HandleError(fmt.Errorf("sync %q failed with %v", key, err))
   298  	nc.nodeSyncQueue.AddRateLimited(key)
   299  
   300  	return true
   301  }
   302  
   303  func (nc *NodeOvercommitController) processNextCNR() bool {
   304  	key, quit := nc.cnrSyncQueue.Get()
   305  	if quit {
   306  		return false
   307  	}
   308  	defer nc.cnrSyncQueue.Done(key)
   309  
   310  	err := nc.syncCNR(key.(string))
   311  	if err == nil {
   312  		nc.cnrSyncQueue.Forget(key)
   313  		return true
   314  	}
   315  
   316  	utilruntime.HandleError(fmt.Errorf("sync %q failed with %v", key, err))
   317  	nc.cnrSyncQueue.AddRateLimited(key)
   318  
   319  	return true
   320  }
   321  
   322  func (nc *NodeOvercommitController) syncConfigEvent(key string) error {
   323  	_, name, err := cache.SplitMetaNamespaceKey(key)
   324  	if err != nil {
   325  		klog.Errorf("failed to split namespace and name from key %s", key)
   326  		return err
   327  	}
   328  
   329  	nodeNames, err := nc.matcher.MatchConfig(name)
   330  	if err != nil {
   331  		klog.Errorf("failed to update config, configName: %v, err: %v", name, err)
   332  		return err
   333  	}
   334  
   335  	for _, nodeName := range nodeNames {
   336  		nc.nodeSyncQueue.Add(nodeName)
   337  	}
   338  
   339  	return nc.patchNodeOvercommitConfigStatus(name)
   340  }
   341  
   342  func (nc *NodeOvercommitController) syncNodeEvent(key string) error {
   343  	_, name, err := cache.SplitMetaNamespaceKey(key)
   344  	if err != nil {
   345  		klog.Errorf("failed to split namespace and name from key %s", key)
   346  		return err
   347  	}
   348  	_, err = nc.nodeLister.Get(name)
   349  	if err != nil {
   350  		if errors.IsNotFound(err) {
   351  			nc.matcher.DelNode(name)
   352  			return nil
   353  		} else {
   354  			return err
   355  		}
   356  	}
   357  
   358  	nodeOverCommitConfigList, err := nc.nodeOvercommitLister.List(labels.Everything())
   359  	if err != nil {
   360  		klog.Errorf("failed to list nodeOverCommitConfig: %v", err)
   361  		return err
   362  	}
   363  	for _, config := range nodeOverCommitConfigList {
   364  		_, err := nc.matcher.MatchConfig(config.Name)
   365  		if err != nil {
   366  			klog.Errorf("failed to match config %s: %v", config.Name, err)
   367  			return err
   368  		}
   369  		err = nc.patchNodeOvercommitConfigStatus(config.Name)
   370  		if err != nil {
   371  			// fail of patching nodeOvercommitConfigStatus will not affect node overcommit ratio
   372  			// can be fixed by reconcile
   373  			klog.Warning("failed to patch %s nodeOvercommitConfigStatus: %v", config.Name, err)
   374  			continue
   375  		}
   376  	}
   377  
   378  	nc.nodeSyncQueue.Add(name)
   379  	return nil
   380  }
   381  
   382  func (nc *NodeOvercommitController) syncNode(key string) error {
   383  	_, name, err := cache.SplitMetaNamespaceKey(key)
   384  	if err != nil {
   385  		klog.Errorf("failed to split namespace and name from key %s", key)
   386  		return err
   387  	}
   388  
   389  	config, err := nc.matcher.MatchNode(name)
   390  	if err != nil {
   391  		klog.Errorf("matchNode %v fail: %v", name, err)
   392  		return err
   393  	}
   394  
   395  	return nc.setNodeOvercommitAnnotations(name, config)
   396  }
   397  
   398  func (nc *NodeOvercommitController) syncCNR(key string) error {
   399  	_, name, err := cache.SplitMetaNamespaceKey(key)
   400  	if err != nil {
   401  		klog.Errorf("failed to split namespace and name from key %s", key)
   402  		return err
   403  	}
   404  
   405  	return nc.setNodeOvercommitAnnotations(name, nil)
   406  }
   407  
   408  func (nc *NodeOvercommitController) patchNodeOvercommitConfigStatus(configName string) error {
   409  	oldConfig, err := nc.nodeOvercommitLister.Get(configName)
   410  	if err != nil {
   411  		if errors.IsNotFound(err) {
   412  			klog.Warning("nodeOvercommitConfig %v has been deleted.")
   413  			return nil
   414  		}
   415  		klog.Errorf("get nodeOvercommitConfig %v fail: %v", configName, err)
   416  		return err
   417  	}
   418  
   419  	nodeNames := nc.matcher.GetNodes(configName)
   420  	newConfig := oldConfig.DeepCopy()
   421  	newConfig.Status.MatchedNodeList = nodeNames
   422  
   423  	_, err = nc.nocUpdater.PatchNocStatus(nc.ctx, oldConfig, newConfig)
   424  	if err != nil {
   425  		klog.Error(err)
   426  		return err
   427  	}
   428  	return nil
   429  }
   430  
   431  func (nc *NodeOvercommitController) setNodeOvercommitAnnotations(nodeName string, config *configv1alpha1.NodeOvercommitConfig) error {
   432  	// get node from node index
   433  	node, err := nc.nodeLister.Get(nodeName)
   434  	if err != nil {
   435  		klog.Errorf("get node %s fail: %v", nodeName, err)
   436  		return err
   437  	}
   438  
   439  	// get noc if nil
   440  	if config == nil {
   441  		config = nc.matcher.GetConfig(nodeName)
   442  	}
   443  	nodeOvercommitConfig := emptyOvercommitConfig()
   444  	if config != nil {
   445  		nodeOvercommitConfig = config
   446  	}
   447  
   448  	nodeCopy := node.DeepCopy()
   449  	nodeAnnotations := nodeCopy.GetAnnotations()
   450  	if nodeAnnotations == nil {
   451  		nodeAnnotations = make(map[string]string)
   452  	}
   453  
   454  	for resourceName, annotationKey := range resourceAnnotationKey {
   455  		c, ok := nodeOvercommitConfig.Spec.ResourceOvercommitRatio[resourceName]
   456  		if !ok {
   457  			switch resourceName {
   458  			case corev1.ResourceCPU:
   459  				nodeAnnotations[annotationKey] = consts.DefaultNodeCPUOvercommitRatio
   460  			case corev1.ResourceMemory:
   461  				nodeAnnotations[annotationKey] = consts.DefaultNodeMemoryOvercommitRatio
   462  			}
   463  		} else {
   464  			nodeAnnotations[annotationKey] = c
   465  		}
   466  	}
   467  
   468  	nc.nodeRealtimeOvercommitRatio(nodeAnnotations, node)
   469  
   470  	cpuAllocatable, cpuCapacity := nc.nodeOvercommitResource(node, validCPUOvercommitRatio(nodeAnnotations), corev1.ResourceCPU, consts.NodeAnnotationOriginalAllocatableCPUKey, consts.NodeAnnotationOriginalCapacityCPUKey)
   471  	klog.V(5).Infof("node %s CPU allocatable: %v, CPU capacity: %v with bindcpu", node.Name, cpuAllocatable, cpuCapacity)
   472  	if cpuAllocatable == "" {
   473  		delete(nodeAnnotations, consts.NodeAnnotationOvercommitAllocatableCPUKey)
   474  		delete(nodeAnnotations, consts.NodeAnnotationOvercommitCapacityCPUKey)
   475  	} else {
   476  		nodeAnnotations[consts.NodeAnnotationOvercommitAllocatableCPUKey] = cpuAllocatable
   477  		nodeAnnotations[consts.NodeAnnotationOvercommitCapacityCPUKey] = cpuCapacity
   478  	}
   479  
   480  	memAllocatable, memCapacity := nc.nodeOvercommitResource(node, validMemoryOvercommitRatio(nodeAnnotations), corev1.ResourceMemory, consts.NodeAnnotationOriginalAllocatableMemoryKey, consts.NodeAnnotationOriginalCapacityMemoryKey)
   481  	klog.V(5).Infof("node %s memory allocatable: %v, memory capacity: %v", node.Name, memAllocatable, memCapacity)
   482  	if memAllocatable == "" {
   483  		delete(nodeAnnotations, consts.NodeAnnotationOvercommitAllocatableMemoryKey)
   484  		delete(nodeAnnotations, consts.NodeAnnotationOvercommitCapacityMemoryKey)
   485  	} else {
   486  		nodeAnnotations[consts.NodeAnnotationOvercommitAllocatableMemoryKey] = memAllocatable
   487  		nodeAnnotations[consts.NodeAnnotationOvercommitCapacityMemoryKey] = memCapacity
   488  	}
   489  
   490  	nodeCopy.Annotations = nodeAnnotations
   491  	if !reflect.DeepEqual(nodeAnnotations, node.Annotations) {
   492  		return nc.nodeUpdater.PatchNode(nc.ctx, node, nodeCopy)
   493  	}
   494  	return nil
   495  }
   496  
   497  func emptyOvercommitConfig() *configv1alpha1.NodeOvercommitConfig {
   498  	return &configv1alpha1.NodeOvercommitConfig{
   499  		Spec: configv1alpha1.NodeOvercommitConfigSpec{
   500  			ResourceOvercommitRatio: map[corev1.ResourceName]string{},
   501  		},
   502  	}
   503  }
   504  
   505  func (nc *NodeOvercommitController) nodeRealtimeOvercommitRatio(nodeAnnotation map[string]string, node *corev1.Node) {
   506  	kcnr, err := nc.cnrLister.Get(node.Name)
   507  	if err != nil {
   508  		klog.Error(err)
   509  		return
   510  	}
   511  
   512  	if len(kcnr.Annotations) == 0 {
   513  		delete(nodeAnnotation, consts.NodeAnnotationRealtimeCPUOvercommitRatioKey)
   514  		delete(nodeAnnotation, consts.NodeAnnotationRealtimeMemoryOvercommitRatioKey)
   515  		return
   516  	}
   517  
   518  	realtimeCPU, ok := kcnr.Annotations[consts.NodeAnnotationCPUOvercommitRatioKey]
   519  	if ok {
   520  		nodeAnnotation[consts.NodeAnnotationRealtimeCPUOvercommitRatioKey] = realtimeCPU
   521  	} else {
   522  		delete(nodeAnnotation, consts.NodeAnnotationRealtimeCPUOvercommitRatioKey)
   523  	}
   524  
   525  	realtimeMem, ok := kcnr.Annotations[consts.NodeAnnotationMemoryOvercommitRatioKey]
   526  	if ok {
   527  		nodeAnnotation[consts.NodeAnnotationRealtimeMemoryOvercommitRatioKey] = realtimeMem
   528  	} else {
   529  		delete(nodeAnnotation, consts.NodeAnnotationRealtimeMemoryOvercommitRatioKey)
   530  	}
   531  }
   532  
   533  func (nc *NodeOvercommitController) nodeOvercommitResource(
   534  	node *corev1.Node,
   535  	overcommitRatio float64,
   536  	resourceName corev1.ResourceName,
   537  	originalAllocatableKey, originalCapacityKey string,
   538  ) (string, string) {
   539  	var guaranteedResource int
   540  
   541  	if overcommitRatio <= 1 {
   542  		klog.V(5).Infof("node %v cpu overcommit ratio less than 1: %v", node.Name, overcommitRatio)
   543  		overcommitRatio = 1.0
   544  	}
   545  
   546  	// node original allocatable and capacity should be always exists if overcommit webhook is running
   547  	// node allocatable can not be calculated without original allocatable, just return empty string.
   548  	nodeAllocatableAnnotation, ok := node.Annotations[originalAllocatableKey]
   549  	if !ok {
   550  		klog.V(5).Infof("node %s annotation %s missing", node.Name, originalAllocatableKey)
   551  		return "", ""
   552  	}
   553  	nodeCapacityAnnotation, ok := node.Annotations[originalCapacityKey]
   554  	if !ok {
   555  		klog.V(5).Infof("node %s annotation %s missing", node.Name, originalCapacityKey)
   556  		return "", ""
   557  	}
   558  
   559  	nodeAllocatable, err := resource.ParseQuantity(nodeAllocatableAnnotation)
   560  	if err != nil {
   561  		klog.Error(err)
   562  		return "", ""
   563  	}
   564  	nodeCapacity, err := resource.ParseQuantity(nodeCapacityAnnotation)
   565  	if err != nil {
   566  		klog.Error(err)
   567  		return "", ""
   568  	}
   569  
   570  	if resourceName == corev1.ResourceCPU {
   571  		guaranteedCPU, err := nc.getGuaranteedCPU(node.Name)
   572  		if err != nil {
   573  			klog.Error(err)
   574  		} else {
   575  			guaranteedResource = guaranteedCPU
   576  		}
   577  	}
   578  
   579  	guaranteedQuantity := resource.NewQuantity(int64(guaranteedResource), resource.DecimalSI)
   580  	nodeAllocatable.Sub(*guaranteedQuantity)
   581  	// Using quantity.Value may lead to a loss of precision, but it can cover larger values than MilliValue.
   582  	// memory is converted to int64 using quantity.Value in the cache of kube-scheduler,
   583  	// adopt the same approach here.
   584  	nodeAllocatable = native.MultiplyResourceQuantity(resourceName, nodeAllocatable, overcommitRatio)
   585  	nodeAllocatable.Add(*guaranteedQuantity)
   586  	nodeCapacity.Sub(*guaranteedQuantity)
   587  	nodeCapacity = native.MultiplyResourceQuantity(resourceName, nodeCapacity, overcommitRatio)
   588  	nodeCapacity.Add(*guaranteedQuantity)
   589  
   590  	klog.V(5).Infof("node %s overcommitRatio: %v, guaranteedResource: %v, final allocatable: %v, capacity: %v",
   591  		node.Name, overcommitRatio, guaranteedResource, nodeAllocatable.String(), nodeCapacity.String())
   592  	return nodeAllocatable.String(), nodeCapacity.String()
   593  }
   594  
   595  func (nc *NodeOvercommitController) getGuaranteedCPU(nodeName string) (int, error) {
   596  	kcnr, err := nc.cnrLister.Get(nodeName)
   597  	if err != nil {
   598  		klog.Error(err)
   599  		return 0, err
   600  	}
   601  
   602  	if kcnr.Annotations == nil {
   603  		klog.V(5).Infof("node %s with nil annotation", nodeName)
   604  		return 0, nil
   605  	}
   606  
   607  	if kcnr.Annotations[consts.KCNRAnnotationCPUManager] != string(cpumanager.PolicyStatic) {
   608  		klog.V(5).Infof("node %s not support cpu manager", kcnr.Name)
   609  		return 0, nil
   610  	}
   611  
   612  	cpusAnnotation, ok := kcnr.Annotations[consts.KCNRAnnotationGuaranteedCPUs]
   613  	if !ok {
   614  		klog.V(5).Infof("node %s guaranteed cpus missing", kcnr.Name)
   615  		return 0, nil
   616  	}
   617  	guaranteedCPUs, err := strconv.Atoi(cpusAnnotation)
   618  	if err != nil {
   619  		klog.Error(err)
   620  		return 0, err
   621  	}
   622  
   623  	return guaranteedCPUs, nil
   624  }
   625  
   626  func validCPUOvercommitRatio(annotation map[string]string) float64 {
   627  	res, err := overcommitutil.OvercommitRatioValidate(annotation, consts.NodeAnnotationCPUOvercommitRatioKey, consts.NodeAnnotationRealtimeCPUOvercommitRatioKey)
   628  	if err != nil {
   629  		klog.Error(err)
   630  	}
   631  	return res
   632  }
   633  
   634  func validMemoryOvercommitRatio(annotation map[string]string) float64 {
   635  	res, err := overcommitutil.OvercommitRatioValidate(annotation, consts.NodeAnnotationMemoryOvercommitRatioKey, consts.NodeAnnotationRealtimeMemoryOvercommitRatioKey)
   636  	if err != nil {
   637  		klog.Error(err)
   638  	}
   639  	return res
   640  }