github.com/kubewharf/katalyst-core@v0.5.3/pkg/scheduler/plugins/nodeovercommitment/fit.go

/*
Copyright 2022 The Katalyst Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package nodeovercommitment

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/scheduler/framework"

	"github.com/kubewharf/katalyst-api/pkg/consts"
	"github.com/kubewharf/katalyst-core/pkg/scheduler/plugins/nodeovercommitment/cache"
	"github.com/kubewharf/katalyst-core/pkg/util/native"
	overcommitutil "github.com/kubewharf/katalyst-core/pkg/util/overcommit"
)

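// preFilterState is the CPU demand computed at PreFilter and consumed at
// Filter: either the number of exclusive (guaranteed) CPUs the pod needs, or
// the pod's aggregated resource requests for non-guaranteed pods.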
type preFilterState struct {
	framework.Resource
	GuaranteedCPUs int
}

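// Clone implements framework.StateData. The state is not mutated after
// PreFilter, so returning the same pointer is sufficient.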
func (s *preFilterState) Clone() framework.StateData {
	return s
}

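// PreFilter computes the pod's CPU demand (guaranteed CPUs or milli-CPU
// requests) and stores it in the cycle state for Filter to consume.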
func (n *NodeOvercommitment) PreFilter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
	cycleState.Write(preFilterStateKey, computePodResourceRequest(pod))
	return nil, nil
}

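// PreFilterExtensions returns nil because the precomputed state does not need
// to be adjusted when pods are added to or removed from a node.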
func (n *NodeOvercommitment) PreFilterExtensions() framework.PreFilterExtensions {
	return nil
}

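// getPreFilterState retrieves the state written by PreFilter from the cycle
// state.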
func getPreFilterState(cycleState *framework.CycleState) (*preFilterState, error) {
	c, err := cycleState.Read(preFilterStateKey)
	if err != nil {
		return nil, fmt.Errorf("error reading %q from cycleState: %w", preFilterStateKey, err)
	}

	s, ok := c.(*preFilterState)
	if !ok {
		return nil, fmt.Errorf("cannot convert %+v to NodeOvercommitment.preFilterState", c)
	}
	return s, nil
}

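// Filter rejects nodes that cannot accommodate the pod under overcommitment:
// a node is unschedulable when memory overcommitment is enabled while the
// memory manager is active, or when the CPU manager is active and the
// overcommitted CPU remaining after exclusive allocations cannot cover the
// pod's CPU demand.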
func (n *NodeOvercommitment) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
	if nodeInfo.Node() == nil {
		return framework.NewStatus(framework.Error, "node not found")
	}

	s, err := getPreFilterState(cycleState)
	if err != nil {
		klog.Error(err)
		return framework.AsStatus(err)
	}
	if s.GuaranteedCPUs == 0 && s.MilliCPU == 0 {
		return nil
	}

	nodeName := nodeInfo.Node().GetName()
	nodeCache, err := cache.GetCache().GetNode(nodeName)
	if err != nil {
		err := fmt.Errorf("failed to get node %s from cache: %v", nodeName, err)
		klog.Error(err)
		return framework.NewStatus(framework.Error, err.Error())
	}

	CPUManagerAvailable, memoryManagerAvailable := nodeCache.HintProvidersAvailable()
	CPUOvercommitRatio, memoryOvercommitRatio, err := n.nodeOvercommitRatio(nodeInfo)
	if err != nil {
		klog.Error(err)
		return framework.NewStatus(framework.Error, err.Error())
	}

	if memoryManagerAvailable && memoryOvercommitRatio > 1.0 {
		err = fmt.Errorf("node %v has both the memory manager and memory overcommitment enabled", nodeName)
		klog.Error(err)
		return framework.NewStatus(framework.Unschedulable, err.Error())
	}

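	// With the CPU manager static policy active and CPU overcommitted, exclusive
	// (guaranteed) CPUs must come out of the node's original allocatable; only
	// the remainder is scaled by the overcommit ratio.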
	if CPUManagerAvailable && CPUOvercommitRatio > 1.0 {
		nodeCPUOriginAllocatable, err := n.nodeCPUAllocatable(nodeInfo)
		if err != nil {
			klog.Error(err)
			return framework.NewStatus(framework.Error, err.Error())
		}

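		// guaranteedCPUs are the CPUs already allocated exclusively on this node;
		// nonGuaranteedRequestCPU is all remaining milli-CPU already requested on it.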
		guaranteedCPUs := resource.NewQuantity(int64(nodeCache.GetGuaranteedCPUs()), resource.DecimalSI)
		nonGuaranteedRequestCPU := nodeInfo.Requested.MilliCPU - guaranteedCPUs.MilliValue()

		nodeCPUOriginAllocatable.Sub(*guaranteedCPUs)
		*nodeCPUOriginAllocatable = native.MultiplyResourceQuantity(v1.ResourceCPU, *nodeCPUOriginAllocatable, CPUOvercommitRatio)

		klog.V(5).Infof("nodeOvercommitment, pod guaranteedCPUs: %v, pod cpus: %v, CPUOvercommitRatio: %v, nodeAllocatable: %v, guaranteedCPUs: %v, nonGuaranteedRequestCPU: %v",
			s.GuaranteedCPUs, s.MilliCPU, CPUOvercommitRatio, nodeCPUOriginAllocatable.MilliValue(), guaranteedCPUs.MilliValue(), nonGuaranteedRequestCPU)

		if s.GuaranteedCPUs > 0 {
			if int64(float64(s.GuaranteedCPUs)*1000.0*CPUOvercommitRatio) > nodeCPUOriginAllocatable.MilliValue()-nonGuaranteedRequestCPU {
				return framework.NewStatus(framework.Unschedulable, "node overcommitment insufficient cpu")
			}
		} else {
			if s.MilliCPU > nodeCPUOriginAllocatable.MilliValue()-nonGuaranteedRequestCPU {
				return framework.NewStatus(framework.Unschedulable, "node overcommitment insufficient cpu")
			}
		}
	}

	return nil
}

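// nodeOvercommitRatio reads the validated CPU and memory overcommit ratios
// from the node annotations, defaulting both to 1.0 when the node has no
// annotations and returning an error when validation fails.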
func (n *NodeOvercommitment) nodeOvercommitRatio(nodeInfo *framework.NodeInfo) (CPUOvercommitRatio, memoryOvercommitRatio float64, err error) {
	CPUOvercommitRatio, memoryOvercommitRatio = 1.0, 1.0

	if nodeInfo.Node() == nil || nodeInfo.Node().GetAnnotations() == nil {
		return
	}

	annotation := nodeInfo.Node().GetAnnotations()
	CPUOvercommitRatio, err = overcommitutil.OvercommitRatioValidate(annotation, consts.NodeAnnotationCPUOvercommitRatioKey, consts.NodeAnnotationRealtimeCPUOvercommitRatioKey)
	if err != nil {
		klog.Error(err)
		return
	}

	memoryOvercommitRatio, err = overcommitutil.OvercommitRatioValidate(annotation, consts.NodeAnnotationMemoryOvercommitRatioKey, consts.NodeAnnotationRealtimeMemoryOvercommitRatioKey)
	if err != nil {
		klog.Error(err)
		return
	}

	return
}

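// computePodResourceRequest records the number of exclusive CPUs for pods
// that qualify for guaranteed CPU allocation; otherwise it aggregates
// container requests, takes the max against any init container, and adds pod
// overhead.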
func computePodResourceRequest(pod *v1.Pod) *preFilterState {
	result := &preFilterState{}

	CPUs := native.PodGuaranteedCPUs(pod)
	if CPUs > 0 {
		result.GuaranteedCPUs = CPUs
		return result
	}

	for _, container := range pod.Spec.Containers {
		result.Add(container.Resources.Requests)
	}

	// take max_resource(sum_pod, any_init_container)
	for _, container := range pod.Spec.InitContainers {
		result.SetMaxResource(container.Resources.Requests)
	}

	// If Overhead is being utilized, add it to the pod's total requests.
	if pod.Spec.Overhead != nil {
		result.Add(pod.Spec.Overhead)
	}
	return result
}

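// nodeCPUAllocatable returns the node's CPU allocatable before overcommitment
// was applied, falling back to status.allocatable when the original value is
// not recorded in the node annotations.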
func (n *NodeOvercommitment) nodeCPUAllocatable(nodeInfo *framework.NodeInfo) (*resource.Quantity, error) {
	node := nodeInfo.Node()
	if node == nil {
		return nil, fmt.Errorf("node not found")
	}

	if node.GetAnnotations() == nil {
		return node.Status.Allocatable.Cpu(), nil
	}

	originalAllocatableCPU, ok := node.Annotations[consts.NodeAnnotationOriginalAllocatableCPUKey]
	if !ok {
		return node.Status.Allocatable.Cpu(), nil
	}

	quantity, err := resource.ParseQuantity(originalAllocatableCPU)
	return &quantity, err
}