volcano.sh/volcano@v1.9.0/pkg/scheduler/plugins/rescheduling/node_utilization_util.go (about)

     1  /*
     2  Copyright 2022 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package rescheduling
    18  
    19  import (
    20  	"sort"
    21  
    22  	v1 "k8s.io/api/core/v1"
    23  	"k8s.io/apimachinery/pkg/api/resource"
    24  	"k8s.io/klog/v2"
    25  	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
    26  
    27  	"volcano.sh/volcano/pkg/scheduler/api"
    28  )
    29  
    30  type NodeUtilization struct {
    31  	nodeInfo    *v1.Node
    32  	utilization map[v1.ResourceName]float64
    33  	pods        []*v1.Pod
    34  }
    35  
    36  type thresholdFilter func(*NodeUtilization, interface{}) bool
    37  
    38  type isContinueEviction func(usage *NodeUtilization, totalAllocatableResource map[v1.ResourceName]*resource.Quantity, config interface{}) bool
    39  
    40  // groupNodesByUtilization divides the nodes into two groups by resource utilization filters
    41  func groupNodesByUtilization(nodeUtilizationList []*NodeUtilization, lowThresholdFilter, highThresholdFilter thresholdFilter, config interface{}) ([]*NodeUtilization, []*NodeUtilization) {
    42  	lowNodes := make([]*NodeUtilization, 0)
    43  	highNodes := make([]*NodeUtilization, 0)
    44  
    45  	for _, nodeUtilization := range nodeUtilizationList {
    46  		if lowThresholdFilter(nodeUtilization, config) {
    47  			lowNodes = append(lowNodes, nodeUtilization)
    48  		} else if highThresholdFilter(nodeUtilization, config) {
    49  			highNodes = append(highNodes, nodeUtilization)
    50  		}
    51  	}
    52  	klog.V(4).Infof("lowNodes: %v\n", lowNodes)
    53  	klog.V(4).Infof("highNodes: %v\n", highNodes)
    54  	return lowNodes, highNodes
    55  }
    56  
    57  // getNodeUtilization returns all node resource utilization list
    58  func getNodeUtilization() []*NodeUtilization {
    59  	nodeUtilizationList := make([]*NodeUtilization, 0)
    60  	for _, nodeInfo := range Session.Nodes {
    61  		nodeUtilization := &NodeUtilization{
    62  			nodeInfo: nodeInfo.Node,
    63  			utilization: map[v1.ResourceName]float64{
    64  				v1.ResourceCPU:    nodeInfo.ResourceUsage.CPUUsageAvg[MetricsPeriod],
    65  				v1.ResourceMemory: nodeInfo.ResourceUsage.MEMUsageAvg[MetricsPeriod],
    66  			},
    67  			pods: nodeInfo.Pods(),
    68  		}
    69  		nodeUtilizationList = append(nodeUtilizationList, nodeUtilization)
    70  		klog.V(4).Infof("node: %s, cpu: %v, memory: %v\n", nodeUtilization.nodeInfo.Name, nodeUtilization.utilization[v1.ResourceCPU], nodeUtilization.utilization[v1.ResourceMemory])
    71  	}
    72  	return nodeUtilizationList
    73  }
    74  
    75  // evictPodsFromSourceNodes evict pods from source nodes to target nodes according to priority and QoS
    76  func evictPodsFromSourceNodes(sourceNodes, targetNodes []*NodeUtilization, tasks []*api.TaskInfo, evictionCon isContinueEviction, config interface{}) []*api.TaskInfo {
    77  	resourceNames := []v1.ResourceName{
    78  		v1.ResourceCPU,
    79  		v1.ResourceMemory,
    80  	}
    81  	utilizationConfig := parseArgToConfig(config)
    82  	totalAllocatableResource := map[v1.ResourceName]*resource.Quantity{
    83  		v1.ResourceCPU:    {},
    84  		v1.ResourceMemory: {},
    85  	}
    86  	for _, node := range targetNodes {
    87  		nodeCapacity := getNodeCapacity(node.nodeInfo)
    88  		for _, rName := range resourceNames {
    89  			totalAllocatableResource[rName].Add(*convertPercentToQuan(rName, utilizationConfig.TargetThresholds[string(rName)], nodeCapacity))
    90  			totalAllocatableResource[rName].Sub(*convertPercentToQuan(rName, node.utilization[rName], nodeCapacity))
    91  		}
    92  	}
    93  	klog.V(4).Infof("totalAllocatableResource: %s", totalAllocatableResource)
    94  
    95  	// sort the source nodes in descending order
    96  	sortNodes(sourceNodes)
    97  
    98  	// victims select algorithm:
    99  	// 1. Evict pods from nodes with high utilization to low utilization
   100  	// 2. As to one node, evict pods from low priority to high priority. If the priority is same, evict pods according to QoS from low to high
   101  	victims := make([]*api.TaskInfo, 0)
   102  	for _, node := range sourceNodes {
   103  		if len(node.pods) == 0 {
   104  			klog.V(4).Infof("No pods can be removed on node: %s", node.nodeInfo.Name)
   105  			continue
   106  		}
   107  		sortPods(node.pods)
   108  		victims = append(victims, evict(node.pods, node, totalAllocatableResource, evictionCon, tasks, config)...)
   109  	}
   110  	klog.V(3).Infof("victims: %v\n", victims)
   111  	return victims
   112  }
   113  
   114  // parseArgToConfig returns a nodeUtilizationConfig object from parameters
   115  // TODO: It is just for lowNodeUtilization now, which should be abstracted as a common function.
   116  func parseArgToConfig(config interface{}) *LowNodeUtilizationConf {
   117  	var utilizationConfig *LowNodeUtilizationConf
   118  	if arg, ok := config.(LowNodeUtilizationConf); ok {
   119  		utilizationConfig = &arg
   120  	}
   121  	return utilizationConfig
   122  }
   123  
   124  // sortNodes sorts all the nodes according the usage of cpu and memory with weight score
   125  func sortNodes(nodeUtilizationList []*NodeUtilization) {
   126  	cmpFn := func(i, j int) bool {
   127  		return getScoreForNode(i, nodeUtilizationList) > getScoreForNode(j, nodeUtilizationList)
   128  	}
   129  	sort.Slice(nodeUtilizationList, cmpFn)
   130  }
   131  
   132  // getScoreForNode returns the score for node which considers only for CPU and memory
   133  func getScoreForNode(index int, nodeUtilizationList []*NodeUtilization) float64 {
   134  	cpuScore := nodeUtilizationList[index].utilization[v1.ResourceCPU]
   135  	memoryScore := nodeUtilizationList[index].utilization[v1.ResourceMemory]
   136  	return cpuScore + memoryScore
   137  }
   138  
   139  // sortPods return the pods in order according the priority and QoS
   140  func sortPods(pods []*v1.Pod) {
   141  	cmp := func(i, j int) bool {
   142  		if pods[i].Spec.Priority == nil && pods[j].Spec.Priority != nil {
   143  			return true
   144  		}
   145  		if pods[j].Spec.Priority == nil && pods[i].Spec.Priority != nil {
   146  			return false
   147  		}
   148  		if (pods[j].Spec.Priority == nil && pods[i].Spec.Priority == nil) || (*pods[i].Spec.Priority == *pods[j].Spec.Priority) {
   149  			if v1qos.GetPodQOS(pods[i]) == v1.PodQOSBestEffort {
   150  				return true
   151  			}
   152  			if v1qos.GetPodQOS(pods[i]) == v1.PodQOSBurstable && v1qos.GetPodQOS(pods[j]) == v1.PodQOSGuaranteed {
   153  				return true
   154  			}
   155  			return false
   156  		}
   157  		return *pods[i].Spec.Priority < *pods[j].Spec.Priority
   158  	}
   159  	sort.Slice(pods, cmp)
   160  }
   161  
   162  // evict select victims and add to the eviction list
   163  func evict(pods []*v1.Pod, utilization *NodeUtilization, totalAllocatableResource map[v1.ResourceName]*resource.Quantity, continueEviction isContinueEviction, tasks []*api.TaskInfo, config interface{}) []*api.TaskInfo {
   164  	victims := make([]*api.TaskInfo, 0)
   165  	for _, pod := range pods {
   166  		if !continueEviction(utilization, totalAllocatableResource, config) {
   167  			klog.V(3).Infoln("stop evict pods")
   168  			return victims
   169  		}
   170  		for _, task := range tasks {
   171  			if task.Pod.Name == pod.Name {
   172  				usedCPU := *resource.NewMilliQuantity(int64(task.Resreq.MilliCPU), resource.DecimalSI)
   173  				usedMem := *resource.NewQuantity(int64(task.Resreq.Memory), resource.BinarySI)
   174  				totalAllocatableResource[v1.ResourceCPU].Sub(usedCPU)
   175  				totalAllocatableResource[v1.ResourceMemory].Sub(usedMem)
   176  				utilization.utilization[v1.ResourceCPU] -= convertQuanToPercent(v1.ResourceCPU, &usedCPU, utilization.nodeInfo.Status.Capacity)
   177  				utilization.utilization[v1.ResourceMemory] -= convertQuanToPercent(v1.ResourceMemory, &usedMem, utilization.nodeInfo.Status.Capacity)
   178  				klog.V(4).Infof("totalAllocatableResource: %v\n", totalAllocatableResource)
   179  				klog.V(4).Infof("node: %s, utilization: %v\n", utilization.nodeInfo.Name, utilization.utilization)
   180  				victims = append(victims, task)
   181  				break
   182  			}
   183  		}
   184  	}
   185  	return victims
   186  }
   187  
   188  // getNodeCapacity returns node's capacity
   189  func getNodeCapacity(node *v1.Node) v1.ResourceList {
   190  	nodeCapacity := node.Status.Capacity
   191  	if len(node.Status.Allocatable) > 0 {
   192  		nodeCapacity = node.Status.Allocatable
   193  	}
   194  	return nodeCapacity
   195  }
   196  
   197  // convertPercentToQuan converts resource percentage to amount
   198  func convertPercentToQuan(rName v1.ResourceName, percent float64, nodeCapacity v1.ResourceList) *resource.Quantity {
   199  	var amount *resource.Quantity
   200  	if rName == v1.ResourceCPU {
   201  		amount = resource.NewMilliQuantity(int64(percent*float64(nodeCapacity.Cpu().MilliValue())*0.01), resource.DecimalSI)
   202  	} else if rName == v1.ResourceMemory {
   203  		amount = resource.NewQuantity(int64(percent*float64(nodeCapacity.Memory().Value())*0.01), resource.BinarySI)
   204  	}
   205  	return amount
   206  }
   207  
   208  // convertQuanToPercent converts resource amount to percentage
   209  func convertQuanToPercent(rName v1.ResourceName, amount *resource.Quantity, nodeCapacity v1.ResourceList) float64 {
   210  	var percent float64
   211  	if rName == v1.ResourceCPU {
   212  		percent = amount.AsApproximateFloat64() * 100 / nodeCapacity.Cpu().AsApproximateFloat64()
   213  	} else if rName == v1.ResourceMemory {
   214  		percent = amount.AsApproximateFloat64() * 100 / nodeCapacity.Memory().AsApproximateFloat64()
   215  	}
   216  	return percent
   217  }