volcano.sh/volcano@v1.9.0/pkg/scheduler/plugins/rescheduling/low_node_utilization.go (about)

     1  /*
     2  Copyright 2022 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package rescheduling
    18  
    19  import (
    20  	"reflect"
    21  
    22  	v1 "k8s.io/api/core/v1"
    23  	"k8s.io/apimachinery/pkg/api/resource"
    24  	"k8s.io/klog/v2"
    25  
    26  	"volcano.sh/volcano/pkg/scheduler/api"
    27  )
    28  
// DefaultLowNodeConf defines the default configuration for the
// lowNodeUtilization (LNU) strategy in its untyped, map-based form.
//
// Both "thresholds" and "targetThresholds" default to 100 for cpu, memory and
// pods, which are the same values NewLowNodeUtilizationConf puts into the
// typed configuration.
//
// NOTE(review): the threshold maps here are map[string]float64, whereas
// LowNodeUtilizationConf.parse only accepts map[interface{}]interface{} for
// those keys — confirm this map is never fed through parse as-is.
var DefaultLowNodeConf = map[string]interface{}{
	"thresholds":                 map[string]float64{"cpu": 100, "memory": 100, "pods": 100},
	"targetThresholds":           map[string]float64{"cpu": 100, "memory": 100, "pods": 100},
	"thresholdPriorityClassName": "system-cluster-critical",
	"nodeFit":                    true,
}
    36  
// LowNodeUtilizationConf is the typed configuration of the lowNodeUtilization
// strategy. Use NewLowNodeUtilizationConf to obtain an instance pre-filled
// with the default values.
type LowNodeUtilizationConf struct {
	// Thresholds maps a resource name to its low-utilization threshold.
	// lowThresholdFilter accepts a node only when every listed resource is
	// strictly below its threshold. Values are presumably percentages
	// (the default is 100) — TODO confirm.
	Thresholds map[string]float64
	// TargetThresholds maps a resource name to its target threshold.
	// highThresholdFilter accepts a node when at least one listed resource is
	// strictly above its threshold.
	TargetThresholds map[string]float64
	// NumberOfNodes is not read anywhere in this part of the file.
	// NOTE(review): semantics not visible here — confirm against callers.
	NumberOfNodes int
	// ThresholdPriority is not read anywhere in this part of the file.
	// NOTE(review): semantics not visible here — confirm against callers.
	ThresholdPriority int
	// ThresholdPriorityClassName defaults to "system-cluster-critical".
	// NOTE(review): presumably names a priority class used when picking
	// evictable pods — confirm against callers.
	ThresholdPriorityClassName string
	// NodeFit defaults to true.
	// NOTE(review): presumably controls a node-fit check before eviction —
	// confirm against callers.
	NodeFit bool
}
    45  
    46  // NewLowNodeUtilizationConf returns the pointer of LowNodeUtilizationConf object with default value
    47  func NewLowNodeUtilizationConf() *LowNodeUtilizationConf {
    48  	return &LowNodeUtilizationConf{
    49  		Thresholds:                 map[string]float64{"cpu": 100, "memory": 100, "pods": 100},
    50  		TargetThresholds:           map[string]float64{"cpu": 100, "memory": 100, "pods": 100},
    51  		ThresholdPriorityClassName: "system-cluster-critical",
    52  		NodeFit:                    true,
    53  	}
    54  }
    55  
    56  // parse converts the config map to struct object
    57  func (lnuc *LowNodeUtilizationConf) parse(configs map[string]interface{}) {
    58  	if len(configs) == 0 {
    59  		return
    60  	}
    61  	lowThresholdsConfigs, ok := configs["thresholds"]
    62  	if ok {
    63  		lowConfigs, ok := lowThresholdsConfigs.(map[interface{}]interface{})
    64  		if !ok {
    65  			klog.Warningln("Assert lowThresholdsConfigs to map error, abort the configuration parse.")
    66  			return
    67  		}
    68  		config := make(map[string]int)
    69  		for k, v := range lowConfigs {
    70  			config[k.(string)] = v.(int)
    71  		}
    72  		parseThreshold(config, lnuc, "Thresholds")
    73  	}
    74  	targetThresholdsConfigs, ok := configs["targetThresholds"]
    75  	if ok {
    76  		targetConfigs, ok := targetThresholdsConfigs.(map[interface{}]interface{})
    77  		if !ok {
    78  			klog.Warningln("Assert targetThresholdsConfigs to map error, abort the configuration parse.")
    79  			return
    80  		}
    81  		config := make(map[string]int)
    82  		for k, v := range targetConfigs {
    83  			config[k.(string)] = v.(int)
    84  		}
    85  		parseThreshold(config, lnuc, "TargetThresholds")
    86  	}
    87  }
    88  
    89  func parseThreshold(thresholdsConfig map[string]int, lnuc *LowNodeUtilizationConf, param string) {
    90  	if len(thresholdsConfig) > 0 {
    91  		configValue := reflect.ValueOf(lnuc).Elem().FieldByName(param)
    92  		config := configValue.Interface().(map[string]float64)
    93  
    94  		cpuThreshold, ok := thresholdsConfig["cpu"]
    95  		if ok {
    96  			config["cpu"] = float64(cpuThreshold)
    97  		}
    98  		memoryThreshold, ok := thresholdsConfig["memory"]
    99  		if ok {
   100  			config["memory"] = float64(memoryThreshold)
   101  		}
   102  		podThreshold, ok := thresholdsConfig["pod"]
   103  		if ok {
   104  			config["pod"] = float64(podThreshold)
   105  		}
   106  	}
   107  }
   108  
   109  var victimsFnForLnu = func(tasks []*api.TaskInfo) []*api.TaskInfo {
   110  	victims := make([]*api.TaskInfo, 0)
   111  
   112  	// parse configuration arguments
   113  	utilizationConfig := NewLowNodeUtilizationConf()
   114  	parametersConfig := RegisteredStrategyConfigs["lowNodeUtilization"]
   115  	var config map[string]interface{}
   116  	config, ok := parametersConfig.(map[string]interface{})
   117  	if !ok {
   118  		klog.Errorln("parameters parse error for lowNodeUtilization")
   119  		return victims
   120  	}
   121  	utilizationConfig.parse(config)
   122  
   123  	// group the nodes into lowNodes and highNodes
   124  	nodeUtilizationList := getNodeUtilization()
   125  	lowNodes, highNodes := groupNodesByUtilization(nodeUtilizationList, lowThresholdFilter, highThresholdFilter, *utilizationConfig)
   126  
   127  	if len(lowNodes) == 0 {
   128  		klog.V(4).Infof("The resource utilization of all nodes is above the threshold")
   129  		return victims
   130  	}
   131  	if len(lowNodes) == len(Session.Nodes) {
   132  		klog.V(4).Infof("The resource utilization of all nodes is below the threshold")
   133  		return victims
   134  	}
   135  	if len(highNodes) == 0 {
   136  		klog.V(4).Infof("The resource utilization of all nodes is below the target threshold")
   137  		return victims
   138  	}
   139  
   140  	// select victims from lowNodes
   141  	return evictPodsFromSourceNodes(highNodes, lowNodes, tasks, isContinueEvictPods, *utilizationConfig)
   142  }
   143  
   144  // lowThresholdFilter filter nodes which all resource dimensions are under the low utilization threshold
   145  func lowThresholdFilter(usage *NodeUtilization, config interface{}) bool {
   146  	utilizationConfig := parseArgToConfig(config)
   147  	if utilizationConfig == nil {
   148  		klog.V(4).Infoln("lack of LowNodeUtilizationConf pointer parameter")
   149  		return false
   150  	}
   151  	klog.V(4).Infof("The utilizationConfig thresholds is %v", utilizationConfig.Thresholds)
   152  
   153  	if usage.nodeInfo.Spec.Unschedulable {
   154  		return false
   155  	}
   156  	for rName, usagePercent := range usage.utilization {
   157  		if threshold, ok := utilizationConfig.Thresholds[string(rName)]; ok {
   158  			if usagePercent >= threshold {
   159  				return false
   160  			}
   161  		}
   162  	}
   163  	return true
   164  }
   165  
   166  // highThresholdFilter filter nodes which at least one resource dimension above the target utilization threshold
   167  func highThresholdFilter(usage *NodeUtilization, config interface{}) bool {
   168  	utilizationConfig := parseArgToConfig(config)
   169  	if utilizationConfig == nil {
   170  		klog.V(4).Infof("lack of LowNodeUtilizationConf pointer parameter")
   171  		return false
   172  	}
   173  	klog.V(4).Infof("The utilizationConfig targetThresholds is %v", utilizationConfig.TargetThresholds)
   174  
   175  	for rName, usagePercent := range usage.utilization {
   176  		if threshold, ok := utilizationConfig.TargetThresholds[string(rName)]; ok {
   177  			if usagePercent > threshold {
   178  				return true
   179  			}
   180  		}
   181  	}
   182  	return false
   183  }
   184  
   185  // isContinueEvictPods judges whether continue to select victim pods
   186  func isContinueEvictPods(usage *NodeUtilization, totalAllocatableResource map[v1.ResourceName]*resource.Quantity, config interface{}) bool {
   187  	var isNodeOverused bool
   188  	utilizationConfig := parseArgToConfig(config)
   189  	for rName, usage := range usage.utilization {
   190  		if threshold, ok := utilizationConfig.TargetThresholds[string(rName)]; ok {
   191  			if usage >= threshold {
   192  				isNodeOverused = true
   193  				break
   194  			}
   195  		}
   196  	}
   197  	if !isNodeOverused {
   198  		return false
   199  	}
   200  
   201  	for _, amount := range totalAllocatableResource {
   202  		if amount.CmpInt64(0) == 0 {
   203  			return false
   204  		}
   205  	}
   206  	return true
   207  }