volcano.sh/volcano@v1.9.0/pkg/scheduler/plugins/tdm/tdm.go (about)

     1  /*
     2  Copyright 2021 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package tdm
    18  
    19  import (
    20  	"fmt"
    21  	"strings"
    22  	"time"
    23  
    24  	"k8s.io/apimachinery/pkg/util/intstr"
    25  	"k8s.io/klog/v2"
    26  	k8sFramework "k8s.io/kubernetes/pkg/scheduler/framework"
    27  
    28  	"volcano.sh/volcano/pkg/scheduler/api"
    29  	"volcano.sh/volcano/pkg/scheduler/framework"
    30  	tutil "volcano.sh/volcano/pkg/scheduler/plugins/util"
    31  	"volcano.sh/volcano/pkg/scheduler/util"
    32  )
    33  
    34  const (
    35  	// PluginName indicates name of volcano scheduler plugin.
    36  	PluginName = "tdm"
    37  	// revocableZoneLayout revocable zone layout
    38  	revocableZoneLayout      = "15:04"
    39  	revocableZoneLabelPrefix = "tdm.revocable-zone."
    40  	evictPeriodLabel         = "tdm.evict.period"
    41  	defaultPodEvictNum       = 1
    42  )
    43  
    44  var lastEvictAt time.Time
    45  
    46  /*
    47     actions: "enqueue, reclaim, allocate, preempt"
    48     tiers:
    49     - plugins:
    50       - name: tdm
    51         arguments:
    52           tdm.revocable-zone.rz1: 10:00-21:00
    53           tdm.revocable-zone.rz2: 12:00-14:00
    54           tdm.evict.period: 1m
    55  */
    56  
    57  type tdmPlugin struct {
    58  	revocableZone map[string]string
    59  	// evictPeriod
    60  	// default 1m
    61  	evictPeriod time.Duration
    62  }
    63  
    64  // New function returns prioritizePlugin object
    65  func New(args framework.Arguments) framework.Plugin {
    66  	revocableZone := make(map[string]string)
    67  	evictPeriod := time.Minute
    68  
    69  	for k, v := range args {
    70  		if strings.Contains(k, revocableZoneLabelPrefix) {
    71  			revocableZone[strings.Replace(k, revocableZoneLabelPrefix, "", 1)] = v.(string)
    72  		}
    73  	}
    74  
    75  	if period, ok := args[evictPeriodLabel]; ok {
    76  		if d, err := time.ParseDuration(period.(string)); err == nil {
    77  			evictPeriod = d
    78  		}
    79  	}
    80  
    81  	return &tdmPlugin{revocableZone, evictPeriod}
    82  }
    83  
    84  func (tp *tdmPlugin) Name() string {
    85  	return PluginName
    86  }
    87  
    88  func parseRevocableZone(rzRaw string) (start, end time.Time, err error) {
    89  	rzValues := strings.Split(strings.TrimSpace(rzRaw), "-")
    90  
    91  	if len(rzValues) != 2 {
    92  		err = fmt.Errorf("revocable zone %v format error", rzRaw)
    93  		return
    94  	}
    95  
    96  	t1, err := time.Parse(revocableZoneLayout, rzValues[0])
    97  	if err != nil {
    98  		return
    99  	}
   100  
   101  	t2, err := time.Parse(revocableZoneLayout, rzValues[1])
   102  	if err != nil {
   103  		return
   104  	}
   105  
   106  	now := time.Now()
   107  
   108  	start = time.Date(now.Year(), now.Month(), now.Day(), t1.Hour(), t1.Minute(), 0, 0, now.Location())
   109  	if t1.After(t2) || t1.Equal(t2) {
   110  		end = time.Date(now.Year(), now.Month(), now.Day()+1, t2.Hour(), t2.Minute(), 0, 0, now.Location())
   111  	} else {
   112  		end = time.Date(now.Year(), now.Month(), now.Day(), t2.Hour(), t2.Minute(), 0, 0, now.Location())
   113  	}
   114  
   115  	return
   116  }
   117  
   118  func (tp *tdmPlugin) availableRevocableZone(rz string) error {
   119  	// rzRaw format 00:00-23:59
   120  	rzRaw, ok := tp.revocableZone[rz]
   121  	if !ok {
   122  		return fmt.Errorf("revocable zone %v not support", rz)
   123  	}
   124  
   125  	now := time.Now()
   126  
   127  	start, end, err := parseRevocableZone(rzRaw)
   128  	if err != nil {
   129  		return err
   130  	}
   131  
   132  	if now.Unix() < start.Unix() || now.Unix() > end.Unix() {
   133  		return fmt.Errorf("current time beyond revocable zone %v:%v", rz, rzRaw)
   134  	}
   135  
   136  	return nil
   137  }
   138  
   139  func (tp *tdmPlugin) OnSessionOpen(ssn *framework.Session) {
   140  	klog.V(5).Infof("Enter tdm plugin ...")
   141  	defer func() {
   142  		klog.V(5).Infof("Leaving tdm plugin.")
   143  	}()
   144  
   145  	// tdm plugin just handle revocable node
   146  	predicateFn := func(task *api.TaskInfo, node *api.NodeInfo) ([]*api.Status, error) {
   147  		predicateStatus := make([]*api.Status, 0)
   148  		tdmStatus := &api.Status{}
   149  		if node.RevocableZone == "" {
   150  			return predicateStatus, nil
   151  		}
   152  
   153  		if err := tp.availableRevocableZone(node.RevocableZone); err != nil {
   154  			tdmStatus.Code = api.UnschedulableAndUnresolvable
   155  			tdmStatus.Reason = fmt.Sprintf("plugin %s predicates %v", tp.Name(), err)
   156  			return predicateStatus, fmt.Errorf("plugin %s predicates %v", tp.Name(), err)
   157  		}
   158  
   159  		klog.V(4).Infof("TDM node %v revocable zone %v:%v is active", node.Name, node.RevocableZone, tp.revocableZone[node.RevocableZone])
   160  
   161  		if len(task.RevocableZone) == 0 {
   162  			msg := fmt.Sprintf("task %s/%s is not allow to dispatch to revocable node %s", task.Namespace, task.Name, node.Name)
   163  			return predicateStatus, fmt.Errorf("plugin %s predicates %s", tp.Name(), msg)
   164  		}
   165  
   166  		tdmStatus.Code = api.Success
   167  		predicateStatus = append(predicateStatus, tdmStatus)
   168  		klog.V(4).Infof("TDM filter for Task %s/%s on node %s pass.", task.Namespace, task.Name, node.Name)
   169  		return predicateStatus, nil
   170  	}
   171  
   172  	// tdm plugin just handle revocable node
   173  	nodeOrderFn := func(task *api.TaskInfo, node *api.NodeInfo) (float64, error) {
   174  		score := 0.0
   175  
   176  		if node.RevocableZone == "" {
   177  			return score, nil
   178  		}
   179  
   180  		if err := tp.availableRevocableZone(node.RevocableZone); err != nil {
   181  			klog.V(4).Infof("TDM not available %s", err)
   182  			return score, err
   183  		}
   184  
   185  		if len(task.RevocableZone) == 0 {
   186  			klog.V(4).Infof("TDM task %s/%s is not allow to dispatch to revocable node %s", task.Namespace, task.Name, node.Name)
   187  			return score, nil
   188  		}
   189  
   190  		score = float64(k8sFramework.MaxNodeScore)
   191  
   192  		klog.V(4).Infof("TDM score for Task %s/%s on node %s is: %v", task.Namespace, task.Name, node.Name, score)
   193  		return score, nil
   194  	}
   195  
   196  	preemptableFn := func(preemptor *api.TaskInfo, preemptees []*api.TaskInfo) ([]*api.TaskInfo, int) {
   197  		// for the preemptable or can use revocablezone workload, they can not preempt other tasks.
   198  		if preemptor.Preemptable || len(preemptor.RevocableZone) > 0 {
   199  			klog.V(4).Infof("TDM task %s/%s is preemptable, do nothing skip", preemptor.Namespace, preemptor.Name)
   200  			return nil, tutil.Reject
   201  		}
   202  
   203  		var victims []*api.TaskInfo
   204  		tasksMap := make(map[api.JobID][]*api.TaskInfo)
   205  
   206  		// find preemptable tasks which appear on none revocable node
   207  		for _, task := range preemptees {
   208  			if !task.Preemptable || task.Status != api.Running {
   209  				continue
   210  			}
   211  
   212  			node, ok := ssn.Nodes[task.NodeName]
   213  			if !ok {
   214  				continue
   215  			}
   216  
   217  			if node.RevocableZone != "" {
   218  				continue
   219  			}
   220  
   221  			tasksMap[task.Job] = append(tasksMap[task.Job], task)
   222  		}
   223  
   224  		for jobID, preemptableTasks := range tasksMap {
   225  			if job, ok := ssn.Jobs[jobID]; ok {
   226  				victims = append(victims, tp.maxVictims(job, preemptableTasks)...)
   227  			}
   228  		}
   229  
   230  		klog.V(4).Infof("TDM victims are %+v", victims)
   231  
   232  		return victims, tutil.Permit
   233  	}
   234  
   235  	victimsFn := func([]*api.TaskInfo) []*api.TaskInfo {
   236  		if lastEvictAt.Add(tp.evictPeriod).After(time.Now()) {
   237  			klog.V(4).Infof("TDM next evict time at %v", lastEvictAt)
   238  			return nil
   239  		}
   240  
   241  		klog.V(4).Infof("TDM start to find victims")
   242  
   243  		// find preemptable task on timeout revocable zone node
   244  		victims := make([]*api.TaskInfo, 0)
   245  		for rz := range tp.revocableZone {
   246  			if err := tp.availableRevocableZone(rz); err != nil {
   247  				klog.V(4).Infof("TDM revocable zone %v disactive, %v", rz, err)
   248  				// rz disactive, then evict preemptable tasks by job from the revocable node
   249  				for jobID, preemtableTasks := range tp.revocableNodePreemptableTask(rz, ssn) {
   250  					if job, ok := ssn.Jobs[jobID]; ok {
   251  						victims = append(victims, tp.maxVictims(job, preemtableTasks)...)
   252  					}
   253  				}
   254  			}
   255  		}
   256  
   257  		// need to consider concurrency?
   258  		lastEvictAt = time.Now()
   259  
   260  		klog.V(4).Infof("TDM got %v victims", len(victims))
   261  
   262  		return victims
   263  	}
   264  
   265  	jobOrderFn := func(l, r interface{}) int {
   266  		lv := l.(*api.JobInfo)
   267  		rv := r.(*api.JobInfo)
   268  
   269  		if lv.Preemptable == rv.Preemptable {
   270  			return 0
   271  		}
   272  
   273  		if !lv.Preemptable {
   274  			return -1
   275  		}
   276  
   277  		return 1
   278  	}
   279  
   280  	jobPipelinedFn := func(obj interface{}) int {
   281  		jobInfo := obj.(*api.JobInfo)
   282  		if jobInfo.IsPipelined() {
   283  			return tutil.Permit
   284  		}
   285  		return tutil.Reject
   286  	}
   287  
   288  	jobStarvingFn := func(obj interface{}) bool {
   289  		jobInfo := obj.(*api.JobInfo)
   290  		// allow none preemptable elastic job (deployment) preempt task
   291  		if jobInfo.Preemptable {
   292  			return false
   293  		}
   294  		return len(jobInfo.TaskStatusIndex[api.Pending]) > 0
   295  	}
   296  
   297  	victimsFns := make([]api.VictimTasksFn, 0)
   298  	victimsFns = append(victimsFns, victimsFn)
   299  	ssn.AddPredicateFn(tp.Name(), predicateFn)
   300  	ssn.AddNodeOrderFn(tp.Name(), nodeOrderFn)
   301  	ssn.AddPreemptableFn(tp.Name(), preemptableFn)
   302  	ssn.AddVictimTasksFns(tp.Name(), victimsFns)
   303  	ssn.AddJobOrderFn(tp.Name(), jobOrderFn)
   304  	ssn.AddJobPipelinedFn(tp.Name(), jobPipelinedFn)
   305  	ssn.AddJobStarvingFns(tp.Name(), jobStarvingFn)
   306  }
   307  
   308  func (tp *tdmPlugin) maxVictims(job *api.JobInfo, victims []*api.TaskInfo) []*api.TaskInfo {
   309  	maxPodEvictNum := tp.getMaxPodEvictNum(job)
   310  	targetNum := util.GetMinInt(maxPodEvictNum, len(victims))
   311  	klog.V(3).Infof("Job <%s/%s> max evict:%v, potential victims number:%v, max victims number:%v",
   312  		job.Namespace, job.Name, maxPodEvictNum, len(victims), targetNum)
   313  
   314  	return victims[:targetNum]
   315  }
   316  
   317  // get max pod evict number from job budget configure
   318  func (tp *tdmPlugin) getMaxPodEvictNum(job *api.JobInfo) int {
   319  	jobRunningTaskNum := len(job.TaskStatusIndex[api.Running])
   320  	if job.Budget.MaxUnavilable != "" {
   321  		maxUnavilable := tp.parseIntStr(job.Budget.MaxUnavilable, len(job.Tasks))
   322  		finalTaskNum := len(job.TaskStatusIndex[api.Succeeded]) + len(job.TaskStatusIndex[api.Failed])
   323  		realUnavilable := len(job.Tasks) - finalTaskNum - jobRunningTaskNum
   324  		if realUnavilable >= maxUnavilable {
   325  			return 0
   326  		}
   327  		return maxUnavilable - realUnavilable
   328  	}
   329  
   330  	if job.Budget.MinAvailable != "" {
   331  		minAvailable := tp.parseIntStr(job.Budget.MinAvailable, len(job.Tasks))
   332  		if jobRunningTaskNum >= minAvailable {
   333  			return jobRunningTaskNum - minAvailable
   334  		}
   335  	}
   336  
   337  	return defaultPodEvictNum
   338  }
   339  
   340  func (tp *tdmPlugin) parseIntStr(input string, taskNum int) int {
   341  	resultValue := 0
   342  	tmp := intstr.Parse(input)
   343  	switch tmp.Type {
   344  	case intstr.Int:
   345  		resultValue = tmp.IntValue()
   346  	case intstr.String:
   347  		if v, err := intstr.GetValueFromIntOrPercent(&tmp, taskNum, true); err == nil {
   348  			resultValue = v
   349  		} else {
   350  			klog.Warningf("TDM get percent value err: %v", err)
   351  		}
   352  	}
   353  
   354  	return resultValue
   355  }
   356  
   357  func (tp *tdmPlugin) revocableNodePreemptableTask(rz string, ssn *framework.Session) map[api.JobID][]*api.TaskInfo {
   358  	tasksMap := make(map[api.JobID][]*api.TaskInfo)
   359  	for _, node := range ssn.RevocableNodes {
   360  		if node.RevocableZone != rz {
   361  			continue
   362  		}
   363  
   364  		for _, task := range node.Tasks {
   365  			if task.Preemptable {
   366  				if task.Status == api.Running {
   367  					tasksMap[task.Job] = append(tasksMap[task.Job], task)
   368  				}
   369  			}
   370  		}
   371  	}
   372  
   373  	return tasksMap
   374  }
   375  
   376  func (tp *tdmPlugin) OnSessionClose(ssn *framework.Session) {}