volcano.sh/volcano@v1.9.0/pkg/scheduler/plugins/gang/gang.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package gang
    18  
    19  import (
    20  	"fmt"
    21  
    22  	v1 "k8s.io/api/core/v1"
    23  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    24  	"k8s.io/klog/v2"
    25  
    26  	"volcano.sh/apis/pkg/apis/scheduling"
    27  	"volcano.sh/apis/pkg/apis/scheduling/v1beta1"
    28  	"volcano.sh/volcano/pkg/scheduler/api"
    29  	"volcano.sh/volcano/pkg/scheduler/framework"
    30  	"volcano.sh/volcano/pkg/scheduler/metrics"
    31  	"volcano.sh/volcano/pkg/scheduler/plugins/util"
    32  )
    33  
    34  // PluginName indicates name of volcano scheduler plugin.
    35  const PluginName = "gang"
    36  
    37  type gangPlugin struct {
    38  	// Arguments given for the plugin
    39  	pluginArguments framework.Arguments
    40  }
    41  
    42  // New return gang plugin
    43  func New(arguments framework.Arguments) framework.Plugin {
    44  	return &gangPlugin{pluginArguments: arguments}
    45  }
    46  
    47  func (gp *gangPlugin) Name() string {
    48  	return PluginName
    49  }
    50  
    51  func (gp *gangPlugin) OnSessionOpen(ssn *framework.Session) {
    52  	validJobFn := func(obj interface{}) *api.ValidateResult {
    53  		job, ok := obj.(*api.JobInfo)
    54  		if !ok {
    55  			return &api.ValidateResult{
    56  				Pass:    false,
    57  				Message: fmt.Sprintf("Failed to convert <%v> to *JobInfo", obj),
    58  			}
    59  		}
    60  
    61  		if valid := job.CheckTaskValid(); !valid {
    62  			return &api.ValidateResult{
    63  				Pass:    false,
    64  				Reason:  v1beta1.NotEnoughPodsOfTaskReason,
    65  				Message: "Not enough valid pods of each task for gang-scheduling",
    66  			}
    67  		}
    68  
    69  		vtn := job.ValidTaskNum()
    70  		if vtn < job.MinAvailable {
    71  			return &api.ValidateResult{
    72  				Pass:   false,
    73  				Reason: v1beta1.NotEnoughPodsReason,
    74  				Message: fmt.Sprintf("Not enough valid tasks for gang-scheduling, valid: %d, min: %d",
    75  					vtn, job.MinAvailable),
    76  			}
    77  		}
    78  		return nil
    79  	}
    80  
    81  	ssn.AddJobValidFn(gp.Name(), validJobFn)
    82  
    83  	preemptableFn := func(preemptor *api.TaskInfo, preemptees []*api.TaskInfo) ([]*api.TaskInfo, int) {
    84  		var victims []*api.TaskInfo
    85  		jobOccupiedMap := map[api.JobID]int32{}
    86  
    87  		for _, preemptee := range preemptees {
    88  			job := ssn.Jobs[preemptee.Job]
    89  			if _, found := jobOccupiedMap[job.UID]; !found {
    90  				jobOccupiedMap[job.UID] = job.ReadyTaskNum()
    91  			}
    92  
    93  			if jobOccupiedMap[job.UID] > job.MinAvailable {
    94  				jobOccupiedMap[job.UID]--
    95  				victims = append(victims, preemptee)
    96  			} else {
    97  				klog.V(4).Infof("Can not preempt task <%v/%v> because job %s ready num(%d) <= MinAvailable(%d) for gang-scheduling",
    98  					preemptee.Namespace, preemptee.Name, job.Name, jobOccupiedMap[job.UID], job.MinAvailable)
    99  			}
   100  		}
   101  
   102  		klog.V(4).InfoS("Victims from Gang plugins", "victims", victims, "preemptor", preemptor)
   103  
   104  		return victims, util.Permit
   105  	}
   106  
   107  	// TODO(k82cn): Support preempt/reclaim batch job.
   108  	ssn.AddReclaimableFn(gp.Name(), preemptableFn)
   109  	ssn.AddPreemptableFn(gp.Name(), preemptableFn)
   110  
   111  	jobOrderFn := func(l, r interface{}) int {
   112  		lv := l.(*api.JobInfo)
   113  		rv := r.(*api.JobInfo)
   114  
   115  		lReady := lv.IsReady()
   116  		rReady := rv.IsReady()
   117  
   118  		klog.V(4).Infof("Gang JobOrderFn: <%v/%v> is ready: %t, <%v/%v> is ready: %t",
   119  			lv.Namespace, lv.Name, lReady, rv.Namespace, rv.Name, rReady)
   120  
   121  		if lReady && rReady {
   122  			return 0
   123  		}
   124  
   125  		if lReady {
   126  			return 1
   127  		}
   128  
   129  		if rReady {
   130  			return -1
   131  		}
   132  
   133  		return 0
   134  	}
   135  
   136  	ssn.AddJobOrderFn(gp.Name(), jobOrderFn)
   137  	ssn.AddJobReadyFn(gp.Name(), func(obj interface{}) bool {
   138  		ji := obj.(*api.JobInfo)
   139  		if ji.CheckTaskReady() && ji.IsReady() {
   140  			return true
   141  		}
   142  		return false
   143  	})
   144  
   145  	pipelinedFn := func(obj interface{}) int {
   146  		ji := obj.(*api.JobInfo)
   147  		if ji.CheckTaskPipelined() && ji.IsPipelined() {
   148  			return util.Permit
   149  		}
   150  		return util.Reject
   151  	}
   152  	ssn.AddJobPipelinedFn(gp.Name(), pipelinedFn)
   153  
   154  	jobStarvingFn := func(obj interface{}) bool {
   155  		ji := obj.(*api.JobInfo)
   156  		// In the preemption scenario, the taskMinAvailable configuration is not concerned, only the jobMinAvailable is concerned
   157  		return ji.IsStarving()
   158  	}
   159  	ssn.AddJobStarvingFns(gp.Name(), jobStarvingFn)
   160  }
   161  
   162  func (gp *gangPlugin) OnSessionClose(ssn *framework.Session) {
   163  	var unreadyTaskCount int32
   164  	var unScheduleJobCount int
   165  	for _, job := range ssn.Jobs {
   166  		if !job.IsReady() {
   167  			schedulableTaskNum := func() (num int32) {
   168  				for _, task := range job.TaskStatusIndex[api.Pending] {
   169  					ctx := task.GetTransactionContext()
   170  					if task.LastTransaction != nil {
   171  						ctx = *task.LastTransaction
   172  					}
   173  					if api.AllocatedStatus(ctx.Status) {
   174  						num++
   175  					}
   176  				}
   177  				return num + job.ReadyTaskNum()
   178  			}
   179  			unreadyTaskCount = job.MinAvailable - schedulableTaskNum()
   180  			msg := fmt.Sprintf("%v/%v tasks in gang unschedulable: %v",
   181  				unreadyTaskCount, len(job.Tasks), job.FitError())
   182  			job.JobFitErrors = msg
   183  
   184  			unScheduleJobCount++
   185  			metrics.RegisterJobRetries(job.Name)
   186  
   187  			jc := &scheduling.PodGroupCondition{
   188  				Type:               scheduling.PodGroupUnschedulableType,
   189  				Status:             v1.ConditionTrue,
   190  				LastTransitionTime: metav1.Now(),
   191  				TransitionID:       string(ssn.UID),
   192  				Reason:             v1beta1.NotEnoughResourcesReason,
   193  				Message:            msg,
   194  			}
   195  
   196  			if err := ssn.UpdatePodGroupCondition(job, jc); err != nil {
   197  				klog.Errorf("Failed to update job <%s/%s> condition: %v",
   198  					job.Namespace, job.Name, err)
   199  			}
   200  
   201  			// allocated task should follow the job fit error
   202  			for _, taskInfo := range job.TaskStatusIndex[api.Allocated] {
   203  				fitError := job.NodesFitErrors[taskInfo.UID]
   204  				if fitError != nil {
   205  					continue
   206  				}
   207  
   208  				fitError = api.NewFitErrors()
   209  				job.NodesFitErrors[taskInfo.UID] = fitError
   210  				fitError.SetError(msg)
   211  			}
   212  		} else {
   213  			jc := &scheduling.PodGroupCondition{
   214  				Type:               scheduling.PodGroupScheduled,
   215  				Status:             v1.ConditionTrue,
   216  				LastTransitionTime: metav1.Now(),
   217  				TransitionID:       string(ssn.UID),
   218  				Reason:             "tasks in gang are ready to be scheduled",
   219  				Message:            "",
   220  			}
   221  
   222  			if err := ssn.UpdatePodGroupCondition(job, jc); err != nil {
   223  				klog.Errorf("Failed to update job <%s/%s> condition: %v",
   224  					job.Namespace, job.Name, err)
   225  			}
   226  		}
   227  		metrics.UpdateUnscheduleTaskCount(job.Name, int(unreadyTaskCount))
   228  		unreadyTaskCount = 0
   229  	}
   230  
   231  	metrics.UpdateUnscheduleJobCount(unScheduleJobCount)
   232  }