volcano.sh/volcano@v1.9.0/pkg/scheduler/plugins/gang/gang.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package gang 18 19 import ( 20 "fmt" 21 22 v1 "k8s.io/api/core/v1" 23 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 24 "k8s.io/klog/v2" 25 26 "volcano.sh/apis/pkg/apis/scheduling" 27 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 28 "volcano.sh/volcano/pkg/scheduler/api" 29 "volcano.sh/volcano/pkg/scheduler/framework" 30 "volcano.sh/volcano/pkg/scheduler/metrics" 31 "volcano.sh/volcano/pkg/scheduler/plugins/util" 32 ) 33 34 // PluginName indicates name of volcano scheduler plugin. 35 const PluginName = "gang" 36 37 type gangPlugin struct { 38 // Arguments given for the plugin 39 pluginArguments framework.Arguments 40 } 41 42 // New return gang plugin 43 func New(arguments framework.Arguments) framework.Plugin { 44 return &gangPlugin{pluginArguments: arguments} 45 } 46 47 func (gp *gangPlugin) Name() string { 48 return PluginName 49 } 50 51 func (gp *gangPlugin) OnSessionOpen(ssn *framework.Session) { 52 validJobFn := func(obj interface{}) *api.ValidateResult { 53 job, ok := obj.(*api.JobInfo) 54 if !ok { 55 return &api.ValidateResult{ 56 Pass: false, 57 Message: fmt.Sprintf("Failed to convert <%v> to *JobInfo", obj), 58 } 59 } 60 61 if valid := job.CheckTaskValid(); !valid { 62 return &api.ValidateResult{ 63 Pass: false, 64 Reason: v1beta1.NotEnoughPodsOfTaskReason, 65 Message: "Not enough valid pods of each task for gang-scheduling", 66 } 67 } 68 69 vtn := job.ValidTaskNum() 70 if vtn < job.MinAvailable { 71 return &api.ValidateResult{ 72 Pass: false, 73 Reason: v1beta1.NotEnoughPodsReason, 74 Message: fmt.Sprintf("Not enough valid tasks for gang-scheduling, valid: %d, min: %d", 75 vtn, job.MinAvailable), 76 } 77 } 78 return nil 79 } 80 81 ssn.AddJobValidFn(gp.Name(), validJobFn) 82 83 preemptableFn := func(preemptor *api.TaskInfo, preemptees []*api.TaskInfo) ([]*api.TaskInfo, int) { 84 var victims []*api.TaskInfo 85 jobOccupiedMap := map[api.JobID]int32{} 86 87 for _, preemptee := range preemptees { 88 job := ssn.Jobs[preemptee.Job] 89 if _, found := jobOccupiedMap[job.UID]; !found { 90 jobOccupiedMap[job.UID] = job.ReadyTaskNum() 91 } 92 93 if jobOccupiedMap[job.UID] > job.MinAvailable { 94 jobOccupiedMap[job.UID]-- 95 victims = append(victims, preemptee) 96 } else { 97 klog.V(4).Infof("Can not preempt task <%v/%v> because job %s ready num(%d) <= MinAvailable(%d) for gang-scheduling", 98 preemptee.Namespace, preemptee.Name, job.Name, jobOccupiedMap[job.UID], job.MinAvailable) 99 } 100 } 101 102 klog.V(4).InfoS("Victims from Gang plugins", "victims", victims, "preemptor", preemptor) 103 104 return victims, util.Permit 105 } 106 107 // TODO(k82cn): Support preempt/reclaim batch job. 108 ssn.AddReclaimableFn(gp.Name(), preemptableFn) 109 ssn.AddPreemptableFn(gp.Name(), preemptableFn) 110 111 jobOrderFn := func(l, r interface{}) int { 112 lv := l.(*api.JobInfo) 113 rv := r.(*api.JobInfo) 114 115 lReady := lv.IsReady() 116 rReady := rv.IsReady() 117 118 klog.V(4).Infof("Gang JobOrderFn: <%v/%v> is ready: %t, <%v/%v> is ready: %t", 119 lv.Namespace, lv.Name, lReady, rv.Namespace, rv.Name, rReady) 120 121 if lReady && rReady { 122 return 0 123 } 124 125 if lReady { 126 return 1 127 } 128 129 if rReady { 130 return -1 131 } 132 133 return 0 134 } 135 136 ssn.AddJobOrderFn(gp.Name(), jobOrderFn) 137 ssn.AddJobReadyFn(gp.Name(), func(obj interface{}) bool { 138 ji := obj.(*api.JobInfo) 139 if ji.CheckTaskReady() && ji.IsReady() { 140 return true 141 } 142 return false 143 }) 144 145 pipelinedFn := func(obj interface{}) int { 146 ji := obj.(*api.JobInfo) 147 if ji.CheckTaskPipelined() && ji.IsPipelined() { 148 return util.Permit 149 } 150 return util.Reject 151 } 152 ssn.AddJobPipelinedFn(gp.Name(), pipelinedFn) 153 154 jobStarvingFn := func(obj interface{}) bool { 155 ji := obj.(*api.JobInfo) 156 // In the preemption scenario, the taskMinAvailable configuration is not concerned, only the jobMinAvailable is concerned 157 return ji.IsStarving() 158 } 159 ssn.AddJobStarvingFns(gp.Name(), jobStarvingFn) 160 } 161 162 func (gp *gangPlugin) OnSessionClose(ssn *framework.Session) { 163 var unreadyTaskCount int32 164 var unScheduleJobCount int 165 for _, job := range ssn.Jobs { 166 if !job.IsReady() { 167 schedulableTaskNum := func() (num int32) { 168 for _, task := range job.TaskStatusIndex[api.Pending] { 169 ctx := task.GetTransactionContext() 170 if task.LastTransaction != nil { 171 ctx = *task.LastTransaction 172 } 173 if api.AllocatedStatus(ctx.Status) { 174 num++ 175 } 176 } 177 return num + job.ReadyTaskNum() 178 } 179 unreadyTaskCount = job.MinAvailable - schedulableTaskNum() 180 msg := fmt.Sprintf("%v/%v tasks in gang unschedulable: %v", 181 unreadyTaskCount, len(job.Tasks), job.FitError()) 182 job.JobFitErrors = msg 183 184 unScheduleJobCount++ 185 metrics.RegisterJobRetries(job.Name) 186 187 jc := &scheduling.PodGroupCondition{ 188 Type: scheduling.PodGroupUnschedulableType, 189 Status: v1.ConditionTrue, 190 LastTransitionTime: metav1.Now(), 191 TransitionID: string(ssn.UID), 192 Reason: v1beta1.NotEnoughResourcesReason, 193 Message: msg, 194 } 195 196 if err := ssn.UpdatePodGroupCondition(job, jc); err != nil { 197 klog.Errorf("Failed to update job <%s/%s> condition: %v", 198 job.Namespace, job.Name, err) 199 } 200 201 // allocated task should follow the job fit error 202 for _, taskInfo := range job.TaskStatusIndex[api.Allocated] { 203 fitError := job.NodesFitErrors[taskInfo.UID] 204 if fitError != nil { 205 continue 206 } 207 208 fitError = api.NewFitErrors() 209 job.NodesFitErrors[taskInfo.UID] = fitError 210 fitError.SetError(msg) 211 } 212 } else { 213 jc := &scheduling.PodGroupCondition{ 214 Type: scheduling.PodGroupScheduled, 215 Status: v1.ConditionTrue, 216 LastTransitionTime: metav1.Now(), 217 TransitionID: string(ssn.UID), 218 Reason: "tasks in gang are ready to be scheduled", 219 Message: "", 220 } 221 222 if err := ssn.UpdatePodGroupCondition(job, jc); err != nil { 223 klog.Errorf("Failed to update job <%s/%s> condition: %v", 224 job.Namespace, job.Name, err) 225 } 226 } 227 metrics.UpdateUnscheduleTaskCount(job.Name, int(unreadyTaskCount)) 228 unreadyTaskCount = 0 229 } 230 231 metrics.UpdateUnscheduleJobCount(unScheduleJobCount) 232 }