volcano.sh/volcano@v1.9.0/pkg/scheduler/plugins/tdm/tdm.go (about) 1 /* 2 Copyright 2021 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package tdm 18 19 import ( 20 "fmt" 21 "strings" 22 "time" 23 24 "k8s.io/apimachinery/pkg/util/intstr" 25 "k8s.io/klog/v2" 26 k8sFramework "k8s.io/kubernetes/pkg/scheduler/framework" 27 28 "volcano.sh/volcano/pkg/scheduler/api" 29 "volcano.sh/volcano/pkg/scheduler/framework" 30 tutil "volcano.sh/volcano/pkg/scheduler/plugins/util" 31 "volcano.sh/volcano/pkg/scheduler/util" 32 ) 33 34 const ( 35 // PluginName indicates name of volcano scheduler plugin. 36 PluginName = "tdm" 37 // revocableZoneLayout revocable zone layout 38 revocableZoneLayout = "15:04" 39 revocableZoneLabelPrefix = "tdm.revocable-zone." 40 evictPeriodLabel = "tdm.evict.period" 41 defaultPodEvictNum = 1 42 ) 43 44 var lastEvictAt time.Time 45 46 /* 47 actions: "enqueue, reclaim, allocate, preempt" 48 tiers: 49 - plugins: 50 - name: tdm 51 arguments: 52 tdm.revocable-zone.rz1: 10:00-21:00 53 tdm.revocable-zone.rz2: 12:00-14:00 54 tdm.evict.period: 1m 55 */ 56 57 type tdmPlugin struct { 58 revocableZone map[string]string 59 // evictPeriod 60 // default 1m 61 evictPeriod time.Duration 62 } 63 64 // New function returns prioritizePlugin object 65 func New(args framework.Arguments) framework.Plugin { 66 revocableZone := make(map[string]string) 67 evictPeriod := time.Minute 68 69 for k, v := range args { 70 if strings.Contains(k, revocableZoneLabelPrefix) { 71 revocableZone[strings.Replace(k, revocableZoneLabelPrefix, "", 1)] = v.(string) 72 } 73 } 74 75 if period, ok := args[evictPeriodLabel]; ok { 76 if d, err := time.ParseDuration(period.(string)); err == nil { 77 evictPeriod = d 78 } 79 } 80 81 return &tdmPlugin{revocableZone, evictPeriod} 82 } 83 84 func (tp *tdmPlugin) Name() string { 85 return PluginName 86 } 87 88 func parseRevocableZone(rzRaw string) (start, end time.Time, err error) { 89 rzValues := strings.Split(strings.TrimSpace(rzRaw), "-") 90 91 if len(rzValues) != 2 { 92 err = fmt.Errorf("revocable zone %v format error", rzRaw) 93 return 94 } 95 96 t1, err := time.Parse(revocableZoneLayout, rzValues[0]) 97 if err != nil { 98 return 99 } 100 101 t2, err := time.Parse(revocableZoneLayout, rzValues[1]) 102 if err != nil { 103 return 104 } 105 106 now := time.Now() 107 108 start = time.Date(now.Year(), now.Month(), now.Day(), t1.Hour(), t1.Minute(), 0, 0, now.Location()) 109 if t1.After(t2) || t1.Equal(t2) { 110 end = time.Date(now.Year(), now.Month(), now.Day()+1, t2.Hour(), t2.Minute(), 0, 0, now.Location()) 111 } else { 112 end = time.Date(now.Year(), now.Month(), now.Day(), t2.Hour(), t2.Minute(), 0, 0, now.Location()) 113 } 114 115 return 116 } 117 118 func (tp *tdmPlugin) availableRevocableZone(rz string) error { 119 // rzRaw format 00:00-23:59 120 rzRaw, ok := tp.revocableZone[rz] 121 if !ok { 122 return fmt.Errorf("revocable zone %v not support", rz) 123 } 124 125 now := time.Now() 126 127 start, end, err := parseRevocableZone(rzRaw) 128 if err != nil { 129 return err 130 } 131 132 if now.Unix() < start.Unix() || now.Unix() > end.Unix() { 133 return fmt.Errorf("current time beyond revocable zone %v:%v", rz, rzRaw) 134 } 135 136 return nil 137 } 138 139 func (tp *tdmPlugin) OnSessionOpen(ssn *framework.Session) { 140 klog.V(5).Infof("Enter tdm plugin ...") 141 defer func() { 142 klog.V(5).Infof("Leaving tdm plugin.") 143 }() 144 145 // tdm plugin just handle revocable node 146 predicateFn := func(task *api.TaskInfo, node *api.NodeInfo) ([]*api.Status, error) { 147 predicateStatus := make([]*api.Status, 0) 148 tdmStatus := &api.Status{} 149 if node.RevocableZone == "" { 150 return predicateStatus, nil 151 } 152 153 if err := tp.availableRevocableZone(node.RevocableZone); err != nil { 154 tdmStatus.Code = api.UnschedulableAndUnresolvable 155 tdmStatus.Reason = fmt.Sprintf("plugin %s predicates %v", tp.Name(), err) 156 return predicateStatus, fmt.Errorf("plugin %s predicates %v", tp.Name(), err) 157 } 158 159 klog.V(4).Infof("TDM node %v revocable zone %v:%v is active", node.Name, node.RevocableZone, tp.revocableZone[node.RevocableZone]) 160 161 if len(task.RevocableZone) == 0 { 162 msg := fmt.Sprintf("task %s/%s is not allow to dispatch to revocable node %s", task.Namespace, task.Name, node.Name) 163 return predicateStatus, fmt.Errorf("plugin %s predicates %s", tp.Name(), msg) 164 } 165 166 tdmStatus.Code = api.Success 167 predicateStatus = append(predicateStatus, tdmStatus) 168 klog.V(4).Infof("TDM filter for Task %s/%s on node %s pass.", task.Namespace, task.Name, node.Name) 169 return predicateStatus, nil 170 } 171 172 // tdm plugin just handle revocable node 173 nodeOrderFn := func(task *api.TaskInfo, node *api.NodeInfo) (float64, error) { 174 score := 0.0 175 176 if node.RevocableZone == "" { 177 return score, nil 178 } 179 180 if err := tp.availableRevocableZone(node.RevocableZone); err != nil { 181 klog.V(4).Infof("TDM not available %s", err) 182 return score, err 183 } 184 185 if len(task.RevocableZone) == 0 { 186 klog.V(4).Infof("TDM task %s/%s is not allow to dispatch to revocable node %s", task.Namespace, task.Name, node.Name) 187 return score, nil 188 } 189 190 score = float64(k8sFramework.MaxNodeScore) 191 192 klog.V(4).Infof("TDM score for Task %s/%s on node %s is: %v", task.Namespace, task.Name, node.Name, score) 193 return score, nil 194 } 195 196 preemptableFn := func(preemptor *api.TaskInfo, preemptees []*api.TaskInfo) ([]*api.TaskInfo, int) { 197 // for the preemptable or can use revocablezone workload, they can not preempt other tasks. 198 if preemptor.Preemptable || len(preemptor.RevocableZone) > 0 { 199 klog.V(4).Infof("TDM task %s/%s is preemptable, do nothing skip", preemptor.Namespace, preemptor.Name) 200 return nil, tutil.Reject 201 } 202 203 var victims []*api.TaskInfo 204 tasksMap := make(map[api.JobID][]*api.TaskInfo) 205 206 // find preemptable tasks which appear on none revocable node 207 for _, task := range preemptees { 208 if !task.Preemptable || task.Status != api.Running { 209 continue 210 } 211 212 node, ok := ssn.Nodes[task.NodeName] 213 if !ok { 214 continue 215 } 216 217 if node.RevocableZone != "" { 218 continue 219 } 220 221 tasksMap[task.Job] = append(tasksMap[task.Job], task) 222 } 223 224 for jobID, preemptableTasks := range tasksMap { 225 if job, ok := ssn.Jobs[jobID]; ok { 226 victims = append(victims, tp.maxVictims(job, preemptableTasks)...) 227 } 228 } 229 230 klog.V(4).Infof("TDM victims are %+v", victims) 231 232 return victims, tutil.Permit 233 } 234 235 victimsFn := func([]*api.TaskInfo) []*api.TaskInfo { 236 if lastEvictAt.Add(tp.evictPeriod).After(time.Now()) { 237 klog.V(4).Infof("TDM next evict time at %v", lastEvictAt) 238 return nil 239 } 240 241 klog.V(4).Infof("TDM start to find victims") 242 243 // find preemptable task on timeout revocable zone node 244 victims := make([]*api.TaskInfo, 0) 245 for rz := range tp.revocableZone { 246 if err := tp.availableRevocableZone(rz); err != nil { 247 klog.V(4).Infof("TDM revocable zone %v disactive, %v", rz, err) 248 // rz disactive, then evict preemptable tasks by job from the revocable node 249 for jobID, preemtableTasks := range tp.revocableNodePreemptableTask(rz, ssn) { 250 if job, ok := ssn.Jobs[jobID]; ok { 251 victims = append(victims, tp.maxVictims(job, preemtableTasks)...) 252 } 253 } 254 } 255 } 256 257 // need to consider concurrency? 258 lastEvictAt = time.Now() 259 260 klog.V(4).Infof("TDM got %v victims", len(victims)) 261 262 return victims 263 } 264 265 jobOrderFn := func(l, r interface{}) int { 266 lv := l.(*api.JobInfo) 267 rv := r.(*api.JobInfo) 268 269 if lv.Preemptable == rv.Preemptable { 270 return 0 271 } 272 273 if !lv.Preemptable { 274 return -1 275 } 276 277 return 1 278 } 279 280 jobPipelinedFn := func(obj interface{}) int { 281 jobInfo := obj.(*api.JobInfo) 282 if jobInfo.IsPipelined() { 283 return tutil.Permit 284 } 285 return tutil.Reject 286 } 287 288 jobStarvingFn := func(obj interface{}) bool { 289 jobInfo := obj.(*api.JobInfo) 290 // allow none preemptable elastic job (deployment) preempt task 291 if jobInfo.Preemptable { 292 return false 293 } 294 return len(jobInfo.TaskStatusIndex[api.Pending]) > 0 295 } 296 297 victimsFns := make([]api.VictimTasksFn, 0) 298 victimsFns = append(victimsFns, victimsFn) 299 ssn.AddPredicateFn(tp.Name(), predicateFn) 300 ssn.AddNodeOrderFn(tp.Name(), nodeOrderFn) 301 ssn.AddPreemptableFn(tp.Name(), preemptableFn) 302 ssn.AddVictimTasksFns(tp.Name(), victimsFns) 303 ssn.AddJobOrderFn(tp.Name(), jobOrderFn) 304 ssn.AddJobPipelinedFn(tp.Name(), jobPipelinedFn) 305 ssn.AddJobStarvingFns(tp.Name(), jobStarvingFn) 306 } 307 308 func (tp *tdmPlugin) maxVictims(job *api.JobInfo, victims []*api.TaskInfo) []*api.TaskInfo { 309 maxPodEvictNum := tp.getMaxPodEvictNum(job) 310 targetNum := util.GetMinInt(maxPodEvictNum, len(victims)) 311 klog.V(3).Infof("Job <%s/%s> max evict:%v, potential victims number:%v, max victims number:%v", 312 job.Namespace, job.Name, maxPodEvictNum, len(victims), targetNum) 313 314 return victims[:targetNum] 315 } 316 317 // get max pod evict number from job budget configure 318 func (tp *tdmPlugin) getMaxPodEvictNum(job *api.JobInfo) int { 319 jobRunningTaskNum := len(job.TaskStatusIndex[api.Running]) 320 if job.Budget.MaxUnavilable != "" { 321 maxUnavilable := tp.parseIntStr(job.Budget.MaxUnavilable, len(job.Tasks)) 322 finalTaskNum := len(job.TaskStatusIndex[api.Succeeded]) + len(job.TaskStatusIndex[api.Failed]) 323 realUnavilable := len(job.Tasks) - finalTaskNum - jobRunningTaskNum 324 if realUnavilable >= maxUnavilable { 325 return 0 326 } 327 return maxUnavilable - realUnavilable 328 } 329 330 if job.Budget.MinAvailable != "" { 331 minAvailable := tp.parseIntStr(job.Budget.MinAvailable, len(job.Tasks)) 332 if jobRunningTaskNum >= minAvailable { 333 return jobRunningTaskNum - minAvailable 334 } 335 } 336 337 return defaultPodEvictNum 338 } 339 340 func (tp *tdmPlugin) parseIntStr(input string, taskNum int) int { 341 resultValue := 0 342 tmp := intstr.Parse(input) 343 switch tmp.Type { 344 case intstr.Int: 345 resultValue = tmp.IntValue() 346 case intstr.String: 347 if v, err := intstr.GetValueFromIntOrPercent(&tmp, taskNum, true); err == nil { 348 resultValue = v 349 } else { 350 klog.Warningf("TDM get percent value err: %v", err) 351 } 352 } 353 354 return resultValue 355 } 356 357 func (tp *tdmPlugin) revocableNodePreemptableTask(rz string, ssn *framework.Session) map[api.JobID][]*api.TaskInfo { 358 tasksMap := make(map[api.JobID][]*api.TaskInfo) 359 for _, node := range ssn.RevocableNodes { 360 if node.RevocableZone != rz { 361 continue 362 } 363 364 for _, task := range node.Tasks { 365 if task.Preemptable { 366 if task.Status == api.Running { 367 tasksMap[task.Job] = append(tasksMap[task.Job], task) 368 } 369 } 370 } 371 } 372 373 return tasksMap 374 } 375 376 func (tp *tdmPlugin) OnSessionClose(ssn *framework.Session) {}