volcano.sh/volcano@v1.9.0/pkg/scheduler/api/job_info.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package api 18 19 import ( 20 "encoding/json" 21 "errors" 22 "fmt" 23 "sort" 24 "strconv" 25 "strings" 26 "time" 27 28 v1 "k8s.io/api/core/v1" 29 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 "k8s.io/apimachinery/pkg/types" 31 "k8s.io/klog/v2" 32 33 batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 34 "volcano.sh/apis/pkg/apis/scheduling" 35 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 36 37 volumescheduling "volcano.sh/volcano/pkg/scheduler/capabilities/volumebinding" 38 ) 39 40 // DisruptionBudget define job min pod available and max pod unavailable value 41 type DisruptionBudget struct { 42 MinAvailable string 43 MaxUnavilable string 44 } 45 46 // NewDisruptionBudget create disruption budget for job 47 func NewDisruptionBudget(minAvailable, maxUnavilable string) *DisruptionBudget { 48 disruptionBudget := &DisruptionBudget{ 49 MinAvailable: minAvailable, 50 MaxUnavilable: maxUnavilable, 51 } 52 return disruptionBudget 53 } 54 55 // Clone return a clone of DisruptionBudget 56 func (db *DisruptionBudget) Clone() *DisruptionBudget { 57 return &DisruptionBudget{ 58 MinAvailable: db.MinAvailable, 59 MaxUnavilable: db.MaxUnavilable, 60 } 61 } 62 63 // JobWaitingTime is maximum waiting time that a job could stay Pending in service level agreement 64 // when job waits longer than waiting time, it should enqueue at once, and cluster should reserve resources for it 65 const JobWaitingTime = "sla-waiting-time" 66 67 // TaskID is UID type for Task 68 type TaskID types.UID 69 70 // TransactionContext holds all the fields that needed by scheduling transaction 71 type TransactionContext struct { 72 NodeName string 73 Status TaskStatus 74 } 75 76 // Clone returns a clone of TransactionContext 77 func (ctx *TransactionContext) Clone() *TransactionContext { 78 if ctx == nil { 79 return nil 80 } 81 clone := *ctx 82 return &clone 83 } 84 85 type TopologyInfo struct { 86 Policy string 87 ResMap map[int]v1.ResourceList // key: numa ID 88 } 89 90 func (info *TopologyInfo) Clone() *TopologyInfo { 91 copyInfo := &TopologyInfo{ 92 Policy: info.Policy, 93 ResMap: make(map[int]v1.ResourceList), 94 } 95 96 for numaID, resList := range info.ResMap { 97 copyInfo.ResMap[numaID] = resList.DeepCopy() 98 } 99 100 return copyInfo 101 } 102 103 // TaskInfo will have all infos about the task 104 type TaskInfo struct { 105 UID TaskID 106 Job JobID 107 108 Name string 109 Namespace string 110 111 // Resreq is the resource that used when task running. 112 Resreq *Resource 113 // InitResreq is the resource that used to launch a task. 114 InitResreq *Resource 115 116 TransactionContext 117 // LastTransaction holds the context of last scheduling transaction 118 LastTransaction *TransactionContext 119 120 Priority int32 121 VolumeReady bool 122 Preemptable bool 123 BestEffort bool 124 125 // RevocableZone supports setting volcano.sh/revocable-zone annotation or label for pod/podgroup 126 // we only support empty value or * value for this version and we will support specify revocable zone name for future releases 127 // empty value means workload can not use revocable node 128 // * value means workload can use all the revocable node for during node active revocable time. 129 RevocableZone string 130 131 NumaInfo *TopologyInfo 132 PodVolumes *volumescheduling.PodVolumes 133 Pod *v1.Pod 134 135 // CustomBindErrHandler is a custom callback func called when task bind err. 136 CustomBindErrHandler func() error `json:"-"` 137 // CustomBindErrHandlerSucceeded indicates whether CustomBindErrHandler is executed successfully. 138 CustomBindErrHandlerSucceeded bool 139 } 140 141 func getJobID(pod *v1.Pod) JobID { 142 if gn, found := pod.Annotations[v1beta1.KubeGroupNameAnnotationKey]; found && len(gn) != 0 { 143 // Make sure Pod and PodGroup belong to the same namespace. 144 jobID := fmt.Sprintf("%s/%s", pod.Namespace, gn) 145 return JobID(jobID) 146 } 147 148 return "" 149 } 150 151 func getTaskID(pod *v1.Pod) TaskID { 152 if ts, found := pod.Annotations[batch.TaskSpecKey]; found && len(ts) != 0 { 153 return TaskID(ts) 154 } 155 156 return "" 157 } 158 159 const TaskPriorityAnnotation = "volcano.sh/task-priority" 160 161 // NewTaskInfo creates new taskInfo object for a Pod 162 func NewTaskInfo(pod *v1.Pod) *TaskInfo { 163 initResReq := GetPodResourceRequest(pod) 164 resReq := initResReq 165 bestEffort := initResReq.IsEmpty() 166 preemptable := GetPodPreemptable(pod) 167 revocableZone := GetPodRevocableZone(pod) 168 topologyInfo := GetPodTopologyInfo(pod) 169 170 jobID := getJobID(pod) 171 172 ti := &TaskInfo{ 173 UID: TaskID(pod.UID), 174 Job: jobID, 175 Name: pod.Name, 176 Namespace: pod.Namespace, 177 Priority: 1, 178 Pod: pod, 179 Resreq: resReq, 180 InitResreq: initResReq, 181 Preemptable: preemptable, 182 BestEffort: bestEffort, 183 RevocableZone: revocableZone, 184 NumaInfo: topologyInfo, 185 TransactionContext: TransactionContext{ 186 NodeName: pod.Spec.NodeName, 187 Status: getTaskStatus(pod), 188 }, 189 } 190 191 if pod.Spec.Priority != nil { 192 ti.Priority = *pod.Spec.Priority 193 } 194 195 if taskPriority, ok := pod.Annotations[TaskPriorityAnnotation]; ok { 196 if priority, err := strconv.ParseInt(taskPriority, 10, 32); err == nil { 197 ti.Priority = int32(priority) 198 } 199 } 200 201 return ti 202 } 203 204 // GetTransactionContext get transaction context of a task 205 func (ti *TaskInfo) GetTransactionContext() TransactionContext { 206 return ti.TransactionContext 207 } 208 209 // GenerateLastTxContext generate and set context of last transaction for a task 210 func (ti *TaskInfo) GenerateLastTxContext() { 211 ctx := ti.GetTransactionContext() 212 ti.LastTransaction = &ctx 213 } 214 215 // ClearLastTxContext clear context of last transaction for a task 216 func (ti *TaskInfo) ClearLastTxContext() { 217 ti.LastTransaction = nil 218 } 219 220 func (ti *TaskInfo) SetPodResourceDecision() error { 221 if ti.NumaInfo == nil || len(ti.NumaInfo.ResMap) == 0 { 222 return nil 223 } 224 225 klog.V(4).Infof("%v/%v resource decision: %v", ti.Namespace, ti.Name, ti.NumaInfo.ResMap) 226 decision := PodResourceDecision{ 227 NUMAResources: ti.NumaInfo.ResMap, 228 } 229 230 layout, err := json.Marshal(&decision) 231 if err != nil { 232 return err 233 } 234 235 metav1.SetMetaDataAnnotation(&ti.Pod.ObjectMeta, topologyDecisionAnnotation, string(layout[:])) 236 return nil 237 } 238 239 func (ti *TaskInfo) UnsetPodResourceDecision() { 240 delete(ti.Pod.Annotations, topologyDecisionAnnotation) 241 } 242 243 // Clone is used for cloning a task 244 func (ti *TaskInfo) Clone() *TaskInfo { 245 return &TaskInfo{ 246 UID: ti.UID, 247 Job: ti.Job, 248 Name: ti.Name, 249 Namespace: ti.Namespace, 250 Priority: ti.Priority, 251 PodVolumes: ti.PodVolumes, 252 Pod: ti.Pod, 253 Resreq: ti.Resreq.Clone(), 254 InitResreq: ti.InitResreq.Clone(), 255 VolumeReady: ti.VolumeReady, 256 Preemptable: ti.Preemptable, 257 BestEffort: ti.BestEffort, 258 RevocableZone: ti.RevocableZone, 259 NumaInfo: ti.NumaInfo.Clone(), 260 TransactionContext: TransactionContext{ 261 NodeName: ti.NodeName, 262 Status: ti.Status, 263 }, 264 LastTransaction: ti.LastTransaction.Clone(), 265 } 266 } 267 268 func (ti *TaskInfo) GetTaskSpecKey() TaskID { 269 if ti.Pod == nil { 270 return "" 271 } 272 return getTaskID(ti.Pod) 273 } 274 275 // String returns the taskInfo details in a string 276 func (ti TaskInfo) String() string { 277 res := fmt.Sprintf("Task (%v:%v/%v): job %v, status %v, pri %v, "+ 278 "resreq %v, preemptable %v, revocableZone %v", 279 ti.UID, ti.Namespace, ti.Name, ti.Job, ti.Status, ti.Priority, 280 ti.Resreq, ti.Preemptable, ti.RevocableZone) 281 282 if ti.NumaInfo != nil { 283 res += fmt.Sprintf(", numaInfo %v", *ti.NumaInfo) 284 } 285 286 return res 287 } 288 289 // JobID is the type of JobInfo's ID. 290 type JobID types.UID 291 292 type tasksMap map[TaskID]*TaskInfo 293 294 // NodeResourceMap stores resource in a node 295 type NodeResourceMap map[string]*Resource 296 297 // JobInfo will have all info of a Job 298 type JobInfo struct { 299 UID JobID 300 PgUID types.UID 301 302 Name string 303 Namespace string 304 305 Queue QueueID 306 307 Priority int32 308 309 MinAvailable int32 310 311 WaitingTime *time.Duration 312 313 JobFitErrors string 314 NodesFitErrors map[TaskID]*FitErrors 315 316 // All tasks of the Job. 317 TaskStatusIndex map[TaskStatus]tasksMap 318 Tasks tasksMap 319 TaskMinAvailable map[TaskID]int32 320 TaskMinAvailableTotal int32 321 322 Allocated *Resource 323 TotalRequest *Resource 324 325 CreationTimestamp metav1.Time 326 PodGroup *PodGroup 327 328 ScheduleStartTimestamp metav1.Time 329 330 Preemptable bool 331 332 // RevocableZone support set volcano.sh/revocable-zone annotaion or label for pod/podgroup 333 // we only support empty value or * value for this version and we will support specify revocable zone name for future release 334 // empty value means workload can not use revocable node 335 // * value means workload can use all the revocable node for during node active revocable time. 336 RevocableZone string 337 Budget *DisruptionBudget 338 } 339 340 // NewJobInfo creates a new jobInfo for set of tasks 341 func NewJobInfo(uid JobID, tasks ...*TaskInfo) *JobInfo { 342 job := &JobInfo{ 343 UID: uid, 344 MinAvailable: 0, 345 NodesFitErrors: make(map[TaskID]*FitErrors), 346 Allocated: EmptyResource(), 347 TotalRequest: EmptyResource(), 348 TaskStatusIndex: map[TaskStatus]tasksMap{}, 349 Tasks: tasksMap{}, 350 TaskMinAvailable: map[TaskID]int32{}, 351 } 352 353 for _, task := range tasks { 354 job.AddTaskInfo(task) 355 } 356 357 return job 358 } 359 360 // UnsetPodGroup removes podGroup details from a job 361 func (ji *JobInfo) UnsetPodGroup() { 362 ji.PodGroup = nil 363 } 364 365 // SetPodGroup sets podGroup details to a job 366 func (ji *JobInfo) SetPodGroup(pg *PodGroup) { 367 ji.Name = pg.Name 368 ji.Namespace = pg.Namespace 369 ji.MinAvailable = pg.Spec.MinMember 370 ji.Queue = QueueID(pg.Spec.Queue) 371 ji.CreationTimestamp = pg.GetCreationTimestamp() 372 373 var err error 374 ji.WaitingTime, err = ji.extractWaitingTime(pg, v1beta1.JobWaitingTime) 375 if err != nil { 376 klog.Warningf("Error occurs in parsing waiting time for job <%s/%s>, err: %s.", 377 pg.Namespace, pg.Name, err.Error()) 378 ji.WaitingTime = nil 379 } 380 if ji.WaitingTime == nil { 381 ji.WaitingTime, err = ji.extractWaitingTime(pg, JobWaitingTime) 382 if err != nil { 383 klog.Warningf("Error occurs in parsing waiting time for job <%s/%s>, err: %s.", 384 pg.Namespace, pg.Name, err.Error()) 385 ji.WaitingTime = nil 386 } 387 } 388 389 ji.Preemptable = ji.extractPreemptable(pg) 390 ji.RevocableZone = ji.extractRevocableZone(pg) 391 ji.Budget = ji.extractBudget(pg) 392 393 taskMinAvailableTotal := int32(0) 394 for task, member := range pg.Spec.MinTaskMember { 395 ji.TaskMinAvailable[TaskID(task)] = member 396 taskMinAvailableTotal += member 397 } 398 ji.TaskMinAvailableTotal = taskMinAvailableTotal 399 400 ji.PgUID = pg.UID 401 ji.PodGroup = pg 402 } 403 404 // extractWaitingTime reads sla waiting time for job from podgroup annotations 405 // TODO: should also read from given field in volcano job spec 406 func (ji *JobInfo) extractWaitingTime(pg *PodGroup, waitingTimeKey string) (*time.Duration, error) { 407 if _, exist := pg.Annotations[waitingTimeKey]; !exist { 408 return nil, nil 409 } 410 411 jobWaitingTime, err := time.ParseDuration(pg.Annotations[waitingTimeKey]) 412 if err != nil { 413 return nil, err 414 } 415 416 if jobWaitingTime <= 0 { 417 return nil, errors.New("invalid sla waiting time") 418 } 419 420 return &jobWaitingTime, nil 421 } 422 423 // extractPreemptable return volcano.sh/preemptable value for job 424 func (ji *JobInfo) extractPreemptable(pg *PodGroup) bool { 425 // check annotaion first 426 if len(pg.Annotations) > 0 { 427 if value, found := pg.Annotations[v1beta1.PodPreemptable]; found { 428 b, err := strconv.ParseBool(value) 429 if err != nil { 430 klog.Warningf("invalid %s=%s", v1beta1.PodPreemptable, value) 431 return false 432 } 433 return b 434 } 435 } 436 437 // it annotation does not exit, check label 438 if len(pg.Labels) > 0 { 439 if value, found := pg.Labels[v1beta1.PodPreemptable]; found { 440 b, err := strconv.ParseBool(value) 441 if err != nil { 442 klog.Warningf("invalid %s=%s", v1beta1.PodPreemptable, value) 443 return false 444 } 445 return b 446 } 447 } 448 449 return false 450 } 451 452 // extractRevocableZone return volcano.sh/revocable-zone value for pod/podgroup 453 func (ji *JobInfo) extractRevocableZone(pg *PodGroup) string { 454 // check annotation first 455 if len(pg.Annotations) > 0 { 456 if value, found := pg.Annotations[v1beta1.RevocableZone]; found { 457 if value != "*" { 458 return "" 459 } 460 return value 461 } 462 463 if value, found := pg.Annotations[v1beta1.PodPreemptable]; found { 464 if b, err := strconv.ParseBool(value); err == nil && b { 465 return "*" 466 } 467 } 468 } 469 470 return "" 471 } 472 473 // extractBudget return budget value for job 474 func (ji *JobInfo) extractBudget(pg *PodGroup) *DisruptionBudget { 475 if len(pg.Annotations) > 0 { 476 if value, found := pg.Annotations[v1beta1.JDBMinAvailable]; found { 477 return NewDisruptionBudget(value, "") 478 } else if value, found := pg.Annotations[v1beta1.JDBMaxUnavailable]; found { 479 return NewDisruptionBudget("", value) 480 } 481 } 482 483 return NewDisruptionBudget("", "") 484 } 485 486 // GetMinResources return the min resources of podgroup. 487 func (ji *JobInfo) GetMinResources() *Resource { 488 if ji.PodGroup.Spec.MinResources == nil { 489 return EmptyResource() 490 } 491 492 return NewResource(*ji.PodGroup.Spec.MinResources) 493 } 494 495 func (ji *JobInfo) GetElasticResources() *Resource { 496 minResource := ji.GetMinResources() 497 if ji.Allocated.LessEqualPartly(minResource, Zero) { 498 return EmptyResource() 499 } 500 return ji.Allocated.Clone().Sub(minResource) 501 } 502 503 func (ji *JobInfo) addTaskIndex(ti *TaskInfo) { 504 if _, found := ji.TaskStatusIndex[ti.Status]; !found { 505 ji.TaskStatusIndex[ti.Status] = tasksMap{} 506 } 507 ji.TaskStatusIndex[ti.Status][ti.UID] = ti 508 } 509 510 // AddTaskInfo is used to add a task to a job 511 func (ji *JobInfo) AddTaskInfo(ti *TaskInfo) { 512 ji.Tasks[ti.UID] = ti 513 ji.addTaskIndex(ti) 514 ji.TotalRequest.Add(ti.Resreq) 515 if AllocatedStatus(ti.Status) { 516 ji.Allocated.Add(ti.Resreq) 517 } 518 } 519 520 // UpdateTaskStatus is used to update task's status in a job. 521 // If error occurs both task and job are guaranteed to be in the original state. 522 func (ji *JobInfo) UpdateTaskStatus(task *TaskInfo, status TaskStatus) error { 523 if err := validateStatusUpdate(task.Status, status); err != nil { 524 return err 525 } 526 527 // First remove the task (if exist) from the task list. 528 if _, found := ji.Tasks[task.UID]; found { 529 if err := ji.DeleteTaskInfo(task); err != nil { 530 return err 531 } 532 } 533 534 // Update task's status to the target status once task addition is guaranteed to succeed. 535 task.Status = status 536 ji.AddTaskInfo(task) 537 538 return nil 539 } 540 541 func (ji *JobInfo) deleteTaskIndex(ti *TaskInfo) { 542 if tasks, found := ji.TaskStatusIndex[ti.Status]; found { 543 delete(tasks, ti.UID) 544 545 if len(tasks) == 0 { 546 delete(ji.TaskStatusIndex, ti.Status) 547 } 548 } 549 } 550 551 // DeleteTaskInfo is used to delete a task from a job 552 func (ji *JobInfo) DeleteTaskInfo(ti *TaskInfo) error { 553 if task, found := ji.Tasks[ti.UID]; found { 554 ji.TotalRequest.Sub(task.Resreq) 555 if AllocatedStatus(task.Status) { 556 ji.Allocated.Sub(task.Resreq) 557 } 558 delete(ji.Tasks, task.UID) 559 ji.deleteTaskIndex(task) 560 return nil 561 } 562 563 klog.Warningf("failed to find task <%v/%v> in job <%v/%v>", ti.Namespace, ti.Name, ji.Namespace, ji.Name) 564 return nil 565 } 566 567 // Clone is used to clone a jobInfo object 568 func (ji *JobInfo) Clone() *JobInfo { 569 info := &JobInfo{ 570 UID: ji.UID, 571 Name: ji.Name, 572 Namespace: ji.Namespace, 573 Queue: ji.Queue, 574 Priority: ji.Priority, 575 576 MinAvailable: ji.MinAvailable, 577 WaitingTime: ji.WaitingTime, 578 JobFitErrors: ji.JobFitErrors, 579 NodesFitErrors: make(map[TaskID]*FitErrors), 580 Allocated: EmptyResource(), 581 TotalRequest: EmptyResource(), 582 583 PodGroup: ji.PodGroup.Clone(), 584 585 TaskStatusIndex: map[TaskStatus]tasksMap{}, 586 TaskMinAvailable: make(map[TaskID]int32, len(ji.TaskMinAvailable)), 587 TaskMinAvailableTotal: ji.TaskMinAvailableTotal, 588 Tasks: tasksMap{}, 589 Preemptable: ji.Preemptable, 590 RevocableZone: ji.RevocableZone, 591 Budget: ji.Budget.Clone(), 592 } 593 594 ji.CreationTimestamp.DeepCopyInto(&info.CreationTimestamp) 595 596 for task, minAvailable := range ji.TaskMinAvailable { 597 info.TaskMinAvailable[task] = minAvailable 598 } 599 for _, task := range ji.Tasks { 600 info.AddTaskInfo(task.Clone()) 601 } 602 603 return info 604 } 605 606 // String returns a jobInfo object in string format 607 func (ji JobInfo) String() string { 608 res := "" 609 610 i := 0 611 for _, task := range ji.Tasks { 612 res += fmt.Sprintf("\n\t %d: %v", i, task) 613 i++ 614 } 615 616 return fmt.Sprintf("Job (%v): namespace %v (%v), name %v, minAvailable %d, podGroup %+v, preemptable %+v, revocableZone %+v, minAvailable %+v, maxAvailable %+v", 617 ji.UID, ji.Namespace, ji.Queue, ji.Name, ji.MinAvailable, ji.PodGroup, ji.Preemptable, ji.RevocableZone, ji.Budget.MinAvailable, ji.Budget.MaxUnavilable) + res 618 } 619 620 // FitError returns detailed information on why a job's task failed to fit on 621 // each available node 622 func (ji *JobInfo) FitError() string { 623 sortReasonsHistogram := func(reasons map[string]int) []string { 624 reasonStrings := []string{} 625 for k, v := range reasons { 626 reasonStrings = append(reasonStrings, fmt.Sprintf("%v %v", v, k)) 627 } 628 sort.Strings(reasonStrings) 629 return reasonStrings 630 } 631 632 // Stat histogram for all tasks of the job 633 reasons := make(map[string]int) 634 for status, taskMap := range ji.TaskStatusIndex { 635 reasons[status.String()] += len(taskMap) 636 } 637 reasons["minAvailable"] = int(ji.MinAvailable) 638 reasonMsg := fmt.Sprintf("%v, %v", scheduling.PodGroupNotReady, strings.Join(sortReasonsHistogram(reasons), ", ")) 639 640 // Stat histogram for pending tasks only 641 reasons = make(map[string]int) 642 for uid := range ji.TaskStatusIndex[Pending] { 643 reason, _ := ji.TaskSchedulingReason(uid) 644 reasons[reason]++ 645 } 646 if len(reasons) > 0 { 647 reasonMsg += "; " + fmt.Sprintf("%s: %s", Pending.String(), strings.Join(sortReasonsHistogram(reasons), ", ")) 648 } 649 return reasonMsg 650 } 651 652 // TaskSchedulingReason get detailed reason and message of the given task 653 // It returns detailed reason and message for tasks based on last scheduling transaction. 654 func (ji *JobInfo) TaskSchedulingReason(tid TaskID) (reason string, msg string) { 655 taskInfo, exists := ji.Tasks[tid] 656 if !exists { 657 return "", "" 658 } 659 660 // Get detailed scheduling reason based on LastTransaction 661 ctx := taskInfo.GetTransactionContext() 662 if taskInfo.LastTransaction != nil { 663 ctx = *taskInfo.LastTransaction 664 } 665 666 msg = ji.JobFitErrors 667 switch status := ctx.Status; status { 668 case Allocated: 669 // Pod is schedulable 670 msg = fmt.Sprintf("Pod %s/%s can possibly be assigned to %s", taskInfo.Namespace, taskInfo.Name, ctx.NodeName) 671 return PodReasonSchedulable, msg 672 case Pipelined: 673 msg = fmt.Sprintf("Pod %s/%s can possibly be assigned to %s, once resource is released", taskInfo.Namespace, taskInfo.Name, ctx.NodeName) 674 return PodReasonUnschedulable, msg 675 case Pending: 676 if fe := ji.NodesFitErrors[tid]; fe != nil { 677 // Pod is unschedulable 678 return PodReasonUnschedulable, fe.Error() 679 } 680 // Pod is not scheduled yet, keep UNSCHEDULABLE as the reason to support cluster autoscaler 681 return PodReasonUnschedulable, msg 682 default: 683 return status.String(), msg 684 } 685 } 686 687 // ReadyTaskNum returns the number of tasks that are ready or that is best-effort. 688 func (ji *JobInfo) ReadyTaskNum() int32 { 689 occupied := 0 690 occupied += len(ji.TaskStatusIndex[Bound]) 691 occupied += len(ji.TaskStatusIndex[Binding]) 692 occupied += len(ji.TaskStatusIndex[Running]) 693 occupied += len(ji.TaskStatusIndex[Allocated]) 694 occupied += len(ji.TaskStatusIndex[Succeeded]) 695 696 return int32(occupied) 697 } 698 699 // WaitingTaskNum returns the number of tasks that are pipelined. 700 func (ji *JobInfo) WaitingTaskNum() int32 { 701 return int32(len(ji.TaskStatusIndex[Pipelined])) 702 } 703 704 func (ji *JobInfo) PendingBestEffortTaskNum() int32 { 705 count := 0 706 for _, task := range ji.TaskStatusIndex[Pending] { 707 if task.BestEffort { 708 count++ 709 } 710 } 711 return int32(count) 712 } 713 714 // CheckTaskValid returns whether each task of job is valid. 715 func (ji *JobInfo) CheckTaskValid() bool { 716 // if job minAvailable is less than sum of(task minAvailable), skip this check 717 if ji.MinAvailable < ji.TaskMinAvailableTotal { 718 return true 719 } 720 721 actual := map[TaskID]int32{} 722 for status, tasks := range ji.TaskStatusIndex { 723 if AllocatedStatus(status) || 724 status == Succeeded || 725 status == Pipelined || 726 status == Pending { 727 for _, task := range tasks { 728 actual[getTaskID(task.Pod)]++ 729 } 730 } 731 } 732 733 klog.V(4).Infof("job %s/%s actual: %+v, ji.TaskMinAvailable: %+v", ji.Name, ji.Namespace, actual, ji.TaskMinAvailable) 734 for task, minAvailable := range ji.TaskMinAvailable { 735 if minAvailable == 0 { 736 continue 737 } 738 if act, ok := actual[task]; !ok || act < minAvailable { 739 return false 740 } 741 } 742 743 return true 744 } 745 746 // CheckTaskReady return whether each task of job is ready. 747 func (ji *JobInfo) CheckTaskReady() bool { 748 if ji.MinAvailable < ji.TaskMinAvailableTotal { 749 return true 750 } 751 occupiedMap := map[TaskID]int32{} 752 for status, tasks := range ji.TaskStatusIndex { 753 if AllocatedStatus(status) || 754 status == Succeeded { 755 for _, task := range tasks { 756 occupiedMap[getTaskID(task.Pod)]++ 757 } 758 continue 759 } 760 761 if status == Pending { 762 for _, task := range tasks { 763 if task.InitResreq.IsEmpty() { 764 occupiedMap[getTaskID(task.Pod)]++ 765 } 766 } 767 } 768 } 769 for taskID, minNum := range ji.TaskMinAvailable { 770 if occupiedMap[taskID] < minNum { 771 klog.V(4).Infof("Job %s/%s Task %s occupied %v less than task min avaliable", ji.Namespace, ji.Name, taskID, occupiedMap[taskID]) 772 return false 773 } 774 } 775 return true 776 } 777 778 // CheckTaskPipelined return whether each task of job is pipelined. 779 func (ji *JobInfo) CheckTaskPipelined() bool { 780 if ji.MinAvailable < ji.TaskMinAvailableTotal { 781 return true 782 } 783 occupiedMap := map[TaskID]int32{} 784 for status, tasks := range ji.TaskStatusIndex { 785 if AllocatedStatus(status) || 786 status == Succeeded || 787 status == Pipelined { 788 for _, task := range tasks { 789 occupiedMap[getTaskID(task.Pod)]++ 790 } 791 continue 792 } 793 794 if status == Pending { 795 for _, task := range tasks { 796 if task.InitResreq.IsEmpty() { 797 occupiedMap[getTaskID(task.Pod)]++ 798 } 799 } 800 } 801 } 802 for taskID, minNum := range ji.TaskMinAvailable { 803 if occupiedMap[taskID] < minNum { 804 klog.V(4).Infof("Job %s/%s Task %s occupied %v less than task min avaliable", ji.Namespace, ji.Name, taskID, occupiedMap[taskID]) 805 return false 806 } 807 } 808 return true 809 } 810 811 // CheckTaskStarving return whether job has at least one task which is starving. 812 func (ji *JobInfo) CheckTaskStarving() bool { 813 if ji.MinAvailable < ji.TaskMinAvailableTotal { 814 return true 815 } 816 occupiedMap := map[TaskID]int32{} 817 for status, tasks := range ji.TaskStatusIndex { 818 if AllocatedStatus(status) || 819 status == Succeeded || 820 status == Pipelined { 821 for _, task := range tasks { 822 occupiedMap[getTaskID(task.Pod)]++ 823 } 824 continue 825 } 826 } 827 for taskID, minNum := range ji.TaskMinAvailable { 828 if occupiedMap[taskID] < minNum { 829 klog.V(4).Infof("Job %s/%s Task %s occupied %v less than task min available", ji.Namespace, ji.Name, taskID, occupiedMap[taskID]) 830 return true 831 } 832 } 833 return false 834 } 835 836 // ValidTaskNum returns the number of tasks that are valid. 837 func (ji *JobInfo) ValidTaskNum() int32 { 838 occupied := 0 839 for status, tasks := range ji.TaskStatusIndex { 840 if AllocatedStatus(status) || 841 status == Succeeded || 842 status == Pipelined || 843 status == Pending { 844 occupied += len(tasks) 845 } 846 } 847 848 return int32(occupied) 849 } 850 851 func (ji *JobInfo) IsReady() bool { 852 return ji.ReadyTaskNum()+ji.PendingBestEffortTaskNum() >= ji.MinAvailable 853 } 854 855 func (ji *JobInfo) IsPipelined() bool { 856 return ji.WaitingTaskNum()+ji.ReadyTaskNum()+ji.PendingBestEffortTaskNum() >= ji.MinAvailable 857 } 858 859 func (ji *JobInfo) IsStarving() bool { 860 return ji.WaitingTaskNum()+ji.ReadyTaskNum() < ji.MinAvailable 861 } 862 863 // IsPending returns whether job is in pending status 864 func (ji *JobInfo) IsPending() bool { 865 return ji.PodGroup == nil || 866 ji.PodGroup.Status.Phase == scheduling.PodGroupPending || 867 ji.PodGroup.Status.Phase == "" 868 } 869 870 // HasPendingTasks return whether job has pending tasks 871 func (ji *JobInfo) HasPendingTasks() bool { 872 return len(ji.TaskStatusIndex[Pending]) != 0 873 }