volcano.sh/volcano@v1.9.0/pkg/scheduler/actions/backfill/backfill.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package backfill
    18  
    19  import (
    20  	"fmt"
    21  	"time"
    22  
    23  	"k8s.io/klog/v2"
    24  
    25  	"volcano.sh/volcano/pkg/scheduler/api"
    26  	"volcano.sh/volcano/pkg/scheduler/framework"
    27  	"volcano.sh/volcano/pkg/scheduler/metrics"
    28  	"volcano.sh/volcano/pkg/scheduler/util"
    29  )
    30  
// Action is the scheduler action registered under the name "backfill".
// It has no fields and therefore carries no state between sessions.
type Action struct{}
    32  
    33  func New() *Action {
    34  	return &Action{}
    35  }
    36  
    37  func (backfill *Action) Name() string {
    38  	return "backfill"
    39  }
    40  
// Initialize is a no-op: the backfill action needs no setup.
func (backfill *Action) Initialize() {}
    42  
    43  func (backfill *Action) Execute(ssn *framework.Session) {
    44  	klog.V(5).Infof("Enter Backfill ...")
    45  	defer klog.V(5).Infof("Leaving Backfill ...")
    46  
    47  	predicatFunc := func(task *api.TaskInfo, node *api.NodeInfo) ([]*api.Status, error) {
    48  		var statusSets util.StatusSets
    49  		statusSets, err := ssn.PredicateFn(task, node)
    50  		if err != nil {
    51  			return nil, err
    52  		}
    53  
    54  		// predicateHelper.PredicateNodes will print the log if predicate failed, so don't print log anymore here
    55  		if statusSets.ContainsUnschedulable() || statusSets.ContainsUnschedulableAndUnresolvable() || statusSets.ContainsErrorSkipOrWait() {
    56  			err := fmt.Errorf(statusSets.Message()) // should not include variables in api node errors
    57  			return nil, err
    58  		}
    59  		return nil, nil
    60  	}
    61  
    62  	// TODO (k82cn): When backfill, it's also need to balance between Queues.
    63  	pendingTasks := backfill.pickUpPendingTasks(ssn)
    64  	for _, task := range pendingTasks {
    65  		job := ssn.Jobs[task.Job]
    66  		ph := util.NewPredicateHelper()
    67  		allocated := false
    68  		fe := api.NewFitErrors()
    69  
    70  		if err := ssn.PrePredicateFn(task); err != nil {
    71  			klog.V(3).Infof("PrePredicate for task %s/%s failed in backfill for: %v", task.Namespace, task.Name, err)
    72  			for _, ni := range ssn.Nodes {
    73  				fe.SetNodeError(ni.Name, err)
    74  			}
    75  			job.NodesFitErrors[task.UID] = fe
    76  			break
    77  		}
    78  
    79  		predicateNodes, fitErrors := ph.PredicateNodes(task, ssn.NodeList, predicatFunc, true)
    80  		if len(predicateNodes) == 0 {
    81  			job.NodesFitErrors[task.UID] = fitErrors
    82  			break
    83  		}
    84  
    85  		node := predicateNodes[0]
    86  		if len(predicateNodes) > 1 {
    87  			nodeScores := util.PrioritizeNodes(task, predicateNodes, ssn.BatchNodeOrderFn, ssn.NodeOrderMapFn, ssn.NodeOrderReduceFn)
    88  			node = ssn.BestNodeFn(task, nodeScores)
    89  			if node == nil {
    90  				node = util.SelectBestNode(nodeScores)
    91  			}
    92  		}
    93  
    94  		klog.V(3).Infof("Binding Task <%v/%v> to node <%v>", task.Namespace, task.Name, node.Name)
    95  		if err := ssn.Allocate(task, node); err != nil {
    96  			klog.Errorf("Failed to bind Task %v on %v in Session %v", task.UID, node.Name, ssn.UID)
    97  			fe.SetNodeError(node.Name, err)
    98  			continue
    99  		}
   100  
   101  		metrics.UpdateE2eSchedulingDurationByJob(job.Name, string(job.Queue), job.Namespace, metrics.Duration(job.CreationTimestamp.Time))
   102  		metrics.UpdateE2eSchedulingLastTimeByJob(job.Name, string(job.Queue), job.Namespace, time.Now())
   103  		allocated = true
   104  
   105  		if !allocated {
   106  			job.NodesFitErrors[task.UID] = fe
   107  		}
   108  		// TODO (k82cn): backfill for other case.
   109  	}
   110  }
   111  
// UnInitialize is a no-op: the backfill action holds nothing to release.
func (backfill *Action) UnInitialize() {}
   113  
   114  func (backfill *Action) pickUpPendingTasks(ssn *framework.Session) []*api.TaskInfo {
   115  	queues := util.NewPriorityQueue(ssn.QueueOrderFn)
   116  	jobs := map[api.QueueID]*util.PriorityQueue{}
   117  	tasks := map[api.JobID]*util.PriorityQueue{}
   118  	var pendingTasks []*api.TaskInfo
   119  	for _, job := range ssn.Jobs {
   120  		if job.IsPending() {
   121  			continue
   122  		}
   123  
   124  		if vr := ssn.JobValid(job); vr != nil && !vr.Pass {
   125  			klog.V(4).Infof("Job <%s/%s> Queue <%s> skip backfill, reason: %v, message %v", job.Namespace, job.Name, job.Queue, vr.Reason, vr.Message)
   126  			continue
   127  		}
   128  
   129  		queue, found := ssn.Queues[job.Queue]
   130  		if !found {
   131  			continue
   132  		}
   133  
   134  		for _, task := range job.TaskStatusIndex[api.Pending] {
   135  			if !task.BestEffort {
   136  				continue
   137  			}
   138  			if _, existed := tasks[job.UID]; !existed {
   139  				tasks[job.UID] = util.NewPriorityQueue(ssn.TaskOrderFn)
   140  			}
   141  			tasks[job.UID].Push(task)
   142  		}
   143  
   144  		for _, task := range job.TaskStatusIndex[api.Pipelined] {
   145  			if !task.BestEffort {
   146  				continue
   147  			}
   148  
   149  			stmt := framework.NewStatement(ssn)
   150  			err := stmt.UnPipeline(task)
   151  			if err != nil {
   152  				klog.Errorf("Failed to unpipeline task: %s", err.Error())
   153  				continue
   154  			}
   155  			if _, existed := tasks[job.UID]; !existed {
   156  				tasks[job.UID] = util.NewPriorityQueue(ssn.TaskOrderFn)
   157  			}
   158  			tasks[job.UID].Push(task)
   159  		}
   160  
   161  		if _, existed := tasks[job.UID]; !existed {
   162  			continue
   163  		}
   164  
   165  		if _, existed := jobs[queue.UID]; !existed {
   166  			queues.Push(queue)
   167  			jobs[job.Queue] = util.NewPriorityQueue(ssn.JobOrderFn)
   168  		}
   169  		jobs[job.Queue].Push(job)
   170  	}
   171  
   172  	for !queues.Empty() {
   173  		queue, ok := queues.Pop().(*api.QueueInfo)
   174  		if !ok {
   175  			klog.V(3).Infof("QueueInfo transition failed, ignore it.")
   176  			continue
   177  		}
   178  		for !jobs[queue.UID].Empty() {
   179  			job, ok := jobs[queue.UID].Pop().(*api.JobInfo)
   180  			if !ok {
   181  				klog.Errorf("JobInfo transition failed, ignore it.")
   182  				continue
   183  			}
   184  			for !tasks[job.UID].Empty() {
   185  				pendingTasks = append(pendingTasks, tasks[job.UID].Pop().(*api.TaskInfo))
   186  			}
   187  		}
   188  	}
   189  	return pendingTasks
   190  }