volcano.sh/volcano@v1.9.0/pkg/scheduler/plugins/task-topology/topology.go (about)

     1  /*
     2  Copyright 2021 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package tasktopology
    18  
    19  import (
    20  	"fmt"
    21  	"strings"
    22  	"time"
    23  
    24  	"k8s.io/klog/v2"
    25  	k8sFramework "k8s.io/kubernetes/pkg/scheduler/framework"
    26  
    27  	"volcano.sh/volcano/pkg/scheduler/api"
    28  	"volcano.sh/volcano/pkg/scheduler/framework"
    29  )
    30  
// taskTopologyPlugin is the scheduler plugin implemented in this file. It
// orders tasks (TaskOrderFn) and scores nodes (NodeOrderFn) using one
// JobManager per job.
type taskTopologyPlugin struct {
	// arguments holds the plugin arguments handed to New.
	arguments framework.Arguments

	// weight is computed from arguments by calculateWeight and multiplies the
	// bucket score in NodeOrderFn; managers holds the JobManager of each job,
	// keyed by job ID, and is filled in by initBucket.
	weight   int
	managers map[api.JobID]*JobManager
}
    37  
    38  // New function returns taskTopologyPlugin object
    39  func New(arguments framework.Arguments) framework.Plugin {
    40  	return &taskTopologyPlugin{
    41  		arguments: arguments,
    42  
    43  		weight:   calculateWeight(arguments),
    44  		managers: make(map[api.JobID]*JobManager),
    45  	}
    46  }
    47  
    48  func (p *taskTopologyPlugin) Name() string {
    49  	return PluginName
    50  }
    51  
    52  // TaskOrderFn returns -1 to make l prior to r.
    53  //
    54  // for example:
    55  // A:
    56  //
    57  //	| bucket1   | bucket2   | out of bucket
    58  //	| a1 a3     | a2        | a4
    59  //
    60  // B:
    61  //
    62  //	| bucket1   | out of bucket
    63  //	| b1 b2     | b3
    64  //
    65  // the right task order should be:
    66  //
    67  //	a1 a3 a2 b1 b2 a4 b3
    68  func (p *taskTopologyPlugin) TaskOrderFn(l interface{}, r interface{}) int {
    69  	lv, ok := l.(*api.TaskInfo)
    70  	if !ok {
    71  		klog.Errorf("Object is not a taskinfo")
    72  		return 0
    73  	}
    74  	rv, ok := r.(*api.TaskInfo)
    75  	if !ok {
    76  		klog.Errorf("Object is not a taskinfo")
    77  		return 0
    78  	}
    79  
    80  	lvJobManager := p.managers[lv.Job]
    81  	rvJobManager := p.managers[rv.Job]
    82  	if lvJobManager == nil {
    83  		klog.V(4).Infof("No job manager for job <ID: %s>, do not return task order.", lv.Job)
    84  		return 0
    85  	}
    86  	if rvJobManager == nil {
    87  		klog.V(4).Infof("No job manager for job <ID: %s>, do not return task order.", rv.Job)
    88  		return 0
    89  	}
    90  
    91  	lvBucket := lvJobManager.GetBucket(lv)
    92  	rvBucket := rvJobManager.GetBucket(rv)
    93  	// the one have bucket would always prior to another
    94  	lvInBucket := lvBucket != nil
    95  	rvInBucket := rvBucket != nil
    96  	if lvInBucket != rvInBucket {
    97  		if lvInBucket {
    98  			return -1
    99  		}
   100  		return 1
   101  	}
   102  
   103  	// comparison between job is not the duty of this plugin
   104  	if lv.Job != rv.Job {
   105  		return 0
   106  	}
   107  
   108  	// task out of bucket have no order
   109  	if !lvInBucket && !rvInBucket {
   110  		return 0
   111  	}
   112  
   113  	// the big bucket should prior to small one
   114  	lvHasTask := len(lvBucket.tasks)
   115  	rvHasTask := len(rvBucket.tasks)
   116  	if lvHasTask != rvHasTask {
   117  		if lvHasTask > rvHasTask {
   118  			return -1
   119  		}
   120  		return 1
   121  	}
   122  
   123  	lvBucketIndex := lvBucket.index
   124  	rvBucketIndex := rvBucket.index
   125  	// in the same bucket, the affinityOrder is ok
   126  	if lvBucketIndex == rvBucketIndex {
   127  		affinityOrder := lvJobManager.taskAffinityOrder(lv, rv)
   128  		return -affinityOrder
   129  	}
   130  
   131  	// the old bucket should prior to young one
   132  	if lvBucketIndex < rvBucketIndex {
   133  		return -1
   134  	}
   135  	return 1
   136  }
   137  
   138  func (p *taskTopologyPlugin) calcBucketScore(task *api.TaskInfo, node *api.NodeInfo) (int, *JobManager, error) {
   139  	// task could never fits the node
   140  	maxResource := node.Idle.Clone().Add(node.Releasing)
   141  	if req := task.Resreq; req != nil && maxResource.LessPartly(req, api.Zero) {
   142  		return 0, nil, nil
   143  	}
   144  
   145  	jobManager, hasManager := p.managers[task.Job]
   146  	if !hasManager {
   147  		return 0, nil, nil
   148  	}
   149  
   150  	bucket := jobManager.GetBucket(task)
   151  	// task out of bucket
   152  	if bucket == nil {
   153  		return 0, jobManager, nil
   154  	}
   155  
   156  	// 1. bound task in bucket is the base score of this node
   157  	score := bucket.node[node.Name]
   158  
   159  	// 2. task inter/self anti-affinity should be calculated
   160  	if nodeTaskSet := jobManager.nodeTaskSet[node.Name]; nodeTaskSet != nil {
   161  		taskName := getTaskName(task)
   162  		affinityScore := jobManager.checkTaskSetAffinity(taskName, nodeTaskSet, true)
   163  		if affinityScore < 0 {
   164  			score += affinityScore
   165  		}
   166  	}
   167  	klog.V(4).Infof("task %s/%s, node %s, additional score %d, task %d",
   168  		task.Namespace, task.Name, node.Name, score, len(bucket.tasks))
   169  
   170  	// 3. the other tasks in bucket take into considering
   171  	score += len(bucket.tasks)
   172  	if bucket.request == nil || bucket.request.LessEqual(maxResource, api.Zero) {
   173  		return score, jobManager, nil
   174  	}
   175  
   176  	remains := bucket.request.Clone()
   177  	// randomly (by map) take out task to make the bucket fits the node
   178  	for bucketTaskID, bucketTask := range bucket.tasks {
   179  		// current task should kept in bucket
   180  		if bucketTaskID == task.Pod.UID || bucketTask.Resreq == nil {
   181  			continue
   182  		}
   183  		remains.Sub(bucketTask.Resreq)
   184  		score--
   185  		if remains.LessEqual(maxResource, api.Zero) {
   186  			break
   187  		}
   188  	}
   189  	// here, the bucket remained request will always fit the maxResource
   190  	return score, jobManager, nil
   191  }
   192  
   193  func (p *taskTopologyPlugin) NodeOrderFn(task *api.TaskInfo, node *api.NodeInfo) (float64, error) {
   194  	score, jobManager, err := p.calcBucketScore(task, node)
   195  	if err != nil {
   196  		return 0, err
   197  	}
   198  	fScore := float64(score * p.weight)
   199  	if jobManager != nil && jobManager.bucketMaxSize != 0 {
   200  		fScore = fScore * float64(k8sFramework.MaxNodeScore) / float64(jobManager.bucketMaxSize)
   201  	}
   202  	klog.V(4).Infof("task %s/%s at node %s has bucket score %d, score %f",
   203  		task.Namespace, task.Name, node.Name, score, fScore)
   204  	return fScore, nil
   205  }
   206  
   207  func (p *taskTopologyPlugin) AllocateFunc(event *framework.Event) {
   208  	task := event.Task
   209  
   210  	jobManager, hasManager := p.managers[task.Job]
   211  	if !hasManager {
   212  		return
   213  	}
   214  	jobManager.TaskBound(task)
   215  }
   216  
   217  func (p *taskTopologyPlugin) initBucket(ssn *framework.Session) {
   218  	for jobID, job := range ssn.Jobs {
   219  		if !job.HasPendingTasks() {
   220  			klog.V(4).Infof("No pending tasks in job <%s/%s> by plugin %s.",
   221  				job.Namespace, job.Name, PluginName)
   222  			continue
   223  		}
   224  
   225  		jobTopology, err := readTopologyFromPgAnnotations(job)
   226  		if err != nil {
   227  			klog.V(4).Infof("Failed to read task topology from job <%s/%s> annotations, error: %s.",
   228  				job.Namespace, job.Name, err.Error())
   229  			continue
   230  		}
   231  		if jobTopology == nil {
   232  			continue
   233  		}
   234  
   235  		manager := NewJobManager(jobID)
   236  		manager.ApplyTaskTopology(jobTopology)
   237  		manager.ConstructBucket(job.Tasks)
   238  
   239  		p.managers[job.UID] = manager
   240  	}
   241  }
   242  
   243  func affinityCheck(job *api.JobInfo, affinity [][]string) error {
   244  	if job == nil || affinity == nil {
   245  		return fmt.Errorf("empty input, job: %v, affinity: %v", job, affinity)
   246  	}
   247  
   248  	var taskNumber = len(job.Tasks)
   249  	var taskRef = make(map[string]bool, taskNumber)
   250  	var jobNamePrefix = job.Name + "-"
   251  	for _, task := range job.Tasks {
   252  		// the full task name looks like "${job name}-${task name}-${index}",
   253  		// so we can trim the jobNamePrefix and the indexSuffix to get the short task name.
   254  		tmpTaskName := task.Name[:strings.LastIndex(task.Name, "-")]
   255  		tmpTaskName = strings.TrimPrefix(tmpTaskName, jobNamePrefix)
   256  
   257  		if _, exist := taskRef[tmpTaskName]; !exist {
   258  			taskRef[tmpTaskName] = true
   259  		}
   260  	}
   261  
   262  	for _, aff := range affinity {
   263  		affTasks := make(map[string]bool, len(aff))
   264  		for _, task := range aff {
   265  			if len(task) == 0 {
   266  				continue
   267  			}
   268  			if _, exist := taskRef[task]; !exist {
   269  				return fmt.Errorf("task %s do not exist in job <%s/%s>", task, job.Namespace, job.Name)
   270  			}
   271  			if _, exist := affTasks[task]; exist {
   272  				return fmt.Errorf("task %s is duplicated in job <%s/%s>", task, job.Namespace, job.Name)
   273  			}
   274  			affTasks[task] = true
   275  		}
   276  	}
   277  
   278  	return nil
   279  }
   280  
   281  func splitAnnotations(job *api.JobInfo, annotation string) ([][]string, error) {
   282  	affinityStr := strings.Split(annotation, ";")
   283  	if len(affinityStr) == 0 {
   284  		return nil, nil
   285  	}
   286  	var affinity = make([][]string, len(affinityStr))
   287  	for i, str := range affinityStr {
   288  		affinity[i] = strings.Split(str, ",")
   289  	}
   290  	if err := affinityCheck(job, affinity); err != nil {
   291  		klog.V(4).Infof("Job <%s/%s> affinity key invalid: %s.",
   292  			job.Namespace, job.Name, err.Error())
   293  		return nil, err
   294  	}
   295  	return affinity, nil
   296  }
   297  
   298  func readTopologyFromPgAnnotations(job *api.JobInfo) (*TaskTopology, error) {
   299  	jobAffinityStr, affinityExist := job.PodGroup.Annotations[JobAffinityAnnotations]
   300  	jobAntiAffinityStr, antiAffinityExist := job.PodGroup.Annotations[JobAntiAffinityAnnotations]
   301  	taskOrderStr, taskOrderExist := job.PodGroup.Annotations[TaskOrderAnnotations]
   302  
   303  	if !(affinityExist || antiAffinityExist || taskOrderExist) {
   304  		return nil, nil
   305  	}
   306  
   307  	var jobTopology = TaskTopology{
   308  		Affinity:     nil,
   309  		AntiAffinity: nil,
   310  		TaskOrder:    nil,
   311  	}
   312  
   313  	if affinityExist {
   314  		affinities, err := splitAnnotations(job, jobAffinityStr)
   315  		if err != nil {
   316  			klog.V(4).Infof("Job <%s/%s> affinity key invalid: %s.",
   317  				job.Namespace, job.Name, err.Error())
   318  			return nil, err
   319  		}
   320  		jobTopology.Affinity = affinities
   321  	}
   322  
   323  	if antiAffinityExist {
   324  		affinities, err := splitAnnotations(job, jobAntiAffinityStr)
   325  		if err != nil {
   326  			klog.V(4).Infof("Job <%s/%s> anti affinity key invalid: %s.",
   327  				job.Namespace, job.Name, err.Error())
   328  			return nil, err
   329  		}
   330  		jobTopology.AntiAffinity = affinities
   331  	}
   332  
   333  	if taskOrderExist {
   334  		jobTopology.TaskOrder = strings.Split(taskOrderStr, ",")
   335  		if err := affinityCheck(job, [][]string{jobTopology.TaskOrder}); err != nil {
   336  			klog.V(4).Infof("Job <%s/%s> task order key invalid: %s.",
   337  				job.Namespace, job.Name, err.Error())
   338  			return nil, err
   339  		}
   340  	}
   341  
   342  	return &jobTopology, nil
   343  }
   344  
   345  func (p *taskTopologyPlugin) OnSessionOpen(ssn *framework.Session) {
   346  	start := time.Now()
   347  	klog.V(3).Infof("start to init task topology plugin, weight[%d], defined order %v", p.weight, affinityPriority)
   348  
   349  	p.initBucket(ssn)
   350  
   351  	ssn.AddTaskOrderFn(p.Name(), p.TaskOrderFn)
   352  
   353  	ssn.AddNodeOrderFn(p.Name(), p.NodeOrderFn)
   354  
   355  	ssn.AddEventHandler(&framework.EventHandler{
   356  		AllocateFunc: p.AllocateFunc,
   357  	})
   358  
   359  	klog.V(3).Infof("finished to init task topology plugin, using time %v", time.Since(start))
   360  }
   361  
   362  func (p *taskTopologyPlugin) OnSessionClose(ssn *framework.Session) {
   363  	p.managers = nil
   364  }