volcano.sh/volcano@v1.9.0/pkg/scheduler/plugins/task-topology/topology.go (about) 1 /* 2 Copyright 2021 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package tasktopology 18 19 import ( 20 "fmt" 21 "strings" 22 "time" 23 24 "k8s.io/klog/v2" 25 k8sFramework "k8s.io/kubernetes/pkg/scheduler/framework" 26 27 "volcano.sh/volcano/pkg/scheduler/api" 28 "volcano.sh/volcano/pkg/scheduler/framework" 29 ) 30 31 type taskTopologyPlugin struct { 32 arguments framework.Arguments 33 34 weight int 35 managers map[api.JobID]*JobManager 36 } 37 38 // New function returns taskTopologyPlugin object 39 func New(arguments framework.Arguments) framework.Plugin { 40 return &taskTopologyPlugin{ 41 arguments: arguments, 42 43 weight: calculateWeight(arguments), 44 managers: make(map[api.JobID]*JobManager), 45 } 46 } 47 48 func (p *taskTopologyPlugin) Name() string { 49 return PluginName 50 } 51 52 // TaskOrderFn returns -1 to make l prior to r. 53 // 54 // for example: 55 // A: 56 // 57 // | bucket1 | bucket2 | out of bucket 58 // | a1 a3 | a2 | a4 59 // 60 // B: 61 // 62 // | bucket1 | out of bucket 63 // | b1 b2 | b3 64 // 65 // the right task order should be: 66 // 67 // a1 a3 a2 b1 b2 a4 b3 68 func (p *taskTopologyPlugin) TaskOrderFn(l interface{}, r interface{}) int { 69 lv, ok := l.(*api.TaskInfo) 70 if !ok { 71 klog.Errorf("Object is not a taskinfo") 72 return 0 73 } 74 rv, ok := r.(*api.TaskInfo) 75 if !ok { 76 klog.Errorf("Object is not a taskinfo") 77 return 0 78 } 79 80 lvJobManager := p.managers[lv.Job] 81 rvJobManager := p.managers[rv.Job] 82 if lvJobManager == nil { 83 klog.V(4).Infof("No job manager for job <ID: %s>, do not return task order.", lv.Job) 84 return 0 85 } 86 if rvJobManager == nil { 87 klog.V(4).Infof("No job manager for job <ID: %s>, do not return task order.", rv.Job) 88 return 0 89 } 90 91 lvBucket := lvJobManager.GetBucket(lv) 92 rvBucket := rvJobManager.GetBucket(rv) 93 // the one have bucket would always prior to another 94 lvInBucket := lvBucket != nil 95 rvInBucket := rvBucket != nil 96 if lvInBucket != rvInBucket { 97 if lvInBucket { 98 return -1 99 } 100 return 1 101 } 102 103 // comparison between job is not the duty of this plugin 104 if lv.Job != rv.Job { 105 return 0 106 } 107 108 // task out of bucket have no order 109 if !lvInBucket && !rvInBucket { 110 return 0 111 } 112 113 // the big bucket should prior to small one 114 lvHasTask := len(lvBucket.tasks) 115 rvHasTask := len(rvBucket.tasks) 116 if lvHasTask != rvHasTask { 117 if lvHasTask > rvHasTask { 118 return -1 119 } 120 return 1 121 } 122 123 lvBucketIndex := lvBucket.index 124 rvBucketIndex := rvBucket.index 125 // in the same bucket, the affinityOrder is ok 126 if lvBucketIndex == rvBucketIndex { 127 affinityOrder := lvJobManager.taskAffinityOrder(lv, rv) 128 return -affinityOrder 129 } 130 131 // the old bucket should prior to young one 132 if lvBucketIndex < rvBucketIndex { 133 return -1 134 } 135 return 1 136 } 137 138 func (p *taskTopologyPlugin) calcBucketScore(task *api.TaskInfo, node *api.NodeInfo) (int, *JobManager, error) { 139 // task could never fits the node 140 maxResource := node.Idle.Clone().Add(node.Releasing) 141 if req := task.Resreq; req != nil && maxResource.LessPartly(req, api.Zero) { 142 return 0, nil, nil 143 } 144 145 jobManager, hasManager := p.managers[task.Job] 146 if !hasManager { 147 return 0, nil, nil 148 } 149 150 bucket := jobManager.GetBucket(task) 151 // task out of bucket 152 if bucket == nil { 153 return 0, jobManager, nil 154 } 155 156 // 1. bound task in bucket is the base score of this node 157 score := bucket.node[node.Name] 158 159 // 2. task inter/self anti-affinity should be calculated 160 if nodeTaskSet := jobManager.nodeTaskSet[node.Name]; nodeTaskSet != nil { 161 taskName := getTaskName(task) 162 affinityScore := jobManager.checkTaskSetAffinity(taskName, nodeTaskSet, true) 163 if affinityScore < 0 { 164 score += affinityScore 165 } 166 } 167 klog.V(4).Infof("task %s/%s, node %s, additional score %d, task %d", 168 task.Namespace, task.Name, node.Name, score, len(bucket.tasks)) 169 170 // 3. the other tasks in bucket take into considering 171 score += len(bucket.tasks) 172 if bucket.request == nil || bucket.request.LessEqual(maxResource, api.Zero) { 173 return score, jobManager, nil 174 } 175 176 remains := bucket.request.Clone() 177 // randomly (by map) take out task to make the bucket fits the node 178 for bucketTaskID, bucketTask := range bucket.tasks { 179 // current task should kept in bucket 180 if bucketTaskID == task.Pod.UID || bucketTask.Resreq == nil { 181 continue 182 } 183 remains.Sub(bucketTask.Resreq) 184 score-- 185 if remains.LessEqual(maxResource, api.Zero) { 186 break 187 } 188 } 189 // here, the bucket remained request will always fit the maxResource 190 return score, jobManager, nil 191 } 192 193 func (p *taskTopologyPlugin) NodeOrderFn(task *api.TaskInfo, node *api.NodeInfo) (float64, error) { 194 score, jobManager, err := p.calcBucketScore(task, node) 195 if err != nil { 196 return 0, err 197 } 198 fScore := float64(score * p.weight) 199 if jobManager != nil && jobManager.bucketMaxSize != 0 { 200 fScore = fScore * float64(k8sFramework.MaxNodeScore) / float64(jobManager.bucketMaxSize) 201 } 202 klog.V(4).Infof("task %s/%s at node %s has bucket score %d, score %f", 203 task.Namespace, task.Name, node.Name, score, fScore) 204 return fScore, nil 205 } 206 207 func (p *taskTopologyPlugin) AllocateFunc(event *framework.Event) { 208 task := event.Task 209 210 jobManager, hasManager := p.managers[task.Job] 211 if !hasManager { 212 return 213 } 214 jobManager.TaskBound(task) 215 } 216 217 func (p *taskTopologyPlugin) initBucket(ssn *framework.Session) { 218 for jobID, job := range ssn.Jobs { 219 if !job.HasPendingTasks() { 220 klog.V(4).Infof("No pending tasks in job <%s/%s> by plugin %s.", 221 job.Namespace, job.Name, PluginName) 222 continue 223 } 224 225 jobTopology, err := readTopologyFromPgAnnotations(job) 226 if err != nil { 227 klog.V(4).Infof("Failed to read task topology from job <%s/%s> annotations, error: %s.", 228 job.Namespace, job.Name, err.Error()) 229 continue 230 } 231 if jobTopology == nil { 232 continue 233 } 234 235 manager := NewJobManager(jobID) 236 manager.ApplyTaskTopology(jobTopology) 237 manager.ConstructBucket(job.Tasks) 238 239 p.managers[job.UID] = manager 240 } 241 } 242 243 func affinityCheck(job *api.JobInfo, affinity [][]string) error { 244 if job == nil || affinity == nil { 245 return fmt.Errorf("empty input, job: %v, affinity: %v", job, affinity) 246 } 247 248 var taskNumber = len(job.Tasks) 249 var taskRef = make(map[string]bool, taskNumber) 250 var jobNamePrefix = job.Name + "-" 251 for _, task := range job.Tasks { 252 // the full task name looks like "${job name}-${task name}-${index}", 253 // so we can trim the jobNamePrefix and the indexSuffix to get the short task name. 254 tmpTaskName := task.Name[:strings.LastIndex(task.Name, "-")] 255 tmpTaskName = strings.TrimPrefix(tmpTaskName, jobNamePrefix) 256 257 if _, exist := taskRef[tmpTaskName]; !exist { 258 taskRef[tmpTaskName] = true 259 } 260 } 261 262 for _, aff := range affinity { 263 affTasks := make(map[string]bool, len(aff)) 264 for _, task := range aff { 265 if len(task) == 0 { 266 continue 267 } 268 if _, exist := taskRef[task]; !exist { 269 return fmt.Errorf("task %s do not exist in job <%s/%s>", task, job.Namespace, job.Name) 270 } 271 if _, exist := affTasks[task]; exist { 272 return fmt.Errorf("task %s is duplicated in job <%s/%s>", task, job.Namespace, job.Name) 273 } 274 affTasks[task] = true 275 } 276 } 277 278 return nil 279 } 280 281 func splitAnnotations(job *api.JobInfo, annotation string) ([][]string, error) { 282 affinityStr := strings.Split(annotation, ";") 283 if len(affinityStr) == 0 { 284 return nil, nil 285 } 286 var affinity = make([][]string, len(affinityStr)) 287 for i, str := range affinityStr { 288 affinity[i] = strings.Split(str, ",") 289 } 290 if err := affinityCheck(job, affinity); err != nil { 291 klog.V(4).Infof("Job <%s/%s> affinity key invalid: %s.", 292 job.Namespace, job.Name, err.Error()) 293 return nil, err 294 } 295 return affinity, nil 296 } 297 298 func readTopologyFromPgAnnotations(job *api.JobInfo) (*TaskTopology, error) { 299 jobAffinityStr, affinityExist := job.PodGroup.Annotations[JobAffinityAnnotations] 300 jobAntiAffinityStr, antiAffinityExist := job.PodGroup.Annotations[JobAntiAffinityAnnotations] 301 taskOrderStr, taskOrderExist := job.PodGroup.Annotations[TaskOrderAnnotations] 302 303 if !(affinityExist || antiAffinityExist || taskOrderExist) { 304 return nil, nil 305 } 306 307 var jobTopology = TaskTopology{ 308 Affinity: nil, 309 AntiAffinity: nil, 310 TaskOrder: nil, 311 } 312 313 if affinityExist { 314 affinities, err := splitAnnotations(job, jobAffinityStr) 315 if err != nil { 316 klog.V(4).Infof("Job <%s/%s> affinity key invalid: %s.", 317 job.Namespace, job.Name, err.Error()) 318 return nil, err 319 } 320 jobTopology.Affinity = affinities 321 } 322 323 if antiAffinityExist { 324 affinities, err := splitAnnotations(job, jobAntiAffinityStr) 325 if err != nil { 326 klog.V(4).Infof("Job <%s/%s> anti affinity key invalid: %s.", 327 job.Namespace, job.Name, err.Error()) 328 return nil, err 329 } 330 jobTopology.AntiAffinity = affinities 331 } 332 333 if taskOrderExist { 334 jobTopology.TaskOrder = strings.Split(taskOrderStr, ",") 335 if err := affinityCheck(job, [][]string{jobTopology.TaskOrder}); err != nil { 336 klog.V(4).Infof("Job <%s/%s> task order key invalid: %s.", 337 job.Namespace, job.Name, err.Error()) 338 return nil, err 339 } 340 } 341 342 return &jobTopology, nil 343 } 344 345 func (p *taskTopologyPlugin) OnSessionOpen(ssn *framework.Session) { 346 start := time.Now() 347 klog.V(3).Infof("start to init task topology plugin, weight[%d], defined order %v", p.weight, affinityPriority) 348 349 p.initBucket(ssn) 350 351 ssn.AddTaskOrderFn(p.Name(), p.TaskOrderFn) 352 353 ssn.AddNodeOrderFn(p.Name(), p.NodeOrderFn) 354 355 ssn.AddEventHandler(&framework.EventHandler{ 356 AllocateFunc: p.AllocateFunc, 357 }) 358 359 klog.V(3).Infof("finished to init task topology plugin, using time %v", time.Since(start)) 360 } 361 362 func (p *taskTopologyPlugin) OnSessionClose(ssn *framework.Session) { 363 p.managers = nil 364 }