volcano.sh/volcano@v1.9.0/pkg/scheduler/plugins/task-topology/manager.go (about) 1 /* 2 Copyright 2021 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package tasktopology 18 19 import ( 20 "fmt" 21 "math" 22 "sort" 23 "strings" 24 25 "k8s.io/apimachinery/pkg/types" 26 "k8s.io/klog/v2" 27 28 "volcano.sh/volcano/pkg/scheduler/api" 29 ) 30 31 type topologyType int 32 33 const ( 34 selfAntiAffinity topologyType = iota 35 interAntiAffinity 36 selfAffinity 37 interAffinity 38 ) 39 40 // map[topologyType]priority, the larger number means the higher priority 41 var affinityPriority = map[topologyType]int{ 42 selfAntiAffinity: 4, 43 interAffinity: 3, 44 selfAffinity: 2, 45 interAntiAffinity: 1, 46 } 47 48 // JobManager is struct used to save infos about affinity and buckets of a job 49 type JobManager struct { 50 jobID api.JobID 51 52 buckets []*Bucket 53 podInBucket map[types.UID]int 54 podInTask map[types.UID]string 55 56 taskAffinityPriority map[string]int // [taskName] -> priority 57 taskExistOrder map[string]int 58 interAffinity map[string]map[string]struct{} // [taskName]->[taskName] 59 selfAffinity map[string]struct{} 60 interAntiAffinity map[string]map[string]struct{} // [taskName]->[taskName] 61 selfAntiAffinity map[string]struct{} 62 63 bucketMaxSize int 64 nodeTaskSet map[string]map[string]int // [nodeName]->[taskName] 65 } 66 67 // NewJobManager creates a new job manager for job 68 func NewJobManager(jobID api.JobID) *JobManager { 69 return &JobManager{ 70 jobID: jobID, 71 72 buckets: make([]*Bucket, 0), 73 podInBucket: make(map[types.UID]int), 74 podInTask: make(map[types.UID]string), 75 76 taskAffinityPriority: make(map[string]int), 77 taskExistOrder: make(map[string]int), 78 interAffinity: make(map[string]map[string]struct{}), 79 interAntiAffinity: make(map[string]map[string]struct{}), 80 selfAffinity: make(map[string]struct{}), 81 selfAntiAffinity: make(map[string]struct{}), 82 83 bucketMaxSize: 0, 84 nodeTaskSet: make(map[string]map[string]int), 85 } 86 } 87 88 // MarkOutOfBucket indicates task is outside of any bucket 89 func (jm *JobManager) MarkOutOfBucket(uid types.UID) { 90 jm.podInBucket[uid] = OutOfBucket 91 } 92 93 // MarkTaskHasTopology indicates task has topology settings 94 func (jm *JobManager) MarkTaskHasTopology(taskName string, topoType topologyType) { 95 priority := affinityPriority[topoType] 96 if priority > jm.taskAffinityPriority[taskName] { 97 jm.taskAffinityPriority[taskName] = priority 98 } 99 } 100 101 // ApplyTaskTopology transforms taskTopology to matrix 102 // affinity: [[a, b], [c]] 103 // interAffinity: 104 // 105 // a b c 106 // a - x - 107 // b x - - 108 // c - - - 109 // selfAffinity: 110 // a b c 111 // - - x 112 func (jm *JobManager) ApplyTaskTopology(topo *TaskTopology) { 113 for _, aff := range topo.Affinity { 114 if len(aff) == 1 { 115 taskName := aff[0] 116 jm.selfAffinity[taskName] = struct{}{} 117 jm.MarkTaskHasTopology(taskName, selfAffinity) 118 continue 119 } 120 for index, src := range aff { 121 for _, dst := range aff[:index] { 122 addAffinity(jm.interAffinity, src, dst) 123 addAffinity(jm.interAffinity, dst, src) 124 } 125 jm.MarkTaskHasTopology(src, interAffinity) 126 } 127 } 128 129 for _, aff := range topo.AntiAffinity { 130 if len(aff) == 1 { 131 taskName := aff[0] 132 jm.selfAntiAffinity[taskName] = struct{}{} 133 jm.MarkTaskHasTopology(taskName, selfAntiAffinity) 134 continue 135 } 136 for index, src := range aff { 137 for _, dst := range aff[:index] { 138 addAffinity(jm.interAntiAffinity, src, dst) 139 addAffinity(jm.interAntiAffinity, dst, src) 140 } 141 jm.MarkTaskHasTopology(src, interAntiAffinity) 142 } 143 } 144 145 length := len(topo.TaskOrder) 146 for index, taskName := range topo.TaskOrder { 147 jm.taskExistOrder[taskName] = length - index 148 } 149 } 150 151 // NewBucket creates a new bucket 152 func (jm *JobManager) NewBucket() *Bucket { 153 bucket := NewBucket() 154 bucket.index = len(jm.buckets) 155 jm.buckets = append(jm.buckets, bucket) 156 return bucket 157 } 158 159 // AddTaskToBucket adds task into bucket 160 func (jm *JobManager) AddTaskToBucket(bucketIndex int, taskName string, task *api.TaskInfo) { 161 bucket := jm.buckets[bucketIndex] 162 jm.podInBucket[task.Pod.UID] = bucketIndex 163 bucket.AddTask(taskName, task) 164 if size := len(bucket.tasks) + bucket.boundTask; size > jm.bucketMaxSize { 165 jm.bucketMaxSize = size 166 } 167 } 168 169 // L compared with R, -1 for L < R, 0 for L == R, 1 for L > R 170 func (jm *JobManager) taskAffinityOrder(L, R *api.TaskInfo) int { 171 LTaskName := jm.podInTask[L.Pod.UID] 172 RTaskName := jm.podInTask[R.Pod.UID] 173 174 // in the same vk task, they are equal 175 if LTaskName == RTaskName { 176 return 0 177 } 178 179 // use user defined order firstly 180 LOrder := jm.taskExistOrder[LTaskName] 181 ROrder := jm.taskExistOrder[RTaskName] 182 if LOrder != ROrder { 183 if LOrder > ROrder { 184 return 1 185 } 186 return -1 187 } 188 189 LPriority := jm.taskAffinityPriority[LTaskName] 190 RPriority := jm.taskAffinityPriority[RTaskName] 191 if LPriority != RPriority { 192 if LPriority > RPriority { 193 return 1 194 } 195 return -1 196 } 197 198 // all affinity setting of L and R are the same, they are equal 199 return 0 200 } 201 202 func (jm *JobManager) buildTaskInfo(tasks map[api.TaskID]*api.TaskInfo) []*api.TaskInfo { 203 taskWithoutBucket := make([]*api.TaskInfo, 0, len(tasks)) 204 for _, task := range tasks { 205 pod := task.Pod 206 207 taskName := getTaskName(task) 208 if taskName == "" { 209 jm.MarkOutOfBucket(pod.UID) 210 continue 211 } 212 if _, hasTopology := jm.taskAffinityPriority[taskName]; !hasTopology { 213 jm.MarkOutOfBucket(pod.UID) 214 continue 215 } 216 217 jm.podInTask[pod.UID] = taskName 218 taskWithoutBucket = append(taskWithoutBucket, task) 219 } 220 return taskWithoutBucket 221 } 222 223 func (jm *JobManager) checkTaskSetAffinity(taskName string, taskNameSet map[string]int, onlyAnti bool) int { 224 bucketPodAff := 0 225 226 if taskName == "" { 227 return bucketPodAff 228 } 229 230 for taskNameInBucket, count := range taskNameSet { 231 theSameTask := taskNameInBucket == taskName 232 233 if !onlyAnti { 234 affinity := false 235 if theSameTask { 236 _, affinity = jm.selfAffinity[taskName] 237 } else { 238 _, affinity = jm.interAffinity[taskName][taskNameInBucket] 239 } 240 if affinity { 241 bucketPodAff += count 242 } 243 } 244 245 antiAffinity := false 246 if theSameTask { 247 _, antiAffinity = jm.selfAntiAffinity[taskName] 248 } else { 249 _, antiAffinity = jm.interAntiAffinity[taskName][taskNameInBucket] 250 } 251 if antiAffinity { 252 bucketPodAff -= count 253 } 254 } 255 256 return bucketPodAff 257 } 258 259 func (jm *JobManager) buildBucket(taskWithOrder []*api.TaskInfo) { 260 nodeBucketMapping := make(map[string]*Bucket) 261 262 for _, task := range taskWithOrder { 263 klog.V(5).Infof("jobID %s task with order task %s/%s", jm.jobID, task.Namespace, task.Name) 264 265 var selectedBucket *Bucket 266 maxAffinity := math.MinInt32 267 268 taskName := getTaskName(task) 269 270 if task.NodeName != "" { 271 // generate bucket by node 272 maxAffinity = 0 273 selectedBucket = nodeBucketMapping[task.NodeName] 274 } else { 275 for _, bucket := range jm.buckets { 276 bucketPodAff := jm.checkTaskSetAffinity(taskName, bucket.taskNameSet, false) 277 278 // choose the best fit affinity, or balance resource between bucket 279 if bucketPodAff > maxAffinity { 280 maxAffinity = bucketPodAff 281 selectedBucket = bucket 282 } else if bucketPodAff == maxAffinity && selectedBucket != nil && 283 bucket.reqScore < selectedBucket.reqScore { 284 selectedBucket = bucket 285 } 286 } 287 } 288 289 if maxAffinity < 0 || selectedBucket == nil { 290 selectedBucket = jm.NewBucket() 291 if task.NodeName != "" { 292 nodeBucketMapping[task.NodeName] = selectedBucket 293 } 294 } 295 296 jm.AddTaskToBucket(selectedBucket.index, taskName, task) 297 } 298 } 299 300 // ConstructBucket builds bucket for tasks 301 func (jm *JobManager) ConstructBucket(tasks map[api.TaskID]*api.TaskInfo) { 302 taskWithoutBucket := jm.buildTaskInfo(tasks) 303 304 o := TaskOrder{ 305 tasks: taskWithoutBucket, 306 307 manager: jm, 308 } 309 sort.Sort(sort.Reverse(&o)) 310 311 jm.buildBucket(o.tasks) 312 } 313 314 // TaskBound binds task to bucket 315 func (jm *JobManager) TaskBound(task *api.TaskInfo) { 316 if taskName := getTaskName(task); taskName != "" { 317 set, ok := jm.nodeTaskSet[task.NodeName] 318 if !ok { 319 set = make(map[string]int) 320 jm.nodeTaskSet[task.NodeName] = set 321 } 322 set[taskName]++ 323 } 324 325 bucket := jm.GetBucket(task) 326 if bucket != nil { 327 bucket.TaskBound(task) 328 } 329 } 330 331 // GetBucket get bucket inside which task has been 332 func (jm *JobManager) GetBucket(task *api.TaskInfo) *Bucket { 333 index, ok := jm.podInBucket[task.Pod.UID] 334 if !ok || index == OutOfBucket { 335 return nil 336 } 337 338 bucket := jm.buckets[index] 339 return bucket 340 } 341 342 func (jm *JobManager) String() string { 343 // saa: selfAntiAffinity 344 // iaa: interAntiAffinity 345 // sa: selfAffinity 346 // ia: interAffinity 347 msg := []string{ 348 fmt.Sprintf("%s - job %s max %d || saa: %v - iaa: %v - sa: %v - ia: %v || priority: %v - order: %v || ", 349 PluginName, jm.jobID, jm.bucketMaxSize, 350 jm.selfAntiAffinity, jm.interAntiAffinity, 351 jm.selfAffinity, jm.interAffinity, 352 jm.taskAffinityPriority, jm.taskExistOrder, 353 ), 354 } 355 356 for _, bucket := range jm.buckets { 357 bucketMsg := fmt.Sprintf("b:%d -- ", bucket.index) 358 var info []string 359 for _, task := range bucket.tasks { 360 info = append(info, task.Pod.Name) 361 } 362 bucketMsg += strings.Join(info, ", ") 363 bucketMsg += "|" 364 365 info = nil 366 for nodeName, count := range bucket.node { 367 info = append(info, fmt.Sprintf("n%s-%d", nodeName, count)) 368 } 369 bucketMsg += strings.Join(info, ", ") 370 371 msg = append(msg, "["+bucketMsg+"]") 372 } 373 return strings.Join(msg, " ") 374 }