volcano.sh/volcano@v1.9.0/pkg/scheduler/plugins/drf/drf.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package drf 18 19 import ( 20 "fmt" 21 "math" 22 "strconv" 23 "strings" 24 25 v1 "k8s.io/api/core/v1" 26 "k8s.io/klog/v2" 27 28 "volcano.sh/volcano/pkg/scheduler/api" 29 "volcano.sh/volcano/pkg/scheduler/api/helpers" 30 "volcano.sh/volcano/pkg/scheduler/framework" 31 "volcano.sh/volcano/pkg/scheduler/metrics" 32 "volcano.sh/volcano/pkg/scheduler/plugins/util" 33 ) 34 35 // PluginName indicates name of volcano scheduler plugin. 36 const PluginName = "drf" 37 38 var shareDelta = 0.000001 39 40 // hierarchicalNode represents the node hierarchy 41 // and the corresponding weight and drf attribute 42 type hierarchicalNode struct { 43 parent *hierarchicalNode 44 attr *drfAttr 45 // If the node is a leaf node, 46 // request represents the request of the job. 47 request *api.Resource 48 weight float64 49 saturated bool 50 hierarchy string 51 children map[string]*hierarchicalNode 52 } 53 54 func (node *hierarchicalNode) Clone(parent *hierarchicalNode) *hierarchicalNode { 55 newNode := &hierarchicalNode{ 56 parent: parent, 57 attr: &drfAttr{ 58 share: node.attr.share, 59 dominantResource: node.attr.dominantResource, 60 allocated: node.attr.allocated.Clone(), 61 }, 62 request: node.request.Clone(), 63 weight: node.weight, 64 saturated: node.saturated, 65 hierarchy: node.hierarchy, 66 children: nil, 67 } 68 if node.children != nil { 69 newNode.children = map[string]*hierarchicalNode{} 70 for _, child := range node.children { 71 newNode.children[child.hierarchy] = child.Clone(newNode) 72 } 73 } 74 return newNode 75 } 76 77 // resourceSaturated returns true if any resource of the job is saturated or the job demands fully allocated resource 78 func resourceSaturated(allocated *api.Resource, 79 jobRequest *api.Resource, demandingResources map[v1.ResourceName]bool) bool { 80 for _, rn := range allocated.ResourceNames() { 81 if allocated.Get(rn) != 0 && jobRequest.Get(rn) != 0 && 82 allocated.Get(rn) >= jobRequest.Get(rn) { 83 return true 84 } 85 if !demandingResources[rn] && jobRequest.Get(rn) != 0 { 86 return true 87 } 88 } 89 return false 90 } 91 92 type drfAttr struct { 93 share float64 94 dominantResource string 95 allocated *api.Resource 96 } 97 98 func (attr *drfAttr) String() string { 99 return fmt.Sprintf("dominant resource <%s>, dominant share %f, allocated %s", 100 attr.dominantResource, attr.share, attr.allocated) 101 } 102 103 type drfPlugin struct { 104 totalResource *api.Resource 105 totalAllocated *api.Resource 106 107 // Key is Job ID 108 jobAttrs map[api.JobID]*drfAttr 109 110 // map[namespaceName]->attr 111 namespaceOpts map[string]*drfAttr 112 113 // hierarchical tree root 114 hierarchicalRoot *hierarchicalNode 115 116 // Arguments given for the plugin 117 pluginArguments framework.Arguments 118 } 119 120 // New return drf plugin 121 func New(arguments framework.Arguments) framework.Plugin { 122 return &drfPlugin{ 123 totalResource: api.EmptyResource(), 124 totalAllocated: api.EmptyResource(), 125 jobAttrs: map[api.JobID]*drfAttr{}, 126 namespaceOpts: map[string]*drfAttr{}, 127 hierarchicalRoot: &hierarchicalNode{ 128 attr: &drfAttr{allocated: api.EmptyResource()}, 129 request: api.EmptyResource(), 130 hierarchy: "root", 131 weight: 1, 132 children: map[string]*hierarchicalNode{}, 133 }, 134 pluginArguments: arguments, 135 } 136 } 137 138 func (drf *drfPlugin) Name() string { 139 return PluginName 140 } 141 142 // HierarchyEnabled returns if hierarchy is enabled 143 func (drf *drfPlugin) HierarchyEnabled(ssn *framework.Session) bool { 144 for _, tier := range ssn.Tiers { 145 for _, plugin := range tier.Plugins { 146 if plugin.Name != PluginName { 147 continue 148 } 149 return plugin.EnabledHierarchy != nil && *plugin.EnabledHierarchy 150 } 151 } 152 return false 153 } 154 155 func (drf *drfPlugin) compareQueues(root *hierarchicalNode, lqueue *api.QueueInfo, rqueue *api.QueueInfo) float64 { 156 lnode := root 157 lpaths := strings.Split(lqueue.Hierarchy, "/") 158 rnode := root 159 rpaths := strings.Split(rqueue.Hierarchy, "/") 160 depth := 0 161 if len(lpaths) < len(rpaths) { 162 depth = len(lpaths) 163 } else { 164 depth = len(rpaths) 165 } 166 for i := 0; i < depth; i++ { 167 // Saturated nodes have minumun prioirty, 168 // so that demanding nodes will be poped first. 169 if !lnode.saturated && rnode.saturated { 170 return -1 171 } 172 if lnode.saturated && !rnode.saturated { 173 return 1 174 } 175 if lnode.attr.share/lnode.weight == rnode.attr.share/rnode.weight { 176 if i < depth-1 { 177 lnode = lnode.children[lpaths[i+1]] 178 rnode = rnode.children[rpaths[i+1]] 179 } 180 } else { 181 return lnode.attr.share/lnode.weight - rnode.attr.share/rnode.weight 182 } 183 } 184 return 0 185 } 186 187 func (drf *drfPlugin) OnSessionOpen(ssn *framework.Session) { 188 // Prepare scheduling data for this session. 189 drf.totalResource.Add(ssn.TotalResource) 190 191 klog.V(4).Infof("Total Allocatable %s", drf.totalResource) 192 193 hierarchyEnabled := drf.HierarchyEnabled(ssn) 194 195 for _, job := range ssn.Jobs { 196 attr := &drfAttr{ 197 allocated: api.EmptyResource(), 198 } 199 200 for status, tasks := range job.TaskStatusIndex { 201 if api.AllocatedStatus(status) { 202 for _, t := range tasks { 203 attr.allocated.Add(t.Resreq) 204 } 205 } 206 } 207 208 // Calculate the init share of Job 209 drf.updateJobShare(job.Namespace, job.Name, attr) 210 211 drf.jobAttrs[job.UID] = attr 212 213 if hierarchyEnabled { 214 queue := ssn.Queues[job.Queue] 215 drf.totalAllocated.Add(attr.allocated) 216 drf.UpdateHierarchicalShare(drf.hierarchicalRoot, drf.totalAllocated, job, attr, queue.Hierarchy, queue.Weights) 217 } 218 } 219 220 preemptableFn := func(preemptor *api.TaskInfo, preemptees []*api.TaskInfo) ([]*api.TaskInfo, int) { 221 var victims []*api.TaskInfo 222 223 addVictim := func(candidate *api.TaskInfo) { 224 victims = append(victims, candidate) 225 } 226 227 latt := drf.jobAttrs[preemptor.Job] 228 lalloc := latt.allocated.Clone().Add(preemptor.Resreq) 229 _, ls := drf.calculateShare(lalloc, drf.totalResource) 230 231 allocations := map[api.JobID]*api.Resource{} 232 233 for _, preemptee := range preemptees { 234 if _, found := allocations[preemptee.Job]; !found { 235 ratt := drf.jobAttrs[preemptee.Job] 236 allocations[preemptee.Job] = ratt.allocated.Clone() 237 } 238 ralloc := allocations[preemptee.Job].Sub(preemptee.Resreq) 239 _, rs := drf.calculateShare(ralloc, drf.totalResource) 240 241 if ls < rs || math.Abs(ls-rs) <= shareDelta { 242 addVictim(preemptee) 243 } 244 } 245 246 klog.V(4).Infof("Victims from DRF plugins are %+v", victims) 247 248 return victims, util.Permit 249 } 250 251 ssn.AddPreemptableFn(drf.Name(), preemptableFn) 252 253 if hierarchyEnabled { 254 queueOrderFn := func(l interface{}, r interface{}) int { 255 lv := l.(*api.QueueInfo) 256 rv := r.(*api.QueueInfo) 257 ret := drf.compareQueues(drf.hierarchicalRoot, lv, rv) 258 if ret < 0 { 259 return -1 260 } 261 if ret > 0 { 262 return 1 263 } 264 return 0 265 } 266 ssn.AddQueueOrderFn(drf.Name(), queueOrderFn) 267 268 reclaimFn := func(reclaimer *api.TaskInfo, reclaimees []*api.TaskInfo) ([]*api.TaskInfo, int) { 269 var victims []*api.TaskInfo 270 // clone hdrf tree 271 totalAllocated := drf.totalAllocated.Clone() 272 root := drf.hierarchicalRoot.Clone(nil) 273 274 // update reclaimer hdrf 275 ljob := ssn.Jobs[reclaimer.Job] 276 lqueue := ssn.Queues[ljob.Queue] 277 ljob = ljob.Clone() 278 attr := drf.jobAttrs[ljob.UID] 279 lattr := &drfAttr{ 280 allocated: attr.allocated.Clone(), 281 } 282 lattr.allocated.Add(reclaimer.Resreq) 283 totalAllocated.Add(reclaimer.Resreq) 284 drf.updateShare(lattr) 285 drf.UpdateHierarchicalShare(root, totalAllocated, ljob, lattr, lqueue.Hierarchy, lqueue.Weights) 286 287 for _, preemptee := range reclaimees { 288 rjob := ssn.Jobs[preemptee.Job] 289 rqueue := ssn.Queues[rjob.Queue] 290 291 // update hdrf of reclaimee job 292 totalAllocated.Sub(preemptee.Resreq) 293 rjob = rjob.Clone() 294 attr := drf.jobAttrs[rjob.UID] 295 rattr := &drfAttr{ 296 allocated: attr.allocated.Clone(), 297 } 298 rattr.allocated.Sub(preemptee.Resreq) 299 drf.updateShare(rattr) 300 drf.UpdateHierarchicalShare(root, totalAllocated, rjob, rattr, rqueue.Hierarchy, rqueue.Weights) 301 302 // compare hdrf of queues 303 ret := drf.compareQueues(root, lqueue, rqueue) 304 305 // resume hdrf of reclaimee job 306 totalAllocated.Add(preemptee.Resreq) 307 rattr.allocated.Add(preemptee.Resreq) 308 drf.updateShare(rattr) 309 drf.UpdateHierarchicalShare(root, totalAllocated, rjob, rattr, rqueue.Hierarchy, rqueue.Weights) 310 311 if ret < 0 { 312 victims = append(victims, preemptee) 313 } 314 315 if ret > shareDelta { 316 continue 317 } 318 } 319 320 klog.V(4).Infof("Victims from HDRF plugins are %+v", victims) 321 322 return victims, util.Permit 323 } 324 ssn.AddReclaimableFn(drf.Name(), reclaimFn) 325 } 326 327 jobOrderFn := func(l interface{}, r interface{}) int { 328 lv := l.(*api.JobInfo) 329 rv := r.(*api.JobInfo) 330 331 klog.V(4).Infof("DRF JobOrderFn: <%v/%v> share state: %v, <%v/%v> share state: %v", 332 lv.Namespace, lv.Name, drf.jobAttrs[lv.UID].share, rv.Namespace, rv.Name, drf.jobAttrs[rv.UID].share) 333 334 if drf.jobAttrs[lv.UID].share == drf.jobAttrs[rv.UID].share { 335 return 0 336 } 337 338 if drf.jobAttrs[lv.UID].share < drf.jobAttrs[rv.UID].share { 339 return -1 340 } 341 342 return 1 343 } 344 345 ssn.AddJobOrderFn(drf.Name(), jobOrderFn) 346 347 // Register event handlers. 348 ssn.AddEventHandler(&framework.EventHandler{ 349 AllocateFunc: func(event *framework.Event) { 350 attr := drf.jobAttrs[event.Task.Job] 351 attr.allocated.Add(event.Task.Resreq) 352 353 job := ssn.Jobs[event.Task.Job] 354 drf.updateJobShare(job.Namespace, job.Name, attr) 355 356 nsShare := -1.0 357 if hierarchyEnabled { 358 queue := ssn.Queues[job.Queue] 359 360 drf.totalAllocated.Add(event.Task.Resreq) 361 drf.UpdateHierarchicalShare(drf.hierarchicalRoot, drf.totalAllocated, job, attr, queue.Hierarchy, queue.Weights) 362 } 363 364 klog.V(4).Infof("DRF AllocateFunc: task <%v/%v>, resreq <%v>, share <%v>, namespace share <%v>", 365 event.Task.Namespace, event.Task.Name, event.Task.Resreq, attr.share, nsShare) 366 }, 367 DeallocateFunc: func(event *framework.Event) { 368 attr := drf.jobAttrs[event.Task.Job] 369 attr.allocated.Sub(event.Task.Resreq) 370 371 job := ssn.Jobs[event.Task.Job] 372 drf.updateJobShare(job.Namespace, job.Name, attr) 373 374 nsShare := -1.0 375 376 if hierarchyEnabled { 377 queue := ssn.Queues[job.Queue] 378 drf.totalAllocated.Sub(event.Task.Resreq) 379 drf.UpdateHierarchicalShare(drf.hierarchicalRoot, drf.totalAllocated, job, attr, queue.Hierarchy, queue.Weights) 380 } 381 382 klog.V(4).Infof("DRF EvictFunc: task <%v/%v>, resreq <%v>, share <%v>, namespace share <%v>", 383 event.Task.Namespace, event.Task.Name, event.Task.Resreq, attr.share, nsShare) 384 }, 385 }) 386 } 387 388 // build hierarchy if the node does not exist 389 func (drf *drfPlugin) buildHierarchy(root *hierarchicalNode, job *api.JobInfo, attr *drfAttr, 390 hierarchy, hierarchicalWeights string) { 391 inode := root 392 paths := strings.Split(hierarchy, "/") 393 weights := strings.Split(hierarchicalWeights, "/") 394 395 for i := 1; i < len(paths); i++ { 396 if child, ok := inode.children[paths[i]]; ok { 397 inode = child 398 } else { 399 fweight, _ := strconv.ParseFloat(weights[i], 64) 400 if fweight < 1 { 401 fweight = 1 402 } 403 child = &hierarchicalNode{ 404 weight: fweight, 405 hierarchy: paths[i], 406 request: api.EmptyResource(), 407 attr: &drfAttr{ 408 allocated: api.EmptyResource(), 409 }, 410 children: make(map[string]*hierarchicalNode), 411 } 412 klog.V(4).Infof("Node %s added to %s, weight %f", 413 child.hierarchy, inode.hierarchy, fweight) 414 inode.children[paths[i]] = child 415 child.parent = inode 416 inode = child 417 } 418 } 419 420 child := &hierarchicalNode{ 421 weight: 1, 422 attr: attr, 423 hierarchy: string(job.UID), 424 request: job.TotalRequest.Clone(), 425 children: nil, 426 } 427 inode.children[string(job.UID)] = child 428 // update drf attribute bottom up 429 klog.V(4).Infof("Job <%s/%s> added to %s, weights %s, attr %v, total request: %s", 430 job.Namespace, job.Name, inode.hierarchy, hierarchicalWeights, child.attr, job.TotalRequest) 431 } 432 433 // updateHierarchicalShare updates the node attribute recursively 434 func (drf *drfPlugin) updateHierarchicalShare(node *hierarchicalNode, 435 demandingResources map[v1.ResourceName]bool) { 436 if node.children == nil { 437 node.saturated = resourceSaturated(node.attr.allocated, 438 node.request, demandingResources) 439 klog.V(4).Infof("Update hierarchical node %s, share %f, dominant %s, resource %v, saturated: %t", 440 node.hierarchy, node.attr.share, node.attr.dominantResource, node.attr.allocated, node.saturated) 441 } else { 442 var mdr float64 = 1 443 // get minimun dominant resource share 444 for _, child := range node.children { 445 drf.updateHierarchicalShare(child, demandingResources) 446 // skip empty child and saturated child 447 if child.attr.share != 0 && !child.saturated { 448 _, resShare := drf.calculateShare(child.attr.allocated, drf.totalResource) 449 if resShare < mdr { 450 mdr = resShare 451 } 452 } 453 } 454 455 node.attr.allocated = api.EmptyResource() 456 saturated := true 457 for _, child := range node.children { 458 if !child.saturated { 459 saturated = false 460 } 461 // only consider non-empty children 462 if child.attr.share != 0 { 463 // saturated child is not scaled 464 if child.saturated { 465 t := child.attr.allocated 466 node.attr.allocated.Add(t) 467 } else { 468 t := child.attr.allocated.Clone().Multi(mdr / child.attr.share) 469 node.attr.allocated.Add(t) 470 } 471 } 472 } 473 node.attr.dominantResource, node.attr.share = drf.calculateShare( 474 node.attr.allocated, drf.totalResource) 475 node.saturated = saturated 476 klog.V(4).Infof("Update hierarchical node %s, share %f, dominant resource %s, resource %v, saturated: %t", 477 node.hierarchy, node.attr.share, node.attr.dominantResource, node.attr.allocated, node.saturated) 478 } 479 } 480 481 func (drf *drfPlugin) UpdateHierarchicalShare(root *hierarchicalNode, totalAllocated *api.Resource, job *api.JobInfo, attr *drfAttr, hierarchy, hierarchicalWeights string) { 482 // filter out demanding resources 483 demandingResources := map[v1.ResourceName]bool{} 484 for _, rn := range drf.totalResource.ResourceNames() { 485 if totalAllocated.Get(rn) < drf.totalResource.Get(rn) { 486 demandingResources[rn] = true 487 } 488 } 489 drf.buildHierarchy(root, job, attr, hierarchy, hierarchicalWeights) 490 drf.updateHierarchicalShare(root, demandingResources) 491 } 492 493 func (drf *drfPlugin) updateJobShare(jobNs, jobName string, attr *drfAttr) { 494 drf.updateShare(attr) 495 metrics.UpdateJobShare(jobNs, jobName, attr.share) 496 } 497 498 func (drf *drfPlugin) updateShare(attr *drfAttr) { 499 attr.dominantResource, attr.share = drf.calculateShare(attr.allocated, drf.totalResource) 500 } 501 502 func (drf *drfPlugin) calculateShare(allocated, totalResource *api.Resource) (string, float64) { 503 res := float64(0) 504 dominantResource := "" 505 for _, rn := range totalResource.ResourceNames() { 506 share := helpers.Share(allocated.Get(rn), totalResource.Get(rn)) 507 if share > res { 508 res = share 509 dominantResource = string(rn) 510 } 511 } 512 513 return dominantResource, res 514 } 515 516 func (drf *drfPlugin) OnSessionClose(session *framework.Session) { 517 // Clean schedule data. 518 drf.totalResource = api.EmptyResource() 519 drf.totalAllocated = api.EmptyResource() 520 drf.jobAttrs = map[api.JobID]*drfAttr{} 521 }