volcano.sh/volcano@v1.9.0/pkg/scheduler/plugins/proportion/proportion.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package proportion 18 19 import ( 20 "math" 21 "reflect" 22 23 v1 "k8s.io/api/core/v1" 24 "k8s.io/klog/v2" 25 26 "volcano.sh/apis/pkg/apis/scheduling" 27 "volcano.sh/volcano/pkg/scheduler/api" 28 "volcano.sh/volcano/pkg/scheduler/api/helpers" 29 "volcano.sh/volcano/pkg/scheduler/framework" 30 "volcano.sh/volcano/pkg/scheduler/metrics" 31 "volcano.sh/volcano/pkg/scheduler/plugins/util" 32 ) 33 34 // PluginName indicates name of volcano scheduler plugin. 35 const PluginName = "proportion" 36 37 type proportionPlugin struct { 38 totalResource *api.Resource 39 totalGuarantee *api.Resource 40 queueOpts map[api.QueueID]*queueAttr 41 // Arguments given for the plugin 42 pluginArguments framework.Arguments 43 } 44 45 type queueAttr struct { 46 queueID api.QueueID 47 name string 48 weight int32 49 share float64 50 51 deserved *api.Resource 52 allocated *api.Resource 53 request *api.Resource 54 // elastic represents the sum of job's elastic resource, job's elastic = job.allocated - job.minAvailable 55 elastic *api.Resource 56 // inqueue represents the resource request of the inqueue job 57 inqueue *api.Resource 58 capability *api.Resource 59 // realCapability represents the resource limit of the queue, LessEqual capability 60 realCapability *api.Resource 61 guarantee *api.Resource 62 } 63 64 // New return proportion action 65 func New(arguments framework.Arguments) framework.Plugin { 66 return &proportionPlugin{ 67 totalResource: api.EmptyResource(), 68 totalGuarantee: api.EmptyResource(), 69 queueOpts: map[api.QueueID]*queueAttr{}, 70 pluginArguments: arguments, 71 } 72 } 73 74 func (pp *proportionPlugin) Name() string { 75 return PluginName 76 } 77 78 func (pp *proportionPlugin) OnSessionOpen(ssn *framework.Session) { 79 // Prepare scheduling data for this session. 80 pp.totalResource.Add(ssn.TotalResource) 81 82 klog.V(4).Infof("The total resource is <%v>", pp.totalResource) 83 for _, queue := range ssn.Queues { 84 if len(queue.Queue.Spec.Guarantee.Resource) == 0 { 85 continue 86 } 87 guarantee := api.NewResource(queue.Queue.Spec.Guarantee.Resource) 88 pp.totalGuarantee.Add(guarantee) 89 } 90 klog.V(4).Infof("The total guarantee resource is <%v>", pp.totalGuarantee) 91 // Build attributes for Queues. 92 for _, job := range ssn.Jobs { 93 klog.V(4).Infof("Considering Job <%s/%s>.", job.Namespace, job.Name) 94 if _, found := pp.queueOpts[job.Queue]; !found { 95 queue := ssn.Queues[job.Queue] 96 attr := &queueAttr{ 97 queueID: queue.UID, 98 name: queue.Name, 99 weight: queue.Weight, 100 101 deserved: api.EmptyResource(), 102 allocated: api.EmptyResource(), 103 request: api.EmptyResource(), 104 elastic: api.EmptyResource(), 105 inqueue: api.EmptyResource(), 106 guarantee: api.EmptyResource(), 107 } 108 if len(queue.Queue.Spec.Capability) != 0 { 109 attr.capability = api.NewResource(queue.Queue.Spec.Capability) 110 if attr.capability.MilliCPU <= 0 { 111 attr.capability.MilliCPU = math.MaxFloat64 112 } 113 if attr.capability.Memory <= 0 { 114 attr.capability.Memory = math.MaxFloat64 115 } 116 } 117 if len(queue.Queue.Spec.Guarantee.Resource) != 0 { 118 attr.guarantee = api.NewResource(queue.Queue.Spec.Guarantee.Resource) 119 } 120 realCapability := pp.totalResource.Clone().Sub(pp.totalGuarantee).Add(attr.guarantee) 121 if attr.capability == nil { 122 attr.realCapability = realCapability 123 } else { 124 realCapability.MinDimensionResource(attr.capability, api.Infinity) 125 attr.realCapability = realCapability 126 } 127 pp.queueOpts[job.Queue] = attr 128 klog.V(4).Infof("Added Queue <%s> attributes.", job.Queue) 129 } 130 131 attr := pp.queueOpts[job.Queue] 132 for status, tasks := range job.TaskStatusIndex { 133 if api.AllocatedStatus(status) { 134 for _, t := range tasks { 135 attr.allocated.Add(t.Resreq) 136 attr.request.Add(t.Resreq) 137 } 138 } else if status == api.Pending { 139 for _, t := range tasks { 140 attr.request.Add(t.Resreq) 141 } 142 } 143 } 144 145 if job.PodGroup.Status.Phase == scheduling.PodGroupInqueue { 146 attr.inqueue.Add(job.GetMinResources()) 147 } 148 149 // calculate inqueue resource for running jobs 150 // the judgement 'job.PodGroup.Status.Running >= job.PodGroup.Spec.MinMember' will work on cases such as the following condition: 151 // Considering a Spark job is completed(driver pod is completed) while the podgroup keeps running, the allocated resource will be reserved again if without the judgement. 152 if job.PodGroup.Status.Phase == scheduling.PodGroupRunning && 153 job.PodGroup.Spec.MinResources != nil && 154 int32(util.CalculateAllocatedTaskNum(job)) >= job.PodGroup.Spec.MinMember { 155 inqueued := util.GetInqueueResource(job, job.Allocated) 156 attr.inqueue.Add(inqueued) 157 } 158 attr.elastic.Add(job.GetElasticResources()) 159 klog.V(5).Infof("Queue %s allocated <%s> request <%s> inqueue <%s> elastic <%s>", 160 attr.name, attr.allocated.String(), attr.request.String(), attr.inqueue.String(), attr.elastic.String()) 161 } 162 163 // Record metrics 164 for queueID, queueInfo := range ssn.Queues { 165 if attr, ok := pp.queueOpts[queueID]; ok { 166 metrics.UpdateQueueAllocated(attr.name, attr.allocated.MilliCPU, attr.allocated.Memory) 167 metrics.UpdateQueueRequest(attr.name, attr.request.MilliCPU, attr.request.Memory) 168 metrics.UpdateQueueWeight(attr.name, attr.weight) 169 queue := ssn.Queues[attr.queueID] 170 metrics.UpdateQueuePodGroupInqueueCount(attr.name, queue.Queue.Status.Inqueue) 171 metrics.UpdateQueuePodGroupPendingCount(attr.name, queue.Queue.Status.Pending) 172 metrics.UpdateQueuePodGroupRunningCount(attr.name, queue.Queue.Status.Running) 173 metrics.UpdateQueuePodGroupUnknownCount(attr.name, queue.Queue.Status.Unknown) 174 continue 175 } 176 metrics.UpdateQueueAllocated(queueInfo.Name, 0, 0) 177 metrics.UpdateQueueRequest(queueInfo.Name, 0, 0) 178 metrics.UpdateQueuePodGroupInqueueCount(queueInfo.Name, 0) 179 metrics.UpdateQueuePodGroupPendingCount(queueInfo.Name, 0) 180 metrics.UpdateQueuePodGroupRunningCount(queueInfo.Name, 0) 181 metrics.UpdateQueuePodGroupUnknownCount(queueInfo.Name, 0) 182 } 183 184 remaining := pp.totalResource.Clone() 185 meet := map[api.QueueID]struct{}{} 186 for { 187 totalWeight := int32(0) 188 for _, attr := range pp.queueOpts { 189 if _, found := meet[attr.queueID]; found { 190 continue 191 } 192 totalWeight += attr.weight 193 } 194 195 // If no queues, break 196 if totalWeight == 0 { 197 klog.V(4).Infof("Exiting when total weight is 0") 198 break 199 } 200 201 oldRemaining := remaining.Clone() 202 // Calculates the deserved of each Queue. 203 // increasedDeserved is the increased value for attr.deserved of processed queues 204 // decreasedDeserved is the decreased value for attr.deserved of processed queues 205 increasedDeserved := api.EmptyResource() 206 decreasedDeserved := api.EmptyResource() 207 for _, attr := range pp.queueOpts { 208 klog.V(4).Infof("Considering Queue <%s>: weight <%d>, total weight <%d>.", 209 attr.name, attr.weight, totalWeight) 210 if _, found := meet[attr.queueID]; found { 211 continue 212 } 213 214 oldDeserved := attr.deserved.Clone() 215 attr.deserved.Add(remaining.Clone().Multi(float64(attr.weight) / float64(totalWeight))) 216 217 if attr.realCapability != nil { 218 attr.deserved.MinDimensionResource(attr.realCapability, api.Infinity) 219 } 220 attr.deserved.MinDimensionResource(attr.request, api.Zero) 221 222 attr.deserved = helpers.Max(attr.deserved, attr.guarantee) 223 pp.updateShare(attr) 224 klog.V(4).Infof("Format queue <%s> deserved resource to <%v>", attr.name, attr.deserved) 225 226 if attr.request.LessEqual(attr.deserved, api.Zero) { 227 meet[attr.queueID] = struct{}{} 228 klog.V(4).Infof("queue <%s> is meet", attr.name) 229 } else if reflect.DeepEqual(attr.deserved, oldDeserved) { 230 meet[attr.queueID] = struct{}{} 231 klog.V(4).Infof("queue <%s> is meet cause of the capability", attr.name) 232 } 233 234 klog.V(4).Infof("The attributes of queue <%s> in proportion: deserved <%v>, realCapability <%v>, allocate <%v>, request <%v>, elastic <%v>, share <%0.2f>", 235 attr.name, attr.deserved, attr.realCapability, attr.allocated, attr.request, attr.elastic, attr.share) 236 237 increased, decreased := attr.deserved.Diff(oldDeserved, api.Zero) 238 increasedDeserved.Add(increased) 239 decreasedDeserved.Add(decreased) 240 241 // Record metrics 242 metrics.UpdateQueueDeserved(attr.name, attr.deserved.MilliCPU, attr.deserved.Memory) 243 } 244 245 remaining.Sub(increasedDeserved).Add(decreasedDeserved) 246 klog.V(4).Infof("Remaining resource is <%s>", remaining) 247 if remaining.IsEmpty() || reflect.DeepEqual(remaining, oldRemaining) { 248 klog.V(4).Infof("Exiting when remaining is empty or no queue has more resource request: <%v>", remaining) 249 break 250 } 251 } 252 253 ssn.AddQueueOrderFn(pp.Name(), func(l, r interface{}) int { 254 lv := l.(*api.QueueInfo) 255 rv := r.(*api.QueueInfo) 256 257 if pp.queueOpts[lv.UID].share == pp.queueOpts[rv.UID].share { 258 return 0 259 } 260 261 if pp.queueOpts[lv.UID].share < pp.queueOpts[rv.UID].share { 262 return -1 263 } 264 265 return 1 266 }) 267 268 ssn.AddReclaimableFn(pp.Name(), func(reclaimer *api.TaskInfo, reclaimees []*api.TaskInfo) ([]*api.TaskInfo, int) { 269 var victims []*api.TaskInfo 270 allocations := map[api.QueueID]*api.Resource{} 271 272 for _, reclaimee := range reclaimees { 273 job := ssn.Jobs[reclaimee.Job] 274 attr := pp.queueOpts[job.Queue] 275 276 if _, found := allocations[job.Queue]; !found { 277 allocations[job.Queue] = attr.allocated.Clone() 278 } 279 allocated := allocations[job.Queue] 280 if allocated.LessPartly(reclaimer.Resreq, api.Zero) { 281 klog.V(3).Infof("Failed to allocate resource for Task <%s/%s> in Queue <%s>, not enough resource.", 282 reclaimee.Namespace, reclaimee.Name, job.Queue) 283 continue 284 } 285 286 if !allocated.LessEqual(attr.deserved, api.Zero) { 287 allocated.Sub(reclaimee.Resreq) 288 victims = append(victims, reclaimee) 289 } 290 } 291 klog.V(4).Infof("Victims from proportion plugins are %+v", victims) 292 return victims, util.Permit 293 }) 294 295 ssn.AddOverusedFn(pp.Name(), func(obj interface{}) bool { 296 queue := obj.(*api.QueueInfo) 297 attr := pp.queueOpts[queue.UID] 298 299 overused := attr.deserved.LessEqual(attr.allocated, api.Zero) 300 metrics.UpdateQueueOverused(attr.name, overused) 301 if overused { 302 klog.V(3).Infof("Queue <%v>: deserved <%v>, allocated <%v>, share <%v>", 303 queue.Name, attr.deserved, attr.allocated, attr.share) 304 } 305 306 return overused 307 }) 308 309 ssn.AddAllocatableFn(pp.Name(), func(queue *api.QueueInfo, candidate *api.TaskInfo) bool { 310 attr := pp.queueOpts[queue.UID] 311 312 free, _ := attr.deserved.Diff(attr.allocated, api.Zero) 313 allocatable := candidate.Resreq.LessEqual(free, api.Zero) 314 if !allocatable { 315 klog.V(3).Infof("Queue <%v>: deserved <%v>, allocated <%v>; Candidate <%v>: resource request <%v>", 316 queue.Name, attr.deserved, attr.allocated, candidate.Name, candidate.Resreq) 317 } 318 319 return allocatable 320 }) 321 322 ssn.AddJobEnqueueableFn(pp.Name(), func(obj interface{}) int { 323 job := obj.(*api.JobInfo) 324 queueID := job.Queue 325 attr := pp.queueOpts[queueID] 326 queue := ssn.Queues[queueID] 327 // If no capability is set, always enqueue the job. 328 if attr.realCapability == nil { 329 klog.V(4).Infof("Capability of queue <%s> was not set, allow job <%s/%s> to Inqueue.", 330 queue.Name, job.Namespace, job.Name) 331 return util.Permit 332 } 333 334 if job.PodGroup.Spec.MinResources == nil { 335 klog.V(4).Infof("job %s MinResources is null.", job.Name) 336 return util.Permit 337 } 338 minReq := job.GetMinResources() 339 340 klog.V(5).Infof("job %s min resource <%s>, queue %s capability <%s> allocated <%s> inqueue <%s> elastic <%s>", 341 job.Name, minReq.String(), queue.Name, attr.realCapability.String(), attr.allocated.String(), attr.inqueue.String(), attr.elastic.String()) 342 // The queue resource quota limit has not reached 343 r := minReq.Add(attr.allocated).Add(attr.inqueue).Sub(attr.elastic) 344 rr := attr.realCapability.Clone() 345 346 for name := range rr.ScalarResources { 347 if _, ok := r.ScalarResources[name]; !ok { 348 delete(rr.ScalarResources, name) 349 } 350 } 351 352 inqueue := r.LessEqual(rr, api.Infinity) 353 klog.V(5).Infof("job %s inqueue %v", job.Name, inqueue) 354 if inqueue { 355 attr.inqueue.Add(job.GetMinResources()) 356 return util.Permit 357 } 358 ssn.RecordPodGroupEvent(job.PodGroup, v1.EventTypeNormal, string(scheduling.PodGroupUnschedulableType), "queue resource quota insufficient") 359 return util.Reject 360 }) 361 362 // Register event handlers. 363 ssn.AddEventHandler(&framework.EventHandler{ 364 AllocateFunc: func(event *framework.Event) { 365 job := ssn.Jobs[event.Task.Job] 366 attr := pp.queueOpts[job.Queue] 367 attr.allocated.Add(event.Task.Resreq) 368 metrics.UpdateQueueAllocated(attr.name, attr.allocated.MilliCPU, attr.allocated.Memory) 369 370 pp.updateShare(attr) 371 372 klog.V(4).Infof("Proportion AllocateFunc: task <%v/%v>, resreq <%v>, share <%v>", 373 event.Task.Namespace, event.Task.Name, event.Task.Resreq, attr.share) 374 }, 375 DeallocateFunc: func(event *framework.Event) { 376 job := ssn.Jobs[event.Task.Job] 377 attr := pp.queueOpts[job.Queue] 378 attr.allocated.Sub(event.Task.Resreq) 379 metrics.UpdateQueueAllocated(attr.name, attr.allocated.MilliCPU, attr.allocated.Memory) 380 381 pp.updateShare(attr) 382 383 klog.V(4).Infof("Proportion EvictFunc: task <%v/%v>, resreq <%v>, share <%v>", 384 event.Task.Namespace, event.Task.Name, event.Task.Resreq, attr.share) 385 }, 386 }) 387 } 388 389 func (pp *proportionPlugin) OnSessionClose(ssn *framework.Session) { 390 pp.totalResource = nil 391 pp.totalGuarantee = nil 392 pp.queueOpts = nil 393 } 394 395 func (pp *proportionPlugin) updateShare(attr *queueAttr) { 396 res := float64(0) 397 398 // TODO(k82cn): how to handle fragment issues? 399 for _, rn := range attr.deserved.ResourceNames() { 400 share := helpers.Share(attr.allocated.Get(rn), attr.deserved.Get(rn)) 401 if share > res { 402 res = share 403 } 404 } 405 406 attr.share = res 407 metrics.UpdateQueueShare(attr.name, attr.share) 408 }