volcano.sh/volcano@v1.9.0/pkg/scheduler/plugins/cdp/cdp.go (about) 1 /* 2 Copyright 2022 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package cdp 18 19 import ( 20 "time" 21 22 v1 "k8s.io/api/core/v1" 23 "k8s.io/klog/v2" 24 25 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 26 "volcano.sh/volcano/pkg/scheduler/api" 27 "volcano.sh/volcano/pkg/scheduler/framework" 28 "volcano.sh/volcano/pkg/scheduler/plugins/util" 29 ) 30 31 const ( 32 // refer to issue https://github.com/volcano-sh/volcano/issues/2075, 33 // plugin cdp means cooldown protection, related to elastic scheduler, 34 // when we need to enable elastic training or serving, 35 // preemptible job's pods can be preempted or back to running repeatedly, 36 // if no cooldown protection set, these pods can be preempted again after they just started for a short time, 37 // this may cause service stability dropped. 38 // cdp plugin here is to ensure vcjob's pods cannot be preempted within cooldown protection conditions. 39 // currently cdp plugin only support cooldown time protection. 40 PluginName = "cdp" 41 ) 42 43 type CooldownProtectionPlugin struct { 44 } 45 46 // New return CooldownProtectionPlugin 47 func New(arguments framework.Arguments) framework.Plugin { 48 return &CooldownProtectionPlugin{} 49 } 50 51 // Name implements framework.Plugin 52 func (*CooldownProtectionPlugin) Name() string { 53 return PluginName 54 } 55 56 func (sp *CooldownProtectionPlugin) podCooldownTime(pod *v1.Pod) (value time.Duration, enabled bool) { 57 // check labels and annotations 58 v, ok := pod.Labels[v1beta1.CooldownTime] 59 if !ok { 60 v, ok = pod.Annotations[v1beta1.CooldownTime] 61 if !ok { 62 return 0, false 63 } 64 } 65 vi, err := time.ParseDuration(v) 66 if err != nil { 67 klog.Warningf("invalid time duration %s=%s", v1beta1.CooldownTime, v) 68 return 0, false 69 } 70 return vi, true 71 } 72 73 // OnSessionOpen implements framework.Plugin 74 func (sp *CooldownProtectionPlugin) OnSessionOpen(ssn *framework.Session) { 75 preemptableFn := func(preemptor *api.TaskInfo, preemptees []*api.TaskInfo) ([]*api.TaskInfo, int) { 76 var victims []*api.TaskInfo 77 for _, preemptee := range preemptees { 78 cooldownTime, enabled := sp.podCooldownTime(preemptee.Pod) 79 if !enabled { 80 victims = append(victims, preemptee) 81 continue 82 } 83 pod := preemptee.Pod 84 // find the time of pod really transform to running 85 // only running pod check stable time, others all put into victims 86 stableFiltered := false 87 if pod.Status.Phase == v1.PodRunning { 88 // ensure pod is running and have ready state 89 for _, c := range pod.Status.Conditions { 90 if c.Type == v1.PodScheduled && c.Status == v1.ConditionTrue { 91 if c.LastTransitionTime.Add(cooldownTime).After(time.Now()) { 92 stableFiltered = true 93 } 94 break 95 } 96 } 97 } 98 if !stableFiltered { 99 victims = append(victims, preemptee) 100 } 101 } 102 103 klog.V(4).Infof("Victims from cdp plugins are %+v", victims) 104 return victims, util.Permit 105 } 106 107 klog.V(4).Info("plugin cdp session open") 108 ssn.AddPreemptableFn(sp.Name(), preemptableFn) 109 } 110 111 // OnSessionClose implements framework.Plugin 112 func (*CooldownProtectionPlugin) OnSessionClose(ssn *framework.Session) {}