volcano.sh/volcano@v1.9.0/pkg/scheduler/plugins/cdp/cdp.go (about)

     1  /*
     2  Copyright 2022 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package cdp
    18  
    19  import (
    20  	"time"
    21  
    22  	v1 "k8s.io/api/core/v1"
    23  	"k8s.io/klog/v2"
    24  
    25  	"volcano.sh/apis/pkg/apis/scheduling/v1beta1"
    26  	"volcano.sh/volcano/pkg/scheduler/api"
    27  	"volcano.sh/volcano/pkg/scheduler/framework"
    28  	"volcano.sh/volcano/pkg/scheduler/plugins/util"
    29  )
    30  
    31  const (
    32  	// refer to issue https://github.com/volcano-sh/volcano/issues/2075,
    33  	// plugin cdp means cooldown protection, related to elastic scheduler,
    34  	// when we need to enable elastic training or serving,
    35  	// preemptible job's pods can be preempted or back to running repeatedly,
    36  	// if no cooldown protection set, these pods can be preempted again after they just started for a short time,
    37  	// this may cause service stability dropped.
    38  	// cdp plugin here is to ensure vcjob's pods cannot be preempted within cooldown protection conditions.
    39  	// currently cdp plugin only support cooldown time protection.
    40  	PluginName = "cdp"
    41  )
    42  
    43  type CooldownProtectionPlugin struct {
    44  }
    45  
    46  // New return CooldownProtectionPlugin
    47  func New(arguments framework.Arguments) framework.Plugin {
    48  	return &CooldownProtectionPlugin{}
    49  }
    50  
    51  // Name implements framework.Plugin
    52  func (*CooldownProtectionPlugin) Name() string {
    53  	return PluginName
    54  }
    55  
    56  func (sp *CooldownProtectionPlugin) podCooldownTime(pod *v1.Pod) (value time.Duration, enabled bool) {
    57  	// check labels and annotations
    58  	v, ok := pod.Labels[v1beta1.CooldownTime]
    59  	if !ok {
    60  		v, ok = pod.Annotations[v1beta1.CooldownTime]
    61  		if !ok {
    62  			return 0, false
    63  		}
    64  	}
    65  	vi, err := time.ParseDuration(v)
    66  	if err != nil {
    67  		klog.Warningf("invalid time duration %s=%s", v1beta1.CooldownTime, v)
    68  		return 0, false
    69  	}
    70  	return vi, true
    71  }
    72  
    73  // OnSessionOpen implements framework.Plugin
    74  func (sp *CooldownProtectionPlugin) OnSessionOpen(ssn *framework.Session) {
    75  	preemptableFn := func(preemptor *api.TaskInfo, preemptees []*api.TaskInfo) ([]*api.TaskInfo, int) {
    76  		var victims []*api.TaskInfo
    77  		for _, preemptee := range preemptees {
    78  			cooldownTime, enabled := sp.podCooldownTime(preemptee.Pod)
    79  			if !enabled {
    80  				victims = append(victims, preemptee)
    81  				continue
    82  			}
    83  			pod := preemptee.Pod
    84  			// find the time of pod really transform to running
    85  			// only running pod check stable time, others all put into victims
    86  			stableFiltered := false
    87  			if pod.Status.Phase == v1.PodRunning {
    88  				// ensure pod is running and have ready state
    89  				for _, c := range pod.Status.Conditions {
    90  					if c.Type == v1.PodScheduled && c.Status == v1.ConditionTrue {
    91  						if c.LastTransitionTime.Add(cooldownTime).After(time.Now()) {
    92  							stableFiltered = true
    93  						}
    94  						break
    95  					}
    96  				}
    97  			}
    98  			if !stableFiltered {
    99  				victims = append(victims, preemptee)
   100  			}
   101  		}
   102  
   103  		klog.V(4).Infof("Victims from cdp plugins are %+v", victims)
   104  		return victims, util.Permit
   105  	}
   106  
   107  	klog.V(4).Info("plugin cdp session open")
   108  	ssn.AddPreemptableFn(sp.Name(), preemptableFn)
   109  }
   110  
   111  // OnSessionClose implements framework.Plugin
   112  func (*CooldownProtectionPlugin) OnSessionClose(ssn *framework.Session) {}