github.com/kubeflow/training-operator@v1.7.0/pkg/controller.v1/control/podgroup_control.go (about) 1 /* 2 Copyright 2023 The Kubeflow Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package control 18 19 import ( 20 "context" 21 "fmt" 22 23 corev1 "k8s.io/api/core/v1" 24 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 25 "k8s.io/apimachinery/pkg/types" 26 "k8s.io/klog/v2" 27 "sigs.k8s.io/controller-runtime/pkg/client" 28 schedulerpluginsv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" 29 volcanobatchv1alpha1 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 30 volcanov1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 31 volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned" 32 ) 33 34 // PodGroupControlInterface is an interface that knows how to add or delete PodGroups 35 // created as an interface to allow testing. 36 type PodGroupControlInterface interface { 37 // NewEmptyPodGroup returns an empty PodGroup. 38 NewEmptyPodGroup() client.Object 39 // GetPodGroup gets the PodGroup identified by namespace and name. 40 GetPodGroup(namespace string, name string) (metav1.Object, error) 41 // DeletePodGroup deletes the PodGroup identified by namespace and name. 42 DeletePodGroup(namespace string, name string) error 43 // UpdatePodGroup updates a PodGroup. 44 UpdatePodGroup(podGroup client.Object) error 45 // CreatePodGroup creates a new PodGroup with PodGroup spec fill function. 46 CreatePodGroup(podGroup client.Object) error 47 // DelayPodCreationDueToPodGroup determines whether it should delay Pod Creation. 48 DelayPodCreationDueToPodGroup(pg metav1.Object) bool 49 // DecoratePodTemplateSpec decorates PodTemplateSpec. 50 // If the PodTemplateSpec has SchedulerName set, this method will Not override. 51 DecoratePodTemplateSpec(pts *corev1.PodTemplateSpec, job metav1.Object, rtype string) 52 // GetSchedulerName returns the name of the gang scheduler. 53 GetSchedulerName() string 54 } 55 56 // VolcanoControl is the implementation of PodGroupControlInterface with volcano. 57 type VolcanoControl struct { 58 Client volcanoclient.Interface 59 } 60 61 func (v *VolcanoControl) GetSchedulerName() string { 62 return "volcano" 63 } 64 65 func (v *VolcanoControl) DecoratePodTemplateSpec(pts *corev1.PodTemplateSpec, job metav1.Object, rtype string) { 66 if len(pts.Spec.SchedulerName) == 0 { 67 pts.Spec.SchedulerName = v.GetSchedulerName() 68 } 69 if pts.Annotations == nil { 70 pts.Annotations = make(map[string]string) 71 } 72 pts.Annotations[volcanov1beta1.KubeGroupNameAnnotationKey] = job.GetName() 73 pts.Annotations[volcanobatchv1alpha1.TaskSpecKey] = rtype 74 } 75 76 // NewVolcanoControl returns a VolcanoControl 77 func NewVolcanoControl(vci volcanoclient.Interface) PodGroupControlInterface { 78 return &VolcanoControl{Client: vci} 79 } 80 81 func (v *VolcanoControl) DelayPodCreationDueToPodGroup(pg metav1.Object) bool { 82 if pg == nil { 83 return true 84 } 85 volcanoPodGroup := pg.(*volcanov1beta1.PodGroup) 86 return len(volcanoPodGroup.Status.Phase) == 0 || volcanoPodGroup.Status.Phase == volcanov1beta1.PodGroupPending 87 } 88 89 func (v *VolcanoControl) NewEmptyPodGroup() client.Object { 90 return &volcanov1beta1.PodGroup{} 91 } 92 93 func (v *VolcanoControl) GetPodGroup(namespace string, name string) (metav1.Object, error) { 94 pg, err := v.Client.SchedulingV1beta1().PodGroups(namespace).Get(context.TODO(), name, metav1.GetOptions{}) 95 if err != nil { 96 return nil, err 97 } 98 return pg, nil 99 } 100 101 func (v *VolcanoControl) DeletePodGroup(namespace string, name string) error { 102 return v.Client.SchedulingV1beta1().PodGroups(namespace).Delete(context.TODO(), name, metav1.DeleteOptions{}) 103 } 104 105 func (v *VolcanoControl) UpdatePodGroup(podGroup client.Object) error { 106 pg := podGroup.(*volcanov1beta1.PodGroup) 107 _, err := v.Client.SchedulingV1beta1().PodGroups(pg.GetNamespace()).Update(context.TODO(), pg, metav1.UpdateOptions{}) 108 if err != nil { 109 return fmt.Errorf("unable to update a PodGroup, '%v': %v", klog.KObj(pg), err) 110 } 111 return nil 112 } 113 114 func (v *VolcanoControl) CreatePodGroup(podGroup client.Object) error { 115 pg := podGroup.(*volcanov1beta1.PodGroup) 116 _, err := v.Client.SchedulingV1beta1().PodGroups(pg.GetNamespace()).Create(context.TODO(), pg, metav1.CreateOptions{}) 117 if err != nil { 118 return fmt.Errorf("unable to create PodGroup: %v", err) 119 } 120 return nil 121 } 122 123 var _ PodGroupControlInterface = &VolcanoControl{} 124 125 // SchedulerPluginsControl is the implementation of PodGroupControlInterface with scheduler-plugins. 126 type SchedulerPluginsControl struct { 127 Client client.Client 128 SchedulerName string 129 } 130 131 func (s *SchedulerPluginsControl) DecoratePodTemplateSpec(pts *corev1.PodTemplateSpec, job metav1.Object, _ string) { 132 if len(pts.Spec.SchedulerName) == 0 { 133 pts.Spec.SchedulerName = s.GetSchedulerName() 134 } 135 136 if pts.Labels == nil { 137 pts.Labels = make(map[string]string) 138 } 139 pts.Labels[schedulerpluginsv1alpha1.PodGroupLabel] = job.GetName() 140 } 141 142 func (s *SchedulerPluginsControl) GetSchedulerName() string { 143 return s.SchedulerName 144 } 145 146 // NewSchedulerPluginsControl returns a SchedulerPluginsControl 147 func NewSchedulerPluginsControl(c client.Client, schedulerName string) PodGroupControlInterface { 148 return &SchedulerPluginsControl{Client: c, SchedulerName: schedulerName} 149 } 150 151 func (s *SchedulerPluginsControl) DelayPodCreationDueToPodGroup(pg metav1.Object) bool { 152 return false 153 } 154 155 func (s *SchedulerPluginsControl) NewEmptyPodGroup() client.Object { 156 return &schedulerpluginsv1alpha1.PodGroup{} 157 } 158 159 func (s *SchedulerPluginsControl) GetPodGroup(namespace, name string) (metav1.Object, error) { 160 pg := &schedulerpluginsv1alpha1.PodGroup{} 161 ctx := context.TODO() 162 key := types.NamespacedName{ 163 Namespace: namespace, 164 Name: name, 165 } 166 if err := s.Client.Get(ctx, key, pg); err != nil { 167 return nil, err 168 } 169 return pg, nil 170 } 171 172 func (s *SchedulerPluginsControl) DeletePodGroup(namespace, name string) error { 173 ctx := context.TODO() 174 pg := s.NewEmptyPodGroup() 175 pg.SetNamespace(namespace) 176 pg.SetName(name) 177 178 return s.Client.Delete(ctx, pg) 179 } 180 181 func (s *SchedulerPluginsControl) UpdatePodGroup(podGroup client.Object) error { 182 pg := podGroup.(*schedulerpluginsv1alpha1.PodGroup) 183 err := s.Client.Update(context.TODO(), pg, &client.UpdateOptions{}) 184 if err != nil { 185 return fmt.Errorf("unable to update a PodGroup, '%v': %v", klog.KObj(pg), err) 186 } 187 return nil 188 } 189 190 func (s *SchedulerPluginsControl) CreatePodGroup(podGroup client.Object) error { 191 pg := podGroup.(*schedulerpluginsv1alpha1.PodGroup) 192 err := s.Client.Create(context.TODO(), pg, &client.CreateOptions{}) 193 if err != nil { 194 return fmt.Errorf("unable to create a PodGroup, '%v': %v", klog.KObj(pg), err) 195 } 196 return nil 197 } 198 199 var _ PodGroupControlInterface = &SchedulerPluginsControl{}