github.com/kubeflow/training-operator@v1.7.0/pkg/controller.v1/control/podgroup_control.go (about)

     1  /*
     2  Copyright 2023 The Kubeflow Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package control
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  
    23  	corev1 "k8s.io/api/core/v1"
    24  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    25  	"k8s.io/apimachinery/pkg/types"
    26  	"k8s.io/klog/v2"
    27  	"sigs.k8s.io/controller-runtime/pkg/client"
    28  	schedulerpluginsv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1"
    29  	volcanobatchv1alpha1 "volcano.sh/apis/pkg/apis/batch/v1alpha1"
    30  	volcanov1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
    31  	volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned"
    32  )
    33  
    34  // PodGroupControlInterface is an interface that knows how to add or delete PodGroups
    35  // created as an interface to allow testing.
    36  type PodGroupControlInterface interface {
    37  	// NewEmptyPodGroup returns an empty PodGroup.
    38  	NewEmptyPodGroup() client.Object
    39  	// GetPodGroup gets the PodGroup identified by namespace and name.
    40  	GetPodGroup(namespace string, name string) (metav1.Object, error)
    41  	// DeletePodGroup deletes the PodGroup identified by namespace and name.
    42  	DeletePodGroup(namespace string, name string) error
    43  	// UpdatePodGroup updates a PodGroup.
    44  	UpdatePodGroup(podGroup client.Object) error
    45  	// CreatePodGroup creates a new PodGroup with PodGroup spec fill function.
    46  	CreatePodGroup(podGroup client.Object) error
    47  	// DelayPodCreationDueToPodGroup determines whether it should delay Pod Creation.
    48  	DelayPodCreationDueToPodGroup(pg metav1.Object) bool
    49  	// DecoratePodTemplateSpec decorates PodTemplateSpec.
    50  	// If the PodTemplateSpec has SchedulerName set, this method will Not override.
    51  	DecoratePodTemplateSpec(pts *corev1.PodTemplateSpec, job metav1.Object, rtype string)
    52  	// GetSchedulerName returns the name of the gang scheduler.
    53  	GetSchedulerName() string
    54  }
    55  
// VolcanoControl is the implementation of PodGroupControlInterface with volcano.
type VolcanoControl struct {
	// Client is the Volcano clientset through which PodGroups are read,
	// created, updated and deleted.
	Client volcanoclient.Interface
}
    60  
    61  func (v *VolcanoControl) GetSchedulerName() string {
    62  	return "volcano"
    63  }
    64  
    65  func (v *VolcanoControl) DecoratePodTemplateSpec(pts *corev1.PodTemplateSpec, job metav1.Object, rtype string) {
    66  	if len(pts.Spec.SchedulerName) == 0 {
    67  		pts.Spec.SchedulerName = v.GetSchedulerName()
    68  	}
    69  	if pts.Annotations == nil {
    70  		pts.Annotations = make(map[string]string)
    71  	}
    72  	pts.Annotations[volcanov1beta1.KubeGroupNameAnnotationKey] = job.GetName()
    73  	pts.Annotations[volcanobatchv1alpha1.TaskSpecKey] = rtype
    74  }
    75  
    76  // NewVolcanoControl returns a VolcanoControl
    77  func NewVolcanoControl(vci volcanoclient.Interface) PodGroupControlInterface {
    78  	return &VolcanoControl{Client: vci}
    79  }
    80  
    81  func (v *VolcanoControl) DelayPodCreationDueToPodGroup(pg metav1.Object) bool {
    82  	if pg == nil {
    83  		return true
    84  	}
    85  	volcanoPodGroup := pg.(*volcanov1beta1.PodGroup)
    86  	return len(volcanoPodGroup.Status.Phase) == 0 || volcanoPodGroup.Status.Phase == volcanov1beta1.PodGroupPending
    87  }
    88  
    89  func (v *VolcanoControl) NewEmptyPodGroup() client.Object {
    90  	return &volcanov1beta1.PodGroup{}
    91  }
    92  
    93  func (v *VolcanoControl) GetPodGroup(namespace string, name string) (metav1.Object, error) {
    94  	pg, err := v.Client.SchedulingV1beta1().PodGroups(namespace).Get(context.TODO(), name, metav1.GetOptions{})
    95  	if err != nil {
    96  		return nil, err
    97  	}
    98  	return pg, nil
    99  }
   100  
   101  func (v *VolcanoControl) DeletePodGroup(namespace string, name string) error {
   102  	return v.Client.SchedulingV1beta1().PodGroups(namespace).Delete(context.TODO(), name, metav1.DeleteOptions{})
   103  }
   104  
   105  func (v *VolcanoControl) UpdatePodGroup(podGroup client.Object) error {
   106  	pg := podGroup.(*volcanov1beta1.PodGroup)
   107  	_, err := v.Client.SchedulingV1beta1().PodGroups(pg.GetNamespace()).Update(context.TODO(), pg, metav1.UpdateOptions{})
   108  	if err != nil {
   109  		return fmt.Errorf("unable to update a PodGroup, '%v': %v", klog.KObj(pg), err)
   110  	}
   111  	return nil
   112  }
   113  
   114  func (v *VolcanoControl) CreatePodGroup(podGroup client.Object) error {
   115  	pg := podGroup.(*volcanov1beta1.PodGroup)
   116  	_, err := v.Client.SchedulingV1beta1().PodGroups(pg.GetNamespace()).Create(context.TODO(), pg, metav1.CreateOptions{})
   117  	if err != nil {
   118  		return fmt.Errorf("unable to create PodGroup: %v", err)
   119  	}
   120  	return nil
   121  }
   122  
   123  var _ PodGroupControlInterface = &VolcanoControl{}
   124  
// SchedulerPluginsControl is the implementation of PodGroupControlInterface with scheduler-plugins.
type SchedulerPluginsControl struct {
	// Client is the controller-runtime client through which PodGroups are
	// read, created, updated and deleted.
	Client client.Client
	// SchedulerName is the value returned by GetSchedulerName and applied to
	// pod templates that do not already name a scheduler.
	SchedulerName string
}
   130  
   131  func (s *SchedulerPluginsControl) DecoratePodTemplateSpec(pts *corev1.PodTemplateSpec, job metav1.Object, _ string) {
   132  	if len(pts.Spec.SchedulerName) == 0 {
   133  		pts.Spec.SchedulerName = s.GetSchedulerName()
   134  	}
   135  
   136  	if pts.Labels == nil {
   137  		pts.Labels = make(map[string]string)
   138  	}
   139  	pts.Labels[schedulerpluginsv1alpha1.PodGroupLabel] = job.GetName()
   140  }
   141  
// GetSchedulerName returns the scheduler name this control was configured
// with (the SchedulerName field).
func (s *SchedulerPluginsControl) GetSchedulerName() string {
	return s.SchedulerName
}
   145  
   146  // NewSchedulerPluginsControl returns a SchedulerPluginsControl
   147  func NewSchedulerPluginsControl(c client.Client, schedulerName string) PodGroupControlInterface {
   148  	return &SchedulerPluginsControl{Client: c, SchedulerName: schedulerName}
   149  }
   150  
// DelayPodCreationDueToPodGroup always returns false: this implementation
// never asks for Pod creation to be delayed, and pg is not inspected.
func (s *SchedulerPluginsControl) DelayPodCreationDueToPodGroup(pg metav1.Object) bool {
	return false
}
   154  
   155  func (s *SchedulerPluginsControl) NewEmptyPodGroup() client.Object {
   156  	return &schedulerpluginsv1alpha1.PodGroup{}
   157  }
   158  
   159  func (s *SchedulerPluginsControl) GetPodGroup(namespace, name string) (metav1.Object, error) {
   160  	pg := &schedulerpluginsv1alpha1.PodGroup{}
   161  	ctx := context.TODO()
   162  	key := types.NamespacedName{
   163  		Namespace: namespace,
   164  		Name:      name,
   165  	}
   166  	if err := s.Client.Get(ctx, key, pg); err != nil {
   167  		return nil, err
   168  	}
   169  	return pg, nil
   170  }
   171  
   172  func (s *SchedulerPluginsControl) DeletePodGroup(namespace, name string) error {
   173  	ctx := context.TODO()
   174  	pg := s.NewEmptyPodGroup()
   175  	pg.SetNamespace(namespace)
   176  	pg.SetName(name)
   177  
   178  	return s.Client.Delete(ctx, pg)
   179  }
   180  
   181  func (s *SchedulerPluginsControl) UpdatePodGroup(podGroup client.Object) error {
   182  	pg := podGroup.(*schedulerpluginsv1alpha1.PodGroup)
   183  	err := s.Client.Update(context.TODO(), pg, &client.UpdateOptions{})
   184  	if err != nil {
   185  		return fmt.Errorf("unable to update a PodGroup, '%v': %v", klog.KObj(pg), err)
   186  	}
   187  	return nil
   188  }
   189  
   190  func (s *SchedulerPluginsControl) CreatePodGroup(podGroup client.Object) error {
   191  	pg := podGroup.(*schedulerpluginsv1alpha1.PodGroup)
   192  	err := s.Client.Create(context.TODO(), pg, &client.CreateOptions{})
   193  	if err != nil {
   194  		return fmt.Errorf("unable to create a PodGroup, '%v': %v", klog.KObj(pg), err)
   195  	}
   196  	return nil
   197  }
   198  
   199  var _ PodGroupControlInterface = &SchedulerPluginsControl{}