github.com/kubeflow/training-operator@v1.7.0/pkg/controller.v1/common/job_controller.go (about)

     1  /*
     2  Copyright 2023 The Kubeflow Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package common
    18  
    19  import (
    20  	"strings"
    21  
    22  	apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
    23  	"github.com/kubeflow/training-operator/pkg/common"
    24  	"github.com/kubeflow/training-operator/pkg/controller.v1/control"
    25  	"github.com/kubeflow/training-operator/pkg/controller.v1/expectation"
    26  
    27  	"github.com/prometheus/client_golang/prometheus"
    28  	"github.com/prometheus/client_golang/prometheus/promauto"
    29  	log "github.com/sirupsen/logrus"
    30  	corev1 "k8s.io/api/core/v1"
    31  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    32  	kubeinformers "k8s.io/client-go/informers"
    33  	kubeclientset "k8s.io/client-go/kubernetes"
    34  	"k8s.io/client-go/kubernetes/scheme"
    35  	typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
    36  	corelisters "k8s.io/client-go/listers/core/v1"
    37  	schedulinglisters "k8s.io/client-go/listers/scheduling/v1"
    38  	"k8s.io/client-go/tools/cache"
    39  	"k8s.io/client-go/tools/record"
    40  	"k8s.io/client-go/util/workqueue"
    41  	"sigs.k8s.io/controller-runtime/pkg/client"
    42  	volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned"
    43  )
    44  
    45  var (
    46  	// KeyFunc is the short name to DeletionHandlingMetaNamespaceKeyFunc.
    47  	// IndexerInformer uses a delta queue, therefore for deletes we have to use this
    48  	// key function but it should be just fine for non delete events.
    49  	KeyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc
    50  
    51  	// Prometheus metrics
    52  	createdPDBCount = promauto.NewCounter(prometheus.CounterOpts{
    53  		Name: "created_pod_disruption_policies_total",
    54  		Help: "The total number of created pod disruption policies",
    55  	})
    56  	deletedPDBCount = promauto.NewCounter(prometheus.CounterOpts{
    57  		Name: "deleted_pod_disruption_policies_total",
    58  		Help: "The total number of deleted pod disruption policies",
    59  	})
    60  	createdPodGroupsCount = promauto.NewCounter(prometheus.CounterOpts{
    61  		Name: "created_pod_groups_total",
    62  		Help: "The total number of created pod groups",
    63  	})
    64  	deletedPodGroupsCount = promauto.NewCounter(prometheus.CounterOpts{
    65  		Name: "deleted_pod_groups_total",
    66  		Help: "The total number of deleted pod groups",
    67  	})
    68  )
    69  
    70  type GangScheduler string
    71  
    72  const (
    73  	GangSchedulerNone    GangScheduler = "None"
    74  	GangSchedulerVolcano GangScheduler = "volcano"
    75  	// GangSchedulerSchedulerPlugins Using this scheduler name or any scheduler name different than volcano uses the scheduler-plugins PodGroup
    76  	GangSchedulerSchedulerPlugins GangScheduler = "scheduler-plugins"
    77  )
    78  
    79  // JobControllerConfiguration contains configuration of operator.
    80  type JobControllerConfiguration struct {
    81  	// GangScheduling choice: None, volcano and scheduler-plugins
    82  	GangScheduling GangScheduler
    83  }
    84  
    85  func (c *JobControllerConfiguration) EnableGangScheduling() bool {
    86  	return c.GangScheduling != "" && c.GangScheduling != GangSchedulerNone
    87  }
    88  
    89  // JobController abstracts other operators to manage the lifecycle of Jobs.
    90  // User need to first implement the ControllerInterface(objectA) and then initialize a JobController(objectB) struct with objectA
    91  // as the parameter.
    92  // And then call objectB.ReconcileJobs as mentioned below, the ReconcileJobs method is the entrypoint to trigger the
    93  // reconcile logic of the job controller
    94  //
    95  // ReconcileJobs(
    96  //
    97  //	job interface{},
    98  //	replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec,
    99  //	jobStatus apiv1.JobStatus,
   100  //	runPolicy *apiv1.RunPolicy) error
   101  type JobController struct {
   102  	Controller common.ControllerInterface
   103  
   104  	Config JobControllerConfiguration
   105  
   106  	// PodControl is used to add or delete pods.
   107  	PodControl control.PodControlInterface
   108  
   109  	// ServiceControl is used to add or delete services.
   110  	ServiceControl control.ServiceControlInterface
   111  
   112  	// KubeClientSet is a standard kubernetes clientset.
   113  	KubeClientSet kubeclientset.Interface
   114  
   115  	// PodGroupControl is used to add or delete PodGroup.
   116  	PodGroupControl control.PodGroupControlInterface
   117  
   118  	// PodLister can list/get pods from the shared informer's store.
   119  	PodLister corelisters.PodLister
   120  
   121  	// ServiceLister can list/get services from the shared informer's store.
   122  	ServiceLister corelisters.ServiceLister
   123  
   124  	// PriorityClassLister can list/get priorityClasses from the shared informer's store.
   125  	PriorityClassLister schedulinglisters.PriorityClassLister
   126  
   127  	// PodInformerSynced returns true if the pod store has been synced at least once.
   128  	PodInformerSynced cache.InformerSynced
   129  
   130  	// ServiceInformerSynced returns true if the service store has been synced at least once.
   131  	ServiceInformerSynced cache.InformerSynced
   132  
   133  	// PriorityClassInformerSynced returns true if the priority class store has been synced at least once.
   134  	PriorityClassInformerSynced cache.InformerSynced
   135  
   136  	// A TTLCache of pod/services creates/deletes each job expects to see
   137  	// We use Job namespace/name + ReplicaType + pods/services as an expectation key,
   138  	// For example, there is a TFJob with namespace "tf-operator" and name "tfjob-abc":
   139  	// {
   140  	//     "PS": {
   141  	//         "Replicas": 2,
   142  	//     },
   143  	//     "Worker": {
   144  	//         "Replicas": 4,
   145  	//     }
   146  	// }
   147  	// We will create 4 expectations:
   148  	// - "tf-operator/tfjob-abc/ps/services", expects 2 adds.
   149  	// - "tf-operator/tfjob-abc/ps/pods", expects 2 adds.
   150  	// - "tf-operator/tfjob-abc/worker/services", expects 4 adds.
   151  	// - "tf-operator/tfjob-abc/worker/pods", expects 4 adds.
   152  	Expectations expectation.ControllerExpectationsInterface
   153  
   154  	// WorkQueue is a rate limited work queue. This is used to queue work to be
   155  	// processed instead of performing it as soon as a change happens. This
   156  	// means we can ensure we only process a fixed amount of resources at a
   157  	// time, and makes it easy to ensure we are never processing the same item
   158  	// simultaneously in two different workers.
   159  	WorkQueue workqueue.RateLimitingInterface
   160  
   161  	// Recorder is an event recorder for recording Event resources to the
   162  	// Kubernetes API.
   163  	Recorder record.EventRecorder
   164  }
   165  
   166  type GangSchedulingSetupFunc func(jc *JobController)
   167  
   168  var GenVolcanoSetupFunc = func(vci volcanoclient.Interface) GangSchedulingSetupFunc {
   169  	return func(jc *JobController) {
   170  		jc.Config.GangScheduling = GangSchedulerVolcano
   171  		jc.PodGroupControl = control.NewVolcanoControl(vci)
   172  	}
   173  }
   174  
   175  var GenSchedulerPluginsSetupFunc = func(c client.Client, gangSchedulerName string) GangSchedulingSetupFunc {
   176  	return func(jc *JobController) {
   177  		jc.Config.GangScheduling = GangScheduler(gangSchedulerName)
   178  		jc.PodGroupControl = control.NewSchedulerPluginsControl(c, gangSchedulerName)
   179  	}
   180  }
   181  
   182  var GenNonGangSchedulerSetupFunc = func() GangSchedulingSetupFunc {
   183  	return func(jc *JobController) {
   184  		jc.Config.GangScheduling = ""
   185  		jc.PodGroupControl = nil
   186  	}
   187  }
   188  
   189  func NewJobController(
   190  	controllerImpl common.ControllerInterface,
   191  	reconcilerSyncPeriod metav1.Duration,
   192  	kubeClientSet kubeclientset.Interface,
   193  	setupPodGroup GangSchedulingSetupFunc,
   194  	kubeInformerFactory kubeinformers.SharedInformerFactory,
   195  	workQueueName string) JobController {
   196  
   197  	log.Debug("Creating event broadcaster")
   198  	eventBroadcaster := record.NewBroadcaster()
   199  	eventBroadcaster.StartLogging(log.Infof)
   200  	eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClientSet.CoreV1().Events("")})
   201  	recorder := eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: controllerImpl.ControllerName()})
   202  
   203  	podControl := control.RealPodControl{
   204  		KubeClient: kubeClientSet,
   205  		Recorder:   eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: controllerImpl.ControllerName()}),
   206  	}
   207  
   208  	serviceControl := control.RealServiceControl{
   209  		KubeClient: kubeClientSet,
   210  		Recorder:   eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: controllerImpl.ControllerName()}),
   211  	}
   212  
   213  	jc := JobController{
   214  		Controller:     controllerImpl,
   215  		Config:         JobControllerConfiguration{},
   216  		PodControl:     podControl,
   217  		ServiceControl: serviceControl,
   218  		KubeClientSet:  kubeClientSet,
   219  		Expectations:   expectation.NewControllerExpectations(),
   220  		WorkQueue:      workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), workQueueName),
   221  		Recorder:       recorder,
   222  	}
   223  
   224  	setupPodGroup(&jc)
   225  
   226  	return jc
   227  
   228  }
   229  
   230  func (jc *JobController) GenOwnerReference(obj metav1.Object) *metav1.OwnerReference {
   231  	boolPtr := func(b bool) *bool { return &b }
   232  	controllerRef := &metav1.OwnerReference{
   233  		APIVersion:         jc.Controller.GetAPIGroupVersion().String(),
   234  		Kind:               jc.Controller.GetAPIGroupVersionKind().Kind,
   235  		Name:               obj.GetName(),
   236  		UID:                obj.GetUID(),
   237  		BlockOwnerDeletion: boolPtr(true),
   238  		Controller:         boolPtr(true),
   239  	}
   240  
   241  	return controllerRef
   242  }
   243  
   244  func (jc *JobController) GenLabels(jobName string) map[string]string {
   245  	jobName = strings.Replace(jobName, "/", "-", -1)
   246  	return map[string]string{
   247  		apiv1.OperatorNameLabel: jc.Controller.ControllerName(),
   248  		apiv1.JobNameLabel:      jobName,
   249  	}
   250  }
   251  
   252  // resolveControllerRef returns the job referenced by a ControllerRef,
   253  // or nil if the ControllerRef could not be resolved to a matching job
   254  // of the correct Kind.
   255  func (jc *JobController) resolveControllerRef(namespace string, controllerRef *metav1.OwnerReference) metav1.Object {
   256  	// We can't look up by UID, so look up by Name and then verify UID.
   257  	// Don't even try to look up by Name if it's the wrong Kind.
   258  	if controllerRef.Kind != jc.Controller.GetAPIGroupVersionKind().Kind {
   259  		return nil
   260  	}
   261  	job, err := jc.Controller.GetJobFromInformerCache(namespace, controllerRef.Name)
   262  	if err != nil {
   263  		return nil
   264  	}
   265  	if job.GetUID() != controllerRef.UID {
   266  		// The controller we found with this Name is not the same one that the
   267  		// ControllerRef points to.
   268  		return nil
   269  	}
   270  	return job
   271  }