github.com/kubeflow/training-operator@v1.7.0/pkg/controller.v1/common/job_controller.go (about) 1 /* 2 Copyright 2023 The Kubeflow Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package common 18 19 import ( 20 "strings" 21 22 apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" 23 "github.com/kubeflow/training-operator/pkg/common" 24 "github.com/kubeflow/training-operator/pkg/controller.v1/control" 25 "github.com/kubeflow/training-operator/pkg/controller.v1/expectation" 26 27 "github.com/prometheus/client_golang/prometheus" 28 "github.com/prometheus/client_golang/prometheus/promauto" 29 log "github.com/sirupsen/logrus" 30 corev1 "k8s.io/api/core/v1" 31 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 32 kubeinformers "k8s.io/client-go/informers" 33 kubeclientset "k8s.io/client-go/kubernetes" 34 "k8s.io/client-go/kubernetes/scheme" 35 typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" 36 corelisters "k8s.io/client-go/listers/core/v1" 37 schedulinglisters "k8s.io/client-go/listers/scheduling/v1" 38 "k8s.io/client-go/tools/cache" 39 "k8s.io/client-go/tools/record" 40 "k8s.io/client-go/util/workqueue" 41 "sigs.k8s.io/controller-runtime/pkg/client" 42 volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned" 43 ) 44 45 var ( 46 // KeyFunc is the short name to DeletionHandlingMetaNamespaceKeyFunc. 47 // IndexerInformer uses a delta queue, therefore for deletes we have to use this 48 // key function but it should be just fine for non delete events. 49 KeyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc 50 51 // Prometheus metrics 52 createdPDBCount = promauto.NewCounter(prometheus.CounterOpts{ 53 Name: "created_pod_disruption_policies_total", 54 Help: "The total number of created pod disruption policies", 55 }) 56 deletedPDBCount = promauto.NewCounter(prometheus.CounterOpts{ 57 Name: "deleted_pod_disruption_policies_total", 58 Help: "The total number of deleted pod disruption policies", 59 }) 60 createdPodGroupsCount = promauto.NewCounter(prometheus.CounterOpts{ 61 Name: "created_pod_groups_total", 62 Help: "The total number of created pod groups", 63 }) 64 deletedPodGroupsCount = promauto.NewCounter(prometheus.CounterOpts{ 65 Name: "deleted_pod_groups_total", 66 Help: "The total number of deleted pod groups", 67 }) 68 ) 69 70 type GangScheduler string 71 72 const ( 73 GangSchedulerNone GangScheduler = "None" 74 GangSchedulerVolcano GangScheduler = "volcano" 75 // GangSchedulerSchedulerPlugins Using this scheduler name or any scheduler name different than volcano uses the scheduler-plugins PodGroup 76 GangSchedulerSchedulerPlugins GangScheduler = "scheduler-plugins" 77 ) 78 79 // JobControllerConfiguration contains configuration of operator. 80 type JobControllerConfiguration struct { 81 // GangScheduling choice: None, volcano and scheduler-plugins 82 GangScheduling GangScheduler 83 } 84 85 func (c *JobControllerConfiguration) EnableGangScheduling() bool { 86 return c.GangScheduling != "" && c.GangScheduling != GangSchedulerNone 87 } 88 89 // JobController abstracts other operators to manage the lifecycle of Jobs. 90 // User need to first implement the ControllerInterface(objectA) and then initialize a JobController(objectB) struct with objectA 91 // as the parameter. 92 // And then call objectB.ReconcileJobs as mentioned below, the ReconcileJobs method is the entrypoint to trigger the 93 // reconcile logic of the job controller 94 // 95 // ReconcileJobs( 96 // 97 // job interface{}, 98 // replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, 99 // jobStatus apiv1.JobStatus, 100 // runPolicy *apiv1.RunPolicy) error 101 type JobController struct { 102 Controller common.ControllerInterface 103 104 Config JobControllerConfiguration 105 106 // PodControl is used to add or delete pods. 107 PodControl control.PodControlInterface 108 109 // ServiceControl is used to add or delete services. 110 ServiceControl control.ServiceControlInterface 111 112 // KubeClientSet is a standard kubernetes clientset. 113 KubeClientSet kubeclientset.Interface 114 115 // PodGroupControl is used to add or delete PodGroup. 116 PodGroupControl control.PodGroupControlInterface 117 118 // PodLister can list/get pods from the shared informer's store. 119 PodLister corelisters.PodLister 120 121 // ServiceLister can list/get services from the shared informer's store. 122 ServiceLister corelisters.ServiceLister 123 124 // PriorityClassLister can list/get priorityClasses from the shared informer's store. 125 PriorityClassLister schedulinglisters.PriorityClassLister 126 127 // PodInformerSynced returns true if the pod store has been synced at least once. 128 PodInformerSynced cache.InformerSynced 129 130 // ServiceInformerSynced returns true if the service store has been synced at least once. 131 ServiceInformerSynced cache.InformerSynced 132 133 // PriorityClassInformerSynced returns true if the priority class store has been synced at least once. 134 PriorityClassInformerSynced cache.InformerSynced 135 136 // A TTLCache of pod/services creates/deletes each job expects to see 137 // We use Job namespace/name + ReplicaType + pods/services as an expectation key, 138 // For example, there is a TFJob with namespace "tf-operator" and name "tfjob-abc": 139 // { 140 // "PS": { 141 // "Replicas": 2, 142 // }, 143 // "Worker": { 144 // "Replicas": 4, 145 // } 146 // } 147 // We will create 4 expectations: 148 // - "tf-operator/tfjob-abc/ps/services", expects 2 adds. 149 // - "tf-operator/tfjob-abc/ps/pods", expects 2 adds. 150 // - "tf-operator/tfjob-abc/worker/services", expects 4 adds. 151 // - "tf-operator/tfjob-abc/worker/pods", expects 4 adds. 152 Expectations expectation.ControllerExpectationsInterface 153 154 // WorkQueue is a rate limited work queue. This is used to queue work to be 155 // processed instead of performing it as soon as a change happens. This 156 // means we can ensure we only process a fixed amount of resources at a 157 // time, and makes it easy to ensure we are never processing the same item 158 // simultaneously in two different workers. 159 WorkQueue workqueue.RateLimitingInterface 160 161 // Recorder is an event recorder for recording Event resources to the 162 // Kubernetes API. 163 Recorder record.EventRecorder 164 } 165 166 type GangSchedulingSetupFunc func(jc *JobController) 167 168 var GenVolcanoSetupFunc = func(vci volcanoclient.Interface) GangSchedulingSetupFunc { 169 return func(jc *JobController) { 170 jc.Config.GangScheduling = GangSchedulerVolcano 171 jc.PodGroupControl = control.NewVolcanoControl(vci) 172 } 173 } 174 175 var GenSchedulerPluginsSetupFunc = func(c client.Client, gangSchedulerName string) GangSchedulingSetupFunc { 176 return func(jc *JobController) { 177 jc.Config.GangScheduling = GangScheduler(gangSchedulerName) 178 jc.PodGroupControl = control.NewSchedulerPluginsControl(c, gangSchedulerName) 179 } 180 } 181 182 var GenNonGangSchedulerSetupFunc = func() GangSchedulingSetupFunc { 183 return func(jc *JobController) { 184 jc.Config.GangScheduling = "" 185 jc.PodGroupControl = nil 186 } 187 } 188 189 func NewJobController( 190 controllerImpl common.ControllerInterface, 191 reconcilerSyncPeriod metav1.Duration, 192 kubeClientSet kubeclientset.Interface, 193 setupPodGroup GangSchedulingSetupFunc, 194 kubeInformerFactory kubeinformers.SharedInformerFactory, 195 workQueueName string) JobController { 196 197 log.Debug("Creating event broadcaster") 198 eventBroadcaster := record.NewBroadcaster() 199 eventBroadcaster.StartLogging(log.Infof) 200 eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClientSet.CoreV1().Events("")}) 201 recorder := eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: controllerImpl.ControllerName()}) 202 203 podControl := control.RealPodControl{ 204 KubeClient: kubeClientSet, 205 Recorder: eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: controllerImpl.ControllerName()}), 206 } 207 208 serviceControl := control.RealServiceControl{ 209 KubeClient: kubeClientSet, 210 Recorder: eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: controllerImpl.ControllerName()}), 211 } 212 213 jc := JobController{ 214 Controller: controllerImpl, 215 Config: JobControllerConfiguration{}, 216 PodControl: podControl, 217 ServiceControl: serviceControl, 218 KubeClientSet: kubeClientSet, 219 Expectations: expectation.NewControllerExpectations(), 220 WorkQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), workQueueName), 221 Recorder: recorder, 222 } 223 224 setupPodGroup(&jc) 225 226 return jc 227 228 } 229 230 func (jc *JobController) GenOwnerReference(obj metav1.Object) *metav1.OwnerReference { 231 boolPtr := func(b bool) *bool { return &b } 232 controllerRef := &metav1.OwnerReference{ 233 APIVersion: jc.Controller.GetAPIGroupVersion().String(), 234 Kind: jc.Controller.GetAPIGroupVersionKind().Kind, 235 Name: obj.GetName(), 236 UID: obj.GetUID(), 237 BlockOwnerDeletion: boolPtr(true), 238 Controller: boolPtr(true), 239 } 240 241 return controllerRef 242 } 243 244 func (jc *JobController) GenLabels(jobName string) map[string]string { 245 jobName = strings.Replace(jobName, "/", "-", -1) 246 return map[string]string{ 247 apiv1.OperatorNameLabel: jc.Controller.ControllerName(), 248 apiv1.JobNameLabel: jobName, 249 } 250 } 251 252 // resolveControllerRef returns the job referenced by a ControllerRef, 253 // or nil if the ControllerRef could not be resolved to a matching job 254 // of the correct Kind. 255 func (jc *JobController) resolveControllerRef(namespace string, controllerRef *metav1.OwnerReference) metav1.Object { 256 // We can't look up by UID, so look up by Name and then verify UID. 257 // Don't even try to look up by Name if it's the wrong Kind. 258 if controllerRef.Kind != jc.Controller.GetAPIGroupVersionKind().Kind { 259 return nil 260 } 261 job, err := jc.Controller.GetJobFromInformerCache(namespace, controllerRef.Name) 262 if err != nil { 263 return nil 264 } 265 if job.GetUID() != controllerRef.UID { 266 // The controller we found with this Name is not the same one that the 267 // ControllerRef points to. 268 return nil 269 } 270 return job 271 }