sigs.k8s.io/kueue@v0.6.2/pkg/controller/jobs/job/job_controller.go

/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package job

import (
	"context"
	"fmt"
	"strconv"

	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/client-go/util/workqueue"
	"k8s.io/klog/v2"
	"k8s.io/utils/ptr"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/builder"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/event"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
	"sigs.k8s.io/kueue/pkg/controller/jobframework"
	"sigs.k8s.io/kueue/pkg/podset"
)

var (
	parentWorkloadKey = ".metadata.parentWorkload"
	gvk               = batchv1.SchemeGroupVersion.WithKind("Job")

	FrameworkName = "batch/job"
)

const (
	// JobMinParallelismAnnotation lets a Job opt in to partial admission by
	// declaring the minimum parallelism it can be admitted with when the
	// full parallelism does not fit.
	JobMinParallelismAnnotation = "kueue.x-k8s.io/job-min-parallelism"
	// JobCompletionsEqualParallelismAnnotation, when set to true, keeps
	// spec.completions in sync with spec.parallelism whenever Kueue adjusts
	// the parallelism for partial admission.
	JobCompletionsEqualParallelismAnnotation = "kueue.x-k8s.io/job-completions-equal-parallelism"
)

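// init registers the batch/Job integration with the shared job framework,
// wiring up index setup, the reconciler factory, the webhook, and the
// ownership check used to recognize Job-owned objects.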
func init() {
	utilruntime.Must(jobframework.RegisterIntegration(FrameworkName, jobframework.IntegrationCallbacks{
		SetupIndexes:           SetupIndexes,
		NewReconciler:          NewReconciler,
		SetupWebhook:           SetupWebhook,
		JobType:                &batchv1.Job{},
		IsManagingObjectsOwner: isJob,
	}))
}

// +kubebuilder:rbac:groups=scheduling.k8s.io,resources=priorityclasses,verbs=list;get;watch
// +kubebuilder:rbac:groups="",resources=events,verbs=create;watch;update;patch
// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;update;patch
// +kubebuilder:rbac:groups=batch,resources=jobs/status,verbs=get;update
// +kubebuilder:rbac:groups=batch,resources=jobs/finalizers,verbs=get;update;patch
// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloads,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloads/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloads/finalizers,verbs=update
// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=resourceflavors,verbs=get;list;watch
// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloadpriorityclasses,verbs=get;list;watch

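// NewReconciler builds the generic job-framework reconciler for batch/v1
// Jobs, additionally watching Workloads so that child Jobs (those carrying
// the parent-workload annotation) are requeued on parent-workload events.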
var NewReconciler = jobframework.NewGenericReconcilerFactory(
	func() jobframework.GenericJob {
		return &Job{}
	}, func(b *builder.Builder, c client.Client) *builder.Builder {
		return b.Watches(&kueue.Workload{}, &parentWorkloadHandler{client: c})
	},
)

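// isJob reports whether the given owner reference points to a batch/v1 Job.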
func isJob(owner *metav1.OwnerReference) bool {
	return owner.Kind == "Job" && owner.APIVersion == gvk.GroupVersion().String()
}

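// parentWorkloadHandler maps Workload events to reconcile requests for the
// child Jobs that reference the Workload through the parent-workload
// annotation.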
type parentWorkloadHandler struct {
	client client.Client
}

func (h *parentWorkloadHandler) Create(ctx context.Context, e event.CreateEvent, q workqueue.RateLimitingInterface) {
	h.queueReconcileForChildJob(ctx, e.Object, q)
}

func (h *parentWorkloadHandler) Update(ctx context.Context, e event.UpdateEvent, q workqueue.RateLimitingInterface) {
	h.queueReconcileForChildJob(ctx, e.ObjectNew, q)
}

// Delete events are intentionally ignored.
func (h *parentWorkloadHandler) Delete(context.Context, event.DeleteEvent, workqueue.RateLimitingInterface) {
}

// Generic events are intentionally ignored.
func (h *parentWorkloadHandler) Generic(ctx context.Context, e event.GenericEvent, q workqueue.RateLimitingInterface) {
}

// queueReconcileForChildJob queues reconciliation of the child Jobs (those
// carrying the parent-workload annotation) in reaction to events on their
// parent workload.
func (h *parentWorkloadHandler) queueReconcileForChildJob(ctx context.Context, object client.Object, q workqueue.RateLimitingInterface) {
	w, ok := object.(*kueue.Workload)
	if !ok {
		return
	}
	log := ctrl.LoggerFrom(ctx).WithValues("workload", klog.KObj(w))
	ctx = ctrl.LoggerInto(ctx, log)
	log.V(5).Info("Queueing reconcile for child jobs")
	var childJobs batchv1.JobList
	if err := h.client.List(ctx, &childJobs, client.InNamespace(w.Namespace), client.MatchingFields{parentWorkloadKey: w.Name}); err != nil {
		log.Error(err, "Unable to list child jobs")
		return
	}
	for _, childJob := range childJobs.Items {
		log.V(5).Info("Queueing reconcile for child job", "job", klog.KObj(&childJob))
		q.Add(reconcile.Request{
			NamespacedName: types.NamespacedName{
				Name:      childJob.Name,
				Namespace: w.Namespace,
			},
		})
	}
}

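// Job wraps batchv1.Job to implement the jobframework interfaces, letting the
// generic reconciler drive the Job's suspend/resume lifecycle through Kueue.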
type Job batchv1.Job

var _ jobframework.GenericJob = (*Job)(nil)
var _ jobframework.JobWithReclaimablePods = (*Job)(nil)
var _ jobframework.JobWithCustomStop = (*Job)(nil)

func (j *Job) Object() client.Object {
	return (*batchv1.Job)(j)
}

func fromObject(o runtime.Object) *Job {
	return (*Job)(o.(*batchv1.Job))
}

func (j *Job) IsSuspended() bool {
	return j.Spec.Suspend != nil && *j.Spec.Suspend
}

func (j *Job) IsActive() bool {
	return j.Status.Active != 0
}

func (j *Job) Suspend() {
	j.Spec.Suspend = ptr.To(true)
}

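// Stop suspends the Job, clears its start time so that scheduling directives
// can be mutated again, and restores the pod set info captured at admission.
// It returns true if this call is the one that suspended the Job.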
func (j *Job) Stop(ctx context.Context, c client.Client, podSetsInfo []podset.PodSetInfo, _ jobframework.StopReason, eventMsg string) (bool, error) {
	stoppedNow := false
	if !j.IsSuspended() {
		j.Suspend()
		if err := c.Update(ctx, j.Object()); err != nil {
			return false, fmt.Errorf("suspend: %w", err)
		}
		stoppedNow = true
	}

	// Reset start time if necessary, so we can update the scheduling directives.
	if j.Status.StartTime != nil {
		j.Status.StartTime = nil
		if err := c.Status().Update(ctx, j.Object()); err != nil {
			return stoppedNow, fmt.Errorf("reset status: %w", err)
		}
	}

	if changed := j.RestorePodSetsInfo(podSetsInfo); !changed {
		return stoppedNow, nil
	}
	if err := c.Update(ctx, j.Object()); err != nil {
		return false, fmt.Errorf("restore info: %w", err)
	}
	return stoppedNow, nil
}

func (j *Job) GVK() schema.GroupVersionKind {
	return gvk
}

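// ReclaimablePods reports how many pods of the single pod set are no longer
// needed: once fewer completions remain than the configured parallelism, the
// surplus pods can be reclaimed. For example, with parallelism=5,
// completions=10 and 7 pods succeeded, only 3 completions remain, so 2 of
// the 5 admitted pods are reclaimable.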
func (j *Job) ReclaimablePods() ([]kueue.ReclaimablePod, error) {
	parallelism := ptr.Deref(j.Spec.Parallelism, 1)
	if parallelism == 1 || j.Status.Succeeded == 0 {
		return nil, nil
	}

	remaining := ptr.Deref(j.Spec.Completions, parallelism) - j.Status.Succeeded
	if remaining >= parallelism {
		return nil, nil
	}

	return []kueue.ReclaimablePod{{
		Name:  kueue.DefaultPodSetName,
		Count: parallelism - remaining,
	}}, nil
}

// The following labels are managed internally by the batch/Job controller and
// should not be propagated to the workload.
var (
	// The legacy names are no longer defined in the batch/v1 API, only in
	// k/k's pkg/apis/batch.
	legacyJobNameLabel       = "job-name"
	legacyControllerUidLabel = "controller-uid"
	ManagedLabels            = []string{legacyJobNameLabel, legacyControllerUidLabel, batchv1.JobNameLabel, batchv1.ControllerUidLabel}
)

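// cleanManagedLabels strips the Job-controller-managed labels from a pod
// template in place and returns the template for convenient chaining.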
func cleanManagedLabels(pt *corev1.PodTemplateSpec) *corev1.PodTemplateSpec {
	for _, managedLabel := range ManagedLabels {
		delete(pt.Labels, managedLabel)
	}
	return pt
}

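// PodSets returns the Job's single pod set, built from a copy of the pod
// template with the managed labels removed; MinCount is only set when the
// Job opts in to partial admission.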
func (j *Job) PodSets() []kueue.PodSet {
	return []kueue.PodSet{
		{
			Name:     kueue.DefaultPodSetName,
			Template: *cleanManagedLabels(j.Spec.Template.DeepCopy()),
			Count:    j.podsCount(),
			MinCount: j.minPodsCount(),
		},
	}
}

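// RunWithPodSetsInfo unsuspends the Job and merges in the node selectors and
// metadata captured at admission; for partially admitted Jobs it also adjusts
// the parallelism (and, if requested, the completions) to the admitted count.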
func (j *Job) RunWithPodSetsInfo(podSetsInfo []podset.PodSetInfo) error {
	j.Spec.Suspend = ptr.To(false)
	if len(podSetsInfo) != 1 {
		return podset.BadPodSetsInfoLenError(1, len(podSetsInfo))
	}

	info := podSetsInfo[0]

	if j.minPodsCount() != nil {
		j.Spec.Parallelism = ptr.To(info.Count)
		if j.syncCompletionWithParallelism() {
			j.Spec.Completions = j.Spec.Parallelism
		}
	}
	return podset.Merge(&j.Spec.Template.ObjectMeta, &j.Spec.Template.Spec, info)
}

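// RestorePodSetsInfo reverts the changes made by RunWithPodSetsInfo and
// reports whether anything was modified. The labels managed by the batch/Job
// controller are preserved as-is.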
func (j *Job) RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool {
	if len(podSetsInfo) == 0 {
		return false
	}

	changed := false
	// If the job accepts partial admission, restore the original parallelism.
	if j.minPodsCount() != nil && ptr.Deref(j.Spec.Parallelism, 0) != podSetsInfo[0].Count {
		changed = true
		j.Spec.Parallelism = ptr.To(podSetsInfo[0].Count)
		if j.syncCompletionWithParallelism() {
			j.Spec.Completions = j.Spec.Parallelism
		}
	}
	info := podSetsInfo[0]
	// Keep the current values of the managed labels when restoring the pod
	// template, since the batch/Job controller owns them.
	for _, managedLabel := range ManagedLabels {
		if v, found := j.Spec.Template.Labels[managedLabel]; found {
			info.AddOrUpdateLabel(managedLabel, v)
		}
	}
	changed = podset.RestorePodSpec(&j.Spec.Template.ObjectMeta, &j.Spec.Template.Spec, info) || changed
	return changed
}

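// Finished reports whether the Job reached a terminal Complete or Failed
// condition, translating it into the WorkloadFinished condition that Kueue
// sets on the workload.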
func (j *Job) Finished() (metav1.Condition, bool) {
	var conditionType batchv1.JobConditionType
	var finished bool

	for _, c := range j.Status.Conditions {
		if (c.Type == batchv1.JobComplete || c.Type == batchv1.JobFailed) && c.Status == corev1.ConditionTrue {
			conditionType = c.Type
			finished = true
			break
		}
	}

	condition := metav1.Condition{
		Type:    kueue.WorkloadFinished,
		Status:  metav1.ConditionTrue,
		Reason:  "JobFinished",
		Message: "Job finished successfully",
	}
	if conditionType == batchv1.JobFailed {
		condition.Message = "Job failed"
	}

	return condition, finished
}

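// PodsReady reports whether the number of ready plus already succeeded pods
// covers the full pod count of the Job.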
func (j *Job) PodsReady() bool {
	ready := ptr.Deref(j.Status.Ready, 0)
	return j.Status.Succeeded+ready >= j.podsCount()
}

func (j *Job) podsCount() int32 {
	// Parallelism is always non-nil, as the Kubernetes API server defaults it to 1.
	podsCount := *(j.Spec.Parallelism)
	if j.Spec.Completions != nil && *j.Spec.Completions < podsCount {
		podsCount = *j.Spec.Completions
	}
	return podsCount
}

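// minPodsCount parses the job-min-parallelism annotation and returns the
// minimum parallelism the Job can be admitted with, or nil if the annotation
// is absent or not a valid integer.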
func (j *Job) minPodsCount() *int32 {
	if strVal, found := j.GetAnnotations()[JobMinParallelismAnnotation]; found {
		if iVal, err := strconv.Atoi(strVal); err == nil {
			return ptr.To[int32](int32(iVal))
		}
	}
	return nil
}

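// syncCompletionWithParallelism reports whether the Job asked, via the
// job-completions-equal-parallelism annotation, for spec.completions to track
// spec.parallelism; it defaults to false on a missing or invalid value.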
func (j *Job) syncCompletionWithParallelism() bool {
	if strVal, found := j.GetAnnotations()[JobCompletionsEqualParallelismAnnotation]; found {
		if bVal, err := strconv.ParseBool(strVal); err == nil {
			return bVal
		}
	}
	return false
}

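// SetupIndexes registers the field indexes used by the reconciler: one that
// maps Jobs to their parent workload name, and the generic workload-owner
// index for Jobs.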
func SetupIndexes(ctx context.Context, indexer client.FieldIndexer) error {
	if err := indexer.IndexField(ctx, &batchv1.Job{}, parentWorkloadKey, func(o client.Object) []string {
		job := fromObject(o)
		if pwName := jobframework.ParentWorkloadName(job); pwName != "" {
			return []string{pwName}
		}
		return nil
	}); err != nil {
		return err
	}
	return jobframework.SetupWorkloadOwnerIndex(ctx, indexer, gvk)
}

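// GetWorkloadNameForJob returns the name of the Workload that Kueue creates
// for a Job with the given name.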
func GetWorkloadNameForJob(jobName string) string {
	return jobframework.GetWorkloadNameForOwnerWithGVK(jobName, gvk)
}