k8s.io/kubernetes@v1.29.3/pkg/controller/job/job_controller.go

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package job
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"reflect"
    23  	"sort"
    24  	"sync"
    25  	"sync/atomic"
    26  	"time"
    27  
    28  	batch "k8s.io/api/batch/v1"
    29  	v1 "k8s.io/api/core/v1"
    30  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    31  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    32  	"k8s.io/apimachinery/pkg/labels"
    33  	"k8s.io/apimachinery/pkg/types"
    34  	"k8s.io/apimachinery/pkg/util/json"
    35  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    36  	"k8s.io/apimachinery/pkg/util/sets"
    37  	"k8s.io/apimachinery/pkg/util/wait"
    38  	"k8s.io/apiserver/pkg/util/feature"
    39  	batchinformers "k8s.io/client-go/informers/batch/v1"
    40  	coreinformers "k8s.io/client-go/informers/core/v1"
    41  	clientset "k8s.io/client-go/kubernetes"
    42  	"k8s.io/client-go/kubernetes/scheme"
    43  	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
    44  	batchv1listers "k8s.io/client-go/listers/batch/v1"
    45  	corelisters "k8s.io/client-go/listers/core/v1"
    46  	"k8s.io/client-go/tools/cache"
    47  	"k8s.io/client-go/tools/record"
    48  	"k8s.io/client-go/util/workqueue"
    49  	"k8s.io/klog/v2"
    50  	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
    51  	"k8s.io/kubernetes/pkg/controller"
    52  	"k8s.io/kubernetes/pkg/controller/job/metrics"
    53  	"k8s.io/kubernetes/pkg/features"
    54  	"k8s.io/utils/clock"
    55  	"k8s.io/utils/integer"
    56  	"k8s.io/utils/ptr"
    57  )
    58  
    59  // controllerKind contains the schema.GroupVersionKind for this controller type.
    60  var controllerKind = batch.SchemeGroupVersion.WithKind("Job")
    61  
    62  var (
    63  	// syncJobBatchPeriod is the batch period for controller sync invocations for a Job.
    64  	syncJobBatchPeriod = time.Second
    65  	// DefaultJobApiBackOff is the default API backoff period. Exported for tests.
    66  	DefaultJobApiBackOff = time.Second
    67  	// MaxJobApiBackOff is the max API backoff period. Exported for tests.
    68  	MaxJobApiBackOff = time.Minute
    69  	// DefaultJobPodFailureBackOff is the default pod failure backoff period. Exported for tests.
    70  	DefaultJobPodFailureBackOff = 10 * time.Second
    71  	// MaxJobPodFailureBackOff is the max pod failure backoff period. Exported for tests.
    72  	MaxJobPodFailureBackOff = 10 * time.Minute
    73  	// MaxUncountedPods is the maximum number of entries the slices in
    74  	// .status.uncountedTerminatedPods can hold, to keep their representation
    75  	// roughly below 20 KB. Exported for tests.
    76  	MaxUncountedPods = 500
    77  	// MaxPodCreateDeletePerSync is the maximum number of pods that can be
    78  	// created or deleted in a single sync call. Exported for tests.
    79  	MaxPodCreateDeletePerSync = 500
    80  )
    81  
    82  // Controller ensures that all Job objects have corresponding pods to
    83  // run their configured workload.
    84  type Controller struct {
    85  	kubeClient clientset.Interface
    86  	podControl controller.PodControlInterface
    87  
    88  	// To allow injection of the following for testing.
    89  	updateStatusHandler func(ctx context.Context, job *batch.Job) (*batch.Job, error)
    90  	patchJobHandler     func(ctx context.Context, job *batch.Job, patch []byte) error
    91  	syncHandler         func(ctx context.Context, jobKey string) error
    92  	// podStoreSynced returns true if the pod store has been synced at least once.
    93  	// Added as a member to the struct to allow injection for testing.
    94  	podStoreSynced cache.InformerSynced
    95  	// jobStoreSynced returns true if the job store has been synced at least once.
    96  	// Added as a member to the struct to allow injection for testing.
    97  	jobStoreSynced cache.InformerSynced
    98  
    99  	// A TTLCache of pod creates/deletes each job expects to see
   100  	expectations controller.ControllerExpectationsInterface
   101  
   102  	// finalizerExpectations tracks the Pod UIDs for which the controller
   103  	// expects to observe the tracking finalizer removed.
   104  	finalizerExpectations *uidTrackingExpectations
   105  
   106  	// A store of jobs
   107  	jobLister batchv1listers.JobLister
   108  
   109  	// A store of pods, populated by the podController
   110  	podStore corelisters.PodLister
   111  
   112  	// Jobs that need to be updated
   113  	queue workqueue.RateLimitingInterface
   114  
   115  	// Orphan deleted pods that still have a Job tracking finalizer to be removed
   116  	orphanQueue workqueue.RateLimitingInterface
   117  
   118  	broadcaster record.EventBroadcaster
   119  	recorder    record.EventRecorder
   120  
   121  	clock clock.WithTicker
   122  
   123  	// Store with information to compute the exponential backoff delay for pod
   124  	// recreation in case of pod failures.
   125  	podBackoffStore *backoffStore
   126  }
   127  
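        // syncJobCtx carries the state assembled for a single syncJob invocation:
        // the Job being synced, its Pods, the counters derived from them, and any
        // finish condition computed during the sync.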
   128  type syncJobCtx struct {
   129  	job                             *batch.Job
   130  	pods                            []*v1.Pod
   131  	finishedCondition               *batch.JobCondition
   132  	activePods                      []*v1.Pod
   133  	succeeded                       int32
   134  	prevSucceededIndexes            orderedIntervals
   135  	succeededIndexes                orderedIntervals
   136  	failedIndexes                   *orderedIntervals
   137  	newBackoffRecord                backoffRecord
   138  	expectedRmFinalizers            sets.Set[string]
   139  	uncounted                       *uncountedTerminatedPods
   140  	podsWithDelayedDeletionPerIndex map[int]*v1.Pod
   141  	terminating                     *int32
   142  }
   143  
   144  // NewController creates a new Job controller that keeps the relevant pods
   145  // in sync with their corresponding Job objects.
   146  func NewController(ctx context.Context, podInformer coreinformers.PodInformer, jobInformer batchinformers.JobInformer, kubeClient clientset.Interface) (*Controller, error) {
   147  	return newControllerWithClock(ctx, podInformer, jobInformer, kubeClient, &clock.RealClock{})
   148  }
   149  
   150  func newControllerWithClock(ctx context.Context, podInformer coreinformers.PodInformer, jobInformer batchinformers.JobInformer, kubeClient clientset.Interface, clock clock.WithTicker) (*Controller, error) {
   151  	eventBroadcaster := record.NewBroadcaster()
   152  	logger := klog.FromContext(ctx)
   153  
   154  	jm := &Controller{
   155  		kubeClient: kubeClient,
   156  		podControl: controller.RealPodControl{
   157  			KubeClient: kubeClient,
   158  			Recorder:   eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "job-controller"}),
   159  		},
   160  		expectations:          controller.NewControllerExpectations(),
   161  		finalizerExpectations: newUIDTrackingExpectations(),
   162  		queue:                 workqueue.NewRateLimitingQueueWithConfig(workqueue.NewItemExponentialFailureRateLimiter(DefaultJobApiBackOff, MaxJobApiBackOff), workqueue.RateLimitingQueueConfig{Name: "job", Clock: clock}),
   163  		orphanQueue:           workqueue.NewRateLimitingQueueWithConfig(workqueue.NewItemExponentialFailureRateLimiter(DefaultJobApiBackOff, MaxJobApiBackOff), workqueue.RateLimitingQueueConfig{Name: "job_orphan_pod", Clock: clock}),
   164  		broadcaster:           eventBroadcaster,
   165  		recorder:              eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "job-controller"}),
   166  		clock:                 clock,
   167  		podBackoffStore:       newBackoffStore(),
   168  	}
   169  
   170  	if _, err := jobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   171  		AddFunc: func(obj interface{}) {
   172  			jm.enqueueSyncJobImmediately(logger, obj)
   173  		},
   174  		UpdateFunc: func(oldObj, newObj interface{}) {
   175  			jm.updateJob(logger, oldObj, newObj)
   176  		},
   177  		DeleteFunc: func(obj interface{}) {
   178  			jm.deleteJob(logger, obj)
   179  		},
   180  	}); err != nil {
   181  		return nil, fmt.Errorf("adding Job event handler: %w", err)
   182  	}
   183  	jm.jobLister = jobInformer.Lister()
   184  	jm.jobStoreSynced = jobInformer.Informer().HasSynced
   185  
   186  	if _, err := podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   187  		AddFunc: func(obj interface{}) {
   188  			jm.addPod(logger, obj)
   189  		},
   190  		UpdateFunc: func(oldObj, newObj interface{}) {
   191  			jm.updatePod(logger, oldObj, newObj)
   192  		},
   193  		DeleteFunc: func(obj interface{}) {
   194  			jm.deletePod(logger, obj, true)
   195  		},
   196  	}); err != nil {
   197  		return nil, fmt.Errorf("adding Pod event handler: %w", err)
   198  	}
   199  	jm.podStore = podInformer.Lister()
   200  	jm.podStoreSynced = podInformer.Informer().HasSynced
   201  
   202  	jm.updateStatusHandler = jm.updateJobStatus
   203  	jm.patchJobHandler = jm.patchJob
   204  	jm.syncHandler = jm.syncJob
   205  
   206  	metrics.Register()
   207  
   208  	return jm, nil
   209  }
   210  
   211  // Run the main goroutine responsible for watching and syncing jobs.
   212  func (jm *Controller) Run(ctx context.Context, workers int) {
   213  	defer utilruntime.HandleCrash()
   214  	logger := klog.FromContext(ctx)
   215  
   216  	// Start events processing pipeline.
   217  	jm.broadcaster.StartStructuredLogging(0)
   218  	jm.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: jm.kubeClient.CoreV1().Events("")})
   219  	defer jm.broadcaster.Shutdown()
   220  
   221  	defer jm.queue.ShutDown()
   222  	defer jm.orphanQueue.ShutDown()
   223  
   224  	logger.Info("Starting job controller")
   225  	defer logger.Info("Shutting down job controller")
   226  
   227  	if !cache.WaitForNamedCacheSync("job", ctx.Done(), jm.podStoreSynced, jm.jobStoreSynced) {
   228  		return
   229  	}
   230  
   231  	for i := 0; i < workers; i++ {
   232  		go wait.UntilWithContext(ctx, jm.worker, time.Second)
   233  	}
   234  
   235  	go wait.UntilWithContext(ctx, jm.orphanWorker, time.Second)
   236  
   237  	<-ctx.Done()
   238  }
   239  
   240  // getPodJobs returns a list of Jobs that potentially match a Pod.
   241  func (jm *Controller) getPodJobs(pod *v1.Pod) []*batch.Job {
   242  	jobs, err := jm.jobLister.GetPodJobs(pod)
   243  	if err != nil {
   244  		return nil
   245  	}
   246  	if len(jobs) > 1 {
   247  		// ControllerRef will ensure we don't do anything crazy, but more than one
   248  		// item in this list nevertheless constitutes user error.
   249  		utilruntime.HandleError(fmt.Errorf("user error! more than one job is selecting pods with labels: %+v", pod.Labels))
   250  	}
   251  	ret := make([]*batch.Job, 0, len(jobs))
   252  	for i := range jobs {
   253  		ret = append(ret, &jobs[i])
   254  	}
   255  	return ret
   256  }
   257  
   258  // resolveControllerRef returns the controller referenced by a ControllerRef,
   259  // or nil if the ControllerRef could not be resolved to a matching controller
   260  // of the correct Kind.
   261  func (jm *Controller) resolveControllerRef(namespace string, controllerRef *metav1.OwnerReference) *batch.Job {
   262  	// We can't look up by UID, so look up by Name and then verify UID.
   263  	// Don't even try to look up by Name if it's the wrong Kind.
   264  	if controllerRef.Kind != controllerKind.Kind {
   265  		return nil
   266  	}
   267  	job, err := jm.jobLister.Jobs(namespace).Get(controllerRef.Name)
   268  	if err != nil {
   269  		return nil
   270  	}
   271  	if job.UID != controllerRef.UID {
   272  		// The controller we found with this Name is not the same one that the
   273  		// ControllerRef points to.
   274  		return nil
   275  	}
   276  	return job
   277  }
   278  
   279  // When a pod is created, enqueue the controller that manages it and update its expectations.
   280  func (jm *Controller) addPod(logger klog.Logger, obj interface{}) {
   281  	pod := obj.(*v1.Pod)
   282  	recordFinishedPodWithTrackingFinalizer(nil, pod)
   283  	if pod.DeletionTimestamp != nil {
   284  		// on a restart of the controller, it's possible a new pod shows up in a state that
   285  		// is already pending deletion. Prevent the pod from being a creation observation.
   286  		jm.deletePod(logger, pod, false)
   287  		return
   288  	}
   289  
   290  	// If it has a ControllerRef, that's all that matters.
   291  	if controllerRef := metav1.GetControllerOf(pod); controllerRef != nil {
   292  		job := jm.resolveControllerRef(pod.Namespace, controllerRef)
   293  		if job == nil {
   294  			return
   295  		}
   296  		jobKey, err := controller.KeyFunc(job)
   297  		if err != nil {
   298  			return
   299  		}
   300  		jm.expectations.CreationObserved(logger, jobKey)
   301  		jm.enqueueSyncJobBatched(logger, job)
   302  		return
   303  	}
   304  
   305  	// Otherwise, it's an orphan.
   306  	// Clean the finalizer.
   307  	if hasJobTrackingFinalizer(pod) {
   308  		jm.enqueueOrphanPod(pod)
   309  	}
   310  	// Get a list of all matching controllers and sync
   311  	// them to see if anyone wants to adopt it.
   312  	// DO NOT observe creation because no controller should be waiting for an
   313  	// orphan.
   314  	for _, job := range jm.getPodJobs(pod) {
   315  		jm.enqueueSyncJobBatched(logger, job)
   316  	}
   317  }
   318  
   319  // When a pod is updated, figure out what job/s manage it and wake them up.
   320  // If the labels of the pod have changed we need to awaken both the old
   321  // and new job. old and cur must be *v1.Pod types.
   322  func (jm *Controller) updatePod(logger klog.Logger, old, cur interface{}) {
   323  	curPod := cur.(*v1.Pod)
   324  	oldPod := old.(*v1.Pod)
   325  	recordFinishedPodWithTrackingFinalizer(oldPod, curPod)
   326  	if curPod.ResourceVersion == oldPod.ResourceVersion {
   327  		// Periodic resync will send update events for all known pods.
   328  		// Two different versions of the same pod will always have different RVs.
   329  		return
   330  	}
   331  	if curPod.DeletionTimestamp != nil {
   332  		// When a pod is deleted gracefully, its deletion timestamp is first modified to reflect a grace period,
   333  		// and after such time has passed, the kubelet actually deletes it from the store. We receive an update
   334  		// for modification of the deletion timestamp and expect the job to create more pods ASAP, not wait
   335  		// until the kubelet actually deletes the pod.
   336  		jm.deletePod(logger, curPod, false)
   337  		return
   338  	}
   339  
   340  	// Don't check if oldPod has the finalizer, as during ownership transfer
   341  	// finalizers might be re-added and removed again on behalf of the new owner.
   342  	// If all those Pod updates collapse into a single event, the finalizer
   343  	// might be removed in oldPod and curPod. We want to record the latest
   344  	// state.
   345  	finalizerRemoved := !hasJobTrackingFinalizer(curPod)
   346  	curControllerRef := metav1.GetControllerOf(curPod)
   347  	oldControllerRef := metav1.GetControllerOf(oldPod)
   348  	controllerRefChanged := !reflect.DeepEqual(curControllerRef, oldControllerRef)
   349  	if controllerRefChanged && oldControllerRef != nil {
   350  		// The ControllerRef was changed. Sync the old controller, if any.
   351  		if job := jm.resolveControllerRef(oldPod.Namespace, oldControllerRef); job != nil {
   352  			if finalizerRemoved {
   353  				key, err := controller.KeyFunc(job)
   354  				if err == nil {
   355  					jm.finalizerExpectations.finalizerRemovalObserved(logger, key, string(curPod.UID))
   356  				}
   357  			}
   358  			jm.enqueueSyncJobBatched(logger, job)
   359  		}
   360  	}
   361  
   362  	// If it has a ControllerRef, that's all that matters.
   363  	if curControllerRef != nil {
   364  		job := jm.resolveControllerRef(curPod.Namespace, curControllerRef)
   365  		if job == nil {
   366  			return
   367  		}
   368  		if finalizerRemoved {
   369  			key, err := controller.KeyFunc(job)
   370  			if err == nil {
   371  				jm.finalizerExpectations.finalizerRemovalObserved(logger, key, string(curPod.UID))
   372  			}
   373  		}
   374  		jm.enqueueSyncJobBatched(logger, job)
   375  		return
   376  	}
   377  
   378  	// Otherwise, it's an orphan.
   379  	// Clean the finalizer.
   380  	if hasJobTrackingFinalizer(curPod) {
   381  		jm.enqueueOrphanPod(curPod)
   382  	}
   383  	// If anything changed, sync matching controllers
   384  	// to see if anyone wants to adopt it now.
   385  	labelChanged := !reflect.DeepEqual(curPod.Labels, oldPod.Labels)
   386  	if labelChanged || controllerRefChanged {
   387  		for _, job := range jm.getPodJobs(curPod) {
   388  			jm.enqueueSyncJobBatched(logger, job)
   389  		}
   390  	}
   391  }
   392  
   393  // When a pod is deleted, enqueue the job that manages the pod and update its expectations.
   394  // obj could be a *v1.Pod, or a DeletedFinalStateUnknown marker item.
   395  func (jm *Controller) deletePod(logger klog.Logger, obj interface{}, final bool) {
   396  	pod, ok := obj.(*v1.Pod)
   397  	if final {
   398  		recordFinishedPodWithTrackingFinalizer(pod, nil)
   399  	}
   400  
   401  	// When a delete is dropped, the relist will notice a pod in the store not
   402  	// in the list, leading to the insertion of a tombstone object which contains
   403  	// the deleted key/value. Note that this value might be stale. If the pod
   404  	// changed labels the new job will not be woken up till the periodic resync.
   405  	if !ok {
   406  		tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
   407  		if !ok {
   408  			utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %+v", obj))
   409  			return
   410  		}
   411  		pod, ok = tombstone.Obj.(*v1.Pod)
   412  		if !ok {
   413  			utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a pod %+v", obj))
   414  			return
   415  		}
   416  	}
   417  
   418  	controllerRef := metav1.GetControllerOf(pod)
   419  	hasFinalizer := hasJobTrackingFinalizer(pod)
   420  	if controllerRef == nil {
   421  		// No controller should care about orphans being deleted.
   422  		// But this pod might have belonged to a Job and the GC removed the reference.
   423  		if hasFinalizer {
   424  			jm.enqueueOrphanPod(pod)
   425  		}
   426  		return
   427  	}
   428  	job := jm.resolveControllerRef(pod.Namespace, controllerRef)
   429  	if job == nil || IsJobFinished(job) {
   430  		// syncJob will not remove this finalizer.
   431  		if hasFinalizer {
   432  			jm.enqueueOrphanPod(pod)
   433  		}
   434  		return
   435  	}
   436  	jobKey, err := controller.KeyFunc(job)
   437  	if err != nil {
   438  		return
   439  	}
   440  	jm.expectations.DeletionObserved(logger, jobKey)
   441  
   442  	// Consider the finalizer removed if this is the final delete. Otherwise, this is
   443  	// an update of the deletion timestamp, so check whether the finalizer is still present.
   444  	if final || !hasFinalizer {
   445  		jm.finalizerExpectations.finalizerRemovalObserved(logger, jobKey, string(pod.UID))
   446  	}
   447  
   448  	jm.enqueueSyncJobBatched(logger, job)
   449  }
   450  
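        // updateJob reacts to a Job update: it enqueues a batched sync for
        // status-only changes, an immediate sync when the generation (spec)
        // changed, and schedules an extra sync when ActiveDeadlineSeconds is set
        // or changed on a Job that has already started.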
   451  func (jm *Controller) updateJob(logger klog.Logger, old, cur interface{}) {
   452  	oldJob := old.(*batch.Job)
   453  	curJob := cur.(*batch.Job)
   454  
   455  	// controller.KeyFunc never returns an error for a valid Job object.
   456  	key, err := controller.KeyFunc(curJob)
   457  	if err != nil {
   458  		return
   459  	}
   460  
   461  	if curJob.Generation == oldJob.Generation {
   462  		// No generation change means this is a status update (typically triggered
   463  		// by pod events), so delay the sync to batch such updates.
   464  		jm.enqueueSyncJobBatched(logger, curJob)
   465  	} else {
   466  		// Trigger immediate sync when spec is changed.
   467  		jm.enqueueSyncJobImmediately(logger, curJob)
   468  	}
   469  
   470  	// The job shouldn't be marked as finished until all pod finalizers are removed.
   471  	// This is a backup operation in this case.
   472  	if IsJobFinished(curJob) {
   473  		jm.cleanupPodFinalizers(curJob)
   474  	}
   475  
   476  	// check if we need to schedule an additional resync for ActiveDeadlineSeconds
   477  	if curJob.Status.StartTime != nil {
   478  		curADS := curJob.Spec.ActiveDeadlineSeconds
   479  		if curADS == nil {
   480  			return
   481  		}
   482  		oldADS := oldJob.Spec.ActiveDeadlineSeconds
   483  		if oldADS == nil || *oldADS != *curADS {
   484  			passed := jm.clock.Since(curJob.Status.StartTime.Time)
   485  			total := time.Duration(*curADS) * time.Second
   486  			// AddAfter will handle total < passed
   487  			jm.queue.AddAfter(key, total-passed)
   488  			logger.V(4).Info("job's ActiveDeadlineSeconds updated, will resync", "key", key, "interval", total-passed)
   489  		}
   490  	}
   491  }
   492  
   493  // deleteJob enqueues the job and all the pods associated with it that still
   494  // have a finalizer.
   495  func (jm *Controller) deleteJob(logger klog.Logger, obj interface{}) {
   496  	jm.enqueueSyncJobImmediately(logger, obj)
   497  	jobObj, ok := obj.(*batch.Job)
   498  	if !ok {
   499  		tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
   500  		if !ok {
   501  			utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %+v", obj))
   502  			return
   503  		}
   504  		jobObj, ok = tombstone.Obj.(*batch.Job)
   505  		if !ok {
   506  			utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a job %+v", obj))
   507  			return
   508  		}
   509  	}
   510  	jm.cleanupPodFinalizers(jobObj)
   511  }
   512  
   513  // enqueueSyncJobImmediately tells the Job controller to invoke syncJob
   514  // immediately.
   515  // It is only used for Job events (creation, deletion, spec update).
   516  // obj could be a *batch.Job, or a DeletedFinalStateUnknown marker item.
   517  func (jm *Controller) enqueueSyncJobImmediately(logger klog.Logger, obj interface{}) {
   518  	jm.enqueueSyncJobInternal(logger, obj, 0)
   519  }
   520  
   521  // enqueueSyncJobBatched tells the controller to invoke syncJob with a
   522  // constant batching delay.
   523  // It is used for:
   524  // - Pod events (creation, deletion, update)
   525  // - Job status update
   526  // obj could be a *batch.Job, or a DeletedFinalStateUnknown marker item.
   527  func (jm *Controller) enqueueSyncJobBatched(logger klog.Logger, obj interface{}) {
   528  	jm.enqueueSyncJobInternal(logger, obj, syncJobBatchPeriod)
   529  }
   530  
   531  // enqueueSyncJobWithDelay tells the controller to invoke syncJob with a
   532  // custom delay, but not smaller than the batching delay.
   533  // It is used when pod recreations are delayed due to pod failures.
   534  // obj could be a *batch.Job, or a DeletedFinalStateUnknown marker item.
   535  func (jm *Controller) enqueueSyncJobWithDelay(logger klog.Logger, obj interface{}, delay time.Duration) {
   536  	if delay < syncJobBatchPeriod {
   537  		delay = syncJobBatchPeriod
   538  	}
   539  	jm.enqueueSyncJobInternal(logger, obj, delay)
   540  }
   541  
   542  func (jm *Controller) enqueueSyncJobInternal(logger klog.Logger, obj interface{}, delay time.Duration) {
   543  	key, err := controller.KeyFunc(obj)
   544  	if err != nil {
   545  		utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err))
   546  		return
   547  	}
   548  	// TODO: Handle overlapping controllers better. Either disallow them at admission time or
   549  	// deterministically avoid syncing controllers that fight over pods. Currently, we only
   550  	// ensure that the same controller is synced for a given pod. When we periodically relist
   551  	// all controllers there will still be some replica instability. One way to handle this is
   552  	// by querying the store for all controllers that this rc overlaps, as well as all
   553  	// by querying the store for all controllers that this job overlaps, as well as all
   554  	// controllers that overlap this job, and sorting them.
   555  	jm.queue.AddAfter(key, delay)
   556  }
   557  
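        // enqueueOrphanPod adds the pod key to the orphan queue so that
        // syncOrphanPod can remove the Job tracking finalizer once the pod is
        // confirmed to no longer be owned by an active Job.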
   558  func (jm *Controller) enqueueOrphanPod(obj *v1.Pod) {
   559  	key, err := controller.KeyFunc(obj)
   560  	if err != nil {
   561  		utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err))
   562  		return
   563  	}
   564  	jm.orphanQueue.Add(key)
   565  }
   566  
   567  // worker runs a worker thread that just dequeues items, processes them, and marks them done.
   568  // It enforces that the syncHandler is never invoked concurrently with the same key.
   569  func (jm *Controller) worker(ctx context.Context) {
   570  	for jm.processNextWorkItem(ctx) {
   571  	}
   572  }
   573  
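        // processNextWorkItem pops a single Job key off the queue, invokes the
        // sync handler for it and, on error, requeues the key with rate limiting.
        // It returns false only when the queue is shutting down.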
   574  func (jm *Controller) processNextWorkItem(ctx context.Context) bool {
   575  	key, quit := jm.queue.Get()
   576  	if quit {
   577  		return false
   578  	}
   579  	defer jm.queue.Done(key)
   580  
   581  	err := jm.syncHandler(ctx, key.(string))
   582  	if err == nil {
   583  		jm.queue.Forget(key)
   584  		return true
   585  	}
   586  
   587  	utilruntime.HandleError(fmt.Errorf("syncing job: %w", err))
   588  	jm.queue.AddRateLimited(key)
   589  
   590  	return true
   591  }
   592  
   593  func (jm *Controller) orphanWorker(ctx context.Context) {
   594  	for jm.processNextOrphanPod(ctx) {
   595  	}
   596  }
   597  
   598  func (jm *Controller) processNextOrphanPod(ctx context.Context) bool {
   599  	key, quit := jm.orphanQueue.Get()
   600  	if quit {
   601  		return false
   602  	}
   603  	defer jm.orphanQueue.Done(key)
   604  	err := jm.syncOrphanPod(ctx, key.(string))
   605  	if err != nil {
   606  		utilruntime.HandleError(fmt.Errorf("syncing orphan pod: %w", err))
   607  		jm.orphanQueue.AddRateLimited(key)
   608  	} else {
   609  		jm.orphanQueue.Forget(key)
   610  	}
   611  
   612  	return true
   613  }
   614  
   615  // syncOrphanPod removes the tracking finalizer from an orphan pod if found.
   616  func (jm *Controller) syncOrphanPod(ctx context.Context, key string) error {
   617  	startTime := jm.clock.Now()
   618  	logger := klog.FromContext(ctx)
   619  	defer func() {
   620  		logger.V(4).Info("Finished syncing orphan pod", "pod", key, "elapsed", jm.clock.Since(startTime))
   621  	}()
   622  
   623  	ns, name, err := cache.SplitMetaNamespaceKey(key)
   624  	if err != nil {
   625  		return err
   626  	}
   627  
   628  	sharedPod, err := jm.podStore.Pods(ns).Get(name)
   629  	if err != nil {
   630  		if apierrors.IsNotFound(err) {
   631  			logger.V(4).Info("Orphan pod has been deleted", "pod", key)
   632  			return nil
   633  		}
   634  		return err
   635  	}
   636  	// Make sure the pod is still orphaned.
   637  	if controllerRef := metav1.GetControllerOf(sharedPod); controllerRef != nil {
   638  		job := jm.resolveControllerRef(sharedPod.Namespace, controllerRef)
   639  		if job != nil && !IsJobFinished(job) {
   640  			// The pod was adopted. Do not remove finalizer.
   641  			return nil
   642  		}
   643  	}
   644  	if patch := removeTrackingFinalizerPatch(sharedPod); patch != nil {
   645  		if err := jm.podControl.PatchPod(ctx, ns, name, patch); err != nil && !apierrors.IsNotFound(err) {
   646  			return err
   647  		}
   648  	}
   649  	return nil
   650  }
   651  
   652  // getPodsForJob returns the set of pods that this Job should manage.
   653  // It also reconciles ControllerRef by adopting/orphaning, adding tracking
   654  // finalizers.
   655  // Note that the returned Pods are pointers into the cache.
   656  func (jm *Controller) getPodsForJob(ctx context.Context, j *batch.Job) ([]*v1.Pod, error) {
   657  	selector, err := metav1.LabelSelectorAsSelector(j.Spec.Selector)
   658  	if err != nil {
   659  		return nil, fmt.Errorf("couldn't convert Job selector: %v", err)
   660  	}
   661  	// List all pods to include those that don't match the selector anymore
   662  	// but have a ControllerRef pointing to this controller.
   663  	pods, err := jm.podStore.Pods(j.Namespace).List(labels.Everything())
   664  	if err != nil {
   665  		return nil, err
   666  	}
   667  	// If any adoptions are attempted, we should first recheck for deletion
   668  	// with an uncached quorum read sometime after listing Pods (see #42639).
   669  	canAdoptFunc := controller.RecheckDeletionTimestamp(func(ctx context.Context) (metav1.Object, error) {
   670  		fresh, err := jm.kubeClient.BatchV1().Jobs(j.Namespace).Get(ctx, j.Name, metav1.GetOptions{})
   671  		if err != nil {
   672  			return nil, err
   673  		}
   674  		if fresh.UID != j.UID {
   675  			return nil, fmt.Errorf("original Job %v/%v is gone: got uid %v, wanted %v", j.Namespace, j.Name, fresh.UID, j.UID)
   676  		}
   677  		return fresh, nil
   678  	})
   679  	cm := controller.NewPodControllerRefManager(jm.podControl, j, selector, controllerKind, canAdoptFunc, batch.JobTrackingFinalizer)
   680  	// When adopting Pods, this operation adds an ownerRef and finalizers.
   681  	pods, err = cm.ClaimPods(ctx, pods)
   682  	if err != nil {
   683  		return pods, err
   684  	}
   685  	// Set finalizer on adopted pods for the remaining calculations.
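        	// The listed Pods are pointers into the informer cache and may not yet
        	// reflect the ownerRef and finalizer patched on by ClaimPods above, so
        	// update a local copy for Pods adopted in this sync.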
   686  	for i, p := range pods {
   687  		adopted := true
   688  		for _, r := range p.OwnerReferences {
   689  			if r.UID == j.UID {
   690  				adopted = false
   691  				break
   692  			}
   693  		}
   694  		if adopted && !hasJobTrackingFinalizer(p) {
   695  			pods[i] = p.DeepCopy()
   696  			pods[i].Finalizers = append(p.Finalizers, batch.JobTrackingFinalizer)
   697  		}
   698  	}
   699  	return pods, err
   700  }
   701  
   702  // syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning
   703  // it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
   704  // concurrently with the same key.
   705  func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
   706  	startTime := jm.clock.Now()
   707  	logger := klog.FromContext(ctx)
   708  	defer func() {
   709  		logger.V(4).Info("Finished syncing job", "key", key, "elapsed", jm.clock.Since(startTime))
   710  	}()
   711  
   712  	ns, name, err := cache.SplitMetaNamespaceKey(key)
   713  	if err != nil {
   714  		return err
   715  	}
   716  	if len(ns) == 0 || len(name) == 0 {
   717  		return fmt.Errorf("invalid job key %q: either namespace or name is missing", key)
   718  	}
   719  	sharedJob, err := jm.jobLister.Jobs(ns).Get(name)
   720  	if err != nil {
   721  		if apierrors.IsNotFound(err) {
   722  			logger.V(4).Info("Job has been deleted", "key", key)
   723  			jm.expectations.DeleteExpectations(logger, key)
   724  			jm.finalizerExpectations.deleteExpectations(logger, key)
   725  
   726  			err := jm.podBackoffStore.removeBackoffRecord(key)
   727  			if err != nil {
   728  				// re-syncing here as the record has to be removed for finished/deleted jobs
   729  				return fmt.Errorf("error removing backoff record: %w", err)
   730  			}
   731  			return nil
   732  		}
   733  		return err
   734  	}
   735  	// make a copy so we don't mutate the shared cache
   736  	job := *sharedJob.DeepCopy()
   737  
   738  	// if job was finished previously, we don't want to redo the termination
   739  	if IsJobFinished(&job) {
   740  		err := jm.podBackoffStore.removeBackoffRecord(key)
   741  		if err != nil {
   742  			// re-syncing here as the record has to be removed for finished/deleted jobs
   743  			return fmt.Errorf("error removing backoff record: %w", err)
   744  		}
   745  		return nil
   746  	}
   747  
   748  	if job.Spec.CompletionMode != nil && *job.Spec.CompletionMode != batch.NonIndexedCompletion && *job.Spec.CompletionMode != batch.IndexedCompletion {
   749  		jm.recorder.Event(&job, v1.EventTypeWarning, "UnknownCompletionMode", "Skipped Job sync because completion mode is unknown")
   750  		return nil
   751  	}
   752  
   753  	completionMode := getCompletionMode(&job)
   754  	action := metrics.JobSyncActionReconciling
   755  
   756  	defer func() {
   757  		result := "success"
   758  		if rErr != nil {
   759  			result = "error"
   760  		}
   761  
   762  		metrics.JobSyncDurationSeconds.WithLabelValues(completionMode, result, action).Observe(jm.clock.Since(startTime).Seconds())
   763  		metrics.JobSyncNum.WithLabelValues(completionMode, result, action).Inc()
   764  	}()
   765  
   766  	if job.Status.UncountedTerminatedPods == nil {
   767  		job.Status.UncountedTerminatedPods = &batch.UncountedTerminatedPods{}
   768  	}
   769  
   770  	// Check the expectations of the job before counting active pods, otherwise a new pod can sneak in
   771  	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
   772  	// the store after we've checked the expectation, the job sync is just deferred till the next relist.
   773  	satisfiedExpectations := jm.expectations.SatisfiedExpectations(logger, key)
   774  
   775  	pods, err := jm.getPodsForJob(ctx, &job)
   776  	if err != nil {
   777  		return err
   778  	}
   779  	var terminating *int32
   780  	if feature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) {
   781  		terminating = ptr.To(controller.CountTerminatingPods(pods))
   782  	}
   783  	jobCtx := &syncJobCtx{
   784  		job:                  &job,
   785  		pods:                 pods,
   786  		activePods:           controller.FilterActivePods(logger, pods),
   787  		terminating:          terminating,
   788  		uncounted:            newUncountedTerminatedPods(*job.Status.UncountedTerminatedPods),
   789  		expectedRmFinalizers: jm.finalizerExpectations.getExpectedUIDs(key),
   790  	}
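        	// active counts the currently running pods; succeeded and failed start
        	// from the last recorded status, add pods that finished since the
        	// previous sync, and include pods already staged in
        	// .status.uncountedTerminatedPods.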
   791  	active := int32(len(jobCtx.activePods))
   792  	newSucceededPods, newFailedPods := getNewFinishedPods(jobCtx)
   793  	jobCtx.succeeded = job.Status.Succeeded + int32(len(newSucceededPods)) + int32(len(jobCtx.uncounted.succeeded))
   794  	failed := job.Status.Failed + int32(nonIgnoredFailedPodsCount(jobCtx, newFailedPods)) + int32(len(jobCtx.uncounted.failed))
   795  	var ready *int32
   796  	if feature.DefaultFeatureGate.Enabled(features.JobReadyPods) {
   797  		ready = ptr.To(countReadyPods(jobCtx.activePods))
   798  	}
   799  
   800  	// Job first start. Set StartTime only if the job is not in the suspended state.
   801  	if job.Status.StartTime == nil && !jobSuspended(&job) {
   802  		now := metav1.NewTime(jm.clock.Now())
   803  		job.Status.StartTime = &now
   804  	}
   805  
   806  	jobCtx.newBackoffRecord = jm.podBackoffStore.newBackoffRecord(key, newSucceededPods, newFailedPods)
   807  
   808  	var manageJobErr error
   809  
   810  	exceedsBackoffLimit := failed > *job.Spec.BackoffLimit
   811  
   812  	if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) {
   813  		if failureTargetCondition := findConditionByType(job.Status.Conditions, batch.JobFailureTarget); failureTargetCondition != nil {
   814  			jobCtx.finishedCondition = newFailedConditionForFailureTarget(failureTargetCondition, jm.clock.Now())
   815  		} else if failJobMessage := getFailJobMessage(&job, pods); failJobMessage != nil {
   816  			// Prepare the interim FailureTarget condition to record the failure message before the finalizers (allowing removal of the pods) are removed.
   817  			jobCtx.finishedCondition = newCondition(batch.JobFailureTarget, v1.ConditionTrue, batch.JobReasonPodFailurePolicy, *failJobMessage, jm.clock.Now())
   818  		}
   819  	}
   820  	if jobCtx.finishedCondition == nil {
   821  		if exceedsBackoffLimit || pastBackoffLimitOnFailure(&job, pods) {
   822  			// check if the number of pod restarts exceeds the backoff (for restartPolicy OnFailure only)
   823  			// OR if the number of failed pods increased since the last syncJob
   824  			jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, batch.JobReasonBackoffLimitExceeded, "Job has reached the specified backoff limit", jm.clock.Now())
   825  		} else if jm.pastActiveDeadline(&job) {
   826  			jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, batch.JobReasonDeadlineExceeded, "Job was active longer than specified deadline", jm.clock.Now())
   827  		} else if job.Spec.ActiveDeadlineSeconds != nil && !jobSuspended(&job) {
   828  			syncDuration := time.Duration(*job.Spec.ActiveDeadlineSeconds)*time.Second - jm.clock.Since(job.Status.StartTime.Time)
   829  			logger.V(2).Info("Job has activeDeadlineSeconds configuration. Will sync this job again", "key", key, "nextSyncIn", syncDuration)
   830  			jm.queue.AddAfter(key, syncDuration)
   831  		}
   832  	}
   833  
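        	// For Indexed Jobs, success is tracked per completion index: succeeded is
        	// recomputed from the intervals of completed indexes and, when backoff
        	// limit per index is used, accumulated failed indexes may finish the Job.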
   834  	if isIndexedJob(&job) {
   835  		jobCtx.prevSucceededIndexes, jobCtx.succeededIndexes = calculateSucceededIndexes(logger, &job, pods)
   836  		jobCtx.succeeded = int32(jobCtx.succeededIndexes.total())
   837  		if hasBackoffLimitPerIndex(&job) {
   838  			jobCtx.failedIndexes = calculateFailedIndexes(logger, &job, pods)
   839  			if jobCtx.finishedCondition == nil {
   840  				if job.Spec.MaxFailedIndexes != nil && jobCtx.failedIndexes.total() > int(*job.Spec.MaxFailedIndexes) {
   841  					jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, batch.JobReasonMaxFailedIndexesExceeded, "Job has exceeded the specified maximal number of failed indexes", jm.clock.Now())
   842  				} else if jobCtx.failedIndexes.total() > 0 && jobCtx.failedIndexes.total()+jobCtx.succeededIndexes.total() >= int(*job.Spec.Completions) {
   843  					jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, batch.JobReasonFailedIndexes, "Job has failed indexes", jm.clock.Now())
   844  				}
   845  			}
   846  			jobCtx.podsWithDelayedDeletionPerIndex = getPodsWithDelayedDeletionPerIndex(logger, jobCtx)
   847  		}
   848  	}
   849  	suspendCondChanged := false
   850  	// Remove active pods if Job failed.
   851  	if jobCtx.finishedCondition != nil {
   852  		deleted, err := jm.deleteActivePods(ctx, &job, jobCtx.activePods)
   853  		if deleted != active || !satisfiedExpectations {
   854  			// Can't declare the Job as finished yet, as there might be remaining
   855  			// pod finalizers or pods that are not in the informer's cache yet.
   856  			jobCtx.finishedCondition = nil
   857  		}
   858  		active -= deleted
   859  		manageJobErr = err
   860  	} else {
   861  		manageJobCalled := false
   862  		if satisfiedExpectations && job.DeletionTimestamp == nil {
   863  			active, action, manageJobErr = jm.manageJob(ctx, &job, jobCtx)
   864  			manageJobCalled = true
   865  		}
   866  		complete := false
   867  		if job.Spec.Completions == nil {
   868  			// This type of job is complete when any pod exits with success.
   869  			// Each pod is capable of
   870  			// determining whether or not the entire Job is done.  Subsequent pods are
   871  			// not expected to fail, but if they do, the failure is ignored.  Once any
   872  			// pod succeeds, the controller waits for remaining pods to finish, and
   873  			// then the job is complete.
   874  			complete = jobCtx.succeeded > 0 && active == 0
   875  		} else {
   876  			// Job specifies a number of completions.  This type of job signals
   877  			// success by having that number of successes.  Since we do not
   878  			// start more pods than there are remaining completions, there should
   879  			// not be any remaining active pods once this count is reached.
   880  			complete = jobCtx.succeeded >= *job.Spec.Completions && active == 0
   881  		}
   882  		if complete {
   883  			jobCtx.finishedCondition = newCondition(batch.JobComplete, v1.ConditionTrue, "", "", jm.clock.Now())
   884  		} else if manageJobCalled {
   885  			// Update the conditions / emit events only if manageJob was called in
   886  			// this syncJob. Otherwise wait for the right syncJob call to make
   887  			// updates.
   888  			if job.Spec.Suspend != nil && *job.Spec.Suspend {
   889  				// Job can be in the suspended state only if it is NOT completed.
   890  				var isUpdated bool
   891  				job.Status.Conditions, isUpdated = ensureJobConditionStatus(job.Status.Conditions, batch.JobSuspended, v1.ConditionTrue, "JobSuspended", "Job suspended", jm.clock.Now())
   892  				if isUpdated {
   893  					suspendCondChanged = true
   894  					jm.recorder.Event(&job, v1.EventTypeNormal, "Suspended", "Job suspended")
   895  				}
   896  			} else {
   897  				// Job not suspended.
   898  				var isUpdated bool
   899  				job.Status.Conditions, isUpdated = ensureJobConditionStatus(job.Status.Conditions, batch.JobSuspended, v1.ConditionFalse, "JobResumed", "Job resumed", jm.clock.Now())
   900  				if isUpdated {
   901  					suspendCondChanged = true
   902  					jm.recorder.Event(&job, v1.EventTypeNormal, "Resumed", "Job resumed")
   903  					// Resumed jobs will always reset StartTime to current time. This is
   904  					// done because the ActiveDeadlineSeconds timer shouldn't go off
   905  					// whilst the Job is still suspended and resetting StartTime is
   906  					// consistent with resuming a Job created in the suspended state.
   907  					// (ActiveDeadlineSeconds is interpreted as the number of seconds a
   908  					// Job is continuously active.)
   909  					now := metav1.NewTime(jm.clock.Now())
   910  					job.Status.StartTime = &now
   911  				}
   912  			}
   913  		}
   914  	}
   915  
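        	// A status flush is needed when the suspend condition changed or any of
        	// the top-level counters (active, ready, terminating) differ from what is
        	// currently recorded in the Job status.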
   916  	needsStatusUpdate := suspendCondChanged || active != job.Status.Active || !ptr.Equal(ready, job.Status.Ready)
   917  	needsStatusUpdate = needsStatusUpdate || !ptr.Equal(job.Status.Terminating, jobCtx.terminating)
   918  	job.Status.Active = active
   919  	job.Status.Ready = ready
   920  	job.Status.Terminating = jobCtx.terminating
   921  	err = jm.trackJobStatusAndRemoveFinalizers(ctx, jobCtx, needsStatusUpdate)
   922  	if err != nil {
   923  		return fmt.Errorf("tracking status: %w", err)
   924  	}
   925  
   926  	return manageJobErr
   927  }
   928  
   929  // deleteActivePods issues deletion for active Pods, preserving finalizers.
   930  // This is done through DELETE calls that set deletion timestamps.
   931  // The method trackJobStatusAndRemoveFinalizers removes the finalizers, after
   932  // which the objects can actually be deleted.
   933  // Returns the number of successfully issued deletions.
   934  func (jm *Controller) deleteActivePods(ctx context.Context, job *batch.Job, pods []*v1.Pod) (int32, error) {
   935  	errCh := make(chan error, len(pods))
   936  	successfulDeletes := int32(len(pods))
   937  	wg := sync.WaitGroup{}
   938  	wg.Add(len(pods))
   939  	for i := range pods {
   940  		go func(pod *v1.Pod) {
   941  			defer wg.Done()
   942  			if err := jm.podControl.DeletePod(ctx, job.Namespace, pod.Name, job); err != nil && !apierrors.IsNotFound(err) {
   943  				atomic.AddInt32(&successfulDeletes, -1)
   944  				errCh <- err
   945  				utilruntime.HandleError(err)
   946  			}
   947  		}(pods[i])
   948  	}
   949  	wg.Wait()
   950  	return successfulDeletes, errorFromChannel(errCh)
   951  }
   952  
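        // nonIgnoredFailedPodsCount returns how many of the given failed pods
        // should count towards .status.failed; when the JobPodFailurePolicy
        // feature gate is enabled, pods matched by a pod failure policy rule that
        // excludes them from counting are skipped.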
   953  func nonIgnoredFailedPodsCount(jobCtx *syncJobCtx, failedPods []*v1.Pod) int {
   954  	result := len(failedPods)
   955  	if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && jobCtx.job.Spec.PodFailurePolicy != nil {
   956  		for _, p := range failedPods {
   957  			_, countFailed, _ := matchPodFailurePolicy(jobCtx.job.Spec.PodFailurePolicy, p)
   958  			if !countFailed {
   959  				result--
   960  			}
   961  		}
   962  	}
   963  	return result
   964  }
   965  
   966  // deleteJobPods deletes the pods, returns the number of successful removals
   967  // and any error.
   968  func (jm *Controller) deleteJobPods(ctx context.Context, job *batch.Job, jobKey string, pods []*v1.Pod) (int32, error) {
   969  	errCh := make(chan error, len(pods))
   970  	successfulDeletes := int32(len(pods))
   971  	logger := klog.FromContext(ctx)
   972  
   973  	failDelete := func(pod *v1.Pod, err error) {
   974  		// Decrement the expected number of deletes because the informer won't observe this deletion
   975  		jm.expectations.DeletionObserved(logger, jobKey)
   976  		if !apierrors.IsNotFound(err) {
   977  			logger.V(2).Info("Failed to delete Pod", "job", klog.KObj(job), "pod", klog.KObj(pod), "err", err)
   978  			atomic.AddInt32(&successfulDeletes, -1)
   979  			errCh <- err
   980  			utilruntime.HandleError(err)
   981  		}
   982  	}
   983  
   984  	wg := sync.WaitGroup{}
   985  	wg.Add(len(pods))
   986  	for i := range pods {
   987  		go func(pod *v1.Pod) {
   988  			defer wg.Done()
   989  			if patch := removeTrackingFinalizerPatch(pod); patch != nil {
   990  				if err := jm.podControl.PatchPod(ctx, pod.Namespace, pod.Name, patch); err != nil {
   991  					failDelete(pod, fmt.Errorf("removing completion finalizer: %w", err))
   992  					return
   993  				}
   994  			}
   995  			if err := jm.podControl.DeletePod(ctx, job.Namespace, pod.Name, job); err != nil {
   996  				failDelete(pod, err)
   997  			}
   998  		}(pods[i])
   999  	}
  1000  	wg.Wait()
  1001  	return successfulDeletes, errorFromChannel(errCh)
  1002  }
  1003  
  1004  // trackJobStatusAndRemoveFinalizers does:
  1005  //  1. Add finished Pods to .status.uncountedTerminatedPods
  1006  //  2. Remove the finalizers from the Pods if they completed or were removed
  1007  //     or the job was removed.
  1008  //  3. Increment job counters for pods that no longer have a finalizer.
  1009  //  4. Add Complete condition if satisfied with current counters.
  1010  //
  1011  // It does this up to a limited number of Pods so that the size of .status
  1012  // doesn't grow too much and this sync doesn't starve other Jobs.
  1013  func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, jobCtx *syncJobCtx, needsFlush bool) error {
  1014  	logger := klog.FromContext(ctx)
  1015  
  1016  	isIndexed := isIndexedJob(jobCtx.job)
  1017  	var podsToRemoveFinalizer []*v1.Pod
  1018  	uncountedStatus := jobCtx.job.Status.UncountedTerminatedPods
  1019  	var newSucceededIndexes []int
  1020  	if isIndexed {
  1021  		// Sort to introduce completed Indexes in order.
  1022  		sort.Sort(byCompletionIndex(jobCtx.pods))
  1023  	}
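        	// Collect the UIDs of pods that still carry the tracking finalizer and
        	// for which the controller is not already expecting a removal.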
  1024  	uidsWithFinalizer := make(sets.Set[string], len(jobCtx.pods))
  1025  	for _, p := range jobCtx.pods {
  1026  		uid := string(p.UID)
  1027  		if hasJobTrackingFinalizer(p) && !jobCtx.expectedRmFinalizers.Has(uid) {
  1028  			uidsWithFinalizer.Insert(uid)
  1029  		}
  1030  	}
  1031  
  1032  	// Shallow copy, as it will only be used to detect changes in the counters.
  1033  	oldCounters := jobCtx.job.Status
  1034  	if cleanUncountedPodsWithoutFinalizers(&jobCtx.job.Status, uidsWithFinalizer) {
  1035  		needsFlush = true
  1036  	}
  1037  	podFailureCountByPolicyAction := map[string]int{}
  1038  	reachedMaxUncountedPods := false
  1039  	for _, pod := range jobCtx.pods {
  1040  		if !hasJobTrackingFinalizer(pod) || jobCtx.expectedRmFinalizers.Has(string(pod.UID)) {
  1041  			// This pod was processed in a previous sync.
  1042  			continue
  1043  		}
  1044  		considerPodFailed := isPodFailed(pod, jobCtx.job)
  1045  		if !canRemoveFinalizer(logger, jobCtx, pod, considerPodFailed) {
  1046  			continue
  1047  		}
  1048  		podsToRemoveFinalizer = append(podsToRemoveFinalizer, pod)
  1049  		if pod.Status.Phase == v1.PodSucceeded && !jobCtx.uncounted.failed.Has(string(pod.UID)) {
  1050  			if isIndexed {
  1051  				// The completion index is enough to avoid recounting succeeded pods.
  1052  				// No need to track UIDs.
  1053  				ix := getCompletionIndex(pod.Annotations)
  1054  				if ix != unknownCompletionIndex && ix < int(*jobCtx.job.Spec.Completions) && !jobCtx.prevSucceededIndexes.has(ix) {
  1055  					newSucceededIndexes = append(newSucceededIndexes, ix)
  1056  					needsFlush = true
  1057  				}
  1058  			} else if !jobCtx.uncounted.succeeded.Has(string(pod.UID)) {
  1059  				needsFlush = true
  1060  				uncountedStatus.Succeeded = append(uncountedStatus.Succeeded, pod.UID)
  1061  			}
  1062  		} else if considerPodFailed || jobCtx.finishedCondition != nil {
  1063  			// When the job is considered finished, every non-terminated pod is considered failed
  1064  			ix := getCompletionIndex(pod.Annotations)
  1065  			if !jobCtx.uncounted.failed.Has(string(pod.UID)) && (!isIndexed || (ix != unknownCompletionIndex && ix < int(*jobCtx.job.Spec.Completions))) {
  1066  				if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && jobCtx.job.Spec.PodFailurePolicy != nil {
  1067  					_, countFailed, action := matchPodFailurePolicy(jobCtx.job.Spec.PodFailurePolicy, pod)
  1068  					if action != nil {
  1069  						podFailureCountByPolicyAction[string(*action)] += 1
  1070  					}
  1071  					if countFailed {
  1072  						needsFlush = true
  1073  						uncountedStatus.Failed = append(uncountedStatus.Failed, pod.UID)
  1074  					}
  1075  				} else {
  1076  					needsFlush = true
  1077  					uncountedStatus.Failed = append(uncountedStatus.Failed, pod.UID)
  1078  				}
  1079  			}
  1080  		}
  1081  		if len(newSucceededIndexes)+len(uncountedStatus.Succeeded)+len(uncountedStatus.Failed) >= MaxUncountedPods {
  1082  			// The controller already added enough Pods to .status.uncountedTerminatedPods.
  1083  			// We stop counting pods and removing finalizers here to:
  1084  			// 1. Ensure that the UIDs representation stays under 20 KB.
  1085  			// 2. Cap the number of finalizer removals so that syncing of big Jobs
  1086  			//    doesn't starve smaller ones.
  1087  			//
  1088  			// The job will be synced again because the Job status and Pod updates
  1089  			// will put the Job back to the work queue.
  1090  			reachedMaxUncountedPods = true
  1091  			break
  1092  		}
  1093  	}
  1094  	if isIndexed {
  1095  		jobCtx.succeededIndexes = jobCtx.succeededIndexes.withOrderedIndexes(newSucceededIndexes)
  1096  		succeededIndexesStr := jobCtx.succeededIndexes.String()
  1097  		if succeededIndexesStr != jobCtx.job.Status.CompletedIndexes {
  1098  			needsFlush = true
  1099  		}
  1100  		jobCtx.job.Status.Succeeded = int32(jobCtx.succeededIndexes.total())
  1101  		jobCtx.job.Status.CompletedIndexes = succeededIndexesStr
  1102  		var failedIndexesStr *string
  1103  		if jobCtx.failedIndexes != nil {
  1104  			failedIndexesStr = ptr.To(jobCtx.failedIndexes.String())
  1105  		}
  1106  		if !ptr.Equal(jobCtx.job.Status.FailedIndexes, failedIndexesStr) {
  1107  			jobCtx.job.Status.FailedIndexes = failedIndexesStr
  1108  			needsFlush = true
  1109  		}
  1110  	}
  1111  	if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) {
  1112  		if jobCtx.finishedCondition != nil && jobCtx.finishedCondition.Type == batch.JobFailureTarget {
  1113  
  1114  			// Append the interim FailureTarget condition so the job status is updated with it before the finalizers are removed.
  1115  			jobCtx.job.Status.Conditions = append(jobCtx.job.Status.Conditions, *jobCtx.finishedCondition)
  1116  			needsFlush = true
  1117  
  1118  			// Prepare the final Failed condition to apply to the job status after the finalizers are removed.
  1119  			// It is also used in the enactJobFinished function for reporting.
  1120  			jobCtx.finishedCondition = newFailedConditionForFailureTarget(jobCtx.finishedCondition, jm.clock.Now())
  1121  		}
  1122  	}
  1123  	var err error
  1124  	if jobCtx.job, needsFlush, err = jm.flushUncountedAndRemoveFinalizers(ctx, jobCtx, podsToRemoveFinalizer, uidsWithFinalizer, &oldCounters, podFailureCountByPolicyAction, needsFlush); err != nil {
  1125  		return err
  1126  	}
  1127  	jobFinished := !reachedMaxUncountedPods && jm.enactJobFinished(jobCtx.job, jobCtx.finishedCondition)
  1128  	if jobFinished {
  1129  		needsFlush = true
  1130  	}
  1131  	if needsFlush {
  1132  		if _, err := jm.updateStatusHandler(ctx, jobCtx.job); err != nil {
  1133  			return fmt.Errorf("removing uncounted pods from status: %w", err)
  1134  		}
  1135  		if jobFinished {
  1136  			jm.recordJobFinished(jobCtx.job, jobCtx.finishedCondition)
  1137  		}
  1138  		recordJobPodFinished(logger, jobCtx.job, oldCounters)
  1139  	}
  1140  	return nil
  1141  }
  1142  
  1143  // canRemoveFinalizer determines if the pod's finalizer can be safely removed.
  1144  // The finalizer can be removed when:
  1145  //   - the entire Job is terminating; or
  1146  //   - the pod's index is succeeded; or
  1147  //   - the Pod is considered failed, unless its removal is delayed for the
  1148  //     purpose of transferring the JobIndexFailureCount annotations to the
  1149  //     replacement pod. If the entire Job is terminating, the finalizer can be
  1150  //     removed unconditionally.
  1151  func canRemoveFinalizer(logger klog.Logger, jobCtx *syncJobCtx, pod *v1.Pod, considerPodFailed bool) bool {
  1152  	if jobCtx.job.DeletionTimestamp != nil || jobCtx.finishedCondition != nil || pod.Status.Phase == v1.PodSucceeded {
  1153  		return true
  1154  	}
  1155  	if !considerPodFailed {
  1156  		return false
  1157  	}
  1158  	if hasBackoffLimitPerIndex(jobCtx.job) {
  1159  		if index := getCompletionIndex(pod.Annotations); index != unknownCompletionIndex {
  1160  			if p, ok := jobCtx.podsWithDelayedDeletionPerIndex[index]; ok && p.UID == pod.UID {
  1161  				logger.V(3).Info("Delaying pod finalizer removal to await for pod recreation within the index", "pod", klog.KObj(pod))
  1162  				return false
  1163  			}
  1164  		}
  1165  	}
  1166  	return true
  1167  }
  1168  
  1169  // flushUncountedAndRemoveFinalizers does:
  1170  //  1. flush the Job status that might include new uncounted Pod UIDs. Also flush the interim FailureTarget condition
  1171  //     if present.
  1172  //  2. perform the removal of finalizers from Pods which are in the uncounted
  1173  //     lists.
  1174  //  3. update the counters based on the Pods for which it successfully removed
  1175  //     the finalizers.
  1176  //  4. (if not all removals succeeded) flush Job status again.
  1177  //
  1178  // Returns whether there are pending changes in the Job status that need to be
  1179  // flushed in subsequent calls.
  1180  func (jm *Controller) flushUncountedAndRemoveFinalizers(ctx context.Context, jobCtx *syncJobCtx, podsToRemoveFinalizer []*v1.Pod, uidsWithFinalizer sets.Set[string], oldCounters *batch.JobStatus, podFailureCountByPolicyAction map[string]int, needsFlush bool) (*batch.Job, bool, error) {
  1181  	logger := klog.FromContext(ctx)
  1182  	var err error
  1183  	if needsFlush {
  1184  		if jobCtx.job, err = jm.updateStatusHandler(ctx, jobCtx.job); err != nil {
  1185  			return jobCtx.job, needsFlush, fmt.Errorf("adding uncounted pods to status: %w", err)
  1186  		}
  1187  
  1188  		err = jm.podBackoffStore.updateBackoffRecord(jobCtx.newBackoffRecord)
  1189  
  1190  		if err != nil {
  1191  			// This error might undercount the backoff.
  1192  			// Re-syncing from the current state might not help to recover
  1193  			// the backoff information.
  1194  			logger.Error(err, "Backoff update failed")
  1195  		}
  1196  
  1197  		recordJobPodFinished(logger, jobCtx.job, *oldCounters)
  1198  		// Shallow copy, as it will only be used to detect changes in the counters.
  1199  		*oldCounters = jobCtx.job.Status
  1200  		needsFlush = false
  1201  	}
  1202  	recordJobPodFailurePolicyActions(jobCtx.job, podFailureCountByPolicyAction)
  1203  
  1204  	jobKey, err := controller.KeyFunc(jobCtx.job)
  1205  	if err != nil {
  1206  		return jobCtx.job, needsFlush, fmt.Errorf("getting job key: %w", err)
  1207  	}
  1208  	var rmErr error
  1209  	if len(podsToRemoveFinalizer) > 0 {
  1210  		var rmSucceeded []bool
  1211  		rmSucceeded, rmErr = jm.removeTrackingFinalizerFromPods(ctx, jobKey, podsToRemoveFinalizer)
  1212  		for i, p := range podsToRemoveFinalizer {
  1213  			if rmSucceeded[i] {
  1214  				uidsWithFinalizer.Delete(string(p.UID))
  1215  			}
  1216  		}
  1217  	}
  1218  	// If we failed to remove some finalizers, attempt to record the partial
  1219  	// progress in the status before returning the error.
  1220  	if cleanUncountedPodsWithoutFinalizers(&jobCtx.job.Status, uidsWithFinalizer) {
  1221  		needsFlush = true
  1222  	}
  1223  	if rmErr != nil && needsFlush {
  1224  		if job, err := jm.updateStatusHandler(ctx, jobCtx.job); err != nil {
  1225  			return job, needsFlush, fmt.Errorf("removing uncounted pods from status: %w", err)
  1226  		}
  1227  	}
  1228  	return jobCtx.job, needsFlush, rmErr
  1229  }
  1230  
  1231  // cleanUncountedPodsWithoutFinalizers removes the Pod UIDs from
  1232  // .status.uncountedTerminatedPods for which the finalizer was successfully
  1233  // removed and increments the corresponding status counters.
  1234  // Returns whether there was any status change.
  1235  func cleanUncountedPodsWithoutFinalizers(status *batch.JobStatus, uidsWithFinalizer sets.Set[string]) bool {
  1236  	updated := false
  1237  	uncountedStatus := status.UncountedTerminatedPods
  1238  	newUncounted := filterInUncountedUIDs(uncountedStatus.Succeeded, uidsWithFinalizer)
  1239  	if len(newUncounted) != len(uncountedStatus.Succeeded) {
  1240  		updated = true
  1241  		status.Succeeded += int32(len(uncountedStatus.Succeeded) - len(newUncounted))
  1242  		uncountedStatus.Succeeded = newUncounted
  1243  	}
  1244  	newUncounted = filterInUncountedUIDs(uncountedStatus.Failed, uidsWithFinalizer)
  1245  	if len(newUncounted) != len(uncountedStatus.Failed) {
  1246  		updated = true
  1247  		status.Failed += int32(len(uncountedStatus.Failed) - len(newUncounted))
  1248  		uncountedStatus.Failed = newUncounted
  1249  	}
  1250  	return updated
  1251  }
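
// Illustrative sketch (not part of the original file; the UIDs are made up):
// if two succeeded pod UIDs are tracked in .status.uncountedTerminatedPods and
// the finalizer was removed from "uid-a" only, the call below moves "uid-a"
// into the Succeeded counter while "uid-b" stays uncounted.
//
//	status := batch.JobStatus{
//		UncountedTerminatedPods: &batch.UncountedTerminatedPods{
//			Succeeded: []types.UID{"uid-a", "uid-b"},
//		},
//	}
//	// Only "uid-b" still carries the tracking finalizer.
//	changed := cleanUncountedPodsWithoutFinalizers(&status, sets.New[string]("uid-b"))
//	// changed == true, status.Succeeded == 1,
//	// status.UncountedTerminatedPods.Succeeded == []types.UID{"uid-b"}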
  1252  
  1253  // removeTrackingFinalizerFromPods removes tracking finalizers from Pods and
  1254  // returns an array of booleans where the i-th value is true if the finalizer
  1255  // of the i-th Pod was successfully removed (if the pod was deleted when this
  1256  // function was called, it's considered as the finalizer was removed successfully).
  1257  func (jm *Controller) removeTrackingFinalizerFromPods(ctx context.Context, jobKey string, pods []*v1.Pod) ([]bool, error) {
  1258  	logger := klog.FromContext(ctx)
  1259  	errCh := make(chan error, len(pods))
  1260  	succeeded := make([]bool, len(pods))
  1261  	uids := make([]string, len(pods))
  1262  	for i, p := range pods {
  1263  		uids[i] = string(p.UID)
  1264  	}
  1265  	if jobKey != "" {
  1266  		err := jm.finalizerExpectations.expectFinalizersRemoved(logger, jobKey, uids)
  1267  		if err != nil {
  1268  			return succeeded, fmt.Errorf("setting expected removed finalizers: %w", err)
  1269  		}
  1270  	}
  1271  	wg := sync.WaitGroup{}
  1272  	wg.Add(len(pods))
  1273  	for i := range pods {
  1274  		go func(i int) {
  1275  			pod := pods[i]
  1276  			defer wg.Done()
  1277  			if patch := removeTrackingFinalizerPatch(pod); patch != nil {
  1278  				if err := jm.podControl.PatchPod(ctx, pod.Namespace, pod.Name, patch); err != nil {
  1279  					// In case of any failure, we don't expect a Pod update for the
  1280  					// finalizer removed. Clear expectation now.
  1281  					if jobKey != "" {
  1282  						jm.finalizerExpectations.finalizerRemovalObserved(logger, jobKey, string(pod.UID))
  1283  					}
  1284  					if !apierrors.IsNotFound(err) {
  1285  						errCh <- err
  1286  						utilruntime.HandleError(fmt.Errorf("removing tracking finalizer: %w", err))
  1287  						return
  1288  					}
  1289  				}
  1290  				succeeded[i] = true
  1291  			}
  1292  		}(i)
  1293  	}
  1294  	wg.Wait()
  1295  
  1296  	return succeeded, errorFromChannel(errCh)
  1297  }
  1298  
  1299  // enactJobFinished adds the Complete or Failed condition to the Job status and,
  1300  // for Complete, sets the completion time. Returns whether the Job was considered finished.
  1301  func (jm *Controller) enactJobFinished(job *batch.Job, finishedCond *batch.JobCondition) bool {
  1302  	if finishedCond == nil {
  1303  		return false
  1304  	}
  1305  	if uncounted := job.Status.UncountedTerminatedPods; uncounted != nil {
  1306  		if len(uncounted.Succeeded) > 0 || len(uncounted.Failed) > 0 {
  1307  			return false
  1308  		}
  1309  	}
  1310  	job.Status.Conditions, _ = ensureJobConditionStatus(job.Status.Conditions, finishedCond.Type, finishedCond.Status, finishedCond.Reason, finishedCond.Message, jm.clock.Now())
  1311  	if finishedCond.Type == batch.JobComplete {
  1312  		job.Status.CompletionTime = &finishedCond.LastTransitionTime
  1313  	}
  1314  	return true
  1315  }
  1316  
  1317  // recordJobFinished records events and the job_finished_total metric for a finished job.
  1318  func (jm *Controller) recordJobFinished(job *batch.Job, finishedCond *batch.JobCondition) bool {
  1319  	completionMode := getCompletionMode(job)
  1320  	if finishedCond.Type == batch.JobComplete {
  1321  		if job.Spec.Completions != nil && job.Status.Succeeded > *job.Spec.Completions {
  1322  			jm.recorder.Event(job, v1.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached")
  1323  		}
  1324  		jm.recorder.Event(job, v1.EventTypeNormal, "Completed", "Job completed")
  1325  		metrics.JobFinishedNum.WithLabelValues(completionMode, "succeeded", "").Inc()
  1326  	} else {
  1327  		jm.recorder.Event(job, v1.EventTypeWarning, finishedCond.Reason, finishedCond.Message)
  1328  		metrics.JobFinishedNum.WithLabelValues(completionMode, "failed", finishedCond.Reason).Inc()
  1329  	}
  1330  	return true
  1331  }
  1332  
  1333  func filterInUncountedUIDs(uncounted []types.UID, include sets.Set[string]) []types.UID {
  1334  	var newUncounted []types.UID
  1335  	for _, uid := range uncounted {
  1336  		if include.Has(string(uid)) {
  1337  			newUncounted = append(newUncounted, uid)
  1338  		}
  1339  	}
  1340  	return newUncounted
  1341  }
  1342  
  1343  // newFailedConditionForFailureTarget creates a job Failed condition based on
  1344  // the interim FailureTarget condition.
  1345  func newFailedConditionForFailureTarget(condition *batch.JobCondition, now time.Time) *batch.JobCondition {
  1346  	return newCondition(batch.JobFailed, v1.ConditionTrue, condition.Reason, condition.Message, now)
  1347  }
  1348  
  1349  // pastBackoffLimitOnFailure checks if the sum of container restart counts has
  1350  // reached the BackoffLimit. This method applies only to pods with restartPolicy == OnFailure.
  1351  func pastBackoffLimitOnFailure(job *batch.Job, pods []*v1.Pod) bool {
  1352  	if job.Spec.Template.Spec.RestartPolicy != v1.RestartPolicyOnFailure {
  1353  		return false
  1354  	}
  1355  	result := int32(0)
  1356  	for i := range pods {
  1357  		po := pods[i]
  1358  		if po.Status.Phase == v1.PodRunning || po.Status.Phase == v1.PodPending {
  1359  			for j := range po.Status.InitContainerStatuses {
  1360  				stat := po.Status.InitContainerStatuses[j]
  1361  				result += stat.RestartCount
  1362  			}
  1363  			for j := range po.Status.ContainerStatuses {
  1364  				stat := po.Status.ContainerStatuses[j]
  1365  				result += stat.RestartCount
  1366  			}
  1367  		}
  1368  	}
  1369  	if *job.Spec.BackoffLimit == 0 {
  1370  		return result > 0
  1371  	}
  1372  	return result >= *job.Spec.BackoffLimit
  1373  }
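
// Illustrative sketch (not part of the original file; the values are made up):
// for a Job with restartPolicy=OnFailure and backoffLimit=2, a single running
// pod whose containers report restart counts of 1 and 1 sums to 2, which
// already reaches the limit.
//
//	job := &batch.Job{Spec: batch.JobSpec{
//		BackoffLimit: ptr.To[int32](2),
//		Template: v1.PodTemplateSpec{
//			Spec: v1.PodSpec{RestartPolicy: v1.RestartPolicyOnFailure},
//		},
//	}}
//	pod := &v1.Pod{Status: v1.PodStatus{
//		Phase:             v1.PodRunning,
//		ContainerStatuses: []v1.ContainerStatus{{RestartCount: 1}, {RestartCount: 1}},
//	}}
//	_ = pastBackoffLimitOnFailure(job, []*v1.Pod{pod}) // true: 1+1 >= 2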
  1374  
  1375  // pastActiveDeadline checks if job has ActiveDeadlineSeconds field set and if
  1376  // it is exceeded. If the job is currently suspended, the function will always
  1377  // return false.
  1378  func (jm *Controller) pastActiveDeadline(job *batch.Job) bool {
  1379  	if job.Spec.ActiveDeadlineSeconds == nil || job.Status.StartTime == nil || jobSuspended(job) {
  1380  		return false
  1381  	}
  1382  	duration := jm.clock.Since(job.Status.StartTime.Time)
  1383  	allowedDuration := time.Duration(*job.Spec.ActiveDeadlineSeconds) * time.Second
  1384  	return duration >= allowedDuration
  1385  }
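
// Worked example (not part of the original file; the values are made up): a Job
// with activeDeadlineSeconds=600 whose startTime is 10 minutes in the past has
// used its entire allowed duration, so pastActiveDeadline returns true; if the
// same Job were suspended, it would return false regardless of elapsed time.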
  1386  
  1387  func newCondition(conditionType batch.JobConditionType, status v1.ConditionStatus, reason, message string, now time.Time) *batch.JobCondition {
  1388  	return &batch.JobCondition{
  1389  		Type:               conditionType,
  1390  		Status:             status,
  1391  		LastProbeTime:      metav1.NewTime(now),
  1392  		LastTransitionTime: metav1.NewTime(now),
  1393  		Reason:             reason,
  1394  		Message:            message,
  1395  	}
  1396  }
  1397  
  1398  // getFailJobMessage returns a job failure message if any failed pod matches a FailJob rule of the pod failure policy.
  1399  func getFailJobMessage(job *batch.Job, pods []*v1.Pod) *string {
  1400  	if !feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) || job.Spec.PodFailurePolicy == nil {
  1401  		return nil
  1402  	}
  1403  	for _, p := range pods {
  1404  		if isPodFailed(p, job) {
  1405  			jobFailureMessage, _, _ := matchPodFailurePolicy(job.Spec.PodFailurePolicy, p)
  1406  			if jobFailureMessage != nil {
  1407  				return jobFailureMessage
  1408  			}
  1409  		}
  1410  	}
  1411  	return nil
  1412  }
  1413  
  1414  // getNewFinishedPods returns the list of newly succeeded and failed pods that are not yet accounted
  1415  // for in the job status. The list of failed pods can be affected by the podFailurePolicy.
  1416  func getNewFinishedPods(jobCtx *syncJobCtx) (succeededPods, failedPods []*v1.Pod) {
  1417  	succeededPods = getValidPodsWithFilter(jobCtx, jobCtx.uncounted.Succeeded(), func(p *v1.Pod) bool {
  1418  		return p.Status.Phase == v1.PodSucceeded
  1419  	})
  1420  	failedPods = getValidPodsWithFilter(jobCtx, jobCtx.uncounted.Failed(), func(p *v1.Pod) bool {
  1421  		return isPodFailed(p, jobCtx.job)
  1422  	})
  1423  	return succeededPods, failedPods
  1424  }
  1425  
  1426  // jobSuspended returns whether a Job is suspended, i.e. whether
  1427  // .spec.suspend is set to true.
  1428  func jobSuspended(job *batch.Job) bool {
  1429  	return job.Spec.Suspend != nil && *job.Spec.Suspend
  1430  }
  1431  
  1432  // manageJob is the core method responsible for managing the number of running
  1433  // pods according to what is specified in the job.Spec.
  1434  // Respects back-off; does not create new pods if the back-off time has not passed.
  1435  // Does NOT modify <activePods>.
  1436  func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, jobCtx *syncJobCtx) (int32, string, error) {
  1437  	logger := klog.FromContext(ctx)
  1438  	active := int32(len(jobCtx.activePods))
  1439  	parallelism := *job.Spec.Parallelism
  1440  	jobKey, err := controller.KeyFunc(job)
  1441  	if err != nil {
  1442  		utilruntime.HandleError(fmt.Errorf("Couldn't get key for job %#v: %v", job, err))
  1443  		return 0, metrics.JobSyncActionTracking, nil
  1444  	}
  1445  
  1446  	if jobSuspended(job) {
  1447  		logger.V(4).Info("Deleting all active pods in suspended job", "job", klog.KObj(job), "active", active)
  1448  		podsToDelete := activePodsForRemoval(job, jobCtx.activePods, int(active))
  1449  		jm.expectations.ExpectDeletions(logger, jobKey, len(podsToDelete))
  1450  		removed, err := jm.deleteJobPods(ctx, job, jobKey, podsToDelete)
  1451  		active -= removed
  1452  		return active, metrics.JobSyncActionPodsDeleted, err
  1453  	}
  1454  
  1455  	var terminating int32 = 0
  1456  	if onlyReplaceFailedPods(jobCtx.job) {
		// When a PodFailurePolicy is specified but PodReplacementPolicy is disabled,
		// we still need to count terminating pods for the replica counts,
		// but we will not allow updates to status.
  1460  		if jobCtx.terminating == nil {
  1461  			terminating = controller.CountTerminatingPods(jobCtx.pods)
  1462  		} else {
  1463  			terminating = *jobCtx.terminating
  1464  		}
  1465  	}
  1466  	wantActive := int32(0)
  1467  	if job.Spec.Completions == nil {
  1468  		// Job does not specify a number of completions. Therefore, the number of
  1469  		// active pods should be equal to parallelism, unless the job has seen at
  1470  		// least one success, in which case leave whatever is running, running.
  1471  		if jobCtx.succeeded > 0 {
  1472  			wantActive = active
  1473  		} else {
  1474  			wantActive = parallelism
  1475  		}
  1476  	} else {
  1477  		// Job specifies a number of completions. Therefore, the number of
  1478  		// active pods should never exceed the number of remaining completions.
  1479  		wantActive = *job.Spec.Completions - jobCtx.succeeded
  1480  		if wantActive > parallelism {
  1481  			wantActive = parallelism
  1482  		}
  1483  		if wantActive < 0 {
  1484  			wantActive = 0
  1485  		}
  1486  	}
  1487  
  1488  	rmAtLeast := active - wantActive
  1489  	if rmAtLeast < 0 {
  1490  		rmAtLeast = 0
  1491  	}
  1492  	podsToDelete := activePodsForRemoval(job, jobCtx.activePods, int(rmAtLeast))
  1493  	if len(podsToDelete) > MaxPodCreateDeletePerSync {
  1494  		podsToDelete = podsToDelete[:MaxPodCreateDeletePerSync]
  1495  	}
  1496  	if len(podsToDelete) > 0 {
  1497  		jm.expectations.ExpectDeletions(logger, jobKey, len(podsToDelete))
  1498  		logger.V(4).Info("Too many pods running for job", "job", klog.KObj(job), "deleted", len(podsToDelete), "target", wantActive)
  1499  		removed, err := jm.deleteJobPods(ctx, job, jobKey, podsToDelete)
  1500  		active -= removed
  1501  		// While it is possible for a Job to require both pod creations and
  1502  		// deletions at the same time (e.g. indexed Jobs with repeated indexes), we
  1503  		// restrict ourselves to either just pod deletion or pod creation in any
  1504  		// given sync cycle. Of these two, pod deletion takes precedence.
  1505  		return active, metrics.JobSyncActionPodsDeleted, err
  1506  	}
  1507  
  1508  	if diff := wantActive - terminating - active; diff > 0 {
  1509  		var remainingTime time.Duration
  1510  		if !hasBackoffLimitPerIndex(job) {
  1511  			// we compute the global remaining time for pod creation when backoffLimitPerIndex is not used
  1512  			remainingTime = jobCtx.newBackoffRecord.getRemainingTime(jm.clock, DefaultJobPodFailureBackOff, MaxJobPodFailureBackOff)
  1513  		}
  1514  		if remainingTime > 0 {
  1515  			jm.enqueueSyncJobWithDelay(logger, job, remainingTime)
  1516  			return 0, metrics.JobSyncActionPodsCreated, nil
  1517  		}
  1518  		if diff > int32(MaxPodCreateDeletePerSync) {
  1519  			diff = int32(MaxPodCreateDeletePerSync)
  1520  		}
  1521  
  1522  		var indexesToAdd []int
  1523  		if isIndexedJob(job) {
  1524  			indexesToAdd = firstPendingIndexes(jobCtx, int(diff), int(*job.Spec.Completions))
  1525  			if hasBackoffLimitPerIndex(job) {
  1526  				indexesToAdd, remainingTime = jm.getPodCreationInfoForIndependentIndexes(logger, indexesToAdd, jobCtx.podsWithDelayedDeletionPerIndex)
  1527  				if remainingTime > 0 {
  1528  					jm.enqueueSyncJobWithDelay(logger, job, remainingTime)
  1529  					return 0, metrics.JobSyncActionPodsCreated, nil
  1530  				}
  1531  			}
  1532  			diff = int32(len(indexesToAdd))
  1533  		}
  1534  
  1535  		jm.expectations.ExpectCreations(logger, jobKey, int(diff))
  1536  		errCh := make(chan error, diff)
  1537  		logger.V(4).Info("Too few pods running", "key", jobKey, "need", wantActive, "creating", diff)
  1538  
  1539  		wait := sync.WaitGroup{}
  1540  
  1541  		active += diff
  1542  
  1543  		podTemplate := job.Spec.Template.DeepCopy()
  1544  		if isIndexedJob(job) {
  1545  			addCompletionIndexEnvVariables(podTemplate)
  1546  		}
  1547  		podTemplate.Finalizers = appendJobCompletionFinalizerIfNotFound(podTemplate.Finalizers)
  1548  
  1549  		// Counters for pod creation status (used by the job_pods_creation_total metric)
  1550  		var creationsSucceeded, creationsFailed int32 = 0, 0
  1551  
  1552  		// Batch the pod creates. Batch sizes start at SlowStartInitialBatchSize
  1553  		// and double with each successful iteration in a kind of "slow start".
  1554  		// This handles attempts to start large numbers of pods that would
  1555  		// likely all fail with the same error. For example a project with a
  1556  		// low quota that attempts to create a large number of pods will be
  1557  		// prevented from spamming the API service with the pod create requests
  1558  		// after one of its pods fails.  Conveniently, this also prevents the
  1559  		// event spam that those failures would generate.
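		//
		// Worked illustration (assumed numbers, not from the original comment):
		// with SlowStartInitialBatchSize = 1 and diff = 20, the loop below issues
		// batches of 1, 2, 4, 8 and finally 5 pods (each batch doubles, capped at
		// the remaining diff), so a systemic failure is observed after a single
		// request rather than after 20.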
  1560  		for batchSize := int32(integer.IntMin(int(diff), controller.SlowStartInitialBatchSize)); diff > 0; batchSize = integer.Int32Min(2*batchSize, diff) {
  1561  			errorCount := len(errCh)
  1562  			wait.Add(int(batchSize))
  1563  			for i := int32(0); i < batchSize; i++ {
  1564  				completionIndex := unknownCompletionIndex
  1565  				if len(indexesToAdd) > 0 {
  1566  					completionIndex = indexesToAdd[0]
  1567  					indexesToAdd = indexesToAdd[1:]
  1568  				}
  1569  				go func() {
  1570  					template := podTemplate
  1571  					generateName := ""
  1572  					if completionIndex != unknownCompletionIndex {
  1573  						template = podTemplate.DeepCopy()
  1574  						addCompletionIndexAnnotation(template, completionIndex)
  1575  
  1576  						if feature.DefaultFeatureGate.Enabled(features.PodIndexLabel) {
  1577  							addCompletionIndexLabel(template, completionIndex)
  1578  						}
  1579  						template.Spec.Hostname = fmt.Sprintf("%s-%d", job.Name, completionIndex)
  1580  						generateName = podGenerateNameWithIndex(job.Name, completionIndex)
  1581  						if hasBackoffLimitPerIndex(job) {
  1582  							addIndexFailureCountAnnotation(logger, template, job, jobCtx.podsWithDelayedDeletionPerIndex[completionIndex])
  1583  						}
  1584  					}
  1585  					defer wait.Done()
  1586  					err := jm.podControl.CreatePodsWithGenerateName(ctx, job.Namespace, template, job, metav1.NewControllerRef(job, controllerKind), generateName)
  1587  					if err != nil {
  1588  						if apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
  1589  							// If the namespace is being torn down, we can safely ignore
  1590  							// this error since all subsequent creations will fail.
  1591  							return
  1592  						}
  1593  					}
  1594  					if err != nil {
  1595  						defer utilruntime.HandleError(err)
  1596  						// Decrement the expected number of creates because the informer won't observe this pod
  1597  						logger.V(2).Info("Failed creation, decrementing expectations", "job", klog.KObj(job))
  1598  						jm.expectations.CreationObserved(logger, jobKey)
  1599  						atomic.AddInt32(&active, -1)
  1600  						errCh <- err
  1601  						atomic.AddInt32(&creationsFailed, 1)
        						return // do not also count a failed create as succeeded below
  1602  					}
  1603  					atomic.AddInt32(&creationsSucceeded, 1)
  1604  				}()
  1605  			}
  1606  			wait.Wait()
  1607  			// any skipped pods that we never attempted to start shouldn't be expected.
  1608  			skippedPods := diff - batchSize
  1609  			if errorCount < len(errCh) && skippedPods > 0 {
  1610  				logger.V(2).Info("Slow-start failure. Skipping creating pods, decrementing expectations", "skippedCount", skippedPods, "job", klog.KObj(job))
  1611  				active -= skippedPods
  1612  				for i := int32(0); i < skippedPods; i++ {
  1613  					// Decrement the expected number of creates because the informer won't observe this pod
  1614  					jm.expectations.CreationObserved(logger, jobKey)
  1615  				}
  1616  				// The skipped pods will be retried later. The next controller resync will
  1617  				// retry the slow start process.
  1618  				break
  1619  			}
  1620  			diff -= batchSize
  1621  		}
  1622  		recordJobPodsCreationTotal(job, creationsSucceeded, creationsFailed)
  1623  		return active, metrics.JobSyncActionPodsCreated, errorFromChannel(errCh)
  1624  	}
  1625  
  1626  	return active, metrics.JobSyncActionTracking, nil
  1627  }
  1628  
  1629  // getPodCreationInfoForIndependentIndexes returns the sub-list of the indexes
  1630  // to create that contains those for which pods can already be created. If no
  1631  // index is ready for pod creation, it returns the lowest remaining time to
  1632  // create pods out of all indexes.
  1633  func (jm *Controller) getPodCreationInfoForIndependentIndexes(logger klog.Logger, indexesToAdd []int, podsWithDelayedDeletionPerIndex map[int]*v1.Pod) ([]int, time.Duration) {
  1634  	var indexesToAddNow []int
  1635  	var minRemainingTimePerIndex *time.Duration
  1636  	for _, indexToAdd := range indexesToAdd {
  1637  		if remainingTimePerIndex := getRemainingTimePerIndex(logger, jm.clock, DefaultJobPodFailureBackOff, MaxJobPodFailureBackOff, podsWithDelayedDeletionPerIndex[indexToAdd]); remainingTimePerIndex == 0 {
  1638  			indexesToAddNow = append(indexesToAddNow, indexToAdd)
  1639  		} else if minRemainingTimePerIndex == nil || remainingTimePerIndex < *minRemainingTimePerIndex {
  1640  			minRemainingTimePerIndex = &remainingTimePerIndex
  1641  		}
  1642  	}
  1643  	if len(indexesToAddNow) > 0 {
  1644  		return indexesToAddNow, 0
  1645  	}
  1646  	return indexesToAddNow, ptr.Deref(minRemainingTimePerIndex, 0)
  1647  }
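
// Illustrative sketch (not part of the original file; indexes and durations are
// made up): if index 0 has no failed pod in backoff while index 1 still has 5s
// of per-index backoff remaining, the function returns ([0], 0) and index 1 is
// retried later; if every index were still backing off, it would return
// (nil, 5*time.Second), i.e. the smallest remaining backoff.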
  1648  
  1649  // activePodsForRemoval returns Pods that should be removed because there
  1650  // are too many pods running or, if this is an indexed job, there are repeated
  1651  // indexes or invalid indexes or some pods don't have indexes.
  1652  // Sorts candidate pods in the order such that not-ready < ready, unscheduled
  1653  // < scheduled, and pending < running. This ensures that we delete pods
  1654  // in the earlier stages whenever possible.
  1655  func activePodsForRemoval(job *batch.Job, pods []*v1.Pod, rmAtLeast int) []*v1.Pod {
  1656  	var rm, left []*v1.Pod
  1657  
  1658  	if isIndexedJob(job) {
  1659  		rm = make([]*v1.Pod, 0, rmAtLeast)
  1660  		left = make([]*v1.Pod, 0, len(pods)-rmAtLeast)
  1661  		rm, left = appendDuplicatedIndexPodsForRemoval(rm, left, pods, int(*job.Spec.Completions))
  1662  	} else {
  1663  		left = pods
  1664  	}
  1665  
  1666  	if len(rm) < rmAtLeast {
  1667  		sort.Sort(controller.ActivePods(left))
  1668  		rm = append(rm, left[:rmAtLeast-len(rm)]...)
  1669  	}
  1670  	return rm
  1671  }
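
// Illustrative sketch (not part of the original file; the pods are made up):
// for a non-indexed Job with one pending, unscheduled pod and one running,
// ready pod, a call with rmAtLeast=1 sorts the pending pod first, so it is the
// one selected for deletion while the running pod keeps making progress.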
  1672  
  1673  // updateJobStatus calls the API to update the job status.
  1674  func (jm *Controller) updateJobStatus(ctx context.Context, job *batch.Job) (*batch.Job, error) {
  1675  	return jm.kubeClient.BatchV1().Jobs(job.Namespace).UpdateStatus(ctx, job, metav1.UpdateOptions{})
  1676  }
  1677  
  1678  func (jm *Controller) patchJob(ctx context.Context, job *batch.Job, data []byte) error {
  1679  	_, err := jm.kubeClient.BatchV1().Jobs(job.Namespace).Patch(
  1680  		ctx, job.Name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
  1681  	return err
  1682  }
  1683  
  1684  // getValidPodsWithFilter returns the valid pods that pass the filter.
  1685  // Pods are valid if they still have the tracking finalizer, are not in the
  1686  // uncounted set and, for Indexed Jobs, have a valid completion index.
  1687  func getValidPodsWithFilter(jobCtx *syncJobCtx, uncounted sets.Set[string], filter func(*v1.Pod) bool) []*v1.Pod {
  1688  	var result []*v1.Pod
  1689  	for _, p := range jobCtx.pods {
  1690  		uid := string(p.UID)
  1691  
		// Pods without the tracking finalizer are either already in the uncounted
		// set or have already been accounted for in the Job status, so skip them.
  1694  		if !hasJobTrackingFinalizer(p) || uncounted.Has(uid) || jobCtx.expectedRmFinalizers.Has(uid) {
  1695  			continue
  1696  		}
  1697  		if isIndexedJob(jobCtx.job) {
  1698  			idx := getCompletionIndex(p.Annotations)
  1699  			if idx == unknownCompletionIndex || idx >= int(*jobCtx.job.Spec.Completions) {
  1700  				continue
  1701  			}
  1702  		}
  1703  		if filter(p) {
  1704  			result = append(result, p)
  1705  		}
  1706  	}
  1707  	return result
  1708  }
  1709  
  1710  // getCompletionMode returns the string representation of the completion mode. Used as a label value for metrics.
  1711  func getCompletionMode(job *batch.Job) string {
  1712  	if isIndexedJob(job) {
  1713  		return string(batch.IndexedCompletion)
  1714  	}
  1715  	return string(batch.NonIndexedCompletion)
  1716  }
  1717  
  1718  func appendJobCompletionFinalizerIfNotFound(finalizers []string) []string {
  1719  	for _, fin := range finalizers {
  1720  		if fin == batch.JobTrackingFinalizer {
  1721  			return finalizers
  1722  		}
  1723  	}
  1724  	return append(finalizers, batch.JobTrackingFinalizer)
  1725  }
  1726  
  1727  func removeTrackingFinalizerPatch(pod *v1.Pod) []byte {
  1728  	if !hasJobTrackingFinalizer(pod) {
  1729  		return nil
  1730  	}
  1731  	patch := map[string]interface{}{
  1732  		"metadata": map[string]interface{}{
  1733  			"$deleteFromPrimitiveList/finalizers": []string{batch.JobTrackingFinalizer},
  1734  		},
  1735  	}
  1736  	patchBytes, _ := json.Marshal(patch)
  1737  	return patchBytes
  1738  }
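
// For reference (not part of the original file): with batch.JobTrackingFinalizer
// being "batch.kubernetes.io/job-tracking", the strategic merge patch marshaled
// above looks like:
//
//	{"metadata":{"$deleteFromPrimitiveList/finalizers":["batch.kubernetes.io/job-tracking"]}}
//
// which removes only that entry from the pod's finalizer list.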
  1739  
  1740  type uncountedTerminatedPods struct {
  1741  	succeeded sets.Set[string]
  1742  	failed    sets.Set[string]
  1743  }
  1744  
  1745  func newUncountedTerminatedPods(in batch.UncountedTerminatedPods) *uncountedTerminatedPods {
  1746  	obj := uncountedTerminatedPods{
  1747  		succeeded: make(sets.Set[string], len(in.Succeeded)),
  1748  		failed:    make(sets.Set[string], len(in.Failed)),
  1749  	}
  1750  	for _, v := range in.Succeeded {
  1751  		obj.succeeded.Insert(string(v))
  1752  	}
  1753  	for _, v := range in.Failed {
  1754  		obj.failed.Insert(string(v))
  1755  	}
  1756  	return &obj
  1757  }
  1758  
  1759  func (u *uncountedTerminatedPods) Succeeded() sets.Set[string] {
  1760  	if u == nil {
  1761  		return nil
  1762  	}
  1763  	return u.succeeded
  1764  }
  1765  
  1766  func (u *uncountedTerminatedPods) Failed() sets.Set[string] {
  1767  	if u == nil {
  1768  		return nil
  1769  	}
  1770  	return u.failed
  1771  }
  1772  
  1773  func errorFromChannel(errCh <-chan error) error {
  1774  	select {
  1775  	case err := <-errCh:
  1776  		return err
  1777  	default:
  1778  	}
  1779  	return nil
  1780  }
  1781  
  1782  // ensureJobConditionStatus appends or updates an existing job condition of the
  1783  // given type with the given status value. Note that this function will not
  1784  // append to the conditions list if the new condition's status is false
  1785  // (because going from nothing to false is meaningless); it can, however,
  1786  // update the status condition to false. The function returns a bool to let the
  1787  // caller know if the list was changed (either appended or updated).
  1788  func ensureJobConditionStatus(list []batch.JobCondition, cType batch.JobConditionType, status v1.ConditionStatus, reason, message string, now time.Time) ([]batch.JobCondition, bool) {
  1789  	if condition := findConditionByType(list, cType); condition != nil {
  1790  		if condition.Status != status || condition.Reason != reason || condition.Message != message {
  1791  			*condition = *newCondition(cType, status, reason, message, now)
  1792  			return list, true
  1793  		}
  1794  		return list, false
  1795  	}
  1796  	// A condition with that type doesn't exist in the list.
  1797  	if status != v1.ConditionFalse {
  1798  		return append(list, *newCondition(cType, status, reason, message, now)), true
  1799  	}
  1800  	return list, false
  1801  }
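
// Illustrative usage (not part of the original file):
//
//	conds, changed := ensureJobConditionStatus(nil, batch.JobComplete, v1.ConditionTrue, "", "Job completed", time.Now())
//	// changed == true, one Complete=True condition appended
//	conds, changed = ensureJobConditionStatus(conds, batch.JobComplete, v1.ConditionTrue, "", "Job completed", time.Now())
//	// changed == false, the identical condition is left untouched
//	conds, changed = ensureJobConditionStatus(nil, batch.JobFailed, v1.ConditionFalse, "", "", time.Now())
//	// changed == false, a new condition with status False is never appended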
  1802  
  1803  func isPodFailed(p *v1.Pod, job *batch.Job) bool {
  1804  	if feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) && feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
  1805  		// When PodDisruptionConditions is enabled, orphan Pods and unschedulable
  1806  		// terminating Pods are marked as Failed. So we only need to check the phase.
  1807  		// TODO(#113855): Stop limiting this behavior to Jobs with podFailurePolicy.
		// For now, we do so to avoid affecting all running Jobs without the
		// ability to opt out into the old behavior.
  1810  		return p.Status.Phase == v1.PodFailed
  1811  	}
  1812  	if p.Status.Phase == v1.PodFailed {
  1813  		return true
  1814  	}
  1815  	if onlyReplaceFailedPods(job) {
  1816  		return p.Status.Phase == v1.PodFailed
  1817  	}
  1818  	// Count deleted Pods as failures to account for orphan Pods that
  1819  	// never have a chance to reach the Failed phase.
  1820  	return p.DeletionTimestamp != nil && p.Status.Phase != v1.PodSucceeded
  1821  }
  1822  
  1823  func findConditionByType(list []batch.JobCondition, cType batch.JobConditionType) *batch.JobCondition {
  1824  	for i := range list {
  1825  		if list[i].Type == cType {
  1826  			return &list[i]
  1827  		}
  1828  	}
  1829  	return nil
  1830  }
  1831  
  1832  func recordJobPodFinished(logger klog.Logger, job *batch.Job, oldCounters batch.JobStatus) {
  1833  	completionMode := completionModeStr(job)
  1834  	var diff int
  1835  
  1836  	// Updating succeeded metric must be handled differently
  1837  	// for Indexed Jobs to handle the case where the job has
  1838  	// been scaled down by reducing completions & parallelism
  1839  	// in tandem, such that a previously completed index is
  1840  	// now out of range (i.e. index >= spec.Completions).
  1841  	if isIndexedJob(job) {
  1842  		completions := int(*job.Spec.Completions)
  1843  		if job.Status.CompletedIndexes != oldCounters.CompletedIndexes {
  1844  			diff = indexesCount(logger, &job.Status.CompletedIndexes, completions) - indexesCount(logger, &oldCounters.CompletedIndexes, completions)
  1845  		}
  1846  		backoffLimitLabel := backoffLimitMetricsLabel(job)
  1847  		metrics.JobFinishedIndexesTotal.WithLabelValues(metrics.Succeeded, backoffLimitLabel).Add(float64(diff))
  1848  		if hasBackoffLimitPerIndex(job) && job.Status.FailedIndexes != oldCounters.FailedIndexes {
  1849  			if failedDiff := indexesCount(logger, job.Status.FailedIndexes, completions) - indexesCount(logger, oldCounters.FailedIndexes, completions); failedDiff > 0 {
  1850  				metrics.JobFinishedIndexesTotal.WithLabelValues(metrics.Failed, backoffLimitLabel).Add(float64(failedDiff))
  1851  			}
  1852  		}
  1853  	} else {
  1854  		diff = int(job.Status.Succeeded) - int(oldCounters.Succeeded)
  1855  	}
  1856  	metrics.JobPodsFinished.WithLabelValues(completionMode, metrics.Succeeded).Add(float64(diff))
  1857  
  1858  	// Update failed metric.
  1859  	diff = int(job.Status.Failed - oldCounters.Failed)
  1860  	metrics.JobPodsFinished.WithLabelValues(completionMode, metrics.Failed).Add(float64(diff))
  1861  }
  1862  
  1863  func indexesCount(logger klog.Logger, indexesStr *string, completions int) int {
  1864  	if indexesStr == nil {
  1865  		return 0
  1866  	}
  1867  	return parseIndexesFromString(logger, *indexesStr, completions).total()
  1868  }
  1869  
  1870  func backoffLimitMetricsLabel(job *batch.Job) string {
  1871  	if hasBackoffLimitPerIndex(job) {
  1872  		return "perIndex"
  1873  	}
  1874  	return "global"
  1875  }
  1876  
  1877  func recordJobPodFailurePolicyActions(job *batch.Job, podFailureCountByPolicyAction map[string]int) {
  1878  	for action, count := range podFailureCountByPolicyAction {
  1879  		metrics.PodFailuresHandledByFailurePolicy.WithLabelValues(action).Add(float64(count))
  1880  	}
  1881  }
  1882  
  1883  func countReadyPods(pods []*v1.Pod) int32 {
  1884  	cnt := int32(0)
  1885  	for _, p := range pods {
  1886  		if podutil.IsPodReady(p) {
  1887  			cnt++
  1888  		}
  1889  	}
  1890  	return cnt
  1891  }
  1892  
  1893  // onlyReplaceFailedPods checks if we should apply the Failed PodReplacementPolicy.
  1894  // PodReplacementPolicy controls when we recreate pods if they are marked as terminating.
  1895  // Failed means that we recreate only once the pod has terminated.
  1896  func onlyReplaceFailedPods(job *batch.Job) bool {
	// We check PodReplacementPolicy both for nil and for Failed
	// because it is possible that `PodReplacementPolicy` is not defaulted
	// when the `JobPodReplacementPolicy` feature gate is disabled in the API server.
  1900  	if feature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) && job.Spec.PodReplacementPolicy != nil && *job.Spec.PodReplacementPolicy == batch.Failed {
  1901  		return true
  1902  	}
  1903  	return feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil
  1904  }
  1905  
  1906  func (jm *Controller) cleanupPodFinalizers(job *batch.Job) {
  1907  	// Listing pods shouldn't really fail, as we are just querying the informer cache.
  1908  	selector, err := metav1.LabelSelectorAsSelector(job.Spec.Selector)
  1909  	if err != nil {
  1910  		utilruntime.HandleError(fmt.Errorf("parsing deleted job selector: %v", err))
  1911  		return
  1912  	}
  1913  	pods, _ := jm.podStore.Pods(job.Namespace).List(selector)
  1914  	for _, pod := range pods {
  1915  		if metav1.IsControlledBy(pod, job) && hasJobTrackingFinalizer(pod) {
  1916  			jm.enqueueOrphanPod(pod)
  1917  		}
  1918  	}
  1919  }
  1920  
  1921  func recordJobPodsCreationTotal(job *batch.Job, succeeded, failed int32) {
  1922  	reason := metrics.PodCreateNew
  1923  	if feature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) {
  1924  		podsTerminating := job.Status.Terminating != nil && *job.Status.Terminating > 0
  1925  		isRecreateAction := podsTerminating || job.Status.Failed > 0
  1926  		if isRecreateAction {
  1927  			reason = metrics.PodRecreateTerminatingOrFailed
  1928  			if job.Spec.PodReplacementPolicy != nil && *job.Spec.PodReplacementPolicy == batch.Failed {
  1929  				reason = metrics.PodRecreateFailed
  1930  			}
  1931  		}
  1932  	}
  1933  	if succeeded > 0 {
  1934  		metrics.JobPodsCreationTotal.WithLabelValues(reason, metrics.Succeeded).Add(float64(succeeded))
  1935  	}
  1936  	if failed > 0 {
  1937  		metrics.JobPodsCreationTotal.WithLabelValues(reason, metrics.Failed).Add(float64(failed))
  1938  	}
  1939  }