k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/job/job_controller.go (about)

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package job
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"reflect"
    23  	"sort"
    24  	"sync"
    25  	"sync/atomic"
    26  	"time"
    27  
    28  	batch "k8s.io/api/batch/v1"
    29  	v1 "k8s.io/api/core/v1"
    30  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    31  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    32  	"k8s.io/apimachinery/pkg/labels"
    33  	"k8s.io/apimachinery/pkg/types"
    34  	"k8s.io/apimachinery/pkg/util/json"
    35  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    36  	"k8s.io/apimachinery/pkg/util/sets"
    37  	"k8s.io/apimachinery/pkg/util/wait"
    38  	"k8s.io/apiserver/pkg/util/feature"
    39  	batchinformers "k8s.io/client-go/informers/batch/v1"
    40  	coreinformers "k8s.io/client-go/informers/core/v1"
    41  	clientset "k8s.io/client-go/kubernetes"
    42  	"k8s.io/client-go/kubernetes/scheme"
    43  	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
    44  	batchv1listers "k8s.io/client-go/listers/batch/v1"
    45  	corelisters "k8s.io/client-go/listers/core/v1"
    46  	"k8s.io/client-go/tools/cache"
    47  	"k8s.io/client-go/tools/record"
    48  	"k8s.io/client-go/util/workqueue"
    49  	"k8s.io/klog/v2"
    50  	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
    51  	"k8s.io/kubernetes/pkg/controller"
    52  	"k8s.io/kubernetes/pkg/controller/job/metrics"
    53  	"k8s.io/kubernetes/pkg/controller/job/util"
    54  	"k8s.io/kubernetes/pkg/features"
    55  	"k8s.io/utils/clock"
    56  	"k8s.io/utils/ptr"
    57  )
    58  
    59  // controllerKind contains the schema.GroupVersionKind for this controller type.
    60  var controllerKind = batch.SchemeGroupVersion.WithKind("Job")
    61  
    62  var (
    63  	// syncJobBatchPeriod is the batch period for controller sync invocations for a Job.
    64  	syncJobBatchPeriod = time.Second
    65  	// DefaultJobApiBackOff is the default API backoff period. Exported for tests.
    66  	DefaultJobApiBackOff = time.Second
    67  	// MaxJobApiBackOff is the max API backoff period. Exported for tests.
    68  	MaxJobApiBackOff = time.Minute
    69  	// DefaultJobPodFailureBackOff is the default pod failure backoff period. Exported for tests.
    70  	DefaultJobPodFailureBackOff = 10 * time.Second
    71  	// MaxJobPodFailureBackOff is the max pod failure backoff period. Exported for tests.
    72  	MaxJobPodFailureBackOff = 10 * time.Minute
    73  	// MaxUncountedPods is the maximum number of entries that each slice in
    74  	// .status.uncountedTerminatedPods may hold, keeping its representation
    75  	// roughly below 20 KB. Exported for tests.
    76  	MaxUncountedPods = 500
    77  	// MaxPodCreateDeletePerSync is the maximum number of pods that can be
    78  	// created or deleted in a single sync call. Exported for tests.
    79  	MaxPodCreateDeletePerSync = 500
    80  )
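
        // Illustrative note (editorial addition, not part of the upstream source):
        // DefaultJobApiBackOff and MaxJobApiBackOff feed the per-item exponential
        // rate limiters constructed in newControllerWithClock below. With a 1s base
        // and a 1m cap, repeated failures of the same key are requeued roughly like:
        //
        //	limiter := workqueue.NewTypedItemExponentialFailureRateLimiter[string](DefaultJobApiBackOff, MaxJobApiBackOff)
        //	limiter.When("ns/job") // 1s
        //	limiter.When("ns/job") // 2s
        //	limiter.When("ns/job") // 4s ... doubling until capped at MaxJobApiBackOff (1m)
        //	limiter.Forget("ns/job") // a successful sync resets the per-item counter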
    81  
    82  // Controller ensures that all Job objects have corresponding pods to
    83  // run their configured workload.
    84  type Controller struct {
    85  	kubeClient clientset.Interface
    86  	podControl controller.PodControlInterface
    87  
    88  	// To allow injection of the following for testing.
    89  	updateStatusHandler func(ctx context.Context, job *batch.Job) (*batch.Job, error)
    90  	patchJobHandler     func(ctx context.Context, job *batch.Job, patch []byte) error
    91  	syncHandler         func(ctx context.Context, jobKey string) error
    92  	// podStoreSynced returns true if the pod store has been synced at least once.
    93  	// Added as a member to the struct to allow injection for testing.
    94  	podStoreSynced cache.InformerSynced
    95  	// jobStoreSynced returns true if the job store has been synced at least once.
    96  	// Added as a member to the struct to allow injection for testing.
    97  	jobStoreSynced cache.InformerSynced
    98  
    99  	// A TTLCache of pod creates/deletes each Job expects to see
   100  	expectations controller.ControllerExpectationsInterface
   101  
   102  	// finalizerExpectations tracks the Pod UIDs for which the controller
   103  	// expects to observe the tracking finalizer removed.
   104  	finalizerExpectations *uidTrackingExpectations
   105  
   106  	// A store of jobs
   107  	jobLister batchv1listers.JobLister
   108  
   109  	// A store of pods, populated by the podController
   110  	podStore corelisters.PodLister
   111  
   112  	// Jobs that need to be synced
   113  	queue workqueue.TypedRateLimitingInterface[string]
   114  
   115  	// Orphan pods that still have a Job tracking finalizer to be removed
   116  	orphanQueue workqueue.TypedRateLimitingInterface[string]
   117  
   118  	broadcaster record.EventBroadcaster
   119  	recorder    record.EventRecorder
   120  
   121  	clock clock.WithTicker
   122  
   123  	// Store with information to compute the exponential backoff delay for pod
   124  	// recreation in case of pod failures.
   125  	podBackoffStore *backoffStore
   126  }
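
        // Illustrative sketch (an assumption mirroring how such handler fields are
        // typically exercised in tests, not part of the upstream source): because
        // updateStatusHandler, patchJobHandler and syncHandler are plain struct
        // fields, a test can stub them to observe which keys are processed, e.g.:
        //
        //	jm, _ := NewController(ctx, podInformer, jobInformer, clientset) // informers/client assumed set up elsewhere
        //	var syncedKeys []string
        //	jm.syncHandler = func(ctx context.Context, key string) error {
        //		syncedKeys = append(syncedKeys, key) // record the key instead of running syncJob
        //		return nil
        //	}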
   127  
   128  type syncJobCtx struct {
   129  	job                             *batch.Job
   130  	pods                            []*v1.Pod
   131  	finishedCondition               *batch.JobCondition
   132  	activePods                      []*v1.Pod
   133  	succeeded                       int32
   134  	failed                          int32
   135  	prevSucceededIndexes            orderedIntervals
   136  	succeededIndexes                orderedIntervals
   137  	failedIndexes                   *orderedIntervals
   138  	newBackoffRecord                backoffRecord
   139  	expectedRmFinalizers            sets.Set[string]
   140  	uncounted                       *uncountedTerminatedPods
   141  	podsWithDelayedDeletionPerIndex map[int]*v1.Pod
   142  	terminating                     *int32
   143  }
   144  
   145  // NewController creates a new Job controller that keeps the relevant pods
   146  // in sync with their corresponding Job objects.
   147  func NewController(ctx context.Context, podInformer coreinformers.PodInformer, jobInformer batchinformers.JobInformer, kubeClient clientset.Interface) (*Controller, error) {
   148  	return newControllerWithClock(ctx, podInformer, jobInformer, kubeClient, &clock.RealClock{})
   149  }
   150  
   151  func newControllerWithClock(ctx context.Context, podInformer coreinformers.PodInformer, jobInformer batchinformers.JobInformer, kubeClient clientset.Interface, clock clock.WithTicker) (*Controller, error) {
   152  	eventBroadcaster := record.NewBroadcaster(record.WithContext(ctx))
   153  	logger := klog.FromContext(ctx)
   154  
   155  	jm := &Controller{
   156  		kubeClient: kubeClient,
   157  		podControl: controller.RealPodControl{
   158  			KubeClient: kubeClient,
   159  			Recorder:   eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "job-controller"}),
   160  		},
   161  		expectations:          controller.NewControllerExpectations(),
   162  		finalizerExpectations: newUIDTrackingExpectations(),
   163  		queue:                 workqueue.NewTypedRateLimitingQueueWithConfig(workqueue.NewTypedItemExponentialFailureRateLimiter[string](DefaultJobApiBackOff, MaxJobApiBackOff), workqueue.TypedRateLimitingQueueConfig[string]{Name: "job", Clock: clock}),
   164  		orphanQueue:           workqueue.NewTypedRateLimitingQueueWithConfig(workqueue.NewTypedItemExponentialFailureRateLimiter[string](DefaultJobApiBackOff, MaxJobApiBackOff), workqueue.TypedRateLimitingQueueConfig[string]{Name: "job_orphan_pod", Clock: clock}),
   165  		broadcaster:           eventBroadcaster,
   166  		recorder:              eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "job-controller"}),
   167  		clock:                 clock,
   168  		podBackoffStore:       newBackoffStore(),
   169  	}
   170  
   171  	if _, err := jobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   172  		AddFunc: func(obj interface{}) {
   173  			jm.addJob(logger, obj)
   174  		},
   175  		UpdateFunc: func(oldObj, newObj interface{}) {
   176  			jm.updateJob(logger, oldObj, newObj)
   177  		},
   178  		DeleteFunc: func(obj interface{}) {
   179  			jm.deleteJob(logger, obj)
   180  		},
   181  	}); err != nil {
   182  		return nil, fmt.Errorf("adding Job event handler: %w", err)
   183  	}
   184  	jm.jobLister = jobInformer.Lister()
   185  	jm.jobStoreSynced = jobInformer.Informer().HasSynced
   186  
   187  	if _, err := podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   188  		AddFunc: func(obj interface{}) {
   189  			jm.addPod(logger, obj)
   190  		},
   191  		UpdateFunc: func(oldObj, newObj interface{}) {
   192  			jm.updatePod(logger, oldObj, newObj)
   193  		},
   194  		DeleteFunc: func(obj interface{}) {
   195  			jm.deletePod(logger, obj, true)
   196  		},
   197  	}); err != nil {
   198  		return nil, fmt.Errorf("adding Pod event handler: %w", err)
   199  	}
   200  	jm.podStore = podInformer.Lister()
   201  	jm.podStoreSynced = podInformer.Informer().HasSynced
   202  
   203  	jm.updateStatusHandler = jm.updateJobStatus
   204  	jm.patchJobHandler = jm.patchJob
   205  	jm.syncHandler = jm.syncJob
   206  
   207  	metrics.Register()
   208  
   209  	return jm, nil
   210  }
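
        // Illustrative wiring sketch (an assumption about the caller, e.g.
        // kube-controller-manager; not part of this file): the controller is built
        // from a shared informer factory and then started with a chosen number of
        // workers.
        //
        //	factory := informers.NewSharedInformerFactory(kubeClient, 0) // resync period 0 = disabled
        //	jobController, err := NewController(ctx, factory.Core().V1().Pods(), factory.Batch().V1().Jobs(), kubeClient)
        //	if err != nil {
        //		klog.Fatalf("creating job controller: %v", err)
        //	}
        //	factory.Start(ctx.Done())
        //	go jobController.Run(ctx, 5) // worker count is an example value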
   211  
   212  // Run the main goroutine responsible for watching and syncing jobs.
   213  func (jm *Controller) Run(ctx context.Context, workers int) {
   214  	defer utilruntime.HandleCrash()
   215  	logger := klog.FromContext(ctx)
   216  
   217  	// Start events processing pipeline.
   218  	jm.broadcaster.StartStructuredLogging(3)
   219  	jm.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: jm.kubeClient.CoreV1().Events("")})
   220  	defer jm.broadcaster.Shutdown()
   221  
   222  	defer jm.queue.ShutDown()
   223  	defer jm.orphanQueue.ShutDown()
   224  
   225  	logger.Info("Starting job controller")
   226  	defer logger.Info("Shutting down job controller")
   227  
   228  	if !cache.WaitForNamedCacheSync("job", ctx.Done(), jm.podStoreSynced, jm.jobStoreSynced) {
   229  		return
   230  	}
   231  
   232  	for i := 0; i < workers; i++ {
   233  		go wait.UntilWithContext(ctx, jm.worker, time.Second)
   234  	}
   235  
   236  	go wait.UntilWithContext(ctx, jm.orphanWorker, time.Second)
   237  
   238  	<-ctx.Done()
   239  }
   240  
   241  // getPodJobs returns a list of Jobs that potentially match a Pod.
   242  func (jm *Controller) getPodJobs(pod *v1.Pod) []*batch.Job {
   243  	jobs, err := jm.jobLister.GetPodJobs(pod)
   244  	if err != nil {
   245  		return nil
   246  	}
   247  	if len(jobs) > 1 {
   248  		// ControllerRef will ensure we don't do anything crazy, but more than one
   249  		// item in this list nevertheless constitutes user error.
   250  		utilruntime.HandleError(fmt.Errorf("user error! more than one job is selecting pods with labels: %+v", pod.Labels))
   251  	}
   252  	ret := make([]*batch.Job, 0, len(jobs))
   253  	for i := range jobs {
   254  		ret = append(ret, &jobs[i])
   255  	}
   256  	return ret
   257  }
   258  
   259  // resolveControllerRef returns the controller referenced by a ControllerRef,
   260  // or nil if the ControllerRef could not be resolved to a matching controller
   261  // of the correct Kind.
   262  func (jm *Controller) resolveControllerRef(namespace string, controllerRef *metav1.OwnerReference) *batch.Job {
   263  	// We can't look up by UID, so look up by Name and then verify UID.
   264  	// Don't even try to look up by Name if it's the wrong Kind.
   265  	if controllerRef.Kind != controllerKind.Kind {
   266  		return nil
   267  	}
   268  	job, err := jm.jobLister.Jobs(namespace).Get(controllerRef.Name)
   269  	if err != nil {
   270  		return nil
   271  	}
   272  	if job.UID != controllerRef.UID {
   273  		// The controller we found with this Name is not the same one that the
   274  		// ControllerRef points to.
   275  		return nil
   276  	}
   277  	return job
   278  }
   279  
   280  // When a pod is created, enqueue the controller that manages it and update its expectations.
   281  func (jm *Controller) addPod(logger klog.Logger, obj interface{}) {
   282  	pod := obj.(*v1.Pod)
   283  	recordFinishedPodWithTrackingFinalizer(nil, pod)
   284  	if pod.DeletionTimestamp != nil {
   285  		// on a restart of the controller, it's possible a new pod shows up in a state that
   286  		// is already pending deletion. Prevent the pod from being a creation observation.
   287  		jm.deletePod(logger, pod, false)
   288  		return
   289  	}
   290  
   291  	// If it has a ControllerRef, that's all that matters.
   292  	if controllerRef := metav1.GetControllerOf(pod); controllerRef != nil {
   293  		job := jm.resolveControllerRef(pod.Namespace, controllerRef)
   294  		if job == nil {
   295  			return
   296  		}
   297  		jobKey, err := controller.KeyFunc(job)
   298  		if err != nil {
   299  			return
   300  		}
   301  		jm.expectations.CreationObserved(logger, jobKey)
   302  		jm.enqueueSyncJobBatched(logger, job)
   303  		return
   304  	}
   305  
   306  	// Otherwise, it's an orphan.
   307  	// Clean the finalizer.
   308  	if hasJobTrackingFinalizer(pod) {
   309  		jm.enqueueOrphanPod(pod)
   310  	}
   311  	// Get a list of all matching controllers and sync
   312  	// them to see if anyone wants to adopt it.
   313  	// DO NOT observe creation because no controller should be waiting for an
   314  	// orphan.
   315  	for _, job := range jm.getPodJobs(pod) {
   316  		jm.enqueueSyncJobBatched(logger, job)
   317  	}
   318  }
   319  
   320  // When a pod is updated, figure out what job(s) manage it and wake them up.
   321  // If the labels of the pod have changed we need to awaken both the old
   322  // and new job. old and cur must be *v1.Pod types.
   323  func (jm *Controller) updatePod(logger klog.Logger, old, cur interface{}) {
   324  	curPod := cur.(*v1.Pod)
   325  	oldPod := old.(*v1.Pod)
   326  	recordFinishedPodWithTrackingFinalizer(oldPod, curPod)
   327  	if curPod.ResourceVersion == oldPod.ResourceVersion {
   328  		// Periodic resync will send update events for all known pods.
   329  		// Two different versions of the same pod will always have different RVs.
   330  		return
   331  	}
   332  	if curPod.DeletionTimestamp != nil {
   333  		// when a pod is deleted gracefully, its deletion timestamp is first modified to reflect a grace period,
   334  		// and after such time has passed, the kubelet actually deletes it from the store. We receive an update
   335  		// for the modification of the deletion timestamp and expect the job to create more pods asap, not wait
   336  		// until the kubelet actually deletes the pod.
   337  		jm.deletePod(logger, curPod, false)
   338  		return
   339  	}
   340  
   341  	// Don't check if oldPod has the finalizer, as during ownership transfer
   342  	// finalizers might be re-added and removed again on behalf of the new owner.
   343  	// If all those Pod updates collapse into a single event, the finalizer
   344  	// might be removed in oldPod and curPod. We want to record the latest
   345  	// state.
   346  	finalizerRemoved := !hasJobTrackingFinalizer(curPod)
   347  	curControllerRef := metav1.GetControllerOf(curPod)
   348  	oldControllerRef := metav1.GetControllerOf(oldPod)
   349  	controllerRefChanged := !reflect.DeepEqual(curControllerRef, oldControllerRef)
   350  	if controllerRefChanged && oldControllerRef != nil {
   351  		// The ControllerRef was changed. Sync the old controller, if any.
   352  		if job := jm.resolveControllerRef(oldPod.Namespace, oldControllerRef); job != nil {
   353  			if finalizerRemoved {
   354  				key, err := controller.KeyFunc(job)
   355  				if err == nil {
   356  					jm.finalizerExpectations.finalizerRemovalObserved(logger, key, string(curPod.UID))
   357  				}
   358  			}
   359  			jm.enqueueSyncJobBatched(logger, job)
   360  		}
   361  	}
   362  
   363  	// If it has a ControllerRef, that's all that matters.
   364  	if curControllerRef != nil {
   365  		job := jm.resolveControllerRef(curPod.Namespace, curControllerRef)
   366  		if job == nil {
   367  			return
   368  		}
   369  		if finalizerRemoved {
   370  			key, err := controller.KeyFunc(job)
   371  			if err == nil {
   372  				jm.finalizerExpectations.finalizerRemovalObserved(logger, key, string(curPod.UID))
   373  			}
   374  		}
   375  		jm.enqueueSyncJobBatched(logger, job)
   376  		return
   377  	}
   378  
   379  	// Otherwise, it's an orphan.
   380  	// Clean the finalizer.
   381  	if hasJobTrackingFinalizer(curPod) {
   382  		jm.enqueueOrphanPod(curPod)
   383  	}
   384  	// If anything changed, sync matching controllers
   385  	// to see if anyone wants to adopt it now.
   386  	labelChanged := !reflect.DeepEqual(curPod.Labels, oldPod.Labels)
   387  	if labelChanged || controllerRefChanged {
   388  		for _, job := range jm.getPodJobs(curPod) {
   389  			jm.enqueueSyncJobBatched(logger, job)
   390  		}
   391  	}
   392  }
   393  
   394  // When a pod is deleted, enqueue the job that manages the pod and update its expectations.
   395  // obj could be a *v1.Pod, or a DeletedFinalStateUnknown marker item.
   396  func (jm *Controller) deletePod(logger klog.Logger, obj interface{}, final bool) {
   397  	pod, ok := obj.(*v1.Pod)
   398  	if final {
   399  		recordFinishedPodWithTrackingFinalizer(pod, nil)
   400  	}
   401  
   402  	// When a delete is dropped, the relist will notice a pod in the store not
   403  	// in the list, leading to the insertion of a tombstone object which contains
   404  	// the deleted key/value. Note that this value might be stale. If the pod
   405  	// changed labels the new job will not be woken up till the periodic resync.
   406  	if !ok {
   407  		tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
   408  		if !ok {
   409  			utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %+v", obj))
   410  			return
   411  		}
   412  		pod, ok = tombstone.Obj.(*v1.Pod)
   413  		if !ok {
   414  			utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a pod %+v", obj))
   415  			return
   416  		}
   417  	}
   418  
   419  	controllerRef := metav1.GetControllerOf(pod)
   420  	hasFinalizer := hasJobTrackingFinalizer(pod)
   421  	if controllerRef == nil {
   422  		// No controller should care about orphans being deleted.
   423  		// But this pod might have belonged to a Job and the GC removed the reference.
   424  		if hasFinalizer {
   425  			jm.enqueueOrphanPod(pod)
   426  		}
   427  		return
   428  	}
   429  	job := jm.resolveControllerRef(pod.Namespace, controllerRef)
   430  	if job == nil || util.IsJobFinished(job) {
   431  		// syncJob will not remove this finalizer.
   432  		if hasFinalizer {
   433  			jm.enqueueOrphanPod(pod)
   434  		}
   435  		return
   436  	}
   437  	jobKey, err := controller.KeyFunc(job)
   438  	if err != nil {
   439  		return
   440  	}
   441  	jm.expectations.DeletionObserved(logger, jobKey)
   442  
   443  	// Consider the finalizer removed if this is the final delete. Otherwise,
   444  	// this is an update of the deletion timestamp, so check whether the finalizer is still present.
   445  	if final || !hasFinalizer {
   446  		jm.finalizerExpectations.finalizerRemovalObserved(logger, jobKey, string(pod.UID))
   447  	}
   448  
   449  	jm.enqueueSyncJobBatched(logger, job)
   450  }
   451  
   452  func (jm *Controller) addJob(logger klog.Logger, obj interface{}) {
   453  	jm.enqueueSyncJobImmediately(logger, obj)
   454  	jobObj, ok := obj.(*batch.Job)
   455  	if !ok {
   456  		return
   457  	}
   458  	if controllerName := managedByExternalController(jobObj); controllerName != nil {
   459  		metrics.JobByExternalControllerTotal.WithLabelValues(*controllerName).Inc()
   460  	}
   461  }
   462  
   463  func (jm *Controller) updateJob(logger klog.Logger, old, cur interface{}) {
   464  	oldJob := old.(*batch.Job)
   465  	curJob := cur.(*batch.Job)
   466  
   467  	// KeyFunc should never return an error for a well-formed Job.
   468  	key, err := controller.KeyFunc(curJob)
   469  	if err != nil {
   470  		return
   471  	}
   472  
   473  	if curJob.Generation == oldJob.Generation {
   474  		// No generation change: delay the sync so that Job status updates,
   475  		// typically triggered by pod events, are batched.
   476  		jm.enqueueSyncJobBatched(logger, curJob)
   477  	} else {
   478  		// Trigger immediate sync when spec is changed.
   479  		jm.enqueueSyncJobImmediately(logger, curJob)
   480  	}
   481  
   482  	// The job shouldn't be marked as finished until all pod finalizers are removed.
   483  	// Cleaning them up here is a backup operation in case that did not happen.
   484  	if util.IsJobFinished(curJob) {
   485  		jm.cleanupPodFinalizers(curJob)
   486  	}
   487  
   488  	// check if we need to schedule a new resync for ActiveDeadlineSeconds
   489  	if curJob.Status.StartTime != nil {
   490  		curADS := curJob.Spec.ActiveDeadlineSeconds
   491  		if curADS == nil {
   492  			return
   493  		}
   494  		oldADS := oldJob.Spec.ActiveDeadlineSeconds
   495  		if oldADS == nil || *oldADS != *curADS {
   496  			passed := jm.clock.Since(curJob.Status.StartTime.Time)
   497  			total := time.Duration(*curADS) * time.Second
   498  			// AddAfter will handle total < passed
   499  			jm.queue.AddAfter(key, total-passed)
   500  			logger.V(4).Info("job's ActiveDeadlineSeconds updated, will resync", "key", key, "interval", total-passed)
   501  		}
   502  	}
   503  }
   504  
   505  // deleteJob enqueues the job and all the pods associated with it that still
   506  // have a finalizer.
   507  func (jm *Controller) deleteJob(logger klog.Logger, obj interface{}) {
   508  	jm.enqueueSyncJobImmediately(logger, obj)
   509  	jobObj, ok := obj.(*batch.Job)
   510  	if !ok {
   511  		tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
   512  		if !ok {
   513  			utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %+v", obj))
   514  			return
   515  		}
   516  		jobObj, ok = tombstone.Obj.(*batch.Job)
   517  		if !ok {
   518  			utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a job %+v", obj))
   519  			return
   520  		}
   521  	}
   522  	jm.cleanupPodFinalizers(jobObj)
   523  }
   524  
   525  // enqueueSyncJobImmediately tells the Job controller to invoke syncJob
   526  // immediately.
   527  // It is only used for Job events (creation, deletion, spec update).
   528  // obj could be a *batch.Job, or a DeletedFinalStateUnknown marker item.
   529  func (jm *Controller) enqueueSyncJobImmediately(logger klog.Logger, obj interface{}) {
   530  	jm.enqueueSyncJobInternal(logger, obj, 0)
   531  }
   532  
   533  // enqueueSyncJobBatched tells the controller to invoke syncJob with a
   534  // constant batching delay.
   535  // It is used for:
   536  // - Pod events (creation, deletion, update)
   537  // - Job status update
   538  // obj could be a *batch.Job, or a DeletedFinalStateUnknown marker item.
   539  func (jm *Controller) enqueueSyncJobBatched(logger klog.Logger, obj interface{}) {
   540  	jm.enqueueSyncJobInternal(logger, obj, syncJobBatchPeriod)
   541  }
   542  
   543  // enqueueSyncJobWithDelay tells the controller to invoke syncJob with a
   544  // custom delay, but not smaller than the batching delay.
   545  // It is used when pod recreations are delayed due to pod failures.
   546  // obj could be a *batch.Job, or a DeletedFinalStateUnknown marker item.
   547  func (jm *Controller) enqueueSyncJobWithDelay(logger klog.Logger, obj interface{}, delay time.Duration) {
   548  	if delay < syncJobBatchPeriod {
   549  		delay = syncJobBatchPeriod
   550  	}
   551  	jm.enqueueSyncJobInternal(logger, obj, delay)
   552  }
   553  
   554  func (jm *Controller) enqueueSyncJobInternal(logger klog.Logger, obj interface{}, delay time.Duration) {
   555  	key, err := controller.KeyFunc(obj)
   556  	if err != nil {
   557  		utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err))
   558  		return
   559  	}
   560  
   561  	// TODO: Handle overlapping controllers better. Either disallow them at admission time or
   562  	// deterministically avoid syncing controllers that fight over pods. Currently, we only
   563  	// ensure that the same controller is synced for a given pod. When we periodically relist
   564  	// all controllers there will still be some replica instability. One way to handle this is
   565  	// by querying the store for all controllers that this job overlaps, as well as all
   566  	// controllers that overlap this job, and sorting them.
   567  	logger.Info("enqueueing job", "key", key, "delay", delay)
   568  	jm.queue.AddAfter(key, delay)
   569  }
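
        // Illustrative note (editorial addition, not part of the upstream source):
        // the rate-limiting workqueue deduplicates keys, so enqueueing the same Job
        // several times within the batch window results in a single sync. For
        // example, a burst of pod events collapses as follows:
        //
        //	jm.enqueueSyncJobBatched(logger, job) // pod created
        //	jm.enqueueSyncJobBatched(logger, job) // pod became ready
        //	// -> the key "ns/name" is processed once, ~syncJobBatchPeriod after the first call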
   570  
   571  func (jm *Controller) enqueueOrphanPod(obj *v1.Pod) {
   572  	key, err := controller.KeyFunc(obj)
   573  	if err != nil {
   574  		utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err))
   575  		return
   576  	}
   577  	jm.orphanQueue.Add(key)
   578  }
   579  
   580  // worker runs a worker thread that just dequeues items, processes them, and marks them done.
   581  // It enforces that the syncHandler is never invoked concurrently with the same key.
   582  func (jm *Controller) worker(ctx context.Context) {
   583  	for jm.processNextWorkItem(ctx) {
   584  	}
   585  }
   586  
   587  func (jm *Controller) processNextWorkItem(ctx context.Context) bool {
   588  	key, quit := jm.queue.Get()
   589  	if quit {
   590  		return false
   591  	}
   592  	defer jm.queue.Done(key)
   593  
   594  	err := jm.syncHandler(ctx, key)
   595  	if err == nil {
   596  		jm.queue.Forget(key)
   597  		return true
   598  	}
   599  
   600  	utilruntime.HandleError(fmt.Errorf("syncing job: %w", err))
   601  	jm.queue.AddRateLimited(key)
   602  
   603  	return true
   604  }
   605  
   606  func (jm *Controller) orphanWorker(ctx context.Context) {
   607  	for jm.processNextOrphanPod(ctx) {
   608  	}
   609  }
   610  
   611  func (jm *Controller) processNextOrphanPod(ctx context.Context) bool {
   612  	key, quit := jm.orphanQueue.Get()
   613  	if quit {
   614  		return false
   615  	}
   616  	defer jm.orphanQueue.Done(key)
   617  	err := jm.syncOrphanPod(ctx, key)
   618  	if err != nil {
   619  		utilruntime.HandleError(fmt.Errorf("syncing orphan pod: %w", err))
   620  		jm.orphanQueue.AddRateLimited(key)
   621  	} else {
   622  		jm.orphanQueue.Forget(key)
   623  	}
   624  
   625  	return true
   626  }
   627  
   628  // syncOrphanPod removes the tracking finalizer from an orphan pod if found.
   629  func (jm *Controller) syncOrphanPod(ctx context.Context, key string) error {
   630  	startTime := jm.clock.Now()
   631  	logger := klog.FromContext(ctx)
   632  	defer func() {
   633  		logger.V(4).Info("Finished syncing orphan pod", "pod", key, "elapsed", jm.clock.Since(startTime))
   634  	}()
   635  
   636  	ns, name, err := cache.SplitMetaNamespaceKey(key)
   637  	if err != nil {
   638  		return err
   639  	}
   640  
   641  	sharedPod, err := jm.podStore.Pods(ns).Get(name)
   642  	if err != nil {
   643  		if apierrors.IsNotFound(err) {
   644  			logger.V(4).Info("Orphan pod has been deleted", "pod", key)
   645  			return nil
   646  		}
   647  		return err
   648  	}
   649  	// Make sure the pod is still orphaned.
   650  	if controllerRef := metav1.GetControllerOf(sharedPod); controllerRef != nil {
   651  		if controllerRef.Kind != controllerKind.Kind || controllerRef.APIVersion != batch.SchemeGroupVersion.String() {
   652  			// The pod is controlled by an owner that is not a batch/v1 Job. Do not remove finalizer.
   653  			return nil
   654  		}
   655  		job := jm.resolveControllerRef(sharedPod.Namespace, controllerRef)
   656  		if job != nil {
   657  			// Skip cleanup of finalizers for pods owned by a job managed by an external controller
   658  			if controllerName := managedByExternalController(job); controllerName != nil {
   659  				logger.V(2).Info("Skip cleanup of the job finalizer for a pod owned by a job that is managed by an external controller", "key", key, "podUID", sharedPod.UID, "jobUID", job.UID, "controllerName", controllerName)
   660  				return nil
   661  			}
   662  		}
   663  		if job != nil && !util.IsJobFinished(job) {
   664  			// The pod was adopted. Do not remove finalizer.
   665  			return nil
   666  		}
   667  	}
   668  	if patch := removeTrackingFinalizerPatch(sharedPod); patch != nil {
   669  		if err := jm.podControl.PatchPod(ctx, ns, name, patch); err != nil && !apierrors.IsNotFound(err) {
   670  			return err
   671  		}
   672  	}
   673  	return nil
   674  }
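
        // Illustrative note (an assumption about the helper defined later in this
        // file, not a verbatim copy): removeTrackingFinalizerPatch is expected to
        // build a strategic merge patch that deletes only the Job tracking
        // finalizer, conceptually similar to:
        //
        //	patch := map[string]interface{}{
        //		"metadata": map[string]interface{}{
        //			"$deleteFromPrimitiveList/finalizers": []string{batch.JobTrackingFinalizer},
        //		},
        //	}
        //	patchBytes, _ := json.Marshal(patch)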
   675  
   676  // getPodsForJob returns the set of pods that this Job should manage.
   677  // It also reconciles ControllerRef by adopting/orphaning, adding tracking
   678  // finalizers.
   679  // Note that the returned Pods are pointers into the cache.
   680  func (jm *Controller) getPodsForJob(ctx context.Context, j *batch.Job) ([]*v1.Pod, error) {
   681  	selector, err := metav1.LabelSelectorAsSelector(j.Spec.Selector)
   682  	if err != nil {
   683  		return nil, fmt.Errorf("couldn't convert Job selector: %v", err)
   684  	}
   685  	// List all pods to include those that don't match the selector anymore
   686  	// but have a ControllerRef pointing to this controller.
   687  	pods, err := jm.podStore.Pods(j.Namespace).List(labels.Everything())
   688  	if err != nil {
   689  		return nil, err
   690  	}
   691  	// If any adoptions are attempted, we should first recheck for deletion
   692  	// with an uncached quorum read sometime after listing Pods (see #42639).
   693  	canAdoptFunc := controller.RecheckDeletionTimestamp(func(ctx context.Context) (metav1.Object, error) {
   694  		fresh, err := jm.kubeClient.BatchV1().Jobs(j.Namespace).Get(ctx, j.Name, metav1.GetOptions{})
   695  		if err != nil {
   696  			return nil, err
   697  		}
   698  		if fresh.UID != j.UID {
   699  			return nil, fmt.Errorf("original Job %v/%v is gone: got uid %v, wanted %v", j.Namespace, j.Name, fresh.UID, j.UID)
   700  		}
   701  		return fresh, nil
   702  	})
   703  	cm := controller.NewPodControllerRefManager(jm.podControl, j, selector, controllerKind, canAdoptFunc, batch.JobTrackingFinalizer)
   704  	// When adopting Pods, this operation adds an ownerRef and finalizers.
   705  	pods, err = cm.ClaimPods(ctx, pods)
   706  	if err != nil {
   707  		return pods, err
   708  	}
   709  	// Set the finalizer locally on freshly adopted pods (whose cached copies don't yet show the new ownership) so the remaining calculations treat them as tracked.
   710  	for i, p := range pods {
   711  		adopted := true
   712  		for _, r := range p.OwnerReferences {
   713  			if r.UID == j.UID {
   714  				adopted = false
   715  				break
   716  			}
   717  		}
   718  		if adopted && !hasJobTrackingFinalizer(p) {
   719  			pods[i] = p.DeepCopy()
   720  			pods[i].Finalizers = append(p.Finalizers, batch.JobTrackingFinalizer)
   721  		}
   722  	}
   723  	return pods, err
   724  }
   725  
   726  // syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning
   727  // it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
   728  // concurrently with the same key.
   729  func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
   730  	startTime := jm.clock.Now()
   731  	logger := klog.FromContext(ctx)
   732  	defer func() {
   733  		logger.V(4).Info("Finished syncing job", "key", key, "elapsed", jm.clock.Since(startTime))
   734  	}()
   735  
   736  	ns, name, err := cache.SplitMetaNamespaceKey(key)
   737  	if err != nil {
   738  		return err
   739  	}
   740  	if len(ns) == 0 || len(name) == 0 {
   741  		return fmt.Errorf("invalid job key %q: either namespace or name is missing", key)
   742  	}
   743  	sharedJob, err := jm.jobLister.Jobs(ns).Get(name)
   744  	if err != nil {
   745  		if apierrors.IsNotFound(err) {
   746  			logger.V(4).Info("Job has been deleted", "key", key)
   747  			jm.expectations.DeleteExpectations(logger, key)
   748  			jm.finalizerExpectations.deleteExpectations(logger, key)
   749  
   750  			err := jm.podBackoffStore.removeBackoffRecord(key)
   751  			if err != nil {
   752  				// re-syncing here as the record has to be removed for finished/deleted jobs
   753  				return fmt.Errorf("error removing backoff record: %w", err)
   754  			}
   755  			return nil
   756  		}
   757  		return err
   758  	}
   759  
   760  	// Skip syncing of the job if it is managed by another controller.
   761  	// We cannot rely solely on skipping the queueing of such jobs for synchronization,
   762  	// because it is possible that a synchronization task is queued for a job without
   763  	// the managedBy field, but the job is then quickly replaced by another job with
   764  	// the field. In that case, syncJob might be invoked for a job with the field.
   765  	if controllerName := managedByExternalController(sharedJob); controllerName != nil {
   766  		logger.V(2).Info("Skip syncing the job as it is managed by an external controller", "key", key, "uid", sharedJob.UID, "controllerName", controllerName)
   767  		return nil
   768  	}
   769  
   770  	// make a copy so we don't mutate the shared cache
   771  	job := *sharedJob.DeepCopy()
   772  
   773  	// if job was finished previously, we don't want to redo the termination
   774  	if util.IsJobFinished(&job) {
   775  		err := jm.podBackoffStore.removeBackoffRecord(key)
   776  		if err != nil {
   777  			// re-syncing here as the record has to be removed for finished/deleted jobs
   778  			return fmt.Errorf("error removing backoff record: %w", err)
   779  		}
   780  		return nil
   781  	}
   782  
   783  	if job.Spec.CompletionMode != nil && *job.Spec.CompletionMode != batch.NonIndexedCompletion && *job.Spec.CompletionMode != batch.IndexedCompletion {
   784  		jm.recorder.Event(&job, v1.EventTypeWarning, "UnknownCompletionMode", "Skipped Job sync because completion mode is unknown")
   785  		return nil
   786  	}
   787  
   788  	completionMode := getCompletionMode(&job)
   789  	action := metrics.JobSyncActionReconciling
   790  
   791  	defer func() {
   792  		result := "success"
   793  		if rErr != nil {
   794  			result = "error"
   795  		}
   796  
   797  		metrics.JobSyncDurationSeconds.WithLabelValues(completionMode, result, action).Observe(jm.clock.Since(startTime).Seconds())
   798  		metrics.JobSyncNum.WithLabelValues(completionMode, result, action).Inc()
   799  	}()
   800  
   801  	if job.Status.UncountedTerminatedPods == nil {
   802  		job.Status.UncountedTerminatedPods = &batch.UncountedTerminatedPods{}
   803  	}
   804  
   805  	// Check the expectations of the job before counting active pods, otherwise a new pod can sneak in
   806  	// and update the expectations after we've retrieved active pods from the store. If a new pod enters
   807  	// the store after we've checked the expectation, the job sync is just deferred till the next relist.
   808  	satisfiedExpectations := jm.expectations.SatisfiedExpectations(logger, key)
   809  
   810  	pods, err := jm.getPodsForJob(ctx, &job)
   811  	if err != nil {
   812  		return err
   813  	}
   814  	var terminating *int32
   815  	if feature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) {
   816  		terminating = ptr.To(controller.CountTerminatingPods(pods))
   817  	}
   818  	jobCtx := &syncJobCtx{
   819  		job:                  &job,
   820  		pods:                 pods,
   821  		activePods:           controller.FilterActivePods(logger, pods),
   822  		terminating:          terminating,
   823  		uncounted:            newUncountedTerminatedPods(*job.Status.UncountedTerminatedPods),
   824  		expectedRmFinalizers: jm.finalizerExpectations.getExpectedUIDs(key),
   825  	}
   826  	active := int32(len(jobCtx.activePods))
   827  	newSucceededPods, newFailedPods := getNewFinishedPods(jobCtx)
   828  	jobCtx.succeeded = job.Status.Succeeded + int32(len(newSucceededPods)) + int32(len(jobCtx.uncounted.succeeded))
   829  	jobCtx.failed = job.Status.Failed + int32(nonIgnoredFailedPodsCount(jobCtx, newFailedPods)) + int32(len(jobCtx.uncounted.failed))
   830  	var ready *int32
   831  	if feature.DefaultFeatureGate.Enabled(features.JobReadyPods) {
   832  		ready = ptr.To(countReadyPods(jobCtx.activePods))
   833  	}
   834  
   835  	// Job first start. Set StartTime only if the job is not in the suspended state.
   836  	if job.Status.StartTime == nil && !jobSuspended(&job) {
   837  		now := metav1.NewTime(jm.clock.Now())
   838  		job.Status.StartTime = &now
   839  	}
   840  
   841  	jobCtx.newBackoffRecord = jm.podBackoffStore.newBackoffRecord(key, newSucceededPods, newFailedPods)
   842  
   843  	var manageJobErr error
   844  
   845  	exceedsBackoffLimit := jobCtx.failed > *job.Spec.BackoffLimit
   846  	jobCtx.finishedCondition = hasSuccessCriteriaMetCondition(&job)
   847  
   848  	// Given that the Job already has the SuccessCriteriaMet condition, the termination condition was already confirmed in another cycle.
   849  	// So, the job-controller evaluates the podFailurePolicy only when the Job doesn't have the SuccessCriteriaMet condition.
   850  	if jobCtx.finishedCondition == nil && feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) {
   851  		if failureTargetCondition := findConditionByType(job.Status.Conditions, batch.JobFailureTarget); failureTargetCondition != nil {
   852  			jobCtx.finishedCondition = newFailedConditionForFailureTarget(failureTargetCondition, jm.clock.Now())
   853  		} else if failJobMessage := getFailJobMessage(&job, pods); failJobMessage != nil {
   854  			// Prepare the interim FailureTarget condition to record the failure message before the finalizers (allowing removal of the pods) are removed.
   855  			jobCtx.finishedCondition = newCondition(batch.JobFailureTarget, v1.ConditionTrue, batch.JobReasonPodFailurePolicy, *failJobMessage, jm.clock.Now())
   856  		}
   857  	}
   858  	if jobCtx.finishedCondition == nil {
   859  		if exceedsBackoffLimit || pastBackoffLimitOnFailure(&job, pods) {
   860  			// check if the number of pod restarts exceeds the backoff limit (for restartPolicy: OnFailure only)
   861  			// OR if the number of failed pods increased since the last syncJob
   862  			jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, batch.JobReasonBackoffLimitExceeded, "Job has reached the specified backoff limit", jm.clock.Now())
   863  		} else if jm.pastActiveDeadline(&job) {
   864  			jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, batch.JobReasonDeadlineExceeded, "Job was active longer than specified deadline", jm.clock.Now())
   865  		} else if job.Spec.ActiveDeadlineSeconds != nil && !jobSuspended(&job) {
   866  			syncDuration := time.Duration(*job.Spec.ActiveDeadlineSeconds)*time.Second - jm.clock.Since(job.Status.StartTime.Time)
   867  			logger.V(2).Info("Job has activeDeadlineSeconds configuration. Will sync this job again", "key", key, "nextSyncIn", syncDuration)
   868  			jm.queue.AddAfter(key, syncDuration)
   869  		}
   870  	}
   871  
   872  	if isIndexedJob(&job) {
   873  		jobCtx.prevSucceededIndexes, jobCtx.succeededIndexes = calculateSucceededIndexes(logger, &job, pods)
   874  		jobCtx.succeeded = int32(jobCtx.succeededIndexes.total())
   875  		if hasBackoffLimitPerIndex(&job) {
   876  			jobCtx.failedIndexes = calculateFailedIndexes(logger, &job, pods)
   877  			if jobCtx.finishedCondition == nil {
   878  				if job.Spec.MaxFailedIndexes != nil && jobCtx.failedIndexes.total() > int(*job.Spec.MaxFailedIndexes) {
   879  					jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, batch.JobReasonMaxFailedIndexesExceeded, "Job has exceeded the specified maximal number of failed indexes", jm.clock.Now())
   880  				} else if jobCtx.failedIndexes.total() > 0 && jobCtx.failedIndexes.total()+jobCtx.succeededIndexes.total() >= int(*job.Spec.Completions) {
   881  					jobCtx.finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, batch.JobReasonFailedIndexes, "Job has failed indexes", jm.clock.Now())
   882  				}
   883  			}
   884  			jobCtx.podsWithDelayedDeletionPerIndex = getPodsWithDelayedDeletionPerIndex(logger, jobCtx)
   885  		}
   886  		if jobCtx.finishedCondition == nil && hasSuccessCriteriaMetCondition(jobCtx.job) == nil {
   887  			if msg, met := matchSuccessPolicy(logger, job.Spec.SuccessPolicy, *job.Spec.Completions, jobCtx.succeededIndexes); met {
   888  				jobCtx.finishedCondition = newCondition(batch.JobSuccessCriteriaMet, v1.ConditionTrue, batch.JobReasonSuccessPolicy, msg, jm.clock.Now())
   889  			}
   890  		}
   891  	}
   892  	suspendCondChanged := false
   893  	// Delete active pods if the Job has a finishing condition (e.g. failed or success criteria met).
   894  	if jobCtx.finishedCondition != nil {
   895  		deleted, err := jm.deleteActivePods(ctx, &job, jobCtx.activePods)
   896  		if deleted != active || !satisfiedExpectations {
   897  			// Can't declare the Job as finished yet, as there might be remaining
   898  			// pod finalizers or pods that are not in the informer's cache yet.
   899  			jobCtx.finishedCondition = nil
   900  		}
   901  		active -= deleted
   902  		manageJobErr = err
   903  	} else {
   904  		manageJobCalled := false
   905  		if satisfiedExpectations && job.DeletionTimestamp == nil {
   906  			active, action, manageJobErr = jm.manageJob(ctx, &job, jobCtx)
   907  			manageJobCalled = true
   908  		}
   909  		complete := false
   910  		if job.Spec.Completions == nil {
   911  			// This type of job is complete when any pod exits with success.
   912  			// Each pod is capable of
   913  			// determining whether or not the entire Job is done.  Subsequent pods are
   914  			// not expected to fail, but if they do, the failure is ignored.  Once any
   915  			// pod succeeds, the controller waits for remaining pods to finish, and
   916  			// then the job is complete.
   917  			complete = jobCtx.succeeded > 0 && active == 0
   918  		} else {
   919  			// Job specifies a number of completions.  This type of job signals
   920  			// success by having that number of successes.  Since we do not
   921  			// start more pods than there are remaining completions, there should
   922  			// not be any remaining active pods once this count is reached.
   923  			complete = jobCtx.succeeded >= *job.Spec.Completions && active == 0
   924  		}
   925  		if complete {
   926  			jobCtx.finishedCondition = newCondition(batch.JobComplete, v1.ConditionTrue, "", "", jm.clock.Now())
   927  		} else if manageJobCalled {
   928  			// Update the conditions / emit events only if manageJob was called in
   929  			// this syncJob. Otherwise wait for the right syncJob call to make
   930  			// updates.
   931  			if job.Spec.Suspend != nil && *job.Spec.Suspend {
   932  				// Job can be in the suspended state only if it is NOT completed.
   933  				var isUpdated bool
   934  				job.Status.Conditions, isUpdated = ensureJobConditionStatus(job.Status.Conditions, batch.JobSuspended, v1.ConditionTrue, "JobSuspended", "Job suspended", jm.clock.Now())
   935  				if isUpdated {
   936  					suspendCondChanged = true
   937  					jm.recorder.Event(&job, v1.EventTypeNormal, "Suspended", "Job suspended")
   938  				}
   939  			} else {
   940  				// Job not suspended.
   941  				var isUpdated bool
   942  				job.Status.Conditions, isUpdated = ensureJobConditionStatus(job.Status.Conditions, batch.JobSuspended, v1.ConditionFalse, "JobResumed", "Job resumed", jm.clock.Now())
   943  				if isUpdated {
   944  					suspendCondChanged = true
   945  					jm.recorder.Event(&job, v1.EventTypeNormal, "Resumed", "Job resumed")
   946  					// Resumed jobs will always reset StartTime to current time. This is
   947  					// done because the ActiveDeadlineSeconds timer shouldn't go off
   948  					// whilst the Job is still suspended and resetting StartTime is
   949  					// consistent with resuming a Job created in the suspended state.
   950  					// (ActiveDeadlineSeconds is interpreted as the number of seconds a
   951  					// Job is continuously active.)
   952  					now := metav1.NewTime(jm.clock.Now())
   953  					job.Status.StartTime = &now
   954  				}
   955  			}
   956  		}
   957  	}
   958  
   959  	needsStatusUpdate := suspendCondChanged || active != job.Status.Active || !ptr.Equal(ready, job.Status.Ready)
   960  	needsStatusUpdate = needsStatusUpdate || !ptr.Equal(job.Status.Terminating, jobCtx.terminating)
   961  	job.Status.Active = active
   962  	job.Status.Ready = ready
   963  	job.Status.Terminating = jobCtx.terminating
   964  	err = jm.trackJobStatusAndRemoveFinalizers(ctx, jobCtx, needsStatusUpdate)
   965  	if err != nil {
   966  		return fmt.Errorf("tracking status: %w", err)
   967  	}
   968  
   969  	return manageJobErr
   970  }
   971  
   972  // deleteActivePods issues deletion for active Pods, preserving finalizers.
   973  // This is done through DELETE calls that set deletion timestamps.
   974  // The method trackJobStatusAndRemoveFinalizers removes the finalizers, after
   975  // which the objects can actually be deleted.
   976  // Returns the number of successful deletions issued.
   977  func (jm *Controller) deleteActivePods(ctx context.Context, job *batch.Job, pods []*v1.Pod) (int32, error) {
   978  	errCh := make(chan error, len(pods))
   979  	successfulDeletes := int32(len(pods))
   980  	wg := sync.WaitGroup{}
   981  	wg.Add(len(pods))
   982  	for i := range pods {
   983  		go func(pod *v1.Pod) {
   984  			defer wg.Done()
   985  			if err := jm.podControl.DeletePod(ctx, job.Namespace, pod.Name, job); err != nil && !apierrors.IsNotFound(err) {
   986  				atomic.AddInt32(&successfulDeletes, -1)
   987  				errCh <- err
   988  				utilruntime.HandleError(err)
   989  			}
   990  		}(pods[i])
   991  	}
   992  	wg.Wait()
   993  	return successfulDeletes, errorFromChannel(errCh)
   994  }
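
        // Illustrative note (an assumption about the helper defined elsewhere in
        // this package, not a verbatim copy): errorFromChannel presumably drains at
        // most one buffered error in a non-blocking way, so the first failure, if
        // any, is surfaced to the caller:
        //
        //	func errorFromChannel(errCh <-chan error) error {
        //		select {
        //		case err := <-errCh:
        //			return err
        //		default:
        //		}
        //		return nil
        //	}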
   995  
   996  func nonIgnoredFailedPodsCount(jobCtx *syncJobCtx, failedPods []*v1.Pod) int {
   997  	result := len(failedPods)
   998  	if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && jobCtx.job.Spec.PodFailurePolicy != nil {
   999  		for _, p := range failedPods {
  1000  			_, countFailed, _ := matchPodFailurePolicy(jobCtx.job.Spec.PodFailurePolicy, p)
  1001  			if !countFailed {
  1002  				result--
  1003  			}
  1004  		}
  1005  	}
  1006  	return result
  1007  }
  1008  
  1009  // deleteJobPods deletes the pods, returns the number of successful removals
  1010  // and any error.
  1011  func (jm *Controller) deleteJobPods(ctx context.Context, job *batch.Job, jobKey string, pods []*v1.Pod) (int32, error) {
  1012  	errCh := make(chan error, len(pods))
  1013  	successfulDeletes := int32(len(pods))
  1014  	logger := klog.FromContext(ctx)
  1015  
  1016  	failDelete := func(pod *v1.Pod, err error) {
  1017  		// Decrement the expected number of deletes because the informer won't observe this deletion
  1018  		jm.expectations.DeletionObserved(logger, jobKey)
  1019  		if !apierrors.IsNotFound(err) {
  1020  			logger.V(2).Info("Failed to delete Pod", "job", klog.KObj(job), "pod", klog.KObj(pod), "err", err)
  1021  			atomic.AddInt32(&successfulDeletes, -1)
  1022  			errCh <- err
  1023  			utilruntime.HandleError(err)
  1024  		}
  1025  	}
  1026  
  1027  	wg := sync.WaitGroup{}
  1028  	wg.Add(len(pods))
  1029  	for i := range pods {
  1030  		go func(pod *v1.Pod) {
  1031  			defer wg.Done()
  1032  			if patch := removeTrackingFinalizerPatch(pod); patch != nil {
  1033  				if err := jm.podControl.PatchPod(ctx, pod.Namespace, pod.Name, patch); err != nil {
  1034  					failDelete(pod, fmt.Errorf("removing completion finalizer: %w", err))
  1035  					return
  1036  				}
  1037  			}
  1038  			if err := jm.podControl.DeletePod(ctx, job.Namespace, pod.Name, job); err != nil {
  1039  				failDelete(pod, err)
  1040  			}
  1041  		}(pods[i])
  1042  	}
  1043  	wg.Wait()
  1044  	return successfulDeletes, errorFromChannel(errCh)
  1045  }
  1046  
  1047  // trackJobStatusAndRemoveFinalizers does:
  1048  //  1. Add finished Pods to .status.uncountedTerminatedPods
  1049  //  2. Remove the finalizers from the Pods if they completed or were removed
  1050  //     or the job was removed.
  1051  //  3. Increment job counters for pods that no longer have a finalizer.
  1052  //  4. Add Complete condition if satisfied with current counters.
  1053  //
  1054  // It does this up to a limited number of Pods so that the size of .status
  1055  // doesn't grow too much and this sync doesn't starve other Jobs.
  1056  func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, jobCtx *syncJobCtx, needsFlush bool) error {
  1057  	logger := klog.FromContext(ctx)
  1058  
  1059  	isIndexed := isIndexedJob(jobCtx.job)
  1060  	var podsToRemoveFinalizer []*v1.Pod
  1061  	uncountedStatus := jobCtx.job.Status.UncountedTerminatedPods
  1062  	var newSucceededIndexes []int
  1063  	if isIndexed {
  1064  		// Sort to introduce completed Indexes in order.
  1065  		sort.Sort(byCompletionIndex(jobCtx.pods))
  1066  	}
  1067  	uidsWithFinalizer := make(sets.Set[string], len(jobCtx.pods))
  1068  	for _, p := range jobCtx.pods {
  1069  		uid := string(p.UID)
  1070  		if hasJobTrackingFinalizer(p) && !jobCtx.expectedRmFinalizers.Has(uid) {
  1071  			uidsWithFinalizer.Insert(uid)
  1072  		}
  1073  	}
  1074  
  1075  	// Shallow copy, as it will only be used to detect changes in the counters.
  1076  	oldCounters := jobCtx.job.Status
  1077  	if cleanUncountedPodsWithoutFinalizers(&jobCtx.job.Status, uidsWithFinalizer) {
  1078  		needsFlush = true
  1079  	}
  1080  	podFailureCountByPolicyAction := map[string]int{}
  1081  	reachedMaxUncountedPods := false
  1082  	for _, pod := range jobCtx.pods {
  1083  		if !hasJobTrackingFinalizer(pod) || jobCtx.expectedRmFinalizers.Has(string(pod.UID)) {
  1084  			// This pod was processed in a previous sync.
  1085  			continue
  1086  		}
  1087  		considerPodFailed := isPodFailed(pod, jobCtx.job)
  1088  		if !canRemoveFinalizer(logger, jobCtx, pod, considerPodFailed) {
  1089  			continue
  1090  		}
  1091  		podsToRemoveFinalizer = append(podsToRemoveFinalizer, pod)
  1092  		if pod.Status.Phase == v1.PodSucceeded && !jobCtx.uncounted.failed.Has(string(pod.UID)) {
  1093  			if isIndexed {
  1094  				// The completion index is enough to avoid recounting succeeded pods.
  1095  				// No need to track UIDs.
  1096  				ix := getCompletionIndex(pod.Annotations)
  1097  				if ix != unknownCompletionIndex && ix < int(*jobCtx.job.Spec.Completions) && !jobCtx.prevSucceededIndexes.has(ix) {
  1098  					newSucceededIndexes = append(newSucceededIndexes, ix)
  1099  					needsFlush = true
  1100  				}
  1101  			} else if !jobCtx.uncounted.succeeded.Has(string(pod.UID)) {
  1102  				needsFlush = true
  1103  				uncountedStatus.Succeeded = append(uncountedStatus.Succeeded, pod.UID)
  1104  			}
  1105  		} else if considerPodFailed || (jobCtx.finishedCondition != nil && !isSuccessCriteriaMetCondition(jobCtx.finishedCondition)) {
  1106  			// When the job is considered finished, every non-terminated pod is considered failed.
  1107  			ix := getCompletionIndex(pod.Annotations)
  1108  			if !jobCtx.uncounted.failed.Has(string(pod.UID)) && (!isIndexed || (ix != unknownCompletionIndex && ix < int(*jobCtx.job.Spec.Completions))) {
  1109  				if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && jobCtx.job.Spec.PodFailurePolicy != nil {
  1110  					_, countFailed, action := matchPodFailurePolicy(jobCtx.job.Spec.PodFailurePolicy, pod)
  1111  					if action != nil {
  1112  						podFailureCountByPolicyAction[string(*action)] += 1
  1113  					}
  1114  					if countFailed {
  1115  						needsFlush = true
  1116  						uncountedStatus.Failed = append(uncountedStatus.Failed, pod.UID)
  1117  					}
  1118  				} else {
  1119  					needsFlush = true
  1120  					uncountedStatus.Failed = append(uncountedStatus.Failed, pod.UID)
  1121  				}
  1122  			}
  1123  		}
  1124  		if len(newSucceededIndexes)+len(uncountedStatus.Succeeded)+len(uncountedStatus.Failed) >= MaxUncountedPods {
  1125  			// The controller already added enough Pods to .status.uncountedTerminatedPods.
  1126  			// We stop counting pods and removing finalizers here to:
  1127  			// 1. Ensure that the UID representation stays under 20 KB.
  1128  			// 2. Cap the number of finalizer removals so that syncing of big Jobs
  1129  			//    doesn't starve smaller ones.
  1130  			//
  1131  			// The job will be synced again because the Job status and Pod updates
  1132  			// will put the Job back to the work queue.
  1133  			reachedMaxUncountedPods = true
  1134  			break
  1135  		}
  1136  	}
  1137  	if isIndexed {
  1138  		jobCtx.succeededIndexes = jobCtx.succeededIndexes.withOrderedIndexes(newSucceededIndexes)
  1139  		succeededIndexesStr := jobCtx.succeededIndexes.String()
  1140  		if succeededIndexesStr != jobCtx.job.Status.CompletedIndexes {
  1141  			needsFlush = true
  1142  		}
  1143  		jobCtx.job.Status.Succeeded = int32(jobCtx.succeededIndexes.total())
  1144  		jobCtx.job.Status.CompletedIndexes = succeededIndexesStr
  1145  		var failedIndexesStr *string
  1146  		if jobCtx.failedIndexes != nil {
  1147  			failedIndexesStr = ptr.To(jobCtx.failedIndexes.String())
  1148  		}
  1149  		if !ptr.Equal(jobCtx.job.Status.FailedIndexes, failedIndexesStr) {
  1150  			jobCtx.job.Status.FailedIndexes = failedIndexesStr
  1151  			needsFlush = true
  1152  		}
  1153  	}
  1154  	if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) {
  1155  		if jobCtx.finishedCondition != nil && jobCtx.finishedCondition.Type == batch.JobFailureTarget {
  1156  
  1157  			// Append the interim FailureTarget condition to update the job status with before finalizers are removed.
  1158  			jobCtx.job.Status.Conditions = append(jobCtx.job.Status.Conditions, *jobCtx.finishedCondition)
  1159  			needsFlush = true
  1160  
  1161  			// Prepare the final Failed condition to update the job status with after the finalizers are removed.
  1162  			// It is also used in the enactJobFinished function for reporting.
  1163  			jobCtx.finishedCondition = newFailedConditionForFailureTarget(jobCtx.finishedCondition, jm.clock.Now())
  1164  		}
  1165  	}
  1166  	if isSuccessCriteriaMetCondition(jobCtx.finishedCondition) {
  1167  		// Append the interim SuccessCriteriaMet condition to update the job status with before finalizers are removed.
  1168  		if hasSuccessCriteriaMetCondition(jobCtx.job) == nil {
  1169  			jobCtx.job.Status.Conditions = append(jobCtx.job.Status.Conditions, *jobCtx.finishedCondition)
  1170  			needsFlush = true
  1171  		}
  1172  
  1173  		// Prepare the final Complete condition to update the job status with after the finalizers are removed.
  1174  		// It is also used in the enactJobFinished function for reporting.
  1175  		jobCtx.finishedCondition = newCondition(batch.JobComplete, v1.ConditionTrue, jobCtx.finishedCondition.Reason, jobCtx.finishedCondition.Message, jm.clock.Now())
  1176  	}
  1177  	var err error
  1178  	if jobCtx.job, needsFlush, err = jm.flushUncountedAndRemoveFinalizers(ctx, jobCtx, podsToRemoveFinalizer, uidsWithFinalizer, &oldCounters, podFailureCountByPolicyAction, needsFlush); err != nil {
  1179  		return err
  1180  	}
  1181  	jobFinished := !reachedMaxUncountedPods && jm.enactJobFinished(jobCtx.job, jobCtx.finishedCondition)
  1182  	if jobFinished {
  1183  		needsFlush = true
  1184  	}
  1185  	if needsFlush {
  1186  		if _, err := jm.updateStatusHandler(ctx, jobCtx.job); err != nil {
  1187  			return fmt.Errorf("removing uncounted pods from status: %w", err)
  1188  		}
  1189  		if jobFinished {
  1190  			jm.recordJobFinished(jobCtx.job, jobCtx.finishedCondition)
  1191  		}
  1192  		recordJobPodFinished(logger, jobCtx.job, oldCounters)
  1193  	}
  1194  	return nil
  1195  }
  1196  
  1197  // canRemoveFinalizer determines if the pod's finalizer can be safely removed.
  1198  // The finalizer can be removed when:
  1199  //   - the entire Job is terminating; or
  1200  //   - the pod's index is succeeded; or
  1201  //   - the Pod is considered failed, unless its removal is delayed for the
  1202  //     purpose of transferring the JobIndexFailureCount annotations to the
  1203  //     replacement pod (if the entire Job is terminating, the finalizer can be
  1204  //     removed unconditionally); or
  1205  //   - the Job met successPolicy.
  1206  func canRemoveFinalizer(logger klog.Logger, jobCtx *syncJobCtx, pod *v1.Pod, considerPodFailed bool) bool {
  1207  	if jobCtx.job.DeletionTimestamp != nil || jobCtx.finishedCondition != nil || pod.Status.Phase == v1.PodSucceeded {
  1208  		return true
  1209  	}
  1210  	if !considerPodFailed {
  1211  		return false
  1212  	}
  1213  	if hasBackoffLimitPerIndex(jobCtx.job) {
  1214  		if index := getCompletionIndex(pod.Annotations); index != unknownCompletionIndex {
  1215  			if p, ok := jobCtx.podsWithDelayedDeletionPerIndex[index]; ok && p.UID == pod.UID {
  1216  				logger.V(3).Info("Delaying pod finalizer removal to await for pod recreation within the index", "pod", klog.KObj(pod))
  1217  				return false
  1218  			}
  1219  		}
  1220  	}
  1221  	return true
  1222  }
  1223  
  1224  // flushUncountedAndRemoveFinalizers does:
  1225  //  1. flush the Job status that might include new uncounted Pod UIDs.
  1226  //     Also flush the interim FailureTarget and SuccessCriteriaMet conditions if present.
  1227  //  2. perform the removal of finalizers from Pods which are in the uncounted
  1228  //     lists.
  1229  //  3. update the counters based on the Pods for which it successfully removed
  1230  //     the finalizers.
  1231  //  4. (if not all removals succeeded) flush Job status again.
  1232  //
  1233  // Returns whether there are pending changes in the Job status that need to be
  1234  // flushed in subsequent calls.
  1235  func (jm *Controller) flushUncountedAndRemoveFinalizers(ctx context.Context, jobCtx *syncJobCtx, podsToRemoveFinalizer []*v1.Pod, uidsWithFinalizer sets.Set[string], oldCounters *batch.JobStatus, podFailureCountByPolicyAction map[string]int, needsFlush bool) (*batch.Job, bool, error) {
  1236  	logger := klog.FromContext(ctx)
  1237  	var err error
  1238  	if needsFlush {
  1239  		if jobCtx.job, err = jm.updateStatusHandler(ctx, jobCtx.job); err != nil {
  1240  			return jobCtx.job, needsFlush, fmt.Errorf("adding uncounted pods to status: %w", err)
  1241  		}
  1242  
  1243  		err = jm.podBackoffStore.updateBackoffRecord(jobCtx.newBackoffRecord)
  1244  
  1245  		if err != nil {
  1246  			// This error might undercount the backoff.
  1247  			// Re-syncing from the current state might not help to recover
  1248  			// the backoff information.
  1249  			logger.Error(err, "Backoff update failed")
  1250  		}
  1251  
  1252  		recordJobPodFinished(logger, jobCtx.job, *oldCounters)
  1253  		// Shallow copy, as it will only be used to detect changes in the counters.
  1254  		*oldCounters = jobCtx.job.Status
  1255  		needsFlush = false
  1256  	}
  1257  	recordJobPodFailurePolicyActions(jobCtx.job, podFailureCountByPolicyAction)
  1258  
  1259  	jobKey, err := controller.KeyFunc(jobCtx.job)
  1260  	if err != nil {
  1261  		return jobCtx.job, needsFlush, fmt.Errorf("getting job key: %w", err)
  1262  	}
  1263  	var rmErr error
  1264  	if len(podsToRemoveFinalizer) > 0 {
  1265  		var rmSucceeded []bool
  1266  		rmSucceeded, rmErr = jm.removeTrackingFinalizerFromPods(ctx, jobKey, podsToRemoveFinalizer)
  1267  		for i, p := range podsToRemoveFinalizer {
  1268  			if rmSucceeded[i] {
  1269  				uidsWithFinalizer.Delete(string(p.UID))
  1270  			}
  1271  		}
  1272  	}
  1273  	// Even if removing some finalizers failed, record the partial progress in
  1274  	// the status and, if necessary, flush it right away.
  1275  	if cleanUncountedPodsWithoutFinalizers(&jobCtx.job.Status, uidsWithFinalizer) {
  1276  		needsFlush = true
  1277  	}
  1278  	if rmErr != nil && needsFlush {
  1279  		if job, err := jm.updateStatusHandler(ctx, jobCtx.job); err != nil {
  1280  			return job, needsFlush, fmt.Errorf("removing uncounted pods from status: %w", err)
  1281  		}
  1282  	}
  1283  	return jobCtx.job, needsFlush, rmErr
  1284  }
  1285  
  1286  // cleanUncountedPodsWithoutFinalizers removes the Pod UIDs from
  1287  // .status.uncountedTerminatedPods for which the finalizer was successfully
  1288  // removed and increments the corresponding status counters.
  1289  // Returns whether there was any status change.
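//
// For example (illustrative values): with status.Succeeded=3,
// uncountedTerminatedPods.succeeded=["uid-a", "uid-b"] and
// uidsWithFinalizer={"uid-b"} (meaning the finalizer of "uid-a" was removed),
// the function sets status.Succeeded=4, keeps only "uid-b" in the uncounted
// list and returns true.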
  1290  func cleanUncountedPodsWithoutFinalizers(status *batch.JobStatus, uidsWithFinalizer sets.Set[string]) bool {
  1291  	updated := false
  1292  	uncountedStatus := status.UncountedTerminatedPods
  1293  	newUncounted := filterInUncountedUIDs(uncountedStatus.Succeeded, uidsWithFinalizer)
  1294  	if len(newUncounted) != len(uncountedStatus.Succeeded) {
  1295  		updated = true
  1296  		status.Succeeded += int32(len(uncountedStatus.Succeeded) - len(newUncounted))
  1297  		uncountedStatus.Succeeded = newUncounted
  1298  	}
  1299  	newUncounted = filterInUncountedUIDs(uncountedStatus.Failed, uidsWithFinalizer)
  1300  	if len(newUncounted) != len(uncountedStatus.Failed) {
  1301  		updated = true
  1302  		status.Failed += int32(len(uncountedStatus.Failed) - len(newUncounted))
  1303  		uncountedStatus.Failed = newUncounted
  1304  	}
  1305  	return updated
  1306  }
  1307  
  1308  // removeTrackingFinalizerFromPods removes tracking finalizers from Pods and
  1309  // returns a slice of booleans where the i-th value is true if the finalizer
  1310  // of the i-th Pod was successfully removed (if the pod was already deleted when
  1311  // this function was called, the finalizer is considered successfully removed).
  1312  func (jm *Controller) removeTrackingFinalizerFromPods(ctx context.Context, jobKey string, pods []*v1.Pod) ([]bool, error) {
  1313  	logger := klog.FromContext(ctx)
  1314  	errCh := make(chan error, len(pods))
  1315  	succeeded := make([]bool, len(pods))
  1316  	uids := make([]string, len(pods))
  1317  	for i, p := range pods {
  1318  		uids[i] = string(p.UID)
  1319  	}
  1320  	if jobKey != "" {
  1321  		err := jm.finalizerExpectations.expectFinalizersRemoved(logger, jobKey, uids)
  1322  		if err != nil {
  1323  			return succeeded, fmt.Errorf("setting expected removed finalizers: %w", err)
  1324  		}
  1325  	}
  1326  	wg := sync.WaitGroup{}
  1327  	wg.Add(len(pods))
  1328  	for i := range pods {
  1329  		go func(i int) {
  1330  			pod := pods[i]
  1331  			defer wg.Done()
  1332  			if patch := removeTrackingFinalizerPatch(pod); patch != nil {
  1333  				if err := jm.podControl.PatchPod(ctx, pod.Namespace, pod.Name, patch); err != nil {
  1334  					// In case of any failure, we don't expect a Pod update for the
  1335  					// finalizer removed. Clear expectation now.
  1336  					if jobKey != "" {
  1337  						jm.finalizerExpectations.finalizerRemovalObserved(logger, jobKey, string(pod.UID))
  1338  					}
  1339  					if !apierrors.IsNotFound(err) {
  1340  						errCh <- err
  1341  						utilruntime.HandleError(fmt.Errorf("removing tracking finalizer: %w", err))
  1342  						return
  1343  					}
  1344  				}
  1345  				succeeded[i] = true
  1346  			}
  1347  		}(i)
  1348  	}
  1349  	wg.Wait()
  1350  
  1351  	return succeeded, errorFromChannel(errCh)
  1352  }
  1353  
  1354  // enactJobFinished adds the Complete or Failed condition to the Job status.
  1355  // Returns whether the Job was considered finished.
  1356  func (jm *Controller) enactJobFinished(job *batch.Job, finishedCond *batch.JobCondition) bool {
  1357  	if finishedCond == nil {
  1358  		return false
  1359  	}
  1360  	if uncounted := job.Status.UncountedTerminatedPods; uncounted != nil {
  1361  		if len(uncounted.Succeeded) > 0 || len(uncounted.Failed) > 0 {
  1362  			return false
  1363  		}
  1364  	}
  1365  	job.Status.Conditions, _ = ensureJobConditionStatus(job.Status.Conditions, finishedCond.Type, finishedCond.Status, finishedCond.Reason, finishedCond.Message, jm.clock.Now())
  1366  	if finishedCond.Type == batch.JobComplete {
  1367  		job.Status.CompletionTime = &finishedCond.LastTransitionTime
  1368  	}
  1369  	return true
  1370  }
  1371  
  1372  // recordJobFinished records events and the job_finished_total metric for a finished job.
  1373  func (jm *Controller) recordJobFinished(job *batch.Job, finishedCond *batch.JobCondition) bool {
  1374  	completionMode := getCompletionMode(job)
  1375  	if finishedCond.Type == batch.JobComplete {
  1376  		if job.Spec.Completions != nil && job.Status.Succeeded > *job.Spec.Completions {
  1377  			jm.recorder.Event(job, v1.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached")
  1378  		}
  1379  		jm.recorder.Event(job, v1.EventTypeNormal, "Completed", "Job completed")
  1380  		metrics.JobFinishedNum.WithLabelValues(completionMode, "succeeded", "").Inc()
  1381  	} else {
  1382  		jm.recorder.Event(job, v1.EventTypeWarning, finishedCond.Reason, finishedCond.Message)
  1383  		metrics.JobFinishedNum.WithLabelValues(completionMode, "failed", finishedCond.Reason).Inc()
  1384  	}
  1385  	return true
  1386  }
  1387  
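// filterInUncountedUIDs returns the subset of the uncounted UIDs that is still
// present in the include set; callers use it to keep only the pods that still
// carry the tracking finalizer in .status.uncountedTerminatedPods.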
  1388  func filterInUncountedUIDs(uncounted []types.UID, include sets.Set[string]) []types.UID {
  1389  	var newUncounted []types.UID
  1390  	for _, uid := range uncounted {
  1391  		if include.Has(string(uid)) {
  1392  			newUncounted = append(newUncounted, uid)
  1393  		}
  1394  	}
  1395  	return newUncounted
  1396  }
  1397  
  1398  // newFailedConditionForFailureTarget creates a job Failed condition based on
  1399  // the interim FailureTarget condition.
  1400  func newFailedConditionForFailureTarget(condition *batch.JobCondition, now time.Time) *batch.JobCondition {
  1401  	return newCondition(batch.JobFailed, v1.ConditionTrue, condition.Reason, condition.Message, now)
  1402  }
  1403  
  1404  // pastBackoffLimitOnFailure checks if the sum of container restart counts exceeds BackoffLimit.
  1405  // This method applies only to pods with restartPolicy == OnFailure.
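//
// Worked example (illustrative): with backoffLimit=3 and two running pods whose
// container restart counts are 2 and 1, the total is 3 >= 3, so the limit is
// considered exceeded; with backoffLimit=0, any restart (sum > 0) exceeds it.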
  1406  func pastBackoffLimitOnFailure(job *batch.Job, pods []*v1.Pod) bool {
  1407  	if job.Spec.Template.Spec.RestartPolicy != v1.RestartPolicyOnFailure {
  1408  		return false
  1409  	}
  1410  	result := int32(0)
  1411  	for i := range pods {
  1412  		po := pods[i]
  1413  		if po.Status.Phase == v1.PodRunning || po.Status.Phase == v1.PodPending {
  1414  			for j := range po.Status.InitContainerStatuses {
  1415  				stat := po.Status.InitContainerStatuses[j]
  1416  				result += stat.RestartCount
  1417  			}
  1418  			for j := range po.Status.ContainerStatuses {
  1419  				stat := po.Status.ContainerStatuses[j]
  1420  				result += stat.RestartCount
  1421  			}
  1422  		}
  1423  	}
  1424  	if *job.Spec.BackoffLimit == 0 {
  1425  		return result > 0
  1426  	}
  1427  	return result >= *job.Spec.BackoffLimit
  1428  }
  1429  
  1430  // pastActiveDeadline checks if job has ActiveDeadlineSeconds field set and if
  1431  // it is exceeded. If the job is currently suspended, the function will always
  1432  // return false.
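//
// For example (illustrative): with activeDeadlineSeconds=600 and a start time
// 10 minutes in the past, the deadline is considered exceeded; a suspended Job
// never is, regardless of elapsed time.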
  1433  func (jm *Controller) pastActiveDeadline(job *batch.Job) bool {
  1434  	if job.Spec.ActiveDeadlineSeconds == nil || job.Status.StartTime == nil || jobSuspended(job) {
  1435  		return false
  1436  	}
  1437  	duration := jm.clock.Since(job.Status.StartTime.Time)
  1438  	allowedDuration := time.Duration(*job.Spec.ActiveDeadlineSeconds) * time.Second
  1439  	return duration >= allowedDuration
  1440  }
  1441  
  1442  func newCondition(conditionType batch.JobConditionType, status v1.ConditionStatus, reason, message string, now time.Time) *batch.JobCondition {
  1443  	return &batch.JobCondition{
  1444  		Type:               conditionType,
  1445  		Status:             status,
  1446  		LastProbeTime:      metav1.NewTime(now),
  1447  		LastTransitionTime: metav1.NewTime(now),
  1448  		Reason:             reason,
  1449  		Message:            message,
  1450  	}
  1451  }
  1452  
  1453  // getFailJobMessage returns a job failure message if the job should fail with the current counters
  1454  func getFailJobMessage(job *batch.Job, pods []*v1.Pod) *string {
  1455  	if !feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) || job.Spec.PodFailurePolicy == nil {
  1456  		return nil
  1457  	}
  1458  	for _, p := range pods {
  1459  		if isPodFailed(p, job) {
  1460  			jobFailureMessage, _, _ := matchPodFailurePolicy(job.Spec.PodFailurePolicy, p)
  1461  			if jobFailureMessage != nil {
  1462  				return jobFailureMessage
  1463  			}
  1464  		}
  1465  	}
  1466  	return nil
  1467  }
  1468  
  1469  // getNewFinishedPods returns the list of newly succeeded and failed pods that are not accounted
  1470  // in the job status. The list of failed pods can be affected by the podFailurePolicy.
  1471  func getNewFinishedPods(jobCtx *syncJobCtx) (succeededPods, failedPods []*v1.Pod) {
  1472  	succeededPods = getValidPodsWithFilter(jobCtx, jobCtx.uncounted.Succeeded(), func(p *v1.Pod) bool {
  1473  		return p.Status.Phase == v1.PodSucceeded
  1474  	})
  1475  	failedPods = getValidPodsWithFilter(jobCtx, jobCtx.uncounted.Failed(), func(p *v1.Pod) bool {
  1476  		return isPodFailed(p, jobCtx.job)
  1477  	})
  1478  	return succeededPods, failedPods
  1479  }
  1480  
  1481  // jobSuspended returns whether a Job is suspended, i.e. whether
  1482  // spec.suspend is set to true.
  1483  func jobSuspended(job *batch.Job) bool {
  1484  	return job.Spec.Suspend != nil && *job.Spec.Suspend
  1485  }
  1486  
  1487  // manageJob is the core method responsible for managing the number of running
  1488  // pods according to what is specified in the job.Spec.
  1489  // Respects back-off; does not create new pods if the back-off time has not passed.
  1490  // Does NOT modify <activePods>.
  1491  func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, jobCtx *syncJobCtx) (int32, string, error) {
  1492  	logger := klog.FromContext(ctx)
  1493  	active := int32(len(jobCtx.activePods))
  1494  	parallelism := *job.Spec.Parallelism
  1495  	jobKey, err := controller.KeyFunc(job)
  1496  	if err != nil {
  1497  		utilruntime.HandleError(fmt.Errorf("Couldn't get key for job %#v: %v", job, err))
  1498  		return 0, metrics.JobSyncActionTracking, nil
  1499  	}
  1500  
  1501  	if jobSuspended(job) {
  1502  		logger.V(4).Info("Deleting all active pods in suspended job", "job", klog.KObj(job), "active", active)
  1503  		podsToDelete := activePodsForRemoval(job, jobCtx.activePods, int(active))
  1504  		jm.expectations.ExpectDeletions(logger, jobKey, len(podsToDelete))
  1505  		removed, err := jm.deleteJobPods(ctx, job, jobKey, podsToDelete)
  1506  		active -= removed
  1507  		return active, metrics.JobSyncActionPodsDeleted, err
  1508  	}
  1509  
  1510  	var terminating int32 = 0
  1511  	if onlyReplaceFailedPods(jobCtx.job) {
  1512  		// When a PodFailurePolicy is specified but PodReplacementPolicy is disabled,
  1513  		// we still need to count terminating pods for replica counts,
  1514  		// but we will not allow updates to status.
  1515  		if jobCtx.terminating == nil {
  1516  			terminating = controller.CountTerminatingPods(jobCtx.pods)
  1517  		} else {
  1518  			terminating = *jobCtx.terminating
  1519  		}
  1520  	}
  1521  	wantActive := int32(0)
  1522  	if job.Spec.Completions == nil {
  1523  		// Job does not specify a number of completions.  Therefore, number active
  1524  		// should be equal to parallelism, unless the job has seen at least
  1525  		// one success, in which case leave whatever is running, running.
  1526  		if jobCtx.succeeded > 0 {
  1527  			wantActive = active
  1528  		} else {
  1529  			wantActive = parallelism
  1530  		}
  1531  	} else {
  1532  		// Job specifies a specific number of completions.  Therefore, number
  1533  		// active should not ever exceed number of remaining completions.
  1534  		wantActive = *job.Spec.Completions - jobCtx.succeeded
  1535  		if wantActive > parallelism {
  1536  			wantActive = parallelism
  1537  		}
  1538  		if wantActive < 0 {
  1539  			wantActive = 0
  1540  		}
  1541  	}
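	// Worked example (illustrative): with completions=10, parallelism=3 and
	// succeeded=4, wantActive = min(10-4, 3) = 3; for a Job without completions
	// and no successes yet, wantActive is simply parallelism.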
  1542  
  1543  	rmAtLeast := active - wantActive
  1544  	if rmAtLeast < 0 {
  1545  		rmAtLeast = 0
  1546  	}
  1547  	podsToDelete := activePodsForRemoval(job, jobCtx.activePods, int(rmAtLeast))
  1548  	if len(podsToDelete) > MaxPodCreateDeletePerSync {
  1549  		podsToDelete = podsToDelete[:MaxPodCreateDeletePerSync]
  1550  	}
  1551  	if len(podsToDelete) > 0 {
  1552  		jm.expectations.ExpectDeletions(logger, jobKey, len(podsToDelete))
  1553  		logger.V(4).Info("Too many pods running for job", "job", klog.KObj(job), "deleted", len(podsToDelete), "target", wantActive)
  1554  		removed, err := jm.deleteJobPods(ctx, job, jobKey, podsToDelete)
  1555  		active -= removed
  1556  		// While it is possible for a Job to require both pod creations and
  1557  		// deletions at the same time (e.g. indexed Jobs with repeated indexes), we
  1558  		// restrict ourselves to either just pod deletion or pod creation in any
  1559  		// given sync cycle. Of these two, pod deletion takes precedence.
  1560  		return active, metrics.JobSyncActionPodsDeleted, err
  1561  	}
  1562  
  1563  	if diff := wantActive - terminating - active; diff > 0 {
  1564  		var remainingTime time.Duration
  1565  		if !hasBackoffLimitPerIndex(job) {
  1566  			// we compute the global remaining time for pod creation when backoffLimitPerIndex is not used
  1567  			remainingTime = jobCtx.newBackoffRecord.getRemainingTime(jm.clock, DefaultJobPodFailureBackOff, MaxJobPodFailureBackOff)
  1568  		}
  1569  		if remainingTime > 0 {
  1570  			jm.enqueueSyncJobWithDelay(logger, job, remainingTime)
  1571  			return 0, metrics.JobSyncActionPodsCreated, nil
  1572  		}
  1573  		if diff > int32(MaxPodCreateDeletePerSync) {
  1574  			diff = int32(MaxPodCreateDeletePerSync)
  1575  		}
  1576  
  1577  		var indexesToAdd []int
  1578  		if isIndexedJob(job) {
  1579  			indexesToAdd = firstPendingIndexes(jobCtx, int(diff), int(*job.Spec.Completions))
  1580  			if hasBackoffLimitPerIndex(job) {
  1581  				indexesToAdd, remainingTime = jm.getPodCreationInfoForIndependentIndexes(logger, indexesToAdd, jobCtx.podsWithDelayedDeletionPerIndex)
  1582  				if remainingTime > 0 {
  1583  					jm.enqueueSyncJobWithDelay(logger, job, remainingTime)
  1584  					return 0, metrics.JobSyncActionPodsCreated, nil
  1585  				}
  1586  			}
  1587  			diff = int32(len(indexesToAdd))
  1588  		}
  1589  
  1590  		jm.expectations.ExpectCreations(logger, jobKey, int(diff))
  1591  		errCh := make(chan error, diff)
  1592  		logger.V(4).Info("Too few pods running", "key", jobKey, "need", wantActive, "creating", diff)
  1593  
  1594  		wait := sync.WaitGroup{}
  1595  
  1596  		active += diff
  1597  
  1598  		podTemplate := job.Spec.Template.DeepCopy()
  1599  		if isIndexedJob(job) {
  1600  			addCompletionIndexEnvVariables(podTemplate)
  1601  		}
  1602  		podTemplate.Finalizers = appendJobCompletionFinalizerIfNotFound(podTemplate.Finalizers)
  1603  
  1604  		// Counters for pod creation status (used by the job_pods_creation_total metric)
  1605  		var creationsSucceeded, creationsFailed int32 = 0, 0
  1606  
  1607  		// Batch the pod creates. Batch sizes start at SlowStartInitialBatchSize
  1608  		// and double with each successful iteration in a kind of "slow start".
  1609  		// This handles attempts to start large numbers of pods that would
  1610  		// likely all fail with the same error. For example a project with a
  1611  		// low quota that attempts to create a large number of pods will be
  1612  		// prevented from spamming the API service with the pod create requests
  1613  		// after one of its pods fails.  Conveniently, this also prevents the
  1614  		// event spam that those failures would generate.
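		// For example (illustrative, assuming controller.SlowStartInitialBatchSize
		// is 1): creating 10 pods proceeds in batches of 1, 2, 4 and then 3
		// (capped at the remaining diff), stopping early and skipping the rest
		// if any creation in a batch fails.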
  1615  		for batchSize := min(diff, int32(controller.SlowStartInitialBatchSize)); diff > 0; batchSize = min(2*batchSize, diff) {
  1616  			errorCount := len(errCh)
  1617  			wait.Add(int(batchSize))
  1618  			for i := int32(0); i < batchSize; i++ {
  1619  				completionIndex := unknownCompletionIndex
  1620  				if len(indexesToAdd) > 0 {
  1621  					completionIndex = indexesToAdd[0]
  1622  					indexesToAdd = indexesToAdd[1:]
  1623  				}
  1624  				go func() {
  1625  					template := podTemplate
  1626  					generateName := ""
  1627  					if completionIndex != unknownCompletionIndex {
  1628  						template = podTemplate.DeepCopy()
  1629  						addCompletionIndexAnnotation(template, completionIndex)
  1630  
  1631  						if feature.DefaultFeatureGate.Enabled(features.PodIndexLabel) {
  1632  							addCompletionIndexLabel(template, completionIndex)
  1633  						}
  1634  						template.Spec.Hostname = fmt.Sprintf("%s-%d", job.Name, completionIndex)
  1635  						generateName = podGenerateNameWithIndex(job.Name, completionIndex)
  1636  						if hasBackoffLimitPerIndex(job) {
  1637  							addIndexFailureCountAnnotation(logger, template, job, jobCtx.podsWithDelayedDeletionPerIndex[completionIndex])
  1638  						}
  1639  					}
  1640  					defer wait.Done()
  1641  					err := jm.podControl.CreatePodsWithGenerateName(ctx, job.Namespace, template, job, metav1.NewControllerRef(job, controllerKind), generateName)
  1642  					if err != nil {
  1643  						if apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
  1644  							// If the namespace is being torn down, we can safely ignore
  1645  							// this error since all subsequent creations will fail.
  1646  							return
  1647  						}
  1648  					}
  1649  					if err != nil {
  1650  						defer utilruntime.HandleError(err)
  1651  						// Decrement the expected number of creates because the informer won't observe this pod
  1652  						logger.V(2).Info("Failed creation, decrementing expectations", "job", klog.KObj(job))
  1653  						jm.expectations.CreationObserved(logger, jobKey)
  1654  						atomic.AddInt32(&active, -1)
  1655  						errCh <- err
  1656  						atomic.AddInt32(&creationsFailed, 1)
						// Don't count a failed creation as a succeeded one below.
						return
  1657  					}
  1658  					atomic.AddInt32(&creationsSucceeded, 1)
  1659  				}()
  1660  			}
  1661  			wait.Wait()
  1662  			// any skipped pods that we never attempted to start shouldn't be expected.
  1663  			skippedPods := diff - batchSize
  1664  			if errorCount < len(errCh) && skippedPods > 0 {
  1665  				logger.V(2).Info("Slow-start failure. Skipping creating pods, decrementing expectations", "skippedCount", skippedPods, "job", klog.KObj(job))
  1666  				active -= skippedPods
  1667  				for i := int32(0); i < skippedPods; i++ {
  1668  					// Decrement the expected number of creates because the informer won't observe this pod
  1669  					jm.expectations.CreationObserved(logger, jobKey)
  1670  				}
  1671  				// The skipped pods will be retried later. The next controller resync will
  1672  				// retry the slow start process.
  1673  				break
  1674  			}
  1675  			diff -= batchSize
  1676  		}
  1677  		recordJobPodsCreationTotal(job, jobCtx, creationsSucceeded, creationsFailed)
  1678  		return active, metrics.JobSyncActionPodsCreated, errorFromChannel(errCh)
  1679  	}
  1680  
  1681  	return active, metrics.JobSyncActionTracking, nil
  1682  }
  1683  
  1684  // getPodCreationInfoForIndependentIndexes returns the sub-list of indexes
  1685  // for which pods can be created right away. If no index is ready yet, it
  1686  // returns the lowest remaining backoff time before a pod can be created,
  1687  // across all indexes.
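//
// For example (illustrative): given indexes [0, 1, 2] with per-index remaining
// backoff times of 0s, 30s and 0s, it returns ([0, 2], 0); if every index still
// has remaining backoff (say 30s, 10s and 20s), it returns (nil, 10s).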
  1688  func (jm *Controller) getPodCreationInfoForIndependentIndexes(logger klog.Logger, indexesToAdd []int, podsWithDelayedDeletionPerIndex map[int]*v1.Pod) ([]int, time.Duration) {
  1689  	var indexesToAddNow []int
  1690  	var minRemainingTimePerIndex *time.Duration
  1691  	for _, indexToAdd := range indexesToAdd {
  1692  		if remainingTimePerIndex := getRemainingTimePerIndex(logger, jm.clock, DefaultJobPodFailureBackOff, MaxJobPodFailureBackOff, podsWithDelayedDeletionPerIndex[indexToAdd]); remainingTimePerIndex == 0 {
  1693  			indexesToAddNow = append(indexesToAddNow, indexToAdd)
  1694  		} else if minRemainingTimePerIndex == nil || remainingTimePerIndex < *minRemainingTimePerIndex {
  1695  			minRemainingTimePerIndex = &remainingTimePerIndex
  1696  		}
  1697  	}
  1698  	if len(indexesToAddNow) > 0 {
  1699  		return indexesToAddNow, 0
  1700  	}
  1701  	return indexesToAddNow, ptr.Deref(minRemainingTimePerIndex, 0)
  1702  }
  1703  
  1704  // activePodsForRemoval returns Pods that should be removed because there
  1705  // are too many pods running or, if this is an indexed job, there are repeated
  1706  // indexes or invalid indexes or some pods don't have indexes.
  1707  // Sorts candidate pods in the order such that not-ready < ready, unscheduled
  1708  // < scheduled, and pending < running. This ensures that we delete pods
  1709  // in the earlier stages whenever possible.
  1710  func activePodsForRemoval(job *batch.Job, pods []*v1.Pod, rmAtLeast int) []*v1.Pod {
  1711  	var rm, left []*v1.Pod
  1712  
  1713  	if isIndexedJob(job) {
  1714  		rm = make([]*v1.Pod, 0, rmAtLeast)
  1715  		left = make([]*v1.Pod, 0, len(pods)-rmAtLeast)
  1716  		rm, left = appendDuplicatedIndexPodsForRemoval(rm, left, pods, int(*job.Spec.Completions))
  1717  	} else {
  1718  		left = pods
  1719  	}
  1720  
  1721  	if len(rm) < rmAtLeast {
  1722  		sort.Sort(controller.ActivePods(left))
  1723  		rm = append(rm, left[:rmAtLeast-len(rm)]...)
  1724  	}
  1725  	return rm
  1726  }
  1727  
  1728  // updateJobStatus calls the API to update the job status.
  1729  func (jm *Controller) updateJobStatus(ctx context.Context, job *batch.Job) (*batch.Job, error) {
  1730  	return jm.kubeClient.BatchV1().Jobs(job.Namespace).UpdateStatus(ctx, job, metav1.UpdateOptions{})
  1731  }
  1732  
  1733  func (jm *Controller) patchJob(ctx context.Context, job *batch.Job, data []byte) error {
  1734  	_, err := jm.kubeClient.BatchV1().Jobs(job.Namespace).Patch(
  1735  		ctx, job.Name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
  1736  	return err
  1737  }
  1738  
  1739  // getValidPodsWithFilter returns the valid pods that pass the filter.
  1740  // Pods are valid if they still have the tracking finalizer, are not already
  1741  // in the uncounted set and, for Indexed Jobs, have a valid completion index.
  1742  func getValidPodsWithFilter(jobCtx *syncJobCtx, uncounted sets.Set[string], filter func(*v1.Pod) bool) []*v1.Pod {
  1743  	var result []*v1.Pod
  1744  	for _, p := range jobCtx.pods {
  1745  		uid := string(p.UID)
  1746  
  1747  		// Skip pods without the tracking finalizer or in the uncounted set: they
  1748  		// are, or are about to be, accounted for in the Job status.
  1749  		if !hasJobTrackingFinalizer(p) || uncounted.Has(uid) || jobCtx.expectedRmFinalizers.Has(uid) {
  1750  			continue
  1751  		}
  1752  		if isIndexedJob(jobCtx.job) {
  1753  			idx := getCompletionIndex(p.Annotations)
  1754  			if idx == unknownCompletionIndex || idx >= int(*jobCtx.job.Spec.Completions) {
  1755  				continue
  1756  			}
  1757  		}
  1758  		if filter(p) {
  1759  			result = append(result, p)
  1760  		}
  1761  	}
  1762  	return result
  1763  }
  1764  
  1765  // getCompletionMode returns string representation of the completion mode. Used as a label value for metrics.
  1766  func getCompletionMode(job *batch.Job) string {
  1767  	if isIndexedJob(job) {
  1768  		return string(batch.IndexedCompletion)
  1769  	}
  1770  	return string(batch.NonIndexedCompletion)
  1771  }
  1772  
  1773  func appendJobCompletionFinalizerIfNotFound(finalizers []string) []string {
  1774  	for _, fin := range finalizers {
  1775  		if fin == batch.JobTrackingFinalizer {
  1776  			return finalizers
  1777  		}
  1778  	}
  1779  	return append(finalizers, batch.JobTrackingFinalizer)
  1780  }
  1781  
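// removeTrackingFinalizerPatch returns a strategic merge patch that removes the
// Job tracking finalizer from the given pod, or nil if the pod doesn't carry
// the finalizer. Assuming the finalizer constant resolves to
// "batch.kubernetes.io/job-tracking", the generated patch looks roughly like:
//
//	{"metadata":{"$deleteFromPrimitiveList/finalizers":["batch.kubernetes.io/job-tracking"]}}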
  1782  func removeTrackingFinalizerPatch(pod *v1.Pod) []byte {
  1783  	if !hasJobTrackingFinalizer(pod) {
  1784  		return nil
  1785  	}
  1786  	patch := map[string]interface{}{
  1787  		"metadata": map[string]interface{}{
  1788  			"$deleteFromPrimitiveList/finalizers": []string{batch.JobTrackingFinalizer},
  1789  		},
  1790  	}
  1791  	patchBytes, _ := json.Marshal(patch)
  1792  	return patchBytes
  1793  }
  1794  
  1795  type uncountedTerminatedPods struct {
  1796  	succeeded sets.Set[string]
  1797  	failed    sets.Set[string]
  1798  }
  1799  
  1800  func newUncountedTerminatedPods(in batch.UncountedTerminatedPods) *uncountedTerminatedPods {
  1801  	obj := uncountedTerminatedPods{
  1802  		succeeded: make(sets.Set[string], len(in.Succeeded)),
  1803  		failed:    make(sets.Set[string], len(in.Failed)),
  1804  	}
  1805  	for _, v := range in.Succeeded {
  1806  		obj.succeeded.Insert(string(v))
  1807  	}
  1808  	for _, v := range in.Failed {
  1809  		obj.failed.Insert(string(v))
  1810  	}
  1811  	return &obj
  1812  }
  1813  
  1814  func (u *uncountedTerminatedPods) Succeeded() sets.Set[string] {
  1815  	if u == nil {
  1816  		return nil
  1817  	}
  1818  	return u.succeeded
  1819  }
  1820  
  1821  func (u *uncountedTerminatedPods) Failed() sets.Set[string] {
  1822  	if u == nil {
  1823  		return nil
  1824  	}
  1825  	return u.failed
  1826  }
  1827  
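// errorFromChannel performs a non-blocking receive on errCh and returns the
// first buffered error, if any; otherwise it returns nil.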
  1828  func errorFromChannel(errCh <-chan error) error {
  1829  	select {
  1830  	case err := <-errCh:
  1831  		return err
  1832  	default:
  1833  	}
  1834  	return nil
  1835  }
  1836  
  1837  // ensureJobConditionStatus appends or updates an existing job condition of the
  1838  // given type with the given status value. Note that this function will not
  1839  // append to the conditions list if the new condition's status is false
  1840  // (because going from nothing to false is meaningless); it can, however,
  1841  // update the status condition to false. The function returns a bool to let the
  1842  // caller know if the list was changed (either appended or updated).
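//
// Usage sketch (illustrative; the reason and message are example values):
//
//	list, changed := ensureJobConditionStatus(job.Status.Conditions,
//		batch.JobFailed, v1.ConditionTrue, "BackoffLimitExceeded", "Job has reached the specified backoff limit", now)
//
// This appends a Failed condition if none of that type exists, updates it in
// place when the status, reason or message differ, and reports whether the
// list changed.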
  1843  func ensureJobConditionStatus(list []batch.JobCondition, cType batch.JobConditionType, status v1.ConditionStatus, reason, message string, now time.Time) ([]batch.JobCondition, bool) {
  1844  	if condition := findConditionByType(list, cType); condition != nil {
  1845  		if condition.Status != status || condition.Reason != reason || condition.Message != message {
  1846  			*condition = *newCondition(cType, status, reason, message, now)
  1847  			return list, true
  1848  		}
  1849  		return list, false
  1850  	}
  1851  	// A condition with that type doesn't exist in the list.
  1852  	if status != v1.ConditionFalse {
  1853  		return append(list, *newCondition(cType, status, reason, message, now)), true
  1854  	}
  1855  	return list, false
  1856  }
  1857  
  1858  func isPodFailed(p *v1.Pod, job *batch.Job) bool {
  1859  	if feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) && feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
  1860  		// When PodDisruptionConditions is enabled, orphan Pods and unschedulable
  1861  		// terminating Pods are marked as Failed. So we only need to check the phase.
  1862  		// TODO(#113855): Stop limiting this behavior to Jobs with podFailurePolicy.
  1863  		// For now, we do so to avoid affecting all running Jobs without the
  1864  		// availability to opt-out into the old behavior.
  1865  		return p.Status.Phase == v1.PodFailed
  1866  	}
  1867  	if p.Status.Phase == v1.PodFailed {
  1868  		return true
  1869  	}
  1870  	if onlyReplaceFailedPods(job) {
  1871  		return p.Status.Phase == v1.PodFailed
  1872  	}
  1873  	// Count deleted Pods as failures to account for orphan Pods that
  1874  	// never have a chance to reach the Failed phase.
  1875  	return p.DeletionTimestamp != nil && p.Status.Phase != v1.PodSucceeded
  1876  }
  1877  
  1878  func findConditionByType(list []batch.JobCondition, cType batch.JobConditionType) *batch.JobCondition {
  1879  	for i := range list {
  1880  		if list[i].Type == cType {
  1881  			return &list[i]
  1882  		}
  1883  	}
  1884  	return nil
  1885  }
  1886  
  1887  func recordJobPodFinished(logger klog.Logger, job *batch.Job, oldCounters batch.JobStatus) {
  1888  	completionMode := completionModeStr(job)
  1889  	var diff int
  1890  
  1891  	// Updating the succeeded metric must be handled differently
  1892  	// for Indexed Jobs to handle the case where the job has
  1893  	// been scaled down by reducing completions & parallelism
  1894  	// in tandem, so that a previously completed index is
  1895  	// now out of range (i.e. index >= spec.Completions).
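	// For example (illustrative): if CompletedIndexes changes from "1-3" to
	// "1-5" with completions=6, the succeeded diff is 5-3 = 2, so the succeeded
	// metrics are incremented by 2.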
  1896  	if isIndexedJob(job) {
  1897  		completions := int(*job.Spec.Completions)
  1898  		if job.Status.CompletedIndexes != oldCounters.CompletedIndexes {
  1899  			diff = indexesCount(logger, &job.Status.CompletedIndexes, completions) - indexesCount(logger, &oldCounters.CompletedIndexes, completions)
  1900  		}
  1901  		backoffLimitLabel := backoffLimitMetricsLabel(job)
  1902  		metrics.JobFinishedIndexesTotal.WithLabelValues(metrics.Succeeded, backoffLimitLabel).Add(float64(diff))
  1903  		if hasBackoffLimitPerIndex(job) && job.Status.FailedIndexes != oldCounters.FailedIndexes {
  1904  			if failedDiff := indexesCount(logger, job.Status.FailedIndexes, completions) - indexesCount(logger, oldCounters.FailedIndexes, completions); failedDiff > 0 {
  1905  				metrics.JobFinishedIndexesTotal.WithLabelValues(metrics.Failed, backoffLimitLabel).Add(float64(failedDiff))
  1906  			}
  1907  		}
  1908  	} else {
  1909  		diff = int(job.Status.Succeeded) - int(oldCounters.Succeeded)
  1910  	}
  1911  	metrics.JobPodsFinished.WithLabelValues(completionMode, metrics.Succeeded).Add(float64(diff))
  1912  
  1913  	// Update failed metric.
  1914  	diff = int(job.Status.Failed - oldCounters.Failed)
  1915  	metrics.JobPodsFinished.WithLabelValues(completionMode, metrics.Failed).Add(float64(diff))
  1916  }
  1917  
  1918  func indexesCount(logger klog.Logger, indexesStr *string, completions int) int {
  1919  	if indexesStr == nil {
  1920  		return 0
  1921  	}
  1922  	return parseIndexesFromString(logger, *indexesStr, completions).total()
  1923  }
  1924  
  1925  func backoffLimitMetricsLabel(job *batch.Job) string {
  1926  	if hasBackoffLimitPerIndex(job) {
  1927  		return "perIndex"
  1928  	}
  1929  	return "global"
  1930  }
  1931  
  1932  func recordJobPodFailurePolicyActions(job *batch.Job, podFailureCountByPolicyAction map[string]int) {
  1933  	for action, count := range podFailureCountByPolicyAction {
  1934  		metrics.PodFailuresHandledByFailurePolicy.WithLabelValues(action).Add(float64(count))
  1935  	}
  1936  }
  1937  
  1938  func countReadyPods(pods []*v1.Pod) int32 {
  1939  	cnt := int32(0)
  1940  	for _, p := range pods {
  1941  		if podutil.IsPodReady(p) {
  1942  			cnt++
  1943  		}
  1944  	}
  1945  	return cnt
  1946  }
  1947  
  1948  // onlyReplaceFailedPods checks if we should apply the PodReplacementPolicy.
  1949  // PodReplacementPolicy controls when we recreate pods if they are marked as terminating.
  1950  // Failed means that we recreate them only once the pod has fully terminated.
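//
// For example (illustrative manifest snippet), the Failed policy is selected with:
//
//	spec:
//	  podReplacementPolicy: Failed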
  1951  func onlyReplaceFailedPods(job *batch.Job) bool {
  1952  	// We check PodReplacementPolicy both for nil and Failed
  1953  	// because it is possible that `PodReplacementPolicy` is not defaulted
  1954  	// when the `JobPodReplacementPolicy` feature gate is disabled for the API server.
  1955  	if feature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) && job.Spec.PodReplacementPolicy != nil && *job.Spec.PodReplacementPolicy == batch.Failed {
  1956  		return true
  1957  	}
  1958  	return feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil
  1959  }
  1960  
  1961  func (jm *Controller) cleanupPodFinalizers(job *batch.Job) {
  1962  	// Listing pods shouldn't really fail, as we are just querying the informer cache.
  1963  	selector, err := metav1.LabelSelectorAsSelector(job.Spec.Selector)
  1964  	if err != nil {
  1965  		utilruntime.HandleError(fmt.Errorf("parsing deleted job selector: %v", err))
  1966  		return
  1967  	}
  1968  	pods, _ := jm.podStore.Pods(job.Namespace).List(selector)
  1969  	for _, pod := range pods {
  1970  		if metav1.IsControlledBy(pod, job) && hasJobTrackingFinalizer(pod) {
  1971  			jm.enqueueOrphanPod(pod)
  1972  		}
  1973  	}
  1974  }
  1975  
  1976  func recordJobPodsCreationTotal(job *batch.Job, jobCtx *syncJobCtx, succeeded, failed int32) {
  1977  	reason := metrics.PodCreateNew
  1978  	if feature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) {
  1979  		if ptr.Deref(job.Spec.PodReplacementPolicy, batch.TerminatingOrFailed) == batch.Failed && jobCtx.failed > 0 {
  1980  			reason = metrics.PodRecreateFailed
  1981  		} else if jobCtx.failed > 0 || ptr.Deref(jobCtx.terminating, 0) > 0 {
  1982  			reason = metrics.PodRecreateTerminatingOrFailed
  1983  		}
  1984  	}
  1985  	if succeeded > 0 {
  1986  		metrics.JobPodsCreationTotal.WithLabelValues(reason, metrics.Succeeded).Add(float64(succeeded))
  1987  	}
  1988  	if failed > 0 {
  1989  		metrics.JobPodsCreationTotal.WithLabelValues(reason, metrics.Failed).Add(float64(failed))
  1990  	}
  1991  }
  1992  
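// managedByExternalController returns the name of the controller that manages
// the Job when spec.managedBy is set to something other than the reserved name
// of the built-in Job controller; it returns nil otherwise, including when the
// JobManagedBy feature gate is disabled.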
  1993  func managedByExternalController(jobObj *batch.Job) *string {
  1994  	if feature.DefaultFeatureGate.Enabled(features.JobManagedBy) {
  1995  		if controllerName := jobObj.Spec.ManagedBy; controllerName != nil && *controllerName != batch.JobControllerName {
  1996  			return controllerName
  1997  		}
  1998  	}
  1999  	return nil
  2000  }