volcano.sh/volcano@v1.9.0/pkg/controllers/job/job_controller_actions.go (about)

     1  /*
     2  Copyright 2019 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package job
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"reflect"
    23  	"sort"
    24  	"sync"
    25  	"sync/atomic"
    26  	"time"
    27  
    28  	v1 "k8s.io/api/core/v1"
    29  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    30  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    31  	quotav1 "k8s.io/apiserver/pkg/quota/v1"
    32  	"k8s.io/klog/v2"
    33  
    34  	batch "volcano.sh/apis/pkg/apis/batch/v1alpha1"
    35  	"volcano.sh/apis/pkg/apis/helpers"
    36  	scheduling "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
    37  
    38  	"volcano.sh/volcano/pkg/controllers/apis"
    39  	jobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers"
    40  	"volcano.sh/volcano/pkg/controllers/job/state"
    41  	"volcano.sh/volcano/pkg/controllers/util"
    42  )
    43  
// calMutex guards the per-task status count maps updated by calcPodStatus,
// which syncJob calls from concurrently running pod-creation goroutines.
var calMutex sync.Mutex
    45  
    46  func (cc *jobcontroller) killJob(jobInfo *apis.JobInfo, podRetainPhase state.PhaseMap, updateStatus state.UpdateStatusFn) error {
    47  	job := jobInfo.Job
    48  	klog.V(3).Infof("Killing Job <%s/%s>, current version %d", job.Namespace, job.Name, job.Status.Version)
    49  	defer klog.V(3).Infof("Finished Job <%s/%s> killing, current version %d", job.Namespace, job.Name, job.Status.Version)
    50  
    51  	if job.DeletionTimestamp != nil {
    52  		klog.Infof("Job <%s/%s> is terminating, skip management process.",
    53  			job.Namespace, job.Name)
    54  		return nil
    55  	}
    56  
    57  	var pending, running, terminating, succeeded, failed, unknown int32
    58  	taskStatusCount := make(map[string]batch.TaskState)
    59  
    60  	var errs []error
    61  	var total int
    62  
    63  	for _, pods := range jobInfo.Pods {
    64  		for _, pod := range pods {
    65  			total++
    66  
    67  			if pod.DeletionTimestamp != nil {
    68  				klog.Infof("Pod <%s/%s> is terminating", pod.Namespace, pod.Name)
    69  				terminating++
    70  				continue
    71  			}
    72  
    73  			maxRetry := job.Spec.MaxRetry
    74  			lastRetry := false
    75  			if job.Status.RetryCount >= maxRetry-1 {
    76  				lastRetry = true
    77  			}
    78  
    79  			// Only retain the Failed and Succeeded pods at the last retry.
    80  			// If it is not the last retry, kill pod as defined in `podRetainPhase`.
    81  			retainPhase := podRetainPhase
    82  			if lastRetry {
    83  				retainPhase = state.PodRetainPhaseSoft
    84  			}
    85  			_, retain := retainPhase[pod.Status.Phase]
    86  
    87  			if !retain {
    88  				err := cc.deleteJobPod(job.Name, pod)
    89  				if err == nil {
    90  					terminating++
    91  					continue
    92  				}
    93  				// record the err, and then collect the pod info like retained pod
    94  				errs = append(errs, err)
    95  				cc.resyncTask(pod)
    96  			}
    97  
    98  			classifyAndAddUpPodBaseOnPhase(pod, &pending, &running, &succeeded, &failed, &unknown)
    99  			calcPodStatus(pod, taskStatusCount)
   100  		}
   101  	}
   102  
   103  	if len(errs) != 0 {
   104  		klog.Errorf("failed to kill pods for job %s/%s, with err %+v", job.Namespace, job.Name, errs)
   105  		cc.recorder.Event(job, v1.EventTypeWarning, FailedDeletePodReason,
   106  			fmt.Sprintf("Error deleting pods: %+v", errs))
   107  		return fmt.Errorf("failed to kill %d pods of %d", len(errs), total)
   108  	}
   109  
   110  	job = job.DeepCopy()
   111  	// Job version is bumped only when job is killed
   112  	job.Status.Version++
   113  	job.Status.Pending = pending
   114  	job.Status.Running = running
   115  	job.Status.Succeeded = succeeded
   116  	job.Status.Failed = failed
   117  	job.Status.Terminating = terminating
   118  	job.Status.Unknown = unknown
   119  	job.Status.TaskStatusCount = taskStatusCount
   120  
   121  	// Update running duration
   122  	klog.V(3).Infof("Running duration is %s", metav1.Duration{Duration: time.Since(jobInfo.Job.CreationTimestamp.Time)}.ToUnstructured())
   123  	job.Status.RunningDuration = &metav1.Duration{Duration: time.Since(jobInfo.Job.CreationTimestamp.Time)}
   124  
   125  	if updateStatus != nil {
   126  		if updateStatus(&job.Status) {
   127  			job.Status.State.LastTransitionTime = metav1.Now()
   128  			jobCondition := newCondition(job.Status.State.Phase, &job.Status.State.LastTransitionTime)
   129  			job.Status.Conditions = append(job.Status.Conditions, jobCondition)
   130  		}
   131  	}
   132  
   133  	// must be called before update job status
   134  	if err := cc.pluginOnJobDelete(job); err != nil {
   135  		return err
   136  	}
   137  
   138  	// Update Job status
   139  	newJob, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(context.TODO(), job, metav1.UpdateOptions{})
   140  	if err != nil {
   141  		klog.Errorf("Failed to update status of Job %v/%v: %v",
   142  			job.Namespace, job.Name, err)
   143  		return err
   144  	}
   145  	if e := cc.cache.Update(newJob); e != nil {
   146  		klog.Errorf("KillJob - Failed to update Job %v/%v in cache:  %v",
   147  			newJob.Namespace, newJob.Name, e)
   148  		return e
   149  	}
   150  
   151  	// Delete PodGroup
   152  	pgName := job.Name + "-" + string(job.UID)
   153  	if err := cc.vcClient.SchedulingV1beta1().PodGroups(job.Namespace).Delete(context.TODO(), pgName, metav1.DeleteOptions{}); err != nil {
   154  		if !apierrors.IsNotFound(err) {
   155  			klog.Errorf("Failed to delete PodGroup of Job %v/%v: %v",
   156  				job.Namespace, job.Name, err)
   157  			return err
   158  		}
   159  	}
   160  
   161  	// NOTE(k82cn): DO NOT delete input/output until job is deleted.
   162  
   163  	return nil
   164  }
   165  
   166  func (cc *jobcontroller) initiateJob(job *batch.Job) (*batch.Job, error) {
   167  	klog.V(3).Infof("Starting to initiate Job <%s/%s>", job.Namespace, job.Name)
   168  	jobInstance, err := cc.initJobStatus(job)
   169  	if err != nil {
   170  		cc.recorder.Event(job, v1.EventTypeWarning, string(batch.JobStatusError),
   171  			fmt.Sprintf("Failed to initialize job status, err: %v", err))
   172  		return nil, err
   173  	}
   174  
   175  	if err := cc.pluginOnJobAdd(jobInstance); err != nil {
   176  		cc.recorder.Event(job, v1.EventTypeWarning, string(batch.PluginError),
   177  			fmt.Sprintf("Execute plugin when job add failed, err: %v", err))
   178  		return nil, err
   179  	}
   180  
   181  	newJob, err := cc.createJobIOIfNotExist(jobInstance)
   182  	if err != nil {
   183  		cc.recorder.Event(job, v1.EventTypeWarning, string(batch.PVCError),
   184  			fmt.Sprintf("Failed to create PVC, err: %v", err))
   185  		return nil, err
   186  	}
   187  
   188  	if err := cc.createOrUpdatePodGroup(newJob); err != nil {
   189  		cc.recorder.Event(job, v1.EventTypeWarning, string(batch.PodGroupError),
   190  			fmt.Sprintf("Failed to create PodGroup, err: %v", err))
   191  		return nil, err
   192  	}
   193  
   194  	return newJob, nil
   195  }
   196  
   197  func (cc *jobcontroller) initOnJobUpdate(job *batch.Job) error {
   198  	klog.V(3).Infof("Starting to initiate Job <%s/%s> on update", job.Namespace, job.Name)
   199  
   200  	if err := cc.pluginOnJobUpdate(job); err != nil {
   201  		cc.recorder.Event(job, v1.EventTypeWarning, string(batch.PluginError),
   202  			fmt.Sprintf("Execute plugin when job add failed, err: %v", err))
   203  		return err
   204  	}
   205  
   206  	if err := cc.createOrUpdatePodGroup(job); err != nil {
   207  		cc.recorder.Event(job, v1.EventTypeWarning, string(batch.PodGroupError),
   208  			fmt.Sprintf("Failed to create PodGroup, err: %v", err))
   209  		return err
   210  	}
   211  
   212  	return nil
   213  }
   214  
   215  func (cc *jobcontroller) GetQueueInfo(queue string) (*scheduling.Queue, error) {
   216  	queueInfo, err := cc.queueLister.Get(queue)
   217  	if err != nil {
   218  		klog.Errorf("Failed to get queue from queueLister, error: %s", err.Error())
   219  	}
   220  
   221  	return queueInfo, err
   222  }
   223  
   224  func (cc *jobcontroller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateStatusFn) error {
   225  	job := jobInfo.Job
   226  	klog.V(3).Infof("Starting to sync up Job <%s/%s>, current version %d", job.Namespace, job.Name, job.Status.Version)
   227  	defer klog.V(3).Infof("Finished Job <%s/%s> sync up, current version %d", job.Namespace, job.Name, job.Status.Version)
   228  
   229  	if jobInfo.Job.DeletionTimestamp != nil {
   230  		klog.Infof("Job <%s/%s> is terminating, skip management process.",
   231  			jobInfo.Job.Namespace, jobInfo.Job.Name)
   232  		return nil
   233  	}
   234  
   235  	// deep copy job to prevent mutate it
   236  	job = job.DeepCopy()
   237  
   238  	// Find queue that job belongs to, and check if the queue has forwarding metadata
   239  	queueInfo, err := cc.GetQueueInfo(job.Spec.Queue)
   240  	if err != nil {
   241  		return err
   242  	}
   243  
   244  	var jobForwarding bool
   245  	if len(queueInfo.Spec.ExtendClusters) != 0 {
   246  		jobForwarding = true
   247  		if len(job.Annotations) == 0 {
   248  			job.Annotations = make(map[string]string)
   249  		}
   250  		job.Annotations[batch.JobForwardingKey] = "true"
   251  		job, err = cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).Update(context.TODO(), job, metav1.UpdateOptions{})
   252  		if err != nil {
   253  			klog.Errorf("failed to update job: %s/%s, error: %s", job.Namespace, job.Name, err.Error())
   254  			return err
   255  		}
   256  	}
   257  
   258  	// Skip job initiation if job is already initiated
   259  	if !isInitiated(job) {
   260  		if job, err = cc.initiateJob(job); err != nil {
   261  			return err
   262  		}
   263  	} else {
   264  		// TODO: optimize this call it only when scale up/down
   265  		if err = cc.initOnJobUpdate(job); err != nil {
   266  			return err
   267  		}
   268  	}
   269  
   270  	if len(queueInfo.Spec.ExtendClusters) != 0 {
   271  		jobForwarding = true
   272  		job.Annotations[batch.JobForwardingKey] = "true"
   273  		_, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).Update(context.TODO(), job, metav1.UpdateOptions{})
   274  		if err != nil {
   275  			klog.Errorf("failed to update job: %s/%s, error: %s", job.Namespace, job.Name, err.Error())
   276  			return err
   277  		}
   278  	}
   279  
   280  	var syncTask bool
   281  	pgName := job.Name + "-" + string(job.UID)
   282  	if pg, _ := cc.pgLister.PodGroups(job.Namespace).Get(pgName); pg != nil {
   283  		if pg.Status.Phase != "" && pg.Status.Phase != scheduling.PodGroupPending {
   284  			syncTask = true
   285  		}
   286  
   287  		for _, condition := range pg.Status.Conditions {
   288  			if condition.Type == scheduling.PodGroupUnschedulableType {
   289  				cc.recorder.Eventf(job, v1.EventTypeWarning, string(batch.PodGroupPending),
   290  					fmt.Sprintf("PodGroup %s:%s unschedule,reason: %s", job.Namespace, job.Name, condition.Message))
   291  			}
   292  		}
   293  	}
   294  
   295  	var jobCondition batch.JobCondition
   296  	if !syncTask {
   297  		if updateStatus != nil {
   298  			if updateStatus(&job.Status) {
   299  				job.Status.State.LastTransitionTime = metav1.Now()
   300  				jobCondition = newCondition(job.Status.State.Phase, &job.Status.State.LastTransitionTime)
   301  				job.Status.Conditions = append(job.Status.Conditions, jobCondition)
   302  			}
   303  		}
   304  		newJob, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(context.TODO(), job, metav1.UpdateOptions{})
   305  		if err != nil {
   306  			klog.Errorf("Failed to update status of Job %v/%v: %v",
   307  				job.Namespace, job.Name, err)
   308  			return err
   309  		}
   310  		if e := cc.cache.Update(newJob); e != nil {
   311  			klog.Errorf("SyncJob - Failed to update Job %v/%v in cache:  %v",
   312  				newJob.Namespace, newJob.Name, e)
   313  			return e
   314  		}
   315  		return nil
   316  	}
   317  
   318  	var running, pending, terminating, succeeded, failed, unknown int32
   319  	taskStatusCount := make(map[string]batch.TaskState)
   320  
   321  	podToCreate := make(map[string][]*v1.Pod)
   322  	var podToDelete []*v1.Pod
   323  	var creationErrs []error
   324  	var deletionErrs []error
   325  	appendMutex := sync.Mutex{}
   326  
   327  	appendError := func(container *[]error, err error) {
   328  		appendMutex.Lock()
   329  		defer appendMutex.Unlock()
   330  		*container = append(*container, err)
   331  	}
   332  
   333  	waitCreationGroup := sync.WaitGroup{}
   334  
   335  	for _, ts := range job.Spec.Tasks {
   336  		ts.Template.Name = ts.Name
   337  		tc := ts.Template.DeepCopy()
   338  		name := ts.Template.Name
   339  
   340  		pods, found := jobInfo.Pods[name]
   341  		if !found {
   342  			pods = map[string]*v1.Pod{}
   343  		}
   344  
   345  		var podToCreateEachTask []*v1.Pod
   346  		for i := 0; i < int(ts.Replicas); i++ {
   347  			podName := fmt.Sprintf(jobhelpers.PodNameFmt, job.Name, name, i)
   348  			if pod, found := pods[podName]; !found {
   349  				newPod := createJobPod(job, tc, ts.TopologyPolicy, i, jobForwarding)
   350  				if err := cc.pluginOnPodCreate(job, newPod); err != nil {
   351  					return err
   352  				}
   353  				podToCreateEachTask = append(podToCreateEachTask, newPod)
   354  				waitCreationGroup.Add(1)
   355  			} else {
   356  				delete(pods, podName)
   357  				if pod.DeletionTimestamp != nil {
   358  					klog.Infof("Pod <%s/%s> is terminating", pod.Namespace, pod.Name)
   359  					atomic.AddInt32(&terminating, 1)
   360  					continue
   361  				}
   362  
   363  				classifyAndAddUpPodBaseOnPhase(pod, &pending, &running, &succeeded, &failed, &unknown)
   364  				calcPodStatus(pod, taskStatusCount)
   365  			}
   366  		}
   367  		podToCreate[ts.Name] = podToCreateEachTask
   368  		for _, pod := range pods {
   369  			podToDelete = append(podToDelete, pod)
   370  		}
   371  	}
   372  
   373  	for taskName, podToCreateEachTask := range podToCreate {
   374  		if len(podToCreateEachTask) == 0 {
   375  			continue
   376  		}
   377  		go func(taskName string, podToCreateEachTask []*v1.Pod) {
   378  			taskIndex := jobhelpers.GetTaskIndexUnderJob(taskName, job)
   379  			if job.Spec.Tasks[taskIndex].DependsOn != nil {
   380  				if !cc.waitDependsOnTaskMeetCondition(taskName, taskIndex, podToCreateEachTask, job) {
   381  					klog.V(3).Infof("Job %s/%s depends on task not ready", job.Name, job.Namespace)
   382  					// release wait group
   383  					for _, pod := range podToCreateEachTask {
   384  						go func(pod *v1.Pod) {
   385  							defer waitCreationGroup.Done()
   386  						}(pod)
   387  					}
   388  					return
   389  				}
   390  			}
   391  
   392  			for _, pod := range podToCreateEachTask {
   393  				go func(pod *v1.Pod) {
   394  					defer waitCreationGroup.Done()
   395  					newPod, err := cc.kubeClient.CoreV1().Pods(pod.Namespace).Create(context.TODO(), pod, metav1.CreateOptions{})
   396  					if err != nil && !apierrors.IsAlreadyExists(err) {
   397  						// Failed to create Pod, waitCreationGroup a moment and then create it again
   398  						// This is to ensure all podsMap under the same Job created
   399  						// So gang-scheduling could schedule the Job successfully
   400  						klog.Errorf("Failed to create pod %s for Job %s, err %#v",
   401  							pod.Name, job.Name, err)
   402  						appendError(&creationErrs, fmt.Errorf("failed to create pod %s, err: %#v", pod.Name, err))
   403  					} else {
   404  						classifyAndAddUpPodBaseOnPhase(newPod, &pending, &running, &succeeded, &failed, &unknown)
   405  						calcPodStatus(pod, taskStatusCount)
   406  						klog.V(5).Infof("Created Task <%s> of Job <%s/%s>",
   407  							pod.Name, job.Namespace, job.Name)
   408  					}
   409  				}(pod)
   410  			}
   411  		}(taskName, podToCreateEachTask)
   412  	}
   413  
   414  	waitCreationGroup.Wait()
   415  
   416  	if len(creationErrs) != 0 {
   417  		cc.recorder.Event(job, v1.EventTypeWarning, FailedCreatePodReason,
   418  			fmt.Sprintf("Error creating pods: %+v", creationErrs))
   419  		return fmt.Errorf("failed to create %d pods of %d", len(creationErrs), len(podToCreate))
   420  	}
   421  
   422  	// Delete pods when scale down.
   423  	waitDeletionGroup := sync.WaitGroup{}
   424  	waitDeletionGroup.Add(len(podToDelete))
   425  	for _, pod := range podToDelete {
   426  		go func(pod *v1.Pod) {
   427  			defer waitDeletionGroup.Done()
   428  			err := cc.deleteJobPod(job.Name, pod)
   429  			if err != nil {
   430  				// Failed to delete Pod, waitCreationGroup a moment and then create it again
   431  				// This is to ensure all podsMap under the same Job created
   432  				// So gang-scheduling could schedule the Job successfully
   433  				klog.Errorf("Failed to delete pod %s for Job %s, err %#v",
   434  					pod.Name, job.Name, err)
   435  				appendError(&deletionErrs, err)
   436  				cc.resyncTask(pod)
   437  			} else {
   438  				klog.V(3).Infof("Deleted Task <%s> of Job <%s/%s>",
   439  					pod.Name, job.Namespace, job.Name)
   440  				atomic.AddInt32(&terminating, 1)
   441  			}
   442  		}(pod)
   443  	}
   444  	waitDeletionGroup.Wait()
   445  
   446  	if len(deletionErrs) != 0 {
   447  		cc.recorder.Event(job, v1.EventTypeWarning, FailedDeletePodReason,
   448  			fmt.Sprintf("Error deleting pods: %+v", deletionErrs))
   449  		return fmt.Errorf("failed to delete %d pods of %d", len(deletionErrs), len(podToDelete))
   450  	}
   451  	job.Status = batch.JobStatus{
   452  		State: job.Status.State,
   453  
   454  		Pending:             pending,
   455  		Running:             running,
   456  		Succeeded:           succeeded,
   457  		Failed:              failed,
   458  		Terminating:         terminating,
   459  		Unknown:             unknown,
   460  		Version:             job.Status.Version,
   461  		MinAvailable:        job.Spec.MinAvailable,
   462  		TaskStatusCount:     taskStatusCount,
   463  		ControlledResources: job.Status.ControlledResources,
   464  		Conditions:          job.Status.Conditions,
   465  		RetryCount:          job.Status.RetryCount,
   466  	}
   467  
   468  	if updateStatus != nil && updateStatus(&job.Status) {
   469  		job.Status.State.LastTransitionTime = metav1.Now()
   470  		jobCondition = newCondition(job.Status.State.Phase, &job.Status.State.LastTransitionTime)
   471  		job.Status.Conditions = append(job.Status.Conditions, jobCondition)
   472  	}
   473  	newJob, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(context.TODO(), job, metav1.UpdateOptions{})
   474  	if err != nil {
   475  		klog.Errorf("Failed to update status of Job %v/%v: %v",
   476  			job.Namespace, job.Name, err)
   477  		return err
   478  	}
   479  	if e := cc.cache.Update(newJob); e != nil {
   480  		klog.Errorf("SyncJob - Failed to update Job %v/%v in cache:  %v",
   481  			newJob.Namespace, newJob.Name, e)
   482  		return e
   483  	}
   484  
   485  	return nil
   486  }
   487  
   488  func (cc *jobcontroller) waitDependsOnTaskMeetCondition(taskName string, taskIndex int, podToCreateEachTask []*v1.Pod, job *batch.Job) bool {
   489  	if job.Spec.Tasks[taskIndex].DependsOn == nil {
   490  		return true
   491  	}
   492  	dependsOn := *job.Spec.Tasks[taskIndex].DependsOn
   493  	if len(dependsOn.Name) > 1 && dependsOn.Iteration == batch.IterationAny {
   494  		// any ready to create task, return true
   495  		for _, task := range dependsOn.Name {
   496  			if cc.isDependsOnPodsReady(task, job) {
   497  				return true
   498  			}
   499  		}
   500  		// all not ready to skip create task, return false
   501  		return false
   502  	}
   503  	for _, dependsOnTask := range dependsOn.Name {
   504  		// any not ready to skip create task, return false
   505  		if !cc.isDependsOnPodsReady(dependsOnTask, job) {
   506  			return false
   507  		}
   508  	}
   509  	// all ready to create task, return true
   510  	return true
   511  }
   512  
   513  func (cc *jobcontroller) isDependsOnPodsReady(task string, job *batch.Job) bool {
   514  	dependsOnPods := jobhelpers.GetPodsNameUnderTask(task, job)
   515  	dependsOnTaskIndex := jobhelpers.GetTaskIndexUnderJob(task, job)
   516  	runningPodCount := 0
   517  	for _, podName := range dependsOnPods {
   518  		pod, err := cc.podLister.Pods(job.Namespace).Get(podName)
   519  		if err != nil {
   520  			// If pod is not found. There are 2 possibilities.
   521  			// 1. vcjob has been deleted. This function should return true.
   522  			// 2. pod is not created. This function should return false, continue waiting.
   523  			if apierrors.IsNotFound(err) {
   524  				_, errGetJob := cc.jobLister.Jobs(job.Namespace).Get(job.Name)
   525  				if errGetJob != nil {
   526  					return apierrors.IsNotFound(errGetJob)
   527  				}
   528  			}
   529  			klog.Errorf("Failed to get pod %v/%v %v", job.Namespace, podName, err)
   530  			continue
   531  		}
   532  
   533  		if pod.Status.Phase != v1.PodRunning && pod.Status.Phase != v1.PodSucceeded {
   534  			klog.V(5).Infof("Sequential state, pod %v/%v of depends on tasks is not running", pod.Namespace, pod.Name)
   535  			continue
   536  		}
   537  
   538  		allContainerReady := true
   539  		for _, containerStatus := range pod.Status.ContainerStatuses {
   540  			if !containerStatus.Ready {
   541  				allContainerReady = false
   542  				break
   543  			}
   544  		}
   545  		if allContainerReady {
   546  			runningPodCount++
   547  		}
   548  	}
   549  	dependsOnTaskMinReplicas := job.Spec.Tasks[dependsOnTaskIndex].MinAvailable
   550  	if dependsOnTaskMinReplicas != nil {
   551  		if runningPodCount < int(*dependsOnTaskMinReplicas) {
   552  			klog.V(5).Infof("In a depends on startup state, there are already %d pods running, which is less than the minimum number of runs", runningPodCount)
   553  			return false
   554  		}
   555  	}
   556  	return true
   557  }
   558  
   559  func (cc *jobcontroller) createJobIOIfNotExist(job *batch.Job) (*batch.Job, error) {
   560  	// If PVC does not exist, create them for Job.
   561  	var needUpdate bool
   562  	if job.Status.ControlledResources == nil {
   563  		job.Status.ControlledResources = make(map[string]string)
   564  	}
   565  	for index, volume := range job.Spec.Volumes {
   566  		vcName := volume.VolumeClaimName
   567  		if len(vcName) == 0 {
   568  			// NOTE(k82cn): Ensure never have duplicated generated names.
   569  			for {
   570  				vcName = jobhelpers.GenPVCName(job.Name)
   571  				exist, err := cc.checkPVCExist(job, vcName)
   572  				if err != nil {
   573  					return job, err
   574  				}
   575  				if exist {
   576  					continue
   577  				}
   578  				job.Spec.Volumes[index].VolumeClaimName = vcName
   579  				needUpdate = true
   580  				break
   581  			}
   582  			// TODO: check VolumeClaim must be set if VolumeClaimName is empty
   583  			if volume.VolumeClaim != nil {
   584  				if err := cc.createPVC(job, vcName, volume.VolumeClaim); err != nil {
   585  					return job, err
   586  				}
   587  			}
   588  		} else {
   589  			exist, err := cc.checkPVCExist(job, vcName)
   590  			if err != nil {
   591  				return job, err
   592  			}
   593  			if !exist {
   594  				return job, fmt.Errorf("pvc %s is not found, the job will be in the Pending state until the PVC is created", vcName)
   595  			}
   596  		}
   597  		job.Status.ControlledResources["volume-pvc-"+vcName] = vcName
   598  	}
   599  	if needUpdate {
   600  		newJob, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).Update(context.TODO(), job, metav1.UpdateOptions{})
   601  		if err != nil {
   602  			klog.Errorf("Failed to update Job %v/%v for volume claim name: %v ",
   603  				job.Namespace, job.Name, err)
   604  			return job, err
   605  		}
   606  
   607  		newJob.Status = job.Status
   608  		return newJob, err
   609  	}
   610  	return job, nil
   611  }
   612  
   613  func (cc *jobcontroller) checkPVCExist(job *batch.Job, pvc string) (bool, error) {
   614  	if _, err := cc.pvcLister.PersistentVolumeClaims(job.Namespace).Get(pvc); err != nil {
   615  		if apierrors.IsNotFound(err) {
   616  			return false, nil
   617  		}
   618  		klog.V(3).Infof("Failed to get PVC %s for job <%s/%s>: %v",
   619  			pvc, job.Namespace, job.Name, err)
   620  		return false, err
   621  	}
   622  	return true, nil
   623  }
   624  
   625  func (cc *jobcontroller) createPVC(job *batch.Job, vcName string, volumeClaim *v1.PersistentVolumeClaimSpec) error {
   626  	pvc := &v1.PersistentVolumeClaim{
   627  		ObjectMeta: metav1.ObjectMeta{
   628  			Namespace: job.Namespace,
   629  			Name:      vcName,
   630  			OwnerReferences: []metav1.OwnerReference{
   631  				*metav1.NewControllerRef(job, helpers.JobKind),
   632  			},
   633  		},
   634  		Spec: *volumeClaim,
   635  	}
   636  
   637  	klog.V(3).Infof("Try to create PVC: %v", pvc)
   638  
   639  	if _, e := cc.kubeClient.CoreV1().PersistentVolumeClaims(job.Namespace).Create(context.TODO(), pvc, metav1.CreateOptions{}); e != nil {
   640  		klog.V(3).Infof("Failed to create PVC for Job <%s/%s>: %v",
   641  			job.Namespace, job.Name, e)
   642  		return e
   643  	}
   644  	return nil
   645  }
   646  
   647  func (cc *jobcontroller) createOrUpdatePodGroup(job *batch.Job) error {
   648  	// If PodGroup does not exist, create one for Job.
   649  	pgName := job.Name + "-" + string(job.UID)
   650  	var pg *scheduling.PodGroup
   651  	var err error
   652  	pg, err = cc.pgLister.PodGroups(job.Namespace).Get(pgName)
   653  	if err != nil {
   654  		if !apierrors.IsNotFound(err) {
   655  			klog.Errorf("Failed to get PodGroup for Job <%s/%s>: %v",
   656  				job.Namespace, job.Name, err)
   657  			return err
   658  		}
   659  		// try to get old pg if new pg not exist
   660  		pg, err = cc.pgLister.PodGroups(job.Namespace).Get(job.Name)
   661  		if err != nil {
   662  			if !apierrors.IsNotFound(err) {
   663  				klog.Errorf("Failed to get PodGroup for Job <%s/%s>: %v",
   664  					job.Namespace, job.Name, err)
   665  				return err
   666  			}
   667  
   668  			minTaskMember := map[string]int32{}
   669  			for _, task := range job.Spec.Tasks {
   670  				if task.MinAvailable != nil {
   671  					minTaskMember[task.Name] = *task.MinAvailable
   672  				} else {
   673  					minTaskMember[task.Name] = task.Replicas
   674  				}
   675  			}
   676  
   677  			pg := &scheduling.PodGroup{
   678  				ObjectMeta: metav1.ObjectMeta{
   679  					Namespace: job.Namespace,
   680  					// add job.UID into its name when create new PodGroup
   681  					Name:        pgName,
   682  					Annotations: job.Annotations,
   683  					Labels:      job.Labels,
   684  					OwnerReferences: []metav1.OwnerReference{
   685  						*metav1.NewControllerRef(job, helpers.JobKind),
   686  					},
   687  				},
   688  				Spec: scheduling.PodGroupSpec{
   689  					MinMember:         job.Spec.MinAvailable,
   690  					MinTaskMember:     minTaskMember,
   691  					Queue:             job.Spec.Queue,
   692  					MinResources:      cc.calcPGMinResources(job),
   693  					PriorityClassName: job.Spec.PriorityClassName,
   694  				},
   695  			}
   696  
   697  			if _, err = cc.vcClient.SchedulingV1beta1().PodGroups(job.Namespace).Create(context.TODO(), pg, metav1.CreateOptions{}); err != nil {
   698  				if !apierrors.IsAlreadyExists(err) {
   699  					klog.Errorf("Failed to create PodGroup for Job <%s/%s>: %v",
   700  						job.Namespace, job.Name, err)
   701  					return err
   702  				}
   703  			}
   704  			return nil
   705  		}
   706  	}
   707  
   708  	pgShouldUpdate := false
   709  	if pg.Spec.PriorityClassName != job.Spec.PriorityClassName {
   710  		pg.Spec.PriorityClassName = job.Spec.PriorityClassName
   711  		pgShouldUpdate = true
   712  	}
   713  
   714  	minResources := cc.calcPGMinResources(job)
   715  	if pg.Spec.MinMember != job.Spec.MinAvailable || !reflect.DeepEqual(pg.Spec.MinResources, minResources) {
   716  		pg.Spec.MinMember = job.Spec.MinAvailable
   717  		pg.Spec.MinResources = minResources
   718  		pgShouldUpdate = true
   719  	}
   720  
   721  	if pg.Spec.MinTaskMember == nil {
   722  		pgShouldUpdate = true
   723  		pg.Spec.MinTaskMember = make(map[string]int32)
   724  	}
   725  
   726  	for _, task := range job.Spec.Tasks {
   727  		cnt := task.Replicas
   728  		if task.MinAvailable != nil {
   729  			cnt = *task.MinAvailable
   730  		}
   731  
   732  		if taskMember, ok := pg.Spec.MinTaskMember[task.Name]; !ok {
   733  			pgShouldUpdate = true
   734  			pg.Spec.MinTaskMember[task.Name] = cnt
   735  		} else {
   736  			if taskMember == cnt {
   737  				continue
   738  			}
   739  
   740  			pgShouldUpdate = true
   741  			pg.Spec.MinTaskMember[task.Name] = cnt
   742  		}
   743  	}
   744  
   745  	if !pgShouldUpdate {
   746  		return nil
   747  	}
   748  
   749  	_, err = cc.vcClient.SchedulingV1beta1().PodGroups(job.Namespace).Update(context.TODO(), pg, metav1.UpdateOptions{})
   750  	if err != nil {
   751  		klog.V(3).Infof("Failed to update PodGroup for Job <%s/%s>: %v",
   752  			job.Namespace, job.Name, err)
   753  	}
   754  	return err
   755  }
   756  
   757  func (cc *jobcontroller) deleteJobPod(jobName string, pod *v1.Pod) error {
   758  	err := cc.kubeClient.CoreV1().Pods(pod.Namespace).Delete(context.TODO(), pod.Name, metav1.DeleteOptions{})
   759  	if err != nil && !apierrors.IsNotFound(err) {
   760  		klog.Errorf("Failed to delete pod %s/%s for Job %s, err %#v",
   761  			pod.Namespace, pod.Name, jobName, err)
   762  
   763  		return fmt.Errorf("failed to delete pod %s, err %#v", pod.Name, err)
   764  	}
   765  
   766  	return nil
   767  }
   768  
   769  func (cc *jobcontroller) calcPGMinResources(job *batch.Job) *v1.ResourceList {
   770  	// sort task by priorityClasses
   771  	var tasksPriority TasksPriority
   772  	for _, task := range job.Spec.Tasks {
   773  		tp := TaskPriority{0, task}
   774  		pc := task.Template.Spec.PriorityClassName
   775  
   776  		if pc != "" {
   777  			priorityClass, err := cc.pcLister.Get(pc)
   778  			if err != nil || priorityClass == nil {
   779  				klog.Warningf("Ignore task %s priority class %s: %v", task.Name, pc, err)
   780  			} else {
   781  				tp.priority = priorityClass.Value
   782  			}
   783  		}
   784  		tasksPriority = append(tasksPriority, tp)
   785  	}
   786  
   787  	sort.Sort(tasksPriority)
   788  
   789  	minReq := v1.ResourceList{}
   790  	podCnt := int32(0)
   791  	for _, task := range tasksPriority {
   792  		for i := int32(0); i < task.Replicas; i++ {
   793  			if podCnt >= job.Spec.MinAvailable {
   794  				break
   795  			}
   796  
   797  			podCnt++
   798  			pod := &v1.Pod{
   799  				Spec: task.Template.Spec,
   800  			}
   801  			minReq = quotav1.Add(minReq, *util.GetPodQuotaUsage(pod))
   802  		}
   803  	}
   804  
   805  	return &minReq
   806  }
   807  
   808  func (cc *jobcontroller) initJobStatus(job *batch.Job) (*batch.Job, error) {
   809  	if job.Status.State.Phase != "" {
   810  		return job, nil
   811  	}
   812  
   813  	job.Status.State.Phase = batch.Pending
   814  	job.Status.State.LastTransitionTime = metav1.Now()
   815  	job.Status.MinAvailable = job.Spec.MinAvailable
   816  	jobCondition := newCondition(job.Status.State.Phase, &job.Status.State.LastTransitionTime)
   817  	job.Status.Conditions = append(job.Status.Conditions, jobCondition)
   818  	newJob, err := cc.vcClient.BatchV1alpha1().Jobs(job.Namespace).UpdateStatus(context.TODO(), job, metav1.UpdateOptions{})
   819  	if err != nil {
   820  		klog.Errorf("Failed to update status of Job %v/%v: %v",
   821  			job.Namespace, job.Name, err)
   822  		return nil, err
   823  	}
   824  	if err := cc.cache.Update(newJob); err != nil {
   825  		klog.Errorf("CreateJob - Failed to update Job %v/%v in cache:  %v",
   826  			newJob.Namespace, newJob.Name, err)
   827  		return nil, err
   828  	}
   829  
   830  	return newJob, nil
   831  }
   832  
   833  func classifyAndAddUpPodBaseOnPhase(pod *v1.Pod, pending, running, succeeded, failed, unknown *int32) {
   834  	switch pod.Status.Phase {
   835  	case v1.PodPending:
   836  		atomic.AddInt32(pending, 1)
   837  	case v1.PodRunning:
   838  		atomic.AddInt32(running, 1)
   839  	case v1.PodSucceeded:
   840  		atomic.AddInt32(succeeded, 1)
   841  	case v1.PodFailed:
   842  		atomic.AddInt32(failed, 1)
   843  	default:
   844  		atomic.AddInt32(unknown, 1)
   845  	}
   846  }
   847  
   848  func calcPodStatus(pod *v1.Pod, taskStatusCount map[string]batch.TaskState) {
   849  	taskName, found := pod.Annotations[batch.TaskSpecKey]
   850  	if !found {
   851  		return
   852  	}
   853  
   854  	calMutex.Lock()
   855  	defer calMutex.Unlock()
   856  	if _, ok := taskStatusCount[taskName]; !ok {
   857  		taskStatusCount[taskName] = batch.TaskState{
   858  			Phase: make(map[v1.PodPhase]int32),
   859  		}
   860  	}
   861  
   862  	switch pod.Status.Phase {
   863  	case v1.PodPending:
   864  		taskStatusCount[taskName].Phase[v1.PodPending]++
   865  	case v1.PodRunning:
   866  		taskStatusCount[taskName].Phase[v1.PodRunning]++
   867  	case v1.PodSucceeded:
   868  		taskStatusCount[taskName].Phase[v1.PodSucceeded]++
   869  	case v1.PodFailed:
   870  		taskStatusCount[taskName].Phase[v1.PodFailed]++
   871  	default:
   872  		taskStatusCount[taskName].Phase[v1.PodUnknown]++
   873  	}
   874  }
   875  
   876  func isInitiated(job *batch.Job) bool {
   877  	if job.Status.State.Phase == "" || job.Status.State.Phase == batch.Pending {
   878  		return false
   879  	}
   880  
   881  	return true
   882  }
   883  
   884  func newCondition(status batch.JobPhase, lastTransitionTime *metav1.Time) batch.JobCondition {
   885  	return batch.JobCondition{
   886  		Status:             status,
   887  		LastTransitionTime: lastTransitionTime,
   888  	}
   889  }