volcano.sh/apis@v1.8.2/pkg/apis/batch/v1alpha1/job.go (about)

     1  /*
     2  Copyright 2018 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package v1alpha1
    18  
    19  import (
    20  	v1 "k8s.io/api/core/v1"
    21  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    22  	"volcano.sh/apis/pkg/apis/bus/v1alpha1"
    23  )
    24  
    25  // +genclient
    26  // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
    27  // +kubebuilder:object:root=true
    28  // +kubebuilder:resource:path=jobs,shortName=vcjob;vj
    29  // +kubebuilder:subresource:status
    30  
    31  // Job defines the volcano job.
    32  // +kubebuilder:printcolumn:name="STATUS",type=string,JSONPath=`.status.state.phase`
    33  // +kubebuilder:printcolumn:name="minAvailable",type=integer,JSONPath=`.status.minAvailable`
    34  // +kubebuilder:printcolumn:name="RUNNINGS",type=integer,JSONPath=`.status.running`
    35  // +kubebuilder:printcolumn:name="AGE",type=date,JSONPath=`.metadata.creationTimestamp`
    36  // +kubebuilder:printcolumn:name="QUEUE",type=string,priority=1,JSONPath=`.spec.queue`
    37  type Job struct {
    38  	metav1.TypeMeta `json:",inline"`
    39  
    40  	// +optional
    41  	metav1.ObjectMeta `json:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"`
    42  
    43  	// Specification of the desired behavior of the volcano job, including the minAvailable
    44  	// +optional
    45  	Spec JobSpec `json:"spec,omitempty" protobuf:"bytes,2,opt,name=spec"`
    46  
    47  	// Current status of the volcano Job
    48  	// +optional
    49  	Status JobStatus `json:"status,omitempty" protobuf:"bytes,3,opt,name=status"`
    50  }
    51  
    52  // JobSpec describes how the job execution will look like and when it will actually run.
    53  type JobSpec struct {
    54  	// SchedulerName is the default value of `tasks.template.spec.schedulerName`.
    55  	// +optional
    56  	SchedulerName string `json:"schedulerName,omitempty" protobuf:"bytes,1,opt,name=schedulerName"`
    57  
    58  	// The minimal available pods to run for this Job
    59  	// Defaults to the summary of tasks' replicas
    60  	// +optional
    61  	MinAvailable int32 `json:"minAvailable,omitempty" protobuf:"bytes,2,opt,name=minAvailable"`
    62  
    63  	// The volumes mount on Job
    64  	// +optional
    65  	Volumes []VolumeSpec `json:"volumes,omitempty" protobuf:"bytes,3,opt,name=volumes"`
    66  
    67  	// Tasks specifies the task specification of Job
    68  	// +optional
    69  	Tasks []TaskSpec `json:"tasks,omitempty" protobuf:"bytes,4,opt,name=tasks"`
    70  
    71  	// Specifies the default lifecycle of tasks
    72  	// +optional
    73  	Policies []LifecyclePolicy `json:"policies,omitempty" protobuf:"bytes,5,opt,name=policies"`
    74  
    75  	// Specifies the plugin of job
    76  	// Key is plugin name, value is the arguments of the plugin
    77  	// +optional
    78  	Plugins map[string][]string `json:"plugins,omitempty" protobuf:"bytes,6,opt,name=plugins"`
    79  
    80  	// Running Estimate is a user running duration estimate for the job
    81  	// Default to nil
    82  	RunningEstimate *metav1.Duration `json:"runningEstimate,omitempty" protobuf:"bytes,7,opt,name=runningEstimate"`
    83  
    84  	//Specifies the queue that will be used in the scheduler, "default" queue is used this leaves empty.
    85  	// +optional
    86  	Queue string `json:"queue,omitempty" protobuf:"bytes,8,opt,name=queue"`
    87  
    88  	// Specifies the maximum number of retries before marking this Job failed.
    89  	// Defaults to 3.
    90  	// +optional
    91  	MaxRetry int32 `json:"maxRetry,omitempty" protobuf:"bytes,9,opt,name=maxRetry"`
    92  
    93  	// ttlSecondsAfterFinished limits the lifetime of a Job that has finished
    94  	// execution (either Completed or Failed). If this field is set,
    95  	// ttlSecondsAfterFinished after the Job finishes, it is eligible to be
    96  	// automatically deleted. If this field is unset,
    97  	// the Job won't be automatically deleted. If this field is set to zero,
    98  	// the Job becomes eligible to be deleted immediately after it finishes.
    99  	// +optional
   100  	TTLSecondsAfterFinished *int32 `json:"ttlSecondsAfterFinished,omitempty" protobuf:"varint,10,opt,name=ttlSecondsAfterFinished"`
   101  
   102  	// If specified, indicates the job's priority.
   103  	// +optional
   104  	PriorityClassName string `json:"priorityClassName,omitempty" protobuf:"bytes,11,opt,name=priorityClassName"`
   105  
   106  	// The minimal success pods to run for this Job
   107  	// +kubebuilder:validation:Minimum=1
   108  	// +optional
   109  	MinSuccess *int32 `json:"minSuccess,omitempty" protobuf:"varint,12,opt,name=minSuccess"`
   110  }
   111  
   112  // VolumeSpec defines the specification of Volume, e.g. PVC.
   113  type VolumeSpec struct {
   114  	// Path within the container at which the volume should be mounted.  Must
   115  	// not contain ':'.
   116  	MountPath string `json:"mountPath" protobuf:"bytes,1,opt,name=mountPath"`
   117  
   118  	// defined the PVC name
   119  	// +optional
   120  	VolumeClaimName string `json:"volumeClaimName,omitempty" protobuf:"bytes,2,opt,name=volumeClaimName"`
   121  
   122  	// VolumeClaim defines the PVC used by the VolumeMount.
   123  	// +optional
   124  	VolumeClaim *v1.PersistentVolumeClaimSpec `json:"volumeClaim,omitempty" protobuf:"bytes,3,opt,name=volumeClaim"`
   125  }
   126  
   127  // JobEvent job event.
   128  type JobEvent string
   129  
   130  const (
   131  	// CommandIssued command issued event is generated if a command is raised by user
   132  	CommandIssued JobEvent = "CommandIssued"
   133  	// PluginError  plugin error event is generated if error happens
   134  	PluginError JobEvent = "PluginError"
   135  	// PVCError pvc error event is generated if error happens during IO creation
   136  	PVCError JobEvent = "PVCError"
   137  	// PodGroupError  pod grp error event is generated if error happens during pod grp creation
   138  	PodGroupError JobEvent = "PodGroupError"
   139  	//ExecuteAction action issued event for each action
   140  	ExecuteAction JobEvent = "ExecuteAction"
   141  	//JobStatusError is generated if update job status failed
   142  	JobStatusError JobEvent = "JobStatusError"
   143  	// PodGroupPending  pod grp pending event is generated if pg pending due to some error
   144  	PodGroupPending JobEvent = "PodGroupPending"
   145  )
   146  
   147  // LifecyclePolicy specifies the lifecycle and error handling of task and job.
   148  type LifecyclePolicy struct {
   149  	// The action that will be taken to the PodGroup according to Event.
   150  	// One of "Restart", "None".
   151  	// Default to None.
   152  	// +optional
   153  	Action v1alpha1.Action `json:"action,omitempty" protobuf:"bytes,1,opt,name=action"`
   154  
   155  	// The Event recorded by scheduler; the controller takes actions
   156  	// according to this Event.
   157  	// +optional
   158  	Event v1alpha1.Event `json:"event,omitempty" protobuf:"bytes,2,opt,name=event"`
   159  
   160  	// The Events recorded by scheduler; the controller takes actions
   161  	// according to this Events.
   162  	// +optional
   163  	Events []v1alpha1.Event `json:"events,omitempty" protobuf:"bytes,3,opt,name=events"`
   164  
   165  	// The exit code of the pod container, controller will take action
   166  	// according to this code.
   167  	// Note: only one of `Event` or `ExitCode` can be specified.
   168  	// +optional
   169  	ExitCode *int32 `json:"exitCode,omitempty" protobuf:"bytes,4,opt,name=exitCode"`
   170  
   171  	// Timeout is the grace period for controller to take actions.
   172  	// Default to nil (take action immediately).
   173  	// +optional
   174  	Timeout *metav1.Duration `json:"timeout,omitempty" protobuf:"bytes,5,opt,name=timeout"`
   175  }
   176  
   177  type NumaPolicy string
   178  
   179  const (
   180  	None           NumaPolicy = "none"
   181  	BestEffort     NumaPolicy = "best-effort"
   182  	Restricted     NumaPolicy = "restricted"
   183  	SingleNumaNode NumaPolicy = "single-numa-node"
   184  )
   185  
   186  // TaskSpec specifies the task specification of Job.
   187  type TaskSpec struct {
   188  	// Name specifies the name of tasks
   189  	// +optional
   190  	Name string `json:"name,omitempty" protobuf:"bytes,1,opt,name=name"`
   191  
   192  	// Replicas specifies the replicas of this TaskSpec in Job
   193  	// +optional
   194  	Replicas int32 `json:"replicas,omitempty" protobuf:"bytes,2,opt,name=replicas"`
   195  
   196  	// The minimal available pods to run for this Task
   197  	// Defaults to the task replicas
   198  	// +optional
   199  	MinAvailable *int32 `json:"minAvailable,omitempty" protobuf:"bytes,3,opt,name=minAvailable"`
   200  
   201  	// Specifies the pod that will be created for this TaskSpec
   202  	// when executing a Job
   203  	// +optional
   204  	Template v1.PodTemplateSpec `json:"template,omitempty" protobuf:"bytes,4,opt,name=template"`
   205  
   206  	// Specifies the lifecycle of task
   207  	// +optional
   208  	Policies []LifecyclePolicy `json:"policies,omitempty" protobuf:"bytes,5,opt,name=policies"`
   209  
   210  	// Specifies the topology policy of task
   211  	// +optional
   212  	TopologyPolicy NumaPolicy `json:"topologyPolicy,omitempty" protobuf:"bytes,6,opt,name=topologyPolicy"`
   213  
   214  	// Specifies the maximum number of retries before marking this Task failed.
   215  	// Defaults to 3.
   216  	// +optional
   217  	MaxRetry int32 `json:"maxRetry,omitempty" protobuf:"bytes,7,opt,name=maxRetry"`
   218  
   219  	// Specifies the tasks that this task depends on.
   220  	// +optional
   221  	DependsOn *DependsOn `json:"dependsOn,omitempty" protobuf:"bytes,8,opt,name=dependsOn"`
   222  }
   223  
   224  // JobPhase defines the phase of the job.
   225  type JobPhase string
   226  
   227  const (
   228  	// Pending is the phase that job is pending in the queue, waiting for scheduling decision
   229  	Pending JobPhase = "Pending"
   230  	// Aborting is the phase that job is aborted, waiting for releasing pods
   231  	Aborting JobPhase = "Aborting"
   232  	// Aborted is the phase that job is aborted by user or error handling
   233  	Aborted JobPhase = "Aborted"
   234  	// Running is the phase that minimal available tasks of Job are running
   235  	Running JobPhase = "Running"
   236  	// Restarting is the phase that the Job is restarted, waiting for pod releasing and recreating
   237  	Restarting JobPhase = "Restarting"
   238  	// Completing is the phase that required tasks of job are completed, job starts to clean up
   239  	Completing JobPhase = "Completing"
   240  	// Completed is the phase that all tasks of Job are completed
   241  	Completed JobPhase = "Completed"
   242  	// Terminating is the phase that the Job is terminated, waiting for releasing pods
   243  	Terminating JobPhase = "Terminating"
   244  	// Terminated is the phase that the job is finished unexpected, e.g. events
   245  	Terminated JobPhase = "Terminated"
   246  	// Failed is the phase that the job is restarted failed reached the maximum number of retries.
   247  	Failed JobPhase = "Failed"
   248  )
   249  
   250  // JobState contains details for the current state of the job.
   251  type JobState struct {
   252  	// The phase of Job.
   253  	// +optional
   254  	Phase JobPhase `json:"phase,omitempty" protobuf:"bytes,1,opt,name=phase"`
   255  
   256  	// Unique, one-word, CamelCase reason for the phase's last transition.
   257  	// +optional
   258  	Reason string `json:"reason,omitempty" protobuf:"bytes,2,opt,name=reason"`
   259  
   260  	// Human-readable message indicating details about last transition.
   261  	// +optional
   262  	Message string `json:"message,omitempty" protobuf:"bytes,3,opt,name=message"`
   263  
   264  	// Last time the condition transit from one phase to another.
   265  	// +optional
   266  	LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" protobuf:"bytes,4,opt,name=lastTransitionTime"`
   267  }
   268  
   269  // TaskState contains details for the current state of the task.
   270  type TaskState struct {
   271  	// The phase of Task.
   272  	// +optional
   273  	Phase map[v1.PodPhase]int32 `json:"phase,omitempty" protobuf:"bytes,11,opt,name=phase"`
   274  }
   275  
   276  // JobStatus represents the current status of a Job.
   277  type JobStatus struct {
   278  	// Current state of Job.
   279  	// +optional
   280  	State JobState `json:"state,omitempty" protobuf:"bytes,1,opt,name=state"`
   281  
   282  	// The minimal available pods to run for this Job
   283  	// +optional
   284  	MinAvailable int32 `json:"minAvailable,omitempty" protobuf:"bytes,2,opt,name=minAvailable"`
   285  
   286  	// The status of pods for each task
   287  	// +optional
   288  	TaskStatusCount map[string]TaskState `json:"taskStatusCount,omitempty" protobuf:"bytes,21,opt,name=taskStatusCount"`
   289  
   290  	// The number of pending pods.
   291  	// +optional
   292  	Pending int32 `json:"pending,omitempty" protobuf:"bytes,3,opt,name=pending"`
   293  
   294  	// The number of running pods.
   295  	// +optional
   296  	Running int32 `json:"running,omitempty" protobuf:"bytes,4,opt,name=running"`
   297  
   298  	// The number of pods which reached phase Succeeded.
   299  	// +optional
   300  	Succeeded int32 `json:"succeeded,omitempty" protobuf:"bytes,5,opt,name=succeeded"`
   301  
   302  	// The number of pods which reached phase Failed.
   303  	// +optional
   304  	Failed int32 `json:"failed,omitempty" protobuf:"bytes,6,opt,name=failed"`
   305  
   306  	// The number of pods which reached phase Terminating.
   307  	// +optional
   308  	Terminating int32 `json:"terminating,omitempty" protobuf:"bytes,7,opt,name=terminating"`
   309  
   310  	// The number of pods which reached phase Unknown.
   311  	// +optional
   312  	Unknown int32 `json:"unknown,omitempty" protobuf:"bytes,8,opt,name=unknown"`
   313  
   314  	//Current version of job
   315  	// +optional
   316  	Version int32 `json:"version,omitempty" protobuf:"bytes,9,opt,name=version"`
   317  
   318  	// The number of Job retries.
   319  	// +optional
   320  	RetryCount int32 `json:"retryCount,omitempty" protobuf:"bytes,10,opt,name=retryCount"`
   321  
   322  	// The job running duration is the length of time from job running to complete.
   323  	// +optional
   324  	RunningDuration *metav1.Duration `json:"runningDuration,omitempty" protobuf:"bytes,11,opt,name=runningDuration"`
   325  
   326  	// The resources that controlled by this job, e.g. Service, ConfigMap
   327  	// +optional
   328  	ControlledResources map[string]string `json:"controlledResources,omitempty" protobuf:"bytes,12,opt,name=controlledResources"`
   329  
   330  	// Which conditions caused the current job state.
   331  	// +optional
   332  	// +patchMergeKey=status
   333  	// +patchStrategy=merge
   334  	Conditions []JobCondition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"status" protobuf:"bytes,13,rep,name=conditions"`
   335  }
   336  
   337  // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
   338  // +kubebuilder:object:root=true
   339  
   340  // JobList defines the list of jobs.
   341  type JobList struct {
   342  	metav1.TypeMeta `json:",inline"`
   343  	metav1.ListMeta `json:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"`
   344  
   345  	Items []Job `json:"items" protobuf:"bytes,2,rep,name=items"`
   346  }
   347  
   348  // JobCondition contains details for the current condition of this job.
   349  type JobCondition struct {
   350  	// Status is the new phase of job after performing the state's action.
   351  	Status JobPhase `json:"status" protobuf:"bytes,1,opt,name=status,casttype=JobPhase"`
   352  	// Last time the condition transitioned from one phase to another.
   353  	// +optional
   354  	LastTransitionTime *metav1.Time `json:"lastTransitionTime,omitempty" protobuf:"bytes,2,opt,name=lastTransitionTime"`
   355  }
   356  
   357  // Iteration defines the phase of the iteration.
   358  type Iteration string
   359  
   360  const (
   361  	// Indicates that when there are multiple tasks,
   362  	// as long as one task becomes the specified state,
   363  	// the task scheduling will be triggered
   364  	IterationAny Iteration = "any"
   365  	// Indicates that when there are multiple tasks,
   366  	// all tasks must become the specified state,
   367  	// the task scheduling will be triggered
   368  	IterationAll Iteration = "all"
   369  )
   370  
   371  // DependsOn represents the tasks that this task depends on and their dependencies
   372  type DependsOn struct {
   373  	// Indicates the name of the tasks that this task depends on,
   374  	// which can depend on multiple tasks
   375  	// +optional
   376  	Name []string `json:"name,omitempty" protobuf:"bytes,1,opt,name=name"`
   377  	// This field specifies that when there are multiple dependent tasks,
   378  	// as long as one task becomes the specified state,
   379  	// the task scheduling is triggered or
   380  	// all tasks must be changed to the specified state to trigger the task scheduling
   381  	// +optional
   382  	Iteration Iteration `json:"iteration,omitempty" protobuf:"bytes,2,opt,name=iteration"`
   383  }