github.com/kubeflow/training-operator@v1.7.0/pkg/controller.v1/mpi/mpijob.go (about) 1 // Copyright 2019 The Kubeflow Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package mpi 16 17 import ( 18 "strings" 19 20 kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" 21 22 corev1 "k8s.io/api/core/v1" 23 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 24 "k8s.io/apimachinery/pkg/labels" 25 ) 26 27 const ( 28 configSuffix = "-config" 29 configVolumeName = "mpi-job-config" 30 configMountPath = "/etc/mpi" 31 kubexecScriptName = "kubexec.sh" 32 hostfileName = "hostfile" 33 discoverHostsScriptName = "discover_hosts.sh" 34 kubectlDeliveryName = "kubectl-delivery" 35 kubectlTargetDirEnv = "TARGET_DIR" 36 kubectlVolumeName = "mpi-job-kubectl" 37 kubectlMountPath = "/opt/kube" 38 launcher = "launcher" 39 worker = "worker" 40 launcherSuffix = "-launcher" 41 workerSuffix = "-worker" 42 gpuResourceNameSuffix = ".com/gpu" 43 gpuResourceNamePattern = "gpu" 44 initContainerCpu = "100m" 45 initContainerEphStorage = "5Gi" 46 initContainerMem = "512Mi" 47 iMPIDefaultBootstrap = "rsh" 48 ) 49 50 const ( 51 // ErrResourceExists is used as part of the Event 'reason' when an MPIJob 52 // fails to sync due to dependent resources of the same name already 53 // existing. 54 ErrResourceExists = "ErrResourceExists" 55 56 // MessageResourceExists is the message used for Events when a resource 57 // fails to sync due to dependent resources already existing. 58 MessageResourceExists = "Resource %q of MPIJobKind %q already exists and is not managed by MPIJob" 59 60 // ErrResourceDoesNotExist is used as part of the Event 'reason' when some 61 // resource is missing in yaml 62 ErrResourceDoesNotExist = "ErrResourceDoesNotExist" 63 64 // MessageResourceDoesNotExist is used for Events when some 65 // resource is missing in yaml 66 MessageResourceDoesNotExist = "Resource %q is missing in yaml" 67 68 // podTemplateRestartPolicyReason is the warning reason when the restart 69 // policy is set in pod template. 70 podTemplateRestartPolicyReason = "SettedPodTemplateRestartPolicy" 71 72 // podTemplateSchedulerNameReason is the warning reason when other scheduler name is set 73 // in pod templates with gang-scheduling enabled 74 podTemplateSchedulerNameReason = "SettedPodTemplateSchedulerName" 75 76 // mpiJobEvict 77 mpiJobEvict = "MPIJobEvicted" 78 ) 79 80 // initializeMPIJobStatuses initializes the ReplicaStatuses for MPIJob. 81 func initializeMPIJobStatuses(mpiJob *kubeflowv1.MPIJob, rType kubeflowv1.ReplicaType) { 82 if mpiJob.Status.ReplicaStatuses == nil { 83 mpiJob.Status.ReplicaStatuses = make(map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaStatus) 84 } 85 86 mpiJob.Status.ReplicaStatuses[rType] = &kubeflowv1.ReplicaStatus{} 87 } 88 89 // updateMPIJobConditions updates the conditions of the given mpiJob. 90 func updateMPIJobConditions(mpiJob *kubeflowv1.MPIJob, conditionType kubeflowv1.JobConditionType, reason, message string) error { 91 condition := newCondition(conditionType, reason, message) 92 setCondition(&mpiJob.Status, condition) 93 return nil 94 } 95 96 // newCondition creates a new mpiJob condition. 97 func newCondition(conditionType kubeflowv1.JobConditionType, reason, message string) kubeflowv1.JobCondition { 98 return kubeflowv1.JobCondition{ 99 Type: conditionType, 100 Status: corev1.ConditionTrue, 101 LastUpdateTime: metav1.Now(), 102 LastTransitionTime: metav1.Now(), 103 Reason: reason, 104 Message: message, 105 } 106 } 107 108 // getCondition returns the condition with the provided type. 109 func getCondition(status kubeflowv1.JobStatus, condType kubeflowv1.JobConditionType) *kubeflowv1.JobCondition { 110 for _, condition := range status.Conditions { 111 if condition.Type == condType { 112 return &condition 113 } 114 } 115 return nil 116 } 117 118 func isEvicted(status kubeflowv1.JobStatus) bool { 119 for _, condition := range status.Conditions { 120 if condition.Type == kubeflowv1.JobFailed && 121 condition.Status == corev1.ConditionTrue && 122 condition.Reason == mpiJobEvict { 123 return true 124 } 125 } 126 return false 127 } 128 129 // setCondition updates the mpiJob to include the provided condition. 130 // If the condition that we are about to add already exists 131 // and has the same status and reason then we are not going to update. 132 func setCondition(status *kubeflowv1.JobStatus, condition kubeflowv1.JobCondition) { 133 134 currentCond := getCondition(*status, condition.Type) 135 136 // Do nothing if condition doesn't change 137 if currentCond != nil && currentCond.Status == condition.Status && currentCond.Reason == condition.Reason { 138 return 139 } 140 141 // Do not update lastTransitionTime if the status of the condition doesn't change. 142 if currentCond != nil && currentCond.Status == condition.Status { 143 condition.LastTransitionTime = currentCond.LastTransitionTime 144 } 145 146 // Append the updated condition 147 newConditions := filterOutCondition(status.Conditions, condition.Type) 148 status.Conditions = append(newConditions, condition) 149 } 150 151 // filterOutCondition returns a new slice of mpiJob conditions without conditions with the provided type. 152 func filterOutCondition(conditions []kubeflowv1.JobCondition, condType kubeflowv1.JobConditionType) []kubeflowv1.JobCondition { 153 var newConditions []kubeflowv1.JobCondition 154 for _, c := range conditions { 155 if condType == kubeflowv1.JobRestarting && c.Type == kubeflowv1.JobRunning { 156 continue 157 } 158 if condType == kubeflowv1.JobRunning && c.Type == kubeflowv1.JobRestarting { 159 continue 160 } 161 162 if c.Type == condType { 163 continue 164 } 165 166 // Set the running condition status to be false when current condition failed or succeeded 167 if (condType == kubeflowv1.JobFailed || condType == kubeflowv1.JobSucceeded) && (c.Type == kubeflowv1.JobRunning || c.Type == kubeflowv1.JobFailed) { 168 c.Status = corev1.ConditionFalse 169 } 170 171 newConditions = append(newConditions, c) 172 } 173 return newConditions 174 } 175 176 func isPodFinished(j *corev1.Pod) bool { 177 return isPodSucceeded(j) || isPodFailed(j) 178 } 179 180 func isPodFailed(p *corev1.Pod) bool { 181 return p.Status.Phase == corev1.PodFailed 182 } 183 184 func isPodSucceeded(p *corev1.Pod) bool { 185 return p.Status.Phase == corev1.PodSucceeded 186 } 187 188 func isPodRunning(p *corev1.Pod) bool { 189 return p.Status.Phase == corev1.PodRunning 190 } 191 192 // isGPULauncher checks whether the launcher needs GPU. 193 func isGPULauncher(mpiJob *kubeflowv1.MPIJob) bool { 194 for _, container := range mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Template.Spec.Containers { 195 for key := range container.Resources.Limits { 196 if strings.HasSuffix(string(key), gpuResourceNameSuffix) { 197 return true 198 } 199 if strings.Contains(string(key), gpuResourceNamePattern) { 200 return true 201 } 202 } 203 } 204 return false 205 } 206 207 // hasIntelMPIBootstrapValues returns the existence of I_MPI_HYDRA_BOOTSTRAP 208 // and I_MPI_HYDRA_BOOTSTRAP_EXEC values. 209 // There are also _EXEC_EXTRA_ARGS and _AUTOFORK under the I_MPI_HYDRA_BOOTSTRAP 210 // prefix but those are not checked on purpose. 211 func hasIntelMPIBootstrapValues(envs []corev1.EnvVar) (bootstrap, exec bool) { 212 for _, env := range envs { 213 if env.Name == "I_MPI_HYDRA_BOOTSTRAP" { 214 bootstrap = true 215 } else if env.Name == "I_MPI_HYDRA_BOOTSTRAP_EXEC" { 216 exec = true 217 } 218 219 if bootstrap && exec { 220 break 221 } 222 } 223 224 return bootstrap, exec 225 } 226 227 func defaultReplicaLabels(genericLabels map[string]string, roleLabelVal string) map[string]string { 228 replicaLabels := map[string]string{} 229 for k, v := range genericLabels { 230 replicaLabels[k] = v 231 } 232 233 replicaLabels[kubeflowv1.ReplicaTypeLabel] = roleLabelVal 234 return replicaLabels 235 } 236 237 func defaultWorkerLabels(genericLabels map[string]string) map[string]string { 238 return defaultReplicaLabels(genericLabels, worker) 239 } 240 241 func defaultLauncherLabels(genericLabels map[string]string) map[string]string { 242 return defaultReplicaLabels(genericLabels, launcher) 243 } 244 245 func workerSelector(genericLabels map[string]string) (labels.Selector, error) { 246 labels := defaultWorkerLabels(genericLabels) 247 248 labelSelector := metav1.LabelSelector{ 249 MatchLabels: labels, 250 } 251 252 selector, err := metav1.LabelSelectorAsSelector(&labelSelector) 253 if err != nil { 254 return nil, err 255 } 256 257 return selector, nil 258 } 259 260 // initializeReplicaStatuses initializes the ReplicaStatuses for replica. 261 // originally from pkg/controller.v1/tensorflow/status.go (deleted) 262 func initializeReplicaStatuses(jobStatus *kubeflowv1.JobStatus, rtype kubeflowv1.ReplicaType) { 263 if jobStatus.ReplicaStatuses == nil { 264 jobStatus.ReplicaStatuses = make(map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaStatus) 265 } 266 267 jobStatus.ReplicaStatuses[rtype] = &kubeflowv1.ReplicaStatus{} 268 }