github.com/kubeflow/training-operator@v1.7.0/pkg/controller.v1/mpi/mpijob.go (about)

     1  // Copyright 2019 The Kubeflow Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mpi
    16  
    17  import (
    18  	"strings"
    19  
    20  	kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
    21  
    22  	corev1 "k8s.io/api/core/v1"
    23  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    24  	"k8s.io/apimachinery/pkg/labels"
    25  )
    26  
// Names, suffixes, mount paths and default values shared across the mpi
// package. Only launcher, worker, gpuResourceNameSuffix and
// gpuResourceNamePattern are referenced in this file; the remaining constants
// are consumed elsewhere in the package.
const (
	configSuffix            = "-config"
	configVolumeName        = "mpi-job-config"
	configMountPath         = "/etc/mpi"
	kubexecScriptName       = "kubexec.sh"
	hostfileName            = "hostfile"
	discoverHostsScriptName = "discover_hosts.sh"
	kubectlDeliveryName     = "kubectl-delivery"
	kubectlTargetDirEnv     = "TARGET_DIR"
	kubectlVolumeName       = "mpi-job-kubectl"
	kubectlMountPath        = "/opt/kube"
	launcher                = "launcher" // replica-type label value used by defaultLauncherLabels
	worker                  = "worker"   // replica-type label value used by defaultWorkerLabels
	launcherSuffix          = "-launcher"
	workerSuffix            = "-worker"
	gpuResourceNameSuffix   = ".com/gpu" // resource-name suffix checked by isGPULauncher
	gpuResourceNamePattern  = "gpu"      // resource-name substring checked by isGPULauncher
	initContainerCpu        = "100m"
	initContainerEphStorage = "5Gi"
	initContainerMem        = "512Mi"
	iMPIDefaultBootstrap    = "rsh"
)
    49  
// Event reasons and message templates used by the MPIJob controller.
const (
	// ErrResourceExists is used as part of the Event 'reason' when an MPIJob
	// fails to sync due to dependent resources of the same name already
	// existing.
	ErrResourceExists = "ErrResourceExists"

	// MessageResourceExists is the message format used for Events when a
	// resource fails to sync due to dependent resources already existing.
	// It takes two %q arguments.
	MessageResourceExists = "Resource %q of MPIJobKind %q already exists and is not managed by MPIJob"

	// ErrResourceDoesNotExist is used as part of the Event 'reason' when some
	// resource is missing in the YAML.
	ErrResourceDoesNotExist = "ErrResourceDoesNotExist"

	// MessageResourceDoesNotExist is the message format used for Events when
	// some resource is missing in the YAML. It takes one %q argument.
	MessageResourceDoesNotExist = "Resource %q is missing in yaml"

	// podTemplateRestartPolicyReason is the warning reason when the restart
	// policy is set in the pod template.
	podTemplateRestartPolicyReason = "SettedPodTemplateRestartPolicy"

	// podTemplateSchedulerNameReason is the warning reason when another
	// scheduler name is set in pod templates with gang-scheduling enabled.
	podTemplateSchedulerNameReason = "SettedPodTemplateSchedulerName"

	// mpiJobEvict is the condition reason that isEvicted looks for on a
	// JobFailed condition to decide whether the job was evicted.
	mpiJobEvict = "MPIJobEvicted"
)
    79  
    80  // initializeMPIJobStatuses initializes the ReplicaStatuses for MPIJob.
    81  func initializeMPIJobStatuses(mpiJob *kubeflowv1.MPIJob, rType kubeflowv1.ReplicaType) {
    82  	if mpiJob.Status.ReplicaStatuses == nil {
    83  		mpiJob.Status.ReplicaStatuses = make(map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaStatus)
    84  	}
    85  
    86  	mpiJob.Status.ReplicaStatuses[rType] = &kubeflowv1.ReplicaStatus{}
    87  }
    88  
    89  // updateMPIJobConditions updates the conditions of the given mpiJob.
    90  func updateMPIJobConditions(mpiJob *kubeflowv1.MPIJob, conditionType kubeflowv1.JobConditionType, reason, message string) error {
    91  	condition := newCondition(conditionType, reason, message)
    92  	setCondition(&mpiJob.Status, condition)
    93  	return nil
    94  }
    95  
    96  // newCondition creates a new mpiJob condition.
    97  func newCondition(conditionType kubeflowv1.JobConditionType, reason, message string) kubeflowv1.JobCondition {
    98  	return kubeflowv1.JobCondition{
    99  		Type:               conditionType,
   100  		Status:             corev1.ConditionTrue,
   101  		LastUpdateTime:     metav1.Now(),
   102  		LastTransitionTime: metav1.Now(),
   103  		Reason:             reason,
   104  		Message:            message,
   105  	}
   106  }
   107  
   108  // getCondition returns the condition with the provided type.
   109  func getCondition(status kubeflowv1.JobStatus, condType kubeflowv1.JobConditionType) *kubeflowv1.JobCondition {
   110  	for _, condition := range status.Conditions {
   111  		if condition.Type == condType {
   112  			return &condition
   113  		}
   114  	}
   115  	return nil
   116  }
   117  
   118  func isEvicted(status kubeflowv1.JobStatus) bool {
   119  	for _, condition := range status.Conditions {
   120  		if condition.Type == kubeflowv1.JobFailed &&
   121  			condition.Status == corev1.ConditionTrue &&
   122  			condition.Reason == mpiJobEvict {
   123  			return true
   124  		}
   125  	}
   126  	return false
   127  }
   128  
   129  // setCondition updates the mpiJob to include the provided condition.
   130  // If the condition that we are about to add already exists
   131  // and has the same status and reason then we are not going to update.
   132  func setCondition(status *kubeflowv1.JobStatus, condition kubeflowv1.JobCondition) {
   133  
   134  	currentCond := getCondition(*status, condition.Type)
   135  
   136  	// Do nothing if condition doesn't change
   137  	if currentCond != nil && currentCond.Status == condition.Status && currentCond.Reason == condition.Reason {
   138  		return
   139  	}
   140  
   141  	// Do not update lastTransitionTime if the status of the condition doesn't change.
   142  	if currentCond != nil && currentCond.Status == condition.Status {
   143  		condition.LastTransitionTime = currentCond.LastTransitionTime
   144  	}
   145  
   146  	// Append the updated condition
   147  	newConditions := filterOutCondition(status.Conditions, condition.Type)
   148  	status.Conditions = append(newConditions, condition)
   149  }
   150  
   151  // filterOutCondition returns a new slice of mpiJob conditions without conditions with the provided type.
   152  func filterOutCondition(conditions []kubeflowv1.JobCondition, condType kubeflowv1.JobConditionType) []kubeflowv1.JobCondition {
   153  	var newConditions []kubeflowv1.JobCondition
   154  	for _, c := range conditions {
   155  		if condType == kubeflowv1.JobRestarting && c.Type == kubeflowv1.JobRunning {
   156  			continue
   157  		}
   158  		if condType == kubeflowv1.JobRunning && c.Type == kubeflowv1.JobRestarting {
   159  			continue
   160  		}
   161  
   162  		if c.Type == condType {
   163  			continue
   164  		}
   165  
   166  		// Set the running condition status to be false when current condition failed or succeeded
   167  		if (condType == kubeflowv1.JobFailed || condType == kubeflowv1.JobSucceeded) && (c.Type == kubeflowv1.JobRunning || c.Type == kubeflowv1.JobFailed) {
   168  			c.Status = corev1.ConditionFalse
   169  		}
   170  
   171  		newConditions = append(newConditions, c)
   172  	}
   173  	return newConditions
   174  }
   175  
   176  func isPodFinished(j *corev1.Pod) bool {
   177  	return isPodSucceeded(j) || isPodFailed(j)
   178  }
   179  
   180  func isPodFailed(p *corev1.Pod) bool {
   181  	return p.Status.Phase == corev1.PodFailed
   182  }
   183  
   184  func isPodSucceeded(p *corev1.Pod) bool {
   185  	return p.Status.Phase == corev1.PodSucceeded
   186  }
   187  
   188  func isPodRunning(p *corev1.Pod) bool {
   189  	return p.Status.Phase == corev1.PodRunning
   190  }
   191  
   192  // isGPULauncher checks whether the launcher needs GPU.
   193  func isGPULauncher(mpiJob *kubeflowv1.MPIJob) bool {
   194  	for _, container := range mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Template.Spec.Containers {
   195  		for key := range container.Resources.Limits {
   196  			if strings.HasSuffix(string(key), gpuResourceNameSuffix) {
   197  				return true
   198  			}
   199  			if strings.Contains(string(key), gpuResourceNamePattern) {
   200  				return true
   201  			}
   202  		}
   203  	}
   204  	return false
   205  }
   206  
   207  // hasIntelMPIBootstrapValues returns the existence of I_MPI_HYDRA_BOOTSTRAP
   208  // and I_MPI_HYDRA_BOOTSTRAP_EXEC values.
   209  // There are also _EXEC_EXTRA_ARGS and _AUTOFORK under the I_MPI_HYDRA_BOOTSTRAP
   210  // prefix but those are not checked on purpose.
   211  func hasIntelMPIBootstrapValues(envs []corev1.EnvVar) (bootstrap, exec bool) {
   212  	for _, env := range envs {
   213  		if env.Name == "I_MPI_HYDRA_BOOTSTRAP" {
   214  			bootstrap = true
   215  		} else if env.Name == "I_MPI_HYDRA_BOOTSTRAP_EXEC" {
   216  			exec = true
   217  		}
   218  
   219  		if bootstrap && exec {
   220  			break
   221  		}
   222  	}
   223  
   224  	return bootstrap, exec
   225  }
   226  
   227  func defaultReplicaLabels(genericLabels map[string]string, roleLabelVal string) map[string]string {
   228  	replicaLabels := map[string]string{}
   229  	for k, v := range genericLabels {
   230  		replicaLabels[k] = v
   231  	}
   232  
   233  	replicaLabels[kubeflowv1.ReplicaTypeLabel] = roleLabelVal
   234  	return replicaLabels
   235  }
   236  
   237  func defaultWorkerLabels(genericLabels map[string]string) map[string]string {
   238  	return defaultReplicaLabels(genericLabels, worker)
   239  }
   240  
   241  func defaultLauncherLabels(genericLabels map[string]string) map[string]string {
   242  	return defaultReplicaLabels(genericLabels, launcher)
   243  }
   244  
   245  func workerSelector(genericLabels map[string]string) (labels.Selector, error) {
   246  	labels := defaultWorkerLabels(genericLabels)
   247  
   248  	labelSelector := metav1.LabelSelector{
   249  		MatchLabels: labels,
   250  	}
   251  
   252  	selector, err := metav1.LabelSelectorAsSelector(&labelSelector)
   253  	if err != nil {
   254  		return nil, err
   255  	}
   256  
   257  	return selector, nil
   258  }
   259  
   260  // initializeReplicaStatuses initializes the ReplicaStatuses for replica.
   261  // originally from pkg/controller.v1/tensorflow/status.go (deleted)
   262  func initializeReplicaStatuses(jobStatus *kubeflowv1.JobStatus, rtype kubeflowv1.ReplicaType) {
   263  	if jobStatus.ReplicaStatuses == nil {
   264  		jobStatus.ReplicaStatuses = make(map[kubeflowv1.ReplicaType]*kubeflowv1.ReplicaStatus)
   265  	}
   266  
   267  	jobStatus.ReplicaStatuses[rtype] = &kubeflowv1.ReplicaStatus{}
   268  }