github.com/kubeflow/training-operator@v1.7.0/pkg/apis/kubeflow.org/v1/mpi_validation.go (about)

     1  // Copyright 2021 The Kubeflow Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package v1
    16  
    17  import (
    18  	"fmt"
    19  )
    20  
    21  func ValidateV1MpiJobSpec(c *MPIJobSpec) error {
    22  	if c.MPIReplicaSpecs == nil {
    23  		return fmt.Errorf("MPIReplicaSpecs is not valid")
    24  	}
    25  	launcherExists := false
    26  	for rType, value := range c.MPIReplicaSpecs {
    27  		if value == nil || len(value.Template.Spec.Containers) == 0 {
    28  			return fmt.Errorf("MPIReplicaSpecs is not valid: containers definition expected in %v", rType)
    29  		}
    30  		// Make sure the replica type is valid.
    31  		validReplicaTypes := []ReplicaType{MPIJobReplicaTypeLauncher, MPIJobReplicaTypeWorker}
    32  
    33  		isValidReplicaType := false
    34  		for _, t := range validReplicaTypes {
    35  			if t == rType {
    36  				isValidReplicaType = true
    37  				break
    38  			}
    39  		}
    40  		if !isValidReplicaType {
    41  			return fmt.Errorf("MPIReplicaType is %v but must be one of %v", rType, validReplicaTypes)
    42  		}
    43  
    44  		for _, container := range value.Template.Spec.Containers {
    45  			if container.Image == "" {
    46  				msg := fmt.Sprintf("MPIReplicaSpec is not valid: Image is undefined in the container of %v", rType)
    47  				return fmt.Errorf(msg)
    48  			}
    49  
    50  			if container.Name == "" {
    51  				msg := fmt.Sprintf("MPIReplicaSpec is not valid: ImageName is undefined in the container of %v", rType)
    52  				return fmt.Errorf(msg)
    53  			}
    54  		}
    55  		if rType == MPIJobReplicaTypeLauncher {
    56  			launcherExists = true
    57  			if value.Replicas != nil && int(*value.Replicas) != 1 {
    58  				return fmt.Errorf("MPIReplicaSpec is not valid: There must be only 1 launcher replica")
    59  			}
    60  		}
    61  
    62  	}
    63  
    64  	if !launcherExists {
    65  		return fmt.Errorf("MPIReplicaSpec is not valid: Master ReplicaSpec must be present")
    66  	}
    67  	return nil
    68  
    69  }