github.com/kubeflow/training-operator@v1.7.0/pkg/apis/kubeflow.org/v1/tensorflow_validation.go (about)

     1  // Copyright 2018 The Kubeflow Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package v1
    16  
    17  import (
    18  	"fmt"
    19  
    20  	log "github.com/sirupsen/logrus"
    21  	apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation"
    22  )
    23  
    24  func ValidateV1TFJob(tfjob *TFJob) error {
    25  	if errors := apimachineryvalidation.NameIsDNS1035Label(tfjob.ObjectMeta.Name, false); errors != nil {
    26  		return fmt.Errorf("TFJob name is invalid: %v", errors)
    27  	}
    28  	if err := validateV1TFReplicaSpecs(tfjob.Spec.TFReplicaSpecs); err != nil {
    29  		return err
    30  	}
    31  	return nil
    32  }
    33  
    34  // IsChieforMaster returns true if the type is Master or Chief.
    35  func IsChieforMaster(typ ReplicaType) bool {
    36  	return typ == TFJobReplicaTypeChief || typ == TFJobReplicaTypeMaster
    37  }
    38  
    39  // IsWorker returns true if the type is Worker.
    40  func IsWorker(typ ReplicaType) bool {
    41  	return typ == TFJobReplicaTypeWorker
    42  }
    43  
    44  // IsEvaluator returns true if the type is Evaluator.
    45  func IsEvaluator(typ ReplicaType) bool {
    46  	return typ == TFJobReplicaTypeEval
    47  }
    48  
    49  func validateV1TFReplicaSpecs(specs map[ReplicaType]*ReplicaSpec) error {
    50  	if specs == nil {
    51  		return fmt.Errorf("TFJobSpec is not valid")
    52  	}
    53  	foundChief := 0
    54  	for rType, value := range specs {
    55  		if value == nil || len(value.Template.Spec.Containers) == 0 {
    56  			return fmt.Errorf("TFJobSpec is not valid: containers definition expected in %v", rType)
    57  		}
    58  		if IsChieforMaster(rType) {
    59  			foundChief++
    60  		}
    61  		// Make sure the image is defined in the container.
    62  		numNamedTensorflow := 0
    63  		for _, container := range value.Template.Spec.Containers {
    64  			if container.Image == "" {
    65  				msg := fmt.Sprintf("TFJobSpec is not valid: Image is undefined in the container of %v", rType)
    66  				log.Error(msg)
    67  				return fmt.Errorf(msg)
    68  			}
    69  			if container.Name == TFJobDefaultContainerName {
    70  				numNamedTensorflow++
    71  			}
    72  		}
    73  		// Make sure there has at least one container named "tensorflow".
    74  		if numNamedTensorflow == 0 {
    75  			msg := fmt.Sprintf("TFJobSpec is not valid: There is no container named %s in %v", TFJobDefaultContainerName, rType)
    76  			log.Error(msg)
    77  			return fmt.Errorf(msg)
    78  		}
    79  	}
    80  	if foundChief > 1 {
    81  		return fmt.Errorf("TFJobSpec is not valid: more than 1 chief/master found")
    82  	}
    83  	return nil
    84  }