github.com/kubeflow/training-operator@v1.7.0/pkg/apis/kubeflow.org/v1/tensorflow_validation.go (about) 1 // Copyright 2018 The Kubeflow Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package v1 16 17 import ( 18 "fmt" 19 20 log "github.com/sirupsen/logrus" 21 apimachineryvalidation "k8s.io/apimachinery/pkg/api/validation" 22 ) 23 24 func ValidateV1TFJob(tfjob *TFJob) error { 25 if errors := apimachineryvalidation.NameIsDNS1035Label(tfjob.ObjectMeta.Name, false); errors != nil { 26 return fmt.Errorf("TFJob name is invalid: %v", errors) 27 } 28 if err := validateV1TFReplicaSpecs(tfjob.Spec.TFReplicaSpecs); err != nil { 29 return err 30 } 31 return nil 32 } 33 34 // IsChieforMaster returns true if the type is Master or Chief. 35 func IsChieforMaster(typ ReplicaType) bool { 36 return typ == TFJobReplicaTypeChief || typ == TFJobReplicaTypeMaster 37 } 38 39 // IsWorker returns true if the type is Worker. 40 func IsWorker(typ ReplicaType) bool { 41 return typ == TFJobReplicaTypeWorker 42 } 43 44 // IsEvaluator returns true if the type is Evaluator. 45 func IsEvaluator(typ ReplicaType) bool { 46 return typ == TFJobReplicaTypeEval 47 } 48 49 func validateV1TFReplicaSpecs(specs map[ReplicaType]*ReplicaSpec) error { 50 if specs == nil { 51 return fmt.Errorf("TFJobSpec is not valid") 52 } 53 foundChief := 0 54 for rType, value := range specs { 55 if value == nil || len(value.Template.Spec.Containers) == 0 { 56 return fmt.Errorf("TFJobSpec is not valid: containers definition expected in %v", rType) 57 } 58 if IsChieforMaster(rType) { 59 foundChief++ 60 } 61 // Make sure the image is defined in the container. 62 numNamedTensorflow := 0 63 for _, container := range value.Template.Spec.Containers { 64 if container.Image == "" { 65 msg := fmt.Sprintf("TFJobSpec is not valid: Image is undefined in the container of %v", rType) 66 log.Error(msg) 67 return fmt.Errorf(msg) 68 } 69 if container.Name == TFJobDefaultContainerName { 70 numNamedTensorflow++ 71 } 72 } 73 // Make sure there has at least one container named "tensorflow". 74 if numNamedTensorflow == 0 { 75 msg := fmt.Sprintf("TFJobSpec is not valid: There is no container named %s in %v", TFJobDefaultContainerName, rType) 76 log.Error(msg) 77 return fmt.Errorf(msg) 78 } 79 } 80 if foundChief > 1 { 81 return fmt.Errorf("TFJobSpec is not valid: more than 1 chief/master found") 82 } 83 return nil 84 }