github.com/kubeflow/training-operator@v1.7.0/pkg/apis/kubeflow.org/v1/pytorch_validation_test.go (about) 1 // Copyright 2018 The Kubeflow Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package v1 16 17 import ( 18 "testing" 19 20 corev1 "k8s.io/api/core/v1" 21 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 22 "k8s.io/utils/pointer" 23 ) 24 25 func TestValidateV1PyTorchJob(t *testing.T) { 26 validPyTorchReplicaSpecs := map[ReplicaType]*ReplicaSpec{ 27 PyTorchJobReplicaTypeMaster: { 28 Replicas: pointer.Int32(1), 29 RestartPolicy: RestartPolicyOnFailure, 30 Template: corev1.PodTemplateSpec{ 31 Spec: corev1.PodSpec{ 32 Containers: []corev1.Container{{ 33 Name: "pytorch", 34 Image: "docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727", 35 ImagePullPolicy: corev1.PullAlways, 36 Command: []string{ 37 "python3", 38 "/opt/pytorch-mnist/mnist.py", 39 "--epochs=1", 40 }, 41 }}, 42 }, 43 }, 44 }, 45 PyTorchJobReplicaTypeWorker: { 46 Replicas: pointer.Int32(1), 47 RestartPolicy: RestartPolicyOnFailure, 48 Template: corev1.PodTemplateSpec{ 49 Spec: corev1.PodSpec{ 50 Containers: []corev1.Container{{ 51 Name: "pytorch", 52 Image: "docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727", 53 ImagePullPolicy: corev1.PullAlways, 54 Command: []string{ 55 "python3", 56 "/opt/pytorch-mnist/mnist.py", 57 "--epochs=1", 58 }, 59 }}, 60 }, 61 }, 62 }, 63 } 64 65 testCases := map[string]struct { 66 pytorchJob *PyTorchJob 67 wantErr bool 68 }{ 69 "valid PyTorchJob": { 70 pytorchJob: &PyTorchJob{ 71 ObjectMeta: metav1.ObjectMeta{ 72 Name: "test", 73 }, 74 Spec: PyTorchJobSpec{ 75 PyTorchReplicaSpecs: validPyTorchReplicaSpecs, 76 }, 77 }, 78 wantErr: false, 79 }, 80 "pytorchJob name does not meet DNS1035": { 81 pytorchJob: &PyTorchJob{ 82 ObjectMeta: metav1.ObjectMeta{ 83 Name: "0-test", 84 }, 85 Spec: PyTorchJobSpec{ 86 PyTorchReplicaSpecs: validPyTorchReplicaSpecs, 87 }, 88 }, 89 wantErr: true, 90 }, 91 "no containers": { 92 pytorchJob: &PyTorchJob{ 93 ObjectMeta: metav1.ObjectMeta{ 94 Name: "test", 95 }, 96 Spec: PyTorchJobSpec{ 97 PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{ 98 PyTorchJobReplicaTypeWorker: { 99 Template: corev1.PodTemplateSpec{ 100 Spec: corev1.PodSpec{ 101 Containers: []corev1.Container{}, 102 }, 103 }, 104 }, 105 }, 106 }, 107 }, 108 wantErr: true, 109 }, 110 "image is empty": { 111 pytorchJob: &PyTorchJob{ 112 ObjectMeta: metav1.ObjectMeta{ 113 Name: "test", 114 }, 115 Spec: PyTorchJobSpec{ 116 PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{ 117 PyTorchJobReplicaTypeWorker: { 118 Template: corev1.PodTemplateSpec{ 119 Spec: corev1.PodSpec{ 120 Containers: []corev1.Container{ 121 { 122 Name: "pytorch", 123 Image: "", 124 }, 125 }, 126 }, 127 }, 128 }, 129 }, 130 }, 131 }, 132 wantErr: true, 133 }, 134 "pytorchJob default container name doesn't present": { 135 pytorchJob: &PyTorchJob{ 136 ObjectMeta: metav1.ObjectMeta{ 137 Name: "test", 138 }, 139 Spec: PyTorchJobSpec{ 140 PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{ 141 PyTorchJobReplicaTypeWorker: { 142 Template: corev1.PodTemplateSpec{ 143 Spec: corev1.PodSpec{ 144 Containers: []corev1.Container{ 145 { 146 Name: "", 147 Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0", 148 }, 149 }, 150 }, 151 }, 152 }, 153 }, 154 }, 155 }, 156 wantErr: true, 157 }, 158 "the number of replicas in masterReplica is other than 1": { 159 pytorchJob: &PyTorchJob{ 160 ObjectMeta: metav1.ObjectMeta{ 161 Name: "test", 162 }, 163 Spec: PyTorchJobSpec{ 164 PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{ 165 PyTorchJobReplicaTypeMaster: { 166 Replicas: pointer.Int32(2), 167 Template: corev1.PodTemplateSpec{ 168 Spec: corev1.PodSpec{ 169 Containers: []corev1.Container{ 170 { 171 Name: "pytorch", 172 Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0", 173 }, 174 }, 175 }, 176 }, 177 }, 178 }, 179 }, 180 }, 181 wantErr: true, 182 }, 183 "Spec.NprocPerNode and Spec.ElasticPolicy.NProcPerNode are set": { 184 pytorchJob: &PyTorchJob{ 185 ObjectMeta: metav1.ObjectMeta{ 186 Name: "test", 187 }, 188 Spec: PyTorchJobSpec{ 189 NprocPerNode: pointer.String("1"), 190 ElasticPolicy: &ElasticPolicy{ 191 NProcPerNode: pointer.Int32(1), 192 }, 193 PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{ 194 PyTorchJobReplicaTypeMaster: { 195 Replicas: pointer.Int32(2), 196 Template: corev1.PodTemplateSpec{ 197 Spec: corev1.PodSpec{ 198 Containers: []corev1.Container{ 199 { 200 Name: "pytorch", 201 Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0", 202 }, 203 }, 204 }, 205 }, 206 }, 207 }, 208 }, 209 }, 210 wantErr: true, 211 }, 212 } 213 214 for name, tc := range testCases { 215 t.Run(name, func(t *testing.T) { 216 got := ValidateV1PyTorchJob(tc.pytorchJob) 217 if (got != nil) != tc.wantErr { 218 t.Fatalf("ValidateV1PyTorchJob() error = %v, wantErr %v", got, tc.wantErr) 219 } 220 }) 221 } 222 }