github.com/kubeflow/training-operator@v1.7.0/pkg/apis/kubeflow.org/v1/pytorch_validation_test.go (about)

     1  // Copyright 2018 The Kubeflow Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package v1
    16  
    17  import (
    18  	"testing"
    19  
    20  	corev1 "k8s.io/api/core/v1"
    21  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    22  	"k8s.io/utils/pointer"
    23  )
    24  
    25  func TestValidateV1PyTorchJob(t *testing.T) {
    26  	validPyTorchReplicaSpecs := map[ReplicaType]*ReplicaSpec{
    27  		PyTorchJobReplicaTypeMaster: {
    28  			Replicas:      pointer.Int32(1),
    29  			RestartPolicy: RestartPolicyOnFailure,
    30  			Template: corev1.PodTemplateSpec{
    31  				Spec: corev1.PodSpec{
    32  					Containers: []corev1.Container{{
    33  						Name:            "pytorch",
    34  						Image:           "docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727",
    35  						ImagePullPolicy: corev1.PullAlways,
    36  						Command: []string{
    37  							"python3",
    38  							"/opt/pytorch-mnist/mnist.py",
    39  							"--epochs=1",
    40  						},
    41  					}},
    42  				},
    43  			},
    44  		},
    45  		PyTorchJobReplicaTypeWorker: {
    46  			Replicas:      pointer.Int32(1),
    47  			RestartPolicy: RestartPolicyOnFailure,
    48  			Template: corev1.PodTemplateSpec{
    49  				Spec: corev1.PodSpec{
    50  					Containers: []corev1.Container{{
    51  						Name:            "pytorch",
    52  						Image:           "docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727",
    53  						ImagePullPolicy: corev1.PullAlways,
    54  						Command: []string{
    55  							"python3",
    56  							"/opt/pytorch-mnist/mnist.py",
    57  							"--epochs=1",
    58  						},
    59  					}},
    60  				},
    61  			},
    62  		},
    63  	}
    64  
    65  	testCases := map[string]struct {
    66  		pytorchJob *PyTorchJob
    67  		wantErr    bool
    68  	}{
    69  		"valid PyTorchJob": {
    70  			pytorchJob: &PyTorchJob{
    71  				ObjectMeta: metav1.ObjectMeta{
    72  					Name: "test",
    73  				},
    74  				Spec: PyTorchJobSpec{
    75  					PyTorchReplicaSpecs: validPyTorchReplicaSpecs,
    76  				},
    77  			},
    78  			wantErr: false,
    79  		},
    80  		"pytorchJob name does not meet DNS1035": {
    81  			pytorchJob: &PyTorchJob{
    82  				ObjectMeta: metav1.ObjectMeta{
    83  					Name: "0-test",
    84  				},
    85  				Spec: PyTorchJobSpec{
    86  					PyTorchReplicaSpecs: validPyTorchReplicaSpecs,
    87  				},
    88  			},
    89  			wantErr: true,
    90  		},
    91  		"no containers": {
    92  			pytorchJob: &PyTorchJob{
    93  				ObjectMeta: metav1.ObjectMeta{
    94  					Name: "test",
    95  				},
    96  				Spec: PyTorchJobSpec{
    97  					PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{
    98  						PyTorchJobReplicaTypeWorker: {
    99  							Template: corev1.PodTemplateSpec{
   100  								Spec: corev1.PodSpec{
   101  									Containers: []corev1.Container{},
   102  								},
   103  							},
   104  						},
   105  					},
   106  				},
   107  			},
   108  			wantErr: true,
   109  		},
   110  		"image is empty": {
   111  			pytorchJob: &PyTorchJob{
   112  				ObjectMeta: metav1.ObjectMeta{
   113  					Name: "test",
   114  				},
   115  				Spec: PyTorchJobSpec{
   116  					PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{
   117  						PyTorchJobReplicaTypeWorker: {
   118  							Template: corev1.PodTemplateSpec{
   119  								Spec: corev1.PodSpec{
   120  									Containers: []corev1.Container{
   121  										{
   122  											Name:  "pytorch",
   123  											Image: "",
   124  										},
   125  									},
   126  								},
   127  							},
   128  						},
   129  					},
   130  				},
   131  			},
   132  			wantErr: true,
   133  		},
   134  		"pytorchJob default container name doesn't present": {
   135  			pytorchJob: &PyTorchJob{
   136  				ObjectMeta: metav1.ObjectMeta{
   137  					Name: "test",
   138  				},
   139  				Spec: PyTorchJobSpec{
   140  					PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{
   141  						PyTorchJobReplicaTypeWorker: {
   142  							Template: corev1.PodTemplateSpec{
   143  								Spec: corev1.PodSpec{
   144  									Containers: []corev1.Container{
   145  										{
   146  											Name:  "",
   147  											Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0",
   148  										},
   149  									},
   150  								},
   151  							},
   152  						},
   153  					},
   154  				},
   155  			},
   156  			wantErr: true,
   157  		},
   158  		"the number of replicas in masterReplica is other than 1": {
   159  			pytorchJob: &PyTorchJob{
   160  				ObjectMeta: metav1.ObjectMeta{
   161  					Name: "test",
   162  				},
   163  				Spec: PyTorchJobSpec{
   164  					PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{
   165  						PyTorchJobReplicaTypeMaster: {
   166  							Replicas: pointer.Int32(2),
   167  							Template: corev1.PodTemplateSpec{
   168  								Spec: corev1.PodSpec{
   169  									Containers: []corev1.Container{
   170  										{
   171  											Name:  "pytorch",
   172  											Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0",
   173  										},
   174  									},
   175  								},
   176  							},
   177  						},
   178  					},
   179  				},
   180  			},
   181  			wantErr: true,
   182  		},
   183  		"Spec.NprocPerNode and Spec.ElasticPolicy.NProcPerNode are set": {
   184  			pytorchJob: &PyTorchJob{
   185  				ObjectMeta: metav1.ObjectMeta{
   186  					Name: "test",
   187  				},
   188  				Spec: PyTorchJobSpec{
   189  					NprocPerNode: pointer.String("1"),
   190  					ElasticPolicy: &ElasticPolicy{
   191  						NProcPerNode: pointer.Int32(1),
   192  					},
   193  					PyTorchReplicaSpecs: map[ReplicaType]*ReplicaSpec{
   194  						PyTorchJobReplicaTypeMaster: {
   195  							Replicas: pointer.Int32(2),
   196  							Template: corev1.PodTemplateSpec{
   197  								Spec: corev1.PodSpec{
   198  									Containers: []corev1.Container{
   199  										{
   200  											Name:  "pytorch",
   201  											Image: "gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0",
   202  										},
   203  									},
   204  								},
   205  							},
   206  						},
   207  					},
   208  				},
   209  			},
   210  			wantErr: true,
   211  		},
   212  	}
   213  
   214  	for name, tc := range testCases {
   215  		t.Run(name, func(t *testing.T) {
   216  			got := ValidateV1PyTorchJob(tc.pytorchJob)
   217  			if (got != nil) != tc.wantErr {
   218  				t.Fatalf("ValidateV1PyTorchJob() error = %v, wantErr %v", got, tc.wantErr)
   219  			}
   220  		})
   221  	}
   222  }