# Source: github.com/kubeflow/training-operator@v1.7.0/examples/pytorch/simple.yaml
#
# PyTorchJob "pytorch-simple" in the "kubeflow" namespace. It declares two
# replica groups, Master and Worker, with one replica each. Both groups use
# the same container image and invoke the same script with the same flag.
---
apiVersion: kubeflow.org/v1
kind: PyTorchJob
metadata:
  name: pytorch-simple
  namespace: kubeflow
spec:
  pytorchReplicaSpecs:
    # Master replica group: a single replica.
    Master:
      replicas: 1
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727
              imagePullPolicy: Always
              # Entry point: python3 runs mnist.py with --epochs=1.
              command:
                - python3
                - /opt/pytorch-mnist/mnist.py
                - '--epochs=1'
    # Worker replica group: a single replica, same container settings as Master.
    Worker:
      replicas: 1
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727
              imagePullPolicy: Always
              # Same command line as the Master container.
              command:
                - python3
                - /opt/pytorch-mnist/mnist.py
                - '--epochs=1'