github.com/kubeflow/training-operator@v1.7.0/examples/pytorch/simple.yaml (about)

     1  apiVersion: "kubeflow.org/v1"  # Kubeflow API group, version v1
     2  kind: PyTorchJob  # custom resource kind; this example declares one PyTorch training job
     3  metadata:
     4    name: pytorch-simple  # name of this job object
     5    namespace: kubeflow  # the job is created in the `kubeflow` namespace
     6  spec:
     7    pytorchReplicaSpecs:  # one entry per replica role; this example defines Master and Worker
     8      Master:
     9        replicas: 1  # one replica of the Master role
    10        restartPolicy: OnFailure  # NOTE(review): presumably restarts the replica only after a failed exit - confirm against operator docs
    11        template:
    12          spec:
    13            containers:
    14              - name: pytorch  # NOTE(review): same container name is used for both roles; the operator may expect this exact name - confirm
    15                image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727  # image pinned to an explicit tag rather than `latest`
    16                imagePullPolicy: Always  # image is re-pulled on every container start even though the tag is pinned
    17                command:  # argv for the container: python3 running the mnist.py script
    18                  - "python3"
    19                  - "/opt/pytorch-mnist/mnist.py"
    20                  - "--epochs=1"  # presumably limits training to a single epoch - verify against mnist.py
    21      Worker:
    22        replicas: 1  # one replica of the Worker role
    23        restartPolicy: OnFailure  # same restart policy as the Master role
    24        template:
    25          spec:
    26            containers:
    27              - name: pytorch  # same container name as in the Master role
    28                image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727  # same pinned image as the Master role
    29                imagePullPolicy: Always  # same pull policy as the Master role
    30                command:  # identical command line to the Master role
    31                  - "python3"
    32                  - "/opt/pytorch-mnist/mnist.py"
    33                  - "--epochs=1"