sigs.k8s.io/kueue@v0.6.2/site/static/examples/jobs/sample-pytorchjob.yaml (about)

     1  apiVersion: kubeflow.org/v1
        # Sample PyTorchJob with one Master and one Worker replica. Both replicas use
        # the same image and run the same mnist.py script with --epochs=1.
     2  kind: PyTorchJob
     3  metadata:
     4    name: pytorch-simple
     5    namespace: default
     6    labels:
            # Names the queue this job is submitted to. Presumably a Kueue LocalQueue
            # called "user-queue" must already exist in this namespace -- confirm.
     7      kueue.x-k8s.io/queue-name: user-queue
     8  spec:
          # One entry per replica role; this sample defines Master and Worker.
     9    pytorchReplicaSpecs:
    10      Master:
    11        replicas: 1
    12        restartPolicy: OnFailure
              # Pod template for the Master replica (single container).
    13        template:
    14          spec:
    15            containers:
    16              - name: pytorch
    17                image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727
    18                imagePullPolicy: Always
                      # Overrides the container command: run the script for one epoch.
    19                command:
    20                  - "python3"
    21                  - "/opt/pytorch-mnist/mnist.py"
    22                  - "--epochs=1"
                      # Only requests are set; no limits are declared.
                      # NOTE(review): presumably these requests are what Kueue counts
                      # against the queue's quota -- confirm.
    23                resources:
    24                  requests:
    25                    cpu: 1
    26                    memory: "200Mi"
            # Mirrors the Master spec: same replica count, restart policy, image,
            # command, and resource requests.
    27      Worker:
    28        replicas: 1
    29        restartPolicy: OnFailure
    30        template:
    31          spec:
    32            containers:
    33              - name: pytorch
    34                image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727
    35                imagePullPolicy: Always
    36                command:
    37                  - "python3"
    38                  - "/opt/pytorch-mnist/mnist.py"
    39                  - "--epochs=1"
    40                resources:
    41                  requests:
    42                    cpu: 1
    43                    memory: "200Mi"