# Source: sigs.k8s.io/kueue@v0.6.2/site/static/examples/jobs/sample-mxjob.yaml
#
# Sample Kubeflow MXJob labelled with the Kueue queue name `user-queue`.
# It declares three replica groups (Scheduler, Server, Worker), each with a
# single replica that is never restarted, all running the same image and
# exposing port 9991 under the name `mxjob-port`.
#
# NOTE(review): every container uses the mutable `latest` image tag; consider
# pinning a specific tag or digest for reproducible runs.
# NOTE(review): the image name suggests a GPU build, but only cpu/memory
# limits are declared below — confirm whether a GPU limit is intended.
---
apiVersion: kubeflow.org/v1
kind: MXJob
metadata:
  name: mxnet-job
  labels:
    # Queue name label read by Kueue.
    kueue.x-k8s.io/queue-name: user-queue
spec:
  jobMode: MXTrain
  mxReplicaSpecs:
    Scheduler:
      replicas: 1
      restartPolicy: Never
      template:
        spec:
          containers:
            - name: mxnet
              image: kubeflow/mxnet-gpu:latest
              resources:
                limits:
                  cpu: 100m
                  memory: 0.2Gi
              ports:
                - containerPort: 9991
                  name: mxjob-port
    Server:
      replicas: 1
      restartPolicy: Never
      template:
        spec:
          containers:
            - name: mxnet
              image: kubeflow/mxnet-gpu:latest
              resources:
                limits:
                  cpu: 100m
                  memory: 0.2Gi
              ports:
                - containerPort: 9991
                  name: mxjob-port
    Worker:
      replicas: 1
      restartPolicy: Never
      template:
        spec:
          containers:
            - name: mxnet
              image: kubeflow/mxnet-gpu:latest
              # Runs the MNIST image-classification example for one epoch.
              command:
                - python3
              args:
                - /mxnet/mxnet/example/image-classification/train_mnist.py
                - --num-epochs=1
                - --num-layers=2
                - --kv-store=dist_device_sync
              resources:
                limits:
                  cpu: 2
                  memory: 1Gi
              ports:
                - containerPort: 9991
                  name: mxjob-port