# Source: volcano.sh/volcano@v1.9.0/example/integrations/mxnet/train/train-mnist-cpu.yaml
#
# Volcano Job that runs the MXNet MNIST CPU training image as three task
# groups: 2 workers, 2 parameter servers and 1 scheduler.
# Every container receives the same DMLC_* settings except DMLC_ROLE.
---
apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
  name: mxnet-job
spec:
  # Equals the sum of all task replicas below (2 + 2 + 1); keep in sync when
  # changing any replica count.
  minAvailable: 5
  schedulerName: volcano
  # Restart the whole job when any pod is evicted or fails.
  policies:
    - event: PodEvicted
      action: RestartJob
    - event: PodFailed
      action: RestartJob
  plugins:
    # NOTE(review): the svc plugin is presumably what makes the
    # "mxnet-job-scheduler-0.mxnet-job" hostname used in DMLC_PS_ROOT_URI
    # resolvable - confirm against the Volcano plugin documentation.
    svc: []
  tasks:
    - replicas: 2
      name: worker
      template:
        spec:
          imagePullSecrets:
            - name: default-secret
          containers:
            - image: volcanosh/mxnet-train-mnist-cpu:v1
              # Only the worker task passes explicit container args.
              args:
                - --kv-store=dist_sync
              imagePullPolicy: IfNotPresent
              name: mxnet
              env:
                - name: DMLC_PS_ROOT_PORT
                  value: "9000"
                # Points at pod 0 of the "scheduler" task of this job.
                - name: DMLC_PS_ROOT_URI
                  value: mxnet-job-scheduler-0.mxnet-job
                # Matches the "server" task replica count.
                - name: DMLC_NUM_SERVER
                  value: "2"
                # Matches the "worker" task replica count.
                - name: DMLC_NUM_WORKER
                  value: "2"
                - name: DMLC_ROLE
                  value: "worker"
                - name: DMLC_USE_KUBERNETES
                  value: "1"
          restartPolicy: OnFailure
    - replicas: 2
      name: server
      template:
        spec:
          imagePullSecrets:
            - name: default-secret
          containers:
            - image: volcanosh/mxnet-train-mnist-cpu:v1
              imagePullPolicy: IfNotPresent
              name: mxnet
              env:
                - name: DMLC_PS_ROOT_PORT
                  value: "9000"
                - name: DMLC_PS_ROOT_URI
                  value: mxnet-job-scheduler-0.mxnet-job
                - name: DMLC_NUM_SERVER
                  value: "2"
                - name: DMLC_NUM_WORKER
                  value: "2"
                - name: DMLC_ROLE
                  value: "server"
                - name: DMLC_USE_KUBERNETES
                  value: "1"
          restartPolicy: OnFailure
    - replicas: 1
      name: scheduler
      template:
        spec:
          imagePullSecrets:
            - name: default-secret
          containers:
            - image: volcanosh/mxnet-train-mnist-cpu:v1
              imagePullPolicy: IfNotPresent
              name: mxnet
              env:
                - name: DMLC_PS_ROOT_PORT
                  value: "9000"
                - name: DMLC_PS_ROOT_URI
                  value: mxnet-job-scheduler-0.mxnet-job
                - name: DMLC_NUM_SERVER
                  value: "2"
                - name: DMLC_NUM_WORKER
                  value: "2"
                - name: DMLC_ROLE
                  value: "scheduler"
                - name: DMLC_USE_KUBERNETES
                  value: "1"
          restartPolicy: OnFailure