---
# Origin: volcano.sh/volcano@v1.9.0/example/integrations/tensorflow/dist-mnist/tf-dist-mnist-example.yaml
#
# Volcano Job that runs the dist-mnist TensorFlow example with one "ps" task
# and two "worker" tasks. Each container builds a TF_CONFIG JSON document from
# the host lists under /etc/volcano/ and then starts dist_mnist.py.
apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
  name: tensorflow-dist-mnist
spec:
  # Equals the total replica count declared under `tasks` (1 ps + 2 worker).
  minAvailable: 3
  schedulerName: volcano
  # NOTE(review): the env and svc plugins presumably supply VK_TASK_INDEX and
  # the /etc/volcano/*.host files read by the commands below - confirm against
  # the Volcano job-plugin documentation.
  plugins:
    env: []
    svc: []
  # Job-level policy: restart the whole job when a pod is evicted.
  policies:
    - event: PodEvicted
      action: RestartJob
  queue: default
  tasks:
    - replicas: 1
      name: ps
      template:
        spec:
          containers:
            # The script appends ":2222" to every line of each host file,
            # wraps each line in double quotes, and replaces newlines with
            # commas; the results are spliced into the "ps" and "worker"
            # arrays of TF_CONFIG. This task reports its type as "ps".
            - command:
                - sh
                - -c
                - |
                  PS_HOST=`cat /etc/volcano/ps.host | sed 's/$/&:2222/g' | sed 's/^/"/;s/$/"/' | tr "\n" ","`;
                  WORKER_HOST=`cat /etc/volcano/worker.host | sed 's/$/&:2222/g' | sed 's/^/"/;s/$/"/' | tr "\n" ","`;
                  export TF_CONFIG={\"cluster\":{\"ps\":[${PS_HOST}],\"worker\":[${WORKER_HOST}]},\"task\":{\"type\":\"ps\",\"index\":${VK_TASK_INDEX}},\"environment\":\"cloud\"};
                  python /var/tf_dist_mnist/dist_mnist.py
              image: volcanosh/dist-mnist-tf-example:0.0.1
              name: tensorflow
              ports:
                # Same port number the script appends to every host entry.
                - containerPort: 2222
                  name: tfjob-port
              resources: {}
          restartPolicy: Never
    - replicas: 2
      name: worker
      # Task-level policy: mark the job complete once this task completes.
      policies:
        - event: TaskCompleted
          action: CompleteJob
      template:
        spec:
          containers:
            # Same script as the ps task, except the TF_CONFIG task type is
            # "worker".
            - command:
                - sh
                - -c
                - |
                  PS_HOST=`cat /etc/volcano/ps.host | sed 's/$/&:2222/g' | sed 's/^/"/;s/$/"/' | tr "\n" ","`;
                  WORKER_HOST=`cat /etc/volcano/worker.host | sed 's/$/&:2222/g' | sed 's/^/"/;s/$/"/' | tr "\n" ","`;
                  export TF_CONFIG={\"cluster\":{\"ps\":[${PS_HOST}],\"worker\":[${WORKER_HOST}]},\"task\":{\"type\":\"worker\",\"index\":${VK_TASK_INDEX}},\"environment\":\"cloud\"};
                  python /var/tf_dist_mnist/dist_mnist.py
              image: volcanosh/dist-mnist-tf-example:0.0.1
              name: tensorflow
              ports:
                # Same port number the script appends to every host entry.
                - containerPort: 2222
                  name: tfjob-port
              resources: {}
          restartPolicy: Never