---
# Source: volcano.sh/volcano@v1.9.0/example/kubecon-2019-china/mpi-sample/mpi-example.yaml
#
# Volcano Job that runs an MPI "hello world": one master task launches
# mpiexec against two worker tasks, each of which runs an sshd.
apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
  name: lm-mpi-job
  labels:
    # Set the job type according to business needs.
    volcano.sh/job-type: 'MPI'
spec:
  schedulerName: volcano
  # Minimum number of pods the job requires. It must not exceed the total
  # replica count; here it equals it (1 master + 2 workers).
  minAvailable: 3
  plugins:
    # Provides password-less ssh authentication between the pods.
    ssh: []
    # Provides the network information the job needs to run
    # (hosts files, headless service, etc.).
    svc: []
  # If any pod is evicted, restart the whole job.
  policies:
    - event: PodEvicted
      action: RestartJob
  tasks:
    - name: mpimaster
      replicas: 1
      # When mpiexec finishes (the master task completes), treat the whole
      # MPI job as complete.
      policies:
        - event: TaskCompleted
          action: CompleteJob
      template:
        spec:
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
          # Volcano places its generated information under /etc/volcano.
          containers:
            - name: mpimaster
              image: volcanosh/example-mpi:0.0.1
              workingDir: /home
              # The script below builds a comma-separated host list from
              # /etc/volcano/mpiworker.host, starts sshd in the background,
              # then runs mpi_hello_world with 2 processes on those hosts.
              command:
                - /bin/sh
                - '-c'
                - |
                  MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`;
                  mkdir -p /var/run/sshd; /usr/sbin/sshd;
                  mpiexec --allow-run-as-root --host ${MPI_HOST} -np 2 mpi_hello_world;
              ports:
                - name: mpijob-port
                  containerPort: 22
              resources:
                requests:
                  cpu: '500m'
                  memory: '1024Mi'
                limits:
                  cpu: '500m'
                  memory: '1024Mi'
    - name: mpiworker
      replicas: 2
      template:
        spec:
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
          containers:
            - name: mpiworker
              image: volcanosh/example-mpi:0.0.1
              workingDir: /home
              # Workers only run sshd in the foreground (-D), which keeps
              # the container running.
              command:
                - /bin/sh
                - '-c'
                - |
                  mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
              ports:
                - name: mpijob-port
                  containerPort: 22
              resources:
                requests:
                  cpu: '1024m'
                  memory: '2048Mi'
                limits:
                  cpu: '1024m'
                  memory: '2048Mi'