# Source: volcano.sh/volcano@v1.9.0/example/kubecon-2019-china/gang/mpi-example.yaml
#
# Volcano Job that runs an MPI "hello world" with gang scheduling:
# one `mpimaster` pod launches `mpiexec` over SSH against three `mpiworker`
# pods that only run sshd.
---
apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
  name: lm-mpi-job
  labels:
    # Set the job type according to your workload.
    "volcano.sh/job-type": "MPI"
spec:
  # Minimum number of pods that must be schedulable before the job starts.
  # It must not exceed the total replicas of all tasks; here it equals the
  # total (1 mpimaster + 3 mpiworker = 4).
  minAvailable: 4
  schedulerName: volcano
  plugins:
    # Provides passwordless SSH authentication between the pods.
    ssh: []
    # Provides the network information the job needs to run
    # (hosts file, headless service, etc.).
    svc: []
  # Restart the whole job if any pod is evicted.
  policies:
    - event: PodEvicted
      action: RestartJob
  tasks:
    - replicas: 1
      name: mpimaster
      # When mpiexec finishes (this task completes), treat the whole MPI job
      # as complete.
      policies:
        - event: TaskCompleted
          action: CompleteJob
      template:
        spec:
          # Volcano places its job information under /etc/volcano; the command
          # below reads the worker host list from /etc/volcano/mpiworker.host
          # and joins it with commas for `mpiexec --host`.
          # NOTE(review): `tr "\n" ","` also converts a final newline, so the
          # list presumably ends with a trailing comma - confirm mpiexec
          # accepts that.
          # `-np 3` matches the replica count of the mpiworker task below;
          # keep the two in sync.
          containers:
            - command:
                - /bin/sh
                - -c
                - |
                  MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`;
                  mkdir -p /var/run/sshd; /usr/sbin/sshd;
                  mpiexec --allow-run-as-root --host ${MPI_HOST} -np 3 mpi_hello_world;
              image: volcanosh/example-mpi:0.0.1
              name: mpimaster
              ports:
                # SSH port used by the MPI launcher.
                - containerPort: 22
                  name: mpijob-port
              workingDir: /home
              resources:
                requests:
                  cpu: "500m"
                limits:
                  cpu: "500m"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
    - replicas: 3
      name: mpiworker
      template:
        spec:
          containers:
            # Workers only run sshd; `-D` keeps sshd from detaching, so it
            # stays the container's foreground process.
            - command:
                - /bin/sh
                - -c
                - |
                  mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
              image: volcanosh/example-mpi:0.0.1
              name: mpiworker
              ports:
                # SSH port the master connects to.
                - containerPort: 22
                  name: mpijob-port
              workingDir: /home
              resources:
                requests:
                  cpu: "1000m"
                limits:
                  cpu: "1000m"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret