volcano.sh/volcano@v1.9.0/example/kubecon-2019-china/mpi-sample/mpi-example.yaml (about)

     1  apiVersion: batch.volcano.sh/v1alpha1
     2  kind: Job
        # Volcano Job for the MPI sample: one mpimaster pod runs mpiexec, and two
        # mpiworker replicas run sshd (both tasks are listed under spec.tasks).
     3  metadata:
     4    name: lm-mpi-job
     5    labels:
     6      # Job-type label; set it according to what the workload needs.
     7      "volcano.sh/job-type": "MPI"
     8  spec:
     9    # Minimum number of pods the job needs (no more than the total replica count).
          # Here it equals the total: 1 mpimaster + 2 mpiworker = 3.
    10    minAvailable: 3
    11    schedulerName: volcano
    12    plugins:
    13      # Provides passwordless SSH authentication.
    14      ssh: []
    15      # Provides the network information the job needs: hosts file, headless service, etc.
    16      svc: []
    17    # If a pod is evicted (killed), restart the whole job.
    18    policies:
    19      - event: PodEvicted
    20        action: RestartJob
    21    tasks:
    22      - replicas: 1
              # Master task: a single pod that starts sshd and then runs mpiexec.
    23        name: mpimaster
    24        # When mpiexec finishes (this task completes), treat the whole MPI job as complete.
    25        policies:
    26          - event: TaskCompleted
    27            action: CompleteJob
    28        template:
    29          spec:
    30            # Volcano places its job information under /etc/volcano.
    31            containers:
                    # The shell script below does three things in order:
                    #   1. reads /etc/volcano/mpiworker.host and replaces its newlines
                    #      with commas to build the value passed to mpiexec --host;
                    #   2. creates /var/run/sshd and starts /usr/sbin/sshd;
                    #   3. runs mpi_hello_world through mpiexec with -np 2.
                    # NOTE(review): if the host file ends with a newline, the list
                    # ends with a trailing comma; confirm mpiexec accepts that.
    32              - command:
    33                  - /bin/sh
    34                  - -c
    35                  - |
    36                    MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`;
    37                    mkdir -p /var/run/sshd; /usr/sbin/sshd;
    38                    mpiexec --allow-run-as-root --host ${MPI_HOST} -np 2 mpi_hello_world;
    39                image: volcanosh/example-mpi:0.0.1
    40                name: mpimaster
    41                ports:
    42                  - containerPort: 22
    43                    name: mpijob-port
    44                workingDir: /home
                      # Requests and limits are set to the same values.
    45                resources:
    46                  requests:
    47                    cpu: "500m"
    48                    memory: "1024Mi"
    49                  limits:
    50                    cpu: "500m"
    51                    memory: "1024Mi"
    52            restartPolicy: OnFailure
                  # NOTE(review): default-secret is not defined in this file; it is
                  # presumably expected to already exist in the namespace. Confirm.
    53            imagePullSecrets:
    54              - name: default-secret
    55      - replicas: 2
              # Worker task: two replicas whose container command only runs sshd.
    56        name: mpiworker
    57        template:
    58          spec:
    59            containers:
                    # The script creates /var/run/sshd and then runs sshd with -D as
                    # its final command.
                    # NOTE(review): -D presumably keeps sshd in the foreground (no
                    # daemonizing) so the container stays up; confirm in sshd(8).
    60              - command:
    61                  - /bin/sh
    62                  - -c
    63                  - |
    64                    mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
    65                image: volcanosh/example-mpi:0.0.1
    66                name: mpiworker
    67                ports:
    68                  - containerPort: 22
    69                    name: mpijob-port
    70                workingDir: /home
                      # Requests and limits are set to the same values.
    71                resources:
    72                  requests:
    73                    cpu: "1024m"
    74                    memory: "2048Mi"
    75                  limits:
    76                    cpu: "1024m"
    77                    memory: "2048Mi"
    78            restartPolicy: OnFailure
                  # NOTE(review): default-secret is not defined in this file; it is
                  # presumably expected to already exist in the namespace. Confirm.
    79            imagePullSecrets:
    80              - name: default-secret
    81