volcano.sh/volcano@v1.9.0/example/integrations/mxnet/train/train-mnist-cpu.yaml (about)

     1  apiVersion: batch.volcano.sh/v1alpha1  # Volcano batch Job declaring the worker, server and scheduler tasks of one MXNet run
     2  kind: Job
     3  metadata:
     4    name: mxnet-job  # this name also appears inside the DMLC_PS_ROOT_URI value of every task
     5  spec:
     6    minAvailable: 5  # equals the sum of the task replicas below (2 + 2 + 1)
     7    schedulerName: volcano
     8    policies:  # job-level event -> action rules
     9    - event: PodEvicted
    10      action: RestartJob  # RestartJob is the action taken when a pod is evicted
    11    - event: PodFailed
    12      action: RestartJob  # same action when a pod fails
    13    plugins:
    14      svc: []  # svc plugin listed with no arguments; NOTE(review): presumably provides the DNS name used in DMLC_PS_ROOT_URI - confirm
    15    tasks:  # three tasks follow: worker, server, scheduler
    16    - replicas: 2  # worker task: two pods, matching DMLC_NUM_WORKER below
    17      name: worker
    18      template:
    19        spec:
    20          imagePullSecrets:
    21          - name: default-secret  # pull secret referenced by name; it is not defined in this file
    22          containers:
    23          - image: volcanosh/mxnet-train-mnist-cpu:v1
    24            args:  # only the worker task passes container args
    25            - --kv-store=dist_sync  # NOTE(review): presumably selects MXNet's dist_sync kvstore - confirm against the image
    26            imagePullPolicy: IfNotPresent
    27            name: mxnet
    28            env:  # every value is a quoted or plain string; numeric-looking ones are quoted
    29            - name: DMLC_PS_ROOT_PORT
    30              value: "9000"
    31            - name: DMLC_PS_ROOT_URI
    32              value: mxnet-job-scheduler-0.mxnet-job  # looks like <job>-<task>-<index>.<job>, i.e. the single scheduler pod - TODO confirm
    33            - name: DMLC_NUM_SERVER
    34              value: "2"  # same as the replicas of the server task in this file
    35            - name: DMLC_NUM_WORKER
    36              value: "2"  # same as this task's replicas
    37            - name: DMLC_ROLE
    38              value: "worker"  # the role value is what differs between the three tasks' env lists
    39            - name: DMLC_USE_KUBERNETES
    40              value: "1"  # NOTE(review): meaning not visible here; set identically in all three tasks
    41          restartPolicy: OnFailure
    42    - replicas: 2  # server task: two pods, matching DMLC_NUM_SERVER below
    43      name: server
    44      template:
    45        spec:
    46          imagePullSecrets:
    47          - name: default-secret  # pull secret referenced by name; it is not defined in this file
    48          containers:
    49          - image: volcanosh/mxnet-train-mnist-cpu:v1  # same image as the worker and scheduler tasks; no args are passed here
    50            imagePullPolicy: IfNotPresent
    51            name: mxnet
    52            env:  # every value is a quoted or plain string; numeric-looking ones are quoted
    53            - name: DMLC_PS_ROOT_PORT
    54              value: "9000"
    55            - name: DMLC_PS_ROOT_URI
    56              value: mxnet-job-scheduler-0.mxnet-job  # looks like <job>-<task>-<index>.<job>, i.e. the single scheduler pod - TODO confirm
    57            - name: DMLC_NUM_SERVER
    58              value: "2"  # same as this task's replicas
    59            - name: DMLC_NUM_WORKER
    60              value: "2"  # same as the replicas of the worker task in this file
    61            - name: DMLC_ROLE
    62              value: "server"  # the role value is what differs between the three tasks' env lists
    63            - name: DMLC_USE_KUBERNETES
    64              value: "1"  # NOTE(review): meaning not visible here; set identically in all three tasks
    65          restartPolicy: OnFailure
    66    - replicas: 1  # scheduler task: a single pod
    67      name: scheduler
    68      template:
    69        spec:
    70          imagePullSecrets:
    71          - name: default-secret  # pull secret referenced by name; it is not defined in this file
    72          containers:
    73          - image: volcanosh/mxnet-train-mnist-cpu:v1  # same image as the worker and server tasks; no args are passed here
    74            imagePullPolicy: IfNotPresent
    75            name: mxnet
    76            env:  # every value is a quoted or plain string; numeric-looking ones are quoted
    77            - name: DMLC_PS_ROOT_PORT
    78              value: "9000"
    79            - name: DMLC_PS_ROOT_URI
    80              value: mxnet-job-scheduler-0.mxnet-job  # looks like <job>-<task>-<index>.<job>, i.e. this task's own single pod - TODO confirm
    81            - name: DMLC_NUM_SERVER
    82              value: "2"  # same as the replicas of the server task in this file
    83            - name: DMLC_NUM_WORKER
    84              value: "2"  # same as the replicas of the worker task in this file
    85            - name: DMLC_ROLE
    86              value: "scheduler"  # the role value is what differs between the three tasks' env lists
    87            - name: DMLC_USE_KUBERNETES
    88              value: "1"  # NOTE(review): meaning not visible here; set identically in all three tasks
    89          restartPolicy: OnFailure