# Source: volcano.sh/volcano@v1.9.0/example/integrations/tensorflow/dist-mnist/tf-dist-mnist-example.yaml

     1  apiVersion: batch.volcano.sh/v1alpha1
     2  kind: Job
     3  metadata:
     4    name: tensorflow-dist-mnist
     5  spec:
     6    minAvailable: 3
     7    schedulerName: volcano
     8    plugins:
     9      env: []
    10      svc: []
    11    policies:
    12      - event: PodEvicted
    13        action: RestartJob
    14    queue: default
    15    tasks:
    16      - replicas: 1
    17        name: ps
    18        template:
    19          spec:
    20            containers:
    21              - command:
    22                  - sh
    23                  - -c
    24                  - |
    25                    PS_HOST=`cat /etc/volcano/ps.host | sed 's/$/&:2222/g' | sed 's/^/"/;s/$/"/' | tr "\n" ","`;
    26                    WORKER_HOST=`cat /etc/volcano/worker.host | sed 's/$/&:2222/g' | sed 's/^/"/;s/$/"/' | tr "\n" ","`;
    27                    export TF_CONFIG={\"cluster\":{\"ps\":[${PS_HOST}],\"worker\":[${WORKER_HOST}]},\"task\":{\"type\":\"ps\",\"index\":${VK_TASK_INDEX}},\"environment\":\"cloud\"};
    28                    python /var/tf_dist_mnist/dist_mnist.py
    29                image: volcanosh/dist-mnist-tf-example:0.0.1
    30                name: tensorflow
    31                ports:
    32                  - containerPort: 2222
    33                    name: tfjob-port
    34                resources: {}
    35            restartPolicy: Never
    36      - replicas: 2
    37        name: worker
    38        policies:
    39          - event: TaskCompleted
    40            action: CompleteJob
    41        template:
    42          spec:
    43            containers:
    44              - command:
    45                  - sh
    46                  - -c
    47                  - |
    48                    PS_HOST=`cat /etc/volcano/ps.host | sed 's/$/&:2222/g' | sed 's/^/"/;s/$/"/' | tr "\n" ","`;
    49                    WORKER_HOST=`cat /etc/volcano/worker.host | sed 's/$/&:2222/g' | sed 's/^/"/;s/$/"/' | tr "\n" ","`;
    50                    export TF_CONFIG={\"cluster\":{\"ps\":[${PS_HOST}],\"worker\":[${WORKER_HOST}]},\"task\":{\"type\":\"worker\",\"index\":${VK_TASK_INDEX}},\"environment\":\"cloud\"};
    51                    python /var/tf_dist_mnist/dist_mnist.py
    52                image: volcanosh/dist-mnist-tf-example:0.0.1
    53                name: tensorflow
    54                ports:
    55                  - containerPort: 2222
    56                    name: tfjob-port
    57                resources: {}
    58            restartPolicy: Never