# Source: github.com/kubeflow/training-operator@v1.7.0/examples/mpi/tensorflow-mnist.yaml
#
# MPIJob that launches /examples/tensorflow2_mnist.py through mpirun, using
# the Horovod CPU image for both the launcher and the worker pods.
---
apiVersion: kubeflow.org/v1
kind: MPIJob
metadata:
  name: tensorflow-mnist
spec:
  slotsPerWorker: 1
  runPolicy:
    # NOTE(review): presumably only still-running pods are cleaned up once the
    # job finishes -- confirm against the training-operator RunPolicy docs.
    cleanPodPolicy: Running
  mpiReplicaSpecs:
    Launcher:
      replicas: 1
      template:
        spec:
          containers:
            - name: mpi
              image: horovod/horovod:0.20.0-tf2.3.0-torch1.6.0-mxnet1.5.0-py3.7-cpu
              command:
                - mpirun
              args:
                # Process count: equals Worker replicas (2) x slotsPerWorker (1).
                # NOTE(review): presumably these must be kept in sync -- confirm.
                # Quoted so YAML keeps the value a string instead of an integer.
                - -np
                - "2"
                - --allow-run-as-root
                - -bind-to
                - none
                - -map-by
                - slot
                # Each -x is followed by the name of an environment variable.
                - -x
                - LD_LIBRARY_PATH
                - -x
                - PATH
                # Each -mca is followed by a parameter name and its value.
                - -mca
                - pml
                - ob1
                # NOTE(review): the leading ^ presumably excludes the openib
                # component -- confirm against the Open MPI documentation.
                - -mca
                - btl
                - ^openib
                # Program that mpirun starts.
                - python
                - /examples/tensorflow2_mnist.py
              resources:
                limits:
                  cpu: 1
                  memory: 2Gi
    Worker:
      replicas: 2
      template:
        spec:
          containers:
            # No command or args are set for the worker container.
            - name: mpi
              image: horovod/horovod:0.20.0-tf2.3.0-torch1.6.0-mxnet1.5.0-py3.7-cpu
              resources:
                limits:
                  cpu: 2
                  memory: 4Gi