# Source: github.com/kubeflow/training-operator@v1.7.0
#   examples/tensorflow/distribution_strategy/keras-API/multi_worker_tfjob.yaml
#
# TFJob "multi-worker": runs two Worker replicas of the
# kubeflow/tf-multi-worker-strategy image. Each worker container is limited
# to one NVIDIA GPU and mounts the "strategy-volume" PersistentVolumeClaim
# at /train.
---
apiVersion: kubeflow.org/v1
kind: TFJob
metadata:
  name: multi-worker
spec:
  runPolicy:
    # "None" is a plain string policy value here, not a YAML null.
    # NOTE(review): presumably leaves pods in place once the job ends --
    # confirm against the training-operator RunPolicy reference.
    cleanPodPolicy: None
  tfReplicaSpecs:
    Worker:
      replicas: 2
      restartPolicy: Never
      template:
        spec:
          containers:
            - name: tensorflow
              # NOTE(review): ":latest" is a floating tag; consider pinning
              # a specific version for reproducible runs.
              image: kubeflow/tf-multi-worker-strategy:latest
              volumeMounts:
                # "name" must match an entry under the pod-level "volumes".
                - mountPath: /train
                  name: training
              resources:
                limits:
                  nvidia.com/gpu: 1
          volumes:
            - name: training
              persistentVolumeClaim:
                # This manifest does not create the claim; it only
                # references it by name.
                claimName: strategy-volume