# Source: github.com/kubeflow/training-operator@v1.7.0/examples/mxnet/tune/mx_job_tune_gpu_v1.yaml
#
# MXJob manifest named "auto-tuning-job". It declares three replica types
# (TunerTracker, TunerServer, Tuner); each runs a single replica of the same
# image with the same entry script and is never restarted. Only TunerServer
# sets a GPU resource limit.
---
apiVersion: "kubeflow.org/v1"
kind: "MXJob"
metadata:
  name: "auto-tuning-job"
spec:
  # NOTE(review): presumably selects the operator's auto-tuning mode (as
  # opposed to a training mode) -- confirm against the MXJob API reference.
  jobMode: MXTune
  mxReplicaSpecs:
    TunerTracker:
      replicas: 1
      restartPolicy: Never
      template:
        spec:
          containers:
            - name: mxnet
              image: mxjob/auto-tuning:gpu
              command: ["python3"]
              args: ["/home/scripts/start-job.py"]
    TunerServer:
      # Quoted so the number-looking identifier is unambiguously a string.
      # NOTE(review): looks like a GPU model tag for this server -- confirm
      # how the operator / start script consumes it.
      label: "2080ti"
      replicas: 1
      restartPolicy: Never
      template:
        spec:
          containers:
            - name: mxnet
              image: mxjob/auto-tuning:gpu
              command: ["python3"]
              args: ["/home/scripts/start-job.py"]
              resources:
                limits:
                  # The only replica type in this manifest with a GPU limit.
                  nvidia.com/gpu: 1
    Tuner:
      replicas: 1
      restartPolicy: Never
      template:
        spec:
          containers:
            - name: mxnet
              image: mxjob/auto-tuning:gpu
              command: ["python3"]
              args: ["/home/scripts/start-job.py"]