"""Launch one replica of a TVM auto-tuning job.

Before running this script, make sure tvm is installed in your cluster.

The replica's role and the cluster layout are read from the ``MX_CONFIG``
environment variable, a JSON document with (at least) these sections:

* ``task.type``  - one of ``tunertracker``, ``tunerserver`` or ``tuner``
* ``cluster``    - maps a task type to a list of ``{"url": ..., "port": ...}``
* ``labels``     - maps a task type to the key used to register RPC servers
"""

import json
import os
import subprocess
import sys
import time

# Task types that connect to the tracker; they wait briefly before starting
# so the tracker replica has a chance to come up first.
_TRACKER_CLIENTS = ("tunerserver", "tuner")
_STARTUP_DELAY_SECONDS = 5


def build_command(task_type, cluster_config, labels_config):
    """Return the argument list to execute for ``task_type``.

    Args:
        task_type: Role of this replica as found in ``MX_CONFIG``.
        cluster_config: The ``cluster`` section of ``MX_CONFIG``.
        labels_config: The ``labels`` section of ``MX_CONFIG``.

    Returns:
        A list of command-line arguments, or ``None`` when ``task_type`` is
        not one of the supported roles.

    Raises:
        KeyError: If a supported role is requested but the ``tunertracker``
            cluster entry (or the ``tunerserver`` label) is missing.
        IndexError: If the ``tunertracker`` cluster entry is an empty list.
    """
    if task_type == "tunertracker":
        addr = cluster_config["tunertracker"][0]
        return [
            "python3", "-m", "tvm.exec.rpc_tracker",
            "--port={0}".format(addr.get('port')),
        ]

    if task_type == "tunerserver":
        addr = cluster_config["tunertracker"][0]
        label = labels_config["tunerserver"]
        return [
            "python3", "-m", "tvm.exec.rpc_server",
            "--tracker={0}:{1}".format(addr.get('url'), addr.get('port')),
            "--key={0}".format(label),
        ]

    if task_type == "tuner":
        addr = cluster_config["tunertracker"][0]
        label = labels_config["tunerserver"]
        return [
            "python3", "/home/scripts/auto-tuning.py",
            "--tracker", "{0}".format(addr.get('url')),
            "--tracker_port", "{0}".format(addr.get('port')),
            "--server_key", "{0}".format(label),
        ]

    return None


def main():
    """Run the command for this replica and return its exit status.

    Returns:
        The exit status of the launched command, or ``1`` when the task type
        is missing or not recognised.
    """
    mx_config = json.loads(os.environ.get('MX_CONFIG') or '{}')
    cluster_config = mx_config.get('cluster', {})
    labels_config = mx_config.get('labels', {})
    task_config = mx_config.get('task', {})
    task_type = task_config.get('type')

    # A missing type comes back as None (not ""), so test for falsiness to
    # report the absent-type case accurately.
    if not task_type:
        print("No task_type, Error")
        return 1

    if task_type in _TRACKER_CLIENTS:
        time.sleep(_STARTUP_DELAY_SECONDS)

    command = build_command(task_type, cluster_config, labels_config)
    if command is None:
        print("Unknow task type! Error")
        return 1

    print("DO: " + " ".join(command))
    # Pass an argument list (no shell) so values taken from MX_CONFIG are
    # never interpreted by a shell, and hand the child's status back to the
    # caller so a failed replica is not reported as successful.
    return subprocess.call(command)


if __name__ == '__main__':
    sys.exit(main())