github.com/kubeflow/training-operator@v1.7.0/examples/mxnet/tune/start-job.py (about)

     1  # Before running this script, make sure tvm is installed in your cluster.
     2  
     3  import os
     4  import time
     5  import json
     6  
     7  if __name__ == '__main__':
     8      mx_config = json.loads(os.environ.get('MX_CONFIG') or '{}')
     9      cluster_config = mx_config.get('cluster', {})
    10      labels_config = mx_config.get('labels', {})
    11      task_config = mx_config.get('task', {})
    12      task_type = task_config.get('type')
    13      task_index = task_config.get('index')
    14  
    15      if task_type == "":
    16          print("No task_type, Error")
    17      elif task_type == "tunertracker":
    18          addr = cluster_config["tunertracker"][0]
    19          command = "python3 -m tvm.exec.rpc_tracker --port={0}".format(addr.get('port'))
    20          print("DO: " + command)
    21          os.system(command)
    22      elif task_type == "tunerserver":
    23          time.sleep(5)
    24          addr = cluster_config["tunertracker"][0]
    25          label = labels_config["tunerserver"]
    26          command = "python3 -m tvm.exec.rpc_server --tracker={0}:{1} --key={2}".format(addr.get('url'), addr.get('port'), label)
    27          print("DO: " + command)
    28          os.system(command)
    29      elif task_type == "tuner":
    30          time.sleep(5)
    31          addr = cluster_config["tunertracker"][0]
    32          label = labels_config["tunerserver"]
    33          command = "python3 /home/scripts/auto-tuning.py --tracker {0} --tracker_port {1} --server_key {2}".format(addr.get('url'), addr.get('port'), label)
    34          print("DO: " + command)
    35          os.system(command)
    36      else:
    37          print("Unknow task type! Error")