# Source: github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/python/aistore/sdk/dsort.py

import json
import logging
import time
from typing import Dict

from aistore.sdk.const import (
    HTTP_METHOD_POST,
    URL_PATH_DSORT,
    HTTP_METHOD_GET,
    DEFAULT_DSORT_WAIT_TIMEOUT,
    HTTP_METHOD_DELETE,
    DSORT_ABORT,
    DSORT_UUID,
)
from aistore.sdk.dsort_types import JobInfo
from aistore.sdk.errors import Timeout
from aistore.sdk.utils import validate_file, probing_frequency


class Dsort:
    """
    Handle for starting, aborting, inspecting and waiting on jobs of the dSort
    extension: https://github.com/NVIDIA/aistore/blob/main/docs/cli/dsort.md
    """

    def __init__(self, client: "Client", dsort_id: str = ""):
        """
        Bind this handle to a client and, optionally, to an existing job.

        Args:
            client (Client): Client object used to issue every request
            dsort_id (str, optional): ID of an already-started dSort job.
                Defaults to an empty string; `start` overwrites it.
        """
        self._dsort_id = dsort_id
        self._client = client

    @property
    def dsort_id(self) -> str:
        """
        ID of the dSort job this handle currently refers to
        """
        return self._dsort_id

    def start(self, spec_file: str) -> str:
        """
        Launch a dSort job described by a JSON spec file and remember its ID

        Args:
            spec_file (str): Path to the JSON file holding the job specification

        Returns:
            dSort job ID (the text of the response to the start request)
        """
        validate_file(spec_file)
        with open(spec_file, "r", encoding="utf-8") as spec_handle:
            job_spec = json.load(spec_handle)
            response = self._client.request(
                HTTP_METHOD_POST, path=URL_PATH_DSORT, json=job_spec
            )
            self._dsort_id = response.text
        return self._dsort_id

    def abort(self):
        """
        Request that the dSort job referenced by this handle be aborted
        """
        abort_path = f"{URL_PATH_DSORT}/{DSORT_ABORT}"
        self._client.request(
            HTTP_METHOD_DELETE,
            path=abort_path,
            params={DSORT_UUID: [self._dsort_id]},
        )

    def get_job_info(self) -> Dict[str, JobInfo]:
        """
        Fetch the current info for the dSort job referenced by this handle

        Returns:
            Dictionary of job info for all jobs associated with this dsort
        """
        # The job is selected through the UUID query parameter.
        query = {DSORT_UUID: [self._dsort_id]}
        job_info_by_key = self._client.request_deserialize(
            HTTP_METHOD_GET,
            path=URL_PATH_DSORT,
            res_model=Dict[str, JobInfo],
            params=query,
        )
        return job_info_by_key

    def wait(
        self,
        timeout: int = DEFAULT_DSORT_WAIT_TIMEOUT,
        verbose: bool = True,
    ):
        """
        Block until the dSort job finishes, is aborted, or the timeout expires

        The job info is polled repeatedly, sleeping between polls for the
        interval returned by `probing_frequency(timeout)`. Only the time spent
        sleeping counts toward the timeout; time spent in the requests
        themselves is not measured.

        Args:
            timeout (int, optional): The maximum time to wait for the job, in seconds.
                Defaults to DEFAULT_DSORT_WAIT_TIMEOUT.
            verbose (bool, optional): Whether to log wait status. When False, the
                logger used by this method is disabled.

        Raises:
            requests.RequestException: "There was an ambiguous exception that occurred while handling..."
            requests.ConnectionError: Connection error
            requests.ConnectTimeout: Timed out connecting to AIStore
            requests.ReadTimeout: Timed out waiting response from AIStore
            errors.Timeout: Timeout while waiting for the job to finish
        """
        wait_log = logging.getLogger(f"{__name__}.wait")
        wait_log.disabled = not verbose

        poll_interval = probing_frequency(timeout)
        elapsed = 0
        while elapsed <= timeout:
            all_done = True
            for info in self.get_job_info().values():
                metrics = info.metrics
                if metrics.aborted:
                    wait_log.info("DSort job '%s' aborted", self._dsort_id)
                    return
                # Shard creation is the last phase; one unfinished job is
                # enough to keep waiting. Keep iterating regardless so an
                # abort reported by a later entry is still noticed.
                if not metrics.shard_creation.finished:
                    all_done = False
            if all_done:
                wait_log.info("DSort job '%s' finished", self._dsort_id)
                return
            wait_log.info("Waiting on dsort job '%s'...", self._dsort_id)
            time.sleep(poll_interval)
            elapsed += poll_interval
        raise Timeout("dsort job to finish")