#
# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved.
#
from io import BufferedWriter
from typing import NewType, Optional

import requests

from aistore.sdk.const import (
    DEFAULT_CHUNK_SIZE,
    HTTP_METHOD_DELETE,
    HTTP_METHOD_GET,
    HTTP_METHOD_HEAD,
    HTTP_METHOD_PUT,
    QPARAM_ARCHPATH,
    QPARAM_ETL_NAME,
    QPARAM_LATEST,
    ACT_PROMOTE,
    HTTP_METHOD_POST,
    URL_PATH_OBJECTS,
    HEADER_RANGE,
    ACT_BLOB_DOWNLOAD,
    HEADER_OBJECT_BLOB_DOWNLOAD,
    HEADER_OBJECT_BLOB_WORKERS,
    HEADER_OBJECT_BLOB_CHUNK_SIZE,
)
from aistore.sdk.object_reader import ObjectReader
from aistore.sdk.types import ActionMsg, PromoteAPIArgs, BlobMsg
from aistore.sdk.utils import read_file_bytes, validate_file

# Response headers returned by AIStore (case-insensitive key lookup).
Header = NewType("Header", requests.structures.CaseInsensitiveDict)


# pylint: disable=consider-using-with,unused-variable
class Object:
    """
    A class representing an object of a bucket bound to a client.

    Args:
        bucket (Bucket): Bucket to which this object belongs
        name (str): name of object

    """

    def __init__(self, bucket: "Bucket", name: str):
        self._bucket = bucket
        self._client = bucket.client
        self._bck_name = bucket.name
        self._qparams = bucket.qparam
        self._name = name
        # Request path for single-object operations (HEAD/GET/DELETE).
        self._object_path = f"{URL_PATH_OBJECTS}/{self._bck_name}/{self.name}"

    @property
    def bucket(self):
        """Bucket containing this object"""
        return self._bucket

    @property
    def name(self) -> str:
        """Name of this object"""
        return self._name

    def head(self) -> Header:
        """
        Requests object properties.

        Returns:
            Response header with the object properties.

        Raises:
            requests.RequestException: "There was an ambiguous exception that occurred while handling..."
            requests.ConnectionError: Connection error
            requests.ConnectionTimeout: Timed out connecting to AIStore
            requests.ReadTimeout: Timed out waiting response from AIStore
            requests.exceptions.HTTPError(404): The object does not exist
        """
        return self._client.request(
            HTTP_METHOD_HEAD,
            path=self._object_path,
            params=self._qparams,
        ).headers

    # pylint: disable=too-many-arguments
    def get(
        self,
        archpath: str = "",
        chunk_size: int = DEFAULT_CHUNK_SIZE,
        etl_name: Optional[str] = None,
        writer: Optional[BufferedWriter] = None,
        latest: bool = False,
        byte_range: Optional[str] = None,
        blob_chunk_size: Optional[str] = None,
        blob_num_workers: Optional[str] = None,
    ) -> ObjectReader:
        """
        Reads an object

        Args:
            archpath (str, optional): If the object is an archive, use `archpath` to extract a single file
                from the archive
            chunk_size (int, optional): chunk_size to use while reading from stream
            etl_name (str, optional): Transforms an object based on ETL with etl_name
            writer (BufferedWriter, optional): User-provided writer for writing content output
                User is responsible for closing the writer
            latest (bool, optional): GET the latest object version from the associated remote bucket
            byte_range (str, optional): Specify a specific data segment of the object for transfer, including
                both the start and end of the range (e.g. "bytes=0-499" to request the first 500 bytes)
            blob_chunk_size (str, optional): Utilize built-in blob-downloader with the given chunk size in
                IEC or SI units, or "raw" bytes (e.g.: 4mb, 1MiB, 1048576, 128k;)
            blob_num_workers (str, optional): Utilize built-in blob-downloader with the given number of
                concurrent blob-downloading workers (readers)

        Returns:
            The stream of bytes to read an object or a file inside an archive.

        Raises:
            ValueError: Both byte_range and blob-download options were provided
            requests.RequestException: "There was an ambiguous exception that occurred while handling..."
            requests.ConnectionError: Connection error
            requests.ConnectionTimeout: Timed out connecting to AIStore
            requests.ReadTimeout: Timed out waiting response from AIStore
        """
        params = self._qparams.copy()
        params[QPARAM_ARCHPATH] = archpath
        if etl_name:
            params[QPARAM_ETL_NAME] = etl_name
        if latest:
            params[QPARAM_LATEST] = "true"

        # Byte-range requests and the built-in blob downloader are mutually exclusive.
        if byte_range and (blob_chunk_size or blob_num_workers):
            raise ValueError("Cannot use Byte Range with Blob Download")
        headers = {}
        if blob_chunk_size or blob_num_workers:
            headers[HEADER_OBJECT_BLOB_DOWNLOAD] = "true"
        if blob_chunk_size:
            headers[HEADER_OBJECT_BLOB_CHUNK_SIZE] = blob_chunk_size
        if blob_num_workers:
            headers[HEADER_OBJECT_BLOB_WORKERS] = blob_num_workers
        if byte_range:
            # For range formatting, see the spec:
            # https://www.rfc-editor.org/rfc/rfc7233#section-2.1
            # Assign the key (do not rebind the dict) so any previously set
            # headers are preserved.
            headers[HEADER_RANGE] = byte_range

        resp = self._client.request(
            HTTP_METHOD_GET,
            path=self._object_path,
            params=params,
            stream=True,
            headers=headers,
        )
        obj_reader = ObjectReader(
            stream=resp,
            response_headers=resp.headers,
            chunk_size=chunk_size,
        )
        if writer:
            # Drain the stream into the caller-supplied writer; the reader is
            # still returned so response headers remain accessible.
            writer.writelines(obj_reader)
        return obj_reader

    def get_semantic_url(self) -> str:
        """
        Get the semantic URL to the object

        Returns:
            Semantic URL to get object
        """

        return f"{self.bucket.provider}://{self._bck_name}/{self._name}"

    def get_url(self, archpath: str = "", etl_name: Optional[str] = None) -> str:
        """
        Get the full url to the object including base url and any query parameters

        Args:
            archpath (str, optional): If the object is an archive, use `archpath` to extract a single file
                from the archive
            etl_name (str, optional): Transforms an object based on ETL with etl_name

        Returns:
            Full URL to get object

        """
        params = self._qparams.copy()
        if archpath:
            params[QPARAM_ARCHPATH] = archpath
        if etl_name:
            params[QPARAM_ETL_NAME] = etl_name
        return self._client.get_full_url(self._object_path, params)

    def put_content(self, content: bytes) -> Header:
        """
        Puts bytes as an object to a bucket in AIS storage.

        Args:
            content (bytes): Bytes to put as an object.

        Returns:
            Response header with the object properties.

        Raises:
            requests.RequestException: "There was an ambiguous exception that occurred while handling..."
            requests.ConnectionError: Connection error
            requests.ConnectionTimeout: Timed out connecting to AIStore
            requests.ReadTimeout: Timed out waiting response from AIStore
        """
        # Return the response headers to honor the declared `-> Header` contract
        # (previously the result was silently discarded).
        return self._put_data(self.name, content)

    def put_file(self, path: str = None) -> Header:
        """
        Puts a local file as an object to a bucket in AIS storage.

        Args:
            path (str): Path to local file

        Returns:
            Response header with the object properties.

        Raises:
            requests.RequestException: "There was an ambiguous exception that occurred while handling..."
            requests.ConnectionError: Connection error
            requests.ConnectionTimeout: Timed out connecting to AIStore
            requests.ReadTimeout: Timed out waiting response from AIStore
            ValueError: The path provided is not a valid file
        """
        validate_file(path)
        return self._put_data(self.name, read_file_bytes(path))

    def _put_data(self, obj_name: str, data: bytes) -> Header:
        """PUT raw bytes as object `obj_name` and return the response headers."""
        url = f"{URL_PATH_OBJECTS}/{self._bck_name}/{obj_name}"
        return self._client.request(
            HTTP_METHOD_PUT,
            path=url,
            params=self._qparams,
            data=data,
        ).headers

    # pylint: disable=too-many-arguments
    def promote(
        self,
        path: str,
        target_id: str = "",
        recursive: bool = False,
        overwrite_dest: bool = False,
        delete_source: bool = False,
        src_not_file_share: bool = False,
    ) -> Header:
        """
        Promotes a file or folder an AIS target can access to a bucket in AIS storage.
        These files can be either on the physical disk of an AIS target itself or on a network file system
        the cluster can access.
        See more info here: https://aiatscale.org/blog/2022/03/17/promote

        Args:
            path (str): Path to file or folder the AIS cluster can reach
            target_id (str, optional): Promote files from a specific target node
            recursive (bool, optional): Recursively promote objects from files in directories inside the path
            overwrite_dest (bool, optional): Overwrite objects already on AIS
            delete_source (bool, optional): Delete the source files when done promoting
            src_not_file_share (bool, optional): Optimize if the source is guaranteed to not be on a file share

        Returns:
            Object properties

        Raises:
            requests.RequestException: "There was an ambiguous exception that occurred while handling..."
            requests.ConnectionError: Connection error
            requests.ConnectionTimeout: Timed out connecting to AIStore
            requests.ReadTimeout: Timed out waiting response from AIStore
            AISError: Path does not exist on the AIS cluster storage
        """
        url = f"{URL_PATH_OBJECTS}/{self._bck_name}"
        value = PromoteAPIArgs(
            source_path=path,
            object_name=self.name,
            target_id=target_id,
            recursive=recursive,
            overwrite_dest=overwrite_dest,
            delete_source=delete_source,
            src_not_file_share=src_not_file_share,
        ).as_dict()
        json_val = ActionMsg(action=ACT_PROMOTE, name=path, value=value).dict()

        return self._client.request(
            HTTP_METHOD_POST, path=url, params=self._qparams, json=json_val
        ).headers

    def delete(self) -> None:
        """
        Delete an object from a bucket.

        Returns:
            None

        Raises:
            requests.RequestException: "There was an ambiguous exception that occurred while handling..."
            requests.ConnectionError: Connection error
            requests.ConnectionTimeout: Timed out connecting to AIStore
            requests.ReadTimeout: Timed out waiting response from AIStore
            requests.exceptions.HTTPError(404): The object does not exist
        """
        self._client.request(
            HTTP_METHOD_DELETE,
            path=self._object_path,
            params=self._qparams,
        )

    def blob_download(
        self,
        chunk_size: Optional[int] = None,
        num_workers: Optional[int] = None,
        latest: bool = False,
    ) -> str:
        """
        A special facility to download very large remote objects a.k.a. BLOBs
        Returns job ID that for the blob download operation.

        Args:
            chunk_size (int): chunk size in bytes
            num_workers (int): number of concurrent blob-downloading workers (readers)
            latest (bool): GET the latest object version from the associated remote bucket

        Returns:
            Job ID (as str) that can be used to check the status of the operation

        Raises:
            aistore.sdk.errors.AISError: All other types of errors with AIStore
            requests.ConnectionError: Connection error
            requests.ConnectionTimeout: Timed out connecting to AIStore
            requests.exceptions.HTTPError: Service unavailable
            requests.RequestException: "There was an ambiguous exception that occurred while handling..."
        """
        params = self._qparams.copy()
        value = BlobMsg(
            chunk_size=chunk_size,
            num_workers=num_workers,
            latest=latest,
        ).as_dict()
        json_val = ActionMsg(
            action=ACT_BLOB_DOWNLOAD, value=value, name=self.name
        ).dict()
        url = f"{URL_PATH_OBJECTS}/{self._bck_name}"
        return self._client.request(
            HTTP_METHOD_POST, path=url, params=params, json=json_val
        ).text