github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/python/aistore/sdk/multiobj/object_group.py

#
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
import logging
from typing import List, Iterable

from aistore.sdk.ais_source import AISSource
from aistore.sdk.const import (
    HTTP_METHOD_DELETE,
    HTTP_METHOD_POST,
    HTTP_METHOD_PUT,
    ACT_DELETE_OBJECTS,
    ACT_PREFETCH_OBJECTS,
    ACT_EVICT_OBJECTS,
    ACT_COPY_OBJECTS,
    ACT_TRANSFORM_OBJECTS,
    ACT_ARCHIVE_OBJECTS,
)
from aistore.sdk.etl_const import DEFAULT_ETL_TIMEOUT
from aistore.sdk.object import Object
from aistore.sdk.multiobj.object_names import ObjectNames
from aistore.sdk.multiobj.object_range import ObjectRange
from aistore.sdk.multiobj.object_template import ObjectTemplate
from aistore.sdk.types import (
    TCMultiObj,
    CopyBckMsg,
    TransformBckMsg,
    TCBckMsg,
    ArchiveMultiObj,
    PrefetchMsg,
)


# pylint: disable=unused-variable
class ObjectGroup(AISSource):
    """
    A class representing multiple objects within the same bucket. Exactly one of obj_names, obj_range, or
    obj_template must be provided.

    Args:
        bck (Bucket): Bucket the objects belong to
        obj_names (list[str], optional): List of object names to include in this collection
        obj_range (ObjectRange, optional): Range defining which object names in the bucket should be included
        obj_template (str, optional): String argument to pass as the template value directly to the API
    """

    def __init__(
        self,
        bck: "Bucket",
        obj_names: list = None,
        obj_range: ObjectRange = None,
        obj_template: str = None,
    ):
        self.bck = bck
        num_args = sum(
            1 if x is not None else 0 for x in [obj_names, obj_range, obj_template]
        )
        if num_args != 1:
            raise ValueError(
                "ObjectGroup accepts one and only one of: obj_names, obj_range, or obj_template"
            )
        if obj_range and not isinstance(obj_range, ObjectRange):
            raise TypeError("obj_range must be of type ObjectRange")

        if obj_range:
            self._obj_collection = obj_range
        elif obj_names:
            self._obj_collection = ObjectNames(obj_names)
        else:
            self._obj_collection = ObjectTemplate(obj_template)

    def list_urls(self, prefix: str = "", etl_name: str = None) -> Iterable[str]:
        """
        Implementation of the abstract method from AISSource that provides an iterator
        of full URLs to every object in this group. Note that the prefix argument is
        accepted for AISSource interface compatibility but is not applied here; the
        group's object collection is fixed at construction time.

        Args:
            prefix (str, optional): Unused; accepted for compatibility with AISSource
            etl_name (str, optional): ETL to include in URLs

        Returns:
            Iterator of all object URLs in the group
        """
        for obj_name in self._obj_collection:
            yield self.bck.object(obj_name).get_url(etl_name=etl_name)

    def list_all_objects_iter(self, prefix: str = "") -> Iterable[Object]:
        """
        Implementation of the abstract method from AISSource that provides an iterator
        of all the objects in this group. As with list_urls, the prefix argument is
        accepted for interface compatibility only.

        Args:
            prefix (str, optional): Unused; accepted for compatibility with AISSource

        Returns:
            Iterator of all the objects in the group
        """
        for obj_name in self._obj_collection:
            yield self.bck.object(obj_name)

    def delete(self):
        """
        Deletes a list, range, or template of objects in a bucket

        Raises:
            aistore.sdk.errors.AISError: All other types of errors with AIStore
            requests.ConnectionError: Connection error
            requests.ConnectTimeout: Timed out connecting to AIStore
            requests.exceptions.HTTPError: Service unavailable
            requests.RequestException: Ambiguous exception while handling the request
            requests.ReadTimeout: Timed out receiving response from AIStore

        Returns:
            Job ID (as str) that can be used to check the status of the operation
        """
        return self.bck.make_request(
            HTTP_METHOD_DELETE,
            ACT_DELETE_OBJECTS,
            value=self._obj_collection.get_value(),
        ).text
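    # A minimal usage sketch (illustrative, not part of the SDK source): construct
    # a group via Bucket.objects() and delete it. The endpoint, bucket name, and
    # object names below are assumptions for the example.
    #
    #   from aistore.sdk import Client
    #
    #   client = Client("http://localhost:8080")
    #   bucket = client.bucket("my-bck")
    #   group = bucket.objects(obj_names=["obj-1", "obj-2"])
    #   job_id = group.delete()
    #   client.job(job_id).wait()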
    def evict(self):
        """
        Evicts a list, range, or template of objects in a bucket so that they are no longer cached in AIS
        NOTE: only Cloud buckets can be evicted.

        Raises:
            aistore.sdk.errors.AISError: All other types of errors with AIStore
            requests.ConnectionError: Connection error
            requests.ConnectTimeout: Timed out connecting to AIStore
            requests.exceptions.HTTPError: Service unavailable
            requests.RequestException: Ambiguous exception while handling the request
            requests.ReadTimeout: Timed out receiving response from AIStore

        Returns:
            Job ID (as str) that can be used to check the status of the operation
        """
        self.bck.verify_cloud_bucket()
        return self.bck.make_request(
            HTTP_METHOD_DELETE,
            ACT_EVICT_OBJECTS,
            value=self._obj_collection.get_value(),
        ).text

    def prefetch(
        self,
        blob_threshold: int = None,
        latest: bool = False,
        continue_on_error: bool = False,
    ):
        """
        Prefetches a list, range, or template of objects in a bucket so that they are cached in AIS
        NOTE: only Cloud buckets can be prefetched.

        Args:
            blob_threshold (int, optional): Utilize the built-in blob-downloader for remote objects
                greater than the specified (threshold) size in bytes
            latest (bool, optional): GET the latest object version from the associated remote bucket
            continue_on_error (bool, optional): Whether to continue if there is an error prefetching a single object

        Raises:
            aistore.sdk.errors.AISError: All other types of errors with AIStore
            requests.ConnectionError: Connection error
            requests.ConnectTimeout: Timed out connecting to AIStore
            requests.exceptions.HTTPError: Service unavailable
            requests.RequestException: Ambiguous exception while handling the request
            requests.ReadTimeout: Timed out receiving response from AIStore

        Returns:
            Job ID (as str) that can be used to check the status of the operation
        """
        self.bck.verify_cloud_bucket()

        value = PrefetchMsg(
            object_selection=self._obj_collection.get_value(),
            continue_on_err=continue_on_error,
            latest=latest,
            blob_threshold=blob_threshold,
        ).as_dict()

        return self.bck.make_request(
            HTTP_METHOD_POST,
            ACT_PREFETCH_OBJECTS,
            value=value,
        ).text
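    # A minimal prefetch sketch (illustrative, not part of the SDK source),
    # assuming a cloud-backed bucket named "s3-bck" registered with the cluster
    # and a brace-expansion template for the object names:
    #
    #   group = client.bucket("s3-bck", provider="aws").objects(
    #       obj_template="shard-{000..099}.tar"
    #   )
    #   job_id = group.prefetch(latest=True, continue_on_error=True)
    #   client.job(job_id).wait()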
    # pylint: disable=too-many-arguments
    def copy(
        self,
        to_bck: "Bucket",
        prepend: str = "",
        continue_on_error: bool = False,
        dry_run: bool = False,
        force: bool = False,
        latest: bool = False,
        sync: bool = False,
    ):
        """
        Copies a list, range, or template of objects in a bucket

        Args:
            to_bck (Bucket): Destination bucket
            prepend (str, optional): Value to prepend to the name of copied objects
            continue_on_error (bool, optional): Whether to continue if there is an error copying a single object
            dry_run (bool, optional): Skip performing the copy and just log the intended actions
            force (bool, optional): Force this job to run over others in case it conflicts
                (see "limited coexistence" and xact/xreg/xreg.go)
            latest (bool, optional): GET the latest object version from the associated remote bucket
            sync (bool, optional): Synchronize the destination bucket with its remote (e.g., Cloud or remote AIS) source

        Raises:
            aistore.sdk.errors.AISError: All other types of errors with AIStore
            requests.ConnectionError: Connection error
            requests.ConnectTimeout: Timed out connecting to AIStore
            requests.exceptions.HTTPError: Service unavailable
            requests.RequestException: Ambiguous exception while handling the request
            requests.ReadTimeout: Timed out receiving response from AIStore

        Returns:
            Job ID (as str) that can be used to check the status of the operation
        """
        if dry_run:
            logger = logging.getLogger(f"{__name__}.copy")
            logger.info(
                "Copy dry-run. Running with dry_run=False will copy the following objects from bucket '%s' to '%s': %s",
                self.bck.get_path(),
                to_bck.get_path(),
                list(self._obj_collection),
            )
        copy_msg = CopyBckMsg(
            prepend=prepend, dry_run=dry_run, force=force, latest=latest, sync=sync
        )

        value = TCMultiObj(
            to_bck=to_bck.as_model(),
            tc_msg=TCBckMsg(copy_msg=copy_msg),
            object_selection=self._obj_collection.get_value(),
            continue_on_err=continue_on_error,
        ).as_dict()

        return self.bck.make_request(
            HTTP_METHOD_POST,
            ACT_COPY_OBJECTS,
            value=value,
        ).text
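    # A sketch of copying a range of objects to another bucket (illustrative;
    # the bucket names and ObjectRange arguments are assumptions). With
    # dry_run=True the SDK only logs what would be copied:
    #
    #   from aistore.sdk.multiobj import ObjectRange
    #
    #   src = client.bucket("my-bck")
    #   dst = client.bucket("backup-bck")
    #   group = src.objects(obj_range=ObjectRange("shard-", 0, 9, suffix=".tar"))
    #   job_id = group.copy(dst, prepend="copied/", dry_run=False)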
    # pylint: disable=too-many-arguments
    def transform(
        self,
        to_bck: "Bucket",
        etl_name: str,
        timeout: str = DEFAULT_ETL_TIMEOUT,
        prepend: str = "",
        continue_on_error: bool = False,
        dry_run: bool = False,
        force: bool = False,
        latest: bool = False,
        sync: bool = False,
    ):
        """
        Performs an ETL operation on a list, range, or template of objects in a bucket, placing the results
        in the destination bucket

        Args:
            to_bck (Bucket): Destination bucket
            etl_name (str): Name of an existing ETL to apply
            timeout (str): Timeout of the ETL job (e.g. "5m" for 5 minutes)
            prepend (str, optional): Value to prepend to the name of resulting transformed objects
            continue_on_error (bool, optional): Whether to continue if there is an error transforming a single object
            dry_run (bool, optional): Skip performing the transform and just log the intended actions
            force (bool, optional): Force this job to run over others in case it conflicts
                (see "limited coexistence" and xact/xreg/xreg.go)
            latest (bool, optional): GET the latest object version from the associated remote bucket
            sync (bool, optional): Synchronize the destination bucket with its remote (e.g., Cloud or remote AIS) source

        Raises:
            aistore.sdk.errors.AISError: All other types of errors with AIStore
            requests.ConnectionError: Connection error
            requests.ConnectTimeout: Timed out connecting to AIStore
            requests.exceptions.HTTPError: Service unavailable
            requests.RequestException: Ambiguous exception while handling the request
            requests.ReadTimeout: Timed out receiving response from AIStore

        Returns:
            Job ID (as str) that can be used to check the status of the operation
        """
        if dry_run:
            logger = logging.getLogger(f"{__name__}.transform")
            logger.info(
                "Transform dry-run. Running with dry_run=False will apply ETL '%s' to objects %s",
                etl_name,
                list(self._obj_collection),
            )

        copy_msg = CopyBckMsg(
            prepend=prepend, dry_run=dry_run, force=force, latest=latest, sync=sync
        )
        transform_msg = TransformBckMsg(etl_name=etl_name, timeout=timeout)
        value = TCMultiObj(
            to_bck=to_bck.as_model(),
            tc_msg=TCBckMsg(transform_msg=transform_msg, copy_msg=copy_msg),
            object_selection=self._obj_collection.get_value(),
            continue_on_err=continue_on_error,
        ).as_dict()
        return self.bck.make_request(
            HTTP_METHOD_POST, ACT_TRANSFORM_OBJECTS, value=value
        ).text

    def archive(
        self,
        archive_name: str,
        mime: str = "",
        to_bck: "Bucket" = None,
        include_source_name: bool = False,
        allow_append: bool = False,
        continue_on_err: bool = False,
    ):
        """
        Create or append to an archive

        Args:
            archive_name (str): Name of the archive to create or append to
            mime (str, optional): MIME type of the content
            to_bck (Bucket, optional): Destination bucket; defaults to the current bucket
            include_source_name (bool, optional): Include the source bucket name in the archived objects' names
            allow_append (bool, optional): Allow appending to an existing archive
            continue_on_err (bool, optional): Whether to continue if there is an error archiving a single object

        Returns:
            Job ID (as str) that can be used to check the status of the operation
        """
        val = ArchiveMultiObj(
            object_selection=self._obj_collection.get_value(),
            archive_name=archive_name,
            mime=mime,
            to_bck=to_bck.as_model() if to_bck else self.bck.as_model(),
            include_source_name=include_source_name,
            allow_append=allow_append,
            continue_on_err=continue_on_err,
        ).as_dict()
        return self.bck.make_request(
            HTTP_METHOD_PUT, ACT_ARCHIVE_OBJECTS, value=val
        ).text

    def list_names(self) -> List[str]:
        """
        List all the object names included in this group of objects

        Returns:
            List of object names
        """
        return list(self._obj_collection)
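# An illustrative end-to-end sketch (not part of the SDK source): transform a
# group with a previously initialized ETL, then archive the same group. The ETL
# name, bucket names, template, and archive name are all assumptions.
#
#   group = client.bucket("my-bck").objects(obj_template="img-{0001..1000}.jpg")
#   transform_job = group.transform(
#       to_bck=client.bucket("transformed-bck"),
#       etl_name="md5-etl",
#       timeout="10m",
#   )
#   client.job(transform_job).wait()
#
#   archive_job = group.archive("batch-01.tar", include_source_name=True)
#   client.job(archive_job).wait()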