"""
Utils for AIS PyTorch Plugin

Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved.
"""

from typing import Iterable, List, Optional, Tuple
from urllib.parse import urlparse, urlunparse, parse_qs
from aistore.sdk import Client
from aistore.sdk.ais_source import AISSource
from aistore.sdk.object import Object


def parse_url(url: str) -> Tuple[str, str, str]:
    """
    Parse AIS urls for bucket and object names

    NOTE: parsing is delegated to ``urllib.parse.urlparse``, which splits off
    anything after ``?`` (query) or ``#`` (fragment). Those parts are not
    returned, so an object name containing either character is truncated.

    Args:
        url (str): Complete URL of the object (eg. "ais://bucket1/file.txt")
    Returns:
        provider (str): AIS Backend (the URL scheme, eg. "ais")
        bck_name (str): Bucket name identifier (the URL network location)
        obj_name (str): Object name with extension (the URL path without its
            leading slash; empty when the URL names only a bucket)
    """
    parsed_url = urlparse(url)
    path = parsed_url.path
    # Strip only the single slash separating the bucket from the object name;
    # any further leading slashes are part of the name and must be kept.
    if path.startswith("/"):
        path = path[1:]

    # returns provider, bck_name, path
    return parsed_url.scheme, parsed_url.netloc, path


def list_objects(
    client: Client,
    urls_list: Optional[List[str]],
    ais_source_list: Optional[List[AISSource]],
) -> List[Object]:
    """
    Create list of all the objects in the given urls and AISSources

    This is the eager counterpart of ``list_objects_iterator``: it drains that
    iterator into a list, so both functions always agree on content and order
    (objects from ``urls_list`` first, then objects from ``ais_source_list``).

    Args:
        client (Client): AIStore client object of the calling method
        urls_list (List[str]): list of urls; ``None`` is treated as empty
        ais_source_list (List[AISSource]): list of AISSource objects to load
            data; ``None`` is treated as empty

    Returns:
        List[Object]: list of all the objects in the given urls and AISSources
    """
    return list(list_objects_iterator(client, urls_list, ais_source_list))


def unparse_url(provider: str, bck_name: str, obj_name: str) -> str:
    """
    To generate URL based on provider, bck_name and object name

    Args:
        provider(str): Provider name ('ais', 'gcp', etc)
        bck_name(str): Bucket name
        obj_name(str): Object name with extension.
    Returns:
        unparsed_url(str): Unparsed url (complete url),
            eg. "ais://bucket1/file.txt"
    """
    # urlunparse expects (scheme, netloc, path, params, query, fragment);
    # only the first three components are used here.
    return urlunparse((provider, bck_name, obj_name, "", "", ""))


def list_objects_iterator(
    client: Client,
    urls_list: Optional[List[str]],
    ais_source_list: Optional[List[AISSource]],
) -> Iterable[Object]:
    """
    Create an iterable over all the objects in the given urls and AISSources

    Objects are produced lazily: first those listed for every url (in the
    order given), then those listed by every AISSource (in the order given).

    Args:
        client (Client): AIStore client object of the calling method
        urls_list (List[str]): list of urls; ``None`` is treated as empty
        ais_source_list (List[AISSource]): list of AISSource objects to load
            data; ``None`` is treated as empty

    Returns:
        Iterable[Object]: iterable over all the objects in the given urls and AISSources
    """
    for url in [] if urls_list is None else urls_list:
        provider, bck_name, prefix = parse_url(url)
        bucket = client.bucket(bck_name=bck_name, provider=provider)
        # The URL path is passed as the listing prefix, so a bucket-only URL
        # (empty path) lists with an empty prefix.
        yield from bucket.list_all_objects_iter(prefix=prefix)

    for source in [] if ais_source_list is None else ais_source_list:
        yield from source.list_all_objects_iter()