github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/python/aistore/sdk/object.py (about)

     1  #
     2  # Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved.
     3  #
     4  from io import BufferedWriter
     5  from typing import NewType
     6  
     7  import requests
     8  
     9  from aistore.sdk.const import (
    10      DEFAULT_CHUNK_SIZE,
    11      HTTP_METHOD_DELETE,
    12      HTTP_METHOD_GET,
    13      HTTP_METHOD_HEAD,
    14      HTTP_METHOD_PUT,
    15      QPARAM_ARCHPATH,
    16      QPARAM_ETL_NAME,
    17      QPARAM_LATEST,
    18      ACT_PROMOTE,
    19      HTTP_METHOD_POST,
    20      URL_PATH_OBJECTS,
    21      HEADER_RANGE,
    22      ACT_BLOB_DOWNLOAD,
    23      HEADER_OBJECT_BLOB_DOWNLOAD,
    24      HEADER_OBJECT_BLOB_WORKERS,
    25      HEADER_OBJECT_BLOB_CHUNK_SIZE,
    26  )
    27  from aistore.sdk.object_reader import ObjectReader
    28  from aistore.sdk.types import ActionMsg, PromoteAPIArgs, BlobMsg
    29  from aistore.sdk.utils import read_file_bytes, validate_file
    30  
# Distinct static type used to annotate HTTP response headers returned by this
# module; it wraps the case-insensitive dict type from `requests`.
Header = NewType("Header", requests.structures.CaseInsensitiveDict)
    32  
    33  
    34  # pylint: disable=consider-using-with,unused-variable
    35  class Object:
    36      """
    37      A class representing an object of a bucket bound to a client.
    38  
    39      Args:
    40          bucket (Bucket): Bucket to which this object belongs
    41          name (str): name of object
    42  
    43      """
    44  
    45      def __init__(self, bucket: "Bucket", name: str):
    46          self._bucket = bucket
    47          self._client = bucket.client
    48          self._bck_name = bucket.name
    49          self._qparams = bucket.qparam
    50          self._name = name
    51          self._object_path = f"{URL_PATH_OBJECTS}/{ self._bck_name}/{ self.name }"
    52  
    53      @property
    54      def bucket(self):
    55          """Bucket containing this object"""
    56          return self._bucket
    57  
    58      @property
    59      def name(self):
    60          """Name of this object"""
    61          return self._name
    62  
    63      def head(self) -> Header:
    64          """
    65          Requests object properties.
    66  
    67          Returns:
    68              Response header with the object properties.
    69  
    70          Raises:
    71              requests.RequestException: "There was an ambiguous exception that occurred while handling..."
    72              requests.ConnectionError: Connection error
    73              requests.ConnectionTimeout: Timed out connecting to AIStore
    74              requests.ReadTimeout: Timed out waiting response from AIStore
    75              requests.exceptions.HTTPError(404): The object does not exist
    76          """
    77          return self._client.request(
    78              HTTP_METHOD_HEAD,
    79              path=self._object_path,
    80              params=self._qparams,
    81          ).headers
    82  
    83      # pylint: disable=too-many-arguments
    84      def get(
    85          self,
    86          archpath: str = "",
    87          chunk_size: int = DEFAULT_CHUNK_SIZE,
    88          etl_name: str = None,
    89          writer: BufferedWriter = None,
    90          latest: bool = False,
    91          byte_range: str = None,
    92          blob_chunk_size: str = None,
    93          blob_num_workers: str = None,
    94      ) -> ObjectReader:
    95          """
    96          Reads an object
    97  
    98          Args:
    99              archpath (str, optional): If the object is an archive, use `archpath` to extract a single file
   100                  from the archive
   101              chunk_size (int, optional): chunk_size to use while reading from stream
   102              etl_name (str, optional): Transforms an object based on ETL with etl_name
   103              writer (BufferedWriter, optional): User-provided writer for writing content output
   104                  User is responsible for closing the writer
   105              latest (bool, optional): GET the latest object version from the associated remote bucket
   106              byte_range (str, optional): Specify a specific data segment of the object for transfer, including
   107                  both the start and end of the range (e.g. "bytes=0-499" to request the first 500 bytes)
   108              blob_chunk_size (str, optional):  Utilize built-in blob-downloader with the given chunk size in
   109                  IEC or SI units, or "raw" bytes (e.g.: 4mb, 1MiB, 1048576, 128k;)
   110              blob_num_workers (str, optional): Utilize built-in blob-downloader with the given number of
   111                  concurrent blob-downloading workers (readers)
   112  
   113          Returns:
   114              The stream of bytes to read an object or a file inside an archive.
   115  
   116          Raises:
   117              requests.RequestException: "There was an ambiguous exception that occurred while handling..."
   118              requests.ConnectionError: Connection error
   119              requests.ConnectionTimeout: Timed out connecting to AIStore
   120              requests.ReadTimeout: Timed out waiting response from AIStore
   121          """
   122          params = self._qparams.copy()
   123          params[QPARAM_ARCHPATH] = archpath
   124          if etl_name:
   125              params[QPARAM_ETL_NAME] = etl_name
   126          if latest:
   127              params[QPARAM_LATEST] = "true"
   128  
   129          if byte_range and (blob_chunk_size or blob_num_workers):
   130              raise ValueError("Cannot use Byte Range with Blob Download")
   131          headers = {}
   132          if blob_chunk_size or blob_num_workers:
   133              headers[HEADER_OBJECT_BLOB_DOWNLOAD] = "true"
   134          if blob_chunk_size:
   135              headers[HEADER_OBJECT_BLOB_CHUNK_SIZE] = blob_chunk_size
   136          if blob_num_workers:
   137              headers[HEADER_OBJECT_BLOB_WORKERS] = blob_num_workers
   138          if byte_range:
   139              # For range formatting, see the spec:
   140              # https://www.rfc-editor.org/rfc/rfc7233#section-2.1
   141              headers = {HEADER_RANGE: byte_range}
   142  
   143          resp = self._client.request(
   144              HTTP_METHOD_GET,
   145              path=self._object_path,
   146              params=params,
   147              stream=True,
   148              headers=headers,
   149          )
   150          obj_reader = ObjectReader(
   151              stream=resp,
   152              response_headers=resp.headers,
   153              chunk_size=chunk_size,
   154          )
   155          if writer:
   156              writer.writelines(obj_reader)
   157          return obj_reader
   158  
   159      def get_semantic_url(self):
   160          """
   161          Get the semantic URL to the object
   162  
   163          Returns:
   164              Semantic URL to get object
   165          """
   166  
   167          return f"{self.bucket.provider}://{self._bck_name}/{self._name}"
   168  
   169      def get_url(self, archpath: str = "", etl_name: str = None):
   170          """
   171          Get the full url to the object including base url and any query parameters
   172  
   173          Args:
   174              archpath (str, optional): If the object is an archive, use `archpath` to extract a single file
   175                  from the archive
   176              etl_name (str, optional): Transforms an object based on ETL with etl_name
   177  
   178          Returns:
   179              Full URL to get object
   180  
   181          """
   182          params = self._qparams.copy()
   183          if archpath:
   184              params[QPARAM_ARCHPATH] = archpath
   185          if etl_name:
   186              params[QPARAM_ETL_NAME] = etl_name
   187          return self._client.get_full_url(self._object_path, params)
   188  
   189      def put_content(self, content: bytes) -> Header:
   190          """
   191          Puts bytes as an object to a bucket in AIS storage.
   192  
   193          Args:
   194              content (bytes): Bytes to put as an object.
   195  
   196          Raises:
   197              requests.RequestException: "There was an ambiguous exception that occurred while handling..."
   198              requests.ConnectionError: Connection error
   199              requests.ConnectionTimeout: Timed out connecting to AIStore
   200              requests.ReadTimeout: Timed out waiting response from AIStore
   201          """
   202          self._put_data(self.name, content)
   203  
   204      def put_file(self, path: str = None):
   205          """
   206          Puts a local file as an object to a bucket in AIS storage.
   207  
   208          Args:
   209              path (str): Path to local file
   210  
   211          Raises:
   212              requests.RequestException: "There was an ambiguous exception that occurred while handling..."
   213              requests.ConnectionError: Connection error
   214              requests.ConnectionTimeout: Timed out connecting to AIStore
   215              requests.ReadTimeout: Timed out waiting response from AIStore
   216              ValueError: The path provided is not a valid file
   217          """
   218          validate_file(path)
   219          self._put_data(self.name, read_file_bytes(path))
   220  
   221      def _put_data(self, obj_name: str, data: bytes):
   222          url = f"{URL_PATH_OBJECTS}/{ self._bck_name }/{ obj_name }"
   223          self._client.request(
   224              HTTP_METHOD_PUT,
   225              path=url,
   226              params=self._qparams,
   227              data=data,
   228          )
   229  
   230      # pylint: disable=too-many-arguments
   231      def promote(
   232          self,
   233          path: str,
   234          target_id: str = "",
   235          recursive: bool = False,
   236          overwrite_dest: bool = False,
   237          delete_source: bool = False,
   238          src_not_file_share: bool = False,
   239      ) -> Header:
   240          """
   241          Promotes a file or folder an AIS target can access to a bucket in AIS storage.
   242          These files can be either on the physical disk of an AIS target itself or on a network file system
   243          the cluster can access.
   244          See more info here: https://aiatscale.org/blog/2022/03/17/promote
   245  
   246          Args:
   247              path (str): Path to file or folder the AIS cluster can reach
   248              target_id (str, optional): Promote files from a specific target node
   249              recursive (bool, optional): Recursively promote objects from files in directories inside the path
   250              overwrite_dest (bool, optional): Overwrite objects already on AIS
   251              delete_source (bool, optional): Delete the source files when done promoting
   252              src_not_file_share (bool, optional): Optimize if the source is guaranteed to not be on a file share
   253  
   254          Returns:
   255              Object properties
   256  
   257          Raises:
   258              requests.RequestException: "There was an ambiguous exception that occurred while handling..."
   259              requests.ConnectionError: Connection error
   260              requests.ConnectionTimeout: Timed out connecting to AIStore
   261              requests.ReadTimeout: Timed out waiting response from AIStore
   262              AISError: Path does not exist on the AIS cluster storage
   263          """
   264          url = f"{URL_PATH_OBJECTS}/{ self._bck_name }"
   265          value = PromoteAPIArgs(
   266              source_path=path,
   267              object_name=self.name,
   268              target_id=target_id,
   269              recursive=recursive,
   270              overwrite_dest=overwrite_dest,
   271              delete_source=delete_source,
   272              src_not_file_share=src_not_file_share,
   273          ).as_dict()
   274          json_val = ActionMsg(action=ACT_PROMOTE, name=path, value=value).dict()
   275  
   276          return self._client.request(
   277              HTTP_METHOD_POST, path=url, params=self._qparams, json=json_val
   278          ).headers
   279  
   280      def delete(self):
   281          """
   282          Delete an object from a bucket.
   283  
   284          Returns:
   285              None
   286  
   287          Raises:
   288              requests.RequestException: "There was an ambiguous exception that occurred while handling..."
   289              requests.ConnectionError: Connection error
   290              requests.ConnectionTimeout: Timed out connecting to AIStore
   291              requests.ReadTimeout: Timed out waiting response from AIStore
   292              requests.exceptions.HTTPError(404): The object does not exist
   293          """
   294          self._client.request(
   295              HTTP_METHOD_DELETE,
   296              path=self._object_path,
   297              params=self._qparams,
   298          )
   299  
   300      def blob_download(
   301          self,
   302          chunk_size: int = None,
   303          num_workers: int = None,
   304          latest: bool = False,
   305      ) -> str:
   306          """
   307          A special facility to download very large remote objects a.k.a. BLOBs
   308          Returns job ID that for the blob download operation.
   309  
   310          Args:
   311              chunk_size (int): chunk size in bytes
   312              num_workers (int): number of concurrent blob-downloading workers (readers)
   313              latest (bool): GET the latest object version from the associated remote bucket
   314  
   315          Returns:
   316              Job ID (as str) that can be used to check the status of the operation
   317  
   318          Raises:
   319              aistore.sdk.errors.AISError: All other types of errors with AIStore
   320              requests.ConnectionError: Connection error
   321              requests.ConnectionTimeout: Timed out connecting to AIStore
   322              requests.exceptions.HTTPError: Service unavailable
   323              requests.RequestException: "There was an ambiguous exception that occurred while handling..."
   324          """
   325          params = self._qparams.copy()
   326          value = BlobMsg(
   327              chunk_size=chunk_size,
   328              num_workers=num_workers,
   329              latest=latest,
   330          ).as_dict()
   331          json_val = ActionMsg(
   332              action=ACT_BLOB_DOWNLOAD, value=value, name=self.name
   333          ).dict()
   334          url = f"{URL_PATH_OBJECTS}/{ self._bck_name }"
   335          return self._client.request(
   336              HTTP_METHOD_POST, path=url, params=params, json=json_val
   337          ).text