github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/azure/blobstorageio.py

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Azure Blob Storage client.
"""

# pytype: skip-file

import errno
import io
import logging
import os
import re
import tempfile
import time

from apache_beam.internal.azure import auth
from apache_beam.io.filesystemio import Downloader
from apache_beam.io.filesystemio import DownloaderStream
from apache_beam.io.filesystemio import Uploader
from apache_beam.io.filesystemio import UploaderStream
from apache_beam.options.pipeline_options import AzureOptions
from apache_beam.utils import retry
from apache_beam.utils.annotations import deprecated

_LOGGER = logging.getLogger(__name__)

try:
  # pylint: disable=wrong-import-order, wrong-import-position
  # pylint: disable=ungrouped-imports
  from azure.core.exceptions import ResourceNotFoundError
  from azure.storage.blob import (
      BlobServiceClient,
      ContentSettings,
  )
  AZURE_DEPS_INSTALLED = True
except ImportError:
  AZURE_DEPS_INSTALLED = False

DEFAULT_READ_BUFFER_SIZE = 16 * 1024 * 1024

MAX_BATCH_OPERATION_SIZE = 100


def parse_azfs_path(azfs_path, blob_optional=False, get_account=False):
  """Return the storage account, container and blob names of the given
  azfs:// path.

  Args:
    azfs_path: path of the form
               azfs://<storage-account>/<container>/<blob>.
    blob_optional: if True, the blob name may be empty.
    get_account: if True, include the storage account in the result.

  Returns:
    (container, blob), or (storage_account, container, blob) when
    get_account is True.

  Raises:
    ValueError: if azfs_path is not a valid Azure Blob Storage path.
  """
  match = re.match(
      '^azfs://([a-z0-9]{3,24})/([a-z0-9](?![a-z0-9-]*--[a-z0-9-]*)'
      '[a-z0-9-]{1,61}[a-z0-9])/(.*)$',
      azfs_path)
  if match is None or (match.group(3) == '' and not blob_optional):
    raise ValueError(
        'Azure Blob Storage path must be in the form '
        'azfs://<storage-account>/<container>/<path>.')
  result = None
  if get_account:
    result = match.group(1), match.group(2), match.group(3)
  else:
    result = match.group(2), match.group(3)
  return result

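# Illustrative examples (hypothetical values):
#
#   parse_azfs_path('azfs://myaccount/mycontainer/path/to/blob')
#   # -> ('mycontainer', 'path/to/blob')
#   parse_azfs_path(
#       'azfs://myaccount/mycontainer/path/to/blob', get_account=True)
#   # -> ('myaccount', 'mycontainer', 'path/to/blob')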

def get_azfs_url(storage_account, container, blob=''):
  """Returns the url in the form of
  https://account.blob.core.windows.net/container/blob-name
  """
  return 'https://' + storage_account + '.blob.core.windows.net/' + \
          container + '/' + blob
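
# Illustrative: get_azfs_url('myaccount', 'mycontainer', 'a/b.txt')
# -> 'https://myaccount.blob.core.windows.net/mycontainer/a/b.txt'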


class Blob():
  """A Blob in Azure Blob Storage."""
  def __init__(self, etag, name, last_updated, size, mime_type):
    self.etag = etag
    self.name = name
    self.last_updated = last_updated
    self.size = size
    self.mime_type = mime_type


class BlobStorageIOError(IOError, retry.PermanentException):
  """Blob Storage IO error that should not be retried."""
  pass


class BlobStorageError(Exception):
  """Blob Storage client error."""
  def __init__(self, message=None, code=None):
    self.message = message
    self.code = code


class BlobStorageIO(object):
  """Azure Blob Storage I/O client."""
  def __init__(self, client=None, pipeline_options=None):
    # Fail fast with a clear error if the Azure SDK is missing; otherwise
    # referencing BlobServiceClient below would raise a NameError.
    if not AZURE_DEPS_INSTALLED:
      raise RuntimeError('Azure dependencies are not installed. Unable to run.')
    if client is None:
      azure_options = pipeline_options.view_as(AzureOptions)
      connect_str = azure_options.azure_connection_string or \
                    os.getenv('AZURE_STORAGE_CONNECTION_STRING')
      if connect_str:
        self.client = BlobServiceClient.from_connection_string(
            conn_str=connect_str)
      else:
        credential = auth.get_service_credentials(pipeline_options)
        self.client = BlobServiceClient(
            account_url=azure_options.blob_service_endpoint,
            credential=credential)
    else:
      self.client = client
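
  # Credential resolution above, in order: an explicitly supplied client; a
  # connection string (the --azure_connection_string pipeline option or the
  # AZURE_STORAGE_CONNECTION_STRING environment variable); otherwise pipeline
  # credentials against --blob_service_endpoint.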

  def open(
      self,
      filename,
      mode='r',
      read_buffer_size=DEFAULT_READ_BUFFER_SIZE,
      mime_type='application/octet-stream'):
    """Open an Azure Blob Storage file path for reading or writing.

    Args:
      filename (str): Azure Blob Storage file path in the form
                      ``azfs://<storage-account>/<container>/<path>``.
      mode (str): ``'r'`` for reading or ``'w'`` for writing.
      read_buffer_size (int): Buffer size to use during read operations.
      mime_type (str): Mime type to set for write operations.

    Returns:
      Azure Blob Storage file object.
    Raises:
      ValueError: Invalid open file mode.
    """
    if mode == 'r' or mode == 'rb':
      downloader = BlobStorageDownloader(
          self.client, filename, buffer_size=read_buffer_size)
      return io.BufferedReader(
          DownloaderStream(
              downloader, read_buffer_size=read_buffer_size, mode=mode),
          buffer_size=read_buffer_size)
    elif mode == 'w' or mode == 'wb':
      uploader = BlobStorageUploader(self.client, filename, mime_type)
      return io.BufferedWriter(
          UploaderStream(uploader, mode=mode), buffer_size=128 * 1024)
    else:
      raise ValueError('Invalid file open mode: %s.' % mode)
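
  # Illustrative usage (hypothetical path):
  #
  #   client = BlobStorageIO(pipeline_options=options)
  #   with client.open('azfs://myaccount/mycontainer/data.bin', 'rb') as f:
  #     contents = f.read()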

  @retry.with_exponential_backoff(
      retry_filter=retry.retry_on_beam_io_error_filter)
  def copy(self, src, dest):
    """Copies a single Azure Blob Storage blob from src to dest.

    Args:
      src: Blob Storage file path pattern in the form
           azfs://<storage-account>/<container>/[name].
      dest: Blob Storage file path pattern in the form
            azfs://<storage-account>/<container>/[name].

    Raises:
      TimeoutError: on timeout.
    """
    src_storage_account, src_container, src_blob = parse_azfs_path(
        src, get_account=True)
    dest_container, dest_blob = parse_azfs_path(dest)

    source_blob = get_azfs_url(src_storage_account, src_container, src_blob)
    copied_blob = self.client.get_blob_client(dest_container, dest_blob)

    try:
      copied_blob.start_copy_from_url(source_blob)
    except ResourceNotFoundError as e:
      message = e.reason
      code = e.status_code
      raise BlobStorageError(message, code)
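
  # Note: start_copy_from_url initiates a server-side copy; for large blobs
  # the copy may still be in progress when this call returns.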

  # We intentionally do not decorate this method with a retry, since the
  # underlying copy operation is already an idempotent operation protected
  # by retry decorators.
  def copy_tree(self, src, dest):
    """Copies the given Azure Blob Storage directory and its contents
    recursively from src to dest.

    Args:
      src: Blob Storage file path pattern in the form
           azfs://<storage-account>/<container>/[name].
      dest: Blob Storage file path pattern in the form
            azfs://<storage-account>/<container>/[name].

    Returns:
      List of tuples of (src, dest, exception) where exception is None if the
      operation succeeded or the relevant exception if the operation failed.
    """
    assert src.endswith('/')
    assert dest.endswith('/')

    results = []
    for entry in self.list_prefix(src):
      rel_path = entry[len(src):]
      try:
        self.copy(entry, dest + rel_path)
        results.append((entry, dest + rel_path, None))
      except BlobStorageError as e:
        results.append((entry, dest + rel_path, e))

    return results

  # We intentionally do not decorate this method with a retry, since the
  # underlying copy operation is already an idempotent operation protected
  # by retry decorators.
  def copy_paths(self, src_dest_pairs):
    """Copies the given Azure Blob Storage blobs from src to dest. This can
    handle directory or file paths.

    Args:
      src_dest_pairs: List of (src, dest) tuples of
                      azfs://<storage-account>/<container>/[name] file paths
                      to copy from src to dest.

    Returns:
      List of tuples of (src, dest, exception) in the same order as the
      src_dest_pairs argument, where exception is None if the operation
      succeeded or the relevant exception if the operation failed.
    """
    if not src_dest_pairs:
      return []

    results = []

    for src_path, dest_path in src_dest_pairs:
      # Case 1. They are directories.
      if src_path.endswith('/') and dest_path.endswith('/'):
        try:
          results += self.copy_tree(src_path, dest_path)
        except BlobStorageError as e:
          results.append((src_path, dest_path, e))

      # Case 2. They are individual blobs.
      elif not src_path.endswith('/') and not dest_path.endswith('/'):
        try:
          self.copy(src_path, dest_path)
          results.append((src_path, dest_path, None))
        except BlobStorageError as e:
          results.append((src_path, dest_path, e))

      # Mismatched paths (one directory, one non-directory) get an error.
      else:
        e = BlobStorageError(
            "Unable to copy mismatched paths "
            "(directory, non-directory): %s, %s" % (src_path, dest_path),
            400)
        results.append((src_path, dest_path, e))

    return results
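
  # Illustrative input (hypothetical paths): one directory pair and one
  # blob pair.
  #
  #   pairs = [
  #       ('azfs://acct/cont/src_dir/', 'azfs://acct/cont/dst_dir/'),
  #       ('azfs://acct/cont/a.txt', 'azfs://acct/cont/b.txt'),
  #   ]
  #   results = client.copy_paths(pairs)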

  # We intentionally do not decorate this method with a retry, since the
  # underlying copy and delete operations are already idempotent operations
  # protected by retry decorators.
  def rename(self, src, dest):
    """Renames the given Azure Blob Storage blob from src to dest.

    Args:
      src: Blob Storage file path pattern in the form
           azfs://<storage-account>/<container>/[name].
      dest: Blob Storage file path pattern in the form
            azfs://<storage-account>/<container>/[name].
    """
    self.copy(src, dest)
    self.delete(src)

  # We intentionally do not decorate this method with a retry, since the
  # underlying copy and delete operations are already idempotent operations
  # protected by retry decorators.
  def rename_files(self, src_dest_pairs):
    """Renames the given Azure Blob Storage blobs from src to dest.

    Args:
      src_dest_pairs: List of (src, dest) tuples of
                      azfs://<storage-account>/<container>/[name]
                      file paths to rename from src to dest.
    Returns: List of tuples of (src, dest, exception) in the same order as the
             src_dest_pairs argument, where exception is None if the operation
             succeeded or the relevant exception if the operation failed.
    """
    if not src_dest_pairs:
      return []

    for src, dest in src_dest_pairs:
      if src.endswith('/') or dest.endswith('/'):
        raise ValueError('Unable to rename a directory.')

    # Results from copy operation.
    copy_results = self.copy_paths(src_dest_pairs)
    paths_to_delete = \
        [src for (src, _, error) in copy_results if error is None]
    # Results from delete operation.
    delete_results = self.delete_files(paths_to_delete)

    # Get rename file results (list of tuples).
    results = []

    # Index delete results by src for constant-time lookups.
    delete_results_dict = {src: error for (src, error) in delete_results}

    for src, dest, error in copy_results:
      # If there was an error in the copy operation.
      if error is not None:
        results.append((src, dest, error))
      # If there was an error in the delete operation.
      elif delete_results_dict[src] is not None:
        results.append((src, dest, delete_results_dict[src]))
      # If there was no error in the operations.
      else:
        results.append((src, dest, None))

    return results
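
  # Illustrative result shape (hypothetical paths):
  #
  #   client.rename_files(
  #       [('azfs://acct/cont/a.txt', 'azfs://acct/cont/b.txt')])
  #   # -> [('azfs://acct/cont/a.txt', 'azfs://acct/cont/b.txt', None)]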

  def exists(self, path):
    """Returns whether the given Azure Blob Storage blob exists.

    Args:
      path: Azure Blob Storage file path pattern in the form
            azfs://<storage-account>/<container>/[name].
    """
    try:
      self._blob_properties(path)
      return True
    except BlobStorageError as e:
      # _blob_properties wraps ResourceNotFoundError in BlobStorageError,
      # so the HTTP status code is carried in e.code.
      if e.code == 404:
        # HTTP 404 indicates that the file did not exist.
        return False
      else:
        # We re-raise all other exceptions.
        raise

  def size(self, path):
    """Returns the size of a single Blob Storage blob.

    This method does not perform glob expansion. Hence the
    given path must be for a single Blob Storage blob.

    Returns: size of the Blob Storage blob in bytes.
    """
    return self._blob_properties(path).size

  def last_updated(self, path):
    """Returns the last updated epoch time of a single
    Azure Blob Storage blob.

    This method does not perform glob expansion. Hence the
    given path must be for a single Azure Blob Storage blob.

    Returns: last updated time of the Azure Blob Storage blob
    in seconds.
    """
    return self._updated_to_seconds(self._blob_properties(path).last_modified)

  def checksum(self, path):
    """Looks up the checksum of an Azure Blob Storage blob.

    Args:
      path: Azure Blob Storage file path pattern in the form
            azfs://<storage-account>/<container>/[name].
    """
    return self._blob_properties(path).etag

  def _status(self, path):
    """For internal use only; no backwards-compatibility guarantees.

    Returns supported fields (checksum, last_updated, size) of a single object
    as a dict at once.

    This method does not perform glob expansion. Hence the given path must be
    for a single blob property.

    Returns: dict of fields of the blob property.
    """
    properties = self._blob_properties(path)
    file_status = {}
    if hasattr(properties, 'etag'):
      file_status['checksum'] = properties.etag
    if hasattr(properties, 'last_modified'):
      file_status['last_updated'] = self._updated_to_seconds(
          properties.last_modified)
    if hasattr(properties, 'size'):
      file_status['size'] = properties.size
    return file_status

  @retry.with_exponential_backoff(
      retry_filter=retry.retry_on_beam_io_error_filter)
  def _blob_properties(self, path):
    """Returns a blob properties object for the given path.

    This method does not perform glob expansion. Hence the given path must be
    for a single blob properties object.

    Returns: blob properties.
    """
    container, blob = parse_azfs_path(path)
    blob_to_check = self.client.get_blob_client(container, blob)
    try:
      properties = blob_to_check.get_blob_properties()
    except ResourceNotFoundError as e:
      message = e.reason
      code = e.status_code
      raise BlobStorageError(message, code)

    return properties

  @staticmethod
  def _updated_to_seconds(updated):
    """Helper function to transform the last_modified field of a blob
    properties response into seconds since the epoch."""
    return (
        time.mktime(updated.timetuple()) - time.timezone +
        updated.microsecond / 1000000.0)
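
  # Note: time.mktime() interprets the time tuple as local time, so
  # subtracting time.timezone converts the result back to a UTC-based epoch,
  # assuming `updated` is a UTC datetime (as returned by the Azure client).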

  @retry.with_exponential_backoff(
      retry_filter=retry.retry_on_beam_io_error_filter)
  def delete(self, path):
    """Deletes a single blob at the given Azure Blob Storage path.

    Args:
      path: Azure Blob Storage file path pattern in the form
            azfs://<storage-account>/<container>/[name].
    """
    container, blob = parse_azfs_path(path)
    blob_to_delete = self.client.get_blob_client(container, blob)
    try:
      blob_to_delete.delete_blob()
    except ResourceNotFoundError as e:
      if e.status_code == 404:
        # Return success when the file doesn't exist anymore for idempotency.
        return
      else:
        _LOGGER.error('HTTP error while deleting file %s', path)
        raise e

  # We intentionally do not decorate this method with a retry, since the
  # underlying delete operations are already idempotent operations
  # protected by retry decorators.
  def delete_paths(self, paths):
    """Deletes the given Azure Blob Storage paths. This can handle
    directory or file paths.

    Args:
      paths: list of Azure Blob Storage paths in the form
             azfs://<storage-account>/<container>/[name] that give the
             file blobs and directories to be deleted.

    Returns:
      Dictionary of the form {path: error}, where error is None if the
      operation succeeded.
    """
    directories, blobs = [], []

    # Separate directories from individual blobs.
    for path in paths:
      if path.endswith('/'):
        directories.append(path)
      else:
        blobs.append(path)

    results = {}

    for directory in directories:
      directory_result = dict(self.delete_tree(directory))
      results.update(directory_result)

    blobs_results = dict(self.delete_files(blobs))
    results.update(blobs_results)

    return results

  # We intentionally do not decorate this method with a retry, since the
  # underlying delete operations are already idempotent operations
  # protected by retry decorators.
  def delete_tree(self, root):
    """Deletes all blobs under the given Azure Blob Storage virtual
    directory.

    Args:
      root: Azure Blob Storage file path pattern in the form
            azfs://<storage-account>/<container>/[name]
            (ending with a "/").

    Returns:
      List of tuples of (path, exception), where each path is a blob
      under the given root. exception is None if the operation succeeded
      or the relevant exception if the operation failed.
    """
    assert root.endswith('/')

    # Get the blobs under the root directory.
    paths_to_delete = self.list_prefix(root)

    return self.delete_files(paths_to_delete)

  # We intentionally do not decorate this method with a retry, since the
  # underlying delete operations are already idempotent operations
  # protected by retry decorators.
  def delete_files(self, paths):
    """Deletes the given Azure Blob Storage blobs.

    Args:
      paths: list of Azure Blob Storage paths in the form
             azfs://<storage-account>/<container>/[name] that give the
             file blobs to be deleted.

    Returns:
      List of tuples of (path, exception) in the same order as the
      paths argument, where exception is None if the operation
      succeeded or the relevant exception if the operation failed.
    """
    if not paths:
      return []

    # Group blobs into containers.
    containers, blobs = zip(*[parse_azfs_path(path, get_account=False) \
        for path in paths])

    grouped_blobs = {container: [] for container in containers}

    # Fill dictionary.
    for container, blob in zip(containers, blobs):
      grouped_blobs[container].append(blob)

    results = {}

    # Delete minibatches of blobs for each container.
    for container, blobs in grouped_blobs.items():
      for i in range(0, len(blobs), MAX_BATCH_OPERATION_SIZE):
        blobs_to_delete = blobs[i:i + MAX_BATCH_OPERATION_SIZE]
        results.update(self._delete_batch(container, blobs_to_delete))

    final_results = \
        [(path, results[parse_azfs_path(path, get_account=False)]) \
        for path in paths]

    return final_results
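
  # Illustrative call (hypothetical paths spanning two containers):
  #
  #   client.delete_files(
  #       ['azfs://acct/cont1/a.txt', 'azfs://acct/cont2/b.txt'])
  #   # -> [('azfs://acct/cont1/a.txt', None),
  #   #     ('azfs://acct/cont2/b.txt', None)]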

  @retry.with_exponential_backoff(
      retry_filter=retry.retry_on_beam_io_error_filter)
  def _delete_batch(self, container, blobs):
    """A helper method that deletes a minibatch of blobs, all of which
    live in the same container.

    Args:
      container: container name.
      blobs: list of blobs to be deleted.

    Returns:
      Dictionary of the form {(container, blob): error}, where error is
      None if the operation succeeded.
    """
    container_client = self.client.get_container_client(container)
    results = {}

    for blob in blobs:
      try:
        # delete_blob() returns None, so a None value marks success.
        response = container_client.delete_blob(blob)
        results[(container, blob)] = response
      except ResourceNotFoundError as e:
        results[(container, blob)] = e.status_code

    return results

  @deprecated(since='2.45.0', current='list_files')
  def list_prefix(self, path, with_metadata=False):
    """Lists files matching the prefix.

    Args:
      path: Azure Blob Storage file path pattern in the form
            azfs://<storage-account>/<container>/[name].
      with_metadata: Experimental. Specifies whether to return file metadata.

    Returns:
      If ``with_metadata`` is False: dict of file name -> size; if
      ``with_metadata`` is True: dict of file name -> tuple(size, timestamp).
    """
    file_info = {}
    for file_metadata in self.list_files(path, with_metadata):
      file_info[file_metadata[0]] = file_metadata[1]

    return file_info

  def list_files(self, path, with_metadata=False):
    """Lists files matching the prefix.

    Args:
      path: Azure Blob Storage file path pattern in the form
            azfs://<storage-account>/<container>/[name].
      with_metadata: Experimental. Specifies whether to return file metadata.

    Returns:
      If ``with_metadata`` is False: generator of tuple(file name, size); if
      ``with_metadata`` is True: generator of
      tuple(file name, tuple(size, timestamp)).
    """
    storage_account, container, blob = parse_azfs_path(
        path, blob_optional=True, get_account=True)
    file_info = set()
    counter = 0
    start_time = time.time()

    if with_metadata:
      _LOGGER.debug("Starting to collect file information for the input")
    else:
      _LOGGER.debug("Starting the size estimation of the input")
    container_client = self.client.get_container_client(container)

    response = retry.with_exponential_backoff(
        retry_filter=retry.retry_on_beam_io_error_filter)(
            container_client.list_blobs)(
                name_starts_with=blob)
    for item in response:
      file_name = "azfs://%s/%s/%s" % (storage_account, container, item.name)
      if file_name not in file_info:
        file_info.add(file_name)
        counter += 1
        if counter % 10000 == 0:
          if with_metadata:
            _LOGGER.info(
                "Finished computing file information of: %s files",
                len(file_info))
          else:
            _LOGGER.info(
                "Finished computing size of: %s files", len(file_info))
        if with_metadata:
          yield file_name, (
              item.size, self._updated_to_seconds(item.last_modified))
        else:
          yield file_name, item.size

    _LOGGER.log(
        # Do not spam logs when list_prefix is likely used to check an empty
        # folder.
        logging.INFO if counter > 0 else logging.DEBUG,
        "Finished listing %s files in %s seconds.",
        counter,
        time.time() - start_time)
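
  # Illustrative usage (hypothetical prefix):
  #
  #   for file_name, size in client.list_files('azfs://acct/cont/logs'):
  #     ...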


class BlobStorageDownloader(Downloader):
  def __init__(self, client, path, buffer_size):
    self._client = client
    self._path = path
    self._container, self._blob = parse_azfs_path(path)
    self._buffer_size = buffer_size

    self._blob_to_download = self._client.get_blob_client(
        self._container, self._blob)

    try:
      properties = self._get_object_properties()
    except ResourceNotFoundError as http_error:
      if http_error.status_code == 404:
        raise IOError(errno.ENOENT, 'Not found: %s' % self._path)
      else:
        _LOGGER.error(
            'HTTP error while requesting file %s: %s', self._path, http_error)
        raise

    self._size = properties.size

  @retry.with_exponential_backoff(
      retry_filter=retry.retry_on_beam_io_error_filter)
  def _get_object_properties(self):
    return self._blob_to_download.get_blob_properties()

  @property
  def size(self):
    return self._size

  def get_range(self, start, end):
    # download_blob takes an offset and a length; end is exclusive, so the
    # length to fetch is end - start.
    blob_data = self._blob_to_download.download_blob(start, end - start)
    # Returns the content as bytes.
    return blob_data.readall()


class BlobStorageUploader(Uploader):
  def __init__(self, client, path, mime_type='application/octet-stream'):
    self._client = client
    self._path = path
    self._container, self._blob = parse_azfs_path(path)
    self._content_settings = ContentSettings(mime_type)

    self._blob_to_upload = self._client.get_blob_client(
        self._container, self._blob)

    # Buffer writes in a temporary file; it is deleted automatically when
    # closed.
    self._temporary_file = tempfile.NamedTemporaryFile()

  def put(self, data):
    self._temporary_file.write(data.tobytes())

  def finish(self):
    self._temporary_file.seek(0)
    # The temporary file is deleted immediately after the operation.
    with open(self._temporary_file.name, "rb") as f:
      self._blob_to_upload.upload_blob(
          f.read(), overwrite=True, content_settings=self._content_settings)