github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/aws/s3io.py

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""AWS S3 client
"""

# pytype: skip-file

import errno
import io
import logging
import re
import time
import traceback

from apache_beam.io.aws.clients.s3 import messages
from apache_beam.io.filesystemio import Downloader
from apache_beam.io.filesystemio import DownloaderStream
from apache_beam.io.filesystemio import Uploader
from apache_beam.io.filesystemio import UploaderStream
from apache_beam.utils import retry
from apache_beam.utils.annotations import deprecated

try:
  # pylint: disable=wrong-import-order, wrong-import-position
  # pylint: disable=ungrouped-imports
  from apache_beam.io.aws.clients.s3 import boto3_client
  BOTO3_INSTALLED = True
except ImportError:
  BOTO3_INSTALLED = False

MAX_BATCH_OPERATION_SIZE = 100


def parse_s3_path(s3_path, object_optional=False):
  """Return the bucket and object names of the given s3:// path."""
  match = re.match('^s3://([^/]+)/(.*)$', s3_path)
  if match is None or (match.group(2) == '' and not object_optional):
    raise ValueError('S3 path must be in the form s3://<bucket>/<object>.')
  return match.group(1), match.group(2)


class S3IO(object):
  """S3 I/O client."""
  def __init__(self, client=None, options=None):
    if client is None and options is None:
      raise ValueError('Must provide one of client or options')
    if client is not None:
      self.client = client
    elif BOTO3_INSTALLED:
      self.client = boto3_client.Client(options=options)
    else:
      message = 'AWS dependencies are not installed, and no alternative ' \
                'client was provided to S3IO.'
      raise RuntimeError(message)

  def open(
      self,
      filename,
      mode='r',
      read_buffer_size=16 * 1024 * 1024,
      mime_type='application/octet-stream'):
    """Open an S3 file path for reading or writing.

    Args:
      filename (str): S3 file path in the form ``s3://<bucket>/<object>``.
      mode (str): ``'r'`` for reading or ``'w'`` for writing.
      read_buffer_size (int): Buffer size to use during read operations.
      mime_type (str): Mime type to set for write operations.

    Returns:
      S3 file object.

    Raises:
      ValueError: Invalid open file mode.
    """
    if mode == 'r' or mode == 'rb':
      downloader = S3Downloader(
          self.client, filename, buffer_size=read_buffer_size)
      return io.BufferedReader(
          DownloaderStream(downloader, mode=mode),
          buffer_size=read_buffer_size)
    elif mode == 'w' or mode == 'wb':
      uploader = S3Uploader(self.client, filename, mime_type)
      return io.BufferedWriter(
          UploaderStream(uploader, mode=mode), buffer_size=128 * 1024)
    else:
      raise ValueError('Invalid file open mode: %s.' % mode)
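
  # Illustrative usage sketch (editorial addition, not part of the original
  # module): how open() is typically used once an S3IO instance exists. The
  # client object and paths below are hypothetical placeholders.
  #
  #   s3 = S3IO(client=my_s3_client)  # or S3IO(options=...) if boto3 is installed
  #   with s3.open('s3://my-bucket/my-object', 'w') as f:
  #     f.write(b'hello world')
  #   with s3.open('s3://my-bucket/my-object', 'r') as f:
  #     data = f.read()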

  @deprecated(since='2.45.0', current='list_files')
  def list_prefix(self, path, with_metadata=False):
    """Lists files matching the prefix.

    ``list_prefix`` has been deprecated. Use ``list_files`` instead, which
    returns a generator of file information instead of a dict.

    Args:
      path: S3 file path pattern in the form s3://<bucket>/[name].
      with_metadata: Experimental. Specify whether to return file metadata.

    Returns:
      If ``with_metadata`` is False: dict of file name -> size; if
      ``with_metadata`` is True: dict of file name -> tuple(size, timestamp).
    """
    file_info = {}
    for file_metadata in self.list_files(path, with_metadata):
      file_info[file_metadata[0]] = file_metadata[1]

    return file_info

  def list_files(self, path, with_metadata=False):
    """Lists files matching the prefix.

    Args:
      path: S3 file path pattern in the form s3://<bucket>/[name].
      with_metadata: Experimental. Specify whether to return file metadata.

    Returns:
      If ``with_metadata`` is False: generator of tuple(file name, size); if
      ``with_metadata`` is True: generator of
      tuple(file name, tuple(size, timestamp)).
    """
    bucket, prefix = parse_s3_path(path, object_optional=True)
    request = messages.ListRequest(bucket=bucket, prefix=prefix)

    file_info = set()
    counter = 0
    start_time = time.time()

    if with_metadata:
      logging.debug("Starting to collect file information for the input")
    else:
      logging.debug("Starting the size estimation of the input")

    while True:
      # The list operation will raise an exception when trying to list a
      # nonexistent S3 path. That is not an issue here, so ignore the
      # exception rather than letting it break the procedure.
      try:
        response = retry.with_exponential_backoff(
            retry_filter=retry.retry_on_server_errors_and_timeout_filter)(
                self.client.list)(request)
      except messages.S3ClientError as e:
        if e.code == 404:
          break
        else:
          raise e

      for item in response.items:
        file_name = 's3://%s/%s' % (bucket, item.key)
        if file_name not in file_info:
          file_info.add(file_name)
          counter += 1
          if counter % 10000 == 0:
            if with_metadata:
              logging.info(
                  "Finished computing file information of: %s files",
                  len(file_info))
            else:
              logging.info(
                  "Finished computing size of: %s files", len(file_info))
          if with_metadata:
            yield file_name, (
                item.size, self._updated_to_seconds(item.last_modified))
          else:
            yield file_name, item.size

      if response.next_token:
        request.continuation_token = response.next_token
      else:
        break

    logging.log(
        # do not spam logs when list_prefix is likely used to check empty folder
        logging.INFO if counter > 0 else logging.DEBUG,
        "Finished listing %s files in %s seconds.",
        counter,
        time.time() - start_time)

    return file_info
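
  # Illustrative sketch (editorial addition, not part of the original module):
  # consuming the generator returned by list_files(). The prefix below is a
  # hypothetical placeholder; with with_metadata=True each yielded value is
  # (file name, (size, timestamp)) instead of (file name, size).
  #
  #   for file_name, size in s3.list_files('s3://my-bucket/logs/'):
  #     print(file_name, size)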

  def checksum(self, path):
    """Looks up the checksum of an S3 object.

    Args:
      path: S3 file path pattern in the form s3://<bucket>/<name>.
    """
    return self._s3_object(path).etag

  @retry.with_exponential_backoff(
      retry_filter=retry.retry_on_server_errors_and_timeout_filter)
  def copy(self, src, dest):
    """Copies a single S3 file object from src to dest.

    Args:
      src: S3 file path pattern in the form s3://<bucket>/<name>.
      dest: S3 file path pattern in the form s3://<bucket>/<name>.

    Raises:
      TimeoutError: on timeout.
    """
    src_bucket, src_key = parse_s3_path(src)
    dest_bucket, dest_key = parse_s3_path(dest)
    request = messages.CopyRequest(src_bucket, src_key, dest_bucket, dest_key)
    self.client.copy(request)

  # We intentionally do not decorate this method with a retry, since the
  # underlying copy and delete operations are already idempotent operations
  # protected by retry decorators.
  def copy_paths(self, src_dest_pairs):
    """Copies the given S3 objects from src to dest. This can handle directory
    or file paths.

    Args:
      src_dest_pairs: list of (src, dest) tuples of s3://<bucket>/<name> file
        paths to copy from src to dest
    Returns: List of tuples of (src, dest, exception) in the same order as the
             src_dest_pairs argument, where exception is None if the operation
             succeeded or the relevant exception if the operation failed.
    """
    if not src_dest_pairs: return []

    results = []

    for src_path, dest_path in src_dest_pairs:

      # Copy a directory with self.copy_tree
      if src_path.endswith('/') and dest_path.endswith('/'):
        try:
          results += self.copy_tree(src_path, dest_path)
        except messages.S3ClientError as err:
          results.append((src_path, dest_path, err))

      # Copy individual files with self.copy
      elif not src_path.endswith('/') and not dest_path.endswith('/'):
        src_bucket, src_key = parse_s3_path(src_path)
        dest_bucket, dest_key = parse_s3_path(dest_path)
        request = messages.CopyRequest(
            src_bucket, src_key, dest_bucket, dest_key)

        try:
          self.client.copy(request)
          results.append((src_path, dest_path, None))
        except messages.S3ClientError as err:
          results.append((src_path, dest_path, err))

      # Mismatched paths (one directory, one non-directory) get an error result
      else:
        e = messages.S3ClientError(
            "Can't copy mismatched paths (one directory, one non-directory):" +
            ' %s, %s' % (src_path, dest_path),
            400)
        results.append((src_path, dest_path, e))

    return results
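
  # Illustrative sketch (editorial addition, not part of the original module):
  # copy_paths() accepts a mix of file pairs and directory pairs (both ending
  # in '/'); mismatched pairs are reported as errors in the result tuples.
  # The paths below are hypothetical placeholders.
  #
  #   results = s3.copy_paths([
  #       ('s3://src-bucket/a.txt', 's3://dst-bucket/a.txt'),  # file -> file
  #       ('s3://src-bucket/dir/', 's3://dst-bucket/dir/'),    # tree copy
  #   ])
  #   failed = [(src, dst, err) for src, dst, err in results if err is not None]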

  # We intentionally do not decorate this method with a retry, since the
  # underlying copy and delete operations are already idempotent operations
  # protected by retry decorators.
  def copy_tree(self, src, dest):
    """Copies the given S3 directory and its contents recursively
    from src to dest.

    Args:
      src: S3 file path pattern in the form s3://<bucket>/<name>/.
      dest: S3 file path pattern in the form s3://<bucket>/<name>/.

    Returns:
      List of tuples of (src, dest, exception) where exception is None if the
      operation succeeded or the relevant exception if the operation failed.
    """
    assert src.endswith('/')
    assert dest.endswith('/')

    results = []
    for entry in self.list_prefix(src):
      rel_path = entry[len(src):]
      try:
        self.copy(entry, dest + rel_path)
        results.append((entry, dest + rel_path, None))
      except messages.S3ClientError as e:
        results.append((entry, dest + rel_path, e))

    return results

  @retry.with_exponential_backoff(
      retry_filter=retry.retry_on_server_errors_and_timeout_filter)
  def delete(self, path):
    """Deletes a single S3 file object.

    Args:
      path: S3 file path in the form s3://<bucket>/<name>.
    """
    bucket, object_path = parse_s3_path(path)
    request = messages.DeleteRequest(bucket, object_path)

    try:
      self.client.delete(request)
    except messages.S3ClientError as e:
      if e.code == 404:
        return  # Same behavior as GCS - don't surface a 404 error
      else:
        logging.error('HTTP error while deleting file %s: %s', path, e)
        raise e

  # We intentionally do not decorate this method with a retry, since the
  # underlying copy and delete operations are already idempotent operations
  # protected by retry decorators.
  def delete_paths(self, paths):
    """Deletes the given S3 objects. This can handle directory or file paths.

    Args:
      paths: list of S3 file or directory paths in the form
        s3://<bucket>/<name>, where directory paths end with a '/'

    Returns:
      Dict of path -> exception, with one entry per object deleted (for a
      directory input, one entry per object found under it), where exception
      is None if the operation succeeded or the relevant exception if the
      operation failed.
    """
    directories, not_directories = [], []
    for path in paths:
      if path.endswith('/'): directories.append(path)
      else: not_directories.append(path)

    results = {}

    for directory in directories:
      dir_result = dict(self.delete_tree(directory))
      results.update(dir_result)

    not_directory_results = dict(self.delete_files(not_directories))
    results.update(not_directory_results)

    return results
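
  # Illustrative sketch (editorial addition, not part of the original module):
  # delete_paths() splits its input into directory paths (trailing '/') and
  # individual objects, delegating to delete_tree() and delete_files(). The
  # paths below are hypothetical placeholders.
  #
  #   outcome = s3.delete_paths([
  #       's3://my-bucket/tmp/',       # every object under tmp/ is deleted
  #       's3://my-bucket/stale.csv',  # a single object is deleted
  #   ])
  #   errors = {path: err for path, err in outcome.items() if err is not None}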

  # We intentionally do not decorate this method with a retry, since the
  # underlying copy and delete operations are already idempotent operations
  # protected by retry decorators.
  def delete_files(self, paths, max_batch_size=1000):
    """Deletes the given S3 file objects.

    Args:
      paths: List of S3 file paths in the form s3://<bucket>/<name>
      max_batch_size: Largest number of keys to send to the client to be
        deleted simultaneously

    Returns: List of tuples of (path, exception) in the same order as the paths
             argument, where exception is None if the operation succeeded or
             the relevant exception if the operation failed.
    """
    if not paths: return []

    # Group keys by bucket: {bucket: [keys]}
    buckets, keys = zip(*[parse_s3_path(path) for path in paths])
    grouped_keys = {bucket: [] for bucket in buckets}
    for bucket, key in zip(buckets, keys):
      grouped_keys[bucket].append(key)

    # For each bucket, delete minibatches of keys
    results = {}
    for bucket, keys in grouped_keys.items():
      for i in range(0, len(keys), max_batch_size):
        minibatch_keys = keys[i:i + max_batch_size]
        results.update(self._delete_minibatch(bucket, minibatch_keys))

    # Organize final results
    final_results = [(path, results[parse_s3_path(path)]) for path in paths]

    return final_results

  @retry.with_exponential_backoff(
      retry_filter=retry.retry_on_server_errors_and_timeout_filter)
  def _delete_minibatch(self, bucket, keys):
    """A helper method. Boto3 allows batch deletions
    for files within the same bucket.

    Args:
      bucket: String bucket name
      keys: List of keys to be deleted in the bucket

    Returns: dict of the form {(bucket, key): error}, where error is None if
             the operation succeeded
    """
    request = messages.DeleteBatchRequest(bucket, keys)
    results = {}
    try:
      response = self.client.delete_batch(request)

      for key in response.deleted:
        results[(bucket, key)] = None

      for key, error in zip(response.failed, response.errors):
        results[(bucket, key)] = error

    except messages.S3ClientError as e:
      for key in keys:
        results[(bucket, key)] = e

    return results

  # We intentionally do not decorate this method with a retry, since the
  # underlying copy and delete operations are already idempotent operations
  # protected by retry decorators.
  def delete_tree(self, root):
    """Deletes all objects under the given S3 directory.

    Args:
      root: S3 root path in the form s3://<bucket>/<name>/ (ending with a "/")

    Returns: List of tuples of (path, exception), where each path is an object
             under the given root. exception is None if the operation succeeded
             or the relevant exception if the operation failed.
    """
    assert root.endswith('/')

    paths = self.list_prefix(root)
    return self.delete_files(paths)

  def size(self, path):
    """Returns the size of a single S3 object.

    This method does not perform glob expansion. Hence the given path must be
    for a single S3 object.

    Returns: size of the S3 object in bytes.
    """
    return self._s3_object(path).size

  # We intentionally do not decorate this method with a retry, since the
  # underlying copy and delete operations are already idempotent operations
  # protected by retry decorators.
  def rename(self, src, dest):
    """Renames the given S3 object from src to dest.

    Args:
      src: S3 file path pattern in the form s3://<bucket>/<name>.
      dest: S3 file path pattern in the form s3://<bucket>/<name>.
    """
    self.copy(src, dest)
    self.delete(src)

  def last_updated(self, path):
    """Returns the last updated epoch time of a single S3 object.

    This method does not perform glob expansion. Hence the given path must be
    for a single S3 object.

    Returns: last updated time of the S3 object in seconds.
    """
    return self._updated_to_seconds(self._s3_object(path).last_modified)
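
  # Illustrative sketch (editorial addition, not part of the original module):
  # the single-object metadata helpers all take one s3://<bucket>/<name> path
  # (no glob expansion). The path below is a hypothetical placeholder.
  #
  #   if s3.exists('s3://my-bucket/data.bin'):
  #     num_bytes = s3.size('s3://my-bucket/data.bin')
  #     etag = s3.checksum('s3://my-bucket/data.bin')
  #     mtime = s3.last_updated('s3://my-bucket/data.bin')  # epoch seconds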

  def exists(self, path):
    """Returns whether the given S3 object exists.

    Args:
      path: S3 file path pattern in the form s3://<bucket>/<name>.
    """
    try:
      self._s3_object(path)
      return True
    except messages.S3ClientError as e:
      if e.code == 404:
        # HTTP 404 indicates that the file did not exist
        return False
      else:
        # We re-raise all other exceptions
        raise

  def _status(self, path):
    """For internal use only; no backwards-compatibility guarantees.

    Returns supported fields (checksum, last_updated, size) of a single object
    as a dict at once.

    This method does not perform glob expansion. Hence the given path must be
    for a single S3 object.

    Returns: dict of fields of the S3 object.
    """
    s3_object = self._s3_object(path)
    file_status = {}
    if hasattr(s3_object, 'etag'):
      file_status['checksum'] = s3_object.etag
    if hasattr(s3_object, 'last_modified'):
      file_status['last_updated'] = self._updated_to_seconds(
          s3_object.last_modified)
    if hasattr(s3_object, 'size'):
      file_status['size'] = s3_object.size
    return file_status

  @retry.with_exponential_backoff(
      retry_filter=retry.retry_on_server_errors_and_timeout_filter)
  def _s3_object(self, path):
    """Returns S3 object metadata for the given path.

    This method does not perform glob expansion. Hence the given path must be
    for a single S3 object.

    Returns: S3 object metadata.
    """
    bucket, object = parse_s3_path(path)
    request = messages.GetRequest(bucket, object)
    return self.client.get_object_metadata(request)

  @staticmethod
  def _updated_to_seconds(updated):
    """Helper function to transform the updated field of a response to
    seconds."""
    return (
        time.mktime(updated.timetuple()) - time.timezone +
        updated.microsecond / 1000000.0)

  def rename_files(self, src_dest_pairs):
    """Renames the given S3 objects from src to dest.

    Args:
      src_dest_pairs: list of (src, dest) tuples of s3://<bucket>/<name> file
        paths to rename from src to dest
    Returns: List of tuples of (src, dest, exception) in the same order as the
             src_dest_pairs argument, where exception is None if the operation
             succeeded or the relevant exception if the operation failed.
    """
    if not src_dest_pairs: return []

    # TODO: Throw value error if path has directory
    for src, dest in src_dest_pairs:
      if src.endswith('/') or dest.endswith('/'):
        raise ValueError('Cannot rename a directory')

    copy_results = self.copy_paths(src_dest_pairs)
    paths_to_delete = [src for (src, _, err) in copy_results if err is None]
    delete_results = self.delete_files(paths_to_delete)

    delete_results_dict = {src: err for (src, err) in delete_results}
    rename_results = []
    for src, dest, err in copy_results:
      if err is not None: rename_results.append((src, dest, err))
      elif delete_results_dict[src] is not None:
        rename_results.append((src, dest, delete_results_dict[src]))
      else:
        rename_results.append((src, dest, None))

    return rename_results
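
  # Illustrative sketch (editorial addition, not part of the original module):
  # rename_files() copies each object and then deletes the source, surfacing a
  # failure in either step per pair; directory paths are rejected. The paths
  # below are hypothetical placeholders.
  #
  #   for src, dest, err in s3.rename_files(
  #       [('s3://my-bucket/old-name.txt', 's3://my-bucket/new-name.txt')]):
  #     if err is not None:
  #       logging.warning('Rename %s -> %s failed: %s', src, dest, err)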


class S3Downloader(Downloader):
  def __init__(self, client, path, buffer_size):
    self._client = client
    self._path = path
    self._bucket, self._name = parse_s3_path(path)
    self._buffer_size = buffer_size

    # Get object state.
    self._get_request = (
        messages.GetRequest(bucket=self._bucket, object=self._name))

    try:
      metadata = self._get_object_metadata(self._get_request)

    except messages.S3ClientError as e:
      if e.code == 404:
        raise IOError(errno.ENOENT, 'Not found: %s' % self._path)
      else:
        logging.error(
            'HTTP error while requesting file %s: %s', self._path, e)
        raise

    self._size = metadata.size

  @retry.with_exponential_backoff(
      retry_filter=retry.retry_on_server_errors_and_timeout_filter)
  def _get_object_metadata(self, get_request):
    return self._client.get_object_metadata(get_request)

  @property
  def size(self):
    return self._size

  def get_range(self, start, end):
    return self._client.get_range(self._get_request, start, end)


class S3Uploader(Uploader):
  def __init__(self, client, path, mime_type='application/octet-stream'):
    self._client = client
    self._path = path
    self._bucket, self._name = parse_s3_path(path)
    self._mime_type = mime_type

    self.part_number = 1
    self.buffer = b''

    self.last_error = None

    self.upload_id = None

    self.parts = []

    self._start_upload()

  # There is retry logic in the underlying transfer library but we should make
  # it more explicit so we can control the retry parameters.
  @retry.no_retries  # Using no_retries marks this as an integration point.
  def _start_upload(self):
    # The uploader by default transfers data in chunks of 1024 * 1024 bytes at
    # a time, buffering writes until that size is reached.
    try:
      request = messages.UploadRequest(
          self._bucket, self._name, self._mime_type)
      response = self._client.create_multipart_upload(request)
      self.upload_id = response.upload_id
    except Exception as e:  # pylint: disable=broad-except
      logging.error(
          'Error in _start_upload while inserting file %s: %s',
          self._path,
          traceback.format_exc())
      self.last_error = e
      raise e

  def put(self, data):

    MIN_WRITE_SIZE = 5 * 1024 * 1024
    MAX_WRITE_SIZE = 5 * 1024 * 1024 * 1024

    # TODO: Byte strings might not be the most performant way to handle this
    self.buffer += data.tobytes()

    while len(self.buffer) >= MIN_WRITE_SIZE:
      # Take the first chunk off the buffer and write it to S3
      chunk = self.buffer[:MAX_WRITE_SIZE]
      self._write_to_s3(chunk)
      # Remove the written chunk from the buffer
      self.buffer = self.buffer[MAX_WRITE_SIZE:]

  def _write_to_s3(self, data):

    try:
      request = messages.UploadPartRequest(
          self._bucket, self._name, self.upload_id, self.part_number, data)
      response = self._client.upload_part(request)
      self.parts.append({
          'ETag': response.etag, 'PartNumber': response.part_number
      })
      self.part_number = self.part_number + 1
    except messages.S3ClientError as e:
      self.last_error = e
      if e.code == 404:
        raise IOError(errno.ENOENT, 'Not found: %s' % self._path)
      else:
        logging.error(
            'HTTP error while requesting file %s: %s', self._path, e)
        raise
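
  # Editorial note (not part of the original module): put() accumulates bytes
  # in memory and only uploads once the buffer holds at least MIN_WRITE_SIZE
  # (5 MiB), in slices of at most MAX_WRITE_SIZE (5 GiB), mirroring the S3
  # multipart constraint that every part except the last must be at least
  # 5 MiB. A rough trace, assuming three put() calls of 3 MiB each:
  #
  #   put(3 MiB)  ->  buffer holds 3 MiB, nothing uploaded yet
  #   put(3 MiB)  ->  buffer holds 6 MiB >= 5 MiB, uploaded as one 6 MiB part
  #   put(3 MiB)  ->  buffer holds 3 MiB, uploaded by finish() as the last part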

  def finish(self):
    if len(self.buffer) > 0:
      # Only flush when there is buffered data: zero-length writes, and files
      # whose size lands exactly on a flush boundary between
      # MIN_WRITE_SIZE = 5 * 1024 * 1024 and
      # MAX_WRITE_SIZE = 5 * 1024 * 1024 * 1024,
      # reach finish() with len(self.buffer) == 0, and uploading an empty
      # part would fail.
      self._write_to_s3(self.buffer)

    if self.last_error is not None:
      raise self.last_error  # pylint: disable=raising-bad-type

    request = messages.CompleteMultipartUploadRequest(
        self._bucket, self._name, self.upload_id, self.parts)
    self._client.complete_multipart_upload(request)