github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/aws/s3io_test.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Tests for S3 client."""
    19  # pytype: skip-file
    20  
    21  import logging
    22  import os
    23  import random
    24  import time
    25  import unittest
    26  
    27  from apache_beam.io.aws import s3io
    28  from apache_beam.io.aws.clients.s3 import fake_client
    29  from apache_beam.io.aws.clients.s3 import messages
    30  from apache_beam.options import pipeline_options
    31  
    32  
    33  class TestS3PathParser(unittest.TestCase):
    34  
    35    BAD_S3_PATHS = [
    36        's3://',
    37        's3://bucket',
    38        's3:///name',
    39        's3:///',
    40        's3:/blah/bucket/name',
    41    ]
    42  
    43    def test_s3_path(self):
    44      self.assertEqual(s3io.parse_s3_path('s3://bucket/name'), ('bucket', 'name'))
    45      self.assertEqual(
    46          s3io.parse_s3_path('s3://bucket/name/sub'), ('bucket', 'name/sub'))
    47  
    48    def test_bad_s3_path(self):
    49      for path in self.BAD_S3_PATHS:
    50        self.assertRaises(ValueError, s3io.parse_s3_path, path)
    51      self.assertRaises(ValueError, s3io.parse_s3_path, 's3://bucket/')
    52  
    53    def test_s3_path_object_optional(self):
    54      self.assertEqual(
    55          s3io.parse_s3_path('s3://bucket/name', object_optional=True),
    56          ('bucket', 'name'))
    57      self.assertEqual(
    58          s3io.parse_s3_path('s3://bucket/', object_optional=True),
    59          ('bucket', ''))
    60  
    61    def test_bad_s3_path_object_optional(self):
    62      for path in self.BAD_S3_PATHS:
    63        self.assertRaises(ValueError, s3io.parse_s3_path, path, True)
    64  
    65  
class TestS3IO(unittest.TestCase):
  """Tests for S3IO file operations.

  By default these run against an in-memory fake S3 client; flipping
  USE_MOCK to False in setUp runs them as integration tests against a
  real S3 bucket (see TEST_DATA_PATH).
  """
  def _insert_random_file(self, client, path, size):
    """Create a file of `size` random bytes at `path`; return the FakeFile.

    NOTE(review): the `client` argument is unused — the helper writes
    through self.client (mock mode) or self.aws (real S3) instead.
    """
    bucket, name = s3io.parse_s3_path(path)
    contents = os.urandom(size)
    fakeFile = fake_client.FakeFile(bucket, name, contents)

    if self.USE_MOCK:
      self.client.add_file(fakeFile)

    else:
      f = self.aws.open(path, 'w')
      f.write(contents)
      f.close()

    # The FakeFile is returned even in real-S3 mode so tests can read back
    # the generated contents.
    return fakeFile

  def setUp(self):

    # These tests can be run locally against a mock S3 client, or as integration
    # tests against the real S3 client.
    self.USE_MOCK = True

    # If you're running integration tests with S3, set this variable to be an
    # s3 path that you have access to where test data can be written. If you're
    # just running tests against the mock, this can be any s3 path. It should
    # end with a '/'.
    self.TEST_DATA_PATH = 's3://random-data-sets/beam_tests/'

    if self.USE_MOCK:
      self.client = fake_client.FakeS3Client()
      test_data_bucket, _ = s3io.parse_s3_path(self.TEST_DATA_PATH)
      self.client.known_buckets.add(test_data_bucket)
      self.aws = s3io.S3IO(self.client)

    else:
      # Real client: credentials/region come from pipeline S3Options.
      self.aws = s3io.S3IO(options=pipeline_options.S3Options())
      self.client = self.aws.client

  def test_size(self):
    """size() reports the byte length of an existing object."""
    file_name = self.TEST_DATA_PATH + 'dummy_file'
    file_size = 1234

    self._insert_random_file(self.client, file_name, file_size)
    self.assertTrue(self.aws.exists(file_name))
    self.assertEqual(1234, self.aws.size(file_name))

    # Clean up
    self.aws.delete(file_name)

  def test_last_updated(self):
    """last_updated() is close to the current wall-clock time."""
    file_name = self.TEST_DATA_PATH + 'dummy_file'
    file_size = 1234

    self._insert_random_file(self.client, file_name, file_size)
    self.assertTrue(self.aws.exists(file_name))
    # The time difference should be tiny for the mock client.
    # A loose tolerance is for the consideration of real s3 client.
    tolerance = 5 * 60  # 5 mins
    result = self.aws.last_updated(file_name)
    self.assertAlmostEqual(result, time.time(), delta=tolerance)

    # Clean up
    self.aws.delete(file_name)

  def test_checksum(self):
    """checksum() (the S3 ETag) is stable across delete-and-rewrite."""
    file_name = self.TEST_DATA_PATH + 'checksum'
    file_size = 1024
    file_ = self._insert_random_file(self.client, file_name, file_size)

    original_etag = self.aws.checksum(file_name)

    self.aws.delete(file_name)

    with self.aws.open(file_name, 'w') as f:
      f.write(file_.contents)

    rewritten_etag = self.aws.checksum(file_name)

    self.assertEqual(original_etag, rewritten_etag)
    # Quoted MD5 plus a '-1' multipart suffix: 36 characters total.
    self.assertEqual(len(original_etag), 36)
    self.assertTrue(original_etag.endswith('-1"'))

    # Clean up
    self.aws.delete(file_name)

  def test_file_status(self):
    """_status() bundles size, checksum and last_updated consistently."""
    file_name = self.TEST_DATA_PATH + 'metadata'
    file_size = 1024
    self._insert_random_file(self.client, file_name, file_size)
    file_checksum = self.aws.checksum(file_name)
    file_timestamp = self.aws.last_updated(file_name)

    file_status = self.aws._status(file_name)

    self.assertEqual(file_status['size'], file_size)
    self.assertEqual(file_status['checksum'], file_checksum)
    self.assertEqual(file_status['last_updated'], file_timestamp)

    # Clean up
    self.aws.delete(file_name)

  def test_copy(self):
    """copy() duplicates an object and errors on a missing source."""
    src_file_name = self.TEST_DATA_PATH + 'source'
    dest_file_name = self.TEST_DATA_PATH + 'dest'
    file_size = 1024
    self._insert_random_file(self.client, src_file_name, file_size)

    self.assertTrue(src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
    self.assertFalse(
        dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))

    self.aws.copy(src_file_name, dest_file_name)

    # Source must remain after a copy (unlike rename).
    self.assertTrue(src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
    self.assertTrue(dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))

    # Clean up
    self.aws.delete_files([src_file_name, dest_file_name])

    # Test copy of non-existent files.
    with self.assertRaises(messages.S3ClientError) as err:
      self.aws.copy(
          self.TEST_DATA_PATH + 'non-existent',
          self.TEST_DATA_PATH + 'non-existent-destination')

    self.assertTrue('Not Found' in err.exception.message)

  def test_copy_paths(self):
    """copy_paths() batch-copies files and reports per-pair errors."""
    from_name_pattern = self.TEST_DATA_PATH + 'copy_me_%d'
    to_name_pattern = self.TEST_DATA_PATH + 'destination_%d'
    file_size = 1024
    num_files = 10

    src_dest_pairs = [(from_name_pattern % i, to_name_pattern % i)
                      for i in range(num_files)]

    # First pass: nothing exists yet, so every pair should yield a 404.
    result = self.aws.copy_paths(src_dest_pairs)

    self.assertTrue(result)
    for i, (src, dest, exception) in enumerate(result):
      self.assertEqual(src, from_name_pattern % i)
      self.assertEqual(dest, to_name_pattern % i)
      self.assertTrue(isinstance(exception, messages.S3ClientError))
      self.assertEqual(exception.code, 404)
      self.assertFalse(self.aws.exists(from_name_pattern % i))
      self.assertFalse(self.aws.exists(to_name_pattern % i))

    # Insert some files.
    for i in range(num_files):
      self._insert_random_file(self.client, from_name_pattern % i, file_size)

    # Check files inserted properly.
    for i in range(num_files):
      self.assertTrue(self.aws.exists(from_name_pattern % i))

    # Execute batch copy.
    result = self.aws.copy_paths(src_dest_pairs)

    # Check files copied properly.
    for i in range(num_files):
      self.assertTrue(self.aws.exists(from_name_pattern % i))
      self.assertTrue(self.aws.exists(to_name_pattern % i))

    # Check results
    for i, (src, dest, exception) in enumerate(result):
      self.assertEqual(src_dest_pairs[i], (src, dest))
      self.assertEqual(exception, None)

    # Clean up
    all_files = set().union(*[set(pair) for pair in src_dest_pairs])
    self.aws.delete_files(all_files)

  def test_copy_paths_error(self):
    """copy_paths() mixes successes with per-pair errors for bad inputs."""
    n_real_files = 3

    # Create some files
    from_path = self.TEST_DATA_PATH + 'copy_paths/'
    files = [from_path + '%d' % i for i in range(n_real_files)]
    to_path = self.TEST_DATA_PATH + 'destination/'
    destinations = [to_path + '%d' % i for i in range(n_real_files)]
    for file_ in files:
      self._insert_random_file(self.client, file_, 1024)

    # Add nonexistent files to the sources and destinations
    sources = files + [
        from_path + 'X',
        from_path + 'fake_directory_1/',
        from_path + 'fake_directory_2/'
    ]
    # Note: the last destination deliberately lacks a trailing '/', which
    # makes it an invalid (400) target for a directory-style source.
    destinations += [
        to_path + 'X',
        to_path + 'fake_directory_1/',
        to_path + 'fake_directory_2'
    ]
    result = self.aws.copy_paths(list(zip(sources, destinations)))

    # The copy_paths function of class S3IO does not return one single
    # result when copying a directory. Instead, it returns the results
    # of copying every file in the source directory.
    self.assertEqual(len(result), len(sources) - 1)

    for _, _, err in result[:n_real_files]:
      self.assertTrue(err is None)

    for _, _, err in result[n_real_files:]:
      self.assertIsInstance(err, messages.S3ClientError)

    # For the same reason of copy_paths function of S3IO above
    # skip this assert.
    #self.assertEqual(result[-3][2].code, 404)
    self.assertEqual(result[-2][2].code, 404)
    self.assertEqual(result[-1][2].code, 400)

    # Clean up
    self.aws.delete_files(files)
    self.aws.delete_files(destinations)

  def test_copy_tree(self):
    """copy_tree() recursively copies every object under a prefix."""
    src_dir_name = self.TEST_DATA_PATH + 'source/'
    dest_dir_name = self.TEST_DATA_PATH + 'dest/'
    file_size = 1024
    paths = ['a', 'b/c', 'b/d']
    for path in paths:
      src_file_name = src_dir_name + path
      dest_file_name = dest_dir_name + path
      self._insert_random_file(self.client, src_file_name, file_size)
      self.assertTrue(
          src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
      self.assertFalse(
          dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))

    results = self.aws.copy_tree(src_dir_name, dest_dir_name)

    for src_file_name, dest_file_name, err in results:

      self.assertTrue(src_dir_name in src_file_name)
      self.assertTrue(dest_dir_name in dest_file_name)
      self.assertIsNone(err)

      # Both source and destination must exist after the copy.
      self.assertTrue(
          src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
      self.assertTrue(
          dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))

    # Clean up
    for path in paths:
      src_file_name = src_dir_name + path
      dest_file_name = dest_dir_name + path
      self.aws.delete_files([src_file_name, dest_file_name])

  def test_rename(self):
    """rename() moves an object: destination appears, source disappears."""
    src_file_name = self.TEST_DATA_PATH + 'source'
    dest_file_name = self.TEST_DATA_PATH + 'dest'
    file_size = 1024

    self._insert_random_file(self.client, src_file_name, file_size)

    self.assertTrue(src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
    self.assertFalse(
        dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))

    self.aws.rename(src_file_name, dest_file_name)

    self.assertFalse(src_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))
    self.assertTrue(dest_file_name in self.aws.list_prefix(self.TEST_DATA_PATH))

    # Clean up
    self.aws.delete_files([src_file_name, dest_file_name])

  def test_rename_files(self):
    """rename_files() batch-renames and reports 404s for missing sources."""
    from_name_pattern = self.TEST_DATA_PATH + 'to_rename_%d'
    to_name_pattern = self.TEST_DATA_PATH + 'been_renamed_%d'
    file_size = 1024
    num_files = 10

    src_dest_pairs = [(from_name_pattern % i, to_name_pattern % i)
                      for i in range(num_files)]

    # First pass: nothing exists yet, so every pair should yield a 404.
    result = self.aws.rename_files(src_dest_pairs)

    self.assertTrue(result)
    for i, (src, dest, exception) in enumerate(result):
      self.assertEqual(src, from_name_pattern % i)
      self.assertEqual(dest, to_name_pattern % i)
      self.assertTrue(isinstance(exception, messages.S3ClientError))
      self.assertEqual(exception.code, 404)
      self.assertFalse(self.aws.exists(from_name_pattern % i))
      self.assertFalse(self.aws.exists(to_name_pattern % i))

    # Insert some files.
    for i in range(num_files):
      self._insert_random_file(self.client, from_name_pattern % i, file_size)

    # Check files inserted properly.
    for i in range(num_files):
      self.assertTrue(self.aws.exists(from_name_pattern % i))
      self.assertFalse(self.aws.exists(to_name_pattern % i))

    # Execute batch rename.
    self.aws.rename_files(src_dest_pairs)

    # Check files were renamed properly.
    for i in range(num_files):
      self.assertFalse(self.aws.exists(from_name_pattern % i))
      self.assertTrue(self.aws.exists(to_name_pattern % i))

    # Clean up
    all_files = set().union(*[set(pair) for pair in src_dest_pairs])
    self.aws.delete_files(all_files)

  def test_rename_files_with_errors(self):
    """A failing pair (unknown bucket) doesn't block the successful one."""
    real_prefix = self.TEST_DATA_PATH + 'rename_batch_%s'
    fake_prefix = 's3://fake-bucket-68ae4b0ef7b9/rename_batch_%s'
    src_dest_pairs = [(prefix % 'src', prefix % 'dest')
                      for prefix in (real_prefix, fake_prefix)]

    # Create the file in the real bucket
    self._insert_random_file(self.client, real_prefix % 'src', 1024)

    # Execute batch rename
    result = self.aws.rename_files(src_dest_pairs)

    # First is the file in the real bucket, which shouldn't throw an error
    self.assertEqual(result[0][0], src_dest_pairs[0][0])
    self.assertEqual(result[0][1], src_dest_pairs[0][1])
    self.assertIsNone(result[0][2])

    # Second is the file in the fake bucket, which should throw a 404
    self.assertEqual(result[1][0], src_dest_pairs[1][0])
    self.assertEqual(result[1][1], src_dest_pairs[1][1])
    self.assertEqual(result[1][2].code, 404)

    # Clean up
    self.aws.delete(real_prefix % 'dest')

  def test_rename_files_with_errors_directory(self):
    """rename_files() rejects directory-style destinations with ValueError."""

    # Make file
    dir_name = self.TEST_DATA_PATH + 'rename_dir/'
    file_name = dir_name + 'file'
    self._insert_random_file(self.client, file_name, 1024)

    self.assertTrue(self.aws.exists(file_name))

    with self.assertRaises(ValueError):
      self.aws.rename_files([(file_name, self.TEST_DATA_PATH + 'dir_dest/')])

    # Clean up
    self.aws.delete(file_name)

  def test_delete_paths(self):
    """delete_paths() removes both plain files and directory prefixes."""
    # Make files
    prefix = self.TEST_DATA_PATH + 'delete_paths/'
    file_names = [prefix + 'a', prefix + 'b/c']
    for file_name in file_names:
      self._insert_random_file(self.client, file_name, 1024)

    self.assertTrue(self.aws.exists(file_names[0]))
    self.assertTrue(self.aws.exists(file_names[1]))

    # Delete paths: one exact object, one directory-style prefix.
    paths = [prefix + 'a', prefix + 'b/']
    self.aws.delete_paths(paths)

    self.assertFalse(self.aws.exists(file_names[0]))
    self.assertFalse(self.aws.exists(file_names[1]))

  def test_delete(self):
    """delete() removes a file and is a no-op for a missing one."""
    file_name = self.TEST_DATA_PATH + 'delete_file'
    file_size = 1024

    # Test deletion of non-existent file (shouldn't raise any error)
    self.aws.delete(file_name)

    # Create the file and check that it was created
    self._insert_random_file(self.aws.client, file_name, file_size)
    files = self.aws.list_prefix(self.TEST_DATA_PATH)
    self.assertTrue(file_name in files)

    # Delete the file and check that it was deleted
    self.aws.delete(file_name)
    self.assertFalse(self.aws.exists(file_name))

  def test_delete_files(self, *unused_args):
    """delete_files() batch-deletes; missing files are not an error."""
    file_name_pattern = self.TEST_DATA_PATH + 'delete_batch/%d'
    file_size = 1024
    num_files = 5

    # Test deletion of non-existent files.
    result = self.aws.delete_files(
        [file_name_pattern % i for i in range(num_files)])
    self.assertTrue(result)
    for i, (file_name, exception) in enumerate(result):
      self.assertEqual(file_name, file_name_pattern % i)
      self.assertEqual(exception, None)
      self.assertFalse(self.aws.exists(file_name_pattern % i))

    # Insert some files.
    for i in range(num_files):
      self._insert_random_file(self.client, file_name_pattern % i, file_size)

    # Check files inserted properly.
    for i in range(num_files):
      self.assertTrue(self.aws.exists(file_name_pattern % i))

    # Execute batch delete.
    self.aws.delete_files([file_name_pattern % i for i in range(num_files)])

    # Check files deleted properly.
    for i in range(num_files):
      self.assertFalse(self.aws.exists(file_name_pattern % i))

  def test_delete_files_with_errors(self, *unused_args):
    """delete_files() reports a 404 for an unknown bucket, per file."""
    real_file = self.TEST_DATA_PATH + 'delete_batch/file'
    fake_file = 's3://fake-bucket-68ae4b0ef7b9/delete_batch/file'
    filenames = [real_file, fake_file]

    result = self.aws.delete_files(filenames)

    # First is the file in the real bucket, which shouldn't throw an error
    self.assertEqual(result[0][0], filenames[0])
    self.assertIsNone(result[0][1])

    # Second is the file in the fake bucket, which should throw a 404
    self.assertEqual(result[1][0], filenames[1])
    self.assertEqual(result[1][1].code, 404)

  def test_delete_tree(self):
    """delete_tree() removes every object under a prefix."""

    root_path = self.TEST_DATA_PATH + 'delete_tree/'
    leaf_paths = ['a', 'b/c', 'b/d', 'b/d/e']
    paths = [root_path + leaf for leaf in leaf_paths]

    # Create file tree
    file_size = 1024
    for path in paths:
      self._insert_random_file(self.client, path, file_size)

    # Check that the files exist
    for path in paths:
      self.assertTrue(self.aws.exists(path))

    # Delete the tree
    self.aws.delete_tree(root_path)

    # Check that the files have been deleted
    for path in paths:
      self.assertFalse(self.aws.exists(path))

  def test_exists(self):
    """exists() tracks the object's lifecycle: absent, created, deleted."""
    file_name = self.TEST_DATA_PATH + 'exists'
    file_size = 1024

    self.assertFalse(self.aws.exists(file_name))

    self._insert_random_file(self.aws.client, file_name, file_size)

    self.assertTrue(self.aws.exists(file_name))

    # Clean up
    self.aws.delete(file_name)

    self.assertFalse(self.aws.exists(file_name))

  def test_file_mode(self):
    """open() exposes the requested mode on the returned file object."""
    file_name = self.TEST_DATA_PATH + 'jerry/pigpen/bobby'
    with self.aws.open(file_name, 'w') as f:
      assert f.mode == 'w'
    with self.aws.open(file_name, 'r') as f:
      assert f.mode == 'r'

    # Clean up
    self.aws.delete(file_name)

  def test_full_file_read(self):
    """Reading after seek-to-end yields b''; seek(0) reads full contents."""
    file_name = self.TEST_DATA_PATH + 'jerry/pigpen/phil'
    file_size = 1024

    f = self._insert_random_file(self.aws.client, file_name, file_size)
    contents = f.contents

    f = self.aws.open(file_name)
    self.assertEqual(f.mode, 'r')
    f.seek(0, os.SEEK_END)
    self.assertEqual(f.tell(), file_size)
    self.assertEqual(f.read(), b'')
    f.seek(0)
    self.assertEqual(f.read(), contents)

    # Clean up
    self.aws.delete(file_name)

  def test_file_write(self):
    """Multiple writes (crossing the multipart threshold) round-trip."""
    file_name = self.TEST_DATA_PATH + 'write_file'
    # Larger than one 8 MiB upload part, plus a ragged tail.
    file_size = 8 * 1024 * 1024 + 2000
    contents = os.urandom(file_size)
    f = self.aws.open(file_name, 'w')
    self.assertEqual(f.mode, 'w')
    f.write(contents[0:1000])
    f.write(contents[1000:1024 * 1024])
    f.write(contents[1024 * 1024:])
    f.close()
    new_f = self.aws.open(file_name, 'r')
    new_f_contents = new_f.read()
    self.assertEqual(new_f_contents, contents)

    # Clean up
    self.aws.delete(file_name)

  def test_file_mime_type(self):
    """A mime_type passed to open() is stored in the object metadata."""
    if self.USE_MOCK:
      self.skipTest("The boto3_client mock doesn't support mime_types")

    mime_type = 'example/example'
    file_name = self.TEST_DATA_PATH + 'write_file'
    f = self.aws.open(file_name, 'w', mime_type=mime_type)
    f.write(b'a string of binary text')
    f.close()

    bucket, key = s3io.parse_s3_path(file_name)
    metadata = self.client.get_object_metadata(messages.GetRequest(bucket, key))

    self.assertEqual(mime_type, metadata.mime_type)

    # Clean up
    self.aws.delete(file_name)

  def test_file_random_seek(self):
    """Random seek + bounded read returns the matching contents slice."""
    file_name = self.TEST_DATA_PATH + 'write_seek_file'
    file_size = 5 * 1024 * 1024 - 100
    contents = os.urandom(file_size)
    with self.aws.open(file_name, 'w') as wf:
      wf.write(contents)

    f = self.aws.open(file_name)
    # Fixed seed keeps the random offsets reproducible across runs.
    random.seed(0)

    for _ in range(0, 10):
      a = random.randint(0, file_size - 1)
      b = random.randint(0, file_size - 1)
      start, end = min(a, b), max(a, b)
      f.seek(start)

      self.assertEqual(f.tell(), start)

      self.assertEqual(f.read(end - start + 1), contents[start:end + 1])
      self.assertEqual(f.tell(), end + 1)

    # Clean up
    self.aws.delete(file_name)

  def test_file_flush(self):
    """flush() mid-write (including back-to-back) doesn't corrupt data."""
    file_name = self.TEST_DATA_PATH + 'flush_file'
    file_size = 5 * 1024 * 1024 + 2000
    contents = os.urandom(file_size)
    f = self.aws.open(file_name, 'w')
    self.assertEqual(f.mode, 'w')
    f.write(contents[0:1000])
    f.flush()
    f.write(contents[1000:1024 * 1024])
    f.flush()
    f.flush()  # Should be a NOOP.
    f.write(contents[1024 * 1024:])
    f.close(
    )  # This should already call the equivalent of flush() in its body
    new_f = self.aws.open(file_name, 'r')
    new_f_contents = new_f.read()
    self.assertEqual(new_f_contents, contents)

    # Clean up
    self.aws.delete(file_name)

  def test_file_iterator(self):
    """Iterating an open file yields one item per newline-terminated line."""
    file_name = self.TEST_DATA_PATH + 'iterate_file'
    lines = []
    line_count = 10
    for _ in range(line_count):
      line_length = random.randint(100, 500)
      # Strip any random newline bytes so each line has exactly one '\n'.
      line = os.urandom(line_length).replace(b'\n', b' ') + b'\n'
      lines.append(line)

    contents = b''.join(lines)

    with self.aws.open(file_name, 'w') as wf:
      wf.write(contents)

    f = self.aws.open(file_name)

    read_lines = 0
    for line in f:
      read_lines += 1

    self.assertEqual(read_lines, line_count)

    # Clean up
    self.aws.delete(file_name)

  def test_file_read_line(self):
    """readline() is correct across buffer refills and at boundaries/EOF."""
    file_name = self.TEST_DATA_PATH + 'read_line_file'
    lines = []

    # Set a small buffer size to exercise refilling the buffer.
    # First line is carefully crafted so the newline falls as the last character
    # of the buffer to exercise this code path.
    read_buffer_size = 1099
    lines.append(b'x' * 1023 + b'\n')

    for _ in range(1, 1000):
      line_length = random.randint(100, 500)
      line = os.urandom(line_length).replace(b'\n', b' ') + b'\n'
      lines.append(line)
    contents = b''.join(lines)

    file_size = len(contents)

    with self.aws.open(file_name, 'wb') as wf:
      wf.write(contents)

    f = self.aws.open(file_name, 'rb', read_buffer_size=read_buffer_size)

    # Test read of first two lines.
    f.seek(0)
    self.assertEqual(f.readline(), lines[0])
    self.assertEqual(f.tell(), len(lines[0]))
    self.assertEqual(f.readline(), lines[1])

    # Test read at line boundary.
    f.seek(file_size - len(lines[-1]) - 1)
    self.assertEqual(f.readline(), b'\n')

    # Test read at end of file.
    f.seek(file_size)
    self.assertEqual(f.readline(), b'')

    # Test reads at random positions.
    random.seed(0)
    for _ in range(0, 10):
      start = random.randint(0, file_size - 1)
      line_index = 0
      # Find line corresponding to start index.
      chars_left = start
      while True:
        next_line_length = len(lines[line_index])
        if chars_left - next_line_length < 0:
          break
        chars_left -= next_line_length
        line_index += 1
      f.seek(start)
      self.assertEqual(f.readline(), lines[line_index][chars_left:])

    # Clean up
    self.aws.delete(file_name)

  def test_file_close(self):
    """close() finalizes the write and a second close() is harmless."""
    file_name = self.TEST_DATA_PATH + 'close_file'
    file_size = 5 * 1024 * 1024 + 2000
    contents = os.urandom(file_size)
    f = self.aws.open(file_name, 'w')
    self.assertEqual(f.mode, 'w')
    f.write(contents)
    f.close()
    f.close()  # This should not crash.

    with self.aws.open(file_name, 'r') as f:
      read_contents = f.read()

    self.assertEqual(read_contents, contents)

    # Clean up
    self.aws.delete(file_name)

  def test_context_manager(self):
    """open() works as a context manager for both writing and reading."""
    # Test writing with a context manager.
    file_name = self.TEST_DATA_PATH + 'context_manager_file'
    file_size = 1024
    contents = os.urandom(file_size)
    with self.aws.open(file_name, 'w') as f:
      f.write(contents)

    with self.aws.open(file_name, 'r') as f:
      self.assertEqual(f.read(), contents)

    # Clean up
    self.aws.delete(file_name)

  def test_list_prefix(self):
    """list_prefix() maps each matching object name to its size."""

    objects = [
        ('jerry/pigpen/phil', 5),
        ('jerry/pigpen/bobby', 3),
        ('jerry/billy/bobby', 4),
    ]

    for (object_name, size) in objects:
      file_name = self.TEST_DATA_PATH + object_name
      self._insert_random_file(self.aws.client, file_name, size)

    # Each case: (prefix to list, objects expected under that prefix).
    test_cases = [
        (
            self.TEST_DATA_PATH + 'j',
            [
                ('jerry/pigpen/phil', 5),
                ('jerry/pigpen/bobby', 3),
                ('jerry/billy/bobby', 4),
            ]),
        (
            self.TEST_DATA_PATH + 'jerry/',
            [
                ('jerry/pigpen/phil', 5),
                ('jerry/pigpen/bobby', 3),
                ('jerry/billy/bobby', 4),
            ]),
        (
            self.TEST_DATA_PATH + 'jerry/pigpen/phil', [
                ('jerry/pigpen/phil', 5),
            ]),
    ]

    for file_pattern, expected_object_names in test_cases:
      expected_file_names = [(self.TEST_DATA_PATH + object_name, size)
                             for (object_name, size) in expected_object_names]
      self.assertEqual(
          set(self.aws.list_prefix(file_pattern).items()),
          set(expected_file_names))

    # Clean up
    for (object_name, size) in objects:
      self.aws.delete(self.TEST_DATA_PATH + object_name)

  def test_midsize_file(self):
    """A multi-megabyte file reads back with the expected length."""
    file_name = self.TEST_DATA_PATH + 'midsized'
    file_size = 6 * 1024 * 1024
    self._insert_random_file(self.aws.client, file_name, file_size)
    with self.aws.open(file_name, 'r') as f:
      self.assertEqual(len(f.read()), file_size)
    self.aws.delete(file_name)

  def test_zerosize_file(self):
    """An empty (0-byte) file can be created and read back."""
    file_name = self.TEST_DATA_PATH + 'zerosized'
    file_size = 0
    self._insert_random_file(self.aws.client, file_name, file_size)
    with self.aws.open(file_name, 'r') as f:
      self.assertEqual(len(f.read()), file_size)
    self.aws.delete(file_name)
   810  
   811  
if __name__ == '__main__':
  # Show INFO-level logs when this test module is run directly.
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()