github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/aws/s3filesystem_test.py (about)

     1  # -*- coding: utf-8 -*-
     2  #
     3  # Licensed to the Apache Software Foundation (ASF) under one or more
     4  # contributor license agreements.  See the NOTICE file distributed with
     5  # this work for additional information regarding copyright ownership.
     6  # The ASF licenses this file to You under the Apache License, Version 2.0
     7  # (the "License"); you may not use this file except in compliance with
     8  # the License.  You may obtain a copy of the License at
     9  #
    10  #    http://www.apache.org/licenses/LICENSE-2.0
    11  #
    12  # Unless required by applicable law or agreed to in writing, software
    13  # distributed under the License is distributed on an "AS IS" BASIS,
    14  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  # See the License for the specific language governing permissions and
    16  # limitations under the License.
    17  #
    18  
    19  """Unit tests for the S3 File System"""
    20  
    21  # pytype: skip-file
    22  
    23  import logging
    24  import unittest
    25  
    26  import mock
    27  
    28  from apache_beam.io.aws.clients.s3 import messages
    29  from apache_beam.io.filesystem import BeamIOError
    30  from apache_beam.io.filesystem import FileMetadata
    31  from apache_beam.options.pipeline_options import PipelineOptions
    32  
    33  # Protect against environments where boto3 library is not available.
    34  # pylint: disable=wrong-import-order, wrong-import-position
    35  try:
    36    from apache_beam.io.aws import s3filesystem
    37  except ImportError:
    38    s3filesystem = None  # type: ignore[assignment]
    39  # pylint: enable=wrong-import-order, wrong-import-position
    40  
    41  
    42  @unittest.skipIf(s3filesystem is None, 'AWS dependencies are not installed')
    43  class S3FileSystemTest(unittest.TestCase):
    44    def setUp(self):
    45      pipeline_options = PipelineOptions()
    46      self.fs = s3filesystem.S3FileSystem(pipeline_options=pipeline_options)
    47  
    48    def test_scheme(self):
    49      self.assertEqual(self.fs.scheme(), 's3')
    50      self.assertEqual(s3filesystem.S3FileSystem.scheme(), 's3')
    51  
    52    def test_join(self):
    53      self.assertEqual(
    54          's3://bucket/path/to/file',
    55          self.fs.join('s3://bucket/path', 'to', 'file'))
    56      self.assertEqual(
    57          's3://bucket/path/to/file', self.fs.join('s3://bucket/path', 'to/file'))
    58      self.assertEqual(
    59          's3://bucket/path/to/file',
    60          self.fs.join('s3://bucket/path', '/to/file'))
    61      self.assertEqual(
    62          's3://bucket/path/to/file',
    63          self.fs.join('s3://bucket/path/', 'to', 'file'))
    64      self.assertEqual(
    65          's3://bucket/path/to/file',
    66          self.fs.join('s3://bucket/path/', 'to/file'))
    67      self.assertEqual(
    68          's3://bucket/path/to/file',
    69          self.fs.join('s3://bucket/path/', '/to/file'))
    70      with self.assertRaises(ValueError):
    71        self.fs.join('/bucket/path/', '/to/file')
    72  
    73    def test_split(self):
    74      self.assertEqual(('s3://foo/bar', 'baz'), self.fs.split('s3://foo/bar/baz'))
    75      self.assertEqual(('s3://foo', ''), self.fs.split('s3://foo/'))
    76      self.assertEqual(('s3://foo', ''), self.fs.split('s3://foo'))
    77  
    78      with self.assertRaises(ValueError):
    79        self.fs.split('/no/s3/prefix')
    80  
    81    @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
    82    def test_match_single(self, unused_mock_arg):
    83      # Prepare mocks.
    84      s3io_mock = mock.MagicMock()
    85      s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
    86      s3io_mock._status.return_value = {'size': 1, 'last_updated': 9999999.0}
    87      expected_results = [FileMetadata('s3://bucket/file1', 1, 9999999.0)]
    88      match_result = self.fs.match(['s3://bucket/file1'])[0]
    89  
    90      self.assertEqual(match_result.metadata_list, expected_results)
    91      s3io_mock._status.assert_called_once_with('s3://bucket/file1')
    92  
    93    @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
    94    def test_match_multiples(self, unused_mock_arg):
    95      # Prepare mocks.
    96      s3io_mock = mock.MagicMock()
    97      s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
    98      s3io_mock.list_files.return_value = iter([
    99          ('s3://bucket/file1', (1, 9999999.0)),
   100          ('s3://bucket/file2', (2, 8888888.0))
   101      ])
   102      expected_results = set([
   103          FileMetadata('s3://bucket/file1', 1, 9999999.0),
   104          FileMetadata('s3://bucket/file2', 2, 8888888.0)
   105      ])
   106      match_result = self.fs.match(['s3://bucket/'])[0]
   107  
   108      self.assertEqual(set(match_result.metadata_list), expected_results)
   109      s3io_mock.list_files.assert_called_once_with(
   110          's3://bucket/', with_metadata=True)
   111  
   112    @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
   113    def test_match_multiples_limit(self, unused_mock_arg):
   114      # Prepare mocks.
   115      s3io_mock = mock.MagicMock()
   116      limit = 1
   117      s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
   118      s3io_mock.list_files.return_value = iter([
   119          ('s3://bucket/file1', (1, 99999.0))
   120      ])
   121      expected_results = set([FileMetadata('s3://bucket/file1', 1, 99999.0)])
   122      match_result = self.fs.match(['s3://bucket/'], [limit])[0]
   123      self.assertEqual(set(match_result.metadata_list), expected_results)
   124      self.assertEqual(len(match_result.metadata_list), limit)
   125      s3io_mock.list_files.assert_called_once_with(
   126          's3://bucket/', with_metadata=True)
   127  
   128    @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
   129    def test_match_multiples_error(self, unused_mock_arg):
   130      # Prepare mocks.
   131      s3io_mock = mock.MagicMock()
   132      s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
   133      exception = IOError('Failed')
   134      s3io_mock.list_files.side_effect = exception
   135  
   136      with self.assertRaises(BeamIOError) as error:
   137        self.fs.match(['s3://bucket/'])
   138  
   139      self.assertIn('Match operation failed', str(error.exception))
   140      s3io_mock.list_files.assert_called_once_with(
   141          's3://bucket/', with_metadata=True)
   142  
   143    @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
   144    def test_match_multiple_patterns(self, unused_mock_arg):
   145      # Prepare mocks.
   146      s3io_mock = mock.MagicMock()
   147      s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
   148      s3io_mock.list_files.side_effect = [
   149          iter([('s3://bucket/file1', (1, 99999.0))]),
   150          iter([('s3://bucket/file2', (2, 88888.0))]),
   151      ]
   152      expected_results = [[FileMetadata('s3://bucket/file1', 1, 99999.0)],
   153                          [FileMetadata('s3://bucket/file2', 2, 88888.0)]]
   154      result = self.fs.match(['s3://bucket/file1*', 's3://bucket/file2*'])
   155      self.assertEqual([mr.metadata_list for mr in result], expected_results)
   156  
   157    @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
   158    def test_create(self, unused_mock_arg):
   159      # Prepare mocks.
   160      s3io_mock = mock.MagicMock()
   161      s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
   162      # Issue file copy
   163      _ = self.fs.create('s3://bucket/from1', 'application/octet-stream')
   164  
   165      s3io_mock.open.assert_called_once_with(
   166          's3://bucket/from1', 'wb', mime_type='application/octet-stream')
   167  
   168    @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
   169    def test_open(self, unused_mock_arg):
   170      # Prepare mocks.
   171      s3io_mock = mock.MagicMock()
   172      s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
   173      # Issue file copy
   174      _ = self.fs.open('s3://bucket/from1', 'application/octet-stream')
   175  
   176      s3io_mock.open.assert_called_once_with(
   177          's3://bucket/from1', 'rb', mime_type='application/octet-stream')
   178  
   179    @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
   180    def test_copy_file(self, unused_mock_arg):
   181      # Prepare mocks.
   182      s3io_mock = mock.MagicMock()
   183      s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
   184  
   185      sources = ['s3://bucket/from1', 's3://bucket/from2']
   186      destinations = ['s3://bucket/to1', 's3://bucket/to2']
   187  
   188      # Issue file copy
   189      self.fs.copy(sources, destinations)
   190  
   191      src_dest_pairs = list(zip(sources, destinations))
   192      s3io_mock.copy_paths.assert_called_once_with(src_dest_pairs)
   193  
   194    @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
   195    def test_copy_file_error(self, unused_mock_arg):
   196      # Prepare mocks.
   197      s3io_mock = mock.MagicMock()
   198      s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
   199  
   200      sources = ['s3://bucket/from1', 's3://bucket/from2', 's3://bucket/from3']
   201      destinations = ['s3://bucket/to1', 's3://bucket/to2']
   202  
   203      # Issue file copy
   204      with self.assertRaises(BeamIOError):
   205        self.fs.copy(sources, destinations)
   206  
   207    @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
   208    def test_delete(self, unused_mock_arg):
   209      # Prepare mocks.
   210      s3io_mock = mock.MagicMock()
   211      s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
   212      s3io_mock.size.return_value = 0
   213      files = [
   214          's3://bucket/from1',
   215          's3://bucket/from2',
   216          's3://bucket/from3',
   217      ]
   218  
   219      # Issue batch delete.
   220      self.fs.delete(files)
   221      s3io_mock.delete_paths.assert_called_once_with(files)
   222  
   223    @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
   224    def test_delete_error(self, unused_mock_arg):
   225      # Prepare mocks.
   226      s3io_mock = mock.MagicMock()
   227      s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
   228  
   229      problematic_directory = 's3://nonexistent-bucket/tree/'
   230      exception = messages.S3ClientError('Not found', 404)
   231  
   232      s3io_mock.delete_paths.return_value = {
   233          problematic_directory: exception,
   234          's3://bucket/object1': None,
   235          's3://bucket/object2': None,
   236      }
   237  
   238      s3io_mock.size.return_value = 0
   239      files = [
   240          problematic_directory,
   241          's3://bucket/object1',
   242          's3://bucket/object2',
   243      ]
   244      expected_results = {problematic_directory: exception}
   245  
   246      # Issue batch delete.
   247      with self.assertRaises(BeamIOError) as error:
   248        self.fs.delete(files)
   249      self.assertIn('Delete operation failed', str(error.exception))
   250      self.assertEqual(error.exception.exception_details, expected_results)
   251      s3io_mock.delete_paths.assert_called()
   252  
   253    @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
   254    def test_rename(self, unused_mock_arg):
   255      # Prepare mocks.
   256      s3io_mock = mock.MagicMock()
   257      s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
   258  
   259      sources = ['s3://bucket/from1', 's3://bucket/from2']
   260      destinations = ['s3://bucket/to1', 's3://bucket/to2']
   261  
   262      # Issue file copy
   263      self.fs.rename(sources, destinations)
   264  
   265      src_dest_pairs = list(zip(sources, destinations))
   266      s3io_mock.rename_files.assert_called_once_with(src_dest_pairs)
   267  
   268  
   269  if __name__ == '__main__':
   270    logging.getLogger().setLevel(logging.INFO)
   271    unittest.main()