# -*- coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Unit tests for the S3 File System"""

# pytype: skip-file

import logging
import unittest

import mock

from apache_beam.io.aws.clients.s3 import messages
from apache_beam.io.filesystem import BeamIOError
from apache_beam.io.filesystem import FileMetadata
from apache_beam.options.pipeline_options import PipelineOptions

# Protect against environments where boto3 library is not available.
# pylint: disable=wrong-import-order, wrong-import-position
try:
  from apache_beam.io.aws import s3filesystem
except ImportError:
  s3filesystem = None  # type: ignore[assignment]
# pylint: enable=wrong-import-order, wrong-import-position


@unittest.skipIf(s3filesystem is None, 'AWS dependencies are not installed')
class S3FileSystemTest(unittest.TestCase):
  """Tests for ``s3filesystem.S3FileSystem``.

  Path helpers (``scheme``, ``join``, ``split``) are exercised directly.
  Every other test patches ``apache_beam.io.aws.s3filesystem.s3io`` and then
  replaces ``S3IO`` on the patched module with a factory returning a
  ``MagicMock``, so the calls the filesystem makes on the S3 client can be
  asserted without touching the network.
  """
  def setUp(self):
    # A fresh filesystem with default options for every test.
    pipeline_options = PipelineOptions()
    self.fs = s3filesystem.S3FileSystem(pipeline_options=pipeline_options)

  def test_scheme(self):
    # scheme() is checked both on an instance and on the class itself.
    self.assertEqual(self.fs.scheme(), 's3')
    self.assertEqual(s3filesystem.S3FileSystem.scheme(), 's3')

  def test_join(self):
    # Every combination of trailing/leading slashes must yield the same path.
    self.assertEqual(
        's3://bucket/path/to/file',
        self.fs.join('s3://bucket/path', 'to', 'file'))
    self.assertEqual(
        's3://bucket/path/to/file', self.fs.join('s3://bucket/path', 'to/file'))
    self.assertEqual(
        's3://bucket/path/to/file',
        self.fs.join('s3://bucket/path', '/to/file'))
    self.assertEqual(
        's3://bucket/path/to/file',
        self.fs.join('s3://bucket/path/', 'to', 'file'))
    self.assertEqual(
        's3://bucket/path/to/file',
        self.fs.join('s3://bucket/path/', 'to/file'))
    self.assertEqual(
        's3://bucket/path/to/file',
        self.fs.join('s3://bucket/path/', '/to/file'))
    # A base path without the s3:// prefix is rejected.
    with self.assertRaises(ValueError):
      self.fs.join('/bucket/path/', '/to/file')

  def test_split(self):
    self.assertEqual(('s3://foo/bar', 'baz'), self.fs.split('s3://foo/bar/baz'))
    # A bare bucket, with or without trailing slash, has an empty tail.
    self.assertEqual(('s3://foo', ''), self.fs.split('s3://foo/'))
    self.assertEqual(('s3://foo', ''), self.fs.split('s3://foo'))

    # A path without the s3:// prefix is rejected.
    with self.assertRaises(ValueError):
      self.fs.split('/no/s3/prefix')

  @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
  def test_match_single(self, unused_mock_arg):
    # Prepare mocks.
    s3io_mock = mock.MagicMock()
    s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
    s3io_mock._status.return_value = {'size': 1, 'last_updated': 9999999.0}
    expected_results = [FileMetadata('s3://bucket/file1', 1, 9999999.0)]

    # Match a single, fully specified object path.
    match_result = self.fs.match(['s3://bucket/file1'])[0]

    self.assertEqual(match_result.metadata_list, expected_results)
    s3io_mock._status.assert_called_once_with('s3://bucket/file1')

  @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
  def test_match_multiples(self, unused_mock_arg):
    # Prepare mocks.
    s3io_mock = mock.MagicMock()
    s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
    s3io_mock.list_files.return_value = iter([
        ('s3://bucket/file1', (1, 9999999.0)),
        ('s3://bucket/file2', (2, 8888888.0))
    ])
    expected_results = {
        FileMetadata('s3://bucket/file1', 1, 9999999.0),
        FileMetadata('s3://bucket/file2', 2, 8888888.0)
    }

    # Match everything under the bucket root.
    match_result = self.fs.match(['s3://bucket/'])[0]

    # Compared as sets, so the order of the listing is not asserted.
    self.assertEqual(set(match_result.metadata_list), expected_results)
    s3io_mock.list_files.assert_called_once_with(
        's3://bucket/', with_metadata=True)

  @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
  def test_match_multiples_limit(self, unused_mock_arg):
    # Prepare mocks.
    s3io_mock = mock.MagicMock()
    limit = 1
    s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
    s3io_mock.list_files.return_value = iter([
        ('s3://bucket/file1', (1, 99999.0))
    ])
    expected_results = {FileMetadata('s3://bucket/file1', 1, 99999.0)}

    # Match with one limit per pattern.
    match_result = self.fs.match(['s3://bucket/'], [limit])[0]

    self.assertEqual(set(match_result.metadata_list), expected_results)
    self.assertEqual(len(match_result.metadata_list), limit)
    s3io_mock.list_files.assert_called_once_with(
        's3://bucket/', with_metadata=True)

  @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
  def test_match_multiples_error(self, unused_mock_arg):
    # Prepare mocks.
    s3io_mock = mock.MagicMock()
    s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
    exception = IOError('Failed')
    s3io_mock.list_files.side_effect = exception

    # A failing listing must surface as a BeamIOError.
    with self.assertRaises(BeamIOError) as error:
      self.fs.match(['s3://bucket/'])

    self.assertIn('Match operation failed', str(error.exception))
    s3io_mock.list_files.assert_called_once_with(
        's3://bucket/', with_metadata=True)

  @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
  def test_match_multiple_patterns(self, unused_mock_arg):
    # Prepare mocks.
    s3io_mock = mock.MagicMock()
    s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
    # One listing result per pattern, returned in call order.
    s3io_mock.list_files.side_effect = [
        iter([('s3://bucket/file1', (1, 99999.0))]),
        iter([('s3://bucket/file2', (2, 88888.0))]),
    ]
    expected_results = [[FileMetadata('s3://bucket/file1', 1, 99999.0)],
                        [FileMetadata('s3://bucket/file2', 2, 88888.0)]]

    result = self.fs.match(['s3://bucket/file1*', 's3://bucket/file2*'])

    self.assertEqual([mr.metadata_list for mr in result], expected_results)

  @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
  def test_create(self, unused_mock_arg):
    # Prepare mocks.
    s3io_mock = mock.MagicMock()
    s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]

    # Create the file; the S3 client must be asked to open it for writing.
    _ = self.fs.create('s3://bucket/from1', 'application/octet-stream')

    s3io_mock.open.assert_called_once_with(
        's3://bucket/from1', 'wb', mime_type='application/octet-stream')

  @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
  def test_open(self, unused_mock_arg):
    # Prepare mocks.
    s3io_mock = mock.MagicMock()
    s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]

    # Open the file; the S3 client must be asked to open it for reading.
    _ = self.fs.open('s3://bucket/from1', 'application/octet-stream')

    s3io_mock.open.assert_called_once_with(
        's3://bucket/from1', 'rb', mime_type='application/octet-stream')

  @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
  def test_copy_file(self, unused_mock_arg):
    # Prepare mocks.
    s3io_mock = mock.MagicMock()
    s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]

    sources = ['s3://bucket/from1', 's3://bucket/from2']
    destinations = ['s3://bucket/to1', 's3://bucket/to2']

    # Issue file copy
    self.fs.copy(sources, destinations)

    # The client must receive the sources and destinations zipped into pairs.
    src_dest_pairs = list(zip(sources, destinations))
    s3io_mock.copy_paths.assert_called_once_with(src_dest_pairs)

  @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
  def test_copy_file_error(self, unused_mock_arg):
    # Prepare mocks.
    s3io_mock = mock.MagicMock()
    s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]

    # Three sources but only two destinations.
    sources = ['s3://bucket/from1', 's3://bucket/from2', 's3://bucket/from3']
    destinations = ['s3://bucket/to1', 's3://bucket/to2']

    # Issue file copy
    with self.assertRaises(BeamIOError):
      self.fs.copy(sources, destinations)

  @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
  def test_delete(self, unused_mock_arg):
    # Prepare mocks.
    s3io_mock = mock.MagicMock()
    s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]
    s3io_mock.size.return_value = 0
    files = [
        's3://bucket/from1',
        's3://bucket/from2',
        's3://bucket/from3',
    ]

    # Issue batch delete.
    self.fs.delete(files)
    s3io_mock.delete_paths.assert_called_once_with(files)

  @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
  def test_delete_error(self, unused_mock_arg):
    # Prepare mocks.
    s3io_mock = mock.MagicMock()
    s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]

    problematic_directory = 's3://nonexistent-bucket/tree/'
    exception = messages.S3ClientError('Not found', 404)

    # Per-path results: only the problematic directory carries an error.
    s3io_mock.delete_paths.return_value = {
        problematic_directory: exception,
        's3://bucket/object1': None,
        's3://bucket/object2': None,
    }

    s3io_mock.size.return_value = 0
    files = [
        problematic_directory,
        's3://bucket/object1',
        's3://bucket/object2',
    ]
    expected_results = {problematic_directory: exception}

    # Issue batch delete.
    with self.assertRaises(BeamIOError) as error:
      self.fs.delete(files)
    # These checks sit after the ``with`` block on purpose: statements placed
    # after the raising call inside the block would never execute.
    self.assertIn('Delete operation failed', str(error.exception))
    self.assertEqual(error.exception.exception_details, expected_results)
    s3io_mock.delete_paths.assert_called()

  @mock.patch('apache_beam.io.aws.s3filesystem.s3io')
  def test_rename(self, unused_mock_arg):
    # Prepare mocks.
    s3io_mock = mock.MagicMock()
    s3filesystem.s3io.S3IO = lambda options: s3io_mock  # type: ignore[misc]

    sources = ['s3://bucket/from1', 's3://bucket/from2']
    destinations = ['s3://bucket/to1', 's3://bucket/to2']

    # Issue batch rename.
    self.fs.rename(sources, destinations)

    # The client must receive the sources and destinations zipped into pairs.
    src_dest_pairs = list(zip(sources, destinations))
    s3io_mock.rename_files.assert_called_once_with(src_dest_pairs)


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()