# Source: github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/gcp/gcsfilesystem_test.py
# -*- coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Unit tests for GCS File System."""

# pytype: skip-file

import logging
import unittest

import mock

from apache_beam.io.filesystem import BeamIOError
from apache_beam.io.filesystem import FileMetadata
from apache_beam.options.pipeline_options import PipelineOptions

# Protect against environments where apitools library is not available.
# pylint: disable=wrong-import-order, wrong-import-position
try:
  from apache_beam.io.gcp import gcsfilesystem
except ImportError:
  gcsfilesystem = None  # type: ignore
# pylint: enable=wrong-import-order, wrong-import-position


@unittest.skipIf(gcsfilesystem is None, 'GCP dependencies are not installed')
class GCSFileSystemTest(unittest.TestCase):
  """Tests for ``gcsfilesystem.GCSFileSystem``.

  Tests that touch storage patch the ``gcsio`` name inside
  ``apache_beam.io.gcp.gcsfilesystem`` and route ``gcsio.GcsIO(...)`` to a
  single ``MagicMock`` so the calls made by the filesystem can be inspected.
  The whole class is skipped when ``gcsfilesystem`` cannot be imported.
  """
  def setUp(self):
    """Create the filesystem under test with default pipeline options."""
    pipeline_options = PipelineOptions()
    self.fs = gcsfilesystem.GCSFileSystem(pipeline_options=pipeline_options)

  @staticmethod
  def _install_gcsio_mock(mock_gcsio):
    """Make ``mock_gcsio.GcsIO(...)`` return one shared ``MagicMock``.

    Args:
      mock_gcsio: the mock injected by
        ``mock.patch('apache_beam.io.gcp.gcsfilesystem.gcsio')``.

    Returns:
      The ``MagicMock`` standing in for the ``GcsIO`` client instance.
    """
    gcsio_mock = mock.MagicMock()
    # Keep the explicit signature: the factory only accepts an optional
    # ``pipeline_options`` argument, so any other call shape fails loudly.
    mock_gcsio.GcsIO = lambda pipeline_options=None: gcsio_mock
    return gcsio_mock

  def test_scheme(self):
    """scheme() is 'gs' on both an instance and the class."""
    self.assertEqual(self.fs.scheme(), 'gs')
    self.assertEqual(gcsfilesystem.GCSFileSystem.scheme(), 'gs')

  def test_join(self):
    """join() normalizes slashes and rejects a base without gs:// prefix."""
    self.assertEqual(
        'gs://bucket/path/to/file',
        self.fs.join('gs://bucket/path', 'to', 'file'))
    self.assertEqual(
        'gs://bucket/path/to/file', self.fs.join('gs://bucket/path', 'to/file'))
    self.assertEqual(
        'gs://bucket/path/to/file',
        self.fs.join('gs://bucket/path', '/to/file'))
    self.assertEqual(
        'gs://bucket/path/to/file',
        self.fs.join('gs://bucket/path/', 'to', 'file'))
    self.assertEqual(
        'gs://bucket/path/to/file',
        self.fs.join('gs://bucket/path/', 'to/file'))
    self.assertEqual(
        'gs://bucket/path/to/file',
        self.fs.join('gs://bucket/path/', '/to/file'))
    with self.assertRaises(ValueError):
      self.fs.join('/bucket/path/', '/to/file')

  def test_split(self):
    """split() returns (head, tail) and rejects paths without gs:// prefix."""
    self.assertEqual(('gs://foo/bar', 'baz'), self.fs.split('gs://foo/bar/baz'))
    self.assertEqual(('gs://foo', ''), self.fs.split('gs://foo/'))
    self.assertEqual(('gs://foo', ''), self.fs.split('gs://foo'))

    with self.assertRaises(ValueError):
      self.fs.split('/no/gcs/prefix')

  @mock.patch('apache_beam.io.gcp.gcsfilesystem.gcsio')
  def test_match_multiples(self, mock_gcsio):
    """match() on a directory returns metadata for every listed file."""
    gcsio_mock = self._install_gcsio_mock(mock_gcsio)
    gcsio_mock.list_files.return_value = iter([
        ('gs://bucket/file1', (1, 99999.0)),
        ('gs://bucket/file2', (2, 88888.0)),
    ])
    expected_results = {
        FileMetadata('gs://bucket/file1', 1, 99999.0),
        FileMetadata('gs://bucket/file2', 2, 88888.0),
    }

    match_result = self.fs.match(['gs://bucket/'])[0]

    self.assertEqual(set(match_result.metadata_list), expected_results)
    gcsio_mock.list_files.assert_called_once_with(
        'gs://bucket/', with_metadata=True)

  @mock.patch('apache_beam.io.gcp.gcsfilesystem.gcsio')
  def test_match_multiples_limit(self, mock_gcsio):
    """match() with a per-pattern limit of 1 returns exactly one entry."""
    gcsio_mock = self._install_gcsio_mock(mock_gcsio)
    limit = 1
    gcsio_mock.list_files.return_value = iter([
        ('gs://bucket/file1', (1, 99999.0)),
    ])
    expected_results = {FileMetadata('gs://bucket/file1', 1, 99999.0)}

    match_result = self.fs.match(['gs://bucket/'], [limit])[0]

    self.assertEqual(set(match_result.metadata_list), expected_results)
    self.assertEqual(len(match_result.metadata_list), limit)
    gcsio_mock.list_files.assert_called_once_with(
        'gs://bucket/', with_metadata=True)

  @mock.patch('apache_beam.io.gcp.gcsfilesystem.gcsio')
  def test_match_multiples_error(self, mock_gcsio):
    """A listing failure surfaces as BeamIOError naming the pattern."""
    gcsio_mock = self._install_gcsio_mock(mock_gcsio)
    exception = IOError('Failed')
    gcsio_mock.list_files.side_effect = exception

    with self.assertRaisesRegex(BeamIOError,
                                r'^Match operation failed') as error:
      self.fs.match(['gs://bucket/'])

    # The details must mention both the pattern and the underlying error.
    self.assertRegex(
        str(error.exception.exception_details), r'gs://bucket/.*%s' % exception)
    gcsio_mock.list_files.assert_called_once_with(
        'gs://bucket/', with_metadata=True)

  @mock.patch('apache_beam.io.gcp.gcsfilesystem.gcsio')
  def test_match_multiple_patterns(self, mock_gcsio):
    """match() returns one result per pattern, in pattern order."""
    gcsio_mock = self._install_gcsio_mock(mock_gcsio)
    # One listing per pattern, consumed in call order.
    gcsio_mock.list_files.side_effect = [
        iter([('gs://bucket/file1', (1, 99999.0))]),
        iter([('gs://bucket/file2', (2, 88888.0))]),
    ]
    expected_results = [[FileMetadata('gs://bucket/file1', 1, 99999.0)],
                        [FileMetadata('gs://bucket/file2', 2, 88888.0)]]

    result = self.fs.match(['gs://bucket/file1*', 'gs://bucket/file2*'])

    self.assertEqual([mr.metadata_list for mr in result], expected_results)

  @mock.patch('apache_beam.io.gcp.gcsfilesystem.gcsio')
  def test_create(self, mock_gcsio):
    """create() opens the object in 'wb' mode with the given MIME type."""
    gcsio_mock = self._install_gcsio_mock(mock_gcsio)

    # Create the object for writing.
    _ = self.fs.create('gs://bucket/from1', 'application/octet-stream')

    gcsio_mock.open.assert_called_once_with(
        'gs://bucket/from1', 'wb', mime_type='application/octet-stream')

  @mock.patch('apache_beam.io.gcp.gcsfilesystem.gcsio')
  def test_open(self, mock_gcsio):
    """open() opens the object in 'rb' mode with the given MIME type."""
    gcsio_mock = self._install_gcsio_mock(mock_gcsio)

    # Open the object for reading.
    _ = self.fs.open('gs://bucket/from1', 'application/octet-stream')

    gcsio_mock.open.assert_called_once_with(
        'gs://bucket/from1', 'rb', mime_type='application/octet-stream')

  @mock.patch('apache_beam.io.gcp.gcsfilesystem.gcsio')
  def test_copy_file(self, mock_gcsio):
    """copy() of a single file delegates to GcsIO.copy."""
    gcsio_mock = self._install_gcsio_mock(mock_gcsio)
    sources = ['gs://bucket/from1']
    destinations = ['gs://bucket/to1']

    # Issue file copy.
    self.fs.copy(sources, destinations)

    gcsio_mock.copy.assert_called_once_with(
        'gs://bucket/from1', 'gs://bucket/to1')

  @mock.patch('apache_beam.io.gcp.gcsfilesystem.gcsio')
  def test_copy_file_error(self, mock_gcsio):
    """A failing copy raises BeamIOError keyed by (source, destination)."""
    gcsio_mock = self._install_gcsio_mock(mock_gcsio)
    sources = ['gs://bucket/from1']
    destinations = ['gs://bucket/to1']

    exception = IOError('Failed')
    gcsio_mock.copy.side_effect = exception

    # Every (source, destination) pair should map to the raised error.
    expected_results = {
        (s, d): exception
        for s, d in zip(sources, destinations)
    }

    # Issue file copy.
    with self.assertRaisesRegex(BeamIOError,
                                r'^Copy operation failed') as error:
      self.fs.copy(sources, destinations)
    self.assertEqual(error.exception.exception_details, expected_results)

    gcsio_mock.copy.assert_called_once_with(
        'gs://bucket/from1', 'gs://bucket/to1')

  @mock.patch('apache_beam.io.gcp.gcsfilesystem.gcsio')
  def test_copy_tree(self, mock_gcsio):
    """copy() of a path ending in '/' delegates to GcsIO.copytree."""
    gcsio_mock = self._install_gcsio_mock(mock_gcsio)
    sources = ['gs://bucket1/']
    destinations = ['gs://bucket2/']

    # Issue directory copy.
    self.fs.copy(sources, destinations)

    gcsio_mock.copytree.assert_called_once_with(
        'gs://bucket1/', 'gs://bucket2/')

  @mock.patch('apache_beam.io.gcp.gcsfilesystem.gcsio')
  def test_rename(self, mock_gcsio):
    """rename() batch-copies every pair, then batch-deletes the sources."""
    gcsio_mock = self._install_gcsio_mock(mock_gcsio)
    sources = [
        'gs://bucket/from1',
        'gs://bucket/from2',
        'gs://bucket/from3',
    ]
    destinations = [
        'gs://bucket/to1',
        'gs://bucket/to2',
        'gs://bucket/to3',
    ]
    copy_pairs = list(zip(sources, destinations))
    # Each batch call is expected exactly once; a ``None`` error slot marks
    # the entry as successful.
    gcsio_mock.copy_batch.side_effect = [[(s, d, None) for s, d in copy_pairs]]
    gcsio_mock.delete_batch.side_effect = [[(s, None) for s in sources]]

    # Issue batch rename.
    self.fs.rename(sources, destinations)

    gcsio_mock.copy_batch.assert_called_once_with(copy_pairs)
    gcsio_mock.delete_batch.assert_called_once_with(sources)

  @mock.patch('apache_beam.io.gcp.gcsfilesystem.gcsio')
  def test_rename_error(self, mock_gcsio):
    """Delete failures during rename raise BeamIOError keyed by pair."""
    gcsio_mock = self._install_gcsio_mock(mock_gcsio)
    sources = [
        'gs://bucket/from1',
        'gs://bucket/from2',
        'gs://bucket/from3',
    ]
    destinations = [
        'gs://bucket/to1',
        'gs://bucket/to2',
        'gs://bucket/to3',
    ]
    copy_pairs = list(zip(sources, destinations))
    exception = IOError('Failed')
    # The copies succeed, but every delete of a source reports the error.
    gcsio_mock.delete_batch.side_effect = [[(f, exception) for f in sources]]
    gcsio_mock.copy_batch.side_effect = [[(s, d, None) for s, d in copy_pairs]]

    # Every (source, destination) pair should map to the delete error.
    expected_results = {(s, d): exception for s, d in copy_pairs}

    # Issue batch rename.
    with self.assertRaisesRegex(BeamIOError,
                                r'^Rename operation failed') as error:
      self.fs.rename(sources, destinations)
    self.assertEqual(error.exception.exception_details, expected_results)

    gcsio_mock.copy_batch.assert_called_once_with(copy_pairs)
    gcsio_mock.delete_batch.assert_called_once_with(sources)

  @mock.patch('apache_beam.io.gcp.gcsfilesystem.gcsio')
  def test_delete(self, mock_gcsio):
    """delete() reaches GcsIO.delete_batch for the given paths."""
    gcsio_mock = self._install_gcsio_mock(mock_gcsio)
    # NOTE(review): presumably consulted when the filesystem looks the paths
    # up before deleting them -- confirm against GCSFileSystem.delete.
    gcsio_mock._status.return_value = {'size': 0, 'last_updated': 99999.0}
    files = [
        'gs://bucket/from1',
        'gs://bucket/from2',
        'gs://bucket/from3',
    ]

    # Issue batch delete.
    self.fs.delete(files)
    gcsio_mock.delete_batch.assert_called()

  @mock.patch('apache_beam.io.gcp.gcsfilesystem.gcsio')
  def test_delete_error(self, mock_gcsio):
    """A failing delete_batch raises BeamIOError keyed by path."""
    gcsio_mock = self._install_gcsio_mock(mock_gcsio)
    exception = IOError('Failed')
    gcsio_mock.delete_batch.side_effect = exception
    # NOTE(review): presumably consulted when the filesystem looks the paths
    # up before deleting them -- confirm against GCSFileSystem.delete.
    gcsio_mock._status.return_value = {'size': 0, 'last_updated': 99999.0}
    files = [
        'gs://bucket/from1',
        'gs://bucket/from2',
        'gs://bucket/from3',
    ]
    expected_results = {f: exception for f in files}

    # Issue batch delete.
    with self.assertRaisesRegex(BeamIOError,
                                r'^Delete operation failed') as error:
      self.fs.delete(files)
    self.assertEqual(error.exception.exception_details, expected_results)
    gcsio_mock.delete_batch.assert_called()


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()