# -*- coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Unit tests for LocalFileSystem."""

# pytype: skip-file

import filecmp
import logging
import os
import shutil
import tempfile
import unittest

import mock
from parameterized import param
from parameterized import parameterized

from apache_beam.io import localfilesystem
from apache_beam.io.filesystem import BeamIOError
from apache_beam.options.pipeline_options import PipelineOptions


def _gen_fake_join(separator):
  """Returns a callable that joins paths with the given separator.

  Trailing separator characters are stripped from the first path only; the
  remaining components are joined verbatim.
  """
  def _join(first_path, *paths):
    return separator.join((first_path.rstrip(separator), ) + paths)

  return _join


def _gen_fake_split(separator):
  """Returns a callable that splits a path at the last given separator.

  The callable returns a ``(head, tail)`` tuple. When the separator does not
  occur in the path the result is ``(path, '')``.
  """
  def _split(path):
    sep_index = path.rfind(separator)
    if sep_index >= 0:
      # Skip the whole separator rather than a single character so that
      # multi-character separators are handled as well.
      return (path[:sep_index], path[sep_index + len(separator):])
    return (path, '')

  return _split


class LocalFileSystemTest(unittest.TestCase):
  """Exercises LocalFileSystem against a per-test temporary directory."""
  def setUp(self):
    """Creates a fresh temporary directory and a LocalFileSystem instance."""
    self.tmpdir = tempfile.mkdtemp()
    pipeline_options = PipelineOptions()
    self.fs = localfilesystem.LocalFileSystem(pipeline_options)

  def tearDown(self):
    """Removes the temporary directory and everything created under it."""
    shutil.rmtree(self.tmpdir)

  def test_scheme(self):
    """scheme() is None both on an instance and on the class."""
    self.assertIsNone(self.fs.scheme())
    self.assertIsNone(localfilesystem.LocalFileSystem.scheme())

  @mock.patch('apache_beam.io.localfilesystem.os')
  def test_unix_path_join(self, *unused_mocks):
    """join() delegates to os.path.join (faked with a '/' separator)."""
    # Test joining of Unix paths.
    localfilesystem.os.path.join.side_effect = _gen_fake_join('/')
    self.assertEqual(
        '/tmp/path/to/file', self.fs.join('/tmp/path', 'to', 'file'))
    self.assertEqual('/tmp/path/to/file', self.fs.join('/tmp/path', 'to/file'))

  @mock.patch('apache_beam.io.localfilesystem.os')
  def test_windows_path_join(self, *unused_mocks):
    """join() delegates to os.path.join (faked with a '\\' separator)."""
    # Test joining of Windows paths.
    localfilesystem.os.path.join.side_effect = _gen_fake_join('\\')
    self.assertEqual(
        r'C:\tmp\path\to\file', self.fs.join(r'C:\tmp\path', 'to', 'file'))
    self.assertEqual(
        r'C:\tmp\path\to\file', self.fs.join(r'C:\tmp\path', r'to\file'))

  @mock.patch('apache_beam.io.localfilesystem.os')
  def test_unix_path_split(self, os_mock):
    """split() goes through the (faked) os.path.split with '/' separator."""
    os_mock.path.abspath.side_effect = lambda a: a
    os_mock.path.split.side_effect = _gen_fake_split('/')
    self.assertEqual(('/tmp/path/to', 'file'),
                     self.fs.split('/tmp/path/to/file'))
    # The real os.path.split would split the following into '/' and 'tmp'
    # when run on Unix; the fake split used here yields '' and 'tmp'.
    self.assertEqual(('', 'tmp'), self.fs.split('/tmp'))

  @mock.patch('apache_beam.io.localfilesystem.os')
  def test_windows_path_split(self, os_mock):
    """split() goes through the (faked) os.path.split with '\\' separator."""
    # Configure abspath the same way as in test_unix_path_split.
    os_mock.path.abspath.side_effect = lambda a: a
    os_mock.path.split.side_effect = _gen_fake_split('\\')
    self.assertEqual((r'C:\tmp\path\to', 'file'),
                     self.fs.split(r'C:\tmp\path\to\file'))
    # The real os.path.split would split the following into 'C:\' and 'tmp'
    # when run on Windows; the fake split used here yields 'C:' and 'tmp'.
    self.assertEqual((r'C:', 'tmp'), self.fs.split(r'C:\tmp'))

  def test_mkdirs(self):
    """mkdirs() creates nested directories."""
    path = os.path.join(self.tmpdir, 't1/t2')
    self.fs.mkdirs(path)
    self.assertTrue(os.path.isdir(path))

  def test_mkdirs_failed(self):
    """mkdirs() raises IOError when the directory already exists."""
    path = os.path.join(self.tmpdir, 't1/t2')
    self.fs.mkdirs(path)

    # Check IOError if existing directory is created
    with self.assertRaises(IOError):
      self.fs.mkdirs(path)

    with self.assertRaises(IOError):
      self.fs.mkdirs(os.path.join(self.tmpdir, 't1'))

  def test_match_file(self):
    """match() on an existing file path returns exactly that file."""
    path = os.path.join(self.tmpdir, 'f1')
    open(path, 'a').close()

    # Match files in the temp directory
    result = self.fs.match([path])[0]
    files = [f.path for f in result.metadata_list]
    self.assertEqual(files, [path])

  def test_match_file_empty(self):
    """match() on a non-existent path returns an empty metadata list."""
    path = os.path.join(self.tmpdir, 'f2')  # Does not exist

    # Match files in the temp directory
    result = self.fs.match([path])[0]
    files = [f.path for f in result.metadata_list]
    self.assertEqual(files, [])

  def test_match_file_exception(self):
    """match() wraps failures in BeamIOError keyed by the bad pattern."""
    # Match files with None so that it throws an exception
    with self.assertRaisesRegex(BeamIOError,
                                r'^Match operation failed') as error:
      self.fs.match([None])
    self.assertEqual(list(error.exception.exception_details.keys()), [None])

  @parameterized.expand([
      param('*', files=['a', 'b', os.path.join('c', 'x')], expected=['a', 'b']),
      param(
          '**',
          files=['a', os.path.join('b', 'x'), os.path.join('c', 'x')],
          expected=['a', os.path.join('b', 'x'), os.path.join('c', 'x')]),
      param(
          os.path.join('*', '*'),
          files=[
              'a',
              os.path.join('b', 'x'),
              os.path.join('c', 'x'),
              os.path.join('d', 'x', 'y')
          ],
          expected=[os.path.join('b', 'x'), os.path.join('c', 'x')]),
      param(
          os.path.join('**', '*'),
          files=[
              'a',
              os.path.join('b', 'x'),
              os.path.join('c', 'x'),
              os.path.join('d', 'x', 'y')
          ],
          expected=[
              os.path.join('b', 'x'),
              os.path.join('c', 'x'),
              os.path.join('d', 'x', 'y')
          ]),
  ])
  def test_match_glob(self, pattern, files, expected):
    """match() with a glob pattern returns the expected relative paths.

    :param pattern: glob pattern, relative to the temporary directory
    :param files: relative paths of empty files to create before matching
    :param expected: relative paths the pattern is expected to match
    """
    for filename in files:
      full_path = os.path.join(self.tmpdir, filename)
      dirname = os.path.dirname(full_path)
      if dirname != full_path:
        # Make sure we don't go outside the tmpdir. commonpath compares
        # whole path components (commonprefix is character-wise), and a
        # unittest assertion is not stripped when running under -O.
        self.assertEqual(
            os.path.commonpath([self.tmpdir, full_path]),
            os.path.normpath(self.tmpdir))
        try:
          self.fs.mkdirs(dirname)
        except IOError:
          # Directory exists
          pass

      open(full_path, 'a').close()  # create empty file

    # Match the files in the directory against the pattern
    full_pattern = os.path.join(self.tmpdir, pattern)
    result = self.fs.match([full_pattern])[0]
    matched = [
        os.path.relpath(f.path, self.tmpdir) for f in result.metadata_list
    ]
    self.assertCountEqual(matched, expected)

  def test_match_directory(self):
    """match() on a directory path returns the directory itself."""
    result = self.fs.match([self.tmpdir])[0]
    files = [f.path for f in result.metadata_list]
    self.assertEqual(files, [self.tmpdir])

  def test_match_directory_contents(self):
    """match() on '<dir>/*' returns the files inside the directory."""
    path1 = os.path.join(self.tmpdir, 'f1')
    path2 = os.path.join(self.tmpdir, 'f2')
    open(path1, 'a').close()
    open(path2, 'a').close()

    result = self.fs.match([os.path.join(self.tmpdir, '*')])[0]
    files = [f.path for f in result.metadata_list]
    self.assertCountEqual(files, [path1, path2])

  def test_copy(self):
    """copy() duplicates a file's contents at the destination."""
    path1 = os.path.join(self.tmpdir, 'f1')
    path2 = os.path.join(self.tmpdir, 'f2')
    with open(path1, 'a') as f:
      f.write('Hello')

    self.fs.copy([path1], [path2])
    self.assertTrue(filecmp.cmp(path1, path2))

  def test_copy_error(self):
    """copy() of a missing source raises BeamIOError keyed by the pair."""
    path1 = os.path.join(self.tmpdir, 'f1')
    path2 = os.path.join(self.tmpdir, 'f2')
    with self.assertRaisesRegex(BeamIOError,
                                r'^Copy operation failed') as error:
      self.fs.copy([path1], [path2])
    self.assertEqual(
        list(error.exception.exception_details.keys()), [(path1, path2)])

  def test_copy_directory(self):
    """copy() of a directory copies the files it contains."""
    path_t1 = os.path.join(self.tmpdir, 't1')
    path_t2 = os.path.join(self.tmpdir, 't2')
    self.fs.mkdirs(path_t1)
    self.fs.mkdirs(path_t2)

    path1 = os.path.join(path_t1, 'f1')
    path2 = os.path.join(path_t2, 'f1')
    with open(path1, 'a') as f:
      f.write('Hello')

    self.fs.copy([path_t1], [path_t2])
    self.assertTrue(filecmp.cmp(path1, path2))

  def test_rename(self):
    """rename() moves a file: destination exists, source is gone."""
    path1 = os.path.join(self.tmpdir, 'f1')
    path2 = os.path.join(self.tmpdir, 'f2')
    with open(path1, 'a') as f:
      f.write('Hello')

    self.fs.rename([path1], [path2])
    self.assertTrue(self.fs.exists(path2))
    self.assertFalse(self.fs.exists(path1))

  def test_rename_error(self):
    """rename() of a missing source raises BeamIOError keyed by the pair."""
    path1 = os.path.join(self.tmpdir, 'f1')
    path2 = os.path.join(self.tmpdir, 'f2')
    with self.assertRaisesRegex(BeamIOError,
                                r'^Rename operation failed') as error:
      self.fs.rename([path1], [path2])
    self.assertEqual(
        list(error.exception.exception_details.keys()), [(path1, path2)])

  def test_rename_directory(self):
    """rename() moves a directory together with the file inside it."""
    path_t1 = os.path.join(self.tmpdir, 't1')
    path_t2 = os.path.join(self.tmpdir, 't2')
    self.fs.mkdirs(path_t1)

    path1 = os.path.join(path_t1, 'f1')
    path2 = os.path.join(path_t2, 'f1')
    with open(path1, 'a') as f:
      f.write('Hello')

    self.fs.rename([path_t1], [path_t2])
    self.assertTrue(self.fs.exists(path_t2))
    self.assertFalse(self.fs.exists(path_t1))
    self.assertTrue(self.fs.exists(path2))
    self.assertFalse(self.fs.exists(path1))

  def test_exists(self):
    """exists() is True for a created file and False for a missing one."""
    path1 = os.path.join(self.tmpdir, 'f1')
    path2 = os.path.join(self.tmpdir, 'f2')
    with open(path1, 'a') as f:
      f.write('Hello')
    self.assertTrue(self.fs.exists(path1))
    self.assertFalse(self.fs.exists(path2))

  def test_checksum(self):
    """checksum() equals the file size rendered as a string."""
    path1 = os.path.join(self.tmpdir, 'f1')
    path2 = os.path.join(self.tmpdir, 'f2')
    with open(path1, 'a') as f:
      f.write('Hello')
    with open(path2, 'a') as f:
      f.write('foo')
    # tests that localfilesystem checksum returns file size
    checksum1 = self.fs.checksum(path1)
    checksum2 = self.fs.checksum(path2)
    self.assertEqual(checksum1, str(5))
    self.assertEqual(checksum2, str(3))
    # tests that fs.checksum and str(fs.size) are consistent
    self.assertEqual(checksum1, str(self.fs.size(path1)))
    self.assertEqual(checksum2, str(self.fs.size(path2)))

  def make_tree(self, path, value, expected_leaf_count=None):
    """Create a file+directory structure from a simple dict-based DSL

    :param path: root path to create directories+files under
    :param value: a specification of what ``path`` should contain: ``None`` to
     make it an empty directory, a string literal to make it a file with those
     contents, and a ``dict`` to make it a non-empty directory and recurse
    :param expected_leaf_count: should only be set at the top of a recursive
     call stack; after the whole tree has been created, verify the presence
     and number of all files+directories, as a sanity check
    :raises TypeError: if ``value`` is not ``None``, a ``str`` or a ``dict``
    """
    if value is None:
      # empty directory
      os.makedirs(path)
    elif isinstance(value, str):
      # file with string-literal contents
      parent = os.path.dirname(path)
      os.makedirs(parent, exist_ok=True)
      with open(path, 'a') as f:
        f.write(value)
    elif isinstance(value, dict):
      # recurse to create a subdirectory tree
      for basename, v in value.items():
        self.make_tree(os.path.join(path, basename), v)
    else:
      # Wrap in a 1-tuple so that a tuple ``value`` is formatted as a whole
      # instead of being unpacked as %-format arguments.
      raise TypeError('Unexpected value in tempdir tree: %s' % (value, ))

    if expected_leaf_count is not None:
      self.assertEqual(self.check_tree(path, value), expected_leaf_count)

  def check_tree(self, path, value, expected_leaf_count=None):
    """Verify a directory+file structure according to the rules described in
    ``make_tree``

    :param path: path to check under
    :param value: DSL-representation of expected files+directories under
     ``path``
    :param expected_leaf_count: if given, additionally assert that this many
     leaf files/directories were verified
    :return: number of leaf files/directories that were verified
    :raises TypeError: if ``value`` is not ``None``, a ``str`` or a ``dict``
    """
    actual_leaf_count = None
    if value is None:
      # empty directory
      self.assertTrue(os.path.exists(path), msg=path)
      self.assertEqual(os.listdir(path), [])
      actual_leaf_count = 1
    elif isinstance(value, str):
      # file with string-literal contents
      with open(path, 'r') as f:
        self.assertEqual(f.read(), value, msg=path)

      actual_leaf_count = 1
    elif isinstance(value, dict):
      # recurse to check subdirectory tree
      actual_leaf_count = sum(
          self.check_tree(os.path.join(path, basename), v) for basename,
          v in value.items())
    else:
      # Wrap in a 1-tuple so that a tuple ``value`` is formatted as a whole
      # instead of being unpacked as %-format arguments.
      raise TypeError('Unexpected value in tempdir tree: %s' % (value, ))

    if expected_leaf_count is not None:
      self.assertEqual(actual_leaf_count, expected_leaf_count)

    return actual_leaf_count

  # Tree specification (see ``make_tree``) shared by the delete tests; it
  # contains 7 leaves: 4 files and 3 empty directories.
  _test_tree = {
      'path1': '111',
      'path2': {
          '2': '222', 'emptydir': None
      },
      'aaa': {
          'b1': 'b1', 'b2': None, 'bbb': {
              'ccc': {
                  'ddd': 'DDD'
              }
          }, 'c': None
      }
  }

  def test_delete_globs(self):
    """delete() with glob patterns removes every match, files and dirs."""
    root = os.path.join(self.tmpdir, 'dir')
    self.make_tree(root, self._test_tree, expected_leaf_count=7)

    self.fs.delete(
        [os.path.join(root, 'path*'), os.path.join(root, 'aaa', 'b*')])

    # One empty nested directory is left
    self.check_tree(root, {'aaa': {'c': None}}, expected_leaf_count=1)

  def test_recursive_delete(self):
    """delete() on a directory removes it and everything beneath it."""
    root = os.path.join(self.tmpdir, 'dir')
    self.make_tree(root, self._test_tree, expected_leaf_count=7)

    self.fs.delete([root])

    # The '' key joins to the tmpdir itself, which must now be empty.
    self.check_tree(self.tmpdir, {'': None}, expected_leaf_count=1)

  def test_delete_glob_errors(self):
    """delete() reports non-matching globs but still deletes the others."""
    root = os.path.join(self.tmpdir, 'dir')
    self.make_tree(root, self._test_tree, expected_leaf_count=7)

    with self.assertRaisesRegex(BeamIOError,
                                r'^Delete operation failed') as error:
      self.fs.delete([
          os.path.join(root, 'path*'),
          os.path.join(root, 'aaa', 'b*'),
          os.path.join(root, 'aaa', 'd*')  # doesn't match anything, will raise
      ])

    self.check_tree(root, {'aaa': {'c': None}}, expected_leaf_count=1)

    self.assertEqual(
        list(error.exception.exception_details.keys()),
        [os.path.join(root, 'aaa', 'd*')])

    with self.assertRaisesRegex(BeamIOError,
                                r'^Delete operation failed') as error:
      self.fs.delete([
          # Already deleted above, so it no longer matches and will raise
          os.path.join(root, 'path*')
      ])

    self.check_tree(root, {'aaa': {'c': None}}, expected_leaf_count=1)

    self.assertEqual(
        list(error.exception.exception_details.keys()),
        [os.path.join(root, 'path*')])

  def test_delete(self):
    """delete() removes an existing file."""
    path1 = os.path.join(self.tmpdir, 'f1')

    with open(path1, 'a') as f:
      f.write('Hello')

    self.assertTrue(self.fs.exists(path1))
    self.fs.delete([path1])
    self.assertFalse(self.fs.exists(path1))

  def test_delete_error(self):
    """delete() of a missing path raises BeamIOError keyed by that path."""
    path1 = os.path.join(self.tmpdir, 'f1')
    with self.assertRaisesRegex(BeamIOError,
                                r'^Delete operation failed') as error:
      self.fs.delete([path1])
    self.assertEqual(list(error.exception.exception_details.keys()), [path1])


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()