github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/localfilesystem_test.py (about)

     1  # -*- coding: utf-8 -*-
     2  #
     3  # Licensed to the Apache Software Foundation (ASF) under one or more
     4  # contributor license agreements.  See the NOTICE file distributed with
     5  # this work for additional information regarding copyright ownership.
     6  # The ASF licenses this file to You under the Apache License, Version 2.0
     7  # (the "License"); you may not use this file except in compliance with
     8  # the License.  You may obtain a copy of the License at
     9  #
    10  #    http://www.apache.org/licenses/LICENSE-2.0
    11  #
    12  # Unless required by applicable law or agreed to in writing, software
    13  # distributed under the License is distributed on an "AS IS" BASIS,
    14  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  # See the License for the specific language governing permissions and
    16  # limitations under the License.
    17  #
    18  
    19  """Unit tests for LocalFileSystem."""
    20  
    21  # pytype: skip-file
    22  
    23  import filecmp
    24  import logging
    25  import os
    26  import shutil
    27  import tempfile
    28  import unittest
    29  
    30  import mock
    31  from parameterized import param
    32  from parameterized import parameterized
    33  
    34  from apache_beam.io import localfilesystem
    35  from apache_beam.io.filesystem import BeamIOError
    36  from apache_beam.options.pipeline_options import PipelineOptions
    37  
    38  
    39  def _gen_fake_join(separator):
    40    """Returns a callable that joins paths with the given separator."""
    41    def _join(first_path, *paths):
    42      return separator.join((first_path.rstrip(separator), ) + paths)
    43  
    44    return _join
    45  
    46  
    47  def _gen_fake_split(separator):
    48    """Returns a callable that splits a with the given separator."""
    49    def _split(path):
    50      sep_index = path.rfind(separator)
    51      if sep_index >= 0:
    52        return (path[:sep_index], path[sep_index + 1:])
    53      else:
    54        return (path, '')
    55  
    56    return _split
    57  
    58  
class LocalFileSystemTest(unittest.TestCase):
  """Unit tests exercising ``localfilesystem.LocalFileSystem``."""

    60    def setUp(self):
    61      self.tmpdir = tempfile.mkdtemp()
    62      pipeline_options = PipelineOptions()
    63      self.fs = localfilesystem.LocalFileSystem(pipeline_options)
    64  
  def tearDown(self):
    """Removes the scratch directory stored in ``self.tmpdir``."""
    # Recursively deletes everything the test left under the directory.
    shutil.rmtree(self.tmpdir)
    67  
    68    def test_scheme(self):
    69      self.assertIsNone(self.fs.scheme())
    70      self.assertIsNone(localfilesystem.LocalFileSystem.scheme())
    71  
    72    @mock.patch('apache_beam.io.localfilesystem.os')
    73    def test_unix_path_join(self, *unused_mocks):
    74      # Test joining of Unix paths.
    75      localfilesystem.os.path.join.side_effect = _gen_fake_join('/')
    76      self.assertEqual(
    77          '/tmp/path/to/file', self.fs.join('/tmp/path', 'to', 'file'))
    78      self.assertEqual('/tmp/path/to/file', self.fs.join('/tmp/path', 'to/file'))
    79  
    80    @mock.patch('apache_beam.io.localfilesystem.os')
    81    def test_windows_path_join(self, *unused_mocks):
    82      # Test joining of Windows paths.
    83      localfilesystem.os.path.join.side_effect = _gen_fake_join('\\')
    84      self.assertEqual(
    85          r'C:\tmp\path\to\file', self.fs.join(r'C:\tmp\path', 'to', 'file'))
    86      self.assertEqual(
    87          r'C:\tmp\path\to\file', self.fs.join(r'C:\tmp\path', r'to\file'))
    88  
    89    @mock.patch('apache_beam.io.localfilesystem.os')
    90    def test_unix_path_split(self, os_mock):
    91      os_mock.path.abspath.side_effect = lambda a: a
    92      os_mock.path.split.side_effect = _gen_fake_split('/')
    93      self.assertEqual(('/tmp/path/to', 'file'),
    94                       self.fs.split('/tmp/path/to/file'))
    95      # Actual os.path.split will split following to '/' and 'tmp' when run in
    96      # Unix.
    97      self.assertEqual(('', 'tmp'), self.fs.split('/tmp'))
    98  
    99    @mock.patch('apache_beam.io.localfilesystem.os')
   100    def test_windows_path_split(self, os_mock):
   101      os_mock.path.abspath = lambda a: a
   102      os_mock.path.split.side_effect = _gen_fake_split('\\')
   103      self.assertEqual((r'C:\tmp\path\to', 'file'),
   104                       self.fs.split(r'C:\tmp\path\to\file'))
   105      # Actual os.path.split will split following to 'C:\' and 'tmp' when run in
   106      # Windows.
   107      self.assertEqual((r'C:', 'tmp'), self.fs.split(r'C:\tmp'))
   108  
   109    def test_mkdirs(self):
   110      path = os.path.join(self.tmpdir, 't1/t2')
   111      self.fs.mkdirs(path)
   112      self.assertTrue(os.path.isdir(path))
   113  
   114    def test_mkdirs_failed(self):
   115      path = os.path.join(self.tmpdir, 't1/t2')
   116      self.fs.mkdirs(path)
   117  
   118      # Check IOError if existing directory is created
   119      with self.assertRaises(IOError):
   120        self.fs.mkdirs(path)
   121  
   122      with self.assertRaises(IOError):
   123        self.fs.mkdirs(os.path.join(self.tmpdir, 't1'))
   124  
   125    def test_match_file(self):
   126      path = os.path.join(self.tmpdir, 'f1')
   127      open(path, 'a').close()
   128  
   129      # Match files in the temp directory
   130      result = self.fs.match([path])[0]
   131      files = [f.path for f in result.metadata_list]
   132      self.assertEqual(files, [path])
   133  
   134    def test_match_file_empty(self):
   135      path = os.path.join(self.tmpdir, 'f2')  # Does not exist
   136  
   137      # Match files in the temp directory
   138      result = self.fs.match([path])[0]
   139      files = [f.path for f in result.metadata_list]
   140      self.assertEqual(files, [])
   141  
   142    def test_match_file_exception(self):
   143      # Match files with None so that it throws an exception
   144      with self.assertRaisesRegex(BeamIOError,
   145                                  r'^Match operation failed') as error:
   146        self.fs.match([None])
   147      self.assertEqual(list(error.exception.exception_details.keys()), [None])
   148  
   149    @parameterized.expand([
   150        param('*', files=['a', 'b', os.path.join('c', 'x')], expected=['a', 'b']),
   151        param(
   152            '**',
   153            files=['a', os.path.join('b', 'x'), os.path.join('c', 'x')],
   154            expected=['a', os.path.join('b', 'x'), os.path.join('c', 'x')]),
   155        param(
   156            os.path.join('*', '*'),
   157            files=[
   158                'a',
   159                os.path.join('b', 'x'),
   160                os.path.join('c', 'x'),
   161                os.path.join('d', 'x', 'y')
   162            ],
   163            expected=[os.path.join('b', 'x'), os.path.join('c', 'x')]),
   164        param(
   165            os.path.join('**', '*'),
   166            files=[
   167                'a',
   168                os.path.join('b', 'x'),
   169                os.path.join('c', 'x'),
   170                os.path.join('d', 'x', 'y')
   171            ],
   172            expected=[
   173                os.path.join('b', 'x'),
   174                os.path.join('c', 'x'),
   175                os.path.join('d', 'x', 'y')
   176            ]),
   177    ])
   178    def test_match_glob(self, pattern, files, expected):
   179      for filename in files:
   180        full_path = os.path.join(self.tmpdir, filename)
   181        dirname = os.path.dirname(full_path)
   182        if not dirname == full_path:
   183          # Make sure we don't go outside the tmpdir
   184          assert os.path.commonprefix([self.tmpdir, full_path]) == self.tmpdir
   185          try:
   186            self.fs.mkdirs(dirname)
   187          except IOError:
   188            # Directory exists
   189            pass
   190  
   191        open(full_path, 'a').close()  # create empty file
   192  
   193      # Match both the files in the directory
   194      full_pattern = os.path.join(self.tmpdir, pattern)
   195      result = self.fs.match([full_pattern])[0]
   196      files = [os.path.relpath(f.path, self.tmpdir) for f in result.metadata_list]
   197      self.assertCountEqual(files, expected)
   198  
   199    def test_match_directory(self):
   200      result = self.fs.match([self.tmpdir])[0]
   201      files = [f.path for f in result.metadata_list]
   202      self.assertEqual(files, [self.tmpdir])
   203  
   204    def test_match_directory_contents(self):
   205      path1 = os.path.join(self.tmpdir, 'f1')
   206      path2 = os.path.join(self.tmpdir, 'f2')
   207      open(path1, 'a').close()
   208      open(path2, 'a').close()
   209  
   210      result = self.fs.match([os.path.join(self.tmpdir, '*')])[0]
   211      files = [f.path for f in result.metadata_list]
   212      self.assertCountEqual(files, [path1, path2])
   213  
   214    def test_copy(self):
   215      path1 = os.path.join(self.tmpdir, 'f1')
   216      path2 = os.path.join(self.tmpdir, 'f2')
   217      with open(path1, 'a') as f:
   218        f.write('Hello')
   219  
   220      self.fs.copy([path1], [path2])
   221      self.assertTrue(filecmp.cmp(path1, path2))
   222  
   223    def test_copy_error(self):
   224      path1 = os.path.join(self.tmpdir, 'f1')
   225      path2 = os.path.join(self.tmpdir, 'f2')
   226      with self.assertRaisesRegex(BeamIOError,
   227                                  r'^Copy operation failed') as error:
   228        self.fs.copy([path1], [path2])
   229      self.assertEqual(
   230          list(error.exception.exception_details.keys()), [(path1, path2)])
   231  
   232    def test_copy_directory(self):
   233      path_t1 = os.path.join(self.tmpdir, 't1')
   234      path_t2 = os.path.join(self.tmpdir, 't2')
   235      self.fs.mkdirs(path_t1)
   236      self.fs.mkdirs(path_t2)
   237  
   238      path1 = os.path.join(path_t1, 'f1')
   239      path2 = os.path.join(path_t2, 'f1')
   240      with open(path1, 'a') as f:
   241        f.write('Hello')
   242  
   243      self.fs.copy([path_t1], [path_t2])
   244      self.assertTrue(filecmp.cmp(path1, path2))
   245  
   246    def test_rename(self):
   247      path1 = os.path.join(self.tmpdir, 'f1')
   248      path2 = os.path.join(self.tmpdir, 'f2')
   249      with open(path1, 'a') as f:
   250        f.write('Hello')
   251  
   252      self.fs.rename([path1], [path2])
   253      self.assertTrue(self.fs.exists(path2))
   254      self.assertFalse(self.fs.exists(path1))
   255  
   256    def test_rename_error(self):
   257      path1 = os.path.join(self.tmpdir, 'f1')
   258      path2 = os.path.join(self.tmpdir, 'f2')
   259      with self.assertRaisesRegex(BeamIOError,
   260                                  r'^Rename operation failed') as error:
   261        self.fs.rename([path1], [path2])
   262      self.assertEqual(
   263          list(error.exception.exception_details.keys()), [(path1, path2)])
   264  
   265    def test_rename_directory(self):
   266      path_t1 = os.path.join(self.tmpdir, 't1')
   267      path_t2 = os.path.join(self.tmpdir, 't2')
   268      self.fs.mkdirs(path_t1)
   269  
   270      path1 = os.path.join(path_t1, 'f1')
   271      path2 = os.path.join(path_t2, 'f1')
   272      with open(path1, 'a') as f:
   273        f.write('Hello')
   274  
   275      self.fs.rename([path_t1], [path_t2])
   276      self.assertTrue(self.fs.exists(path_t2))
   277      self.assertFalse(self.fs.exists(path_t1))
   278      self.assertTrue(self.fs.exists(path2))
   279      self.assertFalse(self.fs.exists(path1))
   280  
   281    def test_exists(self):
   282      path1 = os.path.join(self.tmpdir, 'f1')
   283      path2 = os.path.join(self.tmpdir, 'f2')
   284      with open(path1, 'a') as f:
   285        f.write('Hello')
   286      self.assertTrue(self.fs.exists(path1))
   287      self.assertFalse(self.fs.exists(path2))
   288  
   289    def test_checksum(self):
   290      path1 = os.path.join(self.tmpdir, 'f1')
   291      path2 = os.path.join(self.tmpdir, 'f2')
   292      with open(path1, 'a') as f:
   293        f.write('Hello')
   294      with open(path2, 'a') as f:
   295        f.write('foo')
   296      # tests that localfilesystem checksum returns file size
   297      checksum1 = self.fs.checksum(path1)
   298      checksum2 = self.fs.checksum(path2)
   299      self.assertEqual(checksum1, str(5))
   300      self.assertEqual(checksum2, str(3))
   301      # tests that fs.checksum and str(fs.size) are consistent
   302      self.assertEqual(checksum1, str(self.fs.size(path1)))
   303      self.assertEqual(checksum2, str(self.fs.size(path2)))
   304  
   305    def make_tree(self, path, value, expected_leaf_count=None):
   306      """Create a file+directory structure from a simple dict-based DSL
   307  
   308      :param path: root path to create directories+files under
   309      :param value: a specification of what ``path`` should contain: ``None`` to
   310       make it an empty directory, a string literal to make it a file with those
   311        contents, and a ``dict`` to make it a non-empty directory and recurse
   312      :param expected_leaf_count: only be set at the top of a recursive call
   313       stack; after the whole tree has been created, verify the presence and
   314       number of all files+directories, as a sanity check
   315      """
   316      if value is None:
   317        # empty directory
   318        os.makedirs(path)
   319      elif isinstance(value, str):
   320        # file with string-literal contents
   321        dir = os.path.dirname(path)
   322        if not os.path.exists(dir):
   323          os.makedirs(dir)
   324        with open(path, 'a') as f:
   325          f.write(value)
   326      elif isinstance(value, dict):
   327        # recurse to create a subdirectory tree
   328        for basename, v in value.items():
   329          self.make_tree(os.path.join(path, basename), v)
   330      else:
   331        raise Exception('Unexpected value in tempdir tree: %s' % value)
   332  
   333      if expected_leaf_count is not None:
   334        self.assertEqual(self.check_tree(path, value), expected_leaf_count)
   335  
   336    def check_tree(self, path, value, expected_leaf_count=None):
   337      """Verify a directory+file structure according to the rules described in
   338      ``make_tree``
   339  
   340      :param path: path to check under
   341      :param value: DSL-representation of expected files+directories under
   342      ``path``
   343      :return: number of leaf files/directories that were verified
   344      """
   345      actual_leaf_count = None
   346      if value is None:
   347        # empty directory
   348        self.assertTrue(os.path.exists(path), msg=path)
   349        self.assertEqual(os.listdir(path), [])
   350        actual_leaf_count = 1
   351      elif isinstance(value, str):
   352        # file with string-literal contents
   353        with open(path, 'r') as f:
   354          self.assertEqual(f.read(), value, msg=path)
   355  
   356        actual_leaf_count = 1
   357      elif isinstance(value, dict):
   358        # recurse to check subdirectory tree
   359        actual_leaf_count = sum([
   360            self.check_tree(os.path.join(path, basename), v) for basename,
   361            v in value.items()
   362        ])
   363      else:
   364        raise Exception('Unexpected value in tempdir tree: %s' % value)
   365  
   366      if expected_leaf_count is not None:
   367        self.assertEqual(actual_leaf_count, expected_leaf_count)
   368  
   369      return actual_leaf_count
   370  
  # Tree specification in the DSL understood by ``make_tree``/``check_tree``:
  # a string is a file with those contents, ``None`` is an empty directory and
  # a ``dict`` is a directory holding the given entries. This tree has 7
  # leaves: 4 files and 3 empty directories.
  _test_tree = {
      'path1': '111',
      'path2': {
          '2': '222', 'emptydir': None
      },
      'aaa': {
          'b1': 'b1', 'b2': None, 'bbb': {
              'ccc': {
                  'ddd': 'DDD'
              }
          }, 'c': None
      }
  }
   384  
   385    def test_delete_globs(self):
   386      dir = os.path.join(self.tmpdir, 'dir')
   387      self.make_tree(dir, self._test_tree, expected_leaf_count=7)
   388  
   389      self.fs.delete([os.path.join(dir, 'path*'), os.path.join(dir, 'aaa', 'b*')])
   390  
   391      # One empty nested directory is left
   392      self.check_tree(dir, {'aaa': {'c': None}}, expected_leaf_count=1)
   393  
   394    def test_recursive_delete(self):
   395      dir = os.path.join(self.tmpdir, 'dir')
   396      self.make_tree(dir, self._test_tree, expected_leaf_count=7)
   397  
   398      self.fs.delete([dir])
   399  
   400      self.check_tree(self.tmpdir, {'': None}, expected_leaf_count=1)
   401  
   402    def test_delete_glob_errors(self):
   403      dir = os.path.join(self.tmpdir, 'dir')
   404      self.make_tree(dir, self._test_tree, expected_leaf_count=7)
   405  
   406      with self.assertRaisesRegex(BeamIOError,
   407                                  r'^Delete operation failed') as error:
   408        self.fs.delete([
   409            os.path.join(dir, 'path*'),
   410            os.path.join(dir, 'aaa', 'b*'),
   411            os.path.join(dir, 'aaa', 'd*')  # doesn't match anything, will raise
   412        ])
   413  
   414      self.check_tree(dir, {'aaa': {'c': None}}, expected_leaf_count=1)
   415  
   416      self.assertEqual(
   417          list(error.exception.exception_details.keys()),
   418          [os.path.join(dir, 'aaa', 'd*')])
   419  
   420      with self.assertRaisesRegex(BeamIOError,
   421                                  r'^Delete operation failed') as error:
   422        self.fs.delete([
   423            os.path.join(dir, 'path*')  # doesn't match anything, will raise
   424        ])
   425  
   426      self.check_tree(dir, {'aaa': {'c': None}}, expected_leaf_count=1)
   427  
   428      self.assertEqual(
   429          list(error.exception.exception_details.keys()),
   430          [os.path.join(dir, 'path*')])
   431  
   432    def test_delete(self):
   433      path1 = os.path.join(self.tmpdir, 'f1')
   434  
   435      with open(path1, 'a') as f:
   436        f.write('Hello')
   437  
   438      self.assertTrue(self.fs.exists(path1))
   439      self.fs.delete([path1])
   440      self.assertFalse(self.fs.exists(path1))
   441  
   442    def test_delete_error(self):
   443      path1 = os.path.join(self.tmpdir, 'f1')
   444      with self.assertRaisesRegex(BeamIOError,
   445                                  r'^Delete operation failed') as error:
   446        self.fs.delete([path1])
   447      self.assertEqual(list(error.exception.exception_details.keys()), [path1])
   448  
   449  
   450  if __name__ == '__main__':
   451    logging.getLogger().setLevel(logging.INFO)
   452    unittest.main()