github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/filesystems_test.py (about)

     1  # -*- coding: utf-8 -*-
     2  #
     3  # Licensed to the Apache Software Foundation (ASF) under one or more
     4  # contributor license agreements.  See the NOTICE file distributed with
     5  # this work for additional information regarding copyright ownership.
     6  # The ASF licenses this file to You under the Apache License, Version 2.0
     7  # (the "License"); you may not use this file except in compliance with
     8  # the License.  You may obtain a copy of the License at
     9  #
    10  #    http://www.apache.org/licenses/LICENSE-2.0
    11  #
    12  # Unless required by applicable law or agreed to in writing, software
    13  # distributed under the License is distributed on an "AS IS" BASIS,
    14  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  # See the License for the specific language governing permissions and
    16  # limitations under the License.
    17  #
    18  
"""Unit tests for FileSystems, exercised against local filesystem paths."""
    20  
    21  # pytype: skip-file
    22  
    23  import filecmp
    24  import logging
    25  import os
    26  import shutil
    27  import tempfile
    28  import unittest
    29  
    30  import mock
    31  
    32  from apache_beam.io import localfilesystem
    33  from apache_beam.io.filesystem import BeamIOError
    34  from apache_beam.io.filesystems import FileSystems
    35  
    36  
    37  def _gen_fake_join(separator):
    38    """Returns a callable that joins paths with the given separator."""
    39    def _join(first_path, *paths):
    40      return separator.join((first_path.rstrip(separator), ) + paths)
    41  
    42    return _join
    43  
    44  
    45  class FileSystemsTest(unittest.TestCase):
    46    def setUp(self):
    47      self.tmpdir = tempfile.mkdtemp()
    48  
    49    def tearDown(self):
    50      shutil.rmtree(self.tmpdir)
    51  
    52    def test_get_scheme(self):
    53      self.assertIsNone(FileSystems.get_scheme('/abc/cdf'))
    54      self.assertIsNone(FileSystems.get_scheme('c:\\abc\\cdf'))
    55      self.assertEqual(FileSystems.get_scheme('gs://abc/cdf'), 'gs')
    56  
    57    def test_get_filesystem(self):
    58      self.assertTrue(
    59          isinstance(
    60              FileSystems.get_filesystem('/tmp'),
    61              localfilesystem.LocalFileSystem))
    62      self.assertTrue(
    63          isinstance(
    64              FileSystems.get_filesystem('c:\\abc\\def'),
    65              localfilesystem.LocalFileSystem))
    66      with self.assertRaises(ValueError):
    67        FileSystems.get_filesystem('error://abc/def')
    68  
    69    @mock.patch('apache_beam.io.localfilesystem.os')
    70    def test_unix_path_join(self, *unused_mocks):
    71      # Test joining of Unix paths.
    72      localfilesystem.os.path.join.side_effect = _gen_fake_join('/')
    73      self.assertEqual(
    74          '/tmp/path/to/file', FileSystems.join('/tmp/path', 'to', 'file'))
    75      self.assertEqual(
    76          '/tmp/path/to/file', FileSystems.join('/tmp/path', 'to/file'))
    77      self.assertEqual(
    78          '/tmp/path/to/file', FileSystems.join('/', 'tmp/path', 'to/file'))
    79      self.assertEqual(
    80          '/tmp/path/to/file', FileSystems.join('/tmp/', 'path', 'to/file'))
    81  
    82    @mock.patch('apache_beam.io.localfilesystem.os')
    83    def test_windows_path_join(self, *unused_mocks):
    84      # Test joining of Windows paths.
    85      localfilesystem.os.path.join.side_effect = _gen_fake_join('\\')
    86      self.assertEqual(
    87          r'C:\tmp\path\to\file', FileSystems.join(r'C:\tmp\path', 'to', 'file'))
    88      self.assertEqual(
    89          r'C:\tmp\path\to\file', FileSystems.join(r'C:\tmp\path', r'to\file'))
    90      self.assertEqual(
    91          r'C:\tmp\path\to\file',
    92          FileSystems.join(r'C:\tmp\path\\', 'to', 'file'))
    93  
    94    def test_mkdirs(self):
    95      path = os.path.join(self.tmpdir, 't1/t2')
    96      FileSystems.mkdirs(path)
    97      self.assertTrue(os.path.isdir(path))
    98  
    99    def test_mkdirs_failed(self):
   100      path = os.path.join(self.tmpdir, 't1/t2')
   101      FileSystems.mkdirs(path)
   102  
   103      # Check IOError if existing directory is created
   104      with self.assertRaises(IOError):
   105        FileSystems.mkdirs(path)
   106  
   107      with self.assertRaises(IOError):
   108        FileSystems.mkdirs(os.path.join(self.tmpdir, 't1'))
   109  
   110    def test_match_file(self):
   111      path = os.path.join(self.tmpdir, 'f1')
   112      open(path, 'a').close()
   113  
   114      # Match files in the temp directory
   115      result = FileSystems.match([path])[0]
   116      files = [f.path for f in result.metadata_list]
   117      self.assertEqual(files, [path])
   118  
   119    def test_match_file_empty(self):
   120      path = os.path.join(self.tmpdir, 'f2')  # Does not exist
   121  
   122      # Match files in the temp directory
   123      result = FileSystems.match([path])[0]
   124      files = [f.path for f in result.metadata_list]
   125      self.assertEqual(files, [])
   126  
   127    def test_match_file_exception(self):
   128      # Match files with None so that it throws an exception
   129      with self.assertRaisesRegex(BeamIOError,
   130                                  r'^Unable to get the Filesystem') as error:
   131        FileSystems.match([None])
   132      self.assertEqual(list(error.exception.exception_details), [None])
   133  
   134    def test_match_directory_with_files(self):
   135      path1 = os.path.join(self.tmpdir, 'f1')
   136      path2 = os.path.join(self.tmpdir, 'f2')
   137      open(path1, 'a').close()
   138      open(path2, 'a').close()
   139  
   140      # Match both the files in the directory
   141      path = os.path.join(self.tmpdir, '*')
   142      result = FileSystems.match([path])[0]
   143      files = [f.path for f in result.metadata_list]
   144      self.assertCountEqual(files, [path1, path2])
   145  
   146    def test_match_directory(self):
   147      result = FileSystems.match([self.tmpdir])[0]
   148      files = [f.path for f in result.metadata_list]
   149      self.assertEqual(files, [self.tmpdir])
   150  
   151    def test_copy(self):
   152      path1 = os.path.join(self.tmpdir, 'f1')
   153      path2 = os.path.join(self.tmpdir, 'f2')
   154      with open(path1, 'a') as f:
   155        f.write('Hello')
   156  
   157      FileSystems.copy([path1], [path2])
   158      self.assertTrue(filecmp.cmp(path1, path2))
   159  
   160    def test_copy_error(self):
   161      path1 = os.path.join(self.tmpdir, 'f1')
   162      path2 = os.path.join(self.tmpdir, 'f2')
   163      with self.assertRaisesRegex(BeamIOError,
   164                                  r'^Copy operation failed') as error:
   165        FileSystems.copy([path1], [path2])
   166      self.assertEqual(
   167          list(error.exception.exception_details.keys()), [(path1, path2)])
   168  
   169    def test_copy_directory(self):
   170      path_t1 = os.path.join(self.tmpdir, 't1')
   171      path_t2 = os.path.join(self.tmpdir, 't2')
   172      FileSystems.mkdirs(path_t1)
   173      FileSystems.mkdirs(path_t2)
   174  
   175      path1 = os.path.join(path_t1, 'f1')
   176      path2 = os.path.join(path_t2, 'f1')
   177      with open(path1, 'a') as f:
   178        f.write('Hello')
   179  
   180      FileSystems.copy([path_t1], [path_t2])
   181      self.assertTrue(filecmp.cmp(path1, path2))
   182  
   183    def test_rename(self):
   184      path1 = os.path.join(self.tmpdir, 'f1')
   185      path2 = os.path.join(self.tmpdir, 'f2')
   186      with open(path1, 'a') as f:
   187        f.write('Hello')
   188  
   189      FileSystems.rename([path1], [path2])
   190      self.assertTrue(FileSystems.exists(path2))
   191      self.assertFalse(FileSystems.exists(path1))
   192  
   193    def test_rename_error(self):
   194      path1 = os.path.join(self.tmpdir, 'f1')
   195      path2 = os.path.join(self.tmpdir, 'f2')
   196      with self.assertRaisesRegex(BeamIOError,
   197                                  r'^Rename operation failed') as error:
   198        FileSystems.rename([path1], [path2])
   199      self.assertEqual(
   200          list(error.exception.exception_details.keys()), [(path1, path2)])
   201  
   202    def test_rename_directory(self):
   203      path_t1 = os.path.join(self.tmpdir, 't1')
   204      path_t2 = os.path.join(self.tmpdir, 't2')
   205      FileSystems.mkdirs(path_t1)
   206  
   207      path1 = os.path.join(path_t1, 'f1')
   208      path2 = os.path.join(path_t2, 'f1')
   209      with open(path1, 'a') as f:
   210        f.write('Hello')
   211  
   212      FileSystems.rename([path_t1], [path_t2])
   213      self.assertTrue(FileSystems.exists(path_t2))
   214      self.assertFalse(FileSystems.exists(path_t1))
   215      self.assertTrue(FileSystems.exists(path2))
   216      self.assertFalse(FileSystems.exists(path1))
   217  
   218    def test_exists(self):
   219      path1 = os.path.join(self.tmpdir, 'f1')
   220      path2 = os.path.join(self.tmpdir, 'f2')
   221      with open(path1, 'a') as f:
   222        f.write('Hello')
   223      self.assertTrue(FileSystems.exists(path1))
   224      self.assertFalse(FileSystems.exists(path2))
   225  
   226    def test_delete(self):
   227      path1 = os.path.join(self.tmpdir, 'f1')
   228  
   229      with open(path1, 'a') as f:
   230        f.write('Hello')
   231  
   232      self.assertTrue(FileSystems.exists(path1))
   233      FileSystems.delete([path1])
   234      self.assertFalse(FileSystems.exists(path1))
   235  
   236    def test_delete_error(self):
   237      path1 = os.path.join(self.tmpdir, 'f1')
   238      with self.assertRaisesRegex(BeamIOError,
   239                                  r'^Delete operation failed') as error:
   240        FileSystems.delete([path1])
   241      self.assertEqual(list(error.exception.exception_details.keys()), [path1])
   242  
   243  
   244  if __name__ == '__main__':
   245    logging.getLogger().setLevel(logging.INFO)
   246    unittest.main()