github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/azure/blobstoragefilesystem_test.py (about)

     1  # -*- coding: utf-8 -*-
     2  #
     3  # Licensed to the Apache Software Foundation (ASF) under one or more
     4  # contributor license agreements.  See the NOTICE file distributed with
     5  # this work for additional information regarding copyright ownership.
     6  # The ASF licenses this file to You under the Apache License, Version 2.0
     7  # (the "License"); you may not use this file except in compliance with
     8  # the License.  You may obtain a copy of the License at
     9  #
    10  #    http://www.apache.org/licenses/LICENSE-2.0
    11  #
    12  # Unless required by applicable law or agreed to in writing, software
    13  # distributed under the License is distributed on an "AS IS" BASIS,
    14  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  # See the License for the specific language governing permissions and
    16  # limitations under the License.
    17  #
    18  
    19  """Unit tests for Azure Blob Storage File System."""
    20  
    21  # pytype: skip-file
    22  
    23  import logging
    24  import unittest
    25  
    26  import mock
    27  
    28  from apache_beam.io.filesystem import BeamIOError
    29  from apache_beam.io.filesystem import FileMetadata
    30  from apache_beam.options.pipeline_options import PipelineOptions
    31  
    32  # Protect against environments where azure library is not available.
    33  # pylint: disable=wrong-import-order, wrong-import-position
    34  try:
    35    from apache_beam.io.azure import blobstorageio
    36    from apache_beam.io.azure import blobstoragefilesystem
    37  except ImportError:
    38    blobstoragefilesystem = None  # type: ignore[assignment]
    39  # pylint: enable=wrong-import-order, wrong-import-position
    40  
    41  
    42  @unittest.skipIf(
    43      blobstoragefilesystem is None, 'Azure dependencies are not installed')
    44  class BlobStorageFileSystemTest(unittest.TestCase):
    45    def setUp(self):
    46      pipeline_options = PipelineOptions()
    47      self.fs = blobstoragefilesystem.BlobStorageFileSystem(
    48          pipeline_options=pipeline_options)
    49  
    50    def test_scheme(self):
    51      self.assertEqual(self.fs.scheme(), 'azfs')
    52      self.assertEqual(
    53          blobstoragefilesystem.BlobStorageFileSystem.scheme(), 'azfs')
    54  
    55    def test_join(self):
    56      self.assertEqual(
    57          'azfs://account-name/container/path/to/file',
    58          self.fs.join('azfs://account-name/container/path', 'to', 'file'))
    59      self.assertEqual(
    60          'azfs://account-name/container/path/to/file',
    61          self.fs.join('azfs://account-name/container/path', 'to/file'))
    62      self.assertEqual(
    63          'azfs://account-name/container/path/to/file',
    64          self.fs.join('azfs://account-name/container/path', '/to/file'))
    65      self.assertEqual(
    66          'azfs://account-name/container/path/to/file',
    67          self.fs.join('azfs://account-name/container/path', 'to', 'file'))
    68      self.assertEqual(
    69          'azfs://account-name/container/path/to/file',
    70          self.fs.join('azfs://account-name/container/path', 'to/file'))
    71      self.assertEqual(
    72          'azfs://account-name/container/path/to/file',
    73          self.fs.join('azfs://account-name/container/path', '/to/file'))
    74      with self.assertRaises(ValueError):
    75        self.fs.join('account-name/container/path', '/to/file')
    76  
    77    def test_split(self):
    78      self.assertEqual(('azfs://foo/bar', 'baz'),
    79                       self.fs.split('azfs://foo/bar/baz'))
    80      self.assertEqual(('azfs://foo', ''), self.fs.split('azfs://foo/'))
    81      self.assertEqual(('azfs://foo', ''), self.fs.split('azfs://foo'))
    82  
    83      with self.assertRaises(ValueError):
    84        self.fs.split('/no/azfs/prefix')
    85  
    86    @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
    87    def test_match_single(self, unused_mock_blobstorageio):
    88      # Prepare mocks.
    89      blobstorageio_mock = mock.MagicMock()
    90      blobstoragefilesystem.blobstorageio.BlobStorageIO = \
    91          lambda pipeline_options: blobstorageio_mock
    92      blobstorageio_mock.exists.return_value = True
    93      blobstorageio_mock._status.return_value = {
    94          'size': 1, 'last_updated': 99999.0
    95      }
    96      expected_results = [
    97          FileMetadata('azfs://storageaccount/container/file1', 1, 99999.0)
    98      ]
    99      match_result = self.fs.match(['azfs://storageaccount/container/file1'])[0]
   100  
   101      self.assertEqual(match_result.metadata_list, expected_results)
   102      blobstorageio_mock._status.assert_called_once_with(
   103          'azfs://storageaccount/container/file1')
   104  
   105    @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
   106    def test_match_multiples(self, unused_mock_blobstorageio):
   107      # Prepare mocks.
   108      blobstorageio_mock = mock.MagicMock()
   109      blobstoragefilesystem.blobstorageio.BlobStorageIO = \
   110          lambda pipeline_options: blobstorageio_mock
   111      blobstorageio_mock.list_files.return_value = iter([
   112          ('azfs://storageaccount/container/file1', (1, 99999.0)),
   113          ('azfs://storageaccount/container/file2', (2, 88888.0))
   114      ])
   115      expected_results = set([
   116          FileMetadata('azfs://storageaccount/container/file1', 1, 99999.0),
   117          FileMetadata('azfs://storageaccount/container/file2', 2, 88888.0),
   118      ])
   119      match_result = self.fs.match(['azfs://storageaccount/container/'])[0]
   120  
   121      self.assertEqual(set(match_result.metadata_list), expected_results)
   122      blobstorageio_mock.list_files.assert_called_once_with(
   123          'azfs://storageaccount/container/', with_metadata=True)
   124  
   125    @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
   126    def test_match_multiples_limit(self, unused_mock_blobstorageio):
   127      # Prepare mocks.
   128      blobstorageio_mock = mock.MagicMock()
   129      limit = 1
   130      blobstoragefilesystem.blobstorageio.BlobStorageIO = \
   131          lambda pipeline_options: blobstorageio_mock
   132      blobstorageio_mock.list_files.return_value = iter([
   133          ('azfs://storageaccount/container/file1', (1, 99999.0))
   134      ])
   135      expected_results = set(
   136          [FileMetadata('azfs://storageaccount/container/file1', 1, 99999.0)])
   137      match_result = self.fs.match(['azfs://storageaccount/container/'],
   138                                   [limit])[0]
   139      self.assertEqual(set(match_result.metadata_list), expected_results)
   140      self.assertEqual(len(match_result.metadata_list), limit)
   141      blobstorageio_mock.list_files.assert_called_once_with(
   142          'azfs://storageaccount/container/', with_metadata=True)
   143  
   144    @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
   145    def test_match_multiples_error(self, unused_mock_blobstorageio):
   146      # Prepare mocks.
   147      blobstorageio_mock = mock.MagicMock()
   148      blobstoragefilesystem.blobstorageio.BlobStorageIO = \
   149          lambda pipeline_options: blobstorageio_mock
   150      exception = IOError('Failed')
   151      blobstorageio_mock.list_files.side_effect = exception
   152  
   153      with self.assertRaisesRegex(BeamIOError,
   154                                  r'^Match operation failed') as error:
   155        self.fs.match(['azfs://storageaccount/container/'])
   156  
   157      self.assertRegex(
   158          str(error.exception.exception_details),
   159          r'azfs://storageaccount/container/.*%s' % exception)
   160      blobstorageio_mock.list_files.assert_called_once_with(
   161          'azfs://storageaccount/container/', with_metadata=True)
   162  
   163    @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
   164    def test_match_multiple_patterns(self, unused_mock_blobstorageio):
   165      # Prepare mocks.
   166      blobstorageio_mock = mock.MagicMock()
   167      blobstoragefilesystem.blobstorageio.BlobStorageIO = \
   168          lambda pipeline_options: blobstorageio_mock
   169      blobstorageio_mock.list_files.side_effect = [
   170          iter([('azfs://storageaccount/container/file1', (1, 99999.0))]),
   171          iter([('azfs://storageaccount/container/file2', (2, 88888.0))]),
   172      ]
   173      expected_results = [
   174          [FileMetadata('azfs://storageaccount/container/file1', 1, 99999.0)],
   175          [FileMetadata('azfs://storageaccount/container/file2', 2, 88888.0)]
   176      ]
   177      result = self.fs.match([
   178          'azfs://storageaccount/container/file1*',
   179          'azfs://storageaccount/container/file2*'
   180      ])
   181      self.assertEqual([mr.metadata_list for mr in result], expected_results)
   182  
   183    @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
   184    def test_create(self, unused_mock_blobstorageio):
   185      # Prepare mocks.
   186      blobstorageio_mock = mock.MagicMock()
   187      blobstoragefilesystem.blobstorageio.BlobStorageIO = \
   188          lambda pipeline_options: blobstorageio_mock
   189      # Issue file copy.
   190      _ = self.fs.create(
   191          'azfs://storageaccount/container/file1', 'application/octet-stream')
   192  
   193      blobstorageio_mock.open.assert_called_once_with(
   194          'azfs://storageaccount/container/file1',
   195          'wb',
   196          mime_type='application/octet-stream')
   197  
   198    @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
   199    def test_open(self, unused_mock_blobstorageio):
   200      # Prepare mocks.
   201      blobstorageio_mock = mock.MagicMock()
   202      blobstoragefilesystem.blobstorageio.BlobStorageIO = \
   203          lambda pipeline_options: blobstorageio_mock
   204      # Issue file copy.
   205      _ = self.fs.open(
   206          'azfs://storageaccount/container/file1', 'application/octet-stream')
   207  
   208      blobstorageio_mock.open.assert_called_once_with(
   209          'azfs://storageaccount/container/file1',
   210          'rb',
   211          mime_type='application/octet-stream')
   212  
   213    @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
   214    def test_copy_file(self, unused_mock_blobstorageio):
   215      # Prepare mocks.
   216      blobstorageio_mock = mock.MagicMock()
   217      blobstoragefilesystem.blobstorageio.BlobStorageIO = \
   218          lambda pipeline_options: blobstorageio_mock
   219      sources = [
   220          'azfs://storageaccount/container/from1',
   221          'azfs://storageaccount/container/from2',
   222      ]
   223      destinations = [
   224          'azfs://storageaccount/container/to1',
   225          'azfs://storageaccount/container/to2',
   226      ]
   227  
   228      # Issue file copy.
   229      self.fs.copy(sources, destinations)
   230  
   231      src_dest_pairs = list(zip(sources, destinations))
   232      blobstorageio_mock.copy_paths.assert_called_once_with(src_dest_pairs)
   233  
   234    @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
   235    def test_copy_file_error(self, unused_mock_blobstorageio):
   236      # Prepare mocks.
   237      blobstorageio_mock = mock.MagicMock()
   238      blobstoragefilesystem.blobstorageio.BlobStorageIO = \
   239          lambda pipeline_options: blobstorageio_mock
   240      sources = [
   241          'azfs://storageaccount/container/from1',
   242          'azfs://storageaccount/container/from2',
   243          'azfs://storageaccount/container/from3',
   244      ]
   245      destinations = [
   246          'azfs://storageaccount/container/to1',
   247          'azfs://storageaccount/container/to2',
   248      ]
   249  
   250      # Issue file copy.
   251      with self.assertRaises(BeamIOError):
   252        self.fs.copy(sources, destinations)
   253  
   254    @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
   255    def test_delete(self, unused_mock_blobstorageio):
   256      # Prepare mocks.
   257      blobstorageio_mock = mock.MagicMock()
   258      blobstoragefilesystem.blobstorageio.BlobStorageIO = \
   259          lambda pipeline_options: blobstorageio_mock
   260      blobstorageio_mock.size.return_value = 0
   261      files = [
   262          'azfs://storageaccount/container/from1',
   263          'azfs://storageaccount/container/from2',
   264          'azfs://storageaccount/container/from3',
   265      ]
   266      # Issue batch delete operation.
   267      self.fs.delete(files)
   268      blobstorageio_mock.delete_paths.assert_called_once_with(files)
   269  
   270    @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
   271    def test_delete_error(self, unused_mock_blobstorageio):
   272      # Prepare mocks.
   273      blobstorageio_mock = mock.MagicMock()
   274      blobstoragefilesystem.blobstorageio.BlobStorageIO = \
   275          lambda pipeline_options: blobstorageio_mock
   276      nonexistent_directory = 'azfs://storageaccount/nonexistent-container/tree/'
   277      exception = blobstorageio.BlobStorageError('Not found', 404)
   278  
   279      blobstorageio_mock.delete_paths.return_value = {
   280          nonexistent_directory: exception,
   281          'azfs://storageaccount/container/blob1': None,
   282          'azfs://storageaccount/container/blob2': None,
   283      }
   284  
   285      blobstorageio_mock.size.return_value = 0
   286      files = [
   287          nonexistent_directory,
   288          'azfs://storageaccount/container/blob1',
   289          'azfs://storageaccount/container/blob2',
   290      ]
   291      expected_results = {nonexistent_directory: exception}
   292  
   293      # Issue batch delete.
   294      with self.assertRaises(BeamIOError) as error:
   295        self.fs.delete(files)
   296  
   297      self.assertIn('Delete operation failed', str(error.exception))
   298      self.assertEqual(error.exception.exception_details, expected_results)
   299      blobstorageio_mock.delete_paths.assert_called()
   300  
   301    @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
   302    def test_rename(self, unused_mock_blobstorageio):
   303      # Prepare mocks.
   304      blobstorageio_mock = mock.MagicMock()
   305      blobstoragefilesystem.blobstorageio.BlobStorageIO = \
   306          lambda pipeline_options: blobstorageio_mock
   307  
   308      sources = [
   309          'azfs://storageaccount/container/original_blob1',
   310          'azfs://storageaccount/container/original_blob2',
   311      ]
   312      destinations = [
   313          'azfs://storageaccount/container/renamed_blob1',
   314          'azfs://storageaccount/container/renamed_blob2',
   315      ]
   316  
   317      # Issue bath rename.
   318      self.fs.rename(sources, destinations)
   319  
   320      src_dest_pairs = list(zip(sources, destinations))
   321      blobstorageio_mock.rename_files.assert_called_once_with(src_dest_pairs)
   322  
   323  
   324  if __name__ == '__main__':
   325    logging.getLogger().setLevel(logging.INFO)
   326    unittest.main()