# -*- coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Unit tests for Azure Blob Storage File System."""

# pytype: skip-file

import logging
import unittest

import mock

from apache_beam.io.filesystem import BeamIOError
from apache_beam.io.filesystem import FileMetadata
from apache_beam.options.pipeline_options import PipelineOptions

# Protect against environments where azure library is not available.
# pylint: disable=wrong-import-order, wrong-import-position
try:
  from apache_beam.io.azure import blobstorageio
  from apache_beam.io.azure import blobstoragefilesystem
except ImportError:
  blobstoragefilesystem = None  # type: ignore[assignment]
# pylint: enable=wrong-import-order, wrong-import-position


@unittest.skipIf(
    blobstoragefilesystem is None, 'Azure dependencies are not installed')
class BlobStorageFileSystemTest(unittest.TestCase):
  """Tests for ``BlobStorageFileSystem`` against a mocked ``BlobStorageIO``.

  The whole class is skipped when the Azure modules cannot be imported.
  Tests that reach the I/O layer patch the ``blobstorageio`` name inside
  ``blobstoragefilesystem`` and then assert on the calls made to the mock.
  """
  def setUp(self):
    """Create the file system under test with default pipeline options."""
    pipeline_options = PipelineOptions()
    self.fs = blobstoragefilesystem.BlobStorageFileSystem(
        pipeline_options=pipeline_options)

  def _install_io_mock(self):
    """Route ``BlobStorageIO`` construction to a fresh ``MagicMock``.

    Must be called from a test decorated with ``mock.patch`` on
    ``apache_beam.io.azure.blobstoragefilesystem.blobstorageio``: the
    attribute is then set on the patched module object, so the override
    disappears when the patch is undone at the end of the test.

    Returns:
      The ``MagicMock`` that stands in for the ``BlobStorageIO`` instance.
    """
    io_mock = mock.MagicMock()
    # The parameter name is kept as ``pipeline_options`` so the factory
    # accepts the argument whether it is passed positionally or by keyword.
    blobstoragefilesystem.blobstorageio.BlobStorageIO = \
        lambda pipeline_options: io_mock
    return io_mock

  def test_scheme(self):
    """``scheme()`` returns 'azfs' on both the instance and the class."""
    self.assertEqual(self.fs.scheme(), 'azfs')
    self.assertEqual(
        blobstoragefilesystem.BlobStorageFileSystem.scheme(), 'azfs')

  def test_join(self):
    """``join`` normalizes separators and rejects non-azfs base paths."""
    expected = 'azfs://account-name/container/path/to/file'

    # Base path without a trailing slash.
    self.assertEqual(
        expected,
        self.fs.join('azfs://account-name/container/path', 'to', 'file'))
    self.assertEqual(
        expected, self.fs.join('azfs://account-name/container/path', 'to/file'))
    self.assertEqual(
        expected,
        self.fs.join('azfs://account-name/container/path', '/to/file'))

    # Base path with a trailing slash must join to the same result.
    self.assertEqual(
        expected,
        self.fs.join('azfs://account-name/container/path/', 'to', 'file'))
    self.assertEqual(
        expected,
        self.fs.join('azfs://account-name/container/path/', 'to/file'))
    self.assertEqual(
        expected,
        self.fs.join('azfs://account-name/container/path/', '/to/file'))

    # A base path without the azfs:// prefix is an error.
    with self.assertRaises(ValueError):
      self.fs.join('account-name/container/path', '/to/file')

  def test_split(self):
    """``split`` returns (head, tail) and rejects non-azfs paths."""
    self.assertEqual(('azfs://foo/bar', 'baz'),
                     self.fs.split('azfs://foo/bar/baz'))
    self.assertEqual(('azfs://foo', ''), self.fs.split('azfs://foo/'))
    self.assertEqual(('azfs://foo', ''), self.fs.split('azfs://foo'))

    with self.assertRaises(ValueError):
      self.fs.split('/no/azfs/prefix')

  @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
  def test_match_single(self, unused_mock_blobstorageio):
    """Matching one concrete path yields one ``FileMetadata`` entry."""
    # Prepare mocks.
    blobstorageio_mock = self._install_io_mock()
    blobstorageio_mock.exists.return_value = True
    blobstorageio_mock._status.return_value = {
        'size': 1, 'last_updated': 99999.0
    }
    expected_results = [
        FileMetadata('azfs://storageaccount/container/file1', 1, 99999.0)
    ]
    match_result = self.fs.match(['azfs://storageaccount/container/file1'])[0]

    self.assertEqual(match_result.metadata_list, expected_results)
    blobstorageio_mock._status.assert_called_once_with(
        'azfs://storageaccount/container/file1')

  @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
  def test_match_multiples(self, unused_mock_blobstorageio):
    """Matching a container prefix returns every listed blob."""
    # Prepare mocks.
    blobstorageio_mock = self._install_io_mock()
    blobstorageio_mock.list_files.return_value = iter([
        ('azfs://storageaccount/container/file1', (1, 99999.0)),
        ('azfs://storageaccount/container/file2', (2, 88888.0))
    ])
    expected_results = {
        FileMetadata('azfs://storageaccount/container/file1', 1, 99999.0),
        FileMetadata('azfs://storageaccount/container/file2', 2, 88888.0),
    }
    match_result = self.fs.match(['azfs://storageaccount/container/'])[0]

    self.assertEqual(set(match_result.metadata_list), expected_results)
    blobstorageio_mock.list_files.assert_called_once_with(
        'azfs://storageaccount/container/', with_metadata=True)

  @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
  def test_match_multiples_limit(self, unused_mock_blobstorageio):
    """Matching with a limit of 1 returns exactly one entry."""
    # Prepare mocks.
    blobstorageio_mock = self._install_io_mock()
    limit = 1
    blobstorageio_mock.list_files.return_value = iter([
        ('azfs://storageaccount/container/file1', (1, 99999.0))
    ])
    expected_results = {
        FileMetadata('azfs://storageaccount/container/file1', 1, 99999.0)
    }
    match_result = self.fs.match(['azfs://storageaccount/container/'],
                                 [limit])[0]

    self.assertEqual(set(match_result.metadata_list), expected_results)
    self.assertEqual(len(match_result.metadata_list), limit)
    blobstorageio_mock.list_files.assert_called_once_with(
        'azfs://storageaccount/container/', with_metadata=True)

  @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
  def test_match_multiples_error(self, unused_mock_blobstorageio):
    """A listing failure surfaces as ``BeamIOError`` naming the pattern."""
    # Prepare mocks.
    blobstorageio_mock = self._install_io_mock()
    exception = IOError('Failed')
    blobstorageio_mock.list_files.side_effect = exception

    with self.assertRaisesRegex(BeamIOError,
                                r'^Match operation failed') as error:
      self.fs.match(['azfs://storageaccount/container/'])

    # The details must mention both the pattern and the underlying error.
    self.assertRegex(
        str(error.exception.exception_details),
        r'azfs://storageaccount/container/.*%s' % exception)
    blobstorageio_mock.list_files.assert_called_once_with(
        'azfs://storageaccount/container/', with_metadata=True)

  @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
  def test_match_multiple_patterns(self, unused_mock_blobstorageio):
    """Each pattern gets its own result, in the order patterns were given."""
    # Prepare mocks: one listing per pattern, consumed in call order.
    blobstorageio_mock = self._install_io_mock()
    blobstorageio_mock.list_files.side_effect = [
        iter([('azfs://storageaccount/container/file1', (1, 99999.0))]),
        iter([('azfs://storageaccount/container/file2', (2, 88888.0))]),
    ]
    expected_results = [
        [FileMetadata('azfs://storageaccount/container/file1', 1, 99999.0)],
        [FileMetadata('azfs://storageaccount/container/file2', 2, 88888.0)]
    ]
    result = self.fs.match([
        'azfs://storageaccount/container/file1*',
        'azfs://storageaccount/container/file2*'
    ])

    self.assertEqual([mr.metadata_list for mr in result], expected_results)

  @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
  def test_create(self, unused_mock_blobstorageio):
    """``create`` opens the blob in 'wb' mode with the given MIME type."""
    # Prepare mocks.
    blobstorageio_mock = self._install_io_mock()

    # Issue file create.
    self.fs.create(
        'azfs://storageaccount/container/file1', 'application/octet-stream')

    blobstorageio_mock.open.assert_called_once_with(
        'azfs://storageaccount/container/file1',
        'wb',
        mime_type='application/octet-stream')

  @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
  def test_open(self, unused_mock_blobstorageio):
    """``open`` opens the blob in 'rb' mode with the given MIME type."""
    # Prepare mocks.
    blobstorageio_mock = self._install_io_mock()

    # Issue file open.
    self.fs.open(
        'azfs://storageaccount/container/file1', 'application/octet-stream')

    blobstorageio_mock.open.assert_called_once_with(
        'azfs://storageaccount/container/file1',
        'rb',
        mime_type='application/octet-stream')

  @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
  def test_copy_file(self, unused_mock_blobstorageio):
    """``copy`` forwards zipped (source, destination) pairs in one call."""
    # Prepare mocks.
    blobstorageio_mock = self._install_io_mock()
    sources = [
        'azfs://storageaccount/container/from1',
        'azfs://storageaccount/container/from2',
    ]
    destinations = [
        'azfs://storageaccount/container/to1',
        'azfs://storageaccount/container/to2',
    ]

    # Issue file copy.
    self.fs.copy(sources, destinations)

    src_dest_pairs = list(zip(sources, destinations))
    blobstorageio_mock.copy_paths.assert_called_once_with(src_dest_pairs)

  @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
  def test_copy_file_error(self, unused_mock_blobstorageio):
    """``copy`` raises ``BeamIOError`` when the list lengths differ."""
    # Prepare mocks.
    self._install_io_mock()
    # Three sources but only two destinations.
    sources = [
        'azfs://storageaccount/container/from1',
        'azfs://storageaccount/container/from2',
        'azfs://storageaccount/container/from3',
    ]
    destinations = [
        'azfs://storageaccount/container/to1',
        'azfs://storageaccount/container/to2',
    ]

    # Issue file copy.
    with self.assertRaises(BeamIOError):
      self.fs.copy(sources, destinations)

  @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
  def test_delete(self, unused_mock_blobstorageio):
    """``delete`` forwards the full path list in a single call."""
    # Prepare mocks.
    blobstorageio_mock = self._install_io_mock()
    blobstorageio_mock.size.return_value = 0
    files = [
        'azfs://storageaccount/container/from1',
        'azfs://storageaccount/container/from2',
        'azfs://storageaccount/container/from3',
    ]

    # Issue batch delete operation.
    self.fs.delete(files)

    blobstorageio_mock.delete_paths.assert_called_once_with(files)

  @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
  def test_delete_error(self, unused_mock_blobstorageio):
    """Only the failed path is reported in the ``BeamIOError`` details."""
    # Prepare mocks.
    blobstorageio_mock = self._install_io_mock()
    nonexistent_directory = 'azfs://storageaccount/nonexistent-container/tree/'
    exception = blobstorageio.BlobStorageError('Not found', 404)

    # One path maps to an error; the other two map to None.
    blobstorageio_mock.delete_paths.return_value = {
        nonexistent_directory: exception,
        'azfs://storageaccount/container/blob1': None,
        'azfs://storageaccount/container/blob2': None,
    }

    blobstorageio_mock.size.return_value = 0
    files = [
        nonexistent_directory,
        'azfs://storageaccount/container/blob1',
        'azfs://storageaccount/container/blob2',
    ]
    expected_results = {nonexistent_directory: exception}

    # Issue batch delete.
    with self.assertRaises(BeamIOError) as error:
      self.fs.delete(files)

    self.assertIn('Delete operation failed', str(error.exception))
    self.assertEqual(error.exception.exception_details, expected_results)
    blobstorageio_mock.delete_paths.assert_called()

  @mock.patch('apache_beam.io.azure.blobstoragefilesystem.blobstorageio')
  def test_rename(self, unused_mock_blobstorageio):
    """``rename`` forwards zipped (source, destination) pairs in one call."""
    # Prepare mocks.
    blobstorageio_mock = self._install_io_mock()

    sources = [
        'azfs://storageaccount/container/original_blob1',
        'azfs://storageaccount/container/original_blob2',
    ]
    destinations = [
        'azfs://storageaccount/container/renamed_blob1',
        'azfs://storageaccount/container/renamed_blob2',
    ]

    # Issue batch rename.
    self.fs.rename(sources, destinations)

    src_dest_pairs = list(zip(sources, destinations))
    blobstorageio_mock.rename_files.assert_called_once_with(src_dest_pairs)


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()