github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/aws/clients/s3/client_test.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  # pytype: skip-file
    18  
    19  import logging
    20  import os
    21  import unittest
    22  
    23  from apache_beam.io.aws import s3io
    24  from apache_beam.io.aws.clients.s3 import fake_client
    25  from apache_beam.io.aws.clients.s3 import messages
    26  from apache_beam.options import pipeline_options
    27  
    28  
    29  class ClientErrorTest(unittest.TestCase):
    30    def setUp(self):
    31  
    32      # These tests can be run locally against a mock S3 client, or as integration
    33      # tests against the real S3 client.
    34      self.USE_MOCK = True
    35  
    36      # If you're running integration tests with S3, set this variable to be an
    37      # s3 path that you have access to where test data can be written. If you're
    38      # just running tests against the mock, this can be any s3 path. It should
    39      # end with a '/'.
    40      self.TEST_DATA_PATH = 's3://random-data-sets/beam_tests/'
    41  
    42      self.test_bucket, self.test_path = s3io.parse_s3_path(self.TEST_DATA_PATH)
    43  
    44      if self.USE_MOCK:
    45        self.client = fake_client.FakeS3Client()
    46        test_data_bucket, _ = s3io.parse_s3_path(self.TEST_DATA_PATH)
    47        self.client.known_buckets.add(test_data_bucket)
    48        self.aws = s3io.S3IO(self.client)
    49      else:
    50        self.aws = s3io.S3IO(options=pipeline_options.S3Options())
    51  
    52    def test_get_object_metadata(self):
    53  
    54      # Test nonexistent object
    55      object = self.test_path + 'nonexistent_file_doesnt_exist'
    56      request = messages.GetRequest(self.test_bucket, object)
    57      self.assertRaises(
    58          messages.S3ClientError, self.client.get_object_metadata, request)
    59  
    60      try:
    61        self.client.get_object_metadata(request)
    62      except Exception as e:
    63        self.assertIsInstance(e, messages.S3ClientError)
    64        self.assertEqual(e.code, 404)
    65  
    66    def test_get_range_nonexistent(self):
    67  
    68      # Test nonexistent object
    69      object = self.test_path + 'nonexistent_file_doesnt_exist'
    70      request = messages.GetRequest(self.test_bucket, object)
    71      self.assertRaises(
    72          messages.S3ClientError, self.client.get_range, request, 0, 10)
    73  
    74      try:
    75        self.client.get_range(request, 0, 10)
    76      except Exception as e:
    77        self.assertIsInstance(e, messages.S3ClientError)
    78        self.assertEqual(e.code, 404)
    79  
    80    def test_get_range_bad_start_end(self):
    81  
    82      file_name = self.TEST_DATA_PATH + 'get_range'
    83      contents = os.urandom(1024)
    84  
    85      with self.aws.open(file_name, 'w') as f:
    86        f.write(contents)
    87      bucket, object = s3io.parse_s3_path(file_name)
    88  
    89      response = self.client.get_range(
    90          messages.GetRequest(bucket, object), -10, 20)
    91      self.assertEqual(response, contents)
    92  
    93      response = self.client.get_range(
    94          messages.GetRequest(bucket, object), 20, 10)
    95      self.assertEqual(response, contents)
    96  
    97      # Clean up
    98      self.aws.delete(file_name)
    99  
   100    def test_upload_part_nonexistent_upload_id(self):
   101  
   102      object = self.test_path + 'upload_part'
   103      upload_id = 'not-an-id-12345'
   104      part_number = 1
   105      contents = os.urandom(1024)
   106  
   107      request = messages.UploadPartRequest(
   108          self.test_bucket, object, upload_id, part_number, contents)
   109  
   110      self.assertRaises(messages.S3ClientError, self.client.upload_part, request)
   111  
   112      try:
   113        self.client.upload_part(request)
   114      except Exception as e:
   115        self.assertIsInstance(e, messages.S3ClientError)
   116        self.assertEqual(e.code, 404)
   117  
   118    def test_copy_nonexistent(self):
   119  
   120      src_key = self.test_path + 'not_a_real_file_does_not_exist'
   121      dest_key = self.test_path + 'destination_file_location'
   122  
   123      request = messages.CopyRequest(
   124          self.test_bucket, src_key, self.test_bucket, dest_key)
   125  
   126      with self.assertRaises(messages.S3ClientError) as e:
   127        self.client.copy(request)
   128  
   129      self.assertEqual(e.exception.code, 404)
   130  
   131    def test_upload_part_bad_number(self):
   132  
   133      object = self.test_path + 'upload_part'
   134      contents = os.urandom(1024)
   135  
   136      request = messages.UploadRequest(self.test_bucket, object, None)
   137      response = self.client.create_multipart_upload(request)
   138      upload_id = response.upload_id
   139  
   140      part_number = 0.5
   141      request = messages.UploadPartRequest(
   142          self.test_bucket, object, upload_id, part_number, contents)
   143  
   144      self.assertRaises(messages.S3ClientError, self.client.upload_part, request)
   145  
   146      try:
   147        response = self.client.upload_part(request)
   148      except Exception as e:
   149        self.assertIsInstance(e, messages.S3ClientError)
   150        self.assertEqual(e.code, 400)
   151  
   152    def test_complete_multipart_upload_too_small(self):
   153  
   154      object = self.test_path + 'upload_part'
   155      request = messages.UploadRequest(self.test_bucket, object, None)
   156      response = self.client.create_multipart_upload(request)
   157      upload_id = response.upload_id
   158  
   159      part_number = 1
   160      contents_1 = os.urandom(1024)
   161      request_1 = messages.UploadPartRequest(
   162          self.test_bucket, object, upload_id, part_number, contents_1)
   163      response_1 = self.client.upload_part(request_1)
   164  
   165      part_number = 2
   166      contents_2 = os.urandom(1024)
   167      request_2 = messages.UploadPartRequest(
   168          self.test_bucket, object, upload_id, part_number, contents_2)
   169      response_2 = self.client.upload_part(request_2)
   170  
   171      parts = [{
   172          'PartNumber': 1, 'ETag': response_1.etag
   173      }, {
   174          'PartNumber': 2, 'ETag': response_2.etag
   175      }]
   176      complete_request = messages.CompleteMultipartUploadRequest(
   177          self.test_bucket, object, upload_id, parts)
   178  
   179      try:
   180        self.client.complete_multipart_upload(complete_request)
   181      except Exception as e:
   182        self.assertIsInstance(e, messages.S3ClientError)
   183        self.assertEqual(e.code, 400)
   184  
   185    def test_complete_multipart_upload_too_many(self):
   186  
   187      object = self.test_path + 'upload_part'
   188      request = messages.UploadRequest(self.test_bucket, object, None)
   189      response = self.client.create_multipart_upload(request)
   190      upload_id = response.upload_id
   191  
   192      part_number = 1
   193      contents_1 = os.urandom(5 * 1024)
   194      request_1 = messages.UploadPartRequest(
   195          self.test_bucket, object, upload_id, part_number, contents_1)
   196      response_1 = self.client.upload_part(request_1)
   197  
   198      part_number = 2
   199      contents_2 = os.urandom(1024)
   200      request_2 = messages.UploadPartRequest(
   201          self.test_bucket, object, upload_id, part_number, contents_2)
   202      response_2 = self.client.upload_part(request_2)
   203  
   204      parts = [
   205          {
   206              'PartNumber': 1, 'ETag': response_1.etag
   207          },
   208          {
   209              'PartNumber': 2, 'ETag': response_2.etag
   210          },
   211          {
   212              'PartNumber': 3, 'ETag': 'fake-etag'
   213          },
   214      ]
   215      complete_request = messages.CompleteMultipartUploadRequest(
   216          self.test_bucket, object, upload_id, parts)
   217  
   218      try:
   219        self.client.complete_multipart_upload(complete_request)
   220      except Exception as e:
   221        self.assertIsInstance(e, messages.S3ClientError)
   222        self.assertEqual(e.code, 400)
   223  
   224  
   225  if __name__ == '__main__':
   226    logging.getLogger().setLevel(logging.INFO)
   227    unittest.main()