#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pytype: skip-file

import logging
import os
import unittest

from apache_beam.io.aws import s3io
from apache_beam.io.aws.clients.s3 import fake_client
from apache_beam.io.aws.clients.s3 import messages
from apache_beam.options import pipeline_options


class ClientErrorTest(unittest.TestCase):
    """Checks that the S3 client raises ``messages.S3ClientError`` with the
    expected HTTP status code for invalid requests.

    All error assertions use the ``with self.assertRaises(...) as ctx:``
    context-manager form so the test fails loudly if no exception is raised
    (a bare try/except would silently skip its assertions in that case).
    """
    def setUp(self):
        # These tests can be run locally against a mock S3 client, or as
        # integration tests against the real S3 client.
        self.USE_MOCK = True

        # If you're running integration tests with S3, set this variable to
        # be an s3 path that you have access to where test data can be
        # written. If you're just running tests against the mock, this can be
        # any s3 path. It should end with a '/'.
        self.TEST_DATA_PATH = 's3://random-data-sets/beam_tests/'

        self.test_bucket, self.test_path = s3io.parse_s3_path(
            self.TEST_DATA_PATH)

        if self.USE_MOCK:
            self.client = fake_client.FakeS3Client()
            test_data_bucket, _ = s3io.parse_s3_path(self.TEST_DATA_PATH)
            self.client.known_buckets.add(test_data_bucket)
            self.aws = s3io.S3IO(self.client)
        else:
            self.aws = s3io.S3IO(options=pipeline_options.S3Options())

    def test_get_object_metadata(self):
        """Metadata lookup for a nonexistent object fails with a 404."""
        # 'key' rather than 'object' to avoid shadowing the builtin.
        key = self.test_path + 'nonexistent_file_doesnt_exist'
        request = messages.GetRequest(self.test_bucket, key)

        with self.assertRaises(messages.S3ClientError) as ctx:
            self.client.get_object_metadata(request)
        self.assertEqual(ctx.exception.code, 404)

    def test_get_range_nonexistent(self):
        """Range read of a nonexistent object fails with a 404."""
        key = self.test_path + 'nonexistent_file_doesnt_exist'
        request = messages.GetRequest(self.test_bucket, key)

        with self.assertRaises(messages.S3ClientError) as ctx:
            self.client.get_range(request, 0, 10)
        self.assertEqual(ctx.exception.code, 404)

    def test_get_range_bad_start_end(self):
        """Invalid byte ranges fall back to returning the whole object."""
        file_name = self.TEST_DATA_PATH + 'get_range'
        contents = os.urandom(1024)

        with self.aws.open(file_name, 'w') as f:
            f.write(contents)
        bucket, key = s3io.parse_s3_path(file_name)

        # Negative start: the full object is returned.
        response = self.client.get_range(
            messages.GetRequest(bucket, key), -10, 20)
        self.assertEqual(response, contents)

        # start > end: the full object is returned.
        response = self.client.get_range(
            messages.GetRequest(bucket, key), 20, 10)
        self.assertEqual(response, contents)

        # Clean up
        self.aws.delete(file_name)

    def test_upload_part_nonexistent_upload_id(self):
        """Uploading a part under an unknown upload id fails with a 404."""
        key = self.test_path + 'upload_part'
        upload_id = 'not-an-id-12345'
        part_number = 1
        contents = os.urandom(1024)

        request = messages.UploadPartRequest(
            self.test_bucket, key, upload_id, part_number, contents)

        with self.assertRaises(messages.S3ClientError) as ctx:
            self.client.upload_part(request)
        self.assertEqual(ctx.exception.code, 404)

    def test_copy_nonexistent(self):
        """Copying from a nonexistent source key fails with a 404."""
        src_key = self.test_path + 'not_a_real_file_does_not_exist'
        dest_key = self.test_path + 'destination_file_location'

        request = messages.CopyRequest(
            self.test_bucket, src_key, self.test_bucket, dest_key)

        with self.assertRaises(messages.S3ClientError) as ctx:
            self.client.copy(request)
        self.assertEqual(ctx.exception.code, 404)

    def test_upload_part_bad_number(self):
        """A non-integer part number is rejected with a 400."""
        key = self.test_path + 'upload_part'
        contents = os.urandom(1024)

        request = messages.UploadRequest(self.test_bucket, key, None)
        response = self.client.create_multipart_upload(request)
        upload_id = response.upload_id

        part_number = 0.5  # part numbers must be positive integers
        request = messages.UploadPartRequest(
            self.test_bucket, key, upload_id, part_number, contents)

        with self.assertRaises(messages.S3ClientError) as ctx:
            self.client.upload_part(request)
        self.assertEqual(ctx.exception.code, 400)

    def test_complete_multipart_upload_too_small(self):
        """Completing an upload whose non-final parts are below the minimum
        part size fails with a 400."""
        key = self.test_path + 'upload_part'
        request = messages.UploadRequest(self.test_bucket, key, None)
        response = self.client.create_multipart_upload(request)
        upload_id = response.upload_id

        # Two 1 KiB parts: part 1 is under the minimum size permitted for
        # any part other than the last.
        part_number = 1
        contents_1 = os.urandom(1024)
        request_1 = messages.UploadPartRequest(
            self.test_bucket, key, upload_id, part_number, contents_1)
        response_1 = self.client.upload_part(request_1)

        part_number = 2
        contents_2 = os.urandom(1024)
        request_2 = messages.UploadPartRequest(
            self.test_bucket, key, upload_id, part_number, contents_2)
        response_2 = self.client.upload_part(request_2)

        parts = [{
            'PartNumber': 1, 'ETag': response_1.etag
        }, {
            'PartNumber': 2, 'ETag': response_2.etag
        }]
        complete_request = messages.CompleteMultipartUploadRequest(
            self.test_bucket, key, upload_id, parts)

        # Previously a bare try/except: if no error was raised the test
        # passed vacuously. assertRaises enforces that the 400 happens.
        with self.assertRaises(messages.S3ClientError) as ctx:
            self.client.complete_multipart_upload(complete_request)
        self.assertEqual(ctx.exception.code, 400)

    def test_complete_multipart_upload_too_many(self):
        """Completing an upload that references a part that was never
        uploaded fails with a 400."""
        key = self.test_path + 'upload_part'
        request = messages.UploadRequest(self.test_bucket, key, None)
        response = self.client.create_multipart_upload(request)
        upload_id = response.upload_id

        part_number = 1
        contents_1 = os.urandom(5 * 1024)
        request_1 = messages.UploadPartRequest(
            self.test_bucket, key, upload_id, part_number, contents_1)
        response_1 = self.client.upload_part(request_1)

        part_number = 2
        contents_2 = os.urandom(1024)
        request_2 = messages.UploadPartRequest(
            self.test_bucket, key, upload_id, part_number, contents_2)
        response_2 = self.client.upload_part(request_2)

        # Part 3 was never uploaded, so completion must be rejected.
        parts = [
            {
                'PartNumber': 1, 'ETag': response_1.etag
            },
            {
                'PartNumber': 2, 'ETag': response_2.etag
            },
            {
                'PartNumber': 3, 'ETag': 'fake-etag'
            },
        ]
        complete_request = messages.CompleteMultipartUploadRequest(
            self.test_bucket, key, upload_id, parts)

        with self.assertRaises(messages.S3ClientError) as ctx:
            self.client.complete_multipart_upload(complete_request)
        self.assertEqual(ctx.exception.code, 400)


if __name__ == '__main__':
    logging.getLogger().setLevel(logging.INFO)
    unittest.main()