# Source: github.com/treeverse/lakefs, clients/python-wrapper/tests/utests/test_object.py
"""Unit tests for ``lakefs.object``: stored/writeable objects and their readers.

All lakeFS API calls are replaced through pytest's ``monkeypatch`` fixture, so
the tests exercise only the client-side wrapper logic.
"""
import http
from contextlib import contextmanager
from typing import get_args
import urllib3

import pytest

import lakefs_sdk.api

from lakefs.object import ReadModes
from tests.utests.common import get_test_client, expect_exception_context


class ObjectTestKWArgs:
    """Constructor keyword arguments shared by the object tests.

    The instance ``__dict__`` is expanded with ``**`` into the object
    constructors, so every attribute set here becomes a keyword argument.
    """

    def __init__(self) -> None:
        self.repository_id = "test_repo"
        self.reference_id = "test_reference"
        self.path = "test_path"


class StorageTestConfig(lakefs_sdk.StorageConfig):
    """``lakefs_sdk.StorageConfig`` pre-filled with fixed test values."""

    def __init__(self) -> None:
        # pre_sign_support=True: the mocked get_object calls below assert `presign`.
        super().__init__(blockstore_type="s3",
                         blockstore_namespace_example="",
                         blockstore_namespace_ValidityRegex="",
                         pre_sign_support=True,
                         pre_sign_support_ui=False,
                         import_support=False,
                         import_validity_regex="")


class ObjectTestStats(lakefs_sdk.ObjectStats):
    """``lakefs_sdk.ObjectStats`` with placeholder values; tests override fields as needed."""

    def __init__(self) -> None:
        super().__init__(path="",
                         path_type="object",
                         physical_address="",
                         checksum="",
                         mtime=0)


class StagingTestLocation(lakefs_sdk.StagingLocation):
    """``lakefs_sdk.StagingLocation`` pointing at a dummy physical address."""

    def __init__(self) -> None:
        super().__init__(physical_address="physical_address")


@contextmanager
def readable_object_context(monkey, **kwargs):
    """Yield a ``StoredObject`` backed by the test client.

    :param monkey: pytest ``monkeypatch`` fixture; patches are applied inside ``monkey.context()``
    :param kwargs: forwarded to the ``StoredObject`` constructor
    """
    with monkey.context():
        from lakefs.object import StoredObject
        clt = get_test_client()
        conf = lakefs_sdk.Config(version_config=lakefs_sdk.VersionConfig(), storage_config=StorageTestConfig())
        # Inject the server configuration directly on the client.
        monkey.setattr(clt, "_server_conf", conf)
        read_obj = StoredObject(client=clt, **kwargs)
        yield read_obj


@contextmanager
def writeable_object_context(monkey, **kwargs):
    """Yield a ``WriteableObject`` backed by the test client.

    ``BranchesApi.get_branch`` is replaced with a stub returning ``None`` before
    the object is created.

    :param monkey: pytest ``monkeypatch`` fixture; patches are applied inside ``monkey.context()``
    :param kwargs: forwarded to the ``WriteableObject`` constructor
    """
    with monkey.context():
        monkey.setattr(lakefs_sdk.api.BranchesApi, "get_branch", lambda *args: None)
        from lakefs.object import WriteableObject
        conf = lakefs_sdk.Config(version_config=lakefs_sdk.VersionConfig(), storage_config=StorageTestConfig())
        clt = get_test_client()
        # Inject the server configuration directly on the client.
        monkey.setattr(clt, "_server_conf", conf)
        obj = WriteableObject(client=clt, **kwargs)
        yield obj


class TestStoredObject:
    """Tests for ``StoredObject``."""

    def test_exists(self, monkeypatch, tmp_path):
        """``exists()`` is truthy on success, falsy on NotFound, and propagates other errors."""
        test_kwargs = ObjectTestKWArgs()
        with readable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
            # Object exists
            monkeypatch.setattr(lakefs_sdk.api.ObjectsApi, "head_object", lambda *args: None)
            assert obj.exists()
            # Object doesn't exist
            # A lambda cannot contain `raise`; throwing into an empty generator raises from an expression.
            monkeypatch.setattr(lakefs_sdk.api.ObjectsApi, "head_object",
                                lambda *args: (_ for _ in ()).throw(lakefs_sdk.exceptions.NotFoundException(
                                    status=http.HTTPStatus.NOT_FOUND)))
            assert not obj.exists()

            # Other exception
            monkeypatch.setattr(lakefs_sdk.api.ObjectsApi, "head_object",
                                lambda *args: 1 / 0)
            with expect_exception_context(ZeroDivisionError):
                obj.exists()


class TestObjectReader:
    """Tests for the reader returned by ``StoredObject.reader()``."""

    def test_seek(self, monkeypatch, tmp_path):
        """``seek`` moves the position, rejects negative offsets, and new readers start at 0."""
        test_kwargs = ObjectTestKWArgs()
        with readable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
            with obj.reader() as fd:
                assert fd.tell() == 0
                fd.seek(30)
                assert fd.tell() == 30
                with expect_exception_context(OSError):
                    fd.seek(-1)

            # Create another reader
            with obj.reader() as fd:
                assert fd.tell() == 0

    def test_fileno(self, monkeypatch, tmp_path):
        """``fileno()`` on a reader raises ``OSError``."""
        test_kwargs = ObjectTestKWArgs()
        with readable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
            with obj.reader() as fd:
                with expect_exception_context(OSError):
                    fd.fileno()

    @staticmethod
    def verify_reader(fd, patch_setattr, test_kwargs, data):
        """Run the shared read scenarios against an open reader.

        :param fd: reader under test
        :param patch_setattr: callable with the ``monkeypatch.setattr`` signature
        :param test_kwargs: ``ObjectTestKWArgs`` the reader's object was created with
        :param data: bytes the mocked ``get_object`` serves
        """
        object_stats = ObjectTestStats()
        object_stats.path = test_kwargs.path
        object_stats.size_bytes = len(data)
        patch_setattr(lakefs_sdk.api.ObjectsApi, "stat_object", lambda *args: object_stats)

        # read negative
        with expect_exception_context(OSError):
            fd.read(-1)

        # Read whole file
        start_pos = 0
        end_pos = ""  # non-int sentinel: the mock serves data up to the end

        # The mock ignores `range` and slices by the enclosing start_pos/end_pos,
        # which the scenarios below rebind before each read.
        def monkey_get_object(_, repository, ref, path, range, presign, **__):  # pylint: disable=W0622
            assert repository == test_kwargs.repository_id
            assert ref == test_kwargs.reference_id
            assert path == test_kwargs.path
            assert presign

            if isinstance(end_pos, int):
                return data[start_pos:end_pos]
            return data[start_pos:]

        patch_setattr(lakefs_sdk.api.ObjectsApi, "get_object", monkey_get_object)
        assert fd.read() == data
        assert fd.tell() == object_stats.size_bytes

        # Test reading from middle
        start_pos = 132
        fd.seek(start_pos)
        read_size = 456
        end_pos = start_pos + read_size - 1
        fd.read(read_size)
        # The mock's exclusive slice returns read_size - 1 bytes, hence the "- 1" here.
        assert fd.tell() == start_pos + read_size - 1

        # Read more than file size
        start_pos = fd.tell()
        read_size = 2 * object_stats.size_bytes
        end_pos = start_pos + 2 * object_stats.size_bytes - 1
        fd.read(read_size)
        assert fd.tell() == object_stats.size_bytes

    def test_read_by_context(self, monkeypatch, tmp_path):
        """Run ``verify_reader`` on a binary reader used as a context manager."""
        test_kwargs = ObjectTestKWArgs()
        with readable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
            data = b"test \xcf\x84o\xcf\x81\xce\xbdo\xcf\x82\n" * 100
            with obj.reader(mode="rb") as fd:
                self.verify_reader(fd, monkeypatch.setattr, test_kwargs, data)

    def test_read_by_calling_reader(self, monkeypatch, tmp_path):
        """Run ``verify_reader`` on a binary reader that is closed explicitly."""
        test_kwargs = ObjectTestKWArgs()
        with readable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
            data = b"test \xcf\x84o\xcf\x81\xce\xbdo\xcf\x82\n" * 100
            fd = obj.reader(mode="rb")
            self.verify_reader(fd, monkeypatch.setattr, test_kwargs, data)
            fd.close()
            assert fd.closed

    @pytest.mark.parametrize("mode", [*get_args(ReadModes)])
    def test_read_modes(self, monkeypatch, tmp_path, mode):
        """A full read returns bytes for modes containing ``'b'`` and UTF-8 decoded text otherwise."""
        test_kwargs = ObjectTestKWArgs()
        data = b"test \xcf\x84o\xcf\x81\xce\xbdo\xcf\x82"
        with readable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
            with obj.reader(mode=mode) as fd:
                object_stats = ObjectTestStats()
                object_stats.path = test_kwargs.path
                object_stats.size_bytes = len(data)
                monkeypatch.setattr(lakefs_sdk.api.ObjectsApi, "stat_object", lambda *args: object_stats)

                # Read whole file
                start_pos = 0

                def monkey_get_object(_, repository, ref, path, range, presign, **__):  # pylint: disable=W0622
                    assert repository == test_kwargs.repository_id
                    assert ref == test_kwargs.reference_id
                    assert path == test_kwargs.path
                    # A whole-file read is expected to be issued without a range.
                    assert range is None
                    assert presign
                    return b"test \xcf\x84o\xcf\x81\xce\xbdo\xcf\x82"

                monkeypatch.setattr(lakefs_sdk.api.ObjectsApi, "get_object", monkey_get_object)
                res = fd.read()
                if 'b' not in mode:
                    assert res == data.decode('utf-8')
                else:
                    assert res == data

                assert fd.tell() == start_pos + object_stats.size_bytes

    def test_read_invalid_mode(self, monkeypatch, tmp_path):
        """Opening a reader with an unknown mode raises ``ValueError``."""
        test_kwargs = ObjectTestKWArgs()
        with readable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
            with expect_exception_context(ValueError):
                with obj.reader(mode="invalid"):
                    pass


class TestWriteableObject:
    """Tests for ``WriteableObject``."""

    def test_upload(self, monkeypatch, tmp_path):
        """``upload`` of a string passes the data length and staging location to ``link_physical_address``."""
        test_kwargs = ObjectTestKWArgs()
        with writeable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
            staging_location = StagingTestLocation()
            monkeypatch.setattr(lakefs_sdk.api.StagingApi, "get_physical_address", lambda *args: staging_location)
            # Every HTTP request made through urllib3 is answered with a bare 201 response.
            monkeypatch.setattr(urllib3.PoolManager, "request",
                                lambda *args, **kwargs: urllib3.response.HTTPResponse(status=201))

            # `data` is bound further down, before upload() triggers this callback.
            def monkey_link_physical_address(*_, staging_metadata: lakefs_sdk.StagingMetadata, **__):
                assert staging_metadata.size_bytes == len(data)
                assert staging_metadata.staging == staging_location
                return lakefs_sdk.ObjectStats(path=obj.path,
                                              path_type="object",
                                              physical_address=staging_location.physical_address,
                                              checksum="",
                                              mtime=12345)

            monkeypatch.setattr(lakefs_sdk.api.StagingApi, "link_physical_address", monkey_link_physical_address)
            # Test string
            data = "test_data"
            obj.upload(data=data)

    def test_upload_invalid_mode(self, monkeypatch, tmp_path):
        """``upload`` with an unknown mode raises ``ValueError``."""
        test_kwargs = ObjectTestKWArgs()
        with writeable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
            with expect_exception_context(ValueError):
                obj.upload(data="", mode="invalid")


class TestObjectWriter:
    """Tests for file-like behaviour of objects created via ``writeable_object_context``."""

    def test_fileno(self, monkeypatch, tmp_path):
        """``fileno()`` raises ``OSError``."""
        test_kwargs = ObjectTestKWArgs()
        with writeable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
            # NOTE(review): this opens a reader although the class name suggests a
            # writer test; possibly intended to be obj.writer() -- confirm.
            with obj.reader() as fd:
                with expect_exception_context(OSError):
                    fd.fileno()