github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/clients/python-wrapper/tests/utests/test_object.py (about)

     1  import http
     2  from contextlib import contextmanager
     3  from typing import get_args
     4  import urllib3
     5  
     6  import pytest
     7  
     8  import lakefs_sdk.api
     9  
    10  from lakefs.object import ReadModes
    11  from tests.utests.common import get_test_client, expect_exception_context
    12  
    13  
    14  class ObjectTestKWArgs:
    15      def __init__(self) -> None:
    16          self.repository_id = "test_repo"
    17          self.reference_id = "test_reference"
    18          self.path = "test_path"
    19  
    20  
    21  class StorageTestConfig(lakefs_sdk.StorageConfig):
    22  
    23      def __init__(self) -> None:
    24          super().__init__(blockstore_type="s3",
    25                           blockstore_namespace_example="",
    26                           blockstore_namespace_ValidityRegex="",
    27                           pre_sign_support=True,
    28                           pre_sign_support_ui=False,
    29                           import_support=False,
    30                           import_validity_regex="")
    31  
    32  
    33  class ObjectTestStats(lakefs_sdk.ObjectStats):
    34      def __init__(self) -> None:
    35          super().__init__(path="",
    36                           path_type="object",
    37                           physical_address="",
    38                           checksum="",
    39                           mtime=0)
    40  
    41  
    42  class StagingTestLocation(lakefs_sdk.StagingLocation):
    43      def __init__(self) -> None:
    44          super().__init__(physical_address="physical_address")
    45  
    46  
    47  @contextmanager
    48  def readable_object_context(monkey, **kwargs):
    49      with monkey.context():
    50          from lakefs.object import StoredObject
    51          clt = get_test_client()
    52          conf = lakefs_sdk.Config(version_config=lakefs_sdk.VersionConfig(), storage_config=StorageTestConfig())
    53          monkey.setattr(clt, "_server_conf", conf)
    54          read_obj = StoredObject(client=clt, **kwargs)
    55          yield read_obj
    56  
    57  
    58  @contextmanager
    59  def writeable_object_context(monkey, **kwargs):
    60      with monkey.context():
    61          monkey.setattr(lakefs_sdk.api.BranchesApi, "get_branch", lambda *args: None)
    62          from lakefs.object import WriteableObject
    63          conf = lakefs_sdk.Config(version_config=lakefs_sdk.VersionConfig(), storage_config=StorageTestConfig())
    64          clt = get_test_client()
    65          monkey.setattr(clt, "_server_conf", conf)
    66          obj = WriteableObject(client=clt, **kwargs)
    67          yield obj
    68  
    69  
    70  class TestStoredObject:
    71      def test_exists(self, monkeypatch, tmp_path):
    72          test_kwargs = ObjectTestKWArgs()
    73          with readable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
    74              # Object exists
    75              monkeypatch.setattr(lakefs_sdk.api.ObjectsApi, "head_object", lambda *args: None)
    76              assert obj.exists()
    77              # Object doesn't exist
    78              monkeypatch.setattr(lakefs_sdk.api.ObjectsApi, "head_object",
    79                                  lambda *args: (_ for _ in ()).throw(lakefs_sdk.exceptions.NotFoundException(
    80                                      status=http.HTTPStatus.NOT_FOUND)))
    81              assert not obj.exists()
    82  
    83              # Other exception
    84              monkeypatch.setattr(lakefs_sdk.api.ObjectsApi, "head_object",
    85                                  lambda *args: 1 / 0)
    86              with expect_exception_context(ZeroDivisionError):
    87                  obj.exists()
    88  
    89  
    90  class TestObjectReader:
    91      def test_seek(self, monkeypatch, tmp_path):
    92          test_kwargs = ObjectTestKWArgs()
    93          with readable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
    94              with obj.reader() as fd:
    95                  assert fd.tell() == 0
    96                  fd.seek(30)
    97                  assert fd.tell() == 30
    98                  with expect_exception_context(OSError):
    99                      fd.seek(-1)
   100  
   101              # Create another reader
   102              with obj.reader() as fd:
   103                  assert fd.tell() == 0
   104  
   105      def test_fileno(self, monkeypatch, tmp_path):
   106          test_kwargs = ObjectTestKWArgs()
   107          with readable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
   108              with obj.reader() as fd:
   109                  with expect_exception_context(OSError):
   110                      fd.fileno()
   111  
   112      @staticmethod
   113      def verify_reader(fd, patch_setattr, test_kwargs, data):
   114          object_stats = ObjectTestStats()
   115          object_stats.path = test_kwargs.path
   116          object_stats.size_bytes = len(data)
   117          patch_setattr(lakefs_sdk.api.ObjectsApi, "stat_object", lambda *args: object_stats)
   118  
   119          # read negative
   120          with expect_exception_context(OSError):
   121              fd.read(-1)
   122  
   123          # Read whole file
   124          start_pos = 0
   125          end_pos = ""
   126  
   127          def monkey_get_object(_, repository, ref, path, range, presign, **__):  # pylint: disable=W0622
   128              assert repository == test_kwargs.repository_id
   129              assert ref == test_kwargs.reference_id
   130              assert path == test_kwargs.path
   131              assert presign
   132  
   133              if isinstance(end_pos, int):
   134                  return data[start_pos:end_pos]
   135              return data[start_pos:]
   136  
   137          patch_setattr(lakefs_sdk.api.ObjectsApi, "get_object", monkey_get_object)
   138          assert fd.read() == data
   139          assert fd.tell() == object_stats.size_bytes
   140  
   141          # Test reading from middle
   142          start_pos = 132
   143          fd.seek(start_pos)
   144          read_size = 456
   145          end_pos = start_pos + read_size - 1
   146          fd.read(read_size)
   147          assert fd.tell() == start_pos + read_size - 1
   148  
   149          # Read more than file size
   150          start_pos = fd.tell()
   151          read_size = 2 * object_stats.size_bytes
   152          end_pos = start_pos + 2 * object_stats.size_bytes - 1
   153          fd.read(read_size)
   154          assert fd.tell() == object_stats.size_bytes
   155  
   156      def test_read_by_context(self, monkeypatch, tmp_path):
   157          test_kwargs = ObjectTestKWArgs()
   158          with readable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
   159              data = b"test \xcf\x84o\xcf\x81\xce\xbdo\xcf\x82\n" * 100
   160              with obj.reader(mode="rb") as fd:
   161                  self.verify_reader(fd, monkeypatch.setattr, test_kwargs, data)
   162  
   163      def test_read_by_calling_reader(self, monkeypatch, tmp_path):
   164          test_kwargs = ObjectTestKWArgs()
   165          with readable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
   166              data = b"test \xcf\x84o\xcf\x81\xce\xbdo\xcf\x82\n" * 100
   167              fd = obj.reader(mode="rb")
   168              self.verify_reader(fd, monkeypatch.setattr, test_kwargs, data)
   169              fd.close()
   170              assert fd.closed
   171  
   172      @pytest.mark.parametrize("mode", [*get_args(ReadModes)])
   173      def test_read_modes(self, monkeypatch, tmp_path, mode):
   174          test_kwargs = ObjectTestKWArgs()
   175          data = b"test \xcf\x84o\xcf\x81\xce\xbdo\xcf\x82"
   176          with readable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
   177              with obj.reader(mode=mode) as fd:
   178                  object_stats = ObjectTestStats()
   179                  object_stats.path = test_kwargs.path
   180                  object_stats.size_bytes = len(data)
   181                  monkeypatch.setattr(lakefs_sdk.api.ObjectsApi, "stat_object", lambda *args: object_stats)
   182  
   183                  # Read whole file
   184                  start_pos = 0
   185  
   186                  def monkey_get_object(_, repository, ref, path, range, presign, **__):  # pylint: disable=W0622
   187                      assert repository == test_kwargs.repository_id
   188                      assert ref == test_kwargs.reference_id
   189                      assert path == test_kwargs.path
   190                      assert range is None
   191                      assert presign
   192                      return b"test \xcf\x84o\xcf\x81\xce\xbdo\xcf\x82"
   193  
   194                  monkeypatch.setattr(lakefs_sdk.api.ObjectsApi, "get_object", monkey_get_object)
   195                  res = fd.read()
   196                  if 'b' not in mode:
   197                      assert res == data.decode('utf-8')
   198                  else:
   199                      assert res == data
   200  
   201                  assert fd.tell() == start_pos + object_stats.size_bytes
   202  
   203      def test_read_invalid_mode(self, monkeypatch, tmp_path):
   204          test_kwargs = ObjectTestKWArgs()
   205          with readable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
   206              with expect_exception_context(ValueError):
   207                  with obj.reader(mode="invalid"):
   208                      pass
   209  
   210  
   211  class TestWriteableObject:
   212      def test_upload(self, monkeypatch, tmp_path):
   213          test_kwargs = ObjectTestKWArgs()
   214          with writeable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
   215              staging_location = StagingTestLocation()
   216              monkeypatch.setattr(lakefs_sdk.api.StagingApi, "get_physical_address", lambda *args: staging_location)
   217              monkeypatch.setattr(urllib3.PoolManager, "request",
   218                                  lambda *args, **kwargs: urllib3.response.HTTPResponse(status=201))
   219  
   220              def monkey_link_physical_address(*_, staging_metadata: lakefs_sdk.StagingMetadata, **__):
   221                  assert staging_metadata.size_bytes == len(data)
   222                  assert staging_metadata.staging == staging_location
   223                  return lakefs_sdk.ObjectStats(path=obj.path,
   224                                                path_type="object",
   225                                                physical_address=staging_location.physical_address,
   226                                                checksum="",
   227                                                mtime=12345)
   228  
   229              monkeypatch.setattr(lakefs_sdk.api.StagingApi, "link_physical_address", monkey_link_physical_address)
   230              # Test string
   231              data = "test_data"
   232              obj.upload(data=data)
   233  
   234      def test_upload_invalid_mode(self, monkeypatch, tmp_path):
   235          test_kwargs = ObjectTestKWArgs()
   236          with writeable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
   237              with expect_exception_context(ValueError):
   238                  obj.upload(data="", mode="invalid")
   239  
   240  
   241  class TestObjectWriter:
   242      def test_fileno(self, monkeypatch, tmp_path):
   243          test_kwargs = ObjectTestKWArgs()
   244          with writeable_object_context(monkeypatch, **test_kwargs.__dict__) as obj:
   245              with obj.reader() as fd:
   246                  with expect_exception_context(OSError):
   247                      fd.fileno()