github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/clients/python-wrapper/tests/integration/test_import.py (about)

     1  from time import sleep
     2  
     3  import pytest
     4  
     5  from lakefs.client import Client
     6  from lakefs.exceptions import ImportManagerException, ConflictException
     7  from tests.utests.common import expect_exception_context
     8  
     9  _IMPORT_PATH = "s3://esti-system-testing-data/import-test-data/"
    10  
    11  _FILES_TO_CHECK = ["nested/prefix-1/file002005",
    12                     "nested/prefix-2/file001894",
    13                     "nested/prefix-3/file000005",
    14                     "nested/prefix-4/file000645",
    15                     "nested/prefix-5/file001566",
    16                     "nested/prefix-6/file002011",
    17                     "nested/prefix-7/file000101", ]
    18  
    19  
    20  def skip_on_unsupported_blockstore(clt: Client, supported_blockstores: [str]):
    21      if clt.storage_config.blockstore_type not in supported_blockstores:
    22          pytest.skip(f"Unsupported blockstore type for test: {clt.storage_config.blockstore_type}")
    23  
    24  
    25  def test_import_manager(setup_repo):
    26      clt, repo = setup_repo
    27      skip_on_unsupported_blockstore(clt, "s3")
    28      branch = repo.branch("import-branch").create("main")
    29      mgr = branch.import_data(commit_message="my imported data", metadata={"foo": "bar"})
    30  
    31      #  No import running
    32      with expect_exception_context(ImportManagerException):
    33          mgr.cancel()
    34  
    35      # empty import
    36      res = mgr.run()
    37      assert res.error is None
    38      assert res.completed
    39      assert res.commit.id == branch.get_commit().id
    40      assert res.commit.message == "my imported data"
    41      assert res.commit.metadata.get("foo") == "bar"
    42      assert res.ingested_objects == 0
    43  
    44      # Expect failure trying to run manager twice
    45      with expect_exception_context(ImportManagerException):
    46          mgr.run()
    47  
    48      # Import with objects and prefixes
    49      mgr = branch.import_data()
    50      dest_prefix = "imported/new-prefix/"
    51      mgr.prefix(_IMPORT_PATH + "prefix-1/",
    52                 dest_prefix + "prefix-1/").prefix(_IMPORT_PATH + "prefix-2/",
    53                                                   dest_prefix + "prefix-2/")
    54      for o in _FILES_TO_CHECK:
    55          mgr.object(_IMPORT_PATH + o, dest_prefix + o)
    56      mgr.commit_message = "new commit"
    57      mgr.commit_metadata = None
    58      res = mgr.run()
    59  
    60      assert res.error is None
    61      assert res.completed
    62      assert res.commit.id == branch.get_commit().id
    63      assert res.commit.message == mgr.commit_message
    64      assert res.commit.metadata.get("foo") is None
    65      assert res.ingested_objects == 4207
    66  
    67      # Conflict since import completed
    68      with expect_exception_context(ConflictException):
    69          mgr.cancel()
    70  
    71  
    72  def test_import_manager_cancel(setup_repo):
    73      clt, repo = setup_repo
    74      skip_on_unsupported_blockstore(clt, "s3")
    75      branch = repo.branch("import-branch").create("main")
    76      expected_commit_id = branch.get_commit().id
    77      expected_commit_message = branch.get_commit().message
    78  
    79      mgr = branch.import_data(commit_message="my imported data", metadata={"foo": "bar"})
    80      mgr.prefix(_IMPORT_PATH, "import/")
    81  
    82      mgr.start()
    83      sleep(1)
    84  
    85      with expect_exception_context(ImportManagerException):
    86          mgr.start()
    87  
    88      mgr.cancel()
    89  
    90      status = mgr.status()
    91      assert branch.get_commit().id == expected_commit_id
    92      assert branch.get_commit().message == expected_commit_message
    93      assert not status.completed
    94      assert "Canceled" in status.error.message
    95      assert len(mgr.sources) == 1