# Source: github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/clients/python-wrapper/tests/integration/test_import.py
"""Integration tests for the import manager of the lakeFS Python wrapper.

The tests import objects from a fixed S3 location and are skipped when the
server under test reports a blockstore type other than ``s3``.
"""
from time import sleep
from typing import Iterable, Union

import pytest

from lakefs.client import Client
from lakefs.exceptions import ImportManagerException, ConflictException
from tests.utests.common import expect_exception_context

# Root of the source data used by the import tests.
_IMPORT_PATH = "s3://esti-system-testing-data/import-test-data/"

# Individual objects (relative to _IMPORT_PATH) imported one by one, in
# addition to the whole-prefix imports.
_FILES_TO_CHECK = ["nested/prefix-1/file002005",
                   "nested/prefix-2/file001894",
                   "nested/prefix-3/file000005",
                   "nested/prefix-4/file000645",
                   "nested/prefix-5/file001566",
                   "nested/prefix-6/file002011",
                   "nested/prefix-7/file000101", ]


def skip_on_unsupported_blockstore(clt: Client, supported_blockstores: Union[str, Iterable[str]]):
    """Skip the calling test unless the client's blockstore type is supported.

    :param clt: Client whose ``storage_config.blockstore_type`` is checked.
    :param supported_blockstores: The blockstore type names the test supports.
        A single name may be passed as a plain string.
    """
    # A bare string must be treated as one name: `x in "s3"` is a substring
    # test, which would wrongly accept values such as "" or "s".
    if isinstance(supported_blockstores, str):
        supported_blockstores = (supported_blockstores,)

    blockstore_type = clt.storage_config.blockstore_type
    if blockstore_type not in supported_blockstores:
        pytest.skip(f"Unsupported blockstore type for test: {blockstore_type}")


def test_import_manager(setup_repo):
    """Run an empty import and then an import of prefixes and objects.

    Also checks that cancelling before a run, running the same manager twice,
    and cancelling after completion each raise the expected exception.
    """
    clt, repo = setup_repo
    skip_on_unsupported_blockstore(clt, ["s3"])
    branch = repo.branch("import-branch").create("main")
    mgr = branch.import_data(commit_message="my imported data", metadata={"foo": "bar"})

    # No import running
    with expect_exception_context(ImportManagerException):
        mgr.cancel()

    # Empty import: no sources were added, yet a commit is still expected on
    # the branch carrying the requested message and metadata.
    res = mgr.run()
    assert res.error is None
    assert res.completed
    assert res.commit.id == branch.get_commit().id
    assert res.commit.message == "my imported data"
    assert res.commit.metadata.get("foo") == "bar"
    assert res.ingested_objects == 0

    # Expect failure trying to run manager twice
    with expect_exception_context(ImportManagerException):
        mgr.run()

    # Import with objects and prefixes
    mgr = branch.import_data()
    dest_prefix = "imported/new-prefix/"
    mgr.prefix(
        _IMPORT_PATH + "prefix-1/", dest_prefix + "prefix-1/"
    ).prefix(
        _IMPORT_PATH + "prefix-2/", dest_prefix + "prefix-2/"
    )
    for o in _FILES_TO_CHECK:
        mgr.object(_IMPORT_PATH + o, dest_prefix + o)
    # Commit details are set after construction, just before the run.
    mgr.commit_message = "new commit"
    mgr.commit_metadata = None
    res = mgr.run()

    assert res.error is None
    assert res.completed
    assert res.commit.id == branch.get_commit().id
    assert res.commit.message == mgr.commit_message
    assert res.commit.metadata.get("foo") is None
    # NOTE(review): 4207 is presumably the object count of the two prefixes
    # plus the individually listed files in the test bucket -- confirm if the
    # test data changes.
    assert res.ingested_objects == 4207

    # Conflict since import completed
    with expect_exception_context(ConflictException):
        mgr.cancel()


def test_import_manager_cancel(setup_repo):
    """Start an import, cancel it, and verify the branch head is unchanged."""
    clt, repo = setup_repo
    skip_on_unsupported_blockstore(clt, ["s3"])
    branch = repo.branch("import-branch").create("main")

    # Capture the branch head before importing so it can be compared after the
    # cancel; one fetch provides both the id and the message.
    initial_commit = branch.get_commit()
    expected_commit_id = initial_commit.id
    expected_commit_message = initial_commit.message

    mgr = branch.import_data(commit_message="my imported data", metadata={"foo": "bar"})
    mgr.prefix(_IMPORT_PATH, "import/")

    mgr.start()
    # Brief pause between starting the import and the calls that follow.
    sleep(1)

    # Starting again while an import is already in progress must fail.
    with expect_exception_context(ImportManagerException):
        mgr.start()

    mgr.cancel()

    status = mgr.status()
    assert branch.get_commit().id == expected_commit_id
    assert branch.get_commit().message == expected_commit_message
    assert not status.completed
    assert "Canceled" in status.error.message
    assert len(mgr.sources) == 1