-- Origin: github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/actions/lua/lakefs/catalogexport/unity_exporter.lua

--[[ TABLE SPECIFICATION:   _lakefs_tables/<table path>
name: <table name>
type: delta
catalog: <catalog name>
]]
local strings = require("strings")
local pathlib = require("path")
local lakefs = require("lakefs")
local extractor = require("lakefs/catalogexport/table_extractor")

-- Returns `name` with a ".yaml" extension, appending one only when it is missing.
local function with_yaml_suffix(name)
    if strings.has_suffix(name, ".yaml") then
        return name
    end
    return name .. ".yaml"
end

--[[
    Registers the given Delta Lake tables through the provided Databricks client.

    For every entry of `delta_table_details` the matching table descriptor is read from
    `<table_descriptors_path>/<key>.yaml` at the action's commit, validated, and then handed to
    `databricks_client.register_external_table`. Before each registration,
    `databricks_client.create_schema` is called with the action's branch id and the descriptor's
    catalog, so the schema is requested (created or fetched) per table.

    - action: the hook's action table. The fields read here are `repository_id`, `commit_id`
        and `branch_id`; `commit_id` and `branch_id` are mandatory.
    - table_descriptors_path: the path under which the table descriptors reside (e.g. "_lakefs_tables").
        Every key of `delta_table_details` must have a complementary
        `<table_descriptors_path>/<key>.yaml` file describing the used Delta Table.
    - delta_table_details: a mapping of Delta Lake table descriptor yaml name (with or without the
        ".yaml" extension) to a table holding the table's location and metadata:
        { <delta table name yaml>: { path = <physical location in the object storage>, metadata = <table metadata> } }
        `metadata` is forwarded as-is to `databricks_client.register_external_table`.
    - databricks_client: a client to interact with databricks. It must expose `create_schema` and
        `register_external_table`.
    - warehouse_id: Databricks warehouse ID

    Returns a "<delta_table_details key>: status" map, where status is whatever
    `register_external_table` returned for that table. Note the map is keyed by the key the caller
    supplied, not by the `name` field of the descriptor.

    Raises an error when the commit id or branch id is missing, when a descriptor lacks `name` or
    `catalog`, when a descriptor's `type` is not "delta", or when the schema cannot be
    created/fetched. An error aborts the whole call: statuses collected so far are not returned.
]]
local function register_tables(action, table_descriptors_path, delta_table_details, databricks_client, warehouse_id)
    local repo = action.repository_id
    local commit_id = action.commit_id
    if not commit_id then
        error("missing commit id")
    end
    local branch_id = action.branch_id
    -- The branch id is passed to create_schema and concatenated into an error message below;
    -- fail early with a clear message instead of a nil-concatenation error.
    if not branch_id then
        error("missing branch id")
    end

    local response = {}
    for table_name_yaml, table_details in pairs(delta_table_details) do
        local table_src_path = pathlib.join("/", table_descriptors_path, with_yaml_suffix(table_name_yaml))
        local table_descriptor = extractor.get_table_descriptor(lakefs, repo, commit_id, table_src_path)

        local table_name = table_descriptor.name
        if not table_name then
            error("table name is required to proceed with unity catalog export")
        end
        if table_descriptor.type ~= "delta" then
            error("unity exporter supports only table descriptors of type 'delta'. registration failed for table " .. table_name)
        end
        local catalog = table_descriptor.catalog
        if not catalog then
            error("catalog name is required to proceed with unity catalog export")
        end

        -- Ask the client to reuse the schema when it already exists rather than fail.
        local get_schema_if_exists = true
        local schema_name = databricks_client.create_schema(branch_id, catalog, get_schema_if_exists)
        if not schema_name then
            error("failed creating/getting catalog's schema: " .. catalog .. "." .. branch_id)
        end

        local physical_path = table_details.path
        local table_metadata = table_details.metadata
        local status = databricks_client.register_external_table(table_name, physical_path, warehouse_id, catalog, schema_name, table_metadata)
        -- Keyed by the caller-supplied name so results can be matched back to the input map.
        response[table_name_yaml] = status
    end
    return response
end


return {
    register_tables = register_tables,
}