github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/actions/lua/lakefs/catalogexport/unity_exporter.lua (about)

     1  --[[ TABLE SPECIFICATION:   _lakefs_tables/<table path>
     2  name: <table name>
     3  type: delta
     4  catalog: <catalog name>
     5  ]]
     6  local strings = require("strings")
     7  local pathlib = require("path")
     8  local lakefs = require("lakefs")
     9  local extractor = require("lakefs/catalogexport/table_extractor")
--[[
    - table_descriptors_path: the path under which the table descriptors reside (e.g. "_lakefs_tables").
      Every key in the provided `delta_table_details` must have a complementary
      `<table_descriptors_path>/<key>.yaml` file describing the used Delta Table.
    - delta_table_details: a mapping of Delta Lake table descriptor yaml names (with or without the ".yaml" extension)
      to their details: { <delta table descriptor yaml name>: { path = <physical location in the object storage>, metadata = <table metadata> } }
    - databricks_client: a client to interact with databricks.
    - warehouse_id: Databricks warehouse ID

    Returns a "<table descriptor yaml name>: status" map for registration of the provided tables.
]]
    21  local function register_tables(action, table_descriptors_path, delta_table_details, databricks_client, warehouse_id)
    22      local repo = action.repository_id
    23      local commit_id = action.commit_id
    24      if not commit_id then
    25          error("missing commit id")
    26      end
    27      local branch_id = action.branch_id
    28      local response = {}
    29      for table_name_yaml, table_details in pairs(delta_table_details) do
    30          local tny  = table_name_yaml
    31          if not strings.has_suffix(tny, ".yaml") then
    32              tny = tny .. ".yaml"
    33          end
    34          local table_src_path = pathlib.join("/", table_descriptors_path, tny)
    35          local table_descriptor = extractor.get_table_descriptor(lakefs, repo, commit_id, table_src_path)
    36          local table_name = table_descriptor.name
    37          if not table_name then
    38              error("table name is required to proceed with unity catalog export")
    39          end
    40          if table_descriptor.type ~= "delta" then
    41              error("unity exporter supports only table descriptors of type 'delta'. registration failed for table " .. table_name)
    42          end
    43          local catalog = table_descriptor.catalog
    44          if not catalog then
    45              error("catalog name is required to proceed with unity catalog export")
    46          end
    47          local get_schema_if_exists = true
    48          local schema_name = databricks_client.create_schema(branch_id, catalog, get_schema_if_exists)
    49          if not schema_name then
    50              error("failed creating/getting catalog's schema: " .. catalog .. "." .. branch_id)
    51          end
    52          local physical_path = table_details.path
    53          local table_metadata = table_details.metadata
    54          local status = databricks_client.register_external_table(table_name, physical_path, warehouse_id, catalog, schema_name, table_metadata)
    55          response[table_name_yaml] = status
    56      end
    57      return response
    58  end
    59  
    60  
    61  return {
    62      register_tables = register_tables,
    63  }