-- lakefs/catalogexport/table_extractor.lua
-- Helpers for discovering and loading table descriptors (YAML files) that are
-- stored under the "_lakefs_tables/" prefix.

local pathlib = require("path")
local strings = require("strings")
local yaml = require("encoding/yaml")
local utils = require("lakefs/catalogexport/internal")

-- Prefix under which table descriptor files are looked up.
local LAKEFS_TABLES_BASE = "_lakefs_tables/"

-- Number of entries requested per page when listing descriptors.
local LIST_PAGE_SIZE = 30

--- Check whether a listing entry is a table spec under `tables_base`.
-- An entry qualifies when it is an object (not a prefix), its path relative
-- to `tables_base` is not hidden according to `pathlib.is_hidden`, and the
-- path ends with ".yaml".
-- @param entry table with at least `path_type` and `path` fields
-- @param tables_base string prefix to strip before the hidden-path check
-- @return boolean
local function is_table_obj(entry, tables_base)
    if entry.path_type ~= "object" then
        return false
    end
    local path = entry.path
    if strings.has_prefix(path, tables_base) then
        -- Strip the base prefix. Lua strings are 1-based, so the remainder
        -- starts at #tables_base + 1; starting at #tables_base would keep the
        -- prefix's trailing "/" at the front of the relative path.
        path = path:sub(#tables_base + 1)
    end
    return not pathlib.is_hidden(path) and strings.has_suffix(path, ".yaml")
end

--- List all YAML table descriptor files under `_lakefs_tables/`.
-- Pages through the listing produced by `utils.lakefs_object_pager` and keeps
-- only the entries accepted by `is_table_obj`.
-- @param client lakeFS client handed to the pager
-- @param repo_id repository identifier
-- @param commit_id commit identifier to list from
-- @return array of `{ physical_address = ..., path = ... }` tables
local function list_table_descriptor_entries(client, repo_id, commit_id)
    local table_entries = {}
    local pager = utils.lakefs_object_pager(client, repo_id, commit_id, "", LAKEFS_TABLES_BASE, "", LIST_PAGE_SIZE)
    for entries in pager do
        for _, entry in ipairs(entries) do
            if is_table_obj(entry, LAKEFS_TABLES_BASE) then
                table_entries[#table_entries + 1] = {
                    physical_address = entry.physical_address,
                    path = entry.path,
                }
            end
        end
    end
    return table_entries
end

--- Fetch a table descriptor and return it as a parsed YAML object.
-- `partition_columns` is defaulted to an empty table when the descriptor does
-- not define it.
-- @param client lakeFS client exposing `get_object`
-- @param repo_id repository identifier
-- @param commit_id commit identifier to read from
-- @param logical_path path of the descriptor file
-- @return table the parsed descriptor
-- Raises an error when the object cannot be fetched (non-200 response) or when
-- the parsed content is not a table.
local function get_table_descriptor(client, repo_id, commit_id, logical_path)
    local code, content = client.get_object(repo_id, commit_id, logical_path)
    if code ~= 200 then
        error("could not fetch data file: HTTP " .. tostring(code) .. " path: " .. logical_path)
    end
    local descriptor = yaml.unmarshal(content)
    -- Guard before indexing: a non-table result (for example from an empty
    -- file) would otherwise fail with an opaque "attempt to index" error.
    if type(descriptor) ~= "table" then
        error("invalid table descriptor (expected a YAML mapping) path: " .. logical_path)
    end
    descriptor.partition_columns = descriptor.partition_columns or {}
    return descriptor
end

return {
    list_table_descriptor_entries = list_table_descriptor_entries,
    get_table_descriptor = get_table_descriptor,
}