local extractor = require("lakefs/catalogexport/table_extractor")
local hive = require("lakefs/catalogexport/hive")
local utils = require("lakefs/catalogexport/internal")
local pathlib = require("path")
local strings = require("strings")
local lakefs = require("lakefs")

--[[
Build an iterator of symlink files for a Hive table.

@repo_id: repository id
@commit_id: commit id of the current table
@table_src_path: path to the table spec (i.e. _lakefs_tables/my_table.yaml)
@options:
- skip_trim_obj_base_path(boolean): if true, skip removing the table base path
  prefix from each partition path.
- debug(boolean): print progress information.

Returns a closure; each call yields { key_suffix = <string>, data = <string> }
for one partition (or nil when exhausted). `data` is a newline-terminated list
of the physical addresses of the partition's objects.
]]
local function export_it(repo_id, commit_id, table_src_path, options)
    local opts = options or {}
    local descriptor = extractor.get_table_descriptor(lakefs, repo_id, commit_id, table_src_path)
    -- only Hive-layout tables are supported by the symlink exporter
    if descriptor.type ~= "hive" then
        error("table " .. descriptor.type .. " in path " .. table_src_path .. " not supported")
    end
    if opts.debug then
        print(string.format('%s table `lakefs://%s/%s/%s`', descriptor.type, repo_id, utils.short_digest(commit_id),
            descriptor.path))
    end
    local base_path = descriptor.path
    local cols = descriptor.partition_columns
    local pager = hive.extract_partition_pager(lakefs, repo_id, commit_id, base_path, cols)
    return function()
        local part_key, entries = pager()
        if part_key == nil then
            return nil -- no more partitions
        end
        -- one physical address per line, as expected by symlink.txt consumers
        local symlink_data = ""
        for _, entry in ipairs(entries) do
            symlink_data = symlink_data .. entry.physical_address .. "\n"
        end
        -- create key suffix for symlink file
        local storage_key_suffix = part_key
        if #descriptor.partition_columns == 0 then
            -- unpartitioned table: a single symlink file under the table name
            storage_key_suffix = descriptor.name .. "/" .. "symlink.txt"
        else
            if not opts.skip_trim_obj_base_path then
                storage_key_suffix = strings.replace(part_key, base_path .. "/", "", 1) -- remove base_path prefix from partition path
            end
            -- append the partition path to the suffix
            storage_key_suffix = pathlib.join("/", descriptor.name, storage_key_suffix, "symlink.txt")
        end
        return {
            key_suffix = storage_key_suffix,
            data = symlink_data
        }
    end
end

--[[
Export symlink files that represent a table to S3.

@s3_client: configured client
@table_src_path: object path to the table spec (_lakefs_tables/my_table.yaml)
@action_info: the global action object
@options:
- debug(boolean)
- export_base_uri(string): override the prefix in S3, i.e. s3://other-bucket/path/
- writer(function(bucket, key, data)): if passed, the s3 client is not used; helpful for debugging

Returns { location = <parsed storage uri> } describing where symlinks were written.
]]
local function export_s3(s3_client, table_src_path, action_info, options)
    local opts = options or {}
    local repo_id = action_info.repository_id
    local commit_id = action_info.commit_id
    -- destination root: explicit override or the repository's storage namespace
    local base_prefix = opts.export_base_uri or action_info.storage_namespace
    local export_base_uri = utils.get_storage_uri_prefix(base_prefix, commit_id, action_info)
    local location = utils.parse_storage_uri(export_base_uri)
    -- NOTE: `or` short-circuits, so s3_client may be nil when opts.writer is set
    local put_object = opts.writer or s3_client.put_object
    local it = export_it(repo_id, commit_id, table_src_path, opts)
    for symlink in it do
        local key = pathlib.join("/", location.key, symlink.key_suffix)
        if opts.debug then
            print("S3 writing bucket: " .. location.bucket .. " key: " .. key)
        end
        put_object(location.bucket, key, symlink.data)
    end
    return {
        location = location
    }
end

return {
    export_s3 = export_s3,
}