github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/actions/lua/formats/delta.go (about) 1 package formats 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "net/url" 8 "regexp" 9 10 "github.com/Shopify/go-lua" 11 "github.com/aws/aws-sdk-go-v2/aws" 12 delta "github.com/csimplestring/delta-go" 13 "github.com/csimplestring/delta-go/action" 14 "github.com/csimplestring/delta-go/storage" 15 deltaStore "github.com/csimplestring/delta-go/store" 16 luautil "github.com/treeverse/lakefs/pkg/actions/lua/util" 17 ) 18 19 type storageType string 20 21 const ( 22 s3StorageType storageType = "s3" 23 ) 24 25 var errUnimplementedProvided = errors.New("unimplemented provider") 26 27 type DeltaClient struct { 28 accessProvider AccessProvider 29 ctx context.Context 30 } 31 32 func newDeltaTableMetadata(meta *action.Metadata) map[string]any { 33 return map[string]any{ 34 "description": meta.Description, 35 "id": meta.ID, 36 "name": meta.Name, 37 "schema_string": meta.SchemaString, 38 "partition_columns": meta.PartitionColumns, 39 "configuration": meta.Configuration, 40 "created_time": *meta.CreatedTime, 41 } 42 } 43 44 func (dc *DeltaClient) fetchS3Table(repo, ref, prefix string, awsProps *storage.AWSProperties) (map[int64][]string, map[string]any, error) { 45 table, err := dc.getS3DeltaTable(repo, ref, prefix, awsProps) 46 if err != nil { 47 return nil, nil, err 48 } 49 log, err := dc.buildLog(table) 50 if err != nil { 51 return nil, nil, err 52 } 53 meta, err := dc.getTableMetadata(table) 54 if err != nil { 55 return nil, nil, err 56 } 57 return log, meta, nil 58 } 59 60 func (dc *DeltaClient) getTableMetadata(log delta.Log) (map[string]any, error) { 61 s, err := log.Snapshot() 62 if err != nil { 63 return nil, err 64 } 65 m, err := s.Metadata() 66 if err != nil { 67 return nil, err 68 } 69 return newDeltaTableMetadata(m), nil 70 } 71 72 func (dc *DeltaClient) getS3DeltaTable(repo, ref, prefix string, awsProps *storage.AWSProperties) (delta.Log, error) { 73 config := delta.Config{StoreType: string(s3StorageType)} 74 u := fmt.Sprintf("lakefs://%s/%s/%s", repo, ref, prefix) 75 parsedURL, err := url.Parse(u) 76 if err != nil { 77 return nil, err 78 } 79 s3LogStore, err := deltaStore.NewS3CompatLogStore(awsProps, parsedURL) 80 if err != nil { 81 return nil, err 82 } 83 store := deltaStore.Store(s3LogStore) 84 return delta.ForTableWithStore(u, config, &delta.SystemClock{}, &store) 85 } 86 87 func (dc *DeltaClient) buildLog(table delta.Log) (map[int64][]string, error) { 88 s, err := table.Snapshot() 89 if err != nil { 90 return nil, err 91 } 92 version, err := s.EarliestVersion() 93 if err != nil { 94 return nil, err 95 } 96 versionLog, err := table.Changes(version, false) 97 if err != nil { 98 return nil, err 99 } 100 101 entries := make(map[int64][]string) 102 for entry, err := versionLog.Next(); err == nil; entry, err = versionLog.Next() { 103 strLog := make([]string, 0) 104 entryVersion := entry.Version() 105 actions, aErr := entry.Actions() 106 if aErr != nil { 107 return nil, aErr 108 } 109 for _, a := range actions { 110 aj, _ := a.Json() 111 strLog = append(strLog, aj) 112 } 113 entries[entryVersion] = strLog 114 } 115 return entries, nil 116 } 117 118 func (dc *DeltaClient) fetchTableLog(repo, ref, prefix string) (map[int64][]string, map[string]any, error) { 119 ap, _ := dc.accessProvider.GetAccessProperties() 120 switch access := ap.(type) { 121 case AWSInfo: 122 return dc.fetchS3Table(repo, ref, prefix, &access.AWSProps) 123 default: 124 return nil, nil, errUnimplementedProvided 125 } 126 } 127 128 func getTable(client *DeltaClient) lua.Function { 129 return func(l *lua.State) int { 130 repo := lua.CheckString(l, 1) 131 ref := lua.CheckString(l, 2) 132 prefix := lua.CheckString(l, 3) 133 tableLog, metadata, err := client.fetchTableLog(repo, ref, prefix) 134 if err != nil { 135 lua.Errorf(l, "%s", err.Error()) 136 panic("failed fetching table log") 137 } 138 luautil.DeepPush(l, tableLog) 139 luautil.DeepPush(l, metadata) 140 return 2 141 } 142 } 143 144 var functions = map[string]func(client *DeltaClient) lua.Function{ 145 "get_table": getTable, 146 } 147 148 // AccessProvider is used to provide different expected access properties to different storage providers 149 type AccessProvider interface { 150 GetAccessProperties() (interface{}, error) 151 } 152 153 type AWSInfo struct { 154 AWSProps storage.AWSProperties 155 } 156 157 func (awsI AWSInfo) GetAccessProperties() (interface{}, error) { 158 return awsI, nil 159 } 160 161 // newDelta is a factory function to create server/cloud specific Delta Lake client 162 // lakeFSAddr is the domain or "authority:port" of the running lakeFS server 163 func newDelta(ctx context.Context, lakeFSAddr string) lua.Function { 164 if regexp.MustCompile(`^:\d+`).MatchString(lakeFSAddr) { 165 // workaround in case we listen on all interfaces without specifying ip 166 lakeFSAddr = fmt.Sprintf("localhost%s", lakeFSAddr) 167 } 168 lakeFSAddr = fmt.Sprintf("http://%s", lakeFSAddr) 169 return func(l *lua.State) int { 170 client := newS3DeltaClient(l, ctx, lakeFSAddr) 171 l.NewTable() 172 for name, goFn := range functions { 173 l.PushGoFunction(goFn(client)) 174 l.SetField(-2, name) 175 } 176 return 1 177 } 178 } 179 180 func newS3DeltaClient(l *lua.State, ctx context.Context, lakeFSAddr string) *DeltaClient { 181 accessKeyID := lua.CheckString(l, 1) 182 secretAccessKey := lua.CheckString(l, 2) 183 awsProps := storage.AWSProperties{ 184 ForcePathStyle: true, 185 CredsProvider: aws.CredentialsProviderFunc(func(context.Context) (aws.Credentials, error) { 186 return aws.Credentials{ 187 AccessKeyID: accessKeyID, 188 SecretAccessKey: secretAccessKey, 189 }, nil 190 }), 191 Endpoint: lakeFSAddr, 192 } 193 if !l.IsNone(3) { 194 awsProps.Region = lua.CheckString(l, 3) 195 } 196 197 storage.RegisterS3CompatBucketURLOpener("lakefs", &awsProps) 198 199 return &DeltaClient{accessProvider: AWSInfo{AWSProps: awsProps}, ctx: ctx} 200 }