github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/ingest/store/factory.go (about) 1 package store 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "net/url" 8 9 "cloud.google.com/go/storage" 10 "github.com/aws/aws-sdk-go-v2/aws" 11 awsconfig "github.com/aws/aws-sdk-go-v2/config" 12 awss3 "github.com/aws/aws-sdk-go-v2/service/s3" 13 "github.com/treeverse/lakefs/pkg/block" 14 "github.com/treeverse/lakefs/pkg/block/azure" 15 "github.com/treeverse/lakefs/pkg/block/factory" 16 "github.com/treeverse/lakefs/pkg/block/gs" 17 "github.com/treeverse/lakefs/pkg/block/local" 18 "github.com/treeverse/lakefs/pkg/block/params" 19 "github.com/treeverse/lakefs/pkg/block/s3" 20 ) 21 22 var ErrNotSupported = errors.New("no storage adapter found") 23 24 type WalkerOptions struct { 25 S3EndpointURL string 26 StorageURI string 27 SkipOutOfOrder bool 28 } 29 30 type WalkerWrapper struct { 31 walker block.Walker 32 uri *url.URL 33 } 34 35 func NewWrapper(walker block.Walker, uri *url.URL) *WalkerWrapper { 36 return &WalkerWrapper{ 37 walker: walker, 38 uri: uri, 39 } 40 } 41 42 func (ww *WalkerWrapper) Walk(ctx context.Context, opts block.WalkOptions, walkFn func(e block.ObjectStoreEntry) error) error { 43 return ww.walker.Walk(ctx, ww.uri, opts, walkFn) 44 } 45 46 func (ww *WalkerWrapper) Marker() block.Mark { 47 return ww.walker.Marker() 48 } 49 50 func (ww *WalkerWrapper) GetSkippedEntries() []block.ObjectStoreEntry { 51 return ww.walker.GetSkippedEntries() 52 } 53 54 type WalkerFactory struct { 55 params params.AdapterConfig 56 } 57 58 func NewFactory(params params.AdapterConfig) *WalkerFactory { 59 return &WalkerFactory{params: params} 60 } 61 62 func (f *WalkerFactory) buildS3Walker(opts WalkerOptions) (*s3.Walker, error) { 63 var client *awss3.Client 64 if f.params != nil { 65 s3params, err := f.params.BlockstoreS3Params() 66 if err != nil { 67 return nil, err 68 } 69 client, err = factory.BuildS3Client(context.Background(), s3params) 70 if err != nil { 71 return nil, err 72 } 73 } else { 74 var err error 75 client, err = getS3Client(opts.S3EndpointURL) 76 if err != nil { 77 return nil, err 78 } 79 } 80 return s3.NewS3Walker(client), nil 81 } 82 83 func (f *WalkerFactory) buildGCSWalker(ctx context.Context) (*gs.GCSWalker, error) { 84 var svc *storage.Client 85 if f.params != nil { 86 gsParams, err := f.params.BlockstoreGSParams() 87 if err != nil { 88 return nil, err 89 } 90 svc, err = factory.BuildGSClient(ctx, gsParams) 91 if err != nil { 92 return nil, err 93 } 94 } else { 95 var err error 96 svc, err = storage.NewClient(ctx) 97 if err != nil { 98 return nil, err 99 } 100 } 101 return gs.NewGCSWalker(svc), nil 102 } 103 104 func (f *WalkerFactory) buildAzureWalker(importURL *url.URL, skipOutOfOrder bool) (block.Walker, error) { 105 storageAccount, err := azure.ExtractStorageAccount(importURL) 106 if err != nil { 107 return nil, err 108 } 109 110 var azureParams params.Azure 111 if f.params != nil { 112 // server settings 113 azureParams, err = f.params.BlockstoreAzureParams() 114 if err != nil { 115 return nil, err 116 } 117 } 118 119 // Use StorageAccessKey to initialize the storage account client only if it was provided for this given storage account 120 // Otherwise fall back to the default credentials 121 if azureParams.StorageAccount != storageAccount { 122 azureParams.StorageAccount = storageAccount 123 azureParams.StorageAccessKey = "" 124 } 125 client, err := azure.BuildAzureServiceClient(azureParams) 126 if err != nil { 127 return nil, err 128 } 129 130 return azure.NewAzureDataLakeWalker(client, skipOutOfOrder) 131 } 132 133 func (f *WalkerFactory) GetWalker(ctx context.Context, opts WalkerOptions) (*WalkerWrapper, error) { 134 uri, err := url.Parse(opts.StorageURI) 135 if err != nil { 136 return nil, fmt.Errorf("could not parse storage URI %s: %w", uri, err) 137 } 138 139 var walker block.Walker 140 switch uri.Scheme { 141 case "s3": 142 walker, err = f.buildS3Walker(opts) 143 if err != nil { 144 return nil, fmt.Errorf("creating s3 walker: %w", err) 145 } 146 case "gs": 147 walker, err = f.buildGCSWalker(ctx) 148 if err != nil { 149 return nil, fmt.Errorf("creating gs walker: %w", err) 150 } 151 case "http", "https": 152 walker, err = f.buildAzureWalker(uri, opts.SkipOutOfOrder) 153 if err != nil { 154 return nil, fmt.Errorf("creating Azure walker: %w", err) 155 } 156 case "local": 157 walker, err = f.buildLocalWalker() 158 if err != nil { 159 return nil, fmt.Errorf("creating local walker: %w", err) 160 } 161 default: 162 return nil, fmt.Errorf("%w: for scheme: %s", ErrNotSupported, uri.Scheme) 163 } 164 return NewWrapper(walker, uri), nil 165 } 166 167 func (f *WalkerFactory) buildLocalWalker() (*local.Walker, error) { 168 var ( 169 localParams params.Local 170 err error 171 ) 172 173 if f.params != nil { 174 localParams, err = f.params.BlockstoreLocalParams() 175 if err != nil { 176 return nil, err 177 } 178 } 179 180 return local.NewLocalWalker(localParams), nil 181 } 182 183 func getS3Client(s3EndpointURL string) (*awss3.Client, error) { 184 cfg, err := awsconfig.LoadDefaultConfig(context.Background()) 185 if err != nil { 186 return nil, err 187 } 188 client := awss3.NewFromConfig(cfg, func(o *awss3.Options) { 189 if s3EndpointURL != "" { 190 o.BaseEndpoint = aws.String(s3EndpointURL) 191 o.Region = "us-east-1" 192 o.UsePathStyle = true 193 } 194 }) 195 // TODO(barak): do we require SharedConfigState: session.SharedConfigEnable, 196 return client, nil 197 }