github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/ingest/store/factory.go (about)

     1  package store
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"net/url"
     8  
     9  	"cloud.google.com/go/storage"
    10  	"github.com/aws/aws-sdk-go-v2/aws"
    11  	awsconfig "github.com/aws/aws-sdk-go-v2/config"
    12  	awss3 "github.com/aws/aws-sdk-go-v2/service/s3"
    13  	"github.com/treeverse/lakefs/pkg/block"
    14  	"github.com/treeverse/lakefs/pkg/block/azure"
    15  	"github.com/treeverse/lakefs/pkg/block/factory"
    16  	"github.com/treeverse/lakefs/pkg/block/gs"
    17  	"github.com/treeverse/lakefs/pkg/block/local"
    18  	"github.com/treeverse/lakefs/pkg/block/params"
    19  	"github.com/treeverse/lakefs/pkg/block/s3"
    20  )
    21  
// ErrNotSupported is the sentinel error wrapped by GetWalker when the storage
// URI uses a scheme for which no walker is available.
var ErrNotSupported = errors.New("no storage adapter found")
    23  
// WalkerOptions carries the per-request settings used by WalkerFactory.GetWalker.
type WalkerOptions struct {
	// S3EndpointURL is an optional custom S3 endpoint. It is consulted only
	// when the factory has no adapter configuration and builds a default S3
	// client; an empty string leaves the default endpoint in place.
	S3EndpointURL string
	// StorageURI is the location to walk. Its scheme selects the walker
	// implementation (s3, gs, http/https for Azure, local).
	StorageURI string
	// SkipOutOfOrder is forwarded to the Azure walker constructor; the other
	// walkers built here do not receive it.
	SkipOutOfOrder bool
}
    29  
// WalkerWrapper binds a block.Walker to a fixed storage URI so that callers
// can invoke Walk without passing the URI each time.
type WalkerWrapper struct {
	walker block.Walker // underlying walker every method delegates to
	uri    *url.URL     // storage URI handed to walker.Walk
}
    34  
    35  func NewWrapper(walker block.Walker, uri *url.URL) *WalkerWrapper {
    36  	return &WalkerWrapper{
    37  		walker: walker,
    38  		uri:    uri,
    39  	}
    40  }
    41  
// Walk delegates to the wrapped walker's Walk, supplying the URI stored in
// the wrapper together with the caller's ctx, opts and walkFn. The wrapped
// walker's error is returned unchanged.
func (ww *WalkerWrapper) Walk(ctx context.Context, opts block.WalkOptions, walkFn func(e block.ObjectStoreEntry) error) error {
	return ww.walker.Walk(ctx, ww.uri, opts, walkFn)
}
    45  
// Marker returns the wrapped walker's current mark, unchanged.
func (ww *WalkerWrapper) Marker() block.Mark {
	return ww.walker.Marker()
}
    49  
// GetSkippedEntries returns the entries reported as skipped by the wrapped
// walker, unchanged.
func (ww *WalkerWrapper) GetSkippedEntries() []block.ObjectStoreEntry {
	return ww.walker.GetSkippedEntries()
}
    53  
// WalkerFactory builds storage walkers for the supported URI schemes.
type WalkerFactory struct {
	// params is the block adapter configuration used to build storage
	// clients. It may be nil, in which case each builder falls back to
	// default client settings or zero-valued parameters.
	params params.AdapterConfig
}
    57  
// NewFactory returns a WalkerFactory that uses the given adapter
// configuration. params may be nil; the builders check for that and fall
// back to default client settings.
func NewFactory(params params.AdapterConfig) *WalkerFactory {
	return &WalkerFactory{params: params}
}
    61  
    62  func (f *WalkerFactory) buildS3Walker(opts WalkerOptions) (*s3.Walker, error) {
    63  	var client *awss3.Client
    64  	if f.params != nil {
    65  		s3params, err := f.params.BlockstoreS3Params()
    66  		if err != nil {
    67  			return nil, err
    68  		}
    69  		client, err = factory.BuildS3Client(context.Background(), s3params)
    70  		if err != nil {
    71  			return nil, err
    72  		}
    73  	} else {
    74  		var err error
    75  		client, err = getS3Client(opts.S3EndpointURL)
    76  		if err != nil {
    77  			return nil, err
    78  		}
    79  	}
    80  	return s3.NewS3Walker(client), nil
    81  }
    82  
    83  func (f *WalkerFactory) buildGCSWalker(ctx context.Context) (*gs.GCSWalker, error) {
    84  	var svc *storage.Client
    85  	if f.params != nil {
    86  		gsParams, err := f.params.BlockstoreGSParams()
    87  		if err != nil {
    88  			return nil, err
    89  		}
    90  		svc, err = factory.BuildGSClient(ctx, gsParams)
    91  		if err != nil {
    92  			return nil, err
    93  		}
    94  	} else {
    95  		var err error
    96  		svc, err = storage.NewClient(ctx)
    97  		if err != nil {
    98  			return nil, err
    99  		}
   100  	}
   101  	return gs.NewGCSWalker(svc), nil
   102  }
   103  
   104  func (f *WalkerFactory) buildAzureWalker(importURL *url.URL, skipOutOfOrder bool) (block.Walker, error) {
   105  	storageAccount, err := azure.ExtractStorageAccount(importURL)
   106  	if err != nil {
   107  		return nil, err
   108  	}
   109  
   110  	var azureParams params.Azure
   111  	if f.params != nil {
   112  		// server settings
   113  		azureParams, err = f.params.BlockstoreAzureParams()
   114  		if err != nil {
   115  			return nil, err
   116  		}
   117  	}
   118  
   119  	// Use StorageAccessKey to initialize the storage account client only if it was provided for this given storage account
   120  	// Otherwise fall back to the default credentials
   121  	if azureParams.StorageAccount != storageAccount {
   122  		azureParams.StorageAccount = storageAccount
   123  		azureParams.StorageAccessKey = ""
   124  	}
   125  	client, err := azure.BuildAzureServiceClient(azureParams)
   126  	if err != nil {
   127  		return nil, err
   128  	}
   129  
   130  	return azure.NewAzureDataLakeWalker(client, skipOutOfOrder)
   131  }
   132  
   133  func (f *WalkerFactory) GetWalker(ctx context.Context, opts WalkerOptions) (*WalkerWrapper, error) {
   134  	uri, err := url.Parse(opts.StorageURI)
   135  	if err != nil {
   136  		return nil, fmt.Errorf("could not parse storage URI %s: %w", uri, err)
   137  	}
   138  
   139  	var walker block.Walker
   140  	switch uri.Scheme {
   141  	case "s3":
   142  		walker, err = f.buildS3Walker(opts)
   143  		if err != nil {
   144  			return nil, fmt.Errorf("creating s3 walker: %w", err)
   145  		}
   146  	case "gs":
   147  		walker, err = f.buildGCSWalker(ctx)
   148  		if err != nil {
   149  			return nil, fmt.Errorf("creating gs walker: %w", err)
   150  		}
   151  	case "http", "https":
   152  		walker, err = f.buildAzureWalker(uri, opts.SkipOutOfOrder)
   153  		if err != nil {
   154  			return nil, fmt.Errorf("creating Azure walker: %w", err)
   155  		}
   156  	case "local":
   157  		walker, err = f.buildLocalWalker()
   158  		if err != nil {
   159  			return nil, fmt.Errorf("creating local walker: %w", err)
   160  		}
   161  	default:
   162  		return nil, fmt.Errorf("%w: for scheme: %s", ErrNotSupported, uri.Scheme)
   163  	}
   164  	return NewWrapper(walker, uri), nil
   165  }
   166  
   167  func (f *WalkerFactory) buildLocalWalker() (*local.Walker, error) {
   168  	var (
   169  		localParams params.Local
   170  		err         error
   171  	)
   172  
   173  	if f.params != nil {
   174  		localParams, err = f.params.BlockstoreLocalParams()
   175  		if err != nil {
   176  			return nil, err
   177  		}
   178  	}
   179  
   180  	return local.NewLocalWalker(localParams), nil
   181  }
   182  
   183  func getS3Client(s3EndpointURL string) (*awss3.Client, error) {
   184  	cfg, err := awsconfig.LoadDefaultConfig(context.Background())
   185  	if err != nil {
   186  		return nil, err
   187  	}
   188  	client := awss3.NewFromConfig(cfg, func(o *awss3.Options) {
   189  		if s3EndpointURL != "" {
   190  			o.BaseEndpoint = aws.String(s3EndpointURL)
   191  			o.Region = "us-east-1"
   192  			o.UsePathStyle = true
   193  		}
   194  	})
   195  	// TODO(barak): do we require SharedConfigState: session.SharedConfigEnable,
   196  	return client, nil
   197  }