github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/storage/cloud/s3_storage.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package cloud
    12  
    13  import (
    14  	"context"
    15  	"io"
    16  	"net/url"
    17  	"path"
    18  	"strings"
    19  
    20  	"github.com/aws/aws-sdk-go/aws"
    21  	"github.com/aws/aws-sdk-go/aws/session"
    22  	"github.com/aws/aws-sdk-go/service/s3"
    23  	"github.com/aws/aws-sdk-go/service/s3/s3manager"
    24  	"github.com/cockroachdb/cockroach/pkg/base"
    25  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    26  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    27  	"github.com/cockroachdb/cockroach/pkg/util/contextutil"
    28  	"github.com/cockroachdb/errors"
    29  )
    30  
    31  type s3Storage struct {
    32  	bucket   *string
    33  	conf     *roachpb.ExternalStorage_S3
    34  	prefix   string
    35  	s3       *s3.S3
    36  	settings *cluster.Settings
    37  }
    38  
    39  var _ ExternalStorage = &s3Storage{}
    40  
    41  func s3QueryParams(conf *roachpb.ExternalStorage_S3) string {
    42  	q := make(url.Values)
    43  	setIf := func(key, value string) {
    44  		if value != "" {
    45  			q.Set(key, value)
    46  		}
    47  	}
    48  	setIf(S3AccessKeyParam, conf.AccessKey)
    49  	setIf(S3SecretParam, conf.Secret)
    50  	setIf(S3TempTokenParam, conf.TempToken)
    51  	setIf(S3EndpointParam, conf.Endpoint)
    52  	setIf(S3RegionParam, conf.Region)
    53  	setIf(AuthParam, conf.Auth)
    54  
    55  	return q.Encode()
    56  }
    57  
    58  func makeS3Storage(
    59  	ctx context.Context,
    60  	ioConf base.ExternalIODirConfig,
    61  	conf *roachpb.ExternalStorage_S3,
    62  	settings *cluster.Settings,
    63  ) (ExternalStorage, error) {
    64  	if conf == nil {
    65  		return nil, errors.Errorf("s3 upload requested but info missing")
    66  	}
    67  	region := conf.Region
    68  	config := conf.Keys()
    69  	if conf.Endpoint != "" {
    70  		if ioConf.DisableHTTP {
    71  			return nil, errors.New(
    72  				"custom endpoints disallowed for s3 due to --external-io-disable-http flag")
    73  		}
    74  		config.Endpoint = &conf.Endpoint
    75  		if conf.Region == "" {
    76  			region = "default-region"
    77  		}
    78  		client, err := makeHTTPClient(settings)
    79  		if err != nil {
    80  			return nil, err
    81  		}
    82  		config.HTTPClient = client
    83  	}
    84  
    85  	// "specified": use credentials provided in URI params; error if not present.
    86  	// "implicit": enable SharedConfig, which loads in credentials from environment.
    87  	//             Detailed in https://docs.aws.amazon.com/sdk-for-go/api/aws/session/
    88  	// "": default to `specified`.
    89  	opts := session.Options{}
    90  	switch conf.Auth {
    91  	case "", authParamSpecified:
    92  		if conf.AccessKey == "" {
    93  			return nil, errors.Errorf(
    94  				"%s is set to '%s', but %s is not set",
    95  				AuthParam,
    96  				authParamSpecified,
    97  				S3AccessKeyParam,
    98  			)
    99  		}
   100  		if conf.Secret == "" {
   101  			return nil, errors.Errorf(
   102  				"%s is set to '%s', but %s is not set",
   103  				AuthParam,
   104  				authParamSpecified,
   105  				S3SecretParam,
   106  			)
   107  		}
   108  		opts.Config.MergeIn(config)
   109  	case authParamImplicit:
   110  		if ioConf.DisableImplicitCredentials {
   111  			return nil, errors.New(
   112  				"implicit credentials disallowed for s3 due to --external-io-implicit-credentials flag")
   113  		}
   114  		opts.SharedConfigState = session.SharedConfigEnable
   115  	default:
   116  		return nil, errors.Errorf("unsupported value %s for %s", conf.Auth, AuthParam)
   117  	}
   118  
   119  	sess, err := session.NewSessionWithOptions(opts)
   120  	if err != nil {
   121  		return nil, errors.Wrap(err, "new aws session")
   122  	}
   123  	if region == "" {
   124  		err = delayedRetry(ctx, func() error {
   125  			var err error
   126  			region, err = s3manager.GetBucketRegion(ctx, sess, conf.Bucket, "us-east-1")
   127  			return err
   128  		})
   129  		if err != nil {
   130  			return nil, errors.Wrap(err, "could not find s3 bucket's region")
   131  		}
   132  	}
   133  	sess.Config.Region = aws.String(region)
   134  	if conf.Endpoint != "" {
   135  		sess.Config.S3ForcePathStyle = aws.Bool(true)
   136  	}
   137  	return &s3Storage{
   138  		bucket:   aws.String(conf.Bucket),
   139  		conf:     conf,
   140  		prefix:   conf.Prefix,
   141  		s3:       s3.New(sess),
   142  		settings: settings,
   143  	}, nil
   144  }
   145  
   146  func (s *s3Storage) Conf() roachpb.ExternalStorage {
   147  	return roachpb.ExternalStorage{
   148  		Provider: roachpb.ExternalStorageProvider_S3,
   149  		S3Config: s.conf,
   150  	}
   151  }
   152  
   153  func (s *s3Storage) WriteFile(ctx context.Context, basename string, content io.ReadSeeker) error {
   154  	err := contextutil.RunWithTimeout(ctx, "put s3 object",
   155  		timeoutSetting.Get(&s.settings.SV),
   156  		func(ctx context.Context) error {
   157  			_, err := s.s3.PutObjectWithContext(ctx, &s3.PutObjectInput{
   158  				Bucket: s.bucket,
   159  				Key:    aws.String(path.Join(s.prefix, basename)),
   160  				Body:   content,
   161  			})
   162  			return err
   163  		})
   164  	return errors.Wrap(err, "failed to put s3 object")
   165  }
   166  
   167  func (s *s3Storage) ReadFile(ctx context.Context, basename string) (io.ReadCloser, error) {
   168  	// https://github.com/cockroachdb/cockroach/issues/23859
   169  	out, err := s.s3.GetObjectWithContext(ctx, &s3.GetObjectInput{
   170  		Bucket: s.bucket,
   171  		Key:    aws.String(path.Join(s.prefix, basename)),
   172  	})
   173  	if err != nil {
   174  		return nil, errors.Wrap(err, "failed to get s3 object")
   175  	}
   176  	return out.Body, nil
   177  }
   178  
   179  func getPrefixBeforeWildcard(p string) string {
   180  	globIndex := strings.IndexAny(p, "*?[")
   181  	if globIndex < 0 {
   182  		return p
   183  	}
   184  	return path.Dir(p[:globIndex])
   185  }
   186  
   187  func (s *s3Storage) ListFiles(ctx context.Context, patternSuffix string) ([]string, error) {
   188  	var fileList []string
   189  
   190  	pattern := s.prefix
   191  	if patternSuffix != "" {
   192  		if containsGlob(s.prefix) {
   193  			return nil, errors.New("prefix cannot contain globs pattern when passing an explicit pattern")
   194  		}
   195  		pattern = path.Join(pattern, patternSuffix)
   196  	}
   197  
   198  	var matchErr error
   199  	err := s.s3.ListObjectsPagesWithContext(
   200  		ctx,
   201  		&s3.ListObjectsInput{
   202  			Bucket: s.bucket,
   203  			Prefix: aws.String(getPrefixBeforeWildcard(s.prefix)),
   204  		},
   205  		func(page *s3.ListObjectsOutput, lastPage bool) bool {
   206  			for _, fileObject := range page.Contents {
   207  				matches, err := path.Match(pattern, *fileObject.Key)
   208  				if err != nil {
   209  					matchErr = err
   210  					return false
   211  				}
   212  				if matches {
   213  					if patternSuffix != "" {
   214  						if !strings.HasPrefix(*fileObject.Key, s.prefix) {
   215  							// TODO(dt): return a nice rel-path instead of erroring out.
   216  							matchErr = errors.New("pattern matched file outside of path")
   217  							return false
   218  						}
   219  						fileList = append(fileList, strings.TrimPrefix(strings.TrimPrefix(*fileObject.Key, s.prefix), "/"))
   220  					} else {
   221  						s3URL := url.URL{
   222  							Scheme:   "s3",
   223  							Host:     *s.bucket,
   224  							Path:     *fileObject.Key,
   225  							RawQuery: s3QueryParams(s.conf),
   226  						}
   227  						fileList = append(fileList, s3URL.String())
   228  					}
   229  				}
   230  			}
   231  			return !lastPage
   232  		},
   233  	)
   234  	if err != nil {
   235  		return nil, errors.Wrap(err, `failed to list s3 bucket`)
   236  	}
   237  	if matchErr != nil {
   238  		return nil, errors.Wrap(matchErr, `failed to list s3 bucket`)
   239  	}
   240  
   241  	return fileList, nil
   242  }
   243  
   244  func (s *s3Storage) Delete(ctx context.Context, basename string) error {
   245  	return contextutil.RunWithTimeout(ctx, "delete s3 object",
   246  		timeoutSetting.Get(&s.settings.SV),
   247  		func(ctx context.Context) error {
   248  			_, err := s.s3.DeleteObjectWithContext(ctx, &s3.DeleteObjectInput{
   249  				Bucket: s.bucket,
   250  				Key:    aws.String(path.Join(s.prefix, basename)),
   251  			})
   252  			return err
   253  		})
   254  }
   255  
   256  func (s *s3Storage) Size(ctx context.Context, basename string) (int64, error) {
   257  	var out *s3.HeadObjectOutput
   258  	err := contextutil.RunWithTimeout(ctx, "get s3 object header",
   259  		timeoutSetting.Get(&s.settings.SV),
   260  		func(ctx context.Context) error {
   261  			var err error
   262  			out, err = s.s3.HeadObjectWithContext(ctx, &s3.HeadObjectInput{
   263  				Bucket: s.bucket,
   264  				Key:    aws.String(path.Join(s.prefix, basename)),
   265  			})
   266  			return err
   267  		})
   268  	if err != nil {
   269  		return 0, errors.Wrap(err, "failed to get s3 object headers")
   270  	}
   271  	return *out.ContentLength, nil
   272  }
   273  
   274  func (s *s3Storage) Close() error {
   275  	return nil
   276  }