github.com/grailbio/base@v0.0.11/file/s3file/s3file.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache-2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package s3file implements grail file interface for S3.
     6  package s3file
     7  
     8  import (
     9  	"context"
    10  	"fmt"
    11  	"net/http"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/aws/aws-sdk-go/aws"
    16  	awsrequest "github.com/aws/aws-sdk-go/aws/request"
    17  	"github.com/aws/aws-sdk-go/service/s3"
    18  	"github.com/aws/aws-sdk-go/service/s3/s3iface"
    19  	"github.com/grailbio/base/errors"
    20  	"github.com/grailbio/base/file"
    21  )
    22  
const (
	Scheme        = "s3"    // Scheme is the URL scheme of S3 paths, as in "s3://bucket/key".
	pathSeparator = "/"     // pathSeparator is what ParseURL splits on to separate the bucket from the key.
	pathPrefix    = "s3://" // pathPrefix is the scheme followed by "://"; it is not referenced in this part of the file.
)
    28  
// Options defines the options that can be given when creating an s3Impl
// through NewImplementation.
type Options struct {
	// ServerSideEncryption allows you to set the `ServerSideEncryption` value to use when
	// uploading files (e.g. "AES256").
	ServerSideEncryption string
}
    35  
// s3Impl is the value returned by NewImplementation as a file.Implementation.
type s3Impl struct {
	// clientsForAction is called with (ctx, action, bucket, key), where action
	// is an S3 API name such as "DeleteObject", to obtain the clients used for
	// that request.
	clientsForAction clientsForActionFunc
	// options holds the Options given to NewImplementation; it is passed to
	// newUploader when a file is opened for writing.
	options          Options
}
    40  
    41  // NewImplementation creates a new file.Implementation for S3. The provider is
    42  // called to create s3 client objects.
    43  func NewImplementation(provider SessionProvider, opts Options) file.Implementation {
    44  	metricAutolog()
    45  	return &s3Impl{newClientCache(provider).forAction, opts}
    46  }
    47  
    48  // Run handler in a separate goroutine, then wait for either the handler to
    49  // finish, or ctx to be cancelled.
    50  func runRequest(ctx context.Context, handler func() response) response {
    51  	ch := make(chan response)
    52  	go func() {
    53  		ch <- handler()
    54  		close(ch)
    55  	}()
    56  	select {
    57  	case res := <-ch:
    58  		return res
    59  	case <-ctx.Done():
    60  		return response{err: errors.E(errors.Canceled)}
    61  	}
    62  }
    63  
    64  // String implements a human-readable description.
    65  func (impl *s3Impl) String() string { return "s3" }
    66  
    67  // Open opens a file for reading. The provided path should be of form
    68  // "bucket/key..."
    69  func (impl *s3Impl) Open(ctx context.Context, path string, opts ...file.Opts) (file.File, error) {
    70  	f, err := impl.internalOpen(ctx, path, readonly, opts...)
    71  	res := f.runRequest(ctx, request{reqType: statRequest})
    72  	if res.err != nil {
    73  		return nil, res.err
    74  	}
    75  	return f, err
    76  }
    77  
    78  // Create opens a file for writing.
    79  func (impl *s3Impl) Create(ctx context.Context, path string, opts ...file.Opts) (file.File, error) {
    80  	return impl.internalOpen(ctx, path, writeonly, opts...)
    81  }
    82  
// accessMode records whether an s3File was opened for reading or for writing.
type accessMode int

const (
	readonly  accessMode = iota // file is opened by Open.
	writeonly                   // file is opened by Create.
)
    89  
    90  func (impl *s3Impl) internalOpen(ctx context.Context, path string, mode accessMode, optsList ...file.Opts) (*s3File, error) {
    91  	opts := mergeFileOpts(optsList)
    92  	_, bucket, key, err := ParseURL(path)
    93  	if err != nil {
    94  		return nil, err
    95  	}
    96  	var uploader *s3Uploader
    97  	if mode == writeonly {
    98  		resp := runRequest(ctx, func() response {
    99  			u, err := newUploader(ctx, impl.clientsForAction, impl.options, path, bucket, key, opts)
   100  			return response{uploader: u, err: err}
   101  		})
   102  		if resp.err != nil {
   103  			return nil, resp.err
   104  		}
   105  		uploader = resp.uploader
   106  	}
   107  	f := &s3File{
   108  		name:             path,
   109  		mode:             mode,
   110  		opts:             opts,
   111  		clientsForAction: impl.clientsForAction,
   112  		bucket:           bucket,
   113  		key:              key,
   114  		uploader:         uploader,
   115  		reqCh:            make(chan request, 16),
   116  	}
   117  	go f.handleRequests()
   118  	return f, err
   119  }
   120  
   121  // Remove implements file.Implementation interface.
   122  func (impl *s3Impl) Remove(ctx context.Context, path string) error {
   123  	resp := runRequest(ctx, func() response {
   124  		_, bucket, key, err := ParseURL(path)
   125  		if err != nil {
   126  			return response{err: err}
   127  		}
   128  		clients, err := impl.clientsForAction(ctx, "DeleteObject", bucket, key)
   129  		if err != nil {
   130  			return response{err: errors.E(err, "s3file.remove", path)}
   131  		}
   132  		policy := newBackoffPolicy(clients, file.Opts{})
   133  		for {
   134  			var ids s3RequestIDs
   135  			_, err = policy.client().DeleteObjectWithContext(ctx, &s3.DeleteObjectInput{Bucket: aws.String(bucket), Key: aws.String(key)},
   136  				ids.captureOption())
   137  			if policy.shouldRetry(ctx, err, path) {
   138  				continue
   139  			}
   140  			if err != nil {
   141  				err = annotate(err, ids, &policy, "s3file.remove", path)
   142  			}
   143  			return response{err: err}
   144  		}
   145  	})
   146  	return resp.err
   147  }
   148  
   149  // Presign implements file.Implementation interface.
   150  func (impl *s3Impl) Presign(ctx context.Context, path, method string, expiry time.Duration) (string, error) {
   151  	resp := runRequest(ctx, func() response {
   152  		_, bucket, key, err := ParseURL(path)
   153  		if err != nil {
   154  			return response{err: err}
   155  		}
   156  		var action string
   157  		var getRequestFn func(client s3iface.S3API) *awsrequest.Request
   158  		switch method {
   159  		case http.MethodGet:
   160  			action = "GetObject"
   161  			getRequestFn = func(client s3iface.S3API) *awsrequest.Request {
   162  				req, _ := client.GetObjectRequest(&s3.GetObjectInput{Bucket: &bucket, Key: &key})
   163  				return req
   164  			}
   165  		case http.MethodPut:
   166  			action = "PutObject"
   167  			getRequestFn = func(client s3iface.S3API) *awsrequest.Request {
   168  				req, _ := client.PutObjectRequest(&s3.PutObjectInput{Bucket: &bucket, Key: &key})
   169  				return req
   170  			}
   171  		case http.MethodDelete:
   172  			action = "DeleteObject"
   173  			getRequestFn = func(client s3iface.S3API) *awsrequest.Request {
   174  				req, _ := client.DeleteObjectRequest(&s3.DeleteObjectInput{Bucket: &bucket, Key: &key})
   175  				return req
   176  			}
   177  		default:
   178  			return response{err: errors.E(errors.NotSupported, "s3file.presign: unsupported http method", method)}
   179  		}
   180  		clients, err := impl.clientsForAction(ctx, action, bucket, key)
   181  		if err != nil {
   182  			return response{err: err}
   183  		}
   184  		policy := newBackoffPolicy(clients, file.Opts{})
   185  		for {
   186  			var ids s3RequestIDs
   187  			req := getRequestFn(policy.client())
   188  			req.ApplyOptions(ids.captureOption())
   189  			url, err := req.Presign(expiry)
   190  			if policy.shouldRetry(ctx, err, path) {
   191  				continue
   192  			}
   193  			if err != nil {
   194  				return response{err: annotate(err, ids, &policy, fmt.Sprintf("s3file.presign %s", path))}
   195  			}
   196  			return response{signedURL: url}
   197  		}
   198  	})
   199  	return resp.signedURL, resp.err
   200  }
   201  
   202  // ParseURL parses a path of form "s3://grail-bucket/dir/file" and returns
   203  // ("s3", "grail-bucket", "dir/file", nil).
   204  func ParseURL(url string) (scheme, bucket, key string, err error) {
   205  	var suffix string
   206  	scheme, suffix, err = file.ParsePath(url)
   207  	if err != nil {
   208  		return "", "", "", err
   209  	}
   210  	parts := strings.SplitN(suffix, pathSeparator, 2)
   211  	if len(parts) == 1 {
   212  		return scheme, parts[0], "", nil
   213  	}
   214  	return scheme, parts[0], parts[1], nil
   215  }
   216  
   217  func mergeFileOpts(opts []file.Opts) (o file.Opts) {
   218  	switch len(opts) {
   219  	case 0:
   220  	case 1:
   221  		o = opts[0]
   222  	default:
   223  		panic(fmt.Sprintf("More than one options specified: %+v", opts))
   224  	}
   225  	return
   226  }