github.com/cornelk/go-cloud@v0.17.1/blob/gcsblob/gcsblob.go (about)

     1  // Copyright 2018 The Go Cloud Development Kit Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     https://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package gcsblob provides a blob implementation that uses GCS. Use OpenBucket
    16  // to construct a *blob.Bucket.
    17  //
    18  // URLs
    19  //
    20  // For blob.OpenBucket, gcsblob registers for the scheme "gs".
// The default URL opener will create a connection using default
    22  // credentials from the environment, as described in
    23  // https://cloud.google.com/docs/authentication/production.
    24  // To customize the URL opener, or for more details on the URL format,
    25  // see URLOpener.
    26  // See https://github.com/cornelk/go-cloud/concepts/urls/ for background information.
    27  //
    28  // Escaping
    29  //
    30  // Go CDK supports all UTF-8 strings; to make this work with services lacking
    31  // full UTF-8 support, strings must be escaped (during writes) and unescaped
    32  // (during reads). The following escapes are performed for gcsblob:
    33  //  - Blob keys: ASCII characters 10 and 13 are escaped to "__0x<hex>__".
    34  //    Additionally, the "/" in "../" is escaped in the same way.
    35  //
    36  // As
    37  //
    38  // gcsblob exposes the following types for As:
    39  //  - Bucket: *storage.Client
    40  //  - Error: *googleapi.Error
    41  //  - ListObject: storage.ObjectAttrs
    42  //  - ListOptions.BeforeList: *storage.Query
    43  //  - Reader: *storage.Reader
    44  //  - ReaderOptions.BeforeRead: **storage.ObjectHandle, *storage.Reader
    45  //  - Attributes: storage.ObjectAttrs
    46  //  - CopyOptions.BeforeCopy: *CopyObjectHandles, *storage.Copier
    47  //  - WriterOptions.BeforeWrite: **storage.ObjectHandle, *storage.Writer
    48  package gcsblob // import "github.com/cornelk/go-cloud/blob/gcsblob"
    49  
    50  import (
    51  	"context"
    52  	"errors"
    53  	"fmt"
    54  	"io"
    55  	"io/ioutil"
    56  	"net/http"
    57  	"net/url"
    58  	"sort"
    59  	"strings"
    60  	"sync"
    61  	"time"
    62  
    63  	"cloud.google.com/go/storage"
    64  	"github.com/cornelk/go-cloud/blob"
    65  	"github.com/cornelk/go-cloud/blob/driver"
    66  	"github.com/cornelk/go-cloud/gcerrors"
    67  	"github.com/cornelk/go-cloud/gcp"
    68  	"github.com/cornelk/go-cloud/internal/escape"
    69  	"github.com/cornelk/go-cloud/internal/useragent"
    70  	"github.com/google/wire"
    71  	"google.golang.org/api/googleapi"
    72  	"google.golang.org/api/iterator"
    73  	"google.golang.org/api/option"
    74  )
    75  
// defaultPageSize is the page size ListPaged uses when
// driver.ListOptions.PageSize is zero.
const defaultPageSize = 1000
    77  
    78  func init() {
    79  	blob.DefaultURLMux().RegisterBucket(Scheme, new(lazyCredsOpener))
    80  }
    81  
// Set holds Wire providers for this package.
//
// It contains a wire.Struct provider for URLOpener that names only the
// Client field; the Options field is not listed.
var Set = wire.NewSet(
	wire.Struct(new(URLOpener), "Client"),
)
    86  
// lazyCredsOpener obtains Application Default Credentials on the first call
// to OpenBucketURL.
type lazyCredsOpener struct {
	// init guards the one-time credential lookup and client construction.
	init sync.Once
	// opener is the URLOpener built during initialization; it is only set
	// when initialization succeeded.
	opener *URLOpener
	// err records the initialization failure, if any. It is reported by
	// every call to OpenBucketURL.
	err error
}
    95  
    96  func (o *lazyCredsOpener) OpenBucketURL(ctx context.Context, u *url.URL) (*blob.Bucket, error) {
    97  	o.init.Do(func() {
    98  		creds, err := gcp.DefaultCredentials(ctx)
    99  		if err != nil {
   100  			o.err = err
   101  			return
   102  		}
   103  		client, err := gcp.NewHTTPClient(gcp.DefaultTransport(), creds.TokenSource)
   104  		if err != nil {
   105  			o.err = err
   106  			return
   107  		}
   108  		o.opener = &URLOpener{Client: client}
   109  	})
   110  	if o.err != nil {
   111  		return nil, fmt.Errorf("open bucket %v: %v", u, o.err)
   112  	}
   113  	return o.opener.OpenBucketURL(ctx, u)
   114  }
   115  
// Scheme is the URL scheme gcsblob registers its URL opener under on
// blob.DefaultURLMux.
const Scheme = "gs"
   119  
// URLOpener opens GCS URLs like "gs://mybucket".
//
// The URL host is used as the bucket name.
//
// The following query parameters are supported:
//
//   - access_id: sets Options.GoogleAccessID
//   - private_key_path: path to read for Options.PrivateKey
//
// A non-empty query parameter overrides the corresponding value from the
// Options field for that bucket only. Any other query parameter is rejected
// with an error.
type URLOpener struct {
	// Client must be set to a non-nil HTTP client authenticated with
	// Cloud Storage scope or equivalent.
	Client *gcp.HTTPClient

	// Options specifies the default options to pass to OpenBucket.
	Options Options
}
   136  
   137  // OpenBucketURL opens the GCS bucket with the same name as the URL's host.
   138  func (o *URLOpener) OpenBucketURL(ctx context.Context, u *url.URL) (*blob.Bucket, error) {
   139  	opts, err := o.forParams(ctx, u.Query())
   140  	if err != nil {
   141  		return nil, fmt.Errorf("open bucket %v: %v", u, err)
   142  	}
   143  	return OpenBucket(ctx, o.Client, u.Host, opts)
   144  }
   145  
   146  func (o *URLOpener) forParams(ctx context.Context, q url.Values) (*Options, error) {
   147  	for k := range q {
   148  		if k != "access_id" && k != "private_key_path" {
   149  			return nil, fmt.Errorf("invalid query parameter %q", k)
   150  		}
   151  	}
   152  	opts := new(Options)
   153  	*opts = o.Options
   154  	if accessID := q.Get("access_id"); accessID != "" {
   155  		opts.GoogleAccessID = accessID
   156  	}
   157  	if keyPath := q.Get("private_key_path"); keyPath != "" {
   158  		pk, err := ioutil.ReadFile(keyPath)
   159  		if err != nil {
   160  			return nil, err
   161  		}
   162  		opts.PrivateKey = pk
   163  	}
   164  	return opts, nil
   165  }
   166  
// Options sets options for constructing a *blob.Bucket backed by GCS.
//
// The fields are only consulted by SignedURL, which returns an error when
// GoogleAccessID is empty or when both PrivateKey and SignBytes are nil.
type Options struct {
	// GoogleAccessID represents the authorizer for SignedURL.
	// Required to use SignedURL.
	// See https://godoc.org/cloud.google.com/go/storage#SignedURLOptions.
	GoogleAccessID string

	// PrivateKey is the Google service account private key.
	// Exactly one of PrivateKey or SignBytes must be non-nil to use SignedURL.
	// See https://godoc.org/cloud.google.com/go/storage#SignedURLOptions.
	PrivateKey []byte

	// SignBytes is a function for implementing custom signing.
	// Exactly one of PrivateKey or SignBytes must be non-nil to use SignedURL.
	// See https://godoc.org/cloud.google.com/go/storage#SignedURLOptions.
	SignBytes func([]byte) ([]byte, error)
}
   184  
   185  // openBucket returns a GCS Bucket that communicates using the given HTTP client.
   186  func openBucket(ctx context.Context, client *gcp.HTTPClient, bucketName string, opts *Options) (*bucket, error) {
   187  	if client == nil {
   188  		return nil, errors.New("gcsblob.OpenBucket: client is required")
   189  	}
   190  	if bucketName == "" {
   191  		return nil, errors.New("gcsblob.OpenBucket: bucketName is required")
   192  	}
   193  	// We wrap the provided http.Client to add a Go CDK User-Agent.
   194  	c, err := storage.NewClient(ctx, option.WithHTTPClient(useragent.HTTPClient(&client.Client, "blob")))
   195  	if err != nil {
   196  		return nil, err
   197  	}
   198  	if opts == nil {
   199  		opts = &Options{}
   200  	}
   201  	return &bucket{name: bucketName, client: c, opts: opts}, nil
   202  }
   203  
   204  // OpenBucket returns a *blob.Bucket backed by an existing GCS bucket. See the
   205  // package documentation for an example.
   206  func OpenBucket(ctx context.Context, client *gcp.HTTPClient, bucketName string, opts *Options) (*blob.Bucket, error) {
   207  	drv, err := openBucket(ctx, client, bucketName, opts)
   208  	if err != nil {
   209  		return nil, err
   210  	}
   211  	return blob.NewBucket(drv), nil
   212  }
   213  
// bucket represents a GCS bucket, which handles read, write and delete operations
// on objects within it.
type bucket struct {
	// name is the GCS bucket name passed to every client.Bucket call.
	name string
	// client is the storage client used for all operations; it is exposed
	// through As.
	client *storage.Client
	// opts holds the signing options used by SignedURL. openBucket always
	// sets it to a non-nil value.
	opts *Options
}
   221  
// emptyBody is a ReadCloser over an empty string whose Close does nothing.
// NOTE(review): it is not referenced anywhere in the visible part of this
// file — confirm whether it is still needed.
var emptyBody = ioutil.NopCloser(strings.NewReader(""))
   223  
// reader reads a GCS object. It implements driver.Reader.
type reader struct {
	// body is the stream that Read and Close delegate to.
	body io.ReadCloser
	// attrs is the value returned (by pointer) from Attributes.
	attrs driver.ReaderAttributes
	// raw is the underlying storage reader exposed through As.
	raw *storage.Reader
}
   230  
   231  func (r *reader) Read(p []byte) (int, error) {
   232  	return r.body.Read(p)
   233  }
   234  
   235  // Close closes the reader itself. It must be called when done reading.
   236  func (r *reader) Close() error {
   237  	return r.body.Close()
   238  }
   239  
   240  func (r *reader) Attributes() *driver.ReaderAttributes {
   241  	return &r.attrs
   242  }
   243  
   244  func (r *reader) As(i interface{}) bool {
   245  	p, ok := i.(**storage.Reader)
   246  	if !ok {
   247  		return false
   248  	}
   249  	*p = r.raw
   250  	return true
   251  }
   252  
   253  func (b *bucket) ErrorCode(err error) gcerrors.ErrorCode {
   254  	if err == storage.ErrObjectNotExist {
   255  		return gcerrors.NotFound
   256  	}
   257  	if gerr, ok := err.(*googleapi.Error); ok {
   258  		switch gerr.Code {
   259  		case http.StatusNotFound:
   260  			return gcerrors.NotFound
   261  		case http.StatusPreconditionFailed:
   262  			return gcerrors.FailedPrecondition
   263  		}
   264  	}
   265  	return gcerrors.Unknown
   266  }
   267  
// Close always returns nil; it does not close the underlying storage client.
func (b *bucket) Close() error {
	return nil
}
   271  
   272  // ListPaged implements driver.ListPaged.
   273  func (b *bucket) ListPaged(ctx context.Context, opts *driver.ListOptions) (*driver.ListPage, error) {
   274  	bkt := b.client.Bucket(b.name)
   275  	query := &storage.Query{
   276  		Prefix:    escapeKey(opts.Prefix),
   277  		Delimiter: escapeKey(opts.Delimiter),
   278  	}
   279  	if opts.BeforeList != nil {
   280  		asFunc := func(i interface{}) bool {
   281  			p, ok := i.(**storage.Query)
   282  			if !ok {
   283  				return false
   284  			}
   285  			*p = query
   286  			return true
   287  		}
   288  		if err := opts.BeforeList(asFunc); err != nil {
   289  			return nil, err
   290  		}
   291  	}
   292  	pageSize := opts.PageSize
   293  	if pageSize == 0 {
   294  		pageSize = defaultPageSize
   295  	}
   296  	iter := bkt.Objects(ctx, query)
   297  	pager := iterator.NewPager(iter, pageSize, string(opts.PageToken))
   298  	var objects []*storage.ObjectAttrs
   299  	nextPageToken, err := pager.NextPage(&objects)
   300  	if err != nil {
   301  		return nil, err
   302  	}
   303  	page := driver.ListPage{NextPageToken: []byte(nextPageToken)}
   304  	if len(objects) > 0 {
   305  		page.Objects = make([]*driver.ListObject, len(objects))
   306  		for i, obj := range objects {
   307  			asFunc := func(i interface{}) bool {
   308  				p, ok := i.(*storage.ObjectAttrs)
   309  				if !ok {
   310  					return false
   311  				}
   312  				*p = *obj
   313  				return true
   314  			}
   315  			if obj.Prefix == "" {
   316  				// Regular blob.
   317  				page.Objects[i] = &driver.ListObject{
   318  					Key:     unescapeKey(obj.Name),
   319  					ModTime: obj.Updated,
   320  					Size:    obj.Size,
   321  					MD5:     obj.MD5,
   322  					AsFunc:  asFunc,
   323  				}
   324  			} else {
   325  				// "Directory".
   326  				page.Objects[i] = &driver.ListObject{
   327  					Key:    unescapeKey(obj.Prefix),
   328  					IsDir:  true,
   329  					AsFunc: asFunc,
   330  				}
   331  			}
   332  		}
   333  		// GCS always returns "directories" at the end; sort them.
   334  		sort.Slice(page.Objects, func(i, j int) bool {
   335  			return page.Objects[i].Key < page.Objects[j].Key
   336  		})
   337  	}
   338  	return &page, nil
   339  }
   340  
   341  // As implements driver.As.
   342  func (b *bucket) As(i interface{}) bool {
   343  	p, ok := i.(**storage.Client)
   344  	if !ok {
   345  		return false
   346  	}
   347  	*p = b.client
   348  	return true
   349  }
   350  
   351  // As implements driver.ErrorAs.
   352  func (b *bucket) ErrorAs(err error, i interface{}) bool {
   353  	switch v := err.(type) {
   354  	case *googleapi.Error:
   355  		if p, ok := i.(**googleapi.Error); ok {
   356  			*p = v
   357  			return true
   358  		}
   359  	}
   360  	return false
   361  }
   362  
   363  // Attributes implements driver.Attributes.
   364  func (b *bucket) Attributes(ctx context.Context, key string) (*driver.Attributes, error) {
   365  	key = escapeKey(key)
   366  	bkt := b.client.Bucket(b.name)
   367  	obj := bkt.Object(key)
   368  	attrs, err := obj.Attrs(ctx)
   369  	if err != nil {
   370  		return nil, err
   371  	}
   372  	return &driver.Attributes{
   373  		CacheControl:       attrs.CacheControl,
   374  		ContentDisposition: attrs.ContentDisposition,
   375  		ContentEncoding:    attrs.ContentEncoding,
   376  		ContentLanguage:    attrs.ContentLanguage,
   377  		ContentType:        attrs.ContentType,
   378  		Metadata:           attrs.Metadata,
   379  		ModTime:            attrs.Updated,
   380  		Size:               attrs.Size,
   381  		MD5:                attrs.MD5,
   382  		AsFunc: func(i interface{}) bool {
   383  			p, ok := i.(*storage.ObjectAttrs)
   384  			if !ok {
   385  				return false
   386  			}
   387  			*p = *attrs
   388  			return true
   389  		},
   390  	}, nil
   391  }
   392  
   393  // NewRangeReader implements driver.NewRangeReader.
   394  func (b *bucket) NewRangeReader(ctx context.Context, key string, offset, length int64, opts *driver.ReaderOptions) (driver.Reader, error) {
   395  	key = escapeKey(key)
   396  	bkt := b.client.Bucket(b.name)
   397  	obj := bkt.Object(key)
   398  
   399  	// Add an extra level of indirection so that BeforeRead can replace obj
   400  	// if needed. For example, ObjectHandle.If returns a new ObjectHandle.
   401  	// Also, make the Reader lazily in case this replacement happens.
   402  	objp := &obj
   403  	makeReader := func() (*storage.Reader, error) {
   404  		return (*objp).NewRangeReader(ctx, offset, length)
   405  	}
   406  
   407  	var r *storage.Reader
   408  	var rerr error
   409  	madeReader := false
   410  	if opts.BeforeRead != nil {
   411  		asFunc := func(i interface{}) bool {
   412  			if p, ok := i.(***storage.ObjectHandle); ok && !madeReader {
   413  				*p = objp
   414  				return true
   415  			}
   416  			if p, ok := i.(**storage.Reader); ok {
   417  				if !madeReader {
   418  					r, rerr = makeReader()
   419  					madeReader = true
   420  				}
   421  				*p = r
   422  				return true
   423  			}
   424  			return false
   425  		}
   426  		if err := opts.BeforeRead(asFunc); err != nil {
   427  			return nil, err
   428  		}
   429  	}
   430  	if !madeReader {
   431  		r, rerr = makeReader()
   432  	}
   433  	if rerr != nil {
   434  		return nil, rerr
   435  	}
   436  	modTime, _ := r.LastModified()
   437  	return &reader{
   438  		body: r,
   439  		attrs: driver.ReaderAttributes{
   440  			ContentType: r.ContentType(),
   441  			ModTime:     modTime,
   442  			Size:        r.Size(),
   443  		},
   444  		raw: r,
   445  	}, nil
   446  }
   447  
   448  // escapeKey does all required escaping for UTF-8 strings to work with GCS.
   449  func escapeKey(key string) string {
   450  	return escape.HexEscape(key, func(r []rune, i int) bool {
   451  		switch {
   452  		// GCS doesn't handle these characters (determined via experimentation).
   453  		case r[i] == 10 || r[i] == 13:
   454  			return true
   455  		// For "../", escape the trailing slash.
   456  		case i > 1 && r[i] == '/' && r[i-1] == '.' && r[i-2] == '.':
   457  			return true
   458  		}
   459  		return false
   460  	})
   461  }
   462  
   463  // unescapeKey reverses escapeKey.
   464  func unescapeKey(key string) string {
   465  	return escape.HexUnescape(key)
   466  }
   467  
   468  // NewTypedWriter implements driver.NewTypedWriter.
   469  func (b *bucket) NewTypedWriter(ctx context.Context, key string, contentType string, opts *driver.WriterOptions) (driver.Writer, error) {
   470  	key = escapeKey(key)
   471  	bkt := b.client.Bucket(b.name)
   472  	obj := bkt.Object(key)
   473  
   474  	// Add an extra level of indirection so that BeforeWrite can replace obj
   475  	// if needed. For example, ObjectHandle.If returns a new ObjectHandle.
   476  	// Also, make the Writer lazily in case this replacement happens.
   477  	objp := &obj
   478  	makeWriter := func() *storage.Writer {
   479  		w := (*objp).NewWriter(ctx)
   480  		w.CacheControl = opts.CacheControl
   481  		w.ContentDisposition = opts.ContentDisposition
   482  		w.ContentEncoding = opts.ContentEncoding
   483  		w.ContentLanguage = opts.ContentLanguage
   484  		w.ContentType = contentType
   485  		w.ChunkSize = bufferSize(opts.BufferSize)
   486  		w.Metadata = opts.Metadata
   487  		w.MD5 = opts.ContentMD5
   488  		return w
   489  	}
   490  
   491  	var w *storage.Writer
   492  	if opts.BeforeWrite != nil {
   493  		asFunc := func(i interface{}) bool {
   494  			if p, ok := i.(***storage.ObjectHandle); ok && w == nil {
   495  				*p = objp
   496  				return true
   497  			}
   498  			if p, ok := i.(**storage.Writer); ok {
   499  				if w == nil {
   500  					w = makeWriter()
   501  				}
   502  				*p = w
   503  				return true
   504  			}
   505  			return false
   506  		}
   507  		if err := opts.BeforeWrite(asFunc); err != nil {
   508  			return nil, err
   509  		}
   510  	}
   511  	if w == nil {
   512  		w = makeWriter()
   513  	}
   514  	return w, nil
   515  }
   516  
// CopyObjectHandles holds the ObjectHandles for the destination and source
// of a Copy. It is used by the BeforeCopy As hook.
//
// The hook receives a pointer to the value Copy itself uses, so replacing
// Dst or Src before the Copier is created changes the handles the Copier is
// built from.
type CopyObjectHandles struct {
	Dst, Src *storage.ObjectHandle
}
   522  
   523  // Copy implements driver.Copy.
   524  func (b *bucket) Copy(ctx context.Context, dstKey, srcKey string, opts *driver.CopyOptions) error {
   525  	dstKey = escapeKey(dstKey)
   526  	srcKey = escapeKey(srcKey)
   527  	bkt := b.client.Bucket(b.name)
   528  
   529  	// Add an extra level of indirection so that BeforeCopy can replace the
   530  	// dst or src ObjectHandles if needed.
   531  	// Also, make the Copier lazily in case this replacement happens.
   532  	handles := CopyObjectHandles{
   533  		Dst: bkt.Object(dstKey),
   534  		Src: bkt.Object(srcKey),
   535  	}
   536  	makeCopier := func() *storage.Copier {
   537  		return handles.Dst.CopierFrom(handles.Src)
   538  	}
   539  
   540  	var copier *storage.Copier
   541  	if opts.BeforeCopy != nil {
   542  		asFunc := func(i interface{}) bool {
   543  			if p, ok := i.(**CopyObjectHandles); ok && copier == nil {
   544  				*p = &handles
   545  				return true
   546  			}
   547  			if p, ok := i.(**storage.Copier); ok {
   548  				if copier == nil {
   549  					copier = makeCopier()
   550  				}
   551  				*p = copier
   552  				return true
   553  			}
   554  			return false
   555  		}
   556  		if err := opts.BeforeCopy(asFunc); err != nil {
   557  			return err
   558  		}
   559  	}
   560  	if copier == nil {
   561  		copier = makeCopier()
   562  	}
   563  	_, err := copier.Run(ctx)
   564  	return err
   565  }
   566  
   567  // Delete implements driver.Delete.
   568  func (b *bucket) Delete(ctx context.Context, key string) error {
   569  	key = escapeKey(key)
   570  	bkt := b.client.Bucket(b.name)
   571  	obj := bkt.Object(key)
   572  	return obj.Delete(ctx)
   573  }
   574  
   575  func (b *bucket) SignedURL(ctx context.Context, key string, dopts *driver.SignedURLOptions) (string, error) {
   576  	if b.opts.GoogleAccessID == "" || (b.opts.PrivateKey == nil && b.opts.SignBytes == nil) {
   577  		return "", errors.New("to use SignedURL, you must call OpenBucket with a valid Options.GoogleAccessID and exactly one of Options.PrivateKey or Options.SignBytes")
   578  	}
   579  	key = escapeKey(key)
   580  	opts := &storage.SignedURLOptions{
   581  		Expires:        time.Now().Add(dopts.Expiry),
   582  		Method:         dopts.Method,
   583  		GoogleAccessID: b.opts.GoogleAccessID,
   584  		PrivateKey:     b.opts.PrivateKey,
   585  		SignBytes:      b.opts.SignBytes,
   586  	}
   587  	return storage.SignedURL(b.name, key, opts)
   588  }
   589  
   590  func bufferSize(size int) int {
   591  	if size == 0 {
   592  		return googleapi.DefaultUploadChunkSize
   593  	} else if size > 0 {
   594  		return size
   595  	}
   596  	return 0 // disable buffering
   597  }