github.com/cornelk/go-cloud@v0.17.1/blob/blob.go (about)

     1  // Copyright 2018 The Go Cloud Development Kit Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     https://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package blob provides an easy and portable way to interact with blobs
    16  // within a storage location. Subpackages contain driver implementations of
    17  // blob for supported services.
    18  //
    19  // See https://github.com/cornelk/go-cloud/howto/blob/ for a detailed how-to guide.
    20  //
    21  //
    22  // Errors
    23  //
    24  // The errors returned from this package can be inspected in several ways:
    25  //
    26  // The Code function from github.com/cornelk/go-cloud/gcerrors will return an error code, also
    27  // defined in that package, when invoked on an error.
    28  //
    29  // The Bucket.ErrorAs method can retrieve the driver error underlying the returned
    30  // error.
    31  //
    32  //
    33  // OpenCensus Integration
    34  //
    35  // OpenCensus supports tracing and metric collection for multiple languages and
    36  // backend providers. See https://opencensus.io.
    37  //
    38  // This API collects OpenCensus traces and metrics for the following methods:
    39  //  - Attributes
    40  //  - Copy
    41  //  - Delete
    42  //  - NewRangeReader, from creation until the call to Close. (NewReader and ReadAll
    43  //    are included because they call NewRangeReader.)
    44  //  - NewWriter, from creation until the call to Close.
    45  // All trace and metric names begin with the package import path.
    46  // The traces add the method name.
    47  // For example, "github.com/cornelk/go-cloud/blob/Attributes".
    48  // The metrics are "completed_calls", a count of completed method calls by driver,
    49  // method and status (error code); and "latency", a distribution of method latency
    50  // by driver and method.
    51  // For example, "github.com/cornelk/go-cloud/blob/latency".
    52  //
    53  // It also collects the following metrics:
    54  //  - github.com/cornelk/go-cloud/blob/bytes_read: the total number of bytes read, by driver.
    55  //  - github.com/cornelk/go-cloud/blob/bytes_written: the total number of bytes written, by driver.
    56  //
    57  // To enable trace collection in your application, see "Configure Exporter" at
    58  // https://opencensus.io/quickstart/go/tracing.
    59  // To enable metric collection in your application, see "Exporting stats" at
    60  // https://opencensus.io/quickstart/go/metrics.
    61  package blob // import "github.com/cornelk/go-cloud/blob"
    62  
    63  import (
    64  	"bytes"
    65  	"context"
    66  	"crypto/md5"
    67  	"fmt"
    68  	"hash"
    69  	"io"
    70  	"io/ioutil"
    71  	"log"
    72  	"mime"
    73  	"net/http"
    74  	"net/url"
    75  	"runtime"
    76  	"strings"
    77  	"sync"
    78  	"time"
    79  	"unicode/utf8"
    80  
    81  	"github.com/cornelk/go-cloud/blob/driver"
    82  	"github.com/cornelk/go-cloud/gcerrors"
    83  	"github.com/cornelk/go-cloud/internal/gcerr"
    84  	"github.com/cornelk/go-cloud/internal/oc"
    85  	"github.com/cornelk/go-cloud/internal/openurl"
    86  	"go.opencensus.io/stats"
    87  	"go.opencensus.io/stats/view"
    88  	"go.opencensus.io/tag"
    89  )
    90  
// Reader reads bytes from a blob.
// It implements io.ReadCloser, and must be closed after
// reads are finished.
type Reader struct {
	b        driver.Bucket // bucket the blob lives in; handed to wrapError when reporting errors
	r        driver.Reader // driver reader that Read, Close, As and the attribute getters delegate to
	key      string        // key of the blob being read; handed to wrapError when reporting errors
	end      func(error) // called at Close to finish trace and metric collection
	provider string      // for metric collection; refers to driver package
	closed   bool          // set by Close; inspected by the finalizer that reports unclosed Readers
}
   102  
   103  // Read implements io.Reader (https://golang.org/pkg/io/#Reader).
   104  func (r *Reader) Read(p []byte) (int, error) {
   105  	n, err := r.r.Read(p)
   106  	stats.RecordWithTags(context.Background(), []tag.Mutator{tag.Upsert(oc.ProviderKey, r.provider)},
   107  		bytesReadMeasure.M(int64(n)))
   108  	return n, wrapError(r.b, err, r.key)
   109  }
   110  
   111  // Close implements io.Closer (https://golang.org/pkg/io/#Closer).
   112  func (r *Reader) Close() error {
   113  	r.closed = true
   114  	err := wrapError(r.b, r.r.Close(), r.key)
   115  	r.end(err)
   116  	return err
   117  }
   118  
   119  // ContentType returns the MIME type of the blob.
   120  func (r *Reader) ContentType() string {
   121  	return r.r.Attributes().ContentType
   122  }
   123  
   124  // ModTime returns the time the blob was last modified.
   125  func (r *Reader) ModTime() time.Time {
   126  	return r.r.Attributes().ModTime
   127  }
   128  
   129  // Size returns the size of the blob content in bytes.
   130  func (r *Reader) Size() int64 {
   131  	return r.r.Attributes().Size
   132  }
   133  
   134  // As converts i to driver-specific types.
   135  // See https://github.com/cornelk/go-cloud/concepts/as/ for background information, the "As"
   136  // examples in this package for examples, and the driver package
   137  // documentation for the specific types supported for that driver.
   138  func (r *Reader) As(i interface{}) bool {
   139  	return r.r.As(i)
   140  }
   141  
// Attributes contains attributes about a blob.
type Attributes struct {
	// CacheControl specifies caching attributes that services may use
	// when serving the blob.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control
	CacheControl string
	// ContentDisposition specifies whether the blob content is expected to be
	// displayed inline or as an attachment.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition
	ContentDisposition string
	// ContentEncoding specifies the encoding used for the blob's content, if any.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding
	ContentEncoding string
	// ContentLanguage specifies the language used in the blob's content, if any.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Language
	ContentLanguage string
	// ContentType is the MIME type of the blob. It will not be empty.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type
	ContentType string
	// Metadata holds key/value pairs associated with the blob.
	// Keys are guaranteed to be in lowercase, even if the backend service
	// has case-sensitive keys (although note that Metadata written via
	// this package will always be lowercased). If there are duplicate
	// case-insensitive keys (e.g., "foo" and "FOO"), only one value
	// will be kept, and it is undefined which one.
	Metadata map[string]string
	// ModTime is the time the blob was last modified.
	ModTime time.Time
	// Size is the size of the blob's content in bytes.
	Size int64
	// MD5 is an MD5 hash of the blob contents or nil if not available.
	MD5 []byte

	// asFunc is the driver-supplied conversion hook used by As. It may be
	// nil, in which case As reports false.
	asFunc func(interface{}) bool
}
   177  
   178  // As converts i to driver-specific types.
   179  // See https://github.com/cornelk/go-cloud/concepts/as/ for background information, the "As"
   180  // examples in this package for examples, and the driver package
   181  // documentation for the specific types supported for that driver.
   182  func (a *Attributes) As(i interface{}) bool {
   183  	if a.asFunc == nil {
   184  		return false
   185  	}
   186  	return a.asFunc(i)
   187  }
   188  
// Writer writes bytes to a blob.
//
// It implements io.WriteCloser (https://golang.org/pkg/io/#WriteCloser), and must be
// closed after all writes are done.
type Writer struct {
	b          driver.Bucket // bucket the blob is written to; also handed to wrapError
	w          driver.Writer // driver writer; nil until it has been created (see open)
	key        string        // key of the blob being written
	end        func(error) // called at Close to finish trace and metric collection
	cancel     func()      // cancels the ctx provided to NewTypedWriter; Close calls it on every path, and before the driver Close when contentMD5 verification fails
	contentMD5 []byte        // MD5 expected by the caller; when non-empty, Close compares it with md5hash
	md5hash    hash.Hash     // hash of the bytes passed to Write; only fed when contentMD5 is non-empty
	provider   string // for metric collection, refers to driver package name
	closed     bool          // set by Close; inspected by the finalizer that reports unclosed Writers

	// These fields are non-zero values only when w is nil (not yet created).
	//
	// A ctx is stored in the Writer since we need to pass it into NewTypedWriter
	// when we finish detecting the content type of the blob and create the
	// underlying driver.Writer. This step happens inside Write or Close and
	// neither of them take a context.Context as an argument.
	//
	// All 3 fields are only initialized when we create the Writer without
	// setting the w field, and are reset to zero values after w is created.
	ctx  context.Context
	opts *driver.WriterOptions
	buf  *bytes.Buffer
}
   217  
// sniffLen is the byte size of Writer.buf used to detect content-type.
// Write buffers data until at least this many bytes are available, then
// hands them to open for content-type detection.
const sniffLen = 512
   220  
   221  // Write implements the io.Writer interface (https://golang.org/pkg/io/#Writer).
   222  //
   223  // Writes may happen asynchronously, so the returned error can be nil
   224  // even if the actual write eventually fails. The write is only guaranteed to
   225  // have succeeded if Close returns no error.
   226  func (w *Writer) Write(p []byte) (int, error) {
   227  	if len(w.contentMD5) > 0 {
   228  		if _, err := w.md5hash.Write(p); err != nil {
   229  			return 0, err
   230  		}
   231  	}
   232  	if w.w != nil {
   233  		return w.write(p)
   234  	}
   235  
   236  	// If w is not yet created due to no content-type being passed in, try to sniff
   237  	// the MIME type based on at most 512 bytes of the blob content of p.
   238  
   239  	// Detect the content-type directly if the first chunk is at least 512 bytes.
   240  	if w.buf.Len() == 0 && len(p) >= sniffLen {
   241  		return w.open(p)
   242  	}
   243  
   244  	// Store p in w.buf and detect the content-type when the size of content in
   245  	// w.buf is at least 512 bytes.
   246  	n, err := w.buf.Write(p)
   247  	if err != nil {
   248  		return 0, err
   249  	}
   250  	if w.buf.Len() >= sniffLen {
   251  		// Note that w.open will return the full length of the buffer; we don't want
   252  		// to return that as the length of this write since some of them were written in
   253  		// previous writes. Instead, we return the n from this write, above.
   254  		_, err := w.open(w.buf.Bytes())
   255  		return n, err
   256  	}
   257  	return n, nil
   258  }
   259  
   260  // Close closes the blob writer. The write operation is not guaranteed to have succeeded until
   261  // Close returns with no error.
   262  // Close may return an error if the context provided to create the Writer is
   263  // canceled or reaches its deadline.
   264  func (w *Writer) Close() (err error) {
   265  	w.closed = true
   266  	defer func() { w.end(err) }()
   267  	if len(w.contentMD5) > 0 {
   268  		// Verify the MD5 hash of what was written matches the ContentMD5 provided
   269  		// by the user.
   270  		md5sum := w.md5hash.Sum(nil)
   271  		if !bytes.Equal(md5sum, w.contentMD5) {
   272  			// No match! Return an error, but first cancel the context and call the
   273  			// driver's Close function to ensure the write is aborted.
   274  			w.cancel()
   275  			if w.w != nil {
   276  				_ = w.w.Close()
   277  			}
   278  			return gcerr.Newf(gcerr.FailedPrecondition, nil, "blob: the WriterOptions.ContentMD5 you specified (%X) did not match what was written (%X)", w.contentMD5, md5sum)
   279  		}
   280  	}
   281  
   282  	defer w.cancel()
   283  	if w.w != nil {
   284  		return wrapError(w.b, w.w.Close(), w.key)
   285  	}
   286  	if _, err := w.open(w.buf.Bytes()); err != nil {
   287  		return err
   288  	}
   289  	return wrapError(w.b, w.w.Close(), w.key)
   290  }
   291  
   292  // open tries to detect the MIME type of p and write it to the blob.
   293  // The error it returns is wrapped.
   294  func (w *Writer) open(p []byte) (int, error) {
   295  	ct := http.DetectContentType(p)
   296  	var err error
   297  	if w.w, err = w.b.NewTypedWriter(w.ctx, w.key, ct, w.opts); err != nil {
   298  		return 0, wrapError(w.b, err, w.key)
   299  	}
   300  	// Set the 3 fields needed for lazy NewTypedWriter back to zero values
   301  	// (see the comment on Writer).
   302  	w.buf = nil
   303  	w.ctx = nil
   304  	w.opts = nil
   305  	return w.write(p)
   306  }
   307  
   308  func (w *Writer) write(p []byte) (int, error) {
   309  	n, err := w.w.Write(p)
   310  	stats.RecordWithTags(context.Background(), []tag.Mutator{tag.Upsert(oc.ProviderKey, w.provider)},
   311  		bytesWrittenMeasure.M(int64(n)))
   312  	return n, wrapError(w.b, err, w.key)
   313  }
   314  
// ListOptions sets options for listing blobs via Bucket.List.
type ListOptions struct {
	// Prefix indicates that only blobs with a key starting with this prefix
	// should be returned.
	Prefix string
	// Delimiter sets the delimiter used to define a hierarchical namespace,
	// like a filesystem with "directories". It is highly recommended that you
	// use "" or "/" as the Delimiter. Other values should work through this API,
	// but service UIs generally assume "/".
	//
	// An empty delimiter means that the bucket is treated as a single flat
	// namespace.
	//
	// A non-empty delimiter means that any result with the delimiter in its key
	// after Prefix is stripped will be returned with ListObject.IsDir = true,
	// ListObject.Key truncated after the delimiter, and zero values for other
	// ListObject fields. These results represent "directories". Multiple results
	// in a "directory" are returned as a single result.
	Delimiter string

	// BeforeList is a callback that will be called before each call to the
	// underlying service's list functionality.
	// asFunc converts its argument to driver-specific types.
	// See https://github.com/cornelk/go-cloud/concepts/as/ for background information.
	BeforeList func(asFunc func(interface{}) bool) error
}
   341  
// ListIterator iterates over List results.
type ListIterator struct {
	b       *Bucket             // bucket being listed; its lock and closed flag are consulted when fetching a page
	opts    *driver.ListOptions // options passed to the driver; PageToken is updated as paging advances
	page    *driver.ListPage    // most recently fetched page, or nil before the first fetch
	nextIdx int                 // index into page.Objects of the next object to return
}
   349  
   350  // Next returns a *ListObject for the next blob. It returns (nil, io.EOF) if
   351  // there are no more.
   352  func (i *ListIterator) Next(ctx context.Context) (*ListObject, error) {
   353  	if i.page != nil {
   354  		// We've already got a page of results.
   355  		if i.nextIdx < len(i.page.Objects) {
   356  			// Next object is in the page; return it.
   357  			dobj := i.page.Objects[i.nextIdx]
   358  			i.nextIdx++
   359  			return &ListObject{
   360  				Key:     dobj.Key,
   361  				ModTime: dobj.ModTime,
   362  				Size:    dobj.Size,
   363  				MD5:     dobj.MD5,
   364  				IsDir:   dobj.IsDir,
   365  				asFunc:  dobj.AsFunc,
   366  			}, nil
   367  		}
   368  		if len(i.page.NextPageToken) == 0 {
   369  			// Done with current page, and there are no more; return io.EOF.
   370  			return nil, io.EOF
   371  		}
   372  		// We need to load the next page.
   373  		i.opts.PageToken = i.page.NextPageToken
   374  	}
   375  	i.b.mu.RLock()
   376  	defer i.b.mu.RUnlock()
   377  	if i.b.closed {
   378  		return nil, errClosed
   379  	}
   380  	// Loading a new page.
   381  	p, err := i.b.b.ListPaged(ctx, i.opts)
   382  	if err != nil {
   383  		return nil, wrapError(i.b.b, err, "")
   384  	}
   385  	i.page = p
   386  	i.nextIdx = 0
   387  	return i.Next(ctx)
   388  }
   389  
// ListObject represents a single blob returned from List.
type ListObject struct {
	// Key is the key for this blob.
	Key string
	// ModTime is the time the blob was last modified.
	ModTime time.Time
	// Size is the size of the blob's content in bytes.
	Size int64
	// MD5 is an MD5 hash of the blob contents or nil if not available.
	MD5 []byte
	// IsDir indicates that this result represents a "directory" in the
	// hierarchical namespace, ending in ListOptions.Delimiter. Key can be
	// passed as ListOptions.Prefix to list items in the "directory".
	// Fields other than Key and IsDir will not be set if IsDir is true.
	IsDir bool

	// asFunc is the driver-supplied conversion hook used by As. It may be
	// nil, in which case As reports false.
	asFunc func(interface{}) bool
}
   408  
   409  // As converts i to driver-specific types.
   410  // See https://github.com/cornelk/go-cloud/concepts/as/ for background information, the "As"
   411  // examples in this package for examples, and the driver package
   412  // documentation for the specific types supported for that driver.
   413  func (o *ListObject) As(i interface{}) bool {
   414  	if o.asFunc == nil {
   415  		return false
   416  	}
   417  	return o.asFunc(i)
   418  }
   419  
// Bucket provides an easy and portable way to interact with blobs
// within a "bucket", including read, write, and list operations.
// To create a Bucket, use constructors found in driver subpackages.
type Bucket struct {
	b      driver.Bucket // driver implementation that every operation delegates to
	tracer *oc.Tracer    // starts and ends the OpenCensus trace/metric collection for each call

	// mu protects the closed variable.
	// Read locks are kept to allow holding a read lock for long-running calls,
	// and thereby prevent closing until a call finishes.
	mu     sync.RWMutex
	closed bool
}
   433  
// pkgName is the package import path; it prefixes every trace and metric name.
const pkgName = "github.com/cornelk/go-cloud/blob"

var (
	// latencyMeasure is the latency measure handed to the Tracer of every Bucket.
	latencyMeasure = oc.LatencyMeasure(pkgName)
	// bytesReadMeasure is recorded by Reader.Read with the byte count of each read.
	bytesReadMeasure = stats.Int64(pkgName+"/bytes_read", "Total bytes read", stats.UnitBytes)
	// bytesWrittenMeasure is recorded by Writer.write with the byte count of each write.
	bytesWrittenMeasure = stats.Int64(pkgName+"/bytes_written", "Total bytes written", stats.UnitBytes)

	// OpenCensusViews are predefined views for OpenCensus metrics.
	// The views include counts and latency distributions for API method calls,
	// and total bytes read and written.
	// See the example at https://godoc.org/go.opencensus.io/stats/view for usage.
	OpenCensusViews = append(
		oc.Views(pkgName, latencyMeasure),
		&view.View{
			Name:        pkgName + "/bytes_read",
			Measure:     bytesReadMeasure,
			Description: "Sum of bytes read from the service.",
			TagKeys:     []tag.Key{oc.ProviderKey},
			Aggregation: view.Sum(),
		},
		&view.View{
			Name:        pkgName + "/bytes_written",
			Measure:     bytesWrittenMeasure,
			Description: "Sum of bytes written to the service.",
			TagKeys:     []tag.Key{oc.ProviderKey},
			Aggregation: view.Sum(),
		})
)
   462  
// NewBucket is intended for use by drivers only. Do not use in application code.
// It is an exported alias for newBucket.
var NewBucket = newBucket
   465  
   466  // newBucket creates a new *Bucket based on a specific driver implementation.
   467  // End users should use subpackages to construct a *Bucket instead of this
   468  // function; see the package documentation for details.
   469  func newBucket(b driver.Bucket) *Bucket {
   470  	return &Bucket{
   471  		b: b,
   472  		tracer: &oc.Tracer{
   473  			Package:        pkgName,
   474  			Provider:       oc.ProviderName(b),
   475  			LatencyMeasure: latencyMeasure,
   476  		},
   477  	}
   478  }
   479  
   480  // As converts i to driver-specific types.
   481  // See https://github.com/cornelk/go-cloud/concepts/as/ for background information, the "As"
   482  // examples in this package for examples, and the driver package
   483  // documentation for the specific types supported for that driver.
   484  func (b *Bucket) As(i interface{}) bool {
   485  	if i == nil {
   486  		return false
   487  	}
   488  	return b.b.As(i)
   489  }
   490  
   491  // ErrorAs converts err to driver-specific types.
   492  // ErrorAs panics if i is nil or not a pointer.
   493  // ErrorAs returns false if err == nil.
   494  // See https://github.com/cornelk/go-cloud/concepts/as/ for background information.
   495  func (b *Bucket) ErrorAs(err error, i interface{}) bool {
   496  	return gcerr.ErrorAs(err, i, b.b.ErrorAs)
   497  }
   498  
   499  // ReadAll is a shortcut for creating a Reader via NewReader with nil
   500  // ReaderOptions, and reading the entire blob.
   501  func (b *Bucket) ReadAll(ctx context.Context, key string) (_ []byte, err error) {
   502  	b.mu.RLock()
   503  	defer b.mu.RUnlock()
   504  	if b.closed {
   505  		return nil, errClosed
   506  	}
   507  	r, err := b.NewReader(ctx, key, nil)
   508  	if err != nil {
   509  		return nil, err
   510  	}
   511  	defer r.Close()
   512  	return ioutil.ReadAll(r)
   513  }
   514  
   515  // List returns a ListIterator that can be used to iterate over blobs in a
   516  // bucket, in lexicographical order of UTF-8 encoded keys. The underlying
   517  // implementation fetches results in pages.
   518  //
   519  // A nil ListOptions is treated the same as the zero value.
   520  //
   521  // List is not guaranteed to include all recently-written blobs;
   522  // some services are only eventually consistent.
   523  func (b *Bucket) List(opts *ListOptions) *ListIterator {
   524  	if opts == nil {
   525  		opts = &ListOptions{}
   526  	}
   527  	dopts := &driver.ListOptions{
   528  		Prefix:     opts.Prefix,
   529  		Delimiter:  opts.Delimiter,
   530  		BeforeList: opts.BeforeList,
   531  	}
   532  	return &ListIterator{b: b, opts: dopts}
   533  }
   534  
   535  // Exists returns true if a blob exists at key, false if it does not exist, or
   536  // an error.
   537  // It is a shortcut for calling Attributes and checking if it returns an error
   538  // with code gcerrors.NotFound.
   539  func (b *Bucket) Exists(ctx context.Context, key string) (bool, error) {
   540  	_, err := b.Attributes(ctx, key)
   541  	if err == nil {
   542  		return true, nil
   543  	}
   544  	if gcerrors.Code(err) == gcerrors.NotFound {
   545  		return false, nil
   546  	}
   547  	return false, err
   548  }
   549  
   550  // Attributes returns attributes for the blob stored at key.
   551  //
   552  // If the blob does not exist, Attributes returns an error for which
   553  // gcerrors.Code will return gcerrors.NotFound.
   554  func (b *Bucket) Attributes(ctx context.Context, key string) (_ *Attributes, err error) {
   555  	if !utf8.ValidString(key) {
   556  		return nil, gcerr.Newf(gcerr.InvalidArgument, nil, "blob: Attributes key must be a valid UTF-8 string: %q", key)
   557  	}
   558  
   559  	b.mu.RLock()
   560  	defer b.mu.RUnlock()
   561  	if b.closed {
   562  		return nil, errClosed
   563  	}
   564  	ctx = b.tracer.Start(ctx, "Attributes")
   565  	defer func() { b.tracer.End(ctx, err) }()
   566  
   567  	a, err := b.b.Attributes(ctx, key)
   568  	if err != nil {
   569  		return nil, wrapError(b.b, err, key)
   570  	}
   571  	var md map[string]string
   572  	if len(a.Metadata) > 0 {
   573  		// Services are inconsistent, but at least some treat keys
   574  		// as case-insensitive. To make the behavior consistent, we
   575  		// force-lowercase them when writing and reading.
   576  		md = make(map[string]string, len(a.Metadata))
   577  		for k, v := range a.Metadata {
   578  			md[strings.ToLower(k)] = v
   579  		}
   580  	}
   581  	return &Attributes{
   582  		CacheControl:       a.CacheControl,
   583  		ContentDisposition: a.ContentDisposition,
   584  		ContentEncoding:    a.ContentEncoding,
   585  		ContentLanguage:    a.ContentLanguage,
   586  		ContentType:        a.ContentType,
   587  		Metadata:           md,
   588  		ModTime:            a.ModTime,
   589  		Size:               a.Size,
   590  		MD5:                a.MD5,
   591  		asFunc:             a.AsFunc,
   592  	}, nil
   593  }
   594  
   595  // NewReader is a shortcut for NewRangeReader with offset=0 and length=-1.
   596  func (b *Bucket) NewReader(ctx context.Context, key string, opts *ReaderOptions) (*Reader, error) {
   597  	return b.newRangeReader(ctx, key, 0, -1, opts)
   598  }
   599  
   600  // NewRangeReader returns a Reader to read content from the blob stored at key.
   601  // It reads at most length bytes starting at offset (>= 0).
   602  // If length is negative, it will read till the end of the blob.
   603  //
   604  // If the blob does not exist, NewRangeReader returns an error for which
   605  // gcerrors.Code will return gcerrors.NotFound. Exists is a lighter-weight way
   606  // to check for existence.
   607  //
   608  // A nil ReaderOptions is treated the same as the zero value.
   609  //
   610  // The caller must call Close on the returned Reader when done reading.
   611  func (b *Bucket) NewRangeReader(ctx context.Context, key string, offset, length int64, opts *ReaderOptions) (_ *Reader, err error) {
   612  	return b.newRangeReader(ctx, key, offset, length, opts)
   613  }
   614  
   615  func (b *Bucket) newRangeReader(ctx context.Context, key string, offset, length int64, opts *ReaderOptions) (_ *Reader, err error) {
   616  	b.mu.RLock()
   617  	defer b.mu.RUnlock()
   618  	if b.closed {
   619  		return nil, errClosed
   620  	}
   621  	if offset < 0 {
   622  		return nil, gcerr.Newf(gcerr.InvalidArgument, nil, "blob: NewRangeReader offset must be non-negative (%d)", offset)
   623  	}
   624  	if !utf8.ValidString(key) {
   625  		return nil, gcerr.Newf(gcerr.InvalidArgument, nil, "blob: NewRangeReader key must be a valid UTF-8 string: %q", key)
   626  	}
   627  	if opts == nil {
   628  		opts = &ReaderOptions{}
   629  	}
   630  	dopts := &driver.ReaderOptions{
   631  		BeforeRead: opts.BeforeRead,
   632  	}
   633  	tctx := b.tracer.Start(ctx, "NewRangeReader")
   634  	defer func() {
   635  		// If err == nil, we handed the end closure off to the returned *Writer; it
   636  		// will be called when the Writer is Closed.
   637  		if err != nil {
   638  			b.tracer.End(tctx, err)
   639  		}
   640  	}()
   641  	dr, err := b.b.NewRangeReader(ctx, key, offset, length, dopts)
   642  	if err != nil {
   643  		return nil, wrapError(b.b, err, key)
   644  	}
   645  	end := func(err error) { b.tracer.End(tctx, err) }
   646  	r := &Reader{b: b.b, r: dr, key: key, end: end, provider: b.tracer.Provider}
   647  	_, file, lineno, ok := runtime.Caller(2)
   648  	runtime.SetFinalizer(r, func(r *Reader) {
   649  		if !r.closed {
   650  			var caller string
   651  			if ok {
   652  				caller = fmt.Sprintf(" (%s:%d)", file, lineno)
   653  			}
   654  			log.Printf("A blob.Reader reading from %q was never closed%s", key, caller)
   655  		}
   656  	})
   657  	return r, nil
   658  }
   659  
   660  // WriteAll is a shortcut for creating a Writer via NewWriter and writing p.
   661  //
   662  // If opts.ContentMD5 is not set, WriteAll will compute the MD5 of p and use it
   663  // as the ContentMD5 option for the Writer it creates.
   664  func (b *Bucket) WriteAll(ctx context.Context, key string, p []byte, opts *WriterOptions) (err error) {
   665  	realOpts := new(WriterOptions)
   666  	if opts != nil {
   667  		*realOpts = *opts
   668  	}
   669  	if len(realOpts.ContentMD5) == 0 {
   670  		sum := md5.Sum(p)
   671  		realOpts.ContentMD5 = sum[:]
   672  	}
   673  	w, err := b.NewWriter(ctx, key, realOpts)
   674  	if err != nil {
   675  		return err
   676  	}
   677  	if _, err := w.Write(p); err != nil {
   678  		_ = w.Close()
   679  		return err
   680  	}
   681  	return w.Close()
   682  }
   683  
   684  // NewWriter returns a Writer that writes to the blob stored at key.
   685  // A nil WriterOptions is treated the same as the zero value.
   686  //
   687  // If a blob with this key already exists, it will be replaced.
   688  // The blob being written is not guaranteed to be readable until Close
   689  // has been called; until then, any previous blob will still be readable.
   690  // Even after Close is called, newly written blobs are not guaranteed to be
   691  // returned from List; some services are only eventually consistent.
   692  //
   693  // The returned Writer will store ctx for later use in Write and/or Close.
   694  // To abort a write, cancel ctx; otherwise, it must remain open until
   695  // Close is called.
   696  //
   697  // The caller must call Close on the returned Writer, even if the write is
   698  // aborted.
   699  func (b *Bucket) NewWriter(ctx context.Context, key string, opts *WriterOptions) (_ *Writer, err error) {
   700  	if !utf8.ValidString(key) {
   701  		return nil, gcerr.Newf(gcerr.InvalidArgument, nil, "blob: NewWriter key must be a valid UTF-8 string: %q", key)
   702  	}
   703  	if opts == nil {
   704  		opts = &WriterOptions{}
   705  	}
   706  	dopts := &driver.WriterOptions{
   707  		CacheControl:       opts.CacheControl,
   708  		ContentDisposition: opts.ContentDisposition,
   709  		ContentEncoding:    opts.ContentEncoding,
   710  		ContentLanguage:    opts.ContentLanguage,
   711  		ContentMD5:         opts.ContentMD5,
   712  		BufferSize:         opts.BufferSize,
   713  		BeforeWrite:        opts.BeforeWrite,
   714  	}
   715  	if len(opts.Metadata) > 0 {
   716  		// Services are inconsistent, but at least some treat keys
   717  		// as case-insensitive. To make the behavior consistent, we
   718  		// force-lowercase them when writing and reading.
   719  		md := make(map[string]string, len(opts.Metadata))
   720  		for k, v := range opts.Metadata {
   721  			if k == "" {
   722  				return nil, gcerr.Newf(gcerr.InvalidArgument, nil, "blob: WriterOptions.Metadata keys may not be empty strings")
   723  			}
   724  			if !utf8.ValidString(k) {
   725  				return nil, gcerr.Newf(gcerr.InvalidArgument, nil, "blob: WriterOptions.Metadata keys must be valid UTF-8 strings: %q", k)
   726  			}
   727  			if !utf8.ValidString(v) {
   728  				return nil, gcerr.Newf(gcerr.InvalidArgument, nil, "blob: WriterOptions.Metadata values must be valid UTF-8 strings: %q", v)
   729  			}
   730  			lowerK := strings.ToLower(k)
   731  			if _, found := md[lowerK]; found {
   732  				return nil, gcerr.Newf(gcerr.InvalidArgument, nil, "blob: WriterOptions.Metadata has a duplicate case-insensitive metadata key: %q", lowerK)
   733  			}
   734  			md[lowerK] = v
   735  		}
   736  		dopts.Metadata = md
   737  	}
   738  	b.mu.RLock()
   739  	defer b.mu.RUnlock()
   740  	if b.closed {
   741  		return nil, errClosed
   742  	}
   743  	ctx, cancel := context.WithCancel(ctx)
   744  	tctx := b.tracer.Start(ctx, "NewWriter")
   745  	end := func(err error) { b.tracer.End(tctx, err) }
   746  	defer func() {
   747  		if err != nil {
   748  			end(err)
   749  		}
   750  	}()
   751  
   752  	w := &Writer{
   753  		b:          b.b,
   754  		end:        end,
   755  		cancel:     cancel,
   756  		key:        key,
   757  		contentMD5: opts.ContentMD5,
   758  		md5hash:    md5.New(),
   759  		provider:   b.tracer.Provider,
   760  	}
   761  	if opts.ContentType != "" {
   762  		t, p, err := mime.ParseMediaType(opts.ContentType)
   763  		if err != nil {
   764  			cancel()
   765  			return nil, err
   766  		}
   767  		ct := mime.FormatMediaType(t, p)
   768  		dw, err := b.b.NewTypedWriter(ctx, key, ct, dopts)
   769  		if err != nil {
   770  			cancel()
   771  			return nil, wrapError(b.b, err, key)
   772  		}
   773  		w.w = dw
   774  	} else {
   775  		// Save the fields needed to called NewTypedWriter later, once we've gotten
   776  		// sniffLen bytes; see the comment on Writer.
   777  		w.ctx = ctx
   778  		w.opts = dopts
   779  		w.buf = bytes.NewBuffer([]byte{})
   780  	}
   781  	_, file, lineno, ok := runtime.Caller(1)
   782  	runtime.SetFinalizer(w, func(w *Writer) {
   783  		if !w.closed {
   784  			var caller string
   785  			if ok {
   786  				caller = fmt.Sprintf(" (%s:%d)", file, lineno)
   787  			}
   788  			log.Printf("A blob.Writer writing to %q was never closed%s", key, caller)
   789  		}
   790  	})
   791  	return w, nil
   792  }
   793  
   794  // Copy the blob stored at srcKey to dstKey.
   795  // A nil CopyOptions is treated the same as the zero value.
   796  //
   797  // If the source blob does not exist, Copy returns an error for which
   798  // gcerrors.Code will return gcerrors.NotFound.
   799  //
   800  // If the destination blob already exists, it is overwritten.
   801  func (b *Bucket) Copy(ctx context.Context, dstKey, srcKey string, opts *CopyOptions) (err error) {
   802  	if !utf8.ValidString(srcKey) {
   803  		return gcerr.Newf(gcerr.InvalidArgument, nil, "blob: Copy srcKey must be a valid UTF-8 string: %q", srcKey)
   804  	}
   805  	if !utf8.ValidString(dstKey) {
   806  		return gcerr.Newf(gcerr.InvalidArgument, nil, "blob: Copy dstKey must be a valid UTF-8 string: %q", dstKey)
   807  	}
   808  	if opts == nil {
   809  		opts = &CopyOptions{}
   810  	}
   811  	dopts := &driver.CopyOptions{
   812  		BeforeCopy: opts.BeforeCopy,
   813  	}
   814  	b.mu.RLock()
   815  	defer b.mu.RUnlock()
   816  	if b.closed {
   817  		return errClosed
   818  	}
   819  	ctx = b.tracer.Start(ctx, "Copy")
   820  	defer func() { b.tracer.End(ctx, err) }()
   821  	return wrapError(b.b, b.b.Copy(ctx, dstKey, srcKey, dopts), fmt.Sprintf("%s -> %s", srcKey, dstKey))
   822  }
   823  
   824  // Delete deletes the blob stored at key.
   825  //
   826  // If the blob does not exist, Delete returns an error for which
   827  // gcerrors.Code will return gcerrors.NotFound.
   828  func (b *Bucket) Delete(ctx context.Context, key string) (err error) {
   829  	if !utf8.ValidString(key) {
   830  		return gcerr.Newf(gcerr.InvalidArgument, nil, "blob: Delete key must be a valid UTF-8 string: %q", key)
   831  	}
   832  	b.mu.RLock()
   833  	defer b.mu.RUnlock()
   834  	if b.closed {
   835  		return errClosed
   836  	}
   837  	ctx = b.tracer.Start(ctx, "Delete")
   838  	defer func() { b.tracer.End(ctx, err) }()
   839  	return wrapError(b.b, b.b.Delete(ctx, key), key)
   840  }
   841  
   842  // SignedURL returns a URL that can be used to GET the blob for the duration
   843  // specified in opts.Expiry.
   844  //
   845  // A nil SignedURLOptions is treated the same as the zero value.
   846  //
   847  // It is valid to call SignedURL for a key that does not exist.
   848  //
   849  // If the driver does not support this functionality, SignedURL
   850  // will return an error for which gcerrors.Code will return gcerrors.Unimplemented.
   851  func (b *Bucket) SignedURL(ctx context.Context, key string, opts *SignedURLOptions) (string, error) {
   852  	if !utf8.ValidString(key) {
   853  		return "", gcerr.Newf(gcerr.InvalidArgument, nil, "blob: SignedURL key must be a valid UTF-8 string: %q", key)
   854  	}
   855  	if opts == nil {
   856  		opts = &SignedURLOptions{}
   857  	}
   858  	if opts.Expiry < 0 {
   859  		return "", gcerr.Newf(gcerr.InvalidArgument, nil, "blob: SignedURLOptions.Expiry must be >= 0 (%v)", opts.Expiry)
   860  	}
   861  	if opts.Expiry == 0 {
   862  		opts.Expiry = DefaultSignedURLExpiry
   863  	}
   864  	if opts.Method == "" {
   865  		opts.Method = http.MethodGet
   866  	}
   867  	switch opts.Method {
   868  	case http.MethodGet:
   869  	case http.MethodPut:
   870  	case http.MethodDelete:
   871  	default:
   872  		return "", fmt.Errorf("unsupported SignedURLOptions.Method %q", opts.Method)
   873  	}
   874  	dopts := driver.SignedURLOptions{
   875  		Expiry: opts.Expiry,
   876  		Method: opts.Method,
   877  	}
   878  	b.mu.RLock()
   879  	defer b.mu.RUnlock()
   880  	if b.closed {
   881  		return "", errClosed
   882  	}
   883  	url, err := b.b.SignedURL(ctx, key, &dopts)
   884  	return url, wrapError(b.b, err, key)
   885  }
   886  
   887  // Close releases any resources used for the bucket.
   888  func (b *Bucket) Close() error {
   889  	b.mu.Lock()
   890  	prev := b.closed
   891  	b.closed = true
   892  	b.mu.Unlock()
   893  	if prev {
   894  		return errClosed
   895  	}
   896  	return wrapError(b.b, b.b.Close(), "")
   897  }
   898  
   899  // DefaultSignedURLExpiry is the default duration for SignedURLOptions.Expiry.
   900  const DefaultSignedURLExpiry = 1 * time.Hour
   901  
// SignedURLOptions sets options for SignedURL.
//
// The zero value is valid and requests a GET URL that expires after
// DefaultSignedURLExpiry.
type SignedURLOptions struct {
	// Expiry sets how long the returned URL is valid for.
	// Defaults to DefaultSignedURLExpiry when zero. Negative values are
	// rejected by SignedURL with an InvalidArgument error.
	Expiry time.Duration
	// Method is the HTTP method that can be used on the URL; one of "GET", "PUT",
	// or "DELETE". Defaults to "GET" when empty. Any other value causes
	// SignedURL to return an error.
	Method string
}
   911  
// ReaderOptions sets options for NewReader and NewRangeReader.
type ReaderOptions struct {
	// BeforeRead is a callback that will be called exactly once, before
	// any data is read (unless NewReader returns an error before then, in which
	// case it may not be called at all).
	//
	// asFunc converts its argument to driver-specific types; it reports
	// through its bool result whether the conversion succeeded.
	// See https://github.com/cornelk/go-cloud/concepts/as/ for background information.
	//
	// NOTE(review): how an error returned from BeforeRead is surfaced is
	// decided by the reader implementation, which is not shown here.
	BeforeRead func(asFunc func(interface{}) bool) error
}
   922  
// WriterOptions sets options for NewWriter.
//
// A nil *WriterOptions passed to NewWriter is treated as the zero value.
type WriterOptions struct {
	// BufferSize changes the default size in bytes of the chunks that
	// Writer will upload in a single request; larger blobs will be split into
	// multiple requests.
	//
	// This option may be ignored by some drivers.
	//
	// If 0, the driver will choose a reasonable default.
	//
	// If the Writer is used to do many small writes concurrently, using a
	// smaller BufferSize may reduce memory usage.
	BufferSize int

	// CacheControl specifies caching attributes that services may use
	// when serving the blob.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control
	CacheControl string

	// ContentDisposition specifies whether the blob content is expected to be
	// displayed inline or as an attachment.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition
	ContentDisposition string

	// ContentEncoding specifies the encoding used for the blob's content, if any.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding
	ContentEncoding string

	// ContentLanguage specifies the language used in the blob's content, if any.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Language
	ContentLanguage string

	// ContentType specifies the MIME type of the blob being written. If not set,
	// it will be inferred from the content using the algorithm described at
	// http://mimesniff.spec.whatwg.org/.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type
	//
	// When set, NewWriter parses the value with mime.ParseMediaType and
	// returns the parse error if it is not a valid media type; the driver
	// receives the value re-formatted by mime.FormatMediaType.
	ContentType string

	// ContentMD5 is used as a message integrity check.
	// If len(ContentMD5) > 0, the MD5 hash of the bytes written must match
	// ContentMD5, or Close will return an error without completing the write.
	// https://tools.ietf.org/html/rfc1864
	ContentMD5 []byte

	// Metadata holds key/value strings to be associated with the blob, or nil.
	// Keys may not be empty, and are lowercased before being written.
	// Keys and values must be valid UTF-8 strings.
	// Duplicate case-insensitive keys (e.g., "foo" and "FOO") will result in
	// an error.
	Metadata map[string]string

	// BeforeWrite is a callback that will be called exactly once, before
	// any data is written (unless NewWriter returns an error, in which case
	// it will not be called at all). Note that this is not necessarily during
	// or after the first Write call, as drivers may buffer bytes before
	// sending an upload request.
	//
	// asFunc converts its argument to driver-specific types.
	// See https://github.com/cornelk/go-cloud/concepts/as/ for background information.
	BeforeWrite func(asFunc func(interface{}) bool) error
}
   983  
// CopyOptions sets options for Copy.
//
// A nil *CopyOptions passed to Copy is treated as the zero value.
type CopyOptions struct {
	// BeforeCopy is a callback that will be called before the copy is
	// initiated. Copy hands it to the driver unchanged.
	//
	// asFunc converts its argument to driver-specific types.
	// See https://github.com/cornelk/go-cloud/concepts/as/ for background information.
	BeforeCopy func(asFunc func(interface{}) bool) error
}
   993  
// BucketURLOpener represents types that can open buckets based on a URL.
// The opener must not modify the URL argument. OpenBucketURL must be safe to
// call from multiple goroutines.
//
// This interface is generally implemented by types in driver packages.
type BucketURLOpener interface {
	// OpenBucketURL opens the Bucket identified by u, or returns an error.
	OpenBucketURL(ctx context.Context, u *url.URL) (*Bucket, error)
}
  1002  
// URLMux is a URL opener multiplexer. It matches the scheme of the URLs
// against a set of registered schemes and calls the opener that matches the
// URL's scheme.
// See https://github.com/cornelk/go-cloud/concepts/urls/ for more information.
//
// The zero value is a multiplexer with no registered schemes.
type URLMux struct {
	// schemes stores the openers registered through RegisterBucket and is
	// consulted when a URL is opened.
	schemes openurl.SchemeMap
}
  1012  
  1013  // BucketSchemes returns a sorted slice of the registered Bucket schemes.
  1014  func (mux *URLMux) BucketSchemes() []string { return mux.schemes.Schemes() }
  1015  
  1016  // ValidBucketScheme returns true iff scheme has been registered for Buckets.
  1017  func (mux *URLMux) ValidBucketScheme(scheme string) bool { return mux.schemes.ValidScheme(scheme) }
  1018  
  1019  // RegisterBucket registers the opener with the given scheme. If an opener
  1020  // already exists for the scheme, RegisterBucket panics.
  1021  func (mux *URLMux) RegisterBucket(scheme string, opener BucketURLOpener) {
  1022  	mux.schemes.Register("blob", "Bucket", scheme, opener)
  1023  }
  1024  
  1025  // OpenBucket calls OpenBucketURL with the URL parsed from urlstr.
  1026  // OpenBucket is safe to call from multiple goroutines.
  1027  func (mux *URLMux) OpenBucket(ctx context.Context, urlstr string) (*Bucket, error) {
  1028  	opener, u, err := mux.schemes.FromString("Bucket", urlstr)
  1029  	if err != nil {
  1030  		return nil, err
  1031  	}
  1032  	return applyPrefixParam(ctx, opener.(BucketURLOpener), u)
  1033  }
  1034  
  1035  // OpenBucketURL dispatches the URL to the opener that is registered with the
  1036  // URL's scheme. OpenBucketURL is safe to call from multiple goroutines.
  1037  func (mux *URLMux) OpenBucketURL(ctx context.Context, u *url.URL) (*Bucket, error) {
  1038  	opener, err := mux.schemes.FromURL("Bucket", u)
  1039  	if err != nil {
  1040  		return nil, err
  1041  	}
  1042  	return applyPrefixParam(ctx, opener.(BucketURLOpener), u)
  1043  }
  1044  
  1045  func applyPrefixParam(ctx context.Context, opener BucketURLOpener, u *url.URL) (*Bucket, error) {
  1046  	prefix := u.Query().Get("prefix")
  1047  	if prefix != "" {
  1048  		// Make a copy of u with the "prefix" parameter removed.
  1049  		urlCopy := *u
  1050  		q := urlCopy.Query()
  1051  		q.Del("prefix")
  1052  		urlCopy.RawQuery = q.Encode()
  1053  		u = &urlCopy
  1054  	}
  1055  	bucket, err := opener.OpenBucketURL(ctx, u)
  1056  	if err != nil {
  1057  		return nil, err
  1058  	}
  1059  	if prefix != "" {
  1060  		bucket = PrefixedBucket(bucket, prefix)
  1061  	}
  1062  	return bucket, nil
  1063  }
  1064  
  1065  var defaultURLMux = new(URLMux)
  1066  
  1067  // DefaultURLMux returns the URLMux used by OpenBucket.
  1068  //
  1069  // Driver packages can use this to register their BucketURLOpener on the mux.
  1070  func DefaultURLMux() *URLMux {
  1071  	return defaultURLMux
  1072  }
  1073  
  1074  // OpenBucket opens the bucket identified by the URL given.
  1075  //
  1076  // See the URLOpener documentation in driver subpackages for
  1077  // details on supported URL formats, and https://github.com/cornelk/go-cloud/concepts/urls/
  1078  // for more information.
  1079  //
  1080  // In addition to driver-specific query parameters, OpenBucket supports
  1081  // the following query parameters:
  1082  //
  1083  //   - prefix: wraps the resulting Bucket using PrefixedBucket with the
  1084  //             given prefix.
  1085  func OpenBucket(ctx context.Context, urlstr string) (*Bucket, error) {
  1086  	return defaultURLMux.OpenBucket(ctx, urlstr)
  1087  }
  1088  
  1089  func wrapError(b driver.Bucket, err error, key string) error {
  1090  	if err == nil {
  1091  		return nil
  1092  	}
  1093  	if gcerr.DoNotWrap(err) {
  1094  		return err
  1095  	}
  1096  	msg := "blob"
  1097  	if key != "" {
  1098  		msg += fmt.Sprintf(" (key %q)", key)
  1099  	}
  1100  	return gcerr.New(b.ErrorCode(err), err, 2, msg)
  1101  }
  1102  
// errClosed is the FailedPrecondition error returned by Bucket methods that
// are invoked after the Bucket has been closed.
var errClosed = gcerr.Newf(gcerr.FailedPrecondition, nil, "blob: Bucket has been closed")
  1104  
  1105  // PrefixedBucket returns a *Bucket based on b with all keys modified to have
  1106  // prefix, which will usually end with a "/" to target a subdirectory in the
  1107  // bucket.
  1108  //
  1109  // bucket will be closed and no longer usable after this function returns.
  1110  func PrefixedBucket(bucket *Bucket, prefix string) *Bucket {
  1111  	bucket.mu.Lock()
  1112  	defer bucket.mu.Unlock()
  1113  	bucket.closed = true
  1114  	return NewBucket(driver.NewPrefixedBucket(bucket.b, prefix))
  1115  }