github.com/coreos/mantle@v0.13.0/storage/bucket.go (about)

     1  // Copyright 2016 CoreOS, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package storage
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"io"
    21  	"net/http"
    22  	"net/url"
    23  	"path"
    24  	"strings"
    25  	"sync"
    26  
    27  	"golang.org/x/net/context"
    28  	"google.golang.org/api/googleapi"
    29  	"google.golang.org/api/storage/v1"
    30  )
    31  
    32  var (
    33  	UnknownScheme = errors.New("storage: URL missing gs:// scheme")
    34  	UnknownBucket = errors.New("storage: URL missing bucket name")
    35  )
    36  
    37  type Bucket struct {
    38  	service *storage.Service
    39  	name    string
    40  	prefix  string
    41  
    42  	mu       sync.RWMutex
    43  	prefixes map[string]struct{}
    44  	objects  map[string]*storage.Object
    45  
    46  	// writeAlways enables overwriting of objects that appear up-to-date
    47  	writeAlways bool
    48  	// writeDryRun blocks any changes, merely logging them instead
    49  	writeDryRun bool
    50  }
    51  
    52  func NewBucket(client *http.Client, bucketURL string) (*Bucket, error) {
    53  	service, err := storage.New(client)
    54  	if err != nil {
    55  		return nil, err
    56  	}
    57  
    58  	parsedURL, err := url.Parse(bucketURL)
    59  	if err != nil {
    60  		return nil, err
    61  	}
    62  	if parsedURL.Scheme != "gs" {
    63  		return nil, UnknownScheme
    64  	}
    65  	if parsedURL.Host == "" {
    66  		return nil, UnknownBucket
    67  	}
    68  
    69  	return &Bucket{
    70  		service:  service,
    71  		name:     parsedURL.Host,
    72  		prefix:   FixPrefix(parsedURL.Path),
    73  		prefixes: make(map[string]struct{}),
    74  		objects:  make(map[string]*storage.Object),
    75  	}, nil
    76  }
    77  
    78  func (b *Bucket) Name() string {
    79  	return b.name
    80  }
    81  
    82  func (b *Bucket) Prefix() string {
    83  	return b.prefix
    84  }
    85  
    86  func (b *Bucket) URL() *url.URL {
    87  	return &url.URL{Scheme: "gs", Host: b.name, Path: b.prefix}
    88  }
    89  
    90  func (b *Bucket) WriteAlways(always bool) {
    91  	b.writeAlways = always
    92  }
    93  
    94  func (b *Bucket) WriteDryRun(dryrun bool) {
    95  	b.writeDryRun = dryrun
    96  }
    97  
    98  func (b *Bucket) Object(objName string) *storage.Object {
    99  	b.mu.RLock()
   100  	defer b.mu.RUnlock()
   101  	return b.objects[objName]
   102  }
   103  
   104  func (b *Bucket) Objects() []*storage.Object {
   105  	b.mu.RLock()
   106  	defer b.mu.RUnlock()
   107  	objs := make([]*storage.Object, 0, len(b.objects))
   108  	for _, obj := range b.objects {
   109  		objs = append(objs, obj)
   110  	}
   111  	return objs
   112  }
   113  
   114  func (b *Bucket) Prefixes() []string {
   115  	seen := make(map[string]bool)
   116  	list := make([]string, 0)
   117  	add := func(prefix string) {
   118  		for !seen[prefix] {
   119  			seen[prefix] = true
   120  			list = append(list, prefix)
   121  			prefix = NextPrefix(prefix)
   122  		}
   123  	}
   124  
   125  	b.mu.RLock()
   126  	defer b.mu.RUnlock()
   127  	for prefix := range b.prefixes {
   128  		add(prefix)
   129  	}
   130  	for objName := range b.objects {
   131  		add(NextPrefix(objName))
   132  	}
   133  
   134  	return list
   135  }
   136  
   137  func (b *Bucket) Len() int {
   138  	b.mu.RLock()
   139  	defer b.mu.RUnlock()
   140  	return len(b.objects)
   141  }
   142  
   143  func (b *Bucket) addObject(obj *storage.Object) {
   144  	if obj.Bucket != b.name {
   145  		panic(fmt.Errorf("adding gs://%s/%s to bucket %s", obj.Bucket, obj.Name, b.name))
   146  	}
   147  	b.mu.Lock()
   148  	defer b.mu.Unlock()
   149  	b.objects[obj.Name] = obj
   150  }
   151  
   152  func (b *Bucket) addObjects(objs *storage.Objects) {
   153  	b.mu.Lock()
   154  	defer b.mu.Unlock()
   155  	for _, obj := range objs.Items {
   156  		if obj.Bucket != b.name {
   157  			panic(fmt.Errorf("adding gs://%s/%s to bucket %s", obj.Bucket, obj.Name, b.name))
   158  		}
   159  		b.objects[obj.Name] = obj
   160  	}
   161  	for _, pfx := range objs.Prefixes {
   162  		b.prefixes[pfx] = struct{}{}
   163  	}
   164  }
   165  
   166  func (b *Bucket) delObject(objName string) {
   167  	b.mu.Lock()
   168  	defer b.mu.Unlock()
   169  	delete(b.objects, objName)
   170  }
   171  
   172  func (b *Bucket) mkURL(obj interface{}) *url.URL {
   173  	switch v := obj.(type) {
   174  	case string:
   175  		u := b.URL()
   176  		u.Path = v
   177  		return u
   178  	case *storage.Object:
   179  		u := b.URL()
   180  		u.Path = v.Name
   181  		if v.Bucket != "" {
   182  			u.Host = v.Bucket
   183  		}
   184  		return u
   185  	case *url.URL:
   186  		return v
   187  	case nil:
   188  		return b.URL()
   189  	default:
   190  		panic(fmt.Errorf("unknown type %T", obj))
   191  	}
   192  }
   193  
   194  func (b *Bucket) apiErr(op string, obj interface{}, e error) error {
   195  	if _, ok := e.(*googleapi.Error); ok {
   196  		return &Error{Op: op, URL: b.mkURL(obj).String(), Err: e}
   197  	}
   198  	return e
   199  }
   200  
   201  func (b *Bucket) Fetch(ctx context.Context) error {
   202  	return b.FetchPrefix(ctx, b.prefix, true)
   203  }
   204  
   205  func (b *Bucket) FetchPrefix(ctx context.Context, prefix string, recursive bool) error {
   206  	prefix = FixPrefix(prefix)
   207  	req := b.service.Objects.List(b.name)
   208  	if prefix != "" {
   209  		req.Prefix(prefix)
   210  	}
   211  	if !recursive {
   212  		req.Delimiter("/")
   213  	}
   214  
   215  	n := 0
   216  	p := 0
   217  	u := b.URL()
   218  	u.Path = prefix
   219  	add := func(objs *storage.Objects) error {
   220  		b.addObjects(objs)
   221  		n += len(objs.Items)
   222  		plog.Infof("Found %d objects under %s", n, u)
   223  		if len(objs.Prefixes) > 0 {
   224  			p += len(objs.Prefixes)
   225  			plog.Infof("Found %d directories under %s", p, u)
   226  		}
   227  		return nil
   228  	}
   229  
   230  	plog.Noticef("Fetching %s", u)
   231  
   232  	if err := req.Pages(ctx, add); err != nil {
   233  		return b.apiErr("storage.objects.list", nil, err)
   234  	}
   235  
   236  	if prefix == "" {
   237  		return nil
   238  	}
   239  
   240  	// In order to pair well with HTML indexing we need to check for
   241  	// a redirect object (prefix minus trailing slash). The list
   242  	// request needs the slash get foo/bar/* but not foo/barbaz.
   243  	redirName := strings.TrimSuffix(prefix, "/")
   244  	if b.Object(redirName) != nil {
   245  		return nil
   246  	}
   247  
   248  	redirReq := b.service.Objects.Get(b.name, redirName)
   249  	redirReq.Context(ctx)
   250  	redirObj, err := redirReq.Do()
   251  	if e, ok := err.(*googleapi.Error); ok && e.Code == 404 {
   252  		return nil // missing is perfectly valid
   253  	} else if err != nil {
   254  		return b.apiErr("storage.objects.get", redirName, err)
   255  	}
   256  
   257  	b.addObject(redirObj)
   258  	return nil
   259  }
   260  
   261  func (b *Bucket) Upload(ctx context.Context, obj *storage.Object, media io.ReaderAt) error {
   262  	// Calculate the checksum to enable upload integrity checking.
   263  	if obj.Crc32c == "" {
   264  		obj = dupObj(obj) // avoid editing the original
   265  		if err := crcSum(obj, media); err != nil {
   266  			return err
   267  		}
   268  	}
   269  
   270  	old := b.Object(obj.Name)
   271  	if !b.writeAlways && crcEq(old, obj) {
   272  		return nil // up to date!
   273  	}
   274  	if b.writeDryRun {
   275  		plog.Noticef("Would write %s", b.mkURL(obj))
   276  		return nil
   277  	}
   278  
   279  	req := b.service.Objects.Insert(b.name, obj)
   280  	// ResumableMedia is documented as deprecated in favor of Media
   281  	// but Media's retry support was bad and got temporarily removed.
   282  	// https://github.com/google/google-api-go-client/commit/9737cc9e103c00d06a8f3993361dec083df3d252
   283  	req.ResumableMedia(ctx, media, int64(obj.Size), obj.ContentType)
   284  
   285  	// Watch out for unexpected conflicting updates.
   286  	if old != nil {
   287  		req.IfGenerationMatch(old.Generation)
   288  	}
   289  
   290  	plog.Noticef("Writing %s", b.mkURL(obj))
   291  
   292  	inserted, err := req.Do()
   293  	if err != nil {
   294  		return b.apiErr("storage.objects.insert", obj, err)
   295  	}
   296  
   297  	b.addObject(inserted)
   298  	return nil
   299  }
   300  
   301  func (b *Bucket) Copy(ctx context.Context, src *storage.Object, dstName string) error {
   302  	if src.Bucket == "" {
   303  		panic(fmt.Errorf("src.Bucket is blank: %#v", src))
   304  	}
   305  
   306  	old := b.Object(dstName)
   307  	if !b.writeAlways && crcEq(old, src) {
   308  		return nil // up to date!
   309  	}
   310  
   311  	// It does work to pass src directly to the Rewrite API call, the
   312  	// name and bucket values don't really matter, they just cannot be
   313  	// blank for whatever reason. We make a copy just to get consistent
   314  	// results, e.g. always use the destination bucket's default ACL.
   315  	dst := dupObj(src)
   316  	dst.Name = dstName
   317  	dst.Bucket = b.name
   318  
   319  	if b.writeDryRun {
   320  		plog.Noticef("Would copy %s to %s", b.mkURL(src), b.mkURL(dst))
   321  		return nil
   322  	}
   323  
   324  	req := b.service.Objects.Rewrite(
   325  		src.Bucket, src.Name, dst.Bucket, dst.Name, src)
   326  	req.Context(ctx)
   327  
   328  	// Watch out for unexpected conflicting updates.
   329  	if old != nil {
   330  		req.IfGenerationMatch(old.Generation)
   331  	}
   332  	if src.Generation != 0 {
   333  		req.IfSourceGenerationMatch(src.Generation)
   334  	}
   335  
   336  	plog.Noticef("Copying %s to %s", b.mkURL(src), b.mkURL(dst))
   337  
   338  	for {
   339  		resp, err := req.Do()
   340  		if err != nil {
   341  			return b.apiErr("storage.objects.rewrite", dst, err)
   342  		}
   343  		if resp.Done {
   344  			b.addObject(resp.Resource)
   345  			return nil
   346  		}
   347  		req.RewriteToken(resp.RewriteToken)
   348  	}
   349  }
   350  
   351  func (b *Bucket) Delete(ctx context.Context, objName string) error {
   352  	if b.writeDryRun {
   353  		plog.Noticef("Would delete %s", b.mkURL(objName))
   354  		return nil
   355  	}
   356  
   357  	req := b.service.Objects.Delete(b.name, objName)
   358  	req.Context(ctx)
   359  
   360  	// Watch out for unexpected conflicting updates.
   361  	if old := b.Object(objName); old != nil {
   362  		req.IfGenerationMatch(old.Generation)
   363  		req.IfMetagenerationMatch(old.Metageneration)
   364  	}
   365  
   366  	plog.Noticef("Deleting %s", b.mkURL(objName))
   367  
   368  	if err := req.Do(); err != nil {
   369  		return b.apiErr("storage.objects.delete", objName, err)
   370  	}
   371  
   372  	b.delObject(objName)
   373  	return nil
   374  }
   375  
   376  // FixPrefix ensures non-empty paths end in a slash but never start with one.
   377  func FixPrefix(p string) string {
   378  	if p != "" && !strings.HasSuffix(p, "/") {
   379  		p += "/"
   380  	}
   381  	return strings.TrimPrefix(p, "/")
   382  }
   383  
   384  // NextPrefix chops off the final component of an object name or prefix.
   385  func NextPrefix(name string) string {
   386  	prefix, _ := path.Split(strings.TrimSuffix(name, "/"))
   387  	return prefix
   388  }