github.com/fawick/restic@v0.1.1-0.20171126184616-c02923fbfc79/internal/backend/gs/gs.go (about)

     1  // Package gs provides a restic backend for Google Cloud Storage.
     2  package gs
     3  
     4  import (
     5  	"context"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"path"
    10  	"strings"
    11  
    12  	"github.com/pkg/errors"
    13  	"github.com/restic/restic/internal/backend"
    14  	"github.com/restic/restic/internal/debug"
    15  	"github.com/restic/restic/internal/restic"
    16  
    17  	"io/ioutil"
    18  
    19  	"golang.org/x/oauth2/google"
    20  	"google.golang.org/api/googleapi"
    21  	storage "google.golang.org/api/storage/v1"
    22  )
    23  
    24  // Backend stores data in a GCS bucket.
    25  //
    26  // The service account used to access the bucket must have these permissions:
    27  //  * storage.objects.create
    28  //  * storage.objects.delete
    29  //  * storage.objects.get
    30  //  * storage.objects.list
    31  type Backend struct {
    32  	service      *storage.Service
    33  	projectID    string
    34  	sem          *backend.Semaphore
    35  	bucketName   string
    36  	prefix       string
    37  	listMaxItems int
    38  	backend.Layout
    39  }
    40  
    41  // Ensure that *Backend implements restic.Backend.
    42  var _ restic.Backend = &Backend{}
    43  
    44  func getStorageService(jsonKeyPath string) (*storage.Service, error) {
    45  
    46  	raw, err := ioutil.ReadFile(jsonKeyPath)
    47  	if err != nil {
    48  		return nil, errors.Wrap(err, "ReadFile")
    49  	}
    50  
    51  	conf, err := google.JWTConfigFromJSON(raw, storage.DevstorageReadWriteScope)
    52  	if err != nil {
    53  		return nil, err
    54  	}
    55  
    56  	client := conf.Client(context.TODO())
    57  
    58  	service, err := storage.New(client)
    59  	if err != nil {
    60  		return nil, err
    61  	}
    62  
    63  	return service, nil
    64  }
    65  
    66  const defaultListMaxItems = 1000
    67  
    68  func open(cfg Config) (*Backend, error) {
    69  	debug.Log("open, config %#v", cfg)
    70  
    71  	service, err := getStorageService(cfg.JSONKeyPath)
    72  	if err != nil {
    73  		return nil, errors.Wrap(err, "getStorageService")
    74  	}
    75  
    76  	sem, err := backend.NewSemaphore(cfg.Connections)
    77  	if err != nil {
    78  		return nil, err
    79  	}
    80  
    81  	be := &Backend{
    82  		service:    service,
    83  		projectID:  cfg.ProjectID,
    84  		sem:        sem,
    85  		bucketName: cfg.Bucket,
    86  		prefix:     cfg.Prefix,
    87  		Layout: &backend.DefaultLayout{
    88  			Path: cfg.Prefix,
    89  			Join: path.Join,
    90  		},
    91  		listMaxItems: defaultListMaxItems,
    92  	}
    93  
    94  	return be, nil
    95  }
    96  
    97  // Open opens the gs backend at the specified bucket.
    98  func Open(cfg Config) (restic.Backend, error) {
    99  	return open(cfg)
   100  }
   101  
   102  // Create opens the gs backend at the specified bucket and attempts to creates
   103  // the bucket if it does not exist yet.
   104  //
   105  // The service account must have the "storage.buckets.create" permission to
   106  // create a bucket the does not yet exist.
   107  func Create(cfg Config) (restic.Backend, error) {
   108  	be, err := open(cfg)
   109  	if err != nil {
   110  		return nil, errors.Wrap(err, "open")
   111  	}
   112  
   113  	// Try to determine if the bucket exists. If it does not, try to create it.
   114  	//
   115  	// A Get call has three typical error cases:
   116  	//
   117  	// * nil: Bucket exists and we have access to the metadata (returned).
   118  	//
   119  	// * 403: Bucket exists and we do not have access to the metadata. We
   120  	// don't have storage.buckets.get permission to the bucket, but we may
   121  	// still be able to access objects in the bucket.
   122  	//
   123  	// * 404: Bucket doesn't exist.
   124  	//
   125  	// Determining if the bucket is accessible is best-effort because the
   126  	// 403 case is ambiguous.
   127  	if _, err := be.service.Buckets.Get(be.bucketName).Do(); err != nil {
   128  		gerr, ok := err.(*googleapi.Error)
   129  		if !ok {
   130  			// Don't know what to do with this error.
   131  			return nil, errors.Wrap(err, "service.Buckets.Get")
   132  		}
   133  
   134  		switch gerr.Code {
   135  		case 403:
   136  			// Bucket exists, but we don't know if it is
   137  			// accessible. Optimistically assume it is; if not,
   138  			// future Backend calls will fail.
   139  			debug.Log("Unable to determine if bucket %s is accessible (err %v). Continuing as if it is.", be.bucketName, err)
   140  		case 404:
   141  			// Bucket doesn't exist, try to create it.
   142  			bucket := &storage.Bucket{
   143  				Name: be.bucketName,
   144  			}
   145  
   146  			if _, err := be.service.Buckets.Insert(be.projectID, bucket).Do(); err != nil {
   147  				// Always an error, as the bucket definitely
   148  				// doesn't exist.
   149  				return nil, errors.Wrap(err, "service.Buckets.Insert")
   150  			}
   151  		default:
   152  			// Don't know what to do with this error.
   153  			return nil, errors.Wrap(err, "service.Buckets.Get")
   154  		}
   155  	}
   156  
   157  	return be, nil
   158  }
   159  
   160  // SetListMaxItems sets the number of list items to load per request.
   161  func (be *Backend) SetListMaxItems(i int) {
   162  	be.listMaxItems = i
   163  }
   164  
   165  // IsNotExist returns true if the error is caused by a not existing file.
   166  func (be *Backend) IsNotExist(err error) bool {
   167  	debug.Log("IsNotExist(%T, %#v)", err, err)
   168  
   169  	if os.IsNotExist(err) {
   170  		return true
   171  	}
   172  
   173  	if er, ok := err.(*googleapi.Error); ok {
   174  		if er.Code == 404 {
   175  			return true
   176  		}
   177  	}
   178  
   179  	return false
   180  }
   181  
   182  // Join combines path components with slashes.
   183  func (be *Backend) Join(p ...string) string {
   184  	return path.Join(p...)
   185  }
   186  
   187  // Location returns this backend's location (the bucket name).
   188  func (be *Backend) Location() string {
   189  	return be.Join(be.bucketName, be.prefix)
   190  }
   191  
   192  // Path returns the path in the bucket that is used for this backend.
   193  func (be *Backend) Path() string {
   194  	return be.prefix
   195  }
   196  
   197  // Save stores data in the backend at the handle.
   198  func (be *Backend) Save(ctx context.Context, h restic.Handle, rd io.Reader) (err error) {
   199  	if err := h.Valid(); err != nil {
   200  		return err
   201  	}
   202  
   203  	objName := be.Filename(h)
   204  
   205  	debug.Log("Save %v at %v", h, objName)
   206  
   207  	be.sem.GetToken()
   208  
   209  	// Check key does not already exist
   210  	if _, err := be.service.Objects.Get(be.bucketName, objName).Do(); err == nil {
   211  		debug.Log("%v already exists", h)
   212  		be.sem.ReleaseToken()
   213  		return errors.New("key already exists")
   214  	}
   215  
   216  	debug.Log("InsertObject(%v, %v)", be.bucketName, objName)
   217  
   218  	// Set chunk size to zero to disable resumable uploads.
   219  	//
   220  	// With a non-zero chunk size (the default is
   221  	// googleapi.DefaultUploadChunkSize, 8MB), Insert will buffer data from
   222  	// rd in chunks of this size so it can upload these chunks in
   223  	// individual requests.
   224  	//
   225  	// This chunking allows the library to automatically handle network
   226  	// interruptions and re-upload only the last chunk rather than the full
   227  	// file.
   228  	//
   229  	// Unfortunately, this buffering doesn't play nicely with
   230  	// --limit-upload, which applies a rate limit to rd. This rate limit
   231  	// ends up only limiting the read from rd into the buffer rather than
   232  	// the network traffic itself. This results in poor network rate limit
   233  	// behavior, where individual chunks are written to the network at full
   234  	// bandwidth for several seconds, followed by several seconds of no
   235  	// network traffic as the next chunk is read through the rate limiter.
   236  	//
   237  	// By disabling chunking, rd is passed further down the request stack,
   238  	// where there is less (but some) buffering, which ultimately results
   239  	// in better rate limiting behavior.
   240  	//
   241  	// restic typically writes small blobs (4MB-30MB), so the resumable
   242  	// uploads are not providing significant benefit anyways.
   243  	cs := googleapi.ChunkSize(0)
   244  
   245  	info, err := be.service.Objects.Insert(be.bucketName,
   246  		&storage.Object{
   247  			Name: objName,
   248  		}).Media(rd, cs).Do()
   249  
   250  	be.sem.ReleaseToken()
   251  
   252  	if err != nil {
   253  		debug.Log("%v: err %#v: %v", objName, err, err)
   254  		return errors.Wrap(err, "service.Objects.Insert")
   255  	}
   256  
   257  	debug.Log("%v -> %v bytes", objName, info.Size)
   258  	return nil
   259  }
   260  
   261  // wrapReader wraps an io.ReadCloser to run an additional function on Close.
   262  type wrapReader struct {
   263  	io.ReadCloser
   264  	f func()
   265  }
   266  
   267  func (wr wrapReader) Close() error {
   268  	err := wr.ReadCloser.Close()
   269  	wr.f()
   270  	return err
   271  }
   272  
   273  // Load returns a reader that yields the contents of the file at h at the
   274  // given offset. If length is nonzero, only a portion of the file is
   275  // returned. rd must be closed after use.
   276  func (be *Backend) Load(ctx context.Context, h restic.Handle, length int, offset int64) (io.ReadCloser, error) {
   277  	debug.Log("Load %v, length %v, offset %v from %v", h, length, offset, be.Filename(h))
   278  	if err := h.Valid(); err != nil {
   279  		return nil, err
   280  	}
   281  
   282  	if offset < 0 {
   283  		return nil, errors.New("offset is negative")
   284  	}
   285  
   286  	if length < 0 {
   287  		return nil, errors.Errorf("invalid length %d", length)
   288  	}
   289  
   290  	objName := be.Filename(h)
   291  
   292  	be.sem.GetToken()
   293  
   294  	var byteRange string
   295  	if length > 0 {
   296  		byteRange = fmt.Sprintf("bytes=%d-%d", offset, offset+int64(length-1))
   297  	} else {
   298  		byteRange = fmt.Sprintf("bytes=%d-", offset)
   299  	}
   300  
   301  	req := be.service.Objects.Get(be.bucketName, objName)
   302  	// https://cloud.google.com/storage/docs/json_api/v1/parameters#range
   303  	req.Header().Set("Range", byteRange)
   304  	res, err := req.Download()
   305  	if err != nil {
   306  		be.sem.ReleaseToken()
   307  		return nil, err
   308  	}
   309  
   310  	closeRd := wrapReader{
   311  		ReadCloser: res.Body,
   312  		f: func() {
   313  			debug.Log("Close()")
   314  			be.sem.ReleaseToken()
   315  		},
   316  	}
   317  
   318  	return closeRd, err
   319  }
   320  
   321  // Stat returns information about a blob.
   322  func (be *Backend) Stat(ctx context.Context, h restic.Handle) (bi restic.FileInfo, err error) {
   323  	debug.Log("%v", h)
   324  
   325  	objName := be.Filename(h)
   326  
   327  	be.sem.GetToken()
   328  	obj, err := be.service.Objects.Get(be.bucketName, objName).Do()
   329  	be.sem.ReleaseToken()
   330  
   331  	if err != nil {
   332  		debug.Log("GetObject() err %v", err)
   333  		return restic.FileInfo{}, errors.Wrap(err, "service.Objects.Get")
   334  	}
   335  
   336  	return restic.FileInfo{Size: int64(obj.Size)}, nil
   337  }
   338  
   339  // Test returns true if a blob of the given type and name exists in the backend.
   340  func (be *Backend) Test(ctx context.Context, h restic.Handle) (bool, error) {
   341  	found := false
   342  	objName := be.Filename(h)
   343  
   344  	be.sem.GetToken()
   345  	_, err := be.service.Objects.Get(be.bucketName, objName).Do()
   346  	be.sem.ReleaseToken()
   347  
   348  	if err == nil {
   349  		found = true
   350  	}
   351  	// If error, then not found
   352  	return found, nil
   353  }
   354  
   355  // Remove removes the blob with the given name and type.
   356  func (be *Backend) Remove(ctx context.Context, h restic.Handle) error {
   357  	objName := be.Filename(h)
   358  
   359  	be.sem.GetToken()
   360  	err := be.service.Objects.Delete(be.bucketName, objName).Do()
   361  	be.sem.ReleaseToken()
   362  
   363  	if er, ok := err.(*googleapi.Error); ok {
   364  		if er.Code == 404 {
   365  			err = nil
   366  		}
   367  	}
   368  
   369  	debug.Log("Remove(%v) at %v -> err %v", h, objName, err)
   370  	return errors.Wrap(err, "client.RemoveObject")
   371  }
   372  
   373  // List returns a channel that yields all names of blobs of type t. A
   374  // goroutine is started for this. If the channel done is closed, sending
   375  // stops.
   376  func (be *Backend) List(ctx context.Context, t restic.FileType) <-chan string {
   377  	debug.Log("listing %v", t)
   378  	ch := make(chan string)
   379  
   380  	prefix := be.Dirname(restic.Handle{Type: t})
   381  
   382  	// make sure prefix ends with a slash
   383  	if prefix[len(prefix)-1] != '/' {
   384  		prefix += "/"
   385  	}
   386  
   387  	go func() {
   388  		defer close(ch)
   389  
   390  		listReq := be.service.Objects.List(be.bucketName).Prefix(prefix).MaxResults(int64(be.listMaxItems))
   391  		for {
   392  			be.sem.GetToken()
   393  			obj, err := listReq.Do()
   394  			be.sem.ReleaseToken()
   395  
   396  			if err != nil {
   397  				fmt.Fprintf(os.Stderr, "error listing %v: %v\n", prefix, err)
   398  				return
   399  			}
   400  
   401  			debug.Log("returned %v items", len(obj.Items))
   402  
   403  			for _, item := range obj.Items {
   404  				m := strings.TrimPrefix(item.Name, prefix)
   405  				if m == "" {
   406  					continue
   407  				}
   408  
   409  				select {
   410  				case ch <- path.Base(m):
   411  				case <-ctx.Done():
   412  					return
   413  				}
   414  			}
   415  
   416  			if obj.NextPageToken == "" {
   417  				break
   418  			}
   419  			listReq.PageToken(obj.NextPageToken)
   420  		}
   421  	}()
   422  
   423  	return ch
   424  }
   425  
   426  // Remove keys for a specified backend type.
   427  func (be *Backend) removeKeys(ctx context.Context, t restic.FileType) error {
   428  	for key := range be.List(ctx, restic.DataFile) {
   429  		err := be.Remove(ctx, restic.Handle{Type: restic.DataFile, Name: key})
   430  		if err != nil {
   431  			return err
   432  		}
   433  	}
   434  
   435  	return nil
   436  }
   437  
   438  // Delete removes all restic keys in the bucket. It will not remove the bucket itself.
   439  func (be *Backend) Delete(ctx context.Context) error {
   440  	alltypes := []restic.FileType{
   441  		restic.DataFile,
   442  		restic.KeyFile,
   443  		restic.LockFile,
   444  		restic.SnapshotFile,
   445  		restic.IndexFile}
   446  
   447  	for _, t := range alltypes {
   448  		err := be.removeKeys(ctx, t)
   449  		if err != nil {
   450  			return nil
   451  		}
   452  	}
   453  
   454  	return be.Remove(ctx, restic.Handle{Type: restic.ConfigFile})
   455  }
   456  
   457  // Close does nothing.
   458  func (be *Backend) Close() error { return nil }