github.com/ncw/rclone@v1.48.1-0.20190724201158-a35aa1360e3e/backend/b2/upload.go

// Upload large files for b2
//
// Docs - https://www.backblaze.com/b2/docs/large_files.html

package b2

import (
	"bytes"
	"context"
	"crypto/sha1"
	"encoding/hex"
	"fmt"
	gohash "hash"
	"io"
	"strings"
	"sync"

	"github.com/ncw/rclone/backend/b2/api"
	"github.com/ncw/rclone/fs"
	"github.com/ncw/rclone/fs/accounting"
	"github.com/ncw/rclone/fs/hash"
	"github.com/ncw/rclone/lib/rest"
	"github.com/pkg/errors"
)

type hashAppendingReader struct {
	h         gohash.Hash
	in        io.Reader
	hexSum    string
	hexReader io.Reader
}

// Read returns all bytes from the original reader, then the hex sum
// of what was read so far, then EOF.
func (har *hashAppendingReader) Read(b []byte) (int, error) {
	if har.hexReader == nil {
		n, err := har.in.Read(b)
		if err == io.EOF {
			har.in = nil // allow GC
			err = nil    // allow reading hexSum before EOF

			har.hexSum = hex.EncodeToString(har.h.Sum(nil))
			har.hexReader = strings.NewReader(har.hexSum)
		}
		return n, err
	}
	return har.hexReader.Read(b)
}

// AdditionalLength returns how many bytes the appended hex sum will take up.
func (har *hashAppendingReader) AdditionalLength() int {
	return hex.EncodedLen(har.h.Size())
}

// HexSum returns the hash sum as hex. It's only available after the original
// reader has EOF'd. It's an empty string before that.
func (har *hashAppendingReader) HexSum() string {
	return har.hexSum
}

// newHashAppendingReader takes a Reader and a Hash and will append the hex sum
// after the original reader reaches EOF. The increased size depends on the
// given hash, which may be queried through AdditionalLength()
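//
// This is used for B2's "hex_digits_at_end" upload mode: the SHA1 of a part
// is not known until the whole body has been read, so it is sent as trailing
// hex digits rather than in the X-Bz-Content-Sha1 header up front.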
func newHashAppendingReader(in io.Reader, h gohash.Hash) *hashAppendingReader {
	withHash := io.TeeReader(in, h)
	return &hashAppendingReader{h: h, in: withHash}
}

// largeUpload is used to control the upload of large files which need chunking
type largeUpload struct {
	f        *Fs                             // parent Fs
	o        *Object                         // object being uploaded
	in       io.Reader                       // read the data from here
	wrap     accounting.WrapFn               // account parts being transferred
	id       string                          // ID of the file being uploaded
	size     int64                           // total size
	parts    int64                           // calculated number of parts, if known
	sha1s    []string                        // slice of SHA1s for each part
	uploadMu sync.Mutex                      // lock for upload variable
	uploads  []*api.GetUploadPartURLResponse // result of get upload URL calls
}
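
// Concurrency: each part is uploaded by its own goroutine, and each goroutine
// writes only its own slot in sha1s (index part-1), so sha1s needs no locking.
// The uploads pool is shared between goroutines and is protected by uploadMu.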

// newLargeUpload starts an upload of object o from in with metadata in src
func (f *Fs) newLargeUpload(ctx context.Context, o *Object, in io.Reader, src fs.ObjectInfo) (up *largeUpload, err error) {
	remote := o.remote
	size := src.Size()
	parts := int64(0)
	sha1SliceSize := int64(maxParts)
	if size == -1 {
		fs.Debugf(o, "Streaming upload with --b2-chunk-size %s allows uploads of up to %s and will fail only when that limit is reached.", f.opt.ChunkSize, maxParts*f.opt.ChunkSize)
	} else {
		parts = size / int64(o.fs.opt.ChunkSize)
		if size%int64(o.fs.opt.ChunkSize) != 0 {
			parts++
		}
		if parts > maxParts {
			return nil, errors.Errorf("%q too big (%d bytes) makes too many parts %d > %d - increase --b2-chunk-size", remote, size, parts, maxParts)
		}
		sha1SliceSize = parts
	}

	modTime := src.ModTime(ctx)
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_start_large_file",
	}
	bucketID, err := f.getBucketID()
	if err != nil {
		return nil, err
	}
	var request = api.StartLargeFileRequest{
		BucketID:    bucketID,
		Name:        o.fs.root + remote,
		ContentType: fs.MimeType(ctx, src),
		Info: map[string]string{
			timeKey: timeString(modTime),
		},
	}
	// Set the SHA1 if known
	if !o.fs.opt.DisableCheckSum {
		if calculatedSha1, err := src.Hash(ctx, hash.SHA1); err == nil && calculatedSha1 != "" {
			request.Info[sha1Key] = calculatedSha1
		}
	}
	var response api.StartLargeFileResponse
	err = f.pacer.Call(func() (bool, error) {
		resp, err := f.srv.CallJSON(&opts, &request, &response)
		return f.shouldRetry(resp, err)
	})
	if err != nil {
		return nil, err
	}
	// unwrap the accounting from the input; we use wrap to put it
	// back on after the buffering
	in, wrap := accounting.UnWrap(in)
	up = &largeUpload{
		f:     f,
		o:     o,
		in:    in,
		wrap:  wrap,
		id:    response.ID,
		size:  size,
		parts: parts,
		sha1s: make([]string, sha1SliceSize),
	}
	return up, nil
}

// getUploadURL returns the upload info with the UploadURL and the AuthorizationToken
//
// This should be returned with returnUploadURL when finished
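//
// Upload URLs are pooled: a URL is taken out of up.uploads while it is in use
// and a fresh one is only requested from b2_get_upload_part_url when the pool
// is empty.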
func (up *largeUpload) getUploadURL() (upload *api.GetUploadPartURLResponse, err error) {
	up.uploadMu.Lock()
	defer up.uploadMu.Unlock()
	if len(up.uploads) == 0 {
		opts := rest.Opts{
			Method: "POST",
			Path:   "/b2_get_upload_part_url",
		}
		var request = api.GetUploadPartURLRequest{
			ID: up.id,
		}
		err := up.f.pacer.Call(func() (bool, error) {
			resp, err := up.f.srv.CallJSON(&opts, &request, &upload)
			return up.f.shouldRetry(resp, err)
		})
		if err != nil {
			return nil, errors.Wrap(err, "failed to get upload URL")
		}
	} else {
		upload, up.uploads = up.uploads[0], up.uploads[1:]
	}
	return upload, nil
}

// returnUploadURL returns the UploadURL to the cache
func (up *largeUpload) returnUploadURL(upload *api.GetUploadPartURLResponse) {
	if upload == nil {
		return
	}
	up.uploadMu.Lock()
	up.uploads = append(up.uploads, upload)
	up.uploadMu.Unlock()
}

// clearUploadURL clears the current UploadURL and the AuthorizationToken
func (up *largeUpload) clearUploadURL() {
	up.uploadMu.Lock()
	up.uploads = nil
	up.uploadMu.Unlock()
}

// Transfer a chunk
func (up *largeUpload) transferChunk(part int64, body []byte) error {
	err := up.f.pacer.Call(func() (bool, error) {
		fs.Debugf(up.o, "Sending chunk %d length %d", part, len(body))

		// Get upload URL
		upload, err := up.getUploadURL()
		if err != nil {
			return false, err
		}

		in := newHashAppendingReader(bytes.NewReader(body), sha1.New())
		size := int64(len(body)) + int64(in.AdditionalLength())

		// Authorization
		//
		// An upload authorization token, from b2_get_upload_part_url.
		//
		// X-Bz-Part-Number
		//
		// A number from 1 to 10000. The parts uploaded for one file
		// must have contiguous numbers, starting with 1.
		//
		// Content-Length
		//
		// The number of bytes in the file being uploaded. Note that
		// this header is required; you cannot leave it out and just
		// use chunked encoding.  The minimum size of every part but
		// the last one is 100MB.
		//
		// X-Bz-Content-Sha1
		//
		// The SHA1 checksum of this part of the file. B2 will
		// check this when the part is uploaded, to make sure that the
		// data arrived correctly.  The same SHA1 checksum must be
		// passed to b2_finish_large_file.
		opts := rest.Opts{
			Method:  "POST",
			RootURL: upload.UploadURL,
			Body:    up.wrap(in),
			ExtraHeaders: map[string]string{
				"Authorization":    upload.AuthorizationToken,
				"X-Bz-Part-Number": fmt.Sprintf("%d", part),
				sha1Header:         "hex_digits_at_end",
			},
			ContentLength: &size,
		}

		var response api.UploadPartResponse

		resp, err := up.f.srv.CallJSON(&opts, nil, &response)
		retry, err := up.f.shouldRetry(resp, err)
		if err != nil {
			fs.Debugf(up.o, "Error sending chunk %d (retry=%v): %v: %#v", part, retry, err, err)
		}
		// On retryable error clear PartUploadURL
		if retry {
			fs.Debugf(up.o, "Clearing part upload URL because of error: %v", err)
			upload = nil
		}
		up.returnUploadURL(upload)
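		// Remember the SHA1 of this part so it can be sent to
		// b2_finish_large_file when the upload completes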
		up.sha1s[part-1] = in.HexSum()
		return retry, err
	})
	if err != nil {
		fs.Debugf(up.o, "Error sending chunk %d: %v", part, err)
	} else {
		fs.Debugf(up.o, "Done sending chunk %d", part)
	}
	return err
}

// finish closes off the large upload
func (up *largeUpload) finish() error {
	fs.Debugf(up.o, "Finishing large file upload with %d parts", up.parts)
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_finish_large_file",
	}
	var request = api.FinishLargeFileRequest{
		ID:    up.id,
		SHA1s: up.sha1s,
	}
	var response api.FileInfo
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(&opts, &request, &response)
		return up.f.shouldRetry(resp, err)
	})
	if err != nil {
		return err
	}
	return up.o.decodeMetaDataFileInfo(&response)
}

// cancel aborts the large upload
func (up *largeUpload) cancel() error {
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_cancel_large_file",
	}
	var request = api.CancelLargeFileRequest{
		ID: up.id,
	}
	var response api.CancelLargeFileResponse
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(&opts, &request, &response)
		return up.f.shouldRetry(resp, err)
	})
	return err
}

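// managedTransferChunk uploads a single chunk in a goroutine of its own,
// returning the buffer to the pool when it is done.  Errors are sent to errs
// without blocking, so only the first error is kept.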
func (up *largeUpload) managedTransferChunk(wg *sync.WaitGroup, errs chan error, part int64, buf []byte) {
	wg.Add(1)
	go func(part int64, buf []byte) {
		defer wg.Done()
		defer up.f.putUploadBlock(buf)
		err := up.transferChunk(part, buf)
		if err != nil {
			select {
			case errs <- err:
			default:
			}
		}
	}(part, buf)
}

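// finishOrCancelOnError completes the large file upload if there was no
// error, otherwise it cancels the upload on the server and returns the first
// error seen.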
func (up *largeUpload) finishOrCancelOnError(err error, errs chan error) error {
	if err == nil {
		select {
		case err = <-errs:
		default:
		}
	}
	if err != nil {
		fs.Debugf(up.o, "Cancelling large file upload due to error: %v", err)
		cancelErr := up.cancel()
		if cancelErr != nil {
			fs.Errorf(up.o, "Failed to cancel large file upload: %v", cancelErr)
		}
		return err
	}
	return up.finish()
}

// Stream uploads the chunks from the input, starting with a required initial
// chunk. Assumes the file size is unknown and will upload until the input
// reaches EOF.
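//
// A short final read (io.ErrUnexpectedEOF) marks the last part; a clean
// io.EOF means the previous part was already the last one.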
func (up *largeUpload) Stream(initialUploadBlock []byte) (err error) {
	fs.Debugf(up.o, "Starting streaming of large file (id %q)", up.id)
	errs := make(chan error, 1)
	hasMoreParts := true
	var wg sync.WaitGroup

	// Transfer initial chunk
	up.size = int64(len(initialUploadBlock))
	up.managedTransferChunk(&wg, errs, 1, initialUploadBlock)

outer:
	for part := int64(2); hasMoreParts; part++ {
		// Check any errors
		select {
		case err = <-errs:
			break outer
		default:
		}

		// Get a block of memory
		buf := up.f.getUploadBlock()

		// Read the chunk
		var n int
		n, err = io.ReadFull(up.in, buf)
		if err == io.ErrUnexpectedEOF {
			fs.Debugf(up.o, "Read less than a full chunk, making this the last one.")
			buf = buf[:n]
			hasMoreParts = false
			err = nil
		} else if err == io.EOF {
			fs.Debugf(up.o, "Could not read any more bytes, previous chunk was the last.")
			up.f.putUploadBlock(buf)
			err = nil
			break outer
		} else if err != nil {
			// other kinds of errors indicate failure
			up.f.putUploadBlock(buf)
			break outer
		}

		// Keep stats up to date
		up.parts = part
		up.size += int64(n)
		if part > maxParts {
			err = errors.Errorf("%q too big (%d bytes so far) makes too many parts %d > %d - increase --b2-chunk-size", up.o, up.size, up.parts, maxParts)
			break outer
		}

		// Transfer the chunk
		up.managedTransferChunk(&wg, errs, part, buf)
	}
	wg.Wait()
	up.sha1s = up.sha1s[:up.parts]

	return up.finishOrCancelOnError(err, errs)
}

// Upload uploads the chunks from the input
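//
// The total size is known, so the number of parts was calculated up front and
// every part except possibly the last is exactly one chunk long.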
func (up *largeUpload) Upload() error {
	fs.Debugf(up.o, "Starting upload of large file in %d chunks (id %q)", up.parts, up.id)
	remaining := up.size
	errs := make(chan error, 1)
	var wg sync.WaitGroup
	var err error
outer:
	for part := int64(1); part <= up.parts; part++ {
		// Check any errors
		select {
		case err = <-errs:
			break outer
		default:
		}

		reqSize := remaining
		if reqSize >= int64(up.f.opt.ChunkSize) {
			reqSize = int64(up.f.opt.ChunkSize)
		}

		// Get a block of memory
		buf := up.f.getUploadBlock()[:reqSize]

		// Read the chunk
		_, err = io.ReadFull(up.in, buf)
		if err != nil {
			up.f.putUploadBlock(buf)
			break outer
		}

		// Transfer the chunk
		up.managedTransferChunk(&wg, errs, part, buf)
		remaining -= reqSize
	}
	wg.Wait()

	return up.finishOrCancelOnError(err, errs)
}