github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/backend/b2/upload.go

// Upload large files for b2
//
// Docs - https://www.backblaze.com/b2/docs/large_files.html

package b2

import (
	"bytes"
	"context"
	"crypto/sha1"
	"encoding/hex"
	"fmt"
	gohash "hash"
	"io"
	"strings"
	"sync"

	"github.com/pkg/errors"
	"github.com/rclone/rclone/backend/b2/api"
	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/accounting"
	"github.com/rclone/rclone/fs/hash"
	"github.com/rclone/rclone/lib/rest"
)

type hashAppendingReader struct {
	h         gohash.Hash
	in        io.Reader
	hexSum    string
	hexReader io.Reader
}

// Read returns all bytes from the original reader, then the hex sum
// of what was read so far, then EOF.
func (har *hashAppendingReader) Read(b []byte) (int, error) {
	if har.hexReader == nil {
		n, err := har.in.Read(b)
		if err == io.EOF {
			har.in = nil // allow GC
			err = nil    // allow reading hexSum before EOF

			har.hexSum = hex.EncodeToString(har.h.Sum(nil))
			har.hexReader = strings.NewReader(har.hexSum)
		}
		return n, err
	}
	return har.hexReader.Read(b)
}

// AdditionalLength returns how many bytes the appended hex sum will take up.
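// For SHA-1 this is 40 bytes (a 20 byte digest hex encoded).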
func (har *hashAppendingReader) AdditionalLength() int {
	return hex.EncodedLen(har.h.Size())
}

// HexSum returns the hash sum as hex. It's only available after the original
// reader has EOF'd. It's an empty string before that.
func (har *hashAppendingReader) HexSum() string {
	return har.hexSum
}

// newHashAppendingReader takes a Reader and a Hash and will append the hex sum
// after the original reader reaches EOF. The increased size depends on the
// given hash and can be queried through AdditionalLength().
func newHashAppendingReader(in io.Reader, h gohash.Hash) *hashAppendingReader {
	withHash := io.TeeReader(in, h)
	return &hashAppendingReader{h: h, in: withHash}
}
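
// Illustrative sketch (not part of the upload flow): wrapping a reader so that
// the hex SHA-1 of its contents is appended after EOF, which is how the
// "hex_digits_at_end" upload mode below consumes it. The payload is an
// arbitrary example value.
//
//	in := newHashAppendingReader(strings.NewReader("payload"), sha1.New())
//	size := int64(len("payload")) + int64(in.AdditionalLength())
//	data, _ := ioutil.ReadAll(in) // len(data) == size; the last 40 bytes are the hex SHA-1
//	sum := in.HexSum()            // non-empty now that the wrapped reader has EOF'd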

// largeUpload is used to control the upload of large files which need chunking
type largeUpload struct {
	f        *Fs                             // parent Fs
	o        *Object                         // object being uploaded
	in       io.Reader                       // read the data from here
	wrap     accounting.WrapFn               // account parts being transferred
	id       string                          // ID of the file being uploaded
	size     int64                           // total size
	parts    int64                           // calculated number of parts, if known
	sha1s    []string                        // slice of SHA1s for each part
	uploadMu sync.Mutex                      // lock for upload variable
	uploads  []*api.GetUploadPartURLResponse // result of get upload URL calls
}

// newLargeUpload starts an upload of object o from in with metadata in src
func (f *Fs) newLargeUpload(ctx context.Context, o *Object, in io.Reader, src fs.ObjectInfo) (up *largeUpload, err error) {
	remote := o.remote
	size := src.Size()
	parts := int64(0)
	sha1SliceSize := int64(maxParts)
	if size == -1 {
		fs.Debugf(o, "Streaming upload with --b2-chunk-size %s allows uploads of up to %s and will fail only when that limit is reached.", f.opt.ChunkSize, maxParts*f.opt.ChunkSize)
	} else {
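		// Round up to the number of whole chunks needed; e.g. with a 96M
		// chunk size a 250 MiB file needs 3 parts (two full chunks plus a
		// 58 MiB remainder).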
		parts = size / int64(o.fs.opt.ChunkSize)
		if size%int64(o.fs.opt.ChunkSize) != 0 {
			parts++
		}
		if parts > maxParts {
			return nil, errors.Errorf("%q too big (%d bytes) makes too many parts %d > %d - increase --b2-chunk-size", remote, size, parts, maxParts)
		}
		sha1SliceSize = parts
	}

	modTime := src.ModTime(ctx)
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_start_large_file",
	}
	bucket, bucketPath := o.split()
	bucketID, err := f.getBucketID(ctx, bucket)
	if err != nil {
		return nil, err
	}
	var request = api.StartLargeFileRequest{
		BucketID:    bucketID,
		Name:        f.opt.Enc.FromStandardPath(bucketPath),
		ContentType: fs.MimeType(ctx, src),
		Info: map[string]string{
			timeKey: timeString(modTime),
		},
	}
	// Set the SHA1 if known
	if !o.fs.opt.DisableCheckSum {
		if calculatedSha1, err := src.Hash(ctx, hash.SHA1); err == nil && calculatedSha1 != "" {
			request.Info[sha1Key] = calculatedSha1
		}
	}
	var response api.StartLargeFileResponse
	err = f.pacer.Call(func() (bool, error) {
		resp, err := f.srv.CallJSON(ctx, &opts, &request, &response)
		return f.shouldRetry(ctx, resp, err)
	})
	if err != nil {
		return nil, err
	}
	// Unwrap the accounting from the input; we use wrap to put it
	// back on after the buffering.
	in, wrap := accounting.UnWrap(in)
	up = &largeUpload{
		f:     f,
		o:     o,
		in:    in,
		wrap:  wrap,
		id:    response.ID,
		size:  size,
		parts: parts,
		sha1s: make([]string, sha1SliceSize),
	}
	return up, nil
}
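
// A caller would typically drive a large upload roughly like this (sketch only,
// with the surrounding error handling elided; the actual call sites live
// elsewhere in this backend, and firstChunk stands for an already-read initial
// block):
//
//	up, err := f.newLargeUpload(ctx, o, in, src)
//	if err != nil {
//		return err
//	}
//	return up.Upload(ctx) // or up.Stream(ctx, firstChunk) when the size is unknown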

// getUploadURL returns the upload info with the UploadURL and the AuthorizationToken
//
// This should be returned with returnUploadURL when finished
func (up *largeUpload) getUploadURL(ctx context.Context) (upload *api.GetUploadPartURLResponse, err error) {
	up.uploadMu.Lock()
	defer up.uploadMu.Unlock()
	if len(up.uploads) == 0 {
		opts := rest.Opts{
			Method: "POST",
			Path:   "/b2_get_upload_part_url",
		}
		var request = api.GetUploadPartURLRequest{
			ID: up.id,
		}
		err := up.f.pacer.Call(func() (bool, error) {
			resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &upload)
			return up.f.shouldRetry(ctx, resp, err)
		})
		if err != nil {
			return nil, errors.Wrap(err, "failed to get upload URL")
		}
	} else {
		upload, up.uploads = up.uploads[0], up.uploads[1:]
	}
	return upload, nil
}

// returnUploadURL returns the UploadURL to the cache
func (up *largeUpload) returnUploadURL(upload *api.GetUploadPartURLResponse) {
	if upload == nil {
		return
	}
	up.uploadMu.Lock()
	up.uploads = append(up.uploads, upload)
	up.uploadMu.Unlock()
}
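
// Typical use of the upload URL cache (sketch): take a URL, upload a part with
// it, then hand it back so later parts can reuse it. On a retryable failure the
// URL is discarded instead by passing nil (see transferChunk below).
//
//	upload, err := up.getUploadURL(ctx)
//	if err != nil {
//		return err
//	}
//	// ... upload one part using upload.UploadURL and upload.AuthorizationToken ...
//	up.returnUploadURL(upload)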

// Transfer a chunk
func (up *largeUpload) transferChunk(ctx context.Context, part int64, body []byte) error {
	err := up.f.pacer.Call(func() (bool, error) {
		fs.Debugf(up.o, "Sending chunk %d length %d", part, len(body))

		// Get upload URL
		upload, err := up.getUploadURL(ctx)
		if err != nil {
			return false, err
		}

		in := newHashAppendingReader(bytes.NewReader(body), sha1.New())
		size := int64(len(body)) + int64(in.AdditionalLength())

		// Authorization
		//
		// An upload authorization token, from b2_get_upload_part_url.
		//
		// X-Bz-Part-Number
		//
		// A number from 1 to 10000. The parts uploaded for one file
		// must have contiguous numbers, starting with 1.
		//
		// Content-Length
		//
		// The number of bytes in the file being uploaded. Note that
		// this header is required; you cannot leave it out and just
		// use chunked encoding.  The minimum size of every part but
		// the last one is 100MB.
		//
		// X-Bz-Content-Sha1
		//
		// The SHA1 checksum of this part of the file. B2 will
		// check this when the part is uploaded, to make sure that the
		// data arrived correctly. The same SHA1 checksum must be
		// passed to b2_finish_large_file.
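		//
		// For example (values illustrative only) part 3 of an upload is
		// sent with headers along these lines:
		//
		//	Authorization:     <token from b2_get_upload_part_url>
		//	X-Bz-Part-Number:  3
		//	Content-Length:    <len(body) + 40>
		//	X-Bz-Content-Sha1: hex_digits_at_end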
		opts := rest.Opts{
			Method:  "POST",
			RootURL: upload.UploadURL,
			Body:    up.wrap(in),
			ExtraHeaders: map[string]string{
				"Authorization":    upload.AuthorizationToken,
				"X-Bz-Part-Number": fmt.Sprintf("%d", part),
				sha1Header:         "hex_digits_at_end",
			},
			ContentLength: &size,
		}

		var response api.UploadPartResponse

		resp, err := up.f.srv.CallJSON(ctx, &opts, nil, &response)
		retry, err := up.f.shouldRetry(ctx, resp, err)
		if err != nil {
			fs.Debugf(up.o, "Error sending chunk %d (retry=%v): %v: %#v", part, retry, err, err)
		}
		// On retryable error clear PartUploadURL
		if retry {
			fs.Debugf(up.o, "Clearing part upload URL because of error: %v", err)
			upload = nil
		}
		up.returnUploadURL(upload)
		up.sha1s[part-1] = in.HexSum()
		return retry, err
	})
	if err != nil {
		fs.Debugf(up.o, "Error sending chunk %d: %v", part, err)
	} else {
		fs.Debugf(up.o, "Done sending chunk %d", part)
	}
	return err
}

// finish closes off the large upload
func (up *largeUpload) finish(ctx context.Context) error {
	fs.Debugf(up.o, "Finishing large file upload with %d parts", up.parts)
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_finish_large_file",
	}
	var request = api.FinishLargeFileRequest{
		ID:    up.id,
		SHA1s: up.sha1s,
	}
	var response api.FileInfo
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &response)
		return up.f.shouldRetry(ctx, resp, err)
	})
	if err != nil {
		return err
	}
	return up.o.decodeMetaDataFileInfo(&response)
}

// cancel aborts the large upload
func (up *largeUpload) cancel(ctx context.Context) error {
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_cancel_large_file",
	}
	var request = api.CancelLargeFileRequest{
		ID: up.id,
	}
	var response api.CancelLargeFileResponse
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &response)
		return up.f.shouldRetry(ctx, resp, err)
	})
	return err
}

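// managedTransferChunk transfers buf as part number part in a goroutine tracked
// by wg, returning the buffer via putUploadBlock when done and recording at
// most one error on errs.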
func (up *largeUpload) managedTransferChunk(ctx context.Context, wg *sync.WaitGroup, errs chan error, part int64, buf []byte) {
	wg.Add(1)
	go func(part int64, buf []byte) {
		defer wg.Done()
		defer up.f.putUploadBlock(buf)
		err := up.transferChunk(ctx, part, buf)
		if err != nil {
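			// Non-blocking send: errs has capacity 1, so only the
			// first error is kept and later ones are dropped.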
			select {
			case errs <- err:
			default:
			}
		}
	}(part, buf)
}

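// finishOrCancelOnError finishes the large upload if neither err nor the errs
// channel carries an error, otherwise it cancels the upload and returns the
// first error seen.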
func (up *largeUpload) finishOrCancelOnError(ctx context.Context, err error, errs chan error) error {
	if err == nil {
		select {
		case err = <-errs:
		default:
		}
	}
	if err != nil {
		fs.Debugf(up.o, "Cancelling large file upload due to error: %v", err)
		cancelErr := up.cancel(ctx)
		if cancelErr != nil {
			fs.Errorf(up.o, "Failed to cancel large file upload: %v", cancelErr)
		}
		return err
	}
	return up.finish(ctx)
}

// Stream uploads the chunks from the input, starting with a required initial
// chunk. Assumes the file size is unknown and will upload until the input
// reaches EOF.
func (up *largeUpload) Stream(ctx context.Context, initialUploadBlock []byte) (err error) {
	fs.Debugf(up.o, "Starting streaming of large file (id %q)", up.id)
	errs := make(chan error, 1)
	hasMoreParts := true
	var wg sync.WaitGroup

	// Transfer initial chunk
	up.size = int64(len(initialUploadBlock))
	up.managedTransferChunk(ctx, &wg, errs, 1, initialUploadBlock)

outer:
	for part := int64(2); hasMoreParts; part++ {
		// Check any errors
		select {
		case err = <-errs:
			break outer
		default:
		}

		// Get a block of memory
		buf := up.f.getUploadBlock()

		// Read the chunk
		var n int
		n, err = io.ReadFull(up.in, buf)
		if err == io.ErrUnexpectedEOF {
			fs.Debugf(up.o, "Read less than a full chunk, making this the last one.")
			buf = buf[:n]
			hasMoreParts = false
			err = nil
		} else if err == io.EOF {
			fs.Debugf(up.o, "Could not read any more bytes, previous chunk was the last.")
			up.f.putUploadBlock(buf)
			err = nil
			break outer
		} else if err != nil {
			// other kinds of errors indicate failure
			up.f.putUploadBlock(buf)
			break outer
		}

		// Keep stats up to date
		up.parts = part
		up.size += int64(n)
		if part > maxParts {
			err = errors.Errorf("%q too big (%d bytes so far) makes too many parts %d > %d - increase --b2-chunk-size", up.o, up.size, up.parts, maxParts)
			break outer
		}

		// Transfer the chunk
		up.managedTransferChunk(ctx, &wg, errs, part, buf)
	}
	wg.Wait()
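	// The sha1s slice was sized for maxParts because the total size was
	// unknown; trim it to the parts actually uploaded.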
	up.sha1s = up.sha1s[:up.parts]

	return up.finishOrCancelOnError(ctx, err, errs)
}

// Upload uploads the chunks from the input
func (up *largeUpload) Upload(ctx context.Context) error {
	fs.Debugf(up.o, "Starting upload of large file in %d chunks (id %q)", up.parts, up.id)
	remaining := up.size
	errs := make(chan error, 1)
	var wg sync.WaitGroup
	var err error
outer:
	for part := int64(1); part <= up.parts; part++ {
		// Check any errors
		select {
		case err = <-errs:
			break outer
		default:
		}

		reqSize := remaining
		if reqSize >= int64(up.f.opt.ChunkSize) {
			reqSize = int64(up.f.opt.ChunkSize)
		}

		// Get a block of memory
		buf := up.f.getUploadBlock()[:reqSize]

		// Read the chunk
		_, err = io.ReadFull(up.in, buf)
		if err != nil {
			up.f.putUploadBlock(buf)
			break outer
		}

		// Transfer the chunk
		up.managedTransferChunk(ctx, &wg, errs, part, buf)
		remaining -= reqSize
	}
	wg.Wait()

	return up.finishOrCancelOnError(ctx, err, errs)
}