github.com/xhghs/rclone@v1.51.1-0.20200430155106-e186a28cced8/backend/b2/upload.go

     1  // Upload large files for b2
     2  //
     3  // Docs - https://www.backblaze.com/b2/docs/large_files.html
     4  
     5  package b2
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"crypto/sha1"
    11  	"encoding/hex"
    12  	"fmt"
    13  	gohash "hash"
    14  	"io"
    15  	"strings"
    16  	"sync"
    17  
    18  	"github.com/pkg/errors"
    19  	"github.com/rclone/rclone/backend/b2/api"
    20  	"github.com/rclone/rclone/fs"
    21  	"github.com/rclone/rclone/fs/accounting"
    22  	"github.com/rclone/rclone/fs/hash"
    23  	"github.com/rclone/rclone/lib/rest"
    24  )
    25  
    26  type hashAppendingReader struct {
    27  	h         gohash.Hash
    28  	in        io.Reader
    29  	hexSum    string
    30  	hexReader io.Reader
    31  }
    32  
    33  // Read returns all bytes from the original reader, then the hex sum
    34  // of what was read so far, then EOF.
    35  func (har *hashAppendingReader) Read(b []byte) (int, error) {
    36  	if har.hexReader == nil {
    37  		n, err := har.in.Read(b)
    38  		if err == io.EOF {
    39  			har.in = nil // allow GC
    40  			err = nil    // allow reading hexSum before EOF
    41  
    42  			har.hexSum = hex.EncodeToString(har.h.Sum(nil))
    43  			har.hexReader = strings.NewReader(har.hexSum)
    44  		}
    45  		return n, err
    46  	}
    47  	return har.hexReader.Read(b)
    48  }
    49  
    50  // AdditionalLength returns how many bytes the appended hex sum will take up.
    51  func (har *hashAppendingReader) AdditionalLength() int {
    52  	return hex.EncodedLen(har.h.Size())
    53  }
    54  
    55  // HexSum returns the hash sum as hex. It's only available after the original
    56  // reader has EOF'd. It's an empty string before that.
    57  func (har *hashAppendingReader) HexSum() string {
    58  	return har.hexSum
    59  }
    60  
    61  // newHashAppendingReader takes a Reader and a Hash and will append the hex sum
    62  // after the original reader reaches EOF. The number of extra bytes depends on
    63  // the given hash and may be queried through AdditionalLength().
    64  func newHashAppendingReader(in io.Reader, h gohash.Hash) *hashAppendingReader {
    65  	withHash := io.TeeReader(in, h)
    66  	return &hashAppendingReader{h: h, in: withHash}
    67  }
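        // Illustrative sketch (not part of the original source; it assumes only
        // the standard library packages strings and io/ioutil): draining a
        // hashAppendingReader yields the payload followed by 40 hex digits of
        // its SHA-1, which is exactly the framing B2 expects when a part is
        // uploaded with "X-Bz-Content-Sha1: hex_digits_at_end".
        //
        //	in := newHashAppendingReader(strings.NewReader("hello"), sha1.New())
        //	data, _ := ioutil.ReadAll(in)
        //	payload := data[:len(data)-in.AdditionalLength()] // "hello"
        //	sum := in.HexSum()                                 // 40 hex characters, the SHA-1 of "hello"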
    68  
    69  // largeUpload is used to control the upload of large files which need chunking
    70  type largeUpload struct {
    71  	f        *Fs                             // parent Fs
    72  	o        *Object                         // object being uploaded
    73  	in       io.Reader                       // read the data from here
    74  	wrap     accounting.WrapFn               // account parts being transferred
    75  	id       string                          // ID of the file being uploaded
    76  	size     int64                           // total size
    77  	parts    int64                           // calculated number of parts, if known
    78  	sha1s    []string                        // slice of SHA1s for each part
    79  	uploadMu sync.Mutex                      // lock for upload variable
    80  	uploads  []*api.GetUploadPartURLResponse // result of get upload URL calls
    81  }
    82  
    83  // newLargeUpload starts an upload of object o from in with metadata in src
    84  func (f *Fs) newLargeUpload(ctx context.Context, o *Object, in io.Reader, src fs.ObjectInfo) (up *largeUpload, err error) {
    85  	remote := o.remote
    86  	size := src.Size()
    87  	parts := int64(0)
    88  	sha1SliceSize := int64(maxParts)
    89  	if size == -1 {
    90  		fs.Debugf(o, "Streaming upload with --b2-chunk-size %s allows uploads of up to %s and will fail only when that limit is reached.", f.opt.ChunkSize, maxParts*f.opt.ChunkSize)
    91  	} else {
    92  		parts = size / int64(o.fs.opt.ChunkSize)
    93  		if size%int64(o.fs.opt.ChunkSize) != 0 {
    94  			parts++
    95  		}
    96  		if parts > maxParts {
    97  			return nil, errors.Errorf("%q too big (%d bytes) makes too many parts %d > %d - increase --b2-chunk-size", remote, size, parts, maxParts)
    98  		}
    99  		sha1SliceSize = parts
   100  	}
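        	// Worked example (illustrative): with a 96 MiB chunk size, a 1 GiB
        	// object needs ceil(1024/96) = 11 parts, well under the 10,000 part
        	// limit, so the sha1s slice below is sized to exactly 11 entries.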
   101  
   102  	modTime := src.ModTime(ctx)
   103  	opts := rest.Opts{
   104  		Method: "POST",
   105  		Path:   "/b2_start_large_file",
   106  	}
   107  	bucket, bucketPath := o.split()
   108  	bucketID, err := f.getBucketID(ctx, bucket)
   109  	if err != nil {
   110  		return nil, err
   111  	}
   112  	var request = api.StartLargeFileRequest{
   113  		BucketID:    bucketID,
   114  		Name:        f.opt.Enc.FromStandardPath(bucketPath),
   115  		ContentType: fs.MimeType(ctx, src),
   116  		Info: map[string]string{
   117  			timeKey: timeString(modTime),
   118  		},
   119  	}
   120  	// Set the SHA1 if known
   121  	if !o.fs.opt.DisableCheckSum {
   122  		if calculatedSha1, err := src.Hash(ctx, hash.SHA1); err == nil && calculatedSha1 != "" {
   123  			request.Info[sha1Key] = calculatedSha1
   124  		}
   125  	}
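        	// Illustrative example of the resulting file info map. timeKey and
        	// sha1Key are constants defined elsewhere in this package; the key
        	// and value strings below are an assumption for illustration only:
        	//
        	//	"src_last_modified_millis": "1588291200000"
        	//	"large_file_sha1":          "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed"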
   126  	var response api.StartLargeFileResponse
   127  	err = f.pacer.Call(func() (bool, error) {
   128  		resp, err := f.srv.CallJSON(ctx, &opts, &request, &response)
   129  		return f.shouldRetry(ctx, resp, err)
   130  	})
   131  	if err != nil {
   132  		return nil, err
   133  	}
   134  	// Unwrap the accounting from the input; we use wrap to put it
   135  	// back on after the buffering.
   136  	in, wrap := accounting.UnWrap(in)
   137  	up = &largeUpload{
   138  		f:     f,
   139  		o:     o,
   140  		in:    in,
   141  		wrap:  wrap,
   142  		id:    response.ID,
   143  		size:  size,
   144  		parts: parts,
   145  		sha1s: make([]string, sha1SliceSize),
   146  	}
   147  	return up, nil
   148  }
   149  
   150  // getUploadURL returns the upload info with the UploadURL and the AuthorizationToken
   151  //
   152  // This should be returned with returnUploadURL when finished
   153  func (up *largeUpload) getUploadURL(ctx context.Context) (upload *api.GetUploadPartURLResponse, err error) {
   154  	up.uploadMu.Lock()
   155  	defer up.uploadMu.Unlock()
   156  	if len(up.uploads) == 0 {
   157  		opts := rest.Opts{
   158  			Method: "POST",
   159  			Path:   "/b2_get_upload_part_url",
   160  		}
   161  		var request = api.GetUploadPartURLRequest{
   162  			ID: up.id,
   163  		}
   164  		err := up.f.pacer.Call(func() (bool, error) {
   165  			resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &upload)
   166  			return up.f.shouldRetry(ctx, resp, err)
   167  		})
   168  		if err != nil {
   169  			return nil, errors.Wrap(err, "failed to get upload URL")
   170  		}
   171  	} else {
   172  		upload, up.uploads = up.uploads[0], up.uploads[1:]
   173  	}
   174  	return upload, nil
   175  }
   176  
   177  // returnUploadURL returns the UploadURL to the cache
   178  func (up *largeUpload) returnUploadURL(upload *api.GetUploadPartURLResponse) {
   179  	if upload == nil {
   180  		return
   181  	}
   182  	up.uploadMu.Lock()
   183  	up.uploads = append(up.uploads, upload)
   184  	up.uploadMu.Unlock()
   185  }
   186  
   187  // clearUploadURL clears the current UploadURL and the AuthorizationToken
   188  func (up *largeUpload) clearUploadURL() {
   189  	up.uploadMu.Lock()
   190  	up.uploads = nil
   191  	up.uploadMu.Unlock()
   192  }
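        // Typical use of the upload URL pool above (illustrative sketch drawn
        // from transferChunk below; retryableFailure is a placeholder
        // condition): a URL is checked out for one part, handed back on
        // success so the next part can reuse it, and dropped after a retryable
        // failure so a fresh one is fetched next time.
        //
        //	upload, err := up.getUploadURL(ctx)
        //	if err != nil {
        //		return err
        //	}
        //	// ... upload one part to upload.UploadURL ...
        //	if retryableFailure {
        //		upload = nil // returnUploadURL ignores nil, so the stale URL is discarded
        //	}
        //	up.returnUploadURL(upload)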
   193  
   194  // transferChunk uploads one chunk of the file as part number part
   195  func (up *largeUpload) transferChunk(ctx context.Context, part int64, body []byte) error {
   196  	err := up.f.pacer.Call(func() (bool, error) {
   197  		fs.Debugf(up.o, "Sending chunk %d length %d", part, len(body))
   198  
   199  		// Get upload URL
   200  		upload, err := up.getUploadURL(ctx)
   201  		if err != nil {
   202  			return false, err
   203  		}
   204  
   205  		in := newHashAppendingReader(bytes.NewReader(body), sha1.New())
   206  		size := int64(len(body)) + int64(in.AdditionalLength())
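        		// For example (illustrative): a 100 MiB part is sent with a
        		// Content-Length of 100 MiB + 40 bytes: the body plus the hex
        		// SHA-1 that hashAppendingReader appends, which is what the
        		// "hex_digits_at_end" header below tells B2 to expect.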
   207  
   208  		// Authorization
   209  		//
   210  		// An upload authorization token, from b2_get_upload_part_url.
   211  		//
   212  		// X-Bz-Part-Number
   213  		//
   214  		// A number from 1 to 10000. The parts uploaded for one file
   215  		// must have contiguous numbers, starting with 1.
   216  		//
   217  		// Content-Length
   218  		//
   219  		// The number of bytes in the file being uploaded. Note that
   220  		// this header is required; you cannot leave it out and just
   221  		// use chunked encoding.  The minimum size of every part but
   222  		// the last one is 100MB.
   223  		//
   224  		// X-Bz-Content-Sha1
   225  		//
   226  		// The SHA1 checksum of this part of the file. B2 will
   227  		// check this when the part is uploaded, to make sure that the
   228  		// data arrived correctly.  The same SHA1 checksum must be
   229  		// passed to b2_finish_large_file.
   230  		opts := rest.Opts{
   231  			Method:  "POST",
   232  			RootURL: upload.UploadURL,
   233  			Body:    up.wrap(in),
   234  			ExtraHeaders: map[string]string{
   235  				"Authorization":    upload.AuthorizationToken,
   236  				"X-Bz-Part-Number": fmt.Sprintf("%d", part),
   237  				sha1Header:         "hex_digits_at_end",
   238  			},
   239  			ContentLength: &size,
   240  		}
   241  
   242  		var response api.UploadPartResponse
   243  
   244  		resp, err := up.f.srv.CallJSON(ctx, &opts, nil, &response)
   245  		retry, err := up.f.shouldRetry(ctx, resp, err)
   246  		if err != nil {
   247  			fs.Debugf(up.o, "Error sending chunk %d (retry=%v): %v: %#v", part, retry, err, err)
   248  		}
   249  		// On retryable error clear PartUploadURL
   250  		if retry {
   251  			fs.Debugf(up.o, "Clearing part upload URL because of error: %v", err)
   252  			upload = nil
   253  		}
   254  		up.returnUploadURL(upload)
   255  		up.sha1s[part-1] = in.HexSum()
   256  		return retry, err
   257  	})
   258  	if err != nil {
   259  		fs.Debugf(up.o, "Error sending chunk %d: %v", part, err)
   260  	} else {
   261  		fs.Debugf(up.o, "Done sending chunk %d", part)
   262  	}
   263  	return err
   264  }
   265  
   266  // finish closes off the large upload
   267  func (up *largeUpload) finish(ctx context.Context) error {
   268  	fs.Debugf(up.o, "Finishing large file upload with %d parts", up.parts)
   269  	opts := rest.Opts{
   270  		Method: "POST",
   271  		Path:   "/b2_finish_large_file",
   272  	}
   273  	var request = api.FinishLargeFileRequest{
   274  		ID:    up.id,
   275  		SHA1s: up.sha1s,
   276  	}
   277  	var response api.FileInfo
   278  	err := up.f.pacer.Call(func() (bool, error) {
   279  		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &response)
   280  		return up.f.shouldRetry(ctx, resp, err)
   281  	})
   282  	if err != nil {
   283  		return err
   284  	}
   285  	return up.o.decodeMetaDataFileInfo(&response)
   286  }
   287  
   288  // cancel aborts the large upload
   289  func (up *largeUpload) cancel(ctx context.Context) error {
   290  	opts := rest.Opts{
   291  		Method: "POST",
   292  		Path:   "/b2_cancel_large_file",
   293  	}
   294  	var request = api.CancelLargeFileRequest{
   295  		ID: up.id,
   296  	}
   297  	var response api.CancelLargeFileResponse
   298  	err := up.f.pacer.Call(func() (bool, error) {
   299  		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &response)
   300  		return up.f.shouldRetry(ctx, resp, err)
   301  	})
   302  	return err
   303  }
   304  
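        // managedTransferChunk transfers one part in a background goroutine
        // tracked by wg, returning the buffer to the pool when the transfer
        // finishes. The first error is recorded on errs; later errors are
        // dropped because errs has a buffer of one.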
   305  func (up *largeUpload) managedTransferChunk(ctx context.Context, wg *sync.WaitGroup, errs chan error, part int64, buf []byte) {
   306  	wg.Add(1)
   307  	go func(part int64, buf []byte) {
   308  		defer wg.Done()
   309  		defer up.f.putUploadBlock(buf)
   310  		err := up.transferChunk(ctx, part, buf)
   311  		if err != nil {
   312  			select {
   313  			case errs <- err:
   314  			default:
   315  			}
   316  		}
   317  	}(part, buf)
   318  }
   319  
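        // finishOrCancelOnError completes the large file upload if neither err
        // nor the errs channel carries an error. Otherwise it cancels the
        // upload on the server, so the already-uploaded parts are not left
        // behind, and returns the error.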
   320  func (up *largeUpload) finishOrCancelOnError(ctx context.Context, err error, errs chan error) error {
   321  	if err == nil {
   322  		select {
   323  		case err = <-errs:
   324  		default:
   325  		}
   326  	}
   327  	if err != nil {
   328  		fs.Debugf(up.o, "Cancelling large file upload due to error: %v", err)
   329  		cancelErr := up.cancel(ctx)
   330  		if cancelErr != nil {
   331  			fs.Errorf(up.o, "Failed to cancel large file upload: %v", cancelErr)
   332  		}
   333  		return err
   334  	}
   335  	return up.finish(ctx)
   336  }
   337  
   338  // Stream uploads the chunks from the input, starting with a required initial
   339  // chunk. Assumes the file size is unknown and will upload until the input
   340  // reaches EOF.
   341  func (up *largeUpload) Stream(ctx context.Context, initialUploadBlock []byte) (err error) {
   342  	fs.Debugf(up.o, "Starting streaming of large file (id %q)", up.id)
   343  	errs := make(chan error, 1)
   344  	hasMoreParts := true
   345  	var wg sync.WaitGroup
   346  
   347  	// Transfer initial chunk
   348  	up.size = int64(len(initialUploadBlock))
   349  	up.managedTransferChunk(ctx, &wg, errs, 1, initialUploadBlock)
   350  
   351  outer:
   352  	for part := int64(2); hasMoreParts; part++ {
   353  		// Check any errors
   354  		select {
   355  		case err = <-errs:
   356  			break outer
   357  		default:
   358  		}
   359  
   360  		// Get a block of memory
   361  		buf := up.f.getUploadBlock()
   362  
   363  		// Read the chunk
   364  		var n int
   365  		n, err = io.ReadFull(up.in, buf)
   366  		if err == io.ErrUnexpectedEOF {
   367  			fs.Debugf(up.o, "Read less than a full chunk, making this the last one.")
   368  			buf = buf[:n]
   369  			hasMoreParts = false
   370  			err = nil
   371  		} else if err == io.EOF {
   372  			fs.Debugf(up.o, "Could not read any more bytes, previous chunk was the last.")
   373  			up.f.putUploadBlock(buf)
   374  			err = nil
   375  			break outer
   376  		} else if err != nil {
   377  			// other kinds of errors indicate failure
   378  			up.f.putUploadBlock(buf)
   379  			break outer
   380  		}
   381  
   382  		// Keep stats up to date
   383  		up.parts = part
   384  		up.size += int64(n)
   385  		if part > maxParts {
   386  			err = errors.Errorf("%q too big (%d bytes so far) makes too many parts %d > %d - increase --b2-chunk-size", up.o, up.size, up.parts, maxParts)
   387  			break outer
   388  		}
   389  
   390  		// Transfer the chunk
   391  		up.managedTransferChunk(ctx, &wg, errs, part, buf)
   392  	}
   393  	wg.Wait()
   394  	up.sha1s = up.sha1s[:up.parts]
   395  
   396  	return up.finishOrCancelOnError(ctx, err, errs)
   397  }
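        // Worked example of the streaming loop above (illustrative): with a
        // 96 MiB chunk size and a 250 MiB stream, the caller-supplied initial
        // chunk goes out as part 1 (96 MiB), part 2 reads a full 96 MiB, and
        // part 3 gets io.ErrUnexpectedEOF after 58 MiB, so it is trimmed,
        // marked as the last part, and the upload is finished with 3 parts.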
   398  
   399  // Upload uploads the chunks from the input
   400  func (up *largeUpload) Upload(ctx context.Context) error {
   401  	fs.Debugf(up.o, "Starting upload of large file in %d chunks (id %q)", up.parts, up.id)
   402  	remaining := up.size
   403  	errs := make(chan error, 1)
   404  	var wg sync.WaitGroup
   405  	var err error
   406  outer:
   407  	for part := int64(1); part <= up.parts; part++ {
   408  		// Check any errors
   409  		select {
   410  		case err = <-errs:
   411  			break outer
   412  		default:
   413  		}
   414  
   415  		reqSize := remaining
   416  		if reqSize >= int64(up.f.opt.ChunkSize) {
   417  			reqSize = int64(up.f.opt.ChunkSize)
   418  		}
   419  
   420  		// Get a block of memory
   421  		buf := up.f.getUploadBlock()[:reqSize]
   422  
   423  		// Read the chunk
   424  		_, err = io.ReadFull(up.in, buf)
   425  		if err != nil {
   426  			up.f.putUploadBlock(buf)
   427  			break outer
   428  		}
   429  
   430  		// Transfer the chunk
   431  		up.managedTransferChunk(ctx, &wg, errs, part, buf)
   432  		remaining -= reqSize
   433  	}
   434  	wg.Wait()
   435  
   436  	return up.finishOrCancelOnError(ctx, err, errs)
   437  }
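        // Illustrative sketch of how a caller drives a large upload (the
        // surrounding flow is an assumption; only the identifiers defined in
        // this file are real): build the uploader with newLargeUpload, then use
        // Upload when the source size is known, or Stream, with an initial
        // chunk already read from the input, when it is not.
        //
        //	up, err := f.newLargeUpload(ctx, o, in, src)
        //	if err != nil {
        //		return err
        //	}
        //	if src.Size() < 0 {
        //		err = up.Stream(ctx, firstChunk) // firstChunk: the part already buffered by the caller
        //	} else {
        //		err = up.Upload(ctx)
        //	}
        //	return err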