github.com/artpar/rclone@v1.67.3/backend/b2/upload.go

// Upload large files for b2
//
// Docs - https://www.backblaze.com/b2/docs/large_files.html

package b2

import (
	"context"
	"crypto/sha1"
	"encoding/hex"
	"fmt"
	gohash "hash"
	"io"
	"strings"
	"sync"

	"github.com/artpar/rclone/backend/b2/api"
	"github.com/artpar/rclone/fs"
	"github.com/artpar/rclone/fs/accounting"
	"github.com/artpar/rclone/fs/chunksize"
	"github.com/artpar/rclone/fs/hash"
	"github.com/artpar/rclone/lib/atexit"
	"github.com/artpar/rclone/lib/pool"
	"github.com/artpar/rclone/lib/rest"
	"golang.org/x/sync/errgroup"
)

type hashAppendingReader struct {
	h         gohash.Hash
	in        io.Reader
	hexSum    string
	hexReader io.Reader
}

// Read returns all bytes from the original reader, then the hex sum
// of what was read so far, then EOF.
func (har *hashAppendingReader) Read(b []byte) (int, error) {
	if har.hexReader == nil {
		n, err := har.in.Read(b)
		if err == io.EOF {
			har.in = nil // allow GC
			err = nil    // allow reading hexSum before EOF

			har.hexSum = hex.EncodeToString(har.h.Sum(nil))
			har.hexReader = strings.NewReader(har.hexSum)
		}
		return n, err
	}
	return har.hexReader.Read(b)
}

// AdditionalLength returns how many bytes the appended hex sum will take up.
func (har *hashAppendingReader) AdditionalLength() int {
	return hex.EncodedLen(har.h.Size())
}

// HexSum returns the hash sum as hex. It's only available after the original
// reader has EOF'd. It's an empty string before that.
func (har *hashAppendingReader) HexSum() string {
	return har.hexSum
}

// newHashAppendingReader takes a Reader and a Hash and will append the hex sum
// after the original reader reaches EOF. The increased size depends on the
// given hash, which may be queried through AdditionalLength().
func newHashAppendingReader(in io.Reader, h gohash.Hash) *hashAppendingReader {
	withHash := io.TeeReader(in, h)
	return &hashAppendingReader{h: h, in: withHash}
}
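
// exampleHashAppendingReader is an illustrative sketch (not called anywhere in
// rclone, and the name is made up) showing the behaviour documented above:
// reading the wrapped reader to EOF yields the original bytes followed by
// their 40 character hex SHA1, which is what B2's "hex_digits_at_end" upload
// mode expects.
func exampleHashAppendingReader() (payload, hexSum string, err error) {
	in := newHashAppendingReader(strings.NewReader("hello"), sha1.New())
	all, err := io.ReadAll(in)
	if err != nil {
		return "", "", err
	}
	n := len(all) - in.AdditionalLength() // the hex sum occupies the last 40 bytes
	return string(all[:n]), string(all[n:]), nil // payload == "hello", hexSum == in.HexSum()
}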

// largeUpload is used to control the upload of large files which need chunking
type largeUpload struct {
	f         *Fs                             // parent Fs
	o         *Object                         // object being uploaded
	doCopy    bool                            // doing copy rather than upload
	what      string                          // text name of operation for logs
	in        io.Reader                       // read the data from here
	wrap      accounting.WrapFn               // account parts being transferred
	id        string                          // ID of the file being uploaded
	size      int64                           // total size
	parts     int                             // calculated number of parts, if known
	sha1smu   sync.Mutex                      // mutex to protect sha1s
	sha1s     []string                        // slice of SHA1s for each part
	uploadMu  sync.Mutex                      // lock for upload variable
	uploads   []*api.GetUploadPartURLResponse // result of get upload URL calls
	chunkSize int64                           // chunk size to use
	src       *Object                         // if copying, object we are reading from
	info      *api.FileInfo                   // final response with info about the object
}

// newLargeUpload starts an upload of object o from in with metadata in src
//
// If newInfo is set then metadata from that will be used instead of reading it from src
func (f *Fs) newLargeUpload(ctx context.Context, o *Object, in io.Reader, src fs.ObjectInfo, defaultChunkSize fs.SizeSuffix, doCopy bool, newInfo *api.File) (up *largeUpload, err error) {
	size := src.Size()
	parts := 0
	chunkSize := defaultChunkSize
	if size == -1 {
		fs.Debugf(o, "Streaming upload with --b2-chunk-size %s allows uploads of up to %s and will fail only when that limit is reached.", f.opt.ChunkSize, maxParts*f.opt.ChunkSize)
	} else {
		chunkSize = chunksize.Calculator(o, size, maxParts, defaultChunkSize)
		parts = int(size / int64(chunkSize))
		if size%int64(chunkSize) != 0 {
			parts++
		}
	}

	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_start_large_file",
	}
	bucket, bucketPath := o.split()
	bucketID, err := f.getBucketID(ctx, bucket)
	if err != nil {
		return nil, err
	}
	var request = api.StartLargeFileRequest{
		BucketID: bucketID,
		Name:     f.opt.Enc.FromStandardPath(bucketPath),
	}
	if newInfo == nil {
		modTime := src.ModTime(ctx)
		request.ContentType = fs.MimeType(ctx, src)
		request.Info = map[string]string{
			timeKey: timeString(modTime),
		}
		// Set the SHA1 if known
		if !o.fs.opt.DisableCheckSum || doCopy {
			if calculatedSha1, err := src.Hash(ctx, hash.SHA1); err == nil && calculatedSha1 != "" {
				request.Info[sha1Key] = calculatedSha1
			}
		}
	} else {
		request.ContentType = newInfo.ContentType
		request.Info = newInfo.Info
	}
	var response api.StartLargeFileResponse
	err = f.pacer.Call(func() (bool, error) {
		resp, err := f.srv.CallJSON(ctx, &opts, &request, &response)
		return f.shouldRetry(ctx, resp, err)
	})
	if err != nil {
		return nil, err
	}
	up = &largeUpload{
		f:         f,
		o:         o,
		doCopy:    doCopy,
		what:      "upload",
		id:        response.ID,
		size:      size,
		parts:     parts,
		sha1s:     make([]string, 0, 16),
		chunkSize: int64(chunkSize),
	}
	// unwrap the accounting from the input; we use wrap to put it
	// back on after the buffering
	if doCopy {
		up.what = "copy"
		up.src = src.(*Object)
	} else {
		up.in, up.wrap = accounting.UnWrap(in)
	}
	return up, nil
}
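
// exampleCountParts is an illustrative sketch (not used by rclone, and the
// name is made up) of the ceiling division above: a 1 GiB object with a
// 96 MiB chunk size needs 1073741824/100663296 = 10 full chunks plus a
// remainder, so 11 parts in total.
func exampleCountParts(size, chunkSize int64) int {
	parts := int(size / chunkSize)
	if size%chunkSize != 0 {
		parts++
	}
	return parts // exampleCountParts(1<<30, 96<<20) == 11
}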

// getUploadURL returns the upload info with the UploadURL and the AuthorizationToken
//
// This should be returned with returnUploadURL when finished
func (up *largeUpload) getUploadURL(ctx context.Context) (upload *api.GetUploadPartURLResponse, err error) {
	up.uploadMu.Lock()
	if len(up.uploads) > 0 {
		upload, up.uploads = up.uploads[0], up.uploads[1:]
		up.uploadMu.Unlock()
		return upload, nil
	}
	up.uploadMu.Unlock()

	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_get_upload_part_url",
	}
	var request = api.GetUploadPartURLRequest{
		ID: up.id,
	}
	err = up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &upload)
		return up.f.shouldRetry(ctx, resp, err)
	})
	if err != nil {
		return nil, fmt.Errorf("failed to get upload URL: %w", err)
	}
	return upload, nil
}

// returnUploadURL returns the UploadURL to the cache
func (up *largeUpload) returnUploadURL(upload *api.GetUploadPartURLResponse) {
	if upload == nil {
		return
	}
	up.uploadMu.Lock()
	up.uploads = append(up.uploads, upload)
	up.uploadMu.Unlock()
}

// addSha1 adds a SHA1 for the given chunk number to the sha1s being built up
func (up *largeUpload) addSha1(chunkNumber int, sha1 string) {
	up.sha1smu.Lock()
	defer up.sha1smu.Unlock()
	if len(up.sha1s) < chunkNumber+1 {
		up.sha1s = append(up.sha1s, make([]string, chunkNumber+1-len(up.sha1s))...)
	}
	up.sha1s[chunkNumber] = sha1
}
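
// exampleAddSha1Ordering is an illustrative sketch (not called by rclone, and
// the name is made up) showing that parts may finish in any order: addSha1
// grows the slice with empty placeholders as needed, so the result is always
// indexed by chunk number, which is the order b2_finish_large_file expects.
func exampleAddSha1Ordering() []string {
	up := &largeUpload{}
	up.addSha1(2, "cc") // sha1s grows to ["", "", "cc"]
	up.addSha1(0, "aa") // fills index 0: ["aa", "", "cc"]
	up.addSha1(1, "bb") // fills index 1: ["aa", "bb", "cc"]
	return up.sha1s
}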

// WriteChunk will write chunk chunkNumber from reader, where chunkNumber >= 0
func (up *largeUpload) WriteChunk(ctx context.Context, chunkNumber int, reader io.ReadSeeker) (size int64, err error) {
	// Only account after the checksum reads have been done
	if do, ok := reader.(pool.DelayAccountinger); ok {
		// To figure out this number, do a transfer and if the accounted size is 0 or a
		// multiple of what it should be, increase or decrease this number.
		do.DelayAccounting(1)
	}

	err = up.f.pacer.Call(func() (bool, error) {
		// Discover the size by seeking to the end
		size, err = reader.Seek(0, io.SeekEnd)
		if err != nil {
			return false, err
		}

		// rewind the reader on retry and after reading size
		_, err = reader.Seek(0, io.SeekStart)
		if err != nil {
			return false, err
		}

		fs.Debugf(up.o, "Sending chunk %d length %d", chunkNumber, size)

		// Get upload URL
		upload, err := up.getUploadURL(ctx)
		if err != nil {
			return false, err
		}

		in := newHashAppendingReader(reader, sha1.New())
		sizeWithHash := size + int64(in.AdditionalLength())

		// Authorization
		//
		// An upload authorization token, from b2_get_upload_part_url.
		//
		// X-Bz-Part-Number
		//
		// A number from 1 to 10000. The parts uploaded for one file
		// must have contiguous numbers, starting with 1.
		//
		// Content-Length
		//
		// The number of bytes in the file being uploaded. Note that
		// this header is required; you cannot leave it out and just
		// use chunked encoding. The minimum size of every part but
		// the last one is 100 MB (100,000,000 bytes).
		//
		// X-Bz-Content-Sha1
		//
		// The SHA1 checksum of this part of the file. B2 will
		// check this when the part is uploaded, to make sure that the
		// data arrived correctly. The same SHA1 checksum must be
		// passed to b2_finish_large_file.
		opts := rest.Opts{
			Method:  "POST",
			RootURL: upload.UploadURL,
			Body:    up.wrap(in),
			ExtraHeaders: map[string]string{
				"Authorization":    upload.AuthorizationToken,
				"X-Bz-Part-Number": fmt.Sprintf("%d", chunkNumber+1),
				sha1Header:         "hex_digits_at_end",
			},
			ContentLength: &sizeWithHash,
		}

		var response api.UploadPartResponse

		resp, err := up.f.srv.CallJSON(ctx, &opts, nil, &response)
		retry, err := up.f.shouldRetry(ctx, resp, err)
		if err != nil {
			fs.Debugf(up.o, "Error sending chunk %d (retry=%v): %v: %#v", chunkNumber, retry, err, err)
		}
		// On retryable error clear PartUploadURL
		if retry {
			fs.Debugf(up.o, "Clearing part upload URL because of error: %v", err)
			upload = nil
		}
		up.returnUploadURL(upload)
		up.addSha1(chunkNumber, in.HexSum())
		return retry, err
	})
	if err != nil {
		fs.Debugf(up.o, "Error sending chunk %d: %v", chunkNumber, err)
	} else {
		fs.Debugf(up.o, "Done sending chunk %d", chunkNumber)
	}
	return size, err
}
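
// exampleChunkHeaders is an illustrative sketch (not used by rclone, and the
// name is made up) of the header values WriteChunk derives for a chunk: B2
// part numbers are 1-based while chunkNumber is 0-based, and Content-Length
// covers the chunk bytes plus the 40 hex SHA1 characters appended by
// hashAppendingReader, e.g. exampleChunkHeaders(0, 100000000) == ("1", 100000040).
func exampleChunkHeaders(chunkNumber int, size int64) (partNumber string, contentLength int64) {
	return fmt.Sprintf("%d", chunkNumber+1), size + int64(hex.EncodedLen(sha1.Size))
}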

// Copy a chunk
func (up *largeUpload) copyChunk(ctx context.Context, part int, partSize int64) error {
	err := up.f.pacer.Call(func() (bool, error) {
		fs.Debugf(up.o, "Copying chunk %d length %d", part, partSize)
		opts := rest.Opts{
			Method: "POST",
			Path:   "/b2_copy_part",
		}
		offset := int64(part) * up.chunkSize // where we are in the source file
		var request = api.CopyPartRequest{
			SourceID:    up.src.id,
			LargeFileID: up.id,
			PartNumber:  int64(part + 1),
			Range:       fmt.Sprintf("bytes=%d-%d", offset, offset+partSize-1),
		}
		var response api.UploadPartResponse
		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &response)
		retry, err := up.f.shouldRetry(ctx, resp, err)
		if err != nil {
			fs.Debugf(up.o, "Error copying chunk %d (retry=%v): %v: %#v", part, retry, err, err)
		}
		up.addSha1(part, response.SHA1)
		return retry, err
	})
	if err != nil {
		fs.Debugf(up.o, "Error copying chunk %d: %v", part, err)
	} else {
		fs.Debugf(up.o, "Done copying chunk %d", part)
	}
	return err
}

// Close closes off the large upload
func (up *largeUpload) Close(ctx context.Context) error {
	fs.Debugf(up.o, "Finishing large file %s with %d parts", up.what, up.parts)
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_finish_large_file",
	}
	var request = api.FinishLargeFileRequest{
		ID:    up.id,
		SHA1s: up.sha1s,
	}
	var response api.FileInfo
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &response)
		return up.f.shouldRetry(ctx, resp, err)
	})
	if err != nil {
		return err
	}
	up.info = &response
	return nil
}

// Abort aborts the large upload
func (up *largeUpload) Abort(ctx context.Context) error {
	fs.Debugf(up.o, "Cancelling large file %s", up.what)
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_cancel_large_file",
	}
	var request = api.CancelLargeFileRequest{
		ID: up.id,
	}
	var response api.CancelLargeFileResponse
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &response)
		return up.f.shouldRetry(ctx, resp, err)
	})
	if err != nil {
		fs.Errorf(up.o, "Failed to cancel large file %s: %v", up.what, err)
	}
	return err
}

// Stream uploads the chunks from the input, starting with a required initial
// chunk. Assumes the file size is unknown and will upload until the input
// reaches EOF.
//
// Note that initialUploadBlock must be returned to f.putRW()
func (up *largeUpload) Stream(ctx context.Context, initialUploadBlock *pool.RW) (err error) {
	defer atexit.OnError(&err, func() { _ = up.Abort(ctx) })()
	fs.Debugf(up.o, "Starting streaming of large file (id %q)", up.id)
	var (
		g, gCtx      = errgroup.WithContext(ctx)
		hasMoreParts = true
	)
	up.size = initialUploadBlock.Size()
	up.parts = 0
	for part := 0; hasMoreParts; part++ {
		// Get a block of memory from the pool and token which limits concurrency.
		var rw *pool.RW
		if part == 0 {
			rw = initialUploadBlock
		} else {
			rw = up.f.getRW(false)
		}

		// Fail fast: if an errgroup managed function has returned an error, gCtx
		// is cancelled and there is no point in uploading all the other parts.
		if gCtx.Err() != nil {
			up.f.putRW(rw)
			break
		}

		// Read the chunk
		var n int64
		if part == 0 {
			n = rw.Size()
		} else {
			n, err = io.CopyN(rw, up.in, up.chunkSize)
			if err == io.EOF {
				if n == 0 {
					fs.Debugf(up.o, "Not sending empty chunk after EOF - ending.")
					up.f.putRW(rw)
					break
				} else {
					fs.Debugf(up.o, "Read less than a full chunk %d, making this the last one.", n)
				}
				hasMoreParts = false
			} else if err != nil {
				// other kinds of errors indicate failure
				up.f.putRW(rw)
				return err
			}
		}

		// Keep stats up to date
		up.parts += 1
		up.size += n
		if part > maxParts {
			up.f.putRW(rw)
			return fmt.Errorf("%q too big (%d bytes so far) makes too many parts %d > %d - increase --b2-chunk-size", up.o, up.size, up.parts, maxParts)
		}

		part := part // for the closure
		g.Go(func() (err error) {
			defer up.f.putRW(rw)
			_, err = up.WriteChunk(gCtx, part, rw)
			return err
		})
	}
	err = g.Wait()
	if err != nil {
		return err
	}
	return up.Close(ctx)
}

// Copy the chunks from the source to the destination
func (up *largeUpload) Copy(ctx context.Context) (err error) {
	defer atexit.OnError(&err, func() { _ = up.Abort(ctx) })()
	fs.Debugf(up.o, "Starting %s of large file in %d chunks (id %q)", up.what, up.parts, up.id)
	var (
		g, gCtx   = errgroup.WithContext(ctx)
		remaining = up.size
	)
	g.SetLimit(up.f.opt.UploadConcurrency)
	for part := 0; part < up.parts; part++ {
		// Fail fast: if an errgroup managed function has returned an error, gCtx
		// is cancelled and there is no point in copying all the other parts.
		if gCtx.Err() != nil {
			break
		}

		reqSize := remaining
		if reqSize >= up.chunkSize {
			reqSize = up.chunkSize
		}

		part := part // for the closure
		g.Go(func() (err error) {
			return up.copyChunk(gCtx, part, reqSize)
		})
		remaining -= reqSize
	}
	err = g.Wait()
	if err != nil {
		return err
	}
	return up.Close(ctx)
}
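
// exampleCopyRanges is an illustrative sketch (not used by rclone, and the
// name is made up) of the byte ranges Copy asks b2_copy_part for: every part
// covers chunkSize bytes except the last, which covers whatever remains, e.g.
// a 262144000 byte (250 MiB) source with 96 MiB chunks yields
// "bytes=0-100663295", "bytes=100663296-201326591" and "bytes=201326592-262143999".
func exampleCopyRanges(size, chunkSize int64) (ranges []string) {
	remaining := size
	for part := int64(0); remaining > 0; part++ {
		reqSize := remaining
		if reqSize >= chunkSize {
			reqSize = chunkSize
		}
		offset := part * chunkSize
		ranges = append(ranges, fmt.Sprintf("bytes=%d-%d", offset, offset+reqSize-1))
		remaining -= reqSize
	}
	return ranges
}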