github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/fs/operations/multithread.go (about)

     1  package operations
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  
    10  	"github.com/rclone/rclone/fs"
    11  	"github.com/rclone/rclone/fs/accounting"
    12  	"github.com/rclone/rclone/lib/atexit"
    13  	"github.com/rclone/rclone/lib/multipart"
    14  	"golang.org/x/sync/errgroup"
    15  )
    16  
const (
	// multithreadChunkSize is 64 KiB (64 << 10 bytes).
	//
	// NOTE(review): it is not referenced in the part of the file visible
	// here - presumably it is used elsewhere in the package; confirm
	// before changing or removing it.
	multithreadChunkSize = 64 << 10
)
    20  
    21  // Return a boolean as to whether we should use multi thread copy for
    22  // this transfer
    23  func doMultiThreadCopy(ctx context.Context, f fs.Fs, src fs.Object) bool {
    24  	ci := fs.GetConfig(ctx)
    25  
    26  	// Disable multi thread if...
    27  
    28  	// ...it isn't configured
    29  	if ci.MultiThreadStreams <= 1 {
    30  		return false
    31  	}
    32  	// ...if the source doesn't support it
    33  	if src.Fs().Features().NoMultiThreading {
    34  		return false
    35  	}
    36  	// ...size of object is less than cutoff
    37  	if src.Size() < int64(ci.MultiThreadCutoff) {
    38  		return false
    39  	}
    40  	// ...destination doesn't support it
    41  	dstFeatures := f.Features()
    42  	if dstFeatures.OpenChunkWriter == nil && dstFeatures.OpenWriterAt == nil {
    43  		return false
    44  	}
    45  	// ...if --multi-thread-streams not in use and source and
    46  	// destination are both local
    47  	if !ci.MultiThreadSet && dstFeatures.IsLocal && src.Fs().Features().IsLocal {
    48  		return false
    49  	}
    50  	return true
    51  }
    52  
// multiThreadCopyState is the state shared between all the chunk
// copies of a single multi-thread copy
type multiThreadCopyState struct {
	// context stored when the state is created; copyChunk does not
	// read it as it is passed its own ctx
	ctx         context.Context
	// size of each chunk - the final chunk is clipped to size
	partSize    int64
	// total size of the source object
	size        int64
	// source object being copied, also used as the subject of log messages
	src         fs.Object
	// accounting whose AccountRead is attached to the chunk readers
	acc         *accounting.Account
	// total number of chunks, used in log messages
	numChunks   int
	noBuffering bool // set to read the input without buffering
}
    63  
    64  // Copy a single chunk into place
    65  func (mc *multiThreadCopyState) copyChunk(ctx context.Context, chunk int, writer fs.ChunkWriter) (err error) {
    66  	defer func() {
    67  		if err != nil {
    68  			fs.Debugf(mc.src, "multi-thread copy: chunk %d/%d failed: %v", chunk+1, mc.numChunks, err)
    69  		}
    70  	}()
    71  	start := int64(chunk) * mc.partSize
    72  	if start >= mc.size {
    73  		return nil
    74  	}
    75  	end := start + mc.partSize
    76  	if end > mc.size {
    77  		end = mc.size
    78  	}
    79  	size := end - start
    80  
    81  	fs.Debugf(mc.src, "multi-thread copy: chunk %d/%d (%d-%d) size %v starting", chunk+1, mc.numChunks, start, end, fs.SizeSuffix(size))
    82  
    83  	rc, err := Open(ctx, mc.src, &fs.RangeOption{Start: start, End: end - 1})
    84  	if err != nil {
    85  		return fmt.Errorf("multi-thread copy: failed to open source: %w", err)
    86  	}
    87  	defer fs.CheckClose(rc, &err)
    88  
    89  	var rs io.ReadSeeker
    90  	if mc.noBuffering {
    91  		// Read directly if we are sure we aren't going to seek
    92  		// and account with accounting
    93  		rc.SetAccounting(mc.acc.AccountRead)
    94  		rs = rc
    95  	} else {
    96  		// Read the chunk into buffered reader
    97  		rw := multipart.NewRW()
    98  		defer fs.CheckClose(rw, &err)
    99  		_, err = io.CopyN(rw, rc, size)
   100  		if err != nil {
   101  			return fmt.Errorf("multi-thread copy: failed to read chunk: %w", err)
   102  		}
   103  		// Account as we go
   104  		rw.SetAccounting(mc.acc.AccountRead)
   105  		rs = rw
   106  	}
   107  
   108  	// Write the chunk
   109  	bytesWritten, err := writer.WriteChunk(ctx, chunk, rs)
   110  	if err != nil {
   111  		return fmt.Errorf("multi-thread copy: failed to write chunk: %w", err)
   112  	}
   113  
   114  	fs.Debugf(mc.src, "multi-thread copy: chunk %d/%d (%d-%d) size %v finished", chunk+1, mc.numChunks, start, end, fs.SizeSuffix(bytesWritten))
   115  	return nil
   116  }
   117  
   118  // Given a file size and a chunkSize
   119  // it returns the number of chunks, so that chunkSize * numChunks >= size
   120  func calculateNumChunks(size int64, chunkSize int64) int {
   121  	numChunks := size / chunkSize
   122  	if size%chunkSize != 0 {
   123  		numChunks++
   124  	}
   125  	return int(numChunks)
   126  }
   127  
   128  // Copy src to (f, remote) using streams download threads. It tries to use the OpenChunkWriter feature
   129  // and if that's not available it creates an adapter using OpenWriterAt
   130  func multiThreadCopy(ctx context.Context, f fs.Fs, remote string, src fs.Object, concurrency int, tr *accounting.Transfer, options ...fs.OpenOption) (newDst fs.Object, err error) {
   131  	openChunkWriter := f.Features().OpenChunkWriter
   132  	ci := fs.GetConfig(ctx)
   133  	noBuffering := false
   134  	usingOpenWriterAt := false
   135  	if openChunkWriter == nil {
   136  		openWriterAt := f.Features().OpenWriterAt
   137  		if openWriterAt == nil {
   138  			return nil, errors.New("multi-thread copy: neither OpenChunkWriter nor OpenWriterAt supported")
   139  		}
   140  		openChunkWriter = openChunkWriterFromOpenWriterAt(openWriterAt, int64(ci.MultiThreadChunkSize), int64(ci.MultiThreadWriteBufferSize), f)
   141  		// If we are using OpenWriterAt we don't seek the chunks so don't need to buffer
   142  		fs.Debugf(src, "multi-thread copy: disabling buffering because destination uses OpenWriterAt")
   143  		noBuffering = true
   144  		usingOpenWriterAt = true
   145  	} else if src.Fs().Features().IsLocal {
   146  		// If the source fs is local we don't need to buffer
   147  		fs.Debugf(src, "multi-thread copy: disabling buffering because source is local disk")
   148  		noBuffering = true
   149  	} else if f.Features().ChunkWriterDoesntSeek {
   150  		// If the destination Fs promises not to seek its chunks
   151  		// (except for retries) then we don't need buffering.
   152  		fs.Debugf(src, "multi-thread copy: disabling buffering because destination has set ChunkWriterDoesntSeek")
   153  		noBuffering = true
   154  	}
   155  
   156  	if src.Size() < 0 {
   157  		return nil, fmt.Errorf("multi-thread copy: can't copy unknown sized file")
   158  	}
   159  	if src.Size() == 0 {
   160  		return nil, fmt.Errorf("multi-thread copy: can't copy zero sized file")
   161  	}
   162  
   163  	info, chunkWriter, err := openChunkWriter(ctx, remote, src, options...)
   164  	if err != nil {
   165  		return nil, fmt.Errorf("multi-thread copy: failed to open chunk writer: %w", err)
   166  	}
   167  
   168  	uploadCtx, cancel := context.WithCancel(ctx)
   169  	defer cancel()
   170  	uploadedOK := false
   171  	defer atexit.OnError(&err, func() {
   172  		cancel()
   173  		if info.LeavePartsOnError || uploadedOK {
   174  			return
   175  		}
   176  		fs.Debugf(src, "multi-thread copy: cancelling transfer on exit")
   177  		abortErr := chunkWriter.Abort(ctx)
   178  		if abortErr != nil {
   179  			fs.Debugf(src, "multi-thread copy: abort failed: %v", abortErr)
   180  		}
   181  	})()
   182  
   183  	if info.ChunkSize > src.Size() {
   184  		fs.Debugf(src, "multi-thread copy: chunk size %v was bigger than source file size %v", fs.SizeSuffix(info.ChunkSize), fs.SizeSuffix(src.Size()))
   185  		info.ChunkSize = src.Size()
   186  	}
   187  
   188  	// Use the backend concurrency if it is higher than --multi-thread-streams or if --multi-thread-streams wasn't set explicitly
   189  	if !ci.MultiThreadSet || info.Concurrency > concurrency {
   190  		fs.Debugf(src, "multi-thread copy: using backend concurrency of %d instead of --multi-thread-streams %d", info.Concurrency, concurrency)
   191  		concurrency = info.Concurrency
   192  	}
   193  
   194  	numChunks := calculateNumChunks(src.Size(), info.ChunkSize)
   195  	if concurrency > numChunks {
   196  		fs.Debugf(src, "multi-thread copy: number of streams %d was bigger than number of chunks %d", concurrency, numChunks)
   197  		concurrency = numChunks
   198  	}
   199  
   200  	if concurrency < 1 {
   201  		concurrency = 1
   202  	}
   203  
   204  	g, gCtx := errgroup.WithContext(uploadCtx)
   205  	g.SetLimit(concurrency)
   206  
   207  	mc := &multiThreadCopyState{
   208  		ctx:         gCtx,
   209  		size:        src.Size(),
   210  		src:         src,
   211  		partSize:    info.ChunkSize,
   212  		numChunks:   numChunks,
   213  		noBuffering: noBuffering,
   214  	}
   215  
   216  	// Make accounting
   217  	mc.acc = tr.Account(gCtx, nil)
   218  
   219  	fs.Debugf(src, "Starting multi-thread copy with %d chunks of size %v with %v parallel streams", mc.numChunks, fs.SizeSuffix(mc.partSize), concurrency)
   220  	for chunk := 0; chunk < mc.numChunks; chunk++ {
   221  		// Fail fast, in case an errgroup managed function returns an error
   222  		if gCtx.Err() != nil {
   223  			break
   224  		}
   225  		chunk := chunk
   226  		g.Go(func() error {
   227  			return mc.copyChunk(gCtx, chunk, chunkWriter)
   228  		})
   229  	}
   230  
   231  	err = g.Wait()
   232  	if err != nil {
   233  		return nil, err
   234  	}
   235  	err = chunkWriter.Close(ctx)
   236  	if err != nil {
   237  		return nil, fmt.Errorf("multi-thread copy: failed to close object after copy: %w", err)
   238  	}
   239  	uploadedOK = true // file is definitely uploaded OK so no need to abort
   240  
   241  	obj, err := f.NewObject(ctx, remote)
   242  	if err != nil {
   243  		return nil, fmt.Errorf("multi-thread copy: failed to find object after copy: %w", err)
   244  	}
   245  
   246  	// OpenWriterAt doesn't set metadata so we need to set it on completion
   247  	if usingOpenWriterAt {
   248  		setModTime := true
   249  		if ci.Metadata {
   250  			do, ok := obj.(fs.SetMetadataer)
   251  			if ok {
   252  				meta, err := fs.GetMetadataOptions(ctx, f, src, options)
   253  				if err != nil {
   254  					return nil, fmt.Errorf("multi-thread copy: failed to read metadata from source object: %w", err)
   255  				}
   256  				err = do.SetMetadata(ctx, meta)
   257  				if err != nil {
   258  					return nil, fmt.Errorf("multi-thread copy: failed to set metadata: %w", err)
   259  				}
   260  				setModTime = false
   261  			} else {
   262  				fs.Errorf(obj, "multi-thread copy: can't set metadata as SetMetadata isn't implemented in: %v", f)
   263  			}
   264  		}
   265  		if setModTime {
   266  			err = obj.SetModTime(ctx, src.ModTime(ctx))
   267  			switch err {
   268  			case nil, fs.ErrorCantSetModTime, fs.ErrorCantSetModTimeWithoutDelete:
   269  			default:
   270  				return nil, fmt.Errorf("multi-thread copy: failed to set modification time: %w", err)
   271  			}
   272  		}
   273  	}
   274  
   275  	fs.Debugf(src, "Finished multi-thread copy with %d parts of size %v", mc.numChunks, fs.SizeSuffix(mc.partSize))
   276  	return obj, nil
   277  }
   278  
// writerAtChunkWriter converts a WriterAtCloser into a ChunkWriter
type writerAtChunkWriter struct {
	// remote path of the object being written, also used in log messages
	remote          string
	// total size of the object being written
	size            int64
	// destination which the chunks are written into at their offsets
	writerAt        fs.WriterAtCloser
	// size of every chunk except possibly the last
	chunkSize       int64
	// total number of chunks, used to detect the final chunk
	chunks          int
	// size of the write buffer used by WriteChunk, no buffering if <= 0
	writeBufferSize int64
	// Fs used by Abort to find and remove the object
	f               fs.Fs
	// set once Close has been called so writerAt is closed only once
	closed          bool
}
   290  
   291  // WriteChunk writes chunkNumber from reader
   292  func (w *writerAtChunkWriter) WriteChunk(ctx context.Context, chunkNumber int, reader io.ReadSeeker) (int64, error) {
   293  	fs.Debugf(w.remote, "writing chunk %v", chunkNumber)
   294  
   295  	bytesToWrite := w.chunkSize
   296  	if chunkNumber == (w.chunks-1) && w.size%w.chunkSize != 0 {
   297  		bytesToWrite = w.size % w.chunkSize
   298  	}
   299  
   300  	var writer io.Writer = io.NewOffsetWriter(w.writerAt, int64(chunkNumber)*w.chunkSize)
   301  	if w.writeBufferSize > 0 {
   302  		writer = bufio.NewWriterSize(writer, int(w.writeBufferSize))
   303  	}
   304  	n, err := io.Copy(writer, reader)
   305  	if err != nil {
   306  		return -1, err
   307  	}
   308  	if n != bytesToWrite {
   309  		return -1, fmt.Errorf("expected to write %v bytes for chunk %v, but wrote %v bytes", bytesToWrite, chunkNumber, n)
   310  	}
   311  	// if we were buffering, flush to disk
   312  	switch w := writer.(type) {
   313  	case *bufio.Writer:
   314  		er2 := w.Flush()
   315  		if er2 != nil {
   316  			return -1, fmt.Errorf("multi-thread copy: flush failed: %w", err)
   317  		}
   318  	}
   319  	return n, nil
   320  }
   321  
   322  // Close the chunk writing
   323  func (w *writerAtChunkWriter) Close(ctx context.Context) error {
   324  	if w.closed {
   325  		return nil
   326  	}
   327  	w.closed = true
   328  	return w.writerAt.Close()
   329  }
   330  
   331  // Abort the chunk writing
   332  func (w *writerAtChunkWriter) Abort(ctx context.Context) error {
   333  	err := w.Close(ctx)
   334  	if err != nil {
   335  		fs.Errorf(w.remote, "multi-thread copy: failed to close file before aborting: %v", err)
   336  	}
   337  	obj, err := w.f.NewObject(ctx, w.remote)
   338  	if err != nil {
   339  		return fmt.Errorf("multi-thread copy: failed to find temp file when aborting chunk writer: %w", err)
   340  	}
   341  	return obj.Remove(ctx)
   342  }
   343  
   344  // openChunkWriterFromOpenWriterAt adapts an OpenWriterAtFn into an OpenChunkWriterFn using chunkSize and writeBufferSize
   345  func openChunkWriterFromOpenWriterAt(openWriterAt fs.OpenWriterAtFn, chunkSize int64, writeBufferSize int64, f fs.Fs) fs.OpenChunkWriterFn {
   346  	return func(ctx context.Context, remote string, src fs.ObjectInfo, options ...fs.OpenOption) (info fs.ChunkWriterInfo, writer fs.ChunkWriter, err error) {
   347  		ci := fs.GetConfig(ctx)
   348  
   349  		writerAt, err := openWriterAt(ctx, remote, src.Size())
   350  		if err != nil {
   351  			return info, nil, err
   352  		}
   353  
   354  		if writeBufferSize > 0 {
   355  			fs.Debugf(src.Remote(), "multi-thread copy: write buffer set to %v", writeBufferSize)
   356  		}
   357  
   358  		chunkWriter := &writerAtChunkWriter{
   359  			remote:          remote,
   360  			size:            src.Size(),
   361  			chunkSize:       chunkSize,
   362  			chunks:          calculateNumChunks(src.Size(), chunkSize),
   363  			writerAt:        writerAt,
   364  			writeBufferSize: writeBufferSize,
   365  			f:               f,
   366  		}
   367  		info = fs.ChunkWriterInfo{
   368  			ChunkSize:   chunkSize,
   369  			Concurrency: ci.MultiThreadStreams,
   370  		}
   371  		return info, chunkWriter, nil
   372  	}
   373  }