github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/fs/operations/multithread.go

package operations

import (
	"bufio"
	"context"
	"errors"
	"fmt"
	"io"

	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/accounting"
	"github.com/rclone/rclone/lib/atexit"
	"github.com/rclone/rclone/lib/multipart"
	"golang.org/x/sync/errgroup"
)

const (
	multithreadChunkSize = 64 << 10
)

// Return a boolean as to whether we should use multi thread copy for
// this transfer
func doMultiThreadCopy(ctx context.Context, f fs.Fs, src fs.Object) bool {
	ci := fs.GetConfig(ctx)

	// Disable multi thread if...

	// ...it isn't configured
	if ci.MultiThreadStreams <= 1 {
		return false
	}
	// ...the source doesn't support it
	if src.Fs().Features().NoMultiThreading {
		return false
	}
	// ...the size of the object is less than the cutoff
	if src.Size() < int64(ci.MultiThreadCutoff) {
		return false
	}
	// ...the destination doesn't support it
	dstFeatures := f.Features()
	if dstFeatures.OpenChunkWriter == nil && dstFeatures.OpenWriterAt == nil {
		return false
	}
	// ...--multi-thread-streams wasn't set explicitly and source and
	// destination are both local
	if !ci.MultiThreadSet && dstFeatures.IsLocal && src.Fs().Features().IsLocal {
		return false
	}
	return true
}

// state for a multi-thread copy
type multiThreadCopyState struct {
	ctx         context.Context
	partSize    int64
	size        int64
	src         fs.Object
	acc         *accounting.Account
	numChunks   int
	noBuffering bool // set to read the input without buffering
}

// Copy a single chunk into place
func (mc *multiThreadCopyState) copyChunk(ctx context.Context, chunk int, writer fs.ChunkWriter) (err error) {
	defer func() {
		if err != nil {
			fs.Debugf(mc.src, "multi-thread copy: chunk %d/%d failed: %v", chunk+1, mc.numChunks, err)
		}
	}()
	start := int64(chunk) * mc.partSize
	if start >= mc.size {
		return nil
	}
	end := start + mc.partSize
	if end > mc.size {
		end = mc.size
	}
	size := end - start

	fs.Debugf(mc.src, "multi-thread copy: chunk %d/%d (%d-%d) size %v starting", chunk+1, mc.numChunks, start, end, fs.SizeSuffix(size))

	rc, err := Open(ctx, mc.src, &fs.RangeOption{Start: start, End: end - 1})
	if err != nil {
		return fmt.Errorf("multi-thread copy: failed to open source: %w", err)
	}
	defer fs.CheckClose(rc, &err)

	var rs io.ReadSeeker
	if mc.noBuffering {
		// Read directly if we are sure we aren't going to seek,
		// and account as we go
		rc.SetAccounting(mc.acc.AccountRead)
		rs = rc
	} else {
		// Read the chunk into a buffered reader
		rw := multipart.NewRW()
		defer fs.CheckClose(rw, &err)
		_, err = io.CopyN(rw, rc, size)
		if err != nil {
			return fmt.Errorf("multi-thread copy: failed to read chunk: %w", err)
		}
		// Account as we go
		rw.SetAccounting(mc.acc.AccountRead)
		rs = rw
	}

	// Write the chunk
	bytesWritten, err := writer.WriteChunk(ctx, chunk, rs)
	if err != nil {
		return fmt.Errorf("multi-thread copy: failed to write chunk: %w", err)
	}

	fs.Debugf(mc.src, "multi-thread copy: chunk %d/%d (%d-%d) size %v finished", chunk+1, mc.numChunks, start, end, fs.SizeSuffix(bytesWritten))
	return nil
}
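// chunkRangesExample is an illustrative sketch, not part of the original
// file: it shows how copyChunk derives the byte range it downloads. Chunk i
// covers [i*partSize, min((i+1)*partSize, size)) and is requested from the
// source with fs.RangeOption{Start: start, End: end - 1}, where End is
// inclusive. The function name and signature are hypothetical.
func chunkRangesExample(size, partSize int64) []fs.RangeOption {
	var ranges []fs.RangeOption
	for start := int64(0); start < size; start += partSize {
		end := start + partSize
		if end > size {
			// The final chunk is truncated to the file size
			end = size
		}
		ranges = append(ranges, fs.RangeOption{Start: start, End: end - 1})
	}
	return ranges
}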
// Given a file size and a chunkSize
// it returns the number of chunks, so that chunkSize * numChunks >= size
func calculateNumChunks(size int64, chunkSize int64) int {
	numChunks := size / chunkSize
	if size%chunkSize != 0 {
		numChunks++
	}
	return int(numChunks)
}
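// calculateNumChunksExample is an illustrative sketch, not part of the
// original file: it works through the ceiling division performed by
// calculateNumChunks. A 100 MiB file split into 64 MiB chunks needs 2
// chunks, because 64 MiB * 2 >= 100 MiB while 64 MiB * 1 < 100 MiB.
func calculateNumChunksExample() {
	fmt.Printf("100 MiB / 64 MiB chunks -> %d chunks\n", calculateNumChunks(100<<20, 64<<20)) // 2
	fmt.Printf("128 MiB / 64 MiB chunks -> %d chunks\n", calculateNumChunks(128<<20, 64<<20)) // 2
	fmt.Printf("1 byte  / 64 MiB chunks -> %d chunks\n", calculateNumChunks(1, 64<<20))       // 1
}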
// Copy src to (f, remote) using concurrency download threads. It tries to use the OpenChunkWriter feature
// and if that's not available it creates an adapter using OpenWriterAt
func multiThreadCopy(ctx context.Context, f fs.Fs, remote string, src fs.Object, concurrency int, tr *accounting.Transfer, options ...fs.OpenOption) (newDst fs.Object, err error) {
	openChunkWriter := f.Features().OpenChunkWriter
	ci := fs.GetConfig(ctx)
	noBuffering := false
	usingOpenWriterAt := false
	if openChunkWriter == nil {
		openWriterAt := f.Features().OpenWriterAt
		if openWriterAt == nil {
			return nil, errors.New("multi-thread copy: neither OpenChunkWriter nor OpenWriterAt supported")
		}
		openChunkWriter = openChunkWriterFromOpenWriterAt(openWriterAt, int64(ci.MultiThreadChunkSize), int64(ci.MultiThreadWriteBufferSize), f)
		// If we are using OpenWriterAt we don't seek the chunks so don't need to buffer
		fs.Debugf(src, "multi-thread copy: disabling buffering because destination uses OpenWriterAt")
		noBuffering = true
		usingOpenWriterAt = true
	} else if src.Fs().Features().IsLocal {
		// If the source fs is local we don't need to buffer
		fs.Debugf(src, "multi-thread copy: disabling buffering because source is local disk")
		noBuffering = true
	} else if f.Features().ChunkWriterDoesntSeek {
		// If the destination Fs promises not to seek its chunks
		// (except for retries) then we don't need buffering.
		fs.Debugf(src, "multi-thread copy: disabling buffering because destination has set ChunkWriterDoesntSeek")
		noBuffering = true
	}

	if src.Size() < 0 {
		return nil, fmt.Errorf("multi-thread copy: can't copy unknown sized file")
	}
	if src.Size() == 0 {
		return nil, fmt.Errorf("multi-thread copy: can't copy zero sized file")
	}

	info, chunkWriter, err := openChunkWriter(ctx, remote, src, options...)
	if err != nil {
		return nil, fmt.Errorf("multi-thread copy: failed to open chunk writer: %w", err)
	}

	uploadCtx, cancel := context.WithCancel(ctx)
	defer cancel()
	uploadedOK := false
	defer atexit.OnError(&err, func() {
		cancel()
		if info.LeavePartsOnError || uploadedOK {
			return
		}
		fs.Debugf(src, "multi-thread copy: cancelling transfer on exit")
		abortErr := chunkWriter.Abort(ctx)
		if abortErr != nil {
			fs.Debugf(src, "multi-thread copy: abort failed: %v", abortErr)
		}
	})()

	if info.ChunkSize > src.Size() {
		fs.Debugf(src, "multi-thread copy: chunk size %v was bigger than source file size %v", fs.SizeSuffix(info.ChunkSize), fs.SizeSuffix(src.Size()))
		info.ChunkSize = src.Size()
	}

	// Use the backend concurrency if it is higher than --multi-thread-streams or if --multi-thread-streams wasn't set explicitly
	if !ci.MultiThreadSet || info.Concurrency > concurrency {
		fs.Debugf(src, "multi-thread copy: using backend concurrency of %d instead of --multi-thread-streams %d", info.Concurrency, concurrency)
		concurrency = info.Concurrency
	}

	numChunks := calculateNumChunks(src.Size(), info.ChunkSize)
	if concurrency > numChunks {
		fs.Debugf(src, "multi-thread copy: number of streams %d was bigger than number of chunks %d", concurrency, numChunks)
		concurrency = numChunks
	}

	if concurrency < 1 {
		concurrency = 1
	}

	g, gCtx := errgroup.WithContext(uploadCtx)
	g.SetLimit(concurrency)

	mc := &multiThreadCopyState{
		ctx:         gCtx,
		size:        src.Size(),
		src:         src,
		partSize:    info.ChunkSize,
		numChunks:   numChunks,
		noBuffering: noBuffering,
	}

	// Make accounting
	mc.acc = tr.Account(gCtx, nil)

	fs.Debugf(src, "Starting multi-thread copy with %d chunks of size %v with %v parallel streams", mc.numChunks, fs.SizeSuffix(mc.partSize), concurrency)
	for chunk := 0; chunk < mc.numChunks; chunk++ {
		// Fail fast, in case an errgroup managed function returns an error
		if gCtx.Err() != nil {
			break
		}
		chunk := chunk
		g.Go(func() error {
			return mc.copyChunk(gCtx, chunk, chunkWriter)
		})
	}

	err = g.Wait()
	if err != nil {
		return nil, err
	}
	err = chunkWriter.Close(ctx)
	if err != nil {
		return nil, fmt.Errorf("multi-thread copy: failed to close object after copy: %w", err)
	}
	uploadedOK = true // file is definitely uploaded OK so no need to abort

	obj, err := f.NewObject(ctx, remote)
	if err != nil {
		return nil, fmt.Errorf("multi-thread copy: failed to find object after copy: %w", err)
	}

	// OpenWriterAt doesn't set metadata so we need to set it on completion
	if usingOpenWriterAt {
		setModTime := true
		if ci.Metadata {
			do, ok := obj.(fs.SetMetadataer)
			if ok {
				meta, err := fs.GetMetadataOptions(ctx, f, src, options)
				if err != nil {
					return nil, fmt.Errorf("multi-thread copy: failed to read metadata from source object: %w", err)
				}
				err = do.SetMetadata(ctx, meta)
				if err != nil {
					return nil, fmt.Errorf("multi-thread copy: failed to set metadata: %w", err)
				}
				setModTime = false
			} else {
				fs.Errorf(obj, "multi-thread copy: can't set metadata as SetMetadata isn't implemented in: %v", f)
			}
		}
		if setModTime {
			err = obj.SetModTime(ctx, src.ModTime(ctx))
			switch err {
			case nil, fs.ErrorCantSetModTime, fs.ErrorCantSetModTimeWithoutDelete:
			default:
				return nil, fmt.Errorf("multi-thread copy: failed to set modification time: %w", err)
			}
		}
	}

	fs.Debugf(src, "Finished multi-thread copy with %d parts of size %v", mc.numChunks, fs.SizeSuffix(mc.partSize))
	return obj, nil
}

// writerAtChunkWriter converts a WriterAtCloser into a ChunkWriter
type writerAtChunkWriter struct {
	remote          string
	size            int64
	writerAt        fs.WriterAtCloser
	chunkSize       int64
	chunks          int
	writeBufferSize int64
	f               fs.Fs
	closed          bool
}

// WriteChunk writes chunkNumber from reader
func (w *writerAtChunkWriter) WriteChunk(ctx context.Context, chunkNumber int, reader io.ReadSeeker) (int64, error) {
	fs.Debugf(w.remote, "writing chunk %v", chunkNumber)

	bytesToWrite := w.chunkSize
	if chunkNumber == (w.chunks-1) && w.size%w.chunkSize != 0 {
		bytesToWrite = w.size % w.chunkSize
	}

	var writer io.Writer = io.NewOffsetWriter(w.writerAt, int64(chunkNumber)*w.chunkSize)
	if w.writeBufferSize > 0 {
		writer = bufio.NewWriterSize(writer, int(w.writeBufferSize))
	}
	n, err := io.Copy(writer, reader)
	if err != nil {
		return -1, err
	}
	if n != bytesToWrite {
		return -1, fmt.Errorf("expected to write %v bytes for chunk %v, but wrote %v bytes", bytesToWrite, chunkNumber, n)
	}
	// if we were buffering, flush to disk
	switch w := writer.(type) {
	case *bufio.Writer:
		er2 := w.Flush()
		if er2 != nil {
			return -1, fmt.Errorf("multi-thread copy: flush failed: %w", er2)
		}
	}
	return n, nil
}

// Close the chunk writing
func (w *writerAtChunkWriter) Close(ctx context.Context) error {
	if w.closed {
		return nil
	}
	w.closed = true
	return w.writerAt.Close()
}

// Abort the chunk writing
func (w *writerAtChunkWriter) Abort(ctx context.Context) error {
	err := w.Close(ctx)
	if err != nil {
		fs.Errorf(w.remote, "multi-thread copy: failed to close file before aborting: %v", err)
	}
	obj, err := w.f.NewObject(ctx, w.remote)
	if err != nil {
		return fmt.Errorf("multi-thread copy: failed to find temp file when aborting chunk writer: %w", err)
	}
	return obj.Remove(ctx)
}

// openChunkWriterFromOpenWriterAt adapts an OpenWriterAtFn into an OpenChunkWriterFn using chunkSize and writeBufferSize
func openChunkWriterFromOpenWriterAt(openWriterAt fs.OpenWriterAtFn, chunkSize int64, writeBufferSize int64, f fs.Fs) fs.OpenChunkWriterFn {
	return func(ctx context.Context, remote string, src fs.ObjectInfo, options ...fs.OpenOption) (info fs.ChunkWriterInfo, writer fs.ChunkWriter, err error) {
		ci := fs.GetConfig(ctx)

		writerAt, err := openWriterAt(ctx, remote, src.Size())
		if err != nil {
			return info, nil, err
		}

		if writeBufferSize > 0 {
			fs.Debugf(src.Remote(), "multi-thread copy: write buffer set to %v", writeBufferSize)
		}

		chunkWriter := &writerAtChunkWriter{
			remote:          remote,
			size:            src.Size(),
			chunkSize:       chunkSize,
			chunks:          calculateNumChunks(src.Size(), chunkSize),
			writerAt:        writerAt,
			writeBufferSize: writeBufferSize,
			f:               f,
		}
		info = fs.ChunkWriterInfo{
			ChunkSize:   chunkSize,
			Concurrency: ci.MultiThreadStreams,
		}
		return info, chunkWriter, nil
	}
}
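// errgroupFanOutExample is an illustrative sketch, not part of the original
// file: it isolates the bounded fan-out pattern multiThreadCopy uses, i.e.
// an errgroup limited to `concurrency` goroutines, one task per chunk, with
// an early break once any task has failed and cancelled the group context.
// The function name and the `do` callback are hypothetical.
func errgroupFanOutExample(ctx context.Context, numChunks, concurrency int, do func(ctx context.Context, chunk int) error) error {
	g, gCtx := errgroup.WithContext(ctx)
	g.SetLimit(concurrency)
	for chunk := 0; chunk < numChunks; chunk++ {
		// Fail fast if a previously started task has already errored
		if gCtx.Err() != nil {
			break
		}
		chunk := chunk
		g.Go(func() error {
			return do(gCtx, chunk)
		})
	}
	// Wait returns the first error from any task, or nil if all succeeded
	return g.Wait()
}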