github.com/artpar/rclone@v1.67.3/backend/b2/upload.go

// Upload large files for b2
//
// Docs - https://www.backblaze.com/b2/docs/large_files.html

package b2

import (
	"context"
	"crypto/sha1"
	"encoding/hex"
	"fmt"
	gohash "hash"
	"io"
	"strings"
	"sync"

	"github.com/artpar/rclone/backend/b2/api"
	"github.com/artpar/rclone/fs"
	"github.com/artpar/rclone/fs/accounting"
	"github.com/artpar/rclone/fs/chunksize"
	"github.com/artpar/rclone/fs/hash"
	"github.com/artpar/rclone/lib/atexit"
	"github.com/artpar/rclone/lib/pool"
	"github.com/artpar/rclone/lib/rest"
	"golang.org/x/sync/errgroup"
)

type hashAppendingReader struct {
	h         gohash.Hash
	in        io.Reader
	hexSum    string
	hexReader io.Reader
}

// Read returns all bytes from the original reader, then the hex sum
// of what was read so far, then EOF.
func (har *hashAppendingReader) Read(b []byte) (int, error) {
	if har.hexReader == nil {
		n, err := har.in.Read(b)
		if err == io.EOF {
			har.in = nil // allow GC
			err = nil    // allow reading hexSum before EOF

			har.hexSum = hex.EncodeToString(har.h.Sum(nil))
			har.hexReader = strings.NewReader(har.hexSum)
		}
		return n, err
	}
	return har.hexReader.Read(b)
}

// AdditionalLength returns how many bytes the appended hex sum will take up.
func (har *hashAppendingReader) AdditionalLength() int {
	return hex.EncodedLen(har.h.Size())
}

// HexSum returns the hash sum as hex. It's only available after the original
// reader has EOF'd. It's an empty string before that.
func (har *hashAppendingReader) HexSum() string {
	return har.hexSum
}
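
// Illustrative sketch only (not part of the upstream code; the variable names are
// hypothetical): it shows the intended read-through behaviour - the payload is
// streamed first, then the 40 hex SHA1 digits, and HexSum becomes valid once the
// wrapped reader hits EOF.
//
//	har := newHashAppendingReader(strings.NewReader("payload"), sha1.New())
//	var buf bytes.Buffer
//	_, _ = io.Copy(&buf, har) // buf now holds "payload" followed by 40 hex digits
//	sum := har.HexSum()       // the same 40 hex digits, empty before EOF
//	_ = sum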

// newHashAppendingReader takes a Reader and a Hash and will append the hex sum
// after the original reader reaches EOF. The increased size depends on the
// given hash, which may be queried through AdditionalLength().
func newHashAppendingReader(in io.Reader, h gohash.Hash) *hashAppendingReader {
	withHash := io.TeeReader(in, h)
	return &hashAppendingReader{h: h, in: withHash}
}

// largeUpload is used to control the upload of large files which need chunking
type largeUpload struct {
	f         *Fs                             // parent Fs
	o         *Object                         // object being uploaded
	doCopy    bool                            // doing copy rather than upload
	what      string                          // text name of operation for logs
	in        io.Reader                       // read the data from here
	wrap      accounting.WrapFn               // account parts being transferred
	id        string                          // ID of the file being uploaded
	size      int64                           // total size
	parts     int                             // calculated number of parts, if known
	sha1smu   sync.Mutex                      // mutex to protect sha1s
	sha1s     []string                        // slice of SHA1s for each part
	uploadMu  sync.Mutex                      // lock for upload variable
	uploads   []*api.GetUploadPartURLResponse // result of get upload URL calls
	chunkSize int64                           // chunk size to use
	src       *Object                         // if copying, object we are reading from
	info      *api.FileInfo                   // final response with info about the object
}

// newLargeUpload starts an upload of object o from in with metadata in src
//
// If newInfo is set then metadata from that will be used instead of reading it from src
func (f *Fs) newLargeUpload(ctx context.Context, o *Object, in io.Reader, src fs.ObjectInfo, defaultChunkSize fs.SizeSuffix, doCopy bool, newInfo *api.File) (up *largeUpload, err error) {
	size := src.Size()
	parts := 0
	chunkSize := defaultChunkSize
	if size == -1 {
		fs.Debugf(o, "Streaming upload with --b2-chunk-size %s allows uploads of up to %s and will fail only when that limit is reached.", f.opt.ChunkSize, maxParts*f.opt.ChunkSize)
	} else {
		chunkSize = chunksize.Calculator(o, size, maxParts, defaultChunkSize)
		parts = int(size / int64(chunkSize))
		if size%int64(chunkSize) != 0 {
			parts++
		}
	}

	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_start_large_file",
	}
	bucket, bucketPath := o.split()
	bucketID, err := f.getBucketID(ctx, bucket)
	if err != nil {
		return nil, err
	}
	var request = api.StartLargeFileRequest{
		BucketID: bucketID,
		Name:     f.opt.Enc.FromStandardPath(bucketPath),
	}
	if newInfo == nil {
		modTime := src.ModTime(ctx)
		request.ContentType = fs.MimeType(ctx, src)
		request.Info = map[string]string{
			timeKey: timeString(modTime),
		}
		// Set the SHA1 if known
		if !o.fs.opt.DisableCheckSum || doCopy {
			if calculatedSha1, err := src.Hash(ctx, hash.SHA1); err == nil && calculatedSha1 != "" {
				request.Info[sha1Key] = calculatedSha1
			}
		}
	} else {
		request.ContentType = newInfo.ContentType
		request.Info = newInfo.Info
	}
	var response api.StartLargeFileResponse
	err = f.pacer.Call(func() (bool, error) {
		resp, err := f.srv.CallJSON(ctx, &opts, &request, &response)
		return f.shouldRetry(ctx, resp, err)
	})
	if err != nil {
		return nil, err
	}
	up = &largeUpload{
		f:         f,
		o:         o,
		doCopy:    doCopy,
		what:      "upload",
		id:        response.ID,
		size:      size,
		parts:     parts,
		sha1s:     make([]string, 0, 16),
		chunkSize: int64(chunkSize),
	}
	// unwrap the accounting from the input; we use wrap to put it
	// back on after the buffering
	if doCopy {
		up.what = "copy"
		up.src = src.(*Object)
	} else {
		up.in, up.wrap = accounting.UnWrap(in)
	}
	return up, nil
}
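
// Illustration of the parts calculation in newLargeUpload above (hypothetical
// numbers, not part of the upstream code): a 1 GiB object with a 96 MiB chunk
// size needs 11 parts.
//
//	size := int64(1 << 30)         // 1073741824 bytes
//	chunkSize := int64(96 << 20)   // 100663296 bytes
//	parts := int(size / chunkSize) // 10 full chunks
//	if size%chunkSize != 0 {
//		parts++ // 11: ten full chunks plus one final short chunk
//	}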

// getUploadURL returns the upload info with the UploadURL and the AuthorizationToken
//
// This should be returned with returnUploadURL when finished
func (up *largeUpload) getUploadURL(ctx context.Context) (upload *api.GetUploadPartURLResponse, err error) {
	up.uploadMu.Lock()
	if len(up.uploads) > 0 {
		upload, up.uploads = up.uploads[0], up.uploads[1:]
		up.uploadMu.Unlock()
		return upload, nil
	}
	up.uploadMu.Unlock()

	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_get_upload_part_url",
	}
	var request = api.GetUploadPartURLRequest{
		ID: up.id,
	}
	err = up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &upload)
		return up.f.shouldRetry(ctx, resp, err)
	})
	if err != nil {
		return nil, fmt.Errorf("failed to get upload URL: %w", err)
	}
	return upload, nil
}

// returnUploadURL returns the UploadURL to the cache
func (up *largeUpload) returnUploadURL(upload *api.GetUploadPartURLResponse) {
	if upload == nil {
		return
	}
	up.uploadMu.Lock()
	up.uploads = append(up.uploads, upload)
	up.uploadMu.Unlock()
}

// addSha1 adds the sha1 for a chunk to the list of sha1s being built up
func (up *largeUpload) addSha1(chunkNumber int, sha1 string) {
	up.sha1smu.Lock()
	defer up.sha1smu.Unlock()
	if len(up.sha1s) < chunkNumber+1 {
		up.sha1s = append(up.sha1s, make([]string, chunkNumber+1-len(up.sha1s))...)
	}
	up.sha1s[chunkNumber] = sha1
}

// WriteChunk will write chunk number chunkNumber (>= 0) with the bytes from reader
func (up *largeUpload) WriteChunk(ctx context.Context, chunkNumber int, reader io.ReadSeeker) (size int64, err error) {
	// Only account after the checksum reads have been done
	if do, ok := reader.(pool.DelayAccountinger); ok {
		// To figure out this number, do a transfer and if the accounted size is 0 or a
		// multiple of what it should be, increase or decrease this number.
		do.DelayAccounting(1)
	}

	err = up.f.pacer.Call(func() (bool, error) {
		// Discover the size by seeking to the end
		size, err = reader.Seek(0, io.SeekEnd)
		if err != nil {
			return false, err
		}

		// rewind the reader on retry and after reading size
		_, err = reader.Seek(0, io.SeekStart)
		if err != nil {
			return false, err
		}

		fs.Debugf(up.o, "Sending chunk %d length %d", chunkNumber, size)

		// Get upload URL
		upload, err := up.getUploadURL(ctx)
		if err != nil {
			return false, err
		}

		in := newHashAppendingReader(reader, sha1.New())
		sizeWithHash := size + int64(in.AdditionalLength())

		// Authorization
		//
		// An upload authorization token, from b2_get_upload_part_url.
		//
		// X-Bz-Part-Number
		//
		// A number from 1 to 10000. The parts uploaded for one file
		// must have contiguous numbers, starting with 1.
		//
		// Content-Length
		//
		// The number of bytes in the file being uploaded. Note that
		// this header is required; you cannot leave it out and just
		// use chunked encoding. The minimum size of every part but
		// the last one is 100 MB (100,000,000 bytes).
		//
		// X-Bz-Content-Sha1
		//
		// The SHA1 checksum of this part of the file. B2 will
		// check this when the part is uploaded, to make sure that the
		// data arrived correctly. The same SHA1 checksum must be
		// passed to b2_finish_large_file.
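		//
		// As a hedged illustration (the concrete values are hypothetical), the
		// request assembled below therefore looks roughly like:
		//
		//	POST <upload.UploadURL>
		//	Authorization: <upload.AuthorizationToken>
		//	X-Bz-Part-Number: <chunkNumber+1>
		//	X-Bz-Content-Sha1: hex_digits_at_end
		//	Content-Length: <size + 40>
		//
		//	<chunk bytes><40 hex SHA1 digits appended by hashAppendingReader>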
		opts := rest.Opts{
			Method:  "POST",
			RootURL: upload.UploadURL,
			Body:    up.wrap(in),
			ExtraHeaders: map[string]string{
				"Authorization":    upload.AuthorizationToken,
				"X-Bz-Part-Number": fmt.Sprintf("%d", chunkNumber+1),
				sha1Header:         "hex_digits_at_end",
			},
			ContentLength: &sizeWithHash,
		}

		var response api.UploadPartResponse

		resp, err := up.f.srv.CallJSON(ctx, &opts, nil, &response)
		retry, err := up.f.shouldRetry(ctx, resp, err)
		if err != nil {
			fs.Debugf(up.o, "Error sending chunk %d (retry=%v): %v: %#v", chunkNumber, retry, err, err)
		}
		// On retryable error clear PartUploadURL
		if retry {
			fs.Debugf(up.o, "Clearing part upload URL because of error: %v", err)
			upload = nil
		}
		up.returnUploadURL(upload)
		up.addSha1(chunkNumber, in.HexSum())
		return retry, err
	})
	if err != nil {
		fs.Debugf(up.o, "Error sending chunk %d: %v", chunkNumber, err)
	} else {
		fs.Debugf(up.o, "Done sending chunk %d", chunkNumber)
	}
	return size, err
}

// Copy a chunk
func (up *largeUpload) copyChunk(ctx context.Context, part int, partSize int64) error {
	err := up.f.pacer.Call(func() (bool, error) {
		fs.Debugf(up.o, "Copying chunk %d length %d", part, partSize)
		opts := rest.Opts{
			Method: "POST",
			Path:   "/b2_copy_part",
		}
		offset := int64(part) * up.chunkSize // where we are in the source file
		var request = api.CopyPartRequest{
			SourceID:    up.src.id,
			LargeFileID: up.id,
			PartNumber:  int64(part + 1),
			Range:       fmt.Sprintf("bytes=%d-%d", offset, offset+partSize-1),
		}
		var response api.UploadPartResponse
		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &response)
		retry, err := up.f.shouldRetry(ctx, resp, err)
		if err != nil {
			fs.Debugf(up.o, "Error copying chunk %d (retry=%v): %v: %#v", part, retry, err, err)
		}
		up.addSha1(part, response.SHA1)
		return retry, err
	})
	if err != nil {
		fs.Debugf(up.o, "Error copying chunk %d: %v", part, err)
	} else {
		fs.Debugf(up.o, "Done copying chunk %d", part)
	}
	return err
}

// Close closes off the large upload
func (up *largeUpload) Close(ctx context.Context) error {
	fs.Debugf(up.o, "Finishing large file %s with %d parts", up.what, up.parts)
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_finish_large_file",
	}
	var request = api.FinishLargeFileRequest{
		ID:    up.id,
		SHA1s: up.sha1s,
	}
	var response api.FileInfo
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &response)
		return up.f.shouldRetry(ctx, resp, err)
	})
	if err != nil {
		return err
	}
	up.info = &response
	return nil
}

// Abort aborts the large upload
func (up *largeUpload) Abort(ctx context.Context) error {
	fs.Debugf(up.o, "Cancelling large file %s", up.what)
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_cancel_large_file",
	}
	var request = api.CancelLargeFileRequest{
		ID: up.id,
	}
	var response api.CancelLargeFileResponse
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &response)
		return up.f.shouldRetry(ctx, resp, err)
	})
	if err != nil {
		fs.Errorf(up.o, "Failed to cancel large file %s: %v", up.what, err)
	}
	return err
}
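
// Illustrative lifecycle sketch (hypothetical caller code, not part of the upstream
// API): a large upload is started with newLargeUpload, driven by Stream (or Copy),
// and finished by Close; Stream and Copy abort the upload themselves on error via
// the deferred atexit.OnError handlers below.
//
//	up, err := f.newLargeUpload(ctx, o, in, src, f.opt.ChunkSize, false, nil)
//	if err != nil {
//		return err
//	}
//	return up.Stream(ctx, firstChunk) // uploads the parts, then calls up.Close(ctx)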

// Stream uploads the chunks from the input, starting with a required initial
// chunk. Assumes the file size is unknown and will upload until the input
// reaches EOF.
//
// Note that initialUploadBlock must be returned to f.putRW()
func (up *largeUpload) Stream(ctx context.Context, initialUploadBlock *pool.RW) (err error) {
	defer atexit.OnError(&err, func() { _ = up.Abort(ctx) })()
	fs.Debugf(up.o, "Starting streaming of large file (id %q)", up.id)
	var (
		g, gCtx      = errgroup.WithContext(ctx)
		hasMoreParts = true
	)
	up.size = initialUploadBlock.Size()
	up.parts = 0
	for part := 0; hasMoreParts; part++ {
		// Get a block of memory from the pool and a token which limits concurrency.
		var rw *pool.RW
		if part == 0 {
			rw = initialUploadBlock
		} else {
			rw = up.f.getRW(false)
		}

		// Fail fast: if an errgroup managed function has returned an error then
		// gCtx is cancelled and there is no point in uploading the other parts.
		if gCtx.Err() != nil {
			up.f.putRW(rw)
			break
		}

		// Read the chunk
		var n int64
		if part == 0 {
			n = rw.Size()
		} else {
			n, err = io.CopyN(rw, up.in, up.chunkSize)
			if err == io.EOF {
				if n == 0 {
					fs.Debugf(up.o, "Not sending empty chunk after EOF - ending.")
					up.f.putRW(rw)
					break
				} else {
					fs.Debugf(up.o, "Read less than a full chunk %d, making this the last one.", n)
				}
				hasMoreParts = false
			} else if err != nil {
				// other kinds of errors indicate failure
				up.f.putRW(rw)
				return err
			}
		}

		// Keep stats up to date
		up.parts += 1
		up.size += n
		if part > maxParts {
			up.f.putRW(rw)
			return fmt.Errorf("%q too big (%d bytes so far) makes too many parts %d > %d - increase --b2-chunk-size", up.o, up.size, up.parts, maxParts)
		}

		part := part // for the closure
		g.Go(func() (err error) {
			defer up.f.putRW(rw)
			_, err = up.WriteChunk(gCtx, part, rw)
			return err
		})
	}
	err = g.Wait()
	if err != nil {
		return err
	}
	return up.Close(ctx)
}

// Copy the chunks from the source to the destination
func (up *largeUpload) Copy(ctx context.Context) (err error) {
	defer atexit.OnError(&err, func() { _ = up.Abort(ctx) })()
	fs.Debugf(up.o, "Starting %s of large file in %d chunks (id %q)", up.what, up.parts, up.id)
	var (
		g, gCtx   = errgroup.WithContext(ctx)
		remaining = up.size
	)
	g.SetLimit(up.f.opt.UploadConcurrency)
	for part := 0; part < up.parts; part++ {
		// Fail fast: if an errgroup managed function has returned an error then
		// gCtx is cancelled and there is no point in copying the other parts.
		if gCtx.Err() != nil {
			break
		}

		reqSize := remaining
		if reqSize >= up.chunkSize {
			reqSize = up.chunkSize
		}

		part := part // for the closure
		g.Go(func() (err error) {
			return up.copyChunk(gCtx, part, reqSize)
		})
		remaining -= reqSize
	}
	err = g.Wait()
	if err != nil {
		return err
	}
	return up.Close(ctx)
}
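
// For illustration only (hypothetical numbers, not from the upstream code): copying
// a 250,000,000 byte object with a 100,000,000 byte chunk size issues three
// b2_copy_part calls whose ranges follow the offset/partSize arithmetic above:
//
//	part 0: Range "bytes=0-99999999"
//	part 1: Range "bytes=100000000-199999999"
//	part 2: Range "bytes=200000000-249999999" (final short part of 50,000,000 bytes)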