github.com/xhghs/rclone@v1.51.1-0.20200430155106-e186a28cced8/backend/b2/upload.go

// Upload large files for b2
//
// Docs - https://www.backblaze.com/b2/docs/large_files.html

package b2

import (
	"bytes"
	"context"
	"crypto/sha1"
	"encoding/hex"
	"fmt"
	gohash "hash"
	"io"
	"strings"
	"sync"

	"github.com/pkg/errors"
	"github.com/rclone/rclone/backend/b2/api"
	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/accounting"
	"github.com/rclone/rclone/fs/hash"
	"github.com/rclone/rclone/lib/rest"
)

type hashAppendingReader struct {
	h         gohash.Hash
	in        io.Reader
	hexSum    string
	hexReader io.Reader
}

// Read returns all bytes from the original reader, then the hex sum
// of what was read so far, then EOF.
func (har *hashAppendingReader) Read(b []byte) (int, error) {
	if har.hexReader == nil {
		n, err := har.in.Read(b)
		if err == io.EOF {
			har.in = nil // allow GC
			err = nil    // allow reading hexSum before EOF

			har.hexSum = hex.EncodeToString(har.h.Sum(nil))
			har.hexReader = strings.NewReader(har.hexSum)
		}
		return n, err
	}
	return har.hexReader.Read(b)
}

// AdditionalLength returns how many bytes the appended hex sum will take up.
func (har *hashAppendingReader) AdditionalLength() int {
	return hex.EncodedLen(har.h.Size())
}

// HexSum returns the hash sum as hex. It's only available after the original
// reader has EOF'd. It's an empty string before that.
func (har *hashAppendingReader) HexSum() string {
	return har.hexSum
}

// newHashAppendingReader takes a Reader and a Hash and will append the hex sum
// after the original reader reaches EOF. The increased size depends on the
// given hash, which may be queried through AdditionalLength().
func newHashAppendingReader(in io.Reader, h gohash.Hash) *hashAppendingReader {
	withHash := io.TeeReader(in, h)
	return &hashAppendingReader{h: h, in: withHash}
}

// largeUpload is used to control the upload of large files which need chunking
type largeUpload struct {
	f        *Fs                             // parent Fs
	o        *Object                         // object being uploaded
	in       io.Reader                       // read the data from here
	wrap     accounting.WrapFn               // account parts being transferred
	id       string                          // ID of the file being uploaded
	size     int64                           // total size
	parts    int64                           // calculated number of parts, if known
	sha1s    []string                        // slice of SHA1s for each part
	uploadMu sync.Mutex                      // lock for upload variable
	uploads  []*api.GetUploadPartURLResponse // result of get upload URL calls
}

// newLargeUpload starts an upload of object o from in with metadata in src
func (f *Fs) newLargeUpload(ctx context.Context, o *Object, in io.Reader, src fs.ObjectInfo) (up *largeUpload, err error) {
	remote := o.remote
	size := src.Size()
	parts := int64(0)
	sha1SliceSize := int64(maxParts)
	if size == -1 {
		fs.Debugf(o, "Streaming upload with --b2-chunk-size %s allows uploads of up to %s and will fail only when that limit is reached.", f.opt.ChunkSize, maxParts*f.opt.ChunkSize)
	} else {
		parts = size / int64(o.fs.opt.ChunkSize)
		if size%int64(o.fs.opt.ChunkSize) != 0 {
			parts++
		}
		if parts > maxParts {
			return nil, errors.Errorf("%q too big (%d bytes) makes too many parts %d > %d - increase --b2-chunk-size", remote, size, parts, maxParts)
		}
		sha1SliceSize = parts
	}

	modTime := src.ModTime(ctx)
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_start_large_file",
	}
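	// Work out the bucket and the object path within it, then resolve the
	// bucket ID that b2_start_large_file needs.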
	bucket, bucketPath := o.split()
	bucketID, err := f.getBucketID(ctx, bucket)
	if err != nil {
		return nil, err
	}
	var request = api.StartLargeFileRequest{
		BucketID:    bucketID,
		Name:        f.opt.Enc.FromStandardPath(bucketPath),
		ContentType: fs.MimeType(ctx, src),
		Info: map[string]string{
			timeKey: timeString(modTime),
		},
	}
	// Set the SHA1 if known
	if !o.fs.opt.DisableCheckSum {
		if calculatedSha1, err := src.Hash(ctx, hash.SHA1); err == nil && calculatedSha1 != "" {
			request.Info[sha1Key] = calculatedSha1
		}
	}
	var response api.StartLargeFileResponse
	err = f.pacer.Call(func() (bool, error) {
		resp, err := f.srv.CallJSON(ctx, &opts, &request, &response)
		return f.shouldRetry(ctx, resp, err)
	})
	if err != nil {
		return nil, err
	}
	// unwrap the accounting from the input, we use wrap to put it
	// back on after the buffering
	in, wrap := accounting.UnWrap(in)
	up = &largeUpload{
		f:     f,
		o:     o,
		in:    in,
		wrap:  wrap,
		id:    response.ID,
		size:  size,
		parts: parts,
		sha1s: make([]string, sha1SliceSize),
	}
	return up, nil
}

// getUploadURL returns the upload info with the UploadURL and the AuthorizationToken
//
// This should be returned with returnUploadURL when finished
func (up *largeUpload) getUploadURL(ctx context.Context) (upload *api.GetUploadPartURLResponse, err error) {
	up.uploadMu.Lock()
	defer up.uploadMu.Unlock()
	if len(up.uploads) == 0 {
		opts := rest.Opts{
			Method: "POST",
			Path:   "/b2_get_upload_part_url",
		}
		var request = api.GetUploadPartURLRequest{
			ID: up.id,
		}
		err := up.f.pacer.Call(func() (bool, error) {
			resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &upload)
			return up.f.shouldRetry(ctx, resp, err)
		})
		if err != nil {
			return nil, errors.Wrap(err, "failed to get upload URL")
		}
	} else {
		upload, up.uploads = up.uploads[0], up.uploads[1:]
	}
	return upload, nil
}

// returnUploadURL returns the UploadURL to the cache
func (up *largeUpload) returnUploadURL(upload *api.GetUploadPartURLResponse) {
	if upload == nil {
		return
	}
	up.uploadMu.Lock()
	up.uploads = append(up.uploads, upload)
	up.uploadMu.Unlock()
}

// clearUploadURL clears the current UploadURL and the AuthorizationToken
func (up *largeUpload) clearUploadURL() {
	up.uploadMu.Lock()
	up.uploads = nil
	up.uploadMu.Unlock()
}

// Transfer a chunk
func (up *largeUpload) transferChunk(ctx context.Context, part int64, body []byte) error {
	err := up.f.pacer.Call(func() (bool, error) {
		fs.Debugf(up.o, "Sending chunk %d length %d", part, len(body))

		// Get upload URL
		upload, err := up.getUploadURL(ctx)
		if err != nil {
			return false, err
		}

		in := newHashAppendingReader(bytes.NewReader(body), sha1.New())
		size := int64(len(body)) + int64(in.AdditionalLength())

		// Authorization
		//
		// An upload authorization token, from b2_get_upload_part_url.
		//
		// X-Bz-Part-Number
		//
		// A number from 1 to 10000. The parts uploaded for one file
		// must have contiguous numbers, starting with 1.
		//
		// Content-Length
		//
		// The number of bytes in the file being uploaded. Note that
		// this header is required; you cannot leave it out and just
		// use chunked encoding. The minimum size of every part but
		// the last one is 100MB.
		//
		// X-Bz-Content-Sha1
		//
		// The SHA1 checksum of this part of the file. B2 will
		// check this when the part is uploaded, to make sure that the
		// data arrived correctly. The same SHA1 checksum must be
		// passed to b2_finish_large_file.
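
		// The SHA1 is streamed after the body ("hex_digits_at_end" below),
		// so the part does not need to be hashed before the upload starts.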
		opts := rest.Opts{
			Method:  "POST",
			RootURL: upload.UploadURL,
			Body:    up.wrap(in),
			ExtraHeaders: map[string]string{
				"Authorization":    upload.AuthorizationToken,
				"X-Bz-Part-Number": fmt.Sprintf("%d", part),
				sha1Header:         "hex_digits_at_end",
			},
			ContentLength: &size,
		}

		var response api.UploadPartResponse

		resp, err := up.f.srv.CallJSON(ctx, &opts, nil, &response)
		retry, err := up.f.shouldRetry(ctx, resp, err)
		if err != nil {
			fs.Debugf(up.o, "Error sending chunk %d (retry=%v): %v: %#v", part, retry, err, err)
		}
		// On retryable error clear PartUploadURL
		if retry {
			fs.Debugf(up.o, "Clearing part upload URL because of error: %v", err)
			upload = nil
		}
		up.returnUploadURL(upload)
		up.sha1s[part-1] = in.HexSum()
		return retry, err
	})
	if err != nil {
		fs.Debugf(up.o, "Error sending chunk %d: %v", part, err)
	} else {
		fs.Debugf(up.o, "Done sending chunk %d", part)
	}
	return err
}

// finish closes off the large upload
func (up *largeUpload) finish(ctx context.Context) error {
	fs.Debugf(up.o, "Finishing large file upload with %d parts", up.parts)
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_finish_large_file",
	}
	var request = api.FinishLargeFileRequest{
		ID:    up.id,
		SHA1s: up.sha1s,
	}
	var response api.FileInfo
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &response)
		return up.f.shouldRetry(ctx, resp, err)
	})
	if err != nil {
		return err
	}
	return up.o.decodeMetaDataFileInfo(&response)
}

// cancel aborts the large upload
func (up *largeUpload) cancel(ctx context.Context) error {
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_cancel_large_file",
	}
	var request = api.CancelLargeFileRequest{
		ID: up.id,
	}
	var response api.CancelLargeFileResponse
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &response)
		return up.f.shouldRetry(ctx, resp, err)
	})
	return err
}

func (up *largeUpload) managedTransferChunk(ctx context.Context, wg *sync.WaitGroup, errs chan error, part int64, buf []byte) {
	wg.Add(1)
	go func(part int64, buf []byte) {
		defer wg.Done()
		defer up.f.putUploadBlock(buf)
		err := up.transferChunk(ctx, part, buf)
		if err != nil {
			select {
			case errs <- err:
			default:
			}
		}
	}(part, buf)
}

func (up *largeUpload) finishOrCancelOnError(ctx context.Context, err error, errs chan error) error {
	if err == nil {
		select {
		case err = <-errs:
		default:
		}
	}
	if err != nil {
		fs.Debugf(up.o, "Cancelling large file upload due to error: %v", err)
		cancelErr := up.cancel(ctx)
		if cancelErr != nil {
			fs.Errorf(up.o, "Failed to cancel large file upload: %v", cancelErr)
		}
		return err
	}
	return up.finish(ctx)
}

// Stream uploads the chunks from the input, starting with a required initial
// chunk. Assumes the file size is unknown and will upload until the input
// reaches EOF.
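//
// The initial chunk is sent as part 1 and counted towards the total size;
// subsequent chunks are read from up.in until EOF.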
func (up *largeUpload) Stream(ctx context.Context, initialUploadBlock []byte) (err error) {
	fs.Debugf(up.o, "Starting streaming of large file (id %q)", up.id)
	errs := make(chan error, 1)
	hasMoreParts := true
	var wg sync.WaitGroup

	// Transfer initial chunk
	up.size = int64(len(initialUploadBlock))
	up.managedTransferChunk(ctx, &wg, errs, 1, initialUploadBlock)

outer:
	for part := int64(2); hasMoreParts; part++ {
		// Check any errors
		select {
		case err = <-errs:
			break outer
		default:
		}

		// Get a block of memory
		buf := up.f.getUploadBlock()

		// Read the chunk
		var n int
		n, err = io.ReadFull(up.in, buf)
		if err == io.ErrUnexpectedEOF {
			fs.Debugf(up.o, "Read less than a full chunk, making this the last one.")
			buf = buf[:n]
			hasMoreParts = false
			err = nil
		} else if err == io.EOF {
			fs.Debugf(up.o, "Could not read any more bytes, previous chunk was the last.")
			up.f.putUploadBlock(buf)
			err = nil
			break outer
		} else if err != nil {
			// other kinds of errors indicate failure
			up.f.putUploadBlock(buf)
			break outer
		}

		// Keep stats up to date
		up.parts = part
		up.size += int64(n)
		if part > maxParts {
			err = errors.Errorf("%q too big (%d bytes so far) makes too many parts %d > %d - increase --b2-chunk-size", up.o, up.size, up.parts, maxParts)
			break outer
		}

		// Transfer the chunk
		up.managedTransferChunk(ctx, &wg, errs, part, buf)
	}
	wg.Wait()
	up.sha1s = up.sha1s[:up.parts]

	return up.finishOrCancelOnError(ctx, err, errs)
}

// Upload uploads the chunks from the input
func (up *largeUpload) Upload(ctx context.Context) error {
	fs.Debugf(up.o, "Starting upload of large file in %d chunks (id %q)", up.parts, up.id)
	remaining := up.size
	errs := make(chan error, 1)
	var wg sync.WaitGroup
	var err error
outer:
	for part := int64(1); part <= up.parts; part++ {
		// Check any errors
		select {
		case err = <-errs:
			break outer
		default:
		}

		reqSize := remaining
		if reqSize >= int64(up.f.opt.ChunkSize) {
			reqSize = int64(up.f.opt.ChunkSize)
		}

		// Get a block of memory
		buf := up.f.getUploadBlock()[:reqSize]

		// Read the chunk
		_, err = io.ReadFull(up.in, buf)
		if err != nil {
			up.f.putUploadBlock(buf)
			break outer
		}

		// Transfer the chunk
		up.managedTransferChunk(ctx, &wg, errs, part, buf)
		remaining -= reqSize
	}
	wg.Wait()

	return up.finishOrCancelOnError(ctx, err, errs)
}
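
// exampleHexDigitsAtEnd is an illustrative sketch, not part of the original
// upload.go: it shows how hashAppendingReader, as used by transferChunk,
// streams the chunk body followed by its SHA1 as 40 hex digits, which is the
// layout B2 expects when X-Bz-Content-Sha1 is set to "hex_digits_at_end".
func exampleHexDigitsAtEnd(body []byte) {
	in := newHashAppendingReader(bytes.NewReader(body), sha1.New())

	// The Content-Length sent to B2 covers the body plus the appended hex sum.
	size := int64(len(body)) + int64(in.AdditionalLength())

	var sent bytes.Buffer
	n, err := io.Copy(&sent, in)
	if err != nil {
		fmt.Printf("copy failed: %v\n", err)
		return
	}

	// After the copy the trailing 40 bytes of sent are in.HexSum(), and the
	// number of bytes written matches the Content-Length calculated above.
	fmt.Printf("wrote %d bytes (expected %d), trailing sum %q\n", n, size, in.HexSum())
}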