github.com/ncw/rclone@v1.48.1-0.20190724201158-a35aa1360e3e/backend/b2/upload.go

// Upload large files for b2
//
// Docs - https://www.backblaze.com/b2/docs/large_files.html

package b2

import (
	"bytes"
	"context"
	"crypto/sha1"
	"encoding/hex"
	"fmt"
	gohash "hash"
	"io"
	"strings"
	"sync"

	"github.com/ncw/rclone/backend/b2/api"
	"github.com/ncw/rclone/fs"
	"github.com/ncw/rclone/fs/accounting"
	"github.com/ncw/rclone/fs/hash"
	"github.com/ncw/rclone/lib/rest"
	"github.com/pkg/errors"
)

type hashAppendingReader struct {
	h         gohash.Hash
	in        io.Reader
	hexSum    string
	hexReader io.Reader
}

// Read returns all bytes from the original reader, then the hex sum
// of what was read, then EOF.
func (har *hashAppendingReader) Read(b []byte) (int, error) {
	if har.hexReader == nil {
		n, err := har.in.Read(b)
		if err == io.EOF {
			har.in = nil // allow GC
			err = nil    // allow reading hexSum before EOF

			har.hexSum = hex.EncodeToString(har.h.Sum(nil))
			har.hexReader = strings.NewReader(har.hexSum)
		}
		return n, err
	}
	return har.hexReader.Read(b)
}

// AdditionalLength returns how many bytes the appended hex sum will take up.
func (har *hashAppendingReader) AdditionalLength() int {
	return hex.EncodedLen(har.h.Size())
}

// HexSum returns the hash sum as hex. It's only available after the original
// reader has EOF'd. It's an empty string before that.
func (har *hashAppendingReader) HexSum() string {
	return har.hexSum
}

// newHashAppendingReader takes a Reader and a Hash and will append the hex sum
// after the original reader reaches EOF. The increased size depends on the
// given hash, which may be queried through AdditionalLength().
func newHashAppendingReader(in io.Reader, h gohash.Hash) *hashAppendingReader {
	withHash := io.TeeReader(in, h)
	return &hashAppendingReader{h: h, in: withHash}
}

// largeUpload is used to control the upload of large files which need chunking
type largeUpload struct {
	f        *Fs                             // parent Fs
	o        *Object                         // object being uploaded
	in       io.Reader                       // read the data from here
	wrap     accounting.WrapFn               // account parts being transferred
	id       string                          // ID of the file being uploaded
	size     int64                           // total size
	parts    int64                           // calculated number of parts, if known
	sha1s    []string                        // slice of SHA1s for each part
	uploadMu sync.Mutex                      // lock for upload variable
	uploads  []*api.GetUploadPartURLResponse // result of get upload URL calls
}

// newLargeUpload starts an upload of object o from in with metadata in src
func (f *Fs) newLargeUpload(ctx context.Context, o *Object, in io.Reader, src fs.ObjectInfo) (up *largeUpload, err error) {
	remote := o.remote
	size := src.Size()
	parts := int64(0)
	sha1SliceSize := int64(maxParts)
	if size == -1 {
		fs.Debugf(o, "Streaming upload with --b2-chunk-size %s allows uploads of up to %s and will fail only when that limit is reached.", f.opt.ChunkSize, maxParts*f.opt.ChunkSize)
	} else {
		parts = size / int64(o.fs.opt.ChunkSize)
		if size%int64(o.fs.opt.ChunkSize) != 0 {
			parts++
		}
		if parts > maxParts {
			return nil, errors.Errorf("%q too big (%d bytes) makes too many parts %d > %d - increase --b2-chunk-size", remote, size, parts, maxParts)
		}
		sha1SliceSize = parts
	}

	modTime := src.ModTime(ctx)
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_start_large_file",
	}
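
	// Ask B2 to start the large file. The file ID it returns (response.ID,
	// stored below as up.id) is what ties together the b2_upload_part,
	// b2_finish_large_file and b2_cancel_large_file calls made later.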
	bucketID, err := f.getBucketID()
	if err != nil {
		return nil, err
	}
	var request = api.StartLargeFileRequest{
		BucketID:    bucketID,
		Name:        o.fs.root + remote,
		ContentType: fs.MimeType(ctx, src),
		Info: map[string]string{
			timeKey: timeString(modTime),
		},
	}
	// Set the SHA1 if known
	if !o.fs.opt.DisableCheckSum {
		if calculatedSha1, err := src.Hash(ctx, hash.SHA1); err == nil && calculatedSha1 != "" {
			request.Info[sha1Key] = calculatedSha1
		}
	}
	var response api.StartLargeFileResponse
	err = f.pacer.Call(func() (bool, error) {
		resp, err := f.srv.CallJSON(&opts, &request, &response)
		return f.shouldRetry(resp, err)
	})
	if err != nil {
		return nil, err
	}
	// unwrap the accounting from the input; we use wrap to put it
	// back on after the buffering
	in, wrap := accounting.UnWrap(in)
	up = &largeUpload{
		f:     f,
		o:     o,
		in:    in,
		wrap:  wrap,
		id:    response.ID,
		size:  size,
		parts: parts,
		sha1s: make([]string, sha1SliceSize),
	}
	return up, nil
}

// getUploadURL returns the upload info with the UploadURL and the AuthorizationToken
//
// This should be returned with returnUploadURL when finished
func (up *largeUpload) getUploadURL() (upload *api.GetUploadPartURLResponse, err error) {
	up.uploadMu.Lock()
	defer up.uploadMu.Unlock()
	if len(up.uploads) == 0 {
		opts := rest.Opts{
			Method: "POST",
			Path:   "/b2_get_upload_part_url",
		}
		var request = api.GetUploadPartURLRequest{
			ID: up.id,
		}
		err := up.f.pacer.Call(func() (bool, error) {
			resp, err := up.f.srv.CallJSON(&opts, &request, &upload)
			return up.f.shouldRetry(resp, err)
		})
		if err != nil {
			return nil, errors.Wrap(err, "failed to get upload URL")
		}
	} else {
		upload, up.uploads = up.uploads[0], up.uploads[1:]
	}
	return upload, nil
}

// returnUploadURL returns the UploadURL to the cache
func (up *largeUpload) returnUploadURL(upload *api.GetUploadPartURLResponse) {
	if upload == nil {
		return
	}
	up.uploadMu.Lock()
	up.uploads = append(up.uploads, upload)
	up.uploadMu.Unlock()
}

// clearUploadURL clears the current UploadURL and the AuthorizationToken
func (up *largeUpload) clearUploadURL() {
	up.uploadMu.Lock()
	up.uploads = nil
	up.uploadMu.Unlock()
}

// Transfer a chunk
func (up *largeUpload) transferChunk(part int64, body []byte) error {
	err := up.f.pacer.Call(func() (bool, error) {
		fs.Debugf(up.o, "Sending chunk %d length %d", part, len(body))

		// Get upload URL
		upload, err := up.getUploadURL()
		if err != nil {
			return false, err
		}

		in := newHashAppendingReader(bytes.NewReader(body), sha1.New())
		size := int64(len(body)) + int64(in.AdditionalLength())

		// Authorization
		//
		// An upload authorization token, from b2_get_upload_part_url.
		//
		// X-Bz-Part-Number
		//
		// A number from 1 to 10000. The parts uploaded for one file
		// must have contiguous numbers, starting with 1.
		//
		// Content-Length
		//
		// The number of bytes in the file being uploaded. Note that
		// this header is required; you cannot leave it out and just
		// use chunked encoding. The minimum size of every part but
		// the last one is 100MB.
		//
		// X-Bz-Content-Sha1
		//
		// The SHA1 checksum of this part of the file. B2 will
		// check this when the part is uploaded, to make sure that the
		// data arrived correctly. The same SHA1 checksum must be
		// passed to b2_finish_large_file.
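		//
		// Here the SHA1 is sent using B2's "hex_digits_at_end" convention:
		// rather than being computed up front, the 40 hex digits are appended
		// to the request body by hashAppendingReader above, and ContentLength
		// below includes those extra bytes.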
		opts := rest.Opts{
			Method:  "POST",
			RootURL: upload.UploadURL,
			Body:    up.wrap(in),
			ExtraHeaders: map[string]string{
				"Authorization":    upload.AuthorizationToken,
				"X-Bz-Part-Number": fmt.Sprintf("%d", part),
				sha1Header:         "hex_digits_at_end",
			},
			ContentLength: &size,
		}

		var response api.UploadPartResponse

		resp, err := up.f.srv.CallJSON(&opts, nil, &response)
		retry, err := up.f.shouldRetry(resp, err)
		if err != nil {
			fs.Debugf(up.o, "Error sending chunk %d (retry=%v): %v: %#v", part, retry, err, err)
		}
		// On retryable error clear PartUploadURL
		if retry {
			fs.Debugf(up.o, "Clearing part upload URL because of error: %v", err)
			upload = nil
		}
		up.returnUploadURL(upload)
		up.sha1s[part-1] = in.HexSum()
		return retry, err
	})
	if err != nil {
		fs.Debugf(up.o, "Error sending chunk %d: %v", part, err)
	} else {
		fs.Debugf(up.o, "Done sending chunk %d", part)
	}
	return err
}

// finish closes off the large upload
func (up *largeUpload) finish() error {
	fs.Debugf(up.o, "Finishing large file upload with %d parts", up.parts)
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_finish_large_file",
	}
	var request = api.FinishLargeFileRequest{
		ID:    up.id,
		SHA1s: up.sha1s,
	}
	var response api.FileInfo
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(&opts, &request, &response)
		return up.f.shouldRetry(resp, err)
	})
	if err != nil {
		return err
	}
	return up.o.decodeMetaDataFileInfo(&response)
}

// cancel aborts the large upload
func (up *largeUpload) cancel() error {
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_cancel_large_file",
	}
	var request = api.CancelLargeFileRequest{
		ID: up.id,
	}
	var response api.CancelLargeFileResponse
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(&opts, &request, &response)
		return up.f.shouldRetry(resp, err)
	})
	return err
}

// managedTransferChunk transfers chunk part in its own goroutine, returning
// the buffer to the pool when done and recording the first failure in errs.
func (up *largeUpload) managedTransferChunk(wg *sync.WaitGroup, errs chan error, part int64, buf []byte) {
	wg.Add(1)
	go func(part int64, buf []byte) {
		defer wg.Done()
		defer up.f.putUploadBlock(buf)
		err := up.transferChunk(part, buf)
		if err != nil {
			select {
			case errs <- err:
			default:
			}
		}
	}(part, buf)
}

// finishOrCancelOnError completes the large upload if no error has been seen,
// otherwise it cancels the upload and returns the first error.
func (up *largeUpload) finishOrCancelOnError(err error, errs chan error) error {
	if err == nil {
		select {
		case err = <-errs:
		default:
		}
	}
	if err != nil {
		fs.Debugf(up.o, "Cancelling large file upload due to error: %v", err)
		cancelErr := up.cancel()
		if cancelErr != nil {
			fs.Errorf(up.o, "Failed to cancel large file upload: %v", cancelErr)
		}
		return err
	}
	return up.finish()
}
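
// Stream and Upload below share the same pattern: chunks are read from up.in
// one at a time, each is handed to managedTransferChunk which uploads it in
// its own goroutine, the first failure (if any) is kept in the buffered errs
// channel, and once all goroutines have finished finishOrCancelOnError either
// completes or aborts the large file.
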
// Stream uploads the chunks from the input, starting with a required initial
// chunk. Assumes the file size is unknown and will upload until the input
// reaches EOF.
func (up *largeUpload) Stream(initialUploadBlock []byte) (err error) {
	fs.Debugf(up.o, "Starting streaming of large file (id %q)", up.id)
	errs := make(chan error, 1)
	hasMoreParts := true
	var wg sync.WaitGroup

	// Transfer initial chunk
	up.size = int64(len(initialUploadBlock))
	up.managedTransferChunk(&wg, errs, 1, initialUploadBlock)

outer:
	for part := int64(2); hasMoreParts; part++ {
		// Check any errors
		select {
		case err = <-errs:
			break outer
		default:
		}

		// Get a block of memory
		buf := up.f.getUploadBlock()

		// Read the chunk
		var n int
		n, err = io.ReadFull(up.in, buf)
		if err == io.ErrUnexpectedEOF {
			fs.Debugf(up.o, "Read less than a full chunk, making this the last one.")
			buf = buf[:n]
			hasMoreParts = false
			err = nil
		} else if err == io.EOF {
			fs.Debugf(up.o, "Could not read any more bytes, previous chunk was the last.")
			up.f.putUploadBlock(buf)
			err = nil
			break outer
		} else if err != nil {
			// other kinds of errors indicate failure
			up.f.putUploadBlock(buf)
			break outer
		}

		// Keep stats up to date
		up.parts = part
		up.size += int64(n)
		if part > maxParts {
			err = errors.Errorf("%q too big (%d bytes so far) makes too many parts %d > %d - increase --b2-chunk-size", up.o, up.size, up.parts, maxParts)
			break outer
		}

		// Transfer the chunk
		up.managedTransferChunk(&wg, errs, part, buf)
	}
	wg.Wait()
	up.sha1s = up.sha1s[:up.parts]

	return up.finishOrCancelOnError(err, errs)
}

// Upload uploads the chunks from the input
func (up *largeUpload) Upload() error {
	fs.Debugf(up.o, "Starting upload of large file in %d chunks (id %q)", up.parts, up.id)
	remaining := up.size
	errs := make(chan error, 1)
	var wg sync.WaitGroup
	var err error
outer:
	for part := int64(1); part <= up.parts; part++ {
		// Check any errors
		select {
		case err = <-errs:
			break outer
		default:
		}

		reqSize := remaining
		if reqSize >= int64(up.f.opt.ChunkSize) {
			reqSize = int64(up.f.opt.ChunkSize)
		}

		// Get a block of memory
		buf := up.f.getUploadBlock()[:reqSize]

		// Read the chunk
		_, err = io.ReadFull(up.in, buf)
		if err != nil {
			up.f.putUploadBlock(buf)
			break outer
		}

		// Transfer the chunk
		up.managedTransferChunk(&wg, errs, part, buf)
		remaining -= reqSize
	}
	wg.Wait()

	return up.finishOrCancelOnError(err, errs)
}
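
// Callers are expected to drive this type roughly as sketched below (an
// illustrative outline using names from this file; the real call site in the
// backend may differ in detail):
//
//	up, err := f.newLargeUpload(ctx, o, in, src)
//	if err != nil {
//		return err
//	}
//	if src.Size() < 0 {
//		// Size unknown: read the first chunk here, then stream the rest.
//		buf := f.getUploadBlock()
//		n, _ := io.ReadFull(in, buf)
//		return up.Stream(buf[:n])
//	}
//	return up.Upload()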