github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/backend/b2/upload.go

// Upload large files for b2
//
// Docs - https://www.backblaze.com/b2/docs/large_files.html

package b2

import (
	"bytes"
	"context"
	"crypto/sha1"
	"encoding/hex"
	"fmt"
	gohash "hash"
	"io"
	"strings"
	"sync"

	"github.com/pkg/errors"
	"github.com/rclone/rclone/backend/b2/api"
	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/accounting"
	"github.com/rclone/rclone/fs/hash"
	"github.com/rclone/rclone/lib/rest"
)

type hashAppendingReader struct {
	h         gohash.Hash
	in        io.Reader
	hexSum    string
	hexReader io.Reader
}

// Read returns all bytes from the original reader, then the hex sum
// of what was read so far, then EOF.
func (har *hashAppendingReader) Read(b []byte) (int, error) {
	if har.hexReader == nil {
		n, err := har.in.Read(b)
		if err == io.EOF {
			har.in = nil // allow GC
			err = nil    // allow reading hexSum before EOF

			har.hexSum = hex.EncodeToString(har.h.Sum(nil))
			har.hexReader = strings.NewReader(har.hexSum)
		}
		return n, err
	}
	return har.hexReader.Read(b)
}

// AdditionalLength returns how many bytes the appended hex sum will take up.
func (har *hashAppendingReader) AdditionalLength() int {
	return hex.EncodedLen(har.h.Size())
}

// HexSum returns the hash sum as hex. It's only available after the original
// reader has EOF'd. It's an empty string before that.
func (har *hashAppendingReader) HexSum() string {
	return har.hexSum
}

// newHashAppendingReader takes a Reader and a Hash and will append the hex sum
// after the original reader reaches EOF. The increased size depends on the
// given hash, which may be queried through AdditionalLength().
func newHashAppendingReader(in io.Reader, h gohash.Hash) *hashAppendingReader {
	withHash := io.TeeReader(in, h)
	return &hashAppendingReader{h: h, in: withHash}
}
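// exampleHashAppendingReaderUsage is an illustrative sketch only (a
// hypothetical helper, not called anywhere in this backend) of how a
// hashAppendingReader is meant to be consumed: the wrapped bytes come out
// first, the hex digest follows them, and HexSum only becomes valid once
// everything has been read. transferChunk below uses the same pattern with
// SHA-1 over a part body.
func exampleHashAppendingReaderUsage(body []byte) (payloadPlusSum []byte, hexSum string, err error) {
	in := newHashAppendingReader(bytes.NewReader(body), sha1.New())

	// Reading to the end yields the original bytes followed by the hex digest.
	var buf bytes.Buffer
	if _, err = io.Copy(&buf, in); err != nil {
		return nil, "", err
	}

	// HexSum is empty until the wrapped reader has been fully consumed.
	return buf.Bytes(), in.HexSum(), nil
}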
// largeUpload is used to control the upload of large files which need chunking
type largeUpload struct {
	f        *Fs                             // parent Fs
	o        *Object                         // object being uploaded
	in       io.Reader                       // read the data from here
	wrap     accounting.WrapFn               // account parts being transferred
	id       string                          // ID of the file being uploaded
	size     int64                           // total size
	parts    int64                           // calculated number of parts, if known
	sha1s    []string                        // slice of SHA1s for each part
	uploadMu sync.Mutex                      // lock for upload variable
	uploads  []*api.GetUploadPartURLResponse // result of get upload URL calls
}

// newLargeUpload starts an upload of object o from in with metadata in src
func (f *Fs) newLargeUpload(ctx context.Context, o *Object, in io.Reader, src fs.ObjectInfo) (up *largeUpload, err error) {
	remote := o.remote
	size := src.Size()
	parts := int64(0)
	sha1SliceSize := int64(maxParts)
	if size == -1 {
		fs.Debugf(o, "Streaming upload with --b2-chunk-size %s allows uploads of up to %s and will fail only when that limit is reached.", f.opt.ChunkSize, maxParts*f.opt.ChunkSize)
	} else {
		parts = size / int64(o.fs.opt.ChunkSize)
		if size%int64(o.fs.opt.ChunkSize) != 0 {
			parts++
		}
		if parts > maxParts {
			return nil, errors.Errorf("%q too big (%d bytes) makes too many parts %d > %d - increase --b2-chunk-size", remote, size, parts, maxParts)
		}
		sha1SliceSize = parts
	}

	modTime := src.ModTime(ctx)
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_start_large_file",
	}
	bucket, bucketPath := o.split()
	bucketID, err := f.getBucketID(ctx, bucket)
	if err != nil {
		return nil, err
	}
	var request = api.StartLargeFileRequest{
		BucketID:    bucketID,
		Name:        f.opt.Enc.FromStandardPath(bucketPath),
		ContentType: fs.MimeType(ctx, src),
		Info: map[string]string{
			timeKey: timeString(modTime),
		},
	}
	// Set the SHA1 if known
	if !o.fs.opt.DisableCheckSum {
		if calculatedSha1, err := src.Hash(ctx, hash.SHA1); err == nil && calculatedSha1 != "" {
			request.Info[sha1Key] = calculatedSha1
		}
	}
	var response api.StartLargeFileResponse
	err = f.pacer.Call(func() (bool, error) {
		resp, err := f.srv.CallJSON(ctx, &opts, &request, &response)
		return f.shouldRetry(ctx, resp, err)
	})
	if err != nil {
		return nil, err
	}
	// unwrap the accounting from the input, we use wrap to put it
	// back on after the buffering
	in, wrap := accounting.UnWrap(in)
	up = &largeUpload{
		f:     f,
		o:     o,
		in:    in,
		wrap:  wrap,
		id:    response.ID,
		size:  size,
		parts: parts,
		sha1s: make([]string, sha1SliceSize),
	}
	return up, nil
}

// getUploadURL returns the upload info with the UploadURL and the AuthorizationToken
//
// This should be returned with returnUploadURL when finished
func (up *largeUpload) getUploadURL(ctx context.Context) (upload *api.GetUploadPartURLResponse, err error) {
	up.uploadMu.Lock()
	defer up.uploadMu.Unlock()
	if len(up.uploads) == 0 {
		opts := rest.Opts{
			Method: "POST",
			Path:   "/b2_get_upload_part_url",
		}
		var request = api.GetUploadPartURLRequest{
			ID: up.id,
		}
		err := up.f.pacer.Call(func() (bool, error) {
			resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &upload)
			return up.f.shouldRetry(ctx, resp, err)
		})
		if err != nil {
			return nil, errors.Wrap(err, "failed to get upload URL")
		}
	} else {
		upload, up.uploads = up.uploads[0], up.uploads[1:]
	}
	return upload, nil
}

// returnUploadURL returns the UploadURL to the cache
func (up *largeUpload) returnUploadURL(upload *api.GetUploadPartURLResponse) {
	if upload == nil {
		return
	}
	up.uploadMu.Lock()
	up.uploads = append(up.uploads, upload)
	up.uploadMu.Unlock()
}
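// exampleUploadURLCycle is an illustrative sketch only (a hypothetical helper,
// not called anywhere in this backend) of the borrow/use/return cycle for
// part upload URLs. transferChunk below follows this pattern: on a retryable
// failure the URL is dropped (returnUploadURL ignores nil) so the next
// attempt fetches a fresh one via b2_get_upload_part_url. The retryable
// parameter here stands in for the result of shouldRetry.
func exampleUploadURLCycle(ctx context.Context, up *largeUpload, retryable bool) error {
	upload, err := up.getUploadURL(ctx)
	if err != nil {
		return err
	}

	// ... use upload.UploadURL and upload.AuthorizationToken to POST a part ...

	if retryable {
		// Dropping the URL forces a fresh one to be requested next time.
		upload = nil
	}
	up.returnUploadURL(upload)
	return nil
}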
// Transfer a chunk
func (up *largeUpload) transferChunk(ctx context.Context, part int64, body []byte) error {
	err := up.f.pacer.Call(func() (bool, error) {
		fs.Debugf(up.o, "Sending chunk %d length %d", part, len(body))

		// Get upload URL
		upload, err := up.getUploadURL(ctx)
		if err != nil {
			return false, err
		}

		in := newHashAppendingReader(bytes.NewReader(body), sha1.New())
		size := int64(len(body)) + int64(in.AdditionalLength())

		// Authorization
		//
		// An upload authorization token, from b2_get_upload_part_url.
		//
		// X-Bz-Part-Number
		//
		// A number from 1 to 10000. The parts uploaded for one file
		// must have contiguous numbers, starting with 1.
		//
		// Content-Length
		//
		// The number of bytes in the file being uploaded. Note that
		// this header is required; you cannot leave it out and just
		// use chunked encoding. The minimum size of every part but
		// the last one is 100MB.
		//
		// X-Bz-Content-Sha1
		//
		// The SHA1 checksum of this part of the file. B2 will
		// check this when the part is uploaded, to make sure that the
		// data arrived correctly. The same SHA1 checksum must be
		// passed to b2_finish_large_file.
		opts := rest.Opts{
			Method:  "POST",
			RootURL: upload.UploadURL,
			Body:    up.wrap(in),
			ExtraHeaders: map[string]string{
				"Authorization":    upload.AuthorizationToken,
				"X-Bz-Part-Number": fmt.Sprintf("%d", part),
				sha1Header:         "hex_digits_at_end",
			},
			ContentLength: &size,
		}

		var response api.UploadPartResponse

		resp, err := up.f.srv.CallJSON(ctx, &opts, nil, &response)
		retry, err := up.f.shouldRetry(ctx, resp, err)
		if err != nil {
			fs.Debugf(up.o, "Error sending chunk %d (retry=%v): %v: %#v", part, retry, err, err)
		}
		// On retryable error clear PartUploadURL
		if retry {
			fs.Debugf(up.o, "Clearing part upload URL because of error: %v", err)
			upload = nil
		}
		up.returnUploadURL(upload)
		up.sha1s[part-1] = in.HexSum()
		return retry, err
	})
	if err != nil {
		fs.Debugf(up.o, "Error sending chunk %d: %v", part, err)
	} else {
		fs.Debugf(up.o, "Done sending chunk %d", part)
	}
	return err
}

// finish closes off the large upload
func (up *largeUpload) finish(ctx context.Context) error {
	fs.Debugf(up.o, "Finishing large file upload with %d parts", up.parts)
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_finish_large_file",
	}
	var request = api.FinishLargeFileRequest{
		ID:    up.id,
		SHA1s: up.sha1s,
	}
	var response api.FileInfo
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &response)
		return up.f.shouldRetry(ctx, resp, err)
	})
	if err != nil {
		return err
	}
	return up.o.decodeMetaDataFileInfo(&response)
}

// cancel aborts the large upload
func (up *largeUpload) cancel(ctx context.Context) error {
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_cancel_large_file",
	}
	var request = api.CancelLargeFileRequest{
		ID: up.id,
	}
	var response api.CancelLargeFileResponse
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(ctx, &opts, &request, &response)
		return up.f.shouldRetry(ctx, resp, err)
	})
	return err
}

// managedTransferChunk transfers the given part in its own goroutine,
// tracking it in wg and returning buf to the pool when done. Only the first
// error is kept in errs; later ones are dropped by the non-blocking send.
func (up *largeUpload) managedTransferChunk(ctx context.Context, wg *sync.WaitGroup, errs chan error, part int64, buf []byte) {
	wg.Add(1)
	go func(part int64, buf []byte) {
		defer wg.Done()
		defer up.f.putUploadBlock(buf)
		err := up.transferChunk(ctx, part, buf)
		if err != nil {
			select {
			case errs <- err:
			default:
			}
		}
	}(part, buf)
}

// finishOrCancelOnError completes the large upload if neither err nor errs
// carries an error, otherwise it cancels the upload and returns the error.
func (up *largeUpload) finishOrCancelOnError(ctx context.Context, err error, errs chan error) error {
	if err == nil {
		select {
		case err = <-errs:
		default:
		}
	}
	if err != nil {
		fs.Debugf(up.o, "Cancelling large file upload due to error: %v", err)
		cancelErr := up.cancel(ctx)
		if cancelErr != nil {
			fs.Errorf(up.o, "Failed to cancel large file upload: %v", cancelErr)
		}
		return err
	}
	return up.finish(ctx)
}
// Stream uploads the chunks from the input, starting with a required initial
// chunk. Assumes the file size is unknown and will upload until the input
// reaches EOF.
func (up *largeUpload) Stream(ctx context.Context, initialUploadBlock []byte) (err error) {
	fs.Debugf(up.o, "Starting streaming of large file (id %q)", up.id)
	errs := make(chan error, 1)
	hasMoreParts := true
	var wg sync.WaitGroup

	// Transfer initial chunk
	up.size = int64(len(initialUploadBlock))
	up.managedTransferChunk(ctx, &wg, errs, 1, initialUploadBlock)

outer:
	for part := int64(2); hasMoreParts; part++ {
		// Check any errors
		select {
		case err = <-errs:
			break outer
		default:
		}

		// Get a block of memory
		buf := up.f.getUploadBlock()

		// Read the chunk
		var n int
		n, err = io.ReadFull(up.in, buf)
		if err == io.ErrUnexpectedEOF {
			fs.Debugf(up.o, "Read less than a full chunk, making this the last one.")
			buf = buf[:n]
			hasMoreParts = false
			err = nil
		} else if err == io.EOF {
			fs.Debugf(up.o, "Could not read any more bytes, previous chunk was the last.")
			up.f.putUploadBlock(buf)
			err = nil
			break outer
		} else if err != nil {
			// other kinds of errors indicate failure
			up.f.putUploadBlock(buf)
			break outer
		}

		// Keep stats up to date
		up.parts = part
		up.size += int64(n)
		if part > maxParts {
			err = errors.Errorf("%q too big (%d bytes so far) makes too many parts %d > %d - increase --b2-chunk-size", up.o, up.size, up.parts, maxParts)
			break outer
		}

		// Transfer the chunk
		up.managedTransferChunk(ctx, &wg, errs, part, buf)
	}
	wg.Wait()
	up.sha1s = up.sha1s[:up.parts]

	return up.finishOrCancelOnError(ctx, err, errs)
}

// Upload uploads the chunks from the input
func (up *largeUpload) Upload(ctx context.Context) error {
	fs.Debugf(up.o, "Starting upload of large file in %d chunks (id %q)", up.parts, up.id)
	remaining := up.size
	errs := make(chan error, 1)
	var wg sync.WaitGroup
	var err error
outer:
	for part := int64(1); part <= up.parts; part++ {
		// Check any errors
		select {
		case err = <-errs:
			break outer
		default:
		}

		reqSize := remaining
		if reqSize >= int64(up.f.opt.ChunkSize) {
			reqSize = int64(up.f.opt.ChunkSize)
		}

		// Get a block of memory
		buf := up.f.getUploadBlock()[:reqSize]

		// Read the chunk
		_, err = io.ReadFull(up.in, buf)
		if err != nil {
			up.f.putUploadBlock(buf)
			break outer
		}

		// Transfer the chunk
		up.managedTransferChunk(ctx, &wg, errs, part, buf)
		remaining -= reqSize
	}
	wg.Wait()

	return up.finishOrCancelOnError(ctx, err, errs)
}
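// exampleDriveLargeUpload is an illustrative sketch only (a hypothetical
// driver, not called anywhere in this backend) of how the two entry points
// above fit together. The real caller is Object.Update in b2.go, which also
// decides whether a large upload is needed at all; the firstChunk handling
// here is an assumption for the sketch.
func exampleDriveLargeUpload(ctx context.Context, f *Fs, o *Object, in io.Reader, src fs.ObjectInfo, firstChunk []byte) error {
	up, err := f.newLargeUpload(ctx, o, in, src)
	if err != nil {
		return err
	}
	if src.Size() >= 0 {
		// Known size: the number of parts was fixed in newLargeUpload, so
		// Upload reads exactly that many chunks from in.
		return up.Upload(ctx)
	}
	// Unknown size: the caller has already read one chunk-sized block and
	// hands it over; Stream keeps uploading parts until in reaches EOF.
	return up.Stream(ctx, firstChunk)
}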