github.com/aliyun/aliyun-oss-go-sdk@v3.0.2+incompatible/oss/upload.go

package oss

import (
	"bytes"
	"crypto/md5"
	"encoding/base64"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
	"path/filepath"
	"time"
)

// UploadFile is multipart file upload.
//
// objectKey    the object name.
// filePath     the local file path to upload.
// partSize     the part size in bytes.
// options      the options for uploading the object.
//
// error    it's nil if the operation succeeds, otherwise it's an error object.
func (bucket Bucket) UploadFile(objectKey, filePath string, partSize int64, options ...Option) error {
	if partSize < MinPartSize || partSize > MaxPartSize {
		return errors.New("oss: part size invalid range [100KB, 5GB]")
	}

	cpConf := getCpConfig(options)
	routines := getRoutines(options)

	if cpConf != nil && cpConf.IsEnable {
		cpFilePath := getUploadCpFilePath(cpConf, filePath, bucket.BucketName, objectKey)
		if cpFilePath != "" {
			return bucket.uploadFileWithCp(objectKey, filePath, partSize, options, cpFilePath, routines)
		}
	}

	return bucket.uploadFile(objectKey, filePath, partSize, options, routines)
}

func getUploadCpFilePath(cpConf *cpConfig, srcFile, destBucket, destObject string) string {
	if cpConf.FilePath == "" && cpConf.DirPath != "" {
		dest := fmt.Sprintf("oss://%v/%v", destBucket, destObject)
		absPath, _ := filepath.Abs(srcFile)
		cpFileName := getCpFileName(absPath, dest, "")
		cpConf.FilePath = cpConf.DirPath + string(os.PathSeparator) + cpFileName
	}
	return cpConf.FilePath
}

// ----- concurrent upload without checkpoint -----

// getCpConfig gets the checkpoint configuration.
func getCpConfig(options []Option) *cpConfig {
	cpcOpt, err := FindOption(options, checkpointConfig, nil)
	if err != nil || cpcOpt == nil {
		return nil
	}

	return cpcOpt.(*cpConfig)
}

// getCpFileName returns the name of the checkpoint file.
func getCpFileName(src, dest, versionId string) string {
	md5Ctx := md5.New()
	md5Ctx.Write([]byte(src))
	srcCheckSum := hex.EncodeToString(md5Ctx.Sum(nil))

	md5Ctx.Reset()
	md5Ctx.Write([]byte(dest))
	destCheckSum := hex.EncodeToString(md5Ctx.Sum(nil))

	if versionId == "" {
		return fmt.Sprintf("%v-%v.cp", srcCheckSum, destCheckSum)
	}

	md5Ctx.Reset()
	md5Ctx.Write([]byte(versionId))
	versionCheckSum := hex.EncodeToString(md5Ctx.Sum(nil))
	return fmt.Sprintf("%v-%v-%v.cp", srcCheckSum, destCheckSum, versionCheckSum)
}
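// A minimal usage sketch (not part of the SDK source): uploading with a
// checkpoint directory and three concurrent routines. The endpoint,
// credentials, bucket, key, and paths are placeholders; Routines and
// CheckpointDir are the exported options that getRoutines and getCpConfig
// read back. Client code imports "github.com/aliyun/aliyun-oss-go-sdk/oss".
//
//	client, err := oss.New("yourEndpoint", "yourAccessKeyId", "yourAccessKeySecret")
//	if err != nil {
//		// handle error
//	}
//	bucket, err := client.Bucket("yourBucketName")
//	if err != nil {
//		// handle error
//	}
//	// 5 MB parts; resumable via a .cp file stored under /tmp/cpdir.
//	err = bucket.UploadFile("yourObjectKey", "/local/path/file.zip", 5*1024*1024,
//		oss.Routines(3), oss.CheckpointDir(true, "/tmp/cpdir"))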
// getRoutines gets the routine count. By default it's 1.
func getRoutines(options []Option) int {
	rtnOpt, err := FindOption(options, routineNum, nil)
	if err != nil || rtnOpt == nil {
		return 1
	}

	// Clamp the requested routine count to [1, 100].
	rs := rtnOpt.(int)
	if rs < 1 {
		rs = 1
	} else if rs > 100 {
		rs = 100
	}

	return rs
}

// getPayer returns the payer of the request.
func getPayer(options []Option) string {
	payerOpt, err := FindOption(options, HTTPHeaderOssRequester, nil)
	if err != nil || payerOpt == nil {
		return ""
	}
	return payerOpt.(string)
}

// GetProgressListener gets the progress callback.
func GetProgressListener(options []Option) ProgressListener {
	isSet, listener, _ := IsOptionSet(options, progressListener)
	if !isSet {
		return nil
	}
	return listener.(ProgressListener)
}

// uploadPartHook is for testing usage.
type uploadPartHook func(id int, chunk FileChunk) error

var uploadPartHooker uploadPartHook = defaultUploadPart

func defaultUploadPart(id int, chunk FileChunk) error {
	return nil
}

// workerArg defines the worker argument structure.
type workerArg struct {
	bucket   *Bucket
	filePath string
	imur     InitiateMultipartUploadResult
	options  []Option
	hook     uploadPartHook
}

// defaultUploadProgressListener is a no-op listener passed to each per-part
// request so that read/write progress is not reported to the caller's
// listener; the aggregate loops below publish TransferDataEvent themselves.
type defaultUploadProgressListener struct {
}

// ProgressChanged no-ops.
func (listener *defaultUploadProgressListener) ProgressChanged(event *ProgressEvent) {
}

// worker is the worker coroutine function.
func worker(id int, arg workerArg, jobs <-chan FileChunk, results chan<- UploadPart, failed chan<- error, die <-chan bool) {
	for chunk := range jobs {
		if err := arg.hook(id, chunk); err != nil {
			failed <- err
			break
		}
		var respHeader http.Header
		p := Progress(&defaultUploadProgressListener{})
		// Reserve capacity for the two extra options. The original
		// make([]Option, len(arg.options)+2) followed by append left
		// leading nil entries in the slice.
		opts := make([]Option, 0, len(arg.options)+2)
		opts = append(opts, arg.options...)

		// Use defaultUploadProgressListener for the per-part request.
		opts = append(opts, p, GetResponseHeader(&respHeader))

		startT := time.Now().UnixNano() / 1000 / 1000 / 1000
		part, err := arg.bucket.UploadPartFromFile(arg.imur, arg.filePath, chunk.Offset, chunk.Size, chunk.Number, opts...)
		endT := time.Now().UnixNano() / 1000 / 1000 / 1000
		if err != nil {
			arg.bucket.Client.Config.WriteLog(Debug, "upload part error,cost:%d second,part number:%d,request id:%s,error:%s\n", endT-startT, chunk.Number, GetRequestId(respHeader), err.Error())
			failed <- err
			break
		}
		select {
		case <-die:
			return
		default:
		}
		results <- part
	}
}

// scheduler feeds all chunks to the jobs channel, then closes it.
func scheduler(jobs chan FileChunk, chunks []FileChunk) {
	for _, chunk := range chunks {
		jobs <- chunk
	}
	close(jobs)
}

// getTotalBytes sums the sizes of all chunks.
func getTotalBytes(chunks []FileChunk) int64 {
	var tb int64
	for _, chunk := range chunks {
		tb += chunk.Size
	}
	return tb
}
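// The scheduler/worker pair above is a classic bounded fan-out: one goroutine
// fills a buffered jobs channel, N workers drain it, and results/failed/die
// channels report back to a collector. A standalone sketch of the same
// pattern with hypothetical job/result types (illustration, not SDK code):
//
//	jobs := make(chan int, len(work))
//	results := make(chan int, len(work))
//	failed := make(chan error)
//	die := make(chan struct{})
//	for w := 0; w < routines; w++ {
//		go func() {
//			for j := range jobs {
//				r, err := do(j) // hypothetical unit of work
//				if err != nil {
//					failed <- err
//					return
//				}
//				select {
//				case <-die: // collector gave up after another worker failed
//					return
//				default:
//				}
//				results <- r
//			}
//		}()
//	}
//	go func() { // scheduler
//		for _, j := range work {
//			jobs <- j
//		}
//		close(jobs)
//	}()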
// uploadFile is a concurrent upload, without checkpoint.
func (bucket Bucket) uploadFile(objectKey, filePath string, partSize int64, options []Option, routines int) error {
	listener := GetProgressListener(options)

	chunks, err := SplitFileByPartSize(filePath, partSize)
	if err != nil {
		return err
	}

	partOptions := ChoiceTransferPartOption(options)
	completeOptions := ChoiceCompletePartOption(options)
	abortOptions := ChoiceAbortPartOption(options)

	// Initialize the multipart upload
	imur, err := bucket.InitiateMultipartUpload(objectKey, options...)
	if err != nil {
		return err
	}

	jobs := make(chan FileChunk, len(chunks))
	results := make(chan UploadPart, len(chunks))
	failed := make(chan error)
	die := make(chan bool)

	var completedBytes int64
	totalBytes := getTotalBytes(chunks)
	event := newProgressEvent(TransferStartedEvent, 0, totalBytes, 0)
	publishProgress(listener, event)

	// Start the worker coroutines
	arg := workerArg{&bucket, filePath, imur, partOptions, uploadPartHooker}
	for w := 1; w <= routines; w++ {
		go worker(w, arg, jobs, results, failed, die)
	}

	// Schedule the jobs
	go scheduler(jobs, chunks)

	// Wait for the upload to finish
	completed := 0
	parts := make([]UploadPart, len(chunks))
	for completed < len(chunks) {
		select {
		case part := <-results:
			completed++
			parts[part.PartNumber-1] = part
			completedBytes += chunks[part.PartNumber-1].Size

			// RwBytes in the ProgressEvent is 0 because the read/write
			// events have already been notified in teeReader.Read().
			event = newProgressEvent(TransferDataEvent, completedBytes, totalBytes, chunks[part.PartNumber-1].Size)
			publishProgress(listener, event)
		case err := <-failed:
			close(die)
			event = newProgressEvent(TransferFailedEvent, completedBytes, totalBytes, 0)
			publishProgress(listener, event)
			bucket.AbortMultipartUpload(imur, abortOptions...)
			return err
		}

		if completed >= len(chunks) {
			break
		}
	}

	event = newProgressEvent(TransferCompletedEvent, completedBytes, totalBytes, 0)
	publishProgress(listener, event)

	// Complete the multipart upload
	_, err = bucket.CompleteMultipartUpload(imur, parts, completeOptions...)
	if err != nil {
		bucket.AbortMultipartUpload(imur, abortOptions...)
		return err
	}
	return nil
}

// ----- concurrent upload with checkpoint -----

const uploadCpMagic = "FE8BB4EA-B593-4FAC-AD7A-2459A36E2E62"

type uploadCheckpoint struct {
	Magic        string   // Magic
	MD5          string   // Checkpoint file content's MD5
	FilePath     string   // Local file path
	FileStat     cpStat   // File state
	ObjectKey    string   // Key
	UploadID     string   // Upload ID
	Parts        []cpPart // All parts of the local file
	CallbackVal  string
	CallbackBody *[]byte
}

type cpStat struct {
	Size         int64     // File size
	LastModified time.Time // File's last modified time
	MD5          string    // Local file's MD5
}

type cpPart struct {
	Chunk       FileChunk  // File chunk
	Part        UploadPart // Uploaded part
	IsCompleted bool       // Upload complete flag
}
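// The checkpoint (.cp) file is the JSON serialization of uploadCheckpoint,
// made self-validating: dump marshals the struct with MD5 cleared, writes the
// base64-encoded MD5 of that JSON back into the MD5 field, and isValid
// repeats the computation and compares. A minimal standalone sketch of the
// scheme with a hypothetical struct (illustration, not SDK code):
//
//	type cp struct {
//		Magic string
//		MD5   string
//		Data  string
//	}
//
//	func seal(c cp) cp {
//		c.MD5 = "" // checksum is computed over the struct with MD5 blank
//		js, _ := json.Marshal(c)
//		sum := md5.Sum(js)
//		c.MD5 = base64.StdEncoding.EncodeToString(sum[:])
//		return c
//	}
//
//	func check(c cp) bool {
//		want := c.MD5
//		return seal(c).MD5 == want // recompute and compare
//	}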
// isValid checks if the uploaded data is valid---it's valid when the file is not updated and the checkpoint data is valid.
func (cp uploadCheckpoint) isValid(filePath string, options []Option) (bool, error) {
	callbackVal, _ := FindOption(options, HTTPHeaderOssCallback, "")
	if callbackVal != "" && cp.CallbackVal != callbackVal {
		return false, nil
	}
	callbackBody, _ := FindOption(options, responseBody, nil)
	if callbackBody != nil {
		body, _ := json.Marshal(callbackBody)
		// The checkpoint is stale if its recorded callback body is
		// missing or differs from the one requested now. (The original
		// compared with bytes.Equal, which invalidated a matching
		// checkpoint and dereferenced a possibly nil CallbackBody.)
		if cp.CallbackBody == nil || !bytes.Equal(*cp.CallbackBody, body) {
			return false, nil
		}
	}

	// Compare the CP's magic number and MD5.
	cpb := cp
	cpb.MD5 = ""
	js, _ := json.Marshal(cpb)
	sum := md5.Sum(js)
	b64 := base64.StdEncoding.EncodeToString(sum[:])

	if cp.Magic != uploadCpMagic || b64 != cp.MD5 {
		return false, nil
	}

	// Make sure the local file has not been updated.
	fd, err := os.Open(filePath)
	if err != nil {
		return false, err
	}
	defer fd.Close()

	st, err := fd.Stat()
	if err != nil {
		return false, err
	}

	md, err := calcFileMD5(filePath)
	if err != nil {
		return false, err
	}

	// Compare the file size, last modified time and MD5.
	if cp.FileStat.Size != st.Size() ||
		!cp.FileStat.LastModified.Equal(st.ModTime()) ||
		cp.FileStat.MD5 != md {
		return false, nil
	}

	return true, nil
}

// load loads from the checkpoint file.
func (cp *uploadCheckpoint) load(filePath string) error {
	contents, err := ioutil.ReadFile(filePath)
	if err != nil {
		return err
	}

	err = json.Unmarshal(contents, cp)
	return err
}

// dump dumps to the local checkpoint file.
func (cp *uploadCheckpoint) dump(filePath string) error {
	bcp := *cp

	// Calculate MD5
	bcp.MD5 = ""
	js, err := json.Marshal(bcp)
	if err != nil {
		return err
	}
	sum := md5.Sum(js)
	b64 := base64.StdEncoding.EncodeToString(sum[:])
	bcp.MD5 = b64

	// Serialization
	js, err = json.Marshal(bcp)
	if err != nil {
		return err
	}

	// Dump
	return ioutil.WriteFile(filePath, js, FilePermMode)
}

// updatePart updates the part status.
func (cp *uploadCheckpoint) updatePart(part UploadPart) {
	cp.Parts[part.PartNumber-1].Part = part
	cp.Parts[part.PartNumber-1].IsCompleted = true
}

// todoParts returns the unfinished parts.
func (cp *uploadCheckpoint) todoParts() []FileChunk {
	fcs := []FileChunk{}
	for _, part := range cp.Parts {
		if !part.IsCompleted {
			fcs = append(fcs, part.Chunk)
		}
	}
	return fcs
}

// allParts returns all parts.
func (cp *uploadCheckpoint) allParts() []UploadPart {
	ps := []UploadPart{}
	for _, part := range cp.Parts {
		ps = append(ps, part.Part)
	}
	return ps
}

// getCompletedBytes returns the completed bytes count.
func (cp *uploadCheckpoint) getCompletedBytes() int64 {
	var completedBytes int64
	for _, part := range cp.Parts {
		if part.IsCompleted {
			completedBytes += part.Chunk.Size
		}
	}
	return completedBytes
}

// calcFileMD5 calculates the MD5 for the specified local file.
// Note: this is a stub that always returns an empty string, so the file-MD5
// comparison in isValid is effectively disabled; only the size and the
// last-modified time are actually checked.
func calcFileMD5(filePath string) (string, error) {
	return "", nil
}
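// Had calcFileMD5 not been stubbed out, a conventional implementation would
// stream the file through an md5 hash via io.Copy, as in the sketch below
// (illustration only, not the SDK's code; it additionally assumes the "io"
// package is imported):
//
//	func calcFileMD5Sketch(filePath string) (string, error) {
//		f, err := os.Open(filePath)
//		if err != nil {
//			return "", err
//		}
//		defer f.Close()
//		h := md5.New()
//		// io.Copy reads the file in chunks, so large files are hashed
//		// without being loaded into memory at once.
//		if _, err := io.Copy(h, f); err != nil {
//			return "", err
//		}
//		return hex.EncodeToString(h.Sum(nil)), nil
//	}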
// prepare initializes the multipart upload.
func prepare(cp *uploadCheckpoint, objectKey, filePath string, partSize int64, bucket *Bucket, options []Option) error {
	// CP
	cp.Magic = uploadCpMagic
	cp.FilePath = filePath
	cp.ObjectKey = objectKey

	// Local file
	fd, err := os.Open(filePath)
	if err != nil {
		return err
	}
	defer fd.Close()

	st, err := fd.Stat()
	if err != nil {
		return err
	}
	cp.FileStat.Size = st.Size()
	cp.FileStat.LastModified = st.ModTime()
	callbackVal, _ := FindOption(options, HTTPHeaderOssCallback, "")
	cp.CallbackVal = callbackVal.(string)
	callbackBody, _ := FindOption(options, responseBody, nil)
	if callbackBody != nil {
		body, _ := json.Marshal(callbackBody)
		cp.CallbackBody = &body
	}
	md, err := calcFileMD5(filePath)
	if err != nil {
		return err
	}
	cp.FileStat.MD5 = md

	// Chunks
	parts, err := SplitFileByPartSize(filePath, partSize)
	if err != nil {
		return err
	}

	cp.Parts = make([]cpPart, len(parts))
	for i, part := range parts {
		cp.Parts[i].Chunk = part
		cp.Parts[i].IsCompleted = false
	}

	// Initiate the multipart upload
	imur, err := bucket.InitiateMultipartUpload(objectKey, options...)
	if err != nil {
		return err
	}
	cp.UploadID = imur.UploadID

	return nil
}

// complete completes the multipart upload and deletes the local CP file.
func complete(cp *uploadCheckpoint, bucket *Bucket, parts []UploadPart, cpFilePath string, options []Option) error {
	imur := InitiateMultipartUploadResult{Bucket: bucket.BucketName,
		Key: cp.ObjectKey, UploadID: cp.UploadID}

	_, err := bucket.CompleteMultipartUpload(imur, parts, options...)
	if err != nil {
		// On 203 (complete succeeded but the callback failed) or 404
		// (the upload ID no longer exists), the checkpoint is no
		// longer useful, so remove it even though err is returned.
		if e, ok := err.(ServiceError); ok && (e.StatusCode == 203 || e.StatusCode == 404) {
			os.Remove(cpFilePath)
		}
		return err
	}
	os.Remove(cpFilePath)
	return nil
}

// uploadFileWithCp handles concurrent upload with checkpoint.
func (bucket Bucket) uploadFileWithCp(objectKey, filePath string, partSize int64, options []Option, cpFilePath string, routines int) error {
	listener := GetProgressListener(options)

	partOptions := ChoiceTransferPartOption(options)
	completeOptions := ChoiceCompletePartOption(options)

	// Load CP data
	ucp := uploadCheckpoint{}
	err := ucp.load(cpFilePath)
	if err != nil {
		os.Remove(cpFilePath)
	}

	// Load error or the CP data is invalid: start over.
	valid, err := ucp.isValid(filePath, options)
	if err != nil || !valid {
		if err = prepare(&ucp, objectKey, filePath, partSize, &bucket, options); err != nil {
			return err
		}
		os.Remove(cpFilePath)
	}

	chunks := ucp.todoParts()
	imur := InitiateMultipartUploadResult{
		Bucket:   bucket.BucketName,
		Key:      objectKey,
		UploadID: ucp.UploadID}

	jobs := make(chan FileChunk, len(chunks))
	results := make(chan UploadPart, len(chunks))
	failed := make(chan error)
	die := make(chan bool)

	completedBytes := ucp.getCompletedBytes()

	// RwBytes in the ProgressEvent is 0 because the read/write events have
	// already been notified in teeReader.Read().
	event := newProgressEvent(TransferStartedEvent, completedBytes, ucp.FileStat.Size, 0)
	publishProgress(listener, event)

	// Start the workers
	arg := workerArg{&bucket, filePath, imur, partOptions, uploadPartHooker}
	for w := 1; w <= routines; w++ {
		go worker(w, arg, jobs, results, failed, die)
	}

	// Schedule the jobs
	go scheduler(jobs, chunks)

	// Wait for the jobs to finish
	completed := 0
	for completed < len(chunks) {
		select {
		case part := <-results:
			completed++
			ucp.updatePart(part)
			ucp.dump(cpFilePath)
			completedBytes += ucp.Parts[part.PartNumber-1].Chunk.Size
			event = newProgressEvent(TransferDataEvent, completedBytes, ucp.FileStat.Size, ucp.Parts[part.PartNumber-1].Chunk.Size)
			publishProgress(listener, event)
		case err := <-failed:
			close(die)
			event = newProgressEvent(TransferFailedEvent, completedBytes, ucp.FileStat.Size, 0)
			publishProgress(listener, event)
			return err
		}

		if completed >= len(chunks) {
			break
		}
	}

	event = newProgressEvent(TransferCompletedEvent, completedBytes, ucp.FileStat.Size, 0)
	publishProgress(listener, event)

	// Complete the multipart upload
	err = complete(&ucp, &bucket, ucp.allParts(), cpFilePath, completeOptions)
	return err
}
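// A minimal sketch of wiring a progress listener into UploadFile (illustration
// only; MyListener and the paths are placeholders). ProgressChanged receives
// the TransferStartedEvent / TransferDataEvent / TransferCompletedEvent /
// TransferFailedEvent values published by the loops above:
//
//	type MyListener struct{}
//
//	func (l *MyListener) ProgressChanged(event *oss.ProgressEvent) {
//		fmt.Printf("event:%d consumed:%d/%d\n",
//			event.EventType, event.ConsumedBytes, event.TotalBytes)
//	}
//
//	err := bucket.UploadFile("yourObjectKey", "/local/path/file.zip",
//		5*1024*1024, oss.Routines(3), oss.Progress(&MyListener{}))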