github.com/aliyun/aliyun-oss-go-sdk@v3.0.2+incompatible/oss/multicopy.go

package oss

import (
    "crypto/md5"
    "encoding/base64"
    "encoding/json"
    "errors"
    "fmt"
    "io/ioutil"
    "net/http"
    "os"
    "strconv"
)

// CopyFile performs a multipart copy of an object.
//
// srcBucketName    source bucket name
// srcObjectKey     source object name
// destObjectKey    target object name; the target is created in the bucket this method is called on
// partSize         the part size in bytes
// options          the object's constraints. Check out function InitiateMultipartUpload.
//
// error    it's nil if the operation succeeds, otherwise it's an error object.
func (bucket Bucket) CopyFile(srcBucketName, srcObjectKey, destObjectKey string, partSize int64, options ...Option) error {
    destBucketName := bucket.BucketName
    if partSize < MinPartSize || partSize > MaxPartSize {
        return errors.New("oss: part size invalid range (100KB, 5GB]")
    }

    cpConf := getCpConfig(options)
    routines := getRoutines(options)

    var strVersionId string
    versionId, _ := FindOption(options, "versionId", nil)
    if versionId != nil {
        strVersionId = versionId.(string)
    }

    if cpConf != nil && cpConf.IsEnable {
        cpFilePath := getCopyCpFilePath(cpConf, srcBucketName, srcObjectKey, destBucketName, destObjectKey, strVersionId)
        if cpFilePath != "" {
            return bucket.copyFileWithCp(srcBucketName, srcObjectKey, destBucketName, destObjectKey, partSize, options, cpFilePath, routines)
        }
    }

    return bucket.copyFile(srcBucketName, srcObjectKey, destBucketName, destObjectKey,
        partSize, options, routines)
}
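// A minimal usage sketch (endpoint, credentials, bucket and object names are
// hypothetical placeholders, not part of this package):
//
//    client, err := oss.New("https://oss-cn-hangzhou.aliyuncs.com", "<accessKeyID>", "<accessKeySecret>")
//    if err != nil {
//        // handle error
//    }
//    bucket, err := client.Bucket("dest-bucket")
//    if err != nil {
//        // handle error
//    }
//    // Copy src-bucket/src-object into dest-bucket/dest-object in 1MB parts,
//    // using 3 concurrent workers and a resumable checkpoint file under /tmp.
//    err = bucket.CopyFile("src-bucket", "src-object", "dest-object", 1024*1024,
//        oss.Routines(3), oss.CheckpointDir(true, "/tmp"))
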
func getCopyCpFilePath(cpConf *cpConfig, srcBucket, srcObject, destBucket, destObject, versionId string) string {
    if cpConf.FilePath == "" && cpConf.DirPath != "" {
        dest := fmt.Sprintf("oss://%v/%v", destBucket, destObject)
        src := fmt.Sprintf("oss://%v/%v", srcBucket, srcObject)
        cpFileName := getCpFileName(src, dest, versionId)
        cpConf.FilePath = cpConf.DirPath + string(os.PathSeparator) + cpFileName
    }
    return cpConf.FilePath
}

// ----- Concurrently copy without checkpoint ---------

// copyWorkerArg defines the copy worker arguments
type copyWorkerArg struct {
    bucket        *Bucket
    imur          InitiateMultipartUploadResult
    srcBucketName string
    srcObjectKey  string
    options       []Option
    hook          copyPartHook
}

// copyPartHook is the hook for testing purposes
type copyPartHook func(part copyPart) error

var copyPartHooker copyPartHook = defaultCopyPartHook

func defaultCopyPartHook(part copyPart) error {
    return nil
}
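// copyPartHooker can be swapped out in tests to inject failures. A sketch
// (hypothetical test code; it must live inside package oss, since the hook
// is unexported):
//
//    copyPartHooker = func(part copyPart) error {
//        if part.Number == 3 {
//            return errors.New("injected failure for part 3")
//        }
//        return nil
//    }
//    defer func() { copyPartHooker = defaultCopyPartHook }()
//    // CopyFile now fails once a worker reaches part 3, exercising the
//    // failed/die channel path and the AbortMultipartUpload cleanup.
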
// copyWorker is the worker goroutine: it copies the parts it receives from the
// jobs channel until the channel is closed, an error occurs, or die is closed.
func copyWorker(id int, arg copyWorkerArg, jobs <-chan copyPart, results chan<- UploadPart, failed chan<- error, die <-chan bool) {
    for chunk := range jobs {
        if err := arg.hook(chunk); err != nil {
            failed <- err
            break
        }
        chunkSize := chunk.End - chunk.Start + 1
        part, err := arg.bucket.UploadPartCopy(arg.imur, arg.srcBucketName, arg.srcObjectKey,
            chunk.Start, chunkSize, chunk.Number, arg.options...)
        if err != nil {
            failed <- err
            break
        }
        select {
        case <-die:
            return
        default:
        }
        results <- part
    }
}

// copyScheduler feeds all parts into the jobs channel, then closes it.
func copyScheduler(jobs chan copyPart, parts []copyPart) {
    for _, part := range parts {
        jobs <- part
    }
    close(jobs)
}

// copyPart structure
type copyPart struct {
    Number int   // Part number (from 1 to 10,000)
    Start  int64 // The start index in the source file
    End    int64 // The end index in the source file (inclusive)
}

// getCopyParts calculates copy parts
func getCopyParts(objectSize, partSize int64) []copyPart {
    parts := []copyPart{}
    part := copyPart{}
    i := 0
    for offset := int64(0); offset < objectSize; offset += partSize {
        part.Number = i + 1
        part.Start = offset
        part.End = GetPartEnd(offset, objectSize, partSize)
        parts = append(parts, part)
        i++
    }
    return parts
}
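// For example, a 10MB object copied with partSize = 4MB yields three parts;
// Start/End are inclusive byte offsets, so each part spans End-Start+1 bytes:
//
//    getCopyParts(10*1024*1024, 4*1024*1024)
//    // => [{Number: 1, Start: 0,       End: 4194303},
//    //     {Number: 2, Start: 4194304, End: 8388607},
//    //     {Number: 3, Start: 8388608, End: 10485759}]
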
// getSrcObjectBytes gets the source object size from the part list
func getSrcObjectBytes(parts []copyPart) int64 {
    var ob int64
    for _, part := range parts {
        ob += (part.End - part.Start + 1)
    }
    return ob
}

// copyFile copies an object concurrently, without a checkpoint
func (bucket Bucket) copyFile(srcBucketName, srcObjectKey, destBucketName, destObjectKey string,
    partSize int64, options []Option, routines int) error {
    descBucket, err := bucket.Client.Bucket(destBucketName)
    srcBucket, err := bucket.Client.Bucket(srcBucketName)
    listener := GetProgressListener(options)

    // Choose the options that apply to each sub-request
    headerOptions := ChoiceHeadObjectOption(options)
    partOptions := ChoiceTransferPartOption(options)
    completeOptions := ChoiceCompletePartOption(options)
    abortOptions := ChoiceAbortPartOption(options)

    meta, err := srcBucket.GetObjectDetailedMeta(srcObjectKey, headerOptions...)
    if err != nil {
        return err
    }

    objectSize, err := strconv.ParseInt(meta.Get(HTTPHeaderContentLength), 10, 64)
    if err != nil {
        return err
    }

    // Get copy parts
    parts := getCopyParts(objectSize, partSize)
    // Initialize the multipart upload
    imur, err := descBucket.InitiateMultipartUpload(destObjectKey, options...)
    if err != nil {
        return err
    }

    jobs := make(chan copyPart, len(parts))
    results := make(chan UploadPart, len(parts))
    failed := make(chan error)
    die := make(chan bool)

    var completedBytes int64
    totalBytes := getSrcObjectBytes(parts)
    event := newProgressEvent(TransferStartedEvent, 0, totalBytes, 0)
    publishProgress(listener, event)

    // Start the copy workers
    arg := copyWorkerArg{descBucket, imur, srcBucketName, srcObjectKey, partOptions, copyPartHooker}
    for w := 1; w <= routines; w++ {
        go copyWorker(w, arg, jobs, results, failed, die)
    }

    // Start the scheduler
    go copyScheduler(jobs, parts)

    // Wait for the parts to finish
    completed := 0
    ups := make([]UploadPart, len(parts))
    for completed < len(parts) {
        select {
        case part := <-results:
            completed++
            ups[part.PartNumber-1] = part
            copyBytes := parts[part.PartNumber-1].End - parts[part.PartNumber-1].Start + 1
            completedBytes += copyBytes
            event = newProgressEvent(TransferDataEvent, completedBytes, totalBytes, copyBytes)
            publishProgress(listener, event)
        case err := <-failed:
            close(die)
            descBucket.AbortMultipartUpload(imur, abortOptions...)
            event = newProgressEvent(TransferFailedEvent, completedBytes, totalBytes, 0)
            publishProgress(listener, event)
            return err
        }

        if completed >= len(parts) {
            break
        }
    }

    event = newProgressEvent(TransferCompletedEvent, completedBytes, totalBytes, 0)
    publishProgress(listener, event)

    // Complete the multipart upload
    _, err = descBucket.CompleteMultipartUpload(imur, ups, completeOptions...)
    if err != nil {
        bucket.AbortMultipartUpload(imur, abortOptions...)
        return err
    }
    return nil
}

// ----- Concurrently copy with checkpoint -----

const copyCpMagic = "84F1F18C-FF1D-403B-A1D8-9DEB5F65910A"

type copyCheckpoint struct {
    Magic          string       // Magic
    MD5            string       // CP content MD5
    SrcBucketName  string       // Source bucket
    SrcObjectKey   string       // Source object
    DestBucketName string       // Target bucket
    DestObjectKey  string       // Target object
    CopyID         string       // Copy ID
    ObjStat        objectStat   // Object stat
    Parts          []copyPart   // Copy parts
    CopyParts      []UploadPart // The uploaded parts
    PartStat       []bool       // The part status
}

// isValid checks that the checkpoint data is valid: the CP file itself is
// intact and the source object has not been updated since it was written.
func (cp copyCheckpoint) isValid(meta http.Header) (bool, error) {
    // Compare CP's magic number and the MD5.
    cpb := cp
    cpb.MD5 = ""
    js, _ := json.Marshal(cpb)
    sum := md5.Sum(js)
    b64 := base64.StdEncoding.EncodeToString(sum[:])

    if cp.Magic != copyCpMagic || b64 != cp.MD5 {
        return false, nil
    }

    objectSize, err := strconv.ParseInt(meta.Get(HTTPHeaderContentLength), 10, 64)
    if err != nil {
        return false, err
    }

    // Compare the object size, last modified time and etag.
    if cp.ObjStat.Size != objectSize ||
        cp.ObjStat.LastModified != meta.Get(HTTPHeaderLastModified) ||
        cp.ObjStat.Etag != meta.Get(HTTPHeaderEtag) {
        return false, nil
    }

    return true, nil
}

// load loads from the checkpoint file
func (cp *copyCheckpoint) load(filePath string) error {
    contents, err := ioutil.ReadFile(filePath)
    if err != nil {
        return err
    }

    err = json.Unmarshal(contents, cp)
    return err
}

// update updates the parts status
func (cp *copyCheckpoint) update(part UploadPart) {
    cp.CopyParts[part.PartNumber-1] = part
    cp.PartStat[part.PartNumber-1] = true
}

// dump dumps the CP to the file
func (cp *copyCheckpoint) dump(filePath string) error {
    bcp := *cp

    // Calculate MD5
    bcp.MD5 = ""
    js, err := json.Marshal(bcp)
    if err != nil {
        return err
    }
    sum := md5.Sum(js)
    b64 := base64.StdEncoding.EncodeToString(sum[:])
    bcp.MD5 = b64

    // Serialize
    js, err = json.Marshal(bcp)
    if err != nil {
        return err
    }

    // Dump
    return ioutil.WriteFile(filePath, js, FilePermMode)
}
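// dump and isValid are symmetric: dump stores the MD5 of the JSON with the
// MD5 field blanked, and isValid recomputes that digest after load. A
// round-trip sketch (the file path and the meta value are hypothetical):
//
//    var cp copyCheckpoint
//    // ... fill cp via prepare ...
//    _ = cp.dump("/tmp/copy.cp")      // persist with self-describing MD5
//    var loaded copyCheckpoint
//    _ = loaded.load("/tmp/copy.cp")
//    ok, err := loaded.isValid(meta)  // false if the file was corrupted, or if
//                                     // the source size/mtime/etag changed
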
// todoParts returns the unfinished parts
func (cp copyCheckpoint) todoParts() []copyPart {
    dps := []copyPart{}
    for i, ps := range cp.PartStat {
        if !ps {
            dps = append(dps, cp.Parts[i])
        }
    }
    return dps
}

// getCompletedBytes returns the number of bytes already copied
func (cp copyCheckpoint) getCompletedBytes() int64 {
    var completedBytes int64
    for i, part := range cp.Parts {
        if cp.PartStat[i] {
            completedBytes += (part.End - part.Start + 1)
        }
    }
    return completedBytes
}

// prepare initializes the checkpoint data and the multipart upload
func (cp *copyCheckpoint) prepare(meta http.Header, srcBucket *Bucket, srcObjectKey string, destBucket *Bucket, destObjectKey string,
    partSize int64, options []Option) error {
    // CP
    cp.Magic = copyCpMagic
    cp.SrcBucketName = srcBucket.BucketName
    cp.SrcObjectKey = srcObjectKey
    cp.DestBucketName = destBucket.BucketName
    cp.DestObjectKey = destObjectKey

    objectSize, err := strconv.ParseInt(meta.Get(HTTPHeaderContentLength), 10, 64)
    if err != nil {
        return err
    }

    cp.ObjStat.Size = objectSize
    cp.ObjStat.LastModified = meta.Get(HTTPHeaderLastModified)
    cp.ObjStat.Etag = meta.Get(HTTPHeaderEtag)

    // Parts
    cp.Parts = getCopyParts(objectSize, partSize)
    cp.PartStat = make([]bool, len(cp.Parts))
    for i := range cp.PartStat {
        cp.PartStat[i] = false
    }
    cp.CopyParts = make([]UploadPart, len(cp.Parts))

    // Init copy
    imur, err := destBucket.InitiateMultipartUpload(destObjectKey, options...)
    if err != nil {
        return err
    }
    cp.CopyID = imur.UploadID

    return nil
}

// complete completes the multipart copy and removes the checkpoint file
func (cp *copyCheckpoint) complete(bucket *Bucket, parts []UploadPart, cpFilePath string, options []Option) error {
    imur := InitiateMultipartUploadResult{Bucket: cp.DestBucketName,
        Key: cp.DestObjectKey, UploadID: cp.CopyID}
    _, err := bucket.CompleteMultipartUpload(imur, parts, options...)
    if err != nil {
        return err
    }
    os.Remove(cpFilePath)
    return err
}

// copyFileWithCp copies an object concurrently with a checkpoint
func (bucket Bucket) copyFileWithCp(srcBucketName, srcObjectKey, destBucketName, destObjectKey string,
    partSize int64, options []Option, cpFilePath string, routines int) error {
    descBucket, err := bucket.Client.Bucket(destBucketName)
    srcBucket, err := bucket.Client.Bucket(srcBucketName)
    listener := GetProgressListener(options)

    // Load CP data
    ccp := copyCheckpoint{}
    err = ccp.load(cpFilePath)
    if err != nil {
        os.Remove(cpFilePath)
    }

    // Choose the options that apply to each sub-request
    headerOptions := ChoiceHeadObjectOption(options)
    partOptions := ChoiceTransferPartOption(options)
    completeOptions := ChoiceCompletePartOption(options)

    meta, err := srcBucket.GetObjectDetailedMeta(srcObjectKey, headerOptions...)
    if err != nil {
        return err
    }

    // On load error or invalid CP data, reinitialize
    valid, err := ccp.isValid(meta)
    if err != nil || !valid {
        if err = ccp.prepare(meta, srcBucket, srcObjectKey, descBucket, destObjectKey, partSize, options); err != nil {
            return err
        }
        os.Remove(cpFilePath)
    }

    // Unfinished parts
    parts := ccp.todoParts()
    imur := InitiateMultipartUploadResult{
        Bucket:   destBucketName,
        Key:      destObjectKey,
        UploadID: ccp.CopyID}

    jobs := make(chan copyPart, len(parts))
    results := make(chan UploadPart, len(parts))
    failed := make(chan error)
    die := make(chan bool)

    completedBytes := ccp.getCompletedBytes()
    event := newProgressEvent(TransferStartedEvent, completedBytes, ccp.ObjStat.Size, 0)
    publishProgress(listener, event)

    // Start the worker goroutines
    arg := copyWorkerArg{descBucket, imur, srcBucketName, srcObjectKey, partOptions, copyPartHooker}
    for w := 1; w <= routines; w++ {
        go copyWorker(w, arg, jobs, results, failed, die)
    }

    // Start the scheduler
    go copyScheduler(jobs, parts)

    // Wait for the parts to complete. Note: results are indexed through
    // ccp.Parts (the full part list), since parts holds only the todo subset
    // and is not indexed by part number on resume.
    completed := 0
    for completed < len(parts) {
        select {
        case part := <-results:
            completed++
            ccp.update(part)
            ccp.dump(cpFilePath)
            copyBytes := ccp.Parts[part.PartNumber-1].End - ccp.Parts[part.PartNumber-1].Start + 1
            completedBytes += copyBytes
            event = newProgressEvent(TransferDataEvent, completedBytes, ccp.ObjStat.Size, copyBytes)
            publishProgress(listener, event)
        case err := <-failed:
            close(die)
            event = newProgressEvent(TransferFailedEvent, completedBytes, ccp.ObjStat.Size, 0)
            publishProgress(listener, event)
            return err
        }

        if completed >= len(parts) {
            break
        }
    }

    event = newProgressEvent(TransferCompletedEvent, completedBytes, ccp.ObjStat.Size, 0)
    publishProgress(listener, event)

    return ccp.complete(descBucket, ccp.CopyParts, cpFilePath, completeOptions)
}
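// Resume sketch: when a checkpoint option is enabled, an interrupted CopyFile
// can simply be called again with the same arguments; copyFileWithCp reloads
// and validates the CP file, and todoParts limits the work to parts whose
// PartStat entry is still false (names below are hypothetical):
//
//    opts := []oss.Option{oss.Checkpoint(true, "/tmp/copy.cp"), oss.Routines(4)}
//    err := bucket.CopyFile("src-bucket", "src-object", "dest-object", 5*1024*1024, opts...)
//    // ... process interrupted; on restart the same call resumes:
//    err = bucket.CopyFile("src-bucket", "src-object", "dest-object", 5*1024*1024, opts...)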