github.com/aliyun/aliyun-oss-go-sdk@v3.0.2+incompatible/oss/upload.go

package oss

import (
	"bytes"
	"crypto/md5"
	"encoding/base64"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
	"path/filepath"
	"time"
)

// UploadFile uploads a local file to OSS with multipart upload.
//
// objectKey    the object name.
// filePath    the local file path to upload.
// partSize    the part size in bytes.
// options    the options for uploading the object.
//
// error    it's nil if the operation succeeds, otherwise it's an error object.
//
func (bucket Bucket) UploadFile(objectKey, filePath string, partSize int64, options ...Option) error {
	if partSize < MinPartSize || partSize > MaxPartSize {
		return errors.New("oss: part size invalid range [100KB, 5GB]")
	}

	cpConf := getCpConfig(options)
	routines := getRoutines(options)

	if cpConf != nil && cpConf.IsEnable {
		cpFilePath := getUploadCpFilePath(cpConf, filePath, bucket.BucketName, objectKey)
		if cpFilePath != "" {
			return bucket.uploadFileWithCp(objectKey, filePath, partSize, options, cpFilePath, routines)
		}
	}

	return bucket.uploadFile(objectKey, filePath, partSize, options, routines)
}
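
// As a rough usage sketch (illustrative only; endpoint, credentials, bucket and
// object names below are placeholders, not part of this package):
//
//	client, err := New("https://oss-cn-hangzhou.aliyuncs.com", "<AccessKeyID>", "<AccessKeySecret>")
//	if err != nil {
//		// handle error
//	}
//	bkt, err := client.Bucket("my-bucket")
//	if err != nil {
//		// handle error
//	}
//	// 1 MB parts, 3 concurrent workers, checkpoint file stored next to the source file.
//	err = bkt.UploadFile("my-object", "/tmp/my-file", 1024*1024, Routines(3), Checkpoint(true, "/tmp/my-file.cp"))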

// getUploadCpFilePath returns the checkpoint file path. When only a checkpoint
// directory is configured, the file name is derived from the absolute source
// path and the destination "oss://bucket/object" address.
func getUploadCpFilePath(cpConf *cpConfig, srcFile, destBucket, destObject string) string {
	if cpConf.FilePath == "" && cpConf.DirPath != "" {
		dest := fmt.Sprintf("oss://%v/%v", destBucket, destObject)
		absPath, _ := filepath.Abs(srcFile)
		cpFileName := getCpFileName(absPath, dest, "")
		cpConf.FilePath = cpConf.DirPath + string(os.PathSeparator) + cpFileName
	}
	return cpConf.FilePath
}

// ----- concurrent upload without checkpoint -----

// getCpConfig gets checkpoint configuration
func getCpConfig(options []Option) *cpConfig {
	cpcOpt, err := FindOption(options, checkpointConfig, nil)
	if err != nil || cpcOpt == nil {
		return nil
	}

	return cpcOpt.(*cpConfig)
}

// getCpFileName returns the name of the checkpoint file
func getCpFileName(src, dest, versionId string) string {
	md5Ctx := md5.New()
	md5Ctx.Write([]byte(src))
	srcCheckSum := hex.EncodeToString(md5Ctx.Sum(nil))

	md5Ctx.Reset()
	md5Ctx.Write([]byte(dest))
	destCheckSum := hex.EncodeToString(md5Ctx.Sum(nil))

	if versionId == "" {
		return fmt.Sprintf("%v-%v.cp", srcCheckSum, destCheckSum)
	}

	md5Ctx.Reset()
	md5Ctx.Write([]byte(versionId))
	versionCheckSum := hex.EncodeToString(md5Ctx.Sum(nil))
	return fmt.Sprintf("%v-%v-%v.cp", srcCheckSum, destCheckSum, versionCheckSum)
}
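
// For instance (illustrative values), uploading "/tmp/my-file" to
// "oss://my-bucket/my-object" without a version ID yields a name of the form
//
//	<hex md5 of "/tmp/my-file">-<hex md5 of "oss://my-bucket/my-object">.cp
//
// i.e. two 32-character hex digests joined by "-" with a ".cp" suffix.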

// getRoutines gets the routine count. By default it's 1.
func getRoutines(options []Option) int {
	rtnOpt, err := FindOption(options, routineNum, nil)
	if err != nil || rtnOpt == nil {
		return 1
	}

	rs := rtnOpt.(int)
	if rs < 1 {
		rs = 1
	} else if rs > 100 {
		rs = 100
	}

	return rs
}
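
// For example, Routines(0) is clamped up to 1 worker and Routines(500) is
// capped at 100, while Routines(3) is used as-is.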

// getPayer returns the payer of the request
func getPayer(options []Option) string {
	payerOpt, err := FindOption(options, HTTPHeaderOssRequester, nil)
	if err != nil || payerOpt == nil {
		return ""
	}
	return payerOpt.(string)
}

// GetProgressListener gets the progress callback
func GetProgressListener(options []Option) ProgressListener {
	isSet, listener, _ := IsOptionSet(options, progressListener)
	if !isSet {
		return nil
	}
	return listener.(ProgressListener)
}
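
// Callers can observe transfer progress by passing Progress(listener) among the
// upload options, where listener implements ProgressListener. A minimal sketch
// (hypothetical listener type, not part of this package):
//
//	type logProgressListener struct{}
//
//	func (l *logProgressListener) ProgressChanged(event *ProgressEvent) {
//		fmt.Printf("event %v: %d/%d bytes\n", event.EventType, event.ConsumedBytes, event.TotalBytes)
//	}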

// uploadPartHook is for testing usage
type uploadPartHook func(id int, chunk FileChunk) error

var uploadPartHooker uploadPartHook = defaultUploadPart

func defaultUploadPart(id int, chunk FileChunk) error {
	return nil
}
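
// Tests in this package can swap out uploadPartHooker to inject failures, e.g.
// (hypothetical test snippet):
//
//	uploadPartHooker = func(id int, chunk FileChunk) error {
//		if chunk.Number == 2 {
//			return errors.New("inject failure for part 2")
//		}
//		return nil
//	}
//	defer func() { uploadPartHooker = defaultUploadPart }()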

// workerArg defines worker argument structure
type workerArg struct {
	bucket   *Bucket
	filePath string
	imur     InitiateMultipartUploadResult
	options  []Option
	hook     uploadPartHook
}

// defaultUploadProgressListener is a no-op progress listener attached to each
// part upload by worker.
type defaultUploadProgressListener struct {
}

// ProgressChanged is a no-op
func (listener *defaultUploadProgressListener) ProgressChanged(event *ProgressEvent) {
}

// worker is the worker goroutine function
func worker(id int, arg workerArg, jobs <-chan FileChunk, results chan<- UploadPart, failed chan<- error, die <-chan bool) {
	for chunk := range jobs {
		if err := arg.hook(id, chunk); err != nil {
			failed <- err
			break
		}
		var respHeader http.Header
		p := Progress(&defaultUploadProgressListener{})
		// Copy the caller's part options and append the internal ones.
		opts := make([]Option, 0, len(arg.options)+2)
		opts = append(opts, arg.options...)

		// use defaultUploadProgressListener
		opts = append(opts, p, GetResponseHeader(&respHeader))

		startT := time.Now().UnixNano() / 1000 / 1000 / 1000
		part, err := arg.bucket.UploadPartFromFile(arg.imur, arg.filePath, chunk.Offset, chunk.Size, chunk.Number, opts...)
		endT := time.Now().UnixNano() / 1000 / 1000 / 1000
		if err != nil {
			arg.bucket.Client.Config.WriteLog(Debug, "upload part error,cost:%d second,part number:%d,request id:%s,error:%s\n", endT-startT, chunk.Number, GetRequestId(respHeader), err.Error())
			failed <- err
			break
		}
		// Stop reporting results once the upload has been aborted.
		select {
		case <-die:
			return
		default:
		}
		results <- part
	}
}

// scheduler feeds all chunks into the jobs channel and then closes it
func scheduler(jobs chan FileChunk, chunks []FileChunk) {
	for _, chunk := range chunks {
		jobs <- chunk
	}
	close(jobs)
}

// getTotalBytes sums the sizes of all chunks
func getTotalBytes(chunks []FileChunk) int64 {
	var tb int64
	for _, chunk := range chunks {
		tb += chunk.Size
	}
	return tb
}

// uploadFile is a concurrent upload, without checkpoint
func (bucket Bucket) uploadFile(objectKey, filePath string, partSize int64, options []Option, routines int) error {
	listener := GetProgressListener(options)

	chunks, err := SplitFileByPartSize(filePath, partSize)
	if err != nil {
		return err
	}

	partOptions := ChoiceTransferPartOption(options)
	completeOptions := ChoiceCompletePartOption(options)
	abortOptions := ChoiceAbortPartOption(options)

	// Initialize the multipart upload
	imur, err := bucket.InitiateMultipartUpload(objectKey, options...)
	if err != nil {
		return err
	}

	jobs := make(chan FileChunk, len(chunks))
	results := make(chan UploadPart, len(chunks))
	failed := make(chan error)
	die := make(chan bool)

	var completedBytes int64
	totalBytes := getTotalBytes(chunks)
	event := newProgressEvent(TransferStartedEvent, 0, totalBytes, 0)
	publishProgress(listener, event)

	// Start the worker goroutines
	arg := workerArg{&bucket, filePath, imur, partOptions, uploadPartHooker}
	for w := 1; w <= routines; w++ {
		go worker(w, arg, jobs, results, failed, die)
	}

	// Schedule the jobs
	go scheduler(jobs, chunks)

	// Wait for the upload to finish
	completed := 0
	parts := make([]UploadPart, len(chunks))
	for completed < len(chunks) {
		select {
		case part := <-results:
			completed++
			parts[part.PartNumber-1] = part
			completedBytes += chunks[part.PartNumber-1].Size

			// Byte-level read progress has already been reported in teeReader.Read();
			// here we publish the per-part completion event.
			event = newProgressEvent(TransferDataEvent, completedBytes, totalBytes, chunks[part.PartNumber-1].Size)
			publishProgress(listener, event)
		case err := <-failed:
			close(die)
			event = newProgressEvent(TransferFailedEvent, completedBytes, totalBytes, 0)
			publishProgress(listener, event)
			bucket.AbortMultipartUpload(imur, abortOptions...)
			return err
		}

		if completed >= len(chunks) {
			break
		}
	}

	event = newProgressEvent(TransferCompletedEvent, completedBytes, totalBytes, 0)
	publishProgress(listener, event)

	// Complete the multipart upload
	_, err = bucket.CompleteMultipartUpload(imur, parts, completeOptions...)
	if err != nil {
		bucket.AbortMultipartUpload(imur, abortOptions...)
		return err
	}
	return nil
}

// ----- concurrent upload with checkpoint -----
const uploadCpMagic = "FE8BB4EA-B593-4FAC-AD7A-2459A36E2E62"

type uploadCheckpoint struct {
	Magic        string   // Magic
	MD5          string   // Checkpoint file content's MD5
	FilePath     string   // Local file path
	FileStat     cpStat   // File state
	ObjectKey    string   // Key
	UploadID     string   // Upload ID
	Parts        []cpPart // All parts of the local file
	CallbackVal  string   // Callback value from the upload options
	CallbackBody *[]byte  // Marshaled callback body from the upload options
}

type cpStat struct {
	Size         int64     // File size
	LastModified time.Time // File's last modified time
	MD5          string    // Local file's MD5
}

type cpPart struct {
	Chunk       FileChunk  // File chunk
	Part        UploadPart // Uploaded part
	IsCompleted bool       // Upload complete flag
}
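
// On disk the checkpoint is this struct serialized as JSON (see dump below);
// a rough sketch with illustrative values:
//
//	{
//	  "Magic": "FE8BB4EA-B593-4FAC-AD7A-2459A36E2E62",
//	  "MD5": "<base64 MD5 of the rest of the document>",
//	  "FilePath": "/tmp/my-file",
//	  "FileStat": {"Size": 1048576, "LastModified": "2023-01-01T00:00:00Z", "MD5": ""},
//	  "ObjectKey": "my-object",
//	  "UploadID": "<upload id>",
//	  "Parts": [{"Chunk": {...}, "Part": {...}, "IsCompleted": true}],
//	  "CallbackVal": "",
//	  "CallbackBody": null
//	}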

// isValid checks whether the checkpoint data is still usable: it's valid only when the local file has not been updated and the checkpoint data itself is intact.
func (cp uploadCheckpoint) isValid(filePath string, options []Option) (bool, error) {
	// The checkpoint is stale if the callback options have changed since it was written.
	callbackVal, _ := FindOption(options, HTTPHeaderOssCallback, "")
	if callbackVal != "" && cp.CallbackVal != callbackVal {
		return false, nil
	}
	callbackBody, _ := FindOption(options, responseBody, nil)
	if callbackBody != nil {
		body, _ := json.Marshal(callbackBody)
		// Invalid if the recorded callback body is missing or differs from the current one.
		if cp.CallbackBody == nil || !bytes.Equal(*cp.CallbackBody, body) {
			return false, nil
		}
	}

	// Compare the CP's magic number and MD5.
	cpb := cp
	cpb.MD5 = ""
	js, _ := json.Marshal(cpb)
	sum := md5.Sum(js)
	b64 := base64.StdEncoding.EncodeToString(sum[:])

	if cp.Magic != uploadCpMagic || b64 != cp.MD5 {
		return false, nil
	}

	// Make sure the local file has not been updated.
	fd, err := os.Open(filePath)
	if err != nil {
		return false, err
	}
	defer fd.Close()

	st, err := fd.Stat()
	if err != nil {
		return false, err
	}

	md, err := calcFileMD5(filePath)
	if err != nil {
		return false, err
	}

	// Compare the file size, file's last modified time and file's MD5
	if cp.FileStat.Size != st.Size() ||
		!cp.FileStat.LastModified.Equal(st.ModTime()) ||
		cp.FileStat.MD5 != md {
		return false, nil
	}

	return true, nil
}

// load loads from the file
func (cp *uploadCheckpoint) load(filePath string) error {
	contents, err := ioutil.ReadFile(filePath)
	if err != nil {
		return err
	}

	err = json.Unmarshal(contents, cp)
	return err
}

// dump dumps to the local file
func (cp *uploadCheckpoint) dump(filePath string) error {
	bcp := *cp

	// Calculate MD5
	bcp.MD5 = ""
	js, err := json.Marshal(bcp)
	if err != nil {
		return err
	}
	sum := md5.Sum(js)
	b64 := base64.StdEncoding.EncodeToString(sum[:])
	bcp.MD5 = b64

	// Serialization
	js, err = json.Marshal(bcp)
	if err != nil {
		return err
	}

	// Dump
	return ioutil.WriteFile(filePath, js, FilePermMode)
}

// updatePart updates the part status
func (cp *uploadCheckpoint) updatePart(part UploadPart) {
	cp.Parts[part.PartNumber-1].Part = part
	cp.Parts[part.PartNumber-1].IsCompleted = true
}

// todoParts returns unfinished parts
func (cp *uploadCheckpoint) todoParts() []FileChunk {
	fcs := []FileChunk{}
	for _, part := range cp.Parts {
		if !part.IsCompleted {
			fcs = append(fcs, part.Chunk)
		}
	}
	return fcs
}

// allParts returns all parts
func (cp *uploadCheckpoint) allParts() []UploadPart {
	ps := []UploadPart{}
	for _, part := range cp.Parts {
		ps = append(ps, part.Part)
	}
	return ps
}

// getCompletedBytes returns completed bytes count
func (cp *uploadCheckpoint) getCompletedBytes() int64 {
	var completedBytes int64
	for _, part := range cp.Parts {
		if part.IsCompleted {
			completedBytes += part.Chunk.Size
		}
	}
	return completedBytes
}

// calcFileMD5 calculates the MD5 for the specified local file.
// Note: this is currently a stub that returns an empty string, so the MD5
// comparison in isValid always passes (both sides stay empty).
func calcFileMD5(filePath string) (string, error) {
	return "", nil
}
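
// If a real file hash were wanted here, a straightforward sketch (not what this
// SDK version does, and it would also require importing "io") is to stream the
// file through an MD5 hasher:
//
//	f, err := os.Open(filePath)
//	if err != nil {
//		return "", err
//	}
//	defer f.Close()
//	h := md5.New()
//	if _, err := io.Copy(h, f); err != nil {
//		return "", err
//	}
//	return hex.EncodeToString(h.Sum(nil)), nil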

// prepare initializes the multipart upload
func prepare(cp *uploadCheckpoint, objectKey, filePath string, partSize int64, bucket *Bucket, options []Option) error {
	// CP
	cp.Magic = uploadCpMagic
	cp.FilePath = filePath
	cp.ObjectKey = objectKey

	// Local file
	fd, err := os.Open(filePath)
	if err != nil {
		return err
	}
	defer fd.Close()

	st, err := fd.Stat()
	if err != nil {
		return err
	}
	cp.FileStat.Size = st.Size()
	cp.FileStat.LastModified = st.ModTime()

	// Record the callback options so isValid can detect changes later
	callbackVal, _ := FindOption(options, HTTPHeaderOssCallback, "")
	cp.CallbackVal = callbackVal.(string)
	callbackBody, _ := FindOption(options, responseBody, nil)
	if callbackBody != nil {
		body, _ := json.Marshal(callbackBody)
		cp.CallbackBody = &body
	}

	md, err := calcFileMD5(filePath)
	if err != nil {
		return err
	}
	cp.FileStat.MD5 = md

	// Chunks
	parts, err := SplitFileByPartSize(filePath, partSize)
	if err != nil {
		return err
	}

	cp.Parts = make([]cpPart, len(parts))
	for i, part := range parts {
		cp.Parts[i].Chunk = part
		cp.Parts[i].IsCompleted = false
	}

	// Initiate the multipart upload
	imur, err := bucket.InitiateMultipartUpload(objectKey, options...)
	if err != nil {
		return err
	}
	cp.UploadID = imur.UploadID

	return nil
}

// complete completes the multipart upload and deletes the local CP files
func complete(cp *uploadCheckpoint, bucket *Bucket, parts []UploadPart, cpFilePath string, options []Option) error {
	imur := InitiateMultipartUploadResult{Bucket: bucket.BucketName,
		Key: cp.ObjectKey, UploadID: cp.UploadID}

	_, err := bucket.CompleteMultipartUpload(imur, parts, options...)
	if err != nil {
		// A 203 (callback failed) or 404 response means the upload cannot be
		// resumed from this checkpoint, so remove the checkpoint file as well.
		if e, ok := err.(ServiceError); ok && (e.StatusCode == 203 || e.StatusCode == 404) {
			os.Remove(cpFilePath)
		}
		return err
	}
	os.Remove(cpFilePath)
	return err
}

// uploadFileWithCp handles concurrent upload with checkpoint
func (bucket Bucket) uploadFileWithCp(objectKey, filePath string, partSize int64, options []Option, cpFilePath string, routines int) error {
	listener := GetProgressListener(options)

	partOptions := ChoiceTransferPartOption(options)
	completeOptions := ChoiceCompletePartOption(options)

	// Load CP data
	ucp := uploadCheckpoint{}
	err := ucp.load(cpFilePath)
	if err != nil {
		os.Remove(cpFilePath)
	}

	// Load error or the CP data is invalid---start over with a fresh checkpoint.
	valid, err := ucp.isValid(filePath, options)
	if err != nil || !valid {
		if err = prepare(&ucp, objectKey, filePath, partSize, &bucket, options); err != nil {
			return err
		}
		os.Remove(cpFilePath)
	}

	chunks := ucp.todoParts()
	imur := InitiateMultipartUploadResult{
		Bucket:   bucket.BucketName,
		Key:      objectKey,
		UploadID: ucp.UploadID}

	jobs := make(chan FileChunk, len(chunks))
	results := make(chan UploadPart, len(chunks))
	failed := make(chan error)
	die := make(chan bool)

	completedBytes := ucp.getCompletedBytes()

	// RwBytes is 0 here; byte-level progress is reported in teeReader.Read()
	event := newProgressEvent(TransferStartedEvent, completedBytes, ucp.FileStat.Size, 0)
	publishProgress(listener, event)

	// Start the workers
	arg := workerArg{&bucket, filePath, imur, partOptions, uploadPartHooker}
	for w := 1; w <= routines; w++ {
		go worker(w, arg, jobs, results, failed, die)
	}

	// Schedule jobs
	go scheduler(jobs, chunks)

	// Wait for the upload to finish
	completed := 0
	for completed < len(chunks) {
		select {
		case part := <-results:
			completed++
			ucp.updatePart(part)
			ucp.dump(cpFilePath)
			completedBytes += ucp.Parts[part.PartNumber-1].Chunk.Size
			event = newProgressEvent(TransferDataEvent, completedBytes, ucp.FileStat.Size, ucp.Parts[part.PartNumber-1].Chunk.Size)
			publishProgress(listener, event)
		case err := <-failed:
			close(die)
			event = newProgressEvent(TransferFailedEvent, completedBytes, ucp.FileStat.Size, 0)
			publishProgress(listener, event)
			return err
		}

		if completed >= len(chunks) {
			break
		}
	}

	event = newProgressEvent(TransferCompletedEvent, completedBytes, ucp.FileStat.Size, 0)
	publishProgress(listener, event)

	// Complete the multipart upload
	err = complete(&ucp, &bucket, ucp.allParts(), cpFilePath, completeOptions)
	return err
}
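
// A rough end-to-end sketch of a resumable upload (illustrative only; bucket,
// object and paths are placeholders). Re-running the same call after an
// interruption resumes from the parts recorded in the checkpoint directory:
//
//	err = bkt.UploadFile("my-object", "/tmp/my-file", 1024*1024,
//		Routines(3), CheckpointDir(true, "/tmp/oss-checkpoints"))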