github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/backend/qingstor/upload.go (about)

     1  // Upload object to QingStor
     2  
     3  // +build !plan9
     4  
     5  package qingstor
     6  
     7  import (
     8  	"bytes"
     9  	"crypto/md5"
    10  	"fmt"
    11  	"hash"
    12  	"io"
    13  	"sort"
    14  	"sync"
    15  
    16  	"github.com/pkg/errors"
    17  	"github.com/rclone/rclone/fs"
    18  	qs "github.com/yunify/qingstor-sdk-go/v3/service"
    19  )
    20  
// Limits imposed by the QingStor multipart-upload API.
const (
	// maxSinglePartSize = 1024 * 1024 * 1024 * 5 // The maximum allowed size when uploading a single object to QingStor
	// maxMultiPartSize = 1024 * 1024 * 1024 * 1 // The maximum allowed part size when uploading a part to QingStor
	minMultiPartSize = 1024 * 1024 * 4 // The minimum allowed part size when uploading a part to QingStor
	maxMultiParts    = 10000           // The maximum allowed number of parts in a multi-part upload
)
    27  
// Defaults applied by uploader.init for zero-valued uploadInput fields.
const (
	defaultUploadPartSize    = 1024 * 1024 * 64 // The default part size to buffer chunks of a payload into.
	defaultUploadConcurrency = 4                // the default number of goroutines to spin up when using multiPartUpload.
)
    32  
    33  func readFillBuf(r io.Reader, b []byte) (offset int, err error) {
    34  	for offset < len(b) && err == nil {
    35  		var n int
    36  		n, err = r.Read(b[offset:])
    37  		offset += n
    38  	}
    39  
    40  	return offset, err
    41  }
    42  
// uploadInput contains all input for upload requests to QingStor.
type uploadInput struct {
	body           io.Reader   // source of the object data
	qsSvc          *qs.Service // QingStor service client used to build the bucket controller
	mimeType       string      // Content-Type sent with the upload
	zone           string      // zone the bucket lives in
	bucket         string      // bucket to upload into
	key            string      // object key within the bucket
	partSize       int64       // size of each part; defaulted by uploader.init when zero
	concurrency    int         // number of parallel part-upload goroutines; defaulted when zero
	maxUploadParts int         // cap on the number of parts; defaulted when zero
}
    55  
// uploader internal structure to manage an upload to QingStor.
type uploader struct {
	cfg        *uploadInput // upload parameters; defaults are filled in by init()
	totalSize  int64        // set to -1 if the size is not known
	readerPos  int64        // current reader position
	readerSize int64        // current reader content size
}
    63  
    64  // newUploader creates a new Uploader instance to upload objects to QingStor.
    65  func newUploader(in *uploadInput) *uploader {
    66  	u := &uploader{
    67  		cfg: in,
    68  	}
    69  	return u
    70  }
    71  
    72  // bucketInit initiate as bucket controller
    73  func (u *uploader) bucketInit() (*qs.Bucket, error) {
    74  	bucketInit, err := u.cfg.qsSvc.Bucket(u.cfg.bucket, u.cfg.zone)
    75  	return bucketInit, err
    76  }
    77  
    78  // String converts uploader to a string
    79  func (u *uploader) String() string {
    80  	return fmt.Sprintf("QingStor bucket %s key %s", u.cfg.bucket, u.cfg.key)
    81  }
    82  
    83  // nextReader returns a seekable reader representing the next packet of data.
    84  // This operation increases the shared u.readerPos counter, but note that it
    85  // does not need to be wrapped in a mutex because nextReader is only called
    86  // from the main thread.
    87  func (u *uploader) nextReader() (io.ReadSeeker, int, error) {
    88  	type readerAtSeeker interface {
    89  		io.ReaderAt
    90  		io.ReadSeeker
    91  	}
    92  	switch r := u.cfg.body.(type) {
    93  	case readerAtSeeker:
    94  		var err error
    95  		n := u.cfg.partSize
    96  		if u.totalSize >= 0 {
    97  			bytesLeft := u.totalSize - u.readerPos
    98  
    99  			if bytesLeft <= u.cfg.partSize {
   100  				err = io.EOF
   101  				n = bytesLeft
   102  			}
   103  		}
   104  		reader := io.NewSectionReader(r, u.readerPos, n)
   105  		u.readerPos += n
   106  		u.readerSize = n
   107  		return reader, int(n), err
   108  
   109  	default:
   110  		part := make([]byte, u.cfg.partSize)
   111  		n, err := readFillBuf(r, part)
   112  		u.readerPos += int64(n)
   113  		u.readerSize = int64(n)
   114  		return bytes.NewReader(part[0:n]), n, err
   115  	}
   116  }
   117  
   118  // init will initialize all default options.
   119  func (u *uploader) init() {
   120  	if u.cfg.concurrency == 0 {
   121  		u.cfg.concurrency = defaultUploadConcurrency
   122  	}
   123  	if u.cfg.partSize == 0 {
   124  		u.cfg.partSize = defaultUploadPartSize
   125  	}
   126  	if u.cfg.maxUploadParts == 0 {
   127  		u.cfg.maxUploadParts = maxMultiParts
   128  	}
   129  	// Try to get the total size for some optimizations
   130  	u.totalSize = -1
   131  	switch r := u.cfg.body.(type) {
   132  	case io.Seeker:
   133  		pos, _ := r.Seek(0, io.SeekCurrent)
   134  		defer func() {
   135  			_, _ = r.Seek(pos, io.SeekStart)
   136  		}()
   137  
   138  		n, err := r.Seek(0, io.SeekEnd)
   139  		if err != nil {
   140  			return
   141  		}
   142  		u.totalSize = n
   143  
   144  		// Try to adjust partSize if it is too small and account for
   145  		// integer division truncation.
   146  		if u.totalSize/u.cfg.partSize >= u.cfg.partSize {
   147  			// Add one to the part size to account for remainders
   148  			// during the size calculation. e.g odd number of bytes.
   149  			u.cfg.partSize = (u.totalSize / int64(u.cfg.maxUploadParts)) + 1
   150  		}
   151  	}
   152  }
   153  
   154  // singlePartUpload upload a single object that contentLength less than "defaultUploadPartSize"
   155  func (u *uploader) singlePartUpload(buf io.Reader, size int64) error {
   156  	bucketInit, _ := u.bucketInit()
   157  
   158  	req := qs.PutObjectInput{
   159  		ContentLength: &size,
   160  		ContentType:   &u.cfg.mimeType,
   161  		Body:          buf,
   162  	}
   163  
   164  	_, err := bucketInit.PutObject(u.cfg.key, &req)
   165  	if err == nil {
   166  		fs.Debugf(u, "Upload single object finished")
   167  	}
   168  	return err
   169  }
   170  
   171  // Upload upload an object into QingStor
   172  func (u *uploader) upload() error {
   173  	u.init()
   174  
   175  	if u.cfg.partSize < minMultiPartSize {
   176  		return errors.Errorf("part size must be at least %d bytes", minMultiPartSize)
   177  	}
   178  
   179  	// Do one read to determine if we have more than one part
   180  	reader, _, err := u.nextReader()
   181  	if err == io.EOF { // single part
   182  		fs.Debugf(u, "Uploading as single part object to QingStor")
   183  		return u.singlePartUpload(reader, u.readerPos)
   184  	} else if err != nil {
   185  		return errors.Errorf("read upload data failed: %s", err)
   186  	}
   187  
   188  	fs.Debugf(u, "Uploading as multi-part object to QingStor")
   189  	mu := multiUploader{uploader: u}
   190  	return mu.multiPartUpload(reader)
   191  }
   192  
// multiUploader is the internal structure to manage a specific multipart
// upload to QingStor.
type multiUploader struct {
	*uploader
	wg          sync.WaitGroup // tracks the readChunk worker goroutines
	mtx         sync.Mutex     // guards err, objectParts and hashMd5
	err         error          // first error reported by any worker
	uploadID    *string        // ID returned by InitiateMultipartUpload; nil until initiated
	objectParts completedParts // parts uploaded so far, sent on completion
	hashMd5     hash.Hash      // running MD5 of all part bodies, used for the final ETag
}
   203  
// chunk keeps track of a single chunk of data being sent to QingStor.
type chunk struct {
	buffer     io.ReadSeeker // part body; seekable so send() can rewind it for hashing
	partNumber int           // zero-based part number within the upload
	size       int64         // number of bytes in buffer
}
   210  
   211  // completedParts is a wrapper to make parts sortable by their part number,
   212  // since QingStor required this list to be sent in sorted order.
   213  type completedParts []*qs.ObjectPartType
   214  
   215  func (a completedParts) Len() int           { return len(a) }
   216  func (a completedParts) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
   217  func (a completedParts) Less(i, j int) bool { return *a[i].PartNumber < *a[j].PartNumber }
   218  
   219  // String converts multiUploader to a string
   220  func (mu *multiUploader) String() string {
   221  	if uploadID := mu.uploadID; uploadID != nil {
   222  		return fmt.Sprintf("QingStor bucket %s key %s uploadID %s", mu.cfg.bucket, mu.cfg.key, *uploadID)
   223  	}
   224  	return fmt.Sprintf("QingStor bucket %s key %s uploadID <nil>", mu.cfg.bucket, mu.cfg.key)
   225  }
   226  
   227  // getErr is a thread-safe getter for the error object
   228  func (mu *multiUploader) getErr() error {
   229  	mu.mtx.Lock()
   230  	defer mu.mtx.Unlock()
   231  	return mu.err
   232  }
   233  
   234  // setErr is a thread-safe setter for the error object
   235  func (mu *multiUploader) setErr(e error) {
   236  	mu.mtx.Lock()
   237  	defer mu.mtx.Unlock()
   238  	mu.err = e
   239  }
   240  
   241  // readChunk runs in worker goroutines to pull chunks off of the ch channel
   242  // and send() them as UploadPart requests.
   243  func (mu *multiUploader) readChunk(ch chan chunk) {
   244  	defer mu.wg.Done()
   245  	for {
   246  		c, ok := <-ch
   247  		if !ok {
   248  			break
   249  		}
   250  		if mu.getErr() == nil {
   251  			if err := mu.send(c); err != nil {
   252  				mu.setErr(err)
   253  			}
   254  		}
   255  	}
   256  }
   257  
   258  // initiate init a Multiple Object and obtain UploadID
   259  func (mu *multiUploader) initiate() error {
   260  	bucketInit, _ := mu.bucketInit()
   261  	req := qs.InitiateMultipartUploadInput{
   262  		ContentType: &mu.cfg.mimeType,
   263  	}
   264  	fs.Debugf(mu, "Initiating a multi-part upload")
   265  	rsp, err := bucketInit.InitiateMultipartUpload(mu.cfg.key, &req)
   266  	if err == nil {
   267  		mu.uploadID = rsp.UploadID
   268  		mu.hashMd5 = md5.New()
   269  	}
   270  	return err
   271  }
   272  
   273  // send upload a part into QingStor
   274  func (mu *multiUploader) send(c chunk) error {
   275  	bucketInit, _ := mu.bucketInit()
   276  	req := qs.UploadMultipartInput{
   277  		PartNumber:    &c.partNumber,
   278  		UploadID:      mu.uploadID,
   279  		ContentLength: &c.size,
   280  		Body:          c.buffer,
   281  	}
   282  	fs.Debugf(mu, "Uploading a part to QingStor with partNumber %d and partSize %d", c.partNumber, c.size)
   283  	_, err := bucketInit.UploadMultipart(mu.cfg.key, &req)
   284  	if err != nil {
   285  		return err
   286  	}
   287  	fs.Debugf(mu, "Done uploading part partNumber %d and partSize %d", c.partNumber, c.size)
   288  
   289  	mu.mtx.Lock()
   290  	defer mu.mtx.Unlock()
   291  
   292  	_, _ = c.buffer.Seek(0, 0)
   293  	_, _ = io.Copy(mu.hashMd5, c.buffer)
   294  
   295  	parts := qs.ObjectPartType{PartNumber: &c.partNumber, Size: &c.size}
   296  	mu.objectParts = append(mu.objectParts, &parts)
   297  	return err
   298  }
   299  
   300  // complete complete a multipart upload
   301  func (mu *multiUploader) complete() error {
   302  	var err error
   303  	if err = mu.getErr(); err != nil {
   304  		return err
   305  	}
   306  	bucketInit, _ := mu.bucketInit()
   307  	//if err = mu.list(); err != nil {
   308  	//	return err
   309  	//}
   310  	//md5String := fmt.Sprintf("\"%s\"", hex.EncodeToString(mu.hashMd5.Sum(nil)))
   311  
   312  	md5String := fmt.Sprintf("\"%x\"", mu.hashMd5.Sum(nil))
   313  	sort.Sort(mu.objectParts)
   314  	req := qs.CompleteMultipartUploadInput{
   315  		UploadID:    mu.uploadID,
   316  		ObjectParts: mu.objectParts,
   317  		ETag:        &md5String,
   318  	}
   319  	fs.Debugf(mu, "Completing multi-part object")
   320  	_, err = bucketInit.CompleteMultipartUpload(mu.cfg.key, &req)
   321  	if err == nil {
   322  		fs.Debugf(mu, "Complete multi-part finished")
   323  	}
   324  	return err
   325  }
   326  
   327  // abort abort a multipart upload
   328  func (mu *multiUploader) abort() error {
   329  	var err error
   330  	bucketInit, _ := mu.bucketInit()
   331  
   332  	if uploadID := mu.uploadID; uploadID != nil {
   333  		req := qs.AbortMultipartUploadInput{
   334  			UploadID: uploadID,
   335  		}
   336  		fs.Debugf(mu, "Aborting multi-part object %q", *uploadID)
   337  		_, err = bucketInit.AbortMultipartUpload(mu.cfg.key, &req)
   338  	}
   339  
   340  	return err
   341  }
   342  
// multiPartUpload upload a multiple object into QingStor
//
// firstBuf is the first chunk, already read by upload(); subsequent chunks
// are produced here via nextReader and fed over a channel to
// cfg.concurrency worker goroutines. If any error is returned the upload
// is aborted server-side via the deferred abort().
func (mu *multiUploader) multiPartUpload(firstBuf io.ReadSeeker) (err error) {
	// Initiate a multi-part upload
	if err = mu.initiate(); err != nil {
		return err
	}
	defer func() {
		// Abort the transfer if returning an error
		if err != nil {
			_ = mu.abort()
		}
	}()

	ch := make(chan chunk, mu.cfg.concurrency)
	for i := 0; i < mu.cfg.concurrency; i++ {
		mu.wg.Add(1)
		go mu.readChunk(ch)
	}

	// The first part was read by the caller; it is queued with
	// partNumber 0 (part numbers here are zero-based).
	var partNumber int
	ch <- chunk{partNumber: partNumber, buffer: firstBuf, size: mu.readerSize}

	for mu.getErr() == nil {
		partNumber++
		// This upload exceeded maximum number of supported parts, error now.
		if partNumber > mu.cfg.maxUploadParts || partNumber > maxMultiParts {
			var msg string
			if partNumber > mu.cfg.maxUploadParts {
				msg = fmt.Sprintf("exceeded total allowed configured maxUploadParts (%d). "+
					"Adjust PartSize to fit in this limit", mu.cfg.maxUploadParts)
			} else {
				msg = fmt.Sprintf("exceeded total allowed QingStor limit maxUploadParts (%d). "+
					"Adjust PartSize to fit in this limit", maxMultiParts)
			}
			mu.setErr(errors.New(msg))
			break
		}

		var reader io.ReadSeeker
		var nextChunkLen int
		reader, nextChunkLen, err = mu.nextReader()
		if err != nil && err != io.EOF {
			// A real read error: spawn a drainer so no worker blocks on a
			// pending receive, then close the channel and wait for the
			// workers to exit before reporting the error.
			// empty ch
			go func() {
				for range ch {
				}
			}()
			// Wait for all goroutines finish
			close(ch)
			mu.wg.Wait()
			return err
		}
		if nextChunkLen == 0 && partNumber > 0 {
			// No need to upload empty part, if file was empty to start
			// with empty single part would of been created and never
			// started multipart upload.
			break
		}
		num := partNumber
		ch <- chunk{partNumber: num, buffer: reader, size: mu.readerSize}
	}
	// Wait for all goroutines finish
	close(ch)
	mu.wg.Wait()
	// Complete Multipart Upload
	return mu.complete()
}