github.com/artpar/rclone@v1.67.3/backend/qingstor/upload.go (about)

     1  // Upload object to QingStor
     2  
     3  //go:build !plan9 && !js
     4  
     5  package qingstor
     6  
     7  import (
     8  	"bytes"
     9  	"crypto/md5"
    10  	"errors"
    11  	"fmt"
    12  	"hash"
    13  	"io"
    14  	"sort"
    15  	"sync"
    16  
    17  	"github.com/artpar/rclone/fs"
    18  	"github.com/artpar/rclone/lib/atexit"
    19  	qs "github.com/yunify/qingstor-sdk-go/v3/service"
    20  )
    21  
    22  const (
    23  	// maxSinglePartSize = 1024 * 1024 * 1024 * 5 // The maximum allowed size when uploading a single object to QingStor
    24  	// maxMultiPartSize = 1024 * 1024 * 1024 * 1 // The maximum allowed part size when uploading a part to QingStor
    25  	minMultiPartSize = 1024 * 1024 * 4 // The minimum allowed part size when uploading a part to QingStor
    26  	maxMultiParts    = 10000           // The maximum allowed number of parts in a multi-part upload
    27  )
    28  
    29  const (
    30  	defaultUploadPartSize    = 1024 * 1024 * 64 // The default part size to buffer chunks of a payload into.
    31  	defaultUploadConcurrency = 4                // the default number of goroutines to spin up when using multiPartUpload.
    32  )
    33  
    34  func readFillBuf(r io.Reader, b []byte) (offset int, err error) {
    35  	for offset < len(b) && err == nil {
    36  		var n int
    37  		n, err = r.Read(b[offset:])
    38  		offset += n
    39  	}
    40  
    41  	return offset, err
    42  }
    43  
    44  // uploadInput contains all input for upload requests to QingStor.
    45  type uploadInput struct {
    46  	body           io.Reader
    47  	qsSvc          *qs.Service
    48  	mimeType       string
    49  	zone           string
    50  	bucket         string
    51  	key            string
    52  	partSize       int64
    53  	concurrency    int
    54  	maxUploadParts int
    55  }
    56  
// uploader is the internal structure that manages an upload to QingStor. It
// carries the caller's settings plus the read-progress counters that
// nextReader maintains.
type uploader struct {
	cfg        *uploadInput // caller-supplied settings; init fills in zero-valued defaults
	totalSize  int64        // set to -1 if the size is not known
	readerPos  int64        // current reader position (bytes handed out so far)
	readerSize int64        // size of the most recent part returned by nextReader
}
    64  
    65  // newUploader creates a new Uploader instance to upload objects to QingStor.
    66  func newUploader(in *uploadInput) *uploader {
    67  	u := &uploader{
    68  		cfg: in,
    69  	}
    70  	return u
    71  }
    72  
    73  // bucketInit initiate as bucket controller
    74  func (u *uploader) bucketInit() (*qs.Bucket, error) {
    75  	bucketInit, err := u.cfg.qsSvc.Bucket(u.cfg.bucket, u.cfg.zone)
    76  	return bucketInit, err
    77  }
    78  
    79  // String converts uploader to a string
    80  func (u *uploader) String() string {
    81  	return fmt.Sprintf("QingStor bucket %s key %s", u.cfg.bucket, u.cfg.key)
    82  }
    83  
    84  // nextReader returns a seekable reader representing the next packet of data.
    85  // This operation increases the shared u.readerPos counter, but note that it
    86  // does not need to be wrapped in a mutex because nextReader is only called
    87  // from the main thread.
    88  func (u *uploader) nextReader() (io.ReadSeeker, int, error) {
    89  	type readerAtSeeker interface {
    90  		io.ReaderAt
    91  		io.ReadSeeker
    92  	}
    93  	switch r := u.cfg.body.(type) {
    94  	case readerAtSeeker:
    95  		var err error
    96  		n := u.cfg.partSize
    97  		if u.totalSize >= 0 {
    98  			bytesLeft := u.totalSize - u.readerPos
    99  
   100  			if bytesLeft <= u.cfg.partSize {
   101  				err = io.EOF
   102  				n = bytesLeft
   103  			}
   104  		}
   105  		reader := io.NewSectionReader(r, u.readerPos, n)
   106  		u.readerPos += n
   107  		u.readerSize = n
   108  		return reader, int(n), err
   109  
   110  	default:
   111  		part := make([]byte, u.cfg.partSize)
   112  		n, err := readFillBuf(r, part)
   113  		u.readerPos += int64(n)
   114  		u.readerSize = int64(n)
   115  		return bytes.NewReader(part[0:n]), n, err
   116  	}
   117  }
   118  
   119  // init will initialize all default options.
   120  func (u *uploader) init() {
   121  	if u.cfg.concurrency == 0 {
   122  		u.cfg.concurrency = defaultUploadConcurrency
   123  	}
   124  	if u.cfg.partSize == 0 {
   125  		u.cfg.partSize = defaultUploadPartSize
   126  	}
   127  	if u.cfg.maxUploadParts == 0 {
   128  		u.cfg.maxUploadParts = maxMultiParts
   129  	}
   130  	// Try to get the total size for some optimizations
   131  	u.totalSize = -1
   132  	switch r := u.cfg.body.(type) {
   133  	case io.Seeker:
   134  		pos, _ := r.Seek(0, io.SeekCurrent)
   135  		defer func() {
   136  			_, _ = r.Seek(pos, io.SeekStart)
   137  		}()
   138  
   139  		n, err := r.Seek(0, io.SeekEnd)
   140  		if err != nil {
   141  			return
   142  		}
   143  		u.totalSize = n
   144  
   145  		// Try to adjust partSize if it is too small and account for
   146  		// integer division truncation.
   147  		if u.totalSize/u.cfg.partSize >= u.cfg.partSize {
   148  			// Add one to the part size to account for remainders
   149  			// during the size calculation. e.g odd number of bytes.
   150  			u.cfg.partSize = (u.totalSize / int64(u.cfg.maxUploadParts)) + 1
   151  		}
   152  	}
   153  }
   154  
   155  // singlePartUpload upload a single object that contentLength less than "defaultUploadPartSize"
   156  func (u *uploader) singlePartUpload(buf io.Reader, size int64) error {
   157  	bucketInit, _ := u.bucketInit()
   158  
   159  	req := qs.PutObjectInput{
   160  		ContentLength: &size,
   161  		ContentType:   &u.cfg.mimeType,
   162  		Body:          buf,
   163  	}
   164  
   165  	_, err := bucketInit.PutObject(u.cfg.key, &req)
   166  	if err == nil {
   167  		fs.Debugf(u, "Upload single object finished")
   168  	}
   169  	return err
   170  }
   171  
   172  // Upload upload an object into QingStor
   173  func (u *uploader) upload() error {
   174  	u.init()
   175  
   176  	if u.cfg.partSize < minMultiPartSize {
   177  		return fmt.Errorf("part size must be at least %d bytes", minMultiPartSize)
   178  	}
   179  
   180  	// Do one read to determine if we have more than one part
   181  	reader, _, err := u.nextReader()
   182  	if err == io.EOF { // single part
   183  		fs.Debugf(u, "Uploading as single part object to QingStor")
   184  		return u.singlePartUpload(reader, u.readerPos)
   185  	} else if err != nil {
   186  		return fmt.Errorf("read upload data failed: %w", err)
   187  	}
   188  
   189  	fs.Debugf(u, "Uploading as multi-part object to QingStor")
   190  	mu := multiUploader{uploader: u}
   191  	return mu.multiPartUpload(reader)
   192  }
   193  
// multiUploader is the internal structure that manages a specific multipart
// upload to QingStor. It embeds the uploader it was created from and adds the
// state shared between the producer loop and the worker goroutines.
type multiUploader struct {
	*uploader
	wg          sync.WaitGroup // tracks the readChunk worker goroutines
	mtx         sync.Mutex     // guards err, and the hashMd5/objectParts updates made by send
	err         error          // error recorded via setErr; read via getErr
	uploadID    *string        // set by initiate; nil until then
	objectParts completedParts // one entry appended by send per uploaded part
	hashMd5     hash.Hash      // MD5 created by initiate and fed with part data by send
}
   204  
// chunk keeps track of a single part of data being sent to QingStor.
type chunk struct {
	buffer     io.ReadSeeker // part data; send rewinds it after the upload to hash it
	partNumber int           // part number sent with the upload request
	size       int64         // sent as the content length of the part
}
   211  
   212  // completedParts is a wrapper to make parts sortable by their part number,
   213  // since QingStor required this list to be sent in sorted order.
   214  type completedParts []*qs.ObjectPartType
   215  
   216  func (a completedParts) Len() int           { return len(a) }
   217  func (a completedParts) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
   218  func (a completedParts) Less(i, j int) bool { return *a[i].PartNumber < *a[j].PartNumber }
   219  
   220  // String converts multiUploader to a string
   221  func (mu *multiUploader) String() string {
   222  	if uploadID := mu.uploadID; uploadID != nil {
   223  		return fmt.Sprintf("QingStor bucket %s key %s uploadID %s", mu.cfg.bucket, mu.cfg.key, *uploadID)
   224  	}
   225  	return fmt.Sprintf("QingStor bucket %s key %s uploadID <nil>", mu.cfg.bucket, mu.cfg.key)
   226  }
   227  
   228  // getErr is a thread-safe getter for the error object
   229  func (mu *multiUploader) getErr() error {
   230  	mu.mtx.Lock()
   231  	defer mu.mtx.Unlock()
   232  	return mu.err
   233  }
   234  
   235  // setErr is a thread-safe setter for the error object
   236  func (mu *multiUploader) setErr(e error) {
   237  	mu.mtx.Lock()
   238  	defer mu.mtx.Unlock()
   239  	mu.err = e
   240  }
   241  
   242  // readChunk runs in worker goroutines to pull chunks off of the ch channel
   243  // and send() them as UploadPart requests.
   244  func (mu *multiUploader) readChunk(ch chan chunk) {
   245  	defer mu.wg.Done()
   246  	for {
   247  		c, ok := <-ch
   248  		if !ok {
   249  			break
   250  		}
   251  		if mu.getErr() == nil {
   252  			if err := mu.send(c); err != nil {
   253  				mu.setErr(err)
   254  			}
   255  		}
   256  	}
   257  }
   258  
   259  // initiate init a Multiple Object and obtain UploadID
   260  func (mu *multiUploader) initiate() error {
   261  	bucketInit, _ := mu.bucketInit()
   262  	req := qs.InitiateMultipartUploadInput{
   263  		ContentType: &mu.cfg.mimeType,
   264  	}
   265  	fs.Debugf(mu, "Initiating a multi-part upload")
   266  	rsp, err := bucketInit.InitiateMultipartUpload(mu.cfg.key, &req)
   267  	if err == nil {
   268  		mu.uploadID = rsp.UploadID
   269  		mu.hashMd5 = md5.New()
   270  	}
   271  	return err
   272  }
   273  
   274  // send upload a part into QingStor
   275  func (mu *multiUploader) send(c chunk) error {
   276  	bucketInit, _ := mu.bucketInit()
   277  	req := qs.UploadMultipartInput{
   278  		PartNumber:    &c.partNumber,
   279  		UploadID:      mu.uploadID,
   280  		ContentLength: &c.size,
   281  		Body:          c.buffer,
   282  	}
   283  	fs.Debugf(mu, "Uploading a part to QingStor with partNumber %d and partSize %d", c.partNumber, c.size)
   284  	_, err := bucketInit.UploadMultipart(mu.cfg.key, &req)
   285  	if err != nil {
   286  		return err
   287  	}
   288  	fs.Debugf(mu, "Done uploading part partNumber %d and partSize %d", c.partNumber, c.size)
   289  
   290  	mu.mtx.Lock()
   291  	defer mu.mtx.Unlock()
   292  
   293  	_, _ = c.buffer.Seek(0, 0)
   294  	_, _ = io.Copy(mu.hashMd5, c.buffer)
   295  
   296  	parts := qs.ObjectPartType{PartNumber: &c.partNumber, Size: &c.size}
   297  	mu.objectParts = append(mu.objectParts, &parts)
   298  	return err
   299  }
   300  
   301  // complete complete a multipart upload
   302  func (mu *multiUploader) complete() error {
   303  	var err error
   304  	if err = mu.getErr(); err != nil {
   305  		return err
   306  	}
   307  	bucketInit, _ := mu.bucketInit()
   308  	//if err = mu.list(); err != nil {
   309  	//	return err
   310  	//}
   311  	//md5String := fmt.Sprintf("\"%s\"", hex.EncodeToString(mu.hashMd5.Sum(nil)))
   312  
   313  	md5String := fmt.Sprintf("\"%x\"", mu.hashMd5.Sum(nil))
   314  	sort.Sort(mu.objectParts)
   315  	req := qs.CompleteMultipartUploadInput{
   316  		UploadID:    mu.uploadID,
   317  		ObjectParts: mu.objectParts,
   318  		ETag:        &md5String,
   319  	}
   320  	fs.Debugf(mu, "Completing multi-part object")
   321  	_, err = bucketInit.CompleteMultipartUpload(mu.cfg.key, &req)
   322  	if err == nil {
   323  		fs.Debugf(mu, "Complete multi-part finished")
   324  	}
   325  	return err
   326  }
   327  
   328  // abort abort a multipart upload
   329  func (mu *multiUploader) abort() error {
   330  	var err error
   331  	bucketInit, _ := mu.bucketInit()
   332  
   333  	if uploadID := mu.uploadID; uploadID != nil {
   334  		req := qs.AbortMultipartUploadInput{
   335  			UploadID: uploadID,
   336  		}
   337  		fs.Debugf(mu, "Aborting multi-part object %q", *uploadID)
   338  		_, err = bucketInit.AbortMultipartUpload(mu.cfg.key, &req)
   339  	}
   340  
   341  	return err
   342  }
   343  
   344  // multiPartUpload upload a multiple object into QingStor
   345  func (mu *multiUploader) multiPartUpload(firstBuf io.ReadSeeker) (err error) {
   346  	// Initiate a multi-part upload
   347  	if err = mu.initiate(); err != nil {
   348  		return err
   349  	}
   350  
   351  	// Cancel the session if something went wrong
   352  	defer atexit.OnError(&err, func() {
   353  		fs.Debugf(mu, "Cancelling multipart upload: %v", err)
   354  		cancelErr := mu.abort()
   355  		if cancelErr != nil {
   356  			fs.Logf(mu, "Failed to cancel multipart upload: %v", cancelErr)
   357  		}
   358  	})()
   359  
   360  	ch := make(chan chunk, mu.cfg.concurrency)
   361  	for i := 0; i < mu.cfg.concurrency; i++ {
   362  		mu.wg.Add(1)
   363  		go mu.readChunk(ch)
   364  	}
   365  
   366  	var partNumber int
   367  	ch <- chunk{partNumber: partNumber, buffer: firstBuf, size: mu.readerSize}
   368  
   369  	for mu.getErr() == nil {
   370  		partNumber++
   371  		// This upload exceeded maximum number of supported parts, error now.
   372  		if partNumber > mu.cfg.maxUploadParts || partNumber > maxMultiParts {
   373  			var msg string
   374  			if partNumber > mu.cfg.maxUploadParts {
   375  				msg = fmt.Sprintf("exceeded total allowed configured maxUploadParts (%d). "+
   376  					"Adjust PartSize to fit in this limit", mu.cfg.maxUploadParts)
   377  			} else {
   378  				msg = fmt.Sprintf("exceeded total allowed QingStor limit maxUploadParts (%d). "+
   379  					"Adjust PartSize to fit in this limit", maxMultiParts)
   380  			}
   381  			mu.setErr(errors.New(msg))
   382  			break
   383  		}
   384  
   385  		var reader io.ReadSeeker
   386  		var nextChunkLen int
   387  		reader, nextChunkLen, err = mu.nextReader()
   388  		if err != nil && err != io.EOF {
   389  			// empty ch
   390  			go func() {
   391  				for range ch {
   392  				}
   393  			}()
   394  			// Wait for all goroutines finish
   395  			close(ch)
   396  			mu.wg.Wait()
   397  			return err
   398  		}
   399  		if nextChunkLen == 0 && partNumber > 0 {
   400  			// No need to upload empty part, if file was empty to start
   401  			// with empty single part would of been created and never
   402  			// started multipart upload.
   403  			break
   404  		}
   405  		num := partNumber
   406  		ch <- chunk{partNumber: num, buffer: reader, size: mu.readerSize}
   407  	}
   408  	// Wait for all goroutines finish
   409  	close(ch)
   410  	mu.wg.Wait()
   411  	// Complete Multipart Upload
   412  	return mu.complete()
   413  }