github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/backend/qingstor/upload.go (about) 1 // Upload object to QingStor 2 3 // +build !plan9 4 5 package qingstor 6 7 import ( 8 "bytes" 9 "crypto/md5" 10 "fmt" 11 "hash" 12 "io" 13 "sort" 14 "sync" 15 16 "github.com/pkg/errors" 17 "github.com/rclone/rclone/fs" 18 qs "github.com/yunify/qingstor-sdk-go/v3/service" 19 ) 20 21 const ( 22 // maxSinglePartSize = 1024 * 1024 * 1024 * 5 // The maximum allowed size when uploading a single object to QingStor 23 // maxMultiPartSize = 1024 * 1024 * 1024 * 1 // The maximum allowed part size when uploading a part to QingStor 24 minMultiPartSize = 1024 * 1024 * 4 // The minimum allowed part size when uploading a part to QingStor 25 maxMultiParts = 10000 // The maximum allowed number of parts in a multi-part upload 26 ) 27 28 const ( 29 defaultUploadPartSize = 1024 * 1024 * 64 // The default part size to buffer chunks of a payload into. 30 defaultUploadConcurrency = 4 // the default number of goroutines to spin up when using multiPartUpload. 31 ) 32 33 func readFillBuf(r io.Reader, b []byte) (offset int, err error) { 34 for offset < len(b) && err == nil { 35 var n int 36 n, err = r.Read(b[offset:]) 37 offset += n 38 } 39 40 return offset, err 41 } 42 43 // uploadInput contains all input for upload requests to QingStor. 44 type uploadInput struct { 45 body io.Reader 46 qsSvc *qs.Service 47 mimeType string 48 zone string 49 bucket string 50 key string 51 partSize int64 52 concurrency int 53 maxUploadParts int 54 } 55 56 // uploader internal structure to manage an upload to QingStor. 57 type uploader struct { 58 cfg *uploadInput 59 totalSize int64 // set to -1 if the size is not known 60 readerPos int64 // current reader position 61 readerSize int64 // current reader content size 62 } 63 64 // newUploader creates a new Uploader instance to upload objects to QingStor. 
65 func newUploader(in *uploadInput) *uploader { 66 u := &uploader{ 67 cfg: in, 68 } 69 return u 70 } 71 72 // bucketInit initiate as bucket controller 73 func (u *uploader) bucketInit() (*qs.Bucket, error) { 74 bucketInit, err := u.cfg.qsSvc.Bucket(u.cfg.bucket, u.cfg.zone) 75 return bucketInit, err 76 } 77 78 // String converts uploader to a string 79 func (u *uploader) String() string { 80 return fmt.Sprintf("QingStor bucket %s key %s", u.cfg.bucket, u.cfg.key) 81 } 82 83 // nextReader returns a seekable reader representing the next packet of data. 84 // This operation increases the shared u.readerPos counter, but note that it 85 // does not need to be wrapped in a mutex because nextReader is only called 86 // from the main thread. 87 func (u *uploader) nextReader() (io.ReadSeeker, int, error) { 88 type readerAtSeeker interface { 89 io.ReaderAt 90 io.ReadSeeker 91 } 92 switch r := u.cfg.body.(type) { 93 case readerAtSeeker: 94 var err error 95 n := u.cfg.partSize 96 if u.totalSize >= 0 { 97 bytesLeft := u.totalSize - u.readerPos 98 99 if bytesLeft <= u.cfg.partSize { 100 err = io.EOF 101 n = bytesLeft 102 } 103 } 104 reader := io.NewSectionReader(r, u.readerPos, n) 105 u.readerPos += n 106 u.readerSize = n 107 return reader, int(n), err 108 109 default: 110 part := make([]byte, u.cfg.partSize) 111 n, err := readFillBuf(r, part) 112 u.readerPos += int64(n) 113 u.readerSize = int64(n) 114 return bytes.NewReader(part[0:n]), n, err 115 } 116 } 117 118 // init will initialize all default options. 
119 func (u *uploader) init() { 120 if u.cfg.concurrency == 0 { 121 u.cfg.concurrency = defaultUploadConcurrency 122 } 123 if u.cfg.partSize == 0 { 124 u.cfg.partSize = defaultUploadPartSize 125 } 126 if u.cfg.maxUploadParts == 0 { 127 u.cfg.maxUploadParts = maxMultiParts 128 } 129 // Try to get the total size for some optimizations 130 u.totalSize = -1 131 switch r := u.cfg.body.(type) { 132 case io.Seeker: 133 pos, _ := r.Seek(0, io.SeekCurrent) 134 defer func() { 135 _, _ = r.Seek(pos, io.SeekStart) 136 }() 137 138 n, err := r.Seek(0, io.SeekEnd) 139 if err != nil { 140 return 141 } 142 u.totalSize = n 143 144 // Try to adjust partSize if it is too small and account for 145 // integer division truncation. 146 if u.totalSize/u.cfg.partSize >= u.cfg.partSize { 147 // Add one to the part size to account for remainders 148 // during the size calculation. e.g odd number of bytes. 149 u.cfg.partSize = (u.totalSize / int64(u.cfg.maxUploadParts)) + 1 150 } 151 } 152 } 153 154 // singlePartUpload upload a single object that contentLength less than "defaultUploadPartSize" 155 func (u *uploader) singlePartUpload(buf io.Reader, size int64) error { 156 bucketInit, _ := u.bucketInit() 157 158 req := qs.PutObjectInput{ 159 ContentLength: &size, 160 ContentType: &u.cfg.mimeType, 161 Body: buf, 162 } 163 164 _, err := bucketInit.PutObject(u.cfg.key, &req) 165 if err == nil { 166 fs.Debugf(u, "Upload single object finished") 167 } 168 return err 169 } 170 171 // Upload upload an object into QingStor 172 func (u *uploader) upload() error { 173 u.init() 174 175 if u.cfg.partSize < minMultiPartSize { 176 return errors.Errorf("part size must be at least %d bytes", minMultiPartSize) 177 } 178 179 // Do one read to determine if we have more than one part 180 reader, _, err := u.nextReader() 181 if err == io.EOF { // single part 182 fs.Debugf(u, "Uploading as single part object to QingStor") 183 return u.singlePartUpload(reader, u.readerPos) 184 } else if err != nil { 185 return 
errors.Errorf("read upload data failed: %s", err) 186 } 187 188 fs.Debugf(u, "Uploading as multi-part object to QingStor") 189 mu := multiUploader{uploader: u} 190 return mu.multiPartUpload(reader) 191 } 192 193 // internal structure to manage a specific multipart upload to QingStor. 194 type multiUploader struct { 195 *uploader 196 wg sync.WaitGroup 197 mtx sync.Mutex 198 err error 199 uploadID *string 200 objectParts completedParts 201 hashMd5 hash.Hash 202 } 203 204 // keeps track of a single chunk of data being sent to QingStor. 205 type chunk struct { 206 buffer io.ReadSeeker 207 partNumber int 208 size int64 209 } 210 211 // completedParts is a wrapper to make parts sortable by their part number, 212 // since QingStor required this list to be sent in sorted order. 213 type completedParts []*qs.ObjectPartType 214 215 func (a completedParts) Len() int { return len(a) } 216 func (a completedParts) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 217 func (a completedParts) Less(i, j int) bool { return *a[i].PartNumber < *a[j].PartNumber } 218 219 // String converts multiUploader to a string 220 func (mu *multiUploader) String() string { 221 if uploadID := mu.uploadID; uploadID != nil { 222 return fmt.Sprintf("QingStor bucket %s key %s uploadID %s", mu.cfg.bucket, mu.cfg.key, *uploadID) 223 } 224 return fmt.Sprintf("QingStor bucket %s key %s uploadID <nil>", mu.cfg.bucket, mu.cfg.key) 225 } 226 227 // getErr is a thread-safe getter for the error object 228 func (mu *multiUploader) getErr() error { 229 mu.mtx.Lock() 230 defer mu.mtx.Unlock() 231 return mu.err 232 } 233 234 // setErr is a thread-safe setter for the error object 235 func (mu *multiUploader) setErr(e error) { 236 mu.mtx.Lock() 237 defer mu.mtx.Unlock() 238 mu.err = e 239 } 240 241 // readChunk runs in worker goroutines to pull chunks off of the ch channel 242 // and send() them as UploadPart requests. 
func (mu *multiUploader) readChunk(ch chan chunk) {
	defer mu.wg.Done()
	for {
		// Block until a chunk arrives or the channel is closed.
		c, ok := <-ch
		if !ok {
			break
		}
		// Once any part has failed, keep draining the channel without
		// sending so the producer is never blocked on a full channel.
		if mu.getErr() == nil {
			if err := mu.send(c); err != nil {
				mu.setErr(err)
			}
		}
	}
}

// initiate starts a multipart upload, recording the returned UploadID and
// creating a fresh MD5 accumulator for the overall ETag.
func (mu *multiUploader) initiate() error {
	bucketInit, _ := mu.bucketInit()
	req := qs.InitiateMultipartUploadInput{
		ContentType: &mu.cfg.mimeType,
	}
	fs.Debugf(mu, "Initiating a multi-part upload")
	rsp, err := bucketInit.InitiateMultipartUpload(mu.cfg.key, &req)
	if err == nil {
		mu.uploadID = rsp.UploadID
		mu.hashMd5 = md5.New()
	}
	return err
}

// send uploads a single part to QingStor, then records the part metadata and
// folds the part data into the running MD5 under the shared mutex.
func (mu *multiUploader) send(c chunk) error {
	bucketInit, _ := mu.bucketInit()
	req := qs.UploadMultipartInput{
		PartNumber:    &c.partNumber,
		UploadID:      mu.uploadID,
		ContentLength: &c.size,
		Body:          c.buffer,
	}
	fs.Debugf(mu, "Uploading a part to QingStor with partNumber %d and partSize %d", c.partNumber, c.size)
	_, err := bucketInit.UploadMultipart(mu.cfg.key, &req)
	if err != nil {
		return err
	}
	fs.Debugf(mu, "Done uploading part partNumber %d and partSize %d", c.partNumber, c.size)

	mu.mtx.Lock()
	defer mu.mtx.Unlock()

	// Rewind the buffer and hash its contents for the completion ETag.
	// NOTE(review): with concurrency > 1 parts are hashed in completion
	// order, not part-number order, so the resulting MD5 can vary between
	// runs — confirm what ETag value QingStor actually expects here.
	_, _ = c.buffer.Seek(0, 0)
	_, _ = io.Copy(mu.hashMd5, c.buffer)

	parts := qs.ObjectPartType{PartNumber: &c.partNumber, Size: &c.size}
	mu.objectParts = append(mu.objectParts, &parts)
	return err
}

// complete finishes the multipart upload, sending the sorted part list and
// the accumulated MD5 (quoted, as an ETag) to QingStor.
func (mu *multiUploader) complete() error {
	var err error
	// Don't attempt completion if any part failed.
	if err = mu.getErr(); err != nil {
		return err
	}
	bucketInit, _ := mu.bucketInit()
	//if err = mu.list(); err != nil {
	//	return err
	//}
	//md5String := fmt.Sprintf("\"%s\"", hex.EncodeToString(mu.hashMd5.Sum(nil)))

	md5String := fmt.Sprintf("\"%x\"", mu.hashMd5.Sum(nil))

	// QingStor requires the parts list in ascending part-number order.
	sort.Sort(mu.objectParts)
	req := qs.CompleteMultipartUploadInput{
		UploadID:    mu.uploadID,
		ObjectParts: mu.objectParts,
		ETag:        &md5String,
	}
	fs.Debugf(mu, "Completing multi-part object")
	_, err = bucketInit.CompleteMultipartUpload(mu.cfg.key, &req)
	if err == nil {
		fs.Debugf(mu, "Complete multi-part finished")
	}
	return err
}

// abort aborts the multipart upload on the server. A no-op if initiate()
// never succeeded (uploadID is nil).
func (mu *multiUploader) abort() error {
	var err error
	bucketInit, _ := mu.bucketInit()

	if uploadID := mu.uploadID; uploadID != nil {
		req := qs.AbortMultipartUploadInput{
			UploadID: uploadID,
		}
		fs.Debugf(mu, "Aborting multi-part object %q", *uploadID)
		_, err = bucketInit.AbortMultipartUpload(mu.cfg.key, &req)
	}

	return err
}

// multiPartUpload drives a multipart upload: it initiates the upload, fans
// chunks out to a pool of readChunk workers over a buffered channel, and
// completes (or aborts, on error) the upload once the source is exhausted.
func (mu *multiUploader) multiPartUpload(firstBuf io.ReadSeeker) (err error) {
	// Initiate a multi-part upload
	if err = mu.initiate(); err != nil {
		return err
	}
	defer func() {
		// Abort the transfer if returning an error
		if err != nil {
			_ = mu.abort()
		}
	}()

	// One worker per unit of configured concurrency; the channel buffer
	// lets the producer stay ahead by up to `concurrency` chunks.
	ch := make(chan chunk, mu.cfg.concurrency)
	for i := 0; i < mu.cfg.concurrency; i++ {
		mu.wg.Add(1)
		go mu.readChunk(ch)
	}

	// The first chunk was already read by the caller; part numbers start at 0.
	var partNumber int
	ch <- chunk{partNumber: partNumber, buffer: firstBuf, size: mu.readerSize}

	for mu.getErr() == nil {
		partNumber++
		// This upload exceeded maximum number of supported parts, error now.
		if partNumber > mu.cfg.maxUploadParts || partNumber > maxMultiParts {
			var msg string
			if partNumber > mu.cfg.maxUploadParts {
				msg = fmt.Sprintf("exceeded total allowed configured maxUploadParts (%d). "+
					"Adjust PartSize to fit in this limit", mu.cfg.maxUploadParts)
			} else {
				msg = fmt.Sprintf("exceeded total allowed QingStor limit maxUploadParts (%d). "+
					"Adjust PartSize to fit in this limit", maxMultiParts)
			}
			mu.setErr(errors.New(msg))
			break
		}

		var reader io.ReadSeeker
		var nextChunkLen int
		reader, nextChunkLen, err = mu.nextReader()
		if err != nil && err != io.EOF {
			// Read failed: drain ch in the background so close() below
			// can't block a worker mid-receive, then wait for workers.
			go func() {
				for range ch {
				}
			}()
			// Wait for all goroutines finish
			close(ch)
			mu.wg.Wait()
			return err
		}
		if nextChunkLen == 0 && partNumber > 0 {
			// No need to upload an empty part; if the file was empty to
			// start with, an empty single-part object would have been
			// created and a multipart upload never started.
			break
		}
		num := partNumber
		ch <- chunk{partNumber: num, buffer: reader, size: mu.readerSize}
	}
	// Wait for all goroutines finish
	close(ch)
	mu.wg.Wait()
	// Complete Multipart Upload
	return mu.complete()
}