github.com/artpar/rclone@v1.67.3/backend/qingstor/upload.go (about) 1 // Upload object to QingStor 2 3 //go:build !plan9 && !js 4 5 package qingstor 6 7 import ( 8 "bytes" 9 "crypto/md5" 10 "errors" 11 "fmt" 12 "hash" 13 "io" 14 "sort" 15 "sync" 16 17 "github.com/artpar/rclone/fs" 18 "github.com/artpar/rclone/lib/atexit" 19 qs "github.com/yunify/qingstor-sdk-go/v3/service" 20 ) 21 22 const ( 23 // maxSinglePartSize = 1024 * 1024 * 1024 * 5 // The maximum allowed size when uploading a single object to QingStor 24 // maxMultiPartSize = 1024 * 1024 * 1024 * 1 // The maximum allowed part size when uploading a part to QingStor 25 minMultiPartSize = 1024 * 1024 * 4 // The minimum allowed part size when uploading a part to QingStor 26 maxMultiParts = 10000 // The maximum allowed number of parts in a multi-part upload 27 ) 28 29 const ( 30 defaultUploadPartSize = 1024 * 1024 * 64 // The default part size to buffer chunks of a payload into. 31 defaultUploadConcurrency = 4 // the default number of goroutines to spin up when using multiPartUpload. 32 ) 33 34 func readFillBuf(r io.Reader, b []byte) (offset int, err error) { 35 for offset < len(b) && err == nil { 36 var n int 37 n, err = r.Read(b[offset:]) 38 offset += n 39 } 40 41 return offset, err 42 } 43 44 // uploadInput contains all input for upload requests to QingStor. 45 type uploadInput struct { 46 body io.Reader 47 qsSvc *qs.Service 48 mimeType string 49 zone string 50 bucket string 51 key string 52 partSize int64 53 concurrency int 54 maxUploadParts int 55 } 56 57 // uploader internal structure to manage an upload to QingStor. 58 type uploader struct { 59 cfg *uploadInput 60 totalSize int64 // set to -1 if the size is not known 61 readerPos int64 // current reader position 62 readerSize int64 // current reader content size 63 } 64 65 // newUploader creates a new Uploader instance to upload objects to QingStor. 66 func newUploader(in *uploadInput) *uploader { 67 u := &uploader{ 68 cfg: in, 69 } 70 return u 71 } 72 73 // bucketInit initiate as bucket controller 74 func (u *uploader) bucketInit() (*qs.Bucket, error) { 75 bucketInit, err := u.cfg.qsSvc.Bucket(u.cfg.bucket, u.cfg.zone) 76 return bucketInit, err 77 } 78 79 // String converts uploader to a string 80 func (u *uploader) String() string { 81 return fmt.Sprintf("QingStor bucket %s key %s", u.cfg.bucket, u.cfg.key) 82 } 83 84 // nextReader returns a seekable reader representing the next packet of data. 85 // This operation increases the shared u.readerPos counter, but note that it 86 // does not need to be wrapped in a mutex because nextReader is only called 87 // from the main thread. 88 func (u *uploader) nextReader() (io.ReadSeeker, int, error) { 89 type readerAtSeeker interface { 90 io.ReaderAt 91 io.ReadSeeker 92 } 93 switch r := u.cfg.body.(type) { 94 case readerAtSeeker: 95 var err error 96 n := u.cfg.partSize 97 if u.totalSize >= 0 { 98 bytesLeft := u.totalSize - u.readerPos 99 100 if bytesLeft <= u.cfg.partSize { 101 err = io.EOF 102 n = bytesLeft 103 } 104 } 105 reader := io.NewSectionReader(r, u.readerPos, n) 106 u.readerPos += n 107 u.readerSize = n 108 return reader, int(n), err 109 110 default: 111 part := make([]byte, u.cfg.partSize) 112 n, err := readFillBuf(r, part) 113 u.readerPos += int64(n) 114 u.readerSize = int64(n) 115 return bytes.NewReader(part[0:n]), n, err 116 } 117 } 118 119 // init will initialize all default options. 120 func (u *uploader) init() { 121 if u.cfg.concurrency == 0 { 122 u.cfg.concurrency = defaultUploadConcurrency 123 } 124 if u.cfg.partSize == 0 { 125 u.cfg.partSize = defaultUploadPartSize 126 } 127 if u.cfg.maxUploadParts == 0 { 128 u.cfg.maxUploadParts = maxMultiParts 129 } 130 // Try to get the total size for some optimizations 131 u.totalSize = -1 132 switch r := u.cfg.body.(type) { 133 case io.Seeker: 134 pos, _ := r.Seek(0, io.SeekCurrent) 135 defer func() { 136 _, _ = r.Seek(pos, io.SeekStart) 137 }() 138 139 n, err := r.Seek(0, io.SeekEnd) 140 if err != nil { 141 return 142 } 143 u.totalSize = n 144 145 // Try to adjust partSize if it is too small and account for 146 // integer division truncation. 147 if u.totalSize/u.cfg.partSize >= u.cfg.partSize { 148 // Add one to the part size to account for remainders 149 // during the size calculation. e.g odd number of bytes. 150 u.cfg.partSize = (u.totalSize / int64(u.cfg.maxUploadParts)) + 1 151 } 152 } 153 } 154 155 // singlePartUpload upload a single object that contentLength less than "defaultUploadPartSize" 156 func (u *uploader) singlePartUpload(buf io.Reader, size int64) error { 157 bucketInit, _ := u.bucketInit() 158 159 req := qs.PutObjectInput{ 160 ContentLength: &size, 161 ContentType: &u.cfg.mimeType, 162 Body: buf, 163 } 164 165 _, err := bucketInit.PutObject(u.cfg.key, &req) 166 if err == nil { 167 fs.Debugf(u, "Upload single object finished") 168 } 169 return err 170 } 171 172 // Upload upload an object into QingStor 173 func (u *uploader) upload() error { 174 u.init() 175 176 if u.cfg.partSize < minMultiPartSize { 177 return fmt.Errorf("part size must be at least %d bytes", minMultiPartSize) 178 } 179 180 // Do one read to determine if we have more than one part 181 reader, _, err := u.nextReader() 182 if err == io.EOF { // single part 183 fs.Debugf(u, "Uploading as single part object to QingStor") 184 return u.singlePartUpload(reader, u.readerPos) 185 } else if err != nil { 186 return fmt.Errorf("read upload data failed: %w", err) 187 } 188 189 fs.Debugf(u, "Uploading as multi-part object to QingStor") 190 mu := multiUploader{uploader: u} 191 return mu.multiPartUpload(reader) 192 } 193 194 // internal structure to manage a specific multipart upload to QingStor. 195 type multiUploader struct { 196 *uploader 197 wg sync.WaitGroup 198 mtx sync.Mutex 199 err error 200 uploadID *string 201 objectParts completedParts 202 hashMd5 hash.Hash 203 } 204 205 // keeps track of a single chunk of data being sent to QingStor. 206 type chunk struct { 207 buffer io.ReadSeeker 208 partNumber int 209 size int64 210 } 211 212 // completedParts is a wrapper to make parts sortable by their part number, 213 // since QingStor required this list to be sent in sorted order. 214 type completedParts []*qs.ObjectPartType 215 216 func (a completedParts) Len() int { return len(a) } 217 func (a completedParts) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 218 func (a completedParts) Less(i, j int) bool { return *a[i].PartNumber < *a[j].PartNumber } 219 220 // String converts multiUploader to a string 221 func (mu *multiUploader) String() string { 222 if uploadID := mu.uploadID; uploadID != nil { 223 return fmt.Sprintf("QingStor bucket %s key %s uploadID %s", mu.cfg.bucket, mu.cfg.key, *uploadID) 224 } 225 return fmt.Sprintf("QingStor bucket %s key %s uploadID <nil>", mu.cfg.bucket, mu.cfg.key) 226 } 227 228 // getErr is a thread-safe getter for the error object 229 func (mu *multiUploader) getErr() error { 230 mu.mtx.Lock() 231 defer mu.mtx.Unlock() 232 return mu.err 233 } 234 235 // setErr is a thread-safe setter for the error object 236 func (mu *multiUploader) setErr(e error) { 237 mu.mtx.Lock() 238 defer mu.mtx.Unlock() 239 mu.err = e 240 } 241 242 // readChunk runs in worker goroutines to pull chunks off of the ch channel 243 // and send() them as UploadPart requests. 244 func (mu *multiUploader) readChunk(ch chan chunk) { 245 defer mu.wg.Done() 246 for { 247 c, ok := <-ch 248 if !ok { 249 break 250 } 251 if mu.getErr() == nil { 252 if err := mu.send(c); err != nil { 253 mu.setErr(err) 254 } 255 } 256 } 257 } 258 259 // initiate init a Multiple Object and obtain UploadID 260 func (mu *multiUploader) initiate() error { 261 bucketInit, _ := mu.bucketInit() 262 req := qs.InitiateMultipartUploadInput{ 263 ContentType: &mu.cfg.mimeType, 264 } 265 fs.Debugf(mu, "Initiating a multi-part upload") 266 rsp, err := bucketInit.InitiateMultipartUpload(mu.cfg.key, &req) 267 if err == nil { 268 mu.uploadID = rsp.UploadID 269 mu.hashMd5 = md5.New() 270 } 271 return err 272 } 273 274 // send upload a part into QingStor 275 func (mu *multiUploader) send(c chunk) error { 276 bucketInit, _ := mu.bucketInit() 277 req := qs.UploadMultipartInput{ 278 PartNumber: &c.partNumber, 279 UploadID: mu.uploadID, 280 ContentLength: &c.size, 281 Body: c.buffer, 282 } 283 fs.Debugf(mu, "Uploading a part to QingStor with partNumber %d and partSize %d", c.partNumber, c.size) 284 _, err := bucketInit.UploadMultipart(mu.cfg.key, &req) 285 if err != nil { 286 return err 287 } 288 fs.Debugf(mu, "Done uploading part partNumber %d and partSize %d", c.partNumber, c.size) 289 290 mu.mtx.Lock() 291 defer mu.mtx.Unlock() 292 293 _, _ = c.buffer.Seek(0, 0) 294 _, _ = io.Copy(mu.hashMd5, c.buffer) 295 296 parts := qs.ObjectPartType{PartNumber: &c.partNumber, Size: &c.size} 297 mu.objectParts = append(mu.objectParts, &parts) 298 return err 299 } 300 301 // complete complete a multipart upload 302 func (mu *multiUploader) complete() error { 303 var err error 304 if err = mu.getErr(); err != nil { 305 return err 306 } 307 bucketInit, _ := mu.bucketInit() 308 //if err = mu.list(); err != nil { 309 // return err 310 //} 311 //md5String := fmt.Sprintf("\"%s\"", hex.EncodeToString(mu.hashMd5.Sum(nil))) 312 313 md5String := fmt.Sprintf("\"%x\"", mu.hashMd5.Sum(nil)) 314 sort.Sort(mu.objectParts) 315 req := qs.CompleteMultipartUploadInput{ 316 UploadID: mu.uploadID, 317 ObjectParts: mu.objectParts, 318 ETag: &md5String, 319 } 320 fs.Debugf(mu, "Completing multi-part object") 321 _, err = bucketInit.CompleteMultipartUpload(mu.cfg.key, &req) 322 if err == nil { 323 fs.Debugf(mu, "Complete multi-part finished") 324 } 325 return err 326 } 327 328 // abort abort a multipart upload 329 func (mu *multiUploader) abort() error { 330 var err error 331 bucketInit, _ := mu.bucketInit() 332 333 if uploadID := mu.uploadID; uploadID != nil { 334 req := qs.AbortMultipartUploadInput{ 335 UploadID: uploadID, 336 } 337 fs.Debugf(mu, "Aborting multi-part object %q", *uploadID) 338 _, err = bucketInit.AbortMultipartUpload(mu.cfg.key, &req) 339 } 340 341 return err 342 } 343 344 // multiPartUpload upload a multiple object into QingStor 345 func (mu *multiUploader) multiPartUpload(firstBuf io.ReadSeeker) (err error) { 346 // Initiate a multi-part upload 347 if err = mu.initiate(); err != nil { 348 return err 349 } 350 351 // Cancel the session if something went wrong 352 defer atexit.OnError(&err, func() { 353 fs.Debugf(mu, "Cancelling multipart upload: %v", err) 354 cancelErr := mu.abort() 355 if cancelErr != nil { 356 fs.Logf(mu, "Failed to cancel multipart upload: %v", cancelErr) 357 } 358 })() 359 360 ch := make(chan chunk, mu.cfg.concurrency) 361 for i := 0; i < mu.cfg.concurrency; i++ { 362 mu.wg.Add(1) 363 go mu.readChunk(ch) 364 } 365 366 var partNumber int 367 ch <- chunk{partNumber: partNumber, buffer: firstBuf, size: mu.readerSize} 368 369 for mu.getErr() == nil { 370 partNumber++ 371 // This upload exceeded maximum number of supported parts, error now. 372 if partNumber > mu.cfg.maxUploadParts || partNumber > maxMultiParts { 373 var msg string 374 if partNumber > mu.cfg.maxUploadParts { 375 msg = fmt.Sprintf("exceeded total allowed configured maxUploadParts (%d). "+ 376 "Adjust PartSize to fit in this limit", mu.cfg.maxUploadParts) 377 } else { 378 msg = fmt.Sprintf("exceeded total allowed QingStor limit maxUploadParts (%d). "+ 379 "Adjust PartSize to fit in this limit", maxMultiParts) 380 } 381 mu.setErr(errors.New(msg)) 382 break 383 } 384 385 var reader io.ReadSeeker 386 var nextChunkLen int 387 reader, nextChunkLen, err = mu.nextReader() 388 if err != nil && err != io.EOF { 389 // empty ch 390 go func() { 391 for range ch { 392 } 393 }() 394 // Wait for all goroutines finish 395 close(ch) 396 mu.wg.Wait() 397 return err 398 } 399 if nextChunkLen == 0 && partNumber > 0 { 400 // No need to upload empty part, if file was empty to start 401 // with empty single part would of been created and never 402 // started multipart upload. 403 break 404 } 405 num := partNumber 406 ch <- chunk{partNumber: num, buffer: reader, size: mu.readerSize} 407 } 408 // Wait for all goroutines finish 409 close(ch) 410 mu.wg.Wait() 411 // Complete Multipart Upload 412 return mu.complete() 413 }