github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/backend/oracleobjectstorage/multipart.go

//go:build !plan9 && !solaris && !js

package oracleobjectstorage

import (
	"context"
	"crypto/md5"
	"encoding/base64"
	"encoding/hex"
	"fmt"
	"io"
	"strings"
	"sync"
	"time"

	"github.com/ncw/swift/v2"
	"github.com/rclone/rclone/lib/multipart"
	"github.com/rclone/rclone/lib/pool"
	"golang.org/x/net/http/httpguts"

	"github.com/oracle/oci-go-sdk/v65/common"
	"github.com/oracle/oci-go-sdk/v65/objectstorage"
	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/chunksize"
	"github.com/rclone/rclone/fs/hash"
)

var warnStreamUpload sync.Once

// Info needed for an upload
type uploadInfo struct {
	req       *objectstorage.PutObjectRequest
	md5sumHex string
}

type objectChunkWriter struct {
	chunkSize       int64
	size            int64
	f               *Fs
	bucket          *string
	key             *string
	uploadID        *string
	partsToCommit   []objectstorage.CommitMultipartUploadPartDetails
	partsToCommitMu sync.Mutex
	existingParts   map[int]objectstorage.MultipartUploadPartSummary
	eTag            string
	md5sMu          sync.Mutex
	md5s            []byte
	ui              uploadInfo
	o               *Object
}

func (o *Object) uploadMultipart(ctx context.Context, src fs.ObjectInfo, in io.Reader, options ...fs.OpenOption) error {
	_, err := multipart.UploadMultipart(ctx, src, in, multipart.UploadMultipartOptions{
		Open:        o.fs,
		OpenOptions: options,
	})
	return err
}

// OpenChunkWriter returns the chunk size and a ChunkWriter
//
// Pass in the remote and the src object
// You can also use options to hint at the desired chunk size
func (f *Fs) OpenChunkWriter(
	ctx context.Context,
	remote string,
	src fs.ObjectInfo,
	options ...fs.OpenOption) (info fs.ChunkWriterInfo, writer fs.ChunkWriter, err error) {
	// Temporary Object under construction
	o := &Object{
		fs:     f,
		remote: remote,
	}
	ui, err := o.prepareUpload(ctx, src, options)
	if err != nil {
		return info, nil, fmt.Errorf("failed to prepare upload: %w", err)
	}

	uploadParts := f.opt.MaxUploadParts
	if uploadParts < 1 {
		uploadParts = 1
	} else if uploadParts > maxUploadParts {
		uploadParts = maxUploadParts
	}
	size := src.Size()

	// calculate size of parts
	chunkSize := f.opt.ChunkSize

	// size can be -1 here meaning we don't know the size of the incoming file. We use ChunkSize
	// buffers here (default 5 MiB). With a maximum number of parts (10,000) this will be a file of
	// 48 GiB which seems like a not too unreasonable limit.
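	// (ChunkSize 5 MiB * 10,000 parts is roughly 48.8 GiB.) When the size is known,
	// chunksize.Calculator below grows the chunk size as needed so the file always
	// fits within uploadParts parts.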
	if size == -1 {
		warnStreamUpload.Do(func() {
			fs.Logf(f, "Streaming uploads using chunk size %v will have maximum file size of %v",
				f.opt.ChunkSize, fs.SizeSuffix(int64(chunkSize)*int64(uploadParts)))
		})
	} else {
		chunkSize = chunksize.Calculator(src, size, uploadParts, chunkSize)
	}

	uploadID, existingParts, err := o.createMultipartUpload(ctx, ui.req)
	if err != nil {
		return info, nil, fmt.Errorf("create multipart upload request failed: %w", err)
	}
	bucketName, bucketPath := o.split()
	chunkWriter := &objectChunkWriter{
		chunkSize:     int64(chunkSize),
		size:          size,
		f:             f,
		bucket:        &bucketName,
		key:           &bucketPath,
		uploadID:      &uploadID,
		existingParts: existingParts,
		ui:            ui,
		o:             o,
	}
	info = fs.ChunkWriterInfo{
		ChunkSize:         int64(chunkSize),
		Concurrency:       o.fs.opt.UploadConcurrency,
		LeavePartsOnError: o.fs.opt.LeavePartsOnError,
	}
	fs.Debugf(o, "open chunk writer: started multipart upload: %v", uploadID)
	return info, chunkWriter, err
}

// WriteChunk will write chunk number chunkNumber with the data from reader, where chunkNumber >= 0
func (w *objectChunkWriter) WriteChunk(ctx context.Context, chunkNumber int, reader io.ReadSeeker) (bytesWritten int64, err error) {
	if chunkNumber < 0 {
		err := fmt.Errorf("invalid chunk number provided: %v", chunkNumber)
		return -1, err
	}
	// Only account after the checksum reads have been done
	if do, ok := reader.(pool.DelayAccountinger); ok {
		// To figure out this number, do a transfer and if the accounted size is 0 or a
		// multiple of what it should be, increase or decrease this number.
		do.DelayAccounting(2)
	}
	m := md5.New()
	currentChunkSize, err := io.Copy(m, reader)
	if err != nil {
		return -1, err
	}
	// If no data read, don't write the chunk
	if currentChunkSize == 0 {
		return 0, nil
	}
	md5sumBinary := m.Sum([]byte{})
	w.addMd5(&md5sumBinary, int64(chunkNumber))
	md5sum := base64.StdEncoding.EncodeToString(md5sumBinary[:])

	// Object storage requires 1 <= PartNumber <= 10000
	ossPartNumber := chunkNumber + 1
	if existing, ok := w.existingParts[ossPartNumber]; ok {
		if md5sum == *existing.Md5 {
			fs.Debugf(w.o, "matched uploaded part found, part num %d, skipping part, md5=%v", *existing.PartNumber, md5sum)
			w.addCompletedPart(existing.PartNumber, existing.Etag)
			return currentChunkSize, nil
		}
	}
	req := objectstorage.UploadPartRequest{
		NamespaceName: common.String(w.f.opt.Namespace),
		BucketName:    w.bucket,
		ObjectName:    w.key,
		UploadId:      w.uploadID,
		UploadPartNum: common.Int(ossPartNumber),
		ContentLength: common.Int64(currentChunkSize),
		ContentMD5:    common.String(md5sum),
	}
	w.o.applyPartUploadOptions(w.ui.req, &req)
	var resp objectstorage.UploadPartResponse
	err = w.f.pacer.Call(func() (bool, error) {
		// rewind the reader on retry and after reading md5
		_, err = reader.Seek(0, io.SeekStart)
		if err != nil {
			return false, err
		}
		req.UploadPartBody = io.NopCloser(reader)
		resp, err = w.f.srv.UploadPart(ctx, req)
		if err != nil {
			if ossPartNumber <= 8 {
				return shouldRetry(ctx, resp.HTTPResponse(), err)
			}
			// retry all chunks once we have done the first few
			return true, err
		}
		return false, err
	})
	if err != nil {
		fs.Errorf(w.o, "multipart upload failed to upload part:%d err: %v", ossPartNumber, err)
		return -1, fmt.Errorf("multipart upload failed to upload part: %w", err)
	}
	w.addCompletedPart(&ossPartNumber, resp.ETag)
	return currentChunkSize, err
}

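// WriteChunk runs on several goroutines at once (see ChunkWriterInfo.Concurrency),
// so parts are appended to partsToCommit in completion order rather than part-number
// order; part order for the md5-of-md5s check is preserved separately in the indexed
// md5s buffer.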
// add a part number and etag to the completed parts
func (w *objectChunkWriter) addCompletedPart(partNum *int, eTag *string) {
	w.partsToCommitMu.Lock()
	defer w.partsToCommitMu.Unlock()
	w.partsToCommit = append(w.partsToCommit, objectstorage.CommitMultipartUploadPartDetails{
		PartNum: partNum,
		Etag:    eTag,
	})
}

func (w *objectChunkWriter) Close(ctx context.Context) (err error) {
	req := objectstorage.CommitMultipartUploadRequest{
		NamespaceName: common.String(w.f.opt.Namespace),
		BucketName:    w.bucket,
		ObjectName:    w.key,
		UploadId:      w.uploadID,
	}
	req.PartsToCommit = w.partsToCommit
	var resp objectstorage.CommitMultipartUploadResponse
	err = w.f.pacer.Call(func() (bool, error) {
		resp, err = w.f.srv.CommitMultipartUpload(ctx, req)
		// if multipart is corrupted, we will abort the uploadId
		if isMultiPartUploadCorrupted(err) {
			fs.Debugf(w.o, "multipart uploadId %v is corrupted, aborting...", *w.uploadID)
			_ = w.Abort(ctx)
			return false, err
		}
		return shouldRetry(ctx, resp.HTTPResponse(), err)
	})
	if err != nil {
		return err
	}
	w.eTag = *resp.ETag
	hashOfHashes := md5.Sum(w.md5s)
	wantMultipartMd5 := fmt.Sprintf("%s-%d", base64.StdEncoding.EncodeToString(hashOfHashes[:]), len(w.partsToCommit))
	gotMultipartMd5 := *resp.OpcMultipartMd5
	if wantMultipartMd5 != gotMultipartMd5 {
		fs.Errorf(w.o, "multipart upload corrupted: multipart md5 differ: expecting %s but got %s", wantMultipartMd5, gotMultipartMd5)
		return fmt.Errorf("multipart upload corrupted: md5 differ: expecting %s but got %s", wantMultipartMd5, gotMultipartMd5)
	}
	fs.Debugf(w.o, "multipart upload %v md5 matched: expecting %s and got %s", *w.uploadID, wantMultipartMd5, gotMultipartMd5)
	return nil
}

func isMultiPartUploadCorrupted(err error) bool {
	if err == nil {
		return false
	}
	// Check whether this is an OCI service error indicating a corrupted multipart commit
	if ociError, ok := err.(common.ServiceError); ok {
		// InvalidUploadPart means a previously uploaded part is no longer valid
		if ociError.GetCode() == "InvalidUploadPart" {
			return true
		}
	}
	return false
}

func (w *objectChunkWriter) Abort(ctx context.Context) error {
	fs.Debugf(w.o, "Cancelling multipart upload")
	err := w.o.fs.abortMultiPartUpload(
		ctx,
		w.bucket,
		w.key,
		w.uploadID)
	if err != nil {
		fs.Debugf(w.o, "Failed to cancel multipart upload: %v", err)
	} else {
		fs.Debugf(w.o, "canceled and aborted multipart upload: %v", *w.uploadID)
	}
	return err
}

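// The md5s buffer stores the 16-byte MD5 of each chunk at offset chunkNumber*md5.Size,
// so that Close can hash the concatenation and compare it against the OpcMultipartMd5
// value ("base64(md5 of the part md5s)-<number of parts>") returned when the upload is
// committed.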
// addMd5 adds a binary md5 to the md5 calculated so far
func (w *objectChunkWriter) addMd5(md5binary *[]byte, chunkNumber int64) {
	w.md5sMu.Lock()
	defer w.md5sMu.Unlock()
	start := chunkNumber * md5.Size
	end := start + md5.Size
	if extend := end - int64(len(w.md5s)); extend > 0 {
		w.md5s = append(w.md5s, make([]byte, extend)...)
	}
	copy(w.md5s[start:end], (*md5binary)[:])
}

func (o *Object) prepareUpload(ctx context.Context, src fs.ObjectInfo, options []fs.OpenOption) (ui uploadInfo, err error) {
	bucket, bucketPath := o.split()

	ui.req = &objectstorage.PutObjectRequest{
		NamespaceName: common.String(o.fs.opt.Namespace),
		BucketName:    common.String(bucket),
		ObjectName:    common.String(bucketPath),
	}

	// Set the mtime in the metadata
	modTime := src.ModTime(ctx)
	// Fetch metadata if --metadata is in use
	meta, err := fs.GetMetadataOptions(ctx, o.fs, src, options)
	if err != nil {
		return ui, fmt.Errorf("failed to read metadata from source object: %w", err)
	}
	ui.req.OpcMeta = make(map[string]string, len(meta)+2)
	// merge metadata into request and user metadata
	for k, v := range meta {
		pv := common.String(v)
		k = strings.ToLower(k)
		switch k {
		case "cache-control":
			ui.req.CacheControl = pv
		case "content-disposition":
			ui.req.ContentDisposition = pv
		case "content-encoding":
			ui.req.ContentEncoding = pv
		case "content-language":
			ui.req.ContentLanguage = pv
		case "content-type":
			ui.req.ContentType = pv
		case "tier":
			// ignore
		case "mtime":
			// mtime in meta overrides source ModTime
			metaModTime, err := time.Parse(time.RFC3339Nano, v)
			if err != nil {
				fs.Debugf(o, "failed to parse metadata %s: %q: %v", k, v, err)
			} else {
				modTime = metaModTime
			}
		case "btime":
			// write as metadata since we can't set it
			ui.req.OpcMeta[k] = v
		default:
			ui.req.OpcMeta[k] = v
		}
	}

	// Set the mtime in the metadata
	ui.req.OpcMeta[metaMtime] = swift.TimeToFloatString(modTime)

	// read the md5sum if available
	// - for non-multipart
	//   - so we can add a ContentMD5
	//   - so we can add the md5sum in the metadata as metaMD5Hash if using SSE/SSE-C
	// - for multipart, provided checksums aren't disabled
	//   - so we can add the md5sum in the metadata as metaMD5Hash
	size := src.Size()
	isMultipart := size < 0 || size >= int64(o.fs.opt.UploadCutoff)
	var md5sumBase64 string
	if !isMultipart || !o.fs.opt.DisableChecksum {
		ui.md5sumHex, err = src.Hash(ctx, hash.MD5)
		if err == nil && matchMd5.MatchString(ui.md5sumHex) {
			hashBytes, err := hex.DecodeString(ui.md5sumHex)
			if err == nil {
				md5sumBase64 = base64.StdEncoding.EncodeToString(hashBytes)
				if isMultipart && !o.fs.opt.DisableChecksum {
					// Set the md5sum as metadata on the object if
					// - a multipart upload
					// - the ETag is not an MD5, e.g. when using SSE/SSE-C
					// provided checksums aren't disabled
					ui.req.OpcMeta[metaMD5Hash] = md5sumBase64
				}
			}
		}
	}
	// Set the content type if it isn't set already
	if ui.req.ContentType == nil {
		ui.req.ContentType = common.String(fs.MimeType(ctx, src))
	}
	if size >= 0 {
		ui.req.ContentLength = common.Int64(size)
	}
	if md5sumBase64 != "" {
		ui.req.ContentMD5 = &md5sumBase64
	}
	o.applyPutOptions(ui.req, options...)
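	// useBYOKPutObject is expected to copy the backend's customer-managed
	// encryption key (SSE-C/KMS) settings, if any, onto the request.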
	useBYOKPutObject(o.fs, ui.req)
	if o.fs.opt.StorageTier != "" {
		storageTier, ok := objectstorage.GetMappingPutObjectStorageTierEnum(o.fs.opt.StorageTier)
		if !ok {
			return ui, fmt.Errorf("not a valid storage tier: %v", o.fs.opt.StorageTier)
		}
		ui.req.StorageTier = storageTier
	}
	// Check metadata keys and values are valid
	for key, value := range ui.req.OpcMeta {
		if !httpguts.ValidHeaderFieldName(key) {
			fs.Errorf(o, "Dropping invalid metadata key %q", key)
			delete(ui.req.OpcMeta, key)
		} else if value == "" {
			fs.Errorf(o, "Dropping nil metadata value for key %q", key)
			delete(ui.req.OpcMeta, key)
		} else if !httpguts.ValidHeaderFieldValue(value) {
			fs.Errorf(o, "Dropping invalid metadata value %q for key %q", value, key)
			delete(ui.req.OpcMeta, key)
		}
	}
	return ui, nil
}

func (o *Object) createMultipartUpload(ctx context.Context, putReq *objectstorage.PutObjectRequest) (
	uploadID string, existingParts map[int]objectstorage.MultipartUploadPartSummary, err error) {
	bucketName, bucketPath := o.split()
	err = o.fs.makeBucket(ctx, bucketName)
	if err != nil {
		fs.Errorf(o, "failed to create bucket: %v, err: %v", bucketName, err)
		return uploadID, existingParts, err
	}
	if o.fs.opt.AttemptResumeUpload {
		fs.Debugf(o, "attempting to resume upload for %v (if any)", o.remote)
		resumeUploads, err := o.fs.findLatestMultipartUpload(ctx, bucketName, bucketPath)
		if err == nil && len(resumeUploads) > 0 {
			uploadID = *resumeUploads[0].UploadId
			existingParts, err = o.fs.listMultipartUploadParts(ctx, bucketName, bucketPath, uploadID)
			if err == nil {
				fs.Debugf(o, "resuming with existing upload id: %v", uploadID)
				return uploadID, existingParts, err
			}
		}
	}
	req := objectstorage.CreateMultipartUploadRequest{
		NamespaceName: common.String(o.fs.opt.Namespace),
		BucketName:    common.String(bucketName),
	}
	req.Object = common.String(bucketPath)
	if o.fs.opt.StorageTier != "" {
		storageTier, ok := objectstorage.GetMappingStorageTierEnum(o.fs.opt.StorageTier)
		if !ok {
			return "", nil, fmt.Errorf("not a valid storage tier: %v", o.fs.opt.StorageTier)
		}
		req.StorageTier = storageTier
	}
	o.applyMultipartUploadOptions(putReq, &req)

	var resp objectstorage.CreateMultipartUploadResponse
	err = o.fs.pacer.Call(func() (bool, error) {
		resp, err = o.fs.srv.CreateMultipartUpload(ctx, req)
		return shouldRetry(ctx, resp.HTTPResponse(), err)
	})
	if err != nil {
		return "", existingParts, err
	}
	existingParts = make(map[int]objectstorage.MultipartUploadPartSummary)
	uploadID = *resp.UploadId
	fs.Debugf(o, "created new upload id: %v", uploadID)
	return uploadID, existingParts, err
}
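
// Illustrative call flow (a sketch, not an exported API): the generic helper in
// lib/multipart drives the ChunkWriter implemented above, roughly:
//
//	info, writer, _ := f.OpenChunkWriter(ctx, remote, src) // CreateMultipartUpload (or resume)
//	_, _ = writer.WriteChunk(ctx, n, chunk)                 // UploadPart for each chunk, run concurrently
//	_ = writer.Close(ctx)                                   // CommitMultipartUpload + md5-of-md5s check
//	// on failure: _ = writer.Abort(ctx)                    // abort and discard the uploaded parts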