github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/backend/oracleobjectstorage/multipart.go

//go:build !plan9 && !solaris && !js

package oracleobjectstorage

import (
	"context"
	"crypto/md5"
	"encoding/base64"
	"encoding/hex"
	"fmt"
	"io"
	"strings"
	"sync"
	"time"

	"github.com/ncw/swift/v2"
	"github.com/rclone/rclone/lib/multipart"
	"github.com/rclone/rclone/lib/pool"
	"golang.org/x/net/http/httpguts"

	"github.com/oracle/oci-go-sdk/v65/common"
	"github.com/oracle/oci-go-sdk/v65/objectstorage"
	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/chunksize"
	"github.com/rclone/rclone/fs/hash"
)

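// warnStreamUpload makes sure the warning about the maximum file size of
// streaming uploads is only logged once per run.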
var warnStreamUpload sync.Once

// Info needed for an upload
type uploadInfo struct {
	req       *objectstorage.PutObjectRequest
	md5sumHex string
}

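// objectChunkWriter implements fs.ChunkWriter for OCI Object Storage.
// It uploads the parts of a multipart upload, remembering the MD5 of each
// part so Close can verify the commit, and the parts to commit.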
type objectChunkWriter struct {
	chunkSize       int64
	size            int64
	f               *Fs
	bucket          *string
	key             *string
	uploadID        *string
	partsToCommit   []objectstorage.CommitMultipartUploadPartDetails
	partsToCommitMu sync.Mutex
	existingParts   map[int]objectstorage.MultipartUploadPartSummary
	eTag            string
	md5sMu          sync.Mutex
	md5s            []byte
	ui              uploadInfo
	o               *Object
}

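// uploadMultipart does a multipart upload of src via the generic multipart
// uploader in lib/multipart, which drives the ChunkWriter returned by
// OpenChunkWriter below.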
func (o *Object) uploadMultipart(ctx context.Context, src fs.ObjectInfo, in io.Reader, options ...fs.OpenOption) error {
	_, err := multipart.UploadMultipart(ctx, src, in, multipart.UploadMultipartOptions{
		Open:        o.fs,
		OpenOptions: options,
	})
	return err
}

// OpenChunkWriter returns the chunk size and a ChunkWriter
//
// Pass in the remote and the src object
// You can also use options to hint at the desired chunk size
func (f *Fs) OpenChunkWriter(
	ctx context.Context,
	remote string,
	src fs.ObjectInfo,
	options ...fs.OpenOption) (info fs.ChunkWriterInfo, writer fs.ChunkWriter, err error) {
	// Temporary Object under construction
	o := &Object{
		fs:     f,
		remote: remote,
	}
	ui, err := o.prepareUpload(ctx, src, options)
	if err != nil {
		return info, nil, fmt.Errorf("failed to prepare upload: %w", err)
	}

	uploadParts := f.opt.MaxUploadParts
	if uploadParts < 1 {
		uploadParts = 1
	} else if uploadParts > maxUploadParts {
		uploadParts = maxUploadParts
	}
	size := src.Size()

	// calculate size of parts
	chunkSize := f.opt.ChunkSize

	// size can be -1 here meaning we don't know the size of the incoming file. We use ChunkSize
	// buffers here (default 5 MiB). With the maximum number of parts (10,000) this gives a
	// maximum file size of about 48 GiB, which seems a reasonable limit.
	if size == -1 {
		warnStreamUpload.Do(func() {
			fs.Logf(f, "Streaming uploads using chunk size %v will have maximum file size of %v",
				f.opt.ChunkSize, fs.SizeSuffix(int64(chunkSize)*int64(uploadParts)))
		})
	} else {
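		// Known size: scale the chunk size up if necessary so the file
		// fits within uploadParts parts.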
		chunkSize = chunksize.Calculator(src, size, uploadParts, chunkSize)
	}

	uploadID, existingParts, err := o.createMultipartUpload(ctx, ui.req)
	if err != nil {
		return info, nil, fmt.Errorf("create multipart upload request failed: %w", err)
	}
	bucketName, bucketPath := o.split()
	chunkWriter := &objectChunkWriter{
		chunkSize:     int64(chunkSize),
		size:          size,
		f:             f,
		bucket:        &bucketName,
		key:           &bucketPath,
		uploadID:      &uploadID,
		existingParts: existingParts,
		ui:            ui,
		o:             o,
	}
	info = fs.ChunkWriterInfo{
		ChunkSize:         int64(chunkSize),
		Concurrency:       o.fs.opt.UploadConcurrency,
		LeavePartsOnError: o.fs.opt.LeavePartsOnError,
	}
	fs.Debugf(o, "open chunk writer: started multipart upload: %v", uploadID)
	return info, chunkWriter, err
}

// WriteChunk will write chunk number with reader bytes, where chunk number >= 0
func (w *objectChunkWriter) WriteChunk(ctx context.Context, chunkNumber int, reader io.ReadSeeker) (bytesWritten int64, err error) {
	if chunkNumber < 0 {
		err := fmt.Errorf("invalid chunk number provided: %v", chunkNumber)
		return -1, err
	}
	// Only account after the checksum reads have been done
	if do, ok := reader.(pool.DelayAccountinger); ok {
		// To figure out this number, do a transfer and if the accounted size is 0 or a
		// multiple of what it should be, increase or decrease this number.
		do.DelayAccounting(2)
	}
	m := md5.New()
	currentChunkSize, err := io.Copy(m, reader)
	if err != nil {
		return -1, err
	}
	// If no data read, don't write the chunk
	if currentChunkSize == 0 {
		return 0, nil
	}
	md5sumBinary := m.Sum([]byte{})
	w.addMd5(&md5sumBinary, int64(chunkNumber))
	md5sum := base64.StdEncoding.EncodeToString(md5sumBinary[:])

	// Object storage requires 1 <= PartNumber <= 10000
	ossPartNumber := chunkNumber + 1
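	// When resuming an upload, skip a part that was already uploaded with a
	// matching MD5 and just record it for the final commit.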
	if existing, ok := w.existingParts[ossPartNumber]; ok {
		if md5sum == *existing.Md5 {
			fs.Debugf(w.o, "matched uploaded part found, part num %d, skipping part, md5=%v", *existing.PartNumber, md5sum)
			w.addCompletedPart(existing.PartNumber, existing.Etag)
			return currentChunkSize, nil
		}
	}
	req := objectstorage.UploadPartRequest{
		NamespaceName: common.String(w.f.opt.Namespace),
		BucketName:    w.bucket,
		ObjectName:    w.key,
		UploadId:      w.uploadID,
		UploadPartNum: common.Int(ossPartNumber),
		ContentLength: common.Int64(currentChunkSize),
		ContentMD5:    common.String(md5sum),
	}
	w.o.applyPartUploadOptions(w.ui.req, &req)
	var resp objectstorage.UploadPartResponse
	err = w.f.pacer.Call(func() (bool, error) {
		// rewind the reader on retry and after reading md5
		_, err = reader.Seek(0, io.SeekStart)
		if err != nil {
			return false, err
		}
		req.UploadPartBody = io.NopCloser(reader)
		resp, err = w.f.srv.UploadPart(ctx, req)
		if err != nil {
			if ossPartNumber <= 8 {
				return shouldRetry(ctx, resp.HTTPResponse(), err)
			}
			// retry all chunks once we have done the first few
			return true, err
		}
		return false, err
	})
	if err != nil {
		fs.Errorf(w.o, "multipart upload failed to upload part:%d err: %v", ossPartNumber, err)
		return -1, fmt.Errorf("multipart upload failed to upload part: %w", err)
	}
	w.addCompletedPart(&ossPartNumber, resp.ETag)
	return currentChunkSize, err
}

// add a part number and etag to the completed parts
func (w *objectChunkWriter) addCompletedPart(partNum *int, eTag *string) {
	w.partsToCommitMu.Lock()
	defer w.partsToCommitMu.Unlock()
	w.partsToCommit = append(w.partsToCommit, objectstorage.CommitMultipartUploadPartDetails{
		PartNum: partNum,
		Etag:    eTag,
	})
}

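// Close commits the multipart upload once all chunks have been written, then
// verifies the multipart MD5 returned by the server against the part MD5s
// accumulated in WriteChunk. To cancel the upload instead, call Abort.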
func (w *objectChunkWriter) Close(ctx context.Context) (err error) {
	req := objectstorage.CommitMultipartUploadRequest{
		NamespaceName: common.String(w.f.opt.Namespace),
		BucketName:    w.bucket,
		ObjectName:    w.key,
		UploadId:      w.uploadID,
	}
	req.PartsToCommit = w.partsToCommit
	var resp objectstorage.CommitMultipartUploadResponse
	err = w.f.pacer.Call(func() (bool, error) {
		resp, err = w.f.srv.CommitMultipartUpload(ctx, req)
		// if the multipart upload is corrupted, abort the uploadId
		if isMultiPartUploadCorrupted(err) {
			fs.Debugf(w.o, "multipart uploadId %v is corrupted, aborting...", *w.uploadID)
			_ = w.Abort(ctx)
			return false, err
		}
		return shouldRetry(ctx, resp.HTTPResponse(), err)
	})
	if err != nil {
		return err
	}
	w.eTag = *resp.ETag
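	// The commit response's OpcMultipartMd5 is expected to be the base64
	// encoded MD5 of the concatenated binary part MD5s followed by
	// "-<number of parts>", so recompute it from the digests gathered in
	// WriteChunk and compare to detect corruption in transit.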
	hashOfHashes := md5.Sum(w.md5s)
	wantMultipartMd5 := fmt.Sprintf("%s-%d", base64.StdEncoding.EncodeToString(hashOfHashes[:]), len(w.partsToCommit))
	gotMultipartMd5 := *resp.OpcMultipartMd5
	if wantMultipartMd5 != gotMultipartMd5 {
		fs.Errorf(w.o, "multipart upload corrupted: multipart md5 differ: expecting %s but got %s", wantMultipartMd5, gotMultipartMd5)
		return fmt.Errorf("multipart upload corrupted: md5 differ: expecting %s but got %s", wantMultipartMd5, gotMultipartMd5)
	}
	fs.Debugf(w.o, "multipart upload %v md5 matched: expecting %s and got %s", *w.uploadID, wantMultipartMd5, gotMultipartMd5)
	return nil
}

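// isMultiPartUploadCorrupted reports whether err from a commit means one of
// the uploaded parts was invalid, in which case the upload cannot be
// committed and should be aborted rather than retried.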
func isMultiPartUploadCorrupted(err error) bool {
	if err == nil {
		return false
	}
	// Check if this is an oci-err object, and if it is a multipart commit error
	if ociError, ok := err.(common.ServiceError); ok {
		// InvalidUploadPart indicates a corrupted or missing part, so the
		// commit can never succeed
		if ociError.GetCode() == "InvalidUploadPart" {
			return true
		}
	}
	return false
}

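// Abort cancels the multipart upload, discarding any parts uploaded so far.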
func (w *objectChunkWriter) Abort(ctx context.Context) error {
	fs.Debugf(w.o, "Cancelling multipart upload")
	err := w.o.fs.abortMultiPartUpload(
		ctx,
		w.bucket,
		w.key,
		w.uploadID)
	if err != nil {
		fs.Debugf(w.o, "Failed to cancel multipart upload: %v", err)
	} else {
		fs.Debugf(w.o, "canceled and aborted multipart upload: %v", *w.uploadID)
	}
	return err
}

// addMd5 adds a binary md5 to the md5 calculated so far
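//
// Each chunk's 16-byte digest is stored at offset chunkNumber*md5.Size, so
// w.md5s holds the digests in part order and Close can hash the whole slice
// to reproduce the server's multipart MD5.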
func (w *objectChunkWriter) addMd5(md5binary *[]byte, chunkNumber int64) {
	w.md5sMu.Lock()
	defer w.md5sMu.Unlock()
	start := chunkNumber * md5.Size
	end := start + md5.Size
	if extend := end - int64(len(w.md5s)); extend > 0 {
		w.md5s = append(w.md5s, make([]byte, extend)...)
	}
	copy(w.md5s[start:end], (*md5binary)[:])
}

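// prepareUpload builds the PutObjectRequest describing the upload: it merges
// the source metadata, records the modification time, works out the MD5 and
// content type, applies the storage tier and open options, and drops any
// metadata keys or values which would not be valid HTTP headers.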
func (o *Object) prepareUpload(ctx context.Context, src fs.ObjectInfo, options []fs.OpenOption) (ui uploadInfo, err error) {
	bucket, bucketPath := o.split()

	ui.req = &objectstorage.PutObjectRequest{
		NamespaceName: common.String(o.fs.opt.Namespace),
		BucketName:    common.String(bucket),
		ObjectName:    common.String(bucketPath),
	}

	// Take the mtime from the source - it may be overridden by an "mtime"
	// metadata key below
	modTime := src.ModTime(ctx)
	// Fetch metadata if --metadata is in use
	meta, err := fs.GetMetadataOptions(ctx, o.fs, src, options)
	if err != nil {
		return ui, fmt.Errorf("failed to read metadata from source object: %w", err)
	}
	ui.req.OpcMeta = make(map[string]string, len(meta)+2)
	// merge metadata into request and user metadata
	for k, v := range meta {
		pv := common.String(v)
		k = strings.ToLower(k)
		switch k {
		case "cache-control":
			ui.req.CacheControl = pv
		case "content-disposition":
			ui.req.ContentDisposition = pv
		case "content-encoding":
			ui.req.ContentEncoding = pv
		case "content-language":
			ui.req.ContentLanguage = pv
		case "content-type":
			ui.req.ContentType = pv
		case "tier":
			// ignore
		case "mtime":
			// mtime in meta overrides source ModTime
			metaModTime, err := time.Parse(time.RFC3339Nano, v)
			if err != nil {
				fs.Debugf(o, "failed to parse metadata %s: %q: %v", k, v, err)
			} else {
				modTime = metaModTime
			}
		case "btime":
			// write as metadata since we can't set it
			ui.req.OpcMeta[k] = v
		default:
			ui.req.OpcMeta[k] = v
		}
	}

	// Set the mtime in the metadata
	ui.req.OpcMeta[metaMtime] = swift.TimeToFloatString(modTime)

	// read the md5sum if available
	// - for non-multipart
	//    - so we can add a ContentMD5
	//    - so we can add the md5sum in the metadata as metaMD5Hash if using SSE/SSE-C
	// - for multipart, provided checksums aren't disabled
	//    - so we can add the md5sum in the metadata as metaMD5Hash
	size := src.Size()
	isMultipart := size < 0 || size >= int64(o.fs.opt.UploadCutoff)
	var md5sumBase64 string
	if !isMultipart || !o.fs.opt.DisableChecksum {
		ui.md5sumHex, err = src.Hash(ctx, hash.MD5)
		if err == nil && matchMd5.MatchString(ui.md5sumHex) {
			hashBytes, err := hex.DecodeString(ui.md5sumHex)
			if err == nil {
				md5sumBase64 = base64.StdEncoding.EncodeToString(hashBytes)
				if isMultipart && !o.fs.opt.DisableChecksum {
					// Set the md5sum as metadata on the object if
					// - this is a multipart upload, and
					// - checksums aren't disabled
					// so the MD5 can still be read back, e.g. when the
					// ETag is not an MD5, such as with SSE/SSE-C
					ui.req.OpcMeta[metaMD5Hash] = md5sumBase64
				}
			}
		}
	}
	// Set the content type if it isn't set already
	if ui.req.ContentType == nil {
		ui.req.ContentType = common.String(fs.MimeType(ctx, src))
	}
	if size >= 0 {
		ui.req.ContentLength = common.Int64(size)
	}
	if md5sumBase64 != "" {
		ui.req.ContentMD5 = &md5sumBase64
	}
	o.applyPutOptions(ui.req, options...)
	useBYOKPutObject(o.fs, ui.req)
	if o.fs.opt.StorageTier != "" {
		storageTier, ok := objectstorage.GetMappingPutObjectStorageTierEnum(o.fs.opt.StorageTier)
		if !ok {
			return ui, fmt.Errorf("not a valid storage tier: %v", o.fs.opt.StorageTier)
		}
		ui.req.StorageTier = storageTier
	}
	// Check metadata keys and values are valid
	for key, value := range ui.req.OpcMeta {
		if !httpguts.ValidHeaderFieldName(key) {
			fs.Errorf(o, "Dropping invalid metadata key %q", key)
			delete(ui.req.OpcMeta, key)
		} else if value == "" {
			fs.Errorf(o, "Dropping empty metadata value for key %q", key)
			delete(ui.req.OpcMeta, key)
		} else if !httpguts.ValidHeaderFieldValue(value) {
			fs.Errorf(o, "Dropping invalid metadata value %q for key %q", value, key)
			delete(ui.req.OpcMeta, key)
		}
	}
	return ui, nil
}

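// createMultipartUpload creates the bucket if necessary and starts a new
// multipart upload, returning its upload ID. When AttemptResumeUpload is set
// it first tries to resume the most recent unfinished upload for this object
// and also returns the parts already uploaded to it.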
func (o *Object) createMultipartUpload(ctx context.Context, putReq *objectstorage.PutObjectRequest) (
	uploadID string, existingParts map[int]objectstorage.MultipartUploadPartSummary, err error) {
	bucketName, bucketPath := o.split()
	err = o.fs.makeBucket(ctx, bucketName)
	if err != nil {
		fs.Errorf(o, "failed to create bucket: %v, err: %v", bucketName, err)
		return uploadID, existingParts, err
	}
	if o.fs.opt.AttemptResumeUpload {
		fs.Debugf(o, "attempting to resume upload for %v (if any)", o.remote)
		resumeUploads, err := o.fs.findLatestMultipartUpload(ctx, bucketName, bucketPath)
		if err == nil && len(resumeUploads) > 0 {
			uploadID = *resumeUploads[0].UploadId
			existingParts, err = o.fs.listMultipartUploadParts(ctx, bucketName, bucketPath, uploadID)
			if err == nil {
				fs.Debugf(o, "resuming with existing upload id: %v", uploadID)
				return uploadID, existingParts, err
			}
		}
	}
	req := objectstorage.CreateMultipartUploadRequest{
		NamespaceName: common.String(o.fs.opt.Namespace),
		BucketName:    common.String(bucketName),
	}
	req.Object = common.String(bucketPath)
	if o.fs.opt.StorageTier != "" {
		storageTier, ok := objectstorage.GetMappingStorageTierEnum(o.fs.opt.StorageTier)
		if !ok {
			return "", nil, fmt.Errorf("not a valid storage tier: %v", o.fs.opt.StorageTier)
		}
		req.StorageTier = storageTier
	}
	o.applyMultipartUploadOptions(putReq, &req)

	var resp objectstorage.CreateMultipartUploadResponse
	err = o.fs.pacer.Call(func() (bool, error) {
		resp, err = o.fs.srv.CreateMultipartUpload(ctx, req)
		return shouldRetry(ctx, resp.HTTPResponse(), err)
	})
	if err != nil {
		return "", existingParts, err
	}
	existingParts = make(map[int]objectstorage.MultipartUploadPartSummary)
	uploadID = *resp.UploadId
	fs.Debugf(o, "created new upload id: %v", uploadID)
	return uploadID, existingParts, err
}