storj.io/uplink@v1.13.0/multipart.go (about)

     1  // Copyright (C) 2023 Storj Labs, Inc.
     2  // See LICENSE for copying information.
     3  
     4  package uplink
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"math"
    10  	"runtime"
    11  	"strings"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/zeebo/errs"
    16  
    17  	"storj.io/common/base58"
    18  	"storj.io/common/leak"
    19  	"storj.io/common/pb"
    20  	"storj.io/common/storj"
    21  	"storj.io/eventkit"
    22  	"storj.io/uplink/private/eestream/scheduler"
    23  	"storj.io/uplink/private/metaclient"
    24  	"storj.io/uplink/private/storage/streams"
    25  	"storj.io/uplink/private/stream"
    26  	"storj.io/uplink/private/testuplink"
    27  )
    28  
// ErrUploadIDInvalid is returned when the upload ID is invalid.
//
// UploadPart, AbortUpload and ListUploadParts report it when the upload ID is
// empty or when it does not decode as a version 1 base58check string.
var ErrUploadIDInvalid = errors.New("upload ID invalid")
    31  
// UploadInfo contains information about an upload.
type UploadInfo struct {
	// UploadID identifies the upload. BeginUpload sets it to the version 1
	// base58check encoding of the stream ID returned by BeginObject.
	UploadID string
	// Key is the object key the upload was started for.
	Key string

	// IsPrefix is not set by BeginUpload.
	// NOTE(review): presumably populated when listing uploads non-recursively;
	// confirm against the upload iterator.
	IsPrefix bool

	// System holds system metadata. BeginUpload fills in only Expires.
	System SystemMetadata
	// Custom holds custom metadata. BeginUpload leaves it empty.
	Custom CustomMetadata
}
    42  
// CommitUploadOptions options for committing multipart upload.
//
// A nil *CommitUploadOptions passed to CommitUpload is treated like the zero
// value.
type CommitUploadOptions struct {
	// CustomMetadata is handed to CommitObject when the upload is committed.
	CustomMetadata CustomMetadata
}
    47  
    48  // BeginUpload begins a new multipart upload to bucket and key.
    49  //
    50  // Use UploadPart to upload individual parts.
    51  //
    52  // Use CommitUpload to finish the upload.
    53  //
    54  // Use AbortUpload to cancel the upload at any time.
    55  //
    56  // UploadObject is a convenient way to upload single part objects.
    57  func (project *Project) BeginUpload(ctx context.Context, bucket, key string, options *UploadOptions) (info UploadInfo, err error) {
    58  	defer mon.Task()(&ctx)(&err)
    59  
    60  	switch {
    61  	case bucket == "":
    62  		return UploadInfo{}, errwrapf("%w (%q)", ErrBucketNameInvalid, bucket)
    63  	case key == "":
    64  		return UploadInfo{}, errwrapf("%w (%q)", ErrObjectKeyInvalid, key)
    65  	}
    66  
    67  	if options == nil {
    68  		options = &UploadOptions{}
    69  	}
    70  
    71  	encPath, err := encryptPath(project, bucket, key)
    72  	if err != nil {
    73  		return UploadInfo{}, packageError.Wrap(err)
    74  	}
    75  
    76  	metainfoClient, err := project.dialMetainfoClient(ctx)
    77  	if err != nil {
    78  		return UploadInfo{}, packageError.Wrap(err)
    79  	}
    80  	defer func() { err = errs.Combine(err, metainfoClient.Close()) }()
    81  
    82  	response, err := metainfoClient.BeginObject(ctx, metaclient.BeginObjectParams{
    83  		Bucket:               []byte(bucket),
    84  		EncryptedObjectKey:   []byte(encPath.Raw()),
    85  		ExpiresAt:            options.Expires,
    86  		EncryptionParameters: project.encryptionParameters,
    87  	})
    88  	if err != nil {
    89  		return UploadInfo{}, convertKnownErrors(err, bucket, key)
    90  	}
    91  
    92  	encodedStreamID := base58.CheckEncode(response.StreamID[:], 1)
    93  	return UploadInfo{
    94  		Key:      key,
    95  		UploadID: encodedStreamID,
    96  		System: SystemMetadata{
    97  			Expires: options.Expires,
    98  		},
    99  	}, nil
   100  }
   101  
   102  // CommitUpload commits a multipart upload to bucket and key started with BeginUpload.
   103  //
   104  // uploadID is an upload identifier returned by BeginUpload.
   105  func (project *Project) CommitUpload(ctx context.Context, bucket, key, uploadID string, opts *CommitUploadOptions) (object *Object, err error) {
   106  	defer mon.Task()(&ctx)(&err)
   107  
   108  	// TODO add completedPart to options when we will have implementation for that
   109  
   110  	if opts == nil {
   111  		opts = &CommitUploadOptions{}
   112  	}
   113  
   114  	metainfoDB, err := project.dialMetainfoDB(ctx)
   115  	if err != nil {
   116  		return nil, packageError.Wrap(err)
   117  	}
   118  	defer func() { err = errs.Combine(err, metainfoDB.Close()) }()
   119  
   120  	mObject, err := metainfoDB.CommitObject(ctx, bucket, key, uploadID, opts.CustomMetadata, project.encryptionParameters)
   121  	if err != nil {
   122  		return nil, convertKnownErrors(err, bucket, key)
   123  	}
   124  
   125  	return convertObject(&mObject), nil
   126  }
   127  
   128  // UploadPart uploads a part with partNumber to a multipart upload started with BeginUpload.
   129  //
   130  // uploadID is an upload identifier returned by BeginUpload.
   131  func (project *Project) UploadPart(ctx context.Context, bucket, key, uploadID string, partNumber uint32) (_ *PartUpload, err error) {
   132  	upload := &PartUpload{
   133  		bucket: bucket,
   134  		key:    key,
   135  		part: &Part{
   136  			PartNumber: partNumber,
   137  		},
   138  		stats:  newOperationStats(ctx, project.access.satelliteURL),
   139  		eTagCh: make(chan []byte, 1),
   140  	}
   141  	upload.task = mon.TaskNamed("PartUpload")(&ctx)
   142  	defer func() {
   143  		if err != nil {
   144  			upload.stats.flagFailure(err)
   145  			upload.emitEvent(false)
   146  		}
   147  	}()
   148  	defer upload.stats.trackWorking()()
   149  	defer mon.Task()(&ctx)(&err)
   150  
   151  	switch {
   152  	case bucket == "":
   153  		return nil, errwrapf("%w (%q)", ErrBucketNameInvalid, bucket)
   154  	case key == "":
   155  		return nil, errwrapf("%w (%q)", ErrObjectKeyInvalid, key)
   156  	case uploadID == "":
   157  		return nil, packageError.Wrap(ErrUploadIDInvalid)
   158  	case partNumber >= math.MaxInt32:
   159  		return nil, packageError.New("partNumber should be less than max(int32)")
   160  	}
   161  
   162  	decodedStreamID, version, err := base58.CheckDecode(uploadID)
   163  	if err != nil || version != 1 {
   164  		return nil, packageError.Wrap(ErrUploadIDInvalid)
   165  	}
   166  
   167  	if encPath, err := encryptPath(project, bucket, key); err == nil {
   168  		upload.stats.encPath = encPath
   169  	}
   170  
   171  	ctx, cancel := context.WithCancel(ctx)
   172  	upload.cancel = cancel
   173  
   174  	streams, err := project.getStreamsStore(ctx)
   175  	if err != nil {
   176  		return nil, convertKnownErrors(err, bucket, key)
   177  	}
   178  	upload.streams = streams
   179  
   180  	if project.concurrentSegmentUploadConfig == nil {
   181  		upload.upload = stream.NewUploadPart(ctx, bucket, key, decodedStreamID, partNumber, upload.eTagCh, streams)
   182  	} else {
   183  		sched := scheduler.New(project.concurrentSegmentUploadConfig.SchedulerOptions)
   184  		u, err := streams.UploadPart(ctx, bucket, key, decodedStreamID, int32(partNumber), upload.eTagCh, sched)
   185  		if err != nil {
   186  			return nil, convertKnownErrors(err, bucket, key)
   187  		}
   188  		upload.upload = u
   189  	}
   190  
   191  	upload.tracker = project.tracker.Child("upload-part", 1)
   192  	return upload, nil
   193  }
   194  
   195  // AbortUpload aborts a multipart upload started with BeginUpload.
   196  //
   197  // uploadID is an upload identifier returned by BeginUpload.
   198  func (project *Project) AbortUpload(ctx context.Context, bucket, key, uploadID string) (err error) {
   199  	defer mon.Task()(&ctx)(&err)
   200  
   201  	switch {
   202  	case bucket == "":
   203  		return errwrapf("%w (%q)", ErrBucketNameInvalid, bucket)
   204  	case key == "":
   205  		return errwrapf("%w (%q)", ErrObjectKeyInvalid, key)
   206  	case uploadID == "":
   207  		return packageError.Wrap(ErrUploadIDInvalid)
   208  	}
   209  
   210  	decodedStreamID, version, err := base58.CheckDecode(uploadID)
   211  	if err != nil || version != 1 {
   212  		return packageError.Wrap(ErrUploadIDInvalid)
   213  	}
   214  
   215  	id, err := storj.StreamIDFromBytes(decodedStreamID)
   216  	if err != nil {
   217  		return packageError.Wrap(err)
   218  	}
   219  
   220  	encPath, err := encryptPath(project, bucket, key)
   221  	if err != nil {
   222  		return convertKnownErrors(err, bucket, key)
   223  	}
   224  
   225  	metainfoClient, err := project.dialMetainfoClient(ctx)
   226  	if err != nil {
   227  		return convertKnownErrors(err, bucket, key)
   228  	}
   229  	defer func() { err = errs.Combine(err, metainfoClient.Close()) }()
   230  
   231  	_, err = metainfoClient.BeginDeleteObject(ctx, metaclient.BeginDeleteObjectParams{
   232  		Bucket:             []byte(bucket),
   233  		EncryptedObjectKey: []byte(encPath.Raw()),
   234  		StreamID:           id,
   235  		Status:             int32(pb.Object_UPLOADING),
   236  	})
   237  	return convertKnownErrors(err, bucket, key)
   238  }
   239  
   240  // ListUploadParts returns an iterator over the parts of a multipart upload started with BeginUpload.
   241  func (project *Project) ListUploadParts(ctx context.Context, bucket, key, uploadID string, options *ListUploadPartsOptions) *PartIterator {
   242  	defer mon.Task()(&ctx)(nil)
   243  
   244  	opts := metaclient.ListSegmentsParams{}
   245  
   246  	if options != nil {
   247  		opts.Cursor = metaclient.SegmentPosition{
   248  			PartNumber: int32(options.Cursor),
   249  			// cursor needs to be last segment in a part
   250  			// satellite can accept uint32 as segment index
   251  			// but protobuf is defined as int32 for now
   252  			Index: math.MaxInt32,
   253  		}
   254  	}
   255  
   256  	parts := PartIterator{
   257  		ctx:      ctx,
   258  		project:  project,
   259  		bucket:   bucket,
   260  		key:      key,
   261  		options:  opts,
   262  		uploadID: uploadID,
   263  	}
   264  
   265  	switch {
   266  	case parts.bucket == "":
   267  		parts.err = errwrapf("%w (%q)", ErrBucketNameInvalid, parts.bucket)
   268  		return &parts
   269  	case parts.key == "":
   270  		parts.err = errwrapf("%w (%q)", ErrObjectKeyInvalid, parts.key)
   271  		return &parts
   272  	case parts.uploadID == "":
   273  		parts.err = packageError.Wrap(ErrUploadIDInvalid)
   274  		return &parts
   275  	}
   276  
   277  	decodedStreamID, version, err := base58.CheckDecode(uploadID)
   278  	if err != nil || version != 1 {
   279  		parts.err = packageError.Wrap(ErrUploadIDInvalid)
   280  		return &parts
   281  	}
   282  
   283  	parts.options.StreamID = decodedStreamID
   284  	return &parts
   285  }
   286  
   287  // ListUploads returns an iterator over the uncommitted uploads in bucket.
   288  // Both multipart and regular uploads are returned. An object may not be
   289  // visible through ListUploads until it has a committed part.
   290  func (project *Project) ListUploads(ctx context.Context, bucket string, options *ListUploadsOptions) *UploadIterator {
   291  	defer mon.Task()(&ctx)(nil)
   292  
   293  	opts := metaclient.ListOptions{
   294  		Direction: metaclient.After,
   295  		Status:    int32(pb.Object_UPLOADING), // TODO: define object status constants in storj package?
   296  	}
   297  
   298  	if options != nil {
   299  		opts.Prefix = options.Prefix
   300  		opts.Cursor = options.Cursor
   301  		opts.Recursive = options.Recursive
   302  		opts.IncludeSystemMetadata = options.System
   303  		opts.IncludeCustomMetadata = options.Custom
   304  	}
   305  
   306  	opts.Limit = testuplink.GetListLimit(ctx)
   307  
   308  	uploads := UploadIterator{
   309  		ctx:     ctx,
   310  		project: project,
   311  		bucket:  bucket,
   312  		options: opts,
   313  	}
   314  
   315  	if opts.Prefix != "" && !strings.HasSuffix(opts.Prefix, "/") {
   316  		uploads.listObjects = listPendingObjectStreams
   317  	} else {
   318  		uploads.listObjects = listObjects
   319  	}
   320  
   321  	if options != nil {
   322  		uploads.uploadOptions = *options
   323  	}
   324  
   325  	return &uploads
   326  }
   327  
// Part holds the metadata of a single part of a multipart upload.
type Part struct {
	// PartNumber is the number of the part within the upload.
	PartNumber uint32
	// Size plain size of a part.
	Size int64
	// Modified is the modification time of the part. For a PartUpload it is
	// refreshed, together with Size, by PartUpload.Info.
	Modified time.Time
	// ETag is the ETag of the part, as set through PartUpload.SetETag.
	ETag []byte
}
   336  
// PartUpload is an upload of a single part to a started multipart upload.
//
// It is created by Project.UploadPart and must be finished with either Commit
// or Abort.
type PartUpload struct {
	// mu is held by SetETag, Commit and Abort for their whole duration and by
	// Write while it updates the statistics.
	mu sync.Mutex
	// closed is set once Commit has been called.
	closed bool
	// aborted is set once Abort has been called.
	aborted bool
	// cancel cancels the context the part upload runs under; Abort calls it.
	cancel context.CancelFunc
	// upload is the underlying stream upload that Write, Commit, Abort and
	// Info delegate to.
	upload streamUpload
	bucket string
	key    string
	// part is the metadata returned by Info.
	part *Part
	// streams is the streams store the upload uses; it is closed by Commit
	// and Abort.
	streams *streams.Store
	// eTagCh carries the ETag from SetETag to the upload code. It has a
	// buffer of one and is closed by Commit.
	eTagCh chan []byte

	// stats collects the figures reported by emitEvent.
	stats operationStats
	// task finishes the "PartUpload" monitoring task; it is invoked by
	// emitEvent.
	task func(*error)

	// tracker is the leak-tracking reference; it is closed by Commit and
	// Abort.
	tracker leak.Ref
}
   355  
   356  // Write uploads len(p) bytes from p to the object's data stream.
   357  // It returns the number of bytes written from p (0 <= n <= len(p))
   358  // and any error encountered that caused the write to stop early.
   359  func (upload *PartUpload) Write(p []byte) (int, error) {
   360  	track := upload.stats.trackWorking()
   361  	n, err := upload.upload.Write(p)
   362  	upload.mu.Lock()
   363  	upload.stats.bytes += int64(n)
   364  	upload.stats.flagFailure(err)
   365  	track()
   366  	upload.mu.Unlock()
   367  	return n, convertKnownErrors(err, upload.bucket, upload.key)
   368  }
   369  
   370  // SetETag sets ETag for a part.
   371  func (upload *PartUpload) SetETag(eTag []byte) error {
   372  	upload.mu.Lock()
   373  	defer upload.mu.Unlock()
   374  
   375  	if upload.part.ETag != nil {
   376  		return packageError.New("etag already set")
   377  	}
   378  
   379  	if upload.aborted {
   380  		return errwrapf("%w: upload aborted", ErrUploadDone)
   381  	}
   382  	if upload.closed {
   383  		return errwrapf("%w: already committed", ErrUploadDone)
   384  	}
   385  
   386  	upload.part.ETag = eTag
   387  	upload.eTagCh <- eTag
   388  	return nil
   389  }
   390  
   391  // Commit commits a part.
   392  //
   393  // Returns ErrUploadDone when either Abort or Commit has already been called.
   394  func (upload *PartUpload) Commit() error {
   395  	track := upload.stats.trackWorking()
   396  	upload.mu.Lock()
   397  	defer upload.mu.Unlock()
   398  
   399  	if upload.aborted {
   400  		return errwrapf("%w: already aborted", ErrUploadDone)
   401  	}
   402  
   403  	if upload.closed {
   404  		return errwrapf("%w: already committed", ErrUploadDone)
   405  	}
   406  
   407  	upload.closed = true
   408  
   409  	// ETag must not be sent after a call to commit. The upload code waits on
   410  	// the channel before committing the last segment. Closing the channel
   411  	// allows the upload code to unblock if no eTag has been set. Not all
   412  	// multipart uploaders care about setting the eTag so we can't assume it
   413  	// has been set.
   414  	close(upload.eTagCh)
   415  
   416  	err := errs.Combine(
   417  		upload.upload.Commit(),
   418  		upload.streams.Close(),
   419  		upload.tracker.Close(),
   420  	)
   421  	upload.stats.flagFailure(err)
   422  	track()
   423  	upload.emitEvent(false)
   424  
   425  	return convertKnownErrors(err, upload.bucket, upload.key)
   426  }
   427  
   428  // Abort aborts the part upload.
   429  //
   430  // Returns ErrUploadDone when either Abort or Commit has already been called.
   431  func (upload *PartUpload) Abort() error {
   432  	track := upload.stats.trackWorking()
   433  	upload.mu.Lock()
   434  	defer upload.mu.Unlock()
   435  
   436  	if upload.closed {
   437  		return errwrapf("%w: already committed", ErrUploadDone)
   438  	}
   439  
   440  	if upload.aborted {
   441  		return errwrapf("%w: already aborted", ErrUploadDone)
   442  	}
   443  
   444  	upload.aborted = true
   445  	upload.cancel()
   446  
   447  	err := errs.Combine(
   448  		upload.upload.Abort(),
   449  		upload.streams.Close(),
   450  		upload.tracker.Close(),
   451  	)
   452  	upload.stats.flagFailure(err)
   453  	track()
   454  	upload.emitEvent(true)
   455  
   456  	return convertKnownErrors(err, upload.bucket, upload.key)
   457  }
   458  
   459  // Info returns the last information about the uploaded part.
   460  func (upload *PartUpload) Info() *Part {
   461  	if meta := upload.upload.Meta(); meta != nil {
   462  		upload.part.Size = meta.Size
   463  		upload.part.Modified = meta.Modified
   464  	}
   465  	return upload.part
   466  }
   467  
   468  func (upload *PartUpload) emitEvent(aborted bool) {
   469  	message, err := upload.stats.err()
   470  	upload.task(&err)
   471  
   472  	evs.Event("part-upload",
   473  		eventkit.Int64("bytes", upload.stats.bytes),
   474  		eventkit.Duration("user-elapsed", time.Since(upload.stats.start)),
   475  		eventkit.Duration("working-elapsed", upload.stats.working),
   476  		eventkit.Bool("success", err == nil),
   477  		eventkit.String("error", message),
   478  		eventkit.Bool("aborted", aborted),
   479  		eventkit.String("arch", runtime.GOARCH),
   480  		eventkit.String("os", runtime.GOOS),
   481  		eventkit.Int64("cpus", int64(runtime.NumCPU())),
   482  		eventkit.Int64("quic-rollout", int64(upload.stats.quicRollout)),
   483  		eventkit.String("satellite", upload.stats.satellite),
   484  		eventkit.Bytes("path-checksum", pathChecksum(upload.stats.encPath)),
   485  		eventkit.Int64("noise-version", noiseVersion),
   486  		// segment count
   487  		// ram available
   488  	)
   489  }