// Copyright (C) 2023 Storj Labs, Inc.
// See LICENSE for copying information.

package uplink

import (
	"context"
	"errors"
	"math"
	"runtime"
	"strings"
	"sync"
	"time"

	"github.com/zeebo/errs"

	"storj.io/common/base58"
	"storj.io/common/leak"
	"storj.io/common/pb"
	"storj.io/common/storj"
	"storj.io/eventkit"
	"storj.io/uplink/private/eestream/scheduler"
	"storj.io/uplink/private/metaclient"
	"storj.io/uplink/private/storage/streams"
	"storj.io/uplink/private/stream"
	"storj.io/uplink/private/testuplink"
)

// ErrUploadIDInvalid is returned when the upload ID is invalid.
//
// The multipart entry points in this file return it (wrapped) when the upload
// ID is empty, fails base58 check-decoding, or carries a version other than 1.
var ErrUploadIDInvalid = errors.New("upload ID invalid")

// UploadInfo contains information about an upload.
type UploadInfo struct {
	// UploadID identifies the upload. BeginUpload produces it by base58
	// check-encoding the stream ID returned by the satellite.
	UploadID string
	// Key is the object key the upload targets.
	Key string

	// IsPrefix is not set by any code in this file.
	// NOTE(review): presumably marks listing entries that are key prefixes
	// rather than uploads — confirm against the upload iterator.
	IsPrefix bool

	// System holds system metadata; BeginUpload fills in only Expires.
	System SystemMetadata
	// Custom holds custom metadata; BeginUpload leaves it empty.
	Custom CustomMetadata
}

// CommitUploadOptions options for committing multipart upload.
type CommitUploadOptions struct {
	// CustomMetadata is handed to the metainfo CommitObject call by CommitUpload.
	CustomMetadata CustomMetadata
}

// BeginUpload begins a new multipart upload to bucket and key.
//
// Use UploadPart to upload individual parts.
//
// Use CommitUpload to finish the upload.
//
// Use AbortUpload to cancel the upload at any time.
//
// UploadObject is a convenient way to upload single part objects.
57 func (project *Project) BeginUpload(ctx context.Context, bucket, key string, options *UploadOptions) (info UploadInfo, err error) { 58 defer mon.Task()(&ctx)(&err) 59 60 switch { 61 case bucket == "": 62 return UploadInfo{}, errwrapf("%w (%q)", ErrBucketNameInvalid, bucket) 63 case key == "": 64 return UploadInfo{}, errwrapf("%w (%q)", ErrObjectKeyInvalid, key) 65 } 66 67 if options == nil { 68 options = &UploadOptions{} 69 } 70 71 encPath, err := encryptPath(project, bucket, key) 72 if err != nil { 73 return UploadInfo{}, packageError.Wrap(err) 74 } 75 76 metainfoClient, err := project.dialMetainfoClient(ctx) 77 if err != nil { 78 return UploadInfo{}, packageError.Wrap(err) 79 } 80 defer func() { err = errs.Combine(err, metainfoClient.Close()) }() 81 82 response, err := metainfoClient.BeginObject(ctx, metaclient.BeginObjectParams{ 83 Bucket: []byte(bucket), 84 EncryptedObjectKey: []byte(encPath.Raw()), 85 ExpiresAt: options.Expires, 86 EncryptionParameters: project.encryptionParameters, 87 }) 88 if err != nil { 89 return UploadInfo{}, convertKnownErrors(err, bucket, key) 90 } 91 92 encodedStreamID := base58.CheckEncode(response.StreamID[:], 1) 93 return UploadInfo{ 94 Key: key, 95 UploadID: encodedStreamID, 96 System: SystemMetadata{ 97 Expires: options.Expires, 98 }, 99 }, nil 100 } 101 102 // CommitUpload commits a multipart upload to bucket and key started with BeginUpload. 103 // 104 // uploadID is an upload identifier returned by BeginUpload. 
105 func (project *Project) CommitUpload(ctx context.Context, bucket, key, uploadID string, opts *CommitUploadOptions) (object *Object, err error) { 106 defer mon.Task()(&ctx)(&err) 107 108 // TODO add completedPart to options when we will have implementation for that 109 110 if opts == nil { 111 opts = &CommitUploadOptions{} 112 } 113 114 metainfoDB, err := project.dialMetainfoDB(ctx) 115 if err != nil { 116 return nil, packageError.Wrap(err) 117 } 118 defer func() { err = errs.Combine(err, metainfoDB.Close()) }() 119 120 mObject, err := metainfoDB.CommitObject(ctx, bucket, key, uploadID, opts.CustomMetadata, project.encryptionParameters) 121 if err != nil { 122 return nil, convertKnownErrors(err, bucket, key) 123 } 124 125 return convertObject(&mObject), nil 126 } 127 128 // UploadPart uploads a part with partNumber to a multipart upload started with BeginUpload. 129 // 130 // uploadID is an upload identifier returned by BeginUpload. 131 func (project *Project) UploadPart(ctx context.Context, bucket, key, uploadID string, partNumber uint32) (_ *PartUpload, err error) { 132 upload := &PartUpload{ 133 bucket: bucket, 134 key: key, 135 part: &Part{ 136 PartNumber: partNumber, 137 }, 138 stats: newOperationStats(ctx, project.access.satelliteURL), 139 eTagCh: make(chan []byte, 1), 140 } 141 upload.task = mon.TaskNamed("PartUpload")(&ctx) 142 defer func() { 143 if err != nil { 144 upload.stats.flagFailure(err) 145 upload.emitEvent(false) 146 } 147 }() 148 defer upload.stats.trackWorking()() 149 defer mon.Task()(&ctx)(&err) 150 151 switch { 152 case bucket == "": 153 return nil, errwrapf("%w (%q)", ErrBucketNameInvalid, bucket) 154 case key == "": 155 return nil, errwrapf("%w (%q)", ErrObjectKeyInvalid, key) 156 case uploadID == "": 157 return nil, packageError.Wrap(ErrUploadIDInvalid) 158 case partNumber >= math.MaxInt32: 159 return nil, packageError.New("partNumber should be less than max(int32)") 160 } 161 162 decodedStreamID, version, err := 
base58.CheckDecode(uploadID) 163 if err != nil || version != 1 { 164 return nil, packageError.Wrap(ErrUploadIDInvalid) 165 } 166 167 if encPath, err := encryptPath(project, bucket, key); err == nil { 168 upload.stats.encPath = encPath 169 } 170 171 ctx, cancel := context.WithCancel(ctx) 172 upload.cancel = cancel 173 174 streams, err := project.getStreamsStore(ctx) 175 if err != nil { 176 return nil, convertKnownErrors(err, bucket, key) 177 } 178 upload.streams = streams 179 180 if project.concurrentSegmentUploadConfig == nil { 181 upload.upload = stream.NewUploadPart(ctx, bucket, key, decodedStreamID, partNumber, upload.eTagCh, streams) 182 } else { 183 sched := scheduler.New(project.concurrentSegmentUploadConfig.SchedulerOptions) 184 u, err := streams.UploadPart(ctx, bucket, key, decodedStreamID, int32(partNumber), upload.eTagCh, sched) 185 if err != nil { 186 return nil, convertKnownErrors(err, bucket, key) 187 } 188 upload.upload = u 189 } 190 191 upload.tracker = project.tracker.Child("upload-part", 1) 192 return upload, nil 193 } 194 195 // AbortUpload aborts a multipart upload started with BeginUpload. 196 // 197 // uploadID is an upload identifier returned by BeginUpload. 
198 func (project *Project) AbortUpload(ctx context.Context, bucket, key, uploadID string) (err error) { 199 defer mon.Task()(&ctx)(&err) 200 201 switch { 202 case bucket == "": 203 return errwrapf("%w (%q)", ErrBucketNameInvalid, bucket) 204 case key == "": 205 return errwrapf("%w (%q)", ErrObjectKeyInvalid, key) 206 case uploadID == "": 207 return packageError.Wrap(ErrUploadIDInvalid) 208 } 209 210 decodedStreamID, version, err := base58.CheckDecode(uploadID) 211 if err != nil || version != 1 { 212 return packageError.Wrap(ErrUploadIDInvalid) 213 } 214 215 id, err := storj.StreamIDFromBytes(decodedStreamID) 216 if err != nil { 217 return packageError.Wrap(err) 218 } 219 220 encPath, err := encryptPath(project, bucket, key) 221 if err != nil { 222 return convertKnownErrors(err, bucket, key) 223 } 224 225 metainfoClient, err := project.dialMetainfoClient(ctx) 226 if err != nil { 227 return convertKnownErrors(err, bucket, key) 228 } 229 defer func() { err = errs.Combine(err, metainfoClient.Close()) }() 230 231 _, err = metainfoClient.BeginDeleteObject(ctx, metaclient.BeginDeleteObjectParams{ 232 Bucket: []byte(bucket), 233 EncryptedObjectKey: []byte(encPath.Raw()), 234 StreamID: id, 235 Status: int32(pb.Object_UPLOADING), 236 }) 237 return convertKnownErrors(err, bucket, key) 238 } 239 240 // ListUploadParts returns an iterator over the parts of a multipart upload started with BeginUpload. 
241 func (project *Project) ListUploadParts(ctx context.Context, bucket, key, uploadID string, options *ListUploadPartsOptions) *PartIterator { 242 defer mon.Task()(&ctx)(nil) 243 244 opts := metaclient.ListSegmentsParams{} 245 246 if options != nil { 247 opts.Cursor = metaclient.SegmentPosition{ 248 PartNumber: int32(options.Cursor), 249 // cursor needs to be last segment in a part 250 // satellite can accept uint32 as segment index 251 // but protobuf is defined as int32 for now 252 Index: math.MaxInt32, 253 } 254 } 255 256 parts := PartIterator{ 257 ctx: ctx, 258 project: project, 259 bucket: bucket, 260 key: key, 261 options: opts, 262 uploadID: uploadID, 263 } 264 265 switch { 266 case parts.bucket == "": 267 parts.err = errwrapf("%w (%q)", ErrBucketNameInvalid, parts.bucket) 268 return &parts 269 case parts.key == "": 270 parts.err = errwrapf("%w (%q)", ErrObjectKeyInvalid, parts.key) 271 return &parts 272 case parts.uploadID == "": 273 parts.err = packageError.Wrap(ErrUploadIDInvalid) 274 return &parts 275 } 276 277 decodedStreamID, version, err := base58.CheckDecode(uploadID) 278 if err != nil || version != 1 { 279 parts.err = packageError.Wrap(ErrUploadIDInvalid) 280 return &parts 281 } 282 283 parts.options.StreamID = decodedStreamID 284 return &parts 285 } 286 287 // ListUploads returns an iterator over the uncommitted uploads in bucket. 288 // Both multipart and regular uploads are returned. An object may not be 289 // visible through ListUploads until it has a committed part. 290 func (project *Project) ListUploads(ctx context.Context, bucket string, options *ListUploadsOptions) *UploadIterator { 291 defer mon.Task()(&ctx)(nil) 292 293 opts := metaclient.ListOptions{ 294 Direction: metaclient.After, 295 Status: int32(pb.Object_UPLOADING), // TODO: define object status constants in storj package? 
296 } 297 298 if options != nil { 299 opts.Prefix = options.Prefix 300 opts.Cursor = options.Cursor 301 opts.Recursive = options.Recursive 302 opts.IncludeSystemMetadata = options.System 303 opts.IncludeCustomMetadata = options.Custom 304 } 305 306 opts.Limit = testuplink.GetListLimit(ctx) 307 308 uploads := UploadIterator{ 309 ctx: ctx, 310 project: project, 311 bucket: bucket, 312 options: opts, 313 } 314 315 if opts.Prefix != "" && !strings.HasSuffix(opts.Prefix, "/") { 316 uploads.listObjects = listPendingObjectStreams 317 } else { 318 uploads.listObjects = listObjects 319 } 320 321 if options != nil { 322 uploads.uploadOptions = *options 323 } 324 325 return &uploads 326 } 327 328 // Part part metadata. 329 type Part struct { 330 PartNumber uint32 331 // Size plain size of a part. 332 Size int64 333 Modified time.Time 334 ETag []byte 335 } 336 337 // PartUpload is a part upload to started multipart upload. 338 type PartUpload struct { 339 mu sync.Mutex 340 closed bool 341 aborted bool 342 cancel context.CancelFunc 343 upload streamUpload 344 bucket string 345 key string 346 part *Part 347 streams *streams.Store 348 eTagCh chan []byte 349 350 stats operationStats 351 task func(*error) 352 353 tracker leak.Ref 354 } 355 356 // Write uploads len(p) bytes from p to the object's data stream. 357 // It returns the number of bytes written from p (0 <= n <= len(p)) 358 // and any error encountered that caused the write to stop early. 359 func (upload *PartUpload) Write(p []byte) (int, error) { 360 track := upload.stats.trackWorking() 361 n, err := upload.upload.Write(p) 362 upload.mu.Lock() 363 upload.stats.bytes += int64(n) 364 upload.stats.flagFailure(err) 365 track() 366 upload.mu.Unlock() 367 return n, convertKnownErrors(err, upload.bucket, upload.key) 368 } 369 370 // SetETag sets ETag for a part. 
371 func (upload *PartUpload) SetETag(eTag []byte) error { 372 upload.mu.Lock() 373 defer upload.mu.Unlock() 374 375 if upload.part.ETag != nil { 376 return packageError.New("etag already set") 377 } 378 379 if upload.aborted { 380 return errwrapf("%w: upload aborted", ErrUploadDone) 381 } 382 if upload.closed { 383 return errwrapf("%w: already committed", ErrUploadDone) 384 } 385 386 upload.part.ETag = eTag 387 upload.eTagCh <- eTag 388 return nil 389 } 390 391 // Commit commits a part. 392 // 393 // Returns ErrUploadDone when either Abort or Commit has already been called. 394 func (upload *PartUpload) Commit() error { 395 track := upload.stats.trackWorking() 396 upload.mu.Lock() 397 defer upload.mu.Unlock() 398 399 if upload.aborted { 400 return errwrapf("%w: already aborted", ErrUploadDone) 401 } 402 403 if upload.closed { 404 return errwrapf("%w: already committed", ErrUploadDone) 405 } 406 407 upload.closed = true 408 409 // ETag must not be sent after a call to commit. The upload code waits on 410 // the channel before committing the last segment. Closing the channel 411 // allows the upload code to unblock if no eTag has been set. Not all 412 // multipart uploaders care about setting the eTag so we can't assume it 413 // has been set. 414 close(upload.eTagCh) 415 416 err := errs.Combine( 417 upload.upload.Commit(), 418 upload.streams.Close(), 419 upload.tracker.Close(), 420 ) 421 upload.stats.flagFailure(err) 422 track() 423 upload.emitEvent(false) 424 425 return convertKnownErrors(err, upload.bucket, upload.key) 426 } 427 428 // Abort aborts the part upload. 429 // 430 // Returns ErrUploadDone when either Abort or Commit has already been called. 
431 func (upload *PartUpload) Abort() error { 432 track := upload.stats.trackWorking() 433 upload.mu.Lock() 434 defer upload.mu.Unlock() 435 436 if upload.closed { 437 return errwrapf("%w: already committed", ErrUploadDone) 438 } 439 440 if upload.aborted { 441 return errwrapf("%w: already aborted", ErrUploadDone) 442 } 443 444 upload.aborted = true 445 upload.cancel() 446 447 err := errs.Combine( 448 upload.upload.Abort(), 449 upload.streams.Close(), 450 upload.tracker.Close(), 451 ) 452 upload.stats.flagFailure(err) 453 track() 454 upload.emitEvent(true) 455 456 return convertKnownErrors(err, upload.bucket, upload.key) 457 } 458 459 // Info returns the last information about the uploaded part. 460 func (upload *PartUpload) Info() *Part { 461 if meta := upload.upload.Meta(); meta != nil { 462 upload.part.Size = meta.Size 463 upload.part.Modified = meta.Modified 464 } 465 return upload.part 466 } 467 468 func (upload *PartUpload) emitEvent(aborted bool) { 469 message, err := upload.stats.err() 470 upload.task(&err) 471 472 evs.Event("part-upload", 473 eventkit.Int64("bytes", upload.stats.bytes), 474 eventkit.Duration("user-elapsed", time.Since(upload.stats.start)), 475 eventkit.Duration("working-elapsed", upload.stats.working), 476 eventkit.Bool("success", err == nil), 477 eventkit.String("error", message), 478 eventkit.Bool("aborted", aborted), 479 eventkit.String("arch", runtime.GOARCH), 480 eventkit.String("os", runtime.GOOS), 481 eventkit.Int64("cpus", int64(runtime.NumCPU())), 482 eventkit.Int64("quic-rollout", int64(upload.stats.quicRollout)), 483 eventkit.String("satellite", upload.stats.satellite), 484 eventkit.Bytes("path-checksum", pathChecksum(upload.stats.encPath)), 485 eventkit.Int64("noise-version", noiseVersion), 486 // segment count 487 // ram available 488 ) 489 }