// storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/erasure-multipart.go

/*
 * MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"context"
	"fmt"
	"io"
	"os"
	"path"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/minio/minio-go/v7/pkg/set"

	xhttp "storj.io/minio/cmd/http"
	"storj.io/minio/cmd/logger"
	"storj.io/minio/pkg/mimedb"
	"storj.io/minio/pkg/sync/errgroup"
)

func (er erasureObjects) getUploadIDDir(bucket, object, uploadID string) string {
	return pathJoin(er.getMultipartSHADir(bucket, object), uploadID)
}

func (er erasureObjects) getMultipartSHADir(bucket, object string) string {
	return getSHA256Hash([]byte(pathJoin(bucket, object)))
}

// checkUploadIDExists - verify if a given uploadID exists and is valid.
func (er erasureObjects) checkUploadIDExists(ctx context.Context, bucket, object, uploadID string) (err error) {
	defer func() {
		if err == errFileNotFound {
			err = errUploadIDNotFound
		}
	}()

	disks := er.getDisks()

	// Read metadata associated with the object from all disks.
	metaArr, errs := readAllFileInfo(ctx, disks, minioMetaMultipartBucket, er.getUploadIDDir(bucket, object, uploadID), "", false)

	readQuorum, _, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount)
	if err != nil {
		return err
	}

	if reducedErr := reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, readQuorum); reducedErr != nil {
		return reducedErr
	}

	// List all online disks.
	_, modTime, dataDir := listOnlineDisks(disks, metaArr, errs)

	// Pick latest valid metadata.
	_, err = pickValidFileInfo(ctx, metaArr, modTime, dataDir, readQuorum)
	return err
}

// removeObjectPart removes the part with the given partNumber belonging to a
// multipart upload from minioMetaMultipartBucket.
func (er erasureObjects) removeObjectPart(bucket, object, uploadID, dataDir string, partNumber int) {
	uploadIDPath := er.getUploadIDDir(bucket, object, uploadID)
	curpartPath := pathJoin(uploadIDPath, dataDir, fmt.Sprintf("part.%d", partNumber))
	storageDisks := er.getDisks()

	g := errgroup.WithNErrs(len(storageDisks))
	for index, disk := range storageDisks {
		if disk == nil {
			continue
		}
		index := index
		g.Go(func() error {
			// Ignoring failure to remove parts that weren't present in CompleteMultipartUpload
			// requests. xl.meta is the authoritative source of truth on which parts constitute
			// the object. The presence of parts that don't belong in the object doesn't affect correctness.
			_ = storageDisks[index].Delete(context.TODO(), minioMetaMultipartBucket, curpartPath, false)
			return nil
		}, index)
	}
	g.Wait()
}
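// exampleFanOutDelete is an illustrative sketch, not part of the upstream
// code path: it restates the errgroup fan-out pattern removeObjectPart uses,
// one goroutine per disk with errors collected positionally. The function
// name and parameters are hypothetical.
func exampleFanOutDelete(disks []StorageAPI, bucket, prefix string) []error {
	g := errgroup.WithNErrs(len(disks))
	for index, disk := range disks {
		if disk == nil {
			continue // nil slots keep their zero-value (nil) error
		}
		index := index // capture the loop variable for the closure
		g.Go(func() error {
			return disks[index].Delete(context.TODO(), bucket, prefix, false)
		}, index)
	}
	// Wait returns one error slot per disk, preserving disk order.
	return g.Wait()
}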
// Clean up the old multipart uploads. Should be run in a goroutine.
func (er erasureObjects) cleanupStaleUploads(ctx context.Context, expiry time.Duration) {
	// Run multiple cleanups local to this server.
	var wg sync.WaitGroup
	for _, disk := range er.getLoadBalancedLocalDisks() {
		if disk != nil {
			wg.Add(1)
			go func(disk StorageAPI) {
				defer wg.Done()
				er.cleanupStaleUploadsOnDisk(ctx, disk, expiry)
			}(disk)
		}
	}
	wg.Wait()
}

func (er erasureObjects) renameAll(ctx context.Context, bucket, prefix string) {
	var wg sync.WaitGroup
	for _, disk := range er.getDisks() {
		if disk == nil {
			continue
		}
		wg.Add(1)
		go func(disk StorageAPI) {
			defer wg.Done()
			disk.RenameFile(ctx, bucket, prefix, minioMetaTmpBucket, mustGetUUID())
		}(disk)
	}
	wg.Wait()
}

func (er erasureObjects) deleteAll(ctx context.Context, bucket, prefix string) {
	var wg sync.WaitGroup
	for _, disk := range er.getDisks() {
		if disk == nil {
			continue
		}
		wg.Add(1)
		go func(disk StorageAPI) {
			defer wg.Done()
			disk.Delete(ctx, bucket, prefix, true)
		}(disk)
	}
	wg.Wait()
}

// Remove the old multipart uploads on the given disk.
func (er erasureObjects) cleanupStaleUploadsOnDisk(ctx context.Context, disk StorageAPI, expiry time.Duration) {
	now := time.Now()
	diskPath := disk.Endpoint().Path

	readDirFn(pathJoin(diskPath, minioMetaMultipartBucket), func(shaDir string, typ os.FileMode) error {
		return readDirFn(pathJoin(diskPath, minioMetaMultipartBucket, shaDir), func(uploadIDDir string, typ os.FileMode) error {
			uploadIDPath := pathJoin(shaDir, uploadIDDir)
			fi, err := disk.ReadVersion(ctx, minioMetaMultipartBucket, uploadIDPath, "", false)
			if err != nil {
				return nil
			}
			wait := er.deletedCleanupSleeper.Timer(ctx)
			if now.Sub(fi.ModTime) > expiry {
				er.renameAll(ctx, minioMetaMultipartBucket, uploadIDPath)
			}
			wait()
			return nil
		})
	})

	readDirFn(pathJoin(diskPath, minioMetaTmpBucket), func(tmpDir string, typ os.FileMode) error {
		if tmpDir == ".trash/" { // do not remove .trash/ here, it has its own routines
			return nil
		}
		vi, err := disk.StatVol(ctx, pathJoin(minioMetaTmpBucket, tmpDir))
		if err != nil {
			return nil
		}
		wait := er.deletedCleanupSleeper.Timer(ctx)
		if now.Sub(vi.Created) > expiry {
			er.deleteAll(ctx, minioMetaTmpBucket, tmpDir)
		}
		wait()
		return nil
	})
}
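// exampleIsStale is an illustrative helper with a hypothetical name; it is
// not called above. It restates the expiry rule cleanupStaleUploadsOnDisk
// applies: an upload is stale once its metadata modification time is older
// than the configured expiry window.
func exampleIsStale(modTime time.Time, expiry time.Duration) bool {
	return time.Since(modTime) > expiry
}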
// ListMultipartUploads - lists all the pending multipart
// uploads for a particular object in a bucket.
//
// Implements minimal S3 compatible ListMultipartUploads API. We do
// not support prefix-based listing; this is a deliberate attempt
// towards simplification of multipart APIs.
// The resulting ListMultipartsInfo structure is marshalled directly into XML.
func (er erasureObjects) ListMultipartUploads(ctx context.Context, bucket, object, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (result ListMultipartsInfo, err error) {
	result.MaxUploads = maxUploads
	result.KeyMarker = keyMarker
	result.Prefix = object
	result.Delimiter = delimiter

	var uploadIDs []string
	var disk StorageAPI
	for _, disk = range er.getLoadBalancedDisks(true) {
		uploadIDs, err = disk.ListDir(ctx, minioMetaMultipartBucket, er.getMultipartSHADir(bucket, object), -1)
		if err != nil {
			if err == errDiskNotFound {
				continue
			}
			if err == errFileNotFound {
				return result, nil
			}
			logger.LogIf(ctx, err)
			return result, toObjectErr(err, bucket, object)
		}
		break
	}

	for i := range uploadIDs {
		uploadIDs[i] = strings.TrimSuffix(uploadIDs[i], SlashSeparator)
	}

	// The S3 spec says uploadIDs should be sorted based on initiated time,
	// so we need to read each metadata entry.
	var uploads []MultipartInfo

	populatedUploadIds := set.NewStringSet()

	for _, uploadID := range uploadIDs {
		if populatedUploadIds.Contains(uploadID) {
			continue
		}
		fi, err := disk.ReadVersion(ctx, minioMetaMultipartBucket, pathJoin(er.getUploadIDDir(bucket, object, uploadID)), "", false)
		if err != nil {
			return result, toObjectErr(err, bucket, object)
		}
		populatedUploadIds.Add(uploadID)
		uploads = append(uploads, MultipartInfo{
			Object:    object,
			UploadID:  uploadID,
			Initiated: fi.ModTime,
		})
	}

	sort.Slice(uploads, func(i int, j int) bool {
		return uploads[i].Initiated.Before(uploads[j].Initiated)
	})

	uploadIndex := 0
	if uploadIDMarker != "" {
		// Skip entries up to and including the marker.
		for uploadIndex < len(uploads) {
			uploadIndex++
			if uploads[uploadIndex-1].UploadID == uploadIDMarker {
				break
			}
		}
	}
	for uploadIndex < len(uploads) {
		result.Uploads = append(result.Uploads, uploads[uploadIndex])
		result.NextUploadIDMarker = uploads[uploadIndex].UploadID
		uploadIndex++
		if len(result.Uploads) == maxUploads {
			break
		}
	}

	result.IsTruncated = uploadIndex < len(uploads)

	if !result.IsTruncated {
		result.NextKeyMarker = ""
		result.NextUploadIDMarker = ""
	}

	return result, nil
}
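// exampleDrainUploads is an illustrative sketch with a hypothetical name of
// how a caller pages through the listing above: keep feeding
// NextUploadIDMarker back in until IsTruncated is false.
func exampleDrainUploads(ctx context.Context, er erasureObjects, bucket, object string) ([]MultipartInfo, error) {
	var all []MultipartInfo
	marker := ""
	for {
		result, err := er.ListMultipartUploads(ctx, bucket, object, "", marker, "", 1000)
		if err != nil {
			return nil, err
		}
		all = append(all, result.Uploads...)
		if !result.IsTruncated {
			return all, nil
		}
		marker = result.NextUploadIDMarker
	}
}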
// newMultipartUpload - wrapper for initializing a new multipart
// request; returns a unique upload id.
//
// Internally this function writes the upload's `xl.meta` under
// '.minio.sys/multipart/<SHA256(bucket/object)>/<uploadID>/' on all the
// disks; it carries metadata regarding the on-going multipart
// operation on the object.
func (er erasureObjects) newMultipartUpload(ctx context.Context, bucket string, object string, opts ObjectOptions) (string, error) {
	onlineDisks := er.getDisks()
	parityDrives := globalStorageClass.GetParityForSC(opts.UserDefined[xhttp.AmzStorageClass])
	if parityDrives <= 0 {
		parityDrives = er.defaultParityCount
	}

	dataDrives := len(onlineDisks) - parityDrives
	// We now know the number of blocks this object needs for data and parity;
	// establish the writeQuorum using this data.
	writeQuorum := dataDrives
	if dataDrives == parityDrives {
		writeQuorum++
	}

	// Initialize parts metadata.
	partsMetadata := make([]FileInfo, len(onlineDisks))

	fi := newFileInfo(pathJoin(bucket, object), dataDrives, parityDrives)
	if opts.Versioned {
		fi.VersionID = opts.VersionID
		if fi.VersionID == "" {
			fi.VersionID = mustGetUUID()
		}
	}
	fi.DataDir = mustGetUUID()

	// Initialize erasure metadata.
	for index := range partsMetadata {
		partsMetadata[index] = fi
	}

	// Guess content-type from the extension if possible.
	if opts.UserDefined["content-type"] == "" {
		opts.UserDefined["content-type"] = mimedb.TypeByExtension(path.Ext(object))
	}

	modTime := opts.MTime
	if opts.MTime.IsZero() {
		modTime = UTCNow()
	}

	onlineDisks, partsMetadata = shuffleDisksAndPartsMetadata(onlineDisks, partsMetadata, fi)

	// Fill all the necessary metadata.
	// Update `xl.meta` content on each disk.
	for index := range partsMetadata {
		partsMetadata[index].Metadata = opts.UserDefined
		partsMetadata[index].ModTime = modTime
	}

	uploadID := mustGetUUID()
	uploadIDPath := er.getUploadIDDir(bucket, object, uploadID)

	// Write updated `xl.meta` to all disks.
	if _, err := writeUniqueFileInfo(ctx, onlineDisks, minioMetaMultipartBucket, uploadIDPath, partsMetadata, writeQuorum); err != nil {
		return "", toObjectErr(err, minioMetaMultipartBucket, uploadIDPath)
	}

	// Return success.
	return uploadID, nil
}

// NewMultipartUpload - initialize a new multipart upload, returns a
// unique id. The unique id returned here is of UUID form; for each
// subsequent request each UUID is unique.
//
// Implements S3 compatible initiate multipart API.
func (er erasureObjects) NewMultipartUpload(ctx context.Context, bucket, object string, opts ObjectOptions) (string, error) {
	// If no metadata is set, allocate a new map.
	if opts.UserDefined == nil {
		opts.UserDefined = make(map[string]string)
	}
	return er.newMultipartUpload(ctx, bucket, object, opts)
}

// CopyObjectPart - reads an incoming stream and internally erasure-codes
// it. This call is similar to the put object part operation but the source
// data is read from an existing object.
//
// Implements S3 compatible Upload Part Copy API.
func (er erasureObjects) CopyObjectPart(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject, uploadID string, partID int, startOffset int64, length int64, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (pi PartInfo, e error) {
	partInfo, err := er.PutObjectPart(ctx, dstBucket, dstObject, uploadID, partID, NewPutObjReader(srcInfo.Reader), dstOpts)
	if err != nil {
		return pi, toObjectErr(err, dstBucket, dstObject)
	}

	// Success.
	return partInfo, nil
}
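// exampleWriteQuorum is an illustrative restatement with a hypothetical name
// of the quorum rule in newMultipartUpload: the write quorum is the number
// of data drives, plus one when data and parity counts are equal so that two
// conflicting writes cannot both achieve quorum. For example, 12 disks with
// 4 parity drives give a quorum of 8, while 12 disks with 6 parity drives
// give 7.
func exampleWriteQuorum(totalDisks, parityDrives int) int {
	dataDrives := totalDisks - parityDrives
	writeQuorum := dataDrives
	if dataDrives == parityDrives {
		writeQuorum++
	}
	return writeQuorum
}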
// PutObjectPart - reads an incoming stream and internally erasure-codes
// it. This call is similar to a single put operation but it is part
// of the multipart transaction.
//
// Implements S3 compatible Upload Part API.
func (er erasureObjects) PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, r *PutObjReader, opts ObjectOptions) (pi PartInfo, err error) {
	uploadIDLock := er.NewNSLock(bucket, pathJoin(object, uploadID))
	ctx, err = uploadIDLock.GetRLock(ctx, globalOperationTimeout)
	if err != nil {
		return PartInfo{}, err
	}
	readLocked := true
	defer func() {
		if readLocked {
			uploadIDLock.RUnlock()
		}
	}()

	data := r.Reader
	// Validate input data size; it can never be less than -1 (-1 denotes an
	// unknown size).
	if data.Size() < -1 {
		logger.LogIf(ctx, errInvalidArgument, logger.Application)
		return pi, toObjectErr(errInvalidArgument)
	}

	var partsMetadata []FileInfo
	var errs []error
	uploadIDPath := er.getUploadIDDir(bucket, object, uploadID)

	// Validates if upload ID exists.
	if err = er.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil {
		return pi, toObjectErr(err, bucket, object, uploadID)
	}

	storageDisks := er.getDisks()

	// Read metadata associated with the object from all disks.
	partsMetadata, errs = readAllFileInfo(ctx, storageDisks, minioMetaMultipartBucket,
		uploadIDPath, "", false)

	// Get quorum for this object.
	_, writeQuorum, err := objectQuorumFromMeta(ctx, partsMetadata, errs, er.defaultParityCount)
	if err != nil {
		return pi, toObjectErr(err, bucket, object)
	}

	reducedErr := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum)
	if reducedErr == errErasureWriteQuorum {
		return pi, toObjectErr(reducedErr, bucket, object)
	}

	// List all online disks.
	onlineDisks, modTime, dataDir := listOnlineDisks(storageDisks, partsMetadata, errs)

	// Pick the latest valid metadata.
	fi, err := pickValidFileInfo(ctx, partsMetadata, modTime, dataDir, writeQuorum)
	if err != nil {
		return pi, err
	}

	onlineDisks = shuffleDisks(onlineDisks, fi.Erasure.Distribution)

	// Need a unique name for the part being written in minioMetaBucket to
	// accommodate concurrent PutObjectPart requests.

	partSuffix := fmt.Sprintf("part.%d", partID)
	tmpPart := mustGetUUID()
	tmpPartPath := pathJoin(tmpPart, partSuffix)

	// Delete the temporary object part. If PutObjectPart succeeds, there will be nothing to delete.
	var online int
	defer func() {
		if online != len(onlineDisks) {
			er.deleteObject(context.Background(), minioMetaTmpBucket, tmpPart, writeQuorum)
		}
	}()

	erasure, err := NewErasure(ctx, fi.Erasure.DataBlocks, fi.Erasure.ParityBlocks, fi.Erasure.BlockSize)
	if err != nil {
		return pi, toObjectErr(err, bucket, object)
	}

	// Fetch a buffer for I/O: reuse one from the pool when possible,
	// otherwise allocate a new one.
	var buffer []byte
	switch size := data.Size(); {
	case size == 0:
		buffer = make([]byte, 1) // Allocate at least a byte to reach EOF
	case size == -1:
		if size := data.ActualSize(); size > 0 && size < fi.Erasure.BlockSize {
			buffer = make([]byte, data.ActualSize()+256, data.ActualSize()*2+512)
		} else {
			buffer = er.bp.Get()
			defer er.bp.Put(buffer)
		}
	case size >= fi.Erasure.BlockSize:
		buffer = er.bp.Get()
		defer er.bp.Put(buffer)
	case size < fi.Erasure.BlockSize:
		// No need to allocate a full fi.Erasure.BlockSize buffer if the incoming data is smaller.
		buffer = make([]byte, size, 2*size+int64(fi.Erasure.ParityBlocks+fi.Erasure.DataBlocks-1))
	}

	if len(buffer) > int(fi.Erasure.BlockSize) {
		buffer = buffer[:fi.Erasure.BlockSize]
	}
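	// Worked example of the sizing above (figures illustrative, not from the
	// source): with a 10MiB erasure block size, a 1MiB part takes the
	// small-part branch and allocates roughly a 1MiB buffer with extra
	// capacity for encoding, instead of borrowing a full block-sized buffer
	// from the pool; a 16MiB part takes the pool path and is then clipped
	// back to the block size by the check above.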
	writers := make([]io.Writer, len(onlineDisks))
	for i, disk := range onlineDisks {
		if disk == nil {
			continue
		}
		writers[i] = newBitrotWriter(disk, minioMetaTmpBucket, tmpPartPath,
			erasure.ShardFileSize(data.Size()), DefaultBitrotAlgorithm, erasure.ShardSize(), false)
	}

	n, err := erasure.Encode(ctx, data, writers, buffer, writeQuorum)
	closeBitrotWriters(writers)
	if err != nil {
		return pi, toObjectErr(err, bucket, object)
	}

	// Should return IncompleteBody{} error when reader has fewer bytes
	// than specified in request header.
	if n < data.Size() {
		return pi, IncompleteBody{Bucket: bucket, Object: object}
	}

	for i := range writers {
		if writers[i] == nil {
			onlineDisks[i] = nil
		}
	}

	// Unlock here before acquiring write locks; all concurrent
	// PutObjectParts will serialize here while updating `xl.meta`.
	uploadIDLock.RUnlock()
	readLocked = false
	ctx, err = uploadIDLock.GetLock(ctx, globalOperationTimeout)
	if err != nil {
		return PartInfo{}, err
	}
	defer uploadIDLock.Unlock()

	// Validates if upload ID exists.
	if err = er.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil {
		return pi, toObjectErr(err, bucket, object, uploadID)
	}

	// Rename temporary part file to its final location.
	partPath := pathJoin(uploadIDPath, fi.DataDir, partSuffix)
	onlineDisks, err = rename(ctx, onlineDisks, minioMetaTmpBucket, tmpPartPath, minioMetaMultipartBucket, partPath, false, writeQuorum, nil)
	if err != nil {
		return pi, toObjectErr(err, minioMetaMultipartBucket, partPath)
	}

	// Read metadata again because it might have been updated by a parallel upload of another part.
	partsMetadata, errs = readAllFileInfo(ctx, onlineDisks, minioMetaMultipartBucket, uploadIDPath, "", false)
	reducedErr = reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum)
	if reducedErr == errErasureWriteQuorum {
		return pi, toObjectErr(reducedErr, bucket, object)
	}

	// Get current highest version based on re-read partsMetadata.
	onlineDisks, modTime, dataDir = listOnlineDisks(onlineDisks, partsMetadata, errs)

	// Pick the latest valid metadata.
	fi, err = pickValidFileInfo(ctx, partsMetadata, modTime, dataDir, writeQuorum)
	if err != nil {
		return pi, err
	}

	// Once the part is successfully committed, proceed with updating erasure metadata.
	fi.ModTime = UTCNow()

	md5hex := r.MD5CurrentHexString()

	// Add the current part.
	fi.AddObjectPart(partID, md5hex, n, data.ActualSize())

	for i, disk := range onlineDisks {
		if disk == OfflineDisk {
			continue
		}
		partsMetadata[i].Size = fi.Size
		partsMetadata[i].ModTime = fi.ModTime
		partsMetadata[i].Parts = fi.Parts
		partsMetadata[i].Erasure.AddChecksumInfo(ChecksumInfo{
			PartNumber: partID,
			Algorithm:  DefaultBitrotAlgorithm,
			Hash:       bitrotWriterSum(writers[i]),
		})
	}

	// Write the updated `xl.meta` to each disk.
	if _, err = writeUniqueFileInfo(ctx, onlineDisks, minioMetaMultipartBucket, uploadIDPath, partsMetadata, writeQuorum); err != nil {
		return pi, toObjectErr(err, minioMetaMultipartBucket, uploadIDPath)
	}

	online = countOnlineDisks(onlineDisks)

	// Return success.
	return PartInfo{
		PartNumber:   partID,
		ETag:         md5hex,
		LastModified: fi.ModTime,
		Size:         fi.Size,
		ActualSize:   data.ActualSize(),
	}, nil
}
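// exampleCollectCompleteParts is an illustrative sketch with a hypothetical
// name: callers record the PartInfo returned by each PutObjectPart call and
// echo the part numbers and ETags back to CompleteMultipartUpload.
func exampleCollectCompleteParts(infos []PartInfo) []CompletePart {
	parts := make([]CompletePart, 0, len(infos))
	for _, info := range infos {
		parts = append(parts, CompletePart{
			PartNumber: info.PartNumber,
			ETag:       info.ETag,
		})
	}
	return parts
}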
// GetMultipartInfo returns multipart metadata uploaded during newMultipartUpload, used
// by callers to verify object states
// - encrypted
// - compressed
func (er erasureObjects) GetMultipartInfo(ctx context.Context, bucket, object, uploadID string, opts ObjectOptions) (MultipartInfo, error) {
	result := MultipartInfo{
		Bucket:   bucket,
		Object:   object,
		UploadID: uploadID,
	}

	var err error
	uploadIDLock := er.NewNSLock(bucket, pathJoin(object, uploadID))
	ctx, err = uploadIDLock.GetRLock(ctx, globalOperationTimeout)
	if err != nil {
		return MultipartInfo{}, err
	}
	defer uploadIDLock.RUnlock()

	if err := er.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil {
		return result, toObjectErr(err, bucket, object, uploadID)
	}

	uploadIDPath := er.getUploadIDDir(bucket, object, uploadID)

	storageDisks := er.getDisks()

	// Read metadata associated with the object from all disks.
	partsMetadata, errs := readAllFileInfo(ctx, storageDisks, minioMetaMultipartBucket, uploadIDPath, opts.VersionID, false)

	// Get quorum for this object.
	readQuorum, _, err := objectQuorumFromMeta(ctx, partsMetadata, errs, er.defaultParityCount)
	if err != nil {
		return result, toObjectErr(err, minioMetaMultipartBucket, uploadIDPath)
	}

	// This is a read path, so reduce errors against the read quorum.
	reducedErr := reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, readQuorum)
	if reducedErr == errErasureReadQuorum {
		return result, toObjectErr(reducedErr, minioMetaMultipartBucket, uploadIDPath)
	}

	_, modTime, dataDir := listOnlineDisks(storageDisks, partsMetadata, errs)

	// Pick the latest valid metadata.
	fi, err := pickValidFileInfo(ctx, partsMetadata, modTime, dataDir, readQuorum)
	if err != nil {
		return result, err
	}

	result.UserDefined = cloneMSS(fi.Metadata)
	return result, nil
}
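// exampleIsCompressed is an illustrative sketch with a hypothetical name of
// the kind of state check GetMultipartInfo enables: inspecting the
// user-defined metadata captured at upload initiation. The exact metadata
// key is an assumption for illustration, not a constant defined in this file.
func exampleIsCompressed(info MultipartInfo) bool {
	_, ok := info.UserDefined[ReservedMetadataPrefix+"compression"]
	return ok
}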
// ListObjectParts - lists all previously uploaded parts for a given
// object and uploadID. Takes additional input of part-number-marker
// to indicate where the listing should begin from.
//
// Implements S3 compatible ListObjectParts API. The resulting
// ListPartsInfo structure is marshaled directly into XML and
// replied back to the client.
func (er erasureObjects) ListObjectParts(ctx context.Context, bucket, object, uploadID string, partNumberMarker, maxParts int, opts ObjectOptions) (result ListPartsInfo, err error) {
	uploadIDLock := er.NewNSLock(bucket, pathJoin(object, uploadID))
	ctx, err = uploadIDLock.GetRLock(ctx, globalOperationTimeout)
	if err != nil {
		return ListPartsInfo{}, err
	}
	defer uploadIDLock.RUnlock()

	if err := er.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil {
		return result, toObjectErr(err, bucket, object, uploadID)
	}

	uploadIDPath := er.getUploadIDDir(bucket, object, uploadID)

	storageDisks := er.getDisks()

	// Read metadata associated with the object from all disks.
	partsMetadata, errs := readAllFileInfo(ctx, storageDisks, minioMetaMultipartBucket, uploadIDPath, "", false)

	// Get quorum for this object.
	_, writeQuorum, err := objectQuorumFromMeta(ctx, partsMetadata, errs, er.defaultParityCount)
	if err != nil {
		return result, toObjectErr(err, minioMetaMultipartBucket, uploadIDPath)
	}

	reducedErr := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum)
	if reducedErr == errErasureWriteQuorum {
		return result, toObjectErr(reducedErr, minioMetaMultipartBucket, uploadIDPath)
	}

	_, modTime, dataDir := listOnlineDisks(storageDisks, partsMetadata, errs)

	// Pick the latest valid metadata.
	fi, err := pickValidFileInfo(ctx, partsMetadata, modTime, dataDir, writeQuorum)
	if err != nil {
		return result, err
	}

	// Populate the result stub.
	result.Bucket = bucket
	result.Object = object
	result.UploadID = uploadID
	result.MaxParts = maxParts
	result.PartNumberMarker = partNumberMarker
	result.UserDefined = cloneMSS(fi.Metadata)

	// If there are no parts or maxParts is zero, return right away.
	if len(fi.Parts) == 0 || maxParts == 0 {
		return result, nil
	}

	// Limit output to maxPartsList.
	if maxParts > maxPartsList {
		maxParts = maxPartsList
	}

	// Only parts with higher part numbers will be listed.
	partIdx := objectPartIndex(fi.Parts, partNumberMarker)
	parts := fi.Parts
	if partIdx != -1 {
		parts = fi.Parts[partIdx+1:]
	}
	count := maxParts
	for _, part := range parts {
		result.Parts = append(result.Parts, PartInfo{
			PartNumber:   part.Number,
			ETag:         part.ETag,
			LastModified: fi.ModTime,
			Size:         part.Size,
		})
		count--
		if count == 0 {
			break
		}
	}
	// If the listed entries exceed maxParts, set IsTruncated to true.
	if len(parts) > len(result.Parts) {
		result.IsTruncated = true
		// Make sure to fill the next part number marker if IsTruncated is
		// true, for subsequent listing.
		nextPartNumberMarker := result.Parts[len(result.Parts)-1].PartNumber
		result.NextPartNumberMarker = nextPartNumberMarker
	}
	return result, nil
}
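// exampleDrainParts is an illustrative sketch with a hypothetical name that
// pages through the listing above, feeding NextPartNumberMarker back in
// until IsTruncated is false.
func exampleDrainParts(ctx context.Context, er erasureObjects, bucket, object, uploadID string) ([]PartInfo, error) {
	var all []PartInfo
	marker := 0
	for {
		result, err := er.ListObjectParts(ctx, bucket, object, uploadID, marker, maxPartsList, ObjectOptions{})
		if err != nil {
			return nil, err
		}
		all = append(all, result.Parts...)
		if !result.IsTruncated {
			return all, nil
		}
		marker = result.NextPartNumberMarker
	}
}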
// CompleteMultipartUpload - completes an ongoing multipart
// transaction after receiving all the parts indicated by the client.
// Returns an md5sum calculated by concatenating all the individual
// md5sums of all the parts.
//
// Implements S3 compatible Complete multipart API.
func (er erasureObjects) CompleteMultipartUpload(ctx context.Context, bucket string, object string, uploadID string, parts []CompletePart, opts ObjectOptions) (oi ObjectInfo, err error) {
	// Hold read locks to verify uploaded parts; this also disallows
	// parallel part uploads.
	uploadIDLock := er.NewNSLock(bucket, pathJoin(object, uploadID))
	ctx, err = uploadIDLock.GetRLock(ctx, globalOperationTimeout)
	if err != nil {
		return oi, err
	}
	defer uploadIDLock.RUnlock()

	if err = er.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil {
		return oi, toObjectErr(err, bucket, object, uploadID)
	}

	// Check if an object is present as one of the parent directories.
	// -- FIXME (needs a new kind of lock).
	if opts.ParentIsObject != nil && opts.ParentIsObject(ctx, bucket, path.Dir(object)) {
		return oi, toObjectErr(errFileParentIsFile, bucket, object)
	}

	defer ObjectPathUpdated(pathJoin(bucket, object))

	// Calculate s3 compatible md5sum for complete multipart.
	s3MD5 := getCompleteMultipartMD5(parts)

	uploadIDPath := er.getUploadIDDir(bucket, object, uploadID)

	storageDisks := er.getDisks()

	// Read metadata associated with the object from all disks.
	partsMetadata, errs := readAllFileInfo(ctx, storageDisks, minioMetaMultipartBucket, uploadIDPath, "", false)

	// Get quorum for this object.
	_, writeQuorum, err := objectQuorumFromMeta(ctx, partsMetadata, errs, er.defaultParityCount)
	if err != nil {
		return oi, toObjectErr(err, bucket, object)
	}

	reducedErr := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum)
	if reducedErr == errErasureWriteQuorum {
		return oi, toObjectErr(reducedErr, bucket, object)
	}

	onlineDisks, modTime, dataDir := listOnlineDisks(storageDisks, partsMetadata, errs)

	// Pick the latest valid metadata.
	fi, err := pickValidFileInfo(ctx, partsMetadata, modTime, dataDir, writeQuorum)
	if err != nil {
		return oi, err
	}

	// Calculate full object size.
	var objectSize int64

	// Calculate consolidated actual size.
	var objectActualSize int64

	// Order online disks in accordance with distribution order.
	// Order parts metadata in accordance with distribution order.
	onlineDisks, partsMetadata = shuffleDisksAndPartsMetadataByIndex(onlineDisks, partsMetadata, fi)

	// Save current erasure metadata for validation.
	currentFI := fi

	// Allocate parts similar to incoming slice.
	fi.Parts = make([]ObjectPartInfo, len(parts))

	// Validate each part and then commit to disk.
	for i, part := range parts {
		partIdx := objectPartIndex(currentFI.Parts, part.PartNumber)
		// The requested part must exist among the uploaded parts.
		if partIdx == -1 {
			invp := InvalidPart{
				PartNumber: part.PartNumber,
				GotETag:    part.ETag,
			}
			return oi, invp
		}

		// Ensure that the part ETag is canonicalized to strip off extraneous quotes.
		part.ETag = canonicalizeETag(part.ETag)
		if currentFI.Parts[partIdx].ETag != part.ETag {
			invp := InvalidPart{
				PartNumber: part.PartNumber,
				ExpETag:    currentFI.Parts[partIdx].ETag,
				GotETag:    part.ETag,
			}
			return oi, invp
		}

		// All parts except the last part have to be at least 5MB.
		if (i < len(parts)-1) && !isMinAllowedPartSize(currentFI.Parts[partIdx].ActualSize) {
			return oi, PartTooSmall{
				PartNumber: part.PartNumber,
				PartSize:   currentFI.Parts[partIdx].ActualSize,
				PartETag:   part.ETag,
			}
		}

		// Save for total object size.
		objectSize += currentFI.Parts[partIdx].Size

		// Save the consolidated actual size.
		objectActualSize += currentFI.Parts[partIdx].ActualSize

		// Add incoming parts.
		fi.Parts[i] = ObjectPartInfo{
			Number:     part.PartNumber,
			Size:       currentFI.Parts[partIdx].Size,
			ActualSize: currentFI.Parts[partIdx].ActualSize,
		}
	}

	// Save the final object size and modtime.
	fi.Size = objectSize
	fi.ModTime = opts.MTime
	if opts.MTime.IsZero() {
		fi.ModTime = UTCNow()
	}
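	// For reference, the s3MD5 computed earlier follows the S3 multipart
	// ETag convention: the hex MD5 of the concatenated binary part MD5s,
	// suffixed with "-<part count>", e.g.
	// "9b2cf535f27731c974343645a3985328-3" for a three-part upload (digest
	// value illustrative).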
	// Save successfully calculated md5sum.
	fi.Metadata["etag"] = s3MD5
	if opts.UserDefined["etag"] != "" { // preserve ETag if set
		fi.Metadata["etag"] = opts.UserDefined["etag"]
	}

	// Save the consolidated actual size.
	fi.Metadata[ReservedMetadataPrefix+"actual-size"] = strconv.FormatInt(objectActualSize, 10)

	// Update all erasure metadata; make sure not to modify fields like
	// checksums which are different on each disk.
	for index := range partsMetadata {
		if partsMetadata[index].IsValid() {
			partsMetadata[index].Size = fi.Size
			partsMetadata[index].ModTime = fi.ModTime
			partsMetadata[index].Metadata = fi.Metadata
			partsMetadata[index].Parts = fi.Parts
		}
	}

	// Write final `xl.meta` at uploadID location.
	if onlineDisks, err = writeUniqueFileInfo(ctx, onlineDisks, minioMetaMultipartBucket, uploadIDPath, partsMetadata, writeQuorum); err != nil {
		return oi, toObjectErr(err, minioMetaMultipartBucket, uploadIDPath)
	}

	// Remove parts that weren't present in the CompleteMultipartUpload request.
	for _, curpart := range currentFI.Parts {
		if objectPartIndex(fi.Parts, curpart.Number) == -1 {
			// Delete the missing part files. e.g,
			// Request 1: NewMultipart
			// Request 2: PutObjectPart 1
			// Request 3: PutObjectPart 2
			// Request 4: CompleteMultipartUpload --part 2
			// N.B. the 1st part is not present. This part should be removed from the storage.
			er.removeObjectPart(bucket, object, uploadID, fi.DataDir, curpart.Number)
		}
	}

	// Hold namespace lock to complete the transaction.
	lk := er.NewNSLock(bucket, object)
	ctx, err = lk.GetLock(ctx, globalOperationTimeout)
	if err != nil {
		return oi, err
	}
	defer lk.Unlock()

	// Rename the multipart object to its final location.
	if onlineDisks, err = renameData(ctx, onlineDisks, minioMetaMultipartBucket, uploadIDPath,
		partsMetadata, bucket, object, writeQuorum); err != nil {
		return oi, toObjectErr(err, bucket, object)
	}

	// Check if there is any offline disk and add it to the MRF list.
	for _, disk := range onlineDisks {
		if disk != nil && disk.IsOnline() {
			continue
		}
		er.addPartial(bucket, object, fi.VersionID)
		break
	}

	for i := 0; i < len(onlineDisks); i++ {
		if onlineDisks[i] != nil && onlineDisks[i].IsOnline() {
			// Object info is the same on all disks, so we can pick
			// the first meta from an online disk.
			fi = partsMetadata[i]
			break
		}
	}

	// Success, return object info.
	return fi.ToObjectInfo(bucket, object), nil
}
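// exampleMultipartLifecycle is an illustrative sketch with a hypothetical
// name of the call sequence the APIs above implement, omitting the part-data
// plumbing: initiate the upload, then either complete it with the collected
// part ETags or abort it so nothing lingers.
func exampleMultipartLifecycle(ctx context.Context, er erasureObjects, bucket, object string, parts []CompletePart) (ObjectInfo, error) {
	uploadID, err := er.NewMultipartUpload(ctx, bucket, object, ObjectOptions{})
	if err != nil {
		return ObjectInfo{}, err
	}
	// `parts` is assumed to hold the part numbers and ETags collected from
	// earlier PutObjectPart calls made by the caller.
	oi, err := er.CompleteMultipartUpload(ctx, bucket, object, uploadID, parts, ObjectOptions{})
	if err != nil {
		// On failure, abort so the stale upload does not wait for the
		// background cleanup to purge it.
		_ = er.AbortMultipartUpload(ctx, bucket, object, uploadID, ObjectOptions{})
		return ObjectInfo{}, err
	}
	return oi, nil
}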
// AbortMultipartUpload - aborts an ongoing multipart operation
// signified by the input uploadID. This is an atomic operation; it
// doesn't require clients to initiate multiple such requests.
//
// All parts are purged from all disks and the reference to the uploadID
// is removed from the system; rollback is not possible on this
// operation.
func (er erasureObjects) AbortMultipartUpload(ctx context.Context, bucket, object, uploadID string, opts ObjectOptions) (err error) {
	lk := er.NewNSLock(bucket, pathJoin(object, uploadID))
	ctx, err = lk.GetLock(ctx, globalOperationTimeout)
	if err != nil {
		return err
	}
	defer lk.Unlock()

	// Validates if upload ID exists.
	if err := er.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil {
		return toObjectErr(err, bucket, object, uploadID)
	}

	uploadIDPath := er.getUploadIDDir(bucket, object, uploadID)

	// Read metadata associated with the object from all disks.
	partsMetadata, errs := readAllFileInfo(ctx, er.getDisks(), minioMetaMultipartBucket, uploadIDPath, "", false)

	// Get quorum for this object.
	_, writeQuorum, err := objectQuorumFromMeta(ctx, partsMetadata, errs, er.defaultParityCount)
	if err != nil {
		return toObjectErr(err, bucket, object, uploadID)
	}

	// Cleanup all uploaded parts.
	if err = er.deleteObject(ctx, minioMetaMultipartBucket, uploadIDPath, writeQuorum); err != nil {
		return toObjectErr(err, bucket, object, uploadID)
	}

	// Successfully purged.
	return nil
}