github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/erasure-multipart.go

     1  // Copyright (c) 2015-2023 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package cmd
    19  
    20  import (
    21  	"context"
    22  	"encoding/base64"
    23  	"errors"
    24  	"fmt"
    25  	"io"
    26  	"os"
    27  	"path"
    28  	"sort"
    29  	"strconv"
    30  	"strings"
    31  	"sync"
    32  	"time"
    33  
    34  	"github.com/klauspost/readahead"
    35  	"github.com/minio/minio-go/v7/pkg/set"
    36  	"github.com/minio/minio/internal/config/storageclass"
    37  	"github.com/minio/minio/internal/crypto"
    38  	"github.com/minio/minio/internal/hash"
    39  	xhttp "github.com/minio/minio/internal/http"
    40  	xioutil "github.com/minio/minio/internal/ioutil"
    41  	"github.com/minio/minio/internal/logger"
    42  	"github.com/minio/pkg/v2/mimedb"
    43  	"github.com/minio/pkg/v2/sync/errgroup"
    44  )
    45  
    46  func (er erasureObjects) getUploadIDDir(bucket, object, uploadID string) string {
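        	// Client-facing upload IDs are base64url("<deploymentID>.<uploadUUID>");
        	// strip the deployment ID prefix to recover the on-disk upload UUID.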
    47  	uploadUUID := uploadID
    48  	uploadBytes, err := base64.RawURLEncoding.DecodeString(uploadID)
    49  	if err == nil {
    50  		slc := strings.SplitN(string(uploadBytes), ".", 2)
    51  		if len(slc) == 2 {
    52  			uploadUUID = slc[1]
    53  		}
    54  	}
    55  	return pathJoin(er.getMultipartSHADir(bucket, object), uploadUUID)
    56  }
    57  
    58  func (er erasureObjects) getMultipartSHADir(bucket, object string) string {
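        	// All uploads for a given bucket/object share a directory named SHA256(bucket/object).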
    59  	return getSHA256Hash([]byte(pathJoin(bucket, object)))
    60  }
    61  
    62  // checkUploadIDExists - verify if a given uploadID exists and is valid.
    63  func (er erasureObjects) checkUploadIDExists(ctx context.Context, bucket, object, uploadID string, write bool) (fi FileInfo, metArr []FileInfo, err error) {
    64  	defer func() {
    65  		if errors.Is(err, errFileNotFound) {
    66  			err = errUploadIDNotFound
    67  		}
    68  	}()
    69  
    70  	uploadIDPath := er.getUploadIDDir(bucket, object, uploadID)
    71  
    72  	storageDisks := er.getDisks()
    73  
    74  	// Read metadata associated with the object from all disks.
    75  	partsMetadata, errs := readAllFileInfo(ctx, storageDisks, bucket, minioMetaMultipartBucket,
    76  		uploadIDPath, "", false, false)
    77  
    78  	readQuorum, writeQuorum, err := objectQuorumFromMeta(ctx, partsMetadata, errs, er.defaultParityCount)
    79  	if err != nil {
    80  		return fi, nil, err
    81  	}
    82  
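        	// Callers that intend to write must meet write quorum; read-only callers need only read quorum.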
    83  	quorum := readQuorum
    84  	if write {
    85  		quorum = writeQuorum
    86  	}
    87  
    88  	// List all online disks.
    89  	_, modTime, etag := listOnlineDisks(storageDisks, partsMetadata, errs, quorum)
    90  
    91  	var reducedErr error
    92  	if write {
    93  		reducedErr = reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum)
    94  	} else {
    95  		reducedErr = reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, readQuorum)
    96  	}
    97  	if reducedErr != nil {
    98  		return fi, nil, reducedErr
    99  	}
   100  
   101  	// Pick one from the first valid metadata.
   102  	fi, err = pickValidFileInfo(ctx, partsMetadata, modTime, etag, quorum)
   103  	return fi, partsMetadata, err
   104  }
   105  
   106  // Removes part.N.meta for the given part number of a multipart upload from minioMetaMultipartBucket
   107  func (er erasureObjects) removePartMeta(bucket, object, uploadID, dataDir string, partNumber int) {
   108  	uploadIDPath := er.getUploadIDDir(bucket, object, uploadID)
   109  	curpartPath := pathJoin(uploadIDPath, dataDir, fmt.Sprintf("part.%d", partNumber))
   110  	storageDisks := er.getDisks()
   111  
   112  	g := errgroup.WithNErrs(len(storageDisks))
   113  	for index, disk := range storageDisks {
   114  		if disk == nil {
   115  			continue
   116  		}
   117  		index := index
   118  		g.Go(func() error {
   119  			_ = storageDisks[index].Delete(context.TODO(), minioMetaMultipartBucket, curpartPath+".meta", DeleteOptions{
   120  				Recursive: false,
   121  				Immediate: false,
   122  			})
   123  
   124  			return nil
   125  		}, index)
   126  	}
   127  	g.Wait()
   128  }
   129  
   130  // Removes the part file for the given part number of a multipart upload from minioMetaMultipartBucket
   131  func (er erasureObjects) removeObjectPart(bucket, object, uploadID, dataDir string, partNumber int) {
   132  	uploadIDPath := er.getUploadIDDir(bucket, object, uploadID)
   133  	curpartPath := pathJoin(uploadIDPath, dataDir, fmt.Sprintf("part.%d", partNumber))
   134  	storageDisks := er.getDisks()
   135  
   136  	g := errgroup.WithNErrs(len(storageDisks))
   137  	for index, disk := range storageDisks {
   138  		if disk == nil {
   139  			continue
   140  		}
   141  		index := index
   142  		g.Go(func() error {
   143  			// Ignoring failure to remove parts that weren't present in CompleteMultipartUpload
   144  			// requests. xl.meta is the authoritative source of truth on which parts constitute
   145  			// the object. The presence of parts that don't belong in the object doesn't affect correctness.
   146  			_ = storageDisks[index].Delete(context.TODO(), minioMetaMultipartBucket, curpartPath, DeleteOptions{
   147  				Recursive: false,
   148  				Immediate: false,
   149  			})
   150  			_ = storageDisks[index].Delete(context.TODO(), minioMetaMultipartBucket, curpartPath+".meta", DeleteOptions{
   151  				Recursive: false,
   152  				Immediate: false,
   153  			})
   154  
   155  			return nil
   156  		}, index)
   157  	}
   158  	g.Wait()
   159  }
   160  
   161  // Clean up old multipart uploads. Should be run in a goroutine.
   162  func (er erasureObjects) cleanupStaleUploads(ctx context.Context, expiry time.Duration) {
   163  	// run multiple cleanups local to this server.
   164  	var wg sync.WaitGroup
   165  	for _, disk := range er.getLocalDisks() {
   166  		if disk != nil {
   167  			wg.Add(1)
   168  			go func(disk StorageAPI) {
   169  				defer wg.Done()
   170  				er.cleanupStaleUploadsOnDisk(ctx, disk, expiry)
   171  			}(disk)
   172  		}
   173  	}
   174  	wg.Wait()
   175  }
   176  
   177  func (er erasureObjects) deleteAll(ctx context.Context, bucket, prefix string) {
   178  	var wg sync.WaitGroup
   179  	for _, disk := range er.getDisks() {
   180  		if disk == nil {
   181  			continue
   182  		}
   183  		wg.Add(1)
   184  		go func(disk StorageAPI) {
   185  			defer wg.Done()
   186  			disk.Delete(ctx, bucket, prefix, DeleteOptions{
   187  				Recursive: true,
   188  				Immediate: false,
   189  			})
   190  		}(disk)
   191  	}
   192  	wg.Wait()
   193  }
   194  
   195  // Remove the old multipart uploads on the given disk.
   196  func (er erasureObjects) cleanupStaleUploadsOnDisk(ctx context.Context, disk StorageAPI, expiry time.Duration) {
   197  	now := time.Now()
   198  	diskPath := disk.Endpoint().Path
   199  
   200  	readDirFn(pathJoin(diskPath, minioMetaMultipartBucket), func(shaDir string, typ os.FileMode) error {
   201  		readDirFn(pathJoin(diskPath, minioMetaMultipartBucket, shaDir), func(uploadIDDir string, typ os.FileMode) error {
   202  			uploadIDPath := pathJoin(shaDir, uploadIDDir)
   203  			fi, err := disk.ReadVersion(ctx, "", minioMetaMultipartBucket, uploadIDPath, "", ReadOptions{})
   204  			if err != nil {
   205  				return nil
   206  			}
   207  			w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
   208  			return w.Run(func() error {
   209  				wait := deletedCleanupSleeper.Timer(ctx)
   210  				if now.Sub(fi.ModTime) > expiry {
   211  					removeAll(pathJoin(diskPath, minioMetaMultipartBucket, uploadIDPath))
   212  				}
   213  				wait()
   214  				return nil
   215  			})
   216  		})
   217  		vi, err := disk.StatVol(ctx, pathJoin(minioMetaMultipartBucket, shaDir))
   218  		if err != nil {
   219  			return nil
   220  		}
   221  		w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
   222  		return w.Run(func() error {
   223  			wait := deletedCleanupSleeper.Timer(ctx)
   224  			if now.Sub(vi.Created) > expiry {
   225  				// We are not deleting shaDir recursively here; if shaDir is empty
   226  				// and it is older than expiry, we can happily delete it.
   227  				Remove(pathJoin(diskPath, minioMetaMultipartBucket, shaDir))
   228  			}
   229  			wait()
   230  			return nil
   231  		})
   232  	})
   233  
   234  	readDirFn(pathJoin(diskPath, minioMetaTmpBucket), func(tmpDir string, typ os.FileMode) error {
   235  		if tmpDir == ".trash/" { // do not remove .trash/ here, it has its own routines
   236  			return nil
   237  		}
   238  		vi, err := disk.StatVol(ctx, pathJoin(minioMetaTmpBucket, tmpDir))
   239  		if err != nil {
   240  			return nil
   241  		}
   242  		w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
   243  		return w.Run(func() error {
   244  			wait := deletedCleanupSleeper.Timer(ctx)
   245  			if now.Sub(vi.Created) > expiry {
   246  				removeAll(pathJoin(diskPath, minioMetaTmpBucket, tmpDir))
   247  			}
   248  			wait()
   249  			return nil
   250  		})
   251  	})
   252  }
   253  
   254  // ListMultipartUploads - lists all the pending multipart
   255  // uploads for a particular object in a bucket.
   256  //
   257  // Implements minimal S3 compatible ListMultipartUploads API. We do
   258  // not support prefix-based listing; this is a deliberate attempt
   259  // towards simplification of multipart APIs.
   260  // The resulting ListMultipartsInfo structure is marshaled directly into XML.
   261  func (er erasureObjects) ListMultipartUploads(ctx context.Context, bucket, object, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (result ListMultipartsInfo, err error) {
   262  	auditObjectErasureSet(ctx, object, &er)
   263  
   264  	result.MaxUploads = maxUploads
   265  	result.KeyMarker = keyMarker
   266  	result.Prefix = object
   267  	result.Delimiter = delimiter
   268  
   269  	var uploadIDs []string
   270  	var disk StorageAPI
   271  	disks := er.getOnlineLocalDisks()
   272  	if len(disks) == 0 {
   273  		// using er.getOnlineLocalDisks() has one side effect: on a pooled
   274  		// setup where all disks are remote, add a fallback
   275  		disks = er.getOnlineDisks()
   276  	}
   277  	for _, disk = range disks {
   278  		if disk == nil {
   279  			continue
   280  		}
   281  		if !disk.IsOnline() {
   282  			continue
   283  		}
   284  		uploadIDs, err = disk.ListDir(ctx, bucket, minioMetaMultipartBucket, er.getMultipartSHADir(bucket, object), -1)
   285  		if err != nil {
   286  			if errors.Is(err, errDiskNotFound) {
   287  				continue
   288  			}
   289  			if errors.Is(err, errFileNotFound) {
   290  				return result, nil
   291  			}
   292  			return result, toObjectErr(err, bucket, object)
   293  		}
   294  		break
   295  	}
   296  
   297  	for i := range uploadIDs {
   298  		uploadIDs[i] = strings.TrimSuffix(uploadIDs[i], SlashSeparator)
   299  	}
   300  
   301  	// S3 spec says uploadIDs should be sorted based on initiated time, we need
   302  	// to read the metadata entry.
   303  	var uploads []MultipartInfo
   304  
   305  	populatedUploadIDs := set.NewStringSet()
   306  
   307  	for _, uploadID := range uploadIDs {
   308  		if populatedUploadIDs.Contains(uploadID) {
   309  			continue
   310  		}
   311  		// If present, use time stored in ID.
   312  		startTime := time.Now()
   313  		if split := strings.Split(uploadID, "x"); len(split) == 2 {
   314  			t, err := strconv.ParseInt(split[1], 10, 64)
   315  			if err == nil {
   316  				startTime = time.Unix(0, t)
   317  			}
   318  		}
   319  		uploads = append(uploads, MultipartInfo{
   320  			Bucket:    bucket,
   321  			Object:    object,
   322  			UploadID:  base64.RawURLEncoding.EncodeToString([]byte(fmt.Sprintf("%s.%s", globalDeploymentID(), uploadID))),
   323  			Initiated: startTime,
   324  		})
   325  		populatedUploadIDs.Add(uploadID)
   326  	}
   327  
   328  	sort.Slice(uploads, func(i int, j int) bool {
   329  		return uploads[i].Initiated.Before(uploads[j].Initiated)
   330  	})
   331  
   332  	uploadIndex := 0
   333  	if uploadIDMarker != "" {
   334  		for uploadIndex < len(uploads) {
   335  			if uploads[uploadIndex].UploadID != uploadIDMarker {
   336  				uploadIndex++
   337  				continue
   338  			}
   339  			if uploads[uploadIndex].UploadID == uploadIDMarker {
   340  				uploadIndex++
   341  				break
   342  			}
   343  			uploadIndex++
   344  		}
   345  	}
   346  	for uploadIndex < len(uploads) {
   347  		result.Uploads = append(result.Uploads, uploads[uploadIndex])
   348  		result.NextUploadIDMarker = uploads[uploadIndex].UploadID
   349  		uploadIndex++
   350  		if len(result.Uploads) == maxUploads {
   351  			break
   352  		}
   353  	}
   354  
   355  	result.IsTruncated = uploadIndex < len(uploads)
   356  
   357  	if !result.IsTruncated {
   358  		result.NextKeyMarker = ""
   359  		result.NextUploadIDMarker = ""
   360  	}
   361  
   362  	return result, nil
   363  }
   364  
   365  // newMultipartUpload - wrapper for initializing a new multipart
   366  // request; returns a unique upload id.
   367  //
   368  // Internally this function writes the upload's `xl.meta` for the
   369  // incoming object under
   370  // '.minio.sys/multipart/<SHA256(bucket/object)>/<uploadUUID>/' on all the
   371  // disks. This metadata tracks the ongoing multipart
   372  // operation(s) on the object.
   373  func (er erasureObjects) newMultipartUpload(ctx context.Context, bucket string, object string, opts ObjectOptions) (*NewMultipartUploadResult, error) {
   374  	if opts.CheckPrecondFn != nil {
   375  		// Lock the object before reading.
   376  		lk := er.NewNSLock(bucket, object)
   377  		lkctx, err := lk.GetRLock(ctx, globalOperationTimeout)
   378  		if err != nil {
   379  			return nil, err
   380  		}
   381  		rctx := lkctx.Context()
   382  		obj, err := er.getObjectInfo(rctx, bucket, object, opts)
   383  		lk.RUnlock(lkctx)
   384  		if err != nil && !isErrVersionNotFound(err) && !isErrObjectNotFound(err) {
   385  			return nil, err
   386  		}
   387  		if opts.CheckPrecondFn(obj) {
   388  			return nil, PreConditionFailed{}
   389  		}
   390  	}
   391  
   392  	userDefined := cloneMSS(opts.UserDefined)
   393  	if opts.PreserveETag != "" {
   394  		userDefined["etag"] = opts.PreserveETag
   395  	}
   396  	onlineDisks := er.getDisks()
   397  
   398  	// Get parity and data drive count based on storage class metadata
   399  	parityDrives := globalStorageClass.GetParityForSC(userDefined[xhttp.AmzStorageClass])
   400  	if parityDrives < 0 {
   401  		parityDrives = er.defaultParityCount
   402  	}
   403  
   404  	if globalStorageClass.AvailabilityOptimized() {
   405  		// If we have offline disks, increase the parity count for this object.
   406  		parityOrig := parityDrives
   407  
   408  		var offlineDrives int
   409  		for _, disk := range onlineDisks {
   410  			if disk == nil || !disk.IsOnline() {
   411  				parityDrives++
   412  				offlineDrives++
   413  				continue
   414  			}
   415  		}
   416  
   417  		if offlineDrives >= (len(onlineDisks)+1)/2 {
   418  			// if offline drives are more than 50% of the drives,
   419  			// we have no quorum; we shouldn't proceed and just
   420  			// fail at that point.
   421  			return nil, toObjectErr(errErasureWriteQuorum, bucket, object)
   422  		}
   423  
   424  		if parityDrives >= len(onlineDisks)/2 {
   425  			parityDrives = len(onlineDisks) / 2
   426  		}
   427  
   428  		if parityOrig != parityDrives {
   429  			userDefined[minIOErasureUpgraded] = strconv.Itoa(parityOrig) + "->" + strconv.Itoa(parityDrives)
   430  		}
   431  	}
   432  
   433  	dataDrives := len(onlineDisks) - parityDrives
   434  
   435  	// we now know the number of blocks this object needs for data and parity.
   436  	// establish the writeQuorum using this data
   437  	writeQuorum := dataDrives
   438  	if dataDrives == parityDrives {
   439  		writeQuorum++
   440  	}
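        	// For example (illustrative): with 16 drives and parity 4, dataDrives is 12 and
        	// writeQuorum is 12; with 16 drives and parity 8 (data == parity), writeQuorum is 9.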
   441  
   442  	// Initialize parts metadata
   443  	partsMetadata := make([]FileInfo, len(onlineDisks))
   444  
   445  	fi := newFileInfo(pathJoin(bucket, object), dataDrives, parityDrives)
   446  	fi.VersionID = opts.VersionID
   447  	if opts.Versioned && fi.VersionID == "" {
   448  		fi.VersionID = mustGetUUID()
   449  	}
   450  	fi.DataDir = mustGetUUID()
   451  
   452  	// Initialize erasure metadata.
   453  	for index := range partsMetadata {
   454  		partsMetadata[index] = fi
   455  	}
   456  
   457  	// Guess content-type from the extension if possible.
   458  	if userDefined["content-type"] == "" {
   459  		userDefined["content-type"] = mimedb.TypeByExtension(path.Ext(object))
   460  	}
   461  
   462  	// if storageClass is STANDARD, there is no need to save it as part of metadata.
   463  	if userDefined[xhttp.AmzStorageClass] == storageclass.STANDARD {
   464  		delete(userDefined, xhttp.AmzStorageClass)
   465  	}
   466  
   467  	if opts.WantChecksum != nil && opts.WantChecksum.Type.IsSet() {
   468  		userDefined[hash.MinIOMultipartChecksum] = opts.WantChecksum.Type.String()
   469  	}
   470  
   471  	modTime := opts.MTime
   472  	if opts.MTime.IsZero() {
   473  		modTime = UTCNow()
   474  	}
   475  
   476  	onlineDisks, partsMetadata = shuffleDisksAndPartsMetadata(onlineDisks, partsMetadata, fi)
   477  
   478  	// Fill all the necessary metadata.
   479  	// Update `xl.meta` content on each disks.
   480  	for index := range partsMetadata {
   481  		partsMetadata[index].Fresh = true
   482  		partsMetadata[index].ModTime = modTime
   483  		partsMetadata[index].Metadata = userDefined
   484  	}
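        	// The client-facing upload ID embeds the deployment ID; getUploadIDDir strips it
        	// back off on subsequent requests.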
   485  	uploadUUID := mustGetUUID()
   486  	uploadID := base64.RawURLEncoding.EncodeToString([]byte(fmt.Sprintf("%s.%s", globalDeploymentID(), uploadUUID)))
   487  	uploadIDPath := er.getUploadIDDir(bucket, object, uploadUUID)
   488  
   489  	// Write updated `xl.meta` to all disks.
   490  	if _, err := writeUniqueFileInfo(ctx, onlineDisks, bucket, minioMetaMultipartBucket, uploadIDPath, partsMetadata, writeQuorum); err != nil {
   491  		return nil, toObjectErr(err, bucket, object)
   492  	}
   493  	return &NewMultipartUploadResult{
   494  		UploadID:     uploadID,
   495  		ChecksumAlgo: userDefined[hash.MinIOMultipartChecksum],
   496  	}, nil
   497  }
   498  
   499  // NewMultipartUpload - initialize a new multipart upload, returns a
   500  // unique id. The unique id returned here is of UUID form; each
   501  // subsequent request yields a new, distinct id.
   502  //
   503  // Implements S3 compatible initiate multipart API.
   504  func (er erasureObjects) NewMultipartUpload(ctx context.Context, bucket, object string, opts ObjectOptions) (*NewMultipartUploadResult, error) {
   505  	if !opts.NoAuditLog {
   506  		auditObjectErasureSet(ctx, object, &er)
   507  	}
   508  
   509  	return er.newMultipartUpload(ctx, bucket, object, opts)
   510  }
   511  
   512  // renamePart - renames multipart part to its relevant location under uploadID.
   513  func renamePart(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry, dstBucket, dstEntry string, writeQuorum int) ([]StorageAPI, error) {
   514  	g := errgroup.WithNErrs(len(disks))
   515  
   516  	// Rename file on all underlying storage disks.
   517  	for index := range disks {
   518  		index := index
   519  		g.Go(func() error {
   520  			if disks[index] == nil {
   521  				return errDiskNotFound
   522  			}
   523  			return disks[index].RenameFile(ctx, srcBucket, srcEntry, dstBucket, dstEntry)
   524  		}, index)
   525  	}
   526  
   527  	// Wait for all renames to finish.
   528  	errs := g.Wait()
   529  
   530  	// No need to undo a partially successful operation since those will be cleaned up
   531  	// in 24hrs via multipart cleaner, never rename() back to `.minio.sys/tmp` as there
   532  	// is no way to clean them.
   533  
   534  	// We can safely allow RenameFile errors up to len(er.getDisks()) - writeQuorum
   535  	// otherwise return failure. Cleanup successful renames.
   536  	return evalDisks(disks, errs), reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum)
   537  }
   538  
   539  // writeAllDisks - writes 'b' to all provided disks.
   540  // If write cannot reach quorum, the files will be deleted from all disks.
   541  func writeAllDisks(ctx context.Context, disks []StorageAPI, dstBucket, dstEntry string, b []byte, writeQuorum int) ([]StorageAPI, error) {
   542  	g := errgroup.WithNErrs(len(disks))
   543  
   544  	// Write file to all underlying storage disks.
   545  	for index := range disks {
   546  		index := index
   547  		g.Go(func() error {
   548  			if disks[index] == nil {
   549  				return errDiskNotFound
   550  			}
   551  			return disks[index].WriteAll(ctx, dstBucket, dstEntry, b)
   552  		}, index)
   553  	}
   554  
   555  	// Wait for all writes to finish.
   556  	errs := g.Wait()
   557  
   558  	// We can safely allow WriteAll errors up to len(er.getDisks()) - writeQuorum
   559  	// otherwise return failure. Clean up successful writes.
   560  	err := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum)
   561  	if errors.Is(err, errErasureWriteQuorum) {
   562  		// Remove all written
   563  		g := errgroup.WithNErrs(len(disks))
   564  		for index := range disks {
   565  			if disks[index] == nil || errs[index] != nil {
   566  				continue
   567  			}
   568  			index := index
   569  			g.Go(func() error {
   570  				return disks[index].Delete(ctx, dstBucket, dstEntry, DeleteOptions{Immediate: true})
   571  			}, index)
   572  		}
   573  		// Ignore these errors.
   574  		g.WaitErr()
   575  	}
   576  
   577  	return evalDisks(disks, errs), err
   578  }
   579  
   580  // PutObjectPart - reads incoming stream and internally erasure codes
   581  // them. This call is similar to single put operation but it is part
   582  // of the multipart transaction.
   583  //
   584  // Implements S3 compatible Upload Part API.
   585  func (er erasureObjects) PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, r *PutObjReader, opts ObjectOptions) (pi PartInfo, err error) {
   586  	if !opts.NoAuditLog {
   587  		auditObjectErasureSet(ctx, object, &er)
   588  	}
   589  
   590  	data := r.Reader
   591  	// Validate input data size; it can never be less than -1 (-1 denotes an unknown size).
   592  	if data.Size() < -1 {
   593  		logger.LogIf(ctx, errInvalidArgument, logger.ErrorKind)
   594  		return pi, toObjectErr(errInvalidArgument)
   595  	}
   596  
   597  	// Read lock for upload id.
   598  	// Only held while reading the upload metadata.
   599  	uploadIDRLock := er.NewNSLock(bucket, pathJoin(object, uploadID))
   600  	rlkctx, err := uploadIDRLock.GetRLock(ctx, globalOperationTimeout)
   601  	if err != nil {
   602  		return PartInfo{}, err
   603  	}
   604  	rctx := rlkctx.Context()
   605  	defer uploadIDRLock.RUnlock(rlkctx)
   606  
   607  	uploadIDPath := er.getUploadIDDir(bucket, object, uploadID)
   608  	// Validates if upload ID exists.
   609  	fi, _, err := er.checkUploadIDExists(rctx, bucket, object, uploadID, true)
   610  	if err != nil {
   611  		if errors.Is(err, errVolumeNotFound) {
   612  			return pi, toObjectErr(err, bucket)
   613  		}
   614  		return pi, toObjectErr(err, bucket, object, uploadID)
   615  	}
   616  
   617  	// Write lock for this part ID, only hold it if we are planning to read from the
   618  	// stream to avoid any concurrent updates.
   619  	//
   620  	// Must be held throughout this call.
   621  	partIDLock := er.NewNSLock(bucket, pathJoin(object, uploadID, strconv.Itoa(partID)))
   622  	plkctx, err := partIDLock.GetLock(ctx, globalOperationTimeout)
   623  	if err != nil {
   624  		return PartInfo{}, err
   625  	}
   626  	pctx := plkctx.Context()
   627  	defer partIDLock.Unlock(plkctx)
   628  
   629  	onlineDisks := er.getDisks()
   630  	writeQuorum := fi.WriteQuorum(er.defaultWQuorum())
   631  
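        	// If the upload was started with a checksum type, each part must supply a matching checksum.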
   632  	if cs := fi.Metadata[hash.MinIOMultipartChecksum]; cs != "" {
   633  		if r.ContentCRCType().String() != cs {
   634  			return pi, InvalidArgument{
   635  				Bucket: bucket,
   636  				Object: fi.Name,
   637  				Err:    fmt.Errorf("checksum missing, want %q, got %q", cs, r.ContentCRCType().String()),
   638  			}
   639  		}
   640  	}
   641  	onlineDisks = shuffleDisks(onlineDisks, fi.Erasure.Distribution)
   642  
   643  	// Need a unique name for the part being written in minioMetaTmpBucket to
   644  	// accommodate concurrent PutObjectPart requests
   645  
   646  	partSuffix := fmt.Sprintf("part.%d", partID)
   647  	// Random UUID and timestamp for temporary part file.
   648  	tmpPart := fmt.Sprintf("%sx%d", mustGetUUID(), time.Now().UnixNano())
   649  	tmpPartPath := pathJoin(tmpPart, partSuffix)
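        	// e.g. "<uuid>x<unixnano>/part.7" under the temporary bucket, unique even for
        	// concurrent uploads of the same part number.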
   650  
   651  	// Delete the temporary object part. If PutObjectPart succeeds there would be nothing to delete.
   652  	defer func() {
   653  		if countOnlineDisks(onlineDisks) != len(onlineDisks) {
   654  			er.deleteAll(context.Background(), minioMetaTmpBucket, tmpPart)
   655  		}
   656  	}()
   657  
   658  	erasure, err := NewErasure(pctx, fi.Erasure.DataBlocks, fi.Erasure.ParityBlocks, fi.Erasure.BlockSize)
   659  	if err != nil {
   660  		return pi, toObjectErr(err, bucket, object)
   661  	}
   662  
   663  	// Fetch a buffer for I/O: reuse one from the pool if possible, otherwise allocate a new one.
   664  	var buffer []byte
   665  	switch size := data.Size(); {
   666  	case size == 0:
   667  		buffer = make([]byte, 1) // Allocate at least a byte to reach EOF
   668  	case size == -1:
   669  		if size := data.ActualSize(); size > 0 && size < fi.Erasure.BlockSize {
   670  			// Account for padding and forced compression overhead and encryption.
   671  			buffer = make([]byte, data.ActualSize()+256+32+32, data.ActualSize()*2+512)
   672  		} else {
   673  			buffer = globalBytePoolCap.Get()
   674  			defer globalBytePoolCap.Put(buffer)
   675  		}
   676  	case size >= fi.Erasure.BlockSize:
   677  		buffer = globalBytePoolCap.Get()
   678  		defer globalBytePoolCap.Put(buffer)
   679  	case size < fi.Erasure.BlockSize:
   680  		// No need to allocate fully fi.Erasure.BlockSize buffer if the incoming data is smaller.
   681  		buffer = make([]byte, size, 2*size+int64(fi.Erasure.ParityBlocks+fi.Erasure.DataBlocks-1))
   682  	}
   683  
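        	// Erasure encoding consumes input in blocks of fi.Erasure.BlockSize, so a larger buffer is unnecessary.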
   684  	if len(buffer) > int(fi.Erasure.BlockSize) {
   685  		buffer = buffer[:fi.Erasure.BlockSize]
   686  	}
   687  	writers := make([]io.Writer, len(onlineDisks))
   688  	for i, disk := range onlineDisks {
   689  		if disk == nil {
   690  			continue
   691  		}
   692  		writers[i] = newBitrotWriter(disk, bucket, minioMetaTmpBucket, tmpPartPath, erasure.ShardFileSize(data.Size()), DefaultBitrotAlgorithm, erasure.ShardSize())
   693  	}
   694  
   695  	toEncode := io.Reader(data)
   696  	if data.Size() > bigFileThreshold {
   697  		// Add input readahead.
   698  		// We use 2 buffers, so we always have a full buffer of input.
   699  		bufA := globalBytePoolCap.Get()
   700  		bufB := globalBytePoolCap.Get()
   701  		defer globalBytePoolCap.Put(bufA)
   702  		defer globalBytePoolCap.Put(bufB)
   703  		ra, err := readahead.NewReaderBuffer(data, [][]byte{bufA[:fi.Erasure.BlockSize], bufB[:fi.Erasure.BlockSize]})
   704  		if err == nil {
   705  			toEncode = ra
   706  			defer ra.Close()
   707  		}
   708  	}
   709  
   710  	n, err := erasure.Encode(pctx, toEncode, writers, buffer, writeQuorum)
   711  	closeBitrotWriters(writers)
   712  	if err != nil {
   713  		return pi, toObjectErr(err, bucket, object)
   714  	}
   715  
   716  	// Should return IncompleteBody{} error when reader has fewer bytes
   717  	// than specified in request header.
   718  	if n < data.Size() {
   719  		return pi, IncompleteBody{Bucket: bucket, Object: object}
   720  	}
   721  
   722  	for i := range writers {
   723  		if writers[i] == nil {
   724  			onlineDisks[i] = nil
   725  		}
   726  	}
   727  
   728  	// Rename temporary part file to its final location.
   729  	partPath := pathJoin(uploadIDPath, fi.DataDir, partSuffix)
   730  	onlineDisks, err = renamePart(ctx, onlineDisks, minioMetaTmpBucket, tmpPartPath, minioMetaMultipartBucket, partPath, writeQuorum)
   731  	if err != nil {
   732  		return pi, toObjectErr(err, minioMetaMultipartBucket, partPath)
   733  	}
   734  
   735  	md5hex := r.MD5CurrentHexString()
   736  	if opts.PreserveETag != "" {
   737  		md5hex = opts.PreserveETag
   738  	}
   739  
   740  	var index []byte
   741  	if opts.IndexCB != nil {
   742  		index = opts.IndexCB()
   743  	}
   744  
   745  	partInfo := ObjectPartInfo{
   746  		Number:     partID,
   747  		ETag:       md5hex,
   748  		Size:       n,
   749  		ActualSize: data.ActualSize(),
   750  		ModTime:    UTCNow(),
   751  		Index:      index,
   752  		Checksums:  r.ContentCRC(),
   753  	}
   754  
   755  	fi.Parts = []ObjectPartInfo{partInfo}
   756  	partFI, err := fi.MarshalMsg(nil)
   757  	if err != nil {
   758  		return pi, toObjectErr(err, minioMetaMultipartBucket, partPath)
   759  	}
   760  
   761  	// Write part metadata to all disks.
   762  	onlineDisks, err = writeAllDisks(ctx, onlineDisks, minioMetaMultipartBucket, partPath+".meta", partFI, writeQuorum)
   763  	if err != nil {
   764  		return pi, toObjectErr(err, minioMetaMultipartBucket, partPath)
   765  	}
   766  
   767  	// Return success.
   768  	return PartInfo{
   769  		PartNumber:     partInfo.Number,
   770  		ETag:           partInfo.ETag,
   771  		LastModified:   partInfo.ModTime,
   772  		Size:           partInfo.Size,
   773  		ActualSize:     partInfo.ActualSize,
   774  		ChecksumCRC32:  partInfo.Checksums["CRC32"],
   775  		ChecksumCRC32C: partInfo.Checksums["CRC32C"],
   776  		ChecksumSHA1:   partInfo.Checksums["SHA1"],
   777  		ChecksumSHA256: partInfo.Checksums["SHA256"],
   778  	}, nil
   779  }
   780  
   781  // GetMultipartInfo returns multipart metadata uploaded during newMultipartUpload, used
   782  // by callers to verify object states
   783  // - encrypted
   784  // - compressed
   785  // Does not contain currently uploaded parts by design.
   786  func (er erasureObjects) GetMultipartInfo(ctx context.Context, bucket, object, uploadID string, opts ObjectOptions) (MultipartInfo, error) {
   787  	if !opts.NoAuditLog {
   788  		auditObjectErasureSet(ctx, object, &er)
   789  	}
   790  
   791  	result := MultipartInfo{
   792  		Bucket:   bucket,
   793  		Object:   object,
   794  		UploadID: uploadID,
   795  	}
   796  
   797  	uploadIDLock := er.NewNSLock(bucket, pathJoin(object, uploadID))
   798  	lkctx, err := uploadIDLock.GetRLock(ctx, globalOperationTimeout)
   799  	if err != nil {
   800  		return MultipartInfo{}, err
   801  	}
   802  	ctx = lkctx.Context()
   803  	defer uploadIDLock.RUnlock(lkctx)
   804  
   805  	fi, _, err := er.checkUploadIDExists(ctx, bucket, object, uploadID, false)
   806  	if err != nil {
   807  		if errors.Is(err, errVolumeNotFound) {
   808  			return result, toObjectErr(err, bucket)
   809  		}
   810  		return result, toObjectErr(err, bucket, object, uploadID)
   811  	}
   812  
   813  	result.UserDefined = cloneMSS(fi.Metadata)
   814  	return result, nil
   815  }
   816  
   817  // ListObjectParts - lists all previously uploaded parts for a given
   818  // object and uploadID.  Takes additional input of part-number-marker
   819  // to indicate where the listing should begin from.
   820  //
   821  // Implements S3 compatible ListObjectParts API. The resulting
   822  // ListPartsInfo structure is marshaled directly into XML and
   823  // replied back to the client.
   824  func (er erasureObjects) ListObjectParts(ctx context.Context, bucket, object, uploadID string, partNumberMarker, maxParts int, opts ObjectOptions) (result ListPartsInfo, err error) {
   825  	if !opts.NoAuditLog {
   826  		auditObjectErasureSet(ctx, object, &er)
   827  	}
   828  
   829  	uploadIDLock := er.NewNSLock(bucket, pathJoin(object, uploadID))
   830  	lkctx, err := uploadIDLock.GetRLock(ctx, globalOperationTimeout)
   831  	if err != nil {
   832  		return ListPartsInfo{}, err
   833  	}
   834  	ctx = lkctx.Context()
   835  	defer uploadIDLock.RUnlock(lkctx)
   836  
   837  	fi, _, err := er.checkUploadIDExists(ctx, bucket, object, uploadID, false)
   838  	if err != nil {
   839  		return result, toObjectErr(err, bucket, object, uploadID)
   840  	}
   841  
   842  	uploadIDPath := er.getUploadIDDir(bucket, object, uploadID)
   843  
   844  	// Populate the result stub.
   845  	result.Bucket = bucket
   846  	result.Object = object
   847  	result.UploadID = uploadID
   848  	result.MaxParts = maxParts
   849  	result.PartNumberMarker = partNumberMarker
   850  	result.UserDefined = cloneMSS(fi.Metadata)
   851  	result.ChecksumAlgorithm = fi.Metadata[hash.MinIOMultipartChecksum]
   852  
   853  	if partNumberMarker < 0 {
   854  		partNumberMarker = 0
   855  	}
   856  
   857  	// Limit output to maxPartsList.
   858  	if maxParts > maxPartsList-partNumberMarker {
   859  		maxParts = maxPartsList - partNumberMarker
   860  	}
   861  
   862  	if maxParts == 0 {
   863  		return result, nil
   864  	}
   865  
   866  	// Read Part info for all parts
   867  	partPath := pathJoin(uploadIDPath, fi.DataDir) + "/"
   868  	req := ReadMultipleReq{
   869  		Bucket:       minioMetaMultipartBucket,
   870  		Prefix:       partPath,
   871  		MaxSize:      1 << 20, // Each part should realistically not be > 1MiB.
   872  		MaxResults:   maxParts + 1,
   873  		MetadataOnly: true,
   874  	}
   875  
   876  	start := partNumberMarker + 1
   877  	end := start + maxParts
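        	// One entry beyond maxParts is requested so truncation can be detected below.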
   878  
   879  	// Parts are 1 based, so index 0 is part one, etc.
   880  	for i := start; i <= end; i++ {
   881  		req.Files = append(req.Files, fmt.Sprintf("part.%d.meta", i))
   882  	}
   883  
   884  	var disk StorageAPI
   885  	disks := er.getOnlineLocalDisks()
   886  	if len(disks) == 0 {
   887  		// using er.getOnlineLocalDisks() has one side effect: on a pooled
   888  		// setup where all disks are remote, add a fallback
   889  		disks = er.getOnlineDisks()
   890  	}
   891  
   892  	for _, disk = range disks {
   893  		if disk == nil {
   894  			continue
   895  		}
   896  
   897  		if !disk.IsOnline() {
   898  			continue
   899  		}
   900  
   901  		break
   902  	}
   903  
   904  	g := errgroup.WithNErrs(len(req.Files)).WithConcurrency(32)
   905  
   906  	partsInfo := make([]ObjectPartInfo, len(req.Files))
   907  	for i, file := range req.Files {
   908  		file := file
   909  		partN := i + start
   910  		i := i
   911  
   912  		g.Go(func() error {
   913  			buf, err := disk.ReadAll(ctx, minioMetaMultipartBucket, pathJoin(partPath, file))
   914  			if err != nil {
   915  				return err
   916  			}
   917  
   918  			var pfi FileInfo
   919  			_, err = pfi.UnmarshalMsg(buf)
   920  			if err != nil {
   921  				return err
   922  			}
   923  
   924  			if len(pfi.Parts) != 1 {
   925  				return fmt.Errorf("invalid number of parts: expected 1, got %d", len(pfi.Parts))
   926  			}
   927  
   928  			if partN != pfi.Parts[0].Number {
   929  				return fmt.Errorf("part.%d.meta has incorrect corresponding part number: expected %d, got %d", partN, partN, pfi.Parts[0].Number)
   930  			}
   931  
   932  			partsInfo[i] = pfi.Parts[0]
   933  			return nil
   934  		}, i)
   935  	}
   936  
   937  	g.Wait()
   938  
   939  	for _, part := range partsInfo {
   940  		if part.Number != 0 && !part.ModTime.IsZero() {
   941  			fi.AddObjectPart(part.Number, part.ETag, part.Size, part.ActualSize, part.ModTime, part.Index, part.Checksums)
   942  		}
   943  	}
   944  
   945  	// Only parts with higher part numbers will be listed.
   946  	parts := fi.Parts
   947  	result.Parts = make([]PartInfo, 0, len(parts))
   948  	for _, part := range parts {
   949  		result.Parts = append(result.Parts, PartInfo{
   950  			PartNumber:     part.Number,
   951  			ETag:           part.ETag,
   952  			LastModified:   part.ModTime,
   953  			ActualSize:     part.ActualSize,
   954  			Size:           part.Size,
   955  			ChecksumCRC32:  part.Checksums["CRC32"],
   956  			ChecksumCRC32C: part.Checksums["CRC32C"],
   957  			ChecksumSHA1:   part.Checksums["SHA1"],
   958  			ChecksumSHA256: part.Checksums["SHA256"],
   959  		})
   960  		if len(result.Parts) >= maxParts {
   961  			break
   962  		}
   963  	}
   964  
   965  	// If listed entries are more than maxParts, we set IsTruncated as true.
   966  	if len(parts) > len(result.Parts) {
   967  		result.IsTruncated = true
   968  		// Make sure to fill next part number marker if IsTruncated is
   969  		// true for subsequent listing.
   970  		nextPartNumberMarker := result.Parts[len(result.Parts)-1].PartNumber
   971  		result.NextPartNumberMarker = nextPartNumberMarker
   972  	}
   973  	return result, nil
   974  }
   975  
   976  // CompleteMultipartUpload - completes an ongoing multipart
   977  // transaction after receiving all the parts indicated by the client.
   978  // Returns an md5sum calculated by concatenating all the individual
   979  // md5sums of all the parts.
   980  //
   981  // Implements S3 compatible Complete multipart API.
   982  func (er erasureObjects) CompleteMultipartUpload(ctx context.Context, bucket string, object string, uploadID string, parts []CompletePart, opts ObjectOptions) (oi ObjectInfo, err error) {
   983  	if !opts.NoAuditLog {
   984  		auditObjectErasureSet(ctx, object, &er)
   985  	}
   986  
   987  	// Hold write locks to verify uploaded parts, also disallows any
   988  	// parallel PutObjectPart() requests.
   989  	uploadIDLock := er.NewNSLock(bucket, pathJoin(object, uploadID))
   990  	wlkctx, err := uploadIDLock.GetLock(ctx, globalOperationTimeout)
   991  	if err != nil {
   992  		return oi, err
   993  	}
   994  	ctx = wlkctx.Context()
   995  	defer uploadIDLock.Unlock(wlkctx)
   996  
   997  	fi, partsMetadata, err := er.checkUploadIDExists(ctx, bucket, object, uploadID, true)
   998  	if err != nil {
   999  		if errors.Is(err, errVolumeNotFound) {
  1000  			return oi, toObjectErr(err, bucket)
  1001  		}
  1002  		return oi, toObjectErr(err, bucket, object, uploadID)
  1003  	}
  1004  
  1005  	uploadIDPath := er.getUploadIDDir(bucket, object, uploadID)
  1006  	onlineDisks := er.getDisks()
  1007  	writeQuorum := fi.WriteQuorum(er.defaultWQuorum())
  1008  
  1009  	// Read Part info for all parts
  1010  	partPath := pathJoin(uploadIDPath, fi.DataDir) + "/"
  1011  	req := ReadMultipleReq{
  1012  		Bucket:       minioMetaMultipartBucket,
  1013  		Prefix:       partPath,
  1014  		MaxSize:      1 << 20, // Each part should realistically not be > 1MiB.
  1015  		Files:        make([]string, 0, len(parts)),
  1016  		AbortOn404:   true,
  1017  		MetadataOnly: true,
  1018  	}
  1019  	for _, part := range parts {
  1020  		req.Files = append(req.Files, fmt.Sprintf("part.%d.meta", part.PartNumber))
  1021  	}
  1022  	partInfoFiles, err := readMultipleFiles(ctx, onlineDisks, req, writeQuorum)
  1023  	if err != nil {
  1024  		return oi, err
  1025  	}
  1026  	if len(partInfoFiles) != len(parts) {
  1027  		// Should only happen through internal error
  1028  		err := fmt.Errorf("unexpected part result count: %d, want %d", len(partInfoFiles), len(parts))
  1029  		logger.LogIf(ctx, err)
  1030  		return oi, toObjectErr(err, bucket, object)
  1031  	}
  1032  
  1033  	// Checksum type set when upload started.
  1034  	var checksumType hash.ChecksumType
  1035  	if cs := fi.Metadata[hash.MinIOMultipartChecksum]; cs != "" {
  1036  		checksumType = hash.NewChecksumType(cs)
  1037  		if opts.WantChecksum != nil && !opts.WantChecksum.Type.Is(checksumType) {
  1038  			return oi, InvalidArgument{
  1039  				Bucket: bucket,
  1040  				Object: fi.Name,
  1041  				Err:    fmt.Errorf("checksum type mismatch"),
  1042  			}
  1043  		}
  1044  	}
  1045  
  1046  	var checksumCombined []byte
  1047  
  1048  	// However, in case of encryption, the persisted part ETags don't match
  1049  	// what we have sent to the client during PutObjectPart. The reason is
  1050  	// that ETags are encrypted. Hence, the client will send a list of complete
  1051  	// part ETags of which none can match the ETag of any part. For example
  1052  	//   ETag (client):          30902184f4e62dd8f98f0aaff810c626
  1053  	//   ETag (server-internal): 20000f00ce5dc16e3f3b124f586ae1d88e9caa1c598415c2759bbb50e84a59f630902184f4e62dd8f98f0aaff810c626
  1054  	//
  1055  	// Therefore, we adjust all ETags sent by the client to match what is stored
  1056  	// on the backend.
  1057  	kind, _ := crypto.IsEncrypted(fi.Metadata)
  1058  
  1059  	var objectEncryptionKey []byte
  1060  	switch kind {
  1061  	case crypto.SSEC:
  1062  		if checksumType.IsSet() {
  1063  			if opts.EncryptFn == nil {
  1064  				return oi, crypto.ErrMissingCustomerKey
  1065  			}
  1066  			baseKey := opts.EncryptFn("", nil)
  1067  			if len(baseKey) != 32 {
  1068  				return oi, crypto.ErrInvalidCustomerKey
  1069  			}
  1070  			objectEncryptionKey, err = decryptObjectMeta(baseKey, bucket, object, fi.Metadata)
  1071  			if err != nil {
  1072  				return oi, err
  1073  			}
  1074  		}
  1075  	case crypto.S3, crypto.S3KMS:
  1076  		objectEncryptionKey, err = decryptObjectMeta(nil, bucket, object, fi.Metadata)
  1077  		if err != nil {
  1078  			return oi, err
  1079  		}
  1080  	}
  1081  	if len(objectEncryptionKey) == 32 {
  1082  		var key crypto.ObjectKey
  1083  		copy(key[:], objectEncryptionKey)
  1084  		opts.EncryptFn = metadataEncrypter(key)
  1085  	}
  1086  
  1087  	for i, part := range partInfoFiles {
  1088  		partID := parts[i].PartNumber
  1089  		if part.Error != "" || !part.Exists {
  1090  			return oi, InvalidPart{
  1091  				PartNumber: partID,
  1092  			}
  1093  		}
  1094  
  1095  		var pfi FileInfo
  1096  		_, err := pfi.UnmarshalMsg(part.Data)
  1097  		if err != nil {
  1098  			// Maybe crash or similar.
  1099  			logger.LogIf(ctx, err)
  1100  			return oi, InvalidPart{
  1101  				PartNumber: partID,
  1102  			}
  1103  		}
  1104  
  1105  		partI := pfi.Parts[0]
  1106  		partNumber := partI.Number
  1107  		if partID != partNumber {
  1108  			logger.LogIf(ctx, fmt.Errorf("part.%d.meta has incorrect corresponding part number: expected %d, got %d", partID, partID, partI.Number))
  1109  			return oi, InvalidPart{
  1110  				PartNumber: partID,
  1111  			}
  1112  		}
  1113  
  1114  		// Add the current part.
  1115  		fi.AddObjectPart(partI.Number, partI.ETag, partI.Size, partI.ActualSize, partI.ModTime, partI.Index, partI.Checksums)
  1116  	}
  1117  
  1118  	// Calculate full object size.
  1119  	var objectSize int64
  1120  
  1121  	// Calculate consolidated actual size.
  1122  	var objectActualSize int64
  1123  
  1124  	// Order online disks in accordance with distribution order.
  1125  	// Order parts metadata in accordance with distribution order.
  1126  	onlineDisks, partsMetadata = shuffleDisksAndPartsMetadataByIndex(onlineDisks, partsMetadata, fi)
  1127  
  1128  	// Save current erasure metadata for validation.
  1129  	currentFI := fi
  1130  
  1131  	// Allocate parts similar to incoming slice.
  1132  	fi.Parts = make([]ObjectPartInfo, len(parts))
  1133  
  1134  	// Validate each part and then commit to disk.
  1135  	for i, part := range parts {
  1136  		partIdx := objectPartIndex(currentFI.Parts, part.PartNumber)
  1137  		// The requested part number must match one of the uploaded parts.
  1138  		if partIdx == -1 {
  1139  			invp := InvalidPart{
  1140  				PartNumber: part.PartNumber,
  1141  				GotETag:    part.ETag,
  1142  			}
  1143  			return oi, invp
  1144  		}
  1145  		expPart := currentFI.Parts[partIdx]
  1146  
  1147  		// ensure that part ETag is canonicalized to strip off extraneous quotes
  1148  		part.ETag = canonicalizeETag(part.ETag)
  1149  		expETag := tryDecryptETag(objectEncryptionKey, expPart.ETag, kind == crypto.S3)
  1150  		if expETag != part.ETag {
  1151  			invp := InvalidPart{
  1152  				PartNumber: part.PartNumber,
  1153  				ExpETag:    expETag,
  1154  				GotETag:    part.ETag,
  1155  			}
  1156  			return oi, invp
  1157  		}
  1158  
  1159  		if checksumType.IsSet() {
  1160  			crc := expPart.Checksums[checksumType.String()]
  1161  			if crc == "" {
  1162  				return oi, InvalidPart{
  1163  					PartNumber: part.PartNumber,
  1164  				}
  1165  			}
  1166  			wantCS := map[string]string{
  1167  				hash.ChecksumCRC32.String():  part.ChecksumCRC32,
  1168  				hash.ChecksumCRC32C.String(): part.ChecksumCRC32C,
  1169  				hash.ChecksumSHA1.String():   part.ChecksumSHA1,
  1170  				hash.ChecksumSHA256.String(): part.ChecksumSHA256,
  1171  			}
  1172  			if wantCS[checksumType.String()] != crc {
  1173  				return oi, InvalidPart{
  1174  					PartNumber: part.PartNumber,
  1175  					ExpETag:    wantCS[checksumType.String()],
  1176  					GotETag:    crc,
  1177  				}
  1178  			}
  1179  			cs := hash.NewChecksumString(checksumType.String(), crc)
  1180  			if !cs.Valid() {
  1181  				return oi, InvalidPart{
  1182  					PartNumber: part.PartNumber,
  1183  				}
  1184  			}
  1185  			checksumCombined = append(checksumCombined, cs.Raw...)
  1186  		}
  1187  
  1188  		// All parts except the last part have to be at least 5MB.
  1189  		if (i < len(parts)-1) && !isMinAllowedPartSize(currentFI.Parts[partIdx].ActualSize) {
  1190  			return oi, PartTooSmall{
  1191  				PartNumber: part.PartNumber,
  1192  				PartSize:   expPart.ActualSize,
  1193  				PartETag:   part.ETag,
  1194  			}
  1195  		}
  1196  
  1197  		// Save for total object size.
  1198  		objectSize += expPart.Size
  1199  
  1200  		// Save the consolidated actual size.
  1201  		objectActualSize += expPart.ActualSize
  1202  
  1203  		// Add incoming parts.
  1204  		fi.Parts[i] = ObjectPartInfo{
  1205  			Number:     part.PartNumber,
  1206  			Size:       expPart.Size,
  1207  			ActualSize: expPart.ActualSize,
  1208  			ModTime:    expPart.ModTime,
  1209  			Index:      expPart.Index,
  1210  			Checksums:  nil, // Not transferred since we do not need it.
  1211  		}
  1212  	}
  1213  
  1214  	if opts.WantChecksum != nil {
  1215  		err := opts.WantChecksum.Matches(checksumCombined)
  1216  		if err != nil {
  1217  			return oi, err
  1218  		}
  1219  	}
  1220  
  1221  	// Hold the namespace lock to complete the transaction
  1222  	lk := er.NewNSLock(bucket, object)
  1223  	lkctx, err := lk.GetLock(ctx, globalOperationTimeout)
  1224  	if err != nil {
  1225  		return oi, err
  1226  	}
  1227  	ctx = lkctx.Context()
  1228  	defer lk.Unlock(lkctx)
  1229  
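        	// The final object checksum is a composite computed over the concatenated raw part checksums.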
  1230  	if checksumType.IsSet() {
  1231  		checksumType |= hash.ChecksumMultipart | hash.ChecksumIncludesMultipart
  1232  		cs := hash.NewChecksumFromData(checksumType, checksumCombined)
  1233  		fi.Checksum = cs.AppendTo(nil, checksumCombined)
  1234  		if opts.EncryptFn != nil {
  1235  			fi.Checksum = opts.EncryptFn("object-checksum", fi.Checksum)
  1236  		}
  1237  	}
  1238  	delete(fi.Metadata, hash.MinIOMultipartChecksum) // Not needed in final object.
  1239  
  1240  	// Save the final object size and modtime.
  1241  	fi.Size = objectSize
  1242  	fi.ModTime = opts.MTime
  1243  	if opts.MTime.IsZero() {
  1244  		fi.ModTime = UTCNow()
  1245  	}
  1246  
  1247  	// Save successfully calculated md5sum.
  1248  	// For replicas, newMultipartUpload would have already set the replication ETag
  1249  	if fi.Metadata["etag"] == "" {
  1250  		if opts.UserDefined["etag"] != "" {
  1251  			fi.Metadata["etag"] = opts.UserDefined["etag"]
  1252  		} else { // fallback if not already calculated in handler.
  1253  			fi.Metadata["etag"] = getCompleteMultipartMD5(parts)
  1254  		}
  1255  	}
  1256  
  1257  	// Save the consolidated actual size.
  1258  	if opts.ReplicationRequest {
  1259  		fi.Metadata[ReservedMetadataPrefix+"actual-size"] = opts.UserDefined["X-Minio-Internal-Actual-Object-Size"]
  1260  	} else {
  1261  		fi.Metadata[ReservedMetadataPrefix+"actual-size"] = strconv.FormatInt(objectActualSize, 10)
  1262  	}
  1263  
  1264  	if opts.DataMovement {
  1265  		fi.SetDataMov()
  1266  	}
  1267  
  1268  	// Update all erasure metadata, make sure to not modify fields like
  1269  	// checksum which are different on each disks.
  1270  	for index := range partsMetadata {
  1271  		if partsMetadata[index].IsValid() {
  1272  			partsMetadata[index].Size = fi.Size
  1273  			partsMetadata[index].ModTime = fi.ModTime
  1274  			partsMetadata[index].Metadata = fi.Metadata
  1275  			partsMetadata[index].Parts = fi.Parts
  1276  			partsMetadata[index].Checksum = fi.Checksum
  1277  			partsMetadata[index].Versioned = opts.Versioned || opts.VersionSuspended
  1278  		}
  1279  	}
  1280  
  1281  	// Remove parts that weren't present in CompleteMultipartUpload request.
  1282  	for _, curpart := range currentFI.Parts {
  1283  		// Remove part.meta which is not needed anymore.
  1284  		er.removePartMeta(bucket, object, uploadID, currentFI.DataDir, curpart.Number)
  1285  
  1286  		if objectPartIndex(fi.Parts, curpart.Number) == -1 {
  1287  			// Delete the missing part files. e.g,
  1288  			// Request 1: NewMultipart
  1289  			// Request 2: PutObjectPart 1
  1290  			// Request 3: PutObjectPart 2
  1291  			// Request 4: CompleteMultipartUpload --part 2
  1292  			// N.B. 1st part is not present. This part should be removed from the storage.
  1293  			er.removeObjectPart(bucket, object, uploadID, currentFI.DataDir, curpart.Number)
  1294  		}
  1295  	}
  1296  
  1297  	defer func() {
  1298  		if err == nil {
  1299  			er.deleteAll(context.Background(), minioMetaMultipartBucket, uploadIDPath)
  1300  		}
  1301  	}()
  1302  
  1303  	// Rename the multipart object to final location.
  1304  	onlineDisks, versionsDisparity, err := renameData(ctx, onlineDisks, minioMetaMultipartBucket, uploadIDPath,
  1305  		partsMetadata, bucket, object, writeQuorum)
  1306  	if err != nil {
  1307  		return oi, toObjectErr(err, bucket, object)
  1308  	}
  1309  
  1310  	if !opts.Speedtest && versionsDisparity {
  1311  		globalMRFState.addPartialOp(partialOperation{
  1312  			bucket:      bucket,
  1313  			object:      object,
  1314  			queued:      time.Now(),
  1315  			allVersions: true,
  1316  			setIndex:    er.setIndex,
  1317  			poolIndex:   er.poolIndex,
  1318  		})
  1319  	}
  1320  
  1321  	if !opts.Speedtest && !versionsDisparity {
  1322  		// Check if there is any offline disk and add it to the MRF list
  1323  		for _, disk := range onlineDisks {
  1324  			if disk != nil && disk.IsOnline() {
  1325  				continue
  1326  			}
  1327  			er.addPartial(bucket, object, fi.VersionID)
  1328  			break
  1329  		}
  1330  	}
  1331  
  1332  	for i := 0; i < len(onlineDisks); i++ {
  1333  		if onlineDisks[i] != nil && onlineDisks[i].IsOnline() {
  1334  			// Object info is the same in all disks, so we can pick
  1335  			// the first meta from an online disk
  1336  			fi = partsMetadata[i]
  1337  			break
  1338  		}
  1339  	}
  1340  
  1341  	// we are adding a new version to this object under the namespace lock, so this is the latest version.
  1342  	fi.IsLatest = true
  1343  
  1344  	// Success, return object info.
  1345  	return fi.ToObjectInfo(bucket, object, opts.Versioned || opts.VersionSuspended), nil
  1346  }
  1347  
  1348  // AbortMultipartUpload - aborts an ongoing multipart operation
  1349  // signified by the input uploadID. This is an atomic operation and
  1350  // doesn't require clients to initiate multiple such requests.
  1351  //
  1352  // All parts are purged from all disks and reference to the uploadID
  1353  // would be removed from the system, rollback is not possible on this
  1354  // operation.
  1355  func (er erasureObjects) AbortMultipartUpload(ctx context.Context, bucket, object, uploadID string, opts ObjectOptions) (err error) {
  1356  	if !opts.NoAuditLog {
  1357  		auditObjectErasureSet(ctx, object, &er)
  1358  	}
  1359  
  1360  	lk := er.NewNSLock(bucket, pathJoin(object, uploadID))
  1361  	lkctx, err := lk.GetLock(ctx, globalOperationTimeout)
  1362  	if err != nil {
  1363  		return err
  1364  	}
  1365  	ctx = lkctx.Context()
  1366  	defer lk.Unlock(lkctx)
  1367  
  1368  	// Validates if upload ID exists.
  1369  	if _, _, err = er.checkUploadIDExists(ctx, bucket, object, uploadID, false); err != nil {
  1370  		if errors.Is(err, errVolumeNotFound) {
  1371  			return toObjectErr(err, bucket)
  1372  		}
  1373  		return toObjectErr(err, bucket, object, uploadID)
  1374  	}
  1375  
  1376  	// Cleanup all uploaded parts.
  1377  	er.deleteAll(ctx, minioMetaMultipartBucket, er.getUploadIDDir(bucket, object, uploadID))
  1378  
  1379  	// Successfully purged.
  1380  	return nil
  1381  }