github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/tgts3mpt.go

     1  // Package ais provides core functionality for the AIStore object storage.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package ais
     6  
     7  import (
     8  	"encoding/xml"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"net/http"
    13  	"net/url"
    14  	"os"
    15  	"sort"
    16  	"strconv"
    17  	"time"
    18  
    19  	"github.com/NVIDIA/aistore/ais/backend"
    20  	"github.com/NVIDIA/aistore/ais/s3"
    21  	"github.com/NVIDIA/aistore/cmn"
    22  	"github.com/NVIDIA/aistore/cmn/cos"
    23  	"github.com/NVIDIA/aistore/cmn/debug"
    24  	"github.com/NVIDIA/aistore/cmn/feat"
    25  	"github.com/NVIDIA/aistore/cmn/nlog"
    26  	"github.com/NVIDIA/aistore/core"
    27  	"github.com/NVIDIA/aistore/core/meta"
    28  	"github.com/NVIDIA/aistore/fs"
    29  )
    30  
    31  func decodeXML[T any](body []byte) (result T, _ error) {
    32  	if err := xml.Unmarshal(body, &result); err != nil {
    33  		return result, err
    34  	}
    35  	return result, nil
    36  }
    37  
    38  func multiWriter(writers ...io.Writer) io.Writer {
    39  	a := make([]io.Writer, 0, 3)
    40  	for _, w := range writers {
    41  		if w != nil {
    42  			a = append(a, w)
    43  		}
    44  	}
    45  	return io.MultiWriter(a...)
    46  }
    47  
     48  // Initiate multipart upload:
     49  // - for remote S3 buckets, obtain the upload ID from the backend; otherwise, generate a UUID
     50  // - return the upload ID to the caller
    51  // https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html
    52  func (t *target) startMpt(w http.ResponseWriter, r *http.Request, items []string, bck *meta.Bck, q url.Values) {
    53  	var (
    54  		objName  = s3.ObjName(items)
    55  		lom      = &core.LOM{ObjName: objName}
    56  		uploadID string
    57  		ecode    int
    58  	)
    59  	err := lom.InitBck(bck.Bucket())
    60  	if err != nil {
    61  		s3.WriteErr(w, r, err, 0)
    62  		return
    63  	}
    64  	if bck.IsRemoteS3() {
    65  		uploadID, ecode, err = backend.StartMpt(lom, r, q)
    66  		if err != nil {
    67  			s3.WriteErr(w, r, err, ecode)
    68  			return
    69  		}
    70  	} else {
    71  		uploadID = cos.GenUUID()
    72  	}
    73  
    74  	s3.InitUpload(uploadID, bck.Name, objName)
    75  	result := &s3.InitiateMptUploadResult{Bucket: bck.Name, Key: objName, UploadID: uploadID}
    76  
    77  	sgl := t.gmm.NewSGL(0)
    78  	result.MustMarshal(sgl)
    79  	w.Header().Set(cos.HdrContentType, cos.ContentXML)
    80  	sgl.WriteTo2(w)
    81  	sgl.Free()
    82  }
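
// Example (illustrative sketch, not part of this package): a client initiates an MPT
// upload against AIStore's S3-compatible endpoint using aws-sdk-go v1 (packages aws,
// session, and service/s3, the latter aliased awss3 to avoid clashing with the local
// s3 package); the endpoint URL, bucket, and object names below are hypothetical:
//
//	sess := session.Must(session.NewSession(&aws.Config{
//		Endpoint:         aws.String("http://localhost:8080/s3"), // assumed AIS proxy S3 endpoint
//		Region:           aws.String("us-east-1"),
//		S3ForcePathStyle: aws.Bool(true),
//	}))
//	svc := awss3.New(sess)
//	out, err := svc.CreateMultipartUpload(&awss3.CreateMultipartUploadInput{
//		Bucket: aws.String("mybucket"),
//		Key:    aws.String("myobject"),
//	})
//	// on success, out.UploadId carries the ID generated (or relayed) by startMpt above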
    83  
     84  // PUT a part of the multipart upload.
     85  // The request body carries the part's data; uploadId and partNumber arrive via the URL query.
     86  //
     87  // "Content-MD5" in the part headers seems to be deprecated:
     88  // either not present (s3cmd) or cannot be trusted (aws s3api).
    89  //
    90  // https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPart.html
    91  func (t *target) putMptPart(w http.ResponseWriter, r *http.Request, items []string, q url.Values, bck *meta.Bck) {
    92  	// 1. parse/validate
    93  	uploadID := q.Get(s3.QparamMptUploadID)
    94  	if uploadID == "" {
    95  		s3.WriteErr(w, r, errors.New("empty uploadId"), 0)
    96  		return
    97  	}
    98  	part := q.Get(s3.QparamMptPartNo)
    99  	if part == "" {
   100  		s3.WriteErr(w, r, fmt.Errorf("upload %q: missing part number", uploadID), 0)
   101  		return
   102  	}
   103  	partNum, err := s3.ParsePartNum(part)
   104  	if err != nil {
   105  		s3.WriteErr(w, r, err, 0)
   106  		return
   107  	}
   108  	if partNum < 1 || partNum > s3.MaxPartsPerUpload {
   109  		err := fmt.Errorf("upload %q: invalid part number %d, must be between 1 and %d",
   110  			uploadID, partNum, s3.MaxPartsPerUpload)
   111  		s3.WriteErr(w, r, err, 0)
   112  		return
   113  	}
   114  
   115  	// 2. init lom, create part file
   116  	objName := s3.ObjName(items)
   117  	lom := &core.LOM{ObjName: objName}
   118  	if err := lom.InitBck(bck.Bucket()); err != nil {
   119  		s3.WriteErr(w, r, err, 0)
   120  		return
   121  	}
   122  	// workfile name format: <upload-id>.<part-number>.<obj-name>
   123  	prefix := uploadID + "." + strconv.FormatInt(int64(partNum), 10)
   124  	wfqn := fs.CSM.Gen(lom, fs.WorkfileType, prefix)
   125  	partFh, errC := lom.CreateFileRW(wfqn)
   126  	if errC != nil {
   127  		s3.WriteMptErr(w, r, errC, 0, lom, uploadID)
   128  		return
   129  	}
   130  
   131  	var (
   132  		etag         string
   133  		size         int64
   134  		ecode        int
   135  		partSHA      = r.Header.Get(cos.S3HdrContentSHA256)
   136  		checkPartSHA = partSHA != "" && partSHA != cos.S3UnsignedPayload
   137  		cksumSHA     = &cos.CksumHash{}
   138  		cksumMD5     = &cos.CksumHash{}
   139  		remote       = bck.IsRemoteS3()
   140  	)
   141  	if checkPartSHA {
   142  		cksumSHA = cos.NewCksumHash(cos.ChecksumSHA256)
   143  	}
   144  	if !remote {
   145  		cksumMD5 = cos.NewCksumHash(cos.ChecksumMD5)
   146  	}
   147  
   148  	// 3. write
   149  	mw := multiWriter(cksumMD5.H, cksumSHA.H, partFh)
   150  
   151  	if !remote {
   152  		// write locally
   153  		buf, slab := t.gmm.Alloc()
   154  		size, err = io.CopyBuffer(mw, r.Body, buf)
   155  		slab.Free(buf)
   156  	} else {
    157  		// tee the request body: the remote PUT below consumes it while mw writes the local workfile (and optional SHA-256)
   158  		tr := io.NopCloser(io.TeeReader(r.Body, mw))
   159  		size = r.ContentLength
   160  		debug.Assert(size > 0, "mpt upload: expecting positive content-length")
   161  
   162  		etag, ecode, err = backend.PutMptPart(lom, tr, r, q, uploadID, size, partNum)
   163  	}
   164  
   165  	cos.Close(partFh)
   166  	if err != nil {
   167  		if nerr := cos.RemoveFile(wfqn); nerr != nil && !os.IsNotExist(nerr) {
   168  			nlog.Errorf(fmtNested, t, err, "remove", wfqn, nerr)
   169  		}
   170  		s3.WriteMptErr(w, r, err, ecode, lom, uploadID)
   171  		return
   172  	}
   173  
    174  	// 4. finalize the part (expecting the part's remote ETag to be its MD5 checksum)
   175  	md5 := etag
   176  	if cksumMD5.H != nil {
   177  		debug.Assert(etag == "")
   178  		cksumMD5.Finalize()
   179  		md5 = cksumMD5.Value()
   180  	}
   181  	if checkPartSHA {
   182  		cksumSHA.Finalize()
   183  		recvSHA := cos.NewCksum(cos.ChecksumSHA256, partSHA)
   184  		if !cksumSHA.Equal(recvSHA) {
   185  			detail := fmt.Sprintf("upload %q, %s, part %d", uploadID, lom, partNum)
   186  			err = cos.NewErrDataCksum(&cksumSHA.Cksum, recvSHA, detail)
   187  			s3.WriteMptErr(w, r, err, http.StatusInternalServerError, lom, uploadID)
   188  			return
   189  		}
   190  	}
   191  	npart := &s3.MptPart{
   192  		MD5:  md5,
   193  		FQN:  wfqn,
   194  		Size: size,
   195  		Num:  partNum,
   196  	}
   197  	if err := s3.AddPart(uploadID, npart); err != nil {
   198  		s3.WriteMptErr(w, r, err, 0, lom, uploadID)
   199  		return
   200  	}
   201  	w.Header().Set(cos.S3CksumHeader, md5) // s3cmd checks this one
   202  }
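
// For remote buckets, putMptPart above reads the request body exactly once: io.TeeReader
// duplicates every byte into the local workfile (and the optional SHA-256 hash) while
// backend.PutMptPart streams the same bytes to remote S3. A minimal stdlib sketch of the
// single-pass pattern (names are illustrative):
//
//	func teePut(local io.Writer, body io.Reader, putRemote func(io.Reader) error) error {
//		tr := io.TeeReader(body, local) // every Read from tr also writes to local
//		return putRemote(tr)            // the remote PUT drives the read loop
//	}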
   203  
    204  // Complete multipart upload.
    205  // The request body contains XML with the list of parts that must already be stored on the target.
    206  // 1. Check that all parts listed in the request body are present
    207  // 2. Merge all parts into a single object and compute its ETag
    208  // 3. Return the ETag to the caller
   209  // https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html
   210  func (t *target) completeMpt(w http.ResponseWriter, r *http.Request, items []string, q url.Values, bck *meta.Bck) {
   211  	// parse/validate
   212  	uploadID := q.Get(s3.QparamMptUploadID)
   213  	if uploadID == "" {
   214  		s3.WriteErr(w, r, errors.New("empty uploadId"), 0)
   215  		return
   216  	}
   217  
   218  	output, err := io.ReadAll(r.Body)
   219  	if err != nil {
   220  		s3.WriteErr(w, r, err, http.StatusBadRequest)
   221  		return
   222  	}
   223  	partList, err := decodeXML[*s3.CompleteMptUpload](output)
   224  	if err != nil {
   225  		s3.WriteErr(w, r, err, http.StatusBadRequest)
   226  		return
   227  	}
   228  	if len(partList.Parts) == 0 {
   229  		s3.WriteErr(w, r, fmt.Errorf("upload %q: empty list of upload parts", uploadID), 0)
   230  		return
   231  	}
   232  	objName := s3.ObjName(items)
   233  	lom := &core.LOM{ObjName: objName}
   234  	if err := lom.InitBck(bck.Bucket()); err != nil {
   235  		s3.WriteErr(w, r, err, 0)
   236  		return
   237  	}
   238  	size, errN := s3.ObjSize(uploadID)
   239  	if errN != nil {
   240  		s3.WriteMptErr(w, r, errN, 0, lom, uploadID)
   241  		return
   242  	}
   243  
   244  	// call s3
   245  	var (
   246  		etag    string
   247  		started = time.Now()
   248  		remote  = bck.IsRemoteS3()
   249  	)
   250  	if remote {
   251  		v, ecode, err := backend.CompleteMpt(lom, r, q, uploadID, partList)
   252  		if err != nil {
   253  			s3.WriteMptErr(w, r, err, ecode, lom, uploadID)
   254  			return
   255  		}
   256  		etag = v
   257  	}
   258  
   259  	// append parts and finalize locally
   260  	var (
   261  		mw          io.Writer
   262  		concatMD5   string // => ETag
   263  		actualCksum = &cos.CksumHash{}
   264  	)
   265  	// .1 sort and check parts
   266  	sort.Slice(partList.Parts, func(i, j int) bool {
   267  		return partList.Parts[i].PartNumber < partList.Parts[j].PartNumber
   268  	})
   269  	nparts, err := s3.CheckParts(uploadID, partList.Parts)
   270  	if err != nil {
   271  		s3.WriteMptErr(w, r, err, 0, lom, uploadID)
   272  		return
   273  	}
    274  	// .2 <upload-id>.complete.<obj-name>
   275  	prefix := uploadID + ".complete"
   276  	wfqn := fs.CSM.Gen(lom, fs.WorkfileType, prefix)
   277  	wfh, errC := lom.CreateFile(wfqn)
   278  	if errC != nil {
   279  		s3.WriteMptErr(w, r, errC, 0, lom, uploadID)
   280  		return
   281  	}
   282  	if remote && lom.CksumConf().Type != cos.ChecksumNone {
   283  		actualCksum = cos.NewCksumHash(lom.CksumConf().Type)
   284  	} else {
   285  		actualCksum = cos.NewCksumHash(cos.ChecksumMD5)
   286  	}
   287  	mw = multiWriter(actualCksum.H, wfh)
   288  
   289  	// .3 write
   290  	buf, slab := t.gmm.Alloc()
   291  	concatMD5, written, errA := _appendMpt(nparts, buf, mw)
   292  	slab.Free(buf)
   293  
   294  	if lom.IsFeatureSet(feat.FsyncPUT) {
   295  		errS := wfh.Sync()
   296  		debug.AssertNoErr(errS)
   297  	}
   298  	cos.Close(wfh)
   299  
   300  	if errA == nil && written != size {
   301  		errA = fmt.Errorf("upload %q %q: expected full size=%d, got %d", uploadID, lom.Cname(), size, written)
   302  	}
   303  	if errA != nil {
   304  		if nerr := cos.RemoveFile(wfqn); nerr != nil && !os.IsNotExist(nerr) {
    305  			nlog.Errorf(fmtNested, t, errA, "remove", wfqn, nerr)
   306  		}
   307  		s3.WriteMptErr(w, r, errA, 0, lom, uploadID)
   308  		return
   309  	}
   310  
    311  	// .4 (s3 client => ais://) compute the resulting checksum and, if still missing, the ETag
   312  	if actualCksum.H != nil {
   313  		actualCksum.Finalize()
   314  		lom.SetCksum(actualCksum.Cksum.Clone())
   315  	}
   316  	if etag == "" {
   317  		debug.Assert(!remote)
   318  		debug.Assert(concatMD5 != "")
   319  		resMD5 := cos.NewCksumHash(cos.ChecksumMD5)
   320  		_, err = resMD5.H.Write([]byte(concatMD5))
   321  		debug.AssertNoErr(err)
   322  		resMD5.Finalize()
   323  		etag = resMD5.Value() + cmn.AwsMultipartDelim + strconv.Itoa(len(partList.Parts))
   324  	}
   325  
   326  	// .5 finalize
   327  	lom.SetSize(size)
   328  	lom.SetCustomKey(cmn.ETag, etag)
   329  
   330  	poi := allocPOI()
   331  	{
   332  		poi.t = t
   333  		poi.atime = started.UnixNano()
   334  		poi.lom = lom
   335  		poi.workFQN = wfqn
   336  		poi.owt = cmn.OwtNone
   337  	}
   338  	ecode, errF := poi.finalize()
   339  	freePOI(poi)
   340  
   341  	// .6 cleanup parts - unconditionally
   342  	exists := s3.CleanupUpload(uploadID, lom.FQN, false /*aborted*/)
   343  	debug.Assert(exists)
   344  
   345  	if errF != nil {
   346  		// NOTE: not failing if remote op. succeeded
   347  		if !remote {
   348  			s3.WriteMptErr(w, r, errF, ecode, lom, uploadID)
   349  			return
   350  		}
    351  		nlog.Errorf("upload %q: failed to complete %s locally: %v (%d)", uploadID, lom.Cname(), errF, ecode)
   352  	}
   353  
   354  	// .7 respond
   355  	result := &s3.CompleteMptUploadResult{Bucket: bck.Name, Key: objName, ETag: etag}
   356  	sgl := t.gmm.NewSGL(0)
   357  	result.MustMarshal(sgl)
   358  	w.Header().Set(cos.HdrContentType, cos.ContentXML)
   359  	w.Header().Set(cos.S3CksumHeader, etag)
   360  	sgl.WriteTo2(w)
   361  	sgl.Free()
   362  }
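
// For non-remote buckets the resulting ETag is derived locally, as implemented above:
// the hex MD5 strings of all parts are concatenated in part order, the concatenation is
// MD5-hashed, and the number of parts is appended after a delimiter (assumed here to be
// "-", the AWS convention). A standalone sketch using only the stdlib (crypto/md5,
// encoding/hex, strconv, strings):
//
//	func localMptETag(partMD5s []string) string {
//		sum := md5.Sum([]byte(strings.Join(partMD5s, "")))
//		return hex.EncodeToString(sum[:]) + "-" + strconv.Itoa(len(partMD5s))
//	}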
   363  
   364  func _appendMpt(nparts []*s3.MptPart, buf []byte, mw io.Writer) (concatMD5 string, written int64, err error) {
   365  	for _, partInfo := range nparts {
   366  		var (
   367  			partFh   *os.File
   368  			partSize int64
   369  		)
   370  		concatMD5 += partInfo.MD5
   371  		if partFh, err = os.Open(partInfo.FQN); err != nil {
   372  			return "", 0, err
   373  		}
   374  		partSize, err = io.CopyBuffer(mw, partFh, buf)
   375  		cos.Close(partFh)
   376  		if err != nil {
   377  			return "", 0, err
   378  		}
   379  		written += partSize
   380  	}
   381  	return concatMD5, written, nil
   382  }
   383  
    384  // Abort an active multipart upload.
    385  // The request body is empty; the uploadID is passed via the URL query.
    386  // 1. The uploadID must exist
    387  // 2. Remove all temporary files
    388  // 3. Remove all related info from in-memory structures
   389  // https://docs.aws.amazon.com/AmazonS3/latest/API/API_AbortMultipartUpload.html
   390  func (t *target) abortMpt(w http.ResponseWriter, r *http.Request, items []string, q url.Values) {
   391  	bck, err, ecode := meta.InitByNameOnly(items[0], t.owner.bmd)
   392  	if err != nil {
   393  		s3.WriteErr(w, r, err, ecode)
   394  		return
   395  	}
   396  	objName := s3.ObjName(items)
   397  	lom := &core.LOM{ObjName: objName}
   398  	if err := lom.InitBck(bck.Bucket()); err != nil {
   399  		s3.WriteErr(w, r, err, 0)
   400  		return
   401  	}
   402  
   403  	uploadID := q.Get(s3.QparamMptUploadID)
   404  
   405  	if bck.IsRemoteS3() {
   406  		ecode, err := backend.AbortMpt(lom, r, q, uploadID)
   407  		if err != nil {
   408  			s3.WriteErr(w, r, err, ecode)
   409  			return
   410  		}
   411  	}
   412  
   413  	exists := s3.CleanupUpload(uploadID, "", true /*aborted*/)
   414  	if !exists {
   415  		err := fmt.Errorf("upload %q does not exist", uploadID)
   416  		s3.WriteErr(w, r, err, http.StatusNotFound)
   417  		return
   418  	}
   419  
    420  	// Respond with status 204 (No Content) and an empty body, as per the AWS docs.
   421  	w.WriteHeader(http.StatusNoContent)
   422  }
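
// Client-side counterpart (sketch, aws-sdk-go v1; `svc` and `out` as in the
// CreateMultipartUpload example above, names hypothetical):
//
//	_, err := svc.AbortMultipartUpload(&awss3.AbortMultipartUploadInput{
//		Bucket:   aws.String("mybucket"),
//		Key:      aws.String("myobject"),
//		UploadId: out.UploadId,
//	})
//	// on success the target responds with 204 No Content and an empty body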
   423  
    424  // List the already-stored parts of an active multipart upload by bucket name and uploadID.
    425  // (NOTE: `s3cmd` lists upload parts before deciding which parts can be skipped.)
    426  // s3cmd accepts an empty body with status 200; in that case it
    427  // (re)uploads all parts.
   428  // https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListParts.html
   429  func (t *target) listMptParts(w http.ResponseWriter, r *http.Request, bck *meta.Bck, objName string, q url.Values) {
   430  	uploadID := q.Get(s3.QparamMptUploadID)
   431  
   432  	lom := &core.LOM{ObjName: objName}
   433  	if err := lom.InitBck(bck.Bucket()); err != nil {
   434  		s3.WriteErr(w, r, err, 0)
   435  		return
   436  	}
   437  
   438  	parts, ecode, err := s3.ListParts(uploadID, lom)
   439  	if err != nil {
   440  		s3.WriteErr(w, r, err, ecode)
   441  		return
   442  	}
   443  	result := &s3.ListPartsResult{Bucket: bck.Name, Key: objName, UploadID: uploadID, Parts: parts}
   444  	sgl := t.gmm.NewSGL(0)
   445  	result.MustMarshal(sgl)
   446  	w.Header().Set(cos.HdrContentType, cos.ContentXML)
   447  	sgl.WriteTo2(w)
   448  	sgl.Free()
   449  }
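
// Client-side counterpart (sketch, aws-sdk-go v1, names hypothetical):
//
//	parts, err := svc.ListParts(&awss3.ListPartsInput{
//		Bucket:   aws.String("mybucket"),
//		Key:      aws.String("myobject"),
//		UploadId: out.UploadId,
//	})
//	// parts.Parts lists the already-stored parts returned by listMptParts above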
   450  
   451  // List all active multipart uploads for a bucket.
   452  // See https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListMultipartUploads.html
   453  // GET /?uploads&delimiter=Delimiter&encoding-type=EncodingType&key-marker=KeyMarker&
   454  // max-uploads=MaxUploads&prefix=Prefix&upload-id-marker=UploadIdMarker
   455  func (t *target) listMptUploads(w http.ResponseWriter, bck *meta.Bck, q url.Values) {
   456  	var (
   457  		maxUploads int
   458  		idMarker   string
   459  	)
   460  	if s := q.Get(s3.QparamMptMaxUploads); s != "" {
   461  		if v, err := strconv.Atoi(s); err == nil {
   462  			maxUploads = v
   463  		}
   464  	}
   465  	idMarker = q.Get(s3.QparamMptUploadIDMarker)
   466  	result := s3.ListUploads(bck.Name, idMarker, maxUploads)
   467  	sgl := t.gmm.NewSGL(0)
   468  	result.MustMarshal(sgl)
   469  	w.Header().Set(cos.HdrContentType, cos.ContentXML)
   470  	sgl.WriteTo2(w)
   471  	sgl.Free()
   472  }
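
// Client-side counterpart (sketch, aws-sdk-go v1, names hypothetical) exercising the two
// query parameters the handler above recognizes (max-uploads and upload-id-marker):
//
//	uploads, err := svc.ListMultipartUploads(&awss3.ListMultipartUploadsInput{
//		Bucket:         aws.String("mybucket"),
//		MaxUploads:     aws.Int64(100),
//		UploadIdMarker: aws.String("previously-returned-id"), // optional: resume after this upload ID
//	})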
   473  
    474  // Return the part identified by `partNumber` (URL query) of an object
    475  // that was previously uploaded via multipart upload
    476  // (the part's offset and size are loaded from the object's MPT xattr).
   477  // See:
   478  // https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObject.html
   479  func (t *target) getMptPart(w http.ResponseWriter, r *http.Request, bck *meta.Bck, objName string, q url.Values) {
   480  	lom := core.AllocLOM(objName)
   481  	defer core.FreeLOM(lom)
   482  	if err := lom.InitBck(bck.Bucket()); err != nil {
   483  		s3.WriteErr(w, r, err, 0)
   484  		return
   485  	}
   486  	partNum, err := s3.ParsePartNum(q.Get(s3.QparamMptPartNo))
   487  	if err != nil {
   488  		s3.WriteErr(w, r, err, 0)
   489  		return
   490  	}
   491  	// load mpt xattr and find out the part num's offset & size
   492  	off, size, status, err := s3.OffsetSorted(lom, partNum)
    493  	if err != nil {
    494  		s3.WriteErr(w, r, err, status)
    495  		return
    496  	}
    497  	fh, err := lom.OpenFile()
    498  	if err != nil {
    499  		s3.WriteErr(w, r, err, 0)
    500  		return
    501  	}
    502  	buf, slab := t.gmm.AllocSize(size)
    503  	reader := io.NewSectionReader(fh, off, size)
    504  	if _, err := io.CopyBuffer(w, reader, buf); err != nil {
    505  		s3.WriteErr(w, r, err, 0)
    506  	}
    507  	cos.Close(fh)
    508  	slab.Free(buf)
    509  }
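
// Client-side counterpart (sketch, aws-sdk-go v1, names hypothetical): retrieving a
// single part of a previously multipart-uploaded object:
//
//	obj, err := svc.GetObject(&awss3.GetObjectInput{
//		Bucket:     aws.String("mybucket"),
//		Key:        aws.String("myobject"),
//		PartNumber: aws.Int64(2), // return only the 2nd uploaded part
//	})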