github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/gateway/operations/putobject.go (about)

     1  package operations
     2  
     3  import (
     4  	"errors"
     5  	"net/http"
     6  	"net/url"
     7  	"strconv"
     8  	"time"
     9  
    10  	"github.com/treeverse/lakefs/pkg/block"
    11  	"github.com/treeverse/lakefs/pkg/catalog"
    12  	gatewayErrors "github.com/treeverse/lakefs/pkg/gateway/errors"
    13  	"github.com/treeverse/lakefs/pkg/gateway/path"
    14  	"github.com/treeverse/lakefs/pkg/gateway/serde"
    15  	"github.com/treeverse/lakefs/pkg/graveler"
    16  	"github.com/treeverse/lakefs/pkg/httputil"
    17  	"github.com/treeverse/lakefs/pkg/logging"
    18  	"github.com/treeverse/lakefs/pkg/permissions"
    19  	"github.com/treeverse/lakefs/pkg/upload"
    20  )
    21  
// Header and query-parameter names used by the S3 gateway to recognize
// copy-object and multipart upload-part requests.
const (
	CopySourceHeader      = "x-amz-copy-source"       // copy source: "<repo>/<ref>/<path>" (may be URL-encoded)
	CopySourceRangeHeader = "x-amz-copy-source-range" // optional byte range for UploadPartCopy
	QueryParamUploadID    = "uploadId"                // identifies an in-progress multipart upload
	QueryParamPartNumber  = "partNumber"              // part index within a multipart upload
)
    28  
// PutObject handles S3 PutObject requests routed through the gateway,
// including CopyObject and multipart UploadPart/UploadPartCopy variants.
type PutObject struct{}
    30  
    31  func (controller *PutObject) RequiredPermissions(req *http.Request, repoID, _, destPath string) (permissions.Node, error) {
    32  	copySource := req.Header.Get(CopySourceHeader)
    33  
    34  	if len(copySource) == 0 {
    35  		return permissions.Node{
    36  			Permission: permissions.Permission{
    37  				Action:   permissions.WriteObjectAction,
    38  				Resource: permissions.ObjectArn(repoID, destPath),
    39  			},
    40  		}, nil
    41  	}
    42  	// this is a copy operation
    43  	p, err := getPathFromSource(copySource)
    44  	if err != nil {
    45  		logging.FromContext(req.Context()).WithError(err).Error("could not parse copy source path")
    46  		return permissions.Node{}, gatewayErrors.ErrInvalidCopySource
    47  	}
    48  
    49  	return permissions.Node{
    50  		Type: permissions.NodeTypeAnd,
    51  		Nodes: []permissions.Node{
    52  			{
    53  				Permission: permissions.Permission{
    54  					Action:   permissions.WriteObjectAction,
    55  					Resource: permissions.ObjectArn(repoID, destPath),
    56  				},
    57  			},
    58  			{
    59  				Permission: permissions.Permission{
    60  					Action:   permissions.ReadObjectAction,
    61  					Resource: permissions.ObjectArn(p.Repo, p.Path),
    62  				},
    63  			},
    64  		},
    65  	}, nil
    66  }
    67  
    68  // extractEntryFromCopyReq: get metadata from source file
    69  func extractEntryFromCopyReq(w http.ResponseWriter, req *http.Request, o *PathOperation, copySource path.ResolvedAbsolutePath) *catalog.DBEntry {
    70  	ent, err := o.Catalog.GetEntry(req.Context(), copySource.Repo, copySource.Reference, copySource.Path, catalog.GetEntryParams{})
    71  	if err != nil {
    72  		o.Log(req).WithError(err).Error("could not read copy source")
    73  		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInvalidCopySource))
    74  		return nil
    75  	}
    76  	return ent
    77  }
    78  
    79  func getPathFromSource(copySource string) (path.ResolvedAbsolutePath, error) {
    80  	copySourceDecoded, err := url.QueryUnescape(copySource)
    81  	if err != nil {
    82  		copySourceDecoded = copySource
    83  	}
    84  	p, err := path.ResolveAbsolutePath(copySourceDecoded)
    85  	if err != nil {
    86  		return path.ResolvedAbsolutePath{}, gatewayErrors.ErrInvalidCopySource
    87  	}
    88  	return p, nil
    89  }
    90  
    91  func handleCopy(w http.ResponseWriter, req *http.Request, o *PathOperation, copySource string) {
    92  	repository := o.Repository.Name
    93  	branch := o.Reference
    94  	o.Incr("copy_object", o.Principal, repository, branch)
    95  	srcPath, err := getPathFromSource(copySource)
    96  	if err != nil {
    97  		o.Log(req).WithError(err).Error("could not parse copy source path")
    98  		// This is a solution to avoid misleading error messages in gateway. This is a pinpoint fix for the copy object
    99  		// API, since we decided not to change the entire gateway error handling in order to avoid breaking changes.
   100  		// See: https://github.com/treeverse/lakeFS/issues/7452
   101  		apiErr := gatewayErrors.Codes.ToAPIErrWithInternalError(gatewayErrors.ErrInvalidCopySource, err)
   102  		_ = o.EncodeError(w, req, err, apiErr)
   103  		return
   104  	}
   105  
   106  	ctx := req.Context()
   107  	entry, err := o.Catalog.CopyEntry(ctx, srcPath.Repo, srcPath.Reference, srcPath.Path, repository, branch, o.Path)
   108  	if err != nil {
   109  		o.Log(req).WithError(err).Error("could create a copy")
   110  		apiErr := gatewayErrors.Codes.ToAPIErrWithInternalError(gatewayErrors.ErrInvalidCopyDest, err)
   111  		_ = o.EncodeError(w, req, err, apiErr)
   112  		return
   113  	}
   114  
   115  	o.EncodeResponse(w, req, &serde.CopyObjectResult{
   116  		LastModified: serde.Timestamp(entry.CreationDate),
   117  		ETag:         httputil.ETag(entry.Checksum),
   118  	}, http.StatusOK)
   119  }
   120  
   121  func handleUploadPart(w http.ResponseWriter, req *http.Request, o *PathOperation) {
   122  	o.Incr("put_mpu_part", o.Principal, o.Repository.Name, o.Reference)
   123  	query := req.URL.Query()
   124  	uploadID := query.Get(QueryParamUploadID)
   125  	partNumberStr := query.Get(QueryParamPartNumber)
   126  
   127  	var partNumber int
   128  	if n, err := strconv.ParseInt(partNumberStr, 10, 32); err != nil { //nolint: mnd
   129  		o.Log(req).WithError(err).Error("invalid part number")
   130  		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInvalidPartNumberMarker))
   131  		return
   132  	} else {
   133  		partNumber = int(n)
   134  	}
   135  
   136  	req = req.WithContext(logging.AddFields(req.Context(), logging.Fields{
   137  		logging.PartNumberFieldKey: partNumber,
   138  		logging.UploadIDFieldKey:   uploadID,
   139  	}))
   140  
   141  	// handle the upload/copy itself
   142  	multiPart, err := o.MultipartTracker.Get(req.Context(), uploadID)
   143  	if err != nil {
   144  		o.Log(req).WithError(err).Error("could not read  multipart record")
   145  		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInternalError))
   146  		return
   147  	}
   148  
   149  	// see if this is an upload part with a request body, or is it a copy of another object
   150  	// https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPartCopy.html#API_UploadPartCopy_RequestSyntax
   151  	if copySource := req.Header.Get(CopySourceHeader); copySource != "" {
   152  		// see if there's a range passed as well
   153  		resolvedCopySource, err := getPathFromSource(copySource)
   154  		if err != nil {
   155  			o.Log(req).WithField("copy_source", copySource).WithError(err).Error("could not parse copy source path")
   156  			_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInvalidCopySource))
   157  			return
   158  		}
   159  		ent := extractEntryFromCopyReq(w, req, o, resolvedCopySource)
   160  		if ent == nil {
   161  			return // operation already failed
   162  		}
   163  		srcRepo := o.Repository
   164  		if resolvedCopySource.Repo != o.Repository.Name {
   165  			srcRepo, err = o.Catalog.GetRepository(req.Context(), resolvedCopySource.Repo)
   166  			if err != nil {
   167  				o.Log(req).
   168  					WithField("copy_source", copySource).
   169  					WithError(err).
   170  					Error("Failed to get repository")
   171  				_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInvalidCopySource))
   172  				return
   173  			}
   174  		}
   175  
   176  		src := block.ObjectPointer{
   177  			StorageNamespace: srcRepo.StorageNamespace,
   178  			IdentifierType:   ent.AddressType.ToIdentifierType(),
   179  			Identifier:       ent.PhysicalAddress,
   180  		}
   181  
   182  		dst := block.ObjectPointer{
   183  			StorageNamespace: o.Repository.StorageNamespace,
   184  			IdentifierType:   block.IdentifierTypeRelative,
   185  			Identifier:       multiPart.PhysicalAddress,
   186  		}
   187  
   188  		var resp *block.UploadPartResponse
   189  		if rang := req.Header.Get(CopySourceRangeHeader); rang != "" {
   190  			// if this is a copy part with a byte range:
   191  			parsedRange, parseErr := httputil.ParseRange(rang, ent.Size)
   192  			if parseErr != nil {
   193  				// invalid range will silently fall back to copying the entire object. ¯\_(ツ)_/¯
   194  				resp, err = o.BlockStore.UploadCopyPart(req.Context(), src, dst, uploadID, partNumber)
   195  			} else {
   196  				resp, err = o.BlockStore.UploadCopyPartRange(req.Context(), src, dst, uploadID, partNumber, parsedRange.StartOffset, parsedRange.EndOffset)
   197  			}
   198  		} else {
   199  			// normal copy part that accepts another object and no byte range:
   200  			resp, err = o.BlockStore.UploadCopyPart(req.Context(), src, dst, uploadID, partNumber)
   201  		}
   202  
   203  		if err != nil {
   204  			o.Log(req).
   205  				WithFields(logging.Fields{
   206  					"copy_source": ent.Path,
   207  					"part":        partNumberStr,
   208  				}).
   209  				WithError(err).
   210  				Error("copy part: upload failed")
   211  			_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInternalError))
   212  			return
   213  		}
   214  
   215  		o.EncodeResponse(w, req, &serde.CopyObjectResult{
   216  			LastModified: serde.Timestamp(time.Now()),
   217  			ETag:         httputil.ETag(resp.ETag),
   218  		}, http.StatusOK)
   219  		return
   220  	}
   221  
   222  	byteSize := req.ContentLength
   223  	resp, err := o.BlockStore.UploadPart(req.Context(), block.ObjectPointer{
   224  		StorageNamespace: o.Repository.StorageNamespace,
   225  		IdentifierType:   block.IdentifierTypeRelative,
   226  		Identifier:       multiPart.PhysicalAddress,
   227  	},
   228  		byteSize, req.Body, uploadID, partNumber)
   229  	if err != nil {
   230  		o.Log(req).WithField("part", partNumberStr).
   231  			WithError(err).Error("part upload failed")
   232  		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInternalError))
   233  		return
   234  	}
   235  	o.SetHeaders(w, resp.ServerSideHeader)
   236  	o.SetHeader(w, "ETag", httputil.ETag(resp.ETag))
   237  	w.WriteHeader(http.StatusOK)
   238  }
   239  
   240  func (controller *PutObject) Handle(w http.ResponseWriter, req *http.Request, o *PathOperation) {
   241  	if o.HandleUnsupported(w, req, "torrent", "acl") {
   242  		return
   243  	}
   244  
   245  	// verify branch before we upload data - fail early
   246  	branchExists, err := o.Catalog.BranchExists(req.Context(), o.Repository.Name, o.Reference)
   247  	if err != nil {
   248  		o.Log(req).WithError(err).Error("could not check if branch exists")
   249  		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInternalError))
   250  		return
   251  	}
   252  	if !branchExists {
   253  		o.Log(req).Debug("branch not found")
   254  		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrNoSuchBucket))
   255  		return
   256  	}
   257  
   258  	query := req.URL.Query()
   259  
   260  	// check if this is a multipart upload creation call
   261  	if query.Has(QueryParamUploadID) {
   262  		handleUploadPart(w, req, o)
   263  		return
   264  	}
   265  
   266  	// check if this is a copy operation (i.e. https://docs.aws.amazon.com/AmazonS3/latest/API/API_CopyObject.html)
   267  	// A copy operation is identified by the existence of an "x-amz-copy-source" header
   268  	copySource := req.Header.Get(CopySourceHeader)
   269  	if len(copySource) > 0 {
   270  		// The *first* PUT operation sets PutOpts such as
   271  		// storage class, subsequent PUT operations of the
   272  		// same file continue to use that storage class.
   273  
   274  		// TODO(ariels): Add a counter for how often a copy has different options
   275  		handleCopy(w, req, o, copySource)
   276  		return
   277  	}
   278  
   279  	if query.Has("tagging") {
   280  		o.Log(req).Debug("put-object-tagging isn't supported yet")
   281  		o.EncodeError(w, req, nil, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ERRLakeFSNotSupported))
   282  		return
   283  	}
   284  
   285  	// handle the upload itself
   286  	handlePut(w, req, o)
   287  }
   288  
   289  func handlePut(w http.ResponseWriter, req *http.Request, o *PathOperation) {
   290  	o.Incr("put_object", o.Principal, o.Repository.Name, o.Reference)
   291  	storageClass := StorageClassFromHeader(req.Header)
   292  	opts := block.PutOpts{StorageClass: storageClass}
   293  	address := o.PathProvider.NewPath()
   294  	blob, err := upload.WriteBlob(req.Context(), o.BlockStore, o.Repository.StorageNamespace, address, req.Body, req.ContentLength, opts)
   295  	if err != nil {
   296  		o.Log(req).WithError(err).Error("could not write request body to block adapter")
   297  		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInternalError))
   298  		return
   299  	}
   300  
   301  	// write metadata
   302  	metadata := amzMetaAsMetadata(req)
   303  	contentType := req.Header.Get("Content-Type")
   304  	err = o.finishUpload(req, blob.Checksum, blob.PhysicalAddress, blob.Size, true, metadata, contentType)
   305  	if errors.Is(err, graveler.ErrWriteToProtectedBranch) {
   306  		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrWriteToProtectedBranch))
   307  		return
   308  	}
   309  	if errors.Is(err, graveler.ErrReadOnlyRepository) {
   310  		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrReadOnlyRepository))
   311  		return
   312  	}
   313  	if err != nil {
   314  		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInternalError))
   315  		return
   316  	}
   317  	o.SetHeader(w, "ETag", httputil.ETag(blob.Checksum))
   318  	w.WriteHeader(http.StatusOK)
   319  }