// github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/gateway/operations/putobject.go

package operations

import (
	"errors"
	"net/http"
	"net/url"
	"strconv"
	"time"

	"github.com/treeverse/lakefs/pkg/block"
	"github.com/treeverse/lakefs/pkg/catalog"
	gatewayErrors "github.com/treeverse/lakefs/pkg/gateway/errors"
	"github.com/treeverse/lakefs/pkg/gateway/path"
	"github.com/treeverse/lakefs/pkg/gateway/serde"
	"github.com/treeverse/lakefs/pkg/graveler"
	"github.com/treeverse/lakefs/pkg/httputil"
	"github.com/treeverse/lakefs/pkg/logging"
	"github.com/treeverse/lakefs/pkg/permissions"
	"github.com/treeverse/lakefs/pkg/upload"
)

const (
	CopySourceHeader      = "x-amz-copy-source"
	CopySourceRangeHeader = "x-amz-copy-source-range"
	QueryParamUploadID    = "uploadId"
	QueryParamPartNumber  = "partNumber"
)

type PutObject struct{}

func (controller *PutObject) RequiredPermissions(req *http.Request, repoID, _, destPath string) (permissions.Node, error) {
	copySource := req.Header.Get(CopySourceHeader)

	if len(copySource) == 0 {
		return permissions.Node{
			Permission: permissions.Permission{
				Action:   permissions.WriteObjectAction,
				Resource: permissions.ObjectArn(repoID, destPath),
			},
		}, nil
	}
	// this is a copy operation
	p, err := getPathFromSource(copySource)
	if err != nil {
		logging.FromContext(req.Context()).WithError(err).Error("could not parse copy source path")
		return permissions.Node{}, gatewayErrors.ErrInvalidCopySource
	}

	return permissions.Node{
		Type: permissions.NodeTypeAnd,
		Nodes: []permissions.Node{
			{
				Permission: permissions.Permission{
					Action:   permissions.WriteObjectAction,
					Resource: permissions.ObjectArn(repoID, destPath),
				},
			},
			{
				Permission: permissions.Permission{
					Action:   permissions.ReadObjectAction,
					Resource: permissions.ObjectArn(p.Repo, p.Path),
				},
			},
		},
	}, nil
}

// extractEntryFromCopyReq gets the metadata of the copy-source entry; on
// failure it encodes an error response and returns nil.
func extractEntryFromCopyReq(w http.ResponseWriter, req *http.Request, o *PathOperation, copySource path.ResolvedAbsolutePath) *catalog.DBEntry {
	ent, err := o.Catalog.GetEntry(req.Context(), copySource.Repo, copySource.Reference, copySource.Path, catalog.GetEntryParams{})
	if err != nil {
		o.Log(req).WithError(err).Error("could not read copy source")
		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInvalidCopySource))
		return nil
	}
	return ent
}

// getPathFromSource resolves a copy-source header value into its repository,
// reference and path components.
func getPathFromSource(copySource string) (path.ResolvedAbsolutePath, error) {
	// the header may arrive URL-encoded; fall back to the raw value if unescaping fails
	copySourceDecoded, err := url.QueryUnescape(copySource)
	if err != nil {
		copySourceDecoded = copySource
	}
	p, err := path.ResolveAbsolutePath(copySourceDecoded)
	if err != nil {
		return path.ResolvedAbsolutePath{}, gatewayErrors.ErrInvalidCopySource
	}
	return p, nil
}
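
// For example (illustrative values only, not taken from this file), a
// CopyObject request of the form
//
//	PUT /dest-repo/main/data/object.txt
//	x-amz-copy-source: /src-repo/main/data/object.txt
//
// resolves through getPathFromSource to Repo="src-repo", Reference="main",
// Path="data/object.txt"; RequiredPermissions then demands both write access
// to the destination object and read access to the source object.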
func handleCopy(w http.ResponseWriter, req *http.Request, o *PathOperation, copySource string) {
	repository := o.Repository.Name
	branch := o.Reference
	o.Incr("copy_object", o.Principal, repository, branch)
	srcPath, err := getPathFromSource(copySource)
	if err != nil {
		o.Log(req).WithError(err).Error("could not parse copy source path")
		// This is a solution to avoid misleading error messages in the gateway. It is a pinpoint fix for the
		// copy object API, since we decided not to change the entire gateway error handling in order to avoid
		// breaking changes.
		// See: https://github.com/treeverse/lakeFS/issues/7452
		apiErr := gatewayErrors.Codes.ToAPIErrWithInternalError(gatewayErrors.ErrInvalidCopySource, err)
		_ = o.EncodeError(w, req, err, apiErr)
		return
	}

	ctx := req.Context()
	entry, err := o.Catalog.CopyEntry(ctx, srcPath.Repo, srcPath.Reference, srcPath.Path, repository, branch, o.Path)
	if err != nil {
		o.Log(req).WithError(err).Error("could not create a copy")
		apiErr := gatewayErrors.Codes.ToAPIErrWithInternalError(gatewayErrors.ErrInvalidCopyDest, err)
		_ = o.EncodeError(w, req, err, apiErr)
		return
	}

	o.EncodeResponse(w, req, &serde.CopyObjectResult{
		LastModified: serde.Timestamp(entry.CreationDate),
		ETag:         httputil.ETag(entry.Checksum),
	}, http.StatusOK)
}
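
// A part upload request identifies its multipart session through query
// parameters, for example (illustrative values only):
//
//	PUT /repo/main/big-file?partNumber=3&uploadId=abc123
//
// An UploadPartCopy request additionally carries x-amz-copy-source (and
// optionally x-amz-copy-source-range) instead of a request body.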
func handleUploadPart(w http.ResponseWriter, req *http.Request, o *PathOperation) {
	o.Incr("put_mpu_part", o.Principal, o.Repository.Name, o.Reference)
	query := req.URL.Query()
	uploadID := query.Get(QueryParamUploadID)
	partNumberStr := query.Get(QueryParamPartNumber)

	n, err := strconv.ParseInt(partNumberStr, 10, 32) //nolint: mnd
	if err != nil {
		o.Log(req).WithError(err).Error("invalid part number")
		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInvalidPartNumberMarker))
		return
	}
	partNumber := int(n)

	req = req.WithContext(logging.AddFields(req.Context(), logging.Fields{
		logging.PartNumberFieldKey: partNumber,
		logging.UploadIDFieldKey:   uploadID,
	}))

	// handle the upload/copy itself
	multiPart, err := o.MultipartTracker.Get(req.Context(), uploadID)
	if err != nil {
		o.Log(req).WithError(err).Error("could not read multipart record")
		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInternalError))
		return
	}

	// see if this is an upload part with a request body, or a copy of another object
	// https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPartCopy.html#API_UploadPartCopy_RequestSyntax
	if copySource := req.Header.Get(CopySourceHeader); copySource != "" {
		// see if there's a range passed as well
		resolvedCopySource, err := getPathFromSource(copySource)
		if err != nil {
			o.Log(req).WithField("copy_source", copySource).WithError(err).Error("could not parse copy source path")
			_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInvalidCopySource))
			return
		}
		ent := extractEntryFromCopyReq(w, req, o, resolvedCopySource)
		if ent == nil {
			return // operation already failed
		}
		srcRepo := o.Repository
		if resolvedCopySource.Repo != o.Repository.Name {
			srcRepo, err = o.Catalog.GetRepository(req.Context(), resolvedCopySource.Repo)
			if err != nil {
				o.Log(req).
					WithField("copy_source", copySource).
					WithError(err).
					Error("failed to get repository")
				_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInvalidCopySource))
				return
			}
		}

		src := block.ObjectPointer{
			StorageNamespace: srcRepo.StorageNamespace,
			IdentifierType:   ent.AddressType.ToIdentifierType(),
			Identifier:       ent.PhysicalAddress,
		}

		dst := block.ObjectPointer{
			StorageNamespace: o.Repository.StorageNamespace,
			IdentifierType:   block.IdentifierTypeRelative,
			Identifier:       multiPart.PhysicalAddress,
		}

		var resp *block.UploadPartResponse
		if rang := req.Header.Get(CopySourceRangeHeader); rang != "" {
			// this is a copy part with a byte range
			parsedRange, parseErr := httputil.ParseRange(rang, ent.Size)
			if parseErr != nil {
				// an invalid range silently falls back to copying the entire object. ¯\_(ツ)_/¯
				resp, err = o.BlockStore.UploadCopyPart(req.Context(), src, dst, uploadID, partNumber)
			} else {
				resp, err = o.BlockStore.UploadCopyPartRange(req.Context(), src, dst, uploadID, partNumber, parsedRange.StartOffset, parsedRange.EndOffset)
			}
		} else {
			// normal copy part that accepts another object and no byte range
			resp, err = o.BlockStore.UploadCopyPart(req.Context(), src, dst, uploadID, partNumber)
		}

		if err != nil {
			o.Log(req).
				WithFields(logging.Fields{
					"copy_source": ent.Path,
					"part":        partNumberStr,
				}).
				WithError(err).
				Error("copy part: upload failed")
			_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInternalError))
			return
		}

		o.EncodeResponse(w, req, &serde.CopyObjectResult{
			LastModified: serde.Timestamp(time.Now()),
			ETag:         httputil.ETag(resp.ETag),
		}, http.StatusOK)
		return
	}

	byteSize := req.ContentLength
	resp, err := o.BlockStore.UploadPart(req.Context(), block.ObjectPointer{
		StorageNamespace: o.Repository.StorageNamespace,
		IdentifierType:   block.IdentifierTypeRelative,
		Identifier:       multiPart.PhysicalAddress,
	},
		byteSize, req.Body, uploadID, partNumber)
	if err != nil {
		o.Log(req).WithField("part", partNumberStr).
			WithError(err).Error("part upload failed")
		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInternalError))
		return
	}
	o.SetHeaders(w, resp.ServerSideHeader)
	o.SetHeader(w, "ETag", httputil.ETag(resp.ETag))
	w.WriteHeader(http.StatusOK)
}
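
// Handle dispatches a PUT request to the matching S3 operation, checked in
// order: multipart part upload (uploadId query parameter), server-side copy
// (x-amz-copy-source header), object tagging (currently unsupported), and
// finally a plain object upload.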
func (controller *PutObject) Handle(w http.ResponseWriter, req *http.Request, o *PathOperation) {
	if o.HandleUnsupported(w, req, "torrent", "acl") {
		return
	}

	// verify branch before we upload data - fail early
	branchExists, err := o.Catalog.BranchExists(req.Context(), o.Repository.Name, o.Reference)
	if err != nil {
		o.Log(req).WithError(err).Error("could not check if branch exists")
		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInternalError))
		return
	}
	if !branchExists {
		o.Log(req).Debug("branch not found")
		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrNoSuchBucket))
		return
	}

	query := req.URL.Query()

	// check if this is a multipart upload part call
	if query.Has(QueryParamUploadID) {
		handleUploadPart(w, req, o)
		return
	}

	// check if this is a copy operation (see https://docs.aws.amazon.com/AmazonS3/latest/API/API_CopyObject.html).
	// A copy operation is identified by the presence of an "x-amz-copy-source" header.
	copySource := req.Header.Get(CopySourceHeader)
	if len(copySource) > 0 {
		// The *first* PUT operation sets PutOpts such as
		// storage class; subsequent PUT operations of the
		// same file continue to use that storage class.

		// TODO(ariels): Add a counter for how often a copy has different options
		handleCopy(w, req, o, copySource)
		return
	}

	if query.Has("tagging") {
		o.Log(req).Debug("put-object-tagging isn't supported yet")
		_ = o.EncodeError(w, req, nil, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ERRLakeFSNotSupported))
		return
	}

	// handle the upload itself
	handlePut(w, req, o)
}

func handlePut(w http.ResponseWriter, req *http.Request, o *PathOperation) {
	o.Incr("put_object", o.Principal, o.Repository.Name, o.Reference)
	storageClass := StorageClassFromHeader(req.Header)
	opts := block.PutOpts{StorageClass: storageClass}
	address := o.PathProvider.NewPath()
	blob, err := upload.WriteBlob(req.Context(), o.BlockStore, o.Repository.StorageNamespace, address, req.Body, req.ContentLength, opts)
	if err != nil {
		o.Log(req).WithError(err).Error("could not write request body to block adapter")
		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInternalError))
		return
	}

	// write metadata
	metadata := amzMetaAsMetadata(req)
	contentType := req.Header.Get("Content-Type")
	err = o.finishUpload(req, blob.Checksum, blob.PhysicalAddress, blob.Size, true, metadata, contentType)
	if errors.Is(err, graveler.ErrWriteToProtectedBranch) {
		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrWriteToProtectedBranch))
		return
	}
	if errors.Is(err, graveler.ErrReadOnlyRepository) {
		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrReadOnlyRepository))
		return
	}
	if err != nil {
		_ = o.EncodeError(w, req, err, gatewayErrors.Codes.ToAPIErr(gatewayErrors.ErrInternalError))
		return
	}
	o.SetHeader(w, "ETag", httputil.ETag(blob.Checksum))
	w.WriteHeader(http.StatusOK)
}