github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/tgts3mpt.go

// Package ais provides core functionality for the AIStore object storage.
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */
package ais

import (
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"sort"
	"strconv"
	"time"

	"github.com/NVIDIA/aistore/ais/backend"
	"github.com/NVIDIA/aistore/ais/s3"
	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/feat"
	"github.com/NVIDIA/aistore/cmn/nlog"
	"github.com/NVIDIA/aistore/core"
	"github.com/NVIDIA/aistore/core/meta"
	"github.com/NVIDIA/aistore/fs"
)

func decodeXML[T any](body []byte) (result T, _ error) {
	if err := xml.Unmarshal(body, &result); err != nil {
		return result, err
	}
	return result, nil
}

func multiWriter(writers ...io.Writer) io.Writer {
	a := make([]io.Writer, 0, 3)
	for _, w := range writers {
		if w != nil {
			a = append(a, w)
		}
	}
	return io.MultiWriter(a...)
}

// Initialize multipart upload.
// - Generate UUID for the upload
// - Return the UUID to the caller
// https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html
func (t *target) startMpt(w http.ResponseWriter, r *http.Request, items []string, bck *meta.Bck, q url.Values) {
	var (
		objName  = s3.ObjName(items)
		lom      = &core.LOM{ObjName: objName}
		uploadID string
		ecode    int
	)
	err := lom.InitBck(bck.Bucket())
	if err != nil {
		s3.WriteErr(w, r, err, 0)
		return
	}
	if bck.IsRemoteS3() {
		uploadID, ecode, err = backend.StartMpt(lom, r, q)
		if err != nil {
			s3.WriteErr(w, r, err, ecode)
			return
		}
	} else {
		uploadID = cos.GenUUID()
	}

	s3.InitUpload(uploadID, bck.Name, objName)
	result := &s3.InitiateMptUploadResult{Bucket: bck.Name, Key: objName, UploadID: uploadID}

	sgl := t.gmm.NewSGL(0)
	result.MustMarshal(sgl)
	w.Header().Set(cos.HdrContentType, cos.ContentXML)
	sgl.WriteTo2(w)
	sgl.Free()
}

// PUT a part of the multipart upload.
// The part's data is the request body; part number and upload ID are passed via query params.
//
// "Content-MD5" in the part headers seems to be deprecated:
// either not present (s3cmd) or cannot be trusted (aws s3api).
//
// https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPart.html
func (t *target) putMptPart(w http.ResponseWriter, r *http.Request, items []string, q url.Values, bck *meta.Bck) {
	// 1. parse/validate
	uploadID := q.Get(s3.QparamMptUploadID)
	if uploadID == "" {
		s3.WriteErr(w, r, errors.New("empty uploadId"), 0)
		return
	}
	part := q.Get(s3.QparamMptPartNo)
	if part == "" {
		s3.WriteErr(w, r, fmt.Errorf("upload %q: missing part number", uploadID), 0)
		return
	}
	partNum, err := s3.ParsePartNum(part)
	if err != nil {
		s3.WriteErr(w, r, err, 0)
		return
	}
	if partNum < 1 || partNum > s3.MaxPartsPerUpload {
		err := fmt.Errorf("upload %q: invalid part number %d, must be between 1 and %d",
			uploadID, partNum, s3.MaxPartsPerUpload)
		s3.WriteErr(w, r, err, 0)
		return
	}
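	// Note: the part's data is persisted in its own workfile (naming scheme below).
	// On error the workfile is removed right away; otherwise, part workfiles are
	// kept until the upload is completed or aborted (s3.CleanupUpload).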
	// 2. init lom, create part file
	objName := s3.ObjName(items)
	lom := &core.LOM{ObjName: objName}
	if err := lom.InitBck(bck.Bucket()); err != nil {
		s3.WriteErr(w, r, err, 0)
		return
	}
	// workfile name format: <upload-id>.<part-number>.<obj-name>
	prefix := uploadID + "." + strconv.FormatInt(int64(partNum), 10)
	wfqn := fs.CSM.Gen(lom, fs.WorkfileType, prefix)
	partFh, errC := lom.CreateFileRW(wfqn)
	if errC != nil {
		s3.WriteMptErr(w, r, errC, 0, lom, uploadID)
		return
	}

	var (
		etag         string
		size         int64
		ecode        int
		partSHA      = r.Header.Get(cos.S3HdrContentSHA256)
		checkPartSHA = partSHA != "" && partSHA != cos.S3UnsignedPayload
		cksumSHA     = &cos.CksumHash{}
		cksumMD5     = &cos.CksumHash{}
		remote       = bck.IsRemoteS3()
	)
	if checkPartSHA {
		cksumSHA = cos.NewCksumHash(cos.ChecksumSHA256)
	}
	if !remote {
		cksumMD5 = cos.NewCksumHash(cos.ChecksumMD5)
	}

	// 3. write
	mw := multiWriter(cksumMD5.H, cksumSHA.H, partFh)

	if !remote {
		// write locally
		buf, slab := t.gmm.Alloc()
		size, err = io.CopyBuffer(mw, r.Body, buf)
		slab.Free(buf)
	} else {
		// write locally and utilize TeeReader to simultaneously send data to S3
		tr := io.NopCloser(io.TeeReader(r.Body, mw))
		size = r.ContentLength
		debug.Assert(size > 0, "mpt upload: expecting positive content-length")

		etag, ecode, err = backend.PutMptPart(lom, tr, r, q, uploadID, size, partNum)
	}

	cos.Close(partFh)
	if err != nil {
		if nerr := cos.RemoveFile(wfqn); nerr != nil && !os.IsNotExist(nerr) {
			nlog.Errorf(fmtNested, t, err, "remove", wfqn, nerr)
		}
		s3.WriteMptErr(w, r, err, ecode, lom, uploadID)
		return
	}

	// 4. finalize the part (expecting the part's remote etag to be its md5 checksum)
	md5 := etag
	if cksumMD5.H != nil {
		debug.Assert(etag == "")
		cksumMD5.Finalize()
		md5 = cksumMD5.Value()
	}
	if checkPartSHA {
		cksumSHA.Finalize()
		recvSHA := cos.NewCksum(cos.ChecksumSHA256, partSHA)
		if !cksumSHA.Equal(recvSHA) {
			detail := fmt.Sprintf("upload %q, %s, part %d", uploadID, lom, partNum)
			err = cos.NewErrDataCksum(&cksumSHA.Cksum, recvSHA, detail)
			s3.WriteMptErr(w, r, err, http.StatusInternalServerError, lom, uploadID)
			return
		}
	}
	npart := &s3.MptPart{
		MD5:  md5,
		FQN:  wfqn,
		Size: size,
		Num:  partNum,
	}
	if err := s3.AddPart(uploadID, npart); err != nil {
		s3.WriteMptErr(w, r, err, 0, lom, uploadID)
		return
	}
	w.Header().Set(cos.S3CksumHeader, md5) // s3cmd checks this one
}
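// Each successfully PUT part is registered with its upload (s3.AddPart above) as an
// s3.MptPart{MD5, FQN, Size, Num}. completeMpt (below) validates the client-supplied
// part list against these records (s3.CheckParts) before concatenating the part
// workfiles into the final object.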
// Complete multipart upload.
// Body contains XML with the list of parts that must already be on the storage.
// 1. Check that all parts from the request body are present
// 2. Merge all parts into a single file and calculate its ETag
// 3. Return ETag to the caller
// https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html
func (t *target) completeMpt(w http.ResponseWriter, r *http.Request, items []string, q url.Values, bck *meta.Bck) {
	// parse/validate
	uploadID := q.Get(s3.QparamMptUploadID)
	if uploadID == "" {
		s3.WriteErr(w, r, errors.New("empty uploadId"), 0)
		return
	}

	output, err := io.ReadAll(r.Body)
	if err != nil {
		s3.WriteErr(w, r, err, http.StatusBadRequest)
		return
	}
	partList, err := decodeXML[*s3.CompleteMptUpload](output)
	if err != nil {
		s3.WriteErr(w, r, err, http.StatusBadRequest)
		return
	}
	if len(partList.Parts) == 0 {
		s3.WriteErr(w, r, fmt.Errorf("upload %q: empty list of upload parts", uploadID), 0)
		return
	}
	objName := s3.ObjName(items)
	lom := &core.LOM{ObjName: objName}
	if err := lom.InitBck(bck.Bucket()); err != nil {
		s3.WriteErr(w, r, err, 0)
		return
	}
	size, errN := s3.ObjSize(uploadID)
	if errN != nil {
		s3.WriteMptErr(w, r, errN, 0, lom, uploadID)
		return
	}

	// call s3
	var (
		etag    string
		started = time.Now()
		remote  = bck.IsRemoteS3()
	)
	if remote {
		v, ecode, err := backend.CompleteMpt(lom, r, q, uploadID, partList)
		if err != nil {
			s3.WriteMptErr(w, r, err, ecode, lom, uploadID)
			return
		}
		etag = v
	}

	// append parts and finalize locally
	var (
		mw          io.Writer
		concatMD5   string // => ETag
		actualCksum = &cos.CksumHash{}
	)
	// .1 sort and check parts
	sort.Slice(partList.Parts, func(i, j int) bool {
		return partList.Parts[i].PartNumber < partList.Parts[j].PartNumber
	})
	nparts, err := s3.CheckParts(uploadID, partList.Parts)
	if err != nil {
		s3.WriteMptErr(w, r, err, 0, lom, uploadID)
		return
	}
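	// Note: for remote buckets the work file is checksummed with the bucket's
	// configured checksum type (unless 'none'); otherwise, the whole object gets an
	// MD5 checksum, while the resulting ETag is derived from the per-part MD5s
	// (see .4 below).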
	// .2 create work file: <upload-id>.complete.<obj-name>
	prefix := uploadID + ".complete"
	wfqn := fs.CSM.Gen(lom, fs.WorkfileType, prefix)
	wfh, errC := lom.CreateFile(wfqn)
	if errC != nil {
		s3.WriteMptErr(w, r, errC, 0, lom, uploadID)
		return
	}
	if remote && lom.CksumConf().Type != cos.ChecksumNone {
		actualCksum = cos.NewCksumHash(lom.CksumConf().Type)
	} else {
		actualCksum = cos.NewCksumHash(cos.ChecksumMD5)
	}
	mw = multiWriter(actualCksum.H, wfh)

	// .3 write
	buf, slab := t.gmm.Alloc()
	concatMD5, written, errA := _appendMpt(nparts, buf, mw)
	slab.Free(buf)

	if lom.IsFeatureSet(feat.FsyncPUT) {
		errS := wfh.Sync()
		debug.AssertNoErr(errS)
	}
	cos.Close(wfh)

	if errA == nil && written != size {
		errA = fmt.Errorf("upload %q %q: expected full size=%d, got %d", uploadID, lom.Cname(), size, written)
	}
	if errA != nil {
		if nerr := cos.RemoveFile(wfqn); nerr != nil && !os.IsNotExist(nerr) {
			nlog.Errorf(fmtNested, t, errA, "remove", wfqn, nerr)
		}
		s3.WriteMptErr(w, r, errA, 0, lom, uploadID)
		return
	}

	// .4 (s3 client => ais://) compute resulting MD5 and, optionally, ETag
	if actualCksum.H != nil {
		actualCksum.Finalize()
		lom.SetCksum(actualCksum.Cksum.Clone())
	}
	if etag == "" {
		debug.Assert(!remote)
		debug.Assert(concatMD5 != "")
		resMD5 := cos.NewCksumHash(cos.ChecksumMD5)
		_, err = resMD5.H.Write([]byte(concatMD5))
		debug.AssertNoErr(err)
		resMD5.Finalize()
		etag = resMD5.Value() + cmn.AwsMultipartDelim + strconv.Itoa(len(partList.Parts))
	}

	// .5 finalize
	lom.SetSize(size)
	lom.SetCustomKey(cmn.ETag, etag)

	poi := allocPOI()
	{
		poi.t = t
		poi.atime = started.UnixNano()
		poi.lom = lom
		poi.workFQN = wfqn
		poi.owt = cmn.OwtNone
	}
	ecode, errF := poi.finalize()
	freePOI(poi)

	// .6 cleanup parts - unconditionally
	exists := s3.CleanupUpload(uploadID, lom.FQN, false /*aborted*/)
	debug.Assert(exists)

	if errF != nil {
		// NOTE: not failing if the remote op. succeeded
		if !remote {
			s3.WriteMptErr(w, r, errF, ecode, lom, uploadID)
			return
		}
		nlog.Errorf("upload %q: failed to complete %s locally: %v(%d)", uploadID, lom.Cname(), errF, ecode)
	}

	// .7 respond
	result := &s3.CompleteMptUploadResult{Bucket: bck.Name, Key: objName, ETag: etag}
	sgl := t.gmm.NewSGL(0)
	result.MustMarshal(sgl)
	w.Header().Set(cos.HdrContentType, cos.ContentXML)
	w.Header().Set(cos.S3CksumHeader, etag)
	sgl.WriteTo2(w)
	sgl.Free()
}

// _appendMpt appends the given parts, in the given order, to mw and returns the
// concatenation of the per-part MD5 strings (later used to compute the resulting ETag)
// together with the total number of bytes written.
func _appendMpt(nparts []*s3.MptPart, buf []byte, mw io.Writer) (concatMD5 string, written int64, err error) {
	for _, partInfo := range nparts {
		var (
			partFh   *os.File
			partSize int64
		)
		concatMD5 += partInfo.MD5
		if partFh, err = os.Open(partInfo.FQN); err != nil {
			return "", 0, err
		}
		partSize, err = io.CopyBuffer(mw, partFh, buf)
		cos.Close(partFh)
		if err != nil {
			return "", 0, err
		}
		written += partSize
	}
	return concatMD5, written, nil
}
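// A minimal sketch of the ETag derivation that completeMpt (above) performs for
// non-remote buckets: MD5 over the concatenation of the per-part MD5 strings,
// suffixed with the multipart delimiter and the number of parts. The helper name
// below is illustrative only and is not referenced anywhere in this package.
func _mptETagSketch(partMD5s []string) string {
	resMD5 := cos.NewCksumHash(cos.ChecksumMD5)
	for _, md5 := range partMD5s {
		_, err := resMD5.H.Write([]byte(md5)) // hash the concatenated (string) MD5s, as in completeMpt
		debug.AssertNoErr(err)
	}
	resMD5.Finalize()
	return resMD5.Value() + cmn.AwsMultipartDelim + strconv.Itoa(len(partMD5s))
}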
// Abort an active multipart upload.
// Body is empty; only the URL query contains the uploadID.
// 1. uploadID must exist
// 2. Remove all temporary files
// 3. Remove all info from in-memory structs
// https://docs.aws.amazon.com/AmazonS3/latest/API/API_AbortMultipartUpload.html
func (t *target) abortMpt(w http.ResponseWriter, r *http.Request, items []string, q url.Values) {
	bck, err, ecode := meta.InitByNameOnly(items[0], t.owner.bmd)
	if err != nil {
		s3.WriteErr(w, r, err, ecode)
		return
	}
	objName := s3.ObjName(items)
	lom := &core.LOM{ObjName: objName}
	if err := lom.InitBck(bck.Bucket()); err != nil {
		s3.WriteErr(w, r, err, 0)
		return
	}

	uploadID := q.Get(s3.QparamMptUploadID)

	if bck.IsRemoteS3() {
		ecode, err := backend.AbortMpt(lom, r, q, uploadID)
		if err != nil {
			s3.WriteErr(w, r, err, ecode)
			return
		}
	}

	exists := s3.CleanupUpload(uploadID, "", true /*aborted*/)
	if !exists {
		err := fmt.Errorf("upload %q does not exist", uploadID)
		s3.WriteErr(w, r, err, http.StatusNotFound)
		return
	}

	// Respond with status 204 (no content - see the docs) and an empty body.
	w.WriteHeader(http.StatusNoContent)
}

// List already stored parts of the active multipart upload by bucket name and uploadID.
// (NOTE: `s3cmd` lists upload parts before checking if any parts can be skipped.)
// `s3cmd` accepts an empty response body with status 200; in that case it sends all parts.
// https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListParts.html
func (t *target) listMptParts(w http.ResponseWriter, r *http.Request, bck *meta.Bck, objName string, q url.Values) {
	uploadID := q.Get(s3.QparamMptUploadID)

	lom := &core.LOM{ObjName: objName}
	if err := lom.InitBck(bck.Bucket()); err != nil {
		s3.WriteErr(w, r, err, 0)
		return
	}

	parts, ecode, err := s3.ListParts(uploadID, lom)
	if err != nil {
		s3.WriteErr(w, r, err, ecode)
		return
	}
	result := &s3.ListPartsResult{Bucket: bck.Name, Key: objName, UploadID: uploadID, Parts: parts}
	sgl := t.gmm.NewSGL(0)
	result.MustMarshal(sgl)
	w.Header().Set(cos.HdrContentType, cos.ContentXML)
	sgl.WriteTo2(w)
	sgl.Free()
}

// List all active multipart uploads for a bucket.
// See https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListMultipartUploads.html
// GET /?uploads&delimiter=Delimiter&encoding-type=EncodingType&key-marker=KeyMarker&
// max-uploads=MaxUploads&prefix=Prefix&upload-id-marker=UploadIdMarker
func (t *target) listMptUploads(w http.ResponseWriter, bck *meta.Bck, q url.Values) {
	var (
		maxUploads int
		idMarker   string
	)
	if s := q.Get(s3.QparamMptMaxUploads); s != "" {
		if v, err := strconv.Atoi(s); err == nil {
			maxUploads = v
		}
	}
	idMarker = q.Get(s3.QparamMptUploadIDMarker)
	result := s3.ListUploads(bck.Name, idMarker, maxUploads)
	sgl := t.gmm.NewSGL(0)
	result.MustMarshal(sgl)
	w.Header().Set(cos.HdrContentType, cos.ContentXML)
	sgl.WriteTo2(w)
	sgl.Free()
}
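// Usage sketch (path shown for illustration only): a single part of a completed
// multipart object is read back the S3 way, via the `partNumber` query, e.g.:
//
//	GET /<bucket>/<object>?partNumber=3
//
// getMptPart (below) serves such requests by reading the corresponding byte range.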
// Acts on an already multipart-uploaded object: returns the `partNumber` (URL query)
// part of the object.
// See:
// https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObject.html
func (t *target) getMptPart(w http.ResponseWriter, r *http.Request, bck *meta.Bck, objName string, q url.Values) {
	lom := core.AllocLOM(objName)
	defer core.FreeLOM(lom)
	if err := lom.InitBck(bck.Bucket()); err != nil {
		s3.WriteErr(w, r, err, 0)
		return
	}
	partNum, err := s3.ParsePartNum(q.Get(s3.QparamMptPartNo))
	if err != nil {
		s3.WriteErr(w, r, err, 0)
		return
	}
	// load mpt xattr and find out the part num's offset & size
	off, size, status, err := s3.OffsetSorted(lom, partNum)
	if err != nil {
		s3.WriteErr(w, r, err, status)
		return
	}
	fh, err := lom.OpenFile()
	if err != nil {
		s3.WriteErr(w, r, err, 0)
		return
	}
	buf, slab := t.gmm.AllocSize(size)
	reader := io.NewSectionReader(fh, off, size)
	if _, err := io.CopyBuffer(w, reader, buf); err != nil {
		s3.WriteErr(w, r, err, 0)
	}
	cos.Close(fh)
	slab.Free(buf)
}