package s3

import (
	"bytes"
	"crypto/md5"
	"encoding/base64"
	"encoding/hex"
	"encoding/xml"
	"errors"
	"io"
	"sort"
	"strconv"

	"golang.org/x/net/context"
)

// Multi represents an unfinished multipart upload.
//
// Multipart uploads allow sending big objects in smaller chunks.
// After all parts have been sent, the upload must be explicitly
// completed by calling Complete with the list of parts.
//
// See http://goo.gl/vJfTG for an overview of multipart uploads.
type Multi struct {
	// Bucket is the bucket that owns this upload.
	Bucket *Bucket
	// Key is the object key being assembled.
	Key string
	// UploadID is the server-assigned identifier for this upload.
	UploadID string `xml:"UploadId"`
}

// That's the default. Here just for testing.
var listMultiMax = 1000

// listMultiResp mirrors the XML body of a ListMultipartUploads response.
// The two Next*Marker fields are fed back as pagination cursors when
// IsTruncated is set.
type listMultiResp struct {
	NextKeyMarker      string
	NextUploadIDMarker string
	IsTruncated        bool
	Upload             []Multi
	CommonPrefixes     []string `xml:"CommonPrefixes>Prefix"`
}

// ListMulti returns the list of unfinished multipart uploads in b.
//
// The prefix parameter limits the response to keys that begin with the
// specified prefix. You can use prefixes to separate a bucket into different
// groupings of keys (to get the feeling of folders, for example).
//
// The delim parameter causes the response to group all of the keys that
// share a common prefix up to the next delimiter in a single entry within
// the CommonPrefixes field. You can use delimiters to separate a bucket
// into different groupings of keys, similar to how folders would work.
//
// See http://goo.gl/ePioY for details.
53 func (b *Bucket) ListMulti(ctx context.Context, prefix, delim string) (multis []*Multi, prefixes []string, err error) { 54 params := map[string][]string{ 55 "uploads": {""}, 56 "max-uploads": {strconv.FormatInt(int64(listMultiMax), 10)}, 57 "prefix": {prefix}, 58 "delimiter": {delim}, 59 } 60 headers := map[string][]string{} 61 b.addTokenHeader(headers) 62 for attempt := b.S3.AttemptStrategy.Start(); attempt.Next(); { 63 req := &request{ 64 method: "GET", 65 bucket: b.Name, 66 params: params, 67 headers: headers, 68 } 69 var resp listMultiResp 70 err := b.S3.query(ctx, req, &resp) 71 if shouldRetry(err) && attempt.HasNext() { 72 continue 73 } 74 if err != nil { 75 return nil, nil, err 76 } 77 for i := range resp.Upload { 78 multi := &resp.Upload[i] 79 multi.Bucket = b 80 multis = append(multis, multi) 81 } 82 prefixes = append(prefixes, resp.CommonPrefixes...) 83 if !resp.IsTruncated { 84 return multis, prefixes, nil 85 } 86 params["key-marker"] = []string{resp.NextKeyMarker} 87 params["upload-id-marker"] = []string{resp.NextUploadIDMarker} 88 attempt = b.S3.AttemptStrategy.Start() // Last request worked. 89 } 90 panic("unreachable") 91 } 92 93 // Multi returns a multipart upload handler for the provided key 94 // inside b. If a multipart upload exists for key, it is returned, 95 // otherwise a new multipart upload is initiated with contType and perm. 
96 func (b *Bucket) Multi(ctx context.Context, key, contType string, perm ACL) (MultiInt, error) { 97 multis, _, err := b.ListMulti(ctx, key, "") 98 if err != nil && !hasCode(err, "NoSuchUpload") { 99 if !UsingFakeS3(ctx) { 100 return nil, err 101 } 102 // fakes3 returns NoSuchKey instead of NoSuchUpload, and we want to continue 103 // in that case, not abort 104 if !hasCode(err, "NoSuchKey") { 105 return nil, err 106 } 107 } 108 for _, m := range multis { 109 if m.Key == key { 110 return m, nil 111 } 112 } 113 114 return b.InitMulti(ctx, key, contType, perm) 115 } 116 117 // InitMulti initializes a new multipart upload at the provided 118 // key inside b and returns a value for manipulating it. 119 // 120 // See http://goo.gl/XP8kL for details. 121 func (b *Bucket) InitMulti(ctx context.Context, key string, contType string, perm ACL) (*Multi, error) { 122 headers := map[string][]string{ 123 "Content-Type": {contType}, 124 "Content-Length": {"0"}, 125 "x-amz-acl": {string(perm)}, 126 } 127 b.addTokenHeader(headers) 128 params := map[string][]string{ 129 "uploads": {""}, 130 } 131 req := &request{ 132 method: "POST", 133 bucket: b.Name, 134 path: key, 135 headers: headers, 136 params: params, 137 } 138 var err error 139 var resp struct { 140 UploadID string `xml:"UploadId"` 141 } 142 for attempt := b.S3.AttemptStrategy.Start(); attempt.Next(); { 143 err = b.S3.query(ctx, req, &resp) 144 if !shouldRetry(err) { 145 break 146 } 147 } 148 if err != nil { 149 return nil, err 150 } 151 return &Multi{Bucket: b, Key: key, UploadID: resp.UploadID}, nil 152 } 153 154 // PutPart sends part n of the multipart upload, reading all the content from r. 155 // Each part, except for the last one, must be at least 5MB in size. 156 // 157 // See http://goo.gl/pqZer for details. 
158 func (m *Multi) PutPart(ctx context.Context, n int, r io.ReadSeeker) (Part, error) { 159 partSize, _, md5b64, err := seekerInfo(r) 160 if err != nil { 161 return Part{}, err 162 } 163 return m.putPart(ctx, n, r, partSize, md5b64) 164 } 165 166 func (m *Multi) putPart(ctx context.Context, n int, r io.ReadSeeker, partSize int64, md5b64 string) (Part, error) { 167 headers := map[string][]string{ 168 "Content-Length": {strconv.FormatInt(partSize, 10)}, 169 "Content-MD5": {md5b64}, 170 } 171 m.Bucket.addTokenHeader(headers) 172 params := map[string][]string{ 173 "uploadId": {m.UploadID}, 174 "partNumber": {strconv.FormatInt(int64(n), 10)}, 175 } 176 for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); { 177 _, err := r.Seek(0, 0) 178 if err != nil { 179 return Part{}, err 180 } 181 req := &request{ 182 method: "PUT", 183 bucket: m.Bucket.Name, 184 path: m.Key, 185 headers: headers, 186 params: params, 187 payload: r, 188 } 189 err = m.Bucket.S3.prepare(req) 190 if err != nil { 191 return Part{}, err 192 } 193 resp, err := m.Bucket.S3.run(ctx, req, nil) 194 if shouldRetry(err) && attempt.HasNext() { 195 continue 196 } 197 if err != nil { 198 return Part{}, err 199 } 200 etag := resp.Header.Get("ETag") 201 if etag == "" { 202 return Part{}, errors.New("part upload succeeded with no ETag") 203 } 204 return Part{n, etag, partSize}, nil 205 } 206 panic("unreachable") 207 } 208 209 func seekerInfo(r io.ReadSeeker) (size int64, md5hex string, md5b64 string, err error) { 210 _, err = r.Seek(0, 0) 211 if err != nil { 212 return 0, "", "", err 213 } 214 digest := md5.New() 215 size, err = io.Copy(digest, r) 216 if err != nil { 217 return 0, "", "", err 218 } 219 sum := digest.Sum(nil) 220 md5hex = hex.EncodeToString(sum) 221 md5b64 = base64.StdEncoding.EncodeToString(sum) 222 return size, md5hex, md5b64, nil 223 } 224 225 type Part struct { 226 N int `xml:"PartNumber"` 227 ETag string 228 Size int64 229 } 230 231 type partSlice []Part 232 233 func (s partSlice) 
Len() int { return len(s) } 234 func (s partSlice) Less(i, j int) bool { return s[i].N < s[j].N } 235 func (s partSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 236 237 type listPartsResp struct { 238 NextPartNumberMarker string 239 IsTruncated bool 240 Part []Part 241 } 242 243 // That's the default. Here just for testing. 244 var listPartsMax = 1000 245 246 // ListParts returns the list of previously uploaded parts in m, 247 // ordered by part number. 248 // 249 // See http://goo.gl/ePioY for details. 250 func (m *Multi) ListParts(ctx context.Context) ([]Part, error) { 251 params := map[string][]string{ 252 "uploadId": {m.UploadID}, 253 "max-parts": {strconv.FormatInt(int64(listPartsMax), 10)}, 254 } 255 headers := map[string][]string{} 256 m.Bucket.addTokenHeader(headers) 257 258 var parts partSlice 259 for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); { 260 req := &request{ 261 method: "GET", 262 bucket: m.Bucket.Name, 263 path: m.Key, 264 params: params, 265 headers: headers, 266 } 267 var resp listPartsResp 268 err := m.Bucket.S3.query(ctx, req, &resp) 269 if shouldRetry(err) && attempt.HasNext() { 270 continue 271 } 272 if err != nil { 273 return nil, err 274 } 275 parts = append(parts, resp.Part...) 276 if !resp.IsTruncated { 277 sort.Sort(parts) 278 return parts, nil 279 } 280 params["part-number-marker"] = []string{resp.NextPartNumberMarker} 281 attempt = m.Bucket.S3.AttemptStrategy.Start() // Last request worked. 282 } 283 panic("unreachable") 284 } 285 286 type ReaderAtSeeker interface { 287 io.ReaderAt 288 io.ReadSeeker 289 } 290 291 // PutAll sends all of r via a multipart upload with parts no larger 292 // than partSize bytes, which must be set to at least 5MB. 293 // Parts previously uploaded are either reused if their checksum 294 // and size match the new part, or otherwise overwritten with the 295 // new content. 296 // PutAll returns all the parts of m (reused or not). 
297 func (m *Multi) PutAll(r ReaderAtSeeker, partSize int64) ([]Part, error) { 298 old, err := m.ListParts(context.Background()) 299 if err != nil && !hasCode(err, "NoSuchUpload") { 300 return nil, err 301 } 302 reuse := 0 // Index of next old part to consider reusing. 303 current := 1 // Part number of latest good part handled. 304 totalSize, err := r.Seek(0, 2) 305 if err != nil { 306 return nil, err 307 } 308 first := true // Must send at least one empty part if the file is empty. 309 var result []Part 310 NextSection: 311 for offset := int64(0); offset < totalSize || first; offset += partSize { 312 first = false 313 if offset+partSize > totalSize { 314 partSize = totalSize - offset 315 } 316 section := io.NewSectionReader(r, offset, partSize) 317 _, md5hex, md5b64, err := seekerInfo(section) 318 if err != nil { 319 return nil, err 320 } 321 for reuse < len(old) && old[reuse].N <= current { 322 // Looks like this part was already sent. 323 part := &old[reuse] 324 etag := `"` + md5hex + `"` 325 if part.N == current && part.Size == partSize && part.ETag == etag { 326 // Checksum matches. Reuse the old part. 327 result = append(result, *part) 328 current++ 329 continue NextSection 330 } 331 reuse++ 332 } 333 334 // Part wasn't found or doesn't match. Send it. 
335 part, err := m.putPart(context.Background(), current, section, partSize, md5b64) 336 if err != nil { 337 return nil, err 338 } 339 result = append(result, part) 340 current++ 341 } 342 return result, nil 343 } 344 345 type completeUpload struct { 346 XMLName xml.Name `xml:"CompleteMultipartUpload"` 347 Parts completeParts `xml:"Part"` 348 } 349 350 type completePart struct { 351 PartNumber int 352 ETag string 353 } 354 355 type completeParts []completePart 356 357 func (p completeParts) Len() int { return len(p) } 358 func (p completeParts) Less(i, j int) bool { return p[i].PartNumber < p[j].PartNumber } 359 func (p completeParts) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 360 361 type completeResponse struct { 362 // The element name: should be either CompleteMultipartUploadResult or Error. 363 XMLName xml.Name 364 // If the element was error, then it should have the following: 365 Code string 366 Message string 367 RequestID string `xml:"RequestId"` 368 HostID string `xml:"HostId"` 369 } 370 371 // Complete assembles the given previously uploaded parts into the 372 // final object. This operation may take several minutes. 373 // 374 // The complete call to AMZ may still fail after returning HTTP 200, 375 // so even though it's unused, the body of the reply must be demarshalled 376 // and checked to see whether or not the complete succeeded. 377 // 378 // See http://goo.gl/2Z7Tw for details. 
379 func (m *Multi) Complete(ctx context.Context, parts []Part) error { 380 params := map[string][]string{ 381 "uploadId": {m.UploadID}, 382 } 383 c := completeUpload{} 384 for _, p := range parts { 385 c.Parts = append(c.Parts, completePart{p.N, p.ETag}) 386 } 387 sort.Sort(c.Parts) 388 data, err := xml.Marshal(&c) 389 if err != nil { 390 return err 391 } 392 393 // Setting Content-Length prevents breakage on DreamObjects 394 for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); { 395 headers := map[string][]string{ 396 "Content-Length": {strconv.Itoa(len(data))}, 397 } 398 m.Bucket.addTokenHeader(headers) 399 req := &request{ 400 method: "POST", 401 bucket: m.Bucket.Name, 402 path: m.Key, 403 params: params, 404 payload: bytes.NewReader(data), 405 headers: headers, 406 } 407 408 resp := &completeResponse{} 409 err := m.Bucket.S3.query(ctx, req, resp) 410 if shouldRetry(err) && attempt.HasNext() { 411 continue 412 } 413 if err == nil && resp.XMLName.Local == "Error" { 414 err = &Error{ 415 StatusCode: 200, 416 Code: resp.Code, 417 Message: resp.Message, 418 RequestID: resp.RequestID, 419 HostID: resp.HostID, 420 } 421 } 422 return err 423 } 424 panic("unreachable") 425 } 426 427 // Abort deletes an unfinished multipart upload and any previously 428 // uploaded parts for it. 429 // 430 // After a multipart upload is aborted, no additional parts can be 431 // uploaded using it. However, if any part uploads are currently in 432 // progress, those part uploads might or might not succeed. As a result, 433 // it might be necessary to abort a given multipart upload multiple 434 // times in order to completely free all storage consumed by all parts. 435 // 436 // NOTE: If the described scenario happens to you, please report back to 437 // the goamz authors with details. In the future such retrying should be 438 // handled internally, but it's not clear what happens precisely (Is an 439 // error returned? Is the issue completely undetectable?). 
440 // 441 // See http://goo.gl/dnyJw for details. 442 func (m *Multi) Abort(ctx context.Context) error { 443 params := map[string][]string{ 444 "uploadId": {m.UploadID}, 445 } 446 headers := map[string][]string{} 447 m.Bucket.addTokenHeader(headers) 448 449 for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); { 450 req := &request{ 451 method: "DELETE", 452 bucket: m.Bucket.Name, 453 path: m.Key, 454 params: params, 455 headers: headers, 456 } 457 err := m.Bucket.S3.query(ctx, req, nil) 458 if shouldRetry(err) && attempt.HasNext() { 459 continue 460 } 461 return err 462 } 463 panic("unreachable") 464 }