// Source: github.com/keybase/client/go@v0.0.0-20240309051027-028f7c731f8b/chat/s3/multi.go

package s3

import (
	"bytes"
	"crypto/md5"
	"encoding/base64"
	"encoding/hex"
	"encoding/xml"
	"errors"
	"io"
	"sort"
	"strconv"

	"golang.org/x/net/context"
)

// Multi represents an unfinished multipart upload.
//
// Multipart uploads allow sending big objects in smaller chunks.
// After all parts have been sent, the upload must be explicitly
// completed by calling Complete with the list of parts.
//
// See http://goo.gl/vJfTG for an overview of multipart uploads.
type Multi struct {
	// Bucket is the bucket the upload belongs to. ListMulti and InitMulti
	// set it; it is not tagged for XML decoding.
	Bucket *Bucket
	// Key is the object key; the part, list, complete and abort requests
	// use it as the request path.
	Key string
	// UploadID identifies the upload; it is sent as the "uploadId"
	// request parameter.
	UploadID string `xml:"UploadId"`
}

// listMultiMax is sent as the "max-uploads" parameter by ListMulti.
//
// That's the default. Here just for testing.
var listMultiMax = 1000

// listMultiResp receives one page of a ListMulti query.
type listMultiResp struct {
	// NextKeyMarker and NextUploadIDMarker are sent back as the
	// "key-marker" and "upload-id-marker" parameters of the next page
	// request when IsTruncated is set.
	NextKeyMarker      string
	NextUploadIDMarker string
	// IsTruncated reports whether more pages remain to be fetched.
	IsTruncated bool
	// Upload holds the uploads listed on this page.
	Upload []Multi
	// CommonPrefixes holds the grouped key prefixes of this page.
	CommonPrefixes []string `xml:"CommonPrefixes>Prefix"`
}

// ListMulti returns the list of unfinished multipart uploads in b.
//
// The prefix parameter limits the response to keys that begin with the
// specified prefix. You can use prefixes to separate a bucket into different
// groupings of keys (to get the feeling of folders, for example).
//
// The delim parameter causes the response to group all of the keys that
// share a common prefix up to the next delimiter in a single entry within
// the CommonPrefixes field. You can use delimiters to separate a bucket
// into different groupings of keys, similar to how folders would work.
//
// See http://goo.gl/ePioY for details.
53 func (b *Bucket) ListMulti(ctx context.Context, prefix, delim string) (multis []*Multi, prefixes []string, err error) { 54 params := map[string][]string{ 55 "uploads": {""}, 56 "max-uploads": {strconv.FormatInt(int64(listMultiMax), 10)}, 57 "prefix": {prefix}, 58 "delimiter": {delim}, 59 } 60 for attempt := b.S3.AttemptStrategy.Start(); attempt.Next(); { 61 req := &request{ 62 method: "GET", 63 bucket: b.Name, 64 params: params, 65 } 66 var resp listMultiResp 67 err := b.S3.query(ctx, req, &resp) 68 if shouldRetry(err) && attempt.HasNext() { 69 continue 70 } 71 if err != nil { 72 return nil, nil, err 73 } 74 for i := range resp.Upload { 75 multi := &resp.Upload[i] 76 multi.Bucket = b 77 multis = append(multis, multi) 78 } 79 prefixes = append(prefixes, resp.CommonPrefixes...) 80 if !resp.IsTruncated { 81 return multis, prefixes, nil 82 } 83 params["key-marker"] = []string{resp.NextKeyMarker} 84 params["upload-id-marker"] = []string{resp.NextUploadIDMarker} 85 attempt = b.S3.AttemptStrategy.Start() // Last request worked. 86 } 87 panic("unreachable") 88 } 89 90 // Multi returns a multipart upload handler for the provided key 91 // inside b. If a multipart upload exists for key, it is returned, 92 // otherwise a new multipart upload is initiated with contType and perm. 93 func (b *Bucket) Multi(ctx context.Context, key, contType string, perm ACL) (MultiInt, error) { 94 multis, _, err := b.ListMulti(ctx, key, "") 95 if err != nil && !hasCode(err, "NoSuchUpload") { 96 if !UsingFakeS3(ctx) { 97 return nil, err 98 } 99 // fakes3 returns NoSuchKey instead of NoSuchUpload, and we want to continue 100 // in that case, not abort 101 if !hasCode(err, "NoSuchKey") { 102 return nil, err 103 } 104 } 105 for _, m := range multis { 106 if m.Key == key { 107 return m, nil 108 } 109 } 110 111 return b.InitMulti(ctx, key, contType, perm) 112 } 113 114 // InitMulti initializes a new multipart upload at the provided 115 // key inside b and returns a value for manipulating it. 
116 // 117 // See http://goo.gl/XP8kL for details. 118 func (b *Bucket) InitMulti(ctx context.Context, key string, contType string, perm ACL) (*Multi, error) { 119 headers := map[string][]string{ 120 "Content-Type": {contType}, 121 "Content-Length": {"0"}, 122 "x-amz-acl": {string(perm)}, 123 } 124 params := map[string][]string{ 125 "uploads": {""}, 126 } 127 req := &request{ 128 method: "POST", 129 bucket: b.Name, 130 path: key, 131 headers: headers, 132 params: params, 133 } 134 var err error 135 var resp struct { 136 UploadID string `xml:"UploadId"` 137 } 138 for attempt := b.S3.AttemptStrategy.Start(); attempt.Next(); { 139 err = b.S3.query(ctx, req, &resp) 140 if !shouldRetry(err) { 141 break 142 } 143 } 144 if err != nil { 145 return nil, err 146 } 147 return &Multi{Bucket: b, Key: key, UploadID: resp.UploadID}, nil 148 } 149 150 // PutPart sends part n of the multipart upload, reading all the content from r. 151 // Each part, except for the last one, must be at least 5MB in size. 152 // 153 // See http://goo.gl/pqZer for details. 
154 func (m *Multi) PutPart(ctx context.Context, n int, r io.ReadSeeker) (Part, error) { 155 partSize, _, md5b64, err := seekerInfo(r) 156 if err != nil { 157 return Part{}, err 158 } 159 return m.putPart(ctx, n, r, partSize, md5b64) 160 } 161 162 func (m *Multi) putPart(ctx context.Context, n int, r io.ReadSeeker, partSize int64, md5b64 string) (Part, error) { 163 headers := map[string][]string{ 164 "Content-Length": {strconv.FormatInt(partSize, 10)}, 165 "Content-MD5": {md5b64}, 166 } 167 params := map[string][]string{ 168 "uploadId": {m.UploadID}, 169 "partNumber": {strconv.FormatInt(int64(n), 10)}, 170 } 171 for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); { 172 _, err := r.Seek(0, 0) 173 if err != nil { 174 return Part{}, err 175 } 176 req := &request{ 177 method: "PUT", 178 bucket: m.Bucket.Name, 179 path: m.Key, 180 headers: headers, 181 params: params, 182 payload: r, 183 } 184 err = m.Bucket.S3.prepare(req) 185 if err != nil { 186 return Part{}, err 187 } 188 resp, err := m.Bucket.S3.run(ctx, req, nil) 189 if shouldRetry(err) && attempt.HasNext() { 190 continue 191 } 192 if err != nil { 193 return Part{}, err 194 } 195 etag := resp.Header.Get("ETag") 196 if etag == "" { 197 return Part{}, errors.New("part upload succeeded with no ETag") 198 } 199 return Part{n, etag, partSize}, nil 200 } 201 panic("unreachable") 202 } 203 204 func seekerInfo(r io.ReadSeeker) (size int64, md5hex string, md5b64 string, err error) { 205 _, err = r.Seek(0, 0) 206 if err != nil { 207 return 0, "", "", err 208 } 209 digest := md5.New() 210 size, err = io.Copy(digest, r) 211 if err != nil { 212 return 0, "", "", err 213 } 214 sum := digest.Sum(nil) 215 md5hex = hex.EncodeToString(sum) 216 md5b64 = base64.StdEncoding.EncodeToString(sum) 217 return size, md5hex, md5b64, nil 218 } 219 220 type Part struct { 221 N int `xml:"PartNumber"` 222 ETag string 223 Size int64 224 } 225 226 type partSlice []Part 227 228 func (s partSlice) Len() int { return len(s) } 229 func 
(s partSlice) Less(i, j int) bool { return s[i].N < s[j].N } 230 func (s partSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 231 232 type listPartsResp struct { 233 NextPartNumberMarker string 234 IsTruncated bool 235 Part []Part 236 } 237 238 // That's the default. Here just for testing. 239 var listPartsMax = 1000 240 241 // ListParts returns the list of previously uploaded parts in m, 242 // ordered by part number. 243 // 244 // See http://goo.gl/ePioY for details. 245 func (m *Multi) ListParts(ctx context.Context) ([]Part, error) { 246 params := map[string][]string{ 247 "uploadId": {m.UploadID}, 248 "max-parts": {strconv.FormatInt(int64(listPartsMax), 10)}, 249 } 250 var parts partSlice 251 for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); { 252 req := &request{ 253 method: "GET", 254 bucket: m.Bucket.Name, 255 path: m.Key, 256 params: params, 257 } 258 var resp listPartsResp 259 err := m.Bucket.S3.query(ctx, req, &resp) 260 if shouldRetry(err) && attempt.HasNext() { 261 continue 262 } 263 if err != nil { 264 return nil, err 265 } 266 parts = append(parts, resp.Part...) 267 if !resp.IsTruncated { 268 sort.Sort(parts) 269 return parts, nil 270 } 271 params["part-number-marker"] = []string{resp.NextPartNumberMarker} 272 attempt = m.Bucket.S3.AttemptStrategy.Start() // Last request worked. 273 } 274 panic("unreachable") 275 } 276 277 type ReaderAtSeeker interface { 278 io.ReaderAt 279 io.ReadSeeker 280 } 281 282 // PutAll sends all of r via a multipart upload with parts no larger 283 // than partSize bytes, which must be set to at least 5MB. 284 // Parts previously uploaded are either reused if their checksum 285 // and size match the new part, or otherwise overwritten with the 286 // new content. 287 // PutAll returns all the parts of m (reused or not). 
288 func (m *Multi) PutAll(r ReaderAtSeeker, partSize int64) ([]Part, error) { 289 old, err := m.ListParts(context.Background()) 290 if err != nil && !hasCode(err, "NoSuchUpload") { 291 return nil, err 292 } 293 reuse := 0 // Index of next old part to consider reusing. 294 current := 1 // Part number of latest good part handled. 295 totalSize, err := r.Seek(0, 2) 296 if err != nil { 297 return nil, err 298 } 299 first := true // Must send at least one empty part if the file is empty. 300 var result []Part 301 NextSection: 302 for offset := int64(0); offset < totalSize || first; offset += partSize { 303 first = false 304 if offset+partSize > totalSize { 305 partSize = totalSize - offset 306 } 307 section := io.NewSectionReader(r, offset, partSize) 308 _, md5hex, md5b64, err := seekerInfo(section) 309 if err != nil { 310 return nil, err 311 } 312 for reuse < len(old) && old[reuse].N <= current { 313 // Looks like this part was already sent. 314 part := &old[reuse] 315 etag := `"` + md5hex + `"` 316 if part.N == current && part.Size == partSize && part.ETag == etag { 317 // Checksum matches. Reuse the old part. 318 result = append(result, *part) 319 current++ 320 continue NextSection 321 } 322 reuse++ 323 } 324 325 // Part wasn't found or doesn't match. Send it. 
326 part, err := m.putPart(context.Background(), current, section, partSize, md5b64) 327 if err != nil { 328 return nil, err 329 } 330 result = append(result, part) 331 current++ 332 } 333 return result, nil 334 } 335 336 type completeUpload struct { 337 XMLName xml.Name `xml:"CompleteMultipartUpload"` 338 Parts completeParts `xml:"Part"` 339 } 340 341 type completePart struct { 342 PartNumber int 343 ETag string 344 } 345 346 type completeParts []completePart 347 348 func (p completeParts) Len() int { return len(p) } 349 func (p completeParts) Less(i, j int) bool { return p[i].PartNumber < p[j].PartNumber } 350 func (p completeParts) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 351 352 type completeResponse struct { 353 // The element name: should be either CompleteMultipartUploadResult or Error. 354 XMLName xml.Name 355 // If the element was error, then it should have the following: 356 Code string 357 Message string 358 RequestID string `xml:"RequestId"` 359 HostID string `xml:"HostId"` 360 } 361 362 // Complete assembles the given previously uploaded parts into the 363 // final object. This operation may take several minutes. 364 // 365 // The complete call to AMZ may still fail after returning HTTP 200, 366 // so even though it's unused, the body of the reply must be demarshalled 367 // and checked to see whether or not the complete succeeded. 368 // 369 // See http://goo.gl/2Z7Tw for details. 
370 func (m *Multi) Complete(ctx context.Context, parts []Part) error { 371 params := map[string][]string{ 372 "uploadId": {m.UploadID}, 373 } 374 c := completeUpload{} 375 for _, p := range parts { 376 c.Parts = append(c.Parts, completePart{p.N, p.ETag}) 377 } 378 sort.Sort(c.Parts) 379 data, err := xml.Marshal(&c) 380 if err != nil { 381 return err 382 } 383 384 // Setting Content-Length prevents breakage on DreamObjects 385 for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); { 386 req := &request{ 387 method: "POST", 388 bucket: m.Bucket.Name, 389 path: m.Key, 390 params: params, 391 payload: bytes.NewReader(data), 392 headers: map[string][]string{ 393 "Content-Length": {strconv.Itoa(len(data))}, 394 }, 395 } 396 397 resp := &completeResponse{} 398 err := m.Bucket.S3.query(ctx, req, resp) 399 if shouldRetry(err) && attempt.HasNext() { 400 continue 401 } 402 if err == nil && resp.XMLName.Local == "Error" { 403 err = &Error{ 404 StatusCode: 200, 405 Code: resp.Code, 406 Message: resp.Message, 407 RequestID: resp.RequestID, 408 HostID: resp.HostID, 409 } 410 } 411 return err 412 } 413 panic("unreachable") 414 } 415 416 // Abort deletes an unfinished multipart upload and any previously 417 // uploaded parts for it. 418 // 419 // After a multipart upload is aborted, no additional parts can be 420 // uploaded using it. However, if any part uploads are currently in 421 // progress, those part uploads might or might not succeed. As a result, 422 // it might be necessary to abort a given multipart upload multiple 423 // times in order to completely free all storage consumed by all parts. 424 // 425 // NOTE: If the described scenario happens to you, please report back to 426 // the goamz authors with details. In the future such retrying should be 427 // handled internally, but it's not clear what happens precisely (Is an 428 // error returned? Is the issue completely undetectable?). 429 // 430 // See http://goo.gl/dnyJw for details. 
431 func (m *Multi) Abort(ctx context.Context) error { 432 params := map[string][]string{ 433 "uploadId": {m.UploadID}, 434 } 435 for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); { 436 req := &request{ 437 method: "DELETE", 438 bucket: m.Bucket.Name, 439 path: m.Key, 440 params: params, 441 } 442 err := m.Bucket.S3.query(ctx, req, nil) 443 if shouldRetry(err) && attempt.HasNext() { 444 continue 445 } 446 return err 447 } 448 panic("unreachable") 449 }