github.com/keybase/client/go@v0.0.0-20240309051027-028f7c731f8b/chat/s3/multi.go (about)

     1  package s3
     2  
     3  import (
     4  	"bytes"
     5  	"crypto/md5"
     6  	"encoding/base64"
     7  	"encoding/hex"
     8  	"encoding/xml"
     9  	"errors"
    10  	"io"
    11  	"sort"
    12  	"strconv"
    13  
    14  	"golang.org/x/net/context"
    15  )
    16  
// Multi represents an unfinished multipart upload.
//
// Multipart uploads allow sending big objects in smaller chunks.
// After all parts have been sent, the upload must be explicitly
// completed by calling Complete with the list of parts.
//
// See http://goo.gl/vJfTG for an overview of multipart uploads.
type Multi struct {
	// Bucket is the bucket this upload targets.
	Bucket *Bucket
	// Key is the object key being assembled by this upload.
	Key string
	// UploadID is the server-assigned identifier for this upload.
	UploadID string `xml:"UploadId"`
}
    29  
// listMultiMax is the page size requested via max-uploads when listing
// multipart uploads. 1000 is the server-side default; it is a variable
// rather than a constant only so tests can lower it.
var listMultiMax = 1000
    32  
// listMultiResp mirrors the XML body of a list-multipart-uploads
// response (GET /?uploads).
type listMultiResp struct {
	NextKeyMarker      string  // key cursor for requesting the next page
	NextUploadIDMarker string  // upload-id cursor for requesting the next page
	IsTruncated        bool    // true when more results remain past this page
	Upload             []Multi // uploads returned in this page
	CommonPrefixes     []string `xml:"CommonPrefixes>Prefix"`
}
    40  
    41  // ListMulti returns the list of unfinished multipart uploads in b.
    42  //
    43  // The prefix parameter limits the response to keys that begin with the
    44  // specified prefix. You can use prefixes to separate a bucket into different
    45  // groupings of keys (to get the feeling of folders, for example).
    46  //
    47  // The delim parameter causes the response to group all of the keys that
    48  // share a common prefix up to the next delimiter in a single entry within
    49  // the CommonPrefixes field. You can use delimiters to separate a bucket
    50  // into different groupings of keys, similar to how folders would work.
    51  //
    52  // See http://goo.gl/ePioY for details.
    53  func (b *Bucket) ListMulti(ctx context.Context, prefix, delim string) (multis []*Multi, prefixes []string, err error) {
    54  	params := map[string][]string{
    55  		"uploads":     {""},
    56  		"max-uploads": {strconv.FormatInt(int64(listMultiMax), 10)},
    57  		"prefix":      {prefix},
    58  		"delimiter":   {delim},
    59  	}
    60  	for attempt := b.S3.AttemptStrategy.Start(); attempt.Next(); {
    61  		req := &request{
    62  			method: "GET",
    63  			bucket: b.Name,
    64  			params: params,
    65  		}
    66  		var resp listMultiResp
    67  		err := b.S3.query(ctx, req, &resp)
    68  		if shouldRetry(err) && attempt.HasNext() {
    69  			continue
    70  		}
    71  		if err != nil {
    72  			return nil, nil, err
    73  		}
    74  		for i := range resp.Upload {
    75  			multi := &resp.Upload[i]
    76  			multi.Bucket = b
    77  			multis = append(multis, multi)
    78  		}
    79  		prefixes = append(prefixes, resp.CommonPrefixes...)
    80  		if !resp.IsTruncated {
    81  			return multis, prefixes, nil
    82  		}
    83  		params["key-marker"] = []string{resp.NextKeyMarker}
    84  		params["upload-id-marker"] = []string{resp.NextUploadIDMarker}
    85  		attempt = b.S3.AttemptStrategy.Start() // Last request worked.
    86  	}
    87  	panic("unreachable")
    88  }
    89  
    90  // Multi returns a multipart upload handler for the provided key
    91  // inside b. If a multipart upload exists for key, it is returned,
    92  // otherwise a new multipart upload is initiated with contType and perm.
    93  func (b *Bucket) Multi(ctx context.Context, key, contType string, perm ACL) (MultiInt, error) {
    94  	multis, _, err := b.ListMulti(ctx, key, "")
    95  	if err != nil && !hasCode(err, "NoSuchUpload") {
    96  		if !UsingFakeS3(ctx) {
    97  			return nil, err
    98  		}
    99  		// fakes3 returns NoSuchKey instead of NoSuchUpload, and we want to continue
   100  		// in that case, not abort
   101  		if !hasCode(err, "NoSuchKey") {
   102  			return nil, err
   103  		}
   104  	}
   105  	for _, m := range multis {
   106  		if m.Key == key {
   107  			return m, nil
   108  		}
   109  	}
   110  
   111  	return b.InitMulti(ctx, key, contType, perm)
   112  }
   113  
   114  // InitMulti initializes a new multipart upload at the provided
   115  // key inside b and returns a value for manipulating it.
   116  //
   117  // See http://goo.gl/XP8kL for details.
   118  func (b *Bucket) InitMulti(ctx context.Context, key string, contType string, perm ACL) (*Multi, error) {
   119  	headers := map[string][]string{
   120  		"Content-Type":   {contType},
   121  		"Content-Length": {"0"},
   122  		"x-amz-acl":      {string(perm)},
   123  	}
   124  	params := map[string][]string{
   125  		"uploads": {""},
   126  	}
   127  	req := &request{
   128  		method:  "POST",
   129  		bucket:  b.Name,
   130  		path:    key,
   131  		headers: headers,
   132  		params:  params,
   133  	}
   134  	var err error
   135  	var resp struct {
   136  		UploadID string `xml:"UploadId"`
   137  	}
   138  	for attempt := b.S3.AttemptStrategy.Start(); attempt.Next(); {
   139  		err = b.S3.query(ctx, req, &resp)
   140  		if !shouldRetry(err) {
   141  			break
   142  		}
   143  	}
   144  	if err != nil {
   145  		return nil, err
   146  	}
   147  	return &Multi{Bucket: b, Key: key, UploadID: resp.UploadID}, nil
   148  }
   149  
   150  // PutPart sends part n of the multipart upload, reading all the content from r.
   151  // Each part, except for the last one, must be at least 5MB in size.
   152  //
   153  // See http://goo.gl/pqZer for details.
   154  func (m *Multi) PutPart(ctx context.Context, n int, r io.ReadSeeker) (Part, error) {
   155  	partSize, _, md5b64, err := seekerInfo(r)
   156  	if err != nil {
   157  		return Part{}, err
   158  	}
   159  	return m.putPart(ctx, n, r, partSize, md5b64)
   160  }
   161  
   162  func (m *Multi) putPart(ctx context.Context, n int, r io.ReadSeeker, partSize int64, md5b64 string) (Part, error) {
   163  	headers := map[string][]string{
   164  		"Content-Length": {strconv.FormatInt(partSize, 10)},
   165  		"Content-MD5":    {md5b64},
   166  	}
   167  	params := map[string][]string{
   168  		"uploadId":   {m.UploadID},
   169  		"partNumber": {strconv.FormatInt(int64(n), 10)},
   170  	}
   171  	for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); {
   172  		_, err := r.Seek(0, 0)
   173  		if err != nil {
   174  			return Part{}, err
   175  		}
   176  		req := &request{
   177  			method:  "PUT",
   178  			bucket:  m.Bucket.Name,
   179  			path:    m.Key,
   180  			headers: headers,
   181  			params:  params,
   182  			payload: r,
   183  		}
   184  		err = m.Bucket.S3.prepare(req)
   185  		if err != nil {
   186  			return Part{}, err
   187  		}
   188  		resp, err := m.Bucket.S3.run(ctx, req, nil)
   189  		if shouldRetry(err) && attempt.HasNext() {
   190  			continue
   191  		}
   192  		if err != nil {
   193  			return Part{}, err
   194  		}
   195  		etag := resp.Header.Get("ETag")
   196  		if etag == "" {
   197  			return Part{}, errors.New("part upload succeeded with no ETag")
   198  		}
   199  		return Part{n, etag, partSize}, nil
   200  	}
   201  	panic("unreachable")
   202  }
   203  
   204  func seekerInfo(r io.ReadSeeker) (size int64, md5hex string, md5b64 string, err error) {
   205  	_, err = r.Seek(0, 0)
   206  	if err != nil {
   207  		return 0, "", "", err
   208  	}
   209  	digest := md5.New()
   210  	size, err = io.Copy(digest, r)
   211  	if err != nil {
   212  		return 0, "", "", err
   213  	}
   214  	sum := digest.Sum(nil)
   215  	md5hex = hex.EncodeToString(sum)
   216  	md5b64 = base64.StdEncoding.EncodeToString(sum)
   217  	return size, md5hex, md5b64, nil
   218  }
   219  
   220  type Part struct {
   221  	N    int `xml:"PartNumber"`
   222  	ETag string
   223  	Size int64
   224  }
   225  
   226  type partSlice []Part
   227  
   228  func (s partSlice) Len() int           { return len(s) }
   229  func (s partSlice) Less(i, j int) bool { return s[i].N < s[j].N }
   230  func (s partSlice) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
   231  
// listPartsResp mirrors the XML body of a list-parts response
// (GET ?uploadId=...).
type listPartsResp struct {
	NextPartNumberMarker string // part-number cursor for requesting the next page
	IsTruncated          bool   // true when more parts remain past this page
	Part                 []Part // parts returned in this page
}
   237  
// listPartsMax is the page size requested via max-parts when listing
// parts. 1000 is the server-side default; it is a variable rather than
// a constant only so tests can lower it.
var listPartsMax = 1000
   240  
   241  // ListParts returns the list of previously uploaded parts in m,
   242  // ordered by part number.
   243  //
   244  // See http://goo.gl/ePioY for details.
   245  func (m *Multi) ListParts(ctx context.Context) ([]Part, error) {
   246  	params := map[string][]string{
   247  		"uploadId":  {m.UploadID},
   248  		"max-parts": {strconv.FormatInt(int64(listPartsMax), 10)},
   249  	}
   250  	var parts partSlice
   251  	for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); {
   252  		req := &request{
   253  			method: "GET",
   254  			bucket: m.Bucket.Name,
   255  			path:   m.Key,
   256  			params: params,
   257  		}
   258  		var resp listPartsResp
   259  		err := m.Bucket.S3.query(ctx, req, &resp)
   260  		if shouldRetry(err) && attempt.HasNext() {
   261  			continue
   262  		}
   263  		if err != nil {
   264  			return nil, err
   265  		}
   266  		parts = append(parts, resp.Part...)
   267  		if !resp.IsTruncated {
   268  			sort.Sort(parts)
   269  			return parts, nil
   270  		}
   271  		params["part-number-marker"] = []string{resp.NextPartNumberMarker}
   272  		attempt = m.Bucket.S3.AttemptStrategy.Start() // Last request worked.
   273  	}
   274  	panic("unreachable")
   275  }
   276  
// ReaderAtSeeker combines random access (io.ReaderAt) with sequential
// seeking and reading (io.ReadSeeker). PutAll requires both: it seeks to
// discover the total size and reads arbitrary sections via
// io.NewSectionReader.
type ReaderAtSeeker interface {
	io.ReaderAt
	io.ReadSeeker
}
   281  
   282  // PutAll sends all of r via a multipart upload with parts no larger
   283  // than partSize bytes, which must be set to at least 5MB.
   284  // Parts previously uploaded are either reused if their checksum
   285  // and size match the new part, or otherwise overwritten with the
   286  // new content.
   287  // PutAll returns all the parts of m (reused or not).
   288  func (m *Multi) PutAll(r ReaderAtSeeker, partSize int64) ([]Part, error) {
   289  	old, err := m.ListParts(context.Background())
   290  	if err != nil && !hasCode(err, "NoSuchUpload") {
   291  		return nil, err
   292  	}
   293  	reuse := 0   // Index of next old part to consider reusing.
   294  	current := 1 // Part number of latest good part handled.
   295  	totalSize, err := r.Seek(0, 2)
   296  	if err != nil {
   297  		return nil, err
   298  	}
   299  	first := true // Must send at least one empty part if the file is empty.
   300  	var result []Part
   301  NextSection:
   302  	for offset := int64(0); offset < totalSize || first; offset += partSize {
   303  		first = false
   304  		if offset+partSize > totalSize {
   305  			partSize = totalSize - offset
   306  		}
   307  		section := io.NewSectionReader(r, offset, partSize)
   308  		_, md5hex, md5b64, err := seekerInfo(section)
   309  		if err != nil {
   310  			return nil, err
   311  		}
   312  		for reuse < len(old) && old[reuse].N <= current {
   313  			// Looks like this part was already sent.
   314  			part := &old[reuse]
   315  			etag := `"` + md5hex + `"`
   316  			if part.N == current && part.Size == partSize && part.ETag == etag {
   317  				// Checksum matches. Reuse the old part.
   318  				result = append(result, *part)
   319  				current++
   320  				continue NextSection
   321  			}
   322  			reuse++
   323  		}
   324  
   325  		// Part wasn't found or doesn't match. Send it.
   326  		part, err := m.putPart(context.Background(), current, section, partSize, md5b64)
   327  		if err != nil {
   328  			return nil, err
   329  		}
   330  		result = append(result, part)
   331  		current++
   332  	}
   333  	return result, nil
   334  }
   335  
   336  type completeUpload struct {
   337  	XMLName xml.Name      `xml:"CompleteMultipartUpload"`
   338  	Parts   completeParts `xml:"Part"`
   339  }
   340  
   341  type completePart struct {
   342  	PartNumber int
   343  	ETag       string
   344  }
   345  
   346  type completeParts []completePart
   347  
   348  func (p completeParts) Len() int           { return len(p) }
   349  func (p completeParts) Less(i, j int) bool { return p[i].PartNumber < p[j].PartNumber }
   350  func (p completeParts) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   351  
// completeResponse mirrors the XML body of a complete-upload response.
// S3 can answer HTTP 200 and still report a failure, so the body must be
// inspected: XMLName is CompleteMultipartUploadResult on success and
// Error on failure.
type completeResponse struct {
	// The element name: should be either CompleteMultipartUploadResult or Error.
	XMLName xml.Name
	// If the element was error, then it should have the following:
	Code      string
	Message   string
	RequestID string `xml:"RequestId"`
	HostID    string `xml:"HostId"`
}
   361  
   362  // Complete assembles the given previously uploaded parts into the
   363  // final object. This operation may take several minutes.
   364  //
   365  // The complete call to AMZ may still fail after returning HTTP 200,
   366  // so even though it's unused, the body of the reply must be demarshalled
   367  // and checked to see whether or not the complete succeeded.
   368  //
   369  // See http://goo.gl/2Z7Tw for details.
   370  func (m *Multi) Complete(ctx context.Context, parts []Part) error {
   371  	params := map[string][]string{
   372  		"uploadId": {m.UploadID},
   373  	}
   374  	c := completeUpload{}
   375  	for _, p := range parts {
   376  		c.Parts = append(c.Parts, completePart{p.N, p.ETag})
   377  	}
   378  	sort.Sort(c.Parts)
   379  	data, err := xml.Marshal(&c)
   380  	if err != nil {
   381  		return err
   382  	}
   383  
   384  	// Setting Content-Length prevents breakage on DreamObjects
   385  	for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); {
   386  		req := &request{
   387  			method:  "POST",
   388  			bucket:  m.Bucket.Name,
   389  			path:    m.Key,
   390  			params:  params,
   391  			payload: bytes.NewReader(data),
   392  			headers: map[string][]string{
   393  				"Content-Length": {strconv.Itoa(len(data))},
   394  			},
   395  		}
   396  
   397  		resp := &completeResponse{}
   398  		err := m.Bucket.S3.query(ctx, req, resp)
   399  		if shouldRetry(err) && attempt.HasNext() {
   400  			continue
   401  		}
   402  		if err == nil && resp.XMLName.Local == "Error" {
   403  			err = &Error{
   404  				StatusCode: 200,
   405  				Code:       resp.Code,
   406  				Message:    resp.Message,
   407  				RequestID:  resp.RequestID,
   408  				HostID:     resp.HostID,
   409  			}
   410  		}
   411  		return err
   412  	}
   413  	panic("unreachable")
   414  }
   415  
   416  // Abort deletes an unfinished multipart upload and any previously
   417  // uploaded parts for it.
   418  //
   419  // After a multipart upload is aborted, no additional parts can be
   420  // uploaded using it. However, if any part uploads are currently in
   421  // progress, those part uploads might or might not succeed. As a result,
   422  // it might be necessary to abort a given multipart upload multiple
   423  // times in order to completely free all storage consumed by all parts.
   424  //
   425  // NOTE: If the described scenario happens to you, please report back to
   426  // the goamz authors with details. In the future such retrying should be
   427  // handled internally, but it's not clear what happens precisely (Is an
   428  // error returned? Is the issue completely undetectable?).
   429  //
   430  // See http://goo.gl/dnyJw for details.
   431  func (m *Multi) Abort(ctx context.Context) error {
   432  	params := map[string][]string{
   433  		"uploadId": {m.UploadID},
   434  	}
   435  	for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); {
   436  		req := &request{
   437  			method: "DELETE",
   438  			bucket: m.Bucket.Name,
   439  			path:   m.Key,
   440  			params: params,
   441  		}
   442  		err := m.Bucket.S3.query(ctx, req, nil)
   443  		if shouldRetry(err) && attempt.HasNext() {
   444  			continue
   445  		}
   446  		return err
   447  	}
   448  	panic("unreachable")
   449  }