github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/chat/s3/multi.go (about)

     1  package s3
     2  
     3  import (
     4  	"bytes"
     5  	"crypto/md5"
     6  	"encoding/base64"
     7  	"encoding/hex"
     8  	"encoding/xml"
     9  	"errors"
    10  	"io"
    11  	"sort"
    12  	"strconv"
    13  
    14  	"golang.org/x/net/context"
    15  )
    16  
// Multi represents an unfinished multipart upload.
//
// Multipart uploads allow sending big objects in smaller chunks.
// After all parts have been sent, the upload must be explicitly
// completed by calling Complete with the list of parts.
//
// See http://goo.gl/vJfTG for an overview of multipart uploads.
type Multi struct {
	Bucket   *Bucket // bucket the upload targets
	Key      string  // object key being assembled
	UploadID string  `xml:"UploadId"` // server-assigned identifier for this upload
}
    29  
// That's the default. Here just for testing.
var listMultiMax = 1000

// listMultiResp mirrors the XML body of a ListMultipartUploads response.
type listMultiResp struct {
	NextKeyMarker      string // key to resume from when the listing is truncated
	NextUploadIDMarker string // upload ID to resume from when the listing is truncated
	IsTruncated        bool   // true when more uploads remain beyond this page
	Upload             []Multi
	CommonPrefixes     []string `xml:"CommonPrefixes>Prefix"` // grouped prefixes when a delimiter was supplied
}
    40  
    41  // ListMulti returns the list of unfinished multipart uploads in b.
    42  //
    43  // The prefix parameter limits the response to keys that begin with the
    44  // specified prefix. You can use prefixes to separate a bucket into different
    45  // groupings of keys (to get the feeling of folders, for example).
    46  //
    47  // The delim parameter causes the response to group all of the keys that
    48  // share a common prefix up to the next delimiter in a single entry within
    49  // the CommonPrefixes field. You can use delimiters to separate a bucket
    50  // into different groupings of keys, similar to how folders would work.
    51  //
    52  // See http://goo.gl/ePioY for details.
    53  func (b *Bucket) ListMulti(ctx context.Context, prefix, delim string) (multis []*Multi, prefixes []string, err error) {
    54  	params := map[string][]string{
    55  		"uploads":     {""},
    56  		"max-uploads": {strconv.FormatInt(int64(listMultiMax), 10)},
    57  		"prefix":      {prefix},
    58  		"delimiter":   {delim},
    59  	}
    60  	headers := map[string][]string{}
    61  	b.addTokenHeader(headers)
    62  	for attempt := b.S3.AttemptStrategy.Start(); attempt.Next(); {
    63  		req := &request{
    64  			method:  "GET",
    65  			bucket:  b.Name,
    66  			params:  params,
    67  			headers: headers,
    68  		}
    69  		var resp listMultiResp
    70  		err := b.S3.query(ctx, req, &resp)
    71  		if shouldRetry(err) && attempt.HasNext() {
    72  			continue
    73  		}
    74  		if err != nil {
    75  			return nil, nil, err
    76  		}
    77  		for i := range resp.Upload {
    78  			multi := &resp.Upload[i]
    79  			multi.Bucket = b
    80  			multis = append(multis, multi)
    81  		}
    82  		prefixes = append(prefixes, resp.CommonPrefixes...)
    83  		if !resp.IsTruncated {
    84  			return multis, prefixes, nil
    85  		}
    86  		params["key-marker"] = []string{resp.NextKeyMarker}
    87  		params["upload-id-marker"] = []string{resp.NextUploadIDMarker}
    88  		attempt = b.S3.AttemptStrategy.Start() // Last request worked.
    89  	}
    90  	panic("unreachable")
    91  }
    92  
    93  // Multi returns a multipart upload handler for the provided key
    94  // inside b. If a multipart upload exists for key, it is returned,
    95  // otherwise a new multipart upload is initiated with contType and perm.
    96  func (b *Bucket) Multi(ctx context.Context, key, contType string, perm ACL) (MultiInt, error) {
    97  	multis, _, err := b.ListMulti(ctx, key, "")
    98  	if err != nil && !hasCode(err, "NoSuchUpload") {
    99  		if !UsingFakeS3(ctx) {
   100  			return nil, err
   101  		}
   102  		// fakes3 returns NoSuchKey instead of NoSuchUpload, and we want to continue
   103  		// in that case, not abort
   104  		if !hasCode(err, "NoSuchKey") {
   105  			return nil, err
   106  		}
   107  	}
   108  	for _, m := range multis {
   109  		if m.Key == key {
   110  			return m, nil
   111  		}
   112  	}
   113  
   114  	return b.InitMulti(ctx, key, contType, perm)
   115  }
   116  
   117  // InitMulti initializes a new multipart upload at the provided
   118  // key inside b and returns a value for manipulating it.
   119  //
   120  // See http://goo.gl/XP8kL for details.
   121  func (b *Bucket) InitMulti(ctx context.Context, key string, contType string, perm ACL) (*Multi, error) {
   122  	headers := map[string][]string{
   123  		"Content-Type":   {contType},
   124  		"Content-Length": {"0"},
   125  		"x-amz-acl":      {string(perm)},
   126  	}
   127  	b.addTokenHeader(headers)
   128  	params := map[string][]string{
   129  		"uploads": {""},
   130  	}
   131  	req := &request{
   132  		method:  "POST",
   133  		bucket:  b.Name,
   134  		path:    key,
   135  		headers: headers,
   136  		params:  params,
   137  	}
   138  	var err error
   139  	var resp struct {
   140  		UploadID string `xml:"UploadId"`
   141  	}
   142  	for attempt := b.S3.AttemptStrategy.Start(); attempt.Next(); {
   143  		err = b.S3.query(ctx, req, &resp)
   144  		if !shouldRetry(err) {
   145  			break
   146  		}
   147  	}
   148  	if err != nil {
   149  		return nil, err
   150  	}
   151  	return &Multi{Bucket: b, Key: key, UploadID: resp.UploadID}, nil
   152  }
   153  
   154  // PutPart sends part n of the multipart upload, reading all the content from r.
   155  // Each part, except for the last one, must be at least 5MB in size.
   156  //
   157  // See http://goo.gl/pqZer for details.
   158  func (m *Multi) PutPart(ctx context.Context, n int, r io.ReadSeeker) (Part, error) {
   159  	partSize, _, md5b64, err := seekerInfo(r)
   160  	if err != nil {
   161  		return Part{}, err
   162  	}
   163  	return m.putPart(ctx, n, r, partSize, md5b64)
   164  }
   165  
   166  func (m *Multi) putPart(ctx context.Context, n int, r io.ReadSeeker, partSize int64, md5b64 string) (Part, error) {
   167  	headers := map[string][]string{
   168  		"Content-Length": {strconv.FormatInt(partSize, 10)},
   169  		"Content-MD5":    {md5b64},
   170  	}
   171  	m.Bucket.addTokenHeader(headers)
   172  	params := map[string][]string{
   173  		"uploadId":   {m.UploadID},
   174  		"partNumber": {strconv.FormatInt(int64(n), 10)},
   175  	}
   176  	for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); {
   177  		_, err := r.Seek(0, 0)
   178  		if err != nil {
   179  			return Part{}, err
   180  		}
   181  		req := &request{
   182  			method:  "PUT",
   183  			bucket:  m.Bucket.Name,
   184  			path:    m.Key,
   185  			headers: headers,
   186  			params:  params,
   187  			payload: r,
   188  		}
   189  		err = m.Bucket.S3.prepare(req)
   190  		if err != nil {
   191  			return Part{}, err
   192  		}
   193  		resp, err := m.Bucket.S3.run(ctx, req, nil)
   194  		if shouldRetry(err) && attempt.HasNext() {
   195  			continue
   196  		}
   197  		if err != nil {
   198  			return Part{}, err
   199  		}
   200  		etag := resp.Header.Get("ETag")
   201  		if etag == "" {
   202  			return Part{}, errors.New("part upload succeeded with no ETag")
   203  		}
   204  		return Part{n, etag, partSize}, nil
   205  	}
   206  	panic("unreachable")
   207  }
   208  
   209  func seekerInfo(r io.ReadSeeker) (size int64, md5hex string, md5b64 string, err error) {
   210  	_, err = r.Seek(0, 0)
   211  	if err != nil {
   212  		return 0, "", "", err
   213  	}
   214  	digest := md5.New()
   215  	size, err = io.Copy(digest, r)
   216  	if err != nil {
   217  		return 0, "", "", err
   218  	}
   219  	sum := digest.Sum(nil)
   220  	md5hex = hex.EncodeToString(sum)
   221  	md5b64 = base64.StdEncoding.EncodeToString(sum)
   222  	return size, md5hex, md5b64, nil
   223  }
   224  
// Part describes one uploaded chunk of a multipart upload, as reported
// by S3 when listing parts or returned by PutPart.
type Part struct {
	N    int `xml:"PartNumber"` // 1-based part number within the upload
	ETag string                 // entity tag the server assigned to the part
	Size int64                  // part size in bytes
}
   230  
// partSlice implements sort.Interface, ordering parts by part number.
type partSlice []Part

func (s partSlice) Len() int           { return len(s) }
func (s partSlice) Less(i, j int) bool { return s[i].N < s[j].N }
func (s partSlice) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
   236  
// listPartsResp mirrors the XML body of a ListParts response.
type listPartsResp struct {
	NextPartNumberMarker string // part number to resume from when truncated
	IsTruncated          bool   // true when more parts remain beyond this page
	Part                 []Part // parts returned in this page
}

// That's the default. Here just for testing.
var listPartsMax = 1000
   245  
   246  // ListParts returns the list of previously uploaded parts in m,
   247  // ordered by part number.
   248  //
   249  // See http://goo.gl/ePioY for details.
   250  func (m *Multi) ListParts(ctx context.Context) ([]Part, error) {
   251  	params := map[string][]string{
   252  		"uploadId":  {m.UploadID},
   253  		"max-parts": {strconv.FormatInt(int64(listPartsMax), 10)},
   254  	}
   255  	headers := map[string][]string{}
   256  	m.Bucket.addTokenHeader(headers)
   257  
   258  	var parts partSlice
   259  	for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); {
   260  		req := &request{
   261  			method:  "GET",
   262  			bucket:  m.Bucket.Name,
   263  			path:    m.Key,
   264  			params:  params,
   265  			headers: headers,
   266  		}
   267  		var resp listPartsResp
   268  		err := m.Bucket.S3.query(ctx, req, &resp)
   269  		if shouldRetry(err) && attempt.HasNext() {
   270  			continue
   271  		}
   272  		if err != nil {
   273  			return nil, err
   274  		}
   275  		parts = append(parts, resp.Part...)
   276  		if !resp.IsTruncated {
   277  			sort.Sort(parts)
   278  			return parts, nil
   279  		}
   280  		params["part-number-marker"] = []string{resp.NextPartNumberMarker}
   281  		attempt = m.Bucket.S3.AttemptStrategy.Start() // Last request worked.
   282  	}
   283  	panic("unreachable")
   284  }
   285  
// ReaderAtSeeker combines random access (ReadAt) with sequential,
// seekable reading — what PutAll needs to slice a source into parts.
type ReaderAtSeeker interface {
	io.ReaderAt
	io.ReadSeeker
}
   290  
// PutAll sends all of r via a multipart upload with parts no larger
// than partSize bytes, which must be set to at least 5MB.
// Parts previously uploaded are either reused if their checksum
// and size match the new part, or otherwise overwritten with the
// new content.
// PutAll returns all the parts of m (reused or not).
func (m *Multi) PutAll(r ReaderAtSeeker, partSize int64) ([]Part, error) {
	// "NoSuchUpload" just means no parts exist yet; any other error aborts.
	old, err := m.ListParts(context.Background())
	if err != nil && !hasCode(err, "NoSuchUpload") {
		return nil, err
	}
	reuse := 0   // Index of next old part to consider reusing.
	current := 1 // Part number of latest good part handled.
	// Seek to the end (whence 2 == io.SeekEnd) to learn the total size.
	totalSize, err := r.Seek(0, 2)
	if err != nil {
		return nil, err
	}
	first := true // Must send at least one empty part if the file is empty.
	var result []Part
NextSection:
	for offset := int64(0); offset < totalSize || first; offset += partSize {
		first = false
		if offset+partSize > totalSize {
			// Final section: shrink to whatever remains.
			partSize = totalSize - offset
		}
		section := io.NewSectionReader(r, offset, partSize)
		_, md5hex, md5b64, err := seekerInfo(section)
		if err != nil {
			return nil, err
		}
		// Scan previously uploaded parts for one matching this section.
		for reuse < len(old) && old[reuse].N <= current {
			// Looks like this part was already sent.
			part := &old[reuse]
			// The comparison expects the stored ETag to be the quoted hex
			// MD5 of the part body (standard for non-SSE S3 uploads).
			etag := `"` + md5hex + `"`
			if part.N == current && part.Size == partSize && part.ETag == etag {
				// Checksum matches. Reuse the old part.
				result = append(result, *part)
				current++
				continue NextSection
			}
			reuse++
		}

		// Part wasn't found or doesn't match. Send it.
		part, err := m.putPart(context.Background(), current, section, partSize, md5b64)
		if err != nil {
			return nil, err
		}
		result = append(result, part)
		current++
	}
	return result, nil
}
   344  
// completeUpload is the XML request body sent to finish a multipart upload.
type completeUpload struct {
	XMLName xml.Name      `xml:"CompleteMultipartUpload"`
	Parts   completeParts `xml:"Part"`
}

// completePart identifies one uploaded part by number and ETag.
type completePart struct {
	PartNumber int
	ETag       string
}

// completeParts implements sort.Interface so parts can be sent in
// ascending part-number order, as the API requires.
type completeParts []completePart

func (p completeParts) Len() int           { return len(p) }
func (p completeParts) Less(i, j int) bool { return p[i].PartNumber < p[j].PartNumber }
func (p completeParts) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   360  
// completeResponse is the XML body returned by CompleteMultipartUpload.
type completeResponse struct {
	// The element name: should be either CompleteMultipartUploadResult or Error.
	XMLName xml.Name
	// If the element was error, then it should have the following:
	Code      string
	Message   string
	RequestID string `xml:"RequestId"`
	HostID    string `xml:"HostId"`
}
   370  
   371  // Complete assembles the given previously uploaded parts into the
   372  // final object. This operation may take several minutes.
   373  //
   374  // The complete call to AMZ may still fail after returning HTTP 200,
   375  // so even though it's unused, the body of the reply must be demarshalled
   376  // and checked to see whether or not the complete succeeded.
   377  //
   378  // See http://goo.gl/2Z7Tw for details.
   379  func (m *Multi) Complete(ctx context.Context, parts []Part) error {
   380  	params := map[string][]string{
   381  		"uploadId": {m.UploadID},
   382  	}
   383  	c := completeUpload{}
   384  	for _, p := range parts {
   385  		c.Parts = append(c.Parts, completePart{p.N, p.ETag})
   386  	}
   387  	sort.Sort(c.Parts)
   388  	data, err := xml.Marshal(&c)
   389  	if err != nil {
   390  		return err
   391  	}
   392  
   393  	// Setting Content-Length prevents breakage on DreamObjects
   394  	for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); {
   395  		headers := map[string][]string{
   396  			"Content-Length": {strconv.Itoa(len(data))},
   397  		}
   398  		m.Bucket.addTokenHeader(headers)
   399  		req := &request{
   400  			method:  "POST",
   401  			bucket:  m.Bucket.Name,
   402  			path:    m.Key,
   403  			params:  params,
   404  			payload: bytes.NewReader(data),
   405  			headers: headers,
   406  		}
   407  
   408  		resp := &completeResponse{}
   409  		err := m.Bucket.S3.query(ctx, req, resp)
   410  		if shouldRetry(err) && attempt.HasNext() {
   411  			continue
   412  		}
   413  		if err == nil && resp.XMLName.Local == "Error" {
   414  			err = &Error{
   415  				StatusCode: 200,
   416  				Code:       resp.Code,
   417  				Message:    resp.Message,
   418  				RequestID:  resp.RequestID,
   419  				HostID:     resp.HostID,
   420  			}
   421  		}
   422  		return err
   423  	}
   424  	panic("unreachable")
   425  }
   426  
   427  // Abort deletes an unfinished multipart upload and any previously
   428  // uploaded parts for it.
   429  //
   430  // After a multipart upload is aborted, no additional parts can be
   431  // uploaded using it. However, if any part uploads are currently in
   432  // progress, those part uploads might or might not succeed. As a result,
   433  // it might be necessary to abort a given multipart upload multiple
   434  // times in order to completely free all storage consumed by all parts.
   435  //
   436  // NOTE: If the described scenario happens to you, please report back to
   437  // the goamz authors with details. In the future such retrying should be
   438  // handled internally, but it's not clear what happens precisely (Is an
   439  // error returned? Is the issue completely undetectable?).
   440  //
   441  // See http://goo.gl/dnyJw for details.
   442  func (m *Multi) Abort(ctx context.Context) error {
   443  	params := map[string][]string{
   444  		"uploadId": {m.UploadID},
   445  	}
   446  	headers := map[string][]string{}
   447  	m.Bucket.addTokenHeader(headers)
   448  
   449  	for attempt := m.Bucket.S3.AttemptStrategy.Start(); attempt.Next(); {
   450  		req := &request{
   451  			method:  "DELETE",
   452  			bucket:  m.Bucket.Name,
   453  			path:    m.Key,
   454  			params:  params,
   455  			headers: headers,
   456  		}
   457  		err := m.Bucket.S3.query(ctx, req, nil)
   458  		if shouldRetry(err) && attempt.HasNext() {
   459  			continue
   460  		}
   461  		return err
   462  	}
   463  	panic("unreachable")
   464  }