go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cipd/appengine/impl/gs/uploader.go

// Copyright 2018 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package gs

import (
	"bytes"
	"context"
	"fmt"
	"net/http"
	"time"

	"golang.org/x/net/context/ctxhttp"
	"google.golang.org/api/googleapi"

	"go.chromium.org/luci/common/clock"
	"go.chromium.org/luci/common/logging"
)

// TODO(vadimsh): Use this code from the client too.

// RestartUploadError is returned by Uploader when it resumes an interrupted
// upload, and Google Storage asks to upload from an offset the Uploader has no
// data for.
//
// Callers of Uploader should handle this case themselves by restarting the
// upload from the requested offset.
//
// See https://cloud.google.com/storage/docs/json_api/v1/how-tos/resumable-upload#resume-upload
type RestartUploadError struct {
	Offset int64
}

// Error is part of the error interface.
func (e *RestartUploadError) Error() string {
	return fmt.Sprintf("the upload should be restarted from offset %d", e.Offset)
}
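
// The helper below is an illustrative sketch and not part of the original
// file (the name restartOffset is hypothetical). It shows one way a caller of
// Uploader.Write might detect the restart condition: recover the offset from
// the returned error, seek the data source back to that position, and write
// again from there. Callers that wrap errors may prefer errors.As over the
// plain type assertion used here.
func restartOffset(err error) (offset int64, restart bool) {
	if e, ok := err.(*RestartUploadError); ok {
		return e.Offset, true
	}
	return 0, false
}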

// Uploader implements io.Writer for Google Storage Resumable Upload sessions.
//
// It does no internal buffering, so the efficiency of uploads depends directly
// on the granularity of Write(...) calls. Additionally, Google Storage expects
// the length of each uploaded chunk to be a multiple of 256 KiB, so callers of
// Write(...) should supply appropriately-sized chunks.
//
// Transient errors are retried internally, but the retry can end up needing
// data that is not available in the current Write(...) operation. In this case
// Write returns a *RestartUploadError, which indicates the offset the upload
// should be restarted from.
type Uploader struct {
	Context   context.Context // the context for canceling retries and for logging
	Client    *http.Client    // the client to use for sending anonymous requests
	UploadURL string          // upload URL returned by GoogleStorage.StartUpload
	Offset    int64           // offset in the file to upload to, mutated by Write
	FileSize  int64           // total size of the file being uploaded, required

	// requestMock is used from tests to mock ctxhttp.Do, which is hostile to
	// mocked time.
	requestMock func(*http.Request) (*http.Response, error)
}
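
// uploadBlob below is an illustrative sketch and not part of the original
// file (the name uploadBlob is hypothetical). It shows one way a caller might
// drive the Uploader: feed it chunks whose length is a multiple of 256 KiB
// (except possibly the last one) and, on *RestartUploadError, rewind to the
// requested offset and resend from there. It assumes the whole blob fits in
// memory; real callers would typically stream from a file instead.
func uploadBlob(ctx context.Context, client *http.Client, uploadURL string, blob []byte) error {
	const chunkSize = 4 * 256 * 1024 // 1 MiB, a multiple of the required 256 KiB

	u := &Uploader{
		Context:   ctx,
		Client:    client,
		UploadURL: uploadURL,
		FileSize:  int64(len(blob)),
	}
	for u.Offset < u.FileSize {
		end := u.Offset + chunkSize
		if end > u.FileSize {
			end = u.FileSize
		}
		_, err := u.Write(blob[u.Offset:end])
		if e, ok := err.(*RestartUploadError); ok {
			u.Offset = e.Offset // Google Storage asked to resend from this offset
			continue
		}
		if err != nil {
			return err
		}
	}
	return nil
}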

// Write is part of the io.Writer interface.
func (u *Uploader) Write(p []byte) (n int, err error) {
	if len(p) == 0 {
		return 0, nil
	}

	bufStart := u.Offset
	bufEnd := u.Offset + int64(len(p))
	if bufEnd > u.FileSize {
		return 0, fmt.Errorf("attempting to write past the declared file size (%d > %d)", bufEnd, u.FileSize)
	}

	for u.Offset != bufEnd && err == nil {
		resuming := false
		err = withRetry(u.Context, func() error {
			// When resuming, we upload a 0-byte chunk to grab the last known offset.
			// Otherwise, just upload the next chunk of data.
			var chunk []byte
			if !resuming {
				chunk = p[int(u.Offset-bufStart):]
			}
			resumeOffset, err := u.uploadChunk(chunk)

			// On transient errors, try to resume right away once.
			if apiErr, _ := err.(*googleapi.Error); apiErr != nil && apiErr.Code >= 500 {
				logging.WithError(err).Warningf(u.Context, "Transient error, querying for last uploaded offset")
				resuming = true
				resumeOffset, err = u.uploadChunk(nil)
			}

			switch {
			case err != nil:
				// Either a fatal error during the upload or a transient or fatal error
				// trying to resume. Let 'withRetry' handle it by retrying or failing.
				return err
			case resumeOffset < bufStart || resumeOffset > bufEnd:
				// Resuming requires data we don't have? Escalate to the caller.
				return &RestartUploadError{Offset: resumeOffset}
			default:
				// Resume the upload from the last acknowledged offset.
				u.Offset = resumeOffset
				resuming = false
				return nil
			}
		})
	}

	return int(u.Offset - bufStart), err
}

// uploadChunk pushes the given chunk to Google Storage at offset u.Offset.
//
// Returns an offset to continue the upload from (usually u.Offset + len(p),
// but Google Storage docs are vague about that, so it may be different).
//
// If len(p) is 0, makes an empty PUT request. This is useful for querying for
// the last uploaded offset to resume the upload from.
func (u *Uploader) uploadChunk(p []byte) (int64, error) {
	ctx, cancel := clock.WithTimeout(u.Context, 30*time.Second)
	defer cancel()

	logging.Infof(ctx, "gs: UploadChunk(offset=%d, chunk_size=%d, length=%d)", u.Offset, len(p), u.FileSize)
	req, err := http.NewRequest("PUT", u.UploadURL, bytes.NewBuffer(p))
	if err != nil {
		return 0, err
	}
	req.ContentLength = int64(len(p))
	if len(p) > 0 {
		req.Header.Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", u.Offset, u.Offset+int64(len(p))-1, u.FileSize))
	} else {
		req.Header.Set("Content-Range", fmt.Sprintf("bytes */%d", u.FileSize))
	}

	var resp *http.Response
	if u.requestMock != nil {
		resp, err = u.requestMock(req)
	} else {
		resp, err = ctxhttp.Do(ctx, u.Client, req)
	}
	if err != nil {
		return 0, err
	}
	defer googleapi.CloseBody(resp)

	// Google Storage returns 308 (http.StatusPermanentRedirect) on a partial
	// upload. Since it is not really a redirect, we just use the literal 308
	// below to avoid confusion.
	switch {
	case resp.StatusCode >= 200 && resp.StatusCode <= 299:
		return u.FileSize, nil // finished uploading everything
	case resp.StatusCode != 308:
		// Note: we can't call CheckResponse earlier, since it treats 308 as
		// an error.
		if err := googleapi.CheckResponse(resp); err != nil {
			return 0, err
		}
		panic(fmt.Sprintf("impossible state, status code %d", resp.StatusCode))
	}

	// Extract the last uploaded offset from the Range header. No Range header
	// means no data has been uploaded yet (and so we need to restart from 0).
	// Be paranoid and check this happens only when we are really resuming: any
	// successful data upload MUST have a Range response header.
	hdr := resp.Header.Get("Range")
	if hdr == "" {
		if len(p) != 0 {
			return 0, fmt.Errorf("no Range header in Google Storage response")
		}
		return 0, nil
	}

	var offset int64
	if _, err = fmt.Sscanf(hdr, "bytes=0-%d", &offset); err != nil {
		return 0, fmt.Errorf("unexpected Range header value: %q", hdr)
	}

	// 'offset' is the offset of the last uploaded byte; resume uploading from
	// the next one.
	return offset + 1, nil
}
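
// The function below is an illustrative sketch and not part of the original
// file (the name mockResumeResponse is hypothetical). It shows how the 308 +
// Range handshake handled above maps onto requestMock: the fake response
// mimics what Google Storage sends for an incomplete resumable upload, so
// uploadChunk would parse the Range header and return lastByte+1, the offset
// to resume from. A test in this package could assign the returned closure to
// an Uploader's requestMock field.
func mockResumeResponse(lastByte int64) func(*http.Request) (*http.Response, error) {
	return func(*http.Request) (*http.Response, error) {
		return &http.Response{
			StatusCode: 308, // "Resume Incomplete" in resumable upload terms
			Header:     http.Header{"Range": {fmt.Sprintf("bytes=0-%d", lastByte)}},
			Body:       http.NoBody,
		}, nil
	}
}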