go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cipd/appengine/impl/gs/uploader.go (about) 1 // Copyright 2018 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gs 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "net/http" 22 "time" 23 24 "golang.org/x/net/context/ctxhttp" 25 "google.golang.org/api/googleapi" 26 27 "go.chromium.org/luci/common/clock" 28 "go.chromium.org/luci/common/logging" 29 ) 30 31 // TODO(vadimsh): Use this code from the client too. 32 33 // RestartUploadError is returned by Uploader when it resumes an interrupted 34 // upload, and Google Storage asks to upload from an offset the Uploader has no 35 // data for. 36 // 37 // Callers of Uploader should handle this case themselves by restarting the 38 // upload from the requested offset. 39 // 40 // See https://cloud.google.com/storage/docs/json_api/v1/how-tos/resumable-upload#resume-upload 41 type RestartUploadError struct { 42 Offset int64 43 } 44 45 // Error is part of error interface. 46 func (e *RestartUploadError) Error() string { 47 return fmt.Sprintf("the upload should be restarted from offset %d", e.Offset) 48 } 49 50 // Uploader implements io.Writer for Google Storage Resumable Upload sessions. 51 // 52 // Does no buffering inside, thus efficiency of uploads directly depends on 53 // granularity of Write(...) calls. Additionally, Google Storage expects the 54 // length of each uploaded chunk to be a multiple of 256 Kb, so callers of 55 // Write(...) should supply the appropriately-sized chunks. 56 // 57 // Retries transient errors internally, but it can potentially end up in a 58 // situation where it needs data not available in the current Write(...) 59 // operation. In this case Write returns *RestartUploadError error, which 60 // indicates an offset the upload should be restarted from. 61 type Uploader struct { 62 Context context.Context // the context for canceling retries and for logging 63 Client *http.Client // the client to use for sending anonymous requests 64 UploadURL string // upload URL returned by GoogleStorage.StartUpload 65 Offset int64 // offset in the file to upload to, mutated by Write 66 FileSize int64 // total size of the file being uploaded, required 67 68 // requestMock used from tests to mock ctxhttp.Do that is hostile to mocked 69 // time. 70 requestMock func(*http.Request) (*http.Response, error) 71 } 72 73 // Write is part of io.Writer interface. 74 func (u *Uploader) Write(p []byte) (n int, err error) { 75 if len(p) == 0 { 76 return 0, nil 77 } 78 79 bufStart := u.Offset 80 bufEnd := u.Offset + int64(len(p)) 81 if bufEnd > u.FileSize { 82 return 0, fmt.Errorf("attempting to write past the declared file size (%d > %d)", bufEnd, u.FileSize) 83 } 84 85 for u.Offset != bufEnd && err == nil { 86 resuming := false 87 err = withRetry(u.Context, func() error { 88 // When resuming, we upload 0 bytes chunk to grab the last known offset. 89 // Otherwise, just upload the next chunk of data. 90 var chunk []byte 91 if !resuming { 92 chunk = p[int(u.Offset-bufStart):] 93 } 94 resumeOffset, err := u.uploadChunk(chunk) 95 96 // On transient errors, try to resume right away once. 97 if apiErr, _ := err.(*googleapi.Error); apiErr != nil && apiErr.Code >= 500 { 98 logging.WithError(err).Warningf(u.Context, "Transient error, querying for last uploaded offset") 99 resuming = true 100 resumeOffset, err = u.uploadChunk(nil) 101 } 102 103 switch { 104 case err != nil: 105 // Either a fatal error during the upload or a transient or fatal error 106 // trying to resume. Let 'withRetry' handle it by retrying or failing. 107 return err 108 case resumeOffset < bufStart || resumeOffset > bufEnd: 109 // Resuming requires data we don't have? Escalate to the caller. 110 return &RestartUploadError{Offset: resumeOffset} 111 default: 112 // Resume the upload from the last acknowledged offset. 113 u.Offset = resumeOffset 114 resuming = false 115 return nil 116 } 117 }) 118 } 119 120 return int(u.Offset - bufStart), err 121 } 122 123 // uploadChunk pushes the given chunk to Google Storage at u.Offset offset. 124 // 125 // Returns an offset to continue the upload from (usually u.Offset + len(p), but 126 // Google Storage docs are vague about that, so it may be different). 127 // 128 // If len(p) is 0, makes an empty PUT request. This is useful for querying 129 // for the last uploaded offset to resume upload from. 130 func (u *Uploader) uploadChunk(p []byte) (int64, error) { 131 ctx, cancel := clock.WithTimeout(u.Context, 30*time.Second) 132 defer cancel() 133 134 logging.Infof(ctx, "gs: UploadChunk(offset=%d, chunk_size=%d, length=%d)", u.Offset, len(p), u.FileSize) 135 req, err := http.NewRequest("PUT", u.UploadURL, bytes.NewBuffer(p)) 136 if err != nil { 137 return 0, err 138 } 139 req.ContentLength = int64(len(p)) 140 if len(p) > 0 { 141 req.Header.Set("Content-Range", fmt.Sprintf("bytes %d-%d/%d", u.Offset, u.Offset+int64(len(p))-1, u.FileSize)) 142 } else { 143 req.Header.Set("Content-Range", fmt.Sprintf("bytes */%d", u.FileSize)) 144 } 145 146 var resp *http.Response 147 if u.requestMock != nil { 148 resp, err = u.requestMock(req) 149 } else { 150 resp, err = ctxhttp.Do(ctx, u.Client, req) 151 } 152 if err != nil { 153 return 0, err 154 } 155 defer googleapi.CloseBody(resp) 156 157 // Google Storage return 308 (http.StatusPermanentRedirect) on partial upload. 158 // Since it is not really a redirect, we just use 308 below to avoid 159 // confusion. 160 switch { 161 case resp.StatusCode >= 200 && resp.StatusCode <= 299: 162 return u.FileSize, nil // finished uploading everything 163 case resp.StatusCode != 308: 164 // Note: we can't call CheckResponse earlier, since it treats 308 as 165 // an error. 166 if err := googleapi.CheckResponse(resp); err != nil { 167 return 0, err 168 } 169 panic(fmt.Sprintf("impossible state, status code %d", resp.StatusCode)) 170 } 171 172 // Extract the last uploaded offset from Range header. No Range header means 173 // there are no uploaded data yet (and so we need to restart from 0). Be 174 // paranoid and check this happens only when we are really resuming. Any 175 // successful data upload MUST have Range response header. 176 hdr := resp.Header.Get("Range") 177 if hdr == "" { 178 if len(p) != 0 { 179 return 0, fmt.Errorf("no Range header in Google Storage response") 180 } 181 return 0, nil 182 } 183 184 var offset int64 185 if _, err = fmt.Sscanf(hdr, "bytes=0-%d", &offset); err != nil { 186 return 0, fmt.Errorf("unexpected Range header value: %q", hdr) 187 } 188 189 // 'offset' is an offset of the last uploaded byte, need to resume uploading 190 // from the next one. 191 return offset + 1, nil 192 }