go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/sink/artifact_uploader.go

// Copyright 2020 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package sink

import (
	"bytes"
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"net/http"
	"os"

	"go.chromium.org/luci/common/errors"
	"go.chromium.org/luci/common/retry"
	"go.chromium.org/luci/common/retry/transient"
	"go.chromium.org/luci/common/sync/dispatcher/buffer"

	pb "go.chromium.org/luci/resultdb/proto/v1"
)

// artifactUploader provides functions for uploading artifacts to ResultDB.
type artifactUploader struct {
	// Recorder is a gRPC client used to upload artifacts in batches.
	Recorder pb.RecorderClient

	// StreamClient is an HTTP client used to upload artifacts that are too large
	// to be batched.
	StreamClient *http.Client
	// StreamHost is the host of a ResultDB service instance, to which artifacts
	// are streamed.
	StreamHost string

	// MaxBatchable is the maximum size of an artifact that can be batched.
	MaxBatchable int64
}

// StreamUpload uploads the artifact in a streaming manner via HTTP.
func (u *artifactUploader) StreamUpload(ctx context.Context, t *uploadTask, updateToken string) error {
	var body io.ReadSeeker
	var err error

	if t.art.GetGcsUri() != "" {
		return errors.Reason("StreamUpload does not support gcsUri upload").Err()
	}

	if fp := t.art.GetFilePath(); fp == "" {
		body = bytes.NewReader(t.art.GetContents())
	} else {
		fh, err := os.Open(t.art.GetFilePath())
		if err != nil {
			return err
		}
		defer fh.Close()
		body = fh
	}

	req, err := http.NewRequestWithContext(
		ctx, "PUT", fmt.Sprintf("https://%s/%s", u.StreamHost, t.artName), body)
	if err != nil {
		return errors.Annotate(err, "newHTTPRequest").Err()
	}

	// Client.Do always closes the Body on exit, whether there was an error or not.
	// It also closes the Body on 3xx responses, which requires re-sending the
	// request. If req.GetBody != nil, it calls GetBody to get a new copy of the
	// body and re-sends the request on 3xx responses.
	//
	// http.NewRequestWithContext() returns an HTTP request with
	// - custom GetBody and ContentLength set, if the given body is an in-memory
	//   reader such as *bytes.Reader,
	// - nil GetBody and ContentLength unset, if the given body is an *os.File.
	//
	// Therefore, if the body is an *os.File, the caller is responsible for
	// setting GetBody and ContentLength, as necessary.
	if fh, ok := body.(*os.File); ok {
		// Prevent the file handle from being closed by Client.Do. The file handle,
		// body, will be closed by the defer function above. With NopCloser(),
		// GetBody() can simply reset the cursor and return the handle without
		// reopening the file.
		req.Body = io.NopCloser(body)
		req.GetBody = func() (io.ReadCloser, error) {
			if _, err := body.Seek(0, io.SeekStart); err != nil {
				return nil, err
			}
			return io.NopCloser(body), nil
		}

		st, err := fh.Stat()
		if err != nil {
			return err
		}
		req.ContentLength = st.Size()
	}

	// Calculate the hash and rewind the position back to the beginning so that
	// the request body can be re-read by StreamClient.Do.
	hash, err := calculateHash(body)
	if err != nil {
		return errors.Annotate(err, "artifact-hash").Err()
	}
	if _, err := body.Seek(0, io.SeekStart); err != nil {
		return err
	}
	req.Header.Add("Content-Hash", hash)
	if t.art.ContentType != "" {
		req.Header.Add("Content-Type", t.art.ContentType)
	}
	req.Header.Add("Update-Token", updateToken)
	return u.sendHTTP(req)
}
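// demoGetBody is a minimal editor's sketch (names and values are illustrative,
// not part of the uploader's API) of the net/http behavior the comment in
// StreamUpload relies on: NewRequest pre-fills GetBody and ContentLength for
// in-memory readers such as *bytes.Reader, but leaves them unset for other
// reader types, including *os.File.
func demoGetBody() {
	inMem, _ := http.NewRequest("PUT", "https://example.com", bytes.NewReader([]byte("hi")))
	fmt.Println(inMem.ContentLength, inMem.GetBody != nil) // 2 true

	// io.MultiReader hides the concrete reader type, much like *os.File, so
	// net/http cannot pre-fill GetBody or ContentLength here.
	opaque, _ := http.NewRequest("PUT", "https://example.com", io.MultiReader(bytes.NewReader([]byte("hi"))))
	fmt.Println(opaque.ContentLength, opaque.GetBody == nil) // 0 true
}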
// sendHTTP sends the given request via StreamClient, retrying on transient
// errors.
func (u *artifactUploader) sendHTTP(req *http.Request) error {
	return retry.Retry(req.Context(), transient.Only(retry.Default), func() error {
		resp, err := u.StreamClient.Do(req)
		if err != nil {
			return errors.Annotate(err, "failed to send HTTP request").Err()
		}

		code := resp.StatusCode
		// ResultDB returns StatusNoContent on success.
		if code == http.StatusNoContent {
			return nil
		}

		// Tag the error as a transient error, if retriable.
		hErr := errors.Reason("http request failed(%d): %s", resp.StatusCode, resp.Status)
		if code == http.StatusRequestTimeout || code == http.StatusTooManyRequests || code >= 500 {
			hErr = hErr.Tag(transient.Tag)
		}
		return hErr.Err()
	}, nil)
}

// calculateHash returns the SHA-256 hash of input in the "sha256:<hex>" format
// used for the Content-Hash header.
func calculateHash(input io.Reader) (string, error) {
	hash := sha256.New()
	if _, err := io.Copy(hash, input); err != nil {
		return "", err
	}
	return "sha256:" + hex.EncodeToString(hash.Sum(nil)), nil
}

// newBatchCreateArtifactsRequest returns a BatchCreateArtifactsRequest with at
// most 500 items, capping the sum of the artifact sizes at maxSum.
//
// Returns an error if tasks contains an item with an artifact larger than
// maxSum.
func newBatchCreateArtifactsRequest(maxSum int64, tasks []buffer.BatchItem) (*pb.BatchCreateArtifactsRequest, error) {
	l := len(tasks)
	if l > 500 {
		l = 500
	}

	var sum int64
	reqs := make([]*pb.CreateArtifactRequest, 0, l)
	for i := 0; i < l; i++ {
		ut := tasks[i].Item.(*uploadTask)

		// artifactChannel.schedule() should have sent it to streamChannel.
		if ut.size > maxSum {
			return nil, errors.Reason("an artifact is greater than %d", maxSum).Err()
		}
		// If the sum is going to be too big, stop the iteration.
		if sum+ut.size > maxSum {
			break
		}

		r, err := ut.CreateRequest()
		if err != nil {
			return nil, errors.Annotate(err, "CreateRequest").Err()
		}
		reqs = append(reqs, r)
		sum += ut.size
	}
	return &pb.BatchCreateArtifactsRequest{Requests: reqs}, nil
}

// BatchUpload uploads a batch of artifacts via Recorder.BatchCreateArtifacts.
func (u *artifactUploader) BatchUpload(ctx context.Context, b *buffer.Batch) error {
	var req *pb.BatchCreateArtifactsRequest
	var err error
	if b.Meta != nil {
		// b.Meta holds the request built by a previous, failed attempt; retry it
		// before building new requests.
		req = b.Meta.(*pb.BatchCreateArtifactsRequest)
		if _, err = u.Recorder.BatchCreateArtifacts(ctx, req); err != nil {
			return err
		}
	}

	// This loop handles the following conditions.
	//
	// 1) pb.BatchCreateArtifactsRequest can contain at most 500 artifacts.
	// 2) The sum of the artifact content sizes must be <= u.MaxBatchable.
	// 3) The size of the input artifacts varies.
	//
	// It's possible that a buffer.Batch contains 500 large artifact files of,
	// say, 1 MiB each. To avoid loading the contents of all the artifacts
	// unnecessarily, this loop slices b.Data by 500 items or u.MaxBatchable
	// bytes, and creates a batch request only for the tasks handled in the
	// current iteration.
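	//
	// For example (illustrative numbers): with u.MaxBatchable = 10 MiB and 600
	// pending ~1 MiB tasks, the first pass batches roughly the first 10 tasks,
	// b.Data is advanced past them after a successful upload, and the loop
	// repeats until b.Data is empty.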
	for len(b.Data) > 0 {
		if req, err = newBatchCreateArtifactsRequest(u.MaxBatchable, b.Data); err != nil {
			return errors.Annotate(err, "newBatchCreateArtifactsRequest").Err()
		}

		// Save the request in b.Meta so that a retry of this batch can resume
		// from the same request.
		b.Meta = req
		if _, err := u.Recorder.BatchCreateArtifacts(ctx, req); err != nil {
			return err
		}
		b.Data = b.Data[len(req.Requests):]
	}
	return nil
}
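// exampleUploader is a minimal editor's sketch of how an artifactUploader
// might be assembled. The host name and the batching cap are illustrative
// assumptions, and a real StreamClient would carry authentication for the
// target host.
func exampleUploader(recorder pb.RecorderClient) *artifactUploader {
	return &artifactUploader{
		Recorder:     recorder,
		StreamClient: &http.Client{},        // assumption: auth omitted for brevity
		StreamHost:   "results.example.com", // hypothetical ResultDB host
		MaxBatchable: 512 * 1024,            // hypothetical 512 KiB batching cap
	}
}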