go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/sink/artifact_channel.go (about) 1 // Copyright 2020 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package sink 16 17 import ( 18 "context" 19 "fmt" 20 "os" 21 "strconv" 22 "sync" 23 "sync/atomic" 24 25 "go.chromium.org/luci/common/errors" 26 "go.chromium.org/luci/common/sync/dispatcher" 27 "go.chromium.org/luci/common/sync/dispatcher/buffer" 28 29 "go.chromium.org/luci/resultdb/pbutil" 30 pb "go.chromium.org/luci/resultdb/proto/v1" 31 sinkpb "go.chromium.org/luci/resultdb/sink/proto/v1" 32 ) 33 34 type uploadTask struct { 35 art *sinkpb.Artifact 36 artName string 37 size int64 // content size 38 } 39 40 // newUploadTask constructs an uploadTask for the artifact. 41 // 42 // If FilePath is set on the artifact, this calls os.Stat to obtain the file information, 43 // and may return an error if the Stat call fails. e.g., permission denied, not found. 44 // It also returns an error if the artifact file path is a directory. 45 func newUploadTask(name string, art *sinkpb.Artifact) (*uploadTask, error) { 46 ret := &uploadTask{ 47 art: art, 48 artName: name, 49 size: int64(len(art.GetContents())), 50 } 51 52 // Find and save the content size on uploadTask creation, so that the task scheduling 53 // and processing logic can use the size information w/o issuing system calls. 54 if fp := art.GetFilePath(); fp != "" { 55 st, err := os.Stat(fp) 56 switch { 57 case err != nil: 58 return nil, errors.Annotate(err, "querying file info").Err() 59 case st.Mode().IsRegular(): 60 // break 61 62 // Return a more human friendly error than 1000....0. 63 case st.IsDir(): 64 return nil, errors.Reason("%q is a directory", fp).Err() 65 default: 66 return nil, errors.Reason("%q is not a regular file: %s", fp, strconv.FormatInt(int64(st.Mode()), 2)).Err() 67 } 68 ret.size = st.Size() 69 } 70 return ret, nil 71 } 72 73 // CreateRequest returns a CreateArtifactRequest for the upload task. 74 // 75 // Note that this will open and read content from the file, the artifact is set with 76 // Artifact_FilePath. Save the returned request to avoid unnecessary I/Os, 77 // if necessary. 78 func (t *uploadTask) CreateRequest() (*pb.CreateArtifactRequest, error) { 79 invID, tID, rID, aID, err := pbutil.ParseArtifactName(t.artName) 80 req := &pb.CreateArtifactRequest{ 81 Artifact: &pb.Artifact{ 82 ArtifactId: aID, 83 ContentType: t.art.GetContentType(), 84 SizeBytes: t.size, 85 Contents: t.art.GetContents(), 86 GcsUri: t.art.GetGcsUri(), 87 }, 88 } 89 90 // parent 91 switch { 92 case err != nil: 93 // This should not happen. 94 // uploadTask should be created with validated artifacts only. 95 panic(fmt.Sprintf("invalid uploadTask.artName %q: %s", t.artName, err)) 96 case tID == "": 97 // Invocation-level artifact 98 req.Parent = pbutil.InvocationName(invID) 99 default: 100 req.Parent = pbutil.TestResultName(invID, tID, rID) 101 } 102 103 // contents 104 if fp := t.art.GetFilePath(); fp != "" { 105 if req.Artifact.Contents, err = os.ReadFile(fp); err != nil { 106 return nil, err 107 } 108 } 109 110 // Perform size check only for non gcs artifact. 111 if req.Artifact.GcsUri == "" { 112 // If the size of the read content is different to what stat claimed initially, then 113 // return an error, so that the batching logic can be kept simple. Test frameworks 114 // should send finalized artifacts only. 115 if int64(len(req.Artifact.Contents)) != t.size { 116 return nil, errors.Reason( 117 "the size of the artifact contents changed from %d to %d", 118 t.size, len(req.Artifact.Contents)).Err() 119 } 120 } 121 122 return req, nil 123 } 124 125 type artifactChannel struct { 126 // batchChannel uploads artifacts via pb.BatchCreateArtifacts(). 127 // 128 // This batches input artifacts and uploads them all at once. 129 // This is suitable for uploading a large number of small artifacts. 130 // 131 // The downside of this channel is that there is a limit on the maximum size of 132 // an artifact that can be included in a batch. Use streamChannel for artifacts 133 // greater than ServerConfig.MaxBatchableArtifactSize. 134 batchChannel dispatcher.Channel 135 136 // streamChannel uploads artifacts in a streaming manner via HTTP. 137 // 138 // This is suitable for uploading large files, but with limited parallelism. 139 // Use batchChannel, if possible. 140 streamChannel dispatcher.Channel 141 142 // wgActive indicates if there are active goroutines invoking reportTestResults. 143 // 144 // reportTestResults can be invoked by multiple goroutines in parallel. wgActive is used 145 // to ensure that all active goroutines finish enqueuing messages to the channel before 146 // closeAndDrain closes and drains the channel. 147 wgActive sync.WaitGroup 148 149 // 1 indicates that artifactChannel started the process of closing and draining 150 // the channel. 0, otherwise. 151 closed int32 152 153 cfg *ServerConfig 154 } 155 156 func newArtifactChannel(ctx context.Context, cfg *ServerConfig) *artifactChannel { 157 var err error 158 c := &artifactChannel{cfg: cfg} 159 au := artifactUploader{ 160 MaxBatchable: cfg.MaxBatchableArtifactSize, 161 Recorder: cfg.Recorder, 162 StreamClient: cfg.ArtifactStreamClient, 163 StreamHost: cfg.ArtifactStreamHost, 164 } 165 166 // batchChannel 167 bcOpts := &dispatcher.Options{ 168 Buffer: buffer.Options{ 169 // BatchCreateArtifactRequest can include up to 500 requests and at most 10MiB 170 // of artifact contents. uploadTaskSlicer slices tasks, as the number of size 171 // limits apply. 172 // 173 // It's recommended to keep BatchItemsMax >= 500 to increase the chance of 174 // BatchCreateArtifactRequest to contain 500 artifacts. 175 // 176 // Depending on the estimated pattern of artifact size distribution, consider 177 // to tune ServerConfig.MaxBatchableArtifactSize and BatchDuration to find 178 // the optimal point between artifact upload latency and throughput. 179 // 180 // For more details, visit 181 // https://godoc.org/go.chromium.org/luci/resultdb/proto/v1#BatchCreateArtifactsRequest 182 BatchItemsMax: 500, 183 MaxLeases: int(cfg.ArtChannelMaxLeases), 184 FullBehavior: &buffer.BlockNewItems{MaxItems: 8000}, 185 }, 186 } 187 c.batchChannel, err = dispatcher.NewChannel(ctx, bcOpts, func(b *buffer.Batch) error { 188 return errors.Annotate(au.BatchUpload(ctx, b), "BatchUpload").Err() 189 }) 190 if err != nil { 191 panic(fmt.Sprintf("failed to create batch channel for artifacts: %s", err)) 192 } 193 194 // streamChannel 195 stOpts := &dispatcher.Options{ 196 Buffer: buffer.Options{ 197 // BatchItemsMax MUST be 1. 198 BatchItemsMax: 1, 199 MaxLeases: int(cfg.ArtChannelMaxLeases), 200 FullBehavior: &buffer.BlockNewItems{MaxItems: 4000}, 201 }, 202 } 203 c.streamChannel, err = dispatcher.NewChannel(ctx, stOpts, func(b *buffer.Batch) error { 204 return errors.Annotate( 205 au.StreamUpload(ctx, b.Data[0].Item.(*uploadTask), cfg.UpdateToken), 206 "StreamUpload").Err() 207 }) 208 if err != nil { 209 panic(fmt.Sprintf("failed to create stream channel for artifacts: %s", err)) 210 } 211 return c 212 } 213 214 func (c *artifactChannel) closeAndDrain(ctx context.Context) { 215 // mark the channel as closed, so that schedule() won't accept new tasks. 216 if !atomic.CompareAndSwapInt32(&c.closed, 0, 1) { 217 return 218 } 219 // wait for all the active sessions to finish enquing tests results to the channel 220 c.wgActive.Wait() 221 222 var draining sync.WaitGroup 223 draining.Add(2) 224 go func() { 225 defer draining.Done() 226 c.batchChannel.CloseAndDrain(ctx) 227 }() 228 go func() { 229 defer draining.Done() 230 c.streamChannel.CloseAndDrain(ctx) 231 }() 232 draining.Wait() 233 } 234 235 func (c *artifactChannel) schedule(tasks ...*uploadTask) { 236 c.wgActive.Add(1) 237 defer c.wgActive.Done() 238 // if the channel already has been closed, drop the test results. 239 if atomic.LoadInt32(&c.closed) == 1 { 240 return 241 } 242 243 for _, task := range tasks { 244 if task.size > c.cfg.MaxBatchableArtifactSize { 245 c.streamChannel.C <- task 246 } else { 247 c.batchChannel.C <- task 248 } 249 } 250 }