go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/luciexe/host/buildmerge/build_state.go (about) 1 // Copyright 2019 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package buildmerge 16 17 import ( 18 "bytes" 19 "compress/zlib" 20 "context" 21 "io" 22 "sync" 23 24 "google.golang.org/protobuf/proto" 25 26 bbpb "go.chromium.org/luci/buildbucket/proto" 27 "go.chromium.org/luci/common/errors" 28 "go.chromium.org/luci/common/sync/dispatcher" 29 "go.chromium.org/luci/common/sync/dispatcher/buffer" 30 "go.chromium.org/luci/logdog/api/logpb" 31 "go.chromium.org/luci/logdog/common/types" 32 ) 33 34 // buildState represents the current state of a single build.proto stream. 35 type buildState struct { 36 // build holds the most recently processed Build state. This message should be 37 // treated as immutable (i.e. proto.Clone before modifying it). 38 // 39 // This may be `nil` until the first user-supplied build.proto is processed, 40 // or until the buildStateTracker closes. 41 build *bbpb.Build 42 43 // buildReadOnly holds the most recently processed Build state to read. 44 // This message should be treated as immutable (i.e. proto.Clone before modifying it). 45 buildReadOnly *bbpb.Build 46 47 // closed is set to true when the build state is terminated and will receive 48 // no more user updates (but may still need to be finalized()). 49 closed bool 50 51 // final is set to true when the build state is closed and all final 52 // processing has occurred on the build state. 53 final bool 54 55 // invalid is set to true when the interior structure (i.e. Steps) of latest 56 // contains invalid data and shouldn't be inspected. 57 invalid bool 58 } 59 60 // buildStateTracker manages the state of a single build.proto datagram stream. 61 type buildStateTracker struct { 62 ctx context.Context 63 64 // The Agent that this buildStateTracker belongs to. Used to access: 65 // * clockNow 66 // * calculateURLs 67 // * informNewData 68 merger *Agent 69 70 ldNamespace types.StreamName 71 72 // True iff we should expect zlib-compressed datagrams. 73 zlib bool 74 75 // We use this mutex to synchronize closure and sending operations on the work 76 // channel; `work` is configured, if it's running, to immediately accept any 77 // items pushed to it, so it's safe to hold this while sending on work.C. 78 workMu sync.Mutex 79 80 // The work channel is configured to only keep the latest incoming datagram. 81 // It's send function parses and interprets the Build message. 82 // Errors are not reported to the dispatcher.Channel, but are instead recorded 83 // in the parsed Build state. 84 work dispatcher.Channel 85 workClosed bool // true if we've closed work.C, protected by workMu 86 87 latestStateMu sync.Mutex 88 latestState *buildState 89 } 90 91 // updateState updates `state` with the Build.proto message inside the lock. 92 // 93 // If there's an error when generating the new build - i.e. when parsing `data` 94 // or an error in the decoded message's contents, `state.invalid` and 95 // `state.closed` will be set to true, and `state.build` will be updated with 96 // the error message. 97 func (t *buildStateTracker) updateState(newBuild *bbpb.Build, err error) { 98 t.latestStateMu.Lock() 99 defer t.latestStateMu.Unlock() 100 state := *t.latestState 101 oldBuild := state.build 102 103 if state.closed { 104 return 105 } 106 107 if err != nil { 108 if newBuild == nil { 109 if oldBuild == nil { 110 newBuild = &bbpb.Build{} 111 } else { 112 newBuild = oldBuild 113 } 114 } 115 setErrorOnBuild(newBuild, err) 116 newBuild.UpdateTime = t.merger.clockNow() 117 state.closed = true 118 state.invalid = true 119 } 120 121 state.build = newBuild 122 // Reset buildReadOnly since we have a new build state now. 123 state.buildReadOnly = nil 124 125 if state.closed { 126 t.Close() 127 } 128 129 t.latestState = &state 130 } 131 132 // parseBuild parses `data` then returns the parsed Build. 133 func (t *buildStateTracker) parseBuild(data []byte) (*bbpb.Build, error) { 134 if t.zlib { 135 z, err := zlib.NewReader(bytes.NewBuffer(data)) 136 if err != nil { 137 return nil, errors.Annotate(err, "constructing decompressor for Build").Err() 138 } 139 data, err = io.ReadAll(z) 140 if err != nil { 141 return nil, errors.Annotate(err, "decompressing Build").Err() 142 } 143 } 144 145 parsedBuild := &bbpb.Build{} 146 if err := proto.Unmarshal(data, parsedBuild); err != nil { 147 return nil, errors.Annotate(err, "parsing Build").Err() 148 } 149 150 for _, step := range parsedBuild.Steps { 151 if len(step.Logs) > 0 && step.Logs[0].Name == "$build.proto" { 152 // convert incoming $build.proto logs to MergeBuild messages. 153 // If the step has both, then just discard the $build.proto log. 154 // 155 // TODO(crbug.com/1310155): Remove this conversion after everything 156 // emits MergeBuild messages natively. 157 if step.MergeBuild == nil { 158 step.MergeBuild = &bbpb.Step_MergeBuild{ 159 FromLogdogStream: step.Logs[0].Url, 160 } 161 } 162 step.Logs = step.Logs[1:] 163 } 164 for _, log := range step.Logs { 165 var err error 166 log.Url, log.ViewUrl, err = absolutizeURLs(log.Url, log.ViewUrl, t.ldNamespace, t.merger.calculateURLs) 167 if err != nil { 168 step.Status = bbpb.Status_INFRA_FAILURE 169 step.SummaryMarkdown += err.Error() 170 return parsedBuild, errors.Annotate(err, "step[%q].logs[%q]", step.Name, log.Name).Err() 171 } 172 } 173 if mb := step.GetMergeBuild(); mb != nil && mb.FromLogdogStream != "" { 174 var err error 175 mb.FromLogdogStream, _, err = absolutizeURLs(mb.FromLogdogStream, "", t.ldNamespace, t.merger.calculateURLs) 176 if err != nil { 177 step.Status = bbpb.Status_INFRA_FAILURE 178 step.SummaryMarkdown += err.Error() 179 return parsedBuild, errors.Annotate(err, "step[%q].merge_build.from_logdog_stream", step.Name).Err() 180 } 181 } 182 } 183 for _, log := range parsedBuild.GetOutput().GetLogs() { 184 var err error 185 log.Url, log.ViewUrl, err = absolutizeURLs(log.Url, log.ViewUrl, t.ldNamespace, t.merger.calculateURLs) 186 if err != nil { 187 return parsedBuild, errors.Annotate(err, "build.output.logs[%q]", log.Name).Err() 188 } 189 } 190 parsedBuild.UpdateTime = t.merger.clockNow() 191 return parsedBuild, nil 192 } 193 194 // newBuildStateTracker produces a new buildStateTracker in the given logdog 195 // namespace. 196 // 197 // `ctx` is used for cancellation/logging. 198 // 199 // `merger` is the Agent that this buildStateTracker belongs to. See the comment 200 // in buildStateTracker for its use of this. 201 // 202 // `namespace` is the logdog namespace under which this build.proto is being 203 // streamed from. e.g. if the updates to handleNewData are coming from a logdog 204 // stream "a/b/c/build.proto", then `namespace` here should be "a/b/c". This is 205 // used verbatim as the namespace argument to merger.calculateURLs. 206 // 207 // if `err` is provided, the buildStateTracker tracker is created in an errored 208 // (closed) state where getLatest always returns a fixed Build in the 209 // INFRA_FAILURE state with `err` reflected in the build's SummaryMarkdown 210 // field. 211 func newBuildStateTracker(ctx context.Context, merger *Agent, namespace types.StreamName, zlib bool, err error) *buildStateTracker { 212 ret := &buildStateTracker{ 213 ctx: ctx, 214 merger: merger, 215 zlib: zlib, 216 ldNamespace: namespace.AsNamespace(), 217 latestState: &buildState{}, 218 } 219 220 if err != nil { 221 ret.latestState.build = &bbpb.Build{} 222 setErrorOnBuild(ret.latestState.build, err) 223 ret.finalize() 224 ret.Close() 225 } else { 226 ret.work, err = dispatcher.NewChannel(ctx, &dispatcher.Options{ 227 Buffer: buffer.Options{ 228 MaxLeases: 1, 229 BatchItemsMax: 1, 230 FullBehavior: &buffer.DropOldestBatch{}, 231 }, 232 DropFn: dispatcher.DropFnQuiet, 233 DrainedFn: ret.finalize, 234 }, ret.parseAndSend) 235 if err != nil { 236 panic(err) // creating dispatcher with static config should never fail 237 } 238 // Attach the cancelation of the context to the closure of work.C. 239 go func() { 240 select { 241 case <-ctx.Done(): 242 ret.Close() 243 case <-ret.work.DrainC: 244 // already shut down w/o cancelation 245 } 246 }() 247 } 248 249 return ret 250 } 251 252 // finalized is called exactly once when either: 253 // 254 // - newBuildStateTracker is called with err != nil 255 // - buildStateTracker.work is fully shut down (this is installed as 256 // dispatcher.Options.DrainedFn) 257 func (t *buildStateTracker) finalize() { 258 t.latestStateMu.Lock() 259 defer t.latestStateMu.Unlock() 260 261 state := *t.latestState 262 if state.final { 263 panic("impossible; finalize called twice?") 264 } 265 266 state.closed = true 267 state.final = true 268 if state.build == nil { 269 state.build = &bbpb.Build{ 270 SummaryMarkdown: "Never received any build data.", 271 Status: bbpb.Status_INFRA_FAILURE, 272 Output: &bbpb.Build_Output{ 273 Status: bbpb.Status_INFRA_FAILURE, 274 SummaryMarkdown: "Never received any build data.", 275 }, 276 } 277 } 278 processFinalBuild(t.merger.clockNow(), state.build) 279 state.buildReadOnly = nil 280 t.latestState = &state 281 t.merger.informNewData() 282 } 283 284 func (t *buildStateTracker) parseAndSend(data *buffer.Batch) error { 285 t.latestStateMu.Lock() 286 state := *t.latestState 287 t.latestStateMu.Unlock() 288 289 // already closed 290 if state.closed { 291 return nil 292 } 293 294 newBuild, err := t.parseBuild(data.Data[0].Item.([]byte)) 295 // may set state.closed on an error 296 t.updateState(newBuild, err) 297 298 t.merger.informNewData() 299 return nil 300 } 301 302 // getLatestBuild returns the Build in the current state. 303 // 304 // It returns the internal read-only copy of the build to avoid the read/write race. 305 func (t *buildStateTracker) getLatestBuild() *bbpb.Build { 306 t.latestStateMu.Lock() 307 defer t.latestStateMu.Unlock() 308 309 // Lazily clone the build to its read-only copy when needed. 310 if t.latestState.buildReadOnly == nil { 311 t.latestState.buildReadOnly = proto.Clone(t.latestState.build).(*bbpb.Build) 312 } 313 return t.latestState.buildReadOnly 314 } 315 316 // This implements the bundler.StreamChunkCallback callback function. 317 // 318 // Each call to `handleNewData` expects `entry` to have a complete (non-Partial) 319 // datagram containing a single Build message. The message will (eventually) be 320 // parsed and fixed up (e.g. fixing Log Url/ViewUrl), and become this 321 // buildStateTracker's new state. 322 // 323 // This method does not block; Data here is submitted to the buildStateTracker's 324 // internal worker, which processes state updates as quickly as it can, skipping 325 // state updates which are submitted too rapidly. 326 // 327 // This method has no effect if the buildStateTracker is 'closed'. 328 // 329 // When this is called with `nil` as an argument (when the attached logdog 330 // stream is closed), it will start the closure process on this 331 // buildStateTracker. The final build state can be obtained synchronously by 332 // calling GetFinal(). 333 func (t *buildStateTracker) handleNewData(entry *logpb.LogEntry) { 334 t.workMu.Lock() 335 defer t.workMu.Unlock() 336 337 if entry == nil { 338 t.closeWorkLocked() 339 } else if !t.workClosed { 340 select { 341 case t.work.C <- entry.GetDatagram().Data: 342 case <-t.ctx.Done(): 343 t.closeWorkLocked() 344 } 345 } 346 } 347 348 func (t *buildStateTracker) closeWorkLocked() { 349 if !t.workClosed { 350 if t.work.C != nil { 351 close(t.work.C) 352 } 353 t.workClosed = true 354 } 355 } 356 357 func (t *buildStateTracker) Close() { 358 t.workMu.Lock() 359 defer t.workMu.Unlock() 360 t.closeWorkLocked() 361 } 362 363 // Drain waits for the build state to finalize. 364 func (t *buildStateTracker) Drain() { 365 if t.work.DrainC != nil { 366 <-t.work.DrainC 367 } 368 }