// Source: go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/luciexe/host/buildmerge/agent.go

// Copyright 2019 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package buildmerge implements the build.proto tracking and merging logic for
// luciexe host applications.
//
// You probably want to use `go.chromium.org/luci/luciexe/host` instead.
//
// This package is separate from luciexe/host to avoid unnecessary entanglement
// with butler/logdog; all the logic here is implemented to avoid:
//
//   - interacting with the environment
//   - interacting with butler/logdog (except by implementing callbacks for
//     those, but only acting on simple data structures/proto messages)
//   - handling errors in any 'brutal' ways (all errors in this package are
//     handled by reporting them directly in the data structures that this
//     package manipulates).
//
// This is done to simplify testing (as much as it can be) by concentrating all
// the environment stuff into luciexe/host, and all the 'pure' functional stuff
// here (search "imperative shell, functional core").
33 package buildmerge 34 35 import ( 36 "context" 37 "fmt" 38 "strings" 39 "sync" 40 "sync/atomic" 41 42 "github.com/golang/protobuf/ptypes" 43 "google.golang.org/protobuf/proto" 44 "google.golang.org/protobuf/types/known/timestamppb" 45 46 bbpb "go.chromium.org/luci/buildbucket/proto" 47 "go.chromium.org/luci/common/clock" 48 "go.chromium.org/luci/common/data/stringset" 49 "go.chromium.org/luci/common/errors" 50 "go.chromium.org/luci/common/proto/reflectutil" 51 "go.chromium.org/luci/common/sync/dispatcher" 52 "go.chromium.org/luci/common/sync/dispatcher/buffer" 53 "go.chromium.org/luci/logdog/api/logpb" 54 "go.chromium.org/luci/logdog/client/butler" 55 "go.chromium.org/luci/logdog/common/types" 56 "go.chromium.org/luci/luciexe" 57 ) 58 59 // CalcURLFn is a stateless function which can calculate the absolute url and 60 // viewUrl from a given logdog namespace (with trailing slash) and streamName. 61 type CalcURLFn func(namespaceSlash, streamName types.StreamName) (url, viewUrl string) 62 63 // Agent holds all the logic around merging build.proto streams. 64 type Agent struct { 65 // MergedBuildC is the channel of all the merged builds generated by this 66 // Agent. 67 // 68 // The rate at which Agent merges Builds is governed by the consumption of 69 // this channel; Consuming it slowly will have Agent merge less frequently, 70 // and consuming it rapidly will have Agent merge more frequently. 71 // 72 // The last build before the channel closes will always be the final state of 73 // all builds at the time this Agent was Close()'d. 74 MergedBuildC <-chan *bbpb.Build 75 76 // Wait on this channel for the Agent to drain. Will only drain after calling 77 // Close() at least once. 78 DrainC <-chan struct{} 79 80 // used to cancel in-progress sendMerge calls. 
81 ctx context.Context 82 83 // mergedBuildC is the send side of MergedBuildC 84 mergedBuildC chan<- *bbpb.Build 85 86 // userNamespace is the logdog namespace (with a trailing slash) which we'll 87 // use to determine if a new stream is potentially monitored, or not. 88 userNamespace types.StreamName 89 90 // userRootURL is the full url ('logdog://.../stream/build.proto') of the 91 // user's "root" build.proto stream (i.e. the one emitted by the top level 92 // luciexe implementation. 93 // 94 // This is used as a key to start the merge process. 95 userRootURL string 96 baseBuild *bbpb.Build 97 98 // statesMu covers `states`. It must be held when reading or writing to 99 // `states`, but doesn't need to be held while interacting with an individual 100 // *buildState obtained from the map. 101 statesMu sync.RWMutex 102 103 // states maps a stream URL (i.e. `logdog://.../stream/build.proto`) to the 104 // state tracker for that stream. 105 states map[string]*buildStateTracker 106 107 // mergeCh is used in production mode to send pings via informNewData 108 mergeCh dispatcher.Channel 109 110 // informNewData is used to 'ping' mergeCh; it's overwritten in tests. 111 informNewData func() 112 113 // done is an atomically-accessed boolean 114 done int32 115 116 // calculateURLs is a function which can convert a logdog namespace and 117 // streamname into both the full 'Url' and 'ViewUrl' values for a Log message. 118 // This is used by the buildMerger itself when deriving keys for the `states` 119 // map, as well as for individual buildState objects to adjust their build's 120 // logs' URLs. 121 calculateURLs CalcURLFn 122 } 123 124 // New returns a new Agent. 125 // 126 // Args: 127 // - ctx - used for logging, clock and cancelation. When canceled, the Agent 128 // will cease sending updates on MergedBuildC, but you must still invoke 129 // Agent.Close() in order to clean up all resources associated with the 130 // Agent. 
131 // - userNamespace - The logdog namespace (with a trailing slash) under which 132 // we should monitor streams. 133 // - base - The "model" Build message that all generated builds should start 134 // with. All build proto streams will be merged onto a copy of this message. 135 // Any Output.Log's which have non-absolute URLs will have their Url and 136 // ViewUrl absolutized relative to userNamespace using calculateURLs. 137 // - calculateURLs - A function to calculate Log.Url and Log.ViewUrl values. 138 // Should be a pure function. 139 // 140 // The following fields will be merged into `base` from the user controlled 141 // build.proto stream(s): 142 // 143 // Steps 144 // SummaryMarkdown 145 // Status 146 // StatusDetails 147 // UpdateTime 148 // Tags 149 // EndTime 150 // Output 151 // 152 // The frequency of updates from this Agent is governed by how quickly the 153 // caller consumes from Agent.MergedBuildC. 154 func New(ctx context.Context, userNamespace types.StreamName, base *bbpb.Build, calculateURLs CalcURLFn) (*Agent, error) { 155 userNamespace = userNamespace.AsNamespace() 156 157 ch := make(chan *bbpb.Build) 158 userRootURL, _ := calculateURLs(userNamespace, luciexe.BuildProtoStreamSuffix) 159 160 ret := &Agent{ 161 ctx: ctx, 162 163 MergedBuildC: ch, 164 165 mergedBuildC: ch, 166 states: map[string]*buildStateTracker{}, 167 calculateURLs: calculateURLs, 168 userNamespace: userNamespace, 169 userRootURL: userRootURL, 170 baseBuild: proto.Clone(base).(*bbpb.Build), 171 } 172 for _, log := range ret.baseBuild.GetOutput().GetLogs() { 173 var err error 174 log.Url, log.ViewUrl, err = absolutizeURLs(log.Url, log.ViewUrl, userNamespace, calculateURLs) 175 if err != nil { 176 return nil, errors.Annotate(err, "build.output.logs[%q]", log.Name).Err() 177 } 178 } 179 180 var err error 181 ret.mergeCh, err = dispatcher.NewChannel(ctx, &dispatcher.Options{ 182 Buffer: buffer.Options{ 183 MaxLeases: 1, 184 BatchItemsMax: 1, 185 FullBehavior: 
&buffer.DropOldestBatch{}, 186 }, 187 DropFn: dispatcher.DropFnQuiet, 188 DrainedFn: ret.finalize, 189 }, ret.sendMerge) 190 if err != nil { 191 return nil, err // creating dispatcher with static config should never fail 192 } 193 ret.informNewData = func() { 194 ret.mergeCh.C <- nil // content doesn't matter 195 } 196 ret.DrainC = ret.mergeCh.DrainC 197 198 return ret, nil 199 } 200 201 // Attach should be called once to attach this to a Butler. 202 // 203 // This must be done before the butler receives any build.proto streams. 204 func (a *Agent) Attach(b *butler.Butler) { 205 b.AddStreamRegistrationCallback(a.onNewStream, true) 206 } 207 208 var validContentTypes = stringset.NewFromSlice( 209 luciexe.BuildProtoContentType, 210 luciexe.BuildProtoZlibContentType, 211 ) 212 213 func (a *Agent) onNewStream(desc *logpb.LogStreamDescriptor) butler.StreamChunkCallback { 214 if !a.collectingData() { 215 return nil 216 } 217 218 namespace, base := types.StreamName(desc.Name).Split() 219 220 var err error 221 zlib := false 222 switch validStreamT, validContentT := desc.StreamType == logpb.StreamType_DATAGRAM, validContentTypes.Has(desc.ContentType); { 223 case validStreamT && validContentT: 224 zlib = desc.ContentType == luciexe.BuildProtoZlibContentType 225 case validStreamT && !validContentT: 226 err = errors.Reason("stream %q has content type %q, expected one of %v", desc.Name, desc.ContentType, validContentTypes.ToSortedSlice()).Err() 227 case !validStreamT && validContentT: 228 err = errors.Reason("build proto stream %q has type %q, expected %q", desc.Name, desc.StreamType, logpb.StreamType_DATAGRAM).Err() 229 case strings.HasPrefix(desc.Name, string(a.userNamespace)) && base == luciexe.BuildProtoStreamSuffix: 230 err = errors.Reason("build.proto stream %q has stream type %q and content type %q, expected %q and one of %v", desc.Name, desc.StreamType, desc.ContentType, logpb.StreamType_DATAGRAM, validContentTypes.ToSortedSlice()).Err() 231 default: 232 // neither a 
".../build.proto" stream nor a stream with valid stream type 233 // or content type. 234 return nil 235 } 236 237 url, _ := a.calculateURLs("", types.StreamName(desc.Name)) 238 bState := newBuildStateTracker(a.ctx, a, namespace, zlib, err) 239 240 a.statesMu.Lock() 241 defer a.statesMu.Unlock() 242 a.states[url] = bState 243 if err == nil { 244 return bState.handleNewData 245 } 246 return nil // no need to handle invalid stream. 247 } 248 249 // Close causes the Agent to stop collecting data, emit a final merged build, 250 // and then shut down all internal routines. 251 func (a *Agent) Close() { 252 // stops accepting new trackers 253 if atomic.SwapInt32(&a.done, 1) == 1 { 254 return 255 } 256 257 // close all states' and process their final work items. Closure should be 258 // very quick and will activate all final processing in parallel. GetFinal 259 // ensures that the state is completely settled. 260 states := a.snapStates() 261 for _, t := range states { 262 t.Close() 263 } 264 for _, t := range states { 265 t.Drain() 266 } 267 268 // tells our merge Channel to process all the current (now-final) states one 269 // last time. 270 a.informNewData() 271 272 // shut down the mergeCh so it will no longer accept new informNewData calls. 
273 a.mergeCh.Close() 274 } 275 276 func (a *Agent) snapStates() map[string]*buildStateTracker { 277 a.statesMu.RLock() 278 trackers := make(map[string]*buildStateTracker, len(a.states)) 279 for k, v := range a.states { 280 trackers[k] = v 281 } 282 a.statesMu.RUnlock() 283 return trackers 284 } 285 286 func (a *Agent) sendMerge(_ *buffer.Batch) error { 287 trackers := a.snapStates() 288 289 builds := make(map[string]*bbpb.Build, len(trackers)) 290 stepCount := 0 291 for k, v := range trackers { 292 build := v.getLatestBuild() 293 stepCount += len(build.GetSteps()) 294 builds[k] = build 295 } 296 297 base := reflectutil.ShallowCopy(a.baseBuild).(*bbpb.Build) 298 base.Steps = nil 299 if stepCount > 0 { 300 base.Steps = make([]*bbpb.Step, 0, stepCount) 301 } 302 303 var insertSteps func(stepNS []string, streamURL string, fromSubBuild bool) *bbpb.Build 304 insertSteps = func(stepNS []string, streamURL string, fromSubBuild bool) *bbpb.Build { 305 build, ok := builds[streamURL] 306 if !ok { 307 return nil 308 } 309 for _, step := range build.GetSteps() { 310 mb := step.GetMergeBuild() 311 mergeStream := mb.GetFromLogdogStream() 312 if mergeStream != "" || len(stepNS) > 0 || fromSubBuild { 313 step = proto.Clone(step).(*bbpb.Step) 314 } 315 baseName := step.Name 316 if len(stepNS) > 0 { 317 step.Name = strings.Join(append(stepNS, step.Name), "|") 318 } 319 320 base.Steps = append(base.Steps, step) 321 322 if mergeStream != "" { 323 var subNamespace []string 324 if !mb.LegacyGlobalNamespace { 325 subNamespace = append(stepNS, baseName) 326 } 327 subBuild := insertSteps(subNamespace, mergeStream, true) 328 if subBuild == nil { 329 var sb strings.Builder 330 if step.SummaryMarkdown != "" { 331 sb.WriteString(step.SummaryMarkdown) 332 sb.WriteString("\n\n") 333 } 334 if _, ok := builds[mergeStream]; ok { 335 sb.WriteString(fmt.Sprintf("build.proto stream: %q is empty", mergeStream)) 336 } else { 337 sb.WriteString(fmt.Sprintf("build.proto stream: %q is not registered", 
mergeStream)) 338 } 339 step.SummaryMarkdown = sb.String() 340 } else { 341 updateStepFromBuild(step, subBuild) 342 if mb.LegacyGlobalNamespace { 343 updateBuildFromGlobalSubBuild(build, subBuild) 344 } 345 } 346 } 347 } 348 return build 349 } 350 updateBaseFromUserBuild(base, insertSteps(nil, a.userRootURL, false)) 351 352 select { 353 case a.mergedBuildC <- base: 354 case <-a.ctx.Done(): 355 a.Close() 356 } 357 358 return nil 359 } 360 361 func (a *Agent) finalize() { 362 close(a.mergedBuildC) 363 } 364 365 func (a *Agent) collectingData() bool { 366 return atomic.LoadInt32(&a.done) == 0 367 } 368 369 // Used for minting protobuf timestamps for buildStateTrackers 370 func (a *Agent) clockNow() *timestamppb.Timestamp { 371 ret, err := ptypes.TimestampProto(clock.Now(a.ctx)) 372 if err != nil { 373 panic(err) 374 } 375 return ret 376 }