go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/prjmanager/state/handler.go

// Copyright 2020 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package state

import (
	"context"
	"fmt"
	"sort"
	"time"

	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/types/known/timestamppb"

	"go.chromium.org/luci/common/clock"
	"go.chromium.org/luci/common/data/stringset"
	"go.chromium.org/luci/common/logging"

	"go.chromium.org/luci/cv/internal/changelist"
	"go.chromium.org/luci/cv/internal/common"
	"go.chromium.org/luci/cv/internal/configs/prjcfg"
	"go.chromium.org/luci/cv/internal/gerrit/cfgmatcher"
	"go.chromium.org/luci/cv/internal/gerrit/poller"
	"go.chromium.org/luci/cv/internal/prjmanager"
	"go.chromium.org/luci/cv/internal/prjmanager/clpurger"
	"go.chromium.org/luci/cv/internal/prjmanager/cltriggerer"
	"go.chromium.org/luci/cv/internal/prjmanager/itriager"
	"go.chromium.org/luci/cv/internal/prjmanager/prjpb"
	"go.chromium.org/luci/cv/internal/run"
	"go.chromium.org/luci/cv/internal/tracing"
)

// RunNotifier abstracts the notifications that the Project Manager sends to
// individual Runs.
type RunNotifier interface {
	Start(ctx context.Context, id common.RunID) error
	PokeNow(ctx context.Context, id common.RunID) error
	Cancel(ctx context.Context, id common.RunID, reason string) error
	UpdateConfig(ctx context.Context, id common.RunID, hash string, eversion int64) error
}

// Handler handles state transitions of a project.
type Handler struct {
	CLMutator       *changelist.Mutator
	PMNotifier      *prjmanager.Notifier
	RunNotifier     RunNotifier
	CLPurger        *clpurger.Purger
	CLTriggerer     *cltriggerer.Triggerer
	CLPoller        *poller.Poller
	ComponentTriage itriager.Triage
}

// UpdateConfig updates PM to the latest config version.
func (h *Handler) UpdateConfig(ctx context.Context, s *State) (*State, SideEffect, error) {
	s.ensureNotYetCloned()

	meta, err := prjcfg.GetLatestMeta(ctx, s.PB.GetLuciProject())
	if err != nil {
		return nil, nil, err
	}

	switch meta.Status {
	case prjcfg.StatusEnabled:
		if s.PB.GetStatus() == prjpb.Status_STARTED && meta.Hash() == s.PB.GetConfigHash() {
			return s, nil, nil // already up-to-date.
		}

		// Tell poller to update ASAP. It doesn't need to wait for a transaction as
		// it's OK for poller to be temporarily more up-to-date than PM.
		if err := h.CLPoller.Poke(ctx, s.PB.GetLuciProject()); err != nil {
			return nil, nil, err
		}

		if s.PB.Status == prjpb.Status_STARTED {
			s = s.cloneShallow(prjpb.LogReason_CONFIG_CHANGED)
		} else {
			s = s.cloneShallow(prjpb.LogReason_CONFIG_CHANGED, prjpb.LogReason_STATUS_CHANGED)
			s.PB.Status = prjpb.Status_STARTED
		}
		s.PB.ConfigHash = meta.Hash()
		s.PB.ConfigGroupNames = meta.ConfigGroupNames

		if s.configGroups, err = meta.GetConfigGroups(ctx); err != nil {
			return nil, nil, err
		}
		s.cfgMatcher = cfgmatcher.LoadMatcherFromConfigGroups(ctx, s.configGroups, &meta)

		if err = s.reevalPCLs(ctx); err != nil {
			return nil, nil, err
		}
		// New config may mean new conditions for Run creation. Re-triaging all
		// components is required.
		s.PB.Components = markForTriage(s.PB.GetComponents())

		// We may have been in STOPPING phase, in which case incomplete runs may
		// still be finalizing themselves after receiving Cancel event from us.
		// It's harmless to send them UpdateConfig message, too. Eventually, they'll
		// complete finalization, send us OnRunFinished event and then we'll remove
		// them from the state anyway.
		return s, &UpdateIncompleteRunsConfig{
			RunNotifier: h.RunNotifier,
			EVersion:    meta.EVersion,
			Hash:        meta.Hash(),
			RunIDs:      s.PB.IncompleteRuns(),
		}, err

	case prjcfg.StatusDisabled, prjcfg.StatusNotExists:
		// Intentionally not catching up with new ConfigHash (if any),
		// since it's not actionable and also simpler.
		switch s.PB.GetStatus() {
		case prjpb.Status_STATUS_UNSPECIFIED:
			// Project entity doesn't exist. No need to create it.
			return s, nil, nil
		case prjpb.Status_STOPPED:
			return s, nil, nil
		case prjpb.Status_STARTED:
			s = s.cloneShallow(prjpb.LogReason_STATUS_CHANGED)
			s.PB.Status = prjpb.Status_STOPPING
			fallthrough
		case prjpb.Status_STOPPING:
			if err := h.CLPoller.Poke(ctx, s.PB.GetLuciProject()); err != nil {
				return nil, nil, err
			}
			runs := s.PB.IncompleteRuns()
			if len(runs) == 0 {
				s = s.cloneShallow(prjpb.LogReason_STATUS_CHANGED)
				s.PB.Status = prjpb.Status_STOPPED
				return s, nil, nil
			}
			return s, &CancelIncompleteRuns{
				RunNotifier: h.RunNotifier,
				RunIDs:      s.PB.IncompleteRuns(),
			}, nil
		default:
			panic(fmt.Errorf("unexpected project status: %d", s.PB.GetStatus()))
		}
	default:
		panic(fmt.Errorf("unexpected config status: %d", meta.Status))
	}
}

// Poke propagates "the poke" downstream to Poller & Runs.
func (h *Handler) Poke(ctx context.Context, s *State) (*State, SideEffect, error) {
	s.ensureNotYetCloned()

	// First, check if UpdateConfig is necessary.
	switch newState, sideEffect, err := h.UpdateConfig(ctx, s); {
	case err != nil:
		return nil, nil, err
	case newState != s:
		// UpdateConfig noticed a change and its SideEffectFn will propagate it
		// downstream.
		return newState, sideEffect, nil
	}

	// Propagate downstream directly.
	if err := h.CLPoller.Poke(ctx, s.PB.GetLuciProject()); err != nil {
		return nil, nil, err
	}
	if err := h.pokeRuns(ctx, s); err != nil {
		return nil, nil, err
	}
	// Force re-triage of all components.
	s = s.cloneShallow()
	s.PB.Components = markForTriage(s.PB.GetComponents())
	return s, nil, nil
}

// OnRunsCreated updates state after new Runs were created.
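//
// Runs the PM already knows about are ignored. If the project is stopping or
// stopped, newly created Runs that are still incomplete are asked to cancel
// instead of being added to the state.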
func (h *Handler) OnRunsCreated(ctx context.Context, s *State, created common.RunIDs) (_ *State, __ SideEffect, err error) {
	s.ensureNotYetCloned()

	ctx, span := tracing.Start(ctx, "go.chromium.org/luci/cv/internal/prjmanager/impl/state/OnRunsCreated")
	defer func() { tracing.End(span, err) }()

	// Check if PM is already aware of these Runs.
	remaining := created.Set()
	s.PB.IterIncompleteRuns(func(r *prjpb.PRun, _ *prjpb.Component) (stop bool) {
		delete(remaining, common.RunID(r.GetId()))
		return len(remaining) == 0 // stop if nothing left
	})
	if len(remaining) == 0 {
		return s, nil, nil
	}

	switch s.PB.GetStatus() {
	case prjpb.Status_STARTED:
		s = s.cloneShallow()
		if err := s.addCreatedRuns(ctx, remaining); err != nil {
			return nil, nil, err
		}
		return s, nil, nil
	case prjpb.Status_STOPPED, prjpb.Status_STOPPING:
		// This should not normally happen, but may under rare conditions.
		switch incomplete, err := incompleteRuns(ctx, remaining); {
		case err != nil:
			return nil, nil, err
		case len(incomplete) == 0:
			// All the Runs have actually already finished. Nothing to do, and this
			// is fine.
			return s, nil, nil
		default:
			logging.Errorf(ctx, "RunCreated events for %s on %s Project Manager", incomplete, s.PB.GetStatus())
			return s, &CancelIncompleteRuns{RunNotifier: h.RunNotifier, RunIDs: incomplete}, nil
		}
	default:
		panic(fmt.Errorf("unexpected project status: %d", s.PB.GetStatus()))
	}
}

// OnRunsFinished updates state after Runs were finished.
func (h *Handler) OnRunsFinished(ctx context.Context, s *State, finished map[common.RunID]run.Status) (_ *State, __ SideEffect, err error) {
	s.ensureNotYetCloned()

	_, span := tracing.Start(ctx, "go.chromium.org/luci/cv/internal/prjmanager/impl/state/OnRunsFinished")
	defer func() { tracing.End(span, err) }()

	// This is rarely a noop, so assume state is modified for simplicity.
	s = s.cloneShallow()
	var failedMaybeMCERuns []*prjpb.PRun
	incompleteRunsCount := s.removeFinishedRuns(
		finished, func(r *prjpb.PRun) {
			rid := common.RunID(r.GetId())
			if st, ok := finished[rid]; ok && st == run.Status_FAILED && maybeMCERun(ctx, s, r) {
				failedMaybeMCERuns = append(failedMaybeMCERuns, r)
			}
		},
	)
	if s.PB.GetStatus() == prjpb.Status_STOPPING && incompleteRunsCount == 0 {
		s.LogReasons = append(s.LogReasons, prjpb.LogReason_STATUS_CHANGED)
		s.PB.Status = prjpb.Status_STOPPED
	}
	se := h.addCLsToPurge(ctx, s, makePurgeCLTasksForFailedMCERuns(ctx, s, failedMaybeMCERuns))
	return s, se, nil
}

// OnCLsUpdated updates state as a result of new changes to CLs.
//
// clEVersions must map CL's ID to CL's EVersion.
// clEVersions is mutated.
func (h *Handler) OnCLsUpdated(ctx context.Context, s *State, clEVersions map[int64]int64) (_ *State, __ SideEffect, err error) {
	s.ensureNotYetCloned()

	ctx, span := tracing.Start(ctx, "go.chromium.org/luci/cv/internal/prjmanager/impl/state/OnCLsUpdated")
	defer func() { tracing.End(span, err) }()

	if s.PB.GetStatus() != prjpb.Status_STARTED {
		// Ignore all incoming CL events. If PM is re-enabled, then first full
		// poll will force re-sending of OnCLsUpdated event for all still
		// interesting CLs.
		return s, nil, nil
	}

	// Most likely there will be changes to state.
	s = s.cloneShallow()
	if err := s.evalUpdatedCLs(ctx, clEVersions); err != nil {
		return nil, nil, err
	}
	return s, nil, nil
}

// OnPurgesCompleted updates state as a result of completed purge operations.
func (h *Handler) OnPurgesCompleted(ctx context.Context, s *State, events []*prjpb.PurgeCompleted) (_ *State, __ SideEffect, evsToConsume []int, err error) {
	s.ensureNotYetCloned()

	ctx, span := tracing.Start(ctx, "go.chromium.org/luci/cv/internal/prjmanager/impl/state/OnPurgesCompleted")
	defer func() { tracing.End(span, err) }()

	opIDs := stringset.New(len(events))
	clids := make([]int64, len(events))
	for i, e := range events {
		clids[i] = e.GetClid()
		opIDs.Add(e.GetOperationId())
	}
	if len(clids) > 0 {
		s = s.cloneShallow()
		if err := s.evalCLs(ctx, clids); err != nil {
			return s, nil, nil, err
		}
		for i, clid := range clids {
			switch pcl := s.PB.GetPCL(clid); {
			case pcl.GetOutdated() == nil:
				// Consume the event only if the snapshot is fresh.
				evsToConsume = append(evsToConsume, i)
			default:
				opIDs.Del(events[i].GetOperationId())
			}
		}
	}

	// Give 1 minute grace before expiring purging tasks. This doesn't change
	// correctness, but decreases probability of starting another purge before
	// PM observes CLUpdated event with results of prior purge.
	expireCutOff := clock.Now(ctx).Add(-time.Minute)

	deleted := map[int64]struct{}{}
	out, mutated := s.PB.COWPurgingCLs(func(p *prjpb.PurgingCL) *prjpb.PurgingCL {
		if opIDs.Has(p.GetOperationId()) {
			deleted[p.GetClid()] = struct{}{}
			return nil // delete
		}
		if p.GetDeadline().AsTime().Before(expireCutOff) {
			logging.Debugf(ctx, "PurgingCL %d %q expired", p.GetClid(), p.GetOperationId())
			deleted[p.GetClid()] = struct{}{}
			return nil // delete
		}
		return p // keep as is
	}, nil)
	if !mutated {
		return s, nil, evsToConsume, nil
	}

	if !s.alreadyCloned {
		s = s.cloneShallow()
	}
	s.PB.PurgingCls = out

	switch {
	case s.PB.GetRepartitionRequired():
		// all the components will be retriaged during the repartition process.
	default:
		cs, mutatedComponents := s.PB.COWComponents(func(c *prjpb.Component) *prjpb.Component {
			if c.GetTriageRequired() {
				return c
			}
			for _, id := range c.GetClids() {
				if _, yes := deleted[id]; yes {
					c = c.CloneShallow()
					c.TriageRequired = true
					return c
				}
			}
			return c
		}, nil)
		if mutatedComponents {
			s.PB.Components = cs
		}
	}
	return s, nil, evsToConsume, nil
}

// ExecDeferred performs previously postponed actions, notably creating Runs.
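//
// It repartitions the state if required, triages components and acts on the
// resulting actions, and schedules the next PM evaluation task based on the
// earliest decision time across components.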
func (h *Handler) ExecDeferred(ctx context.Context, s *State) (_ *State, __ SideEffect, err error) {
	s.ensureNotYetCloned()

	ctx, span := tracing.Start(ctx, "go.chromium.org/luci/cv/internal/prjmanager/impl/state/ExecDeferred")
	defer func() { tracing.End(span, err) }()

	if s.PB.GetStatus() != prjpb.Status_STARTED {
		return s, nil, nil
	}

	mutated := false
	if s.PB.GetRepartitionRequired() || len(s.PB.GetCreatedPruns()) > 0 {
		s = s.cloneShallow()
		mutated = true
		cat := s.categorizeCLs(ctx)
		if err := s.loadActiveIntoPCLs(ctx, cat); err != nil {
			return nil, nil, err
		}
		s.repartition(cat)
	}

	var sideEffect SideEffect
	switch actions, saveForDebug, err := h.triageComponents(ctx, s); {
	case err != nil:
		if !mutated {
			return nil, nil, err
		}
		// Don't lose progress made so far.
		logging.Warningf(ctx, "Failed to triageComponents %s, but proceeding to save repartitioned state", err)
	case len(actions) > 0 || saveForDebug:
		if !mutated {
			if saveForDebug {
				s = s.cloneShallow(prjpb.LogReason_DEBUG)
			} else {
				s = s.cloneShallow()
			}
			mutated = true
		}
		sideEffect, err = h.actOnComponents(ctx, s, actions)
		if err != nil {
			return nil, nil, err
		}
	}

	switch t, tPB, asap := earliestDecisionTime(s.PB.GetComponents()); {
	case asap:
		t = clock.Now(ctx)
		tPB = timestamppb.New(t)
		fallthrough
	case tPB != nil && !proto.Equal(tPB, s.PB.GetNextEvalTime()):
		if !mutated {
			s = s.cloneShallow()
		}
		s.PB.NextEvalTime = tPB
		fallthrough
	case tPB != nil:
		// Always create a new task if there is NextEvalTime. If it is in the
		// future, it'll be deduplicated as needed.
		if err := h.PMNotifier.TasksBinding.Dispatch(ctx, s.PB.GetLuciProject(), t); err != nil {
			return nil, nil, err
		}
	}
	return s, sideEffect, nil
}

// OnTriggeringCLDepsCompleted manages TriggeringCLDeps completion events.
func (h *Handler) OnTriggeringCLDepsCompleted(ctx context.Context, s *State, events []*prjpb.TriggeringCLDepsCompleted) (_ *State, __ SideEffect, evIndexesToConsume []int, err error) {
	s.ensureNotYetCloned()

	ctx, span := tracing.Start(ctx, "go.chromium.org/luci/cv/internal/prjmanager/impl/state/OnTriggeringCLDepsCompleted")
	defer func() { tracing.End(span, err) }()

	// Give one extra minute before processing an expired op.
	expireCutOff := clock.Now(ctx).Add(-time.Minute)
	opsToRemove := make(map[string]int, len(events))
	var clidsToEval []int64
	var purgeTasks []*prjpb.PurgeCLTask
	for i, evt := range events {
		ctx := logging.SetField(ctx, "origin_cl", evt.GetOrigin())
		switch op := s.PB.GetTriggeringCLDeps(evt.GetOrigin()); {
		case op == nil:
			logging.Warningf(ctx, "OnTriggeringCLDepsCompleted: event arrived but the op(%s) doesn't exist", evt.GetOperationId())
		default:
			if len(evt.GetFailed()) > 0 {
				// If any vote failed, schedule Purge tasks for the origin and all
				// the CLs whose votes succeeded.
				if tasks := purgeFailedTriggeringCLDeps(s, op.GetTrigger(), evt); len(tasks) > 0 {
					logging.Debugf(ctx, "purging votes for %v due to vote failures on %v",
						evt.GetSucceeded(), evt.GetFailed())
					purgeTasks = append(purgeTasks, tasks...)
				}
			}
			for _, clid := range evt.GetSucceeded() {
				if pcl := s.PB.GetPCL(clid); pcl != nil {
					clidsToEval = append(clidsToEval, clid)
				}
			}
		}
		// The event should still be added into opsToRemove, even if
		// there is no matching op in s.PB. Otherwise, the event will be
		// preserved forever.
		opsToRemove[evt.GetOperationId()] = i
	}

	s = s.cloneShallow()
	if len(clidsToEval) > 0 {
		if err := s.evalCLs(ctx, clidsToEval); err != nil {
			return s, nil, nil, err
		}
	}
	for opID, evIndex := range opsToRemove {
		consume := true
		// Ensure that all the succeeded deps are fresh before removing the op.
		for _, depCLID := range events[evIndex].GetSucceeded() {
			if pcl := s.PB.GetPCL(depCLID); pcl.GetOutdated() != nil {
				delete(opsToRemove, opID)
				consume = false
				break
			}
		}
		if consume {
			evIndexesToConsume = append(evIndexesToConsume, evIndex)
		}
	}
	deleted := map[int64]struct{}{}
	out, mutated := s.PB.COWTriggeringCLDeps(func(op *prjpb.TriggeringCLDeps) *prjpb.TriggeringCLDeps {
		if op.GetDeadline().AsTime().Before(expireCutOff) {
			ctx := logging.SetField(ctx, "origin_cl", op.GetOriginClid())
			logging.Warningf(ctx, "TriggeringCLDeps(%s): deadline exceeded", op.GetOperationId())
			deleted[op.GetOriginClid()] = struct{}{}
			return nil // delete
		}
		if _, ok := opsToRemove[op.GetOperationId()]; ok {
			deleted[op.GetOriginClid()] = struct{}{}
			return nil // delete
		}
		return op
	}, nil)
	if !mutated {
		// If there is a CL to purge, there must be an op to remove.
		if len(purgeTasks) > 0 {
			panic(fmt.Errorf("OnTriggeringCLDepsCompleted: BUG"))
		}
		return s, nil, evIndexesToConsume, nil
	}
	s.PB.TriggeringClDeps = out

	switch {
	case s.PB.GetRepartitionRequired():
		// all the components will be retriaged during the repartition process.
	default:
		cs, mutatedComponents := s.PB.COWComponents(func(c *prjpb.Component) *prjpb.Component {
			if c.GetTriageRequired() {
				return c
			}
			for _, id := range c.GetClids() {
				if _, yes := deleted[id]; yes {
					c = c.CloneShallow()
					c.TriageRequired = true
					return c
				}
			}
			return c
		}, nil)
		if mutatedComponents {
			s.PB.Components = cs
		}
	}
	var se SideEffect
	if len(purgeTasks) > 0 {
		se = h.addCLsToPurge(ctx, s, purgeTasks)
	}
	return s, se, evIndexesToConsume, nil
}

// purgeFailedTriggeringCLDeps returns PurgeCLTasks for the successfully voted
// deps of a given failed TriggeringCLDeps, and for its origin CL.
func purgeFailedTriggeringCLDeps(s *State, tr *run.Trigger, evt *prjpb.TriggeringCLDepsCompleted) []*prjpb.PurgeCLTask {
	depErr := &changelist.CLError_TriggerDeps{}
	for _, err := range evt.GetFailed() {
		proto.Merge(depErr, err)
	}
	reasons := []*prjpb.PurgeReason{{
		ClError: &changelist.CLError{
			Kind: &changelist.CLError_TriggerDeps_{
				TriggerDeps: depErr,
			},
		},
		ApplyTo: &prjpb.PurgeReason_Triggers{
			Triggers: &run.Triggers{
				CqVoteTrigger: tr,
			},
		},
	}}
	ret := make([]*prjpb.PurgeCLTask, 0, len(evt.GetSucceeded())+1)
	for _, clid := range evt.GetSucceeded() {
		if s.PB.GetPurgingCL(clid) != nil {
			continue
		}
		ret = append(ret, &prjpb.PurgeCLTask{
			PurgeReasons: reasons,
			PurgingCl: &prjpb.PurgingCL{
				// No email for purging the CQ vote from deps.
				// The purge operations on the originating CL will send out
				// an email. That should be enough.
				Notification: clpurger.NoNotification,
				Clid:         clid,
				ApplyTo: &prjpb.PurgingCL_Triggers{
					Triggers: &run.Triggers{
						CqVoteTrigger: tr,
					},
				},
			},
		})
	}
	// And the origin CL.
	ret = append(ret, &prjpb.PurgeCLTask{
		PurgeReasons: reasons,
		PurgingCl: &prjpb.PurgingCL{
			Clid: evt.GetOrigin(),
			// Nil to send the default notifications.
			Notification: nil,
			ApplyTo: &prjpb.PurgingCL_Triggers{
				Triggers: &run.Triggers{
					CqVoteTrigger: tr,
				},
			},
		},
	})
	return ret
}

// makePurgeCLTasksForFailedMCERuns returns PurgeCLTasks for CLs that
// hard-depend on the CL of a failed MCE Run and still carry the same CQ vote,
// so that their CQ votes are purged.
func makePurgeCLTasksForFailedMCERuns(ctx context.Context, s *State, failed []*prjpb.PRun) []*prjpb.PurgeCLTask {
	if len(failed) == 0 {
		return nil
	}
	reverseDeps := make(map[int64][]*prjpb.PCL, len(s.PB.GetPcls()))
	for _, p := range s.PB.GetPcls() {
		for _, dep := range p.GetDeps() {
			if dep.GetKind() == changelist.DepKind_HARD {
				reverseDeps[dep.GetClid()] = append(reverseDeps[dep.GetClid()], p)
			}
		}
	}
	incompleteRuns := make(map[int64]struct{})
	s.PB.IterIncompleteRuns(func(r *prjpb.PRun, _ *prjpb.Component) bool {
		if clids := r.GetClids(); len(clids) == 1 {
			incompleteRuns[clids[0]] = struct{}{}
		}
		return false
	})
	tasks := make(map[int64]*prjpb.PurgeCLTask)
	for _, r := range failed {
		for _, child := range reverseDeps[r.GetClids()[0]] {
			// Skip if any of the following is true.
			trigger := child.GetTriggers().GetCqVoteTrigger()
			if trigger.GetMode() != r.GetMode() {
				continue
			}
			if _, ok := incompleteRuns[child.GetClid()]; ok {
				continue
			}
			if s.PB.GetPurgingCL(child.GetClid()) != nil {
				continue
			}
			// At this stage, the current CL
			// - depends on the failed MCE run
			// - has no incomplete Run
			// - has the same CQ vote mode as the failed MCE Run.
			tasks[child.GetClid()] = &prjpb.PurgeCLTask{
				PurgeReasons: []*prjpb.PurgeReason{{
					ClError: &changelist.CLError{Kind: &changelist.CLError_DepRunFailed{
						DepRunFailed: r.GetClids()[0],
					}},
					ApplyTo: &prjpb.PurgeReason_Triggers{
						Triggers: &run.Triggers{
							CqVoteTrigger: trigger,
						},
					},
				}},
				PurgingCl: &prjpb.PurgingCL{
					Clid: child.GetClid(),
					// In case a parent Run fails in a huge stack, we want to
					// minimize the number of emails sent out by the Purge
					// operations. One email for the probably-top CL should be
					// enough.
					Notification: clpurger.NoNotification,
					ApplyTo: &prjpb.PurgingCL_Triggers{
						Triggers: &run.Triggers{
							CqVoteTrigger: trigger,
						},
					},
				},
			}
		}
	}
	if len(tasks) == 0 {
		return nil
	}
	var foundCLToNotify bool
	ret := make([]*prjpb.PurgeCLTask, 0, len(tasks))
	for _, t := range tasks {
		clid := t.GetPurgingCl().GetClid()
		if !foundCLToNotify && shouldPurgeNotify(clid, reverseDeps[clid], tasks) {
			// Set nil to let clpurger decide the notification targets, based
			// on the Run mode.
			t.GetPurgingCl().Notification = nil
			foundCLToNotify = true
		}
		ret = append(ret, t)
	}
	sort.Slice(ret, func(i, j int) bool {
		return ret[i].GetPurgingCl().GetClid() < ret[j].GetPurgingCl().GetClid()
	})
	return ret
}

// shouldPurgeNotify reports whether the purge of the given CL should send out
// a notification. It returns false if any of the CL's children also has its
// trigger scheduled for purging.
func shouldPurgeNotify(clid int64, children []*prjpb.PCL, tasks map[int64]*prjpb.PurgeCLTask) bool {
	for _, child := range children {
		// Don't send an email if the CL has a child whose trigger is
		// purge-requested.
		if _, ok := tasks[child.GetClid()]; ok {
			return false
		}
	}
	return true
}
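
// The sketch below is illustrative only and not part of this package's API:
// it shows how a Handler might be assembled from its dependencies and applied
// to the current State. The lower-case values are hypothetical placeholders
// for the real dependencies constructed elsewhere in prjmanager.
//
//	h := &Handler{
//		CLMutator:       clMutator,
//		PMNotifier:      pmNotifier,
//		RunNotifier:     runNotifier,
//		CLPurger:        clPurger,
//		CLTriggerer:     clTriggerer,
//		CLPoller:        clPoller,
//		ComponentTriage: componentTriage,
//	}
//	newState, sideEffect, err := h.ExecDeferred(ctx, state)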