go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/prjmanager/triager/cls.go (about) 1 // Copyright 2021 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package triager 16 17 import ( 18 "context" 19 "fmt" 20 "time" 21 22 "go.chromium.org/luci/common/logging" 23 "go.chromium.org/luci/cv/internal/changelist" 24 "go.chromium.org/luci/cv/internal/prjmanager/prjpb" 25 "go.chromium.org/luci/cv/internal/run" 26 ) 27 28 // triageCLs decides whether individual CLs ought to be acted upon. 29 func triageCLs(ctx context.Context, c *prjpb.Component, pm pmState) map[int64]*clInfo { 30 cls := make(map[int64]*clInfo, len(c.GetClids())) 31 for _, clid := range c.GetClids() { 32 cls[clid] = &clInfo{ 33 pcl: pm.MustPCL(clid), 34 purgingCL: pm.PurgingCL(clid), // may be nil 35 triggeringCLDeps: pm.TriggeringCLDeps(clid), // may be nil 36 runCountByMode: make(map[run.Mode]int), 37 } 38 } 39 for index, r := range c.GetPruns() { 40 for _, clid := range r.GetClids() { 41 info := cls[clid] 42 info.runIndexes = append(info.runIndexes, int32(index)) 43 info.runCountByMode[run.Mode(r.GetMode())]++ 44 } 45 } 46 for _, info := range cls { 47 info.triage(ctx, c, pm) 48 } 49 for clid, info := range cls { 50 // Say the following events happens in sequence. 51 // 1. there are CL1(parent) and CL2(child). 52 // 2. CQ+2 is triggered on CL2, and TriggeringCLDeps is created. 53 // 3. cltriggerer voted CL1 and CL2 in parallel. 54 // 4. CLUpdated event is delivered for CL1 only. 55 // 5. Triager created a Run for CL1. 56 // 6. For some reasons, *before* PM receives a CLUpdated event for CL2, 57 // - PM receives a CLUpdated event, 58 // - triager created a run for CL1, 59 // - the run ended 60 // 7. PM receives a CLUpdated event for CL2. 61 // 62 // At (7), CL1 has CQ=0 and CL2 has CQ+2. 63 // there is no easy way for triager to find the reason of CL1 not having 64 // CQ+2. Hence, it will create a new TriggeringCLDeps{} to vote on CL1 65 // again, of which run just failed. 66 // 67 // To prevent this, the below marks deps as not-cq-ready if there is 68 // an inflight TriggeringCLDeps{} referencing the CL as a dep. 69 // i.e., triager starts creating Runs for a stack of CLs, only if 70 // the entire stack is ready. 71 for _, depCLID := range info.triggeringCLDeps.GetDepClids() { 72 ctx = logging.SetField(ctx, "origin_cl", clid) 73 info.cqReady = false 74 if di, ok := cls[depCLID]; ok { 75 di.cqReady = false 76 } 77 } 78 } 79 return cls 80 } 81 82 // clInfo represents a CL in the PM component of CLs. 83 type clInfo struct { 84 pcl *prjpb.PCL 85 // runIndexes are indexes of Component.PRuns which references this CL. 86 runIndexes []int32 87 // runCountByMode is # of Component.PRuns, referencing this CL, 88 // by the Run mode. 89 runCountByMode map[run.Mode]int 90 91 // purgingCL is set if CL is already being purged. 92 purgingCL *prjpb.PurgingCL 93 // triggeringCLDeps is set if the deps of the CL is being triggered. 94 triggeringCLDeps *prjpb.TriggeringCLDeps 95 96 triagedCL 97 } 98 99 // lastCQVoteTriggered returns the last triggered time by CQ vote among this CL 100 // and its triggered deps. Can be zero time.Time if neither are triggered. 101 func (info *clInfo) lastCQVoteTriggered() time.Time { 102 t := info.pcl.GetTriggers().GetCqVoteTrigger() 103 thisPB := t.GetTime() 104 switch { 105 case thisPB == nil && info.deps == nil: 106 return time.Time{} 107 case thisPB == nil: 108 return info.deps.lastCQVoteTriggered 109 case info.deps == nil || info.deps.lastCQVoteTriggered.IsZero(): 110 return thisPB.AsTime() 111 default: 112 this := thisPB.AsTime() 113 if info.deps.lastCQVoteTriggered.Before(this) { 114 return this 115 } 116 return info.deps.lastCQVoteTriggered 117 } 118 } 119 120 // triagedCL is the result of CL triage (see clInfo.triage()). 121 // 122 // Note: This doesn't take into account `combine_cls.stabilization_delay`, 123 // thus a CL may be ready or with purgeReason, but due to stabilization delay, 124 // it shouldn't be acted upon *yet*. 125 type triagedCL struct { 126 // deps are triaged deps, set only if CL is watched by exactly 1 config group. 127 // of the current project. 128 deps *triagedDeps 129 // purgeReasons is set if the CL ought to be purged. 130 // 131 // Not set if CL is .purgingCL is non-nil since CL is already being purged. 132 purgeReasons []*prjpb.PurgeReason 133 // cqReady is true if it can be used in creation of new CQ-Vote Runs. 134 // 135 // If true, purgeReason must be nil, and deps must be OK though they may contain 136 // not-yet-loaded deps. 137 cqReady bool 138 139 // nprReady is true if it can be used in the creation of a new patchset 140 // run. 141 nprReady bool 142 } 143 144 func isCQVotePurging(purgingCL *prjpb.PurgingCL) bool { 145 return purgingCL.GetTriggers().GetCqVoteTrigger() != nil || purgingCL.GetAllActiveTriggers() 146 } 147 148 func isNPRVotePurging(purgingCL *prjpb.PurgingCL) bool { 149 return purgingCL.GetTriggers().GetNewPatchsetRunTrigger() != nil || purgingCL.GetAllActiveTriggers() 150 } 151 152 func (info *clInfo) prunCountByType(c *prjpb.Component) (int, int) { 153 var nCQVoteRuns, nNewPatchsetRuns int 154 for _, i := range info.runIndexes { 155 switch mode := run.Mode(c.Pruns[i].GetMode()); mode { 156 case run.NewPatchsetRun: 157 nNewPatchsetRuns++ 158 default: 159 nCQVoteRuns++ 160 } 161 } 162 return nCQVoteRuns, nNewPatchsetRuns 163 } 164 165 // triage sets the triagedCL part of clInfo. 166 // 167 // Expects non-triagedCL part of clInfo to be already set. 168 // panics iff component is not in a valid state. 169 func (info *clInfo) triage(ctx context.Context, c *prjpb.Component, pm pmState) { 170 nCQVoteRuns, nNewPatchsetRuns := info.prunCountByType(c) 171 var triageCQTrigger, triageNPRTrigger bool 172 switch { 173 case nCQVoteRuns > 0: 174 // Once CV supports API-based triggering, a CL may be both in purged 175 // state and have an incomplete Run for the same type of trigger at the 176 // same time. The presence in a Run is more important, so treat it as 177 // such. 178 info.triageInCQVoteRun(ctx, pm) 179 case isCQVotePurging(info.purgingCL): 180 info.triageInCQVotePurge(ctx, pm) 181 case info.pcl.GetTriggers().GetCqVoteTrigger() != nil: 182 triageCQTrigger = true 183 } 184 185 switch { 186 case nNewPatchsetRuns > 0: 187 info.triageInNewPatchsetRun(pm) 188 case isNPRVotePurging(info.purgingCL): 189 info.triageInNewPatchsetPurge(pm) 190 case info.pcl.GetTriggers().GetNewPatchsetRunTrigger() != nil: 191 triageNPRTrigger = true 192 } 193 info.triageNewTriggers(ctx, pm, triageCQTrigger, triageNPRTrigger) 194 } 195 196 func (info *clInfo) triageInCQVoteRun(ctx context.Context, pm pmState) { 197 if !info.pcl.GetSubmitted() && info.pclStatusReadyForTriage() && info.pcl.GetTriggers().GetCqVoteTrigger() != nil { 198 pcl := info.pcl 199 if len(pcl.GetConfigGroupIndexes()) != 1 { 200 // This is expected if project config has changed, but Run's reaction to it 201 // via OnRunFinished event hasn't yet reached PM. 202 return 203 } 204 cgIndex := pcl.GetConfigGroupIndexes()[0] 205 info.deps = triageDeps(ctx, pcl, cgIndex, pm) 206 // A purging or triggering CL must not be "ready" to a new cq run. 207 if info.deps.OK() && !isCQVotePurging(info.purgingCL) && len(info.deps.needToTrigger) == 0 { 208 info.cqReady = true 209 } 210 } 211 } 212 213 func (info *clInfo) triageInNewPatchsetRun(pm pmState) { 214 if len(info.pcl.GetConfigGroupIndexes()) != 1 { 215 // This is expected if project config has changes, but Run's reation to 216 // it via OnRunFinished event has not yet reached PM. 217 return 218 } 219 if !info.pcl.GetSubmitted() && info.pclStatusReadyForTriage() && info.pcl.GetTriggers().GetNewPatchsetRunTrigger() != nil && 220 !isNPRVotePurging(info.purgingCL) { 221 info.nprReady = true 222 } 223 } 224 225 func (info *clInfo) pclStatusReadyForTriage() bool { 226 switch s := info.pcl.GetStatus(); s { 227 case prjpb.PCL_DELETED, prjpb.PCL_UNWATCHED, prjpb.PCL_UNKNOWN: 228 return false 229 case prjpb.PCL_OK: 230 return true 231 default: 232 panic(fmt.Errorf("PCL has unrecognized status %s", s)) 233 } 234 } 235 236 func (info *clInfo) triageInCQVotePurge(ctx context.Context, pm pmState) { 237 // The PM hasn't noticed yet the completion of the async purge. 238 // The result of purging is modified CL, which may be observed by PM earlier 239 // than completion of purge. 240 // 241 // Thus, consider these CLs in potential Run Creation, but don't mark them 242 // ready in order to avoid creating new Runs. 243 if !info.pcl.GetSubmitted() && info.pclStatusReadyForTriage() && info.pcl.Triggers.GetCqVoteTrigger() != nil { 244 cgIndexes := info.pcl.GetConfigGroupIndexes() 245 switch len(cgIndexes) { 246 case 0: 247 panic(fmt.Errorf("PCL %d without ConfigGroup index not possible for CL not referenced by any Runs (partitioning bug?)", info.pcl.GetClid())) 248 case 1: 249 info.deps = triageDeps(ctx, info.pcl, cgIndexes[0], pm) 250 // info.deps.OK() may be true, for example if user has already corrected the 251 // mistake that previously resulted in purging op. However, don't mark CL 252 // ready until purging op completes or expires. 253 } 254 } 255 } 256 257 func (info *clInfo) triageInNewPatchsetPurge(pm pmState) { 258 // The PM hasn't noticed yet the completion of the async purge. 259 // The result of purging is modified CL, which may be observed by PM earlier 260 // than completion of purge. 261 // 262 // Thus, consider these CLs in potential Run Creation, but don't mark them 263 // ready in order to avoid creating new Runs. 264 if !info.pcl.GetSubmitted() && info.pclStatusReadyForTriage() && info.pcl.Triggers.GetNewPatchsetRunTrigger() != nil { 265 if len(info.pcl.GetConfigGroupIndexes()) == 0 { 266 panic(fmt.Errorf("PCL %d without ConfigGroup index not possible for CL not referenced by any Runs (partitioning bug?)", info.pcl.GetClid())) 267 } 268 } 269 } 270 271 func (info *clInfo) addPurgeReason(t *run.Trigger, clError *changelist.CLError) { 272 switch { 273 case t == nil: 274 info.purgeReasons = append(info.purgeReasons, &prjpb.PurgeReason{ 275 ClError: clError, 276 ApplyTo: &prjpb.PurgeReason_AllActiveTriggers{ 277 AllActiveTriggers: true, 278 }, 279 }) 280 case run.Mode(t.Mode) == run.NewPatchsetRun: 281 info.purgeReasons = append(info.purgeReasons, &prjpb.PurgeReason{ 282 ClError: clError, 283 ApplyTo: &prjpb.PurgeReason_Triggers{ 284 Triggers: &run.Triggers{ 285 NewPatchsetRunTrigger: t, 286 }, 287 }, 288 }) 289 default: 290 info.purgeReasons = append(info.purgeReasons, &prjpb.PurgeReason{ 291 ClError: clError, 292 ApplyTo: &prjpb.PurgeReason_Triggers{ 293 Triggers: &run.Triggers{ 294 CqVoteTrigger: t, 295 }, 296 }, 297 }) 298 } 299 } 300 301 func (info *clInfo) triageNewTriggers(ctx context.Context, pm pmState, triageCQTrigger, triageNPRTrigger bool) { 302 pcl := info.pcl 303 for _, r := range pcl.GetPurgeReasons() { 304 switch { 305 case r.GetAllActiveTriggers(): 306 triageCQTrigger, triageNPRTrigger = false, false 307 case r.GetTriggers().GetNewPatchsetRunTrigger() != nil: 308 triageNPRTrigger = false 309 case r.GetTriggers().GetCqVoteTrigger() != nil: 310 triageCQTrigger = false 311 } 312 } 313 info.purgeReasons = append(info.purgeReasons, pcl.GetPurgeReasons()...) 314 if !triageCQTrigger && !triageNPRTrigger { 315 return 316 } 317 clid := pcl.GetClid() 318 assumption := "not possible for CL not referenced by any Runs (partitioning bug?)" 319 switch s := pcl.GetStatus(); s { 320 case prjpb.PCL_DELETED, prjpb.PCL_UNWATCHED, prjpb.PCL_UNKNOWN: 321 panic(fmt.Errorf("PCL %d status %s %s", clid, s, assumption)) 322 case prjpb.PCL_OK: 323 // OK. 324 default: 325 panic(fmt.Errorf("PCL has unrecognized status %s", s)) 326 } 327 328 if pcl.GetSubmitted() { 329 panic(fmt.Errorf("PCL %d submitted %s", clid, assumption)) 330 } 331 332 cgIndexes := pcl.GetConfigGroupIndexes() 333 switch len(cgIndexes) { 334 case 0: 335 panic(fmt.Errorf("PCL %d without ConfigGroup index %s", clid, assumption)) 336 case 1: 337 // if either trigger is being purged, do not mark it as ready. 338 if triageCQTrigger { 339 info.deps = triageDeps(ctx, pcl, cgIndexes[0], pm) 340 switch { 341 case !info.deps.OK(): 342 info.addPurgeReason(info.pcl.Triggers.GetCqVoteTrigger(), info.deps.makePurgeReason()) 343 case len(info.deps.needToTrigger) > 0: 344 // no cqReady if it has deps that need to be triggered. 345 default: 346 info.cqReady = true 347 } 348 } 349 if triageNPRTrigger { 350 info.nprReady = true 351 } 352 default: 353 cgNames := make([]string, len(cgIndexes)) 354 for i, idx := range cgIndexes { 355 cgNames[i] = pm.ConfigGroup(idx).ID.Name() 356 } 357 var purgeTrigger *run.Trigger 358 switch { 359 case triageCQTrigger && triageNPRTrigger: 360 purgeTrigger = nil // purge whole CL 361 case triageCQTrigger: 362 purgeTrigger = pcl.GetTriggers().GetCqVoteTrigger() 363 case triageNPRTrigger: 364 purgeTrigger = pcl.GetTriggers().GetNewPatchsetRunTrigger() 365 } 366 info.addPurgeReason(purgeTrigger, &changelist.CLError{ 367 Kind: &changelist.CLError_WatchedByManyConfigGroups_{ 368 WatchedByManyConfigGroups: &changelist.CLError_WatchedByManyConfigGroups{ 369 ConfigGroups: cgNames, 370 }, 371 }, 372 }) 373 } 374 } 375 376 func (info *clInfo) hasIncompleteRun(m run.Mode) bool { 377 return info.runCountByMode[m] > 0 378 }