go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/run/impl/handler/common.go (about) 1 // Copyright 2021 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package handler 16 17 import ( 18 "context" 19 "fmt" 20 "sort" 21 "strings" 22 23 "golang.org/x/sync/errgroup" 24 "google.golang.org/protobuf/types/known/timestamppb" 25 26 "go.chromium.org/luci/common/clock" 27 "go.chromium.org/luci/common/errors" 28 "go.chromium.org/luci/common/logging" 29 "go.chromium.org/luci/gae/filter/txndefer" 30 "go.chromium.org/luci/gae/service/datastore" 31 32 "go.chromium.org/luci/cv/internal/acls" 33 "go.chromium.org/luci/cv/internal/changelist" 34 "go.chromium.org/luci/cv/internal/common" 35 "go.chromium.org/luci/cv/internal/common/eventbox" 36 "go.chromium.org/luci/cv/internal/configs/prjcfg" 37 "go.chromium.org/luci/cv/internal/gerrit" 38 "go.chromium.org/luci/cv/internal/metrics" 39 "go.chromium.org/luci/cv/internal/rpc/versioning" 40 "go.chromium.org/luci/cv/internal/run" 41 "go.chromium.org/luci/cv/internal/run/impl/state" 42 ) 43 44 // endRun sets Run to the provided status and populates `EndTime`. 45 // 46 // Returns the side effect when Run is ended. 47 // 48 // Panics if the provided status is not ended status. 49 func (impl *Impl) endRun(ctx context.Context, rs *state.RunState, st run.Status, cg *prjcfg.ConfigGroup, childRuns []*run.Run) eventbox.SideEffectFn { 50 if !run.IsEnded(st) { 51 panic(fmt.Errorf("can't end run with non-final status %s", st)) 52 } 53 54 origSt := rs.Status 55 rs.Status = st 56 now := datastore.RoundTime(clock.Now(ctx).UTC()) 57 rs.EndTime = now 58 rs.LogEntries = append(rs.LogEntries, &run.LogEntry{ 59 Time: timestamppb.New(now), 60 Kind: &run.LogEntry_RunEnded_{ 61 RunEnded: &run.LogEntry_RunEnded{}, 62 }, 63 }) 64 for id, op := range rs.OngoingLongOps.GetOps() { 65 switch pa := op.GetExecutePostAction(); { 66 case pa != nil: 67 // Must be a bug. Non terminal Runs should never have ongoing 68 // PostAction(s). 69 logging.Errorf(ctx, "BUG: Run with status(%s) has ongoing PostActions (%s, %s)", origSt, id, pa.GetName()) 70 case !op.GetCancelRequested(): 71 logging.Warningf(ctx, "Requesting best-effort cancellation of long op %q %T", id, op.GetWork()) 72 op.CancelRequested = true 73 } 74 } 75 enqueueExecutePostActionTask(ctx, rs, cg) 76 77 return eventbox.Chain( 78 func(ctx context.Context) error { 79 return impl.removeRunFromCLs(ctx, rs.ID, rs.CLs) 80 }, 81 func(ctx context.Context) error { 82 txndefer.Defer(ctx, func(postTransCtx context.Context) { 83 logging.Infof(postTransCtx, "finalized Run with status %s", st) 84 }) 85 return impl.PM.NotifyRunFinished(ctx, rs.ID, rs.Status) 86 }, 87 func(ctx context.Context) error { 88 switch rs.Mode { 89 case run.NewPatchsetRun: 90 // Do not export NPRs. 91 return nil 92 default: 93 return impl.BQExporter.Schedule(ctx, rs.ID) 94 } 95 }, 96 func(ctx context.Context) error { 97 // If this Run is successfully ended (i.e. saved successfully to 98 // Datastore), the EVersion will be increased by 1 based on how 99 // eventbox works. If this eventbox behavior is changed in the future, 100 // this logic should be revisited. 101 return impl.Publisher.RunEnded(ctx, rs.ID, rs.Status, rs.EVersion+1) 102 }, 103 func(ctx context.Context) error { 104 txndefer.Defer(ctx, func(ctx context.Context) { 105 commonFields := []any{ 106 rs.ID.LUCIProject(), 107 rs.ConfigGroupID.Name(), 108 string(rs.Mode), 109 versioning.RunStatusV0(rs.Status).String(), // translate to public status 110 } 111 successfullyStarted := !rs.StartTime.IsZero() 112 startAwareFields := append(commonFields, successfullyStarted) 113 metrics.Public.RunEnded.Add(ctx, 1, startAwareFields...) 114 if successfullyStarted { 115 // Some run might not start successfully. E.g. user doesn't have the 116 // privilege to start the Run, those Runs will be created but ended 117 // by CV right away. Therefore, when the duration calculation (end- 118 // start) is not applicable for those Runs. 119 metrics.Public.RunDuration.Add(ctx, rs.EndTime.Sub(rs.StartTime).Seconds(), commonFields...) 120 } 121 metrics.Public.RunTotalDuration.Add(ctx, rs.EndTime.Sub(rs.CreateTime).Seconds(), startAwareFields...) 122 }) 123 return nil 124 }, 125 func(ctx context.Context) error { 126 for _, child := range childRuns { 127 if !run.IsEnded(child.Status) { 128 if err := impl.RM.NotifyParentRunCompleted(ctx, child.ID); err != nil { 129 return err 130 } 131 } 132 } 133 return nil 134 }, 135 ) 136 } 137 138 // removeRunFromCLs atomically updates state of CL entities involved in this 139 // Run. 140 // 141 // For each CL: 142 // - marks its Snapshot as outdated, which prevents Project Manager from 143 // operating on potentially outdated CL Snapshots; 144 // - schedules refresh of CL snapshot; 145 // - removes Run's ID from the list of CL's IncompleteRuns. 146 func (impl *Impl) removeRunFromCLs(ctx context.Context, runID common.RunID, clids common.CLIDs) error { 147 muts, err := impl.CLMutator.BeginBatch(ctx, runID.LUCIProject(), clids) 148 if err != nil { 149 return err 150 } 151 for _, mut := range muts { 152 mut.CL.IncompleteRuns.DelSorted(runID) 153 if mut.CL.Snapshot != nil { 154 mut.CL.Snapshot.Outdated = &changelist.Snapshot_Outdated{} 155 } 156 } 157 cls, err := impl.CLMutator.FinalizeBatch(ctx, muts) 158 if err != nil { 159 return err 160 } 161 return impl.CLUpdater.ScheduleBatch(ctx, runID.LUCIProject(), cls, changelist.UpdateCLTask_RUN_REMOVAL) 162 } 163 164 type reviewInputMeta struct { 165 // notify is whom to notify. 166 notify gerrit.Whoms 167 // message provides the reason and details of the review change performed. 168 // 169 // This is posted as a comment in the CL. 170 message string 171 // addToAttention is whom to add in the attention set. 172 addToAttention gerrit.Whoms 173 // reason explains the reason of the attention. 174 reason string 175 } 176 177 // scheduleTriggersReset enqueues a ResetTriggers long op for a given Run. 178 // 179 // No-op if trigger reset is already ongoing. 180 func scheduleTriggersReset(ctx context.Context, rs *state.RunState, metas map[common.CLID]reviewInputMeta, statusIfSucceeded run.Status) { 181 switch { 182 case !run.IsEnded(statusIfSucceeded): 183 panic(fmt.Errorf("expected a terminal status; got %s", statusIfSucceeded)) 184 case isCurrentlyResettingTriggers(rs): 185 return 186 } 187 reqs := make([]*run.OngoingLongOps_Op_ResetTriggers_Request, 0, len(rs.CLs)) 188 for clid, meta := range metas { 189 reqs = append(reqs, &run.OngoingLongOps_Op_ResetTriggers_Request{ 190 Clid: int64(clid), 191 Notify: meta.notify, 192 Message: meta.message, 193 AddToAttention: meta.addToAttention, 194 AddToAttentionReason: meta.reason, 195 }) 196 } 197 sort.Slice(reqs, func(i, j int) bool { return reqs[i].Clid < reqs[j].Clid }) 198 rs.EnqueueLongOp(&run.OngoingLongOps_Op{ 199 Deadline: timestamppb.New(clock.Now(ctx).Add(maxResetTriggersDuration)), 200 Work: &run.OngoingLongOps_Op_ResetTriggers_{ 201 ResetTriggers: &run.OngoingLongOps_Op_ResetTriggers{ 202 Requests: reqs, 203 RunStatusIfSucceeded: statusIfSucceeded, 204 }, 205 }, 206 }) 207 } 208 209 func isCurrentlyResettingTriggers(rs *state.RunState) bool { 210 for _, op := range rs.OngoingLongOps.GetOps() { 211 if op.GetResetTriggers() != nil { 212 return true 213 } 214 } 215 return false 216 } 217 218 func loadCLsAndConfig(ctx context.Context, rs *state.RunState, clids common.CLIDs) (*prjcfg.ConfigGroup, []*run.RunCL, []*changelist.CL, error) { 219 var cg *prjcfg.ConfigGroup 220 var runCLs []*run.RunCL 221 var cls []*changelist.CL 222 eg, ectx := errgroup.WithContext(ctx) 223 eg.Go(func() (err error) { 224 cg, err = prjcfg.GetConfigGroup(ectx, rs.ID.LUCIProject(), rs.ConfigGroupID) 225 return err 226 }) 227 eg.Go(func() (err error) { 228 cls, err = changelist.LoadCLsByIDs(ectx, clids) 229 return err 230 }) 231 eg.Go(func() (err error) { 232 runCLs, err = run.LoadRunCLs(ectx, rs.ID, clids) 233 return err 234 }) 235 if err := eg.Wait(); err != nil { 236 return nil, nil, nil, err 237 } 238 return cg, runCLs, cls, nil 239 } 240 241 func loadRunCLsAndCLs(ctx context.Context, rid common.RunID, clids common.CLIDs) ([]*run.RunCL, []*changelist.CL, error) { 242 var runCLs []*run.RunCL 243 var cls []*changelist.CL 244 eg, ectx := errgroup.WithContext(ctx) 245 eg.Go(func() (err error) { 246 cls, err = changelist.LoadCLsByIDs(ectx, clids) 247 return err 248 }) 249 eg.Go(func() (err error) { 250 runCLs, err = run.LoadRunCLs(ectx, rid, clids) 251 return err 252 }) 253 if err := eg.Wait(); err != nil { 254 return nil, nil, err 255 } 256 return runCLs, cls, nil 257 } 258 259 func checkRunCreate(ctx context.Context, rs *state.RunState, cg *prjcfg.ConfigGroup, runCLs []*run.RunCL, cls []*changelist.CL) (ok bool, err error) { 260 if len(runCLs) == 0 { 261 return true, nil 262 } 263 rootCL, rootTrigger := findRootCLAndTrigger(&rs.Run, cls, runCLs) 264 trs := make([]*run.Trigger, len(runCLs)) 265 for i, r := range runCLs { 266 trs[i] = r.Trigger 267 if rootTrigger != nil { 268 // always use root CL trigger if available for all CLs 269 trs[i] = rootTrigger 270 } 271 } 272 switch aclResult, err := acls.CheckRunCreate(ctx, cg, trs, cls); { 273 case err != nil: 274 return false, errors.Annotate(err, "acls.CheckRunCreate").Err() 275 case !aclResult.OK(): 276 var b strings.Builder 277 b.WriteString("the Run does not pass eligibility checks. See reasons at:") 278 if rootCL != nil { 279 fmt.Fprintf(&b, " %s", rootCL.ExternalID.MustURL()) 280 } else { 281 for cl := range aclResult { 282 fmt.Fprintf(&b, "\n * %s", cl.ExternalID.MustURL()) 283 } 284 } 285 rs.LogInfof(ctx, "Run failed", b.String()) 286 metas := computeMetasForFailedACLCheck(ctx, rs, aclResult, cls, rootCL) 287 scheduleTriggersReset(ctx, rs, metas, run.Status_FAILED) 288 return false, nil 289 } 290 return true, nil 291 } 292 293 func findRootCLAndTrigger(r *run.Run, cls []*changelist.CL, rcls []*run.RunCL) (*changelist.CL, *run.Trigger) { 294 if !r.HasRootCL() { 295 return nil, nil 296 } 297 for i, cl := range cls { 298 if cl.ID == r.RootCL { 299 trigger := rcls[i].Trigger 300 if trigger == nil { 301 panic(fmt.Errorf("root CL %d dones't have an active trigger", r.RootCL)) 302 } 303 return cl, trigger 304 } 305 } 306 panic(fmt.Errorf("can not find root CL %d in the provided CLs", r.RootCL)) 307 } 308 309 func computeMetasForFailedACLCheck(ctx context.Context, rs *state.RunState, aclResult acls.CheckResult, cls []*changelist.CL, rootCL *changelist.CL) map[common.CLID]reviewInputMeta { 310 metas := make(map[common.CLID]reviewInputMeta, len(cls)) 311 whoms := rs.Mode.GerritNotifyTargets() 312 switch { 313 case rs.Mode == run.NewPatchsetRun: 314 // don't need to notify any one if user is not eligible to create a 315 // a new patchset run. 316 for _, cl := range cls { 317 metas[cl.ID] = reviewInputMeta{} 318 } 319 case rs.HasRootCL() && aclResult.Has(rootCL) && len(aclResult) == 1: 320 // If only the root CL doesn't pass the eligibility check, simply relay 321 // the failure to the root CL. 322 metas[rs.RootCL] = reviewInputMeta{ 323 message: aclResult.Failure(rootCL), 324 notify: whoms, 325 addToAttention: whoms, 326 reason: "CQ/CV Run failed", 327 } 328 case rs.HasRootCL(): 329 metas[rs.RootCL] = reviewInputMeta{ 330 message: fmt.Sprintf("can not start the Run due to following errors\n\n%s", aclResult.FailuresSummary()), 331 notify: whoms, 332 addToAttention: whoms, 333 reason: "CQ/CV Run failed", 334 } 335 default: 336 for _, cl := range cls { 337 metas[cl.ID] = reviewInputMeta{ 338 message: aclResult.Failure(cl), 339 notify: gerrit.Whoms{gerrit.Whom_OWNER, gerrit.Whom_CQ_VOTERS}, 340 addToAttention: gerrit.Whoms{gerrit.Whom_OWNER, gerrit.Whom_CQ_VOTERS}, 341 reason: "CQ/CV Run failed", 342 } 343 } 344 } 345 return metas 346 }