go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/run/impl/handler/common.go (about)

     1  // Copyright 2021 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package handler
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"sort"
    21  	"strings"
    22  
    23  	"golang.org/x/sync/errgroup"
    24  	"google.golang.org/protobuf/types/known/timestamppb"
    25  
    26  	"go.chromium.org/luci/common/clock"
    27  	"go.chromium.org/luci/common/errors"
    28  	"go.chromium.org/luci/common/logging"
    29  	"go.chromium.org/luci/gae/filter/txndefer"
    30  	"go.chromium.org/luci/gae/service/datastore"
    31  
    32  	"go.chromium.org/luci/cv/internal/acls"
    33  	"go.chromium.org/luci/cv/internal/changelist"
    34  	"go.chromium.org/luci/cv/internal/common"
    35  	"go.chromium.org/luci/cv/internal/common/eventbox"
    36  	"go.chromium.org/luci/cv/internal/configs/prjcfg"
    37  	"go.chromium.org/luci/cv/internal/gerrit"
    38  	"go.chromium.org/luci/cv/internal/metrics"
    39  	"go.chromium.org/luci/cv/internal/rpc/versioning"
    40  	"go.chromium.org/luci/cv/internal/run"
    41  	"go.chromium.org/luci/cv/internal/run/impl/state"
    42  )
    43  
    44  // endRun sets Run to the provided status and populates `EndTime`.
    45  //
    46  // Returns the side effect when Run is ended.
    47  //
    48  // Panics if the provided status is not ended status.
    49  func (impl *Impl) endRun(ctx context.Context, rs *state.RunState, st run.Status, cg *prjcfg.ConfigGroup, childRuns []*run.Run) eventbox.SideEffectFn {
    50  	if !run.IsEnded(st) {
    51  		panic(fmt.Errorf("can't end run with non-final status %s", st))
    52  	}
    53  
    54  	origSt := rs.Status
    55  	rs.Status = st
    56  	now := datastore.RoundTime(clock.Now(ctx).UTC())
    57  	rs.EndTime = now
    58  	rs.LogEntries = append(rs.LogEntries, &run.LogEntry{
    59  		Time: timestamppb.New(now),
    60  		Kind: &run.LogEntry_RunEnded_{
    61  			RunEnded: &run.LogEntry_RunEnded{},
    62  		},
    63  	})
    64  	for id, op := range rs.OngoingLongOps.GetOps() {
    65  		switch pa := op.GetExecutePostAction(); {
    66  		case pa != nil:
    67  			// Must be a bug. Non terminal Runs should never have ongoing
    68  			// PostAction(s).
    69  			logging.Errorf(ctx, "BUG: Run with status(%s) has ongoing PostActions (%s, %s)", origSt, id, pa.GetName())
    70  		case !op.GetCancelRequested():
    71  			logging.Warningf(ctx, "Requesting best-effort cancellation of long op %q %T", id, op.GetWork())
    72  			op.CancelRequested = true
    73  		}
    74  	}
    75  	enqueueExecutePostActionTask(ctx, rs, cg)
    76  
    77  	return eventbox.Chain(
    78  		func(ctx context.Context) error {
    79  			return impl.removeRunFromCLs(ctx, rs.ID, rs.CLs)
    80  		},
    81  		func(ctx context.Context) error {
    82  			txndefer.Defer(ctx, func(postTransCtx context.Context) {
    83  				logging.Infof(postTransCtx, "finalized Run with status %s", st)
    84  			})
    85  			return impl.PM.NotifyRunFinished(ctx, rs.ID, rs.Status)
    86  		},
    87  		func(ctx context.Context) error {
    88  			switch rs.Mode {
    89  			case run.NewPatchsetRun:
    90  				// Do not export NPRs.
    91  				return nil
    92  			default:
    93  				return impl.BQExporter.Schedule(ctx, rs.ID)
    94  			}
    95  		},
    96  		func(ctx context.Context) error {
    97  			// If this Run is successfully ended (i.e. saved successfully to
    98  			// Datastore), the EVersion will be increased by 1 based on how
    99  			// eventbox works. If this eventbox behavior is changed in the future,
   100  			// this logic should be revisited.
   101  			return impl.Publisher.RunEnded(ctx, rs.ID, rs.Status, rs.EVersion+1)
   102  		},
   103  		func(ctx context.Context) error {
   104  			txndefer.Defer(ctx, func(ctx context.Context) {
   105  				commonFields := []any{
   106  					rs.ID.LUCIProject(),
   107  					rs.ConfigGroupID.Name(),
   108  					string(rs.Mode),
   109  					versioning.RunStatusV0(rs.Status).String(), // translate to public status
   110  				}
   111  				successfullyStarted := !rs.StartTime.IsZero()
   112  				startAwareFields := append(commonFields, successfullyStarted)
   113  				metrics.Public.RunEnded.Add(ctx, 1, startAwareFields...)
   114  				if successfullyStarted {
   115  					// Some run might not start successfully. E.g. user doesn't have the
   116  					// privilege to start the Run, those Runs will be created but ended
   117  					// by CV right away. Therefore, when the duration calculation (end-
   118  					// start) is not applicable for those Runs.
   119  					metrics.Public.RunDuration.Add(ctx, rs.EndTime.Sub(rs.StartTime).Seconds(), commonFields...)
   120  				}
   121  				metrics.Public.RunTotalDuration.Add(ctx, rs.EndTime.Sub(rs.CreateTime).Seconds(), startAwareFields...)
   122  			})
   123  			return nil
   124  		},
   125  		func(ctx context.Context) error {
   126  			for _, child := range childRuns {
   127  				if !run.IsEnded(child.Status) {
   128  					if err := impl.RM.NotifyParentRunCompleted(ctx, child.ID); err != nil {
   129  						return err
   130  					}
   131  				}
   132  			}
   133  			return nil
   134  		},
   135  	)
   136  }
   137  
   138  // removeRunFromCLs atomically updates state of CL entities involved in this
   139  // Run.
   140  //
   141  // For each CL:
   142  //   - marks its Snapshot as outdated, which prevents Project Manager from
   143  //     operating on potentially outdated CL Snapshots;
   144  //   - schedules refresh of CL snapshot;
   145  //   - removes Run's ID from the list of CL's IncompleteRuns.
   146  func (impl *Impl) removeRunFromCLs(ctx context.Context, runID common.RunID, clids common.CLIDs) error {
   147  	muts, err := impl.CLMutator.BeginBatch(ctx, runID.LUCIProject(), clids)
   148  	if err != nil {
   149  		return err
   150  	}
   151  	for _, mut := range muts {
   152  		mut.CL.IncompleteRuns.DelSorted(runID)
   153  		if mut.CL.Snapshot != nil {
   154  			mut.CL.Snapshot.Outdated = &changelist.Snapshot_Outdated{}
   155  		}
   156  	}
   157  	cls, err := impl.CLMutator.FinalizeBatch(ctx, muts)
   158  	if err != nil {
   159  		return err
   160  	}
   161  	return impl.CLUpdater.ScheduleBatch(ctx, runID.LUCIProject(), cls, changelist.UpdateCLTask_RUN_REMOVAL)
   162  }
   163  
   164  type reviewInputMeta struct {
   165  	// notify is whom to notify.
   166  	notify gerrit.Whoms
   167  	// message provides the reason and details of the review change performed.
   168  	//
   169  	// This is posted as a comment in the CL.
   170  	message string
   171  	// addToAttention is whom to add in the attention set.
   172  	addToAttention gerrit.Whoms
   173  	// reason explains the reason of the attention.
   174  	reason string
   175  }
   176  
   177  // scheduleTriggersReset enqueues a ResetTriggers long op for a given Run.
   178  //
   179  // No-op if trigger reset is already ongoing.
   180  func scheduleTriggersReset(ctx context.Context, rs *state.RunState, metas map[common.CLID]reviewInputMeta, statusIfSucceeded run.Status) {
   181  	switch {
   182  	case !run.IsEnded(statusIfSucceeded):
   183  		panic(fmt.Errorf("expected a terminal status; got %s", statusIfSucceeded))
   184  	case isCurrentlyResettingTriggers(rs):
   185  		return
   186  	}
   187  	reqs := make([]*run.OngoingLongOps_Op_ResetTriggers_Request, 0, len(rs.CLs))
   188  	for clid, meta := range metas {
   189  		reqs = append(reqs, &run.OngoingLongOps_Op_ResetTriggers_Request{
   190  			Clid:                 int64(clid),
   191  			Notify:               meta.notify,
   192  			Message:              meta.message,
   193  			AddToAttention:       meta.addToAttention,
   194  			AddToAttentionReason: meta.reason,
   195  		})
   196  	}
   197  	sort.Slice(reqs, func(i, j int) bool { return reqs[i].Clid < reqs[j].Clid })
   198  	rs.EnqueueLongOp(&run.OngoingLongOps_Op{
   199  		Deadline: timestamppb.New(clock.Now(ctx).Add(maxResetTriggersDuration)),
   200  		Work: &run.OngoingLongOps_Op_ResetTriggers_{
   201  			ResetTriggers: &run.OngoingLongOps_Op_ResetTriggers{
   202  				Requests:             reqs,
   203  				RunStatusIfSucceeded: statusIfSucceeded,
   204  			},
   205  		},
   206  	})
   207  }
   208  
   209  func isCurrentlyResettingTriggers(rs *state.RunState) bool {
   210  	for _, op := range rs.OngoingLongOps.GetOps() {
   211  		if op.GetResetTriggers() != nil {
   212  			return true
   213  		}
   214  	}
   215  	return false
   216  }
   217  
   218  func loadCLsAndConfig(ctx context.Context, rs *state.RunState, clids common.CLIDs) (*prjcfg.ConfigGroup, []*run.RunCL, []*changelist.CL, error) {
   219  	var cg *prjcfg.ConfigGroup
   220  	var runCLs []*run.RunCL
   221  	var cls []*changelist.CL
   222  	eg, ectx := errgroup.WithContext(ctx)
   223  	eg.Go(func() (err error) {
   224  		cg, err = prjcfg.GetConfigGroup(ectx, rs.ID.LUCIProject(), rs.ConfigGroupID)
   225  		return err
   226  	})
   227  	eg.Go(func() (err error) {
   228  		cls, err = changelist.LoadCLsByIDs(ectx, clids)
   229  		return err
   230  	})
   231  	eg.Go(func() (err error) {
   232  		runCLs, err = run.LoadRunCLs(ectx, rs.ID, clids)
   233  		return err
   234  	})
   235  	if err := eg.Wait(); err != nil {
   236  		return nil, nil, nil, err
   237  	}
   238  	return cg, runCLs, cls, nil
   239  }
   240  
   241  func loadRunCLsAndCLs(ctx context.Context, rid common.RunID, clids common.CLIDs) ([]*run.RunCL, []*changelist.CL, error) {
   242  	var runCLs []*run.RunCL
   243  	var cls []*changelist.CL
   244  	eg, ectx := errgroup.WithContext(ctx)
   245  	eg.Go(func() (err error) {
   246  		cls, err = changelist.LoadCLsByIDs(ectx, clids)
   247  		return err
   248  	})
   249  	eg.Go(func() (err error) {
   250  		runCLs, err = run.LoadRunCLs(ectx, rid, clids)
   251  		return err
   252  	})
   253  	if err := eg.Wait(); err != nil {
   254  		return nil, nil, err
   255  	}
   256  	return runCLs, cls, nil
   257  }
   258  
   259  func checkRunCreate(ctx context.Context, rs *state.RunState, cg *prjcfg.ConfigGroup, runCLs []*run.RunCL, cls []*changelist.CL) (ok bool, err error) {
   260  	if len(runCLs) == 0 {
   261  		return true, nil
   262  	}
   263  	rootCL, rootTrigger := findRootCLAndTrigger(&rs.Run, cls, runCLs)
   264  	trs := make([]*run.Trigger, len(runCLs))
   265  	for i, r := range runCLs {
   266  		trs[i] = r.Trigger
   267  		if rootTrigger != nil {
   268  			// always use root CL trigger if available for all CLs
   269  			trs[i] = rootTrigger
   270  		}
   271  	}
   272  	switch aclResult, err := acls.CheckRunCreate(ctx, cg, trs, cls); {
   273  	case err != nil:
   274  		return false, errors.Annotate(err, "acls.CheckRunCreate").Err()
   275  	case !aclResult.OK():
   276  		var b strings.Builder
   277  		b.WriteString("the Run does not pass eligibility checks. See reasons at:")
   278  		if rootCL != nil {
   279  			fmt.Fprintf(&b, " %s", rootCL.ExternalID.MustURL())
   280  		} else {
   281  			for cl := range aclResult {
   282  				fmt.Fprintf(&b, "\n  * %s", cl.ExternalID.MustURL())
   283  			}
   284  		}
   285  		rs.LogInfof(ctx, "Run failed", b.String())
   286  		metas := computeMetasForFailedACLCheck(ctx, rs, aclResult, cls, rootCL)
   287  		scheduleTriggersReset(ctx, rs, metas, run.Status_FAILED)
   288  		return false, nil
   289  	}
   290  	return true, nil
   291  }
   292  
   293  func findRootCLAndTrigger(r *run.Run, cls []*changelist.CL, rcls []*run.RunCL) (*changelist.CL, *run.Trigger) {
   294  	if !r.HasRootCL() {
   295  		return nil, nil
   296  	}
   297  	for i, cl := range cls {
   298  		if cl.ID == r.RootCL {
   299  			trigger := rcls[i].Trigger
   300  			if trigger == nil {
   301  				panic(fmt.Errorf("root CL %d dones't have an active trigger", r.RootCL))
   302  			}
   303  			return cl, trigger
   304  		}
   305  	}
   306  	panic(fmt.Errorf("can not find root CL %d in the provided CLs", r.RootCL))
   307  }
   308  
   309  func computeMetasForFailedACLCheck(ctx context.Context, rs *state.RunState, aclResult acls.CheckResult, cls []*changelist.CL, rootCL *changelist.CL) map[common.CLID]reviewInputMeta {
   310  	metas := make(map[common.CLID]reviewInputMeta, len(cls))
   311  	whoms := rs.Mode.GerritNotifyTargets()
   312  	switch {
   313  	case rs.Mode == run.NewPatchsetRun:
   314  		// don't need to notify any one if user is not eligible to create a
   315  		// a new patchset run.
   316  		for _, cl := range cls {
   317  			metas[cl.ID] = reviewInputMeta{}
   318  		}
   319  	case rs.HasRootCL() && aclResult.Has(rootCL) && len(aclResult) == 1:
   320  		// If only the root CL doesn't pass the eligibility check, simply relay
   321  		// the failure to the root CL.
   322  		metas[rs.RootCL] = reviewInputMeta{
   323  			message:        aclResult.Failure(rootCL),
   324  			notify:         whoms,
   325  			addToAttention: whoms,
   326  			reason:         "CQ/CV Run failed",
   327  		}
   328  	case rs.HasRootCL():
   329  		metas[rs.RootCL] = reviewInputMeta{
   330  			message:        fmt.Sprintf("can not start the Run due to following errors\n\n%s", aclResult.FailuresSummary()),
   331  			notify:         whoms,
   332  			addToAttention: whoms,
   333  			reason:         "CQ/CV Run failed",
   334  		}
   335  	default:
   336  		for _, cl := range cls {
   337  			metas[cl.ID] = reviewInputMeta{
   338  				message:        aclResult.Failure(cl),
   339  				notify:         gerrit.Whoms{gerrit.Whom_OWNER, gerrit.Whom_CQ_VOTERS},
   340  				addToAttention: gerrit.Whoms{gerrit.Whom_OWNER, gerrit.Whom_CQ_VOTERS},
   341  				reason:         "CQ/CV Run failed",
   342  			}
   343  		}
   344  	}
   345  	return metas
   346  }