go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/prjmanager/triager/runs.go

// Copyright 2021 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package triager

import (
	"context"
	"fmt"
	"sort"
	"strings"
	"time"

	"go.chromium.org/luci/auth/identity"
	"go.chromium.org/luci/common/clock"
	"go.chromium.org/luci/common/data/rand/mathrand"
	"go.chromium.org/luci/common/errors"
	"go.chromium.org/luci/common/logging"
	"go.chromium.org/luci/common/retry/transient"
	"go.chromium.org/luci/gae/service/datastore"

	"go.chromium.org/luci/cv/internal/changelist"
	"go.chromium.org/luci/cv/internal/common"
	"go.chromium.org/luci/cv/internal/configs/prjcfg"
	"go.chromium.org/luci/cv/internal/gerrit/trigger"
	"go.chromium.org/luci/cv/internal/prjmanager/itriager"
	"go.chromium.org/luci/cv/internal/prjmanager/prjpb"
	"go.chromium.org/luci/cv/internal/run"
	"go.chromium.org/luci/cv/internal/run/runcreator"
)

// stageNewRuns returns Run Creators for immediate Run creation or the earliest
// time for the next Run to be created.
//
// Guarantees that returned Run Creators are CL-wise disjoint, and thus can be
// created totally independently.
//
// In exceptional cases, also marks some CLs for purging if their trigger
// matches the existing finalized Run.
func stageNewRuns(ctx context.Context, c *prjpb.Component, cls map[int64]*clInfo, pm pmState) ([]*runcreator.Creator, time.Time, error) {
	var next time.Time
	var candidates []*runcreator.Creator

	rs := runStage{
		pm:         pm,
		c:          c,
		cls:        cls,
		visitedCLs: make(map[int64]struct{}, len(cls)),
	}
	// For determinism, iterate in fixed order:
	for _, clid := range c.GetClids() {
		info := cls[clid]
		switch rcs, nt, err := rs.stageNewRunsFrom(ctx, clid, info); {
		case err != nil:
			return nil, time.Time{}, err
		case len(rcs) != 0:
			candidates = append(candidates, rcs...)
		default:
			next = earliest(next, nt)
		}
	}
	if len(candidates) == 0 {
		return nil, next, nil
	}
	final := make([]*runcreator.Creator, 0, len(candidates))
	for _, rc := range candidates {
		if shouldCreateNow, depRuns := rs.resolveDepRuns(ctx, rc); shouldCreateNow {
			rc.DepRuns = append(rc.DepRuns, depRuns...)
			final = append(final, rc)
		}
	}
	return final, next, nil
}

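// runStage holds the state of a single stageNewRuns invocation over one
// component.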
type runStage struct {
	// immutable

	pm  pmState
	c   *prjpb.Component
	cls map[int64]*clInfo

	// mutable

	// visitedCLs tracks CLs already considered. Ensures that 1 CL can appear in
	// at most 1 new Run.
	visitedCLs map[int64]struct{}
	// cachedReverseDeps maps clid to clids of CLs which depend on it.
	// Lazily initialized.
	cachedReverseDeps map[int64][]int64
}

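// stageNewRunsFrom stages new Runs originating from a single CL, considering
// both its CQ vote trigger and its new patchset trigger, whichever are ready.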
func (rs *runStage) stageNewRunsFrom(ctx context.Context, clid int64, info *clInfo) ([]*runcreator.Creator, time.Time, error) {
	if !info.cqReady && !info.nprReady {
		return nil, time.Time{}, nil
	}
	var runs []*runcreator.Creator
	var retTime time.Time
	if info.cqReady {
		switch cqRuns, t, err := rs.stageNewCQVoteRunsFrom(ctx, clid, info); {
		case err != nil:
			return nil, time.Time{}, err
		default:
			runs = append(runs, cqRuns...)
			retTime = earliest(t, retTime)
		}
	}
	if info.nprReady {
		switch nprRuns, t, err := rs.stageNewPatchsetRunsFrom(ctx, clid, info); {
		case err != nil:
			return nil, time.Time{}, err
		default:
			runs = append(runs, nprRuns...)
			retTime = earliest(t, retTime)
		}
	}
	return runs, retTime, nil
}

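// stageNewPatchsetRunsFrom stages a single-CL New Patchset Run for the given
// CL, unless an equivalent Run already exists.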
func (rs *runStage) stageNewPatchsetRunsFrom(ctx context.Context, clid int64, info *clInfo) ([]*runcreator.Creator, time.Time, error) {
	cgIndex := info.pcl.GetConfigGroupIndexes()[0]
	cg := rs.pm.ConfigGroup(cgIndex)

	combo := &combo{}
	combo.add(info, useNewPatchsetTrigger)

	if runs := combo.overlappingRuns(); len(runs) > 0 {
		for idx := range runs {
			if rs.c.Pruns[idx].Mode == string(run.NewPatchsetRun) {
				// A Run exists with the same mode and CL; wait for it to end.
				return nil, time.Time{}, nil
			}
		}
	}

	rc, err := rs.makeCreator(ctx, combo, cg, useNewPatchsetTrigger)
	if err != nil {
		return nil, time.Time{}, err
	}

	switch exists, nextCheck, err := checkExisting(ctx, rc, *combo, useNewPatchsetTrigger); {
	case err != nil:
		return nil, time.Time{}, err
	case !exists:
		return []*runcreator.Creator{rc}, time.Time{}, nil
	default:
		return nil, nextCheck, nil
	}
}

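// stageNewCQVoteRunsFrom stages new CQ vote Runs starting from the given CL,
// expanding the CL set to its dependency closure when the config group
// combines CLs.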
func (rs *runStage) stageNewCQVoteRunsFrom(ctx context.Context, clid int64, info *clInfo) ([]*runcreator.Creator, time.Time, error) {
	// Only start with ready CLs. Non-ready ones can't form new Runs anyway.

	if !rs.markVisited(clid) {
		return nil, time.Time{}, nil
	}

	combo := combo{}
	combo.add(info, useCQVoteTrigger)

	cgIndex := info.pcl.GetConfigGroupIndexes()[0]
	cg := rs.pm.ConfigGroup(cgIndex)

	if cg.Content.GetCombineCls() != nil {
		// Maximize the Run's CL set to include not only all reachable
		// dependencies, but also reachable dependents, recursively.
		rs.expandComboVisited(info, &combo)

		// Should the decision be delayed?
		delay := cg.Content.GetCombineCls().GetStabilizationDelay().AsDuration()
		if next := combo.maxTriggeredTime.Add(delay); next.After(clock.Now(ctx)) {
			return nil, next, nil
		}
	}

	if combo.withNotYetLoadedDeps != nil {
		return rs.postponeDueToNotYetLoadedDeps(ctx, &combo)
	}
	if len(combo.notReady) > 0 {
		return rs.postponeDueToNotReadyCLs(ctx, &combo)
	}

	// At this point, all CLs in the combo are stable, ready, and have valid deps.
	if cg.Content.GetCombineCls() != nil {
		// For multi-CL Runs, this means all non-submitted deps are already inside
		// the combo.
		if missing := combo.missingDeps(); len(missing) > 0 {
			panic(fmt.Errorf("%s has missing deps %s", combo, missing))
		}
	}
	// Furthermore, since all CLs are ready and related, they must belong to
	// the exact same config group as the initial CL.
	if cgIndexes := combo.configGroupsIndexes(); len(cgIndexes) > 1 {
		panic(fmt.Errorf("%s has >1 config groups: %v", combo, cgIndexes))
	}

	// Check whether the combo overlaps with any existing Runs.
	// TODO(tandrii): support >1 concurrent Run on the same CL(s).
	if runs := combo.overlappingRuns(); len(runs) > 0 {
		for runIndex, sharedCLsCount := range runs {
			prun := rs.c.GetPruns()[runIndex]
			if run.Mode(prun.Mode) == run.NewPatchsetRun {
				continue
			}

			switch l := len(prun.GetClids()); {
			case l < sharedCLsCount:
				panic("impossible")
			case l > sharedCLsCount:
				// The Run's scope is larger than or different from this combo's.
				// Run Manager will soon be finalizing the Run, as not all of its CLs
				// are triggered.
				//
				// This may happen in many cases of multi-CL Runs, for example:
				//  * during submission: some CLs have already been submitted;
				//  * during cancellation: some CLs' votes have already been removed;
				//  * a newly ingested LUCI project config splits the Run across
				//    multiple ConfigGroups or even makes one CL unwatched by the
				//    project.
				return rs.postponeDueToExistingRunDiffScope(ctx, &combo, prun)
			case sharedCLsCount == len(combo.all):
				// The combo scope is exactly the same as the Run's. This is the most
				// likely situation -- there is nothing for PM to do but wait.
				// Note that it's possible that the Run's mode differs from the
				// combo's, in which case Run Manager will be finalizing the Run soon,
				// so wait for notification from Run Manager anyway.
				return nil, time.Time{}, nil
			case sharedCLsCount > len(combo.all):
				panic("impossible")
			default:
				// The combo scope is larger than this Run's.
				//
				// CQDaemon in this case aborts the existing Run **without** removing
				// the triggering CQ votes, and then immediately starts working on the
				// larger-scoped Run. This isn't what users usually want, though, as it
				// usually means re-running all the tryjobs from scratch.
				//
				// TODO(tandrii): decide if it's OK to just purge a CL which isn't in
				// an active Run once CV is in charge AND to just wait if CV isn't in
				// charge. This is definitely easier to implement.
				// However, the problem with this potential approach is that if a user
				// really wants to stop an existing Run of N CLs and start a larger Run
				// on N+1 CLs instead, then the user has to first remove all existing
				// CQ votes and then re-vote on all CLs from scratch. Worse, during the
				// removal, CV/CQDaemon may temporarily see CQ votes on K < N CLs, and
				// since these CQ votes are much older than the stabilization delay,
				// CV/CQDaemon will happily start a spurious Run on K CLs and
				// potentially even trigger redundant tryjobs, which won't even be
				// cancelled. Grrr.
				// TODO(tandrii): alternatively, consider canceling the existing Run,
				// similar to CQDaemon.
				return rs.postponeExpandingExistingRunScope(ctx, &combo, prun)
			}
		}
		// It is normal for a CQ vote Run to "overlap" a New Patchset Run; treat it
		// as if there were no overlap.
	}

	rc, err := rs.makeCreator(ctx, &combo, cg, useCQVoteTrigger)
	if err != nil {
		return nil, time.Time{}, err
	}

	switch exists, nextCheck, err := checkExisting(ctx, rc, combo, useCQVoteTrigger); {
	case err != nil:
		return nil, time.Time{}, err
	case !exists:
		return []*runcreator.Creator{rc}, time.Time{}, nil
	default:
		return nil, nextCheck, nil
	}
}

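// reverseDeps returns a map from a CLID to the CLIDs of CLs which depend on
// it, computed lazily from the component's non-purged CLs and cached for the
// lifetime of this runStage.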
func (rs *runStage) reverseDeps() map[int64][]int64 {
	if rs.cachedReverseDeps != nil {
		return rs.cachedReverseDeps
	}
	rs.cachedReverseDeps = map[int64][]int64{}
	for clid, info := range rs.cls {
		if info.deps == nil {
			// CL is or will be purged, so its deps weren't even triaged.
			continue
		}
		info.deps.iterateNotSubmitted(info.pcl, func(dep *changelist.Dep) {
			did := dep.GetClid()
			rs.cachedReverseDeps[did] = append(rs.cachedReverseDeps[did], clid)
		})
	}
	return rs.cachedReverseDeps
}

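// expandComboVisited adds to the combo all not-yet-visited CLs reachable from
// the given CL, via both its deps and its reverse deps.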
func (rs *runStage) expandComboVisited(info *clInfo, result *combo) {
	if info.deps != nil {
		info.deps.iterateNotSubmitted(info.pcl, func(dep *changelist.Dep) {
			rs.expandCombo(dep.GetClid(), result)
		})
	}
	for _, clid := range rs.reverseDeps()[info.pcl.GetClid()] {
		rs.expandCombo(clid, result)
	}
}

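// expandCombo adds the given CL to the combo if it is tracked in this
// component and hasn't been visited yet, then recurses into its relations.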
func (rs *runStage) expandCombo(clid int64, result *combo) {
	info := rs.cls[clid]
	if info == nil {
		// Can only happen if clid is a dep that's not yet loaded (otherwise the
		// dep would be in this component, and hence info would be set).
		return
	}
	if !rs.markVisited(clid) {
		return
	}
	result.add(info, useCQVoteTrigger)
	rs.expandComboVisited(info, result)
}

func (rs *runStage) postponeDueToNotYetLoadedDeps(ctx context.Context, combo *combo) ([]*runcreator.Creator, time.Time, error) {
	// TODO(crbug/1211576): this waiting can last forever. The component needs to
	// record how long it has been waiting and abort with a clear message to the
	// user.
	logging.Warningf(ctx, "%s waits for not yet loaded deps", combo)
	return nil, time.Time{}, nil
}

func (rs *runStage) postponeDueToNotReadyCLs(ctx context.Context, combo *combo) ([]*runcreator.Creator, time.Time, error) {
	// TODO(crbug/1211576): for safety, this should not wait forever.
	logging.Warningf(ctx, "%s waits for not yet ready CLs", combo)
	return nil, time.Time{}, nil
}

func (rs *runStage) postponeDueToExistingRunDiffScope(ctx context.Context, combo *combo, r *prjpb.PRun) ([]*runcreator.Creator, time.Time, error) {
	// TODO(crbug/1211576): for safety, this should not wait forever.
	logging.Warningf(ctx, "%s is waiting for a differently scoped run %q to finish", combo, r.GetId())
	return nil, time.Time{}, nil
}

func (rs *runStage) postponeExpandingExistingRunScope(ctx context.Context, combo *combo, r *prjpb.PRun) ([]*runcreator.Creator, time.Time, error) {
	// TODO(crbug/1211576): for safety, this should not wait forever.
	logging.Warningf(ctx, "%s is waiting for smaller scoped run %q to finish", combo, r.GetId())
	return nil, time.Time{}, nil
}

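// makeCreator assembles a runcreator.Creator for all CLs in the combo, loading
// the CLs from Datastore and verifying that the PM's view of them is still
// fresh.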
func (rs *runStage) makeCreator(ctx context.Context, combo *combo, cg *prjcfg.ConfigGroup, chooseTrigger func(ts *run.Triggers) *run.Trigger) (*runcreator.Creator, error) {
	latestIndex := -1
	cls := make([]*changelist.CL, len(combo.all))
	for i, info := range combo.all {
		cls[i] = &changelist.CL{ID: common.CLID(info.pcl.GetClid())}
		if info == combo.latestTriggered {
			latestIndex = i
		}
	}
	if err := datastore.Get(ctx, cls); err != nil {
		// Even if one of the errors is ErrNoSuchEntity, this is a temporary
		// situation, as such CL(s) should be removed from the PM state soon.
		return nil, errors.Annotate(err, "failed to load CLs").Tag(transient.Tag).Err()
	}

	// The Run's owner is whoever owns the latest-triggered CL.
	// It's guaranteed to be set, because otherwise the CL would have been sent
	// for purging and not marked as ready.
	owner, err := cls[latestIndex].Snapshot.OwnerIdentity()
	if err != nil {
		return nil, errors.Annotate(err, "failed to get OwnerIdentity of %d", cls[latestIndex].ID).Err()
	}

	bcls := make([]runcreator.CL, len(cls))
	var opts *run.Options
	var incompleteRuns common.RunIDs
	for i, cl := range cls {
		for _, ri := range combo.all[i].runIndexes {
			incompleteRuns = append(incompleteRuns, common.RunID(rs.c.Pruns[ri].Id))
		}
		pcl := combo.all[i].pcl
		exp, act := pcl.GetEversion(), cl.EVersion
		if exp != act {
			return nil, errors.Annotate(itriager.ErrOutdatedPMState, "CL %d EVersion changed %d => %d", cl.ID, exp, act).Err()
		}
		opts = run.MergeOptions(opts, run.ExtractOptions(cl.Snapshot))

		// Restore the email, which the Project Manager doesn't track inside PCLs.
		tr := chooseTrigger(trigger.Find(&trigger.FindInput{
			ChangeInfo:                   cl.Snapshot.GetGerrit().GetInfo(),
			ConfigGroup:                  cg.Content,
			TriggerNewPatchsetRunAfterPS: cl.TriggerNewPatchsetRunAfterPS,
		}))
		pclT := chooseTrigger(pcl.GetTriggers())
		if tr.GetMode() != pclT.GetMode() {
			panic(fmt.Errorf("inconsistent Trigger in PM (%s) vs freshly extracted (%s)", pclT, tr))
		}

		bcls[i] = runcreator.CL{
			ID:               common.CLID(pcl.GetClid()),
			ExpectedEVersion: pcl.GetEversion(),
			TriggerInfo:      tr,
			Snapshot:         cl.Snapshot,
		}
	}
	t := chooseTrigger(combo.latestTriggered.pcl.GetTriggers())
	triggererIdentity, err := identity.MakeIdentity(fmt.Sprintf("%s:%s", identity.User, t.GetEmail()))
	if err != nil {
		return nil, errors.Annotate(err, "failed to construct triggerer identity of %s", t.GetEmail()).Err()
	}
	sort.Sort(incompleteRuns)
	payer := quotaPayer(cls[latestIndex], owner, triggererIdentity, t)
	return &runcreator.Creator{
		ConfigGroupID:            cg.ID,
		LUCIProject:              cg.ProjectString(),
		Mode:                     run.Mode(t.GetMode()),
		ModeDefinition:           t.GetModeDefinition(),
		CreateTime:               t.GetTime().AsTime(),
		Owner:                    owner,
		CreatedBy:                triggererIdentity,
		BilledTo:                 payer,
		Options:                  opts,
		ExpectedIncompleteRunIDs: incompleteRuns,
		OperationID:              fmt.Sprintf("PM-%d", mathrand.Int63(ctx)),
		InputCLs:                 bcls,
	}, nil
}

// markVisited marks the CL as visited if it isn't already, and reports whether
// it was newly marked.
func (rs *runStage) markVisited(clid int64) bool {
	if _, visited := rs.visitedCLs[clid]; visited {
		return false
	}
	rs.visitedCLs[clid] = struct{}{}
	return true
}

// combo is a set of related CLs that will together form a new Run.
//
// The CLs in a combo are a subset of those from the component.
type combo struct {
	all                  []*clInfo
	clids                map[int64]struct{}
	notReady             []*clInfo
	withNotYetLoadedDeps *clInfo // nil if none; any one otherwise.
	latestTriggered      *clInfo
	latestTrigger        *run.Trigger
	maxTriggeredTime     time.Time
}

func (c combo) String() string {
	sb := strings.Builder{}
	sb.WriteString("combo(CLIDs: [")
	for _, a := range c.all {
		fmt.Fprintf(&sb, "%d ", a.pcl.GetClid())
	}
	sb.WriteRune(']')
	if len(c.notReady) > 0 {
		sb.WriteString(" notReady=[")
		for _, a := range c.notReady {
			fmt.Fprintf(&sb, "%d ", a.pcl.GetClid())
		}
		sb.WriteRune(']')
	}
	if c.withNotYetLoadedDeps != nil {
		fmt.Fprintf(&sb, " notYetLoadedDeps of %d [", c.withNotYetLoadedDeps.pcl.GetClid())
		for _, d := range c.withNotYetLoadedDeps.deps.notYetLoaded {
			fmt.Fprintf(&sb, "%d ", d.GetClid())
		}
		sb.WriteRune(']')
	}
	if c.latestTriggered != nil {
		t := c.latestTrigger
		fmt.Fprintf(&sb, " latestTriggered=%d at %s", c.latestTriggered.pcl.GetClid(), t.GetTime().AsTime())
	}
	sb.WriteRune(')')
	return sb.String()
}

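// add appends the CL to the combo and updates the combo's aggregate state:
// readiness, not-yet-loaded deps, and the latest trigger time.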
func (c *combo) add(info *clInfo, chooseTrigger func(ts *run.Triggers) *run.Trigger) {
	c.all = append(c.all, info)
	if c.clids == nil {
		c.clids = map[int64]struct{}{info.pcl.GetClid(): {}}
	} else {
		c.clids[info.pcl.GetClid()] = struct{}{}
	}

	if !info.cqReady {
		c.notReady = append(c.notReady, info)
	}

	if info.deps != nil && len(info.deps.notYetLoaded) > 0 {
		c.withNotYetLoadedDeps = info
	}
	trig := chooseTrigger(info.pcl.GetTriggers())
	if pb := trig.GetTime(); pb != nil {
		t := pb.AsTime()
		if c.maxTriggeredTime.IsZero() || t.After(c.maxTriggeredTime) {
			c.maxTriggeredTime = t
			c.latestTriggered = info
			c.latestTrigger = trig
		}
	}
}

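// missingDeps returns the not-submitted deps of the combo's CLs which are not
// themselves part of the combo.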
func (c *combo) missingDeps() []*changelist.Dep {
	var missing []*changelist.Dep
	for _, info := range c.all {
		info.deps.iterateNotSubmitted(info.pcl, func(dep *changelist.Dep) {
			if _, in := c.clids[dep.GetClid()]; !in {
				missing = append(missing, dep)
			}
		})
	}
	return missing
}

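// configGroupsIndexes returns the distinct config group indexes of the combo's
// CLs, typically exactly one.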
func (c *combo) configGroupsIndexes() []int32 {
	res := make([]int32, 0, 1)
	for _, info := range c.all {
		idx := info.pcl.GetConfigGroupIndexes()[0]
		found := false
		for _, v := range res {
			if v == idx {
				found = true
			}
		}
		if !found {
			res = append(res, idx)
		}
	}
	return res
}

// overlappingRuns returns the number of CLs shared with each Run, keyed by the
// Run's index in the component.
func (c *combo) overlappingRuns() map[int32]int {
	res := map[int32]int{}
	for _, info := range c.all {
		for _, index := range info.runIndexes {
			res[index]++
		}
	}
	return res
}

// checkExisting checks whether the Run about to be created already exists.
//
// If it does exist, it decides when to triage again.
func checkExisting(ctx context.Context, rc *runcreator.Creator, combo combo, useTrigger func(*run.Triggers) *run.Trigger) (bool, time.Time, error) {
	// Check if the Run about to be created already exists, in order to avoid
	// infinite retries if CL triggers are somehow re-used.
	existing := run.Run{ID: rc.ExpectedRunID()}
	switch err := datastore.Get(ctx, &existing); {
	case err == datastore.ErrNoSuchEntity:
		// This is the expected case.
		// NOTE: actual creation may still fail due to a race, and that's fine.
		return false, time.Time{}, nil
	case err != nil:
		return false, time.Time{}, errors.Annotate(err, "failed to check for existing Run %q", existing.ID).Tag(transient.Tag).Err()
	case !run.IsEnded(existing.Status):
		// The Run already exists. Most likely, another triager invoked from
		// another TQ task got there first. Check again in a few seconds, at which
		// point PM should have incorporated the existing Run into its state.
		logging.Warningf(ctx, "Run %q already exists. If this warning persists, there is a bug in PM which appears to not see this Run", existing.ID)
		return true, clock.Now(ctx).Add(5 * time.Second), nil
	default:
		since := clock.Since(ctx, existing.EndTime)
		if since < time.Minute {
			logging.Warningf(ctx, "Recently finalized Run %q already exists, will check later", existing.ID)
			return true, existing.EndTime.Add(time.Minute), nil
		}
		logging.Warningf(ctx, "Run %q already exists, finalized %s ago; will purge CLs with reused triggers", existing.ID, since)
		for _, info := range combo.all {
			info.addPurgeReason(useTrigger(info.pcl.Triggers), &changelist.CLError{
				Kind: &changelist.CLError_ReusedTrigger_{
					ReusedTrigger: &changelist.CLError_ReusedTrigger{
						Run: string(existing.ID),
					},
				},
			})
		}
		return true, time.Time{}, nil
	}
}

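// useNewPatchsetTrigger selects the new patchset trigger from a CL's Triggers.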
func useNewPatchsetTrigger(ts *run.Triggers) *run.Trigger {
	return ts.GetNewPatchsetRunTrigger()
}

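// useCQVoteTrigger selects the CQ vote trigger from a CL's Triggers.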
func useCQVoteTrigger(ts *run.Triggers) *run.Trigger {
	return ts.GetCqVoteTrigger()
}

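// findImmediateHardDeps returns the CLIDs of the given CL's immediate HARD
// deps, i.e. HARD deps that are not themselves deps of another HARD dep,
// sorted in ascending order.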
func (rs *runStage) findImmediateHardDeps(pcl *prjpb.PCL) []int64 {
	// TODO: use Snapshot.GitDeps.Immediate to find the immediate hard deps.
	candidates := make(map[int64]struct{})
	notCandidates := make(map[int64]struct{})

	// Iterate the deps in reverse, because child CLs have higher CLIDs than
	// their deps in most cases (though this is not guaranteed).
	for i := len(pcl.GetDeps()) - 1; i >= 0; i-- {
		dep := pcl.GetDeps()[i]
		if _, exist := notCandidates[dep.GetClid()]; exist {
			continue
		}
		if dep.GetKind() == changelist.DepKind_HARD {
			candidates[dep.GetClid()] = struct{}{}

			var dpcl *prjpb.PCL
			switch info, exist := rs.cls[dep.GetClid()]; {
			case exist:
				dpcl = info.pcl
			default:
				// Panic if the dep PCL is unknown.
				//
				// If a dep is unknown, the dep triager shouldn't mark the CL
				// as cqReady. If it did, there must be a bug.
				dpcl = rs.pm.MustPCL(dep.GetClid())
			}
			// None of its deps can be a candidate.
			for _, depdep := range dpcl.GetDeps() {
				notCandidates[depdep.GetClid()] = struct{}{}
				delete(candidates, depdep.GetClid())
			}
		}
	}

	var clids []int64
	if len(candidates) > 0 {
		clids = make([]int64, 0, len(candidates))
		for clid := range candidates {
			clids = append(clids, clid)
		}
	}
	sort.Slice(clids, func(i, j int) bool {
		return clids[i] < clids[j]
	})
	return clids
}

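// resolveDepRuns decides whether the Run described by rc should be created
// now and, if so, which existing full Runs on its immediate hard deps it
// should depend on.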
func (rs *runStage) resolveDepRuns(ctx context.Context, rc *runcreator.Creator) (shouldCreateNow bool, depRuns common.RunIDs) {
	info, ok := rs.cls[int64(rc.InputCLs[0].ID)]
	if !ok {
		panic(fmt.Errorf("resolveDepRuns: rc has a CL %d, not tracked in the component",
			rc.InputCLs[0].ID))
	}
	ctx = logging.SetField(ctx, "cl", info.pcl.GetClid())

	var ideps []int64
	switch cg := rs.pm.ConfigGroup(info.pcl.GetConfigGroupIndexes()[0]); {
	case !common.IsMCEDogfooder(ctx, rc.CreatedBy),
		rc.Mode != run.Mode(run.FullRun),
		cg.Content.GetCombineCls() != nil:
		// The CL is cqReady; otherwise, `rc` wouldn't have been created.
		// If the triggerer is not a dogfooder, return true to let it go.
		return true, nil
	default:
		ideps = rs.findImmediateHardDeps(info.pcl)
		if len(ideps) == 0 {
			logging.Debugf(ctx, "resolveDepRuns: no immediate hard deps found")
			return true, nil
		}
		logging.Debugf(ctx, "resolveDepRuns: immediate hard-deps %v", ideps)
	}

	shouldCreateNow = true // be optimistic
	for _, idep := range ideps {
		switch dPCL := rs.pm.MustPCL(idep); {
		case dPCL.GetSubmitted():
			continue
		case dPCL.GetStatus() != prjpb.PCL_OK:
			// If the dep status is not PCL_OK, the dep triager should have put
			// it in notYetLoaded or invalidDeps.Unwatched.
			//
			// Therefore, the origin CL must not be cqReady, and the
			// runcreator.Creator should not have been created.
			panic(fmt.Errorf("resolveDepRuns: depCL %d has status %q", dPCL.GetClid(), dPCL.GetStatus()))
		}
		switch dinfo, ok := rs.cls[idep]; {
		case !ok:
			// The dep is not in the same component.
			// All hard deps are supposed to be tracked in the same component, but
			// this could still check the other component and the Run status.
			//
			// Ignore it for now; at worst, the submission will be rejected.
			// TODO(ddoman): check if there is a Run triggered from
			// the dep's CQ vote, either completed or not.
			logging.Errorf(ctx, "resolveDepRuns: a HARD dep CL %d is not tracked in the same component", idep)
			continue
		case dinfo.runCountByMode[run.FullRun] > 0:
			for _, idx := range dinfo.runIndexes {
				if prun := rs.c.GetPruns()[idx]; prun.GetMode() == string(run.FullRun) {
					depRuns = append(depRuns, common.RunID(prun.GetId()))
				}
			}
		default:
			shouldCreateNow = false
		}
	}
	return shouldCreateNow, depRuns
}

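// quotaPayer returns the identity billed for the Run's quota: the CL owner
// when a full Run is triggered on a CL with Auto-Submit, and the triggerer
// otherwise.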
func quotaPayer(cl *changelist.CL, owner, triggerer identity.Identity, t *run.Trigger) identity.Identity {
	switch owner {
	case "":
		panic(fmt.Errorf("CL %d: empty owner was given: %q", cl.ID, owner))
	case identity.AnonymousIdentity:
		panic(fmt.Errorf("CL %d: the CL owner is anonymous", cl.ID))
	}
	mode := run.Mode(t.GetMode())
	cqv := t.GetModeDefinition().GetCqLabelValue()
	switch {
	case mode != run.FullRun && cqv != trigger.CQVoteByMode(run.FullRun):
		return triggerer
	case trigger.HasAutoSubmit(cl.Snapshot.GetGerrit().GetInfo()):
		return owner
	default:
		return triggerer
	}
}