go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/prjmanager/triager/cls.go (about)

     1  // Copyright 2021 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package triager
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"time"
    21  
    22  	"go.chromium.org/luci/common/logging"
    23  	"go.chromium.org/luci/cv/internal/changelist"
    24  	"go.chromium.org/luci/cv/internal/prjmanager/prjpb"
    25  	"go.chromium.org/luci/cv/internal/run"
    26  )
    27  
    28  // triageCLs decides whether individual CLs ought to be acted upon.
    29  func triageCLs(ctx context.Context, c *prjpb.Component, pm pmState) map[int64]*clInfo {
    30  	cls := make(map[int64]*clInfo, len(c.GetClids()))
    31  	for _, clid := range c.GetClids() {
    32  		cls[clid] = &clInfo{
    33  			pcl:              pm.MustPCL(clid),
    34  			purgingCL:        pm.PurgingCL(clid),        // may be nil
    35  			triggeringCLDeps: pm.TriggeringCLDeps(clid), // may be nil
    36  			runCountByMode:   make(map[run.Mode]int),
    37  		}
    38  	}
    39  	for index, r := range c.GetPruns() {
    40  		for _, clid := range r.GetClids() {
    41  			info := cls[clid]
    42  			info.runIndexes = append(info.runIndexes, int32(index))
    43  			info.runCountByMode[run.Mode(r.GetMode())]++
    44  		}
    45  	}
    46  	for _, info := range cls {
    47  		info.triage(ctx, c, pm)
    48  	}
    49  	for clid, info := range cls {
    50  		// Say the following events happens in sequence.
    51  		// 1. there are CL1(parent) and CL2(child).
    52  		// 2. CQ+2 is triggered on CL2, and TriggeringCLDeps is created.
    53  		// 3. cltriggerer voted CL1 and CL2 in parallel.
    54  		// 4. CLUpdated event is delivered for CL1 only.
    55  		// 5. Triager created a Run for CL1.
    56  		// 6. For some reasons, *before* PM receives a CLUpdated event for CL2,
    57  		//    - PM receives a CLUpdated event,
    58  		//    - triager created a run for CL1,
    59  		//    - the run ended
    60  		// 7. PM receives a CLUpdated event for CL2.
    61  		//
    62  		// At (7), CL1 has CQ=0 and CL2 has CQ+2.
    63  		// there is no easy way for triager to find the reason of CL1 not having
    64  		// CQ+2. Hence, it will create a new TriggeringCLDeps{} to vote on CL1
    65  		// again, of which run just failed.
    66  		//
    67  		// To prevent this, the below marks deps as not-cq-ready if there is
    68  		// an inflight TriggeringCLDeps{} referencing the CL as a dep.
    69  		// i.e., triager starts creating Runs for a stack of CLs, only if
    70  		// the entire stack is ready.
    71  		for _, depCLID := range info.triggeringCLDeps.GetDepClids() {
    72  			ctx = logging.SetField(ctx, "origin_cl", clid)
    73  			info.cqReady = false
    74  			if di, ok := cls[depCLID]; ok {
    75  				di.cqReady = false
    76  			}
    77  		}
    78  	}
    79  	return cls
    80  }
    81  
// clInfo represents a CL in the PM component of CLs.
//
// triageCLs populates the non-embedded fields first; the embedded triagedCL
// part is then filled in by clInfo.triage().
type clInfo struct {
	// pcl is the PM's record of the CL, as returned by pmState.MustPCL.
	pcl *prjpb.PCL
	// runIndexes are indexes of Component.PRuns which references this CL.
	runIndexes []int32
	// runCountByMode is # of Component.PRuns, referencing this CL,
	// by the Run mode.
	runCountByMode map[run.Mode]int

	// purgingCL is set if CL is already being purged. May be nil.
	purgingCL *prjpb.PurgingCL
	// triggeringCLDeps is set if the deps of the CL is being triggered.
	// May be nil.
	triggeringCLDeps *prjpb.TriggeringCLDeps

	// triagedCL holds the outcome of triaging this CL.
	triagedCL
}
    98  
    99  // lastCQVoteTriggered returns the last triggered time by CQ vote among this CL
   100  // and its triggered deps. Can be zero time.Time if neither are triggered.
   101  func (info *clInfo) lastCQVoteTriggered() time.Time {
   102  	t := info.pcl.GetTriggers().GetCqVoteTrigger()
   103  	thisPB := t.GetTime()
   104  	switch {
   105  	case thisPB == nil && info.deps == nil:
   106  		return time.Time{}
   107  	case thisPB == nil:
   108  		return info.deps.lastCQVoteTriggered
   109  	case info.deps == nil || info.deps.lastCQVoteTriggered.IsZero():
   110  		return thisPB.AsTime()
   111  	default:
   112  		this := thisPB.AsTime()
   113  		if info.deps.lastCQVoteTriggered.Before(this) {
   114  			return this
   115  		}
   116  		return info.deps.lastCQVoteTriggered
   117  	}
   118  }
   119  
// triagedCL is the result of CL triage (see clInfo.triage()).
//
// Note: This doesn't take into account `combine_cls.stabilization_delay`,
// thus a CL may be ready or with purgeReasons, but due to stabilization delay,
// it shouldn't be acted upon *yet*.
type triagedCL struct {
	// deps are triaged deps, set only if CL is watched by exactly 1 config
	// group of the current project.
	deps *triagedDeps
	// purgeReasons is set if the CL ought to be purged.
	//
	// Not set if .purgingCL is non-nil since CL is already being purged.
	//
	// NOTE(review): triageNewTriggers copies the PCL's own purge reasons into
	// this field unconditionally; confirm that this agrees with the statement
	// above.
	purgeReasons []*prjpb.PurgeReason
	// cqReady is true if it can be used in creation of new CQ-Vote Runs.
	//
	// If true, purgeReasons must be nil, and deps must be OK though they may
	// contain not-yet-loaded deps.
	cqReady bool

	// nprReady is true if it can be used in the creation of a new patchset
	// run.
	nprReady bool
}
   143  
   144  func isCQVotePurging(purgingCL *prjpb.PurgingCL) bool {
   145  	return purgingCL.GetTriggers().GetCqVoteTrigger() != nil || purgingCL.GetAllActiveTriggers()
   146  }
   147  
   148  func isNPRVotePurging(purgingCL *prjpb.PurgingCL) bool {
   149  	return purgingCL.GetTriggers().GetNewPatchsetRunTrigger() != nil || purgingCL.GetAllActiveTriggers()
   150  }
   151  
   152  func (info *clInfo) prunCountByType(c *prjpb.Component) (int, int) {
   153  	var nCQVoteRuns, nNewPatchsetRuns int
   154  	for _, i := range info.runIndexes {
   155  		switch mode := run.Mode(c.Pruns[i].GetMode()); mode {
   156  		case run.NewPatchsetRun:
   157  			nNewPatchsetRuns++
   158  		default:
   159  			nCQVoteRuns++
   160  		}
   161  	}
   162  	return nCQVoteRuns, nNewPatchsetRuns
   163  }
   164  
   165  // triage sets the triagedCL part of clInfo.
   166  //
   167  // Expects non-triagedCL part of clInfo to be already set.
   168  // panics iff component is not in a valid state.
   169  func (info *clInfo) triage(ctx context.Context, c *prjpb.Component, pm pmState) {
   170  	nCQVoteRuns, nNewPatchsetRuns := info.prunCountByType(c)
   171  	var triageCQTrigger, triageNPRTrigger bool
   172  	switch {
   173  	case nCQVoteRuns > 0:
   174  		// Once CV supports API-based triggering, a CL may be both in purged
   175  		// state and have an incomplete Run for the same type of trigger at the
   176  		// same time. The presence in a Run is more important, so treat it as
   177  		// such.
   178  		info.triageInCQVoteRun(ctx, pm)
   179  	case isCQVotePurging(info.purgingCL):
   180  		info.triageInCQVotePurge(ctx, pm)
   181  	case info.pcl.GetTriggers().GetCqVoteTrigger() != nil:
   182  		triageCQTrigger = true
   183  	}
   184  
   185  	switch {
   186  	case nNewPatchsetRuns > 0:
   187  		info.triageInNewPatchsetRun(pm)
   188  	case isNPRVotePurging(info.purgingCL):
   189  		info.triageInNewPatchsetPurge(pm)
   190  	case info.pcl.GetTriggers().GetNewPatchsetRunTrigger() != nil:
   191  		triageNPRTrigger = true
   192  	}
   193  	info.triageNewTriggers(ctx, pm, triageCQTrigger, triageNPRTrigger)
   194  }
   195  
   196  func (info *clInfo) triageInCQVoteRun(ctx context.Context, pm pmState) {
   197  	if !info.pcl.GetSubmitted() && info.pclStatusReadyForTriage() && info.pcl.GetTriggers().GetCqVoteTrigger() != nil {
   198  		pcl := info.pcl
   199  		if len(pcl.GetConfigGroupIndexes()) != 1 {
   200  			// This is expected if project config has changed, but Run's reaction to it
   201  			// via OnRunFinished event hasn't yet reached PM.
   202  			return
   203  		}
   204  		cgIndex := pcl.GetConfigGroupIndexes()[0]
   205  		info.deps = triageDeps(ctx, pcl, cgIndex, pm)
   206  		// A purging or triggering CL must not be "ready" to a new cq run.
   207  		if info.deps.OK() && !isCQVotePurging(info.purgingCL) && len(info.deps.needToTrigger) == 0 {
   208  			info.cqReady = true
   209  		}
   210  	}
   211  }
   212  
   213  func (info *clInfo) triageInNewPatchsetRun(pm pmState) {
   214  	if len(info.pcl.GetConfigGroupIndexes()) != 1 {
   215  		// This is expected if project config has changes, but Run's reation to
   216  		// it via OnRunFinished event has not yet reached PM.
   217  		return
   218  	}
   219  	if !info.pcl.GetSubmitted() && info.pclStatusReadyForTriage() && info.pcl.GetTriggers().GetNewPatchsetRunTrigger() != nil &&
   220  		!isNPRVotePurging(info.purgingCL) {
   221  		info.nprReady = true
   222  	}
   223  }
   224  
   225  func (info *clInfo) pclStatusReadyForTriage() bool {
   226  	switch s := info.pcl.GetStatus(); s {
   227  	case prjpb.PCL_DELETED, prjpb.PCL_UNWATCHED, prjpb.PCL_UNKNOWN:
   228  		return false
   229  	case prjpb.PCL_OK:
   230  		return true
   231  	default:
   232  		panic(fmt.Errorf("PCL has unrecognized status %s", s))
   233  	}
   234  }
   235  
   236  func (info *clInfo) triageInCQVotePurge(ctx context.Context, pm pmState) {
   237  	// The PM hasn't noticed yet the completion of the async purge.
   238  	// The result of purging is modified CL, which may be observed by PM earlier
   239  	// than completion of purge.
   240  	//
   241  	// Thus, consider these CLs in potential Run Creation, but don't mark them
   242  	// ready in order to avoid creating new Runs.
   243  	if !info.pcl.GetSubmitted() && info.pclStatusReadyForTriage() && info.pcl.Triggers.GetCqVoteTrigger() != nil {
   244  		cgIndexes := info.pcl.GetConfigGroupIndexes()
   245  		switch len(cgIndexes) {
   246  		case 0:
   247  			panic(fmt.Errorf("PCL %d without ConfigGroup index not possible for CL not referenced by any Runs (partitioning bug?)", info.pcl.GetClid()))
   248  		case 1:
   249  			info.deps = triageDeps(ctx, info.pcl, cgIndexes[0], pm)
   250  			// info.deps.OK() may be true, for example if user has already corrected the
   251  			// mistake that previously resulted in purging op. However, don't mark CL
   252  			// ready until purging op completes or expires.
   253  		}
   254  	}
   255  }
   256  
   257  func (info *clInfo) triageInNewPatchsetPurge(pm pmState) {
   258  	// The PM hasn't noticed yet the completion of the async purge.
   259  	// The result of purging is modified CL, which may be observed by PM earlier
   260  	// than completion of purge.
   261  	//
   262  	// Thus, consider these CLs in potential Run Creation, but don't mark them
   263  	// ready in order to avoid creating new Runs.
   264  	if !info.pcl.GetSubmitted() && info.pclStatusReadyForTriage() && info.pcl.Triggers.GetNewPatchsetRunTrigger() != nil {
   265  		if len(info.pcl.GetConfigGroupIndexes()) == 0 {
   266  			panic(fmt.Errorf("PCL %d without ConfigGroup index not possible for CL not referenced by any Runs (partitioning bug?)", info.pcl.GetClid()))
   267  		}
   268  	}
   269  }
   270  
   271  func (info *clInfo) addPurgeReason(t *run.Trigger, clError *changelist.CLError) {
   272  	switch {
   273  	case t == nil:
   274  		info.purgeReasons = append(info.purgeReasons, &prjpb.PurgeReason{
   275  			ClError: clError,
   276  			ApplyTo: &prjpb.PurgeReason_AllActiveTriggers{
   277  				AllActiveTriggers: true,
   278  			},
   279  		})
   280  	case run.Mode(t.Mode) == run.NewPatchsetRun:
   281  		info.purgeReasons = append(info.purgeReasons, &prjpb.PurgeReason{
   282  			ClError: clError,
   283  			ApplyTo: &prjpb.PurgeReason_Triggers{
   284  				Triggers: &run.Triggers{
   285  					NewPatchsetRunTrigger: t,
   286  				},
   287  			},
   288  		})
   289  	default:
   290  		info.purgeReasons = append(info.purgeReasons, &prjpb.PurgeReason{
   291  			ClError: clError,
   292  			ApplyTo: &prjpb.PurgeReason_Triggers{
   293  				Triggers: &run.Triggers{
   294  					CqVoteTrigger: t,
   295  				},
   296  			},
   297  		})
   298  	}
   299  }
   300  
   301  func (info *clInfo) triageNewTriggers(ctx context.Context, pm pmState, triageCQTrigger, triageNPRTrigger bool) {
   302  	pcl := info.pcl
   303  	for _, r := range pcl.GetPurgeReasons() {
   304  		switch {
   305  		case r.GetAllActiveTriggers():
   306  			triageCQTrigger, triageNPRTrigger = false, false
   307  		case r.GetTriggers().GetNewPatchsetRunTrigger() != nil:
   308  			triageNPRTrigger = false
   309  		case r.GetTriggers().GetCqVoteTrigger() != nil:
   310  			triageCQTrigger = false
   311  		}
   312  	}
   313  	info.purgeReasons = append(info.purgeReasons, pcl.GetPurgeReasons()...)
   314  	if !triageCQTrigger && !triageNPRTrigger {
   315  		return
   316  	}
   317  	clid := pcl.GetClid()
   318  	assumption := "not possible for CL not referenced by any Runs (partitioning bug?)"
   319  	switch s := pcl.GetStatus(); s {
   320  	case prjpb.PCL_DELETED, prjpb.PCL_UNWATCHED, prjpb.PCL_UNKNOWN:
   321  		panic(fmt.Errorf("PCL %d status %s %s", clid, s, assumption))
   322  	case prjpb.PCL_OK:
   323  		// OK.
   324  	default:
   325  		panic(fmt.Errorf("PCL has unrecognized status %s", s))
   326  	}
   327  
   328  	if pcl.GetSubmitted() {
   329  		panic(fmt.Errorf("PCL %d submitted %s", clid, assumption))
   330  	}
   331  
   332  	cgIndexes := pcl.GetConfigGroupIndexes()
   333  	switch len(cgIndexes) {
   334  	case 0:
   335  		panic(fmt.Errorf("PCL %d without ConfigGroup index %s", clid, assumption))
   336  	case 1:
   337  		// if either trigger is being purged, do not mark it as ready.
   338  		if triageCQTrigger {
   339  			info.deps = triageDeps(ctx, pcl, cgIndexes[0], pm)
   340  			switch {
   341  			case !info.deps.OK():
   342  				info.addPurgeReason(info.pcl.Triggers.GetCqVoteTrigger(), info.deps.makePurgeReason())
   343  			case len(info.deps.needToTrigger) > 0:
   344  				// no cqReady if it has deps that need to be triggered.
   345  			default:
   346  				info.cqReady = true
   347  			}
   348  		}
   349  		if triageNPRTrigger {
   350  			info.nprReady = true
   351  		}
   352  	default:
   353  		cgNames := make([]string, len(cgIndexes))
   354  		for i, idx := range cgIndexes {
   355  			cgNames[i] = pm.ConfigGroup(idx).ID.Name()
   356  		}
   357  		var purgeTrigger *run.Trigger
   358  		switch {
   359  		case triageCQTrigger && triageNPRTrigger:
   360  			purgeTrigger = nil // purge whole CL
   361  		case triageCQTrigger:
   362  			purgeTrigger = pcl.GetTriggers().GetCqVoteTrigger()
   363  		case triageNPRTrigger:
   364  			purgeTrigger = pcl.GetTriggers().GetNewPatchsetRunTrigger()
   365  		}
   366  		info.addPurgeReason(purgeTrigger, &changelist.CLError{
   367  			Kind: &changelist.CLError_WatchedByManyConfigGroups_{
   368  				WatchedByManyConfigGroups: &changelist.CLError_WatchedByManyConfigGroups{
   369  					ConfigGroups: cgNames,
   370  				},
   371  			},
   372  		})
   373  	}
   374  }
   375  
   376  func (info *clInfo) hasIncompleteRun(m run.Mode) bool {
   377  	return info.runCountByMode[m] > 0
   378  }