go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/run/impl/handler/poke.go (about) 1 // Copyright 2021 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package handler 16 17 import ( 18 "context" 19 "fmt" 20 "time" 21 22 "go.chromium.org/luci/gae/service/datastore" 23 24 "go.chromium.org/luci/common/clock" 25 "go.chromium.org/luci/common/errors" 26 "go.chromium.org/luci/common/logging" 27 "go.chromium.org/luci/common/sync/parallel" 28 29 "go.chromium.org/luci/cv/internal/changelist" 30 "go.chromium.org/luci/cv/internal/common" 31 "go.chromium.org/luci/cv/internal/configs/prjcfg" 32 "go.chromium.org/luci/cv/internal/run" 33 "go.chromium.org/luci/cv/internal/run/impl/state" 34 "go.chromium.org/luci/cv/internal/tryjob" 35 ) 36 37 const ( 38 treeCheckInterval = time.Minute 39 clRefreshInterval = 10 * time.Minute 40 tryjobRefreshInterval = 150 * time.Second 41 treeStatusFailureTimeLimit = 10 * time.Minute 42 ) 43 44 // Poke implements Handler interface. 45 func (impl *Impl) Poke(ctx context.Context, rs *state.RunState) (*Result, error) { 46 if !run.IsEnded(rs.Status) && !rs.StartTime.IsZero() && clock.Since(ctx, rs.CreateTime) > 2*24*time.Hour { 47 logging.Warningf(ctx, "FIXME - crbug/40946713: run has been active for 2 days. This is likely caused by the referenced bug.") 48 } 49 if !run.IsEnded(rs.Status) && clock.Since(ctx, rs.CreateTime) > common.MaxRunTotalDuration { 50 return impl.Cancel(ctx, rs, []string{ 51 fmt.Sprintf("max run duration of %s has reached", common.MaxRunTotalDuration), 52 }) 53 } 54 55 rs = rs.ShallowCopy() 56 if shouldCheckTree(ctx, rs.Status, rs.Submission) { 57 rs.CloneSubmission() 58 switch open, err := rs.CheckTree(ctx, impl.TreeClient); { 59 case err != nil && clock.Since(ctx, rs.Submission.TreeErrorSince.AsTime()) > treeStatusFailureTimeLimit: 60 // The tree has been erroring for too long. Reset the triggers and 61 // fail the run. 62 cg, err := prjcfg.GetConfigGroup(ctx, rs.ID.LUCIProject(), rs.ConfigGroupID) 63 if err != nil { 64 return nil, err 65 } 66 rims := make(map[common.CLID]reviewInputMeta, len(rs.CLs)) 67 whoms := rs.Mode.GerritNotifyTargets() 68 meta := reviewInputMeta{ 69 notify: whoms, 70 // Add the same set of group/people to the attention set. 71 addToAttention: whoms, 72 reason: submissionFailureAttentionReason, 73 message: fmt.Sprintf(persistentTreeStatusAppFailureTemplate, cg.Content.GetVerifiers().GetTreeStatus().GetUrl()), 74 } 75 for _, cl := range rs.CLs { 76 if !rs.HasRootCL() || rs.RootCL == cl { 77 rims[cl] = meta 78 } 79 } 80 scheduleTriggersReset(ctx, rs, rims, run.Status_FAILED) 81 return &Result{ 82 State: rs, 83 }, nil 84 case err != nil: 85 logging.Warningf(ctx, "tree status check failed with error: %s", err) 86 fallthrough 87 case !open: 88 if err := impl.RM.PokeAfter(ctx, rs.ID, treeCheckInterval); err != nil { 89 return nil, err 90 } 91 default: 92 return impl.OnReadyForSubmission(ctx, rs) 93 } 94 } 95 96 // If it's scheduled to be cancelled, skip the refresh. 97 // The long op might have been expired, but it should be removed at the end 98 // of this call first, and then the next Poke() will run this check again. 99 if !isCurrentlyResettingTriggers(rs) && shouldRefreshCLs(ctx, rs) { 100 cg, runCLs, cls, err := loadCLsAndConfig(ctx, rs, rs.CLs) 101 if err != nil { 102 return nil, err 103 } 104 switch ok, err := checkRunCreate(ctx, rs, cg, runCLs, cls); { 105 case err != nil: 106 return nil, err 107 case ok: 108 if err := impl.CLUpdater.ScheduleBatch( 109 ctx, rs.ID.LUCIProject(), cls, 110 changelist.UpdateCLTask_RUN_POKE); err != nil { 111 return nil, err 112 } 113 rs.LatestCLsRefresh = datastore.RoundTime(clock.Now(ctx).UTC()) 114 } 115 } 116 117 if shouldRefreshTryjobs(ctx, rs) { 118 executions := rs.Tryjobs.GetState().GetExecutions() 119 errs := errors.NewLazyMultiError(len(executions)) 120 poolErr := parallel.WorkPool(min(8, len(executions)), func(workCh chan<- func() error) { 121 for i, execution := range executions { 122 // Only care about the latest attempt with the assumption that all 123 // earlier attempt should have been ended already. 124 switch latestAttempt := tryjob.LatestAttempt(execution); { 125 case latestAttempt == nil: 126 case latestAttempt.GetExternalId() == "": 127 case latestAttempt.GetStatus() == tryjob.Status_TRIGGERED: 128 // Only update Tryjob if it has been triggered at the Tryjob backend. 129 i := i 130 workCh <- func() error { 131 errs.Assign(i, impl.TN.ScheduleUpdate(ctx, 132 common.TryjobID(latestAttempt.GetTryjobId()), 133 tryjob.ExternalID(latestAttempt.GetExternalId()))) 134 return nil 135 } 136 } 137 138 } 139 }) 140 switch { 141 case poolErr != nil: 142 panic(poolErr) 143 case errs.Get() != nil: 144 return nil, common.MostSevereError(errs.Get()) 145 default: 146 rs.LatestTryjobsRefresh = datastore.RoundTime(clock.Now(ctx).UTC()) 147 } 148 } 149 150 return impl.processExpiredLongOps(ctx, rs) 151 } 152 153 func shouldCheckTree(ctx context.Context, st run.Status, sub *run.Submission) bool { 154 switch { 155 case st != run.Status_WAITING_FOR_SUBMISSION: 156 case sub.GetLastTreeCheckTime() == nil: 157 return true 158 case !sub.GetTreeOpen(): 159 return clock.Now(ctx).Sub(sub.GetLastTreeCheckTime().AsTime()) >= treeCheckInterval 160 } 161 return false 162 } 163 164 func shouldRefreshCLs(ctx context.Context, rs *state.RunState) bool { 165 return shouldRefresh(ctx, rs, rs.LatestCLsRefresh, clRefreshInterval) 166 } 167 168 func shouldRefreshTryjobs(ctx context.Context, rs *state.RunState) bool { 169 return shouldRefresh(ctx, rs, rs.LatestTryjobsRefresh, tryjobRefreshInterval) 170 } 171 172 func shouldRefresh(ctx context.Context, rs *state.RunState, last time.Time, interval time.Duration) bool { 173 switch { 174 case run.IsEnded(rs.Status): 175 return false 176 case last.IsZero(): 177 last = rs.CreateTime 178 fallthrough 179 default: 180 return clock.Since(ctx, last) > interval 181 } 182 }