go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/tryjob/execute/work.go (about) 1 // Copyright 2022 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package execute 16 17 import ( 18 "context" 19 "fmt" 20 "sort" 21 22 "google.golang.org/protobuf/types/known/timestamppb" 23 24 "go.chromium.org/luci/common/clock" 25 "go.chromium.org/luci/common/data/stringset" 26 "go.chromium.org/luci/gae/service/datastore" 27 28 "go.chromium.org/luci/cv/internal/common" 29 "go.chromium.org/luci/cv/internal/run" 30 "go.chromium.org/luci/cv/internal/tryjob" 31 ) 32 33 // startTryjobs triggers Tryjobs for the given Definitions by either reusing 34 // existing Tryjobs or launching new ones. 35 func (e *Executor) startTryjobs(ctx context.Context, r *run.Run, definitions []*tryjob.Definition, executions []*tryjob.ExecutionState_Execution) ([]*tryjob.Tryjob, error) { 36 cls, err := run.LoadRunCLs(ctx, r.ID, r.CLs) 37 if err != nil { 38 return nil, err 39 } 40 w := &worker{ 41 backend: e.Backend, 42 rm: e.RM, 43 run: r, 44 cls: cls, 45 knownTryjobIDs: make(common.TryjobIDSet), 46 knownExternalIDs: make(stringset.Set), 47 reuseKey: computeReuseKey(cls), 48 clPatchsets: make(tryjob.CLPatchsets, len(cls)), 49 } 50 for _, execution := range executions { 51 for _, attempt := range execution.GetAttempts() { 52 if tjID := common.TryjobID(attempt.GetTryjobId()); tjID != 0 { 53 w.knownTryjobIDs.Add(tjID) 54 } 55 if eid := attempt.GetExternalId(); eid != "" { 56 w.knownExternalIDs.Add(eid) 57 } 58 } 59 } 60 for i, cl := range cls { 61 w.clPatchsets[i] = tryjob.MakeCLPatchset(cl.ID, cl.Detail.GetPatchset()) 62 } 63 sort.Sort(w.clPatchsets) 64 w.findReuseFns = append(w.findReuseFns, w.findReuseInCV, w.findReuseInBackend) 65 66 ret, err := w.start(ctx, definitions) 67 for _, le := range w.logEntries { 68 e.log(le) 69 } 70 if err != nil { 71 return nil, err 72 } 73 return ret, nil 74 } 75 76 // worker implements the workflow to trigger Tryjobs for the given Definitions. 77 // 78 // It does this by searching for Tryjobs that can be reused first, and then 79 // launching new Tryjobs if nothing can be reused. 80 type worker struct { 81 run *run.Run 82 cls []*run.RunCL 83 knownTryjobIDs common.TryjobIDSet 84 knownExternalIDs stringset.Set 85 86 reuseKey string 87 clPatchsets tryjob.CLPatchsets 88 backend TryjobBackend 89 rm rm 90 91 findReuseFns []findReuseFn 92 logEntries []*tryjob.ExecutionLogEntry 93 } 94 95 func (w *worker) makeBaseTryjob(ctx context.Context) *tryjob.Tryjob { 96 now := datastore.RoundTime(clock.Now(ctx).UTC()) 97 return &tryjob.Tryjob{ 98 EVersion: 1, 99 EntityCreateTime: now, 100 EntityUpdateTime: now, 101 ReuseKey: w.reuseKey, 102 CLPatchsets: w.clPatchsets, 103 } 104 } 105 106 // makePendingTryjob makes a pending Tryjob that is triggered by this Run. 107 func (w *worker) makePendingTryjob(ctx context.Context, def *tryjob.Definition) *tryjob.Tryjob { 108 tj := w.makeBaseTryjob(ctx) 109 tj.Definition = def 110 tj.Status = tryjob.Status_PENDING 111 tj.LaunchedBy = w.run.ID 112 return tj 113 } 114 115 // start triggers Tryjobs for the given Definitions. 116 // 117 // First it searches for any Tryjobs that can be reused, then launches 118 // new Tryjobs for Definitions where nothing can be reused. 119 func (w *worker) start(ctx context.Context, definitions []*tryjob.Definition) ([]*tryjob.Tryjob, error) { 120 reuse, err := w.findReuse(ctx, definitions) 121 if err != nil { 122 return nil, err 123 } 124 ret := make([]*tryjob.Tryjob, len(definitions)) 125 tryjobsToLaunch := make([]*tryjob.Tryjob, 0, len(definitions)) 126 reusedTryjobsCount := 0 127 for i, def := range definitions { 128 switch reuseTryjob, hasReuse := reuse[def]; { 129 case !hasReuse: 130 tryjobsToLaunch = append(tryjobsToLaunch, w.makePendingTryjob(ctx, def)) 131 case reuseTryjob.LaunchedBy == w.run.ID && reuseTryjob.Status == tryjob.Status_PENDING: 132 // This typically happens when a previous task created the Tryjob entity 133 // but failed to launch the Tryjob at the backend. Such Tryjob entity will 134 // be surfaced again when searching for reusable Tryjob within CV. 135 // Therefore, try to launch the Tryjob again. 136 tryjobsToLaunch = append(tryjobsToLaunch, reuseTryjob) 137 default: 138 ret[i] = reuseTryjob 139 reusedTryjobsCount += 1 140 } 141 } 142 143 if len(tryjobsToLaunch) > 0 { 144 // Save the newly created Tryjobs and ensure Tryjob IDs are populated. 145 var newlyCreatedTryjobs []*tryjob.Tryjob 146 for _, tj := range tryjobsToLaunch { 147 if tj.ID == 0 { 148 newlyCreatedTryjobs = append(newlyCreatedTryjobs, tj) 149 } 150 } 151 if len(newlyCreatedTryjobs) > 0 { 152 if err := datastore.Put(ctx, newlyCreatedTryjobs); err != nil { 153 return nil, err 154 } 155 } 156 tryjobsToLaunch, err = w.launchTryjobs(ctx, tryjobsToLaunch) 157 if err != nil { 158 return nil, err 159 } 160 // Copy the launched Tryjobs to the returned Tryjobs at the 161 // corresponding location. 162 if reusedTryjobsCount+len(tryjobsToLaunch) != len(definitions) { 163 panic(fmt.Errorf("impossible; requested %d Tryjob Definition, reused %d Tryjobs but launched %d new Tryjobs", 164 len(definitions), reusedTryjobsCount, len(tryjobsToLaunch))) 165 } 166 idx := 0 167 for i, tj := range ret { 168 if tj == nil { 169 ret[i] = tryjobsToLaunch[idx] 170 idx += 1 171 } 172 } 173 } 174 175 return ret, nil 176 } 177 178 type findReuseFn func(context.Context, []*tryjob.Definition) (map[*tryjob.Definition]*tryjob.Tryjob, error) 179 180 // findReuse finds Tryjobs that shall be reused. 181 func (w *worker) findReuse(ctx context.Context, definitions []*tryjob.Definition) (map[*tryjob.Definition]*tryjob.Tryjob, error) { 182 if len(w.findReuseFns) == 0 { 183 return nil, nil 184 } 185 ret := make(map[*tryjob.Definition]*tryjob.Tryjob, len(definitions)) 186 remainingDefinitions := make([]*tryjob.Definition, 0, len(definitions)) 187 // Start with Tryjobs' Definitions that enable reuse. 188 for _, def := range definitions { 189 if !def.GetDisableReuse() { 190 remainingDefinitions = append(remainingDefinitions, def) 191 } 192 } 193 194 for _, fn := range w.findReuseFns { 195 reuse, err := fn(ctx, remainingDefinitions) 196 if err != nil { 197 return nil, err 198 } 199 for def, tj := range reuse { 200 ret[def] = tj 201 } 202 // Reuse the `remainingDefinitions` slice and filter out the 203 // Definitions that have found reuse Tryjobs. 204 tmp := remainingDefinitions[:0] 205 for _, def := range remainingDefinitions { 206 if _, ok := reuse[def]; !ok { 207 tmp = append(tmp, def) 208 } 209 } 210 remainingDefinitions = tmp 211 if len(remainingDefinitions) == 0 { 212 break 213 } 214 } 215 216 if len(ret) > 0 { 217 reusedTryjobLogs := make([]*tryjob.ExecutionLogEntry_TryjobSnapshot, 0, len(ret)) 218 for def, tj := range ret { 219 reusedTryjobLogs = append(reusedTryjobLogs, makeLogTryjobSnapshot(def, tj, true)) 220 } 221 w.logEntries = append(w.logEntries, &tryjob.ExecutionLogEntry{ 222 Time: timestamppb.New(clock.Now(ctx).UTC()), 223 Kind: &tryjob.ExecutionLogEntry_TryjobsReused_{ 224 TryjobsReused: &tryjob.ExecutionLogEntry_TryjobsReused{ 225 Tryjobs: reusedTryjobLogs, 226 }, 227 }, 228 }) 229 } 230 return ret, nil 231 }