go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/tryjob/execute/work.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package execute
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"sort"
    21  
    22  	"google.golang.org/protobuf/types/known/timestamppb"
    23  
    24  	"go.chromium.org/luci/common/clock"
    25  	"go.chromium.org/luci/common/data/stringset"
    26  	"go.chromium.org/luci/gae/service/datastore"
    27  
    28  	"go.chromium.org/luci/cv/internal/common"
    29  	"go.chromium.org/luci/cv/internal/run"
    30  	"go.chromium.org/luci/cv/internal/tryjob"
    31  )
    32  
    33  // startTryjobs triggers Tryjobs for the given Definitions by either reusing
    34  // existing Tryjobs or launching new ones.
    35  func (e *Executor) startTryjobs(ctx context.Context, r *run.Run, definitions []*tryjob.Definition, executions []*tryjob.ExecutionState_Execution) ([]*tryjob.Tryjob, error) {
    36  	cls, err := run.LoadRunCLs(ctx, r.ID, r.CLs)
    37  	if err != nil {
    38  		return nil, err
    39  	}
    40  	w := &worker{
    41  		backend:          e.Backend,
    42  		rm:               e.RM,
    43  		run:              r,
    44  		cls:              cls,
    45  		knownTryjobIDs:   make(common.TryjobIDSet),
    46  		knownExternalIDs: make(stringset.Set),
    47  		reuseKey:         computeReuseKey(cls),
    48  		clPatchsets:      make(tryjob.CLPatchsets, len(cls)),
    49  	}
    50  	for _, execution := range executions {
    51  		for _, attempt := range execution.GetAttempts() {
    52  			if tjID := common.TryjobID(attempt.GetTryjobId()); tjID != 0 {
    53  				w.knownTryjobIDs.Add(tjID)
    54  			}
    55  			if eid := attempt.GetExternalId(); eid != "" {
    56  				w.knownExternalIDs.Add(eid)
    57  			}
    58  		}
    59  	}
    60  	for i, cl := range cls {
    61  		w.clPatchsets[i] = tryjob.MakeCLPatchset(cl.ID, cl.Detail.GetPatchset())
    62  	}
    63  	sort.Sort(w.clPatchsets)
    64  	w.findReuseFns = append(w.findReuseFns, w.findReuseInCV, w.findReuseInBackend)
    65  
    66  	ret, err := w.start(ctx, definitions)
    67  	for _, le := range w.logEntries {
    68  		e.log(le)
    69  	}
    70  	if err != nil {
    71  		return nil, err
    72  	}
    73  	return ret, nil
    74  }
    75  
// worker implements the workflow to trigger Tryjobs for the given Definitions.
//
// It does this by searching for Tryjobs that can be reused first, and then
// launching new Tryjobs if nothing can be reused.
type worker struct {
	// run is the Run on whose behalf Tryjobs are triggered; its ID is recorded
	// as LaunchedBy on pending Tryjobs created by this worker.
	// cls are the Run's CLs as loaded by run.LoadRunCLs.
	// knownTryjobIDs and knownExternalIDs hold the Tryjob IDs and external IDs
	// found in the attempts of the executions handed to startTryjobs.
	run              *run.Run
	cls              []*run.RunCL
	knownTryjobIDs   common.TryjobIDSet
	knownExternalIDs stringset.Set

	// reuseKey is the value of computeReuseKey(cls); clPatchsets is the sorted
	// list of (CL, patchset) pairs for cls. Both are stamped on every Tryjob
	// built by makeBaseTryjob.
	// backend and rm are copied from the Executor's Backend and RM fields.
	reuseKey    string
	clPatchsets tryjob.CLPatchsets
	backend     TryjobBackend
	rm          rm

	// findReuseFns are the reuse sources that findReuse consults, in order.
	// logEntries accumulates execution log entries produced while working;
	// startTryjobs forwards them to the Executor once start returns.
	findReuseFns []findReuseFn
	logEntries   []*tryjob.ExecutionLogEntry
}
    94  
    95  func (w *worker) makeBaseTryjob(ctx context.Context) *tryjob.Tryjob {
    96  	now := datastore.RoundTime(clock.Now(ctx).UTC())
    97  	return &tryjob.Tryjob{
    98  		EVersion:         1,
    99  		EntityCreateTime: now,
   100  		EntityUpdateTime: now,
   101  		ReuseKey:         w.reuseKey,
   102  		CLPatchsets:      w.clPatchsets,
   103  	}
   104  }
   105  
   106  // makePendingTryjob makes a pending Tryjob that is triggered by this Run.
   107  func (w *worker) makePendingTryjob(ctx context.Context, def *tryjob.Definition) *tryjob.Tryjob {
   108  	tj := w.makeBaseTryjob(ctx)
   109  	tj.Definition = def
   110  	tj.Status = tryjob.Status_PENDING
   111  	tj.LaunchedBy = w.run.ID
   112  	return tj
   113  }
   114  
   115  // start triggers Tryjobs for the given Definitions.
   116  //
   117  // First it searches for any Tryjobs that can be reused, then launches
   118  // new Tryjobs for Definitions where nothing can be reused.
   119  func (w *worker) start(ctx context.Context, definitions []*tryjob.Definition) ([]*tryjob.Tryjob, error) {
   120  	reuse, err := w.findReuse(ctx, definitions)
   121  	if err != nil {
   122  		return nil, err
   123  	}
   124  	ret := make([]*tryjob.Tryjob, len(definitions))
   125  	tryjobsToLaunch := make([]*tryjob.Tryjob, 0, len(definitions))
   126  	reusedTryjobsCount := 0
   127  	for i, def := range definitions {
   128  		switch reuseTryjob, hasReuse := reuse[def]; {
   129  		case !hasReuse:
   130  			tryjobsToLaunch = append(tryjobsToLaunch, w.makePendingTryjob(ctx, def))
   131  		case reuseTryjob.LaunchedBy == w.run.ID && reuseTryjob.Status == tryjob.Status_PENDING:
   132  			// This typically happens when a previous task created the Tryjob entity
   133  			// but failed to launch the Tryjob at the backend. Such Tryjob entity will
   134  			// be surfaced again when searching for reusable Tryjob within CV.
   135  			// Therefore, try to launch the Tryjob again.
   136  			tryjobsToLaunch = append(tryjobsToLaunch, reuseTryjob)
   137  		default:
   138  			ret[i] = reuseTryjob
   139  			reusedTryjobsCount += 1
   140  		}
   141  	}
   142  
   143  	if len(tryjobsToLaunch) > 0 {
   144  		// Save the newly created Tryjobs and ensure Tryjob IDs are populated.
   145  		var newlyCreatedTryjobs []*tryjob.Tryjob
   146  		for _, tj := range tryjobsToLaunch {
   147  			if tj.ID == 0 {
   148  				newlyCreatedTryjobs = append(newlyCreatedTryjobs, tj)
   149  			}
   150  		}
   151  		if len(newlyCreatedTryjobs) > 0 {
   152  			if err := datastore.Put(ctx, newlyCreatedTryjobs); err != nil {
   153  				return nil, err
   154  			}
   155  		}
   156  		tryjobsToLaunch, err = w.launchTryjobs(ctx, tryjobsToLaunch)
   157  		if err != nil {
   158  			return nil, err
   159  		}
   160  		// Copy the launched Tryjobs to the returned Tryjobs at the
   161  		// corresponding location.
   162  		if reusedTryjobsCount+len(tryjobsToLaunch) != len(definitions) {
   163  			panic(fmt.Errorf("impossible; requested %d Tryjob Definition, reused %d Tryjobs but launched %d new Tryjobs",
   164  				len(definitions), reusedTryjobsCount, len(tryjobsToLaunch)))
   165  		}
   166  		idx := 0
   167  		for i, tj := range ret {
   168  			if tj == nil {
   169  				ret[i] = tryjobsToLaunch[idx]
   170  				idx += 1
   171  			}
   172  		}
   173  	}
   174  
   175  	return ret, nil
   176  }
   177  
// findReuseFn searches one source for Tryjobs that can be reused for the given
// Definitions. The result maps each Definition that has a reusable Tryjob to
// that Tryjob; findReuse treats Definitions absent from the map as not having
// a reusable Tryjob in that source.
type findReuseFn func(context.Context, []*tryjob.Definition) (map[*tryjob.Definition]*tryjob.Tryjob, error)
   179  
   180  // findReuse finds Tryjobs that shall be reused.
   181  func (w *worker) findReuse(ctx context.Context, definitions []*tryjob.Definition) (map[*tryjob.Definition]*tryjob.Tryjob, error) {
   182  	if len(w.findReuseFns) == 0 {
   183  		return nil, nil
   184  	}
   185  	ret := make(map[*tryjob.Definition]*tryjob.Tryjob, len(definitions))
   186  	remainingDefinitions := make([]*tryjob.Definition, 0, len(definitions))
   187  	// Start with Tryjobs' Definitions that enable reuse.
   188  	for _, def := range definitions {
   189  		if !def.GetDisableReuse() {
   190  			remainingDefinitions = append(remainingDefinitions, def)
   191  		}
   192  	}
   193  
   194  	for _, fn := range w.findReuseFns {
   195  		reuse, err := fn(ctx, remainingDefinitions)
   196  		if err != nil {
   197  			return nil, err
   198  		}
   199  		for def, tj := range reuse {
   200  			ret[def] = tj
   201  		}
   202  		// Reuse the `remainingDefinitions` slice and filter out the
   203  		// Definitions that have found reuse Tryjobs.
   204  		tmp := remainingDefinitions[:0]
   205  		for _, def := range remainingDefinitions {
   206  			if _, ok := reuse[def]; !ok {
   207  				tmp = append(tmp, def)
   208  			}
   209  		}
   210  		remainingDefinitions = tmp
   211  		if len(remainingDefinitions) == 0 {
   212  			break
   213  		}
   214  	}
   215  
   216  	if len(ret) > 0 {
   217  		reusedTryjobLogs := make([]*tryjob.ExecutionLogEntry_TryjobSnapshot, 0, len(ret))
   218  		for def, tj := range ret {
   219  			reusedTryjobLogs = append(reusedTryjobLogs, makeLogTryjobSnapshot(def, tj, true))
   220  		}
   221  		w.logEntries = append(w.logEntries, &tryjob.ExecutionLogEntry{
   222  			Time: timestamppb.New(clock.Now(ctx).UTC()),
   223  			Kind: &tryjob.ExecutionLogEntry_TryjobsReused_{
   224  				TryjobsReused: &tryjob.ExecutionLogEntry_TryjobsReused{
   225  					Tryjobs: reusedTryjobLogs,
   226  				},
   227  			},
   228  		})
   229  	}
   230  	return ret, nil
   231  }