go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/tryjob/execution.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tryjob
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"time"
    21  
    22  	"go.chromium.org/luci/common/clock"
    23  	"go.chromium.org/luci/common/errors"
    24  	"go.chromium.org/luci/common/retry/transient"
    25  	"go.chromium.org/luci/gae/service/datastore"
    26  
    27  	"go.chromium.org/luci/cv/internal/common"
    28  )
    29  
    30  // TryjobExecutionLogKind is the kind name of executionLog entity.
    31  const TryjobExecutionLogKind = "TryjobExecutionLog"
    32  
    33  // executionState is a Datastore model which stores an `ExecutionState`
    34  // along with the associated metadata.
    35  type executionState struct {
    36  	_kind string         `gae:"$kind,TryjobExecutionState"`
    37  	_id   int64          `gae:"$id,1"`
    38  	Run   *datastore.Key `gae:"$parent"`
    39  	// EVersion is the version of this state. Start with 1.
    40  	EVersion int64 `gae:",noindex"`
    41  	// UpdateTime is exact time of when the state was last updated.
    42  	UpdateTime time.Time `gae:",noindex"`
    43  	State      *ExecutionState
    44  }
    45  
    46  // executionLog is an immutable record for changes to Tryjob execution state.
    47  type executionLog struct {
    48  	_kind string `gae:"$kind,TryjobExecutionLog"`
    49  
    50  	// ID is the value executionState.EVersion which was saved
    51  	// transactionally with the creation of the ExecutionLog entity.
    52  	//
    53  	// Thus, ordering by ID (default Datastore ordering) will automatically
    54  	// provide semantically chronological order.
    55  	ID  int64          `gae:"$id"`
    56  	Run *datastore.Key `gae:"$parent"`
    57  	// Entries record what happened to the Run.
    58  	//
    59  	// Ordered from oldest to newest.
    60  	Entries *ExecutionLogEntries
    61  }
    62  
    63  // LoadExecutionState loads the ExecutionState of the given Run.
    64  //
    65  // Returns nil state and 0 version if execution state never exists for this
    66  // Run before.
    67  func LoadExecutionState(ctx context.Context, rid common.RunID) (state *ExecutionState, version int64, err error) {
    68  	es := &executionState{Run: datastore.MakeKey(ctx, common.RunKind, string(rid))}
    69  	switch err := datastore.Get(ctx, es); {
    70  	case err == datastore.ErrNoSuchEntity:
    71  		return nil, 0, nil
    72  	case err != nil:
    73  		return nil, 0, errors.Annotate(err, "failed to load tryjob execution state of run %q", rid).Tag(transient.Tag).Err()
    74  	default:
    75  		return es.State, es.EVersion, nil
    76  	}
    77  }
    78  
    79  // SaveExecutionState saves new ExecutionState and logs for the run.
    80  //
    81  // Fails if the current version of the state is different from the provided
    82  // `expectedVersion`. This typically means the state has changed since the
    83  // last `LoadState`. `expectedVersion==0` means the state never exists for
    84  // this Run before.
    85  //
    86  // Must be called in a transaction.
    87  func SaveExecutionState(ctx context.Context, rid common.RunID, state *ExecutionState, expectedVersion int64, logEntries []*ExecutionLogEntry) error {
    88  	if datastore.CurrentTransaction(ctx) == nil {
    89  		panic(fmt.Errorf("must be called in a transaction"))
    90  	}
    91  	switch _, latestStateVer, err := LoadExecutionState(ctx, rid); {
    92  	case err != nil:
    93  		return err
    94  	case latestStateVer != expectedVersion:
    95  		return errors.Reason("execution state has changed. before: %d, current: %d", expectedVersion, latestStateVer).Tag(transient.Tag).Err()
    96  	default:
    97  		runKey := datastore.MakeKey(ctx, common.RunKind, string(rid))
    98  		newState := &executionState{
    99  			Run:        runKey,
   100  			EVersion:   latestStateVer + 1,
   101  			UpdateTime: clock.Now(ctx).UTC(),
   102  			State:      state,
   103  		}
   104  
   105  		if len(logEntries) == 0 {
   106  			if err := datastore.Put(ctx, newState); err != nil {
   107  				return errors.Annotate(err, "failed to save execution state").Tag(transient.Tag).Err()
   108  			}
   109  		} else {
   110  			el := &executionLog{
   111  				ID:  newState.EVersion,
   112  				Run: runKey,
   113  				Entries: &ExecutionLogEntries{
   114  					Entries: logEntries,
   115  				},
   116  			}
   117  			if err := datastore.Put(ctx, newState, el); err != nil {
   118  				return errors.Annotate(err, "failed to save execution state and log").Tag(transient.Tag).Err()
   119  			}
   120  		}
   121  		return nil
   122  	}
   123  }
   124  
   125  // LoadExecutionLogs loads all the Tryjob execution log entries for a given Run.
   126  //
   127  // Ordered from logically oldest to newest as it assumes logs associated with
   128  // a smaller ExecutionState EVersion should happen earlier than the logs
   129  // associated with a larger ExecutionState EVersion.
   130  func LoadExecutionLogs(ctx context.Context, runID common.RunID) ([]*ExecutionLogEntry, error) {
   131  	var keys []*datastore.Key
   132  	runKey := datastore.MakeKey(ctx, common.RunKind, string(runID))
   133  	// Getting the key first as getting the execution log entity directly is
   134  	// more likely a cache hit.
   135  	q := datastore.NewQuery("TryjobExecutionLog").KeysOnly(true).Ancestor(runKey)
   136  	if err := datastore.GetAll(ctx, q, &keys); err != nil {
   137  		return nil, errors.Annotate(err, "failed to fetch keys of TryjobExecutionLog entities").Tag(transient.Tag).Err()
   138  	}
   139  	if len(keys) == 0 {
   140  		return nil, nil
   141  	}
   142  	entities := make([]*executionLog, len(keys))
   143  	for i, key := range keys {
   144  		entities[i] = &executionLog{
   145  			ID:  key.IntID(),
   146  			Run: runKey,
   147  		}
   148  	}
   149  	if err := datastore.Get(ctx, entities); err != nil {
   150  		// It's possible to get EntityNotExists, but it may only happen if data
   151  		// retention enforcement is deleting old entities at the same time.
   152  		// Thus, treat all errors as transient.
   153  		return nil, errors.Annotate(common.MostSevereError(err), "failed to fetch TryjobExecutionLog entities").Tag(transient.Tag).Err()
   154  	}
   155  
   156  	// Each TryjobExecutionLog entity contains at least 1 LogEntry.
   157  	ret := make([]*ExecutionLogEntry, 0, len(entities))
   158  	for _, e := range entities {
   159  		ret = append(ret, e.Entries.GetEntries()...)
   160  	}
   161  	return ret, nil
   162  }