go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/deploy/service/model/op_begin.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package model
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"strings"
    21  	"time"
    22  
    23  	statuspb "google.golang.org/genproto/googleapis/rpc/status"
    24  	"google.golang.org/grpc/codes"
    25  	"google.golang.org/grpc/status"
    26  	"google.golang.org/protobuf/types/known/timestamppb"
    27  
    28  	"go.chromium.org/luci/common/clock"
    29  	"go.chromium.org/luci/gae/service/datastore"
    30  
    31  	"go.chromium.org/luci/deploy/api/modelpb"
    32  	"go.chromium.org/luci/deploy/api/rpcpb"
    33  )
    34  
    35  // defaultActuationTimeout is the default value for `actuation_timeout` in
    36  // DeploymentConfig.
    37  const defaultActuationTimeout = 20 * time.Minute
    38  
    39  // ActuationBeginOp collects changes to transactionally apply to the datastore
    40  // to begin a new actuation.
    41  type ActuationBeginOp struct {
    42  	actuation *modelpb.Actuation
    43  	decisions map[string]*modelpb.ActuationDecision
    44  	assets    map[string]*Asset
    45  	history   *historyRecorder
    46  	now       time.Time
    47  
    48  	actuating bool     // true if have at least one ACTUATE_* decision
    49  	errors    []string // accumulated errors for SKIP_BROKEN decisions
    50  }
    51  
    52  // NewActuationBeginOp starts a datastore operation to create an actuation.
    53  //
    54  // Takes ownership of `actuation` mutating it.
    55  func NewActuationBeginOp(ctx context.Context, assets []string, actuation *modelpb.Actuation) (*ActuationBeginOp, error) {
    56  	assetMap, err := fetchAssets(ctx, assets, false)
    57  	if err != nil {
    58  		return nil, err
    59  	}
    60  	return &ActuationBeginOp{
    61  		actuation: actuation,
    62  		decisions: make(map[string]*modelpb.ActuationDecision, len(assetMap)),
    63  		assets:    assetMap,
    64  		history:   &historyRecorder{actuation: actuation},
    65  		now:       clock.Now(ctx),
    66  	}, nil
    67  }
    68  
    69  // MakeDecision decides what to do with an asset and records this decision.
    70  //
    71  // Must be called once for every asset passed to NewActuationBeginOp. Takes
    72  // ownership of `asset` mutating it. AssetToActuate fields must already be
    73  // validated at this point.
    74  func (op *ActuationBeginOp) MakeDecision(ctx context.Context, assetID string, asset *rpcpb.AssetToActuate) {
    75  	// TODO: Implement locks.
    76  	// TODO: Implement anti-stomp protection.
    77  	// TODO: Implement forced actuation.
    78  
    79  	var errors []string
    80  	var brokenStatus *statuspb.Status
    81  
    82  	// Fill in server-assigned fields and collect error statuses.
    83  	populateAssetState := func(what string, s *modelpb.AssetState) {
    84  		if s.Timestamp == nil {
    85  			s.Timestamp = timestamppb.New(op.now)
    86  		}
    87  		s.Deployment = op.actuation.Deployment
    88  		s.Actuator = op.actuation.Actuator
    89  
    90  		if s.Status.GetCode() != int32(codes.OK) {
    91  			errors = append(errors, fmt.Sprintf(
    92  				"asset %q: failed to collect %s: %s",
    93  				assetID, what, status.ErrorProto(s.Status)))
    94  			brokenStatus = s.Status // keep only the last, no big deal
    95  		}
    96  	}
    97  	populateAssetState("intended state", asset.IntendedState)
    98  	populateAssetState("reported state", asset.ReportedState)
    99  
   100  	// Update stored AssetState fields only if the new reported values are
   101  	// non-erroneous.
   102  	stored := op.assets[assetID].Asset
   103  	stored.Config = asset.Config
   104  	if asset.IntendedState.Status.GetCode() == int32(codes.OK) {
   105  		stored.IntendedState = asset.IntendedState
   106  	}
   107  	if asset.ReportedState.Status.GetCode() == int32(codes.OK) {
   108  		stored.ReportedState = asset.ReportedState
   109  	}
   110  
   111  	// Preserve for the AssetHistory.
   112  	lastAppliedState := stored.AppliedState
   113  
   114  	// Make the actual decision.
   115  	var decision modelpb.ActuationDecision_Decision
   116  	switch {
   117  	case !IsActuationEnabed(asset.Config, op.actuation.Deployment.GetConfig()):
   118  		decision = modelpb.ActuationDecision_SKIP_DISABLED
   119  	case len(errors) != 0:
   120  		op.errors = append(op.errors, errors...)
   121  		decision = modelpb.ActuationDecision_SKIP_BROKEN
   122  	case IsUpToDate(asset.IntendedState, asset.ReportedState, stored.AppliedState):
   123  		decision = modelpb.ActuationDecision_SKIP_UPTODATE
   124  		stored.AppliedState = stored.IntendedState
   125  	default:
   126  		op.actuating = true
   127  		decision = modelpb.ActuationDecision_ACTUATE_STALE
   128  	}
   129  
   130  	// Record the decision.
   131  	stored.LastDecision = &modelpb.ActuationDecision{
   132  		Decision: decision,
   133  		Status:   brokenStatus,
   134  	}
   135  	op.decisions[assetID] = stored.LastDecision
   136  
   137  	op.maybeUpdateHistoryAndNotify(&modelpb.AssetHistory{
   138  		AssetId:          assetID,
   139  		HistoryId:        0, // will be populated in maybeUpdateHistoryAndNotify
   140  		Decision:         stored.LastDecision,
   141  		Actuation:        op.actuation,
   142  		Config:           asset.Config,
   143  		IntendedState:    asset.IntendedState,
   144  		ReportedState:    asset.ReportedState,
   145  		LastAppliedState: lastAppliedState,
   146  	})
   147  }
   148  
   149  func (op *ActuationBeginOp) maybeUpdateHistoryAndNotify(entry *modelpb.AssetHistory) {
   150  	asset := op.assets[entry.AssetId]
   151  	entry.PriorConsecutiveFailures = asset.ConsecutiveFailures
   152  
   153  	// If had an open history entry, then the previous actuation (that was
   154  	// supposed to close it) probably crashed, i.e. it didn't call EndActuation.
   155  	// We should record this observation in the history log.
   156  	if asset.IsRecordingHistoryEntry() {
   157  		asset.ConsecutiveFailures += 1
   158  		asset.HistoryEntry.Actuation.State = modelpb.Actuation_EXPIRED
   159  		asset.HistoryEntry.Actuation.Finished = timestamppb.New(op.now)
   160  		asset.HistoryEntry.Actuation.Status = &statuspb.Status{
   161  			Code:    int32(codes.Unknown),
   162  			Message: "the actuation probably crashed: the asset was picked up by another actuation",
   163  		}
   164  		op.history.recordAndNotify(asset.finalizeHistoryEntry())
   165  	}
   166  
   167  	// Update the failure counter if the outcome is already known. For ACTUATE_*
   168  	// decisions it will be updated in EndActuation.
   169  	switch {
   170  	case entry.Decision.Decision == modelpb.ActuationDecision_SKIP_BROKEN:
   171  		asset.ConsecutiveFailures += 1
   172  	case !IsActuateDecision(entry.Decision.Decision):
   173  		// This is SKIP_UPTODATE, SKIP_DISABLED, SKIP_LOCKED.
   174  		asset.ConsecutiveFailures = 0
   175  	}
   176  
   177  	// Skip repeating uninteresting decisions e.g. a series of UPTODATE decisions.
   178  	// Otherwise the log would be full of them and it will be hard to find
   179  	// interesting ones.
   180  	if asset.HistoryEntry != nil && !shouldRecordHistory(entry, asset.HistoryEntry) {
   181  		return
   182  	}
   183  
   184  	// The new history entry is noteworthy and should be recorded.
   185  	entry.HistoryId = asset.LastHistoryID + 1
   186  	asset.HistoryEntry = entry
   187  
   188  	if IsActuateDecision(entry.Decision.Decision) {
   189  		// An actuation is only starting and it will be updated later in
   190  		// EndActuation. We should emit ACTUATION_STARTING notification, but don't
   191  		// commit the log record yet (because the history is immutable and
   192  		// contains only finalized actuations).
   193  		op.history.notifyOnly(entry)
   194  	} else {
   195  		// If the decision is final, then the actuation is done with this asset and
   196  		// we can emit the log record right now. Otherwise we'll keep the prepared
   197  		// log record cached in the Asset entity and commit it in EndActuation when
   198  		// we know the actuation outcome.
   199  		asset.LastHistoryID = entry.HistoryId
   200  		op.history.recordAndNotify(entry)
   201  	}
   202  }
   203  
   204  // actuationExpiry calculates when this actuation expires.
   205  func (op *ActuationBeginOp) actuationExpiry() time.Time {
   206  	timeout := op.actuation.Deployment.GetConfig().GetActuationTimeout()
   207  	if timeout != nil {
   208  		return op.now.Add(timeout.AsDuration())
   209  	}
   210  	return op.now.Add(defaultActuationTimeout)
   211  }
   212  
   213  // Apply stores all updated or created datastore entities.
   214  //
   215  // Must be called only after all per-asset MakeDecision calls. Returns the
   216  // mapping with recorded decisions.
   217  func (op *ActuationBeginOp) Apply(ctx context.Context) (map[string]*modelpb.ActuationDecision, error) {
   218  	var toPut []any
   219  
   220  	// Set the overall actuation state based on decisions made.
   221  	op.actuation.Created = timestamppb.New(op.now)
   222  	if op.actuating {
   223  		op.actuation.State = modelpb.Actuation_EXECUTING
   224  		op.actuation.Expiry = timestamppb.New(op.actuationExpiry())
   225  	} else if len(op.errors) != 0 {
   226  		op.actuation.State = modelpb.Actuation_FAILED
   227  		op.actuation.Finished = timestamppb.New(op.now)
   228  		op.actuation.Status = &statuspb.Status{
   229  			Code:    int32(codes.Internal),
   230  			Message: strings.Join(op.errors, "; "),
   231  		}
   232  	} else {
   233  		op.actuation.State = modelpb.Actuation_SUCCEEDED
   234  		op.actuation.Finished = timestamppb.New(op.now)
   235  	}
   236  
   237  	// Embed this Actuation snapshot into Asset entities.
   238  	for _, ent := range op.assets {
   239  		ent.Asset.LastActuation = op.actuation
   240  		if IsActuateDecision(ent.Asset.LastDecision.Decision) {
   241  			ent.Asset.LastActuateActuation = ent.Asset.LastActuation
   242  			ent.Asset.LastActuateDecision = ent.Asset.LastDecision
   243  		}
   244  		toPut = append(toPut, ent)
   245  	}
   246  
   247  	// Create the new actuation entity.
   248  	toPut = append(toPut, &Actuation{
   249  		ID:        op.actuation.Id,
   250  		Actuation: op.actuation,
   251  		Decisions: &modelpb.ActuationDecisions{Decisions: op.decisions},
   252  		State:     op.actuation.State,
   253  		Created:   asTime(op.actuation.Created),
   254  		Expiry:    asTime(op.actuation.Expiry),
   255  	})
   256  
   257  	// Prepare AssetHistory entities. Note they refer to op.actuation by pointer
   258  	// inside already and will pick up all changes made to the Actuation proto.
   259  	history, err := op.history.commit(ctx)
   260  	if err != nil {
   261  		return nil, err
   262  	}
   263  	toPut = append(toPut, history...)
   264  
   265  	if err := datastore.Put(ctx, toPut...); err != nil {
   266  		return nil, err
   267  	}
   268  	return op.decisions, nil
   269  }