go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/deploy/service/model/op_begin.go (about) 1 // Copyright 2022 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package model 16 17 import ( 18 "context" 19 "fmt" 20 "strings" 21 "time" 22 23 statuspb "google.golang.org/genproto/googleapis/rpc/status" 24 "google.golang.org/grpc/codes" 25 "google.golang.org/grpc/status" 26 "google.golang.org/protobuf/types/known/timestamppb" 27 28 "go.chromium.org/luci/common/clock" 29 "go.chromium.org/luci/gae/service/datastore" 30 31 "go.chromium.org/luci/deploy/api/modelpb" 32 "go.chromium.org/luci/deploy/api/rpcpb" 33 ) 34 35 // defaultActuationTimeout is the default value for `actuation_timeout` in 36 // DeploymentConfig. 37 const defaultActuationTimeout = 20 * time.Minute 38 39 // ActuationBeginOp collects changes to transactionally apply to the datastore 40 // to begin a new actuation. 41 type ActuationBeginOp struct { 42 actuation *modelpb.Actuation 43 decisions map[string]*modelpb.ActuationDecision 44 assets map[string]*Asset 45 history *historyRecorder 46 now time.Time 47 48 actuating bool // true if have at least one ACTUATE_* decision 49 errors []string // accumulated errors for SKIP_BROKEN decisions 50 } 51 52 // NewActuationBeginOp starts a datastore operation to create an actuation. 53 // 54 // Takes ownership of `actuation` mutating it. 55 func NewActuationBeginOp(ctx context.Context, assets []string, actuation *modelpb.Actuation) (*ActuationBeginOp, error) { 56 assetMap, err := fetchAssets(ctx, assets, false) 57 if err != nil { 58 return nil, err 59 } 60 return &ActuationBeginOp{ 61 actuation: actuation, 62 decisions: make(map[string]*modelpb.ActuationDecision, len(assetMap)), 63 assets: assetMap, 64 history: &historyRecorder{actuation: actuation}, 65 now: clock.Now(ctx), 66 }, nil 67 } 68 69 // MakeDecision decides what to do with an asset and records this decision. 70 // 71 // Must be called once for every asset passed to NewActuationBeginOp. Takes 72 // ownership of `asset` mutating it. AssetToActuate fields must already be 73 // validated at this point. 74 func (op *ActuationBeginOp) MakeDecision(ctx context.Context, assetID string, asset *rpcpb.AssetToActuate) { 75 // TODO: Implement locks. 76 // TODO: Implement anti-stomp protection. 77 // TODO: Implement forced actuation. 78 79 var errors []string 80 var brokenStatus *statuspb.Status 81 82 // Fill in server-assigned fields and collect error statuses. 83 populateAssetState := func(what string, s *modelpb.AssetState) { 84 if s.Timestamp == nil { 85 s.Timestamp = timestamppb.New(op.now) 86 } 87 s.Deployment = op.actuation.Deployment 88 s.Actuator = op.actuation.Actuator 89 90 if s.Status.GetCode() != int32(codes.OK) { 91 errors = append(errors, fmt.Sprintf( 92 "asset %q: failed to collect %s: %s", 93 assetID, what, status.ErrorProto(s.Status))) 94 brokenStatus = s.Status // keep only the last, no big deal 95 } 96 } 97 populateAssetState("intended state", asset.IntendedState) 98 populateAssetState("reported state", asset.ReportedState) 99 100 // Update stored AssetState fields only if the new reported values are 101 // non-erroneous. 102 stored := op.assets[assetID].Asset 103 stored.Config = asset.Config 104 if asset.IntendedState.Status.GetCode() == int32(codes.OK) { 105 stored.IntendedState = asset.IntendedState 106 } 107 if asset.ReportedState.Status.GetCode() == int32(codes.OK) { 108 stored.ReportedState = asset.ReportedState 109 } 110 111 // Preserve for the AssetHistory. 112 lastAppliedState := stored.AppliedState 113 114 // Make the actual decision. 115 var decision modelpb.ActuationDecision_Decision 116 switch { 117 case !IsActuationEnabed(asset.Config, op.actuation.Deployment.GetConfig()): 118 decision = modelpb.ActuationDecision_SKIP_DISABLED 119 case len(errors) != 0: 120 op.errors = append(op.errors, errors...) 121 decision = modelpb.ActuationDecision_SKIP_BROKEN 122 case IsUpToDate(asset.IntendedState, asset.ReportedState, stored.AppliedState): 123 decision = modelpb.ActuationDecision_SKIP_UPTODATE 124 stored.AppliedState = stored.IntendedState 125 default: 126 op.actuating = true 127 decision = modelpb.ActuationDecision_ACTUATE_STALE 128 } 129 130 // Record the decision. 131 stored.LastDecision = &modelpb.ActuationDecision{ 132 Decision: decision, 133 Status: brokenStatus, 134 } 135 op.decisions[assetID] = stored.LastDecision 136 137 op.maybeUpdateHistoryAndNotify(&modelpb.AssetHistory{ 138 AssetId: assetID, 139 HistoryId: 0, // will be populated in maybeUpdateHistoryAndNotify 140 Decision: stored.LastDecision, 141 Actuation: op.actuation, 142 Config: asset.Config, 143 IntendedState: asset.IntendedState, 144 ReportedState: asset.ReportedState, 145 LastAppliedState: lastAppliedState, 146 }) 147 } 148 149 func (op *ActuationBeginOp) maybeUpdateHistoryAndNotify(entry *modelpb.AssetHistory) { 150 asset := op.assets[entry.AssetId] 151 entry.PriorConsecutiveFailures = asset.ConsecutiveFailures 152 153 // If had an open history entry, then the previous actuation (that was 154 // supposed to close it) probably crashed, i.e. it didn't call EndActuation. 155 // We should record this observation in the history log. 156 if asset.IsRecordingHistoryEntry() { 157 asset.ConsecutiveFailures += 1 158 asset.HistoryEntry.Actuation.State = modelpb.Actuation_EXPIRED 159 asset.HistoryEntry.Actuation.Finished = timestamppb.New(op.now) 160 asset.HistoryEntry.Actuation.Status = &statuspb.Status{ 161 Code: int32(codes.Unknown), 162 Message: "the actuation probably crashed: the asset was picked up by another actuation", 163 } 164 op.history.recordAndNotify(asset.finalizeHistoryEntry()) 165 } 166 167 // Update the failure counter if the outcome is already known. For ACTUATE_* 168 // decisions it will be updated in EndActuation. 169 switch { 170 case entry.Decision.Decision == modelpb.ActuationDecision_SKIP_BROKEN: 171 asset.ConsecutiveFailures += 1 172 case !IsActuateDecision(entry.Decision.Decision): 173 // This is SKIP_UPTODATE, SKIP_DISABLED, SKIP_LOCKED. 174 asset.ConsecutiveFailures = 0 175 } 176 177 // Skip repeating uninteresting decisions e.g. a series of UPTODATE decisions. 178 // Otherwise the log would be full of them and it will be hard to find 179 // interesting ones. 180 if asset.HistoryEntry != nil && !shouldRecordHistory(entry, asset.HistoryEntry) { 181 return 182 } 183 184 // The new history entry is noteworthy and should be recorded. 185 entry.HistoryId = asset.LastHistoryID + 1 186 asset.HistoryEntry = entry 187 188 if IsActuateDecision(entry.Decision.Decision) { 189 // An actuation is only starting and it will be updated later in 190 // EndActuation. We should emit ACTUATION_STARTING notification, but don't 191 // commit the log record yet (because the history is immutable and 192 // contains only finalized actuations). 193 op.history.notifyOnly(entry) 194 } else { 195 // If the decision is final, then the actuation is done with this asset and 196 // we can emit the log record right now. Otherwise we'll keep the prepared 197 // log record cached in the Asset entity and commit it in EndActuation when 198 // we know the actuation outcome. 199 asset.LastHistoryID = entry.HistoryId 200 op.history.recordAndNotify(entry) 201 } 202 } 203 204 // actuationExpiry calculates when this actuation expires. 205 func (op *ActuationBeginOp) actuationExpiry() time.Time { 206 timeout := op.actuation.Deployment.GetConfig().GetActuationTimeout() 207 if timeout != nil { 208 return op.now.Add(timeout.AsDuration()) 209 } 210 return op.now.Add(defaultActuationTimeout) 211 } 212 213 // Apply stores all updated or created datastore entities. 214 // 215 // Must be called only after all per-asset MakeDecision calls. Returns the 216 // mapping with recorded decisions. 217 func (op *ActuationBeginOp) Apply(ctx context.Context) (map[string]*modelpb.ActuationDecision, error) { 218 var toPut []any 219 220 // Set the overall actuation state based on decisions made. 221 op.actuation.Created = timestamppb.New(op.now) 222 if op.actuating { 223 op.actuation.State = modelpb.Actuation_EXECUTING 224 op.actuation.Expiry = timestamppb.New(op.actuationExpiry()) 225 } else if len(op.errors) != 0 { 226 op.actuation.State = modelpb.Actuation_FAILED 227 op.actuation.Finished = timestamppb.New(op.now) 228 op.actuation.Status = &statuspb.Status{ 229 Code: int32(codes.Internal), 230 Message: strings.Join(op.errors, "; "), 231 } 232 } else { 233 op.actuation.State = modelpb.Actuation_SUCCEEDED 234 op.actuation.Finished = timestamppb.New(op.now) 235 } 236 237 // Embed this Actuation snapshot into Asset entities. 238 for _, ent := range op.assets { 239 ent.Asset.LastActuation = op.actuation 240 if IsActuateDecision(ent.Asset.LastDecision.Decision) { 241 ent.Asset.LastActuateActuation = ent.Asset.LastActuation 242 ent.Asset.LastActuateDecision = ent.Asset.LastDecision 243 } 244 toPut = append(toPut, ent) 245 } 246 247 // Create the new actuation entity. 248 toPut = append(toPut, &Actuation{ 249 ID: op.actuation.Id, 250 Actuation: op.actuation, 251 Decisions: &modelpb.ActuationDecisions{Decisions: op.decisions}, 252 State: op.actuation.State, 253 Created: asTime(op.actuation.Created), 254 Expiry: asTime(op.actuation.Expiry), 255 }) 256 257 // Prepare AssetHistory entities. Note they refer to op.actuation by pointer 258 // inside already and will pick up all changes made to the Actuation proto. 259 history, err := op.history.commit(ctx) 260 if err != nil { 261 return nil, err 262 } 263 toPut = append(toPut, history...) 264 265 if err := datastore.Put(ctx, toPut...); err != nil { 266 return nil, err 267 } 268 return op.decisions, nil 269 }