github.com/kyma-project/kyma-environment-broker@v0.0.1/internal/process/upgrade_cluster/initialisation.go (about) 1 package upgrade_cluster 2 3 import ( 4 "fmt" 5 "time" 6 7 "github.com/kyma-project/control-plane/components/provisioner/pkg/gqlschema" 8 "github.com/kyma-project/kyma-environment-broker/common/orchestration" 9 "github.com/kyma-project/kyma-environment-broker/internal" 10 "github.com/kyma-project/kyma-environment-broker/internal/avs" 11 kebError "github.com/kyma-project/kyma-environment-broker/internal/error" 12 "github.com/kyma-project/kyma-environment-broker/internal/notification" 13 "github.com/kyma-project/kyma-environment-broker/internal/process" 14 "github.com/kyma-project/kyma-environment-broker/internal/process/input" 15 "github.com/kyma-project/kyma-environment-broker/internal/provisioner" 16 "github.com/kyma-project/kyma-environment-broker/internal/storage" 17 "github.com/pivotal-cf/brokerapi/v8/domain" 18 "github.com/sirupsen/logrus" 19 ) 20 21 const ( 22 UpgradeInitSteps int = iota + 1 23 UpgradeFinishSteps 24 ) 25 26 const ( 27 // the time after which the operation is marked as expired 28 CheckStatusTimeout = 3 * time.Hour 29 ) 30 31 const postUpgradeDescription = "Performing post-upgrade tasks" 32 33 type InitialisationStep struct { 34 operationManager *process.UpgradeClusterOperationManager 35 operationStorage storage.Operations 36 orchestrationStorage storage.Orchestrations 37 provisionerClient provisioner.Client 38 inputBuilder input.CreatorForPlan 39 evaluationManager *avs.EvaluationManager 40 timeSchedule TimeSchedule 41 bundleBuilder notification.BundleBuilder 42 } 43 44 func NewInitialisationStep(os storage.Operations, ors storage.Orchestrations, pc provisioner.Client, b input.CreatorForPlan, em *avs.EvaluationManager, 45 timeSchedule *TimeSchedule, bundleBuilder notification.BundleBuilder) *InitialisationStep { 46 ts := timeSchedule 47 if ts == nil { 48 ts = &TimeSchedule{ 49 Retry: 5 * time.Second, 50 StatusCheck: time.Minute, 51 UpgradeClusterTimeout: time.Hour, 52 } 53 } 54 return &InitialisationStep{ 55 operationManager: process.NewUpgradeClusterOperationManager(os), 56 operationStorage: os, 57 orchestrationStorage: ors, 58 provisionerClient: pc, 59 inputBuilder: b, 60 evaluationManager: em, 61 timeSchedule: *ts, 62 bundleBuilder: bundleBuilder, 63 } 64 } 65 66 func (s *InitialisationStep) Name() string { 67 return "Upgrade_Cluster_Initialisation" 68 } 69 70 func (s *InitialisationStep) Run(operation internal.UpgradeClusterOperation, log logrus.FieldLogger) (internal.UpgradeClusterOperation, time.Duration, error) { 71 // Check concurrent deprovisioning (or suspension) operation (launched after target resolution) 72 // Terminate (preempt) upgrade immediately with succeeded 73 lastOp, err := s.operationStorage.GetLastOperation(operation.InstanceID) 74 if err != nil { 75 return operation, s.timeSchedule.Retry, nil 76 } 77 if lastOp.Type == internal.OperationTypeDeprovision { 78 return s.operationManager.OperationSucceeded(operation, fmt.Sprintf("operation preempted by deprovisioning %s", lastOp.ID), log) 79 } 80 81 if operation.State == orchestration.Pending { 82 // Check if the orchestreation got cancelled, don't start new pending operation 83 orchestration, err := s.orchestrationStorage.GetByID(operation.OrchestrationID) 84 if err != nil { 85 return operation, s.timeSchedule.Retry, nil 86 } 87 if orchestration.IsCanceled() { 88 log.Infof("Skipping processing because orchestration %s was canceled", operation.OrchestrationID) 89 return s.operationManager.OperationCanceled(operation, fmt.Sprintf("orchestration %s was canceled", operation.OrchestrationID), log) 90 } 91 92 // Check concurrent operations and wait to finish before proceeding 93 // - unsuspension provisioning launched after suspension 94 // - kyma upgrade or cluster upgrade 95 switch lastOp.Type { 96 case internal.OperationTypeProvision, internal.OperationTypeUpgradeKyma, internal.OperationTypeUpgradeCluster: 97 if !lastOp.IsFinished() { 98 return operation, s.timeSchedule.StatusCheck, nil 99 } 100 } 101 102 if operation.RuntimeVersion.IsEmpty() { 103 operation.RuntimeVersion = internal.RuntimeVersionData{ 104 Version: orchestration.Parameters.Kyma.Version, 105 } 106 } 107 108 op, delay, _ := s.operationManager.UpdateOperation(operation, func(op *internal.UpgradeClusterOperation) { 109 op.ProvisioningParameters.ErsContext = internal.InheritMissingERSContext(op.ProvisioningParameters.ErsContext, lastOp.ProvisioningParameters.ErsContext) 110 op.State = domain.InProgress 111 op.RuntimeVersion = operation.RuntimeVersion 112 }, log) 113 if delay != 0 { 114 return operation, delay, nil 115 } 116 operation = op 117 } 118 119 if operation.ProvisionerOperationID == "" { 120 log.Info("provisioner operation ID is empty, initialize upgrade shoot input request") 121 return s.initializeUpgradeShootRequest(operation, log) 122 } 123 124 log.Infof("runtime being upgraded, check operation status for provisioner operation id: %v", operation.ProvisionerOperationID) 125 return s.checkRuntimeStatus(operation, log.WithField("runtimeID", operation.RuntimeOperation.RuntimeID)) 126 } 127 128 func (s *InitialisationStep) initializeUpgradeShootRequest(operation internal.UpgradeClusterOperation, log logrus.FieldLogger) (internal.UpgradeClusterOperation, time.Duration, error) { 129 log.Infof("create provisioner input creator for plan ID %q", operation.ProvisioningParameters) 130 creator, err := s.inputBuilder.CreateUpgradeShootInput(operation.ProvisioningParameters, operation.RuntimeVersion) 131 switch { 132 case err == nil: 133 operation.InputCreator = creator 134 return operation, 0, nil // go to next step 135 case kebError.IsTemporaryError(err): 136 log.Errorf("cannot create upgrade shoot input creator at the moment for plan %s: %s", operation.ProvisioningParameters.PlanID, err) 137 return s.operationManager.RetryOperation(operation, "error while creating upgrade shoot input creator", err, 5*time.Second, 5*time.Minute, log) 138 default: 139 log.Errorf("cannot create input creator for plan %s: %s", operation.ProvisioningParameters.PlanID, err) 140 return s.operationManager.OperationFailed(operation, "cannot create provisioning input creator", err, log) 141 } 142 } 143 144 // performRuntimeTasks Ensures that required logic on init and finish is executed. 145 // Uses internal and external Avs monitor statuses to verify state. 146 func (s *InitialisationStep) performRuntimeTasks(step int, operation internal.UpgradeClusterOperation, log logrus.FieldLogger) (internal.UpgradeClusterOperation, time.Duration, error) { 147 hasMonitors := s.evaluationManager.HasMonitors(operation.Avs) 148 inMaintenance := s.evaluationManager.InMaintenance(operation.Avs) 149 var err error = nil 150 var delay time.Duration = 0 151 var updateAvsStatus = func(op *internal.UpgradeClusterOperation) { 152 op.Avs.AvsInternalEvaluationStatus = operation.Avs.AvsInternalEvaluationStatus 153 op.Avs.AvsExternalEvaluationStatus = operation.Avs.AvsExternalEvaluationStatus 154 } 155 156 switch step { 157 case UpgradeInitSteps: 158 if s.evaluationManager.IsMaintenanceModeDisabled() { 159 break 160 } 161 if hasMonitors && 162 !inMaintenance && 163 s.evaluationManager.IsMaintenanceModeApplicableForGAID(operation.ProvisioningParameters.ErsContext.GlobalAccountID) { 164 log.Infof("executing init upgrade steps") 165 err = s.evaluationManager.SetMaintenanceStatus(&operation.Avs, log) 166 operation, delay, _ = s.operationManager.UpdateOperation(operation, updateAvsStatus, log) 167 } 168 case UpgradeFinishSteps: 169 if hasMonitors && inMaintenance { 170 log.Infof("executing finish upgrade steps") 171 err = s.evaluationManager.RestoreStatus(&operation.Avs, log) 172 operation, delay, _ = s.operationManager.UpdateOperation(operation, updateAvsStatus, log) 173 } 174 } 175 176 switch { 177 case err == nil: 178 return operation, delay, nil 179 case kebError.IsTemporaryError(err): 180 return s.operationManager.RetryOperation(operation, "error while performing runtime tasks", err, 10*time.Second, 10*time.Minute, log) 181 default: 182 return s.operationManager.OperationFailed(operation, "error while performing runtime tasks", err, log) 183 } 184 } 185 186 func (s *InitialisationStep) restoreAvsAndFailOperation(operation internal.UpgradeClusterOperation, description string, log logrus.FieldLogger) (internal.UpgradeClusterOperation, time.Duration, error) { 187 err := s.evaluationManager.RestoreStatus(&operation.Avs, log) 188 if err != nil { 189 return s.operationManager.RetryOperation(operation, "error while restoring AvS state", err, 3*time.Second, time.Minute, log) 190 } 191 operation, retry, _ := s.operationManager.UpdateOperation(operation, func(op *internal.UpgradeClusterOperation) { 192 op.Avs.AvsInternalEvaluationStatus = operation.Avs.AvsInternalEvaluationStatus 193 op.Avs.AvsExternalEvaluationStatus = operation.Avs.AvsExternalEvaluationStatus 194 }, log) 195 if retry > 0 { 196 return operation, retry, nil 197 } 198 return s.operationManager.OperationFailed(operation, description, nil, log) 199 } 200 201 // checkRuntimeStatus will check operation runtime status 202 // It will also trigger performRuntimeTasks upgrade steps to ensure 203 // all the required dependencies have been fulfilled for upgrade operation. 204 func (s *InitialisationStep) checkRuntimeStatus(operation internal.UpgradeClusterOperation, log logrus.FieldLogger) (internal.UpgradeClusterOperation, time.Duration, error) { 205 if time.Since(operation.UpdatedAt) > CheckStatusTimeout { 206 log.Infof("operation has reached the time limit: updated operation time: %s", operation.UpdatedAt) 207 //send customer notification 208 if operation.RuntimeOperation.Notification { 209 err := s.sendNotificationComplete(operation, log) 210 //currently notification error can only be temporary error 211 if err != nil && kebError.IsTemporaryError(err) { 212 return operation, 5 * time.Second, nil 213 } 214 } 215 return s.restoreAvsAndFailOperation(operation, fmt.Sprintf("operation has reached the time limit: %s", CheckStatusTimeout), log) 216 } 217 218 status, err := s.provisionerClient.RuntimeOperationStatus(operation.RuntimeOperation.GlobalAccountID, operation.ProvisionerOperationID) 219 if err != nil { 220 return operation, s.timeSchedule.StatusCheck, nil 221 } 222 log.Infof("call to provisioner returned %s status", status.State.String()) 223 224 var msg string 225 if status.Message != nil { 226 msg = *status.Message 227 } 228 229 // do required steps on init 230 operation, delay, err := s.performRuntimeTasks(UpgradeInitSteps, operation, log) 231 if delay != 0 || err != nil { 232 return operation, delay, err 233 } 234 235 // wait for operation completion 236 switch status.State { 237 case gqlschema.OperationStateInProgress, gqlschema.OperationStatePending: 238 return operation, s.timeSchedule.StatusCheck, nil 239 case gqlschema.OperationStateSucceeded, gqlschema.OperationStateFailed: 240 //send cunstomer notification 241 if operation.RuntimeOperation.Notification { 242 err := s.sendNotificationComplete(operation, log) 243 //currently notification error can only be temporary error 244 if err != nil && kebError.IsTemporaryError(err) { 245 return operation, 5 * time.Second, nil 246 } 247 } 248 // Set post-upgrade description which also reset UpdatedAt for operation retries to work properly 249 if operation.Description != postUpgradeDescription { 250 operation, delay, _ = s.operationManager.UpdateOperation(operation, func(operation *internal.UpgradeClusterOperation) { 251 operation.Description = postUpgradeDescription 252 }, log) 253 if delay != 0 { 254 return operation, delay, nil 255 } 256 } 257 } 258 259 // do required steps on finish 260 operation, delay, err = s.performRuntimeTasks(UpgradeFinishSteps, operation, log) 261 if delay != 0 || err != nil { 262 return operation, delay, err 263 } 264 265 // handle operation completion 266 switch status.State { 267 case gqlschema.OperationStateSucceeded: 268 return s.operationManager.OperationSucceeded(operation, msg, log) 269 case gqlschema.OperationStateFailed: 270 return s.operationManager.OperationFailed(operation, fmt.Sprintf("provisioner client returns failed status: %s", msg), nil, log) 271 } 272 273 return s.operationManager.OperationFailed(operation, fmt.Sprintf("unsupported provisioner client status: %s", status.State.String()), nil, log) 274 } 275 276 func (s *InitialisationStep) sendNotificationComplete(operation internal.UpgradeClusterOperation, log logrus.FieldLogger) error { 277 tenants := []notification.NotificationTenant{ 278 { 279 InstanceID: operation.InstanceID, 280 EndDate: time.Now().Format("2006-01-02 15:04:05"), 281 State: notification.FinishedMaintenanceState, 282 }, 283 } 284 notificationParams := notification.NotificationParams{ 285 OrchestrationID: operation.OrchestrationID, 286 Tenants: tenants, 287 } 288 notificationBundle, err := s.bundleBuilder.NewBundle(operation.OrchestrationID, notificationParams) 289 if err != nil { 290 log.Errorf("%s: %s", "Failed to create Notification Bundle", err) 291 return err 292 } 293 err2 := notificationBundle.UpdateNotificationEvent() 294 if err2 != nil { 295 msg := fmt.Sprintf("cannot update notification for orchestration %s", operation.OrchestrationID) 296 log.Errorf("%s: %s", msg, err) 297 return err 298 } 299 return nil 300 }