github.com/kyma-project/kyma-environment-broker@v0.0.1/internal/process/upgrade_cluster/initialisation.go (about)

     1  package upgrade_cluster
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	"github.com/kyma-project/control-plane/components/provisioner/pkg/gqlschema"
     8  	"github.com/kyma-project/kyma-environment-broker/common/orchestration"
     9  	"github.com/kyma-project/kyma-environment-broker/internal"
    10  	"github.com/kyma-project/kyma-environment-broker/internal/avs"
    11  	kebError "github.com/kyma-project/kyma-environment-broker/internal/error"
    12  	"github.com/kyma-project/kyma-environment-broker/internal/notification"
    13  	"github.com/kyma-project/kyma-environment-broker/internal/process"
    14  	"github.com/kyma-project/kyma-environment-broker/internal/process/input"
    15  	"github.com/kyma-project/kyma-environment-broker/internal/provisioner"
    16  	"github.com/kyma-project/kyma-environment-broker/internal/storage"
    17  	"github.com/pivotal-cf/brokerapi/v8/domain"
    18  	"github.com/sirupsen/logrus"
    19  )
    20  
// Step selectors accepted by performRuntimeTasks.
const (
	// UpgradeInitSteps (value 1) selects the tasks that checkRuntimeStatus runs
	// before it evaluates the provisioner operation state.
	UpgradeInitSteps int = iota + 1
	// UpgradeFinishSteps (value 2) selects the tasks that checkRuntimeStatus runs
	// after the provisioner operation state has been evaluated.
	UpgradeFinishSteps
)
    25  
const (
	// CheckStatusTimeout is the time after which the operation is marked as expired:
	// checkRuntimeStatus fails the operation once more than this duration has
	// elapsed since the operation's UpdatedAt timestamp.
	CheckStatusTimeout = 3 * time.Hour
)
    30  
// postUpgradeDescription is the description stored on the operation once the
// provisioner reports a Succeeded or Failed state, before the finish tasks run.
const postUpgradeDescription = "Performing post-upgrade tasks"
    32  
// InitialisationStep is the cluster-upgrade process step reported under the name
// "Upgrade_Cluster_Initialisation". All fields are populated by
// NewInitialisationStep; timeSchedule is stored by value, so later changes to
// the schedule passed to the constructor are not observed by the step.
type InitialisationStep struct {
	operationManager     *process.UpgradeClusterOperationManager
	operationStorage     storage.Operations
	orchestrationStorage storage.Orchestrations
	provisionerClient    provisioner.Client
	inputBuilder         input.CreatorForPlan
	evaluationManager    *avs.EvaluationManager
	timeSchedule         TimeSchedule
	bundleBuilder        notification.BundleBuilder
}
    43  
    44  func NewInitialisationStep(os storage.Operations, ors storage.Orchestrations, pc provisioner.Client, b input.CreatorForPlan, em *avs.EvaluationManager,
    45  	timeSchedule *TimeSchedule, bundleBuilder notification.BundleBuilder) *InitialisationStep {
    46  	ts := timeSchedule
    47  	if ts == nil {
    48  		ts = &TimeSchedule{
    49  			Retry:                 5 * time.Second,
    50  			StatusCheck:           time.Minute,
    51  			UpgradeClusterTimeout: time.Hour,
    52  		}
    53  	}
    54  	return &InitialisationStep{
    55  		operationManager:     process.NewUpgradeClusterOperationManager(os),
    56  		operationStorage:     os,
    57  		orchestrationStorage: ors,
    58  		provisionerClient:    pc,
    59  		inputBuilder:         b,
    60  		evaluationManager:    em,
    61  		timeSchedule:         *ts,
    62  		bundleBuilder:        bundleBuilder,
    63  	}
    64  }
    65  
    66  func (s *InitialisationStep) Name() string {
    67  	return "Upgrade_Cluster_Initialisation"
    68  }
    69  
    70  func (s *InitialisationStep) Run(operation internal.UpgradeClusterOperation, log logrus.FieldLogger) (internal.UpgradeClusterOperation, time.Duration, error) {
    71  	// Check concurrent deprovisioning (or suspension) operation (launched after target resolution)
    72  	// Terminate (preempt) upgrade immediately with succeeded
    73  	lastOp, err := s.operationStorage.GetLastOperation(operation.InstanceID)
    74  	if err != nil {
    75  		return operation, s.timeSchedule.Retry, nil
    76  	}
    77  	if lastOp.Type == internal.OperationTypeDeprovision {
    78  		return s.operationManager.OperationSucceeded(operation, fmt.Sprintf("operation preempted by deprovisioning %s", lastOp.ID), log)
    79  	}
    80  
    81  	if operation.State == orchestration.Pending {
    82  		// Check if the orchestreation got cancelled, don't start new pending operation
    83  		orchestration, err := s.orchestrationStorage.GetByID(operation.OrchestrationID)
    84  		if err != nil {
    85  			return operation, s.timeSchedule.Retry, nil
    86  		}
    87  		if orchestration.IsCanceled() {
    88  			log.Infof("Skipping processing because orchestration %s was canceled", operation.OrchestrationID)
    89  			return s.operationManager.OperationCanceled(operation, fmt.Sprintf("orchestration %s was canceled", operation.OrchestrationID), log)
    90  		}
    91  
    92  		// Check concurrent operations and wait to finish before proceeding
    93  		// - unsuspension provisioning launched after suspension
    94  		// - kyma upgrade or cluster upgrade
    95  		switch lastOp.Type {
    96  		case internal.OperationTypeProvision, internal.OperationTypeUpgradeKyma, internal.OperationTypeUpgradeCluster:
    97  			if !lastOp.IsFinished() {
    98  				return operation, s.timeSchedule.StatusCheck, nil
    99  			}
   100  		}
   101  
   102  		if operation.RuntimeVersion.IsEmpty() {
   103  			operation.RuntimeVersion = internal.RuntimeVersionData{
   104  				Version: orchestration.Parameters.Kyma.Version,
   105  			}
   106  		}
   107  
   108  		op, delay, _ := s.operationManager.UpdateOperation(operation, func(op *internal.UpgradeClusterOperation) {
   109  			op.ProvisioningParameters.ErsContext = internal.InheritMissingERSContext(op.ProvisioningParameters.ErsContext, lastOp.ProvisioningParameters.ErsContext)
   110  			op.State = domain.InProgress
   111  			op.RuntimeVersion = operation.RuntimeVersion
   112  		}, log)
   113  		if delay != 0 {
   114  			return operation, delay, nil
   115  		}
   116  		operation = op
   117  	}
   118  
   119  	if operation.ProvisionerOperationID == "" {
   120  		log.Info("provisioner operation ID is empty, initialize upgrade shoot input request")
   121  		return s.initializeUpgradeShootRequest(operation, log)
   122  	}
   123  
   124  	log.Infof("runtime being upgraded, check operation status for provisioner operation id: %v", operation.ProvisionerOperationID)
   125  	return s.checkRuntimeStatus(operation, log.WithField("runtimeID", operation.RuntimeOperation.RuntimeID))
   126  }
   127  
   128  func (s *InitialisationStep) initializeUpgradeShootRequest(operation internal.UpgradeClusterOperation, log logrus.FieldLogger) (internal.UpgradeClusterOperation, time.Duration, error) {
   129  	log.Infof("create provisioner input creator for plan ID %q", operation.ProvisioningParameters)
   130  	creator, err := s.inputBuilder.CreateUpgradeShootInput(operation.ProvisioningParameters, operation.RuntimeVersion)
   131  	switch {
   132  	case err == nil:
   133  		operation.InputCreator = creator
   134  		return operation, 0, nil // go to next step
   135  	case kebError.IsTemporaryError(err):
   136  		log.Errorf("cannot create upgrade shoot input creator at the moment for plan %s: %s", operation.ProvisioningParameters.PlanID, err)
   137  		return s.operationManager.RetryOperation(operation, "error while creating upgrade shoot input creator", err, 5*time.Second, 5*time.Minute, log)
   138  	default:
   139  		log.Errorf("cannot create input creator for plan %s: %s", operation.ProvisioningParameters.PlanID, err)
   140  		return s.operationManager.OperationFailed(operation, "cannot create provisioning input creator", err, log)
   141  	}
   142  }
   143  
   144  // performRuntimeTasks Ensures that required logic on init and finish is executed.
   145  // Uses internal and external Avs monitor statuses to verify state.
   146  func (s *InitialisationStep) performRuntimeTasks(step int, operation internal.UpgradeClusterOperation, log logrus.FieldLogger) (internal.UpgradeClusterOperation, time.Duration, error) {
   147  	hasMonitors := s.evaluationManager.HasMonitors(operation.Avs)
   148  	inMaintenance := s.evaluationManager.InMaintenance(operation.Avs)
   149  	var err error = nil
   150  	var delay time.Duration = 0
   151  	var updateAvsStatus = func(op *internal.UpgradeClusterOperation) {
   152  		op.Avs.AvsInternalEvaluationStatus = operation.Avs.AvsInternalEvaluationStatus
   153  		op.Avs.AvsExternalEvaluationStatus = operation.Avs.AvsExternalEvaluationStatus
   154  	}
   155  
   156  	switch step {
   157  	case UpgradeInitSteps:
   158  		if s.evaluationManager.IsMaintenanceModeDisabled() {
   159  			break
   160  		}
   161  		if hasMonitors &&
   162  			!inMaintenance &&
   163  			s.evaluationManager.IsMaintenanceModeApplicableForGAID(operation.ProvisioningParameters.ErsContext.GlobalAccountID) {
   164  			log.Infof("executing init upgrade steps")
   165  			err = s.evaluationManager.SetMaintenanceStatus(&operation.Avs, log)
   166  			operation, delay, _ = s.operationManager.UpdateOperation(operation, updateAvsStatus, log)
   167  		}
   168  	case UpgradeFinishSteps:
   169  		if hasMonitors && inMaintenance {
   170  			log.Infof("executing finish upgrade steps")
   171  			err = s.evaluationManager.RestoreStatus(&operation.Avs, log)
   172  			operation, delay, _ = s.operationManager.UpdateOperation(operation, updateAvsStatus, log)
   173  		}
   174  	}
   175  
   176  	switch {
   177  	case err == nil:
   178  		return operation, delay, nil
   179  	case kebError.IsTemporaryError(err):
   180  		return s.operationManager.RetryOperation(operation, "error while performing runtime tasks", err, 10*time.Second, 10*time.Minute, log)
   181  	default:
   182  		return s.operationManager.OperationFailed(operation, "error while performing runtime tasks", err, log)
   183  	}
   184  }
   185  
   186  func (s *InitialisationStep) restoreAvsAndFailOperation(operation internal.UpgradeClusterOperation, description string, log logrus.FieldLogger) (internal.UpgradeClusterOperation, time.Duration, error) {
   187  	err := s.evaluationManager.RestoreStatus(&operation.Avs, log)
   188  	if err != nil {
   189  		return s.operationManager.RetryOperation(operation, "error while restoring AvS state", err, 3*time.Second, time.Minute, log)
   190  	}
   191  	operation, retry, _ := s.operationManager.UpdateOperation(operation, func(op *internal.UpgradeClusterOperation) {
   192  		op.Avs.AvsInternalEvaluationStatus = operation.Avs.AvsInternalEvaluationStatus
   193  		op.Avs.AvsExternalEvaluationStatus = operation.Avs.AvsExternalEvaluationStatus
   194  	}, log)
   195  	if retry > 0 {
   196  		return operation, retry, nil
   197  	}
   198  	return s.operationManager.OperationFailed(operation, description, nil, log)
   199  }
   200  
   201  // checkRuntimeStatus will check operation runtime status
   202  // It will also trigger performRuntimeTasks upgrade steps to ensure
   203  // all the required dependencies have been fulfilled for upgrade operation.
   204  func (s *InitialisationStep) checkRuntimeStatus(operation internal.UpgradeClusterOperation, log logrus.FieldLogger) (internal.UpgradeClusterOperation, time.Duration, error) {
   205  	if time.Since(operation.UpdatedAt) > CheckStatusTimeout {
   206  		log.Infof("operation has reached the time limit: updated operation time: %s", operation.UpdatedAt)
   207  		//send customer notification
   208  		if operation.RuntimeOperation.Notification {
   209  			err := s.sendNotificationComplete(operation, log)
   210  			//currently notification error can only be temporary error
   211  			if err != nil && kebError.IsTemporaryError(err) {
   212  				return operation, 5 * time.Second, nil
   213  			}
   214  		}
   215  		return s.restoreAvsAndFailOperation(operation, fmt.Sprintf("operation has reached the time limit: %s", CheckStatusTimeout), log)
   216  	}
   217  
   218  	status, err := s.provisionerClient.RuntimeOperationStatus(operation.RuntimeOperation.GlobalAccountID, operation.ProvisionerOperationID)
   219  	if err != nil {
   220  		return operation, s.timeSchedule.StatusCheck, nil
   221  	}
   222  	log.Infof("call to provisioner returned %s status", status.State.String())
   223  
   224  	var msg string
   225  	if status.Message != nil {
   226  		msg = *status.Message
   227  	}
   228  
   229  	// do required steps on init
   230  	operation, delay, err := s.performRuntimeTasks(UpgradeInitSteps, operation, log)
   231  	if delay != 0 || err != nil {
   232  		return operation, delay, err
   233  	}
   234  
   235  	// wait for operation completion
   236  	switch status.State {
   237  	case gqlschema.OperationStateInProgress, gqlschema.OperationStatePending:
   238  		return operation, s.timeSchedule.StatusCheck, nil
   239  	case gqlschema.OperationStateSucceeded, gqlschema.OperationStateFailed:
   240  		//send cunstomer notification
   241  		if operation.RuntimeOperation.Notification {
   242  			err := s.sendNotificationComplete(operation, log)
   243  			//currently notification error can only be temporary error
   244  			if err != nil && kebError.IsTemporaryError(err) {
   245  				return operation, 5 * time.Second, nil
   246  			}
   247  		}
   248  		// Set post-upgrade description which also reset UpdatedAt for operation retries to work properly
   249  		if operation.Description != postUpgradeDescription {
   250  			operation, delay, _ = s.operationManager.UpdateOperation(operation, func(operation *internal.UpgradeClusterOperation) {
   251  				operation.Description = postUpgradeDescription
   252  			}, log)
   253  			if delay != 0 {
   254  				return operation, delay, nil
   255  			}
   256  		}
   257  	}
   258  
   259  	// do required steps on finish
   260  	operation, delay, err = s.performRuntimeTasks(UpgradeFinishSteps, operation, log)
   261  	if delay != 0 || err != nil {
   262  		return operation, delay, err
   263  	}
   264  
   265  	// handle operation completion
   266  	switch status.State {
   267  	case gqlschema.OperationStateSucceeded:
   268  		return s.operationManager.OperationSucceeded(operation, msg, log)
   269  	case gqlschema.OperationStateFailed:
   270  		return s.operationManager.OperationFailed(operation, fmt.Sprintf("provisioner client returns failed status: %s", msg), nil, log)
   271  	}
   272  
   273  	return s.operationManager.OperationFailed(operation, fmt.Sprintf("unsupported provisioner client status: %s", status.State.String()), nil, log)
   274  }
   275  
   276  func (s *InitialisationStep) sendNotificationComplete(operation internal.UpgradeClusterOperation, log logrus.FieldLogger) error {
   277  	tenants := []notification.NotificationTenant{
   278  		{
   279  			InstanceID: operation.InstanceID,
   280  			EndDate:    time.Now().Format("2006-01-02 15:04:05"),
   281  			State:      notification.FinishedMaintenanceState,
   282  		},
   283  	}
   284  	notificationParams := notification.NotificationParams{
   285  		OrchestrationID: operation.OrchestrationID,
   286  		Tenants:         tenants,
   287  	}
   288  	notificationBundle, err := s.bundleBuilder.NewBundle(operation.OrchestrationID, notificationParams)
   289  	if err != nil {
   290  		log.Errorf("%s: %s", "Failed to create Notification Bundle", err)
   291  		return err
   292  	}
   293  	err2 := notificationBundle.UpdateNotificationEvent()
   294  	if err2 != nil {
   295  		msg := fmt.Sprintf("cannot update notification for orchestration %s", operation.OrchestrationID)
   296  		log.Errorf("%s: %s", msg, err)
   297  		return err
   298  	}
   299  	return nil
   300  }