github.com/kyma-project/kyma-environment-broker@v0.0.1/internal/process/upgrade_kyma/initialisation.go (about)

     1  package upgrade_kyma
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	"github.com/kyma-project/kyma-environment-broker/internal/broker"
     8  
     9  	"github.com/kyma-project/kyma-environment-broker/internal/avs"
    10  	"github.com/kyma-project/kyma-environment-broker/internal/storage"
    11  
    12  	orchestrationExt "github.com/kyma-project/kyma-environment-broker/common/orchestration"
    13  
    14  	"github.com/kyma-project/control-plane/components/provisioner/pkg/gqlschema"
    15  	"github.com/kyma-project/kyma-environment-broker/internal"
    16  	kebError "github.com/kyma-project/kyma-environment-broker/internal/error"
    17  	"github.com/kyma-project/kyma-environment-broker/internal/notification"
    18  	"github.com/kyma-project/kyma-environment-broker/internal/process"
    19  	"github.com/kyma-project/kyma-environment-broker/internal/process/input"
    20  	"github.com/kyma-project/kyma-environment-broker/internal/provisioner"
    21  	"github.com/pivotal-cf/brokerapi/v8/domain"
    22  	"github.com/sirupsen/logrus"
    23  )
    24  
    25  const (
    26  	UpgradeInitSteps int = iota + 1
    27  	UpgradeFinishSteps
    28  )
    29  
    30  const (
    31  	// the time after which the operation is marked as expired
    32  	CheckStatusTimeout = 3 * time.Hour
    33  )
    34  
    35  const postUpgradeDescription = "Performing post-upgrade tasks"
    36  
    37  type InitialisationStep struct {
    38  	operationManager       *process.UpgradeKymaOperationManager
    39  	operationStorage       storage.Operations
    40  	orchestrationStorage   storage.Orchestrations
    41  	instanceStorage        storage.Instances
    42  	provisionerClient      provisioner.Client
    43  	inputBuilder           input.CreatorForPlan
    44  	evaluationManager      *avs.EvaluationManager
    45  	timeSchedule           TimeSchedule
    46  	runtimeVerConfigurator RuntimeVersionConfiguratorForUpgrade
    47  	bundleBuilder          notification.BundleBuilder
    48  }
    49  
    50  func NewInitialisationStep(os storage.Operations, ors storage.Orchestrations, is storage.Instances, pc provisioner.Client, inputBuilder input.CreatorForPlan, em *avs.EvaluationManager,
    51  	timeSchedule *TimeSchedule, rvc RuntimeVersionConfiguratorForUpgrade, bundleBuilder notification.BundleBuilder) *InitialisationStep {
    52  	ts := timeSchedule
    53  	if ts == nil {
    54  		ts = &TimeSchedule{
    55  			Retry:              5 * time.Second,
    56  			StatusCheck:        time.Minute,
    57  			UpgradeKymaTimeout: time.Hour,
    58  		}
    59  	}
    60  	return &InitialisationStep{
    61  		operationManager:       process.NewUpgradeKymaOperationManager(os),
    62  		operationStorage:       os,
    63  		orchestrationStorage:   ors,
    64  		instanceStorage:        is,
    65  		provisionerClient:      pc,
    66  		inputBuilder:           inputBuilder,
    67  		evaluationManager:      em,
    68  		timeSchedule:           *ts,
    69  		runtimeVerConfigurator: rvc,
    70  		bundleBuilder:          bundleBuilder,
    71  	}
    72  }
    73  
    74  func (s *InitialisationStep) Name() string {
    75  	return "Upgrade_Kyma_Initialisation"
    76  }
    77  
    78  func (s *InitialisationStep) Run(operation internal.UpgradeKymaOperation, log logrus.FieldLogger) (internal.UpgradeKymaOperation, time.Duration, error) {
    79  
    80  	if broker.IsPreviewPlan(operation.ProvisioningParameters.PlanID) {
    81  		log.Infof("Preview Plan  does not support upgrade Kyma process, setting the operation state to succeeded")
    82  		return s.operationManager.OperationSucceeded(operation, fmt.Sprintf("Preview Plan does not support upgrade kyma"), log)
    83  	}
    84  
    85  	// Check concurrent deprovisioning (or suspension) operation (launched after target resolution)
    86  	// Terminate (preempt) upgrade immediately with succeeded
    87  	lastOp, err := s.operationStorage.GetLastOperation(operation.InstanceID)
    88  	if err != nil {
    89  		return operation, s.timeSchedule.Retry, nil
    90  	}
    91  	if lastOp.Type == internal.OperationTypeDeprovision {
    92  		return s.operationManager.OperationSucceeded(operation, fmt.Sprintf("operation preempted by deprovisioning %s", lastOp.ID), log)
    93  	}
    94  
    95  	if operation.State == orchestrationExt.Pending {
    96  		// Check if the orchestration got cancelled, don't start new pending operation
    97  		orchestration, err := s.orchestrationStorage.GetByID(operation.OrchestrationID)
    98  		if err != nil {
    99  			return operation, s.timeSchedule.Retry, nil
   100  		}
   101  		if orchestration.IsCanceled() {
   102  			log.Infof("Skipping processing because orchestration %s was canceled", operation.OrchestrationID)
   103  			return s.operationManager.OperationCanceled(operation, fmt.Sprintf("orchestration %s was canceled", operation.OrchestrationID), log)
   104  		}
   105  
   106  		// Check concurrent operations and wait to finish before proceeding
   107  		// - unsuspension provisioning launched after suspension
   108  		// - kyma upgrade or cluster upgrade
   109  		switch lastOp.Type {
   110  		case internal.OperationTypeProvision, internal.OperationTypeUpgradeKyma, internal.OperationTypeUpgradeCluster:
   111  			if !lastOp.IsFinished() {
   112  				return operation, s.timeSchedule.StatusCheck, nil
   113  			}
   114  		}
   115  
   116  		// rewrite necessary data from ProvisioningOperation to operation internal.UpgradeOperation
   117  		provisioningOperation, err := s.operationStorage.GetProvisioningOperationByInstanceID(operation.InstanceID)
   118  		if err != nil {
   119  			log.Errorf("while getting provisioning operation from storage")
   120  			return operation, s.timeSchedule.Retry, nil
   121  		}
   122  		op, delay, _ := s.operationManager.UpdateOperation(operation, func(op *internal.UpgradeKymaOperation) {
   123  			op.ProvisioningParameters = provisioningOperation.ProvisioningParameters
   124  			op.ProvisioningParameters.ErsContext = internal.InheritMissingERSContext(op.ProvisioningParameters.ErsContext, lastOp.ProvisioningParameters.ErsContext)
   125  			op.State = domain.InProgress
   126  		}, log)
   127  		if delay != 0 {
   128  			return operation, delay, nil
   129  		}
   130  		operation = op
   131  	}
   132  
   133  	operation, backoff, err := s.initializeUpgradeRuntimeRequest(operation, log)
   134  	if backoff > 0 {
   135  		return operation, backoff, err
   136  	}
   137  
   138  	if operation.ProvisionerOperationID != "" {
   139  		log.Infof("runtime being upgraded, check operation status")
   140  		return s.checkRuntimeStatus(operation, log.WithField("runtimeID", operation.RuntimeOperation.RuntimeID))
   141  	}
   142  
   143  	return operation, 0, nil
   144  }
   145  
   146  func (s *InitialisationStep) initializeUpgradeRuntimeRequest(operation internal.UpgradeKymaOperation, log logrus.FieldLogger) (internal.UpgradeKymaOperation, time.Duration, error) {
   147  	if err := s.configureKymaVersion(&operation, log); err != nil {
   148  		return s.operationManager.RetryOperation(operation, "error while configuring kyma version", err, 5*time.Second, 5*time.Minute, log)
   149  	}
   150  
   151  	log.Infof("create provisioner input creator for plan ID %q", operation.ProvisioningParameters.PlanID)
   152  	creator, err := s.inputBuilder.CreateUpgradeInput(operation.ProvisioningParameters, operation.RuntimeVersion)
   153  	switch {
   154  	case err == nil:
   155  		creator.DisableOptionalComponent(internal.BTPOperatorComponentName)
   156  		operation.InputCreator = creator
   157  		return operation, 0, nil // go to next step
   158  	case kebError.IsTemporaryError(err):
   159  		log.Errorf("cannot create upgrade runtime input creator at the moment for plan %s: %s", operation.ProvisioningParameters.PlanID, err)
   160  		return s.operationManager.RetryOperation(operation, "error while creating runtime input creator", err, 5*time.Second, 5*time.Minute, log)
   161  	default:
   162  		log.Errorf("cannot create input creator for plan %s: %s", operation.ProvisioningParameters.PlanID, err)
   163  		return s.operationManager.OperationFailed(operation, "cannot create provisioning input creator", err, log)
   164  	}
   165  }
   166  
   167  func (s *InitialisationStep) configureKymaVersion(operation *internal.UpgradeKymaOperation, log logrus.FieldLogger) error {
   168  	if !operation.RuntimeVersion.IsEmpty() {
   169  		return nil
   170  	}
   171  
   172  	// set Kyma version from request or runtime parameters
   173  	var (
   174  		err     error
   175  		version *internal.RuntimeVersionData
   176  	)
   177  
   178  	version, err = s.runtimeVerConfigurator.ForUpgrade(*operation)
   179  	if err != nil {
   180  		return fmt.Errorf("while getting runtime version for upgrade: %w", err)
   181  	}
   182  
   183  	// update operation version
   184  	var repeat time.Duration
   185  	if *operation, repeat, err = s.operationManager.UpdateOperation(*operation, func(operation *internal.UpgradeKymaOperation) {
   186  		operation.RuntimeVersion = *version
   187  	}, log); repeat != 0 {
   188  		return fmt.Errorf("unable to update operation with RuntimeVersion property: %w", err)
   189  	}
   190  
   191  	return nil
   192  }
   193  
   194  // checkRuntimeStatus will check operation runtime status
   195  // It will also trigger performRuntimeTasks upgrade steps to ensure
   196  // all the required dependencies have been fulfilled for upgrade operation.
   197  func (s *InitialisationStep) checkRuntimeStatus(operation internal.UpgradeKymaOperation, log logrus.FieldLogger) (internal.UpgradeKymaOperation, time.Duration, error) {
   198  	if time.Since(operation.UpdatedAt) > CheckStatusTimeout {
   199  		log.Infof("operation has reached the time limit: updated operation time: %s", operation.UpdatedAt)
   200  		if operation.RuntimeOperation.Notification {
   201  			err := s.sendNotificationComplete(operation, log)
   202  			//currently notification error can only be temporary error
   203  			if err != nil && kebError.IsTemporaryError(err) {
   204  				return operation, 5 * time.Second, nil
   205  			}
   206  		}
   207  		return s.restoreAvsAndFailOperation(operation, fmt.Sprintf("operation has reached the time limit: %s", CheckStatusTimeout), log)
   208  	}
   209  
   210  	var err error
   211  	// Ensure AVS evaluations are set to maintenance
   212  	if !s.evaluationManager.IsMaintenanceModeDisabled() {
   213  		operation, err = SetAvsStatusMaintenance(s.evaluationManager, s.operationManager, operation, log)
   214  		if err != nil {
   215  			if kebError.IsTemporaryError(err) {
   216  				return s.operationManager.RetryOperation(operation, "error while setting avs to maintenance", err, 10*time.Second, 10*time.Minute, log)
   217  			}
   218  			return s.operationManager.OperationFailed(operation, "error while setting avs to maintenance", err, log)
   219  		}
   220  	}
   221  
   222  	if operation.ClusterConfigurationVersion != 0 {
   223  		// upgrade was trigerred in reconciler, no need to call provisioner and create UpgradeRuntimeInput
   224  		// TODO: deal with skipping steps in case of calling reconciler for Kyma 2.0 upgrade
   225  		log.Debugf("Cluster configuration already created, skipping")
   226  		return operation, 0, nil
   227  	}
   228  
   229  	status, err := s.provisionerClient.RuntimeOperationStatus(operation.RuntimeOperation.GlobalAccountID, operation.ProvisionerOperationID)
   230  	if err != nil {
   231  		return operation, s.timeSchedule.StatusCheck, nil
   232  	}
   233  	log.Infof("call to provisioner returned %s status", status.State.String())
   234  
   235  	var msg string
   236  	var delay time.Duration
   237  	if status.Message != nil {
   238  		msg = *status.Message
   239  	}
   240  
   241  	// wait for operation completion
   242  	switch status.State {
   243  	case gqlschema.OperationStateInProgress, gqlschema.OperationStatePending:
   244  		return operation, s.timeSchedule.StatusCheck, nil
   245  	case gqlschema.OperationStateSucceeded, gqlschema.OperationStateFailed:
   246  		if operation.RuntimeOperation.Notification {
   247  			err := s.sendNotificationComplete(operation, log)
   248  			//currently notification error can only be temporary error
   249  			if err != nil && kebError.IsTemporaryError(err) {
   250  				return operation, 5 * time.Second, nil
   251  			}
   252  		}
   253  		// Set post-upgrade description which also reset UpdatedAt for operation retries to work properly
   254  		if operation.Description != postUpgradeDescription {
   255  			operation, delay, _ = s.operationManager.UpdateOperation(operation, func(operation *internal.UpgradeKymaOperation) {
   256  				operation.Description = postUpgradeDescription
   257  			}, log)
   258  			if delay != 0 {
   259  				return operation, delay, nil
   260  			}
   261  		}
   262  	}
   263  
   264  	// Kyma 1.X operation is finished or failed, restore AVS status
   265  	operation, err = RestoreAvsStatus(s.evaluationManager, s.operationManager, operation, log)
   266  	if err != nil {
   267  		if kebError.IsTemporaryError(err) {
   268  			return s.operationManager.RetryOperation(operation, "error while restoring avs status", err, 10*time.Second, 10*time.Minute, log)
   269  		}
   270  		return s.operationManager.OperationFailed(operation, "error while restoring avs status", err, log)
   271  	}
   272  
   273  	// handle operation completion
   274  	switch status.State {
   275  	case gqlschema.OperationStateSucceeded:
   276  		return s.operationManager.OperationSucceeded(operation, msg, log)
   277  	case gqlschema.OperationStateFailed:
   278  		return s.operationManager.OperationFailed(operation, fmt.Sprintf("provisioner client returns failed status: %s", msg), nil, log)
   279  	}
   280  
   281  	return s.operationManager.OperationFailed(operation, fmt.Sprintf("unsupported provisioner client status: %s", status.State.String()), nil, log)
   282  }
   283  
   284  func (s *InitialisationStep) sendNotificationComplete(operation internal.UpgradeKymaOperation, log logrus.FieldLogger) error {
   285  	tenants := []notification.NotificationTenant{
   286  		{
   287  			InstanceID: operation.InstanceID,
   288  			EndDate:    time.Now().Format("2006-01-02 15:04:05"),
   289  			State:      notification.FinishedMaintenanceState,
   290  		},
   291  	}
   292  	notificationParams := notification.NotificationParams{
   293  		OrchestrationID: operation.OrchestrationID,
   294  		Tenants:         tenants,
   295  	}
   296  	notificationBundle, err := s.bundleBuilder.NewBundle(operation.OrchestrationID, notificationParams)
   297  	if err != nil {
   298  		log.Errorf("%s: %s", "Failed to create Notification Bundle", err)
   299  		return err
   300  	}
   301  	err = notificationBundle.UpdateNotificationEvent()
   302  	if err != nil {
   303  		msg := fmt.Sprintf("cannot update notification for orchestration %s", operation.OrchestrationID)
   304  		log.Errorf("%s: %s", msg, err)
   305  		return err
   306  	}
   307  	return nil
   308  }
   309  
   310  func (s *InitialisationStep) restoreAvsAndFailOperation(operation internal.UpgradeKymaOperation, description string, log logrus.FieldLogger) (internal.UpgradeKymaOperation, time.Duration, error) {
   311  	err := s.evaluationManager.RestoreStatus(&operation.Avs, log)
   312  	if err != nil {
   313  		return s.operationManager.RetryOperation(operation, "error while restoring AvS state", err, 3*time.Second, time.Minute, log)
   314  	}
   315  	operation, retry, _ := s.operationManager.UpdateOperation(operation, func(op *internal.UpgradeKymaOperation) {
   316  		op.Avs.AvsInternalEvaluationStatus = operation.Avs.AvsInternalEvaluationStatus
   317  		op.Avs.AvsExternalEvaluationStatus = operation.Avs.AvsExternalEvaluationStatus
   318  	}, log)
   319  	if retry > 0 {
   320  		return operation, retry, nil
   321  	}
   322  	return s.operationManager.OperationFailed(operation, description, nil, log)
   323  }