github.com/kyma-project/kyma-environment-broker@v0.0.1/internal/process/provisioning/check_cluster_configuration.go (about)

     1  package provisioning
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	reconcilerApi "github.com/kyma-incubator/reconciler/pkg/keb"
     8  	"github.com/kyma-project/kyma-environment-broker/internal"
     9  	kebError "github.com/kyma-project/kyma-environment-broker/internal/error"
    10  	"github.com/kyma-project/kyma-environment-broker/internal/process"
    11  	"github.com/kyma-project/kyma-environment-broker/internal/reconciler"
    12  	"github.com/kyma-project/kyma-environment-broker/internal/storage"
    13  	"github.com/sirupsen/logrus"
    14  	"k8s.io/apimachinery/pkg/util/wait"
    15  )
    16  
// CheckClusterConfigurationStep checks if the SKR configuration is applied (by reconciler).
// While the configuration is not yet applied, Run returns a non-zero delay; once
// the operation has not been updated for longer than provisioningTimeout, the
// step asks the reconciler to delete the cluster and fails the operation.
type CheckClusterConfigurationStep struct {
	// reconcilerClient is used to read the cluster state (GetCluster) and,
	// on timeout, to delete the cluster (DeleteCluster).
	reconcilerClient    reconciler.Client
	// operationManager is used to mark the operation as failed.
	operationManager    *process.OperationManager
	// provisioningTimeout is the maximum time allowed since the operation's
	// UpdatedAt timestamp before the step gives up.
	provisioningTimeout time.Duration
}
    23  
    24  func NewCheckClusterConfigurationStep(os storage.Operations,
    25  	reconcilerClient reconciler.Client,
    26  	provisioningTimeout time.Duration) *CheckClusterConfigurationStep {
    27  	return &CheckClusterConfigurationStep{
    28  		reconcilerClient:    reconcilerClient,
    29  		operationManager:    process.NewOperationManager(os),
    30  		provisioningTimeout: provisioningTimeout,
    31  	}
    32  }
    33  
// Compile-time check that *CheckClusterConfigurationStep satisfies process.Step.
var _ process.Step = (*CheckClusterConfigurationStep)(nil)
    35  
    36  func (s *CheckClusterConfigurationStep) Name() string {
    37  	return "Check_Cluster_Configuration"
    38  }
    39  
    40  func (s *CheckClusterConfigurationStep) Run(operation internal.Operation, log logrus.FieldLogger) (internal.Operation, time.Duration, error) {
    41  	if time.Since(operation.UpdatedAt) > s.provisioningTimeout {
    42  		return s.handleTimeout(operation, log)
    43  	}
    44  
    45  	state, err := s.reconcilerClient.GetCluster(operation.RuntimeID, operation.ClusterConfigurationVersion)
    46  	if kebError.IsTemporaryError(err) {
    47  		log.Errorf("Reconciler GetCluster method failed (temporary error, retrying): %s", err.Error())
    48  		return operation, 1 * time.Minute, nil
    49  	}
    50  	if err != nil {
    51  		log.Errorf("Reconciler GetCluster method failed: %s", err.Error())
    52  		return s.operationManager.OperationFailed(operation, "unable to get cluster state", err, log)
    53  	}
    54  	log.Debugf("Cluster configuration status %s", state.Status)
    55  
    56  	switch state.Status {
    57  	case reconcilerApi.StatusReconciling, reconcilerApi.StatusReconcilePending:
    58  		return operation, 30 * time.Second, nil
    59  	case reconcilerApi.StatusReconcileErrorRetryable:
    60  		log.Infof("Reconciler failed with retryable, rechecking in 10 minutes.")
    61  		return operation, 10 * time.Minute, nil
    62  	case reconcilerApi.StatusReady:
    63  		return operation, 0, nil
    64  
    65  	case reconcilerApi.StatusError:
    66  		errMsg := fmt.Sprintf("Reconciler failed. %v", reconciler.PrettyFailures(state))
    67  		log.Warnf(errMsg)
    68  		return s.operationManager.OperationFailed(operation, "Reconciler failed with error cluster status", reconciler.NewReconcilerError(state.Failures, errMsg), log)
    69  	default:
    70  		errMsg := fmt.Sprintf("unknown cluster status: %s", state.Status)
    71  		return s.operationManager.OperationFailed(operation, "Reconciler failed with unknown cluster status", reconciler.NewReconcilerError(state.Failures, errMsg), log)
    72  	}
    73  }
    74  
    75  func (s *CheckClusterConfigurationStep) handleTimeout(operation internal.Operation, log logrus.FieldLogger) (internal.Operation, time.Duration, error) {
    76  	log.Warnf("Operation has reached the time limit (%v): updated operation time: %s", s.provisioningTimeout, operation.UpdatedAt)
    77  	log.Infof("Deleting cluster %s", operation.RuntimeID)
    78  	operation.EventInfof("Deleting cluster configuration due to check cluster configuration timeout")
    79  	/*
    80  		If the reconciliation timeouted, we have to delete cluster.
    81  		In case of an error, try few times.
    82  	*/
    83  	err := wait.PollImmediate(5*time.Second, 30*time.Second, func() (bool, error) {
    84  		err := s.reconcilerClient.DeleteCluster(operation.RuntimeID)
    85  		if err != nil {
    86  			log.Warnf("Unable to delete cluster: %s", err.Error())
    87  		}
    88  		return err == nil, nil
    89  	})
    90  	if err != nil {
    91  		log.Errorf("Unable to delete cluster: %s", err.Error())
    92  	}
    93  	return s.operationManager.OperationFailed(operation, fmt.Sprintf("operation has reached the time limit: %s", s.provisioningTimeout), err, log)
    94  }