github.com/kyma-project/kyma-environment-broker@v0.0.1/internal/process/provisioning/check_cluster_configuration.go (about) 1 package provisioning 2 3 import ( 4 "fmt" 5 "time" 6 7 reconcilerApi "github.com/kyma-incubator/reconciler/pkg/keb" 8 "github.com/kyma-project/kyma-environment-broker/internal" 9 kebError "github.com/kyma-project/kyma-environment-broker/internal/error" 10 "github.com/kyma-project/kyma-environment-broker/internal/process" 11 "github.com/kyma-project/kyma-environment-broker/internal/reconciler" 12 "github.com/kyma-project/kyma-environment-broker/internal/storage" 13 "github.com/sirupsen/logrus" 14 "k8s.io/apimachinery/pkg/util/wait" 15 ) 16 17 // CheckClusterConfigurationStep checks if the SKR configuration is applied (by reconciler) 18 type CheckClusterConfigurationStep struct { 19 reconcilerClient reconciler.Client 20 operationManager *process.OperationManager 21 provisioningTimeout time.Duration 22 } 23 24 func NewCheckClusterConfigurationStep(os storage.Operations, 25 reconcilerClient reconciler.Client, 26 provisioningTimeout time.Duration) *CheckClusterConfigurationStep { 27 return &CheckClusterConfigurationStep{ 28 reconcilerClient: reconcilerClient, 29 operationManager: process.NewOperationManager(os), 30 provisioningTimeout: provisioningTimeout, 31 } 32 } 33 34 var _ process.Step = (*CheckClusterConfigurationStep)(nil) 35 36 func (s *CheckClusterConfigurationStep) Name() string { 37 return "Check_Cluster_Configuration" 38 } 39 40 func (s *CheckClusterConfigurationStep) Run(operation internal.Operation, log logrus.FieldLogger) (internal.Operation, time.Duration, error) { 41 if time.Since(operation.UpdatedAt) > s.provisioningTimeout { 42 return s.handleTimeout(operation, log) 43 } 44 45 state, err := s.reconcilerClient.GetCluster(operation.RuntimeID, operation.ClusterConfigurationVersion) 46 if kebError.IsTemporaryError(err) { 47 log.Errorf("Reconciler GetCluster method failed (temporary error, retrying): %s", err.Error()) 48 return operation, 1 * time.Minute, nil 49 } 50 if err != nil { 51 log.Errorf("Reconciler GetCluster method failed: %s", err.Error()) 52 return s.operationManager.OperationFailed(operation, "unable to get cluster state", err, log) 53 } 54 log.Debugf("Cluster configuration status %s", state.Status) 55 56 switch state.Status { 57 case reconcilerApi.StatusReconciling, reconcilerApi.StatusReconcilePending: 58 return operation, 30 * time.Second, nil 59 case reconcilerApi.StatusReconcileErrorRetryable: 60 log.Infof("Reconciler failed with retryable, rechecking in 10 minutes.") 61 return operation, 10 * time.Minute, nil 62 case reconcilerApi.StatusReady: 63 return operation, 0, nil 64 65 case reconcilerApi.StatusError: 66 errMsg := fmt.Sprintf("Reconciler failed. %v", reconciler.PrettyFailures(state)) 67 log.Warnf(errMsg) 68 return s.operationManager.OperationFailed(operation, "Reconciler failed with error cluster status", reconciler.NewReconcilerError(state.Failures, errMsg), log) 69 default: 70 errMsg := fmt.Sprintf("unknown cluster status: %s", state.Status) 71 return s.operationManager.OperationFailed(operation, "Reconciler failed with unknown cluster status", reconciler.NewReconcilerError(state.Failures, errMsg), log) 72 } 73 } 74 75 func (s *CheckClusterConfigurationStep) handleTimeout(operation internal.Operation, log logrus.FieldLogger) (internal.Operation, time.Duration, error) { 76 log.Warnf("Operation has reached the time limit (%v): updated operation time: %s", s.provisioningTimeout, operation.UpdatedAt) 77 log.Infof("Deleting cluster %s", operation.RuntimeID) 78 operation.EventInfof("Deleting cluster configuration due to check cluster configuration timeout") 79 /* 80 If the reconciliation timeouted, we have to delete cluster. 81 In case of an error, try few times. 82 */ 83 err := wait.PollImmediate(5*time.Second, 30*time.Second, func() (bool, error) { 84 err := s.reconcilerClient.DeleteCluster(operation.RuntimeID) 85 if err != nil { 86 log.Warnf("Unable to delete cluster: %s", err.Error()) 87 } 88 return err == nil, nil 89 }) 90 if err != nil { 91 log.Errorf("Unable to delete cluster: %s", err.Error()) 92 } 93 return s.operationManager.OperationFailed(operation, fmt.Sprintf("operation has reached the time limit: %s", s.provisioningTimeout), err, log) 94 }