github.com/kyma-project/kyma-environment-broker@v0.0.1/internal/orchestration/handlers/cluster_retry.go (about)

     1  package handlers
     2  
     3  import (
     4  	"fmt"
     5  
     6  	commonOrchestration "github.com/kyma-project/kyma-environment-broker/common/orchestration"
     7  	"github.com/kyma-project/kyma-environment-broker/internal"
     8  	"github.com/kyma-project/kyma-environment-broker/internal/process"
     9  	"github.com/kyma-project/kyma-environment-broker/internal/storage"
    10  	"github.com/sirupsen/logrus"
    11  )
    12  
    13  type clusterRetryer Retryer
    14  
    15  func NewClusterRetryer(orchestrations storage.Orchestrations, operations storage.Operations, q *process.Queue, logger logrus.FieldLogger) *clusterRetryer {
    16  	return &clusterRetryer{
    17  		orchestrations: orchestrations,
    18  		operations:     operations,
    19  		queue:          q,
    20  		log:            logger,
    21  	}
    22  }
    23  
    24  func (r *clusterRetryer) orchestrationRetry(o *internal.Orchestration, opsByOrch []internal.UpgradeClusterOperation, operationIDs []string) (commonOrchestration.RetryResponse, error) {
    25  	var err error
    26  	resp := commonOrchestration.RetryResponse{OrchestrationID: o.OrchestrationID}
    27  
    28  	ops, invalidIDs := r.orchestrationOperationsFilter(opsByOrch, operationIDs)
    29  	resp.InvalidOperations = invalidIDs
    30  	if len(ops) == 0 {
    31  		zeroValidOperationInfo(&resp, r.log)
    32  		return resp, nil
    33  	}
    34  
    35  	// as failed orchestration has finished before
    36  	// only retry the latest failed cluster upgrade operation for the same instance
    37  	if o.State == commonOrchestration.Failed {
    38  		var oldIDs []string
    39  		var err error
    40  
    41  		ops, oldIDs, err = r.latestOperationValidate(o.OrchestrationID, ops)
    42  		if err != nil {
    43  			return resp, err
    44  		}
    45  		resp.OldOperations = oldIDs
    46  
    47  		if len(ops) == 0 {
    48  			zeroValidOperationInfo(&resp, r.log)
    49  			return resp, nil
    50  		}
    51  	}
    52  
    53  	for _, op := range ops {
    54  		resp.RetryShoots = append(resp.RetryShoots, op.Operation.InstanceDetails.ShootName)
    55  	}
    56  	resp.Msg = "retry operations are queued for processing"
    57  
    58  	for _, op := range ops {
    59  		o.Parameters.RetryOperation.RetryOperations = append(o.Parameters.RetryOperation.RetryOperations, op.Operation.ID)
    60  	}
    61  
    62  	// get orchestration state again in case in progress changed to failed, need to put in queue
    63  	lastState, err := orchestrationStateUpdate(o, r.orchestrations, o.OrchestrationID, r.log)
    64  	if err != nil {
    65  		return resp, err
    66  	}
    67  
    68  	if lastState == commonOrchestration.Failed {
    69  		r.queue.Add(o.OrchestrationID)
    70  	}
    71  
    72  	return resp, nil
    73  }
    74  
    75  func (r *clusterRetryer) orchestrationOperationsFilter(opsByOrch []internal.UpgradeClusterOperation, opsIDs []string) ([]internal.UpgradeClusterOperation, []string) {
    76  	if len(opsIDs) <= 0 {
    77  		return opsByOrch, nil
    78  	}
    79  
    80  	var retOps []internal.UpgradeClusterOperation
    81  	var invalidIDs []string
    82  	var found bool
    83  
    84  	for _, opID := range opsIDs {
    85  		for _, op := range opsByOrch {
    86  			if opID == op.Operation.ID {
    87  				retOps = append(retOps, op)
    88  				found = true
    89  				break
    90  			}
    91  		}
    92  
    93  		if found {
    94  			found = false
    95  		} else {
    96  			invalidIDs = append(invalidIDs, opID)
    97  		}
    98  	}
    99  
   100  	return retOps, invalidIDs
   101  }
   102  
   103  func (r *clusterRetryer) latestOperationValidate(orchestrationID string, ops []internal.UpgradeClusterOperation) ([]internal.UpgradeClusterOperation, []string, error) {
   104  	var retryOps []internal.UpgradeClusterOperation
   105  	var oldIDs []string
   106  
   107  	for _, op := range ops {
   108  		instanceID := op.InstanceID
   109  
   110  		clusterOps, err := r.operations.ListUpgradeClusterOperationsByInstanceID(instanceID)
   111  		if err != nil {
   112  			// fail for listing operations of one instance, then http return and report fail
   113  			err = fmt.Errorf("while getting operations by instanceID %s: %w", instanceID, err)
   114  			r.log.Error(err)
   115  			return nil, nil, err
   116  		}
   117  
   118  		var errFound, newerExist bool
   119  		num := len(clusterOps)
   120  
   121  		for i := 0; i < num; i++ {
   122  			if op.CreatedAt.Before(clusterOps[i].CreatedAt) {
   123  				if num == 1 {
   124  					errFound = true
   125  					break
   126  				}
   127  
   128  				// 'canceled' or 'canceling' newer op is not a newer op
   129  				if clusterOps[i].State == commonOrchestration.Canceled || clusterOps[i].State == commonOrchestration.Canceling {
   130  					continue
   131  				}
   132  
   133  				oldIDs = append(oldIDs, op.Operation.ID)
   134  				newerExist = true
   135  			}
   136  
   137  			break
   138  		}
   139  
   140  		if num == 0 || errFound {
   141  			err = fmt.Errorf("while getting operations by instanceID %s: %w", instanceID, err)
   142  			r.log.Error(err)
   143  			return nil, nil, err
   144  		}
   145  
   146  		if newerExist {
   147  			continue
   148  		}
   149  
   150  		retryOps = append(retryOps, op)
   151  	}
   152  
   153  	return retryOps, oldIDs, nil
   154  }