github.com/kyma-project/kyma-environment-broker@v0.0.1/internal/orchestration/handlers/kyma_retry.go (about)

     1  package handlers
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	commonOrchestration "github.com/kyma-project/kyma-environment-broker/common/orchestration"
     8  	"github.com/kyma-project/kyma-environment-broker/internal"
     9  	"github.com/kyma-project/kyma-environment-broker/internal/process"
    10  	"github.com/kyma-project/kyma-environment-broker/internal/storage"
    11  	"github.com/sirupsen/logrus"
    12  )
    13  
    14  type Retryer struct {
    15  	orchestrations storage.Orchestrations
    16  	operations     storage.Operations
    17  	queue          *process.Queue
    18  	log            logrus.FieldLogger
    19  }
    20  
    21  type kymaRetryer Retryer
    22  
    23  func NewKymaRetryer(orchestrations storage.Orchestrations, operations storage.Operations, q *process.Queue, logger logrus.FieldLogger) *kymaRetryer {
    24  	return &kymaRetryer{
    25  		orchestrations: orchestrations,
    26  		operations:     operations,
    27  		queue:          q,
    28  		log:            logger,
    29  	}
    30  }
    31  
    32  func (r *kymaRetryer) orchestrationRetry(o *internal.Orchestration, opsByOrch []internal.UpgradeKymaOperation, operationIDs []string, immediate string) (commonOrchestration.RetryResponse, error) {
    33  	var err error
    34  	resp := commonOrchestration.RetryResponse{OrchestrationID: o.OrchestrationID}
    35  
    36  	ops, invalidIDs := r.orchestrationOperationsFilter(opsByOrch, operationIDs)
    37  	resp.InvalidOperations = invalidIDs
    38  	if len(ops) == 0 {
    39  		zeroValidOperationInfo(&resp, r.log)
    40  		return resp, nil
    41  	}
    42  
    43  	// as failed orchestration has finished before
    44  	// only retry the latest failed kyma upgrade operation for the same instance
    45  	if o.State == commonOrchestration.Failed {
    46  		var oldIDs []string
    47  		var err error
    48  
    49  		ops, oldIDs, err = r.latestOperationValidate(o.OrchestrationID, ops)
    50  		if err != nil {
    51  			return resp, err
    52  		}
    53  		resp.OldOperations = oldIDs
    54  
    55  		if len(ops) == 0 {
    56  			zeroValidOperationInfo(&resp, r.log)
    57  			return resp, nil
    58  		}
    59  	}
    60  
    61  	for _, op := range ops {
    62  		resp.RetryShoots = append(resp.RetryShoots, op.Operation.InstanceDetails.ShootName)
    63  	}
    64  	resp.Msg = "retry operations are queued for processing"
    65  
    66  	for _, op := range ops {
    67  		o.Parameters.RetryOperation.RetryOperations = append(o.Parameters.RetryOperation.RetryOperations, op.Operation.ID)
    68  		o.Parameters.RetryOperation.Immediate = immediate == "true"
    69  	}
    70  
    71  	// get orchestration state again in case in progress changed to failed, need to put in queue
    72  	lastState, err := orchestrationStateUpdate(o, r.orchestrations, o.OrchestrationID, r.log)
    73  	if err != nil {
    74  		return resp, err
    75  	}
    76  
    77  	r.log.Infof("Converting orchestration %s from state %s to retrying", o.OrchestrationID, lastState)
    78  	if lastState == commonOrchestration.Failed {
    79  		r.queue.Add(o.OrchestrationID)
    80  	}
    81  
    82  	return resp, nil
    83  }
    84  
    85  // filter out the operation which doesn't belong to the given orchestration
    86  func (r *kymaRetryer) orchestrationOperationsFilter(opsByOrch []internal.UpgradeKymaOperation, opsIDs []string) ([]internal.UpgradeKymaOperation, []string) {
    87  	if len(opsIDs) <= 0 {
    88  		return opsByOrch, nil
    89  	}
    90  
    91  	var retOps []internal.UpgradeKymaOperation
    92  	var invalidIDs []string
    93  	var found bool
    94  
    95  	for _, opID := range opsIDs {
    96  		for _, op := range opsByOrch {
    97  			if opID == op.Operation.ID {
    98  				retOps = append(retOps, op)
    99  				found = true
   100  				break
   101  			}
   102  		}
   103  
   104  		if found {
   105  			found = false
   106  		} else {
   107  			invalidIDs = append(invalidIDs, opID)
   108  		}
   109  	}
   110  
   111  	return retOps, invalidIDs
   112  }
   113  
   114  // if the required operation for kyma upgrade is not the last operated operation for kyma upgrade, then report error
   115  // only validate for failed orchestration
   116  func (r *kymaRetryer) latestOperationValidate(orchestrationID string, ops []internal.UpgradeKymaOperation) ([]internal.UpgradeKymaOperation, []string, error) {
   117  	var retryOps []internal.UpgradeKymaOperation
   118  	var oldIDs []string
   119  
   120  	for _, op := range ops {
   121  		instanceID := op.InstanceID
   122  
   123  		kymaOps, err := r.operations.ListUpgradeKymaOperationsByInstanceID(instanceID)
   124  		if err != nil {
   125  			// fail for listing operations of one instance, then http return and report fail
   126  			r.log.Errorf("while getting operations by instanceID %s: %v", instanceID, err)
   127  			return nil, nil, fmt.Errorf("while getting operations by instanceID %s: %w", instanceID, err)
   128  		}
   129  
   130  		var errFound, newerExist bool
   131  		num := len(kymaOps)
   132  
   133  		for i := 0; i < num; i++ {
   134  			if op.CreatedAt.Before(kymaOps[i].CreatedAt) {
   135  				if num == 1 {
   136  					errFound = true
   137  					break
   138  				}
   139  
   140  				// 'canceled' or 'canceling' newer op is not a newer op
   141  				if kymaOps[i].State == commonOrchestration.Canceled || kymaOps[i].State == commonOrchestration.Canceling {
   142  					continue
   143  				}
   144  
   145  				oldIDs = append(oldIDs, op.Operation.ID)
   146  				newerExist = true
   147  			}
   148  
   149  			break
   150  		}
   151  
   152  		if num == 0 || errFound {
   153  			r.log.Errorf("while getting operations by instanceID %s: %v", instanceID, err)
   154  			return nil, nil, fmt.Errorf("while getting operations by instanceID %s: %w", instanceID, err)
   155  		}
   156  
   157  		if newerExist {
   158  			continue
   159  		}
   160  
   161  		retryOps = append(retryOps, op)
   162  	}
   163  
   164  	return retryOps, oldIDs, nil
   165  }
   166  
   167  func orchestrationStateUpdate(orch *internal.Orchestration, orchestrations storage.Orchestrations, orchestrationID string, log logrus.FieldLogger) (string, error) {
   168  	o, err := orchestrations.GetByID(orchestrationID)
   169  	if err != nil {
   170  		log.Errorf("while getting orchestration %s: %v", orchestrationID, err)
   171  		return "", fmt.Errorf("while getting orchestration %s: %w", orchestrationID, err)
   172  	}
   173  	// last minute check in case in progress one got canceled.
   174  	state := o.State
   175  	if state == commonOrchestration.Canceling || state == commonOrchestration.Canceled {
   176  		log.Infof("orchestration %s was canceled right before retrying", orchestrationID)
   177  		return state, fmt.Errorf("orchestration %s was canceled right before retrying", orchestrationID)
   178  	}
   179  
   180  	o.UpdatedAt = time.Now()
   181  	o.Parameters.RetryOperation.RetryOperations = orch.Parameters.RetryOperation.RetryOperations
   182  	o.Parameters.RetryOperation.Immediate = orch.Parameters.RetryOperation.Immediate
   183  	if state == commonOrchestration.Failed {
   184  		o.Description += ", retrying"
   185  		o.State = commonOrchestration.Retrying
   186  	}
   187  	err = orchestrations.Update(*o)
   188  	if err != nil {
   189  		log.Errorf("while updating orchestration %s: %v", orchestrationID, err)
   190  		return state, fmt.Errorf("while updating orchestration %s: %w", orchestrationID, err)
   191  	}
   192  	return state, nil
   193  }
   194  
   195  func zeroValidOperationInfo(resp *commonOrchestration.RetryResponse, log logrus.FieldLogger) {
   196  	log.Infof("no valid operations to retry for orchestration %s", resp.OrchestrationID)
   197  	resp.Msg = fmt.Sprintf("No valid operations to retry for orchestration %s", resp.OrchestrationID)
   198  }