github.com/kyma-project/kyma-environment-broker@v0.0.1/common/orchestration/strategies/parallel.go

package strategies

import (
	"fmt"
	"runtime/debug"
	"sync"
	"time"

	"github.com/google/uuid"
	"github.com/kyma-project/kyma-environment-broker/common/orchestration"
	"github.com/sirupsen/logrus"
	"k8s.io/client-go/util/workqueue"
)

type ParallelOrchestrationStrategy struct {
	executor        orchestration.OperationExecutor
	dq              map[string]workqueue.DelayingInterface // scheduling queues: hold all pending and in-progress operations, keyed by execution ID
	pq              map[string]workqueue.DelayingInterface // processing queues: hold only the in-progress operations, keyed by execution ID
	wg              map[string]*sync.WaitGroup             // tracks the worker goroutines of each execution
	mux             sync.RWMutex                           // guards dq, pq, wg, and scheduleNum
	log             logrus.FieldLogger
	rescheduleDelay time.Duration
	scheduleNum     map[string]int // number of operations still awaiting processing, per execution
	speedFactor     int            // divides executor requeue delays; 1 by default
}

// NewParallelOrchestrationStrategy returns a new parallel orchestration strategy, which
// executes operations in parallel using a pool of workers and a delaying queue to support time-based scheduling.
func NewParallelOrchestrationStrategy(executor orchestration.OperationExecutor, log logrus.FieldLogger, rescheduleDelay time.Duration) orchestration.Strategy {
	strategy := &ParallelOrchestrationStrategy{
		executor:        executor,
		dq:              map[string]workqueue.DelayingInterface{},
		pq:              map[string]workqueue.DelayingInterface{},
		wg:              map[string]*sync.WaitGroup{},
		log:             log,
		rescheduleDelay: rescheduleDelay,
		scheduleNum:     map[string]int{},
		speedFactor:     1,
	}

	return strategy
}
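
// Example usage (a minimal sketch; executor, ops, and the exact shape of
// orchestration.StrategySpec below are assumptions for illustration, not
// confirmed by this file):
//
//	strategy := NewParallelOrchestrationStrategy(executor, logrus.New(), time.Hour)
//	execID, err := strategy.Execute(ops, orchestration.StrategySpec{
//		Parallel: orchestration.ParallelStrategySpec{Workers: 5},
//	})
//	if err == nil {
//		strategy.Wait(execID) // blocks until all workers finish
//	}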

// SpeedUp divides executor-requested requeue delays by the given factor.
// It is not synchronized, so it should be called before Execute.
func (p *ParallelOrchestrationStrategy) SpeedUp(factor int) {
	p.speedFactor = factor
}

// Execute starts the parallel execution of operations.
func (p *ParallelOrchestrationStrategy) Execute(operations []orchestration.RuntimeOperation, strategySpec orchestration.StrategySpec) (string, error) {
	if len(operations) == 0 {
		return "", nil
	}

	execID := uuid.New().String()
	p.mux.Lock()
	p.wg[execID] = &sync.WaitGroup{}
	p.dq[execID] = workqueue.NewDelayingQueue()
	p.pq[execID] = workqueue.NewDelayingQueue()
	p.mux.Unlock()

	err := p.Insert(execID, operations, strategySpec)
	if err != nil {
		return execID, fmt.Errorf("while inserting operations to queue: %w", err)
	}

	// Create workers
	for i := 0; i < strategySpec.Parallel.Workers; i++ {
		p.createWorker(execID, strategySpec)
	}

	return execID, nil
}

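// Insert puts the given operations on the scheduling queue of the given
// execution ID, delayed until each operation's computed schedule time.
// Operations whose schedule cannot be determined or persisted are requeued
// via handleRescheduleErrorOperation.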
func (p *ParallelOrchestrationStrategy) Insert(execID string, operations []orchestration.RuntimeOperation, strategySpec orchestration.StrategySpec) error {
	p.mux.Lock()
	defer p.mux.Unlock()

	for i, op := range operations {
		duration, err := p.updateMaintenanceWindow(execID, &operations[i], strategySpec)
		if err != nil {
			// Storage read/update failed during the maintenance window reschedule.
			p.handleRescheduleErrorOperation(execID, &operations[i])
			p.log.Errorf("while processing operation %s: %v; it will be rescheduled", op.ID, err)
		} else {
			// Check that the queue exists before touching it, to avoid a nil interface call.
			dq, exists := p.dq[execID]
			if !exists {
				return fmt.Errorf("no queue for the execution ID: %s", execID)
			}
			if dq.ShuttingDown() {
				return fmt.Errorf("the execution ID %s is shut down", execID)
			}

			dq.AddAfter(&operations[i], duration)
		}

		p.scheduleNum[execID]++
	}

	return nil
}

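// createWorker registers a worker on the execution's WaitGroup and starts a
// goroutine that schedules and processes operations until the queues shut down.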
func (p *ParallelOrchestrationStrategy) createWorker(execID string, strategy orchestration.StrategySpec) {
	// Guard the map read against concurrent Execute calls for other executions.
	p.mux.RLock()
	p.wg[execID].Add(1)
	p.mux.RUnlock()

	go func() {
		p.scheduleOperationsLoop(execID, strategy)

		p.mux.RLock()
		p.wg[execID].Done()
		p.mux.RUnlock()
	}()
}

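// scheduleOperationsLoop drains the scheduling queue: operations whose
// schedule time has arrived are moved to the processing queue and executed;
// the others are requeued with the remaining delay.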
func (p *ParallelOrchestrationStrategy) scheduleOperationsLoop(execID string, strategy orchestration.StrategySpec) {
	p.mux.RLock()
	dq := p.dq[execID]
	pq := p.pq[execID]
	p.mux.RUnlock()

	for {
		p.mux.RLock()
		if p.scheduleNum[execID] <= 0 {
			dq.ShutDown()
			pq.ShutDown()
		}
		p.mux.RUnlock()

		item, shutdown := dq.Get()
		if shutdown {
			p.log.Infof("scheduling queue is shut down")
			break
		}

		op := item.(*orchestration.RuntimeOperation)

		// Re-check the maintenance window before processing, in case the item was dequeued later than scheduled.
		duration, err := p.updateMaintenanceWindow(execID, op, strategy)
		if err != nil {
			// Storage read/update failed; retry the operation later.
			p.handleRescheduleErrorOperation(execID, op)
			dq.Done(item)
			continue
		}

		log := p.log.WithField("operationID", op.ID)
		if duration <= 0 {
			log.Infof("operation is scheduled now")

			pq.Add(item)
			p.processOperation(execID)

			p.mux.Lock()
			p.scheduleNum[execID]--
			p.mux.Unlock()
		} else {
			log.Infof("operation will be scheduled in %v", duration)
			dq.AddAfter(item, duration)
			dq.Done(item)
		}
	}
}

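// processOperation executes items from the processing queue until the
// executor finishes them (or fails) or the queue shuts down. A non-zero
// requeue duration returned by the executor re-adds the item after that
// delay, divided by speedFactor.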
func (p *ParallelOrchestrationStrategy) processOperation(execID string) {
	// Snapshot the queues under the read lock to avoid unsynchronized map access.
	p.mux.RLock()
	dq := p.dq[execID]
	pq := p.pq[execID]
	p.mux.RUnlock()

	exit := false
	for !exit {
		exit = func() bool {
			item, quit := pq.Get()
			if quit {
				p.log.Infof("processing queue is shut down")
				return true
			}

			op := item.(*orchestration.RuntimeOperation)
			id := op.ID
			log := p.log.WithField("operationID", id)

			defer func() {
				if err := recover(); err != nil {
					log.Errorf("panic from process: %v. Stacktrace: %s", err, debug.Stack())
				}
				pq.Done(item)
			}()

			when, err := p.executor.Execute(id)
			if err == nil && when != 0 {
				log.Infof("Adding %q item after %v", id, when)
				pq.AddAfter(item, time.Duration(int64(when)/int64(p.speedFactor)))
				return false
			}
			if err != nil {
				log.Errorf("Error from process: %v", err)
			}

			log.Infof("Finishing processing operation")
			dq.Done(item)

			return true
		}()
	}
}

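// updateMaintenanceWindow returns how long to wait before the operation may
// run. Under the maintenance window strategy, a window that has already
// passed is shifted forward (by rescheduleDelay if set, otherwise to the
// next configured maintenance day) and the new window is persisted via the
// executor.
//
// Illustrative example (values are assumptions, not from this package): with
// rescheduleDelay=24h and a window of yesterday 02:00-04:00, both bounds move
// one day forward and the returned duration is the time until the new begin.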
func (p *ParallelOrchestrationStrategy) updateMaintenanceWindow(execID string, op *orchestration.RuntimeOperation, strategy orchestration.StrategySpec) (time.Duration, error) {
	var duration time.Duration
	id := op.ID

	if strategy.MaintenanceWindow {
		// If the maintenance window for this operation has already passed, shift it forward and reprocess in the next window.
		if !op.MaintenanceWindowEnd.IsZero() && op.MaintenanceWindowEnd.Before(time.Now()) {
			if p.rescheduleDelay > 0 {
				op.MaintenanceWindowBegin = op.MaintenanceWindowBegin.Add(p.rescheduleDelay)
				op.MaintenanceWindowEnd = op.MaintenanceWindowEnd.Add(p.rescheduleDelay)
			} else {
				currentDay := op.MaintenanceWindowBegin.Weekday()
				diff := orchestration.NextAvailableDayDiff(currentDay, orchestration.ConvertSliceOfDaysToMap(op.MaintenanceDays))
				op.MaintenanceWindowBegin = op.MaintenanceWindowBegin.AddDate(0, 0, diff)
				op.MaintenanceWindowEnd = op.MaintenanceWindowEnd.AddDate(0, 0, diff)
			}

			// An error here means the new window could not be read from or persisted to storage.
			err := p.executor.Reschedule(id, op.MaintenanceWindowBegin, op.MaintenanceWindowEnd)
			if err != nil {
				return duration, fmt.Errorf("while rescheduling operation by executor (still continuing with new schedule): %w", err)
			}
		}

		duration = time.Until(op.MaintenanceWindowBegin)
	} else {
		if err := p.executor.Reschedule(id, strategy.ScheduleTime, strategy.ScheduleTime); err != nil {
			return duration, fmt.Errorf("while rescheduling operation by executor: %w", err)
		}
		duration = time.Until(strategy.ScheduleTime)
	}

	return duration, nil
}

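// Wait blocks until all workers of the given execution have finished.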
func (p *ParallelOrchestrationStrategy) Wait(executionID string) {
	p.mux.RLock()
	wg := p.wg[executionID]
	p.mux.RUnlock()
	if wg != nil {
		wg.Wait()
	}
}

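// Cancel shuts down both queues of the given execution, which stops its workers.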
func (p *ParallelOrchestrationStrategy) Cancel(executionID string) {
	if executionID == "" {
		return
	}
	p.log.Infof("Cancelling strategy execution %s", executionID)

	p.mux.Lock()
	defer p.mux.Unlock()
	dq := p.dq[executionID]
	pq := p.pq[executionID]

	if dq != nil {
		dq.ShutDown()
	}

	if pq != nil {
		pq.ShutDown()
	}
}

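// handleRescheduleErrorOperation requeues an operation whose schedule update
// failed (e.g. a storage error) to be retried after 24 hours.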
func (p *ParallelOrchestrationStrategy) handleRescheduleErrorOperation(execID string, op *orchestration.RuntimeOperation) {
	p.dq[execID].AddAfter(op, 24*time.Hour)
}