github.com/kyma-project/kyma-environment-broker@v0.0.1/common/orchestration/strategies/parallel.go

package strategies

import (
	"fmt"
	"runtime/debug"
	"sync"
	"time"

	"github.com/google/uuid"
	"github.com/kyma-project/kyma-environment-broker/common/orchestration"
	"github.com/sirupsen/logrus"
	"k8s.io/client-go/util/workqueue"
)

type ParallelOrchestrationStrategy struct {
	executor        orchestration.OperationExecutor
	dq              map[string]workqueue.DelayingInterface // scheduling queue: a delaying queue holding all pending and in-progress operations
	pq              map[string]workqueue.DelayingInterface // processing queue: a delaying queue holding only in-progress operations
	wg              map[string]*sync.WaitGroup
	mux             sync.RWMutex
	log             logrus.FieldLogger
	rescheduleDelay time.Duration
	scheduleNum     map[string]int
	speedFactor     int
}

// NewParallelOrchestrationStrategy returns a new parallel orchestration strategy, which
// executes operations in parallel using a pool of workers and a delaying queue to support time-based scheduling.
func NewParallelOrchestrationStrategy(executor orchestration.OperationExecutor, log logrus.FieldLogger, rescheduleDelay time.Duration) orchestration.Strategy {
	strategy := &ParallelOrchestrationStrategy{
		executor:        executor,
		dq:              map[string]workqueue.DelayingInterface{},
		pq:              map[string]workqueue.DelayingInterface{},
		wg:              map[string]*sync.WaitGroup{},
		log:             log,
		rescheduleDelay: rescheduleDelay,
		scheduleNum:     map[string]int{},
		speedFactor:     1,
	}

	return strategy
}

// SpeedUp scales down the requeue delays returned by the executor by the given factor.
func (p *ParallelOrchestrationStrategy) SpeedUp(factor int) {
	p.speedFactor = factor
}

// Execute starts the parallel execution of operations.
func (p *ParallelOrchestrationStrategy) Execute(operations []orchestration.RuntimeOperation, strategySpec orchestration.StrategySpec) (string, error) {
	if len(operations) == 0 {
		return "", nil
	}

	execID := uuid.New().String()
	p.mux.Lock()
	p.wg[execID] = &sync.WaitGroup{}
	p.dq[execID] = workqueue.NewDelayingQueue()
	p.pq[execID] = workqueue.NewDelayingQueue()
	p.mux.Unlock()

	err := p.Insert(execID, operations, strategySpec)
	if err != nil {
		return execID, fmt.Errorf("while inserting operations to queue: %w", err)
	}

	// Create workers
	for i := 0; i < strategySpec.Parallel.Workers; i++ {
		p.createWorker(execID, strategySpec)
	}

	return execID, nil
}

func (p *ParallelOrchestrationStrategy) Insert(execID string, operations []orchestration.RuntimeOperation, strategySpec orchestration.StrategySpec) error {
	p.mux.Lock()
	defer p.mux.Unlock()

	for i, op := range operations {
		duration, err := p.updateMaintenanceWindow(execID, &operations[i], strategySpec)
		if err != nil {
			// error reading from or updating storage during the maintenance window reschedule
			p.handleRescheduleErrorOperation(execID, &operations[i])
			p.log.Errorf("while processing operation %s: %v, will reschedule it", op.ID, err)
		} else {
			// Look up the queue before using it, so a missing execution ID cannot cause a nil dereference.
			dq, exist := p.dq[execID]
			if !exist {
				return fmt.Errorf("no queue for the execution ID: %s", execID)
			}
			if dq.ShuttingDown() {
				return fmt.Errorf("the execution ID %s is shutting down", execID)
			}

			dq.AddAfter(&operations[i], duration)
		}

		p.scheduleNum[execID]++
	}

	return nil
}

func (p *ParallelOrchestrationStrategy) createWorker(execID string, strategy orchestration.StrategySpec) {
	p.wg[execID].Add(1)

	go func() {
		p.scheduleOperationsLoop(execID, strategy)

		p.mux.RLock()
		p.wg[execID].Done()
		p.mux.RUnlock()
	}()
}
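// The scheduling loop below follows the standard client-go delaying-queue
// consume pattern. A minimal standalone sketch of that pattern, for
// reference ("item" and "process" are hypothetical placeholders, not part
// of this package):
//
//	q := workqueue.NewDelayingQueue()
//	q.AddAfter(item, 5*time.Second) // item becomes visible to Get after the delay
//	for {
//		item, shutdown := q.Get()
//		if shutdown {
//			break
//		}
//		process(item) // hypothetical handler
//		q.Done(item)  // tell the queue the item is finished
//	}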
func (p *ParallelOrchestrationStrategy) scheduleOperationsLoop(execID string, strategy orchestration.StrategySpec) {
	p.mux.RLock()
	dq := p.dq[execID]
	pq := p.pq[execID]
	p.mux.RUnlock()

	for {
		p.mux.RLock()
		if p.scheduleNum[execID] <= 0 {
			dq.ShutDown()
			pq.ShutDown()
		}
		p.mux.RUnlock()

		item, shutdown := dq.Get()
		if shutdown {
			p.log.Infof("scheduling queue is shut down")
			break
		}

		op := item.(*orchestration.RuntimeOperation)

		// Re-check the maintenance window before processing, in case the item was dequeued later than scheduled.
		duration, err := p.updateMaintenanceWindow(execID, op, strategy)
		if err != nil {
			// error reading from or updating storage
			p.handleRescheduleErrorOperation(execID, op)
			dq.Done(item)
			continue
		}

		log := p.log.WithField("operationID", op.ID)
		if duration <= 0 {
			log.Infof("operation is scheduled now")

			pq.Add(item)
			p.processOperation(execID)

			p.mux.Lock()
			p.scheduleNum[execID]--
			p.mux.Unlock()
		} else {
			log.Infof("operation will be scheduled in %v", duration)
			dq.AddAfter(item, duration)
			dq.Done(item)
		}
	}
}
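// processOperation (below) divides each requeue delay returned by the
// executor by speedFactor. Illustrative arithmetic only:
//
//	when := 10 * time.Minute
//	delay := time.Duration(int64(when) / int64(5)) // with SpeedUp(5): 2 minutes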
func (p *ParallelOrchestrationStrategy) processOperation(execID string) {
	exit := false

	for !exit {
		exit = func() bool {
			item, quit := p.pq[execID].Get()
			if quit {
				p.log.Infof("processing queue is shut down")
				return true
			}

			op := item.(*orchestration.RuntimeOperation)
			id := op.ID
			log := p.log.WithField("operationID", id)

			defer func() {
				if err := recover(); err != nil {
					log.Errorf("panic error from process: %v. Stacktrace: %s", err, debug.Stack())
				}
				p.pq[execID].Done(item)
			}()

			when, err := p.executor.Execute(id)
			if err == nil && when != 0 {
				log.Infof("Adding %q item after %v", id, when)
				p.pq[execID].AddAfter(item, time.Duration(int64(when)/int64(p.speedFactor)))
				return false
			}
			if err != nil {
				log.Errorf("Error from process: %v", err)
			}

			log.Infof("Finishing processing operation")
			// Mark the item done in the scheduling queue as well; it was taken from there in scheduleOperationsLoop.
			p.dq[execID].Done(item)

			return true
		}()
	}
}

func (p *ParallelOrchestrationStrategy) updateMaintenanceWindow(execID string, op *orchestration.RuntimeOperation, strategy orchestration.StrategySpec) (time.Duration, error) {
	var duration time.Duration
	id := op.ID

	if strategy.MaintenanceWindow {
		// if the time window for this operation has already passed, requeue and reprocess in the next time window
		if !op.MaintenanceWindowEnd.IsZero() && op.MaintenanceWindowEnd.Before(time.Now()) {
			if p.rescheduleDelay > 0 {
				op.MaintenanceWindowBegin = op.MaintenanceWindowBegin.Add(p.rescheduleDelay)
				op.MaintenanceWindowEnd = op.MaintenanceWindowEnd.Add(p.rescheduleDelay)
			} else {
				currentDay := op.MaintenanceWindowBegin.Weekday()
				diff := orchestration.NextAvailableDayDiff(currentDay, orchestration.ConvertSliceOfDaysToMap(op.MaintenanceDays))
				op.MaintenanceWindowBegin = op.MaintenanceWindowBegin.AddDate(0, 0, diff)
				op.MaintenanceWindowEnd = op.MaintenanceWindowEnd.AddDate(0, 0, diff)
			}

			err := p.executor.Reschedule(id, op.MaintenanceWindowBegin, op.MaintenanceWindowEnd)
			// error reading from or updating storage
			if err != nil {
				return duration, fmt.Errorf("while rescheduling operation by executor (still continuing with new schedule): %w", err)
			}
		}

		duration = time.Until(op.MaintenanceWindowBegin)
	} else {
		err := p.executor.Reschedule(id, strategy.ScheduleTime, strategy.ScheduleTime)
		if err != nil {
			return duration, fmt.Errorf("while rescheduling operation by executor: %w", err)
		}
		duration = time.Until(strategy.ScheduleTime)
	}

	return duration, nil
}

func (p *ParallelOrchestrationStrategy) Wait(executionID string) {
	p.mux.RLock()
	wg := p.wg[executionID]
	p.mux.RUnlock()
	if wg != nil {
		wg.Wait()
	}
}

func (p *ParallelOrchestrationStrategy) Cancel(executionID string) {
	if executionID == "" {
		return
	}
	p.log.Infof("Cancelling strategy execution %s", executionID)

	p.mux.Lock()
	defer p.mux.Unlock()
	dq := p.dq[executionID]
	pq := p.pq[executionID]

	if dq != nil {
		dq.ShutDown()
	}

	if pq != nil {
		pq.ShutDown()
	}
}

func (p *ParallelOrchestrationStrategy) handleRescheduleErrorOperation(execID string, op *orchestration.RuntimeOperation) {
	// Retry the operation after 24 hours when rescheduling failed due to a storage error.
	p.dq[execID].AddAfter(op, 24*time.Hour)
}
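// Example usage (an illustrative sketch, not part of this package):
// "executor", "log", and "operations" are assumed to be provided by the
// caller, and the StrategySpec fields shown are assumptions based on how
// Execute reads strategySpec.Parallel.Workers:
//
//	strategy := NewParallelOrchestrationStrategy(executor, log, 24*time.Hour)
//	execID, err := strategy.Execute(operations, orchestration.StrategySpec{
//		Parallel: orchestration.ParallelStrategySpec{Workers: 2},
//	})
//	if err != nil {
//		log.Errorf("while executing operations: %v", err)
//	}
//	strategy.Wait(execID) // block until all workers drain the queues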