github.com/kubevela/workflow@v0.6.0/pkg/executor/workflow.go

/*
Copyright 2022 The KubeVela Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package executor

import (
	"context"
	"fmt"
	"math"
	"sync"
	"time"

	"github.com/pkg/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apiserver/pkg/endpoints/request"
	"k8s.io/apiserver/pkg/util/feature"
	"sigs.k8s.io/controller-runtime/pkg/client"

	monitorContext "github.com/kubevela/pkg/monitor/context"

	"github.com/kubevela/workflow/api/v1alpha1"
	wfContext "github.com/kubevela/workflow/pkg/context"
	"github.com/kubevela/workflow/pkg/cue/model/value"
	"github.com/kubevela/workflow/pkg/debug"
	"github.com/kubevela/workflow/pkg/features"
	"github.com/kubevela/workflow/pkg/hooks"
	"github.com/kubevela/workflow/pkg/monitor/metrics"
	"github.com/kubevela/workflow/pkg/providers/workspace"
	"github.com/kubevela/workflow/pkg/tasks/custom"
	"github.com/kubevela/workflow/pkg/types"
)

var (
	// DisableRecorder optimizes the workflow by disabling the recorder
	DisableRecorder = false
	// StepStatusCache caches the step status of each workflow instance
	StepStatusCache sync.Map
)

const (
	// minWorkflowBackoffWaitTime is the minimum time to wait before reconciling the workflow again, in seconds
	minWorkflowBackoffWaitTime = 1
	// backoffTimeCoefficient is the coefficient applied to the exponential backoff before reconciling the workflow again
	backoffTimeCoefficient = 0.05
)

type workflowExecutor struct {
	instance *types.WorkflowInstance
	cli      client.Client
	wfCtx    wfContext.Context
	patcher  types.StatusPatcher
}

// New returns a Workflow Executor implementation.
func New(instance *types.WorkflowInstance, cli client.Client, patcher types.StatusPatcher) WorkflowExecutor {
	return &workflowExecutor{
		instance: instance,
		cli:      cli,
		patcher:  patcher,
	}
}

// InitializeWorkflowInstance initializes the workflow instance: on the first
// reconcile (no start time and no step status yet), it resolves the execution
// mode, resets the status, and clears any leftover cache from a previous run
// with the same name.
func InitializeWorkflowInstance(instance *types.WorkflowInstance) {
	if instance.Status.StartTime.IsZero() && len(instance.Status.Steps) == 0 {
		metrics.WorkflowRunInitializedCounter.WithLabelValues().Inc()
		mode := v1alpha1.WorkflowExecuteMode{
			Steps:    v1alpha1.WorkflowModeStep,
			SubSteps: v1alpha1.WorkflowModeDAG,
		}
		if instance.Mode != nil {
			if instance.Mode.Steps != "" {
				mode.Steps = instance.Mode.Steps
			}
			if instance.Mode.SubSteps != "" {
				mode.SubSteps = instance.Mode.SubSteps
			}
		}
		instance.Status = v1alpha1.WorkflowRunStatus{
			Mode:      mode,
			StartTime: metav1.Now(),
		}
		StepStatusCache.Delete(fmt.Sprintf("%s-%s", instance.Name, instance.Namespace))
		wfContext.CleanupMemoryStore(instance.Name, instance.Namespace)
	}
}
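// By default the top-level steps run sequentially while the sub-steps of a
// group run as a DAG. A sketch of overriding both (illustrative, not part of
// this package; the mode is normally set by whoever builds the
// WorkflowInstance):
//
//	instance.Mode = &v1alpha1.WorkflowExecuteMode{
//		Steps:    v1alpha1.WorkflowModeDAG,  // default: v1alpha1.WorkflowModeStep
//		SubSteps: v1alpha1.WorkflowModeStep, // default: v1alpha1.WorkflowModeDAG
//	}
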
// ExecuteRunners executes the workflow task runners in order.
func (w *workflowExecutor) ExecuteRunners(ctx monitorContext.Context, taskRunners []types.TaskRunner) (v1alpha1.WorkflowRunPhase, error) {
	InitializeWorkflowInstance(w.instance)
	status := &w.instance.Status
	dagMode := status.Mode.Steps == v1alpha1.WorkflowModeDAG
	cacheKey := fmt.Sprintf("%s-%s", w.instance.Name, w.instance.Namespace)

	allRunnersDone, allRunnersSucceeded := checkRunners(taskRunners, w.instance.Status)
	if status.Finished {
		StepStatusCache.Delete(cacheKey)
	}
	if checkWorkflowTerminated(status, allRunnersDone) {
		if isTerminatedManually(status) {
			return v1alpha1.WorkflowStateTerminated, nil
		}
		return v1alpha1.WorkflowStateFailed, nil
	}
	if checkWorkflowSuspended(status) {
		return v1alpha1.WorkflowStateSuspending, nil
	}
	if allRunnersSucceeded {
		return v1alpha1.WorkflowStateSucceeded, nil
	}

	wfCtx, err := w.makeContext(ctx, w.instance.Name)
	if err != nil {
		ctx.Error(err, "make context")
		return v1alpha1.WorkflowStateExecuting, err
	}
	w.wfCtx = wfCtx

	if cacheValue, ok := StepStatusCache.Load(cacheKey); ok {
		// if the status carries fewer steps than the cache recorded, this
		// reconcile is working on a stale resource; skip it
		if len(status.Steps) < cacheValue.(int) {
			return v1alpha1.WorkflowStateSkipped, nil
		}
	}

	e := newEngine(ctx, wfCtx, w, status, taskRunners)

	err = e.Run(ctx, taskRunners, dagMode)
	if err != nil {
		ctx.Error(err, "run steps")
		StepStatusCache.Store(cacheKey, len(status.Steps))
		return v1alpha1.WorkflowStateExecuting, err
	}

	StepStatusCache.Store(cacheKey, len(status.Steps))
	if feature.DefaultMutableFeatureGate.Enabled(features.EnablePatchStatusAtOnce) {
		return e.status.Phase, nil
	}
	return e.checkWorkflowPhase(), nil
}

// isTerminatedManually reports whether every failed step failed because of a
// manual terminate; any other failure reason means the workflow failed.
func isTerminatedManually(status *v1alpha1.WorkflowRunStatus) bool {
	manually := false
	for _, step := range status.Steps {
		if step.Phase == v1alpha1.WorkflowStepPhaseFailed {
			if step.Reason == types.StatusReasonTerminate {
				manually = true
			} else {
				return false
			}
		}
	}
	return manually
}

func checkWorkflowTerminated(status *v1alpha1.WorkflowRunStatus, allTasksDone bool) bool {
	// the workflow is terminated only when all tasks are done and terminated is set
	return status.Terminated && allTasksDone
}

func checkWorkflowSuspended(status *v1alpha1.WorkflowRunStatus) bool {
	// if the workflow is suspended but a suspending step is still running,
	// return false so that the suspended step gets executed
	if status.Suspend {
		for _, step := range status.Steps {
			if step.Phase == v1alpha1.WorkflowStepPhaseSuspending {
				return false
			}
			for _, sub := range step.SubStepsStatus {
				if sub.Phase == v1alpha1.WorkflowStepPhaseSuspending {
					return false
				}
			}
		}
	}
	return status.Suspend
}

func newEngine(ctx monitorContext.Context, wfCtx wfContext.Context, w *workflowExecutor, wfStatus *v1alpha1.WorkflowRunStatus, taskRunners []types.TaskRunner) *engine {
	stepStatus := make(map[string]v1alpha1.StepStatus)
	setStepStatus(stepStatus, wfStatus.Steps)
	stepDependsOn := make(map[string][]string)
	for _, step := range w.instance.Steps {
		hooks.SetAdditionalNameInStatus(stepStatus, step.Name, step.Properties, stepStatus[step.Name])
		stepDependsOn[step.Name] = append(stepDependsOn[step.Name], step.DependsOn...)
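		// sub-step names and dependencies are flattened into the same maps
		// as their parents, so step names are assumed unique across the
		// whole workflow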
		for _, sub := range step.SubSteps {
			hooks.SetAdditionalNameInStatus(stepStatus, sub.Name, sub.Properties, stepStatus[sub.Name])
			stepDependsOn[sub.Name] = append(stepDependsOn[sub.Name], sub.DependsOn...)
		}
	}
	return &engine{
		status:        wfStatus,
		instance:      w.instance,
		wfCtx:         wfCtx,
		cli:           w.cli,
		debug:         w.instance.Debug,
		stepStatus:    stepStatus,
		stepDependsOn: stepDependsOn,
		stepTimeout:   make(map[string]time.Time),
		taskRunners:   taskRunners,
		statusPatcher: w.patcher,
	}
}

func setStepStatus(statusMap map[string]v1alpha1.StepStatus, status []v1alpha1.WorkflowStepStatus) {
	for _, ss := range status {
		statusMap[ss.Name] = ss.StepStatus
		for _, sss := range ss.SubStepsStatus {
			statusMap[sss.Name] = sss
		}
	}
}

// GetSuspendBackoffWaitTime returns the shortest wait before a suspending
// step either times out or reaches its scheduled resume time; zero means
// there is nothing to wait for.
func (w *workflowExecutor) GetSuspendBackoffWaitTime() time.Duration {
	if len(w.instance.Steps) == 0 {
		return 0
	}
	stepStatus := make(map[string]v1alpha1.StepStatus)
	setStepStatus(stepStatus, w.instance.Status.Steps)
	max := time.Duration(1<<63 - 1)
	min := max
	for _, step := range w.instance.Steps {
		min = handleSuspendBackoffTime(w.wfCtx, step, stepStatus[step.Name], min)
		for _, sub := range step.SubSteps {
			min = handleSuspendBackoffTime(w.wfCtx, v1alpha1.WorkflowStep{
				WorkflowStepBase: v1alpha1.WorkflowStepBase{
					Name:       sub.Name,
					Type:       sub.Type,
					Timeout:    sub.Timeout,
					Properties: sub.Properties,
				},
			}, stepStatus[sub.Name], min)
		}
	}
	if min == max {
		return 0
	}
	return min
}

func handleSuspendBackoffTime(wfCtx wfContext.Context, step v1alpha1.WorkflowStep, status v1alpha1.StepStatus, min time.Duration) time.Duration {
	if status.Phase != v1alpha1.WorkflowStepPhaseSuspending {
		return min
	}
	if step.Timeout != "" {
		duration, err := time.ParseDuration(step.Timeout)
		if err != nil {
			return min
		}
		timeout := status.FirstExecuteTime.Add(duration)
		if time.Now().Before(timeout) {
			d := time.Until(timeout)
			if d < min {
				min = d
			}
		}
	}

	if ts := wfCtx.GetMutableValue(status.ID, workspace.ResumeTimeStamp); ts != "" {
		t, err := time.Parse(time.RFC3339, ts)
		if err != nil {
			return min
		}
		d := time.Until(t)
		if d < min {
			min = d
		}
	}
	return min
}

// GetBackoffWaitTime returns how long to wait before the next reconcile.
func (w *workflowExecutor) GetBackoffWaitTime() time.Duration {
	nextTime, ok := w.wfCtx.GetValueInMemory(types.ContextKeyNextExecuteTime)
	if !ok {
		if w.instance.Status.Suspend {
			return 0
		}
		return time.Second
	}
	unix, ok := nextTime.(int64)
	if !ok {
		return time.Second
	}
	next := time.Unix(unix, 0)
	if next.After(time.Now()) {
		return time.Until(next)
	}

	return time.Second
}

// checkRunners reports whether all task runners are finished and whether all
// of them finished successfully (succeeded or skipped).
func checkRunners(taskRunners []types.TaskRunner, status v1alpha1.WorkflowRunStatus) (bool, bool) {
	success := true
	for _, t := range taskRunners {
		done := false
		for _, ss := range status.Steps {
			if ss.Name == t.Name() {
				done = types.IsStepFinish(ss.Phase, ss.Reason)
				success = success && done && (ss.Phase == v1alpha1.WorkflowStepPhaseSucceeded || ss.Phase == v1alpha1.WorkflowStepPhaseSkipped)
				break
			}
		}
		if !done {
			return false, false
		}
	}
	return true, success
}
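// For example (illustrative step names): with runners [deploy, notify], if
// both steps are recorded as Succeeded, checkRunners returns (true, true);
// if notify is Skipped it still returns (true, true); if notify has no
// recorded status yet or is still running, it returns (false, false).
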
func (w *workflowExecutor) makeContext(ctx context.Context, name string) (wfContext.Context, error) {
	// clear the user info in the context
	ctx = request.WithUser(ctx, nil)
	status := &w.instance.Status
	if status.ContextBackend != nil {
		wfCtx, err := wfContext.LoadContext(w.cli, w.instance.Namespace, w.instance.Name, w.instance.Status.ContextBackend.Name)
		if err != nil {
			return nil, errors.WithMessage(err, "load context")
		}
		return wfCtx, nil
	}

	wfCtx, err := wfContext.NewContext(ctx, w.cli, w.instance.Namespace, name, w.instance.ChildOwnerReferences)
	if err != nil {
		return nil, errors.WithMessage(err, "new context")
	}

	status.ContextBackend = wfCtx.StoreRef()
	return wfCtx, nil
}

func (e *engine) getBackoffTimes(stepID string) int {
	if v, ok := e.wfCtx.GetValueInMemory(types.ContextPrefixBackoffTimes, stepID); ok {
		times, ok := v.(int)
		if ok {
			return times
		}
	}
	return -1
}

func (e *engine) getBackoffWaitTime() int {
	// the default value of minTimes is large enough to reach the max workflow backoff wait time
	minTimes := 15
	found := false
	for _, step := range e.status.Steps {
		if backoffTimes := e.getBackoffTimes(step.ID); backoffTimes > 0 {
			found = true
			if backoffTimes < minTimes {
				minTimes = backoffTimes
			}
		}
		if step.SubStepsStatus != nil {
			for _, subStep := range step.SubStepsStatus {
				if backoffTimes := e.getBackoffTimes(subStep.ID); backoffTimes > 0 {
					found = true
					if backoffTimes < minTimes {
						minTimes = backoffTimes
					}
				}
			}
		}
	}

	if !found {
		return minWorkflowBackoffWaitTime
	}

	interval := int(math.Pow(2, float64(minTimes)) * backoffTimeCoefficient)
	if interval < minWorkflowBackoffWaitTime {
		return minWorkflowBackoffWaitTime
	}
	maxWorkflowBackoffWaitTime := e.getMaxBackoffWaitTime()
	if interval > maxWorkflowBackoffWaitTime {
		return maxWorkflowBackoffWaitTime
	}
	return interval
}

func (e *engine) getMaxBackoffWaitTime() int {
	for _, step := range e.status.Steps {
		if step.Phase == v1alpha1.WorkflowStepPhaseFailed {
			return types.MaxWorkflowFailedBackoffTime
		}
	}
	return types.MaxWorkflowWaitBackoffTime
}

func (e *engine) getNextTimeout() int64 {
	max := time.Duration(1<<63 - 1)
	min := time.Duration(1<<63 - 1)
	now := time.Now()
	for _, step := range e.status.Steps {
		if step.Phase == v1alpha1.WorkflowStepPhaseRunning {
			if timeout, ok := e.stepTimeout[step.Name]; ok {
				duration := timeout.Sub(now)
				if duration < min {
					min = duration
				}
			}
		}
	}
	if min == max {
		return -1
	}
	if min.Seconds() < 1 {
		return minWorkflowBackoffWaitTime
	}
	return int64(math.Ceil(min.Seconds()))
}

func (e *engine) setNextExecuteTime(ctx monitorContext.Context) {
	backoff := e.getBackoffWaitTime()
	lastExecuteTime, ok := e.wfCtx.GetValueInMemory(types.ContextKeyLastExecuteTime)
	if !ok {
		ctx.Error(fmt.Errorf("failed to get last execute time"), "workflow run", e.instance.Name)
	}

	last, ok := lastExecuteTime.(int64)
	if !ok {
		ctx.Error(fmt.Errorf("failed to parse last execute time to int64"), "lastExecuteTime", lastExecuteTime)
	}
	interval := int64(backoff)
	if timeout := e.getNextTimeout(); timeout > 0 && timeout < interval {
		interval = timeout
	}

	next := last + interval
	e.wfCtx.SetValueInMemory(next, types.ContextKeyNextExecuteTime)
}
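// Backoff arithmetic, spelled out from the constants above: the wait is
// 0.05 * 2^n seconds, where n is the smallest backoff count among the steps
// that are backing off. For example, n=5 gives int(0.05*32) = 1s, n=8 gives
// int(0.05*256) = 12s, and n=10 gives int(0.05*1024) = 51s, clamped between
// minWorkflowBackoffWaitTime and the maximum from getMaxBackoffWaitTime.
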
func (e *engine) runAsDAG(ctx monitorContext.Context, taskRunners []types.TaskRunner, pendingRunners bool) error {
	var (
		todoTasks    []types.TaskRunner
		pendingTasks []types.TaskRunner
	)
	wfCtx := e.wfCtx
	done := true
	for _, tRunner := range taskRunners {
		finish := false
		var stepID string
		if status, ok := e.stepStatus[tRunner.Name()]; ok {
			stepID = status.ID
			finish = types.IsStepFinish(status.Phase, status.Reason)
		}
		if !finish {
			done = false
			if pending, status := tRunner.Pending(ctx, wfCtx, e.stepStatus); pending {
				if pendingRunners {
					wfCtx.IncreaseCountValueInMemory(types.ContextPrefixBackoffTimes, status.ID)
					if err := e.updateStepStatus(ctx, status); err != nil {
						return err
					}
				}
				pendingTasks = append(pendingTasks, tRunner)
				continue
			} else if status.Phase == v1alpha1.WorkflowStepPhasePending {
				wfCtx.DeleteValueInMemory(types.ContextPrefixBackoffTimes, stepID)
			}
			todoTasks = append(todoTasks, tRunner)
		} else {
			wfCtx.DeleteValueInMemory(types.ContextPrefixBackoffTimes, stepID)
		}
	}
	if done {
		return nil
	}

	if len(todoTasks) > 0 {
		err := e.steps(ctx, todoTasks, true)
		if err != nil {
			return err
		}

		if e.needStop() {
			return nil
		}

		if len(pendingTasks) > 0 {
			return e.runAsDAG(ctx, pendingTasks, true)
		}
	}
	return nil
}

func (e *engine) Run(ctx monitorContext.Context, taskRunners []types.TaskRunner, dag bool) error {
	var err error
	if dag {
		err = e.runAsDAG(ctx, taskRunners, false)
	} else {
		err = e.steps(ctx, taskRunners, dag)
	}

	e.checkFailedAfterRetries()
	e.setNextExecuteTime(ctx)
	return err
}

func (e *engine) checkWorkflowStatusMessage() {
	switch {
	case !e.waiting && e.failedAfterRetries && feature.DefaultMutableFeatureGate.Enabled(features.EnableSuspendOnFailure):
		e.status.Message = types.MessageSuspendFailedAfterRetries
	default:
		e.status.Message = ""
	}
}

func (e *engine) steps(ctx monitorContext.Context, taskRunners []types.TaskRunner, dag bool) error {
	wfCtx := e.wfCtx
	for index, runner := range taskRunners {
		if status, ok := e.stepStatus[runner.Name()]; ok {
			if types.IsStepFinish(status.Phase, status.Reason) {
				continue
			}
		}
		if pending, status := runner.Pending(ctx, wfCtx, e.stepStatus); pending {
			wfCtx.IncreaseCountValueInMemory(types.ContextPrefixBackoffTimes, status.ID)
			if err := e.updateStepStatus(ctx, status); err != nil {
				return err
			}
			if dag {
				continue
			}
			return nil
		}
		options := e.generateRunOptions(ctx, e.findDependPhase(taskRunners, index, dag))

		status, operation, err := runner.Run(wfCtx, options)
		if err != nil {
			return err
		}
		e.finishStep(operation)

		// for a suspend step with a duration, there is no need to increase
		// the backoff time in reconcile while it is still running
		if !types.IsStepFinish(status.Phase, status.Reason) && status.Phase != v1alpha1.WorkflowStepPhaseSuspending {
			if err := e.updateStepStatus(ctx, status); err != nil {
				return err
			}
			if err := handleBackoffTimes(wfCtx, status, false); err != nil {
				return err
			}
			if dag {
				continue
			}
			return nil
		}
		// clear the backoff time when the step is finished
		if err := handleBackoffTimes(wfCtx, status, true); err != nil {
			return err
		}
		if err := e.updateStepStatus(ctx, status); err != nil {
			return err
		}

		if dag {
			continue
		}
		if e.needStop() {
			return nil
		}
	}
	return nil
}
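// The first pre-check hook below implements the `if` semantics: "always"
// never skips, an empty value skips the step when its predecessor (or
// dependency) did not succeed, and anything else is evaluated as an
// expression against the workflow context. A hypothetical step spec, for
// illustration only:
//
//	- name: rollback
//	  type: apply-object
//	  if: status.deploy.phase == "failed"
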
// generateRunOptions assembles the task run options: tracing, pre-check hooks
// for skip/timeout handling, input/output hooks, and an optional debug hook.
func (e *engine) generateRunOptions(ctx monitorContext.Context, dependsOnPhase v1alpha1.WorkflowStepPhase) *types.TaskRunOptions {
	options := &types.TaskRunOptions{
		GetTracer: func(id string, stepStatus v1alpha1.WorkflowStep) monitorContext.Context {
			return ctx.Fork(id, monitorContext.DurationMetric(func(v float64) {
				metrics.WorkflowRunStepDurationHistogram.WithLabelValues("workflowrun", stepStatus.Type).Observe(v)
			}))
		},
		StepStatus: e.stepStatus,
		Engine:     e,
		PreCheckHooks: []types.TaskPreCheckHook{
			func(step v1alpha1.WorkflowStep, options *types.PreCheckOptions) (*types.PreCheckResult, error) {
				if feature.DefaultMutableFeatureGate.Enabled(features.EnableSuspendOnFailure) {
					return &types.PreCheckResult{Skip: false}, nil
				}
				if e.parentRunner != "" {
					if status, ok := e.stepStatus[e.parentRunner]; ok && status.Phase == v1alpha1.WorkflowStepPhaseSkipped {
						return &types.PreCheckResult{Skip: true}, nil
					}
				}
				switch step.If {
				case "always":
					return &types.PreCheckResult{Skip: false}, nil
				case "":
					return &types.PreCheckResult{Skip: skipExecutionOfNextStep(dependsOnPhase, len(step.DependsOn) > 0)}, nil
				default:
					ifValue, err := custom.ValidateIfValue(e.wfCtx, step, e.stepStatus, options)
					if err != nil {
						return &types.PreCheckResult{Skip: true}, err
					}
					return &types.PreCheckResult{Skip: !ifValue}, nil
				}
			},
			func(step v1alpha1.WorkflowStep, options *types.PreCheckOptions) (*types.PreCheckResult, error) {
				status := e.stepStatus[step.Name]
				if e.parentRunner != "" {
					if status, ok := e.stepStatus[e.parentRunner]; ok && status.Phase == v1alpha1.WorkflowStepPhaseFailed && status.Reason == types.StatusReasonTimeout {
						return &types.PreCheckResult{Timeout: true}, nil
					}
				}
				if !status.FirstExecuteTime.Time.IsZero() && step.Timeout != "" {
					duration, err := time.ParseDuration(step.Timeout)
					if err != nil {
						// if the timeout is an invalid duration, return {timeout: false}
						return &types.PreCheckResult{Timeout: false}, err
					}
					timeout := status.FirstExecuteTime.Add(duration)
					e.stepTimeout[step.Name] = timeout
					if time.Now().After(timeout) {
						return &types.PreCheckResult{Timeout: true}, nil
					}
				}
				return &types.PreCheckResult{Timeout: false}, nil
			},
		},
		PreStartHooks: []types.TaskPreStartHook{hooks.Input},
		PostStopHooks: []types.TaskPostStopHook{hooks.Output},
	}
	if e.debug {
		options.Debug = func(id string, v *value.Value) error {
			debugContext := debug.NewContext(e.cli, e.instance, id)
			if err := debugContext.Set(v); err != nil {
				return err
			}
			return nil
		}
	}
	return options
}

type engine struct {
	failedAfterRetries bool
	waiting            bool
	suspending         bool
	debug              bool
	status             *v1alpha1.WorkflowRunStatus
	wfCtx              wfContext.Context
	instance           *types.WorkflowInstance
	cli                client.Client
	parentRunner       string
	stepStatus         map[string]v1alpha1.StepStatus
	stepTimeout        map[string]time.Time
	stepDependsOn      map[string][]string
	taskRunners        []types.TaskRunner
	statusPatcher      types.StatusPatcher
}

// finishStep merges a finished runner's operation flags into the workflow
// status.
func (e *engine) finishStep(operation *types.Operation) {
	if operation != nil {
		e.status.Terminated = e.status.Terminated || operation.Terminated
		e.failedAfterRetries = e.failedAfterRetries || operation.FailedAfterRetries
		e.waiting = e.waiting || operation.Waiting
		e.suspending = e.suspending || operation.Suspend
	}
	e.status.Suspend = e.suspending
	if !e.waiting && e.failedAfterRetries && feature.DefaultMutableFeatureGate.Enabled(features.EnableSuspendOnFailure) {
		e.status.Suspend = true
	}
	if e.failedAfterRetries && !feature.DefaultMutableFeatureGate.Enabled(features.EnableSuspendOnFailure) {
		e.status.Terminated = true
	}
}
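// In short: when EnableSuspendOnFailure is on, a step that exhausted its
// retries suspends the workflow so it can be inspected and resumed manually;
// when the gate is off, the same condition terminates the workflow as failed.
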
func (e *engine) updateStepStatus(ctx context.Context, status v1alpha1.StepStatus) error {
	var (
		conditionUpdated bool
		now              = metav1.NewTime(time.Now())
	)

	parentRunner := e.parentRunner
	stepName := status.Name
	if parentRunner != "" {
		stepName = parentRunner
	}
	e.wfCtx.SetValueInMemory(now.Unix(), types.ContextKeyLastExecuteTime)
	status.LastExecuteTime = now
	index := -1
	for i, ss := range e.status.Steps {
		if ss.Name == stepName {
			index = i
			if parentRunner != "" {
				// update the sub step status
				for j, sub := range ss.SubStepsStatus {
					if sub.Name == status.Name {
						status.FirstExecuteTime = sub.FirstExecuteTime
						e.status.Steps[i].SubStepsStatus[j] = status
						conditionUpdated = true
						break
					}
				}
			} else {
				// update the parent step status
				status.FirstExecuteTime = ss.FirstExecuteTime
				e.status.Steps[i].StepStatus = status
				conditionUpdated = true
				break
			}
		}
	}
	if !conditionUpdated {
		status.FirstExecuteTime = now
		if parentRunner != "" {
			if index < 0 {
				e.status.Steps = append(e.status.Steps, v1alpha1.WorkflowStepStatus{
					StepStatus: v1alpha1.StepStatus{
						Name:             parentRunner,
						FirstExecuteTime: now,
					}})
				index = len(e.status.Steps) - 1
			}
			e.status.Steps[index].SubStepsStatus = append(e.status.Steps[index].SubStepsStatus, status)
		} else {
			e.status.Steps = append(e.status.Steps, v1alpha1.WorkflowStepStatus{StepStatus: status})
		}
	}
	e.stepStatus[status.Name] = status
	if feature.DefaultMutableFeatureGate.Enabled(features.EnablePatchStatusAtOnce) {
		isUpdate := false
		orig := e.status.Message
		e.status.Phase = e.checkWorkflowPhase()
		if orig != "" && e.status.Message == "" {
			// a patch cannot set an empty string, so fall back to an update
			isUpdate = true
		}
		return e.statusPatcher(ctx, e.status, isUpdate)
	}
	return nil
}

func (e *engine) checkWorkflowPhase() v1alpha1.WorkflowRunPhase {
	status := e.status
	e.checkWorkflowStatusMessage()
	allRunnersDone, allRunnersSucceeded := checkRunners(e.taskRunners, e.instance.Status)
	if status.Terminated {
		e.cleanBackoffTimesForTerminated()
		if checkWorkflowTerminated(status, allRunnersDone) {
			wfContext.CleanupMemoryStore(e.instance.Name, e.instance.Namespace)
			if isTerminatedManually(status) {
				return v1alpha1.WorkflowStateTerminated
			}
			return v1alpha1.WorkflowStateFailed
		}
	}
	if status.Suspend {
		wfContext.CleanupMemoryStore(e.instance.Name, e.instance.Namespace)
		return v1alpha1.WorkflowStateSuspending
	}
	if allRunnersSucceeded {
		return v1alpha1.WorkflowStateSucceeded
	}
	return v1alpha1.WorkflowStateExecuting
}

func (e *engine) checkFailedAfterRetries() {
	if !e.waiting && e.failedAfterRetries && feature.DefaultMutableFeatureGate.Enabled(features.EnableSuspendOnFailure) {
		e.status.Suspend = true
	}
	if e.failedAfterRetries && !feature.DefaultMutableFeatureGate.Enabled(features.EnableSuspendOnFailure) {
		e.status.Terminated = true
	}
}

func (e *engine) needStop() bool {
	// even if the workflow is terminated, the remaining steps still need to
	// be executed, so only stop on suspend
	return e.status.Suspend
}
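// Dependency resolution sketch (illustrative): given steps a <- b <- c where
// b depends on a and c depends on b, a failed "a" makes skipExecutionOfNextStep
// skip "b"; findDependsOnPhase("c") then surfaces b's Skipped phase, which in
// turn skips "c", so the whole chain is cut off by the first failure.
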
func (e *engine) findDependPhase(taskRunners []types.TaskRunner, index int, dag bool) v1alpha1.WorkflowStepPhase {
	dependsOn := len(e.stepDependsOn[taskRunners[index].Name()]) > 0
	if dag || dependsOn {
		return e.findDependsOnPhase(taskRunners[index].Name())
	}
	if index < 1 {
		return v1alpha1.WorkflowStepPhaseSucceeded
	}
	for i := index - 1; i >= 0; i-- {
		if skipExecutionOfNextStep(e.stepStatus[taskRunners[i].Name()].Phase, dependsOn) {
			return e.stepStatus[taskRunners[i].Name()].Phase
		}
	}
	return e.stepStatus[taskRunners[index-1].Name()].Phase
}

func (e *engine) findDependsOnPhase(name string) v1alpha1.WorkflowStepPhase {
	for _, dependsOn := range e.stepDependsOn[name] {
		if e.stepStatus[dependsOn].Phase != v1alpha1.WorkflowStepPhaseSucceeded {
			return e.stepStatus[dependsOn].Phase
		}
		if result := e.findDependsOnPhase(dependsOn); result != v1alpha1.WorkflowStepPhaseSucceeded {
			return result
		}
	}
	return v1alpha1.WorkflowStepPhaseSucceeded
}

// skipExecutionOfNextStep returns true if the next step should be skipped
func skipExecutionOfNextStep(phase v1alpha1.WorkflowStepPhase, dependsOn bool) bool {
	if dependsOn {
		return phase != v1alpha1.WorkflowStepPhaseSucceeded
	}
	return phase != v1alpha1.WorkflowStepPhaseSucceeded && phase != v1alpha1.WorkflowStepPhaseSkipped
}

// handleBackoffTimes increases the backoff count of a step, resetting it when
// the failure reason changes, or clears it entirely when the step finishes.
func handleBackoffTimes(wfCtx wfContext.Context, status v1alpha1.StepStatus, clear bool) error {
	if clear {
		wfCtx.DeleteValueInMemory(types.ContextPrefixBackoffTimes, status.ID)
		wfCtx.DeleteValueInMemory(types.ContextPrefixBackoffReason, status.ID)
	} else {
		if val, exists := wfCtx.GetValueInMemory(types.ContextPrefixBackoffReason, status.ID); !exists || val != status.Message {
			wfCtx.SetValueInMemory(status.Message, types.ContextPrefixBackoffReason, status.ID)
			wfCtx.DeleteValueInMemory(types.ContextPrefixBackoffTimes, status.ID)
		}
		wfCtx.IncreaseCountValueInMemory(types.ContextPrefixBackoffTimes, status.ID)
	}
	if err := wfCtx.Commit(); err != nil {
		return errors.WithMessage(err, "commit workflow context")
	}
	return nil
}

func (e *engine) cleanBackoffTimesForTerminated() {
	for _, ss := range e.status.Steps {
		for _, sub := range ss.SubStepsStatus {
			if sub.Reason == types.StatusReasonTerminate {
				e.wfCtx.DeleteValueInMemory(types.ContextPrefixBackoffTimes, sub.ID)
				e.wfCtx.DeleteValueInMemory(types.ContextPrefixBackoffReason, sub.ID)
			}
		}
		if ss.Reason == types.StatusReasonTerminate {
			e.wfCtx.DeleteValueInMemory(types.ContextPrefixBackoffTimes, ss.ID)
			e.wfCtx.DeleteValueInMemory(types.ContextPrefixBackoffReason, ss.ID)
		}
	}
}

// GetStepStatus returns the full status, including sub steps, of the named step.
func (e *engine) GetStepStatus(stepName string) v1alpha1.WorkflowStepStatus {
	for _, ss := range e.status.Steps {
		if ss.Name == stepName {
			return ss
		}
	}
	return v1alpha1.WorkflowStepStatus{}
}

// GetCommonStepStatus returns the status of the named step or sub step.
func (e *engine) GetCommonStepStatus(stepName string) v1alpha1.StepStatus {
	if status, ok := e.stepStatus[stepName]; ok {
		return status
	}
	return v1alpha1.StepStatus{}
}

// SetParentRunner records the group step that the subsequent runners belong to.
func (e *engine) SetParentRunner(name string) {
	e.parentRunner = name
}
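// These exported engine methods are the surface that task runners interact
// with: a group-step runner, for instance, calls SetParentRunner with its own
// name before running its sub steps so that updateStepStatus nests their
// statuses under the group entry.
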
// GetOperation returns the operation flags accumulated by the engine.
func (e *engine) GetOperation() *types.Operation {
	return &types.Operation{
		Suspend:            e.status.Suspend,
		Terminated:         e.status.Terminated,
		Waiting:            e.waiting,
		FailedAfterRetries: e.failedAfterRetries,
	}
}
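// Usage sketch (illustrative; the real caller is the workflow controller):
//
//	exec := executor.New(instance, cli, patcher)
//	phase, err := exec.ExecuteRunners(ctx, taskRunners)
//	if err == nil && phase == v1alpha1.WorkflowStateExecuting {
//		requeueAfter := exec.GetBackoffWaitTime()
//		_ = requeueAfter // requeue the reconcile after this duration
//	}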