go.uber.org/cadence@v1.2.9/internal/error.go (about)

     1  // Copyright (c) 2017-2020 Uber Technologies Inc.
     2  // Portions of the Software are attributed to Copyright (c) 2020 Temporal Technologies Inc.
     3  //
     4  // Permission is hereby granted, free of charge, to any person obtaining a copy
     5  // of this software and associated documentation files (the "Software"), to deal
     6  // in the Software without restriction, including without limitation the rights
     7  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     8  // copies of the Software, and to permit persons to whom the Software is
     9  // furnished to do so, subject to the following conditions:
    10  //
    11  // The above copyright notice and this permission notice shall be included in
    12  // all copies or substantial portions of the Software.
    13  //
    14  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    15  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    16  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    17  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    18  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    19  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    20  // THE SOFTWARE.
    21  
    22  package internal
    23  
    24  import (
    25  	"errors"
    26  	"fmt"
    27  	"reflect"
    28  	"strings"
    29  
    30  	"go.uber.org/cadence/.gen/go/shared"
    31  	"go.uber.org/cadence/internal/common/util"
    32  )
    33  
    34  /*
    35  Below are the possible cases that activity could fail:
    36  1) *CustomError: (this should be the most common one)
    37  	If activity implementation returns *CustomError by using NewCustomError() API, workflow code would receive *CustomError.
    38  	The err would contain a Reason and Details. The reason is what activity specified to NewCustomError(), which workflow
    39  	code could check to determine what kind of error it was and take actions based on the reason. The details is encoded
    40  	[]byte which workflow code could extract strong typed data. Workflow code needs to know what the types of the encoded
    41  	details are before extracting them.
    42  2) *GenericError:
    43  	If activity implementation returns errors other than from NewCustomError() API, workflow code would receive *GenericError.
    44  	Use err.Error() to get the string representation of the actual error.
    45  3) *CanceledError:
    46  	If activity was canceled, workflow code will receive instance of *CanceledError. When activity cancels itself by
    47  	returning NewCancelError() it would supply optional details which could be extracted by workflow code.
    48  4) *TimeoutError:
    49  	If activity was timed out (several timeout types), workflow code will receive instance of *TimeoutError. The err contains
    50  	details about what type of timeout it was.
    51  5) *PanicError:
    52  	If activity code panics while executing, the cadence activity worker will report it as an activity failure to the cadence server.
    53  	The cadence client library will present that failure as *PanicError to workflow code. The err contains a string
    54  	representation of the panic message and the call stack at the time the panic happened.
    55  
    56  Workflow code could handle errors based on the different error types. Below is sample code showing what error handling looks like.
    57  
    58  _, err := workflow.ExecuteActivity(ctx, MyActivity, ...).Get(nil)
    59  if err != nil {
    60  	switch err := err.(type) {
    61  	case *workflow.CustomError:
    62  		// handle activity errors (created via NewCustomError() API)
    63  		switch err.Reason() {
    64  		case CustomErrReasonA: // assume CustomErrReasonA is constant defined by activity implementation
    65  			var detailMsg string // assuming activity return error by NewCustomError(CustomErrReasonA, "string details")
    66  			err.Details(&detailMsg) // extract strong typed details (corresponding to CustomErrReasonA)
    67  			// handle CustomErrReasonA
    68  		case CustomErrReasonB:
    69  			// handle CustomErrReasonB
    70  		default:
    71  			// newer version of activity could return new errors that workflow was not aware of.
    72  		}
    73  	case *workflow.GenericError:
    74  		// handle generic error (errors created other than using NewCustomError() API)
    75  	case *workflow.CanceledError:
    76  		// handle cancellation
    77  	case *workflow.TimeoutError:
    78  		// handle timeout, could check timeout type by err.TimeoutType()
    79  	case *workflow.PanicError:
    80  		// handle panic
    81  	}
    82  }
    83  
    84  Errors from child workflow should be handled in a similar way, except that there should be no *PanicError from child workflow.
    85  When a panic happens in workflow implementation code, the cadence client library catches that panic and causes the decision task to time out.
    86  That decision task will be retried at a later time (with exponential backoff retry intervals).
    87  */
    88  
type (
	// CustomError is returned from workflow and activity implementations with a reason and optional details.
	// Its Error() text is the reason string.
	CustomError struct {
		reason  string
		details Values
	}

	// GenericError is returned from workflow/activity when the implementations return errors other than
	// from the NewCustomError() API. It only carries the string form of the underlying error.
	GenericError struct {
		err string
	}

	// TimeoutError is returned when an activity or child workflow timed out.
	// It records which kind of timeout occurred and optional details.
	TimeoutError struct {
		timeoutType shared.TimeoutType
		details     Values
	}

	// CanceledError is returned when an operation was canceled. It may carry optional details.
	CanceledError struct {
		details Values
	}

	// TerminatedError is returned when a workflow was terminated.
	TerminatedError struct {
	}

	// PanicError contains information about a panicked workflow/activity:
	// the panic value and the stack trace captured for it.
	PanicError struct {
		value      interface{}
		stackTrace string
	}

	// workflowPanicError contains information about panicked workflow.
	// Used to distinguish go panic in the workflow code from a PanicError returned from a workflow function.
	workflowPanicError struct {
		value      interface{}
		stackTrace string
	}

	// NonDeterministicError contains some structured data related to a non-deterministic
	// replay failure, and is primarily intended for allowing richer error reporting.
	//
	// WorkflowType, WorkflowID, RunID, TaskList, and DomainName will likely be long-term stable
	// and included in some form in future library versions, but the rest of these fields may
	// change at any time, or be removed in a future major version change.
	NonDeterministicError struct {

		// Reason is a relatively free-form description of what kind of non-determinism
		// was detected.
		//
		// You are *strongly* encouraged to not rely on these strings for behavior, only
		// explanation, for a few reasons.  More will likely appear in the future, they may
		// change, and there is little that can be safely decided on in an automated way.
		//
		// Currently, values roughly match the historical error strings, and are:
		//  - "missing replay decision" (The error will contain HistoryEventText, as there
		//    is at least one history event that has no matching replayed decision)
		//  - "extra replay decision" (The error will contain DecisionText, as there is
		//    at least one decision from replay that has no matching history event)
		//  - "mismatch" (Both HistoryEventText and DecisionText will exist, as there
		//    are issues with both.  This was previously shown as "history event is ...,
		//    replay decision is ..." error text.)
		Reason string

		WorkflowType string
		WorkflowID   string
		RunID        string
		TaskList     string
		DomainName   string

		// intentionally avoiding "history event" and "decision" names
		// because we *do* have types for them, but they are in thrift and should
		// not be exposed directly.
		// we should consider doing that eventually though, or providing a
		// simplified object for richer failure information.

		// HistoryEventText contains a String() representation of a history
		// event (i.e. previously recorded) that is related to the problem.
		HistoryEventText string
		// DecisionText contains a String() representation of a replay decision
		// event (i.e. created during replay) that is related to the problem.
		DecisionText string
	}

	// ContinueAsNewError contains information about how to continue the workflow as new:
	// the workflow function, its arguments, and the validated parameters for the new run.
	ContinueAsNewError struct {
		wfn    interface{}
		args   []interface{}
		params *executeWorkflowParams
	}

	// UnknownExternalWorkflowExecutionError can be returned when external workflow doesn't exist
	UnknownExternalWorkflowExecutionError struct{}

	// ErrorDetailsValues is a slice type used to hold error detail objects in their
	// original (not yet encoded) form.
	ErrorDetailsValues []interface{}
)
   187  
// Internal error reasons. They all share the "cadenceInternal:" prefix, which
// NewCustomError refuses (by panicking) for caller-supplied reasons, so they
// cannot collide with user-defined reasons.
// NOTE(review): the names suggest one reason per built-in error kind (panic,
// generic, canceled, timeout); their use sites are not visible here — confirm.
const (
	errReasonPanic    = "cadenceInternal:Panic"
	errReasonGeneric  = "cadenceInternal:Generic"
	errReasonCanceled = "cadenceInternal:Canceled"
	errReasonTimeout  = "cadenceInternal:Timeout"
)
   194  
   195  // ErrNoData is returned when trying to extract strong typed data while there is no data available.
   196  var ErrNoData = errors.New("no data available")
   197  
   198  // ErrTooManyArg is returned when trying to extract strong typed data with more arguments than available data.
   199  var ErrTooManyArg = errors.New("too many arguments")
   200  
   201  // ErrActivityResultPending is returned from activity's implementation to indicate the activity is not completed when
   202  // activity method returns. Activity needs to be completed by Client.CompleteActivity() separately. For example, if an
   203  // activity require human interaction (like approve an expense report), the activity could return activity.ErrResultPending
   204  // which indicate the activity is not done yet. Then, when the waited human action happened, it needs to trigger something
   205  // that could report the activity completed event to cadence server via Client.CompleteActivity() API.
   206  var ErrActivityResultPending = errors.New("not error: do not autocomplete, using Client.CompleteActivity() to complete")
   207  
   208  // NewCustomError create new instance of *CustomError with reason and optional details.
   209  func NewCustomError(reason string, details ...interface{}) *CustomError {
   210  	if strings.HasPrefix(reason, "cadenceInternal:") {
   211  		panic("'cadenceInternal:' is reserved prefix, please use different reason")
   212  	}
   213  	// When return error to user, use EncodedValues as details and data is ready to be decoded by calling Get
   214  	if len(details) == 1 {
   215  		if d, ok := details[0].(*EncodedValues); ok {
   216  			return &CustomError{reason: reason, details: d}
   217  		}
   218  	}
   219  	// When create error for server, use ErrorDetailsValues as details to hold values and encode later
   220  	return &CustomError{reason: reason, details: ErrorDetailsValues(details)}
   221  }
   222  
   223  // NewTimeoutError creates TimeoutError instance.
   224  // Use NewHeartbeatTimeoutError to create heartbeat TimeoutError
   225  func NewTimeoutError(timeoutType shared.TimeoutType, details ...interface{}) *TimeoutError {
   226  	if len(details) == 1 {
   227  		if d, ok := details[0].(*EncodedValues); ok {
   228  			return &TimeoutError{timeoutType: timeoutType, details: d}
   229  		}
   230  	}
   231  	return &TimeoutError{timeoutType: timeoutType, details: ErrorDetailsValues(details)}
   232  }
   233  
// NewHeartbeatTimeoutError creates a *TimeoutError whose timeout type is
// shared.TimeoutTypeHeartbeat. The details are forwarded unchanged to NewTimeoutError.
func NewHeartbeatTimeoutError(details ...interface{}) *TimeoutError {
	return NewTimeoutError(shared.TimeoutTypeHeartbeat, details...)
}
   238  
   239  // NewCanceledError creates CanceledError instance
   240  func NewCanceledError(details ...interface{}) *CanceledError {
   241  	if len(details) == 1 {
   242  		if d, ok := details[0].(*EncodedValues); ok {
   243  			return &CanceledError{details: d}
   244  		}
   245  	}
   246  	return &CanceledError{details: ErrorDetailsValues(details)}
   247  }
   248  
   249  // IsCanceledError return whether error in CanceledError
   250  func IsCanceledError(err error) bool {
   251  	_, ok := err.(*CanceledError)
   252  	return ok
   253  }
   254  
   255  // NewContinueAsNewError creates ContinueAsNewError instance
   256  // If the workflow main function returns this error then the current execution is ended and
   257  // the new execution with same workflow ID is started automatically with options
   258  // provided to this function.
   259  //
   260  //	 ctx - use context to override any options for the new workflow like execution timeout, decision task timeout, task list.
   261  //		  if not mentioned it would use the defaults that the current workflow is using.
   262  //	       ctx := WithExecutionStartToCloseTimeout(ctx, 30 * time.Minute)
   263  //	       ctx := WithWorkflowTaskStartToCloseTimeout(ctx, time.Minute)
   264  //		  ctx := WithWorkflowTaskList(ctx, "example-group")
   265  //	 wfn - workflow function. for new execution it can be different from the currently running.
   266  //	 args - arguments for the new workflow.
   267  func NewContinueAsNewError(ctx Context, wfn interface{}, args ...interface{}) *ContinueAsNewError {
   268  	// Validate type and its arguments.
   269  	options := getWorkflowEnvOptions(ctx)
   270  	if options == nil {
   271  		panic("context is missing required options for continue as new")
   272  	}
   273  	env := getWorkflowEnvironment(ctx)
   274  	workflowType, input, err := getValidatedWorkflowFunction(wfn, args, options.dataConverter, env.GetRegistry())
   275  	if err != nil {
   276  		panic(err)
   277  	}
   278  	if options.taskListName == nil || *options.taskListName == "" {
   279  		panic("invalid task list provided")
   280  	}
   281  	if options.executionStartToCloseTimeoutSeconds == nil || *options.executionStartToCloseTimeoutSeconds <= 0 {
   282  		panic("invalid executionStartToCloseTimeoutSeconds provided")
   283  	}
   284  	if options.taskStartToCloseTimeoutSeconds == nil || *options.taskStartToCloseTimeoutSeconds <= 0 {
   285  		panic("invalid taskStartToCloseTimeoutSeconds provided")
   286  	}
   287  
   288  	params := &executeWorkflowParams{
   289  		workflowOptions: *options,
   290  		workflowType:    workflowType,
   291  		input:           input,
   292  		header:          getWorkflowHeader(ctx, options.contextPropagators),
   293  	}
   294  	return &ContinueAsNewError{wfn: wfn, args: args, params: params}
   295  }
   296  
// Error implements the error interface. The message is the error's reason string.
func (e *CustomError) Error() string {
	return e.reason
}
   301  
// Reason returns the reason of this custom error.
func (e *CustomError) Reason() string {
	return e.reason
}
   306  
   307  // HasDetails return if this error has strong typed detail data.
   308  func (e *CustomError) HasDetails() bool {
   309  	return e.details != nil && e.details.HasValues()
   310  }
   311  
// Details extracts the strongly typed detail data of this custom error into the
// destinations d. If there are no details, it returns ErrNoData; otherwise it
// returns whatever the underlying details' Get returns.
func (e *CustomError) Details(d ...interface{}) error {
	if !e.HasDetails() {
		return ErrNoData
	}
	return e.details.Get(d...)
}
   319  
// Error implements the error interface. It returns the stored error string.
func (e *GenericError) Error() string {
	return e.err
}
   324  
// Error implements the error interface. The message has the form
// "TimeoutType: <type>", with the timeout type formatted using %v.
func (e *TimeoutError) Error() string {
	return fmt.Sprintf("TimeoutType: %v", e.timeoutType)
}
   329  
// TimeoutType returns the timeout type of this error.
func (e *TimeoutError) TimeoutType() shared.TimeoutType {
	return e.timeoutType
}
   334  
   335  // HasDetails return if this error has strong typed detail data.
   336  func (e *TimeoutError) HasDetails() bool {
   337  	return e.details != nil && e.details.HasValues()
   338  }
   339  
// Details extracts the strongly typed detail data of this error into the
// destinations d. If there are no details, it returns ErrNoData; otherwise it
// returns whatever the underlying details' Get returns.
func (e *TimeoutError) Details(d ...interface{}) error {
	if !e.HasDetails() {
		return ErrNoData
	}
	return e.details.Get(d...)
}
   347  
// Error implements the error interface. It always returns "CanceledError".
func (e *CanceledError) Error() string {
	return "CanceledError"
}
   352  
   353  // HasDetails return if this error has strong typed detail data.
   354  func (e *CanceledError) HasDetails() bool {
   355  	return e.details != nil && e.details.HasValues()
   356  }
   357  
// Details extracts the strongly typed detail data of this error into the
// destinations d. If there are no details, it returns ErrNoData; otherwise it
// returns whatever the underlying details' Get returns.
func (e *CanceledError) Details(d ...interface{}) error {
	if !e.HasDetails() {
		return ErrNoData
	}
	return e.details.Get(d...)
}
   365  
// newPanicError creates a *PanicError holding the given panic value and stack trace.
func newPanicError(value interface{}, stackTrace string) *PanicError {
	return &PanicError{value: value, stackTrace: stackTrace}
}
   369  
// newWorkflowPanicError creates a *workflowPanicError holding the given panic value and stack trace.
func newWorkflowPanicError(value interface{}, stackTrace string) *workflowPanicError {
	return &workflowPanicError{value: value, stackTrace: stackTrace}
}
   373  
   374  // Error from error interface
   375  func (e *PanicError) Error() string {
   376  	return fmt.Sprintf("%v", e.value)
   377  }
   378  
// StackTrace returns the stack trace recorded for the panic.
func (e *PanicError) StackTrace() string {
	return e.stackTrace
}
   383  
   384  // Error from error interface
   385  func (e *workflowPanicError) Error() string {
   386  	return fmt.Sprintf("%v", e.value)
   387  }
   388  
// StackTrace returns the stack trace recorded for the panic.
func (e *workflowPanicError) StackTrace() string {
	return e.stackTrace
}
   393  
// Error implements the error interface. It always returns "ContinueAsNew".
func (e *ContinueAsNewError) Error() string {
	return "ContinueAsNew"
}
   398  
// WorkflowIDReusePolicy returns the workflow ID reuse policy stored in the
// parameters for the new run.
func (e *ContinueAsNewError) WorkflowIDReusePolicy() WorkflowIDReusePolicy {
	return e.params.workflowIDReusePolicy
}
   403  
// WorkflowType returns the workflow type of the new run.
func (e *ContinueAsNewError) WorkflowType() *WorkflowType {
	return e.params.workflowType
}
   408  
// Args returns the workflow arguments of the new run, as originally supplied
// (not serialized). The returned slice is the one stored in the error, not a copy.
func (e *ContinueAsNewError) Args() []interface{} {
	return e.args
}
   413  
// Input returns the serialized workflow arguments stored in the parameters for the new run.
func (e *ContinueAsNewError) Input() []byte {
	return e.params.input
}
   418  
// Header returns the header used to start the new workflow run.
func (e *ContinueAsNewError) Header() *shared.Header {
	return e.params.header
}
   423  
// newTerminatedError creates a *TerminatedError instance.
func newTerminatedError() *TerminatedError {
	return &TerminatedError{}
}
   428  
// Error implements the error interface. It always returns "Terminated".
func (e *TerminatedError) Error() string {
	return "Terminated"
}
   433  
// newUnknownExternalWorkflowExecutionError creates an *UnknownExternalWorkflowExecutionError instance.
func newUnknownExternalWorkflowExecutionError() *UnknownExternalWorkflowExecutionError {
	return &UnknownExternalWorkflowExecutionError{}
}
   438  
// Error implements the error interface. It always returns "UnknownExternalWorkflowExecution".
func (e *UnknownExternalWorkflowExecutionError) Error() string {
	return "UnknownExternalWorkflowExecution"
}
   443  
   444  // HasValues return whether there are values.
   445  func (b ErrorDetailsValues) HasValues() bool {
   446  	return b != nil && len(b) != 0
   447  }
   448  
   449  // Get extract data from encoded data to desired value type. valuePtr is pointer to the actual value type.
   450  func (b ErrorDetailsValues) Get(valuePtr ...interface{}) error {
   451  	if !b.HasValues() {
   452  		return ErrNoData
   453  	}
   454  	if len(valuePtr) > len(b) {
   455  		return ErrTooManyArg
   456  	}
   457  	for i, item := range valuePtr {
   458  		target := reflect.ValueOf(item).Elem()
   459  		val := reflect.ValueOf(b[i])
   460  		if !val.Type().AssignableTo(target.Type()) {
   461  			return fmt.Errorf(
   462  				"unable to decode argument: cannot set %v value to %v field", val.Type(), target.Type())
   463  		}
   464  		target.Set(val)
   465  	}
   466  	return nil
   467  }
   468  
   469  // NewNonDeterminsticError constructs a new *NonDeterministicError.
   470  //
   471  //   - reason should be a documented NonDeterminsticError.Reason value
   472  //   - info is always required.  only a portion of it is used, but it is a convenient
   473  //     and currently always-available object.
   474  //   - history and decision may each be present or nil at any time
   475  func NewNonDeterminsticError(reason string, info *WorkflowInfo, history *shared.HistoryEvent, decision *shared.Decision) error {
   476  	var historyText string
   477  	if history != nil {
   478  		historyText = util.HistoryEventToString(history)
   479  	}
   480  	var decisionText string
   481  	if decision != nil {
   482  		decisionText = util.DecisionToString(decision)
   483  	}
   484  	return &NonDeterministicError{
   485  		Reason: reason,
   486  
   487  		WorkflowType: info.WorkflowType.Name,
   488  		WorkflowID:   info.WorkflowExecution.ID,
   489  		RunID:        info.WorkflowExecution.RunID,
   490  		TaskList:     info.TaskListName,
   491  		DomainName:   info.Domain,
   492  
   493  		HistoryEventText: historyText,
   494  		DecisionText:     decisionText,
   495  	}
   496  }
   497  
   498  func (e *NonDeterministicError) Error() string {
   499  	switch e.Reason {
   500  	case "missing replay decision":
   501  		// historical text
   502  		return "nondeterministic workflow: " +
   503  			"missing replay decision for " + e.HistoryEventText
   504  	case "extra replay decision":
   505  		// historical text
   506  		return "nondeterministic workflow: " +
   507  			"extra replay decision for " + e.DecisionText
   508  	case "mismatch":
   509  		// historical text
   510  		return "nondeterministic workflow: " +
   511  			"history event is " + e.HistoryEventText + ", " +
   512  			"replay decision is " + e.DecisionText
   513  	default:
   514  		// should not occur in practice, but it's basically fine if it does.
   515  		// ideally this should crash in internal builds / tests, to prevent mismatched values.
   516  		return fmt.Sprintf(
   517  			"unknown reason %q, history event is: %s, replay decision is: %s",
   518  			e.Reason, e.HistoryEventText, e.DecisionText,
   519  		)
   520  	}
   521  }