go.uber.org/cadence@v1.2.9/internal/error.go (about) 1 // Copyright (c) 2017-2020 Uber Technologies Inc. 2 // Portions of the Software are attributed to Copyright (c) 2020 Temporal Technologies Inc. 3 // 4 // Permission is hereby granted, free of charge, to any person obtaining a copy 5 // of this software and associated documentation files (the "Software"), to deal 6 // in the Software without restriction, including without limitation the rights 7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 // copies of the Software, and to permit persons to whom the Software is 9 // furnished to do so, subject to the following conditions: 10 // 11 // The above copyright notice and this permission notice shall be included in 12 // all copies or substantial portions of the Software. 13 // 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 // THE SOFTWARE. 21 22 package internal 23 24 import ( 25 "errors" 26 "fmt" 27 "reflect" 28 "strings" 29 30 "go.uber.org/cadence/.gen/go/shared" 31 "go.uber.org/cadence/internal/common/util" 32 ) 33 34 /* 35 Below are the possible cases that activity could fail: 36 1) *CustomError: (this should be the most common one) 37 If activity implementation returns *CustomError by using NewCustomError() API, workflow code would receive *CustomError. 38 The err would contain a Reason and Details. The reason is what activity specified to NewCustomError(), which workflow 39 code could check to determine what kind of error it was and take actions based on the reason. 
The details are encoded as
[]byte, from which workflow code can extract strongly typed data. Workflow code needs to know the types of the encoded
details before extracting them.
2) *GenericError:
If the activity implementation returns an error that was not created by the NewCustomError() API, workflow code receives *GenericError.
Use err.Error() to get the string representation of the actual error.
3) *CanceledError:
If the activity was canceled, workflow code receives an instance of *CanceledError. When an activity cancels itself by
returning NewCanceledError(), it can supply optional details which workflow code can extract.
4) *TimeoutError:
If the activity timed out (there are several timeout types), workflow code receives an instance of *TimeoutError. The err contains
details about what type of timeout it was.
5) *PanicError:
If activity code panics while executing, the cadence activity worker reports it as an activity failure to the cadence server.
The cadence client library presents that failure as *PanicError to workflow code. The err contains a string
representation of the panic message and the call stack at the time of the panic.

Workflow code can handle errors based on their type. Below is sample code showing what the error handling looks like.
57 58 _, err := workflow.ExecuteActivity(ctx, MyActivity, ...).Get(nil) 59 if err != nil { 60 switch err := err.(type) { 61 case *workflow.CustomError: 62 // handle activity errors (created via NewCustomError() API) 63 switch err.Reason() { 64 case CustomErrReasonA: // assume CustomErrReasonA is constant defined by activity implementation 65 var detailMsg string // assuming activity return error by NewCustomError(CustomErrReasonA, "string details") 66 err.Details(&detailMsg) // extract strong typed details (corresponding to CustomErrReasonA) 67 // handle CustomErrReasonA 68 case CustomErrReasonB: 69 // handle CustomErrReasonB 70 default: 71 // newer version of activity could return new errors that workflow was not aware of. 72 } 73 case *workflow.GenericError: 74 // handle generic error (errors created other than using NewCustomError() API) 75 case *workflow.CanceledError: 76 // handle cancellation 77 case *workflow.TimeoutError: 78 // handle timeout, could check timeout type by err.TimeoutType() 79 case *workflow.PanicError: 80 // handle panic 81 } 82 } 83 84 Errors from child workflow should be handled in a similar way, except that there should be no *PanicError from child workflow. 85 When panic happen in workflow implementation code, cadence client library catches that panic and causing the decision timeout. 86 That decision task will be retried at a later time (with exponential backoff retry intervals). 87 */ 88 89 type ( 90 // CustomError returned from workflow and activity implementations with reason and optional details. 91 CustomError struct { 92 reason string 93 details Values 94 } 95 96 // GenericError returned from workflow/workflow when the implementations return errors other than from NewCustomError() API. 97 GenericError struct { 98 err string 99 } 100 101 // TimeoutError returned when activity or child workflow timed out. 
102 TimeoutError struct { 103 timeoutType shared.TimeoutType 104 details Values 105 } 106 107 // CanceledError returned when operation was canceled. 108 CanceledError struct { 109 details Values 110 } 111 112 // TerminatedError returned when workflow was terminated. 113 TerminatedError struct { 114 } 115 116 // PanicError contains information about panicked workflow/activity. 117 PanicError struct { 118 value interface{} 119 stackTrace string 120 } 121 122 // workflowPanicError contains information about panicked workflow. 123 // Used to distinguish go panic in the workflow code from a PanicError returned from a workflow function. 124 workflowPanicError struct { 125 value interface{} 126 stackTrace string 127 } 128 129 // NonDeterministicError contains some structured data related to a non-deterministic 130 // replay failure, and is primarily intended for allowing richer error reporting. 131 // 132 // WorkflowType, WorkflowID, RunID, TaskList, and DomainName will likely be long-term stable 133 // and included in some form in future library versions, but the rest of these fields may 134 // change at any time, or be removed in a future major version change. 135 NonDeterministicError struct { 136 137 // Reason is a relatively free-form description of what kind of non-determinism 138 // was detected. 139 // 140 // You are *strongly* encouraged to not rely on these strings for behavior, only 141 // explanation, for a few reasons. More will likely appear in the future, they may 142 // change, and there is little that can be safely decided on in an automated way. 
143 // 144 // Currently, values roughly match the historical error strings, and are: 145 // - "missing replay decision" (The error will contain HistoryEventText, as there 146 // is at least one history event that has no matching replayed decision) 147 // - "extra replay decision" (The error will contain DecisionText, as there is 148 // at least one decision from replay that has no matching history event) 149 // - "mismatch" (Both HistoryEventText and DecisionText will exist, as there 150 // are issues with both. This was previously shown as "history event is ..., 151 // replay decision is ..." error text.) 152 Reason string 153 154 WorkflowType string 155 WorkflowID string 156 RunID string 157 TaskList string 158 DomainName string 159 160 // intentionally avoiding "history event" and "decision" names 161 // because we *do* have types for them, but they are in thrift and should 162 // not be exposed directly. 163 // we should consider doing that eventually though, or providing a 164 // simplified object for richer failure information. 165 166 // HistoryEventText contains a String() representation of a history 167 // event (i.e. previously recorded) that is related to the problem. 168 HistoryEventText string 169 // DecisionText contains a String() representation of a replay decision 170 // event (i.e. created during replay) that is related to the problem. 171 DecisionText string 172 } 173 174 // ContinueAsNewError contains information about how to continue the workflow as new. 175 ContinueAsNewError struct { 176 wfn interface{} 177 args []interface{} 178 params *executeWorkflowParams 179 } 180 181 // UnknownExternalWorkflowExecutionError can be returned when external workflow doesn't exist 182 UnknownExternalWorkflowExecutionError struct{} 183 184 // ErrorDetailsValues is a type alias used hold error details objects. 
185 ErrorDetailsValues []interface{} 186 ) 187 188 const ( 189 errReasonPanic = "cadenceInternal:Panic" 190 errReasonGeneric = "cadenceInternal:Generic" 191 errReasonCanceled = "cadenceInternal:Canceled" 192 errReasonTimeout = "cadenceInternal:Timeout" 193 ) 194 195 // ErrNoData is returned when trying to extract strong typed data while there is no data available. 196 var ErrNoData = errors.New("no data available") 197 198 // ErrTooManyArg is returned when trying to extract strong typed data with more arguments than available data. 199 var ErrTooManyArg = errors.New("too many arguments") 200 201 // ErrActivityResultPending is returned from activity's implementation to indicate the activity is not completed when 202 // activity method returns. Activity needs to be completed by Client.CompleteActivity() separately. For example, if an 203 // activity require human interaction (like approve an expense report), the activity could return activity.ErrResultPending 204 // which indicate the activity is not done yet. Then, when the waited human action happened, it needs to trigger something 205 // that could report the activity completed event to cadence server via Client.CompleteActivity() API. 206 var ErrActivityResultPending = errors.New("not error: do not autocomplete, using Client.CompleteActivity() to complete") 207 208 // NewCustomError create new instance of *CustomError with reason and optional details. 
209 func NewCustomError(reason string, details ...interface{}) *CustomError { 210 if strings.HasPrefix(reason, "cadenceInternal:") { 211 panic("'cadenceInternal:' is reserved prefix, please use different reason") 212 } 213 // When return error to user, use EncodedValues as details and data is ready to be decoded by calling Get 214 if len(details) == 1 { 215 if d, ok := details[0].(*EncodedValues); ok { 216 return &CustomError{reason: reason, details: d} 217 } 218 } 219 // When create error for server, use ErrorDetailsValues as details to hold values and encode later 220 return &CustomError{reason: reason, details: ErrorDetailsValues(details)} 221 } 222 223 // NewTimeoutError creates TimeoutError instance. 224 // Use NewHeartbeatTimeoutError to create heartbeat TimeoutError 225 func NewTimeoutError(timeoutType shared.TimeoutType, details ...interface{}) *TimeoutError { 226 if len(details) == 1 { 227 if d, ok := details[0].(*EncodedValues); ok { 228 return &TimeoutError{timeoutType: timeoutType, details: d} 229 } 230 } 231 return &TimeoutError{timeoutType: timeoutType, details: ErrorDetailsValues(details)} 232 } 233 234 // NewHeartbeatTimeoutError creates TimeoutError instance 235 func NewHeartbeatTimeoutError(details ...interface{}) *TimeoutError { 236 return NewTimeoutError(shared.TimeoutTypeHeartbeat, details...) 
237 } 238 239 // NewCanceledError creates CanceledError instance 240 func NewCanceledError(details ...interface{}) *CanceledError { 241 if len(details) == 1 { 242 if d, ok := details[0].(*EncodedValues); ok { 243 return &CanceledError{details: d} 244 } 245 } 246 return &CanceledError{details: ErrorDetailsValues(details)} 247 } 248 249 // IsCanceledError return whether error in CanceledError 250 func IsCanceledError(err error) bool { 251 _, ok := err.(*CanceledError) 252 return ok 253 } 254 255 // NewContinueAsNewError creates ContinueAsNewError instance 256 // If the workflow main function returns this error then the current execution is ended and 257 // the new execution with same workflow ID is started automatically with options 258 // provided to this function. 259 // 260 // ctx - use context to override any options for the new workflow like execution timeout, decision task timeout, task list. 261 // if not mentioned it would use the defaults that the current workflow is using. 262 // ctx := WithExecutionStartToCloseTimeout(ctx, 30 * time.Minute) 263 // ctx := WithWorkflowTaskStartToCloseTimeout(ctx, time.Minute) 264 // ctx := WithWorkflowTaskList(ctx, "example-group") 265 // wfn - workflow function. for new execution it can be different from the currently running. 266 // args - arguments for the new workflow. 267 func NewContinueAsNewError(ctx Context, wfn interface{}, args ...interface{}) *ContinueAsNewError { 268 // Validate type and its arguments. 
269 options := getWorkflowEnvOptions(ctx) 270 if options == nil { 271 panic("context is missing required options for continue as new") 272 } 273 env := getWorkflowEnvironment(ctx) 274 workflowType, input, err := getValidatedWorkflowFunction(wfn, args, options.dataConverter, env.GetRegistry()) 275 if err != nil { 276 panic(err) 277 } 278 if options.taskListName == nil || *options.taskListName == "" { 279 panic("invalid task list provided") 280 } 281 if options.executionStartToCloseTimeoutSeconds == nil || *options.executionStartToCloseTimeoutSeconds <= 0 { 282 panic("invalid executionStartToCloseTimeoutSeconds provided") 283 } 284 if options.taskStartToCloseTimeoutSeconds == nil || *options.taskStartToCloseTimeoutSeconds <= 0 { 285 panic("invalid taskStartToCloseTimeoutSeconds provided") 286 } 287 288 params := &executeWorkflowParams{ 289 workflowOptions: *options, 290 workflowType: workflowType, 291 input: input, 292 header: getWorkflowHeader(ctx, options.contextPropagators), 293 } 294 return &ContinueAsNewError{wfn: wfn, args: args, params: params} 295 } 296 297 // Error from error interface 298 func (e *CustomError) Error() string { 299 return e.reason 300 } 301 302 // Reason gets the reason of this custom error 303 func (e *CustomError) Reason() string { 304 return e.reason 305 } 306 307 // HasDetails return if this error has strong typed detail data. 308 func (e *CustomError) HasDetails() bool { 309 return e.details != nil && e.details.HasValues() 310 } 311 312 // Details extracts strong typed detail data of this custom error. If there is no details, it will return ErrNoData. 313 func (e *CustomError) Details(d ...interface{}) error { 314 if !e.HasDetails() { 315 return ErrNoData 316 } 317 return e.details.Get(d...) 
318 } 319 320 // Error from error interface 321 func (e *GenericError) Error() string { 322 return e.err 323 } 324 325 // Error from error interface 326 func (e *TimeoutError) Error() string { 327 return fmt.Sprintf("TimeoutType: %v", e.timeoutType) 328 } 329 330 // TimeoutType return timeout type of this error 331 func (e *TimeoutError) TimeoutType() shared.TimeoutType { 332 return e.timeoutType 333 } 334 335 // HasDetails return if this error has strong typed detail data. 336 func (e *TimeoutError) HasDetails() bool { 337 return e.details != nil && e.details.HasValues() 338 } 339 340 // Details extracts strong typed detail data of this error. If there is no details, it will return ErrNoData. 341 func (e *TimeoutError) Details(d ...interface{}) error { 342 if !e.HasDetails() { 343 return ErrNoData 344 } 345 return e.details.Get(d...) 346 } 347 348 // Error from error interface 349 func (e *CanceledError) Error() string { 350 return "CanceledError" 351 } 352 353 // HasDetails return if this error has strong typed detail data. 354 func (e *CanceledError) HasDetails() bool { 355 return e.details != nil && e.details.HasValues() 356 } 357 358 // Details extracts strong typed detail data of this error. 359 func (e *CanceledError) Details(d ...interface{}) error { 360 if !e.HasDetails() { 361 return ErrNoData 362 } 363 return e.details.Get(d...) 
364 } 365 366 func newPanicError(value interface{}, stackTrace string) *PanicError { 367 return &PanicError{value: value, stackTrace: stackTrace} 368 } 369 370 func newWorkflowPanicError(value interface{}, stackTrace string) *workflowPanicError { 371 return &workflowPanicError{value: value, stackTrace: stackTrace} 372 } 373 374 // Error from error interface 375 func (e *PanicError) Error() string { 376 return fmt.Sprintf("%v", e.value) 377 } 378 379 // StackTrace return stack trace of the panic 380 func (e *PanicError) StackTrace() string { 381 return e.stackTrace 382 } 383 384 // Error from error interface 385 func (e *workflowPanicError) Error() string { 386 return fmt.Sprintf("%v", e.value) 387 } 388 389 // StackTrace return stack trace of the panic 390 func (e *workflowPanicError) StackTrace() string { 391 return e.stackTrace 392 } 393 394 // Error from error interface 395 func (e *ContinueAsNewError) Error() string { 396 return "ContinueAsNew" 397 } 398 399 // WorkflowIDReusePolicy return workflow id reuse policy in the new run 400 func (e *ContinueAsNewError) WorkflowIDReusePolicy() WorkflowIDReusePolicy { 401 return e.params.workflowIDReusePolicy 402 } 403 404 // WorkflowType return workflowType of the new run 405 func (e *ContinueAsNewError) WorkflowType() *WorkflowType { 406 return e.params.workflowType 407 } 408 409 // Args return workflow argument of the new run 410 func (e *ContinueAsNewError) Args() []interface{} { 411 return e.args 412 } 413 414 // Input return serialized workflow argument 415 func (e *ContinueAsNewError) Input() []byte { 416 return e.params.input 417 } 418 419 // Header return the header to start a workflow 420 func (e *ContinueAsNewError) Header() *shared.Header { 421 return e.params.header 422 } 423 424 // newTerminatedError creates NewTerminatedError instance 425 func newTerminatedError() *TerminatedError { 426 return &TerminatedError{} 427 } 428 429 // Error from error interface 430 func (e *TerminatedError) Error() string { 431 
return "Terminated" 432 } 433 434 // newUnknownExternalWorkflowExecutionError creates UnknownExternalWorkflowExecutionError instance 435 func newUnknownExternalWorkflowExecutionError() *UnknownExternalWorkflowExecutionError { 436 return &UnknownExternalWorkflowExecutionError{} 437 } 438 439 // Error from error interface 440 func (e *UnknownExternalWorkflowExecutionError) Error() string { 441 return "UnknownExternalWorkflowExecution" 442 } 443 444 // HasValues return whether there are values. 445 func (b ErrorDetailsValues) HasValues() bool { 446 return b != nil && len(b) != 0 447 } 448 449 // Get extract data from encoded data to desired value type. valuePtr is pointer to the actual value type. 450 func (b ErrorDetailsValues) Get(valuePtr ...interface{}) error { 451 if !b.HasValues() { 452 return ErrNoData 453 } 454 if len(valuePtr) > len(b) { 455 return ErrTooManyArg 456 } 457 for i, item := range valuePtr { 458 target := reflect.ValueOf(item).Elem() 459 val := reflect.ValueOf(b[i]) 460 if !val.Type().AssignableTo(target.Type()) { 461 return fmt.Errorf( 462 "unable to decode argument: cannot set %v value to %v field", val.Type(), target.Type()) 463 } 464 target.Set(val) 465 } 466 return nil 467 } 468 469 // NewNonDeterminsticError constructs a new *NonDeterministicError. 470 // 471 // - reason should be a documented NonDeterminsticError.Reason value 472 // - info is always required. only a portion of it is used, but it is a convenient 473 // and currently always-available object. 
474 // - history and decision may each be present or nil at any time 475 func NewNonDeterminsticError(reason string, info *WorkflowInfo, history *shared.HistoryEvent, decision *shared.Decision) error { 476 var historyText string 477 if history != nil { 478 historyText = util.HistoryEventToString(history) 479 } 480 var decisionText string 481 if decision != nil { 482 decisionText = util.DecisionToString(decision) 483 } 484 return &NonDeterministicError{ 485 Reason: reason, 486 487 WorkflowType: info.WorkflowType.Name, 488 WorkflowID: info.WorkflowExecution.ID, 489 RunID: info.WorkflowExecution.RunID, 490 TaskList: info.TaskListName, 491 DomainName: info.Domain, 492 493 HistoryEventText: historyText, 494 DecisionText: decisionText, 495 } 496 } 497 498 func (e *NonDeterministicError) Error() string { 499 switch e.Reason { 500 case "missing replay decision": 501 // historical text 502 return "nondeterministic workflow: " + 503 "missing replay decision for " + e.HistoryEventText 504 case "extra replay decision": 505 // historical text 506 return "nondeterministic workflow: " + 507 "extra replay decision for " + e.DecisionText 508 case "mismatch": 509 // historical text 510 return "nondeterministic workflow: " + 511 "history event is " + e.HistoryEventText + ", " + 512 "replay decision is " + e.DecisionText 513 default: 514 // should not occur in practice, but it's basically fine if it does. 515 // ideally this should crash in internal builds / tests, to prevent mismatched values. 516 return fmt.Sprintf( 517 "unknown reason %q, history event is: %s, replay decision is: %s", 518 e.Reason, e.HistoryEventText, e.DecisionText, 519 ) 520 } 521 }