// go.uber.org/cadence@v1.2.9/internal/workflow_replayer.go

// Copyright (c) 2017-2020 Uber Technologies Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
20 21 package internal 22 23 import ( 24 "bytes" 25 "context" 26 "encoding/json" 27 "errors" 28 "fmt" 29 "io" 30 "math" 31 "os" 32 33 "github.com/golang/mock/gomock" 34 "github.com/opentracing/opentracing-go" 35 "github.com/pborman/uuid" 36 "github.com/uber-go/tally" 37 "go.uber.org/zap" 38 39 "go.uber.org/cadence/.gen/go/cadence/workflowserviceclient" 40 "go.uber.org/cadence/.gen/go/cadence/workflowservicetest" 41 "go.uber.org/cadence/.gen/go/shared" 42 "go.uber.org/cadence/internal/common" 43 "go.uber.org/cadence/internal/common/backoff" 44 "go.uber.org/cadence/internal/common/serializer" 45 ) 46 47 const ( 48 replayDomainName = "ReplayDomain" 49 replayTaskListName = "ReplayTaskList" 50 replayWorkflowID = "ReplayId" 51 replayWorkerIdentity = "replayID" 52 replayPreviousStartedEventID = math.MaxInt64 53 replayTaskToken = "ReplayTaskToken" 54 ) 55 56 var ( 57 errReplayEmptyHistory = errors.New("empty events") 58 errReplayHistoryTooShort = errors.New("at least 3 events expected in the history") 59 errReplayInvalidFirstEvent = errors.New("first event is not WorkflowExecutionStarted") 60 errReplayCorruptedStartedEvent = errors.New("corrupted WorkflowExecutionStarted") 61 ) 62 63 // WorkflowReplayer is used to replay workflow code from an event history 64 type WorkflowReplayer struct { 65 registry *registry 66 options ReplayOptions 67 } 68 69 // ReplayOptions is used to configure the replay decision task worker. 
70 type ReplayOptions struct { 71 // Optional: Sets DataConverter to customize serialization/deserialization of arguments in Cadence 72 // default: defaultDataConverter, an combination of thriftEncoder and jsonEncoder 73 DataConverter DataConverter 74 75 // Optional: Specifies factories used to instantiate workflow interceptor chain 76 // The chain is instantiated per each replay of a workflow execution 77 WorkflowInterceptorChainFactories []WorkflowInterceptorFactory 78 79 // Optional: Sets ContextPropagators that allows users to control the context information passed through a workflow 80 // default: no ContextPropagators 81 ContextPropagators []ContextPropagator 82 83 // Optional: Sets opentracing Tracer that is to be used to emit tracing information 84 // default: no tracer - opentracing.NoopTracer 85 Tracer opentracing.Tracer 86 87 // Optional: flags to turn on/off some features on server side 88 // default: all features under the struct is turned off 89 FeatureFlags FeatureFlags 90 } 91 92 // IsReplayDomain checks if the domainName is from replay 93 func IsReplayDomain(dn string) bool { 94 return replayDomainName == dn 95 } 96 97 // NewWorkflowReplayer creates an instance of the WorkflowReplayer 98 func NewWorkflowReplayer() *WorkflowReplayer { 99 return NewWorkflowReplayerWithOptions(ReplayOptions{}) 100 } 101 102 // NewWorkflowReplayerWithOptions creates an instance of the WorkflowReplayer 103 // with provided replay worker options 104 func NewWorkflowReplayerWithOptions( 105 options ReplayOptions, 106 ) *WorkflowReplayer { 107 augmentReplayOptions(&options) 108 return &WorkflowReplayer{ 109 registry: newRegistry(), 110 options: options, 111 } 112 } 113 114 // RegisterWorkflow registers workflow function to replay 115 func (r *WorkflowReplayer) RegisterWorkflow(w interface{}) { 116 r.registry.RegisterWorkflow(w) 117 } 118 119 // RegisterWorkflowWithOptions registers workflow function with custom workflow name to replay 120 func (r *WorkflowReplayer) 
RegisterWorkflowWithOptions(w interface{}, options RegisterWorkflowOptions) { 121 r.registry.RegisterWorkflowWithOptions(w, options) 122 } 123 124 // RegisterActivity registers an activity function for this replayer 125 func (r *WorkflowReplayer) RegisterActivity(a interface{}) { 126 r.registry.RegisterActivity(a) 127 } 128 129 // RegisterActivityWithOptions registers an activity function for this replayer with custom options, e.g. an explicit name. 130 func (r *WorkflowReplayer) RegisterActivityWithOptions(a interface{}, options RegisterActivityOptions) { 131 r.registry.RegisterActivityWithOptions(a, options) 132 } 133 134 // ReplayWorkflowHistory executes a single decision task for the given history. 135 // Use for testing backwards compatibility of code changes and troubleshooting workflows in a debugger. 136 // The logger is an optional parameter. Defaults to the noop logger. 137 func (r *WorkflowReplayer) ReplayWorkflowHistory(logger *zap.Logger, history *shared.History) error { 138 if logger == nil { 139 logger = zap.NewNop() 140 } 141 142 testReporter := logger.Sugar() 143 controller := gomock.NewController(testReporter) 144 service := workflowservicetest.NewMockClient(controller) 145 146 return r.replayWorkflowHistory(logger, service, replayDomainName, nil, history, nil) 147 } 148 149 func (r *WorkflowReplayer) ReplayWorkflowHistoryFromJSON(logger *zap.Logger, reader io.Reader) error { 150 return r.ReplayPartialWorkflowHistoryFromJSON(logger, reader, 0) 151 } 152 153 func (r *WorkflowReplayer) ReplayPartialWorkflowHistoryFromJSON(logger *zap.Logger, reader io.Reader, lastEventID int64) error { 154 history, err := extractHistoryFromReader(reader, lastEventID) 155 156 if err != nil { 157 return err 158 } 159 160 if logger == nil { 161 logger = zap.NewNop() 162 } 163 164 testReporter := logger.Sugar() 165 controller := gomock.NewController(testReporter) 166 service := workflowservicetest.NewMockClient(controller) 167 168 return r.replayWorkflowHistory(logger, 
service, replayDomainName, nil, history, nil) 169 } 170 171 // ReplayWorkflowHistoryFromJSONFile executes a single decision task for the given json history file. 172 // Use for testing the backwards compatibility of code changes and troubleshooting workflows in a debugger. 173 // The logger is an optional parameter. Defaults to the noop logger. 174 func (r *WorkflowReplayer) ReplayWorkflowHistoryFromJSONFile(logger *zap.Logger, jsonfileName string) error { 175 return r.ReplayPartialWorkflowHistoryFromJSONFile(logger, jsonfileName, 0) 176 } 177 178 // ReplayPartialWorkflowHistoryFromJSONFile executes a single decision task for the given json history file up to provided 179 // lastEventID(inclusive). 180 // Use for testing backwards compatibility of code changes and troubleshooting workflows in a debugger. 181 // The logger is an optional parameter. Defaults to the noop logger. 182 func (r *WorkflowReplayer) ReplayPartialWorkflowHistoryFromJSONFile(logger *zap.Logger, jsonfileName string, lastEventID int64) error { 183 file, err := os.Open(jsonfileName) 184 if err != nil { 185 return fmt.Errorf("could not open file: %w", err) 186 } 187 defer func() { 188 _ = file.Close() 189 }() 190 return r.ReplayPartialWorkflowHistoryFromJSON(logger, file, lastEventID) 191 } 192 193 // ReplayWorkflowExecution replays workflow execution loading it from Cadence service. 194 // The logger is an optional parameter. Defaults to the noop logger. 
195 func (r *WorkflowReplayer) ReplayWorkflowExecution( 196 ctx context.Context, 197 service workflowserviceclient.Interface, 198 logger *zap.Logger, 199 domain string, 200 execution WorkflowExecution, 201 ) error { 202 sharedExecution := &shared.WorkflowExecution{ 203 RunId: common.StringPtr(execution.RunID), 204 WorkflowId: common.StringPtr(execution.ID), 205 } 206 request := &shared.GetWorkflowExecutionHistoryRequest{ 207 Domain: common.StringPtr(domain), 208 Execution: sharedExecution, 209 } 210 211 var hResponse *shared.GetWorkflowExecutionHistoryResponse 212 if err := backoff.Retry(ctx, 213 func() error { 214 tchCtx, cancel, opt := newChannelContext(ctx, r.options.FeatureFlags) 215 216 var err error 217 hResponse, err = service.GetWorkflowExecutionHistory(tchCtx, request, opt...) 218 cancel() 219 220 return err 221 }, 222 createDynamicServiceRetryPolicy(ctx), 223 func(err error) bool { 224 if _, ok := err.(*shared.InternalServiceError); ok { 225 // treat InternalServiceError as non-retryable, as the workflow history may be corrupted 226 return false 227 } 228 return isServiceTransientError(err) 229 }, 230 ); err != nil { 231 return err 232 } 233 234 if hResponse.RawHistory != nil { 235 history, err := serializer.DeserializeBlobDataToHistoryEvents(hResponse.RawHistory, shared.HistoryEventFilterTypeAllEvent) 236 if err != nil { 237 return err 238 } 239 240 hResponse.History = history 241 } 242 243 return r.replayWorkflowHistory(logger, service, domain, &execution, hResponse.History, hResponse.NextPageToken) 244 } 245 246 func (r *WorkflowReplayer) replayWorkflowHistory( 247 logger *zap.Logger, 248 service workflowserviceclient.Interface, 249 domain string, 250 execution *WorkflowExecution, 251 history *shared.History, 252 nextPageToken []byte, 253 ) error { 254 events := history.Events 255 if events == nil { 256 return errReplayEmptyHistory 257 } 258 if len(events) < 3 { 259 return errReplayHistoryTooShort 260 } 261 first := events[0] 262 if 
first.GetEventType() != shared.EventTypeWorkflowExecutionStarted { 263 return errReplayInvalidFirstEvent 264 } 265 last := events[len(events)-1] 266 267 attr := first.WorkflowExecutionStartedEventAttributes 268 if attr == nil { 269 return errReplayCorruptedStartedEvent 270 } 271 workflowType := attr.WorkflowType 272 if execution == nil { 273 execution = &WorkflowExecution{ 274 ID: replayWorkflowID, 275 RunID: uuid.NewRandom().String(), 276 } 277 if first.WorkflowExecutionStartedEventAttributes.GetOriginalExecutionRunId() != "" { 278 execution.RunID = first.WorkflowExecutionStartedEventAttributes.GetOriginalExecutionRunId() 279 } 280 } 281 282 task := &shared.PollForDecisionTaskResponse{ 283 Attempt: common.Int64Ptr(int64(attr.GetAttempt())), 284 TaskToken: []byte(replayTaskToken), 285 WorkflowType: workflowType, 286 WorkflowExecution: &shared.WorkflowExecution{ 287 WorkflowId: common.StringPtr(execution.ID), 288 RunId: common.StringPtr(execution.RunID), 289 }, 290 History: history, 291 PreviousStartedEventId: common.Int64Ptr(replayPreviousStartedEventID), 292 NextPageToken: nextPageToken, 293 } 294 if logger == nil { 295 logger = zap.NewNop() 296 } 297 workerParams := workerExecutionParameters{ 298 WorkerOptions: WorkerOptions{ 299 Identity: replayWorkerIdentity, 300 DataConverter: r.options.DataConverter, 301 ContextPropagators: r.options.ContextPropagators, 302 WorkflowInterceptorChainFactories: r.options.WorkflowInterceptorChainFactories, 303 Tracer: r.options.Tracer, 304 Logger: logger, 305 DisableStickyExecution: true, 306 }, 307 TaskList: replayTaskListName, 308 } 309 310 metricScope := tally.NoopScope 311 iterator := &historyIteratorImpl{ 312 nextPageToken: task.NextPageToken, 313 execution: task.WorkflowExecution, 314 domain: domain, 315 service: service, 316 metricsScope: metricScope, 317 startedEventID: task.GetStartedEventId(), 318 featureFlags: r.options.FeatureFlags, 319 } 320 taskHandler := newWorkflowTaskHandler(domain, workerParams, nil, r.registry) 
321 resp, err := taskHandler.ProcessWorkflowTask(&workflowTask{task: task, historyIterator: iterator}, nil) 322 if err != nil { 323 return err 324 } 325 326 if last.GetEventType() != shared.EventTypeWorkflowExecutionCompleted && last.GetEventType() != shared.EventTypeWorkflowExecutionContinuedAsNew { 327 return nil 328 } 329 330 // TODO: the following result will not be executed if nextPageToken is not nil, which is probably fine as the actual workflow task 331 // processing logic does not have such check. If we want to always execute this check for closed workflows, we need to dump the 332 // entire history before starting the replay as otherwise we can't get the last event here. 333 // compare workflow results 334 if resp != nil { 335 completeReq, ok := resp.(*shared.RespondDecisionTaskCompletedRequest) 336 if ok { 337 for _, d := range completeReq.Decisions { 338 if d.GetDecisionType() == shared.DecisionTypeContinueAsNewWorkflowExecution && 339 last.GetEventType() == shared.EventTypeWorkflowExecutionContinuedAsNew { 340 inputA := d.ContinueAsNewWorkflowExecutionDecisionAttributes.Input 341 inputB := last.WorkflowExecutionContinuedAsNewEventAttributes.Input 342 if bytes.Compare(inputA, inputB) == 0 { 343 return nil 344 } 345 } 346 if d.GetDecisionType() == shared.DecisionTypeCompleteWorkflowExecution && 347 last.GetEventType() == shared.EventTypeWorkflowExecutionCompleted { 348 resultA := last.WorkflowExecutionCompletedEventAttributes.Result 349 resultB := d.CompleteWorkflowExecutionDecisionAttributes.Result 350 if bytes.Compare(resultA, resultB) == 0 { 351 return nil 352 } 353 } 354 if d.GetDecisionType() == shared.DecisionTypeCompleteWorkflowExecution && 355 last.GetEventType() == shared.EventTypeWorkflowExecutionContinuedAsNew { 356 // for cron and retry workflow, decision will be completed workflow and 357 // and server side will convert it to a continue as new event. 
358 // there's nothing to compare here 359 return nil 360 } 361 } 362 } 363 } 364 return fmt.Errorf("replay workflow doesn't return the same result as the last event, resp: %v, last: %v", resp, last) 365 } 366 367 func extractHistoryFromReader(r io.Reader, lastEventID int64) (*shared.History, error) { 368 raw, err := io.ReadAll(r) 369 if err != nil { 370 return nil, fmt.Errorf("failed to read data: %w", err) 371 } 372 373 var deserializedEvents []*shared.HistoryEvent 374 err = json.Unmarshal(raw, &deserializedEvents) 375 376 if err != nil { 377 return nil, fmt.Errorf("invalid json contents: %w", err) 378 } 379 380 if lastEventID <= 0 { 381 return &shared.History{Events: deserializedEvents}, nil 382 } 383 384 // Caller is potentially asking for subset of history instead of all history events 385 var events []*shared.HistoryEvent 386 for _, event := range deserializedEvents { 387 events = append(events, event) 388 if event.GetEventId() == lastEventID { 389 // Copy history up to last event (inclusive) 390 break 391 } 392 } 393 394 return &shared.History{Events: events}, nil 395 } 396 397 func augmentReplayOptions( 398 options *ReplayOptions, 399 ) { 400 // if the user passes in a tracer then add a tracing context propagator 401 if options.Tracer != nil { 402 options.ContextPropagators = append(options.ContextPropagators, NewTracingContextPropagator(zap.NewNop(), options.Tracer)) 403 } else { 404 options.Tracer = opentracing.NoopTracer{} 405 } 406 }