go.uber.org/cadence@v1.2.9/internal/worker.go

// Copyright (c) 2017-2020 Uber Technologies Inc.
// Portions of the Software are attributed to Copyright (c) 2020 Temporal Technologies Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package internal

import (
	"context"
	"time"

	"github.com/opentracing/opentracing-go"
	"github.com/uber-go/tally"
	"go.uber.org/zap"

	"go.uber.org/cadence/.gen/go/cadence/workflowserviceclient"
	"go.uber.org/cadence/.gen/go/shared"
	"go.uber.org/cadence/internal/common/auth"
)

type (
	// WorkerOptions is used to configure a worker instance.
	// The current timeout resolution implementation is in seconds and uses math.Ceil(d.Seconds()) as the duration,
	// but this is subject to change in the future.
	WorkerOptions struct {
		// Optional: Sets the maximum number of concurrent activity executions this worker can have.
		// The zero value of this uses the default value.
		// default: defaultMaxConcurrentActivityExecutionSize(1k)
		MaxConcurrentActivityExecutionSize int

		// Optional: Sets the rate limit on the number of activities that can be executed per second per
		// worker. This can be used to limit resources used by the worker.
		// Notice that the number is represented as a float, so you can set it to less than
		// 1 if needed. For example, setting the number to 0.1 means you want your activity to be executed
		// once every 10 seconds. This can be used to protect downstream services from flooding.
		// The zero value of this uses the default value.
		// default: 100k
		WorkerActivitiesPerSecond float64
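
		// For illustration (a sketch, not part of the API surface): because the limit is a float,
		// fractional rates express "less than once per second". A worker configured with, say,
		//
		//	options := WorkerOptions{
		//		WorkerActivitiesPerSecond:   0.5, // roughly one activity start every 2 seconds on this worker
		//		TaskListActivitiesPerSecond: 5,   // roughly 5 activity starts per second across the whole task list (server enforced, see below)
		//	}
		//
		// would throttle its own activity executions to about one every two seconds, independent of
		// how many other workers poll the same task list.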

		// Optional: Sets the maximum number of concurrent local activity executions this worker can have.
		// The zero value of this uses the default value.
		// default: 1k
		MaxConcurrentLocalActivityExecutionSize int

		// Optional: Sets the rate limit on the number of local activities that can be executed per second per
		// worker. This can be used to limit resources used by the worker.
		// Notice that the number is represented as a float, so you can set it to less than
		// 1 if needed. For example, setting the number to 0.1 means you want your local activity to be executed
		// once every 10 seconds. This can be used to protect downstream services from flooding.
		// The zero value of this uses the default value.
		// default: 100k
		WorkerLocalActivitiesPerSecond float64

		// Optional: Sets the rate limit on the number of activities that can be executed per second.
		// This is managed by the server and controls activities per second for your entire tasklist,
		// whereas WorkerActivitiesPerSecond controls activities only per worker.
		// Notice that the number is represented as a float, so you can set it to less than
		// 1 if needed. For example, setting the number to 0.1 means you want your activity to be executed
		// once every 10 seconds. This can be used to protect downstream services from flooding.
		// The zero value of this uses the default value.
		// default: 100k
		TaskListActivitiesPerSecond float64

		// Optional: Sets the maximum number of goroutines that will concurrently poll the
		// cadence-server to retrieve activity tasks. Changing this value will affect the
		// rate at which the worker is able to consume tasks from a task list.
		// Default value is 2
		MaxConcurrentActivityTaskPollers int

		// Optional: Sets the minimum number of goroutines that will concurrently poll the
		// cadence-server to retrieve activity tasks. Changing this value will NOT affect the
		// rate at which the worker is able to consume tasks from a task list,
		// unless FeatureFlags.PollerAutoScalerEnabled is set to true.
		// Default value is 1
		MinConcurrentActivityTaskPollers int

		// Optional: Sets the maximum number of concurrent decision task executions this worker can have.
		// The zero value of this uses the default value.
		// default: defaultMaxConcurrentTaskExecutionSize(1k)
		MaxConcurrentDecisionTaskExecutionSize int

		// Optional: Sets the rate limit on the number of decision tasks that can be executed per second per
		// worker. This can be used to limit resources used by the worker.
		// The zero value of this uses the default value.
		// default: 100k
		WorkerDecisionTasksPerSecond float64

		// Optional: Sets the maximum number of goroutines that will concurrently poll the
		// cadence-server to retrieve decision tasks. Changing this value will affect the
		// rate at which the worker is able to consume tasks from a task list.
		// Default value is 2
		MaxConcurrentDecisionTaskPollers int

		// Optional: Sets the minimum number of goroutines that will concurrently poll the
		// cadence-server to retrieve decision tasks. If FeatureFlags.PollerAutoScalerEnabled is set to true,
		// changing this value will NOT affect the rate at which the worker is able to consume tasks from a task list.
		// Default value is 1
		MinConcurrentDecisionTaskPollers int

		// Optional: Sets the interval of poller autoscaling, between which the poller autoscaler changes the poller count
		// based on poll results. It takes effect if FeatureFlags.PollerAutoScalerEnabled is set to true.
		// Default value is 1 min
		PollerAutoScalerCooldown time.Duration

		// Optional: Sets the target utilization rate between [0,1].
		// Utilization Rate = pollResultWithTask / (pollResultWithTask + pollResultWithNoTask)
		// It takes effect if FeatureFlags.PollerAutoScalerEnabled is set to true.
		// Default value is 0.6
		PollerAutoScalerTargetUtilization float64
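
		// An illustrative calculation (not part of the API): if, within one PollerAutoScalerCooldown
		// interval, 30 polls returned a task and 20 returned no task, the utilization rate is
		// 30 / (30 + 20) = 0.6. Roughly speaking, an observed rate below the target suggests there are
		// more pollers than needed and the autoscaler can shrink the poller count towards the configured
		// minimum, while a rate above the target lets it grow the count towards the configured maximum.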

		// Optional: Sets whether to start the poller autoscaler in dry run mode.
		// Default value is false
		PollerAutoScalerDryRun bool

		// Optional: Sets an identity that can be used to track this host for debugging.
		// default: default identity that includes hostname, groupName and process ID.
		Identity string

		// Optional: Defines the 'zone' or the failure group that the worker belongs to.
		IsolationGroup string

		// Optional: Metrics to be reported. Metrics emitted by the cadence client are not prometheus compatible by
		// default. To ensure metrics are compatible with prometheus, make sure to create the tally scope with sanitizer
		// options set.
		//	var (
		//		_safeCharacters = []rune{'_'}
		//		_sanitizeOptions = tally.SanitizeOptions{
		//			NameCharacters: tally.ValidCharacters{
		//				Ranges:     tally.AlphanumericRange,
		//				Characters: _safeCharacters,
		//			},
		//			KeyCharacters: tally.ValidCharacters{
		//				Ranges:     tally.AlphanumericRange,
		//				Characters: _safeCharacters,
		//			},
		//			ValueCharacters: tally.ValidCharacters{
		//				Ranges:     tally.AlphanumericRange,
		//				Characters: _safeCharacters,
		//			},
		//			ReplacementCharacter: tally.DefaultReplacementCharacter,
		//		}
		//	)
		//	opts := tally.ScopeOptions{
		//		Reporter:        reporter,
		//		SanitizeOptions: &_sanitizeOptions,
		//	}
		//	scope, _ := tally.NewRootScope(opts, time.Second)
		// default: no metrics.
		MetricsScope tally.Scope

		// Optional: Logger framework can use to log.
		// default: default logger provided.
		Logger *zap.Logger

		// Optional: Enable logging in replay.
		// In the workflow code you can use workflow.GetLogger(ctx) to write logs. By default, the logger will skip log
		// entries during replay mode so you won't see duplicate logs. This option enables logging in replay mode.
		// This is only useful for debugging purposes.
		// default: false
		EnableLoggingInReplay bool

		// Optional: Disable running workflow workers.
		// default: false
		DisableWorkflowWorker bool

		// Optional: Disable running activity workers.
		// default: false
		DisableActivityWorker bool

		// Optional: Disable sticky execution.
		// default: false
		// Sticky execution runs the decision tasks for one workflow execution on the same worker host. This is an
		// optimization for workflow execution. When sticky execution is enabled, the worker keeps the workflow state in
		// memory. New decision tasks containing the new history events will be dispatched to the same worker. If this
		// worker crashes, the sticky decision task will time out after StickyScheduleToStartTimeout, and the cadence server
		// will clear the stickiness for that workflow execution and automatically reschedule a new decision task that
		// is available for any worker to pick up and resume the progress.
		DisableStickyExecution bool

		// Optional: Sticky schedule to start timeout.
		// default: 5s
		// The resolution is seconds. See details about sticky execution in the comments for DisableStickyExecution.
		StickyScheduleToStartTimeout time.Duration

		// Optional: Sets the context for activities. The context can be used to pass any configuration to activities,
		// like a common logger for all activities.
		BackgroundActivityContext context.Context
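
		// A minimal sketch of passing a shared dependency to all activities via this context
		// (the loggerKey type and activityLogger value are illustrative assumptions, not part of this package):
		//
		//	type loggerKey struct{}
		//	ctx := context.WithValue(context.Background(), loggerKey{}, activityLogger)
		//	worker := NewWorker(service, domain, taskList, WorkerOptions{
		//		BackgroundActivityContext: ctx,
		//	})
		//
		// Inside an activity the value can then be read back from the activity's context:
		//
		//	func MyActivity(ctx context.Context) error {
		//		logger, _ := ctx.Value(loggerKey{}).(*zap.Logger)
		//		...
		//	}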

		// Optional: Sets how the decision worker deals with non-deterministic history events
		// (presumably arising from non-deterministic workflow definitions or non-backward-compatible workflow definition changes).
		// default: NonDeterministicWorkflowPolicyBlockWorkflow, which just logs the error but replies nothing back to the server.
		NonDeterministicWorkflowPolicy NonDeterministicWorkflowPolicy

		// Optional: Sets the DataConverter to customize serialization/deserialization of arguments in Cadence.
		// default: defaultDataConverter, a combination of thriftEncoder and jsonEncoder.
		DataConverter DataConverter

		// Optional: Worker graceful shutdown timeout.
		// default: 0s
		WorkerStopTimeout time.Duration

		// Optional: Enable running session workers.
		// Session workers are for activities within a session.
		// Enable this option to allow the worker to process sessions.
		// default: false
		EnableSessionWorker bool

		// Uncomment this option when we support automatic reestablishment of failed sessions.
		// Optional: The identifier of the resource consumed by sessions.
		// It's the user's responsibility to ensure there's only one worker using this resourceID.
		// For now, if the user doesn't specify one, a new uuid will be used as the resourceID.
		// SessionResourceID string

		// Optional: Sets the maximum number of concurrently running sessions the resource supports.
		// default: 1000
		MaxConcurrentSessionExecutionSize int
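
		// A minimal sketch of running activities inside a session (illustrative only; the exported
		// workflow package API is shown, and MyActivity is an assumed activity):
		//
		//	worker := NewWorker(service, domain, taskList, WorkerOptions{EnableSessionWorker: true})
		//
		//	// In workflow code:
		//	so := &workflow.SessionOptions{
		//		CreationTimeout:  time.Minute,
		//		ExecutionTimeout: 10 * time.Minute,
		//	}
		//	sessionCtx, err := workflow.CreateSession(ctx, so)
		//	if err == nil {
		//		defer workflow.CompleteSession(sessionCtx)
		//		err = workflow.ExecuteActivity(sessionCtx, MyActivity).Get(sessionCtx, nil)
		//	}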

		// Optional: Specifies factories used to instantiate the workflow interceptor chain.
		// The chain is instantiated per replay of a workflow execution.
		WorkflowInterceptorChainFactories []WorkflowInterceptorFactory

		// Optional: Sets ContextPropagators that allow users to control the context information passed through a workflow.
		// default: no ContextPropagators
		ContextPropagators []ContextPropagator

		// Optional: Sets the opentracing Tracer that is to be used to emit tracing information.
		// default: no tracer - opentracing.NoopTracer
		Tracer opentracing.Tracer

		// Optional: Enable a worker for running shadowing workflows to replay existing workflows.
		// If set to true:
		// 1. The worker will run in shadow mode and all other workers (decision, activity, session)
		//    will be disabled to prevent them from updating existing workflow states.
		// 2. DataConverter, WorkflowInterceptorChainFactories, ContextPropagators, and Tracer will be
		//    used as ReplayOptions and forwarded to the underlying WorkflowReplayer.
		//    The actual shadower activity worker will not use them.
		// 3. TaskList will become Domain-TaskList, to prevent conflicts across domains as there's
		//    only one shadowing domain which is responsible for shadowing workflows for all domains.
		// default: false
		EnableShadowWorker bool

		// Optional: Configures the shadowing workflow.
		// default: please check the documentation of ShadowOptions for default options.
		ShadowOptions ShadowOptions

		// Optional: Flags to turn on/off some server side options.
		// default: all the features in the struct are turned off.
		FeatureFlags FeatureFlags

		// Optional: Authorization interface to get the Auth Token.
		// default: no provider
		Authorization auth.AuthorizationProvider

		// Optional: Host is the host name of the machine running the client.
		// default: empty string
		Host string

		// Optional: See WorkerBugPorts for more details.
		//
		// Deprecated: All bugports are always deprecated and may be removed at any time.
		WorkerBugPorts WorkerBugPorts
	}

	// WorkerBugPorts allows opt-in enabling of older, possibly buggy behavior, primarily intended to allow temporarily
	// emulating old behavior until a fix is deployed.
	// By default, bugs (especially rarely-occurring ones) are fixed and all users are opted into the new behavior.
	// Back-ported buggy behavior *may* be available via these flags.
	//
	// Bugports are always deprecated and may be removed in future versions.
	// Generally speaking they will *likely* remain in place for one minor version, and then they may be removed to
	// allow cleaning up the additional code complexity that they cause.
	//
	// Deprecated: All bugports are always deprecated and may be removed at any time.
	WorkerBugPorts struct {
		// Optional: Disable strict non-determinism checks for workflows.
		// There are some non-determinism cases which are missed by the original implementation and a fix is on the way.
		// The fix will be toggleable by this parameter.
		// Default: false, which means strict non-determinism checks are enabled.
		//
		// Deprecated: All bugports are always deprecated and may be removed at any time.
		DisableStrictNonDeterminismCheck bool
	}
)

// NonDeterministicWorkflowPolicy is an enum for configuring how the client's decision task handler deals with
// mismatched history events (presumably arising from non-deterministic workflow definitions).
type NonDeterministicWorkflowPolicy int

const (
	// NonDeterministicWorkflowPolicyBlockWorkflow is the default policy for handling detected non-determinism.
	// This option simply logs to the console with an error message that non-determinism is detected, but
	// does *NOT* reply anything back to the server.
	// It is chosen as the default for backward compatibility reasons because it preserves the old behavior
	// for handling non-determinism that we had before the NonDeterministicWorkflowPolicy type was added to
	// allow more configurability.
	NonDeterministicWorkflowPolicyBlockWorkflow NonDeterministicWorkflowPolicy = iota
	// NonDeterministicWorkflowPolicyFailWorkflow behaves exactly the same as the default policy, up until the very
	// end of processing a decision task.
	// Whereas the default does *NOT* reply anything back to the server, fail workflow replies back with a request
	// to fail the workflow execution.
	NonDeterministicWorkflowPolicyFailWorkflow
)

// NewWorker creates an instance of a worker for managing workflow and activity executions.
//
//	service  - thrift connection to the cadence server.
//	domain   - the name of the cadence domain.
//	taskList - the task list name you use to identify your client worker; it also identifies the group of
//	           workflow and activity implementations that are hosted by a single worker process.
//	options  - configures any worker-specific options like logger, metrics, identity.
func NewWorker(
	service workflowserviceclient.Interface,
	domain string,
	taskList string,
	options WorkerOptions,
) *aggregatedWorker {
	return newAggregatedWorker(service, domain, taskList, options)
}
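
// A minimal usage sketch (illustrative only; the service/logger construction and the MyWorkflow and
// MyActivity names are assumptions, not part of this file):
//
//	worker := NewWorker(service, "my-domain", "my-tasklist", WorkerOptions{
//		Logger:       logger,
//		MetricsScope: scope,
//	})
//	worker.RegisterWorkflow(MyWorkflow)
//	worker.RegisterActivity(MyActivity)
//	if err := worker.Start(); err != nil {
//		panic(err)
//	}
//	defer worker.Stop()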

// ReplayWorkflowExecution loads a workflow execution history from the Cadence service and executes a single decision task for it.
// Use for testing backwards compatibility of code changes and troubleshooting workflows in a debugger.
// The logger is the only optional parameter. Defaults to the noop logger.
//
// Deprecated: Global workflow replay methods are replaced by equivalent WorkflowReplayer instance methods.
// This method is kept to maintain backward compatibility and should not be used.
func ReplayWorkflowExecution(
	ctx context.Context,
	service workflowserviceclient.Interface,
	logger *zap.Logger,
	domain string,
	execution WorkflowExecution,
) error {
	r := NewWorkflowReplayer()
	return r.ReplayWorkflowExecution(ctx, service, logger, domain, execution)
}

// ReplayWorkflowHistory executes a single decision task for the given history.
// Use for testing the backwards compatibility of code changes and troubleshooting workflows in a debugger.
// The logger is an optional parameter. Defaults to the noop logger.
//
// Deprecated: Global workflow replay methods are replaced by equivalent WorkflowReplayer instance methods.
// This method is kept to maintain backward compatibility and should not be used.
func ReplayWorkflowHistory(logger *zap.Logger, history *shared.History) error {
	r := NewWorkflowReplayer()
	return r.ReplayWorkflowHistory(logger, history)
}

// ReplayWorkflowHistoryFromJSONFile executes a single decision task for the given JSON history file.
// Use for testing backwards compatibility of code changes and troubleshooting workflows in a debugger.
// The logger is an optional parameter. Defaults to the noop logger.
//
// Deprecated: Global workflow replay methods are replaced by equivalent WorkflowReplayer instance methods.
// This method is kept to maintain backward compatibility and should not be used.
func ReplayWorkflowHistoryFromJSONFile(logger *zap.Logger, jsonfileName string) error {
	r := NewWorkflowReplayer()
	return r.ReplayWorkflowHistoryFromJSONFile(logger, jsonfileName)
}

// ReplayPartialWorkflowHistoryFromJSONFile executes a single decision task for the given JSON history file, up to the
// provided lastEventID (inclusive).
// Use for testing backwards compatibility of code changes and troubleshooting workflows in a debugger.
// The logger is an optional parameter. Defaults to the noop logger.
//
// Deprecated: Global workflow replay methods are replaced by equivalent WorkflowReplayer instance methods.
// This method is kept to maintain backward compatibility and should not be used.
func ReplayPartialWorkflowHistoryFromJSONFile(logger *zap.Logger, jsonfileName string, lastEventID int64) error {
	r := NewWorkflowReplayer()
	return r.ReplayPartialWorkflowHistoryFromJSONFile(logger, jsonfileName, lastEventID)
}
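
// A minimal replay sketch using the (non-deprecated) WorkflowReplayer instance API; the history file
// name and MyWorkflow are illustrative assumptions:
//
//	replayer := NewWorkflowReplayer()
//	replayer.RegisterWorkflow(MyWorkflow)
//	if err := replayer.ReplayWorkflowHistoryFromJSONFile(zap.NewNop(), "history.json"); err != nil {
//		// a replay failure usually indicates a non-backwards-compatible workflow code change
//	}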