go.uber.org/cadence@v1.2.9/internal/worker.go

// Copyright (c) 2017-2020 Uber Technologies Inc.
// Portions of the Software are attributed to Copyright (c) 2020 Temporal Technologies Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package internal

import (
	"context"
	"time"

	"github.com/opentracing/opentracing-go"
	"github.com/uber-go/tally"
	"go.uber.org/zap"

	"go.uber.org/cadence/.gen/go/cadence/workflowserviceclient"
	"go.uber.org/cadence/.gen/go/shared"
	"go.uber.org/cadence/internal/common/auth"
)

type (
	// WorkerOptions is used to configure a worker instance.
	// The current timeout resolution implementation is in seconds and uses math.Ceil(d.Seconds()) as the duration,
	// but this is subject to change in the future.
	WorkerOptions struct {
		// Optional: Sets the maximum number of concurrent activity executions this worker can have.
		// The zero value of this uses the default value.
		// default: defaultMaxConcurrentActivityExecutionSize(1k)
		MaxConcurrentActivityExecutionSize int

		// Optional: Sets the rate limit on the number of activities that can be executed per second per
		// worker. This can be used to limit resources used by the worker.
		// Notice that the number is represented as a float, so you can set it to less than
		// 1 if needed. For example, setting the number to 0.1 means you want your activity to be executed
		// once every 10 seconds. This can be used to protect downstream services from flooding.
		// The zero value of this uses the default value.
		// default: 100k
		WorkerActivitiesPerSecond float64
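
		// For illustration (a sketch, not part of the API surface): because the limit is a float,
		// fractional rates express "less than once per second". A worker configured with, say,
		//
		//	options := WorkerOptions{
		//		WorkerActivitiesPerSecond:   0.5, // roughly one activity start every 2 seconds on this worker
		//		TaskListActivitiesPerSecond: 5,   // roughly 5 activity starts per second across the whole task list (server enforced, see below)
		//	}
		//
		// would throttle its own activity executions to about one every two seconds, independent of
		// how many other workers poll the same task list.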

		// Optional: Sets the maximum number of concurrent local activity executions this worker can have.
		// The zero value of this uses the default value.
		// default: 1k
		MaxConcurrentLocalActivityExecutionSize int

		// Optional: Sets the rate limit on the number of local activities that can be executed per second per
		// worker. This can be used to limit resources used by the worker.
		// Notice that the number is represented as a float, so you can set it to less than
		// 1 if needed. For example, setting the number to 0.1 means you want your local activity to be executed
		// once every 10 seconds. This can be used to protect downstream services from flooding.
		// The zero value of this uses the default value.
		// default: 100k
		WorkerLocalActivitiesPerSecond float64

		// Optional: Sets the rate limit on the number of activities that can be executed per second.
		// This is managed by the server and controls activities per second for your entire tasklist,
		// whereas WorkerActivitiesPerSecond controls activities only per worker.
		// Notice that the number is represented as a float, so you can set it to less than
		// 1 if needed. For example, setting the number to 0.1 means you want your activity to be executed
		// once every 10 seconds. This can be used to protect downstream services from flooding.
		// The zero value of this uses the default value.
		// default: 100k
		TaskListActivitiesPerSecond float64

		// Optional: Sets the maximum number of goroutines that will concurrently poll the
		// cadence-server to retrieve activity tasks. Changing this value will affect the
		// rate at which the worker is able to consume tasks from a task list.
		// Default value is 2
		MaxConcurrentActivityTaskPollers int

		// Optional: Sets the minimum number of goroutines that will concurrently poll the
		// cadence-server to retrieve activity tasks. Changing this value will NOT affect the
		// rate at which the worker is able to consume tasks from a task list,
		// unless FeatureFlags.PollerAutoScalerEnabled is set to true.
		// Default value is 1
		MinConcurrentActivityTaskPollers int

		// Optional: Sets the maximum number of concurrent decision task executions this worker can have.
		// The zero value of this uses the default value.
		// default: defaultMaxConcurrentTaskExecutionSize(1k)
		MaxConcurrentDecisionTaskExecutionSize int

		// Optional: Sets the rate limit on the number of decision tasks that can be executed per second per
		// worker. This can be used to limit resources used by the worker.
		// The zero value of this uses the default value.
		// default: 100k
		WorkerDecisionTasksPerSecond float64

		// Optional: Sets the maximum number of goroutines that will concurrently poll the
		// cadence-server to retrieve decision tasks. Changing this value will affect the
		// rate at which the worker is able to consume tasks from a task list.
		// Default value is 2
		MaxConcurrentDecisionTaskPollers int

		// Optional: Sets the minimum number of goroutines that will concurrently poll the
		// cadence-server to retrieve decision tasks. If FeatureFlags.PollerAutoScalerEnabled is set to true,
		// changing this value will NOT affect the rate at which the worker is able to consume tasks from a task list.
		// Default value is 1
		MinConcurrentDecisionTaskPollers int

		// Optional: Sets the interval of poller autoscaling, between which the poller autoscaler changes the poller count
		// based on poll results. It takes effect if FeatureFlags.PollerAutoScalerEnabled is set to true.
		// Default value is 1 min
		PollerAutoScalerCooldown time.Duration

		// Optional: Sets the target utilization rate between [0,1].
		// Utilization Rate = pollResultWithTask / (pollResultWithTask + pollResultWithNoTask)
		// It takes effect if FeatureFlags.PollerAutoScalerEnabled is set to true.
		// Default value is 0.6
		PollerAutoScalerTargetUtilization float64
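
		// An illustrative calculation (not part of the API): if, within one PollerAutoScalerCooldown
		// interval, 30 polls returned a task and 20 returned no task, the utilization rate is
		// 30 / (30 + 20) = 0.6. Roughly speaking, an observed rate below the target suggests there are
		// more pollers than needed and the autoscaler can shrink the poller count towards the configured
		// minimum, while a rate above the target lets it grow the count towards the configured maximum.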

		// Optional: Sets whether to start the poller autoscaler in dry run mode.
		// Default value is false
		PollerAutoScalerDryRun bool

		// Optional: Sets an identity that can be used to track this host for debugging.
		// default: default identity that includes hostname, groupName and process ID.
		Identity string

		// Optional: Defines the 'zone' or the failure group that the worker belongs to.
		IsolationGroup string

		// Optional: Metrics to be reported. Metrics emitted by the cadence client are not prometheus compatible by
		// default. To ensure metrics are compatible with prometheus, make sure to create the tally scope with sanitizer
		// options set.
		//	var (
		//		_safeCharacters = []rune{'_'}
		//		_sanitizeOptions = tally.SanitizeOptions{
		//			NameCharacters: tally.ValidCharacters{
		//				Ranges:     tally.AlphanumericRange,
		//				Characters: _safeCharacters,
		//			},
		//			KeyCharacters: tally.ValidCharacters{
		//				Ranges:     tally.AlphanumericRange,
		//				Characters: _safeCharacters,
		//			},
		//			ValueCharacters: tally.ValidCharacters{
		//				Ranges:     tally.AlphanumericRange,
		//				Characters: _safeCharacters,
		//			},
		//			ReplacementCharacter: tally.DefaultReplacementCharacter,
		//		}
		//	)
		//	opts := tally.ScopeOptions{
		//		Reporter:        reporter,
		//		SanitizeOptions: &_sanitizeOptions,
		//	}
		//	scope, _ := tally.NewRootScope(opts, time.Second)
		// default: no metrics.
		MetricsScope tally.Scope

		// Optional: Logger framework can use to log.
		// default: default logger provided.
		Logger *zap.Logger

		// Optional: Enable logging in replay.
		// In the workflow code you can use workflow.GetLogger(ctx) to write logs. By default, the logger will skip log
		// entries during replay mode so you won't see duplicate logs. This option enables logging in replay mode.
		// This is only useful for debugging purposes.
		// default: false
		EnableLoggingInReplay bool

		// Optional: Disable running workflow workers.
		// default: false
		DisableWorkflowWorker bool

		// Optional: Disable running activity workers.
		// default: false
		DisableActivityWorker bool

		// Optional: Disable sticky execution.
		// default: false
		// Sticky execution runs the decision tasks for one workflow execution on the same worker host. This is an
		// optimization for workflow execution. When sticky execution is enabled, the worker keeps the workflow state in
		// memory. New decision tasks containing the new history events will be dispatched to the same worker. If this
		// worker crashes, the sticky decision task will time out after StickyScheduleToStartTimeout, and the cadence server
		// will clear the stickiness for that workflow execution and automatically reschedule a new decision task that
		// is available for any worker to pick up and resume the progress.
		DisableStickyExecution bool

		// Optional: Sticky schedule to start timeout.
		// default: 5s
		// The resolution is seconds. See details about sticky execution in the comments for DisableStickyExecution.
		StickyScheduleToStartTimeout time.Duration

		// Optional: Sets the context for activities. The context can be used to pass any configuration to activities,
		// like a common logger for all activities.
		BackgroundActivityContext context.Context
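
		// A minimal sketch of passing a shared dependency to all activities via this context
		// (the loggerKey type and activityLogger value are illustrative assumptions, not part of this package):
		//
		//	type loggerKey struct{}
		//	ctx := context.WithValue(context.Background(), loggerKey{}, activityLogger)
		//	worker := NewWorker(service, domain, taskList, WorkerOptions{
		//		BackgroundActivityContext: ctx,
		//	})
		//
		// Inside an activity the value can then be read back from the activity's context:
		//
		//	func MyActivity(ctx context.Context) error {
		//		logger, _ := ctx.Value(loggerKey{}).(*zap.Logger)
		//		...
		//	}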

		// Optional: Sets how the decision worker deals with non-deterministic history events
		// (presumably arising from non-deterministic workflow definitions or non-backward-compatible workflow definition changes).
		// default: NonDeterministicWorkflowPolicyBlockWorkflow, which just logs the error but replies nothing back to the server.
		NonDeterministicWorkflowPolicy NonDeterministicWorkflowPolicy

		// Optional: Sets the DataConverter to customize serialization/deserialization of arguments in Cadence.
		// default: defaultDataConverter, a combination of thriftEncoder and jsonEncoder.
		DataConverter DataConverter

		// Optional: Worker graceful shutdown timeout.
		// default: 0s
		WorkerStopTimeout time.Duration

		// Optional: Enable running session workers.
		// Session workers are for activities within a session.
		// Enable this option to allow the worker to process sessions.
		// default: false
		EnableSessionWorker bool

		// Uncomment this option when we support automatic reestablishment of failed sessions.
		// Optional: The identifier of the resource consumed by sessions.
		// It's the user's responsibility to ensure there's only one worker using this resourceID.
		// For now, if the user doesn't specify one, a new uuid will be used as the resourceID.
		// SessionResourceID string

		// Optional: Sets the maximum number of concurrently running sessions the resource supports.
		// default: 1000
		MaxConcurrentSessionExecutionSize int
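
		// A minimal sketch of running activities inside a session (illustrative only; the exported
		// workflow package API is shown, and MyActivity is an assumed activity):
		//
		//	worker := NewWorker(service, domain, taskList, WorkerOptions{EnableSessionWorker: true})
		//
		//	// In workflow code:
		//	so := &workflow.SessionOptions{
		//		CreationTimeout:  time.Minute,
		//		ExecutionTimeout: 10 * time.Minute,
		//	}
		//	sessionCtx, err := workflow.CreateSession(ctx, so)
		//	if err == nil {
		//		defer workflow.CompleteSession(sessionCtx)
		//		err = workflow.ExecuteActivity(sessionCtx, MyActivity).Get(sessionCtx, nil)
		//	}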

		// Optional: Specifies factories used to instantiate the workflow interceptor chain.
		// The chain is instantiated per replay of a workflow execution.
		WorkflowInterceptorChainFactories []WorkflowInterceptorFactory

		// Optional: Sets ContextPropagators that allow users to control the context information passed through a workflow.
		// default: no ContextPropagators
		ContextPropagators []ContextPropagator

		// Optional: Sets the opentracing Tracer that is to be used to emit tracing information.
		// default: no tracer - opentracing.NoopTracer
		Tracer opentracing.Tracer

		// Optional: Enable a worker for running shadowing workflows to replay existing workflows.
		// If set to true:
		// 1. The worker will run in shadow mode and all other workers (decision, activity, session)
		//    will be disabled to prevent them from updating existing workflow states.
		// 2. DataConverter, WorkflowInterceptorChainFactories, ContextPropagators, and Tracer will be
		//    used as ReplayOptions and forwarded to the underlying WorkflowReplayer.
		//    The actual shadower activity worker will not use them.
		// 3. TaskList will become Domain-TaskList, to prevent conflicts across domains as there's
		//    only one shadowing domain which is responsible for shadowing workflows for all domains.
		// default: false
		EnableShadowWorker bool

		// Optional: Configures the shadowing workflow.
		// default: please check the documentation of ShadowOptions for default options.
		ShadowOptions ShadowOptions

		// Optional: Flags to turn on/off some server side options.
		// default: all the features in the struct are turned off.
		FeatureFlags FeatureFlags

		// Optional: Authorization interface to get the Auth Token.
		// default: no provider
		Authorization auth.AuthorizationProvider

		// Optional: Host is the host name of the machine running the client.
		// default: empty string
		Host string

		// Optional: See WorkerBugPorts for more details.
		//
		// Deprecated: All bugports are always deprecated and may be removed at any time.
		WorkerBugPorts WorkerBugPorts
	}

	// WorkerBugPorts allows opt-in enabling of older, possibly buggy behavior, primarily intended to allow temporarily
	// emulating old behavior until a fix is deployed.
	// By default, bugs (especially rarely-occurring ones) are fixed and all users are opted into the new behavior.
	// Back-ported buggy behavior *may* be available via these flags.
	//
	// Bugports are always deprecated and may be removed in future versions.
	// Generally speaking they will *likely* remain in place for one minor version, and then they may be removed to
	// allow cleaning up the additional code complexity that they cause.
	//
	// Deprecated: All bugports are always deprecated and may be removed at any time.
	WorkerBugPorts struct {
		// Optional: Disable strict non-determinism checks for workflows.
		// There are some non-determinism cases which are missed by the original implementation and a fix is on the way.
		// The fix will be toggleable by this parameter.
		// Default: false, which means strict non-determinism checks are enabled.
		//
		// Deprecated: All bugports are always deprecated and may be removed at any time.
		DisableStrictNonDeterminismCheck bool
	}
)

// NonDeterministicWorkflowPolicy is an enum for configuring how the client's decision task handler deals with
// mismatched history events (presumably arising from non-deterministic workflow definitions).
type NonDeterministicWorkflowPolicy int

const (
	// NonDeterministicWorkflowPolicyBlockWorkflow is the default policy for handling detected non-determinism.
	// This option simply logs to the console with an error message that non-determinism is detected, but
	// does *NOT* reply anything back to the server.
	// It is chosen as the default for backward compatibility reasons because it preserves the old behavior
	// for handling non-determinism that we had before the NonDeterministicWorkflowPolicy type was added to
	// allow more configurability.
	NonDeterministicWorkflowPolicyBlockWorkflow NonDeterministicWorkflowPolicy = iota
	// NonDeterministicWorkflowPolicyFailWorkflow behaves exactly the same as the default policy, up until the very
	// end of processing a decision task.
	// Whereas the default does *NOT* reply anything back to the server, fail workflow replies back with a request
	// to fail the workflow execution.
	NonDeterministicWorkflowPolicyFailWorkflow
)

// NewWorker creates an instance of a worker for managing workflow and activity executions.
//
//	service  - thrift connection to the cadence server.
//	domain   - the name of the cadence domain.
//	taskList - the task list name you use to identify your client worker; it also identifies the group of
//	           workflow and activity implementations that are hosted by a single worker process.
//	options  - configures any worker-specific options like logger, metrics, identity.
func NewWorker(
	service workflowserviceclient.Interface,
	domain string,
	taskList string,
	options WorkerOptions,
) *aggregatedWorker {
	return newAggregatedWorker(service, domain, taskList, options)
}
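
// A minimal usage sketch (illustrative only; the service/logger construction and the MyWorkflow and
// MyActivity names are assumptions, not part of this file):
//
//	worker := NewWorker(service, "my-domain", "my-tasklist", WorkerOptions{
//		Logger:       logger,
//		MetricsScope: scope,
//	})
//	worker.RegisterWorkflow(MyWorkflow)
//	worker.RegisterActivity(MyActivity)
//	if err := worker.Start(); err != nil {
//		panic(err)
//	}
//	defer worker.Stop()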

// ReplayWorkflowExecution loads a workflow execution history from the Cadence service and executes a single decision task for it.
// Use for testing backwards compatibility of code changes and troubleshooting workflows in a debugger.
// The logger is the only optional parameter. Defaults to the noop logger.
//
// Deprecated: Global workflow replay methods are replaced by equivalent WorkflowReplayer instance methods.
// This method is kept to maintain backward compatibility and should not be used.
func ReplayWorkflowExecution(
	ctx context.Context,
	service workflowserviceclient.Interface,
	logger *zap.Logger,
	domain string,
	execution WorkflowExecution,
) error {
	r := NewWorkflowReplayer()
	return r.ReplayWorkflowExecution(ctx, service, logger, domain, execution)
}

// ReplayWorkflowHistory executes a single decision task for the given history.
// Use for testing the backwards compatibility of code changes and troubleshooting workflows in a debugger.
// The logger is an optional parameter. Defaults to the noop logger.
//
// Deprecated: Global workflow replay methods are replaced by equivalent WorkflowReplayer instance methods.
// This method is kept to maintain backward compatibility and should not be used.
func ReplayWorkflowHistory(logger *zap.Logger, history *shared.History) error {
	r := NewWorkflowReplayer()
	return r.ReplayWorkflowHistory(logger, history)
}

// ReplayWorkflowHistoryFromJSONFile executes a single decision task for the given JSON history file.
// Use for testing backwards compatibility of code changes and troubleshooting workflows in a debugger.
// The logger is an optional parameter. Defaults to the noop logger.
//
// Deprecated: Global workflow replay methods are replaced by equivalent WorkflowReplayer instance methods.
// This method is kept to maintain backward compatibility and should not be used.
func ReplayWorkflowHistoryFromJSONFile(logger *zap.Logger, jsonfileName string) error {
	r := NewWorkflowReplayer()
	return r.ReplayWorkflowHistoryFromJSONFile(logger, jsonfileName)
}

// ReplayPartialWorkflowHistoryFromJSONFile executes a single decision task for the given JSON history file, up to the
// provided lastEventID (inclusive).
// Use for testing backwards compatibility of code changes and troubleshooting workflows in a debugger.
// The logger is an optional parameter. Defaults to the noop logger.
//
// Deprecated: Global workflow replay methods are replaced by equivalent WorkflowReplayer instance methods.
// This method is kept to maintain backward compatibility and should not be used.
func ReplayPartialWorkflowHistoryFromJSONFile(logger *zap.Logger, jsonfileName string, lastEventID int64) error {
	r := NewWorkflowReplayer()
	return r.ReplayPartialWorkflowHistoryFromJSONFile(logger, jsonfileName, lastEventID)
}
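
// A minimal replay sketch using the (non-deprecated) WorkflowReplayer instance API; the history file
// name and MyWorkflow are illustrative assumptions:
//
//	replayer := NewWorkflowReplayer()
//	replayer.RegisterWorkflow(MyWorkflow)
//	if err := replayer.ReplayWorkflowHistoryFromJSONFile(zap.NewNop(), "history.json"); err != nil {
//		// a replay failure usually indicates a non-backwards-compatible workflow code change
//	}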