go.uber.org/cadence@v1.2.9/internal/worker.go (about)

     1  // Copyright (c) 2017-2020 Uber Technologies Inc.
     2  // Portions of the Software are attributed to Copyright (c) 2020 Temporal Technologies Inc.
     3  //
     4  // Permission is hereby granted, free of charge, to any person obtaining a copy
     5  // of this software and associated documentation files (the "Software"), to deal
     6  // in the Software without restriction, including without limitation the rights
     7  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     8  // copies of the Software, and to permit persons to whom the Software is
     9  // furnished to do so, subject to the following conditions:
    10  //
    11  // The above copyright notice and this permission notice shall be included in
    12  // all copies or substantial portions of the Software.
    13  //
    14  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    15  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    16  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    17  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    18  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    19  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    20  // THE SOFTWARE.
    21  
    22  package internal
    23  
    24  import (
    25  	"context"
    26  	"time"
    27  
    28  	"github.com/opentracing/opentracing-go"
    29  	"github.com/uber-go/tally"
    30  	"go.uber.org/zap"
    31  
    32  	"go.uber.org/cadence/.gen/go/cadence/workflowserviceclient"
    33  	"go.uber.org/cadence/.gen/go/shared"
    34  	"go.uber.org/cadence/internal/common/auth"
    35  )
    36  
type (
	// WorkerOptions is used to configure a worker instance.
	// The current timeout resolution implementation is in seconds and uses math.Ceil(d.Seconds()) as the duration,
	// but is subject to change in the future.
	WorkerOptions struct {
		// Optional: To set the maximum concurrent activity executions this worker can have.
		// The zero value of this uses the default value.
		// default: defaultMaxConcurrentActivityExecutionSize(1k)
		MaxConcurrentActivityExecutionSize int

		// Optional: Sets the rate limiting on number of activities that can be executed per second per
		// worker. This can be used to limit resources used by the worker.
		// Notice that the number is represented in float, so that you can set it to less than
		// 1 if needed. For example, set the number to 0.1 means you want your activity to be executed
		// once for every 10 seconds. This can be used to protect down stream services from flooding.
		// The zero value of this uses the default value. Default: 100k
		WorkerActivitiesPerSecond float64

		// Optional: To set the maximum concurrent local activity executions this worker can have.
		// The zero value of this uses the default value.
		// default: 1k
		MaxConcurrentLocalActivityExecutionSize int

		// Optional: Sets the rate limiting on number of local activities that can be executed per second per
		// worker. This can be used to limit resources used by the worker.
		// Notice that the number is represented in float, so that you can set it to less than
		// 1 if needed. For example, set the number to 0.1 means you want your local activity to be executed
		// once for every 10 seconds. This can be used to protect down stream services from flooding.
		// The zero value of this uses the default value. Default: 100k
		WorkerLocalActivitiesPerSecond float64

		// Optional: Sets the rate limiting on number of activities that can be executed per second.
		// This is managed by the server and controls activities per second for your entire tasklist
		// whereas WorkerActivitiesPerSecond controls activities only per worker.
		// Notice that the number is represented in float, so that you can set it to less than
		// 1 if needed. For example, set the number to 0.1 means you want your activity to be executed
		// once for every 10 seconds. This can be used to protect down stream services from flooding.
		// The zero value of this uses the default value. Default: 100k
		TaskListActivitiesPerSecond float64

		// Optional: Sets the maximum number of goroutines that will concurrently poll the
		// cadence-server to retrieve activity tasks. Changing this value will affect the
		// rate at which the worker is able to consume tasks from a task list.
		// Default value is 2
		MaxConcurrentActivityTaskPollers int

		// Optional: Sets the minimum number of goroutines that will concurrently poll the
		// cadence-server to retrieve activity tasks. Changing this value will NOT affect the
		// rate at which the worker is able to consume tasks from a task list,
		// unless FeatureFlags.PollerAutoScalerEnabled is set to true.
		// Default value is 1
		MinConcurrentActivityTaskPollers int

		// Optional: To set the maximum concurrent decision task executions this worker can have.
		// The zero value of this uses the default value.
		// default: defaultMaxConcurrentTaskExecutionSize(1k)
		MaxConcurrentDecisionTaskExecutionSize int

		// Optional: Sets the rate limiting on number of decision tasks that can be executed per second per
		// worker. This can be used to limit resources used by the worker.
		// The zero value of this uses the default value. Default: 100k
		WorkerDecisionTasksPerSecond float64

		// Optional: Sets the maximum number of goroutines that will concurrently poll the
		// cadence-server to retrieve decision tasks. Changing this value will affect the
		// rate at which the worker is able to consume tasks from a task list.
		// Default value is 2
		MaxConcurrentDecisionTaskPollers int

		// Optional: Sets the minimum number of goroutines that will concurrently poll the
		// cadence-server to retrieve decision tasks. If FeatureFlags.PollerAutoScalerEnabled is set to true,
		// changing this value will NOT affect the rate at which the worker is able to consume tasks from a task list.
		// NOTE(review): this wording looks inverted relative to MinConcurrentActivityTaskPollers above
		// (which says the value only takes effect when the autoscaler is enabled) — confirm intended semantics.
		// Default value is 1
		MinConcurrentDecisionTaskPollers int

		// Optional: Sets the interval of poller autoscaling, between which poller autoscaler changes the poller count
		// based on poll result. It takes effect if FeatureFlags.PollerAutoScalerEnabled is set to true.
		// Default value is 1 min
		PollerAutoScalerCooldown time.Duration

		// Optional: Sets the target utilization rate between [0,1].
		// Utilization Rate = pollResultWithTask / (pollResultWithTask + pollResultWithNoTask)
		// It takes effect if FeatureFlags.PollerAutoScalerEnabled is set to true.
		// Default value is 0.6
		PollerAutoScalerTargetUtilization float64

		// Optional: Sets whether to start dry run mode of autoscaler.
		// Default value is false
		PollerAutoScalerDryRun bool

		// Optional: Sets an identity that can be used to track this host for debugging.
		// default: default identity that includes hostname, groupName and process ID.
		Identity string

		// Optional: Defines the 'zone' or the failure group that the worker belongs to
		IsolationGroup string

		// Optional: Metrics to be reported. Metrics emitted by the cadence client are not prometheus compatible by
		// default. To ensure metrics are compatible with prometheus make sure to create tally scope with sanitizer
		// options set.
		// var (
		// _safeCharacters = []rune{'_'}
		// _sanitizeOptions = tally.SanitizeOptions{
		// 	NameCharacters: tally.ValidCharacters{
		// 		Ranges:     tally.AlphanumericRange,
		// 		Characters: _safeCharacters,
		// 	},
		// 		KeyCharacters: tally.ValidCharacters{
		// 			Ranges:     tally.AlphanumericRange,
		// 			Characters: _safeCharacters,
		// 		},
		// 		ValueCharacters: tally.ValidCharacters{
		// 			Ranges:     tally.AlphanumericRange,
		// 			Characters: _safeCharacters,
		// 		},
		// 		ReplacementCharacter: tally.DefaultReplacementCharacter,
		// 	}
		// )
		// opts := tally.ScopeOptions{
		// 	Reporter:        reporter,
		// 	SanitizeOptions: &_sanitizeOptions,
		// }
		// scope, _ := tally.NewRootScope(opts, time.Second)
		// default: no metrics.
		MetricsScope tally.Scope

		// Optional: Logger framework can use to log.
		// default: default logger provided.
		Logger *zap.Logger

		// Optional: Enable logging in replay.
		// In the workflow code you can use workflow.GetLogger(ctx) to write logs. By default, the logger will skip log
		// entry during replay mode so you won't see duplicate logs. This option will enable the logging in replay mode.
		// This is only useful for debugging purpose.
		// default: false
		EnableLoggingInReplay bool

		// Optional: Disable running workflow workers.
		// default: false
		DisableWorkflowWorker bool

		// Optional: Disable running activity workers.
		// default: false
		DisableActivityWorker bool

		// Optional: Disable sticky execution.
		// default: false
		// Sticky Execution is to run the decision tasks for one workflow execution on same worker host. This is an
		// optimization for workflow execution. When sticky execution is enabled, worker keeps the workflow state in
		// memory. New decision task contains the new history events will be dispatched to the same worker. If this
		// worker crashes, the sticky decision task will timeout after StickyScheduleToStartTimeout, and cadence server
		// will clear the stickiness for that workflow execution and automatically reschedule a new decision task that
		// is available for any worker to pick up and resume the progress.
		DisableStickyExecution bool

		// Optional: Sticky schedule to start timeout.
		// default: 5s
		// The resolution is seconds. See details about StickyExecution on the comments for DisableStickyExecution.
		StickyScheduleToStartTimeout time.Duration

		// Optional: sets context for activity. The context can be used to pass any configuration to activity
		// like common logger for all activities.
		BackgroundActivityContext context.Context

		// Optional: Sets how decision worker deals with non-deterministic history events
		// (presumably arising from non-deterministic workflow definitions or non-backward compatible workflow definition changes).
		// default: NonDeterministicWorkflowPolicyBlockWorkflow, which just logs error but reply nothing back to server
		NonDeterministicWorkflowPolicy NonDeterministicWorkflowPolicy

		// Optional: Sets DataConverter to customize serialization/deserialization of arguments in Cadence
		// default: defaultDataConverter, a combination of thriftEncoder and jsonEncoder
		DataConverter DataConverter

		// Optional: worker graceful shutdown timeout
		// default: 0s
		WorkerStopTimeout time.Duration

		// Optional: Enable running session workers.
		// Session workers is for activities within a session.
		// Enable this option to allow worker to process sessions.
		// default: false
		EnableSessionWorker bool

		// Uncomment this option when we support automatic reestablish failed sessions.
		// Optional: The identifier of the resource consumed by sessions.
		// It's the user's responsibility to ensure there's only one worker using this resourceID.
		// For now, if user doesn't specify one, a new uuid will be used as the resourceID.
		// SessionResourceID string

		// Optional: Sets the maximum number of concurrently running sessions the resource support.
		// default: 1000
		MaxConcurrentSessionExecutionSize int

		// Optional: Specifies factories used to instantiate workflow interceptor chain
		// The chain is instantiated per each replay of a workflow execution
		WorkflowInterceptorChainFactories []WorkflowInterceptorFactory

		// Optional: Sets ContextPropagators that allows users to control the context information passed through a workflow
		// default: no ContextPropagators
		ContextPropagators []ContextPropagator

		// Optional: Sets opentracing Tracer that is to be used to emit tracing information
		// default: no tracer - opentracing.NoopTracer
		Tracer opentracing.Tracer

		// Optional: Enable worker for running shadowing workflows to replay existing workflows
		// If set to true:
		// 1. Worker will run in shadow mode and all other workers (decision, activity, session)
		// will be disabled to prevent them from updating existing workflow states.
		// 2. DataConverter, WorkflowInterceptorChainFactories, ContextPropagators, Tracer will be
		// used as ReplayOptions and forwarded to the underlying WorkflowReplayer.
		// The actual shadower activity worker will not use them.
		// 3. TaskList will become Domain-TaskList, to prevent conflict across domains as there's
		// only one shadowing domain which is responsible for shadowing workflows for all domains.
		// default: false
		EnableShadowWorker bool

		// Optional: Configures shadowing workflow
		// default: please check the documentation for ShadowOptions for default options
		ShadowOptions ShadowOptions

		// Optional: Flags to turn on/off some server side options
		// default: all the features in the struct are turned off
		FeatureFlags FeatureFlags

		// Optional: Authorization interface to get the Auth Token
		// default: No provider
		Authorization auth.AuthorizationProvider

		// Optional: Host is just string on the machine running the client
		// default: empty string
		Host string

		// Optional: See WorkerBugPorts for more details
		//
		// Deprecated: All bugports are always deprecated and may be removed at any time.
		WorkerBugPorts WorkerBugPorts
	}

	// WorkerBugPorts allows opt-in enabling of older, possibly buggy behavior, primarily intended to allow temporarily
	// emulating old behavior until a fix is deployed.
	// By default, bugs (especially rarely-occurring ones) are fixed and all users are opted into the new behavior.
	// Back-ported buggy behavior *may* be available via these flags.
	//
	// Bugports are always deprecated and may be removed in future versions.
	// Generally speaking they will *likely* remain in place for one minor version, and then they may be removed to
	// allow cleaning up the additional code complexity that they cause.
	//
	// Deprecated: All bugports are always deprecated and may be removed at any time
	WorkerBugPorts struct {
		// Optional: Disable strict non-determinism checks for workflow.
		// There are some non-determinism cases which are missed by original implementation and a fix is on the way.
		// The fix will be toggleable by this parameter.
		// Default: false, which means strict non-determinism checks are enabled.
		//
		// Deprecated: All bugports are always deprecated and may be removed at any time
		DisableStrictNonDeterminismCheck bool
	}
)
   295  
   296  // NonDeterministicWorkflowPolicy is an enum for configuring how client's decision task handler deals with
   297  // mismatched history events (presumably arising from non-deterministic workflow definitions).
   298  type NonDeterministicWorkflowPolicy int
   299  
   300  const (
   301  	// NonDeterministicWorkflowPolicyBlockWorkflow is the default policy for handling detected non-determinism.
   302  	// This option simply logs to console with an error message that non-determinism is detected, but
   303  	// does *NOT* reply anything back to the server.
   304  	// It is chosen as default for backward compatibility reasons because it preserves the old behavior
   305  	// for handling non-determinism that we had before NonDeterministicWorkflowPolicy type was added to
   306  	// allow more configurability.
   307  	NonDeterministicWorkflowPolicyBlockWorkflow NonDeterministicWorkflowPolicy = iota
   308  	// NonDeterministicWorkflowPolicyFailWorkflow behaves exactly the same as Ignore, up until the very
   309  	// end of processing a decision task.
   310  	// Whereas default does *NOT* reply anything back to the server, fail workflow replies back with a request
   311  	// to fail the workflow execution.
   312  	NonDeterministicWorkflowPolicyFailWorkflow
   313  )
   314  
   315  // NewWorker creates an instance of worker for managing workflow and activity executions.
   316  // service 	- thrift connection to the cadence server.
   317  // domain - the name of the cadence domain.
   318  // taskList 	- is the task list name you use to identify your client worker, also
   319  //
   320  //	identifies group of workflow and activity implementations that are hosted by a single worker process.
   321  //
   322  // options 	-  configure any worker specific options like logger, metrics, identity.
   323  func NewWorker(
   324  	service workflowserviceclient.Interface,
   325  	domain string,
   326  	taskList string,
   327  	options WorkerOptions,
   328  ) *aggregatedWorker {
   329  	return newAggregatedWorker(service, domain, taskList, options)
   330  }
   331  
   332  // ReplayWorkflowExecution loads a workflow execution history from the Cadence service and executes a single decision task for it.
   333  // Use for testing backwards compatibility of code changes and troubleshooting workflows in a debugger.
   334  // The logger is the only optional parameter. Defaults to the noop logger.
   335  // Deprecated: Global workflow replay methods are replaced by equivalent WorkflowReplayer instance methods.
   336  // This method is kept to maintain backward compatibility and should not be used.
   337  func ReplayWorkflowExecution(
   338  	ctx context.Context,
   339  	service workflowserviceclient.Interface,
   340  	logger *zap.Logger,
   341  	domain string,
   342  	execution WorkflowExecution,
   343  ) error {
   344  	r := NewWorkflowReplayer()
   345  	return r.ReplayWorkflowExecution(ctx, service, logger, domain, execution)
   346  }
   347  
   348  // ReplayWorkflowHistory executes a single decision task for the given history.
   349  // Use for testing the backwards compatibility of code changes and troubleshooting workflows in a debugger.
   350  // The logger is an optional parameter. Defaults to the noop logger.
   351  // Deprecated: Global workflow replay methods are replaced by equivalent WorkflowReplayer instance methods.
   352  // This method is kept to maintain backward compatibility and should not be used.
   353  func ReplayWorkflowHistory(logger *zap.Logger, history *shared.History) error {
   354  	r := NewWorkflowReplayer()
   355  	return r.ReplayWorkflowHistory(logger, history)
   356  }
   357  
   358  // ReplayWorkflowHistoryFromJSONFile executes a single decision task for the given json history file.
   359  // Use for testing backwards compatibility of code changes and troubleshooting workflows in a debugger.
   360  // The logger is an optional parameter. Defaults to the noop logger.
   361  // Deprecated: Global workflow replay methods are replaced by equivalent WorkflowReplayer instance methods.
   362  // This method is kept to maintain backward compatibility and should not be used.
   363  func ReplayWorkflowHistoryFromJSONFile(logger *zap.Logger, jsonfileName string) error {
   364  	r := NewWorkflowReplayer()
   365  	return r.ReplayWorkflowHistoryFromJSONFile(logger, jsonfileName)
   366  }
   367  
   368  // ReplayPartialWorkflowHistoryFromJSONFile executes a single decision task for the given json history file upto provided
   369  // lastEventID(inclusive).
   370  // Use for testing backwards compatibility of code changes and troubleshooting workflows in a debugger.
   371  // The logger is an optional parameter. Defaults to the noop logger.
   372  // Deprecated: Global workflow replay methods are replaced by equivalent WorkflowReplayer instance methods.
   373  // This method is kept to maintain backward compatibility and should not be used.
   374  func ReplayPartialWorkflowHistoryFromJSONFile(logger *zap.Logger, jsonfileName string, lastEventID int64) error {
   375  	r := NewWorkflowReplayer()
   376  	return r.ReplayPartialWorkflowHistoryFromJSONFile(logger, jsonfileName, lastEventID)
   377  }