go.uber.org/cadence@v1.2.9/internal/internal_worker.go 1 // Copyright (c) 2017-2020 Uber Technologies Inc. 2 // Portions of the Software are attributed to Copyright (c) 2020 Temporal Technologies Inc. 3 // 4 // Permission is hereby granted, free of charge, to any person obtaining a copy 5 // of this software and associated documentation files (the "Software"), to deal 6 // in the Software without restriction, including without limitation the rights 7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 // copies of the Software, and to permit persons to whom the Software is 9 // furnished to do so, subject to the following conditions: 10 // 11 // The above copyright notice and this permission notice shall be included in 12 // all copies or substantial portions of the Software. 13 // 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 // THE SOFTWARE. 21 22 package internal 23 24 // All code in this file is private to the package. 25 26 import ( 27 "context" 28 "crypto/md5" 29 "encoding/hex" 30 "errors" 31 "fmt" 32 "io" 33 "os" 34 "reflect" 35 "runtime" 36 "strconv" 37 "strings" 38 "sync" 39 "sync/atomic" 40 "time" 41 42 "go.uber.org/cadence/internal/common/isolationgroup" 43 44 "github.com/opentracing/opentracing-go" 45 "github.com/pborman/uuid" 46 "github.com/uber-go/tally" 47 "go.uber.org/zap" 48 "go.uber.org/zap/zapcore" 49 50 "go.uber.org/cadence/.gen/go/cadence/workflowserviceclient" 51 "go.uber.org/cadence/.gen/go/shared" 52 "go.uber.org/cadence/internal/common/auth" 53 "go.uber.org/cadence/internal/common/backoff" 54 "go.uber.org/cadence/internal/common/metrics" 55 "go.uber.org/cadence/internal/common/util" 56 ) 57 58 var startVersionMetric sync.Once 59 var StopMetrics = make(chan struct{}) 60 61 const ( 62 // Set to 2 pollers for now, can adjust later if needed. The typical RTT (round-trip time) is below 1ms within a data 63 // center. And the poll API latency is about 5ms. With 2 pollers, we could achieve around 300~400 RPS. 64 defaultConcurrentPollRoutineSize = 2 65 66 defaultMaxConcurrentActivityExecutionSize = 1000 // Large concurrent activity execution size (1k) 67 defaultWorkerActivitiesPerSecond = 100000 // Large activity executions/sec (unlimited) 68 69 defaultMaxConcurrentLocalActivityExecutionSize = 1000 // Large concurrent activity execution size (1k) 70 defaultWorkerLocalActivitiesPerSecond = 100000 // Large activity executions/sec (unlimited) 71 72 defaultTaskListActivitiesPerSecond = 100000.0 // Large activity executions/sec (unlimited) 73 74 defaultMaxConcurrentTaskExecutionSize = 1000 // hardcoded max task execution size. 75 defaultWorkerTaskExecutionRate = 100000 // Large task execution rate (unlimited) 76 77 defaultPollerRate = 1000 78 79 defaultMaxConcurrentSessionExecutionSize = 1000 // Large concurrent session execution size (1k) 80 81 testTagsContextKey = "cadence-testTags" 82 clientVersionTag = "cadence_client_version" 83 clientGauge = "client_version_metric" 84 clientHostTag = "cadence_client_host" 85 ) 86 87 type ( 88 // WorkflowWorker wraps the code for hosting workflow types.
89 // A worker is mapped 1:1 with a task list. If the user wants to poll multiple 90 // task list names, they have to manage 'n' workers for 'n' task lists. 91 workflowWorker struct { 92 executionParameters workerExecutionParameters 93 workflowService workflowserviceclient.Interface 94 domain string 95 poller taskPoller // taskPoller to poll and process the tasks. 96 worker *baseWorker 97 localActivityWorker *baseWorker 98 identity string 99 stopC chan struct{} 100 } 101 102 // ActivityWorker wraps the code for hosting activity types. 103 // TODO: have the worker heartbeat automatically while an activity task is running 104 activityWorker struct { 105 executionParameters workerExecutionParameters 106 workflowService workflowserviceclient.Interface 107 domain string 108 poller taskPoller 109 worker *baseWorker 110 identity string 111 stopC chan struct{} 112 } 113 114 // sessionWorker wraps the code for hosting session creation, completion and 115 // activities within a session. The creationWorker polls from a global tasklist, 116 // while the activityWorker polls from a resource-specific tasklist. 117 sessionWorker struct { 118 creationWorker *activityWorker 119 activityWorker *activityWorker 120 } 121 122 // Worker overrides. 123 workerOverrides struct { 124 workflowTaskHandler WorkflowTaskHandler 125 activityTaskHandler ActivityTaskHandler 126 useLocallyDispatchedActivityPoller bool 127 } 128 129 // workerExecutionParameters defines worker configuration/execution options. 130 workerExecutionParameters struct { 131 WorkerOptions 132 133 // Task list name to poll. 134 TaskList string 135 136 // Context to store user-provided key/value pairs 137 UserContext context.Context 138 139 // Context cancel function to cancel user context 140 UserContextCancel context.CancelFunc 141 142 // WorkerStopTimeout is the time delay before hard terminating the worker 143 WorkerStopTimeout time.Duration 144 145 // WorkerStopChannel is a read-only channel to listen on for worker close. The worker will close the channel before exit. 146 WorkerStopChannel <-chan struct{} 147 148 // SessionResourceID is a unique identifier of the resource the session will consume 149 SessionResourceID string 150 } 151 ) 152 153 // newWorkflowWorker returns an instance of the workflow worker. 154 func newWorkflowWorker( 155 service workflowserviceclient.Interface, 156 domain string, 157 params workerExecutionParameters, 158 ppMgr pressurePointMgr, 159 registry *registry, 160 ldaTunnel *locallyDispatchedActivityTunnel, 161 ) *workflowWorker { 162 return newWorkflowWorkerInternal(service, domain, params, ppMgr, nil, registry, ldaTunnel) 163 } 164 165 func ensureRequiredParams(params *workerExecutionParameters) { 166 if params.Identity == "" { 167 params.Identity = getWorkerIdentity(params.TaskList) 168 } 169 if params.Logger == nil { 170 // create a default logger if the user does not supply one. 171 config := zap.NewProductionConfig() 172 // set default time formatter to "2006-01-02T15:04:05.000Z0700" 173 config.EncoderConfig.EncodeTime = zapcore.ISO8601TimeEncoder 174 // config.Level.SetLevel(zapcore.DebugLevel) 175 logger, _ := config.Build() 176 params.Logger = logger 177 params.Logger.Info("No logger configured for cadence worker. Created default one.") 178 } 179 if params.MetricsScope == nil { 180 params.MetricsScope = tally.NoopScope 181 params.Logger.Info("No metrics scope configured for cadence worker. 
Use NoopScope as default.") 182 } 183 if params.DataConverter == nil { 184 params.DataConverter = getDefaultDataConverter() 185 params.Logger.Info("No DataConverter configured for cadence worker. Use default one.") 186 } 187 if params.UserContext == nil { 188 params.UserContext = context.Background() 189 } 190 } 191 192 // verifyDomainExist does a DescribeDomain operation on the specified domain with backoff/retry. 193 // It returns an error if the server returns an EntityNotExist or BadRequest error. 194 // On any other transient error, this method will just return success. 195 func verifyDomainExist( 196 client workflowserviceclient.Interface, 197 domain string, 198 logger *zap.Logger, 199 featureFlags FeatureFlags, 200 ) error { 201 ctx := context.Background() 202 descDomainOp := func() error { 203 tchCtx, cancel, opt := newChannelContext(ctx, featureFlags) 204 defer cancel() 205 _, err := client.DescribeDomain(tchCtx, &shared.DescribeDomainRequest{Name: &domain}, opt...) 206 if err != nil { 207 if _, ok := err.(*shared.EntityNotExistsError); ok { 208 logger.Error("domain does not exist", zap.String("domain", domain), zap.Error(err)) 209 return err 210 } 211 if _, ok := err.(*shared.BadRequestError); ok { 212 logger.Error("domain does not exist", zap.String("domain", domain), zap.Error(err)) 213 return err 214 } 215 // on any other error, just return success (nil) 216 logger.Warn("unable to verify if domain exist", zap.String("domain", domain), zap.Error(err)) 217 } 218 return nil 219 } 220 221 if len(domain) == 0 { 222 return errors.New("domain cannot be empty") 223 } 224 225 // exponential backoff retry for up to a minute 226 return backoff.Retry(ctx, descDomainOp, createDynamicServiceRetryPolicy(ctx), isServiceTransientError) 227 } 228 229 func newWorkflowWorkerInternal( 230 service workflowserviceclient.Interface, 231 domain string, 232 params workerExecutionParameters, 233 ppMgr pressurePointMgr, 234 overrides *workerOverrides, 235 registry *registry, 236 ldaTunnel *locallyDispatchedActivityTunnel, 237 ) *workflowWorker { 238 workerStopChannel := make(chan struct{}) 239 params.WorkerStopChannel = getReadOnlyChannel(workerStopChannel) 240 // Get a workflow task handler.
241 ensureRequiredParams(&params) 242 var taskHandler WorkflowTaskHandler 243 if overrides != nil && overrides.workflowTaskHandler != nil { 244 taskHandler = overrides.workflowTaskHandler 245 } else { 246 taskHandler = newWorkflowTaskHandler(domain, params, ppMgr, registry) 247 } 248 return newWorkflowTaskWorkerInternal(taskHandler, service, domain, params, workerStopChannel, ldaTunnel) 249 } 250 251 func newWorkflowTaskWorkerInternal( 252 taskHandler WorkflowTaskHandler, 253 service workflowserviceclient.Interface, 254 domain string, 255 params workerExecutionParameters, 256 stopC chan struct{}, 257 ldaTunnel *locallyDispatchedActivityTunnel, 258 ) *workflowWorker { 259 ensureRequiredParams(&params) 260 poller := newWorkflowTaskPoller( 261 taskHandler, 262 ldaTunnel, 263 service, 264 domain, 265 params, 266 ) 267 worker := newBaseWorker(baseWorkerOptions{ 268 pollerAutoScaler: pollerAutoScalerOptions{ 269 Enabled: params.FeatureFlags.PollerAutoScalerEnabled, 270 InitCount: params.MaxConcurrentDecisionTaskPollers, 271 MinCount: params.MinConcurrentDecisionTaskPollers, 272 MaxCount: params.MaxConcurrentDecisionTaskPollers, 273 Cooldown: params.PollerAutoScalerCooldown, 274 DryRun: params.PollerAutoScalerDryRun, 275 TargetUtilization: params.PollerAutoScalerTargetUtilization, 276 }, 277 pollerCount: params.MaxConcurrentDecisionTaskPollers, 278 pollerRate: defaultPollerRate, 279 maxConcurrentTask: params.MaxConcurrentDecisionTaskExecutionSize, 280 maxTaskPerSecond: params.WorkerDecisionTasksPerSecond, 281 taskWorker: poller, 282 identity: params.Identity, 283 workerType: "DecisionWorker", 284 shutdownTimeout: params.WorkerStopTimeout}, 285 params.Logger, 286 params.MetricsScope, 287 nil, 288 ) 289 290 // laTunnel is the glue that hooks up 3 parts 291 laTunnel := newLocalActivityTunnel(params.WorkerStopChannel) 292 293 // 1) workflow handler will send local activity tasks to laTunnel 294 if handlerImpl, ok := taskHandler.(*workflowTaskHandlerImpl); ok { 295 handlerImpl.laTunnel = laTunnel 296 } 297 298 // 2) local activity task poller will poll from laTunnel, and the result will be pushed to laTunnel 299 localActivityTaskPoller := newLocalActivityPoller(params, laTunnel) 300 localActivityWorker := newBaseWorker(baseWorkerOptions{ 301 pollerCount: 1, // 1 poller (from local channel) is enough for local activity 302 maxConcurrentTask: params.MaxConcurrentLocalActivityExecutionSize, 303 maxTaskPerSecond: params.WorkerLocalActivitiesPerSecond, 304 taskWorker: localActivityTaskPoller, 305 identity: params.Identity, 306 workerType: "LocalActivityWorker", 307 shutdownTimeout: params.WorkerStopTimeout}, 308 params.Logger, 309 params.MetricsScope, 310 nil, 311 ) 312 313 // 3) the result pushed to laTunnel will be sent as a task to the workflow worker to process. 314 worker.taskQueueCh = laTunnel.resultCh 315 316 worker.options.host = params.Host 317 localActivityWorker.options.host = params.Host 318 319 return &workflowWorker{ 320 executionParameters: params, 321 workflowService: service, 322 poller: poller, 323 worker: worker, 324 localActivityWorker: localActivityWorker, 325 identity: params.Identity, 326 domain: domain, 327 stopC: stopC, 328 } 329 } 330 331 // Start the worker.
332 func (ww *workflowWorker) Start() error { 333 err := verifyDomainExist(ww.workflowService, ww.domain, ww.worker.logger, ww.executionParameters.FeatureFlags) 334 if err != nil { 335 return err 336 } 337 ww.localActivityWorker.Start() 338 ww.worker.Start() 339 return nil // TODO: propagate error 340 } 341 342 func (ww *workflowWorker) Run() error { 343 err := verifyDomainExist(ww.workflowService, ww.domain, ww.worker.logger, ww.executionParameters.FeatureFlags) 344 if err != nil { 345 return err 346 } 347 ww.localActivityWorker.Start() 348 ww.worker.Run() 349 return nil 350 } 351 352 // Shutdown the worker. 353 func (ww *workflowWorker) Stop() { 354 select { 355 case <-ww.stopC: 356 // channel is already closed 357 default: 358 close(ww.stopC) 359 } 360 // TODO: remove the stop methods in favor of the workerStopChannel 361 ww.localActivityWorker.Stop() 362 ww.worker.Stop() 363 } 364 365 func newSessionWorker(service workflowserviceclient.Interface, 366 domain string, 367 params workerExecutionParameters, 368 overrides *workerOverrides, 369 env *registry, 370 maxConcurrentSessionExecutionSize int, 371 ) *sessionWorker { 372 ensureRequiredParams(&params) 373 // For now resourceID is hidden from the user, so we will always create a unique one for each worker. 374 if params.SessionResourceID == "" { 375 params.SessionResourceID = uuid.New() 376 } 377 sessionEnvironment := newSessionEnvironment(params.SessionResourceID, maxConcurrentSessionExecutionSize) 378 379 creationTasklist := getCreationTasklist(params.TaskList) 380 params.UserContext = context.WithValue(params.UserContext, sessionEnvironmentContextKey, sessionEnvironment) 381 params.TaskList = sessionEnvironment.GetResourceSpecificTasklist() 382 activityWorker := newActivityWorker(service, domain, params, overrides, env, nil) 383 384 params.MaxConcurrentActivityTaskPollers = 1 385 params.TaskList = creationTasklist 386 creationWorker := newActivityWorker(service, domain, params, overrides, env, sessionEnvironment.GetTokenBucket()) 387 388 return &sessionWorker{ 389 creationWorker: creationWorker, 390 activityWorker: activityWorker, 391 } 392 } 393 394 func (sw *sessionWorker) Start() error { 395 err := sw.creationWorker.Start() 396 if err != nil { 397 return err 398 } 399 400 err = sw.activityWorker.Start() 401 if err != nil { 402 sw.creationWorker.Stop() 403 return err 404 } 405 return nil 406 } 407 408 func (sw *sessionWorker) Run() error { 409 err := sw.creationWorker.Start() 410 if err != nil { 411 return err 412 } 413 return sw.activityWorker.Run() 414 } 415 416 func (sw *sessionWorker) Stop() { 417 sw.creationWorker.Stop() 418 sw.activityWorker.Stop() 419 } 420 421 func newActivityWorker( 422 service workflowserviceclient.Interface, 423 domain string, 424 params workerExecutionParameters, 425 overrides *workerOverrides, 426 env *registry, 427 sessionTokenBucket *sessionTokenBucket, 428 ) *activityWorker { 429 workerStopChannel := make(chan struct{}, 1) 430 params.WorkerStopChannel = getReadOnlyChannel(workerStopChannel) 431 ensureRequiredParams(&params) 432 workerType := "ActivityWorker" 433 // Get an activity task handler. 434 var taskHandler ActivityTaskHandler 435 if overrides != nil && overrides.activityTaskHandler != nil { 436 taskHandler = overrides.activityTaskHandler 437 } else { 438 taskHandler = newActivityTaskHandler(service, params, env) 439 } 440 // Get an activity task poller.
441 var taskPoller taskPoller 442 if overrides != nil && overrides.useLocallyDispatchedActivityPoller { 443 taskPoller = newLocallyDispatchedActivityTaskPoller(taskHandler, service, domain, params) 444 workerType = "LocallyDispatchedActivityWorker" 445 } else { 446 taskPoller = newActivityTaskPoller( 447 taskHandler, 448 service, 449 domain, 450 params, 451 ) 452 } 453 return newActivityTaskWorker(service, domain, params, sessionTokenBucket, workerStopChannel, taskPoller, workerType) 454 } 455 456 func newActivityTaskWorker( 457 service workflowserviceclient.Interface, 458 domain string, 459 workerParams workerExecutionParameters, 460 sessionTokenBucket *sessionTokenBucket, 461 stopC chan struct{}, 462 poller taskPoller, 463 workerType string, 464 ) (worker *activityWorker) { 465 ensureRequiredParams(&workerParams) 466 base := newBaseWorker( 467 baseWorkerOptions{ 468 pollerAutoScaler: pollerAutoScalerOptions{ 469 Enabled: workerParams.FeatureFlags.PollerAutoScalerEnabled, 470 InitCount: workerParams.MaxConcurrentActivityTaskPollers, 471 MinCount: workerParams.MinConcurrentActivityTaskPollers, 472 MaxCount: workerParams.MaxConcurrentActivityTaskPollers, 473 Cooldown: workerParams.PollerAutoScalerCooldown, 474 DryRun: workerParams.PollerAutoScalerDryRun, 475 TargetUtilization: workerParams.PollerAutoScalerTargetUtilization, 476 }, 477 pollerCount: workerParams.MaxConcurrentActivityTaskPollers, 478 pollerRate: defaultPollerRate, 479 maxConcurrentTask: workerParams.MaxConcurrentActivityExecutionSize, 480 maxTaskPerSecond: workerParams.WorkerActivitiesPerSecond, 481 taskWorker: poller, 482 identity: workerParams.Identity, 483 workerType: workerType, 484 shutdownTimeout: workerParams.WorkerStopTimeout, 485 userContextCancel: workerParams.UserContextCancel}, 486 workerParams.Logger, 487 workerParams.MetricsScope, 488 sessionTokenBucket, 489 ) 490 base.options.host = workerParams.Host 491 492 return &activityWorker{ 493 executionParameters: workerParams, 494 workflowService: service, 495 worker: base, 496 poller: poller, 497 identity: workerParams.Identity, 498 domain: domain, 499 stopC: stopC, 500 } 501 } 502 503 // Start the worker. 504 func (aw *activityWorker) Start() error { 505 err := verifyDomainExist(aw.workflowService, aw.domain, aw.worker.logger, aw.executionParameters.FeatureFlags) 506 if err != nil { 507 return err 508 } 509 aw.worker.Start() 510 return nil // TODO: propagate errors 511 } 512 513 // Run the worker. 514 func (aw *activityWorker) Run() error { 515 err := verifyDomainExist(aw.workflowService, aw.domain, aw.worker.logger, aw.executionParameters.FeatureFlags) 516 if err != nil { 517 return err 518 } 519 aw.worker.Run() 520 return nil 521 } 522 523 // Shutdown the worker. 524 func (aw *activityWorker) Stop() { 525 select { 526 case <-aw.stopC: 527 // channel is already closed 528 default: 529 close(aw.stopC) 530 } 531 aw.worker.Stop() 532 } 533 534 // Validate function parameters. 
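// The examples below are an illustrative sketch, not part of the original file: they show the
// signature shapes validateFnFormat accepts and rejects. The function names are hypothetical.
//   func GoodWorkflow(ctx Context, name string) (string, error) // accepted: Context first, (result, error) returns
//   func GoodActivity(input []byte) error                       // accepted for activities: returning only error is allowed
//   func BadNoError(name string) string                         // rejected: the last return value must be an error
//   func BadWorkflow(name string) error                         // rejected for workflows: the first argument must be Context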
535 func validateFnFormat(fnType reflect.Type, isWorkflow bool) error { 536 if fnType.Kind() != reflect.Func { 537 return fmt.Errorf("expected a func as input but was %s", fnType.Kind()) 538 } 539 if isWorkflow { 540 if fnType.NumIn() < 1 { 541 return fmt.Errorf( 542 "expected at least one argument of type workflow.Context in function, found %d input arguments", 543 fnType.NumIn(), 544 ) 545 } 546 if !isWorkflowContext(fnType.In(0)) { 547 return fmt.Errorf("expected first argument to be workflow.Context but found %s", fnType.In(0)) 548 } 549 } 550 551 // Return values 552 // We expect either 553 // <result>, error 554 // (or) just error 555 if fnType.NumOut() < 1 || fnType.NumOut() > 2 { 556 return fmt.Errorf( 557 "expected function to return result, error or just error, but found %d return values", fnType.NumOut(), 558 ) 559 } 560 if fnType.NumOut() > 1 && !isValidResultType(fnType.Out(0)) { 561 return fmt.Errorf( 562 "expected function first return value to return valid type but found: %v", fnType.Out(0).Kind(), 563 ) 564 } 565 if !isError(fnType.Out(fnType.NumOut() - 1)) { 566 return fmt.Errorf( 567 "expected function second return value to return error but found %v", fnType.Out(fnType.NumOut()-1).Kind(), 568 ) 569 } 570 return nil 571 } 572 573 // encode multiple arguments(arguments to a function). 574 func encodeArgs(dc DataConverter, args []interface{}) ([]byte, error) { 575 if dc == nil { 576 return getDefaultDataConverter().ToData(args...) 577 } 578 return dc.ToData(args...) 579 } 580 581 // decode multiple arguments(arguments to a function). 582 func decodeArgs(dc DataConverter, fnType reflect.Type, data []byte) (result []reflect.Value, err error) { 583 r, err := decodeArgsToValues(dc, fnType, data) 584 if err != nil { 585 return 586 } 587 for i := 0; i < len(r); i++ { 588 result = append(result, reflect.ValueOf(r[i]).Elem()) 589 } 590 return 591 } 592 593 func decodeArgsToValues(dc DataConverter, fnType reflect.Type, data []byte) (result []interface{}, err error) { 594 if dc == nil { 595 dc = getDefaultDataConverter() 596 } 597 argsLoop: 598 for i := 0; i < fnType.NumIn(); i++ { 599 argT := fnType.In(i) 600 if i == 0 && (isActivityContext(argT) || isWorkflowContext(argT)) { 601 continue argsLoop 602 } 603 arg := reflect.New(argT).Interface() 604 result = append(result, arg) 605 } 606 err = dc.FromData(data, result...) 607 if err != nil { 608 return 609 } 610 return 611 } 612 613 // encode single value(like return parameter). 614 func encodeArg(dc DataConverter, arg interface{}) ([]byte, error) { 615 if dc == nil { 616 return getDefaultDataConverter().ToData(arg) 617 } 618 return dc.ToData(arg) 619 } 620 621 // decode single value(like return parameter). 
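// As an illustrative sketch (not in the original source), a typical round trip through these
// helpers with a nil DataConverter, which falls back to the default converter; the variable
// names are hypothetical:
//   payload, err := encodeArg(nil, "hello") // serialize a single value
//   var out string
//   err = decodeArg(nil, payload, &out)     // on success, out == "hello"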
622 func decodeArg(dc DataConverter, data []byte, to interface{}) error { 623 if dc == nil { 624 return getDefaultDataConverter().FromData(data, to) 625 } 626 return dc.FromData(data, to) 627 } 628 629 func decodeAndAssignValue(dc DataConverter, from interface{}, toValuePtr interface{}) error { 630 if toValuePtr == nil { 631 return nil 632 } 633 if rf := reflect.ValueOf(toValuePtr); rf.Type().Kind() != reflect.Ptr { 634 return errors.New("value parameter provided is not a pointer") 635 } 636 if data, ok := from.([]byte); ok { 637 if err := decodeArg(dc, data, toValuePtr); err != nil { 638 return err 639 } 640 } else if fv := reflect.ValueOf(from); fv.IsValid() { 641 fromType := fv.Type() 642 toType := reflect.TypeOf(toValuePtr).Elem() 643 assignable := fromType.AssignableTo(toType) 644 if !assignable { 645 return fmt.Errorf("%s is not assignable to %s", fromType.Name(), toType.Name()) 646 } 647 reflect.ValueOf(toValuePtr).Elem().Set(fv) 648 } 649 return nil 650 } 651 652 // Wrapper to execute workflow functions. 653 type workflowExecutor struct { 654 workflowType string 655 fn interface{} 656 } 657 658 func (we *workflowExecutor) Execute(ctx Context, input []byte) ([]byte, error) { 659 var args []interface{} 660 dataConverter := getWorkflowEnvOptions(ctx).dataConverter 661 fnType := reflect.TypeOf(we.fn) 662 if fnType.NumIn() == 2 && util.IsTypeByteSlice(fnType.In(1)) { 663 // Do not deserialize input if workflow has a single byte slice argument (besides ctx) 664 args = append(args, input) 665 } else { 666 decoded, err := decodeArgsToValues(dataConverter, fnType, input) 667 if err != nil { 668 return nil, fmt.Errorf( 669 "unable to decode the workflow function input bytes with error: %v, function name: %v", 670 err, we.workflowType) 671 } 672 args = append(args, decoded...) 673 } 674 envInterceptor := getEnvInterceptor(ctx) 675 envInterceptor.fn = we.fn 676 results := envInterceptor.interceptorChainHead.ExecuteWorkflow(ctx, we.workflowType, args...) 677 return serializeResults(we.fn, results, dataConverter) 678 } 679 680 // Wrapper to execute activity functions. 681 type activityExecutor struct { 682 name string 683 fn interface{} 684 options RegisterActivityOptions 685 } 686 687 func (ae *activityExecutor) ActivityType() ActivityType { 688 return ActivityType{Name: ae.name} 689 } 690 691 func (ae *activityExecutor) GetFunction() interface{} { 692 return ae.fn 693 } 694 695 func (ae *activityExecutor) GetOptions() RegisterActivityOptions { 696 return ae.options 697 } 698 699 func (ae *activityExecutor) Execute(ctx context.Context, input []byte) ([]byte, error) { 700 fnType := reflect.TypeOf(ae.fn) 701 var args []reflect.Value 702 dataConverter := getDataConverterFromActivityCtx(ctx) 703 704 // activities optionally might not take context. 705 if fnType.NumIn() > 0 && isActivityContext(fnType.In(0)) { 706 args = append(args, reflect.ValueOf(ctx)) 707 } 708 709 if fnType.NumIn() == 1 && util.IsTypeByteSlice(fnType.In(0)) { 710 args = append(args, reflect.ValueOf(input)) 711 } else { 712 decoded, err := decodeArgs(dataConverter, fnType, input) 713 if err != nil { 714 return nil, fmt.Errorf( 715 "unable to decode the activity function input bytes with error: %v for function name: %v", 716 err, ae.name) 717 } 718 args = append(args, decoded...) 
719 } 720 721 fnValue := reflect.ValueOf(ae.fn) 722 retValues := fnValue.Call(args) 723 return validateFunctionAndGetResults(ae.fn, retValues, dataConverter) 724 } 725 726 func (ae *activityExecutor) ExecuteWithActualArgs(ctx context.Context, actualArgs []interface{}) ([]byte, error) { 727 retValues := ae.executeWithActualArgsWithoutParseResult(ctx, actualArgs) 728 dataConverter := getDataConverterFromActivityCtx(ctx) 729 730 return validateFunctionAndGetResults(ae.fn, retValues, dataConverter) 731 } 732 733 func (ae *activityExecutor) executeWithActualArgsWithoutParseResult(ctx context.Context, actualArgs []interface{}) []reflect.Value { 734 fnType := reflect.TypeOf(ae.fn) 735 args := []reflect.Value{} 736 737 // activities optionally might not take context. 738 argsOffeset := 0 739 if fnType.NumIn() > 0 && isActivityContext(fnType.In(0)) { 740 args = append(args, reflect.ValueOf(ctx)) 741 argsOffeset = 1 742 } 743 744 for i, arg := range actualArgs { 745 if arg == nil { 746 args = append(args, reflect.New(fnType.In(i+argsOffeset)).Elem()) 747 } else { 748 args = append(args, reflect.ValueOf(arg)) 749 } 750 } 751 752 fnValue := reflect.ValueOf(ae.fn) 753 retValues := fnValue.Call(args) 754 return retValues 755 } 756 757 func getDataConverterFromActivityCtx(ctx context.Context) DataConverter { 758 if ctx == nil || ctx.Value(activityEnvContextKey) == nil { 759 return getDefaultDataConverter() 760 } 761 info := ctx.Value(activityEnvContextKey).(*activityEnvironment) 762 if info.dataConverter == nil { 763 return getDefaultDataConverter() 764 } 765 return info.dataConverter 766 } 767 768 // aggregatedWorker combines management of both workflowWorker and activityWorker worker lifecycle. 769 type aggregatedWorker struct { 770 workflowWorker *workflowWorker 771 activityWorker *activityWorker 772 locallyDispatchedActivityWorker *activityWorker 773 sessionWorker *sessionWorker 774 shadowWorker *shadowWorker 775 logger *zap.Logger 776 registry *registry 777 } 778 779 func (aw *aggregatedWorker) RegisterWorkflow(w interface{}) { 780 aw.registry.RegisterWorkflow(w) 781 } 782 783 func (aw *aggregatedWorker) RegisterWorkflowWithOptions(w interface{}, options RegisterWorkflowOptions) { 784 aw.registry.RegisterWorkflowWithOptions(w, options) 785 } 786 787 func (aw *aggregatedWorker) RegisterActivity(a interface{}) { 788 aw.registry.RegisterActivity(a) 789 } 790 791 func (aw *aggregatedWorker) RegisterActivityWithOptions(a interface{}, options RegisterActivityOptions) { 792 aw.registry.RegisterActivityWithOptions(a, options) 793 } 794 795 func (aw *aggregatedWorker) Start() error { 796 if _, err := initBinaryChecksum(); err != nil { 797 return fmt.Errorf("failed to get executable checksum: %v", err) 798 } 799 800 if aw.workflowWorker != nil { 801 if len(aw.registry.GetRegisteredWorkflowTypes()) == 0 { 802 aw.logger.Info( 803 "Worker has no workflows registered, so workflow worker will not be started.", 804 ) 805 } else { 806 if err := aw.workflowWorker.Start(); err != nil { 807 return err 808 } 809 } 810 aw.logger.Info("Started Workflow Worker") 811 } 812 if aw.activityWorker != nil { 813 if len(aw.registry.getRegisteredActivities()) == 0 { 814 aw.logger.Info( 815 "Worker has no activities registered, so activity worker will not be started.", 816 ) 817 } else { 818 if err := aw.activityWorker.Start(); err != nil { 819 // stop workflow worker. 
820 if aw.workflowWorker != nil { 821 aw.workflowWorker.Stop() 822 } 823 return err 824 } 825 if aw.locallyDispatchedActivityWorker != nil { 826 if err := aw.locallyDispatchedActivityWorker.Start(); err != nil { 827 // stop workflow worker. 828 if aw.workflowWorker != nil { 829 aw.workflowWorker.Stop() 830 } 831 aw.activityWorker.Stop() 832 return err 833 } 834 } 835 aw.logger.Info("Started Activity Worker") 836 } 837 } 838 839 if aw.sessionWorker != nil { 840 if err := aw.sessionWorker.Start(); err != nil { 841 // stop workflow worker and activity worker. 842 if aw.workflowWorker != nil { 843 aw.workflowWorker.Stop() 844 } 845 if aw.activityWorker != nil { 846 aw.activityWorker.Stop() 847 } 848 if aw.locallyDispatchedActivityWorker != nil { 849 aw.locallyDispatchedActivityWorker.Stop() 850 } 851 return err 852 } 853 aw.logger.Info("Started Session Worker") 854 } 855 856 if aw.shadowWorker != nil { 857 if err := aw.shadowWorker.Start(); err != nil { 858 if aw.workflowWorker != nil { 859 aw.workflowWorker.Stop() 860 } 861 if aw.activityWorker != nil { 862 aw.activityWorker.Stop() 863 } 864 if aw.locallyDispatchedActivityWorker != nil { 865 aw.locallyDispatchedActivityWorker.Stop() 866 } 867 if aw.sessionWorker != nil { 868 aw.sessionWorker.Stop() 869 } 870 return err 871 } 872 aw.logger.Info("Started Shadow Worker") 873 } 874 875 return nil 876 } 877 878 var binaryChecksum atomic.Value 879 var binaryChecksumLock sync.Mutex 880 881 // SetBinaryChecksum set binary checksum 882 func SetBinaryChecksum(checksum string) { 883 binaryChecksum.Store(checksum) 884 } 885 886 func initBinaryChecksum() (string, error) { 887 // initBinaryChecksum may be called multiple times concurrently during worker startup. 888 // To avoid reading and hashing the contents of the binary multiple times acquire mutex here. 889 binaryChecksumLock.Lock() 890 defer binaryChecksumLock.Unlock() 891 892 // check if binaryChecksum already set/initialized. 893 if bcsVal, ok := binaryChecksum.Load().(string); ok { 894 return bcsVal, nil 895 } 896 897 exec, err := os.Executable() 898 if err != nil { 899 return "", err 900 } 901 902 f, err := os.Open(exec) 903 if err != nil { 904 return "", err 905 } 906 defer func() { 907 _ = f.Close() // error is unimportant as it is read-only 908 }() 909 910 h := md5.New() 911 if _, err := io.Copy(h, f); err != nil { 912 return "", err 913 } 914 915 checksum := h.Sum(nil) 916 bcsVal := hex.EncodeToString(checksum[:]) 917 binaryChecksum.Store(bcsVal) 918 919 return bcsVal, err 920 } 921 922 func getBinaryChecksum() string { 923 bcsVal, ok := binaryChecksum.Load().(string) 924 if ok { 925 return bcsVal 926 } 927 928 bcsVal, err := initBinaryChecksum() 929 if err != nil { 930 panic(err) 931 } 932 933 return bcsVal 934 } 935 936 func (aw *aggregatedWorker) Run() error { 937 if err := aw.Start(); err != nil { 938 return err 939 } 940 d := <-getKillSignal() 941 aw.logger.Info("Worker has been killed", zap.String("Signal", d.String())) 942 aw.Stop() 943 return nil 944 } 945 946 func (aw *aggregatedWorker) Stop() { 947 if aw.workflowWorker != nil { 948 aw.workflowWorker.Stop() 949 } 950 if aw.activityWorker != nil { 951 aw.activityWorker.Stop() 952 } 953 if aw.locallyDispatchedActivityWorker != nil { 954 aw.locallyDispatchedActivityWorker.Stop() 955 } 956 if aw.sessionWorker != nil { 957 aw.sessionWorker.Stop() 958 } 959 if aw.shadowWorker != nil { 960 aw.shadowWorker.Stop() 961 } 962 aw.logger.Info("Stopped Worker") 963 } 964 965 // AggregatedWorker returns an instance to manage the workers. 
Use defaultConcurrentPollRoutineSize (which is 2) as 966 // poller size. The typical RTT (round-trip time) is below 1ms within a data center. And the poll API latency is about 5ms. 967 // With 2 pollers, we could achieve around 300~400 RPS. 968 func newAggregatedWorker( 969 service workflowserviceclient.Interface, 970 domain string, 971 taskList string, 972 options WorkerOptions, 973 ) (worker *aggregatedWorker) { 974 wOptions := AugmentWorkerOptions(options) 975 ctx := wOptions.BackgroundActivityContext 976 if ctx == nil { 977 ctx = context.Background() 978 } 979 backgroundActivityContext, backgroundActivityContextCancel := context.WithCancel(ctx) 980 981 workerParams := workerExecutionParameters{ 982 WorkerOptions: wOptions, 983 TaskList: taskList, 984 UserContext: backgroundActivityContext, 985 UserContextCancel: backgroundActivityContextCancel, 986 } 987 988 ensureRequiredParams(&workerParams) 989 workerParams.MetricsScope = tagScope(workerParams.MetricsScope, tagDomain, domain, tagTaskList, taskList, clientImplHeaderName, clientImplHeaderValue) 990 workerParams.Logger = workerParams.Logger.With( 991 zapcore.Field{Key: tagDomain, Type: zapcore.StringType, String: domain}, 992 zapcore.Field{Key: tagTaskList, Type: zapcore.StringType, String: taskList}, 993 zapcore.Field{Key: tagWorkerID, Type: zapcore.StringType, String: workerParams.Identity}, 994 ) 995 logger := workerParams.Logger 996 if options.Authorization != nil { 997 service = auth.NewWorkflowServiceWrapper(service, options.Authorization) 998 } 999 if options.IsolationGroup != "" { 1000 service = isolationgroup.NewWorkflowServiceWrapper(service, options.IsolationGroup) 1001 } 1002 service = metrics.NewWorkflowServiceWrapper(service, workerParams.MetricsScope) 1003 processTestTags(&wOptions, &workerParams) 1004 1005 // worker-specific registry 1006 registry := newRegistry() 1007 1008 // ldaTunnel is a one-way tunnel to dispatch activity tasks from the workflow poller to the activity poller 1009 var ldaTunnel *locallyDispatchedActivityTunnel 1010 1011 // activity types. 1012 var activityWorker, locallyDispatchedActivityWorker *activityWorker 1013 1014 if !wOptions.DisableActivityWorker { 1015 activityWorker = newActivityWorker( 1016 service, 1017 domain, 1018 workerParams, 1019 nil, 1020 registry, 1021 nil, 1022 ) 1023 1024 // do not dispatch locally if TaskListActivitiesPerSecond is set 1025 if workerParams.TaskListActivitiesPerSecond == defaultTaskListActivitiesPerSecond { 1026 // TODO update taskPoller interface so one activity worker can multiplex on multiple pollers 1027 locallyDispatchedActivityWorker = newActivityWorker( 1028 service, 1029 domain, 1030 workerParams, 1031 &workerOverrides{useLocallyDispatchedActivityPoller: true}, 1032 registry, 1033 nil, 1034 ) 1035 ldaTunnel = locallyDispatchedActivityWorker.poller.(*locallyDispatchedActivityTaskPoller).ldaTunnel 1036 ldaTunnel.metricsScope = metrics.NewTaggedScope(workerParams.MetricsScope) 1037 } 1038 } 1039 1040 // workflow factory.
1041 var workflowWorker *workflowWorker 1042 if !wOptions.DisableWorkflowWorker { 1043 testTags := getTestTags(wOptions.BackgroundActivityContext) 1044 if len(testTags) > 0 { 1045 workflowWorker = newWorkflowWorkerWithPressurePoints( 1046 service, 1047 domain, 1048 workerParams, 1049 testTags, 1050 registry, 1051 ) 1052 } else { 1053 workflowWorker = newWorkflowWorker( 1054 service, 1055 domain, 1056 workerParams, 1057 nil, 1058 registry, 1059 ldaTunnel, 1060 ) 1061 } 1062 1063 } 1064 1065 var sessionWorker *sessionWorker 1066 if wOptions.EnableSessionWorker { 1067 sessionWorker = newSessionWorker( 1068 service, 1069 domain, 1070 workerParams, 1071 nil, 1072 registry, 1073 wOptions.MaxConcurrentSessionExecutionSize, 1074 ) 1075 registry.RegisterActivityWithOptions(sessionCreationActivity, RegisterActivityOptions{ 1076 Name: sessionCreationActivityName, 1077 }) 1078 registry.RegisterActivityWithOptions(sessionCompletionActivity, RegisterActivityOptions{ 1079 Name: sessionCompletionActivityName, 1080 }) 1081 1082 } 1083 1084 var shadowWorker *shadowWorker 1085 if wOptions.EnableShadowWorker { 1086 shadowWorker = newShadowWorker( 1087 service, 1088 domain, 1089 wOptions.ShadowOptions, 1090 workerParams, 1091 registry, 1092 ) 1093 } 1094 1095 return &aggregatedWorker{ 1096 workflowWorker: workflowWorker, 1097 activityWorker: activityWorker, 1098 locallyDispatchedActivityWorker: locallyDispatchedActivityWorker, 1099 sessionWorker: sessionWorker, 1100 shadowWorker: shadowWorker, 1101 logger: logger, 1102 registry: registry, 1103 } 1104 } 1105 1106 // tagScope with one or multiple tags, like 1107 // tagScope(scope, tag1, val1, tag2, val2) 1108 func tagScope(metricsScope tally.Scope, keyValueinPairs ...string) tally.Scope { 1109 if metricsScope == nil { 1110 metricsScope = tally.NoopScope 1111 } 1112 if len(keyValueinPairs)%2 != 0 { 1113 panic("tagScope key value are not in pairs") 1114 } 1115 tagsMap := map[string]string{} 1116 for i := 0; i < len(keyValueinPairs); i += 2 { 1117 tagsMap[keyValueinPairs[i]] = keyValueinPairs[i+1] 1118 } 1119 return metricsScope.Tagged(tagsMap) 1120 } 1121 1122 func processTestTags(wOptions *WorkerOptions, ep *workerExecutionParameters) { 1123 testTags := getTestTags(wOptions.BackgroundActivityContext) 1124 if testTags != nil { 1125 if paramsOverride, ok := testTags[workerOptionsConfig]; ok { 1126 for key, val := range paramsOverride { 1127 switch key { 1128 case workerOptionsConfigConcurrentPollRoutineSize: 1129 if size, err := strconv.Atoi(val); err == nil { 1130 ep.MaxConcurrentActivityTaskPollers = size 1131 ep.MaxConcurrentDecisionTaskPollers = size 1132 } 1133 } 1134 } 1135 } 1136 } 1137 } 1138 1139 func isWorkflowContext(inType reflect.Type) bool { 1140 // NOTE: We don't expect any one to derive from workflow context. 
1141 return inType == reflect.TypeOf((*Context)(nil)).Elem() 1142 } 1143 1144 func isValidResultType(inType reflect.Type) bool { 1145 // https://golang.org/pkg/reflect/#Kind 1146 switch inType.Kind() { 1147 case reflect.Func, reflect.Chan, reflect.UnsafePointer: 1148 return false 1149 } 1150 1151 return true 1152 } 1153 1154 func isError(inType reflect.Type) bool { 1155 errorElem := reflect.TypeOf((*error)(nil)).Elem() 1156 return inType != nil && inType.Implements(errorElem) 1157 } 1158 1159 func getFunctionName(i interface{}) string { 1160 if fullName, ok := i.(string); ok { 1161 return fullName 1162 } 1163 fullName := runtime.FuncForPC(reflect.ValueOf(i).Pointer()).Name() 1164 // This allows calling activities by method pointer 1165 // The compiler adds a -fm suffix to a function name which has a receiver 1166 // Note that this works even if the struct pointer used to get the function is nil 1167 // It is possible because nil receivers are allowed. 1168 // For example: 1169 // var a *Activities 1170 // ExecuteActivity(ctx, a.Foo) 1171 // will call this function which is going to return "Foo" 1172 return strings.TrimSuffix(fullName, "-fm") 1173 } 1174 1175 func getActivityFunctionName(r *registry, i interface{}) string { 1176 result := getFunctionName(i) 1177 if alias, ok := r.getActivityAlias(result); ok { 1178 result = alias 1179 } 1180 return result 1181 } 1182 1183 func getWorkflowFunctionName(r *registry, i interface{}) string { 1184 result := getFunctionName(i) 1185 if alias, ok := r.getWorkflowAlias(result); ok { 1186 result = alias 1187 } 1188 return result 1189 } 1190 1191 func getReadOnlyChannel(c chan struct{}) <-chan struct{} { 1192 return c 1193 } 1194 1195 func AugmentWorkerOptions(options WorkerOptions) WorkerOptions { 1196 if options.MaxConcurrentActivityExecutionSize == 0 { 1197 options.MaxConcurrentActivityExecutionSize = defaultMaxConcurrentActivityExecutionSize 1198 } 1199 if options.WorkerActivitiesPerSecond == 0 { 1200 options.WorkerActivitiesPerSecond = defaultWorkerActivitiesPerSecond 1201 } 1202 if options.MaxConcurrentActivityTaskPollers <= 0 { 1203 options.MaxConcurrentActivityTaskPollers = defaultConcurrentPollRoutineSize 1204 } 1205 if options.MaxConcurrentDecisionTaskExecutionSize == 0 { 1206 options.MaxConcurrentDecisionTaskExecutionSize = defaultMaxConcurrentTaskExecutionSize 1207 } 1208 if options.WorkerDecisionTasksPerSecond == 0 { 1209 options.WorkerDecisionTasksPerSecond = defaultWorkerTaskExecutionRate 1210 } 1211 if options.MaxConcurrentDecisionTaskPollers <= 0 { 1212 options.MaxConcurrentDecisionTaskPollers = defaultConcurrentPollRoutineSize 1213 } 1214 if options.MaxConcurrentLocalActivityExecutionSize == 0 { 1215 options.MaxConcurrentLocalActivityExecutionSize = defaultMaxConcurrentLocalActivityExecutionSize 1216 } 1217 if options.WorkerLocalActivitiesPerSecond == 0 { 1218 options.WorkerLocalActivitiesPerSecond = defaultWorkerLocalActivitiesPerSecond 1219 } 1220 if options.TaskListActivitiesPerSecond == 0 { 1221 options.TaskListActivitiesPerSecond = defaultTaskListActivitiesPerSecond 1222 } 1223 if options.StickyScheduleToStartTimeout.Seconds() == 0 { 1224 options.StickyScheduleToStartTimeout = stickyDecisionScheduleToStartTimeoutSeconds * time.Second 1225 } 1226 if options.DataConverter == nil { 1227 options.DataConverter = getDefaultDataConverter() 1228 } 1229 if options.MaxConcurrentSessionExecutionSize == 0 { 1230 options.MaxConcurrentSessionExecutionSize = defaultMaxConcurrentSessionExecutionSize 1231 } 1232 if options.MinConcurrentActivityTaskPollers 
== 0 { 1233 options.MinConcurrentActivityTaskPollers = defaultMinConcurrentPollerSize 1234 } 1235 if options.MinConcurrentDecisionTaskPollers == 0 { 1236 options.MinConcurrentDecisionTaskPollers = defaultMinConcurrentPollerSize 1237 } 1238 if options.PollerAutoScalerCooldown == 0 { 1239 options.PollerAutoScalerCooldown = defaultPollerAutoScalerCooldown 1240 } 1241 if options.PollerAutoScalerTargetUtilization == 0 { 1242 options.PollerAutoScalerTargetUtilization = defaultPollerAutoScalerTargetUtilization 1243 } 1244 1245 // if the user passes in a tracer then add a tracing context propagator 1246 if options.Tracer != nil { 1247 options.ContextPropagators = append(options.ContextPropagators, NewTracingContextPropagator(options.Logger, options.Tracer)) 1248 } else { 1249 options.Tracer = opentracing.NoopTracer{} 1250 } 1251 1252 if options.EnableShadowWorker { 1253 options.DisableActivityWorker = true 1254 options.DisableWorkflowWorker = true 1255 options.EnableSessionWorker = false 1256 } 1257 1258 return options 1259 } 1260 1261 // getTestTags returns the test tags in the context. 1262 func getTestTags(ctx context.Context) map[string]map[string]string { 1263 if ctx != nil { 1264 env := ctx.Value(testTagsContextKey) 1265 if env != nil { 1266 return env.(map[string]map[string]string) 1267 } 1268 } 1269 return nil 1270 } 1271 1272 // StartVersionMetrics starts emitting version metrics 1273 func StartVersionMetrics(metricsScope tally.Scope) { 1274 startVersionMetric.Do(func() { 1275 go func() { 1276 ticker := time.NewTicker(time.Minute) 1277 versionTags := map[string]string{clientVersionTag: LibraryVersion} 1278 for { 1279 select { 1280 case <-StopMetrics: 1281 return 1282 case <-ticker.C: 1283 metricsScope.Tagged(versionTags).Gauge(clientGauge).Update(1) 1284 } 1285 } 1286 }() 1287 }) 1288 }
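// Illustrative usage sketch (not part of this file): a caller can start the version gauge with any
// tally scope and stop the background goroutine by closing the exported StopMetrics channel. The
// scope setup below is only an assumed example; any tally.Scope works.
//   scope, closer := tally.NewRootScope(tally.ScopeOptions{Prefix: "cadence"}, time.Second)
//   defer closer.Close()
//   StartVersionMetrics(scope) // emits the client_version_metric gauge once per minute
//   close(StopMetrics)         // later: signals the metrics goroutine to exit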