go.uber.org/cadence@v1.2.9/internal/internal_utils.go (about) 1 // Copyright (c) 2017 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package internal 22 23 // All code in this file is private to the package. 24 25 import ( 26 "context" 27 "encoding/json" 28 "fmt" 29 "go.uber.org/zap" 30 "os" 31 "os/signal" 32 "strings" 33 "syscall" 34 "time" 35 36 "github.com/pborman/uuid" 37 "github.com/uber-go/tally" 38 "go.uber.org/yarpc" 39 40 s "go.uber.org/cadence/.gen/go/shared" 41 "go.uber.org/cadence/internal/common" 42 "go.uber.org/cadence/internal/common/metrics" 43 ) 44 45 const ( 46 // libraryVersionHeaderName refers to the name of the 47 // tchannel / http header that contains the client 48 // library version 49 libraryVersionHeaderName = "cadence-client-library-version" 50 51 // featureVersionHeaderName refers to the name of the 52 // tchannel / http header that contains the client 53 // feature version 54 featureVersionHeaderName = "cadence-client-feature-version" 55 56 // clientImplHeaderName refers to the name of the 57 // header that contains the client implementation 58 clientImplHeaderName = "cadence-client-name" 59 clientImplHeaderValue = "uber-go" 60 61 clientFeatureFlagsHeaderName = "cadence-client-feature-flags" 62 63 // defaultRPCTimeout is the default tchannel rpc call timeout 64 defaultRPCTimeout = 10 * time.Second 65 //minRPCTimeout is minimum rpc call timeout allowed 66 minRPCTimeout = 1 * time.Second 67 //maxRPCTimeout is maximum rpc call timeout allowed 68 maxRPCTimeout = 5 * time.Second 69 // maxQueryRPCTimeout is the maximum rpc call timeout allowed for query 70 maxQueryRPCTimeout = 20 * time.Second 71 ) 72 73 type ( 74 FeatureFlags struct { 75 WorkflowExecutionAlreadyCompletedErrorEnabled bool 76 PollerAutoScalerEnabled bool 77 } 78 ) 79 80 var ( 81 // call header to cadence server 82 _yarpcCallOptions = []yarpc.CallOption{ 83 yarpc.WithHeader(libraryVersionHeaderName, LibraryVersion), 84 yarpc.WithHeader(featureVersionHeaderName, FeatureVersion), 85 yarpc.WithHeader(clientImplHeaderName, clientImplHeaderValue), 86 } 87 ) 88 89 func fromInternalFeatureFlags(featureFlags FeatureFlags) s.FeatureFlags { 90 // if we are using client-side-only flags in client.FeatureFlags; 91 // don't include them in shared.FeatureFlags and drop them here 92 return s.FeatureFlags{ 93 WorkflowExecutionAlreadyCompletedErrorEnabled: common.BoolPtr(featureFlags.WorkflowExecutionAlreadyCompletedErrorEnabled), 94 } 95 } 96 97 func toInternalFeatureFlags(featureFlags *s.FeatureFlags) FeatureFlags { 98 flags := FeatureFlags{} 99 if featureFlags != nil { 100 if featureFlags.WorkflowExecutionAlreadyCompletedErrorEnabled != nil { 101 flags.WorkflowExecutionAlreadyCompletedErrorEnabled = *featureFlags.WorkflowExecutionAlreadyCompletedErrorEnabled 102 } 103 } 104 return flags 105 } 106 107 func featureFlagsHeader(featureFlags FeatureFlags) string { 108 serialized := "" 109 buf, err := json.Marshal(fromInternalFeatureFlags(featureFlags)) 110 if err == nil { 111 serialized = string(buf) 112 } 113 return serialized 114 } 115 116 func getYarpcCallOptions(featureFlags FeatureFlags) []yarpc.CallOption { 117 return append( 118 _yarpcCallOptions, 119 yarpc.WithHeader(clientFeatureFlagsHeaderName, featureFlagsHeader(featureFlags)), 120 ) 121 } 122 123 // ContextBuilder stores all Channel-specific parameters that will 124 // be stored inside of a context. 125 type contextBuilder struct { 126 // If Timeout is zero, Build will default to defaultTimeout. 127 Timeout time.Duration 128 129 // ParentContext to build the new context from. If empty, context.Background() is used. 130 // The new (child) context inherits a number of properties from the parent context: 131 // - context fields, accessible via `ctx.Value(key)` 132 ParentContext context.Context 133 } 134 135 func (cb *contextBuilder) Build() (context.Context, context.CancelFunc) { 136 parent := cb.ParentContext 137 if parent == nil { 138 parent = context.Background() 139 } 140 return context.WithTimeout(parent, cb.Timeout) 141 } 142 143 // sets the rpc timeout for a context 144 func chanTimeout(timeout time.Duration) func(builder *contextBuilder) { 145 return func(b *contextBuilder) { 146 b.Timeout = timeout 147 } 148 } 149 150 // newChannelContext - Get a rpc channel context for query 151 func newChannelContextForQuery( 152 ctx context.Context, 153 featureFlags FeatureFlags, 154 options ...func(builder *contextBuilder), 155 ) (context.Context, context.CancelFunc, []yarpc.CallOption) { 156 return newChannelContextHelper(ctx, true, featureFlags, options...) 157 } 158 159 // newChannelContext - Get a rpc channel context 160 func newChannelContext( 161 ctx context.Context, 162 featureFlags FeatureFlags, 163 options ...func(builder *contextBuilder), 164 ) (context.Context, context.CancelFunc, []yarpc.CallOption) { 165 return newChannelContextHelper(ctx, false, featureFlags, options...) 166 } 167 168 func newChannelContextHelper( 169 ctx context.Context, 170 isQuery bool, 171 featureFlags FeatureFlags, 172 options ...func(builder *contextBuilder), 173 ) (context.Context, context.CancelFunc, []yarpc.CallOption) { 174 rpcTimeout := defaultRPCTimeout 175 if ctx != nil { 176 // Set rpc timeout less than context timeout to allow for retries when call gets lost 177 now := time.Now() 178 if expiration, ok := ctx.Deadline(); ok && expiration.After(now) { 179 rpcTimeout = expiration.Sub(now) / 2 180 // Make sure to not set rpc timeout lower than minRPCTimeout 181 if rpcTimeout < minRPCTimeout { 182 rpcTimeout = minRPCTimeout 183 } else if rpcTimeout > maxRPCTimeout && !isQuery { 184 rpcTimeout = maxRPCTimeout 185 } else if rpcTimeout > maxQueryRPCTimeout && isQuery { 186 rpcTimeout = maxQueryRPCTimeout 187 } 188 } 189 } 190 builder := &contextBuilder{Timeout: rpcTimeout} 191 if ctx != nil { 192 builder.ParentContext = ctx 193 } 194 for _, opt := range options { 195 opt(builder) 196 } 197 ctx, cancelFn := builder.Build() 198 199 return ctx, cancelFn, getYarpcCallOptions(featureFlags) 200 } 201 202 // GetWorkerIdentity gets a default identity for the worker. 203 // 204 // This contains a random UUID, generated each time it is called, to prevent identity collisions when workers share 205 // other host/pid/etc information. These alone are not guaranteed to be unique, especially when Docker is involved. 206 // Take care to retrieve this only once per worker. 207 func getWorkerIdentity(tasklistName string) string { 208 return fmt.Sprintf("%d@%s@%s@%s", os.Getpid(), getHostName(), tasklistName, uuid.New()) 209 } 210 211 func getHostName() string { 212 hostName, err := os.Hostname() 213 if err != nil { 214 hostName = "UnKnown" 215 } 216 return hostName 217 } 218 219 func getWorkerTaskList(stickyUUID string) string { 220 // includes hostname for debuggability, stickyUUID guarantees the uniqueness 221 return fmt.Sprintf("%s:%s", getHostName(), stickyUUID) 222 } 223 224 // ActivityTypePtr makes a copy and returns the pointer to a ActivityType. 225 func activityTypePtr(v ActivityType) *s.ActivityType { 226 return &s.ActivityType{Name: common.StringPtr(v.Name)} 227 } 228 229 func flowWorkflowTypeFrom(v s.WorkflowType) WorkflowType { 230 return WorkflowType{Name: v.GetName()} 231 } 232 233 // WorkflowTypePtr makes a copy and returns the pointer to a WorkflowType. 234 func workflowTypePtr(t WorkflowType) *s.WorkflowType { 235 return &s.WorkflowType{Name: common.StringPtr(t.Name)} 236 } 237 238 // getErrorDetails gets reason and details. 239 func getErrorDetails(err error, dataConverter DataConverter) (string, []byte) { 240 switch err := err.(type) { 241 case *CustomError: 242 var data []byte 243 var err0 error 244 switch details := err.details.(type) { 245 case ErrorDetailsValues: 246 data, err0 = encodeArgs(dataConverter, details) 247 case *EncodedValues: 248 data = details.values 249 default: 250 panic("unknown error type") 251 } 252 if err0 != nil { 253 panic(err0) 254 } 255 return err.Reason(), data 256 case *CanceledError: 257 var data []byte 258 var err0 error 259 switch details := err.details.(type) { 260 case ErrorDetailsValues: 261 data, err0 = encodeArgs(dataConverter, details) 262 case *EncodedValues: 263 data = details.values 264 default: 265 panic("unknown error type") 266 } 267 if err0 != nil { 268 panic(err0) 269 } 270 return errReasonCanceled, data 271 case *PanicError: 272 data, err0 := encodeArgs(dataConverter, []interface{}{err.Error(), err.StackTrace()}) 273 if err0 != nil { 274 panic(err0) 275 } 276 return errReasonPanic, data 277 case *TimeoutError: 278 var data []byte 279 var err0 error 280 switch details := err.details.(type) { 281 case ErrorDetailsValues: 282 data, err0 = encodeArgs(dataConverter, details) 283 case *EncodedValues: 284 data = details.values 285 default: 286 panic("unknown error type") 287 } 288 if err0 != nil { 289 panic(err0) 290 } 291 return fmt.Sprintf("%v %v", errReasonTimeout, err.timeoutType), data 292 default: 293 // will be convert to GenericError when receiving from server. 294 return errReasonGeneric, []byte(err.Error()) 295 } 296 } 297 298 // constructError construct error from reason and details sending down from server. 299 func constructError(reason string, details []byte, dataConverter DataConverter) error { 300 if strings.HasPrefix(reason, errReasonTimeout) { 301 details := newEncodedValues(details, dataConverter) 302 timeoutType, err := getTimeoutTypeFromErrReason(reason) 303 if err != nil { 304 // prior client version uses details to indicate timeoutType 305 if err := details.Get(&timeoutType); err != nil { 306 panic(err) 307 } 308 return NewTimeoutError(timeoutType) 309 } 310 return NewTimeoutError(timeoutType, details) 311 } 312 313 switch reason { 314 case errReasonPanic: 315 // panic error 316 var msg, st string 317 details := newEncodedValues(details, dataConverter) 318 details.Get(&msg, &st) 319 return newPanicError(msg, st) 320 case errReasonGeneric: 321 // errors created other than using NewCustomError() API. 322 return &GenericError{err: string(details)} 323 case errReasonCanceled: 324 details := newEncodedValues(details, dataConverter) 325 return NewCanceledError(details) 326 default: 327 details := newEncodedValues(details, dataConverter) 328 err := NewCustomError(reason, details) 329 return err 330 } 331 } 332 333 func getKillSignal() <-chan os.Signal { 334 c := make(chan os.Signal, 1) 335 signal.Notify(c, syscall.SIGINT, syscall.SIGTERM) 336 return c 337 } 338 339 // getMetricsScopeForActivity return properly tagged tally scope for activity 340 func getMetricsScopeForActivity(ts *metrics.TaggedScope, workflowType, activityType string) tally.Scope { 341 return ts.GetTaggedScope(tagWorkflowType, workflowType, tagActivityType, activityType) 342 } 343 344 // getMetricsScopeForLocalActivity return properly tagged tally scope for local activity 345 func getMetricsScopeForLocalActivity(ts *metrics.TaggedScope, workflowType, localActivityType string) tally.Scope { 346 return ts.GetTaggedScope(tagWorkflowType, workflowType, tagLocalActivityType, localActivityType) 347 } 348 349 func getTimeoutTypeFromErrReason(reason string) (s.TimeoutType, error) { 350 timeoutTypeStr := reason[strings.Index(reason, " ")+1:] 351 var timeoutType s.TimeoutType 352 if err := timeoutType.UnmarshalText([]byte(timeoutTypeStr)); err != nil { 353 // this happens when the timeout error reason is constructed by an prior constructed by prior client version 354 return 0, err 355 } 356 return timeoutType, nil 357 } 358 359 func estimateHistorySize(logger *zap.Logger, event *s.HistoryEvent) int { 360 sum := historySizeEstimationBuffer 361 switch event.GetEventType() { 362 case s.EventTypeWorkflowExecutionStarted: 363 if event.WorkflowExecutionStartedEventAttributes != nil { 364 sum += len(event.WorkflowExecutionStartedEventAttributes.Input) 365 sum += len(event.WorkflowExecutionStartedEventAttributes.ContinuedFailureDetails) 366 sum += len(event.WorkflowExecutionStartedEventAttributes.LastCompletionResult) 367 sum += sizeOf(event.WorkflowExecutionStartedEventAttributes.Memo.GetFields()) 368 sum += sizeOf(event.WorkflowExecutionStartedEventAttributes.Header.GetFields()) 369 sum += sizeOf(event.WorkflowExecutionStartedEventAttributes.SearchAttributes.GetIndexedFields()) 370 } 371 case s.EventTypeWorkflowExecutionCompleted: 372 if event.WorkflowExecutionCompletedEventAttributes != nil { 373 sum += len(event.WorkflowExecutionCompletedEventAttributes.Result) 374 } 375 case s.EventTypeWorkflowExecutionSignaled: 376 if event.WorkflowExecutionSignaledEventAttributes != nil { 377 sum += len(event.WorkflowExecutionSignaledEventAttributes.Input) 378 } 379 case s.EventTypeWorkflowExecutionFailed: 380 if event.WorkflowExecutionFailedEventAttributes != nil { 381 sum += len(event.WorkflowExecutionFailedEventAttributes.Details) 382 } 383 case s.EventTypeDecisionTaskStarted: 384 if event.DecisionTaskStartedEventAttributes != nil { 385 sum += getLengthOfStringPointer(event.DecisionTaskStartedEventAttributes.Identity) 386 } 387 case s.EventTypeDecisionTaskCompleted: 388 if event.DecisionTaskCompletedEventAttributes != nil { 389 sum += len(event.DecisionTaskCompletedEventAttributes.ExecutionContext) 390 sum += getLengthOfStringPointer(event.DecisionTaskCompletedEventAttributes.Identity) 391 sum += getLengthOfStringPointer(event.DecisionTaskCompletedEventAttributes.BinaryChecksum) 392 } 393 case s.EventTypeDecisionTaskFailed: 394 if event.DecisionTaskFailedEventAttributes != nil { 395 sum += len(event.DecisionTaskFailedEventAttributes.Details) 396 } 397 case s.EventTypeActivityTaskScheduled: 398 if event.ActivityTaskScheduledEventAttributes != nil { 399 sum += len(event.ActivityTaskScheduledEventAttributes.Input) 400 sum += sizeOf(event.ActivityTaskScheduledEventAttributes.Header.GetFields()) 401 } 402 case s.EventTypeActivityTaskStarted: 403 if event.ActivityTaskStartedEventAttributes != nil { 404 sum += len(event.ActivityTaskStartedEventAttributes.LastFailureDetails) 405 } 406 case s.EventTypeActivityTaskCompleted: 407 if event.ActivityTaskCompletedEventAttributes != nil { 408 sum += len(event.ActivityTaskCompletedEventAttributes.Result) 409 sum += getLengthOfStringPointer(event.ActivityTaskCompletedEventAttributes.Identity) 410 } 411 case s.EventTypeActivityTaskFailed: 412 if event.ActivityTaskFailedEventAttributes != nil { 413 sum += len(event.ActivityTaskFailedEventAttributes.Details) 414 } 415 case s.EventTypeActivityTaskTimedOut: 416 if event.ActivityTaskTimedOutEventAttributes != nil { 417 sum += len(event.ActivityTaskTimedOutEventAttributes.Details) 418 sum += len(event.ActivityTaskTimedOutEventAttributes.LastFailureDetails) 419 } 420 case s.EventTypeActivityTaskCanceled: 421 if event.ActivityTaskCanceledEventAttributes != nil { 422 sum += len(event.ActivityTaskCanceledEventAttributes.Details) 423 } 424 case s.EventTypeMarkerRecorded: 425 if event.MarkerRecordedEventAttributes != nil { 426 sum += len(event.MarkerRecordedEventAttributes.Details) 427 } 428 case s.EventTypeWorkflowExecutionTerminated: 429 if event.WorkflowExecutionTerminatedEventAttributes != nil { 430 sum += len(event.WorkflowExecutionTerminatedEventAttributes.Details) 431 } 432 case s.EventTypeWorkflowExecutionCanceled: 433 if event.WorkflowExecutionCanceledEventAttributes != nil { 434 sum += len(event.WorkflowExecutionCanceledEventAttributes.Details) 435 } 436 case s.EventTypeWorkflowExecutionContinuedAsNew: 437 if event.WorkflowExecutionContinuedAsNewEventAttributes != nil { 438 sum += len(event.WorkflowExecutionContinuedAsNewEventAttributes.Input) 439 sum += len(event.WorkflowExecutionContinuedAsNewEventAttributes.FailureDetails) 440 sum += len(event.WorkflowExecutionContinuedAsNewEventAttributes.LastCompletionResult) 441 sum += sizeOf(event.WorkflowExecutionContinuedAsNewEventAttributes.Memo.GetFields()) 442 sum += sizeOf(event.WorkflowExecutionContinuedAsNewEventAttributes.Header.GetFields()) 443 sum += sizeOf(event.WorkflowExecutionContinuedAsNewEventAttributes.SearchAttributes.GetIndexedFields()) 444 } 445 case s.EventTypeStartChildWorkflowExecutionInitiated: 446 if event.StartChildWorkflowExecutionInitiatedEventAttributes != nil { 447 sum += len(event.StartChildWorkflowExecutionInitiatedEventAttributes.Input) 448 sum += len(event.StartChildWorkflowExecutionInitiatedEventAttributes.Control) 449 sum += sizeOf(event.StartChildWorkflowExecutionInitiatedEventAttributes.Memo.GetFields()) 450 sum += sizeOf(event.StartChildWorkflowExecutionInitiatedEventAttributes.Header.GetFields()) 451 sum += sizeOf(event.StartChildWorkflowExecutionInitiatedEventAttributes.SearchAttributes.GetIndexedFields()) 452 } 453 case s.EventTypeChildWorkflowExecutionCompleted: 454 if event.ChildWorkflowExecutionCompletedEventAttributes != nil { 455 sum += len(event.ChildWorkflowExecutionCompletedEventAttributes.Result) 456 } 457 case s.EventTypeChildWorkflowExecutionFailed: 458 if event.ChildWorkflowExecutionFailedEventAttributes != nil { 459 sum += len(event.ChildWorkflowExecutionFailedEventAttributes.Details) 460 sum += getLengthOfStringPointer(event.ChildWorkflowExecutionFailedEventAttributes.Reason) 461 } 462 case s.EventTypeChildWorkflowExecutionCanceled: 463 if event.ChildWorkflowExecutionCanceledEventAttributes != nil { 464 sum += len(event.ChildWorkflowExecutionCanceledEventAttributes.Details) 465 } 466 case s.EventTypeSignalExternalWorkflowExecutionInitiated: 467 if event.SignalExternalWorkflowExecutionInitiatedEventAttributes != nil { 468 sum += len(event.SignalExternalWorkflowExecutionInitiatedEventAttributes.Control) 469 sum += len(event.SignalExternalWorkflowExecutionInitiatedEventAttributes.Input) 470 } 471 default: 472 logger.Debug("unsupported event type for history size estimation", zap.String("Event Type", event.GetEventType().String())) 473 } 474 475 return sum 476 } 477 478 // simple function to estimate the size of a map[string][]byte 479 func sizeOf(o map[string][]byte) int { 480 sum := 0 481 for k, v := range o { 482 sum += len(k) + len(v) 483 } 484 return sum 485 } 486 487 // simple function to estimate the size of a string pointer 488 func getLengthOfStringPointer(s *string) int { 489 if s == nil { 490 return 0 491 } 492 return len(*s) 493 }