go.uber.org/cadence@v1.2.9/internal/internal_utils.go (about)

     1  // Copyright (c) 2017 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package internal
    22  
    23  // All code in this file is private to the package.
    24  
    25  import (
    26  	"context"
    27  	"encoding/json"
    28  	"fmt"
    29  	"go.uber.org/zap"
    30  	"os"
    31  	"os/signal"
    32  	"strings"
    33  	"syscall"
    34  	"time"
    35  
    36  	"github.com/pborman/uuid"
    37  	"github.com/uber-go/tally"
    38  	"go.uber.org/yarpc"
    39  
    40  	s "go.uber.org/cadence/.gen/go/shared"
    41  	"go.uber.org/cadence/internal/common"
    42  	"go.uber.org/cadence/internal/common/metrics"
    43  )
    44  
const (
	// libraryVersionHeaderName refers to the name of the
	// tchannel / http header that contains the client
	// library version
	libraryVersionHeaderName = "cadence-client-library-version"

	// featureVersionHeaderName refers to the name of the
	// tchannel / http header that contains the client
	// feature version
	featureVersionHeaderName = "cadence-client-feature-version"

	// clientImplHeaderName refers to the name of the
	// header that contains the client implementation;
	// clientImplHeaderValue is the value sent under that header.
	clientImplHeaderName  = "cadence-client-name"
	clientImplHeaderValue = "uber-go"

	// clientFeatureFlagsHeaderName is the name of the header that carries
	// the JSON-serialized feature flags (see getYarpcCallOptions).
	clientFeatureFlagsHeaderName = "cadence-client-feature-flags"

	// defaultRPCTimeout is the default tchannel rpc call timeout. It is used
	// as-is when the caller's context carries no usable deadline; the min/max
	// bounds below are only applied to timeouts derived from a deadline.
	defaultRPCTimeout = 10 * time.Second
	// minRPCTimeout is the minimum rpc call timeout allowed
	minRPCTimeout = 1 * time.Second
	// maxRPCTimeout is the maximum rpc call timeout allowed for non-query calls
	maxRPCTimeout = 5 * time.Second
	// maxQueryRPCTimeout is the maximum rpc call timeout allowed for query
	maxQueryRPCTimeout = 20 * time.Second
)
    72  
type (
	// FeatureFlags holds the client feature toggles.
	//
	// Within this file only WorkflowExecutionAlreadyCompletedErrorEnabled is
	// converted to and from the shared (thrift) FeatureFlags type;
	// PollerAutoScalerEnabled is not copied by those conversions and is
	// therefore not part of the serialized feature-flags header.
	FeatureFlags struct {
		WorkflowExecutionAlreadyCompletedErrorEnabled bool
		PollerAutoScalerEnabled                       bool
	}
)
    79  
var (
	// _yarpcCallOptions are the static call headers sent to the cadence
	// server: library version, feature version and client implementation.
	// getYarpcCallOptions extends these with the feature-flags header.
	_yarpcCallOptions = []yarpc.CallOption{
		yarpc.WithHeader(libraryVersionHeaderName, LibraryVersion),
		yarpc.WithHeader(featureVersionHeaderName, FeatureVersion),
		yarpc.WithHeader(clientImplHeaderName, clientImplHeaderValue),
	}
)
    88  
    89  func fromInternalFeatureFlags(featureFlags FeatureFlags) s.FeatureFlags {
    90  	// if we are using client-side-only flags in client.FeatureFlags;
    91  	// don't include them in shared.FeatureFlags and drop them here
    92  	return s.FeatureFlags{
    93  		WorkflowExecutionAlreadyCompletedErrorEnabled: common.BoolPtr(featureFlags.WorkflowExecutionAlreadyCompletedErrorEnabled),
    94  	}
    95  }
    96  
    97  func toInternalFeatureFlags(featureFlags *s.FeatureFlags) FeatureFlags {
    98  	flags := FeatureFlags{}
    99  	if featureFlags != nil {
   100  		if featureFlags.WorkflowExecutionAlreadyCompletedErrorEnabled != nil {
   101  			flags.WorkflowExecutionAlreadyCompletedErrorEnabled = *featureFlags.WorkflowExecutionAlreadyCompletedErrorEnabled
   102  		}
   103  	}
   104  	return flags
   105  }
   106  
   107  func featureFlagsHeader(featureFlags FeatureFlags) string {
   108  	serialized := ""
   109  	buf, err := json.Marshal(fromInternalFeatureFlags(featureFlags))
   110  	if err == nil {
   111  		serialized = string(buf)
   112  	}
   113  	return serialized
   114  }
   115  
   116  func getYarpcCallOptions(featureFlags FeatureFlags) []yarpc.CallOption {
   117  	return append(
   118  		_yarpcCallOptions,
   119  		yarpc.WithHeader(clientFeatureFlagsHeaderName, featureFlagsHeader(featureFlags)),
   120  	)
   121  }
   122  
// contextBuilder stores the parameters that Build uses to create a
// context for an rpc call.
type contextBuilder struct {
	// Timeout is passed unchanged to context.WithTimeout by Build. Build does
	// not substitute a default, so the caller is responsible for setting it
	// (newChannelContextHelper initializes it before applying options).
	Timeout time.Duration

	// ParentContext to build the new context from. If nil, context.Background() is used.
	// The new (child) context inherits a number of properties from the parent context:
	//   - context fields, accessible via `ctx.Value(key)`
	ParentContext context.Context
}
   134  
   135  func (cb *contextBuilder) Build() (context.Context, context.CancelFunc) {
   136  	parent := cb.ParentContext
   137  	if parent == nil {
   138  		parent = context.Background()
   139  	}
   140  	return context.WithTimeout(parent, cb.Timeout)
   141  }
   142  
   143  // sets the rpc timeout for a context
   144  func chanTimeout(timeout time.Duration) func(builder *contextBuilder) {
   145  	return func(b *contextBuilder) {
   146  		b.Timeout = timeout
   147  	}
   148  }
   149  
   150  // newChannelContext - Get a rpc channel context for query
   151  func newChannelContextForQuery(
   152  	ctx context.Context,
   153  	featureFlags FeatureFlags,
   154  	options ...func(builder *contextBuilder),
   155  ) (context.Context, context.CancelFunc, []yarpc.CallOption) {
   156  	return newChannelContextHelper(ctx, true, featureFlags, options...)
   157  }
   158  
   159  // newChannelContext - Get a rpc channel context
   160  func newChannelContext(
   161  	ctx context.Context,
   162  	featureFlags FeatureFlags,
   163  	options ...func(builder *contextBuilder),
   164  ) (context.Context, context.CancelFunc, []yarpc.CallOption) {
   165  	return newChannelContextHelper(ctx, false, featureFlags, options...)
   166  }
   167  
   168  func newChannelContextHelper(
   169  	ctx context.Context,
   170  	isQuery bool,
   171  	featureFlags FeatureFlags,
   172  	options ...func(builder *contextBuilder),
   173  ) (context.Context, context.CancelFunc, []yarpc.CallOption) {
   174  	rpcTimeout := defaultRPCTimeout
   175  	if ctx != nil {
   176  		// Set rpc timeout less than context timeout to allow for retries when call gets lost
   177  		now := time.Now()
   178  		if expiration, ok := ctx.Deadline(); ok && expiration.After(now) {
   179  			rpcTimeout = expiration.Sub(now) / 2
   180  			// Make sure to not set rpc timeout lower than minRPCTimeout
   181  			if rpcTimeout < minRPCTimeout {
   182  				rpcTimeout = minRPCTimeout
   183  			} else if rpcTimeout > maxRPCTimeout && !isQuery {
   184  				rpcTimeout = maxRPCTimeout
   185  			} else if rpcTimeout > maxQueryRPCTimeout && isQuery {
   186  				rpcTimeout = maxQueryRPCTimeout
   187  			}
   188  		}
   189  	}
   190  	builder := &contextBuilder{Timeout: rpcTimeout}
   191  	if ctx != nil {
   192  		builder.ParentContext = ctx
   193  	}
   194  	for _, opt := range options {
   195  		opt(builder)
   196  	}
   197  	ctx, cancelFn := builder.Build()
   198  
   199  	return ctx, cancelFn, getYarpcCallOptions(featureFlags)
   200  }
   201  
   202  // GetWorkerIdentity gets a default identity for the worker.
   203  //
   204  // This contains a random UUID, generated each time it is called, to prevent identity collisions when workers share
   205  // other host/pid/etc information.  These alone are not guaranteed to be unique, especially when Docker is involved.
   206  // Take care to retrieve this only once per worker.
   207  func getWorkerIdentity(tasklistName string) string {
   208  	return fmt.Sprintf("%d@%s@%s@%s", os.Getpid(), getHostName(), tasklistName, uuid.New())
   209  }
   210  
   211  func getHostName() string {
   212  	hostName, err := os.Hostname()
   213  	if err != nil {
   214  		hostName = "UnKnown"
   215  	}
   216  	return hostName
   217  }
   218  
   219  func getWorkerTaskList(stickyUUID string) string {
   220  	// includes hostname for debuggability, stickyUUID guarantees the uniqueness
   221  	return fmt.Sprintf("%s:%s", getHostName(), stickyUUID)
   222  }
   223  
   224  // ActivityTypePtr makes a copy and returns the pointer to a ActivityType.
   225  func activityTypePtr(v ActivityType) *s.ActivityType {
   226  	return &s.ActivityType{Name: common.StringPtr(v.Name)}
   227  }
   228  
   229  func flowWorkflowTypeFrom(v s.WorkflowType) WorkflowType {
   230  	return WorkflowType{Name: v.GetName()}
   231  }
   232  
   233  // WorkflowTypePtr makes a copy and returns the pointer to a WorkflowType.
   234  func workflowTypePtr(t WorkflowType) *s.WorkflowType {
   235  	return &s.WorkflowType{Name: common.StringPtr(t.Name)}
   236  }
   237  
   238  // getErrorDetails gets reason and details.
   239  func getErrorDetails(err error, dataConverter DataConverter) (string, []byte) {
   240  	switch err := err.(type) {
   241  	case *CustomError:
   242  		var data []byte
   243  		var err0 error
   244  		switch details := err.details.(type) {
   245  		case ErrorDetailsValues:
   246  			data, err0 = encodeArgs(dataConverter, details)
   247  		case *EncodedValues:
   248  			data = details.values
   249  		default:
   250  			panic("unknown error type")
   251  		}
   252  		if err0 != nil {
   253  			panic(err0)
   254  		}
   255  		return err.Reason(), data
   256  	case *CanceledError:
   257  		var data []byte
   258  		var err0 error
   259  		switch details := err.details.(type) {
   260  		case ErrorDetailsValues:
   261  			data, err0 = encodeArgs(dataConverter, details)
   262  		case *EncodedValues:
   263  			data = details.values
   264  		default:
   265  			panic("unknown error type")
   266  		}
   267  		if err0 != nil {
   268  			panic(err0)
   269  		}
   270  		return errReasonCanceled, data
   271  	case *PanicError:
   272  		data, err0 := encodeArgs(dataConverter, []interface{}{err.Error(), err.StackTrace()})
   273  		if err0 != nil {
   274  			panic(err0)
   275  		}
   276  		return errReasonPanic, data
   277  	case *TimeoutError:
   278  		var data []byte
   279  		var err0 error
   280  		switch details := err.details.(type) {
   281  		case ErrorDetailsValues:
   282  			data, err0 = encodeArgs(dataConverter, details)
   283  		case *EncodedValues:
   284  			data = details.values
   285  		default:
   286  			panic("unknown error type")
   287  		}
   288  		if err0 != nil {
   289  			panic(err0)
   290  		}
   291  		return fmt.Sprintf("%v %v", errReasonTimeout, err.timeoutType), data
   292  	default:
   293  		// will be convert to GenericError when receiving from server.
   294  		return errReasonGeneric, []byte(err.Error())
   295  	}
   296  }
   297  
   298  // constructError construct error from reason and details sending down from server.
   299  func constructError(reason string, details []byte, dataConverter DataConverter) error {
   300  	if strings.HasPrefix(reason, errReasonTimeout) {
   301  		details := newEncodedValues(details, dataConverter)
   302  		timeoutType, err := getTimeoutTypeFromErrReason(reason)
   303  		if err != nil {
   304  			// prior client version uses details to indicate timeoutType
   305  			if err := details.Get(&timeoutType); err != nil {
   306  				panic(err)
   307  			}
   308  			return NewTimeoutError(timeoutType)
   309  		}
   310  		return NewTimeoutError(timeoutType, details)
   311  	}
   312  
   313  	switch reason {
   314  	case errReasonPanic:
   315  		// panic error
   316  		var msg, st string
   317  		details := newEncodedValues(details, dataConverter)
   318  		details.Get(&msg, &st)
   319  		return newPanicError(msg, st)
   320  	case errReasonGeneric:
   321  		// errors created other than using NewCustomError() API.
   322  		return &GenericError{err: string(details)}
   323  	case errReasonCanceled:
   324  		details := newEncodedValues(details, dataConverter)
   325  		return NewCanceledError(details)
   326  	default:
   327  		details := newEncodedValues(details, dataConverter)
   328  		err := NewCustomError(reason, details)
   329  		return err
   330  	}
   331  }
   332  
   333  func getKillSignal() <-chan os.Signal {
   334  	c := make(chan os.Signal, 1)
   335  	signal.Notify(c, syscall.SIGINT, syscall.SIGTERM)
   336  	return c
   337  }
   338  
   339  // getMetricsScopeForActivity return properly tagged tally scope for activity
   340  func getMetricsScopeForActivity(ts *metrics.TaggedScope, workflowType, activityType string) tally.Scope {
   341  	return ts.GetTaggedScope(tagWorkflowType, workflowType, tagActivityType, activityType)
   342  }
   343  
   344  // getMetricsScopeForLocalActivity return properly tagged tally scope for local activity
   345  func getMetricsScopeForLocalActivity(ts *metrics.TaggedScope, workflowType, localActivityType string) tally.Scope {
   346  	return ts.GetTaggedScope(tagWorkflowType, workflowType, tagLocalActivityType, localActivityType)
   347  }
   348  
   349  func getTimeoutTypeFromErrReason(reason string) (s.TimeoutType, error) {
   350  	timeoutTypeStr := reason[strings.Index(reason, " ")+1:]
   351  	var timeoutType s.TimeoutType
   352  	if err := timeoutType.UnmarshalText([]byte(timeoutTypeStr)); err != nil {
   353  		// this happens when the timeout error reason is constructed by an prior constructed by prior client version
   354  		return 0, err
   355  	}
   356  	return timeoutType, nil
   357  }
   358  
   359  func estimateHistorySize(logger *zap.Logger, event *s.HistoryEvent) int {
   360  	sum := historySizeEstimationBuffer
   361  	switch event.GetEventType() {
   362  	case s.EventTypeWorkflowExecutionStarted:
   363  		if event.WorkflowExecutionStartedEventAttributes != nil {
   364  			sum += len(event.WorkflowExecutionStartedEventAttributes.Input)
   365  			sum += len(event.WorkflowExecutionStartedEventAttributes.ContinuedFailureDetails)
   366  			sum += len(event.WorkflowExecutionStartedEventAttributes.LastCompletionResult)
   367  			sum += sizeOf(event.WorkflowExecutionStartedEventAttributes.Memo.GetFields())
   368  			sum += sizeOf(event.WorkflowExecutionStartedEventAttributes.Header.GetFields())
   369  			sum += sizeOf(event.WorkflowExecutionStartedEventAttributes.SearchAttributes.GetIndexedFields())
   370  		}
   371  	case s.EventTypeWorkflowExecutionCompleted:
   372  		if event.WorkflowExecutionCompletedEventAttributes != nil {
   373  			sum += len(event.WorkflowExecutionCompletedEventAttributes.Result)
   374  		}
   375  	case s.EventTypeWorkflowExecutionSignaled:
   376  		if event.WorkflowExecutionSignaledEventAttributes != nil {
   377  			sum += len(event.WorkflowExecutionSignaledEventAttributes.Input)
   378  		}
   379  	case s.EventTypeWorkflowExecutionFailed:
   380  		if event.WorkflowExecutionFailedEventAttributes != nil {
   381  			sum += len(event.WorkflowExecutionFailedEventAttributes.Details)
   382  		}
   383  	case s.EventTypeDecisionTaskStarted:
   384  		if event.DecisionTaskStartedEventAttributes != nil {
   385  			sum += getLengthOfStringPointer(event.DecisionTaskStartedEventAttributes.Identity)
   386  		}
   387  	case s.EventTypeDecisionTaskCompleted:
   388  		if event.DecisionTaskCompletedEventAttributes != nil {
   389  			sum += len(event.DecisionTaskCompletedEventAttributes.ExecutionContext)
   390  			sum += getLengthOfStringPointer(event.DecisionTaskCompletedEventAttributes.Identity)
   391  			sum += getLengthOfStringPointer(event.DecisionTaskCompletedEventAttributes.BinaryChecksum)
   392  		}
   393  	case s.EventTypeDecisionTaskFailed:
   394  		if event.DecisionTaskFailedEventAttributes != nil {
   395  			sum += len(event.DecisionTaskFailedEventAttributes.Details)
   396  		}
   397  	case s.EventTypeActivityTaskScheduled:
   398  		if event.ActivityTaskScheduledEventAttributes != nil {
   399  			sum += len(event.ActivityTaskScheduledEventAttributes.Input)
   400  			sum += sizeOf(event.ActivityTaskScheduledEventAttributes.Header.GetFields())
   401  		}
   402  	case s.EventTypeActivityTaskStarted:
   403  		if event.ActivityTaskStartedEventAttributes != nil {
   404  			sum += len(event.ActivityTaskStartedEventAttributes.LastFailureDetails)
   405  		}
   406  	case s.EventTypeActivityTaskCompleted:
   407  		if event.ActivityTaskCompletedEventAttributes != nil {
   408  			sum += len(event.ActivityTaskCompletedEventAttributes.Result)
   409  			sum += getLengthOfStringPointer(event.ActivityTaskCompletedEventAttributes.Identity)
   410  		}
   411  	case s.EventTypeActivityTaskFailed:
   412  		if event.ActivityTaskFailedEventAttributes != nil {
   413  			sum += len(event.ActivityTaskFailedEventAttributes.Details)
   414  		}
   415  	case s.EventTypeActivityTaskTimedOut:
   416  		if event.ActivityTaskTimedOutEventAttributes != nil {
   417  			sum += len(event.ActivityTaskTimedOutEventAttributes.Details)
   418  			sum += len(event.ActivityTaskTimedOutEventAttributes.LastFailureDetails)
   419  		}
   420  	case s.EventTypeActivityTaskCanceled:
   421  		if event.ActivityTaskCanceledEventAttributes != nil {
   422  			sum += len(event.ActivityTaskCanceledEventAttributes.Details)
   423  		}
   424  	case s.EventTypeMarkerRecorded:
   425  		if event.MarkerRecordedEventAttributes != nil {
   426  			sum += len(event.MarkerRecordedEventAttributes.Details)
   427  		}
   428  	case s.EventTypeWorkflowExecutionTerminated:
   429  		if event.WorkflowExecutionTerminatedEventAttributes != nil {
   430  			sum += len(event.WorkflowExecutionTerminatedEventAttributes.Details)
   431  		}
   432  	case s.EventTypeWorkflowExecutionCanceled:
   433  		if event.WorkflowExecutionCanceledEventAttributes != nil {
   434  			sum += len(event.WorkflowExecutionCanceledEventAttributes.Details)
   435  		}
   436  	case s.EventTypeWorkflowExecutionContinuedAsNew:
   437  		if event.WorkflowExecutionContinuedAsNewEventAttributes != nil {
   438  			sum += len(event.WorkflowExecutionContinuedAsNewEventAttributes.Input)
   439  			sum += len(event.WorkflowExecutionContinuedAsNewEventAttributes.FailureDetails)
   440  			sum += len(event.WorkflowExecutionContinuedAsNewEventAttributes.LastCompletionResult)
   441  			sum += sizeOf(event.WorkflowExecutionContinuedAsNewEventAttributes.Memo.GetFields())
   442  			sum += sizeOf(event.WorkflowExecutionContinuedAsNewEventAttributes.Header.GetFields())
   443  			sum += sizeOf(event.WorkflowExecutionContinuedAsNewEventAttributes.SearchAttributes.GetIndexedFields())
   444  		}
   445  	case s.EventTypeStartChildWorkflowExecutionInitiated:
   446  		if event.StartChildWorkflowExecutionInitiatedEventAttributes != nil {
   447  			sum += len(event.StartChildWorkflowExecutionInitiatedEventAttributes.Input)
   448  			sum += len(event.StartChildWorkflowExecutionInitiatedEventAttributes.Control)
   449  			sum += sizeOf(event.StartChildWorkflowExecutionInitiatedEventAttributes.Memo.GetFields())
   450  			sum += sizeOf(event.StartChildWorkflowExecutionInitiatedEventAttributes.Header.GetFields())
   451  			sum += sizeOf(event.StartChildWorkflowExecutionInitiatedEventAttributes.SearchAttributes.GetIndexedFields())
   452  		}
   453  	case s.EventTypeChildWorkflowExecutionCompleted:
   454  		if event.ChildWorkflowExecutionCompletedEventAttributes != nil {
   455  			sum += len(event.ChildWorkflowExecutionCompletedEventAttributes.Result)
   456  		}
   457  	case s.EventTypeChildWorkflowExecutionFailed:
   458  		if event.ChildWorkflowExecutionFailedEventAttributes != nil {
   459  			sum += len(event.ChildWorkflowExecutionFailedEventAttributes.Details)
   460  			sum += getLengthOfStringPointer(event.ChildWorkflowExecutionFailedEventAttributes.Reason)
   461  		}
   462  	case s.EventTypeChildWorkflowExecutionCanceled:
   463  		if event.ChildWorkflowExecutionCanceledEventAttributes != nil {
   464  			sum += len(event.ChildWorkflowExecutionCanceledEventAttributes.Details)
   465  		}
   466  	case s.EventTypeSignalExternalWorkflowExecutionInitiated:
   467  		if event.SignalExternalWorkflowExecutionInitiatedEventAttributes != nil {
   468  			sum += len(event.SignalExternalWorkflowExecutionInitiatedEventAttributes.Control)
   469  			sum += len(event.SignalExternalWorkflowExecutionInitiatedEventAttributes.Input)
   470  		}
   471  	default:
   472  		logger.Debug("unsupported event type for history size estimation", zap.String("Event Type", event.GetEventType().String()))
   473  	}
   474  
   475  	return sum
   476  }
   477  
   478  // simple function to estimate the size of a map[string][]byte
   479  func sizeOf(o map[string][]byte) int {
   480  	sum := 0
   481  	for k, v := range o {
   482  		sum += len(k) + len(v)
   483  	}
   484  	return sum
   485  }
   486  
   487  // simple function to estimate the size of a string pointer
   488  func getLengthOfStringPointer(s *string) int {
   489  	if s == nil {
   490  		return 0
   491  	}
   492  	return len(*s)
   493  }