go.uber.org/cadence@v1.2.9/internal/context_test.go

go.uber.org/cadence@v1.2.9/internal/context_test.go (about)

     1  // Copyright (c) 2017-2021 Uber Technologies Inc.
     2  // Portions of the Software are attributed to Copyright (c) 2020 Temporal Technologies Inc.
     3  //
     4  // Permission is hereby granted, free of charge, to any person obtaining a copy
     5  // of this software and associated documentation files (the "Software"), to deal
     6  // in the Software without restriction, including without limitation the rights
     7  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     8  // copies of the Software, and to permit persons to whom the Software is
     9  // furnished to do so, subject to the following conditions:
    10  //
    11  // The above copyright notice and this permission notice shall be included in
    12  // all copies or substantial portions of the Software.
    13  //
    14  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    15  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    16  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    17  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    18  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    19  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    20  // THE SOFTWARE.
    21  
    22  package internal
    23  
    24  import (
    25  	"context"
    26  	"testing"
    27  	"time"
    28  
    29  	"github.com/stretchr/testify/assert"
    30  	"github.com/stretchr/testify/mock"
    31  	"github.com/stretchr/testify/require"
    32  )
    33  
    34  func TestContextChildParentCancelRace(t *testing.T) {
    35  	/*
    36  		Testing previous race happened while child and parent cancelling at the same time
    37  		While child is trying to remove itself from the parent, parent tries to iterate
    38  		its children and cancel them at the same time.
    39  	*/
    40  	env := newTestWorkflowEnv(t)
    41  
    42  	wf := func(ctx Context) error {
    43  		parentCtx, parentCancel := WithCancel(ctx)
    44  		defer parentCancel()
    45  
    46  		type cancelerContext struct {
    47  			ctx      Context
    48  			canceler func()
    49  		}
    50  
    51  		children := []cancelerContext{}
    52  		numChildren := 100
    53  
    54  		for i := 0; i < numChildren; i++ {
    55  			c, canceler := WithCancel(parentCtx)
    56  			children = append(children, cancelerContext{
    57  				ctx:      c,
    58  				canceler: canceler,
    59  			})
    60  		}
    61  
    62  		for i := 0; i < numChildren; i++ {
    63  			go children[i].canceler()
    64  			if i == numChildren/2 {
    65  				go parentCancel()
    66  			}
    67  		}
    68  
    69  		return nil
    70  	}
    71  	env.RegisterWorkflow(wf)
    72  	env.ExecuteWorkflow(wf)
    73  	assert.NoError(t, env.GetWorkflowError())
    74  }
    75  
    76  func TestContextConcurrentCancelRace(t *testing.T) {
    77  	/*
    78  		A race condition existed due to concurrently ending goroutines on shutdown (i.e. closing their chan without waiting
    79  		on them to finish shutdown), which executed... quite a lot of non-concurrency-safe code in a concurrent way.  All
    80  		decision-sensitive code is assumed to be run strictly sequentially.
    81  
    82  		Context cancellation was one identified by a customer, and it's fairly easy to test.
    83  		In principle this must be safe to do - contexts are supposed to be concurrency-safe.  Even if ours are not actually
    84  		safe (for valid reasons), our execution model needs to ensure they *act* like it's safe.
    85  	*/
    86  	env := newTestWorkflowEnv(t)
    87  	wf := func(ctx Context) error {
    88  		ctx, cancel := WithCancel(ctx)
    89  		racyCancel := func(ctx Context) {
    90  			defer cancel() // defer is necessary as Sleep will never return due to Goexit
    91  			_ = Sleep(ctx, time.Hour)
    92  		}
    93  		// start a handful to increase odds of a race being detected
    94  		for i := 0; i < 10; i++ {
    95  			Go(ctx, racyCancel)
    96  		}
    97  
    98  		_ = Sleep(ctx, time.Minute) // die early
    99  		return nil
   100  	}
   101  	env.RegisterWorkflow(wf)
   102  	env.ExecuteWorkflow(wf)
   103  	assert.NoError(t, env.GetWorkflowError())
   104  }
   105  
   106  func TestContextAddChildCancelParentRace(t *testing.T) {
   107  	/*
   108  		It's apparently also possible to race on adding children while propagating the cancel to children.
   109  	*/
   110  	env := newTestWorkflowEnv(t)
   111  	wf := func(ctx Context) error {
   112  		ctx, cancel := WithCancel(ctx)
   113  		racyCancel := func(ctx Context) {
   114  			defer cancel() // defer is necessary as Sleep will never return due to Goexit
   115  			defer func() {
   116  				_, ccancel := WithCancel(ctx)
   117  				cancel()
   118  				ccancel()
   119  			}()
   120  			_ = Sleep(ctx, time.Hour)
   121  		}
   122  		// start a handful to increase odds of a race being detected
   123  		for i := 0; i < 10; i++ {
   124  			Go(ctx, racyCancel)
   125  		}
   126  
   127  		_ = Sleep(ctx, time.Minute) // die early
   128  		return nil
   129  	}
   130  	env.RegisterWorkflow(wf)
   131  	env.ExecuteWorkflow(wf)
   132  	assert.NoError(t, env.GetWorkflowError())
   133  }
   134  
   135  func TestContextCancellationOrderDeterminism(t *testing.T) {
   136  	/*
   137  		Previously, child-contexts were stored in a map, preventing deterministic order when propagating cancellation.
   138  		The order of branches being selected in this test was random, both for the first event and in following ones.
   139  
   140  		In principle this should be fine, but it's possible for the effects of cancellation to trigger a selector's
   141  		future-done callback, which currently records the *real-time*-first event as the branch to unblock, rather than
   142  		doing something more safe by design (e.g. choosing based on state when the selector's goroutine is unblocked).
   143  
   144  		Unfortunately, we cannot change the selector's behavior without introducing non-backwards-compatible changes to
   145  		currently-working workflows.
   146  
   147  		So the workaround for now is to maintain child-context order, so they are canceled in a consistent order.
   148  		As this order was not controlled before, and Go does a pretty good job at randomizing map iteration order,
   149  		converting non-determinism to determinism should be strictly no worse for backwards compatibility, and it
   150  		fixes the issue for future executions.
   151  	*/
   152  	check := func(t *testing.T, separateStart, separateSelect bool) {
   153  		env := newTestWorkflowEnv(t)
   154  		act := func(ctx context.Context) error {
   155  			return nil // will be mocked
   156  		}
   157  		wf := func(ctx Context) ([]int, error) {
   158  			ctx, cancel := WithCancel(ctx)
   159  			Go(ctx, func(ctx Context) {
   160  				_ = Sleep(ctx, time.Minute)
   161  				cancel()
   162  			})
   163  
   164  			// start some activities, which will not complete before the timeout cancels them
   165  			ctx = WithActivityOptions(ctx, ActivityOptions{
   166  				TaskList:               "",
   167  				ScheduleToCloseTimeout: time.Hour,
   168  				ScheduleToStartTimeout: time.Hour,
   169  				StartToCloseTimeout:    time.Hour,
   170  			})
   171  			s := NewSelector(ctx)
   172  			var result []int
   173  			for i := 0; i < 10; i++ {
   174  				i := i
   175  				// need a child context, a future alone is not enough as it does not become a child
   176  				cctx, ccancel := WithCancel(ctx)
   177  
   178  				s.AddFuture(ExecuteActivity(cctx, act), func(f Future) {
   179  					ccancel() // TODO: is this necessary to prevent leaks?  if it is, how can we make it not?
   180  					err := f.Get(ctx, nil)
   181  					if err == nil || !IsCanceledError(err) {
   182  						// fail the test, this should not happen - activities must be canceled or it's not valid.
   183  						t.Errorf("activity completion or failure for some reason other than cancel: %v", err)
   184  					}
   185  					result = append(result, i)
   186  				})
   187  
   188  				if separateStart {
   189  					// yield so they are submitted one at a time, in case that matters
   190  					_ = Sleep(ctx, time.Second)
   191  				}
   192  			}
   193  			for i := 0; i < 10; i++ {
   194  				if separateSelect {
   195  					// yield so they are selected one at a time, in case that matters
   196  					_ = Sleep(ctx, time.Second)
   197  				}
   198  				s.Select(ctx)
   199  			}
   200  
   201  			return result, nil
   202  		}
   203  		env.RegisterWorkflow(wf)
   204  		env.RegisterActivity(act)
   205  
   206  		// activities must not complete in time
   207  		env.OnActivity(act, mock.Anything).After(5 * time.Minute).Return(nil)
   208  
   209  		env.ExecuteWorkflow(wf)
   210  		require.NoError(t, env.GetWorkflowError())
   211  		var result []int
   212  		require.NoError(t, env.GetWorkflowResult(&result))
   213  		require.NotEmpty(t, result)
   214  		assert.Equal(t, 0, result[0], "first activity to be created should be the first one canceled")
   215  		assert.Equal(t, []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, result[1:], "other activities should finish in a consistent (but undefined) order")
   216  	}
   217  
   218  	type variant struct {
   219  		name           string
   220  		separateStart  bool
   221  		separateSelect bool
   222  	}
   223  	// all variants expose this behavior, but being a bit more exhaustive in the face
   224  	// of decision-scheduling differences seems good.
   225  	for _, test := range []variant{
   226  		{"many in one decision", false, false},
   227  		{"many started at once, selected slowly", false, true},
   228  		{"started slowly, selected quickly", true, false},
   229  		{"started and selected slowly", true, true},
   230  	} {
   231  		t.Run(test.name, func(t *testing.T) {
   232  			check(t, test.separateStart, test.separateSelect)
   233  		})
   234  	}
   235  }
   236  
   237  func BenchmarkSliceMaintenance(b *testing.B) {
   238  	// all essentially identical
   239  	b.Run("append", func(b *testing.B) {
   240  		data := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
   241  		for i := 0; i < b.N; i++ {
   242  			data = append(data[:5], data[6:]...)
   243  			data = append(data, i) // keep the slice the same size for all iterations
   244  		}
   245  	})
   246  	b.Run("copy", func(b *testing.B) {
   247  		data := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
   248  		for i := 0; i < b.N; i++ {
   249  			copy(data[5:], data[6:])
   250  			data = data[:9]        // trim to actual size, as the last value is now duplicated.  capacity is still 10.
   251  			data = append(data, i) // keep the slice the same size for all iterations
   252  		}
   253  	})
   254  	b.Run("copy explicit capacity", func(b *testing.B) {
   255  		data := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
   256  		for i := 0; i < b.N; i++ {
   257  			copy(data[5:], data[6:])
   258  			data = data[:9:10]     // trim to actual size, as the last value is now duplicated.  explicitly reserve 10 cap.
   259  			data = append(data, i) // keep the slice the same size for all iterations
   260  		}
   261  	})
   262  }