github.com/outbrain/consul@v1.4.5/agent/cache/watch_test.go

     1  package cache
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync/atomic"
     7  	"testing"
     8  	"time"
     9  
    10  	"github.com/stretchr/testify/mock"
    11  	"github.com/stretchr/testify/require"
    12  )
    13  
     14  // Test that a registered type can be watched for updates via Notify.
    15  func TestCacheNotify(t *testing.T) {
    16  	t.Parallel()
    17  
    18  	typ := TestType(t)
    19  	defer typ.AssertExpectations(t)
    20  	c := TestCache(t)
    21  	c.RegisterType("t", typ, &RegisterOptions{
    22  		Refresh: false,
    23  	})
    24  
    25  	// Setup triggers to control when "updates" should be delivered
    26  	trigger := make([]chan time.Time, 4)
    27  	for i := range trigger {
    28  		trigger[i] = make(chan time.Time)
    29  	}
    30  
    31  	// Configure the type
    32  	typ.Static(FetchResult{Value: 1, Index: 4}, nil).Once().Run(func(args mock.Arguments) {
    33  		// Assert the right request type - all real Fetch implementations do this,
    34  		// so it keeps us honest that Watch doesn't require type mangling, which
    35  		// would break in real life (hint: it did on the first attempt).
    36  		_, ok := args.Get(1).(*MockRequest)
    37  		require.True(t, ok)
    38  	})
    39  	typ.Static(FetchResult{Value: 12, Index: 5}, nil).Once().WaitUntil(trigger[0])
    40  	typ.Static(FetchResult{Value: 12, Index: 5}, nil).Once().WaitUntil(trigger[1])
    41  	typ.Static(FetchResult{Value: 42, Index: 7}, nil).Once().WaitUntil(trigger[2])
    42  	// It's timing-dependent whether the blocking loop manages to make another
    43  	// call before we cancel, so don't require it. We need a higher index here
    44  	// because if the index were the same, the cache Get would not return until
    45  	// the full 10 minute timeout expires, which would make the last fetch
    46  	// return after cancellation as if it had timed out.
    47  	typ.Static(FetchResult{Value: 42, Index: 8}, nil).WaitUntil(trigger[3])
    48  
    49  	require := require.New(t)
    50  
    51  	ctx, cancel := context.WithCancel(context.Background())
    52  	defer cancel()
    53  
    54  	ch := make(chan UpdateEvent)
    55  
    56  	err := c.Notify(ctx, "t", TestRequest(t, RequestInfo{Key: "hello"}), "test", ch)
    57  	require.NoError(err)
    58  
    59  	// Should receive the first result pretty soon
    60  	TestCacheNotifyChResult(t, ch, UpdateEvent{
    61  		CorrelationID: "test",
    62  		Result:        1,
    63  		Meta:          ResultMeta{Hit: false, Index: 4},
    64  		Err:           nil,
    65  	})
    66  
    67  	// There should be no more updates delivered yet
    68  	require.Len(ch, 0)
    69  
    70  	// Trigger blocking query to return a "change"
    71  	close(trigger[0])
    72  
    73  	// Should receive the next result pretty soon
    74  	TestCacheNotifyChResult(t, ch, UpdateEvent{
    75  		CorrelationID: "test",
    76  		Result:        12,
    77  		// Note these are never cache "hits" because blocking will wait until there
    78  		// is a new value, at which point it's not considered a hit.
    79  		Meta: ResultMeta{Hit: false, Index: 5},
    80  		Err:  nil,
    81  	})
    82  
    83  	// Register a second observer using the same chan and request. Note that
    84  	// this is implicitly testing a few things (see the sketch after this test):
    85  	//  - that multiple watchers on the same cache entity are de-duped in their
    86  	//    requests to the "backend"
    87  	//  - that multiple watchers can distinguish their results using correlationID
    88  	err = c.Notify(ctx, "t", TestRequest(t, RequestInfo{Key: "hello"}), "test2", ch)
    89  	require.NoError(err)
    90  
    91  	// Should get test2 notify immediately, and it should be a cache hit
    92  	TestCacheNotifyChResult(t, ch, UpdateEvent{
    93  		CorrelationID: "test2",
    94  		Result:        12,
    95  		Meta:          ResultMeta{Hit: true, Index: 5},
    96  		Err:           nil,
    97  	})
    98  
    99  	// We could wait for a full timeout, but we can't directly observe that, so
   100  	// simulate the behavior by triggering a response with the same value and
   101  	// index as the last one.
   102  	close(trigger[1])
   103  
   104  	// We should NOT be notified about that. Note this is timing-dependent, but
   105  	// it's only a sanity check: if we somehow _do_ get the change delivered
   106  	// later than 10ms from now, the next value assertion will fail anyway.
   107  	time.Sleep(10 * time.Millisecond)
   108  	require.Len(ch, 0)
   109  
   110  	// Trigger final update
   111  	close(trigger[2])
   112  
   113  	TestCacheNotifyChResult(t, ch, UpdateEvent{
   114  		CorrelationID: "test",
   115  		Result:        42,
   116  		Meta:          ResultMeta{Hit: false, Index: 7},
   117  		Err:           nil,
   118  	}, UpdateEvent{
   119  		CorrelationID: "test2",
   120  		Result:        42,
   121  		Meta:          ResultMeta{Hit: false, Index: 7},
   122  		Err:           nil,
   123  	})
   124  
   125  	// Sanity check that closing the chan before the context is canceled doesn't panic
   126  	//close(ch)
   127  
   128  	// Close context
   129  	cancel()
   130  
   131  	// It's likely but not certain that at least one of the watchers was blocked
   132  	// on the next cache Get, so trigger that to time out so we can observe the
   133  	// watch goroutines being cleaned up. This is necessary since currently we
   134  	// have no way to interrupt a blocking query. In practice it's fine to know
   135  	// that after 10 minutes max the blocking query will return and the resources
   136  	// will be cleaned up.
   137  	close(trigger[3])
   138  
   139  	// I want to test that canceling the context cleans up goroutines (which it
   140  	// does, based on manual verification with a debugger etc.). I had a check
   141  	// using a similar approach to https://golang.org/src/net/http/main_test.go#L60,
   142  	// but it was just too flaky because it relies on the timing of the error
   143  	// backoff timer goroutines and similar, so I've given up for now as I have
   144  	// more important things to get working.
   145  }
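
        // A minimal sketch, not part of the upstream test file, of the fan-in
        // pattern exercised above: two Notify registrations share one channel and
        // one request, the cache de-dupes the backend fetches, and the caller tells
        // the results apart by CorrelationID. The "first"/"second" IDs and the
        // watchBoth/routeUpdates names are illustrative assumptions, not part of
        // the cache API.
        func watchBoth(ctx context.Context, c *Cache, req Request) (<-chan UpdateEvent, error) {
        	ch := make(chan UpdateEvent)
        	// Both watchers use the type name "t" registered by these tests.
        	if err := c.Notify(ctx, "t", req, "first", ch); err != nil {
        		return nil, err
        	}
        	if err := c.Notify(ctx, "t", req, "second", ch); err != nil {
        		return nil, err
        	}
        	return ch, nil
        }

        func routeUpdates(ctx context.Context, ch <-chan UpdateEvent) {
        	for {
        		select {
        		case <-ctx.Done():
        			return
        		case u := <-ch:
        			if u.Err != nil {
        				continue // fetch errors are retried internally with a backoff
        			}
        			switch u.CorrelationID {
        			case "first":
        				_ = u.Result // handle the first watcher's value
        			case "second":
        				_ = u.Result // handle the second watcher's value
        			}
        		}
        	}
        }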
   146  
        // Test that a watch on a type that does not support blocking queries polls
        // for updates, honoring the request's MaxAge.
   147  func TestCacheNotifyPolling(t *testing.T) {
   148  	t.Parallel()
   149  
   150  	typ := TestTypeNonBlocking(t)
   151  	defer typ.AssertExpectations(t)
   152  	c := TestCache(t)
   153  	c.RegisterType("t", typ, &RegisterOptions{
   154  		Refresh: false,
   155  	})
   156  
   157  	// Configure the type
   158  	typ.Static(FetchResult{Value: 1, Index: 1}, nil).Once().Run(func(args mock.Arguments) {
   159  		// Assert the right request type - all real Fetch implementations do this,
   160  		// so it keeps us honest that Watch doesn't require type mangling, which
   161  		// would break in real life (hint: it did on the first attempt).
   162  		_, ok := args.Get(1).(*MockRequest)
   163  		require.True(t, ok)
   164  	})
   165  	typ.Static(FetchResult{Value: 12, Index: 1}, nil).Once()
   166  	typ.Static(FetchResult{Value: 42, Index: 1}, nil).Once()
   167  
   168  	require := require.New(t)
   169  
   170  	ctx, cancel := context.WithCancel(context.Background())
   171  	defer cancel()
   172  
   173  	ch := make(chan UpdateEvent)
   174  
   175  	err := c.Notify(ctx, "t", TestRequest(t, RequestInfo{Key: "hello", MaxAge: 100 * time.Millisecond}), "test", ch)
   176  	require.NoError(err)
   177  
   178  	// Should receive the first result pretty soon
   179  	TestCacheNotifyChResult(t, ch, UpdateEvent{
   180  		CorrelationID: "test",
   181  		Result:        1,
   182  		Meta:          ResultMeta{Hit: false, Index: 1},
   183  		Err:           nil,
   184  	})
   185  
   186  	// There should be no more updates delivered yet
   187  	require.Len(ch, 0)
   188  
   189  	// Make sure the updates do not come too quickly
   190  	select {
   191  	case <-time.After(50 * time.Millisecond):
   192  	case <-ch:
   193  		require.Fail("Received update too early")
   194  	}
   195  
   196  	// Make sure we do get the update before too long.
   197  	select {
   198  	case <-time.After(100 * time.Millisecond):
   199  		require.Fail("Didn't receive the notification")
   200  	case result := <-ch:
   201  		require.Equal(result.Result, 12)
   202  		require.Equal(result.CorrelationID, "test")
   203  		require.Equal(result.Meta.Hit, false)
   204  		require.Equal(result.Meta.Index, uint64(1))
   205  		// This is a pretty conservative check; it should be even newer because,
   206  		// without a second notifier, each value returned will have just been
   207  		// fetched rather than served from the cache.
   208  		require.True(result.Meta.Age < 50*time.Millisecond)
   209  		require.NoError(result.Err)
   210  	}
   211  
   212  	require.Len(ch, 0)
   213  
   214  	// Register a second observer using the same chan and request. Note that this
   215  	// is implicitly testing a few things:
   216  	//  - that multiple watchers on the same cache entity are de-duped in their
   217  	//    requests to the "backend"
   218  	//  - that multiple watchers can distinguish their results using correlationID
   219  	err = c.Notify(ctx, "t", TestRequest(t, RequestInfo{Key: "hello", MaxAge: 100 * time.Millisecond}), "test2", ch)
   220  	require.NoError(err)
   221  
   222  	// Should get test2 notify immediately, and it should be a cache hit
   223  	TestCacheNotifyChResult(t, ch, UpdateEvent{
   224  		CorrelationID: "test2",
   225  		Result:        12,
   226  		Meta:          ResultMeta{Hit: true, Index: 1},
   227  		Err:           nil,
   228  	})
   229  
   230  	require.Len(ch, 0)
   231  
   232  	// wait for the next batch of responses
   233  	events := make([]UpdateEvent, 0)
   234  	// 110ms is needed to allow for the jitter
   235  	timeout := time.After(110 * time.Millisecond)
   236  
   237  	for i := 0; i < 2; i++ {
   238  		select {
   239  		case <-timeout:
   240  			require.Fail("UpdateEvent not received in time")
   241  		case eve := <-ch:
   242  			events = append(events, eve)
   243  		}
   244  	}
   245  
   246  	require.Equal(events[0].Result, 42)
   247  	require.Equal(events[0].Meta.Hit, false)
   248  	require.Equal(events[0].Meta.Index, uint64(1))
   249  	require.True(events[0].Meta.Age < 50*time.Millisecond)
   250  	require.NoError(events[0].Err)
   251  	require.Equal(events[1].Result, 42)
   252  	// Sometimes this would be a hit and other times not; it depends on when the
   253  	// various getWithIndex calls fire. If both run concurrently it will not be a
   254  	// cache hit, but the request gets single-flighted and both get notified at once.
   255  	// require.Equal(events[1].Meta.Hit, true)
   256  	require.Equal(events[1].Meta.Index, uint64(1))
   257  	require.True(events[1].Meta.Age < 100*time.Millisecond)
   258  	require.NoError(events[1].Err)
   259  }
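
        // A minimal sketch, not part of the upstream test file, of the polling case
        // this test covers: the registered type does not support blocking queries,
        // so Notify polls instead of blocking on an index, and the request's MaxAge
        // bounds how stale a cached value may be (the timing assertions above rely
        // on a roughly MaxAge-sized poll interval). The pollReq type and the
        // notifyPolling name are illustrative assumptions.
        type pollReq struct {
        	key    string
        	maxAge time.Duration
        }

        // CacheInfo implements the package's Request interface with just a Key and
        // a MaxAge, mirroring the RequestInfo used by the test above.
        func (r pollReq) CacheInfo() RequestInfo {
        	return RequestInfo{Key: r.key, MaxAge: r.maxAge}
        }

        func notifyPolling(ctx context.Context, c *Cache, ch chan UpdateEvent) error {
        	return c.Notify(ctx, "t", pollReq{key: "hello", maxAge: 100 * time.Millisecond}, "poll", ch)
        }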
   260  
    261  // Test that a watch retries failed fetches with a backoff.
   262  func TestCacheWatch_ErrorBackoff(t *testing.T) {
   263  	t.Parallel()
   264  
   265  	typ := TestType(t)
   266  	defer typ.AssertExpectations(t)
   267  	c := TestCache(t)
   268  	c.RegisterType("t", typ, &RegisterOptions{
   269  		Refresh: false,
   270  	})
   271  
   272  	// Configure the type
   273  	var retries uint32
   274  	fetchErr := fmt.Errorf("test fetch error")
   275  	typ.Static(FetchResult{Value: 1, Index: 4}, nil).Once()
   276  	typ.Static(FetchResult{Value: nil, Index: 5}, fetchErr).Run(func(args mock.Arguments) {
   277  		atomic.AddUint32(&retries, 1)
   278  	})
   279  
   280  	require := require.New(t)
   281  
   282  	ctx, cancel := context.WithCancel(context.Background())
   283  	defer cancel()
   284  
   285  	ch := make(chan UpdateEvent)
   286  
   287  	err := c.Notify(ctx, "t", TestRequest(t, RequestInfo{Key: "hello"}), "test", ch)
   288  	require.NoError(err)
   289  
   290  	// Should receive the first result pretty soon
   291  	TestCacheNotifyChResult(t, ch, UpdateEvent{
   292  		CorrelationID: "test",
   293  		Result:        1,
   294  		Meta:          ResultMeta{Hit: false, Index: 4},
   295  		Err:           nil,
   296  	})
   297  
   298  	numErrors := 0
   299  	// Loop for a little while and count how many errors we see reported. If this
   300  	// were running as fast as it could go we'd expect this to be huge. We have to
   301  	// be a little careful here because the watch chan ch doesn't have a large
   302  	// buffer, so we could be artificially slowing down the loop without the
   303  	// backoff actually taking effect. We can validate that by ensuring this test
   304  	// reliably fails without the backoff code.
   305  	timeoutC := time.After(500 * time.Millisecond)
   306  OUT:
   307  	for {
   308  		select {
   309  		case <-timeoutC:
   310  			break OUT
   311  		case u := <-ch:
   312  			numErrors++
   313  			require.Error(u.Err)
   314  		}
   315  	}
   316  	// Must be fewer than 10 failures in that time
   317  	require.True(numErrors < 10, fmt.Sprintf("numErrors: %d", numErrors))
   318  
   319  	// Check the number of RPCs as a sanity check too
   320  	actual := atomic.LoadUint32(&retries)
   321  	require.True(actual < 10, fmt.Sprintf("actual: %d", actual))
   322  }
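
        // A minimal sketch, not part of the upstream test file, of how a consumer
        // might treat error events, which this test shows are delivered on the same
        // channel as results and retried internally with a backoff. The
        // consumeUpdates name and the onErr callback are illustrative assumptions.
        func consumeUpdates(ctx context.Context, ch <-chan UpdateEvent, onErr func(error)) interface{} {
        	var lastGood interface{}
        	for {
        		select {
        		case <-ctx.Done():
        			// Return the most recent good value once the watch is canceled.
        			return lastGood
        		case u := <-ch:
        			if u.Err != nil {
        				// Keep the last good value; the cache retries the failing
        				// fetch with a backoff on our behalf.
        				onErr(u.Err)
        				continue
        			}
        			lastGood = u.Result
        		}
        	}
        }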
   323  
    324  // Test that a polling (non-blocking) watch retries failed fetches with a backoff.
   325  func TestCacheWatch_ErrorBackoffNonBlocking(t *testing.T) {
   326  	t.Parallel()
   327  
   328  	typ := TestTypeNonBlocking(t)
   329  	defer typ.AssertExpectations(t)
   330  	c := TestCache(t)
   331  	c.RegisterType("t", typ, &RegisterOptions{
   332  		Refresh: false,
   333  	})
   334  
   335  	// Configure the type
   336  	var retries uint32
   337  	fetchErr := fmt.Errorf("test fetch error")
   338  	typ.Static(FetchResult{Value: 1, Index: 4}, nil).Once()
   339  	typ.Static(FetchResult{Value: nil, Index: 5}, fetchErr).Run(func(args mock.Arguments) {
   340  		atomic.AddUint32(&retries, 1)
   341  	})
   342  
   343  	require := require.New(t)
   344  
   345  	ctx, cancel := context.WithCancel(context.Background())
   346  	defer cancel()
   347  
   348  	ch := make(chan UpdateEvent)
   349  
   350  	err := c.Notify(ctx, "t", TestRequest(t, RequestInfo{Key: "hello", MaxAge: 100 * time.Millisecond}), "test", ch)
   351  	require.NoError(err)
   352  
   353  	// Should receive the first result pretty soon
   354  	TestCacheNotifyChResult(t, ch, UpdateEvent{
   355  		CorrelationID: "test",
   356  		Result:        1,
   357  		Meta:          ResultMeta{Hit: false, Index: 4},
   358  		Err:           nil,
   359  	})
   360  
   361  	numErrors := 0
   362  	// Loop for a little while and count how many errors we see reported. If this
   363  	// were running as fast as it could go we'd expect this to be huge. We have to
   364  	// be a little careful here because the watch chan ch doesn't have a large
   365  	// buffer, so we could be artificially slowing down the loop without the
   366  	// backoff actually taking effect. We can validate that by ensuring this test
   367  	// reliably fails without the backoff code.
   368  	//
   369  	// 100 + 500 milliseconds: the extra 100 is because the first retry will not
   370  	// happen until 100ms + jitter have elapsed.
   371  	timeoutC := time.After(600 * time.Millisecond)
   372  OUT:
   373  	for {
   374  		select {
   375  		case <-timeoutC:
   376  			break OUT
   377  		case u := <-ch:
   378  			numErrors++
   379  			require.Error(u.Err)
   380  		}
   381  	}
   382  	// Must be fewer than 10 failures in that time
   383  	require.True(numErrors < 10, fmt.Sprintf("numErrors: %d", numErrors))
   384  
   385  	// Check the number of RPCs as a sanity check too
   386  	actual := atomic.LoadUint32(&retries)
   387  	require.True(actual < 10, fmt.Sprintf("actual: %d", actual))
   388  }