github.com/m3db/m3@v1.5.0/src/cluster/etcd/watchmanager/manager_test.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package watchmanager
    22  
    23  import (
    24  	"fmt"
    25  	"runtime"
    26  	"sync/atomic"
    27  	"testing"
    28  	"time"
    29  
    30  	"github.com/stretchr/testify/assert"
    31  	"github.com/stretchr/testify/require"
    32  	"github.com/uber-go/tally"
    33  	clientv3 "go.etcd.io/etcd/client/v3"
    34  	"go.etcd.io/etcd/tests/v3/framework/integration"
    35  	"golang.org/x/net/context"
    36  
    37  	"github.com/m3db/m3/src/x/clock"
    38  )
    39  
    40  func TestWatchChan(t *testing.T) {
    41  	wh, ecluster, _, _, _, closer := testCluster(t)
    42  	defer closer()
    43  
    44  	ec := ecluster.RandClient()
    45  	integration.WaitClientV3(t, ec)
    46  
    47  	wc, _, err := wh.watchChanWithTimeout("foo", 0)
    48  	require.NoError(t, err)
    49  	require.Equal(t, 0, len(wc))
    50  
    51  	_, err = ec.Put(context.Background(), "foo", "v")
    52  	require.NoError(t, err)
    53  
    54  	select {
    55  	case <-wc:
    56  	case <-time.After(time.Second):
    57  		require.Fail(t, "could not get notification")
    58  	}
    59  
    60  	ecluster.Members[0].Stop(t)
    61  
    62  	before := time.Now()
    63  	_, _, err = wh.watchChanWithTimeout("foo", 0)
    64  	require.WithinDuration(t, time.Now(), before, 150*time.Millisecond)
    65  	require.Error(t, err)
    66  	require.NoError(t, ecluster.Members[0].Restart(t))
    67  }
    68  
    69  func TestWatchSimple(t *testing.T) {
    70  	wh, ec, updateCalled, shouldStop, doneCh, closer := testSetup(t)
    71  	defer closer()
    72  	integration.WaitClientV3(t, ec)
    73  	require.Equal(t, int32(0), atomic.LoadInt32(updateCalled))
    74  
    75  	go wh.Watch("foo")
    76  
    77  	time.Sleep(3 * wh.opts.WatchChanInitTimeout())
    78  
    79  	lastRead := atomic.LoadInt32(updateCalled)
    80  	_, err := ec.Put(context.Background(), "foo", "v")
    81  	require.NoError(t, err)
    82  
    83  	for {
    84  		if atomic.LoadInt32(updateCalled) >= lastRead+1 {
    85  			break
    86  		}
    87  		time.Sleep(10 * time.Millisecond)
    88  	}
    89  
    90  	lastRead = atomic.LoadInt32(updateCalled)
    91  	_, err = ec.Put(context.Background(), "foo", "v")
    92  	require.NoError(t, err)
    93  
    94  	for {
    95  		if atomic.LoadInt32(updateCalled) >= lastRead+1 {
    96  			break
    97  		}
    98  		time.Sleep(10 * time.Millisecond)
    99  	}
   100  
   101  	// trigger CheckAndStop
   102  	atomic.AddInt32(shouldStop, 1)
   103  	<-doneCh
   104  
   105  	lastRead = atomic.LoadInt32(updateCalled)
   106  	_, err = ec.Put(context.Background(), "foo", "v")
   107  	require.NoError(t, err)
   108  	// put no longer triggers anything
   109  	require.Equal(t, lastRead, atomic.LoadInt32(updateCalled))
   110  
   111  	// sleep enough time and make sure nothing happens
   112  	time.Sleep(3 * wh.opts.WatchChanCheckInterval())
   113  
   114  	require.Equal(t, lastRead, atomic.LoadInt32(updateCalled))
   115  }
   116  
   117  func TestWatchRecreate(t *testing.T) {
   118  	wh, ecluster, updateCalled, shouldStop, doneCh, closer := testCluster(t)
   119  	defer closer()
   120  
   121  	ec := ecluster.RandClient()
   122  	integration.WaitClientV3(t, ec)
   123  
   124  	failTotal := 1
   125  	wh.opts = wh.opts.
   126  		SetClient(ec).
   127  		SetWatchChanInitTimeout(50 * time.Millisecond).
   128  		SetWatchChanResetInterval(50 * time.Millisecond)
   129  
   130  	go func() {
   131  		ecluster.Members[0].Bridge().DropConnections()
   132  		ecluster.Members[0].Bridge().Blackhole()
   133  		wh.Watch("foo")
   134  	}()
   135  
   136  	time.Sleep(4 * wh.opts.WatchChanInitTimeout())
   137  
   138  	// watch will error out but updateFn will be tried
   139  	for i := 0; i < 100; i++ {
   140  		if atomic.LoadInt32(updateCalled) >= int32(failTotal) {
   141  			break
   142  		}
   143  		time.Sleep(10 * time.Millisecond)
   144  	}
   145  
   146  	ecluster.Members[0].Bridge().Unblackhole()
   147  	// now we have retried failTotal times, give enough time for reset to happen
   148  	time.Sleep(3 * (wh.opts.WatchChanResetInterval()))
   149  
   150  	updatesBefore := atomic.LoadInt32(updateCalled)
   151  	// there should be a valid watch now, trigger a notification
   152  	_, err := ec.Put(context.Background(), "foo", "v")
   153  	require.NoError(t, err)
   154  
   155  	for i := 0; i < 100; i++ {
   156  		if atomic.LoadInt32(updateCalled) > updatesBefore {
   157  			break
   158  		}
   159  		time.Sleep(10 * time.Millisecond)
   160  	}
   161  
   162  	// clean up the background go routine
   163  	atomic.AddInt32(shouldStop, 1)
   164  	<-doneCh
   165  }
   166  
   167  func TestWatchNoLeader(t *testing.T) {
   168  	t.Skip("flaky, started to fail very consistently on CI")
   169  	const (
   170  		watchInitAndRetryDelay = 200 * time.Millisecond
   171  		watchCheckInterval     = 50 * time.Millisecond
   172  	)
   173  
   174  	integration.BeforeTestExternal(t)
   175  	ecluster := integration.NewCluster(t, &integration.ClusterConfig{Size: 3})
   176  	defer ecluster.Terminate(t)
   177  
   178  	var (
   179  		ec              = ecluster.Client(0)
   180  		tickDuration    = 10 * time.Millisecond
   181  		electionTimeout = time.Duration(3*ecluster.Members[0].ElectionTicks) * tickDuration
   182  		doneCh          = make(chan struct{}, 1)
   183  		eventLog        = []*clientv3.Event{}
   184  		updateCalled    int32
   185  		shouldStop      int32
   186  	)
   187  
   188  	opts := NewOptions().
   189  		SetClient(ec).
   190  		SetUpdateFn(
   191  			func(_ string, e []*clientv3.Event) error {
   192  				atomic.AddInt32(&updateCalled, 1)
   193  				if len(e) > 0 {
   194  					eventLog = append(eventLog, e...)
   195  				}
   196  				return nil
   197  			},
   198  		).
   199  		SetTickAndStopFn(
   200  			func(string) bool {
   201  				if atomic.LoadInt32(&shouldStop) == 0 {
   202  					return false
   203  				}
   204  
   205  				close(doneCh)
   206  
   207  				return true
   208  			},
   209  		).
   210  		SetWatchChanInitTimeout(watchInitAndRetryDelay).
   211  		SetWatchChanResetInterval(watchInitAndRetryDelay).
   212  		SetWatchChanCheckInterval(watchCheckInterval)
   213  
   214  	integration.WaitClientV3(t, ec)
   215  
   216  	wh, err := NewWatchManager(opts)
   217  	require.NoError(t, err)
   218  
   219  	go wh.Watch("foo")
   220  
   221  	runtime.Gosched()
   222  	time.Sleep(10 * time.Millisecond)
   223  
   224  	// there should be a valid watch now, trigger a notification
   225  	_, err = ec.Put(context.Background(), "foo", "bar")
   226  	require.NoError(t, err)
   227  
   228  	leaderIdx := ecluster.WaitLeader(t)
   229  	require.True(t, leaderIdx >= 0 && leaderIdx < len(ecluster.Members), "got invalid leader")
   230  
   231  	// simulate quorum loss
   232  	ecluster.Members[1].Stop(t)
   233  	ecluster.Members[2].Stop(t)
   234  
   235  	// wait for election timeout, then member[0] will not have a leader.
   236  	time.Sleep(electionTimeout)
   237  
   238  	require.NoError(t, ecluster.Members[1].Restart(t))
   239  	require.NoError(t, ecluster.Members[2].Restart(t))
   240  
   241  	// wait for leader + election delay just in case
   242  	time.Sleep(time.Duration(3*ecluster.Members[0].ElectionTicks) * tickDuration)
   243  
   244  	leaderIdx = ecluster.WaitLeader(t)
   245  	require.True(t, leaderIdx >= 0 && leaderIdx < len(ecluster.Members), "got invalid leader")
   246  	integration.WaitClientV3(t, ec) // wait for client to be ready again
   247  
   248  	_, err = ec.Put(context.Background(), "foo", "baz")
   249  	require.NoError(t, err)
   250  
   251  	// give some time for watch to be updated
   252  	require.True(t, clock.WaitUntil(func() bool {
   253  		return atomic.LoadInt32(&updateCalled) >= 2
   254  	}, 10*time.Second))
   255  
   256  	updates := atomic.LoadInt32(&updateCalled)
   257  	if updates < 2 {
   258  		require.Fail(t,
   259  			"insufficient update calls",
   260  			"expected at least 2 update attempts, got %d during a partition",
   261  			updates)
   262  	}
   263  
   264  	atomic.AddInt32(&shouldStop, 1)
   265  	<-doneCh
   266  
   267  	require.Len(t, eventLog, 2)
   268  	require.NotNil(t, eventLog[0])
   269  	require.Equal(t, eventLog[0].Kv.Key, []byte("foo"))
   270  	require.Equal(t, eventLog[0].Kv.Value, []byte("bar"))
   271  	require.NotNil(t, eventLog[1])
   272  	require.Equal(t, eventLog[1].Kv.Key, []byte("foo"))
   273  	require.Equal(t, eventLog[1].Kv.Value, []byte("baz"))
   274  }
   275  
   276  func TestWatchCompactedRevision(t *testing.T) {
   277  	wh, ec, updateCalled, shouldStop, doneCh, closer := testSetup(t)
   278  	defer closer()
   279  
   280  	integration.WaitClientV3(t, ec)
   281  
   282  	ts := tally.NewTestScope("", nil)
   283  	errC := ts.Counter("errors")
   284  	wh.m.etcdWatchError = errC
   285  
   286  	var compactRev int64
   287  	for i := 1; i <= 10; i++ {
   288  		resp, err := ec.Put(context.Background(), "foo", fmt.Sprintf("bar-%d", i))
   289  		require.NoError(t, err)
   290  		compactRev = resp.Header.Revision
   291  	}
   292  
   293  	_, err := ec.Compact(context.Background(), compactRev)
   294  	require.NoError(t, err)
   295  
   296  	wh.opts = wh.opts.SetWatchOptions([]clientv3.OpOption{
   297  		clientv3.WithCreatedNotify(),
   298  		clientv3.WithRev(1),
   299  	})
   300  
   301  	go wh.Watch("foo")
   302  
   303  	require.True(t, clock.WaitUntil(func() bool {
   304  		return atomic.LoadInt32(updateCalled) == 3
   305  	}, 30*time.Second))
   306  
   307  	lastRead := atomic.LoadInt32(updateCalled)
   308  	ec.Put(context.Background(), "foo", "bar-11")
   309  
   310  	for atomic.LoadInt32(updateCalled) <= lastRead {
   311  		time.Sleep(10 * time.Millisecond)
   312  	}
   313  
   314  	errN := ts.Snapshot().Counters()["errors+"].Value()
   315  	assert.Equal(t, int64(1), errN, "expected to encounter watch error")
   316  
   317  	atomic.AddInt32(shouldStop, 1)
   318  	<-doneCh
   319  }
   320  
   321  func testCluster(t *testing.T) (
   322  	*manager,
   323  	*integration.Cluster,
   324  	*int32,
   325  	*int32,
   326  	chan struct{},
   327  	func(),
   328  ) {
   329  	integration.BeforeTestExternal(t)
   330  	ecluster := integration.NewCluster(t, &integration.ClusterConfig{
   331  		Size:      1,
   332  		UseBridge: true,
   333  	})
   334  
   335  	closer := func() {
   336  		ecluster.Terminate(t)
   337  	}
   338  
   339  	var (
   340  		updateCalled int32
   341  		shouldStop   int32
   342  	)
   343  	doneCh := make(chan struct{}, 1)
   344  	opts := NewOptions().
   345  		SetClient(ecluster.RandClient()).
   346  		SetUpdateFn(func(string, []*clientv3.Event) error {
   347  			atomic.AddInt32(&updateCalled, 1)
   348  			return nil
   349  		}).
   350  		SetTickAndStopFn(func(string) bool {
   351  			if atomic.LoadInt32(&shouldStop) == 0 {
   352  				return false
   353  			}
   354  
   355  			close(doneCh)
   356  
   357  			return true
   358  		}).
   359  		SetWatchChanCheckInterval(100 * time.Millisecond).
   360  		SetWatchChanInitTimeout(100 * time.Millisecond).
   361  		SetWatchChanResetInterval(100 * time.Millisecond)
   362  
   363  	wh, err := NewWatchManager(opts)
   364  	require.NoError(t, err)
   365  
   366  	return wh.(*manager), ecluster, &updateCalled, &shouldStop, doneCh, closer
   367  }
   368  
   369  func testSetup(t *testing.T) (*manager, *clientv3.Client, *int32, *int32, chan struct{}, func()) {
   370  	wh, ecluster, updateCalled, shouldStop, donech, closer := testCluster(t)
   371  	return wh, ecluster.RandClient(), updateCalled, shouldStop, donech, closer
   372  }