github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/cluster/etcd/watchmanager/manager_test.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package watchmanager
    22  
    23  import (
    24  	"fmt"
    25  	"sync/atomic"
    26  	"testing"
    27  	"time"
    28  
    29  	integration "github.com/m3db/m3/src/integration/resources/docker/dockerexternal/etcdintegration"
    30  	"github.com/stretchr/testify/assert"
    31  	"github.com/stretchr/testify/require"
    32  	"github.com/uber-go/tally"
    33  	clientv3 "go.etcd.io/etcd/client/v3"
    34  	"golang.org/x/net/context"
    35  
    36  	"github.com/m3db/m3/src/x/clock"
    37  )
    38  
    39  func TestWatchChan(t *testing.T) {
    40  	wh, ecluster, _, _, _, closer := testCluster(t)
    41  	defer closer()
    42  
    43  	ec := ecluster.RandClient()
    44  	integration.WaitClientV3(t, ec)
    45  
    46  	wc, _, err := wh.watchChanWithTimeout("foo", 0)
    47  	require.NoError(t, err)
    48  	require.Equal(t, 0, len(wc))
    49  
    50  	_, err = ec.Put(context.Background(), "foo", "v")
    51  	require.NoError(t, err)
    52  
    53  	select {
    54  	case <-wc:
    55  	case <-time.After(time.Second):
    56  		require.Fail(t, "could not get notification")
    57  	}
    58  
    59  	ecluster.Members[0].Stop(t)
    60  
    61  	before := time.Now()
    62  	_, _, err = wh.watchChanWithTimeout("foo", 0)
    63  	require.WithinDuration(t, time.Now(), before, 150*time.Millisecond)
    64  	require.Error(t, err)
    65  	require.NoError(t, ecluster.Members[0].Restart(t))
    66  }
    67  
    68  func TestWatchSimple(t *testing.T) {
    69  	wh, ec, updateCalled, shouldStop, doneCh, closer := testSetup(t)
    70  	defer closer()
    71  	integration.WaitClientV3(t, ec)
    72  	require.Equal(t, int32(0), atomic.LoadInt32(updateCalled))
    73  
    74  	go wh.Watch("foo")
    75  
    76  	time.Sleep(3 * wh.opts.WatchChanInitTimeout())
    77  
    78  	lastRead := atomic.LoadInt32(updateCalled)
    79  	_, err := ec.Put(context.Background(), "foo", "v")
    80  	require.NoError(t, err)
    81  
    82  	for {
    83  		if atomic.LoadInt32(updateCalled) >= lastRead+1 {
    84  			break
    85  		}
    86  		time.Sleep(10 * time.Millisecond)
    87  	}
    88  
    89  	lastRead = atomic.LoadInt32(updateCalled)
    90  	_, err = ec.Put(context.Background(), "foo", "v")
    91  	require.NoError(t, err)
    92  
    93  	for {
    94  		if atomic.LoadInt32(updateCalled) >= lastRead+1 {
    95  			break
    96  		}
    97  		time.Sleep(10 * time.Millisecond)
    98  	}
    99  
   100  	// trigger CheckAndStop
   101  	atomic.AddInt32(shouldStop, 1)
   102  	<-doneCh
   103  
   104  	lastRead = atomic.LoadInt32(updateCalled)
   105  	_, err = ec.Put(context.Background(), "foo", "v")
   106  	require.NoError(t, err)
   107  	// put no longer triggers anything
   108  	require.Equal(t, lastRead, atomic.LoadInt32(updateCalled))
   109  
   110  	// sleep enough time and make sure nothing happens
   111  	time.Sleep(3 * wh.opts.WatchChanCheckInterval())
   112  
   113  	require.Equal(t, lastRead, atomic.LoadInt32(updateCalled))
   114  }
   115  
   116  func TestWatchRecreate(t *testing.T) {
   117  	wh, ecluster, updateCalled, shouldStop, doneCh, closer := testCluster(t)
   118  	defer closer()
   119  
   120  	ec := ecluster.RandClient()
   121  	integration.WaitClientV3(t, ec)
   122  
   123  	failTotal := 1
   124  	wh.opts = wh.opts.
   125  		SetClient(ec).
   126  		SetWatchChanInitTimeout(50 * time.Millisecond).
   127  		SetWatchChanResetInterval(50 * time.Millisecond)
   128  
   129  	go func() {
   130  		ecluster.Members[0].Bridge().DropConnections()
   131  		ecluster.Members[0].Bridge().Blackhole()
   132  		wh.Watch("foo")
   133  	}()
   134  
   135  	time.Sleep(4 * wh.opts.WatchChanInitTimeout())
   136  
   137  	// watch will error out but updateFn will be tried
   138  	for i := 0; i < 100; i++ {
   139  		if atomic.LoadInt32(updateCalled) >= int32(failTotal) {
   140  			break
   141  		}
   142  		time.Sleep(10 * time.Millisecond)
   143  	}
   144  
   145  	ecluster.Members[0].Bridge().Unblackhole()
   146  	// now we have retried failTotal times, give enough time for reset to happen
   147  	time.Sleep(3 * (wh.opts.WatchChanResetInterval()))
   148  
   149  	updatesBefore := atomic.LoadInt32(updateCalled)
   150  	// there should be a valid watch now, trigger a notification
   151  	_, err := ec.Put(context.Background(), "foo", "v")
   152  	require.NoError(t, err)
   153  
   154  	for i := 0; i < 100; i++ {
   155  		if atomic.LoadInt32(updateCalled) > updatesBefore {
   156  			break
   157  		}
   158  		time.Sleep(10 * time.Millisecond)
   159  	}
   160  
   161  	// clean up the background go routine
   162  	atomic.AddInt32(shouldStop, 1)
   163  	<-doneCh
   164  }
   165  
   166  // TODO: this test has been skipped for a while, and now doesn't work with the docker based etcd integration package.
   167  // Revive it if it's useful, and make it no longer flake.
   168  //nolint:gocritic
   169  //func TestWatchNoLeader(t *testing.T) {
   170  //	t.Skip("flaky, started to fail very consistently on CI")
   171  //	const (
   172  //		watchInitAndRetryDelay = 200 * time.Millisecond
   173  //		watchCheckInterval     = 50 * time.Millisecond
   174  //	)
   175  //
   176  //	integration.BeforeTestExternal(t)
   177  //	ecluster := integration.NewCluster(t, &integration.ClusterConfig{Size: 3})
   178  //	defer ecluster.Terminate(t)
   179  //
   180  //	var (
   181  //		ec              = ecluster.Client(0)
   182  //		tickDuration    = 10 * time.Millisecond
   183  //		electionTimeout = time.Duration(3*ecluster.Address[0].ElectionTicks) * tickDuration
   184  //		doneCh          = make(chan struct{}, 1)
   185  //		eventLog        = []*clientv3.Event{}
   186  //		updateCalled    int32
   187  //		shouldStop      int32
   188  //	)
   189  //
   190  //	opts := NewOptions().
   191  //		SetClient(ec).
   192  //		SetUpdateFn(
   193  //			func(_ string, e []*clientv3.Event) error {
   194  //				atomic.AddInt32(&updateCalled, 1)
   195  //				if len(e) > 0 {
   196  //					eventLog = append(eventLog, e...)
   197  //				}
   198  //				return nil
   199  //			},
   200  //		).
   201  //		SetTickAndStopFn(
   202  //			func(string) bool {
   203  //				if atomic.LoadInt32(&shouldStop) == 0 {
   204  //					return false
   205  //				}
   206  //
   207  //				close(doneCh)
   208  //
   209  //				return true
   210  //			},
   211  //		).
   212  //		SetWatchChanInitTimeout(watchInitAndRetryDelay).
   213  //		SetWatchChanResetInterval(watchInitAndRetryDelay).
   214  //		SetWatchChanCheckInterval(watchCheckInterval)
   215  //
   216  //	integration.WaitClientV3(t, ec)
   217  //
   218  //	wh, err := NewWatchManager(opts)
   219  //	require.NoError(t, err)
   220  //
   221  //	go wh.Watch("foo")
   222  //
   223  //	runtime.Gosched()
   224  //	time.Sleep(10 * time.Millisecond)
   225  //
   226  //	// there should be a valid watch now, trigger a notification
   227  //	_, err = ec.Put(context.Background(), "foo", "bar")
   228  //	require.NoError(t, err)
   229  //
   230  //	leaderIdx := ecluster.WaitLeader(t)
   231  //	require.True(t, leaderIdx >= 0 && leaderIdx < len(ecluster.Address), "got invalid leader")
   232  //
   233  //	// simulate quorum loss
   234  //	ecluster.Address[1].Stop(t)
   235  //	ecluster.Address[2].Stop(t)
   236  //
   237  //	// wait for election timeout, then member[0] will not have a leader.
   238  //	time.Sleep(electionTimeout)
   239  //
   240  //	require.NoError(t, ecluster.Address[1].Restart(t))
   241  //	require.NoError(t, ecluster.Address[2].Restart(t))
   242  //
   243  //	// wait for leader + election delay just in case
   244  //	time.Sleep(time.Duration(3*ecluster.Address[0].ElectionTicks) * tickDuration)
   245  //
   246  //	leaderIdx = ecluster.WaitLeader(t)
   247  //	require.True(t, leaderIdx >= 0 && leaderIdx < len(ecluster.Address), "got invalid leader")
   248  //	integration.WaitClientV3(t, ec) // wait for client to be ready again
   249  //
   250  //	_, err = ec.Put(context.Background(), "foo", "baz")
   251  //	require.NoError(t, err)
   252  //
   253  //	// give some time for watch to be updated
   254  //	require.True(t, clock.WaitUntil(func() bool {
   255  //		return atomic.LoadInt32(&updateCalled) >= 2
   256  //	}, 10*time.Second))
   257  //
   258  //	updates := atomic.LoadInt32(&updateCalled)
   259  //	if updates < 2 {
   260  //		require.Fail(t,
   261  //			"insufficient update calls",
   262  //			"expected at least 2 update attempts, got %d during a partition",
   263  //			updates)
   264  //	}
   265  //
   266  //	atomic.AddInt32(&shouldStop, 1)
   267  //	<-doneCh
   268  //
   269  //	require.Len(t, eventLog, 2)
   270  //	require.NotNil(t, eventLog[0])
   271  //	require.Equal(t, eventLog[0].Kv.Key, []byte("foo"))
   272  //	require.Equal(t, eventLog[0].Kv.Value, []byte("bar"))
   273  //	require.NotNil(t, eventLog[1])
   274  //	require.Equal(t, eventLog[1].Kv.Key, []byte("foo"))
   275  //	require.Equal(t, eventLog[1].Kv.Value, []byte("baz"))
   276  //}
   277  
   278  func TestWatchCompactedRevision(t *testing.T) {
   279  	wh, ec, updateCalled, shouldStop, doneCh, closer := testSetup(t)
   280  	defer closer()
   281  
   282  	integration.WaitClientV3(t, ec)
   283  
   284  	ts := tally.NewTestScope("", nil)
   285  	errC := ts.Counter("errors")
   286  	wh.m.etcdWatchError = errC
   287  
   288  	var compactRev int64
   289  	for i := 1; i <= 10; i++ {
   290  		resp, err := ec.Put(context.Background(), "foo", fmt.Sprintf("bar-%d", i))
   291  		require.NoError(t, err)
   292  		compactRev = resp.Header.Revision
   293  	}
   294  
   295  	_, err := ec.Compact(context.Background(), compactRev)
   296  	require.NoError(t, err)
   297  
   298  	wh.opts = wh.opts.SetWatchOptions([]clientv3.OpOption{
   299  		clientv3.WithCreatedNotify(),
   300  		clientv3.WithRev(1),
   301  	})
   302  
   303  	go wh.Watch("foo")
   304  
   305  	require.True(t, clock.WaitUntil(func() bool {
   306  		return atomic.LoadInt32(updateCalled) == 3
   307  	}, 30*time.Second))
   308  
   309  	lastRead := atomic.LoadInt32(updateCalled)
   310  	ec.Put(context.Background(), "foo", "bar-11")
   311  
   312  	for atomic.LoadInt32(updateCalled) <= lastRead {
   313  		time.Sleep(10 * time.Millisecond)
   314  	}
   315  
   316  	errN := ts.Snapshot().Counters()["errors+"].Value()
   317  	assert.Equal(t, int64(1), errN, "expected to encounter watch error")
   318  
   319  	atomic.AddInt32(shouldStop, 1)
   320  	<-doneCh
   321  }
   322  
   323  func testCluster(t *testing.T) (
   324  	*manager,
   325  	*integration.Cluster,
   326  	*int32,
   327  	*int32,
   328  	chan struct{},
   329  	func(),
   330  ) {
   331  	integration.BeforeTestExternal(t)
   332  	ecluster := integration.NewCluster(t, &integration.ClusterConfig{
   333  		Size:      1,
   334  		UseBridge: true,
   335  	})
   336  
   337  	closer := func() {
   338  		ecluster.Terminate(t)
   339  	}
   340  
   341  	var (
   342  		updateCalled int32
   343  		shouldStop   int32
   344  	)
   345  	doneCh := make(chan struct{}, 1)
   346  	opts := NewOptions().
   347  		SetClient(ecluster.RandClient()).
   348  		SetUpdateFn(func(string, []*clientv3.Event) error {
   349  			atomic.AddInt32(&updateCalled, 1)
   350  			return nil
   351  		}).
   352  		SetTickAndStopFn(func(string) bool {
   353  			if atomic.LoadInt32(&shouldStop) == 0 {
   354  				return false
   355  			}
   356  
   357  			close(doneCh)
   358  
   359  			return true
   360  		}).
   361  		SetWatchChanCheckInterval(100 * time.Millisecond).
   362  		SetWatchChanInitTimeout(100 * time.Millisecond).
   363  		SetWatchChanResetInterval(100 * time.Millisecond)
   364  
   365  	wh, err := NewWatchManager(opts)
   366  	require.NoError(t, err)
   367  
   368  	return wh.(*manager), ecluster, &updateCalled, &shouldStop, doneCh, closer
   369  }
   370  
   371  func testSetup(t *testing.T) (*manager, *clientv3.Client, *int32, *int32, chan struct{}, func()) {
   372  	wh, ecluster, updateCalled, shouldStop, donech, closer := testCluster(t)
   373  	return wh, ecluster.RandClient(), updateCalled, shouldStop, donech, closer
   374  }