github.com/m3db/m3@v1.5.0/src/cluster/etcd/watchmanager/manager_test.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package watchmanager

import (
	"fmt"
	"runtime"
	"sync/atomic"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"github.com/uber-go/tally"
	clientv3 "go.etcd.io/etcd/client/v3"
	"go.etcd.io/etcd/tests/v3/framework/integration"
	"golang.org/x/net/context"

	"github.com/m3db/m3/src/x/clock"
)

func TestWatchChan(t *testing.T) {
	wh, ecluster, _, _, _, closer := testCluster(t)
	defer closer()

	ec := ecluster.RandClient()
	integration.WaitClientV3(t, ec)

	wc, _, err := wh.watchChanWithTimeout("foo", 0)
	require.NoError(t, err)
	require.Equal(t, 0, len(wc))

	_, err = ec.Put(context.Background(), "foo", "v")
	require.NoError(t, err)

	select {
	case <-wc:
	case <-time.After(time.Second):
		require.Fail(t, "could not get notification")
	}

	ecluster.Members[0].Stop(t)

	before := time.Now()
	_, _, err = wh.watchChanWithTimeout("foo", 0)
	require.WithinDuration(t, time.Now(), before, 150*time.Millisecond)
	require.Error(t, err)
	require.NoError(t, ecluster.Members[0].Restart(t))
}

func TestWatchSimple(t *testing.T) {
	wh, ec, updateCalled, shouldStop, doneCh, closer := testSetup(t)
	defer closer()
	integration.WaitClientV3(t, ec)
	require.Equal(t, int32(0), atomic.LoadInt32(updateCalled))

	go wh.Watch("foo")

	time.Sleep(3 * wh.opts.WatchChanInitTimeout())

	lastRead := atomic.LoadInt32(updateCalled)
	_, err := ec.Put(context.Background(), "foo", "v")
	require.NoError(t, err)

	for {
		if atomic.LoadInt32(updateCalled) >= lastRead+1 {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}

	lastRead = atomic.LoadInt32(updateCalled)
	_, err = ec.Put(context.Background(), "foo", "v")
	require.NoError(t, err)

	for {
		if atomic.LoadInt32(updateCalled) >= lastRead+1 {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}

	// trigger CheckAndStop
	atomic.AddInt32(shouldStop, 1)
	<-doneCh

	lastRead = atomic.LoadInt32(updateCalled)
	_, err = ec.Put(context.Background(), "foo", "v")
	require.NoError(t, err)
	// put no longer triggers anything
	require.Equal(t, lastRead, atomic.LoadInt32(updateCalled))

	// sleep enough time and make sure nothing happens
	time.Sleep(3 * wh.opts.WatchChanCheckInterval())

	require.Equal(t, lastRead, atomic.LoadInt32(updateCalled))
}
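
// TestWatchRecreate verifies that the watch is recreated after the initial
// channel creation fails: the single member's bridge is blackholed so the
// first attempts time out, and once connectivity is restored the manager
// resets the watch and continues delivering updates.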
func TestWatchRecreate(t *testing.T) {
	wh, ecluster, updateCalled, shouldStop, doneCh, closer := testCluster(t)
	defer closer()

	ec := ecluster.RandClient()
	integration.WaitClientV3(t, ec)

	failTotal := 1
	wh.opts = wh.opts.
		SetClient(ec).
		SetWatchChanInitTimeout(50 * time.Millisecond).
		SetWatchChanResetInterval(50 * time.Millisecond)

	go func() {
		ecluster.Members[0].Bridge().DropConnections()
		ecluster.Members[0].Bridge().Blackhole()
		wh.Watch("foo")
	}()

	time.Sleep(4 * wh.opts.WatchChanInitTimeout())

	// watch will error out but updateFn will be tried
	for i := 0; i < 100; i++ {
		if atomic.LoadInt32(updateCalled) >= int32(failTotal) {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}

	ecluster.Members[0].Bridge().Unblackhole()
	// now we have retried failTotal times, give enough time for reset to happen
	time.Sleep(3 * (wh.opts.WatchChanResetInterval()))

	updatesBefore := atomic.LoadInt32(updateCalled)
	// there should be a valid watch now, trigger a notification
	_, err := ec.Put(context.Background(), "foo", "v")
	require.NoError(t, err)

	for i := 0; i < 100; i++ {
		if atomic.LoadInt32(updateCalled) > updatesBefore {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}

	// clean up the background goroutine
	atomic.AddInt32(shouldStop, 1)
	<-doneCh
}
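
// TestWatchNoLeader simulates quorum loss in a 3-member cluster: two members
// are stopped so the remaining member loses its leader, then restarted, and
// the test asserts that the watch keeps delivering both the pre- and
// post-partition updates.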
func TestWatchNoLeader(t *testing.T) {
	t.Skip("flaky, started to fail very consistently on CI")
	const (
		watchInitAndRetryDelay = 200 * time.Millisecond
		watchCheckInterval     = 50 * time.Millisecond
	)

	integration.BeforeTestExternal(t)
	ecluster := integration.NewCluster(t, &integration.ClusterConfig{Size: 3})
	defer ecluster.Terminate(t)

	var (
		ec              = ecluster.Client(0)
		tickDuration    = 10 * time.Millisecond
		electionTimeout = time.Duration(3*ecluster.Members[0].ElectionTicks) * tickDuration
		doneCh          = make(chan struct{}, 1)
		eventLog        = []*clientv3.Event{}
		updateCalled    int32
		shouldStop      int32
	)

	opts := NewOptions().
		SetClient(ec).
		SetUpdateFn(
			func(_ string, e []*clientv3.Event) error {
				atomic.AddInt32(&updateCalled, 1)
				if len(e) > 0 {
					eventLog = append(eventLog, e...)
				}
				return nil
			},
		).
		SetTickAndStopFn(
			func(string) bool {
				if atomic.LoadInt32(&shouldStop) == 0 {
					return false
				}

				close(doneCh)

				return true
			},
		).
		SetWatchChanInitTimeout(watchInitAndRetryDelay).
		SetWatchChanResetInterval(watchInitAndRetryDelay).
		SetWatchChanCheckInterval(watchCheckInterval)

	integration.WaitClientV3(t, ec)

	wh, err := NewWatchManager(opts)
	require.NoError(t, err)

	go wh.Watch("foo")

	runtime.Gosched()
	time.Sleep(10 * time.Millisecond)

	// there should be a valid watch now, trigger a notification
	_, err = ec.Put(context.Background(), "foo", "bar")
	require.NoError(t, err)

	leaderIdx := ecluster.WaitLeader(t)
	require.True(t, leaderIdx >= 0 && leaderIdx < len(ecluster.Members), "got invalid leader")

	// simulate quorum loss
	ecluster.Members[1].Stop(t)
	ecluster.Members[2].Stop(t)

	// wait for election timeout, then member[0] will not have a leader.
	time.Sleep(electionTimeout)

	require.NoError(t, ecluster.Members[1].Restart(t))
	require.NoError(t, ecluster.Members[2].Restart(t))

	// wait for leader + election delay just in case
	time.Sleep(time.Duration(3*ecluster.Members[0].ElectionTicks) * tickDuration)

	leaderIdx = ecluster.WaitLeader(t)
	require.True(t, leaderIdx >= 0 && leaderIdx < len(ecluster.Members), "got invalid leader")
	integration.WaitClientV3(t, ec) // wait for client to be ready again

	_, err = ec.Put(context.Background(), "foo", "baz")
	require.NoError(t, err)

	// give some time for watch to be updated
	require.True(t, clock.WaitUntil(func() bool {
		return atomic.LoadInt32(&updateCalled) >= 2
	}, 10*time.Second))

	updates := atomic.LoadInt32(&updateCalled)
	if updates < 2 {
		require.Fail(t,
			"insufficient update calls",
			"expected at least 2 update attempts, got %d during a partition",
			updates)
	}

	atomic.AddInt32(&shouldStop, 1)
	<-doneCh

	require.Len(t, eventLog, 2)
	require.NotNil(t, eventLog[0])
	require.Equal(t, eventLog[0].Kv.Key, []byte("foo"))
	require.Equal(t, eventLog[0].Kv.Value, []byte("bar"))
	require.NotNil(t, eventLog[1])
	require.Equal(t, eventLog[1].Kv.Key, []byte("foo"))
	require.Equal(t, eventLog[1].Kv.Value, []byte("baz"))
}

func TestWatchCompactedRevision(t *testing.T) {
	wh, ec, updateCalled, shouldStop, doneCh, closer := testSetup(t)
	defer closer()

	integration.WaitClientV3(t, ec)

	ts := tally.NewTestScope("", nil)
	errC := ts.Counter("errors")
	wh.m.etcdWatchError = errC

	var compactRev int64
	for i := 1; i <= 10; i++ {
		resp, err := ec.Put(context.Background(), "foo", fmt.Sprintf("bar-%d", i))
		require.NoError(t, err)
		compactRev = resp.Header.Revision
	}

	_, err := ec.Compact(context.Background(), compactRev)
	require.NoError(t, err)

	wh.opts = wh.opts.SetWatchOptions([]clientv3.OpOption{
		clientv3.WithCreatedNotify(),
		clientv3.WithRev(1),
	})

	go wh.Watch("foo")

	require.True(t, clock.WaitUntil(func() bool {
		return atomic.LoadInt32(updateCalled) == 3
	}, 30*time.Second))

	lastRead := atomic.LoadInt32(updateCalled)
	ec.Put(context.Background(), "foo", "bar-11")

	for atomic.LoadInt32(updateCalled) <= lastRead {
		time.Sleep(10 * time.Millisecond)
	}

	errN := ts.Snapshot().Counters()["errors+"].Value()
	assert.Equal(t, int64(1), errN, "expected to encounter watch error")

	atomic.AddInt32(shouldStop, 1)
	<-doneCh
}
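
// testCluster starts a single-member etcd test cluster (with a bridge so
// tests can drop or blackhole connections) and returns a watch manager wired
// to it, along with a counter of update calls, a stop flag, a done channel
// closed once the tick-and-stop callback fires, and a closer that terminates
// the cluster.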
func testCluster(t *testing.T) (
	*manager,
	*integration.Cluster,
	*int32,
	*int32,
	chan struct{},
	func(),
) {
	integration.BeforeTestExternal(t)
	ecluster := integration.NewCluster(t, &integration.ClusterConfig{
		Size:      1,
		UseBridge: true,
	})

	closer := func() {
		ecluster.Terminate(t)
	}

	var (
		updateCalled int32
		shouldStop   int32
	)
	doneCh := make(chan struct{}, 1)
	opts := NewOptions().
		SetClient(ecluster.RandClient()).
		SetUpdateFn(func(string, []*clientv3.Event) error {
			atomic.AddInt32(&updateCalled, 1)
			return nil
		}).
		SetTickAndStopFn(func(string) bool {
			if atomic.LoadInt32(&shouldStop) == 0 {
				return false
			}

			close(doneCh)

			return true
		}).
		SetWatchChanCheckInterval(100 * time.Millisecond).
		SetWatchChanInitTimeout(100 * time.Millisecond).
		SetWatchChanResetInterval(100 * time.Millisecond)

	wh, err := NewWatchManager(opts)
	require.NoError(t, err)

	return wh.(*manager), ecluster, &updateCalled, &shouldStop, doneCh, closer
}

func testSetup(t *testing.T) (*manager, *clientv3.Client, *int32, *int32, chan struct{}, func()) {
	wh, ecluster, updateCalled, shouldStop, donech, closer := testCluster(t)
	return wh, ecluster.RandClient(), updateCalled, shouldStop, donech, closer
}
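
// The helper below is not part of the original test suite; it is a minimal,
// hedged sketch of how a caller might wire up a watch manager using only the
// option setters already exercised by the tests above (SetClient, SetUpdateFn,
// SetTickAndStopFn and the watch-channel timing options). The name
// runExampleWatch and the chosen intervals are illustrative assumptions, not
// part of the package API.
func runExampleWatch(ec *clientv3.Client, key string) (func(), error) {
	var stopped int32
	doneCh := make(chan struct{})

	opts := NewOptions().
		SetClient(ec).
		SetUpdateFn(func(string, []*clientv3.Event) error {
			// react to updates for key here
			return nil
		}).
		SetTickAndStopFn(func(string) bool {
			if atomic.LoadInt32(&stopped) == 0 {
				return false
			}
			close(doneCh)
			return true
		}).
		SetWatchChanInitTimeout(100 * time.Millisecond).
		SetWatchChanCheckInterval(100 * time.Millisecond).
		SetWatchChanResetInterval(100 * time.Millisecond)

	wm, err := NewWatchManager(opts)
	if err != nil {
		return nil, err
	}
	go wm.Watch(key)

	stop := func() {
		// signal the tick-and-stop callback; it closes doneCh on its next check
		atomic.AddInt32(&stopped, 1)
		<-doneCh
	}
	return stop, nil
}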