github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/cluster/etcd/watchmanager/manager_test.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package watchmanager

import (
	"fmt"
	"sync/atomic"
	"testing"
	"time"

	integration "github.com/m3db/m3/src/integration/resources/docker/dockerexternal/etcdintegration"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"github.com/uber-go/tally"
	clientv3 "go.etcd.io/etcd/client/v3"
	"golang.org/x/net/context"

	"github.com/m3db/m3/src/x/clock"
)

func TestWatchChan(t *testing.T) {
	wh, ecluster, _, _, _, closer := testCluster(t)
	defer closer()

	ec := ecluster.RandClient()
	integration.WaitClientV3(t, ec)

	wc, _, err := wh.watchChanWithTimeout("foo", 0)
	require.NoError(t, err)
	require.Equal(t, 0, len(wc))

	_, err = ec.Put(context.Background(), "foo", "v")
	require.NoError(t, err)

	select {
	case <-wc:
	case <-time.After(time.Second):
		require.Fail(t, "could not get notification")
	}

	ecluster.Members[0].Stop(t)

	before := time.Now()
	_, _, err = wh.watchChanWithTimeout("foo", 0)
	require.WithinDuration(t, time.Now(), before, 150*time.Millisecond)
	require.Error(t, err)
	require.NoError(t, ecluster.Members[0].Restart(t))
}

func TestWatchSimple(t *testing.T) {
	wh, ec, updateCalled, shouldStop, doneCh, closer := testSetup(t)
	defer closer()
	integration.WaitClientV3(t, ec)
	require.Equal(t, int32(0), atomic.LoadInt32(updateCalled))

	go wh.Watch("foo")

	time.Sleep(3 * wh.opts.WatchChanInitTimeout())

	lastRead := atomic.LoadInt32(updateCalled)
	_, err := ec.Put(context.Background(), "foo", "v")
	require.NoError(t, err)

	for {
		if atomic.LoadInt32(updateCalled) >= lastRead+1 {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}

	lastRead = atomic.LoadInt32(updateCalled)
	_, err = ec.Put(context.Background(), "foo", "v")
	require.NoError(t, err)

	for {
		if atomic.LoadInt32(updateCalled) >= lastRead+1 {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}

	// trigger CheckAndStop
	atomic.AddInt32(shouldStop, 1)
	<-doneCh

	lastRead = atomic.LoadInt32(updateCalled)
	_, err = ec.Put(context.Background(), "foo", "v")
	require.NoError(t, err)
	// put no longer triggers anything
	require.Equal(t, lastRead, atomic.LoadInt32(updateCalled))

	// sleep enough time and make sure nothing happens
	time.Sleep(3 * wh.opts.WatchChanCheckInterval())

	require.Equal(t, lastRead, atomic.LoadInt32(updateCalled))
}

func TestWatchRecreate(t *testing.T) {
	wh, ecluster, updateCalled, shouldStop, doneCh, closer := testCluster(t)
	defer closer()

	ec := ecluster.RandClient()
	integration.WaitClientV3(t, ec)

	failTotal := 1
	wh.opts = wh.opts.
		SetClient(ec).
		SetWatchChanInitTimeout(50 * time.Millisecond).
		SetWatchChanResetInterval(50 * time.Millisecond)

	go func() {
		ecluster.Members[0].Bridge().DropConnections()
		ecluster.Members[0].Bridge().Blackhole()
		wh.Watch("foo")
	}()

	time.Sleep(4 * wh.opts.WatchChanInitTimeout())

	// watch will error out but updateFn will be tried
	for i := 0; i < 100; i++ {
		if atomic.LoadInt32(updateCalled) >= int32(failTotal) {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}

	ecluster.Members[0].Bridge().Unblackhole()
	// now we have retried failTotal times, give enough time for reset to happen
	time.Sleep(3 * (wh.opts.WatchChanResetInterval()))

	updatesBefore := atomic.LoadInt32(updateCalled)
	// there should be a valid watch now, trigger a notification
	_, err := ec.Put(context.Background(), "foo", "v")
	require.NoError(t, err)

	for i := 0; i < 100; i++ {
		if atomic.LoadInt32(updateCalled) > updatesBefore {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}

	// clean up the background go routine
	atomic.AddInt32(shouldStop, 1)
	<-doneCh
}

// TODO: this test has been skipped for a while, and now doesn't work with the docker based etcd integration package.
// Revive it if it's useful, and make it no longer flake.
//nolint:gocritic
//func TestWatchNoLeader(t *testing.T) {
//	t.Skip("flaky, started to fail very consistently on CI")
//	const (
//		watchInitAndRetryDelay = 200 * time.Millisecond
//		watchCheckInterval     = 50 * time.Millisecond
//	)
//
//	integration.BeforeTestExternal(t)
//	ecluster := integration.NewCluster(t, &integration.ClusterConfig{Size: 3})
//	defer ecluster.Terminate(t)
//
//	var (
//		ec              = ecluster.Client(0)
//		tickDuration    = 10 * time.Millisecond
//		electionTimeout = time.Duration(3*ecluster.Address[0].ElectionTicks) * tickDuration
//		doneCh          = make(chan struct{}, 1)
//		eventLog        = []*clientv3.Event{}
//		updateCalled    int32
//		shouldStop      int32
//	)
//
//	opts := NewOptions().
//		SetClient(ec).
//		SetUpdateFn(
//			func(_ string, e []*clientv3.Event) error {
//				atomic.AddInt32(&updateCalled, 1)
//				if len(e) > 0 {
//					eventLog = append(eventLog, e...)
//				}
//				return nil
//			},
//		).
//		SetTickAndStopFn(
//			func(string) bool {
//				if atomic.LoadInt32(&shouldStop) == 0 {
//					return false
//				}
//
//				close(doneCh)
//
//				return true
//			},
//		).
//		SetWatchChanInitTimeout(watchInitAndRetryDelay).
//		SetWatchChanResetInterval(watchInitAndRetryDelay).
//		SetWatchChanCheckInterval(watchCheckInterval)
//
//	integration.WaitClientV3(t, ec)
//
//	wh, err := NewWatchManager(opts)
//	require.NoError(t, err)
//
//	go wh.Watch("foo")
//
//	runtime.Gosched()
//	time.Sleep(10 * time.Millisecond)
//
//	// there should be a valid watch now, trigger a notification
//	_, err = ec.Put(context.Background(), "foo", "bar")
//	require.NoError(t, err)
//
//	leaderIdx := ecluster.WaitLeader(t)
//	require.True(t, leaderIdx >= 0 && leaderIdx < len(ecluster.Address), "got invalid leader")
//
//	// simulate quorum loss
//	ecluster.Address[1].Stop(t)
//	ecluster.Address[2].Stop(t)
//
//	// wait for election timeout, then member[0] will not have a leader.
//	time.Sleep(electionTimeout)
//
//	require.NoError(t, ecluster.Address[1].Restart(t))
//	require.NoError(t, ecluster.Address[2].Restart(t))
//
//	// wait for leader + election delay just in case
//	time.Sleep(time.Duration(3*ecluster.Address[0].ElectionTicks) * tickDuration)
//
//	leaderIdx = ecluster.WaitLeader(t)
//	require.True(t, leaderIdx >= 0 && leaderIdx < len(ecluster.Address), "got invalid leader")
//	integration.WaitClientV3(t, ec) // wait for client to be ready again
//
//	_, err = ec.Put(context.Background(), "foo", "baz")
//	require.NoError(t, err)
//
//	// give some time for watch to be updated
//	require.True(t, clock.WaitUntil(func() bool {
//		return atomic.LoadInt32(&updateCalled) >= 2
//	}, 10*time.Second))
//
//	updates := atomic.LoadInt32(&updateCalled)
//	if updates < 2 {
//		require.Fail(t,
//			"insufficient update calls",
//			"expected at least 2 update attempts, got %d during a partition",
//			updates)
//	}
//
//	atomic.AddInt32(&shouldStop, 1)
//	<-doneCh
//
//	require.Len(t, eventLog, 2)
//	require.NotNil(t, eventLog[0])
//	require.Equal(t, eventLog[0].Kv.Key, []byte("foo"))
//	require.Equal(t, eventLog[0].Kv.Value, []byte("bar"))
//	require.NotNil(t, eventLog[1])
//	require.Equal(t, eventLog[1].Kv.Key, []byte("foo"))
//	require.Equal(t, eventLog[1].Kv.Value, []byte("baz"))
//}

func TestWatchCompactedRevision(t *testing.T) {
	wh, ec, updateCalled, shouldStop, doneCh, closer := testSetup(t)
	defer closer()

	integration.WaitClientV3(t, ec)

	ts := tally.NewTestScope("", nil)
	errC := ts.Counter("errors")
	wh.m.etcdWatchError = errC

	var compactRev int64
	for i := 1; i <= 10; i++ {
		resp, err := ec.Put(context.Background(), "foo", fmt.Sprintf("bar-%d", i))
		require.NoError(t, err)
		compactRev = resp.Header.Revision
	}

	_, err := ec.Compact(context.Background(), compactRev)
	require.NoError(t, err)

	wh.opts = wh.opts.SetWatchOptions([]clientv3.OpOption{
		clientv3.WithCreatedNotify(),
		clientv3.WithRev(1),
	})

	go wh.Watch("foo")

	require.True(t, clock.WaitUntil(func() bool {
		return atomic.LoadInt32(updateCalled) == 3
	}, 30*time.Second))

	lastRead := atomic.LoadInt32(updateCalled)
	ec.Put(context.Background(), "foo", "bar-11")

	for atomic.LoadInt32(updateCalled) <= lastRead {
		time.Sleep(10 * time.Millisecond)
	}

	errN := ts.Snapshot().Counters()["errors+"].Value()
	assert.Equal(t, int64(1), errN, "expected to encounter watch error")

	atomic.AddInt32(shouldStop, 1)
	<-doneCh
}

func testCluster(t *testing.T) (
	*manager,
	*integration.Cluster,
	*int32,
	*int32,
	chan struct{},
	func(),
) {
	integration.BeforeTestExternal(t)
	ecluster := integration.NewCluster(t, &integration.ClusterConfig{
		Size:      1,
		UseBridge: true,
	})

	closer := func() {
		ecluster.Terminate(t)
	}

	var (
		updateCalled int32
		shouldStop   int32
	)
	doneCh := make(chan struct{}, 1)
	opts := NewOptions().
		SetClient(ecluster.RandClient()).
		SetUpdateFn(func(string, []*clientv3.Event) error {
			atomic.AddInt32(&updateCalled, 1)
			return nil
		}).
		SetTickAndStopFn(func(string) bool {
			if atomic.LoadInt32(&shouldStop) == 0 {
				return false
			}

			close(doneCh)

			return true
		}).
		SetWatchChanCheckInterval(100 * time.Millisecond).
		SetWatchChanInitTimeout(100 * time.Millisecond).
		SetWatchChanResetInterval(100 * time.Millisecond)

	wh, err := NewWatchManager(opts)
	require.NoError(t, err)

	return wh.(*manager), ecluster, &updateCalled, &shouldStop, doneCh, closer
}

func testSetup(t *testing.T) (*manager, *clientv3.Client, *int32, *int32, chan struct{}, func()) {
	wh, ecluster, updateCalled, shouldStop, donech, closer := testCluster(t)
	return wh, ecluster.RandClient(), updateCalled, shouldStop, donech, closer
}