github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/pkg/election/election_test.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package election 15 16 import ( 17 "context" 18 "fmt" 19 "net/url" 20 "sync" 21 "testing" 22 "time" 23 24 . "github.com/pingcap/check" 25 "github.com/pingcap/failpoint" 26 "github.com/pingcap/tiflow/dm/pkg/etcdutil" 27 "github.com/pingcap/tiflow/dm/pkg/log" 28 "github.com/pingcap/tiflow/dm/pkg/terror" 29 "github.com/pingcap/tiflow/dm/pkg/utils" 30 "github.com/tikv/pd/pkg/utils/tempurl" 31 clientv3 "go.etcd.io/etcd/client/v3" 32 "go.etcd.io/etcd/server/v3/embed" 33 ) 34 35 var _ = SerialSuites(&testElectionSuite{}) 36 37 func TestSuite(t *testing.T) { 38 TestingT(t) 39 } 40 41 type testElectionSuite struct { 42 etcd *embed.Etcd 43 endPoint string 44 45 notifyBlockTime time.Duration 46 } 47 48 func (t *testElectionSuite) SetUpTest(c *C) { 49 c.Assert(log.InitLogger(&log.Config{}), IsNil) 50 51 cfg := embed.NewConfig() 52 cfg.Name = "election-test" 53 cfg.Dir = c.MkDir() 54 cfg.ZapLoggerBuilder = embed.NewZapCoreLoggerBuilder(log.L().Logger, log.L().Core(), log.Props().Syncer) 55 cfg.Logger = "zap" 56 err := cfg.Validate() // verify & trigger the builder 57 c.Assert(err, IsNil) 58 59 t.endPoint = tempurl.Alloc() 60 url2, err := url.Parse(t.endPoint) 61 c.Assert(err, IsNil) 62 cfg.ListenClientUrls = []url.URL{*url2} 63 cfg.AdvertiseClientUrls = cfg.ListenClientUrls 64 65 url2, err = url.Parse(tempurl.Alloc()) 66 c.Assert(err, IsNil) 67 cfg.ListenPeerUrls = []url.URL{*url2} 68 cfg.AdvertisePeerUrls = cfg.ListenPeerUrls 69 70 cfg.InitialCluster = fmt.Sprintf("%s=%s", cfg.Name, url2) 71 cfg.ClusterState = embed.ClusterStateFlagNew 72 73 t.etcd, err = embed.StartEtcd(cfg) 74 c.Assert(err, IsNil) 75 select { 76 case <-t.etcd.Server.ReadyNotify(): 77 case <-time.After(10 * time.Second): 78 c.Fatal("start embed etcd timeout") 79 } 80 81 // some notify leader information is not handled, just reduce the block time and ignore them 82 t.notifyBlockTime = 100 * time.Millisecond 83 } 84 85 func (t *testElectionSuite) TearDownTest(c *C) { 86 t.etcd.Close() 87 } 88 89 func testElection2After1(t *testElectionSuite, c *C, normalExit bool) { 90 var ( 91 timeout = 3 * time.Second 92 sessionTTL = 60 93 key = "unit-test/election-2-after-1" 94 ID1 = "member1" 95 ID2 = "member2" 96 ID3 = "member3" 97 addr1 = "127.0.0.1:1" 98 addr2 = "127.0.0.1:2" 99 addr3 = "127.0.0.1:3" 100 ) 101 cli, err := etcdutil.CreateClient([]string{t.endPoint}, nil) 102 c.Assert(err, IsNil) 103 defer cli.Close() 104 ctx0, cancel0 := context.WithCancel(context.Background()) 105 defer cancel0() 106 _, err = cli.Delete(ctx0, key, clientv3.WithPrefix()) 107 c.Assert(err, IsNil) 108 109 ctx1, cancel1 := context.WithCancel(context.Background()) 110 defer cancel1() 111 if !normalExit { 112 c.Assert(failpoint.Enable("github.com/pingcap/tiflow/dm/pkg/election/mockCampaignLoopExitedAbnormally", `return()`), IsNil) 113 //nolint:errcheck 114 defer failpoint.Disable("github.com/pingcap/tiflow/dm/pkg/election/mockCampaignLoopExitedAbnormally") 115 } 116 e1, err := NewElection(ctx1, cli, sessionTTL, key, ID1, addr1, t.notifyBlockTime) 117 c.Assert(err, IsNil) 118 defer e1.Close() 119 120 // e1 should become the leader 121 select { 122 case leader := <-e1.LeaderNotify(): 123 c.Assert(leader.ID, Equals, ID1) 124 case <-time.After(timeout): 125 c.Fatal("leader campaign timeout") 126 } 127 c.Assert(e1.IsLeader(), IsTrue) 128 _, leaderID, leaderAddr, err := e1.LeaderInfo(ctx1) 129 c.Assert(err, IsNil) 130 c.Assert(leaderID, Equals, e1.ID()) 131 c.Assert(leaderAddr, Equals, addr1) 132 if !normalExit { 133 c.Assert(failpoint.Disable("github.com/pingcap/tiflow/dm/pkg/election/mockCampaignLoopExitedAbnormally"), IsNil) 134 } 135 136 // start e2 137 ctx2, cancel2 := context.WithCancel(context.Background()) 138 defer cancel2() 139 e2, err := NewElection(ctx2, cli, sessionTTL, key, ID2, addr2, t.notifyBlockTime) 140 c.Assert(err, IsNil) 141 defer e2.Close() 142 select { 143 case leader := <-e2.leaderCh: 144 c.Assert(leader.ID, Equals, ID1) 145 case <-time.After(timeout): 146 c.Fatal("leader campaign timeout") 147 } 148 // but the leader should still be e1 149 _, leaderID, leaderAddr, err = e2.LeaderInfo(ctx2) 150 c.Assert(err, IsNil) 151 c.Assert(leaderID, Equals, e1.ID()) 152 c.Assert(leaderAddr, Equals, addr1) 153 c.Assert(e2.IsLeader(), IsFalse) 154 155 var wg sync.WaitGroup 156 e1.Close() // stop the campaign for e1 157 c.Assert(e1.IsLeader(), IsFalse) 158 159 ctx3, cancel3 := context.WithTimeout(context.Background(), 3*time.Second) 160 defer cancel3() 161 deleted, err := e2.ClearSessionIfNeeded(ctx3, ID1) 162 c.Assert(err, IsNil) 163 if normalExit { 164 // for normally exited election, session has already been closed before 165 c.Assert(deleted, IsFalse) 166 } else { 167 // for abnormally exited election, session will be cleared here 168 c.Assert(deleted, IsTrue) 169 } 170 171 // e2 should become the leader 172 select { 173 case leader := <-e2.LeaderNotify(): 174 c.Assert(leader.ID, Equals, ID2) 175 case <-time.After(timeout): 176 c.Fatal("leader campaign timeout") 177 } 178 c.Assert(e2.IsLeader(), IsTrue) 179 _, leaderID, leaderAddr, err = e2.LeaderInfo(ctx2) 180 c.Assert(err, IsNil) 181 c.Assert(leaderID, Equals, e2.ID()) 182 c.Assert(leaderAddr, Equals, addr2) 183 184 // only e2's election info is left in etcd 185 ctx4, cancel4 := context.WithTimeout(context.Background(), 3*time.Second) 186 defer cancel4() 187 resp, err := cli.Get(ctx4, key, clientv3.WithPrefix()) 188 c.Assert(err, IsNil) 189 c.Assert(resp.Kvs, HasLen, 1) 190 191 // if closing the client when campaigning, we should get an error 192 wg.Add(1) 193 go func() { 194 defer wg.Done() 195 select { 196 case err2 := <-e2.ErrorNotify(): 197 c.Assert(terror.ErrElectionCampaignFail.Equal(err2), IsTrue) 198 // the old session is done, but we can't create a new one. 199 c.Assert(err2, ErrorMatches, ".*fail to campaign leader: create a new session.*") 200 case <-time.After(timeout): 201 c.Fatal("do not receive error for e2") 202 } 203 }() 204 cli.Close() // close the client 205 wg.Wait() 206 207 // can not elect with closed client. 208 ctx5, cancel5 := context.WithCancel(context.Background()) 209 defer cancel5() 210 _, err = NewElection(ctx5, cli, sessionTTL, key, ID3, addr3, t.notifyBlockTime) 211 c.Assert(terror.ErrElectionCampaignFail.Equal(err), IsTrue) 212 c.Assert(err, ErrorMatches, ".*Message: fail to campaign leader: create the initial session, RawCause: context canceled.*") 213 cancel0() 214 } 215 216 func (t *testElectionSuite) TestElection2After1(c *C) { 217 testElection2After1(t, c, true) 218 testElection2After1(t, c, false) 219 } 220 221 func (t *testElectionSuite) TestElectionAlways1(c *C) { 222 var ( 223 timeout = 3 * time.Second 224 sessionTTL = 60 225 key = "unit-test/election-always-1" 226 ID1 = "member1" 227 ID2 = "member2" 228 addr1 = "127.0.0.1:1234" 229 addr2 = "127.0.0.1:2345" 230 ) 231 cli, err := etcdutil.CreateClient([]string{t.endPoint}, nil) 232 c.Assert(err, IsNil) 233 defer cli.Close() 234 235 ctx1, cancel1 := context.WithCancel(context.Background()) 236 defer cancel1() 237 e1, err := NewElection(ctx1, cli, sessionTTL, key, ID1, addr1, t.notifyBlockTime) 238 c.Assert(err, IsNil) 239 defer e1.Close() 240 241 // e1 should become the leader 242 select { 243 case leader := <-e1.LeaderNotify(): 244 c.Assert(leader.ID, Equals, ID1) 245 case <-time.After(timeout): 246 c.Fatal("leader campaign timeout") 247 } 248 c.Assert(e1.IsLeader(), IsTrue) 249 _, leaderID, leaderAddr, err := e1.LeaderInfo(ctx1) 250 c.Assert(err, IsNil) 251 c.Assert(leaderID, Equals, e1.ID()) 252 c.Assert(leaderAddr, Equals, addr1) 253 254 // start e2 255 ctx2, cancel2 := context.WithCancel(context.Background()) 256 defer cancel2() 257 e2, err := NewElection(ctx2, cli, sessionTTL, key, ID2, addr2, t.notifyBlockTime) 258 c.Assert(err, IsNil) 259 defer e2.Close() 260 time.Sleep(100 * time.Millisecond) // wait 100ms to start the campaign 261 // but the leader should still be e1 262 _, leaderID, leaderAddr, err = e2.LeaderInfo(ctx2) 263 c.Assert(err, IsNil) 264 c.Assert(leaderID, Equals, e1.ID()) 265 c.Assert(leaderAddr, Equals, addr1) 266 c.Assert(e2.IsLeader(), IsFalse) 267 268 // cancel the campaign for e2, should get no errors 269 var wg sync.WaitGroup 270 wg.Add(1) 271 go func() { 272 defer wg.Done() 273 select { 274 case err2 := <-e2.ErrorNotify(): 275 c.Fatalf("cancel the campaign should not get an error, %v", err2) 276 case <-time.After(timeout): // wait 3s 277 } 278 }() 279 cancel2() 280 wg.Wait() 281 282 // e1 is still the leader 283 c.Assert(e1.IsLeader(), IsTrue) 284 _, leaderID, leaderAddr, err = e1.LeaderInfo(ctx1) 285 c.Assert(err, IsNil) 286 c.Assert(leaderID, Equals, e1.ID()) 287 c.Assert(leaderAddr, Equals, addr1) 288 c.Assert(e2.IsLeader(), IsFalse) 289 } 290 291 func (t *testElectionSuite) TestElectionEvictLeader(c *C) { 292 var ( 293 timeout = 3 * time.Second 294 sessionTTL = 60 295 key = "unit-test/election-evict-leader" 296 ID1 = "member1" 297 ID2 = "member2" 298 addr1 = "127.0.0.1:1234" 299 addr2 = "127.0.0.1:2345" 300 ) 301 cli, err := etcdutil.CreateClient([]string{t.endPoint}, nil) 302 c.Assert(err, IsNil) 303 defer cli.Close() 304 305 ctx1, cancel1 := context.WithCancel(context.Background()) 306 defer cancel1() 307 e1, err := NewElection(ctx1, cli, sessionTTL, key, ID1, addr1, t.notifyBlockTime) 308 c.Assert(err, IsNil) 309 defer e1.Close() 310 311 // e1 should become the leader 312 select { 313 case leader := <-e1.LeaderNotify(): 314 c.Assert(leader.ID, Equals, ID1) 315 case <-time.After(timeout): 316 c.Fatal("leader campaign timeout") 317 } 318 c.Assert(e1.IsLeader(), IsTrue) 319 _, leaderID, leaderAddr, err := e1.LeaderInfo(ctx1) 320 c.Assert(err, IsNil) 321 c.Assert(leaderID, Equals, e1.ID()) 322 c.Assert(leaderAddr, Equals, addr1) 323 324 // start e2 325 ctx2, cancel2 := context.WithCancel(context.Background()) 326 defer cancel2() 327 e2, err := NewElection(ctx2, cli, sessionTTL, key, ID2, addr2, t.notifyBlockTime) 328 c.Assert(err, IsNil) 329 defer e2.Close() 330 time.Sleep(100 * time.Millisecond) // wait 100ms to start the campaign 331 // but the leader should still be e1 332 _, leaderID, leaderAddr, err = e2.LeaderInfo(ctx2) 333 c.Assert(err, IsNil) 334 c.Assert(leaderID, Equals, e1.ID()) 335 c.Assert(leaderAddr, Equals, addr1) 336 c.Assert(e2.IsLeader(), IsFalse) 337 338 // e1 evict leader, and e2 will be the leader 339 e1.EvictLeader() 340 utils.WaitSomething(8, 250*time.Millisecond, func() bool { 341 _, leaderID, _, _ = e2.LeaderInfo(ctx2) 342 return leaderID == e2.ID() 343 }) 344 _, leaderID, leaderAddr, err = e2.LeaderInfo(ctx2) 345 c.Assert(err, IsNil) 346 c.Assert(leaderID, Equals, e2.ID()) 347 c.Assert(leaderAddr, Equals, addr2) 348 utils.WaitSomething(10, 10*time.Millisecond, func() bool { 349 return e2.IsLeader() 350 }) 351 352 // cancel evict of e1, and then evict e2, e1 will be the leader 353 e1.CancelEvictLeader() 354 e2.EvictLeader() 355 utils.WaitSomething(8, 250*time.Millisecond, func() bool { 356 _, leaderID, _, _ = e1.LeaderInfo(ctx1) 357 return leaderID == e1.ID() 358 }) 359 _, leaderID, leaderAddr, err = e1.LeaderInfo(ctx1) 360 c.Assert(err, IsNil) 361 c.Assert(leaderID, Equals, e1.ID()) 362 c.Assert(leaderAddr, Equals, addr1) 363 utils.WaitSomething(10, 10*time.Millisecond, func() bool { 364 return e1.IsLeader() 365 }) 366 } 367 368 func (t *testElectionSuite) TestElectionDeleteKey(c *C) { 369 var ( 370 timeout = 3 * time.Second 371 sessionTTL = 60 372 key = "unit-test/election-delete-key" 373 ID = "member" 374 addr = "127.0.0.1:1234" 375 ) 376 cli, err := etcdutil.CreateClient([]string{t.endPoint}, nil) 377 c.Assert(err, IsNil) 378 defer cli.Close() 379 380 ctx, cancel := context.WithCancel(context.Background()) 381 defer cancel() 382 e, err := NewElection(ctx, cli, sessionTTL, key, ID, addr, t.notifyBlockTime) 383 c.Assert(err, IsNil) 384 defer e.Close() 385 386 // should become the leader 387 select { 388 case leader := <-e.LeaderNotify(): 389 c.Assert(leader.ID, Equals, ID) 390 case <-time.After(timeout): 391 c.Fatal("leader campaign timeout") 392 } 393 c.Assert(e.IsLeader(), IsTrue) 394 leaderKey, leaderID, leaderAddr, err := e.LeaderInfo(ctx) 395 c.Assert(err, IsNil) 396 c.Assert(leaderID, Equals, e.ID()) 397 c.Assert(leaderAddr, Equals, addr) 398 399 // the leader retired after deleted the key 400 var wg sync.WaitGroup 401 wg.Add(1) 402 go func() { 403 wg.Done() 404 select { 405 case err2 := <-e.ErrorNotify(): 406 c.Fatalf("delete the leader key should not get an error, %v", err2) 407 case leader := <-e.LeaderNotify(): 408 c.Assert(leader, IsNil) 409 } 410 }() 411 _, err = cli.Delete(ctx, leaderKey) 412 c.Assert(err, IsNil) 413 wg.Wait() 414 }