github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/election/elector_test.go (about) 1 // Copyright 2022 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package election_test 15 16 import ( 17 "context" 18 "fmt" 19 "sort" 20 "strconv" 21 "strings" 22 "sync" 23 "testing" 24 "time" 25 26 "github.com/golang/mock/gomock" 27 "github.com/pingcap/tiflow/pkg/election" 28 "github.com/pingcap/tiflow/pkg/election/mock" 29 "github.com/pingcap/tiflow/pkg/errors" 30 "github.com/stretchr/testify/require" 31 "go.uber.org/atomic" 32 ) 33 34 func TestElectorBasic(t *testing.T) { 35 t.Parallel() 36 37 s := mock.NewMockStorage(gomock.NewController(t)) 38 39 var recordLock sync.RWMutex 40 record := &election.Record{} 41 42 s.EXPECT().Get(gomock.Any()).AnyTimes(). 43 DoAndReturn(func(ctx context.Context) (*election.Record, error) { 44 recordLock.RLock() 45 defer recordLock.RUnlock() 46 47 return record.Clone(), nil 48 }) 49 50 s.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes(). 51 DoAndReturn(func(ctx context.Context, r *election.Record, _ bool) error { 52 recordLock.Lock() 53 defer recordLock.Unlock() 54 55 if r.Version != record.Version { 56 return errors.ErrElectionRecordConflict.GenWithStackByArgs() 57 } 58 record = r.Clone() 59 record.Version++ 60 return nil 61 }) 62 63 var ( 64 electors []election.Elector 65 configs []election.Config 66 cancelFns []context.CancelFunc 67 wg sync.WaitGroup 68 ) 69 firstLeaderID := make(chan string, 1) 70 const electorCount = 5 71 for i := 0; i < electorCount; i++ { 72 id := fmt.Sprintf("elector-%d", i) 73 config := election.Config{ 74 ID: id, 75 Name: id, 76 Address: fmt.Sprintf("127.0.0.1:1024%d", i), 77 Storage: s, 78 LeaderCallback: func(ctx context.Context) error { 79 select { 80 case firstLeaderID <- id: 81 default: 82 } 83 <-ctx.Done() 84 return ctx.Err() 85 }, 86 LeaseDuration: time.Second, 87 RenewInterval: time.Millisecond * 100, 88 RenewDeadline: time.Millisecond * 900, 89 } 90 elector, err := election.NewElector(config) 91 require.NoError(t, err) 92 93 ctx, cancel := context.WithCancel(context.Background()) 94 wg.Add(1) 95 go func() { 96 defer wg.Done() 97 err := elector.RunElection(ctx) 98 require.Error(t, err) 99 require.Equal(t, context.Canceled, errors.Cause(err)) 100 }() 101 102 electors = append(electors, elector) 103 configs = append(configs, config) 104 cancelFns = append(cancelFns, cancel) 105 } 106 107 // Wait for first leader to be elected. 108 var leader *election.Member 109 require.Eventually(t, func() bool { 110 var ok bool 111 leader, ok = electors[0].GetLeader() 112 return ok 113 }, time.Second, time.Millisecond*100, "leader not elected") 114 require.NotNil(t, leader) 115 select { 116 case leaderID := <-firstLeaderID: 117 require.Equal(t, leaderID, leader.ID) 118 case <-time.After(time.Second): 119 require.Fail(t, "leader callback not called") 120 } 121 122 // Wait for all elector members to join. 123 var members []*election.Member 124 require.Eventually(t, func() bool { 125 members = electors[0].GetMembers() 126 return len(members) == electorCount 127 }, time.Second, time.Millisecond*100, "not all members joined") 128 129 sort.Slice(members, func(i, j int) bool { 130 return members[i].ID < members[j].ID 131 }) 132 for i, m := range members { 133 require.Equal(t, configs[i].ID, m.ID) 134 require.Equal(t, configs[i].Name, m.Name) 135 require.Equal(t, configs[i].Address, m.Address) 136 } 137 138 // All electors should have the same leader. 139 for _, e := range electors { 140 leader1, ok := e.GetLeader() 141 require.True(t, ok) 142 require.Equal(t, leader.ID, leader1.ID) 143 } 144 145 // Test resign leader. 146 leaderIdx, err := strconv.Atoi(strings.TrimPrefix(leader.ID, "elector-")) 147 require.NoError(t, err) 148 leaderElector := electors[leaderIdx] 149 require.True(t, leaderElector.IsLeader()) 150 err = leaderElector.ResignLeader(context.Background(), time.Second) 151 require.NoError(t, err) 152 require.Eventually(t, func() bool { 153 newLeader, ok := leaderElector.GetLeader() 154 return ok && newLeader.ID != leader.ID 155 }, time.Second, time.Millisecond*100, "leader not changed") 156 157 // Test cancel elector. 158 for i := electorCount - 1; i > 0; i-- { 159 cancelFns[i]() 160 require.Eventually(t, func() bool { 161 _, ok := electors[0].GetLeader() 162 if !ok { 163 return false 164 } 165 members := electors[0].GetMembers() 166 return len(members) == i 167 }, time.Second*3, time.Millisecond*100, "member not removed") 168 } 169 cancelFns[0]() 170 wg.Wait() 171 } 172 173 func TestElectorRenewFailure(t *testing.T) { 174 t.Parallel() 175 176 var recordLock sync.RWMutex 177 record := &election.Record{} 178 179 getRecord := func(_ context.Context) (*election.Record, error) { //nolint:unparam 180 recordLock.RLock() 181 defer recordLock.RUnlock() 182 183 return record.Clone(), nil 184 } 185 186 updateRecord := func(_ context.Context, r *election.Record, _ bool) error { 187 recordLock.Lock() 188 defer recordLock.Unlock() 189 190 if r.Version != record.Version { 191 return errors.ErrElectionRecordConflict.GenWithStackByArgs() 192 } 193 record = r.Clone() 194 record.Version++ 195 return nil 196 } 197 198 var ( 199 s1Err atomic.Error 200 s1LastRenew time.Time 201 ) 202 s1 := mock.NewMockStorage(gomock.NewController(t)) 203 s1.EXPECT().Get(gomock.Any()).AnyTimes(). 204 DoAndReturn(func(ctx context.Context) (*election.Record, error) { 205 if err := s1Err.Load(); err != nil { 206 return nil, err 207 } 208 return getRecord(ctx) 209 }) 210 s1.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes(). 211 DoAndReturn(func(ctx context.Context, r *election.Record, isLeaderChanged bool) error { 212 if err := s1Err.Load(); err != nil { 213 return err 214 } 215 if err := updateRecord(ctx, r, isLeaderChanged); err != nil { 216 return err 217 } 218 s1LastRenew = time.Now() 219 return nil 220 }) 221 222 s2 := mock.NewMockStorage(gomock.NewController(t)) 223 s2.EXPECT().Get(gomock.Any()).AnyTimes().DoAndReturn(getRecord) 224 s2.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().DoAndReturn(updateRecord) 225 226 const ( 227 leaseDuration = time.Second * 1 228 renewInterval = time.Millisecond * 100 229 renewDeadline = leaseDuration - renewInterval 230 ) 231 232 ctx, cancel := context.WithCancel(context.Background()) 233 234 e1, err := election.NewElector(election.Config{ 235 ID: "e1", 236 Name: "e1", 237 Address: "127.0.0.1:10241", 238 Storage: s1, 239 LeaderCallback: func(ctx context.Context) error { 240 <-ctx.Done() 241 return ctx.Err() 242 }, 243 LeaseDuration: leaseDuration, 244 RenewInterval: renewInterval, 245 RenewDeadline: renewDeadline, 246 }) 247 require.NoError(t, err) 248 249 var wg sync.WaitGroup 250 wg.Add(1) 251 252 go func() { 253 defer wg.Done() 254 err := e1.RunElection(ctx) 255 require.Error(t, err) 256 require.Equal(t, context.Canceled, errors.Cause(err)) 257 }() 258 259 // Wait for leader to be elected. 260 require.Eventually(t, func() bool { 261 _, ok := e1.GetLeader() 262 return ok 263 }, time.Second, time.Millisecond*100, "leader not elected") 264 265 e2, err := election.NewElector(election.Config{ 266 ID: "e2", 267 Name: "e2", 268 Address: "127.0.0.1:10242", 269 Storage: s2, 270 LeaderCallback: func(ctx context.Context) error { 271 <-ctx.Done() 272 return ctx.Err() 273 }, 274 LeaseDuration: leaseDuration, 275 RenewInterval: renewInterval, 276 RenewDeadline: renewDeadline, 277 }) 278 require.NoError(t, err) 279 280 wg.Add(1) 281 go func() { 282 defer wg.Done() 283 err := e2.RunElection(ctx) 284 require.Error(t, err) 285 require.Equal(t, context.Canceled, errors.Cause(err)) 286 }() 287 288 // Make s1 fail and wait for s2 to be elected. 289 s1Err.Store(errors.New("connection error")) 290 require.Eventually(t, func() bool { 291 leader, ok := e2.GetLeader() 292 if ok && leader.ID == "e2" { 293 require.GreaterOrEqual(t, time.Since(s1LastRenew), leaseDuration, 294 "elector 2 shouldn't elect itself as leader when elector 1 lease is not expired") 295 return true 296 } 297 return false 298 }, time.Second*3, time.Millisecond*100, "elector 2 not elected") 299 300 require.False(t, e1.IsLeader()) 301 _, ok := e1.GetLeader() 302 require.False(t, ok) 303 304 cancel() 305 wg.Wait() 306 } 307 308 func TestLeaderCallbackUnexpectedExit(t *testing.T) { 309 t.Parallel() 310 311 s := mock.NewMockStorage(gomock.NewController(t)) 312 313 var recordLock sync.RWMutex 314 record := &election.Record{} 315 316 s.EXPECT().Get(gomock.Any()).AnyTimes(). 317 DoAndReturn(func(ctx context.Context) (*election.Record, error) { 318 recordLock.RLock() 319 defer recordLock.RUnlock() 320 321 return record.Clone(), nil 322 }) 323 324 s.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes(). 325 DoAndReturn(func(ctx context.Context, r *election.Record, _ bool) error { 326 recordLock.Lock() 327 defer recordLock.Unlock() 328 329 if r.Version != record.Version { 330 return errors.ErrElectionRecordConflict.GenWithStackByArgs() 331 } 332 record = r.Clone() 333 record.Version++ 334 return nil 335 }) 336 337 const ( 338 leaseDuration = time.Second * 1 339 renewInterval = time.Millisecond * 100 340 renewDeadline = leaseDuration - renewInterval 341 ) 342 343 ctx, cancel := context.WithCancel(context.Background()) 344 345 var e1CallbackErr atomic.Error 346 347 e1, err := election.NewElector(election.Config{ 348 ID: "e1", 349 Name: "e1", 350 Address: "127.0.0.1:10241", 351 Storage: s, 352 LeaderCallback: func(ctx context.Context) error { 353 ticker := time.NewTicker(time.Millisecond) 354 for { 355 select { 356 case <-ticker.C: 357 if err := e1CallbackErr.Load(); err != nil { 358 return err 359 } 360 case <-ctx.Done(): 361 return ctx.Err() 362 } 363 } 364 }, 365 LeaseDuration: leaseDuration, 366 RenewInterval: renewInterval, 367 RenewDeadline: renewDeadline, 368 }) 369 require.NoError(t, err) 370 371 var wg sync.WaitGroup 372 wg.Add(1) 373 go func() { 374 defer wg.Done() 375 err := e1.RunElection(ctx) 376 require.Error(t, err) 377 require.Equal(t, context.Canceled, errors.Cause(err)) 378 }() 379 380 // Wait for leader to be elected. 381 require.Eventually(t, func() bool { 382 _, ok := e1.GetLeader() 383 return ok 384 }, time.Second, time.Millisecond*100, "leader not elected") 385 386 e2, err := election.NewElector(election.Config{ 387 ID: "e2", 388 Name: "e2", 389 Address: "127.0.0.1:10242", 390 Storage: s, 391 LeaderCallback: func(ctx context.Context) error { 392 <-ctx.Done() 393 return ctx.Err() 394 }, 395 LeaseDuration: leaseDuration, 396 RenewInterval: renewInterval, 397 RenewDeadline: renewDeadline, 398 }) 399 require.NoError(t, err) 400 wg.Add(1) 401 go func() { 402 defer wg.Done() 403 err := e2.RunElection(ctx) 404 require.Error(t, err) 405 require.Equal(t, context.Canceled, errors.Cause(err)) 406 }() 407 408 // Make elector 1 leader callback return error. 409 e1CallbackErr.Store(errors.New("callback error")) 410 411 require.Eventually(t, func() bool { 412 leader, ok := e1.GetLeader() 413 return ok && leader.ID == "e2" 414 }, time.Second*3, time.Millisecond*100, "e2 not elected") 415 416 cancel() 417 wg.Wait() 418 }