github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/election/elector_test.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package election_test
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"sort"
    20  	"strconv"
    21  	"strings"
    22  	"sync"
    23  	"testing"
    24  	"time"
    25  
    26  	"github.com/golang/mock/gomock"
    27  	"github.com/pingcap/tiflow/pkg/election"
    28  	"github.com/pingcap/tiflow/pkg/election/mock"
    29  	"github.com/pingcap/tiflow/pkg/errors"
    30  	"github.com/stretchr/testify/require"
    31  	"go.uber.org/atomic"
    32  )
    33  
    34  func TestElectorBasic(t *testing.T) {
    35  	t.Parallel()
    36  
    37  	s := mock.NewMockStorage(gomock.NewController(t))
    38  
    39  	var recordLock sync.RWMutex
    40  	record := &election.Record{}
    41  
    42  	s.EXPECT().Get(gomock.Any()).AnyTimes().
    43  		DoAndReturn(func(ctx context.Context) (*election.Record, error) {
    44  			recordLock.RLock()
    45  			defer recordLock.RUnlock()
    46  
    47  			return record.Clone(), nil
    48  		})
    49  
    50  	s.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().
    51  		DoAndReturn(func(ctx context.Context, r *election.Record, _ bool) error {
    52  			recordLock.Lock()
    53  			defer recordLock.Unlock()
    54  
    55  			if r.Version != record.Version {
    56  				return errors.ErrElectionRecordConflict.GenWithStackByArgs()
    57  			}
    58  			record = r.Clone()
    59  			record.Version++
    60  			return nil
    61  		})
    62  
    63  	var (
    64  		electors  []election.Elector
    65  		configs   []election.Config
    66  		cancelFns []context.CancelFunc
    67  		wg        sync.WaitGroup
    68  	)
    69  	firstLeaderID := make(chan string, 1)
    70  	const electorCount = 5
    71  	for i := 0; i < electorCount; i++ {
    72  		id := fmt.Sprintf("elector-%d", i)
    73  		config := election.Config{
    74  			ID:      id,
    75  			Name:    id,
    76  			Address: fmt.Sprintf("127.0.0.1:1024%d", i),
    77  			Storage: s,
    78  			LeaderCallback: func(ctx context.Context) error {
    79  				select {
    80  				case firstLeaderID <- id:
    81  				default:
    82  				}
    83  				<-ctx.Done()
    84  				return ctx.Err()
    85  			},
    86  			LeaseDuration: time.Second,
    87  			RenewInterval: time.Millisecond * 100,
    88  			RenewDeadline: time.Millisecond * 900,
    89  		}
    90  		elector, err := election.NewElector(config)
    91  		require.NoError(t, err)
    92  
    93  		ctx, cancel := context.WithCancel(context.Background())
    94  		wg.Add(1)
    95  		go func() {
    96  			defer wg.Done()
    97  			err := elector.RunElection(ctx)
    98  			require.Error(t, err)
    99  			require.Equal(t, context.Canceled, errors.Cause(err))
   100  		}()
   101  
   102  		electors = append(electors, elector)
   103  		configs = append(configs, config)
   104  		cancelFns = append(cancelFns, cancel)
   105  	}
   106  
   107  	// Wait for first leader to be elected.
   108  	var leader *election.Member
   109  	require.Eventually(t, func() bool {
   110  		var ok bool
   111  		leader, ok = electors[0].GetLeader()
   112  		return ok
   113  	}, time.Second, time.Millisecond*100, "leader not elected")
   114  	require.NotNil(t, leader)
   115  	select {
   116  	case leaderID := <-firstLeaderID:
   117  		require.Equal(t, leaderID, leader.ID)
   118  	case <-time.After(time.Second):
   119  		require.Fail(t, "leader callback not called")
   120  	}
   121  
   122  	// Wait for all elector members to join.
   123  	var members []*election.Member
   124  	require.Eventually(t, func() bool {
   125  		members = electors[0].GetMembers()
   126  		return len(members) == electorCount
   127  	}, time.Second, time.Millisecond*100, "not all members joined")
   128  
   129  	sort.Slice(members, func(i, j int) bool {
   130  		return members[i].ID < members[j].ID
   131  	})
   132  	for i, m := range members {
   133  		require.Equal(t, configs[i].ID, m.ID)
   134  		require.Equal(t, configs[i].Name, m.Name)
   135  		require.Equal(t, configs[i].Address, m.Address)
   136  	}
   137  
   138  	// All electors should have the same leader.
   139  	for _, e := range electors {
   140  		leader1, ok := e.GetLeader()
   141  		require.True(t, ok)
   142  		require.Equal(t, leader.ID, leader1.ID)
   143  	}
   144  
   145  	// Test resign leader.
   146  	leaderIdx, err := strconv.Atoi(strings.TrimPrefix(leader.ID, "elector-"))
   147  	require.NoError(t, err)
   148  	leaderElector := electors[leaderIdx]
   149  	require.True(t, leaderElector.IsLeader())
   150  	err = leaderElector.ResignLeader(context.Background(), time.Second)
   151  	require.NoError(t, err)
   152  	require.Eventually(t, func() bool {
   153  		newLeader, ok := leaderElector.GetLeader()
   154  		return ok && newLeader.ID != leader.ID
   155  	}, time.Second, time.Millisecond*100, "leader not changed")
   156  
   157  	// Test cancel elector.
   158  	for i := electorCount - 1; i > 0; i-- {
   159  		cancelFns[i]()
   160  		require.Eventually(t, func() bool {
   161  			_, ok := electors[0].GetLeader()
   162  			if !ok {
   163  				return false
   164  			}
   165  			members := electors[0].GetMembers()
   166  			return len(members) == i
   167  		}, time.Second*3, time.Millisecond*100, "member not removed")
   168  	}
   169  	cancelFns[0]()
   170  	wg.Wait()
   171  }
   172  
   173  func TestElectorRenewFailure(t *testing.T) {
   174  	t.Parallel()
   175  
   176  	var recordLock sync.RWMutex
   177  	record := &election.Record{}
   178  
   179  	getRecord := func(_ context.Context) (*election.Record, error) { //nolint:unparam
   180  		recordLock.RLock()
   181  		defer recordLock.RUnlock()
   182  
   183  		return record.Clone(), nil
   184  	}
   185  
   186  	updateRecord := func(_ context.Context, r *election.Record, _ bool) error {
   187  		recordLock.Lock()
   188  		defer recordLock.Unlock()
   189  
   190  		if r.Version != record.Version {
   191  			return errors.ErrElectionRecordConflict.GenWithStackByArgs()
   192  		}
   193  		record = r.Clone()
   194  		record.Version++
   195  		return nil
   196  	}
   197  
   198  	var (
   199  		s1Err       atomic.Error
   200  		s1LastRenew time.Time
   201  	)
   202  	s1 := mock.NewMockStorage(gomock.NewController(t))
   203  	s1.EXPECT().Get(gomock.Any()).AnyTimes().
   204  		DoAndReturn(func(ctx context.Context) (*election.Record, error) {
   205  			if err := s1Err.Load(); err != nil {
   206  				return nil, err
   207  			}
   208  			return getRecord(ctx)
   209  		})
   210  	s1.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().
   211  		DoAndReturn(func(ctx context.Context, r *election.Record, isLeaderChanged bool) error {
   212  			if err := s1Err.Load(); err != nil {
   213  				return err
   214  			}
   215  			if err := updateRecord(ctx, r, isLeaderChanged); err != nil {
   216  				return err
   217  			}
   218  			s1LastRenew = time.Now()
   219  			return nil
   220  		})
   221  
   222  	s2 := mock.NewMockStorage(gomock.NewController(t))
   223  	s2.EXPECT().Get(gomock.Any()).AnyTimes().DoAndReturn(getRecord)
   224  	s2.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().DoAndReturn(updateRecord)
   225  
   226  	const (
   227  		leaseDuration = time.Second * 1
   228  		renewInterval = time.Millisecond * 100
   229  		renewDeadline = leaseDuration - renewInterval
   230  	)
   231  
   232  	ctx, cancel := context.WithCancel(context.Background())
   233  
   234  	e1, err := election.NewElector(election.Config{
   235  		ID:      "e1",
   236  		Name:    "e1",
   237  		Address: "127.0.0.1:10241",
   238  		Storage: s1,
   239  		LeaderCallback: func(ctx context.Context) error {
   240  			<-ctx.Done()
   241  			return ctx.Err()
   242  		},
   243  		LeaseDuration: leaseDuration,
   244  		RenewInterval: renewInterval,
   245  		RenewDeadline: renewDeadline,
   246  	})
   247  	require.NoError(t, err)
   248  
   249  	var wg sync.WaitGroup
   250  	wg.Add(1)
   251  
   252  	go func() {
   253  		defer wg.Done()
   254  		err := e1.RunElection(ctx)
   255  		require.Error(t, err)
   256  		require.Equal(t, context.Canceled, errors.Cause(err))
   257  	}()
   258  
   259  	// Wait for leader to be elected.
   260  	require.Eventually(t, func() bool {
   261  		_, ok := e1.GetLeader()
   262  		return ok
   263  	}, time.Second, time.Millisecond*100, "leader not elected")
   264  
   265  	e2, err := election.NewElector(election.Config{
   266  		ID:      "e2",
   267  		Name:    "e2",
   268  		Address: "127.0.0.1:10242",
   269  		Storage: s2,
   270  		LeaderCallback: func(ctx context.Context) error {
   271  			<-ctx.Done()
   272  			return ctx.Err()
   273  		},
   274  		LeaseDuration: leaseDuration,
   275  		RenewInterval: renewInterval,
   276  		RenewDeadline: renewDeadline,
   277  	})
   278  	require.NoError(t, err)
   279  
   280  	wg.Add(1)
   281  	go func() {
   282  		defer wg.Done()
   283  		err := e2.RunElection(ctx)
   284  		require.Error(t, err)
   285  		require.Equal(t, context.Canceled, errors.Cause(err))
   286  	}()
   287  
   288  	// Make s1 fail and wait for s2 to be elected.
   289  	s1Err.Store(errors.New("connection error"))
   290  	require.Eventually(t, func() bool {
   291  		leader, ok := e2.GetLeader()
   292  		if ok && leader.ID == "e2" {
   293  			require.GreaterOrEqual(t, time.Since(s1LastRenew), leaseDuration,
   294  				"elector 2 shouldn't elect itself as leader when elector 1 lease is not expired")
   295  			return true
   296  		}
   297  		return false
   298  	}, time.Second*3, time.Millisecond*100, "elector 2 not elected")
   299  
   300  	require.False(t, e1.IsLeader())
   301  	_, ok := e1.GetLeader()
   302  	require.False(t, ok)
   303  
   304  	cancel()
   305  	wg.Wait()
   306  }
   307  
   308  func TestLeaderCallbackUnexpectedExit(t *testing.T) {
   309  	t.Parallel()
   310  
   311  	s := mock.NewMockStorage(gomock.NewController(t))
   312  
   313  	var recordLock sync.RWMutex
   314  	record := &election.Record{}
   315  
   316  	s.EXPECT().Get(gomock.Any()).AnyTimes().
   317  		DoAndReturn(func(ctx context.Context) (*election.Record, error) {
   318  			recordLock.RLock()
   319  			defer recordLock.RUnlock()
   320  
   321  			return record.Clone(), nil
   322  		})
   323  
   324  	s.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().
   325  		DoAndReturn(func(ctx context.Context, r *election.Record, _ bool) error {
   326  			recordLock.Lock()
   327  			defer recordLock.Unlock()
   328  
   329  			if r.Version != record.Version {
   330  				return errors.ErrElectionRecordConflict.GenWithStackByArgs()
   331  			}
   332  			record = r.Clone()
   333  			record.Version++
   334  			return nil
   335  		})
   336  
   337  	const (
   338  		leaseDuration = time.Second * 1
   339  		renewInterval = time.Millisecond * 100
   340  		renewDeadline = leaseDuration - renewInterval
   341  	)
   342  
   343  	ctx, cancel := context.WithCancel(context.Background())
   344  
   345  	var e1CallbackErr atomic.Error
   346  
   347  	e1, err := election.NewElector(election.Config{
   348  		ID:      "e1",
   349  		Name:    "e1",
   350  		Address: "127.0.0.1:10241",
   351  		Storage: s,
   352  		LeaderCallback: func(ctx context.Context) error {
   353  			ticker := time.NewTicker(time.Millisecond)
   354  			for {
   355  				select {
   356  				case <-ticker.C:
   357  					if err := e1CallbackErr.Load(); err != nil {
   358  						return err
   359  					}
   360  				case <-ctx.Done():
   361  					return ctx.Err()
   362  				}
   363  			}
   364  		},
   365  		LeaseDuration: leaseDuration,
   366  		RenewInterval: renewInterval,
   367  		RenewDeadline: renewDeadline,
   368  	})
   369  	require.NoError(t, err)
   370  
   371  	var wg sync.WaitGroup
   372  	wg.Add(1)
   373  	go func() {
   374  		defer wg.Done()
   375  		err := e1.RunElection(ctx)
   376  		require.Error(t, err)
   377  		require.Equal(t, context.Canceled, errors.Cause(err))
   378  	}()
   379  
   380  	// Wait for leader to be elected.
   381  	require.Eventually(t, func() bool {
   382  		_, ok := e1.GetLeader()
   383  		return ok
   384  	}, time.Second, time.Millisecond*100, "leader not elected")
   385  
   386  	e2, err := election.NewElector(election.Config{
   387  		ID:      "e2",
   388  		Name:    "e2",
   389  		Address: "127.0.0.1:10242",
   390  		Storage: s,
   391  		LeaderCallback: func(ctx context.Context) error {
   392  			<-ctx.Done()
   393  			return ctx.Err()
   394  		},
   395  		LeaseDuration: leaseDuration,
   396  		RenewInterval: renewInterval,
   397  		RenewDeadline: renewDeadline,
   398  	})
   399  	require.NoError(t, err)
   400  	wg.Add(1)
   401  	go func() {
   402  		defer wg.Done()
   403  		err := e2.RunElection(ctx)
   404  		require.Error(t, err)
   405  		require.Equal(t, context.Canceled, errors.Cause(err))
   406  	}()
   407  
   408  	// Make elector 1 leader callback return error.
   409  	e1CallbackErr.Store(errors.New("callback error"))
   410  
   411  	require.Eventually(t, func() bool {
   412  		leader, ok := e1.GetLeader()
   413  		return ok && leader.ID == "e2"
   414  	}, time.Second*3, time.Millisecond*100, "e2 not elected")
   415  
   416  	cancel()
   417  	wg.Wait()
   418  }