github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/master/election_test.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package master
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"strings"
    20  	"testing"
    21  	"time"
    22  
    23  	"github.com/pingcap/failpoint"
    24  	"github.com/pingcap/tiflow/dm/pkg/etcdutil"
    25  	"github.com/pingcap/tiflow/dm/pkg/log"
    26  	"github.com/stretchr/testify/require"
    27  	"github.com/tikv/pd/pkg/utils/tempurl"
    28  )
    29  
    30  func TestFailToStartLeader(t *testing.T) {
    31  	err := log.InitLogger(&log.Config{Level: "info"})
    32  	require.NoError(t, err)
    33  	ctx, cancel := context.WithCancel(context.Background())
    34  
    35  	var s1, s2 *Server
    36  	defer func() {
    37  		cancel()
    38  		if s1 != nil {
    39  			s1.Close()
    40  		}
    41  		if s2 != nil {
    42  			s2.Close()
    43  		}
    44  	}()
    45  
    46  	// create a new cluster
    47  	cfg1 := NewConfig()
    48  	require.NoError(t, cfg1.FromContent(SampleConfig))
    49  	cfg1.Name = "dm-master-1"
    50  	cfg1.DataDir = t.TempDir()
    51  	cfg1.MasterAddr = tempurl.Alloc()[len("http://"):]
    52  	cfg1.AdvertiseAddr = cfg1.MasterAddr
    53  	cfg1.PeerUrls = tempurl.Alloc()
    54  	cfg1.AdvertisePeerUrls = cfg1.PeerUrls
    55  	cfg1.InitialCluster = fmt.Sprintf("%s=%s", cfg1.Name, cfg1.AdvertisePeerUrls)
    56  
    57  	s1 = NewServer(cfg1)
    58  	require.NoError(t, s1.Start(ctx))
    59  	// wait the first one become the leader
    60  	require.Eventually(t, func() bool {
    61  		return s1.election.IsLeader() && s1.scheduler.Started()
    62  	}, 3*time.Second, 100*time.Millisecond)
    63  
    64  	// join to an existing cluster
    65  	cfg2 := NewConfig()
    66  	require.NoError(t, cfg2.FromContent(SampleConfig))
    67  	cfg2.Name = "dm-master-2"
    68  	cfg2.DataDir = t.TempDir()
    69  	cfg2.MasterAddr = tempurl.Alloc()[len("http://"):]
    70  	cfg2.AdvertiseAddr = cfg2.MasterAddr
    71  	cfg2.PeerUrls = tempurl.Alloc()
    72  	cfg2.AdvertisePeerUrls = cfg2.PeerUrls
    73  	cfg2.Join = cfg1.MasterAddr // join to an existing cluster
    74  
    75  	// imitate fail to start scheduler/pessimism/optimism
    76  	require.NoError(t, failpoint.Enable("github.com/pingcap/tiflow/dm/master/FailToStartLeader", `return("dm-master-2")`))
    77  	//nolint:errcheck
    78  	defer failpoint.Disable("github.com/pingcap/tiflow/dm/master/FailToStartLeader")
    79  
    80  	s2 = NewServer(cfg2)
    81  	require.NoError(t, s2.Start(ctx))
    82  	// wait the second master ready
    83  	time.Sleep(time.Second)
    84  	require.False(t, s2.election.IsLeader())
    85  
    86  	client, err := etcdutil.CreateClient(strings.Split(cfg1.AdvertisePeerUrls, ","), nil)
    87  	require.NoError(t, err)
    88  	defer client.Close()
    89  
    90  	// s1 is still the leader
    91  	_, leaderID, _, err := s2.election.LeaderInfo(ctx)
    92  	require.NoError(t, err)
    93  	require.Equal(t, cfg1.Name, leaderID)
    94  	require.Greater(t, s1.ClusterID(), uint64(0))
    95  	require.Equal(t, uint64(0), s2.ClusterID())
    96  
    97  	s1.election.Resign()
    98  	time.Sleep(1 * time.Second)
    99  
   100  	// s1 is still the leader
   101  	require.Eventually(t, func() bool {
   102  		_, leaderID, _, err = s2.election.LeaderInfo(ctx)
   103  		require.NoError(t, err)
   104  		return leaderID == cfg1.Name
   105  	}, 3*time.Second, 100*time.Millisecond)
   106  	clusterID := s1.ClusterID()
   107  
   108  	require.NoError(t, failpoint.Disable("github.com/pingcap/tiflow/dm/master/FailToStartLeader"))
   109  	s1.election.Resign()
   110  	time.Sleep(1 * time.Second)
   111  
   112  	// s2 now become leader
   113  	_, leaderID, _, err = s2.election.LeaderInfo(ctx)
   114  	require.NoError(t, err)
   115  	require.Equal(t, cfg2.Name, leaderID)
   116  	require.Equal(t, clusterID, s2.ClusterID())
   117  }