github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/master/etcd_test.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package master
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"os"
    20  	"path/filepath"
    21  	"sort"
    22  	"strings"
    23  	"time"
    24  
    25  	"github.com/pingcap/check"
    26  	"github.com/pingcap/tiflow/dm/pkg/log"
    27  	"github.com/pingcap/tiflow/dm/pkg/terror"
    28  	"github.com/pingcap/tiflow/dm/pkg/utils"
    29  	"github.com/tikv/pd/pkg/utils/tempurl"
    30  	clientv3 "go.etcd.io/etcd/client/v3"
    31  	"go.etcd.io/etcd/server/v3/embed"
    32  )
    33  
    34  var _ = check.Suite(&testEtcdSuite{})
    35  
    36  type testEtcdSuite struct{}
    37  
    38  func (t *testEtcdSuite) SetUpSuite(c *check.C) {
    39  	// initialized the logger to make genEmbedEtcdConfig working.
    40  	c.Assert(log.InitLogger(&log.Config{}), check.IsNil)
    41  }
    42  
    43  func (t *testEtcdSuite) TestStartEtcdFail(c *check.C) {
    44  	cfgCluster := NewConfig()
    45  	cfgCluster.Name = "dm-master-1"
    46  	cfgCluster.DataDir = c.MkDir()
    47  	cfgCluster.MasterAddr = tempurl.Alloc()[len("http://"):]
    48  	cfgCluster.PeerUrls = tempurl.Alloc()
    49  	c.Assert(cfgCluster.adjust(), check.IsNil)
    50  
    51  	// add another non-existing member for bootstrapping.
    52  	cfgCluster.InitialCluster = fmt.Sprintf("%s=%s,%s=%s",
    53  		cfgCluster.Name, cfgCluster.AdvertisePeerUrls,
    54  		"dm-master-2", tempurl.Alloc())
    55  	c.Assert(cfgCluster.adjust(), check.IsNil)
    56  
    57  	// start an etcd cluster
    58  	cfgClusterEtcd := genEmbedEtcdConfigWithLogger("info")
    59  	cfgClusterEtcd, err := cfgCluster.genEmbedEtcdConfig(cfgClusterEtcd)
    60  	c.Assert(err, check.IsNil)
    61  	e, err := startEtcd(cfgClusterEtcd, nil, nil, 3*time.Second)
    62  	c.Assert(terror.ErrMasterStartEmbedEtcdFail.Equal(err), check.IsTrue)
    63  	c.Assert(e, check.IsNil)
    64  }
    65  
    66  func (t *testEtcdSuite) TestPrepareJoinEtcd(c *check.C) {
    67  	cfgCluster := NewConfig() // used to start an etcd cluster
    68  	cfgCluster.Name = "dm-master-1"
    69  	cfgCluster.DataDir = c.MkDir()
    70  	cfgCluster.MasterAddr = tempurl.Alloc()[len("http://"):]
    71  	cfgCluster.AdvertiseAddr = cfgCluster.MasterAddr
    72  	cfgCluster.PeerUrls = tempurl.Alloc()
    73  	c.Assert(cfgCluster.adjust(), check.IsNil)
    74  	cfgClusterEtcd := genEmbedEtcdConfigWithLogger("info")
    75  	cfgClusterEtcd, err := cfgCluster.genEmbedEtcdConfig(cfgClusterEtcd)
    76  	c.Assert(err, check.IsNil)
    77  
    78  	cfgBefore := t.cloneConfig(cfgCluster) // before `prepareJoinEtcd` applied
    79  	cfgBefore.DataDir = c.MkDir()          // overwrite some config items
    80  	cfgBefore.MasterAddr = tempurl.Alloc()[len("http://"):]
    81  	cfgBefore.AdvertiseAddr = cfgBefore.MasterAddr
    82  	cfgBefore.PeerUrls = tempurl.Alloc()
    83  	cfgBefore.AdvertisePeerUrls = cfgBefore.PeerUrls
    84  	c.Assert(cfgBefore.adjust(), check.IsNil)
    85  
    86  	cfgAfter := t.cloneConfig(cfgBefore) // after `prepareJoinEtcd applied
    87  
    88  	joinCluster := cfgCluster.MasterAddr
    89  	joinFP := filepath.Join(cfgBefore.DataDir, "join")
    90  	memberDP := filepath.Join(cfgBefore.DataDir, "member")
    91  
    92  	// not set `join`, do nothing
    93  	c.Assert(prepareJoinEtcd(cfgAfter), check.IsNil)
    94  	c.Assert(cfgAfter, check.DeepEquals, cfgBefore)
    95  
    96  	// try to join self
    97  	cfgAfter.Join = cfgAfter.MasterAddr
    98  	err = prepareJoinEtcd(cfgAfter)
    99  	c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue)
   100  	c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: join self.*is forbidden.*")
   101  
   102  	// update `join` to a valid item
   103  	cfgBefore.Join = joinCluster
   104  
   105  	// join with persistent data
   106  	c.Assert(os.WriteFile(joinFP, []byte(joinCluster), privateDirMode), check.IsNil)
   107  	cfgAfter = t.cloneConfig(cfgBefore)
   108  	c.Assert(prepareJoinEtcd(cfgAfter), check.IsNil)
   109  	c.Assert(cfgAfter.InitialCluster, check.Equals, joinCluster)
   110  	c.Assert(cfgAfter.InitialClusterState, check.Equals, embed.ClusterStateFlagExisting)
   111  	c.Assert(os.Remove(joinFP), check.IsNil) // remove the persistent data
   112  
   113  	// join with invalid persistent data
   114  	c.Assert(os.Mkdir(joinFP, privateDirMode), check.IsNil) // use directory as invalid persistent data (file)
   115  	cfgAfter = t.cloneConfig(cfgBefore)
   116  	err = prepareJoinEtcd(cfgAfter)
   117  	c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue)
   118  	c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: read persistent join data.*")
   119  	c.Assert(os.Remove(joinFP), check.IsNil)        // remove the persistent data
   120  	c.Assert(cfgAfter, check.DeepEquals, cfgBefore) // not changed
   121  
   122  	// restart with previous data
   123  	c.Assert(os.Mkdir(memberDP, privateDirMode), check.IsNil)
   124  	c.Assert(os.Mkdir(filepath.Join(memberDP, "wal"), privateDirMode), check.IsNil)
   125  	c.Assert(prepareJoinEtcd(cfgAfter), check.IsNil)
   126  	c.Assert(cfgAfter.InitialCluster, check.Equals, "")
   127  	c.Assert(cfgAfter.InitialClusterState, check.Equals, embed.ClusterStateFlagExisting)
   128  	c.Assert(os.RemoveAll(memberDP), check.IsNil) // remove previous data
   129  
   130  	// start an etcd cluster
   131  	e1, err := startEtcd(cfgClusterEtcd, nil, nil, etcdStartTimeout)
   132  	c.Assert(err, check.IsNil)
   133  	defer e1.Close()
   134  
   135  	// same `name`, duplicate
   136  	cfgAfter = t.cloneConfig(cfgBefore)
   137  	err = prepareJoinEtcd(cfgAfter)
   138  	c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue)
   139  	c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: missing data or joining a duplicate member.*")
   140  	c.Assert(cfgAfter, check.DeepEquals, cfgBefore) // not changed
   141  
   142  	// set a different name
   143  	cfgBefore.Name = "dm-master-2"
   144  
   145  	// add member with invalid `advertise-peer-urls`
   146  	cfgAfter = t.cloneConfig(cfgBefore)
   147  	cfgAfter.AdvertisePeerUrls = "invalid-advertise-peer-urls"
   148  	err = prepareJoinEtcd(cfgAfter)
   149  	c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue)
   150  	c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: add member.*")
   151  
   152  	// join with existing cluster
   153  	cfgAfter = t.cloneConfig(cfgBefore)
   154  	c.Assert(prepareJoinEtcd(cfgAfter), check.IsNil)
   155  	c.Assert(cfgAfter.InitialClusterState, check.Equals, embed.ClusterStateFlagExisting)
   156  	obtainClusters := strings.Split(cfgAfter.InitialCluster, ",")
   157  	sort.Strings(obtainClusters)
   158  	expectedClusters := []string{
   159  		cfgCluster.InitialCluster,
   160  		fmt.Sprintf("%s=%s", cfgAfter.Name, cfgAfter.PeerUrls),
   161  	}
   162  	sort.Strings(expectedClusters)
   163  	c.Assert(obtainClusters, check.DeepEquals, expectedClusters)
   164  
   165  	// join data should exist now
   166  	joinData, err := os.ReadFile(joinFP)
   167  	c.Assert(err, check.IsNil)
   168  	c.Assert(string(joinData), check.Equals, cfgAfter.InitialCluster)
   169  
   170  	// prepare join done, but has not start the etcd to complete the join, can not join anymore.
   171  	cfgAfter2 := t.cloneConfig(cfgBefore)
   172  	cfgAfter2.Name = "dm-master-3" // overwrite some items
   173  	cfgAfter2.DataDir = c.MkDir()
   174  	cfgAfter2.MasterAddr = tempurl.Alloc()[len("http://"):]
   175  	cfgAfter2.AdvertiseAddr = cfgAfter2.MasterAddr
   176  	cfgAfter2.PeerUrls = tempurl.Alloc()
   177  	cfgAfter2.AdvertisePeerUrls = cfgAfter2.PeerUrls
   178  	err = prepareJoinEtcd(cfgAfter2)
   179  	c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue)
   180  	c.Assert(err, check.ErrorMatches, ".*context deadline exceeded.*")
   181  
   182  	// start the joining etcd
   183  	cfgAfterEtcd := genEmbedEtcdConfigWithLogger("info")
   184  	cfgAfterEtcd, err = cfgAfter.genEmbedEtcdConfig(cfgAfterEtcd)
   185  	c.Assert(err, check.IsNil)
   186  	e2, err := startEtcd(cfgAfterEtcd, nil, nil, etcdStartTimeout)
   187  	c.Assert(err, check.IsNil)
   188  	defer e2.Close()
   189  
   190  	// try join again
   191  	for i := 0; i < 20; i++ {
   192  		err = prepareJoinEtcd(cfgAfter2)
   193  		if err == nil {
   194  			break
   195  		}
   196  		// for `etcdserver: unhealthy cluster`, try again later
   197  		c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue)
   198  		c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: add member.*: etcdserver: unhealthy cluster.*")
   199  		time.Sleep(500 * time.Millisecond)
   200  	}
   201  	c.Assert(err, check.IsNil)
   202  }
   203  
   204  func (t *testEtcdSuite) cloneConfig(cfg *Config) *Config {
   205  	clone := NewConfig()
   206  	*clone = *cfg
   207  	return clone
   208  }
   209  
   210  func (t *testEtcdSuite) TestIsDirExist(c *check.C) {
   211  	d := "./directory-not-exists"
   212  	c.Assert(isDirExist(d), check.IsFalse)
   213  
   214  	// empty directory
   215  	d = c.MkDir()
   216  	c.Assert(isDirExist(d), check.IsTrue)
   217  
   218  	// data exists in the directory
   219  	for i := 1; i <= 3; i++ {
   220  		fp := filepath.Join(d, fmt.Sprintf("file.%d", i))
   221  		c.Assert(os.WriteFile(fp, nil, privateDirMode), check.IsNil)
   222  		c.Assert(isDirExist(d), check.IsTrue)
   223  		c.Assert(isDirExist(fp), check.IsFalse) // not a directory
   224  	}
   225  }
   226  
   227  func (t *testEtcdSuite) TestEtcdAutoCompaction(c *check.C) {
   228  	cfg := NewConfig()
   229  	c.Assert(cfg.FromContent(SampleConfig), check.IsNil)
   230  
   231  	cfg.DataDir = c.MkDir()
   232  	cfg.MasterAddr = tempurl.Alloc()[len("http://"):]
   233  	cfg.AdvertiseAddr = cfg.MasterAddr
   234  	cfg.AutoCompactionRetention = "1s"
   235  
   236  	ctx, cancel := context.WithCancel(context.Background())
   237  	s := NewServer(cfg)
   238  	c.Assert(s.Start(ctx), check.IsNil)
   239  
   240  	etcdCli, err := clientv3.New(clientv3.Config{
   241  		Endpoints: []string{cfg.MasterAddr},
   242  	})
   243  	c.Assert(err, check.IsNil)
   244  
   245  	for i := 0; i < 100; i++ {
   246  		_, err = etcdCli.Put(ctx, "key", fmt.Sprintf("%03d", i))
   247  		c.Assert(err, check.IsNil)
   248  	}
   249  	time.Sleep(3 * time.Second)
   250  	resp, err := etcdCli.Get(ctx, "key")
   251  	c.Assert(err, check.IsNil)
   252  
   253  	utils.WaitSomething(10, time.Second, func() bool {
   254  		_, err = etcdCli.Get(ctx, "key", clientv3.WithRev(resp.Header.Revision-1))
   255  		return err != nil
   256  	})
   257  	c.Assert(err, check.ErrorMatches, ".*required revision has been compacted.*")
   258  
   259  	cancel()
   260  	s.Close()
   261  }