github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/master/etcd_test.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package master 15 16 import ( 17 "context" 18 "fmt" 19 "os" 20 "path/filepath" 21 "sort" 22 "strings" 23 "time" 24 25 "github.com/pingcap/check" 26 "github.com/pingcap/tiflow/dm/pkg/log" 27 "github.com/pingcap/tiflow/dm/pkg/terror" 28 "github.com/pingcap/tiflow/dm/pkg/utils" 29 "github.com/tikv/pd/pkg/utils/tempurl" 30 clientv3 "go.etcd.io/etcd/client/v3" 31 "go.etcd.io/etcd/server/v3/embed" 32 ) 33 34 var _ = check.Suite(&testEtcdSuite{}) 35 36 type testEtcdSuite struct{} 37 38 func (t *testEtcdSuite) SetUpSuite(c *check.C) { 39 // initialized the logger to make genEmbedEtcdConfig working. 40 c.Assert(log.InitLogger(&log.Config{}), check.IsNil) 41 } 42 43 func (t *testEtcdSuite) TestStartEtcdFail(c *check.C) { 44 cfgCluster := NewConfig() 45 cfgCluster.Name = "dm-master-1" 46 cfgCluster.DataDir = c.MkDir() 47 cfgCluster.MasterAddr = tempurl.Alloc()[len("http://"):] 48 cfgCluster.PeerUrls = tempurl.Alloc() 49 c.Assert(cfgCluster.adjust(), check.IsNil) 50 51 // add another non-existing member for bootstrapping. 52 cfgCluster.InitialCluster = fmt.Sprintf("%s=%s,%s=%s", 53 cfgCluster.Name, cfgCluster.AdvertisePeerUrls, 54 "dm-master-2", tempurl.Alloc()) 55 c.Assert(cfgCluster.adjust(), check.IsNil) 56 57 // start an etcd cluster 58 cfgClusterEtcd := genEmbedEtcdConfigWithLogger("info") 59 cfgClusterEtcd, err := cfgCluster.genEmbedEtcdConfig(cfgClusterEtcd) 60 c.Assert(err, check.IsNil) 61 e, err := startEtcd(cfgClusterEtcd, nil, nil, 3*time.Second) 62 c.Assert(terror.ErrMasterStartEmbedEtcdFail.Equal(err), check.IsTrue) 63 c.Assert(e, check.IsNil) 64 } 65 66 func (t *testEtcdSuite) TestPrepareJoinEtcd(c *check.C) { 67 cfgCluster := NewConfig() // used to start an etcd cluster 68 cfgCluster.Name = "dm-master-1" 69 cfgCluster.DataDir = c.MkDir() 70 cfgCluster.MasterAddr = tempurl.Alloc()[len("http://"):] 71 cfgCluster.AdvertiseAddr = cfgCluster.MasterAddr 72 cfgCluster.PeerUrls = tempurl.Alloc() 73 c.Assert(cfgCluster.adjust(), check.IsNil) 74 cfgClusterEtcd := genEmbedEtcdConfigWithLogger("info") 75 cfgClusterEtcd, err := cfgCluster.genEmbedEtcdConfig(cfgClusterEtcd) 76 c.Assert(err, check.IsNil) 77 78 cfgBefore := t.cloneConfig(cfgCluster) // before `prepareJoinEtcd` applied 79 cfgBefore.DataDir = c.MkDir() // overwrite some config items 80 cfgBefore.MasterAddr = tempurl.Alloc()[len("http://"):] 81 cfgBefore.AdvertiseAddr = cfgBefore.MasterAddr 82 cfgBefore.PeerUrls = tempurl.Alloc() 83 cfgBefore.AdvertisePeerUrls = cfgBefore.PeerUrls 84 c.Assert(cfgBefore.adjust(), check.IsNil) 85 86 cfgAfter := t.cloneConfig(cfgBefore) // after `prepareJoinEtcd applied 87 88 joinCluster := cfgCluster.MasterAddr 89 joinFP := filepath.Join(cfgBefore.DataDir, "join") 90 memberDP := filepath.Join(cfgBefore.DataDir, "member") 91 92 // not set `join`, do nothing 93 c.Assert(prepareJoinEtcd(cfgAfter), check.IsNil) 94 c.Assert(cfgAfter, check.DeepEquals, cfgBefore) 95 96 // try to join self 97 cfgAfter.Join = cfgAfter.MasterAddr 98 err = prepareJoinEtcd(cfgAfter) 99 c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) 100 c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: join self.*is forbidden.*") 101 102 // update `join` to a valid item 103 cfgBefore.Join = joinCluster 104 105 // join with persistent data 106 c.Assert(os.WriteFile(joinFP, []byte(joinCluster), privateDirMode), check.IsNil) 107 cfgAfter = t.cloneConfig(cfgBefore) 108 c.Assert(prepareJoinEtcd(cfgAfter), check.IsNil) 109 c.Assert(cfgAfter.InitialCluster, check.Equals, joinCluster) 110 c.Assert(cfgAfter.InitialClusterState, check.Equals, embed.ClusterStateFlagExisting) 111 c.Assert(os.Remove(joinFP), check.IsNil) // remove the persistent data 112 113 // join with invalid persistent data 114 c.Assert(os.Mkdir(joinFP, privateDirMode), check.IsNil) // use directory as invalid persistent data (file) 115 cfgAfter = t.cloneConfig(cfgBefore) 116 err = prepareJoinEtcd(cfgAfter) 117 c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) 118 c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: read persistent join data.*") 119 c.Assert(os.Remove(joinFP), check.IsNil) // remove the persistent data 120 c.Assert(cfgAfter, check.DeepEquals, cfgBefore) // not changed 121 122 // restart with previous data 123 c.Assert(os.Mkdir(memberDP, privateDirMode), check.IsNil) 124 c.Assert(os.Mkdir(filepath.Join(memberDP, "wal"), privateDirMode), check.IsNil) 125 c.Assert(prepareJoinEtcd(cfgAfter), check.IsNil) 126 c.Assert(cfgAfter.InitialCluster, check.Equals, "") 127 c.Assert(cfgAfter.InitialClusterState, check.Equals, embed.ClusterStateFlagExisting) 128 c.Assert(os.RemoveAll(memberDP), check.IsNil) // remove previous data 129 130 // start an etcd cluster 131 e1, err := startEtcd(cfgClusterEtcd, nil, nil, etcdStartTimeout) 132 c.Assert(err, check.IsNil) 133 defer e1.Close() 134 135 // same `name`, duplicate 136 cfgAfter = t.cloneConfig(cfgBefore) 137 err = prepareJoinEtcd(cfgAfter) 138 c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) 139 c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: missing data or joining a duplicate member.*") 140 c.Assert(cfgAfter, check.DeepEquals, cfgBefore) // not changed 141 142 // set a different name 143 cfgBefore.Name = "dm-master-2" 144 145 // add member with invalid `advertise-peer-urls` 146 cfgAfter = t.cloneConfig(cfgBefore) 147 cfgAfter.AdvertisePeerUrls = "invalid-advertise-peer-urls" 148 err = prepareJoinEtcd(cfgAfter) 149 c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) 150 c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: add member.*") 151 152 // join with existing cluster 153 cfgAfter = t.cloneConfig(cfgBefore) 154 c.Assert(prepareJoinEtcd(cfgAfter), check.IsNil) 155 c.Assert(cfgAfter.InitialClusterState, check.Equals, embed.ClusterStateFlagExisting) 156 obtainClusters := strings.Split(cfgAfter.InitialCluster, ",") 157 sort.Strings(obtainClusters) 158 expectedClusters := []string{ 159 cfgCluster.InitialCluster, 160 fmt.Sprintf("%s=%s", cfgAfter.Name, cfgAfter.PeerUrls), 161 } 162 sort.Strings(expectedClusters) 163 c.Assert(obtainClusters, check.DeepEquals, expectedClusters) 164 165 // join data should exist now 166 joinData, err := os.ReadFile(joinFP) 167 c.Assert(err, check.IsNil) 168 c.Assert(string(joinData), check.Equals, cfgAfter.InitialCluster) 169 170 // prepare join done, but has not start the etcd to complete the join, can not join anymore. 171 cfgAfter2 := t.cloneConfig(cfgBefore) 172 cfgAfter2.Name = "dm-master-3" // overwrite some items 173 cfgAfter2.DataDir = c.MkDir() 174 cfgAfter2.MasterAddr = tempurl.Alloc()[len("http://"):] 175 cfgAfter2.AdvertiseAddr = cfgAfter2.MasterAddr 176 cfgAfter2.PeerUrls = tempurl.Alloc() 177 cfgAfter2.AdvertisePeerUrls = cfgAfter2.PeerUrls 178 err = prepareJoinEtcd(cfgAfter2) 179 c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) 180 c.Assert(err, check.ErrorMatches, ".*context deadline exceeded.*") 181 182 // start the joining etcd 183 cfgAfterEtcd := genEmbedEtcdConfigWithLogger("info") 184 cfgAfterEtcd, err = cfgAfter.genEmbedEtcdConfig(cfgAfterEtcd) 185 c.Assert(err, check.IsNil) 186 e2, err := startEtcd(cfgAfterEtcd, nil, nil, etcdStartTimeout) 187 c.Assert(err, check.IsNil) 188 defer e2.Close() 189 190 // try join again 191 for i := 0; i < 20; i++ { 192 err = prepareJoinEtcd(cfgAfter2) 193 if err == nil { 194 break 195 } 196 // for `etcdserver: unhealthy cluster`, try again later 197 c.Assert(terror.ErrMasterJoinEmbedEtcdFail.Equal(err), check.IsTrue) 198 c.Assert(err, check.ErrorMatches, ".*fail to join embed etcd: add member.*: etcdserver: unhealthy cluster.*") 199 time.Sleep(500 * time.Millisecond) 200 } 201 c.Assert(err, check.IsNil) 202 } 203 204 func (t *testEtcdSuite) cloneConfig(cfg *Config) *Config { 205 clone := NewConfig() 206 *clone = *cfg 207 return clone 208 } 209 210 func (t *testEtcdSuite) TestIsDirExist(c *check.C) { 211 d := "./directory-not-exists" 212 c.Assert(isDirExist(d), check.IsFalse) 213 214 // empty directory 215 d = c.MkDir() 216 c.Assert(isDirExist(d), check.IsTrue) 217 218 // data exists in the directory 219 for i := 1; i <= 3; i++ { 220 fp := filepath.Join(d, fmt.Sprintf("file.%d", i)) 221 c.Assert(os.WriteFile(fp, nil, privateDirMode), check.IsNil) 222 c.Assert(isDirExist(d), check.IsTrue) 223 c.Assert(isDirExist(fp), check.IsFalse) // not a directory 224 } 225 } 226 227 func (t *testEtcdSuite) TestEtcdAutoCompaction(c *check.C) { 228 cfg := NewConfig() 229 c.Assert(cfg.FromContent(SampleConfig), check.IsNil) 230 231 cfg.DataDir = c.MkDir() 232 cfg.MasterAddr = tempurl.Alloc()[len("http://"):] 233 cfg.AdvertiseAddr = cfg.MasterAddr 234 cfg.AutoCompactionRetention = "1s" 235 236 ctx, cancel := context.WithCancel(context.Background()) 237 s := NewServer(cfg) 238 c.Assert(s.Start(ctx), check.IsNil) 239 240 etcdCli, err := clientv3.New(clientv3.Config{ 241 Endpoints: []string{cfg.MasterAddr}, 242 }) 243 c.Assert(err, check.IsNil) 244 245 for i := 0; i < 100; i++ { 246 _, err = etcdCli.Put(ctx, "key", fmt.Sprintf("%03d", i)) 247 c.Assert(err, check.IsNil) 248 } 249 time.Sleep(3 * time.Second) 250 resp, err := etcdCli.Get(ctx, "key") 251 c.Assert(err, check.IsNil) 252 253 utils.WaitSomething(10, time.Second, func() bool { 254 _, err = etcdCli.Get(ctx, "key", clientv3.WithRev(resp.Header.Revision-1)) 255 return err != nil 256 }) 257 c.Assert(err, check.ErrorMatches, ".*required revision has been compacted.*") 258 259 cancel() 260 s.Close() 261 }