github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/master/bootstrap_test.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package master 15 16 import ( 17 "context" 18 "os" 19 "path/filepath" 20 "strconv" 21 "strings" 22 "testing" 23 "time" 24 25 "github.com/golang/mock/gomock" 26 . "github.com/pingcap/check" 27 "github.com/pingcap/tiflow/dm/config" 28 "github.com/pingcap/tiflow/dm/master/workerrpc" 29 "github.com/pingcap/tiflow/dm/pb" 30 "github.com/pingcap/tiflow/dm/pbmock" 31 tcontext "github.com/pingcap/tiflow/dm/pkg/context" 32 "github.com/pingcap/tiflow/dm/pkg/ha" 33 "github.com/pingcap/tiflow/dm/pkg/log" 34 "github.com/pingcap/tiflow/dm/pkg/terror" 35 filter "github.com/pingcap/tiflow/pkg/binlog-filter" 36 clientv3 "go.etcd.io/etcd/client/v3" 37 "go.etcd.io/etcd/tests/v3/integration" 38 ) 39 40 type testMaster struct { 41 workerClients map[string]workerrpc.Client 42 saveMaxRetryNum int 43 electionTTLBackup int 44 testT *testing.T 45 46 testEtcdCluster *integration.ClusterV3 47 etcdTestCli *clientv3.Client 48 } 49 50 var testSuite = SerialSuites(&testMaster{}) 51 52 func TestMaster(t *testing.T) { 53 err := log.InitLogger(&log.Config{}) 54 if err != nil { 55 t.Fatal(err) 56 } 57 pwd, err = os.Getwd() 58 if err != nil { 59 t.Fatal(err) 60 } 61 integration.BeforeTestExternal(t) 62 // inject *testing.T to testMaster 63 s := testSuite.(*testMaster) 64 s.testT = t 65 66 TestingT(t) 67 } 68 69 func (t *testMaster) SetUpSuite(c *C) { 70 c.Assert(log.InitLogger(&log.Config{}), IsNil) 71 t.workerClients = make(map[string]workerrpc.Client) 72 t.saveMaxRetryNum = maxRetryNum 73 t.electionTTLBackup = electionTTL 74 electionTTL = 3 75 maxRetryNum = 2 76 checkAndAdjustSourceConfigForDMCtlFunc = checkAndNoAdjustSourceConfigMock 77 } 78 79 func (t *testMaster) TearDownSuite(c *C) { 80 maxRetryNum = t.saveMaxRetryNum 81 electionTTL = t.electionTTLBackup 82 checkAndAdjustSourceConfigForDMCtlFunc = checkAndAdjustSourceConfig 83 } 84 85 func (t *testMaster) SetUpTest(c *C) { 86 t.testEtcdCluster = integration.NewClusterV3(t.testT, &integration.ClusterConfig{Size: 1}) 87 t.etcdTestCli = t.testEtcdCluster.RandClient() 88 t.clearEtcdEnv(c) 89 } 90 91 func (t *testMaster) TearDownTest(c *C) { 92 t.clearEtcdEnv(c) 93 t.testEtcdCluster.Terminate(t.testT) 94 } 95 96 func (t *testMaster) clearEtcdEnv(c *C) { 97 c.Assert(ha.ClearTestInfoOperation(t.etcdTestCli), IsNil) 98 } 99 100 func testDefaultMasterServerWithC(c *C) *Server { 101 cfg := NewConfig() 102 err := cfg.FromContent(SampleConfig) 103 c.Assert(err, IsNil) 104 cfg.DataDir = c.MkDir() 105 server := NewServer(cfg) 106 server.leader.Store(oneselfLeader) 107 go server.ap.Start(context.Background()) 108 109 return server 110 } 111 112 func (t *testMaster) TestCollectSourceConfigFilesV1Import(c *C) { 113 s := testDefaultMasterServerWithC(c) 114 defer s.Close() 115 s.cfg.V1SourcesPath = c.MkDir() 116 117 ctx, cancel := context.WithCancel(context.Background()) 118 defer cancel() 119 120 tctx := tcontext.NewContext(ctx, log.L()) 121 122 // no source file exist. 123 cfgs, err := s.collectSourceConfigFilesV1Import(tctx) 124 c.Assert(err, IsNil) 125 c.Assert(cfgs, HasLen, 0) 126 127 host := os.Getenv("MYSQL_HOST") 128 if host == "" { 129 host = "127.0.0.1" 130 } 131 port, _ := strconv.Atoi(os.Getenv("MYSQL_PORT")) 132 if port == 0 { 133 port = 3306 134 } 135 user := os.Getenv("MYSQL_USER") 136 if user == "" { 137 user = "root" 138 } 139 password := os.Getenv("MYSQL_PSWD") 140 141 cfg1, err := config.SourceCfgFromYaml(config.SampleSourceConfig) 142 c.Assert(err, IsNil) 143 // fix empty map after marshal/unmarshal becomes nil 144 cfg1.From.Adjust() 145 cfg1.Tracer = map[string]interface{}{} 146 cfg1.Filters = []*filter.BinlogEventRule{} 147 cfg1.From.Host = host 148 cfg1.From.Port = port 149 cfg1.From.User = user 150 cfg1.From.Password = password 151 cfg1.RelayDir = "relay-dir" 152 c.Assert(checkAndAdjustSourceConfigForDMCtlFunc(ctx, cfg1), IsNil) // adjust source config. 153 cfg2 := cfg1.Clone() 154 cfg2.SourceID = "mysql-replica-02" 155 156 // write into source files. 157 data1, err := cfg1.Yaml() 158 c.Assert(err, IsNil) 159 c.Assert(os.WriteFile(filepath.Join(s.cfg.V1SourcesPath, "source1.yaml"), []byte(data1), 0o644), IsNil) 160 data2, err := cfg2.Yaml() 161 c.Assert(err, IsNil) 162 c.Assert(os.WriteFile(filepath.Join(s.cfg.V1SourcesPath, "source2.yaml"), []byte(data2), 0o644), IsNil) 163 164 // collect again, two configs exist. 165 cfgs, err = s.collectSourceConfigFilesV1Import(tctx) 166 c.Assert(err, IsNil) 167 for _, cfg := range cfgs { 168 cfg.From.Session = nil 169 } 170 c.Assert(cfgs, HasLen, 2) 171 c.Assert(cfgs[cfg1.SourceID], DeepEquals, cfg1) 172 c.Assert(cfgs[cfg2.SourceID], DeepEquals, cfg2) 173 174 // put a invalid source file. 175 c.Assert(os.WriteFile(filepath.Join(s.cfg.V1SourcesPath, "invalid.yaml"), []byte("invalid-source-data"), 0o644), IsNil) 176 cfgs, err = s.collectSourceConfigFilesV1Import(tctx) 177 c.Assert(terror.ErrConfigYamlTransform.Equal(err), IsTrue) 178 c.Assert(cfgs, HasLen, 0) 179 } 180 181 func (t *testMaster) TestWaitWorkersReadyV1Import(c *C) { 182 oldWaitWorkerV1Timeout := waitWorkerV1Timeout 183 defer func() { 184 waitWorkerV1Timeout = oldWaitWorkerV1Timeout 185 }() 186 waitWorkerV1Timeout = 5 * time.Second 187 188 ctx, cancel := context.WithCancel(context.Background()) 189 defer cancel() 190 191 tctx := tcontext.NewContext(ctx, log.L()) 192 193 s := testDefaultMasterServerWithC(c) 194 defer s.Close() 195 s.cfg.V1SourcesPath = c.MkDir() 196 c.Assert(s.scheduler.Start(ctx, t.etcdTestCli), IsNil) 197 198 cfg1, err := config.SourceCfgFromYaml(config.SampleSourceConfig) 199 c.Assert(err, IsNil) 200 cfg2 := cfg1.Clone() 201 cfg2.SourceID = "mysql-replica-02" 202 cfgs := map[string]*config.SourceConfig{ 203 cfg1.SourceID: cfg1, 204 cfg2.SourceID: cfg2, 205 } 206 207 // no worker registered, timeout. 208 err = s.waitWorkersReadyV1Import(tctx, cfgs) 209 c.Assert(err, ErrorMatches, ".*wait for DM-worker instances timeout.*") 210 211 // register one worker. 212 req1 := &pb.RegisterWorkerRequest{ 213 Name: "worker-1", 214 Address: "127.0.0.1:8262", 215 } 216 resp1, err := s.RegisterWorker(ctx, req1) 217 c.Assert(err, IsNil) 218 c.Assert(resp1.Result, IsTrue) 219 220 // still timeout because no enough workers. 221 err = s.waitWorkersReadyV1Import(tctx, cfgs) 222 c.Assert(err, ErrorMatches, ".*wait for DM-worker instances timeout.*") 223 224 // register another worker. 225 go func() { 226 time.Sleep(1500 * time.Millisecond) 227 req2 := &pb.RegisterWorkerRequest{ 228 Name: "worker-2", 229 Address: "127.0.0.1:8263", 230 } 231 resp2, err2 := s.RegisterWorker(ctx, req2) 232 c.Assert(err2, IsNil) 233 c.Assert(resp2.Result, IsTrue) 234 }() 235 236 err = s.waitWorkersReadyV1Import(tctx, cfgs) 237 c.Assert(err, IsNil) 238 } 239 240 func (t *testMaster) TestSubtaskCfgsStagesV1Import(c *C) { 241 var ( 242 worker1Name = "worker-1" 243 worker1Addr = "127.0.0.1:8262" 244 worker2Name = "worker-2" 245 worker2Addr = "127.0.0.1:8263" 246 taskName1 = "task-1" 247 taskName2 = "task-2" 248 sourceID1 = "mysql-replica-01" 249 sourceID2 = "mysql-replica-02" 250 ) 251 252 cfg11 := config.NewSubTaskConfig() 253 c.Assert(cfg11.Decode(config.SampleSubtaskConfig, true), IsNil) 254 cfg11.Dir = "./dump_data" 255 cfg11.ChunkFilesize = "64" 256 cfg11.Name = taskName1 257 cfg11.SourceID = sourceID1 258 c.Assert(cfg11.Adjust(true), IsNil) // adjust again after manually modified some items. 259 data11, err := cfg11.Toml() 260 c.Assert(err, IsNil) 261 data11 = strings.ReplaceAll(data11, `chunk-filesize = "64"`, `chunk-filesize = 64`) // different type between v1.0.x and v2.0.x. 262 263 cfg12, err := cfg11.Clone() 264 c.Assert(err, IsNil) 265 cfg12.SourceID = sourceID2 266 data12, err := cfg12.Toml() 267 c.Assert(err, IsNil) 268 data12 = strings.ReplaceAll(data12, `chunk-filesize = "64"`, `chunk-filesize = 64`) 269 270 cfg21, err := cfg11.Clone() 271 c.Assert(err, IsNil) 272 cfg21.Dir = "./dump_data" 273 cfg21.Name = taskName2 274 c.Assert(cfg21.Adjust(true), IsNil) 275 data21, err := cfg21.Toml() 276 c.Assert(err, IsNil) 277 data21 = strings.ReplaceAll(data21, `chunk-filesize = "64"`, `chunk-filesize = 64`) 278 279 cfg22, err := cfg21.Clone() 280 c.Assert(err, IsNil) 281 cfg22.SourceID = sourceID2 282 data22, err := cfg22.Toml() 283 c.Assert(err, IsNil) 284 data22 = strings.ReplaceAll(data22, `chunk-filesize = "64"`, `chunk-filesize = 64`) 285 286 ctx, cancel := context.WithCancel(context.Background()) 287 defer cancel() 288 tctx := tcontext.NewContext(ctx, log.L()) 289 290 s := testDefaultMasterServerWithC(c) 291 defer s.Close() 292 s.cfg.V1SourcesPath = c.MkDir() 293 c.Assert(s.scheduler.Start(ctx, t.etcdTestCli), IsNil) 294 295 // no workers exist, no config and status need to get. 296 cfgs, stages, err := s.getSubtaskCfgsStagesV1Import(tctx) 297 c.Assert(err, IsNil) 298 c.Assert(cfgs, HasLen, 0) 299 c.Assert(stages, HasLen, 0) 300 301 ctrl := gomock.NewController(c) 302 defer ctrl.Finish() 303 mockWCli1 := pbmock.NewMockWorkerClient(ctrl) 304 mockWCli2 := pbmock.NewMockWorkerClient(ctrl) 305 c.Assert(s.scheduler.AddWorker(worker1Name, worker1Addr), IsNil) 306 c.Assert(s.scheduler.AddWorker(worker2Name, worker2Addr), IsNil) 307 s.scheduler.SetWorkerClientForTest(worker1Name, newMockRPCClient(mockWCli1)) 308 s.scheduler.SetWorkerClientForTest(worker2Name, newMockRPCClient(mockWCli2)) 309 310 mockWCli1.EXPECT().OperateV1Meta( 311 gomock.Any(), 312 &pb.OperateV1MetaRequest{ 313 Op: pb.V1MetaOp_GetV1Meta, 314 }, 315 ).Return(&pb.OperateV1MetaResponse{ 316 Result: true, 317 Meta: map[string]*pb.V1SubTaskMeta{ 318 taskName1: { 319 Op: pb.TaskOp_Start, 320 Stage: pb.Stage_Running, 321 Name: taskName1, 322 Task: []byte(data11), 323 }, 324 taskName2: { 325 Op: pb.TaskOp_Pause, 326 Stage: pb.Stage_Paused, 327 Name: taskName2, 328 Task: []byte(data21), 329 }, 330 }, 331 }, nil) 332 333 mockWCli2.EXPECT().OperateV1Meta( 334 gomock.Any(), 335 &pb.OperateV1MetaRequest{ 336 Op: pb.V1MetaOp_GetV1Meta, 337 }, 338 ).Return(&pb.OperateV1MetaResponse{ 339 Result: true, 340 Meta: map[string]*pb.V1SubTaskMeta{ 341 taskName1: { 342 Op: pb.TaskOp_Resume, 343 Stage: pb.Stage_Running, 344 Name: taskName1, 345 Task: []byte(data12), 346 }, 347 taskName2: { 348 Op: pb.TaskOp_Start, 349 Stage: pb.Stage_Running, 350 Name: taskName2, 351 Task: []byte(data22), 352 }, 353 }, 354 }, nil) 355 356 // all workers return valid config and stage. 357 cfgs, stages, err = s.getSubtaskCfgsStagesV1Import(tctx) 358 c.Assert(err, IsNil) 359 c.Assert(cfgs, HasLen, 2) 360 c.Assert(stages, HasLen, 2) 361 c.Assert(cfgs[taskName1], HasLen, 2) 362 c.Assert(cfgs[taskName2], HasLen, 2) 363 c.Assert(cfgs[taskName1][sourceID1], DeepEquals, *cfg11) 364 c.Assert(cfgs[taskName1][sourceID2], DeepEquals, *cfg12) 365 c.Assert(cfgs[taskName2][sourceID1], DeepEquals, *cfg21) 366 c.Assert(cfgs[taskName2][sourceID2], DeepEquals, *cfg22) 367 c.Assert(stages[taskName1], HasLen, 2) 368 c.Assert(stages[taskName2], HasLen, 2) 369 c.Assert(stages[taskName1][sourceID1], Equals, pb.Stage_Running) 370 c.Assert(stages[taskName1][sourceID2], Equals, pb.Stage_Running) 371 c.Assert(stages[taskName2][sourceID1], Equals, pb.Stage_Paused) 372 c.Assert(stages[taskName2][sourceID2], Equals, pb.Stage_Running) 373 374 // one of workers return invalid config. 375 mockWCli1.EXPECT().OperateV1Meta( 376 gomock.Any(), 377 &pb.OperateV1MetaRequest{ 378 Op: pb.V1MetaOp_GetV1Meta, 379 }, 380 ).Return(&pb.OperateV1MetaResponse{ 381 Result: true, 382 Meta: map[string]*pb.V1SubTaskMeta{ 383 taskName1: { 384 Op: pb.TaskOp_Start, 385 Stage: pb.Stage_Running, 386 Name: taskName1, 387 Task: []byte(data11), 388 }, 389 taskName2: { 390 Op: pb.TaskOp_Pause, 391 Stage: pb.Stage_Paused, 392 Name: taskName2, 393 Task: []byte(data21), 394 }, 395 }, 396 }, nil) 397 mockWCli2.EXPECT().OperateV1Meta( 398 gomock.Any(), 399 &pb.OperateV1MetaRequest{ 400 Op: pb.V1MetaOp_GetV1Meta, 401 }, 402 ).Return(&pb.OperateV1MetaResponse{ 403 Result: true, 404 Meta: map[string]*pb.V1SubTaskMeta{ 405 taskName1: { 406 Op: pb.TaskOp_Resume, 407 Stage: pb.Stage_Running, 408 Name: taskName1, 409 Task: []byte("invalid subtask data"), 410 }, 411 taskName2: { 412 Op: pb.TaskOp_Start, 413 Stage: pb.Stage_Running, 414 Name: taskName2, 415 Task: []byte(data22), 416 }, 417 }, 418 }, nil) 419 cfgs, stages, err = s.getSubtaskCfgsStagesV1Import(tctx) 420 c.Assert(err, ErrorMatches, ".*fail to get subtask config and stage.*") 421 c.Assert(cfgs, HasLen, 0) 422 c.Assert(stages, HasLen, 0) 423 } 424 425 func checkAndNoAdjustSourceConfigMock(ctx context.Context, cfg *config.SourceConfig) error { 426 if _, err := cfg.Yaml(); err != nil { 427 return err 428 } 429 return cfg.Verify() 430 }