github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/worker/source_worker_test.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package worker 15 16 import ( 17 "context" 18 "fmt" 19 "sync" 20 "sync/atomic" 21 "testing" 22 "time" 23 24 "github.com/DATA-DOG/go-sqlmock" 25 . "github.com/pingcap/check" 26 "github.com/pingcap/errors" 27 "github.com/pingcap/failpoint" 28 "github.com/pingcap/tiflow/dm/config" 29 "github.com/pingcap/tiflow/dm/pb" 30 "github.com/pingcap/tiflow/dm/pkg/conn" 31 "github.com/pingcap/tiflow/dm/pkg/ha" 32 "github.com/pingcap/tiflow/dm/pkg/log" 33 "github.com/pingcap/tiflow/dm/pkg/terror" 34 "github.com/pingcap/tiflow/dm/pkg/utils" 35 "github.com/pingcap/tiflow/dm/relay" 36 "github.com/pingcap/tiflow/dm/syncer" 37 "github.com/pingcap/tiflow/dm/unit" 38 "github.com/stretchr/testify/require" 39 "github.com/tikv/pd/pkg/utils/tempurl" 40 clientv3 "go.etcd.io/etcd/client/v3" 41 ) 42 43 var emptyWorkerStatusInfoJSONLength = 25 44 45 func mockShowMasterStatus(mockDB sqlmock.Sqlmock) { 46 rows := mockDB.NewRows([]string{"File", "Position", "Binlog_Do_DB", "Binlog_Ignore_DB", "Executed_Gtid_Set"}).AddRow( 47 "mysql-bin.000009", 11232, nil, nil, "074be7f4-f0f1-11ea-95bd-0242ac120002:1-699", 48 ) 49 mockDB.ExpectQuery(`SHOW MASTER STATUS`).WillReturnRows(rows) 50 } 51 52 func mockShowMasterStatusNoRows(mockDB sqlmock.Sqlmock) { 53 rows := mockDB.NewRows([]string{"File", "Position", "Binlog_Do_DB", "Binlog_Ignore_DB", "Executed_Gtid_Set"}) 54 mockDB.ExpectQuery(`SHOW MASTER STATUS`).WillReturnRows(rows) 55 } 56 57 type testServer2 struct{} 58 59 var _ = Suite(&testServer2{}) 60 61 func (t *testServer2) SetUpSuite(c *C) { 62 err := log.InitLogger(&log.Config{}) 63 c.Assert(err, IsNil) 64 65 getMinLocForSubTaskFunc = getFakeLocForSubTask 66 c.Assert(failpoint.Enable("github.com/pingcap/tiflow/dm/worker/MockGetSourceCfgFromETCD", `return(true)`), IsNil) 67 c.Assert(failpoint.Enable("github.com/pingcap/tiflow/dm/worker/SkipRefreshFromETCDInUT", `return()`), IsNil) 68 } 69 70 func (t *testServer2) TearDownSuite(c *C) { 71 getMinLocForSubTaskFunc = getMinLocForSubTask 72 c.Assert(failpoint.Disable("github.com/pingcap/tiflow/dm/worker/MockGetSourceCfgFromETCD"), IsNil) 73 c.Assert(failpoint.Disable("github.com/pingcap/tiflow/dm/worker/SkipRefreshFromETCDInUT"), IsNil) 74 } 75 76 func (t *testServer2) TestTaskAutoResume(c *C) { 77 var ( 78 taskName = "sub-task-name" 79 port = 8263 80 ) 81 hostName := "127.0.0.1:18261" 82 etcdDir := c.MkDir() 83 ETCD, err := createMockETCD(etcdDir, "http://"+hostName) 84 c.Assert(err, IsNil) 85 defer ETCD.Close() 86 87 cfg := NewConfig() 88 c.Assert(cfg.Parse([]string{"-config=./dm-worker.toml"}), IsNil) 89 cfg.Join = hostName 90 sourceConfig := loadSourceConfigWithoutPassword(c) 91 sourceConfig.Checker.CheckEnable = true 92 sourceConfig.Checker.CheckInterval = config.Duration{Duration: 40 * time.Millisecond} 93 sourceConfig.Checker.BackoffMin = config.Duration{Duration: 20 * time.Millisecond} 94 sourceConfig.Checker.BackoffMax = config.Duration{Duration: 1 * time.Second} 95 96 cfg.WorkerAddr = fmt.Sprintf(":%d", port) 97 98 dir := c.MkDir() 99 sourceConfig.RelayDir = dir 100 sourceConfig.MetaDir = dir 101 sourceConfig.EnableRelay = true 102 103 NewRelayHolder = NewDummyRelayHolder 104 defer func() { 105 NewRelayHolder = NewRealRelayHolder 106 }() 107 108 c.Assert(failpoint.Enable("github.com/pingcap/tiflow/dm/dumpling/dumpUnitProcessForever", `return()`), IsNil) 109 //nolint:errcheck 110 defer failpoint.Disable("github.com/pingcap/tiflow/dm/dumpling/dumpUnitProcessForever") 111 c.Assert(failpoint.Enable("github.com/pingcap/tiflow/dm/worker/mockCreateUnitsDumpOnly", `return(true)`), IsNil) 112 //nolint:errcheck 113 defer failpoint.Disable("github.com/pingcap/tiflow/dm/worker/mockCreateUnitsDumpOnly") 114 c.Assert(failpoint.Enable("github.com/pingcap/tiflow/dm/loader/ignoreLoadCheckpointErr", `return()`), IsNil) 115 //nolint:errcheck 116 defer failpoint.Disable("github.com/pingcap/tiflow/dm/loader/ignoreLoadCheckpointErr") 117 c.Assert(failpoint.Enable("github.com/pingcap/tiflow/dm/dumpling/dumpUnitProcessWithError", `return("test auto resume inject error")`), IsNil) 118 //nolint:errcheck 119 defer failpoint.Disable("github.com/pingcap/tiflow/dm/dumpling/dumpUnitProcessWithError") 120 121 s := NewServer(cfg) 122 defer s.Close() 123 go func() { 124 c.Assert(s.Start(), IsNil) 125 }() 126 c.Assert(utils.WaitSomething(10, 100*time.Millisecond, func() bool { 127 if s.closed.Load() { 128 return false 129 } 130 w, err2 := s.getOrStartWorker(sourceConfig, true) 131 c.Assert(err2, IsNil) 132 // we set sourceConfig.EnableRelay = true above 133 c.Assert(w.EnableRelay(false), IsNil) 134 c.Assert(w.EnableHandleSubtasks(), IsNil) 135 return true 136 }), IsTrue) 137 // start task 138 var subtaskCfg config.SubTaskConfig 139 c.Assert(subtaskCfg.Decode(config.SampleSubtaskConfig, true), IsNil) 140 c.Assert(err, IsNil) 141 subtaskCfg.Mode = "full" 142 subtaskCfg.Timezone = "UTC" 143 c.Assert(s.getSourceWorker(true).StartSubTask(&subtaskCfg, pb.Stage_Running, pb.Stage_Stopped, true), IsNil) 144 145 // check task in paused state 146 c.Assert(utils.WaitSomething(100, 100*time.Millisecond, func() bool { 147 subtaskStatus, _, _ := s.getSourceWorker(true).QueryStatus(context.Background(), taskName) 148 for _, st := range subtaskStatus { 149 if st.Name == taskName && st.Stage == pb.Stage_Paused { 150 return true 151 } 152 } 153 return false 154 }), IsTrue) 155 //nolint:errcheck 156 failpoint.Disable("github.com/pingcap/tiflow/dm/dumpling/dumpUnitProcessWithError") 157 158 rtsc, ok := s.getSourceWorker(true).taskStatusChecker.(*realTaskStatusChecker) 159 c.Assert(ok, IsTrue) 160 defer func() { 161 // close multiple time 162 rtsc.Close() 163 rtsc.Close() 164 }() 165 166 // check task will be auto resumed 167 c.Assert(utils.WaitSomething(10, 100*time.Millisecond, func() bool { 168 sts, _, _ := s.getSourceWorker(true).QueryStatus(context.Background(), taskName) 169 for _, st := range sts { 170 if st.Name == taskName && st.Stage == pb.Stage_Running { 171 return true 172 } 173 } 174 c.Log(sts) 175 return false 176 }), IsTrue) 177 } 178 179 type testWorkerFunctionalities struct { 180 createUnitCount int32 181 expectedCreateUnitCount int32 182 } 183 184 var _ = Suite(&testWorkerFunctionalities{}) 185 186 func (t *testWorkerFunctionalities) SetUpSuite(c *C) { 187 NewRelayHolder = NewDummyRelayHolder 188 NewSubTask = NewRealSubTask 189 createUnits = func(cfg *config.SubTaskConfig, etcdClient *clientv3.Client, worker string, relay relay.Process) []unit.Unit { 190 atomic.AddInt32(&t.createUnitCount, 1) 191 mockDumper := NewMockUnit(pb.UnitType_Dump) 192 mockLoader := NewMockUnit(pb.UnitType_Load) 193 mockSync := NewMockUnit(pb.UnitType_Sync) 194 return []unit.Unit{mockDumper, mockLoader, mockSync} 195 } 196 getMinLocForSubTaskFunc = getFakeLocForSubTask 197 c.Assert(failpoint.Enable("github.com/pingcap/tiflow/dm/worker/MockGetSourceCfgFromETCD", `return(true)`), IsNil) 198 } 199 200 func (t *testWorkerFunctionalities) TearDownSuite(c *C) { 201 NewRelayHolder = NewRealRelayHolder 202 NewSubTask = NewRealSubTask 203 createUnits = createRealUnits 204 getMinLocForSubTaskFunc = getMinLocForSubTask 205 c.Assert(failpoint.Disable("github.com/pingcap/tiflow/dm/worker/MockGetSourceCfgFromETCD"), IsNil) 206 } 207 208 func (t *testWorkerFunctionalities) TestWorkerFunctionalities(c *C) { 209 var ( 210 masterAddr = tempurl.Alloc()[len("http://"):] 211 keepAliveTTL = int64(1) 212 ) 213 etcdDir := c.MkDir() 214 ETCD, err := createMockETCD(etcdDir, "http://"+masterAddr) 215 c.Assert(err, IsNil) 216 defer ETCD.Close() 217 cfg := NewConfig() 218 c.Assert(cfg.Parse([]string{"-config=./dm-worker.toml"}), IsNil) 219 cfg.Join = masterAddr 220 cfg.KeepAliveTTL = keepAliveTTL 221 cfg.RelayKeepAliveTTL = keepAliveTTL 222 223 etcdCli, err := clientv3.New(clientv3.Config{ 224 Endpoints: GetJoinURLs(cfg.Join), 225 DialTimeout: dialTimeout, 226 DialKeepAliveTime: keepaliveTime, 227 DialKeepAliveTimeout: keepaliveTimeout, 228 }) 229 c.Assert(err, IsNil) 230 sourceCfg := loadSourceConfigWithoutPassword(c) 231 sourceCfg.EnableRelay = false 232 233 subtaskCfg := config.SubTaskConfig{} 234 err = subtaskCfg.Decode(config.SampleSubtaskConfig, true) 235 c.Assert(err, IsNil) 236 237 // start worker 238 w, err := NewSourceWorker(sourceCfg, etcdCli, "", "") 239 c.Assert(err, IsNil) 240 defer w.Stop(true) 241 go func() { 242 w.Start() 243 }() 244 c.Assert(utils.WaitSomething(50, 100*time.Millisecond, func() bool { 245 return !w.closed.Load() 246 }), IsTrue) 247 248 // test 1: when subTaskEnabled is false, switch on relay 249 c.Assert(w.subTaskEnabled.Load(), IsFalse) 250 t.testEnableRelay(c, w, etcdCli, sourceCfg, cfg) 251 252 // test2: when subTaskEnabled is false, switch off relay 253 c.Assert(w.subTaskEnabled.Load(), IsFalse) 254 t.testDisableRelay(c, w) 255 256 // test3: when relayEnabled is false, switch on subtask 257 c.Assert(w.relayEnabled.Load(), IsFalse) 258 259 t.testEnableHandleSubtasks(c, w, etcdCli, subtaskCfg, sourceCfg) 260 261 // test4: when subTaskEnabled is true, switch on relay 262 c.Assert(w.subTaskEnabled.Load(), IsTrue) 263 264 t.testEnableRelay(c, w, etcdCli, sourceCfg, cfg) 265 c.Assert(w.subTaskHolder.findSubTask(subtaskCfg.Name).cfg.UseRelay, IsTrue) 266 t.expectedCreateUnitCount++ 267 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 268 return atomic.LoadInt32(&t.createUnitCount) == t.expectedCreateUnitCount 269 }), IsTrue) 270 271 // test5: when subTaskEnabled is true, switch off relay 272 c.Assert(w.subTaskEnabled.Load(), IsTrue) 273 t.testDisableRelay(c, w) 274 275 c.Assert(w.subTaskHolder.findSubTask(subtaskCfg.Name).cfg.UseRelay, IsFalse) 276 t.expectedCreateUnitCount++ 277 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 278 return atomic.LoadInt32(&t.createUnitCount) == t.expectedCreateUnitCount 279 }), IsTrue) 280 281 // test6: when relayEnabled is false, switch off subtask 282 c.Assert(w.relayEnabled.Load(), IsFalse) 283 284 w.DisableHandleSubtasks() 285 c.Assert(w.subTaskEnabled.Load(), IsFalse) 286 287 // prepare for test7 & 8 288 t.testEnableRelay(c, w, etcdCli, sourceCfg, cfg) 289 // test7: when relayEnabled is true, switch on subtask 290 c.Assert(w.relayEnabled.Load(), IsTrue) 291 292 subtaskCfg2 := subtaskCfg 293 subtaskCfg2.Name = "sub-task-name-2" 294 // we already added subtaskCfg, so below EnableHandleSubtasks will find an extra subtask 295 t.expectedCreateUnitCount++ 296 t.testEnableHandleSubtasks(c, w, etcdCli, subtaskCfg2, sourceCfg) 297 c.Assert(w.subTaskHolder.findSubTask(subtaskCfg.Name).cfg.UseRelay, IsTrue) 298 c.Assert(w.subTaskHolder.findSubTask(subtaskCfg2.Name).cfg.UseRelay, IsTrue) 299 300 // test8: when relayEnabled is true, switch off subtask 301 c.Assert(w.relayEnabled.Load(), IsTrue) 302 303 w.DisableHandleSubtasks() 304 c.Assert(w.subTaskEnabled.Load(), IsFalse) 305 } 306 307 func (t *testWorkerFunctionalities) testEnableRelay(c *C, w *SourceWorker, etcdCli *clientv3.Client, 308 sourceCfg *config.SourceConfig, cfg *Config, 309 ) { 310 c.Assert(w.EnableRelay(false), IsNil) 311 312 c.Assert(w.relayEnabled.Load(), IsTrue) 313 c.Assert(w.relayHolder.Stage(), Equals, pb.Stage_New) 314 315 _, err := ha.PutSourceCfg(etcdCli, sourceCfg) 316 c.Assert(err, IsNil) 317 _, err = ha.PutRelayStageRelayConfigSourceBound(etcdCli, ha.NewRelayStage(pb.Stage_Running, sourceCfg.SourceID), 318 ha.NewSourceBound(sourceCfg.SourceID, cfg.Name)) 319 c.Assert(err, IsNil) 320 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 321 return w.relayHolder.Stage() == pb.Stage_Running 322 }), IsTrue) 323 324 _, err = ha.DeleteSourceCfgRelayStageSourceBound(etcdCli, sourceCfg.SourceID, cfg.Name) 325 c.Assert(err, IsNil) 326 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 327 return w.relayHolder.Stage() == pb.Stage_Stopped 328 }), IsTrue) 329 } 330 331 func (t *testWorkerFunctionalities) testDisableRelay(c *C, w *SourceWorker) { 332 w.DisableRelay() 333 334 c.Assert(w.relayEnabled.Load(), IsFalse) 335 c.Assert(w.relayHolder, IsNil) 336 } 337 338 func (t *testWorkerFunctionalities) testEnableHandleSubtasks(c *C, w *SourceWorker, etcdCli *clientv3.Client, 339 subtaskCfg config.SubTaskConfig, sourceCfg *config.SourceConfig, 340 ) { 341 c.Assert(w.EnableHandleSubtasks(), IsNil) 342 c.Assert(w.subTaskEnabled.Load(), IsTrue) 343 344 _, err := ha.PutSubTaskCfgStage(etcdCli, []config.SubTaskConfig{subtaskCfg}, []ha.Stage{ha.NewSubTaskStage(pb.Stage_Running, sourceCfg.SourceID, subtaskCfg.Name)}, nil) 345 c.Assert(err, IsNil) 346 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 347 return w.subTaskHolder.findSubTask(subtaskCfg.Name) != nil 348 }), IsTrue) 349 t.expectedCreateUnitCount++ 350 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 351 return atomic.LoadInt32(&t.createUnitCount) == t.expectedCreateUnitCount 352 }), IsTrue) 353 } 354 355 type testWorkerEtcdCompact struct{} 356 357 var _ = Suite(&testWorkerEtcdCompact{}) 358 359 func (t *testWorkerEtcdCompact) SetUpSuite(c *C) { 360 NewRelayHolder = NewDummyRelayHolder 361 NewSubTask = func(cfg *config.SubTaskConfig, etcdClient *clientv3.Client, worker string) *SubTask { 362 cfg.UseRelay = false 363 return NewRealSubTask(cfg, etcdClient, worker) 364 } 365 createUnits = func(cfg *config.SubTaskConfig, etcdClient *clientv3.Client, worker string, relay relay.Process) []unit.Unit { 366 mockDumper := NewMockUnit(pb.UnitType_Dump) 367 mockLoader := NewMockUnit(pb.UnitType_Load) 368 mockSync := NewMockUnit(pb.UnitType_Sync) 369 return []unit.Unit{mockDumper, mockLoader, mockSync} 370 } 371 c.Assert(failpoint.Enable("github.com/pingcap/tiflow/dm/worker/MockGetSourceCfgFromETCD", `return(true)`), IsNil) 372 } 373 374 func (t *testWorkerEtcdCompact) TearDownSuite(c *C) { 375 NewRelayHolder = NewRealRelayHolder 376 NewSubTask = NewRealSubTask 377 createUnits = createRealUnits 378 c.Assert(failpoint.Disable("github.com/pingcap/tiflow/dm/worker/MockGetSourceCfgFromETCD"), IsNil) 379 } 380 381 func (t *testWorkerEtcdCompact) TestWatchSubtaskStageEtcdCompact(c *C) { 382 var ( 383 masterAddr = tempurl.Alloc()[len("http://"):] 384 keepAliveTTL = int64(1) 385 startRev = int64(1) 386 ) 387 388 etcdDir := c.MkDir() 389 ETCD, err := createMockETCD(etcdDir, "http://"+masterAddr) 390 c.Assert(err, IsNil) 391 defer ETCD.Close() 392 cfg := NewConfig() 393 c.Assert(cfg.Parse([]string{"-config=./dm-worker.toml"}), IsNil) 394 cfg.Join = masterAddr 395 cfg.KeepAliveTTL = keepAliveTTL 396 cfg.RelayKeepAliveTTL = keepAliveTTL 397 398 etcdCli, err := clientv3.New(clientv3.Config{ 399 Endpoints: GetJoinURLs(cfg.Join), 400 DialTimeout: dialTimeout, 401 DialKeepAliveTime: keepaliveTime, 402 DialKeepAliveTimeout: keepaliveTimeout, 403 }) 404 c.Assert(err, IsNil) 405 sourceCfg := loadSourceConfigWithoutPassword(c) 406 sourceCfg.From = config.GetDBConfigForTest() 407 sourceCfg.EnableRelay = false 408 409 // step 1: start worker 410 w, err := NewSourceWorker(sourceCfg, etcdCli, "", "") 411 c.Assert(err, IsNil) 412 ctx, cancel := context.WithCancel(context.Background()) 413 defer cancel() 414 defer w.Stop(true) 415 go func() { 416 w.Start() 417 }() 418 c.Assert(utils.WaitSomething(50, 100*time.Millisecond, func() bool { 419 return !w.closed.Load() 420 }), IsTrue) 421 // step 2: Put a subtask config and subtask stage to this source, then delete it 422 subtaskCfg := config.SubTaskConfig{} 423 err = subtaskCfg.Decode(config.SampleSubtaskConfig, true) 424 c.Assert(err, IsNil) 425 subtaskCfg.MydumperPath = mydumperPath 426 427 _, err = ha.PutSubTaskCfgStage(etcdCli, []config.SubTaskConfig{subtaskCfg}, []ha.Stage{ha.NewSubTaskStage(pb.Stage_Running, sourceCfg.SourceID, subtaskCfg.Name)}, nil) 428 c.Assert(err, IsNil) 429 rev, err := ha.DeleteSubTaskCfgStage(etcdCli, []config.SubTaskConfig{subtaskCfg}, 430 []ha.Stage{ha.NewSubTaskStage(pb.Stage_Stopped, sourceCfg.SourceID, subtaskCfg.Name)}, nil) 431 c.Assert(err, IsNil) 432 // step 2.1: start a subtask manually 433 c.Assert(w.StartSubTask(&subtaskCfg, pb.Stage_Running, pb.Stage_Stopped, true), IsNil) 434 // step 3: trigger etcd compaction and check whether we can receive it through watcher 435 _, err = etcdCli.Compact(ctx, rev) 436 c.Assert(err, IsNil) 437 subTaskStageCh := make(chan ha.Stage, 10) 438 subTaskErrCh := make(chan error, 10) 439 ha.WatchSubTaskStage(ctx, etcdCli, sourceCfg.SourceID, startRev, subTaskStageCh, subTaskErrCh) 440 select { 441 case err = <-subTaskErrCh: 442 c.Assert(errors.Cause(err), Equals, etcdErrCompacted) 443 case <-time.After(300 * time.Millisecond): 444 c.Fatal("fail to get etcd error compacted") 445 } 446 // step 4: watch subtask stage from startRev 447 c.Assert(w.subTaskHolder.findSubTask(subtaskCfg.Name), NotNil) 448 var wg sync.WaitGroup 449 ctx1, cancel1 := context.WithCancel(ctx) 450 wg.Add(1) 451 go func() { 452 defer wg.Done() 453 c.Assert(w.observeSubtaskStage(ctx1, etcdCli, startRev), IsNil) 454 }() 455 time.Sleep(time.Second) 456 // step 4.1: after observe, invalid subtask should be removed 457 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 458 return w.subTaskHolder.findSubTask(subtaskCfg.Name) == nil 459 }), IsTrue) 460 // step 4.2: add a new subtask stage, worker should receive and start it 461 _, err = ha.PutSubTaskCfgStage(etcdCli, []config.SubTaskConfig{subtaskCfg}, []ha.Stage{ha.NewSubTaskStage(pb.Stage_Running, sourceCfg.SourceID, subtaskCfg.Name)}, nil) 462 c.Assert(err, IsNil) 463 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 464 return w.subTaskHolder.findSubTask(subtaskCfg.Name) != nil 465 }), IsTrue) 466 mockDB := conn.InitMockDB(c) 467 mockShowMasterStatus(mockDB) 468 status, _, err := w.QueryStatus(ctx1, subtaskCfg.Name) 469 c.Assert(err, IsNil) 470 c.Assert(status, HasLen, 1) 471 c.Assert(status[0].Name, Equals, subtaskCfg.Name) 472 c.Assert(status[0].Stage, Equals, pb.Stage_Running) 473 cancel1() 474 wg.Wait() 475 w.subTaskHolder.closeAllSubTasks() 476 // step 5: restart observe and start from startRev, this subtask should be added 477 ctx2, cancel2 := context.WithCancel(ctx) 478 wg.Add(1) 479 go func() { 480 defer wg.Done() 481 c.Assert(w.observeSubtaskStage(ctx2, etcdCli, startRev), IsNil) 482 }() 483 time.Sleep(time.Second) 484 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 485 return w.subTaskHolder.findSubTask(subtaskCfg.Name) != nil 486 }), IsTrue) 487 mockShowMasterStatus(mockDB) 488 status, _, err = w.QueryStatus(ctx2, subtaskCfg.Name) 489 c.Assert(err, IsNil) 490 c.Assert(status, HasLen, 1) 491 c.Assert(status[0].Name, Equals, subtaskCfg.Name) 492 c.Assert(status[0].Stage, Equals, pb.Stage_Running) 493 w.Stop(true) 494 cancel2() 495 wg.Wait() 496 } 497 498 func (t *testWorkerEtcdCompact) TestWatchValidatorStageEtcdCompact(c *C) { 499 var ( 500 masterAddr = tempurl.Alloc()[len("http://"):] 501 keepAliveTTL = int64(1) 502 startRev = int64(1) 503 ) 504 505 etcdDir := c.MkDir() 506 ETCD, err := createMockETCD(etcdDir, "http://"+masterAddr) 507 c.Assert(err, IsNil) 508 defer ETCD.Close() 509 cfg := NewConfig() 510 c.Assert(cfg.Parse([]string{"-config=./dm-worker.toml"}), IsNil) 511 cfg.Join = masterAddr 512 cfg.KeepAliveTTL = keepAliveTTL 513 cfg.RelayKeepAliveTTL = keepAliveTTL 514 515 etcdCli, err := clientv3.New(clientv3.Config{ 516 Endpoints: GetJoinURLs(cfg.Join), 517 DialTimeout: dialTimeout, 518 DialKeepAliveTime: keepaliveTime, 519 DialKeepAliveTimeout: keepaliveTimeout, 520 }) 521 c.Assert(err, IsNil) 522 sourceCfg := loadSourceConfigWithoutPassword(c) 523 sourceCfg.From = config.GetDBConfigForTest() 524 sourceCfg.EnableRelay = false 525 526 // 527 // step 1: start worker 528 w, err := NewSourceWorker(sourceCfg, etcdCli, "", "") 529 c.Assert(err, IsNil) 530 ctx, cancel := context.WithCancel(context.Background()) 531 defer cancel() 532 defer w.Stop(true) 533 go func() { 534 w.Start() 535 }() 536 c.Assert(utils.WaitSomething(50, 100*time.Millisecond, func() bool { 537 return !w.closed.Load() 538 }), IsTrue) 539 540 // 541 // step 2: Put a subtask config and subtask stage to this source, then delete it 542 subtaskCfg := config.SubTaskConfig{} 543 err = subtaskCfg.Decode(config.SampleSubtaskConfig, true) 544 c.Assert(err, IsNil) 545 subtaskCfg.MydumperPath = mydumperPath 546 subtaskCfg.ValidatorCfg = config.ValidatorConfig{Mode: config.ValidationNone} 547 548 // increase revision 549 _, err = etcdCli.Put(context.Background(), "/dummy-key", "value") 550 c.Assert(err, IsNil) 551 rev, err := ha.PutSubTaskCfgStage(etcdCli, []config.SubTaskConfig{subtaskCfg}, []ha.Stage{ha.NewSubTaskStage(pb.Stage_Running, sourceCfg.SourceID, subtaskCfg.Name)}, nil) 552 c.Assert(err, IsNil) 553 554 // 555 // step 2.1: start a subtask manually 556 c.Assert(w.StartSubTask(&subtaskCfg, pb.Stage_Running, pb.Stage_Stopped, true), IsNil) 557 558 // 559 // step 3: trigger etcd compaction and check whether we can receive it through watcher 560 _, err = etcdCli.Compact(ctx, rev) 561 c.Assert(err, IsNil) 562 subTaskStageCh := make(chan ha.Stage, 10) 563 subTaskErrCh := make(chan error, 10) 564 ctxForWatch, cancelFunc := context.WithCancel(ctx) 565 ha.WatchValidatorStage(ctxForWatch, etcdCli, sourceCfg.SourceID, startRev, subTaskStageCh, subTaskErrCh) 566 select { 567 case err = <-subTaskErrCh: 568 c.Assert(errors.Cause(err), Equals, etcdErrCompacted) 569 case <-time.After(300 * time.Millisecond): 570 c.Fatal("fail to get etcd error compacted") 571 } 572 cancelFunc() 573 574 // 575 // step 4: watch subtask stage from startRev 576 subTask := w.subTaskHolder.findSubTask(subtaskCfg.Name) 577 getValidator := func() *syncer.DataValidator { 578 subTask.RLock() 579 defer subTask.RUnlock() 580 return subTask.validator 581 } 582 c.Assert(subTask, NotNil) 583 c.Assert(getValidator(), IsNil) 584 var wg sync.WaitGroup 585 ctx1, cancel1 := context.WithCancel(ctx) 586 wg.Add(1) 587 go func() { 588 defer wg.Done() 589 c.Assert(w.observeValidatorStage(ctx1, startRev), IsNil) 590 }() 591 time.Sleep(time.Second) 592 593 subtaskCfg.ValidatorCfg = config.ValidatorConfig{Mode: config.ValidationFast} 594 unitBakup := subTask.units[len(subTask.units)-1] 595 subTask.units[len(subTask.units)-1] = &syncer.Syncer{} // validator need a Syncer, not a mocked unit 596 validatorStage := ha.NewValidatorStage(pb.Stage_Running, subtaskCfg.SourceID, subtaskCfg.Name) 597 _, err = ha.PutSubTaskCfgStage(etcdCli, []config.SubTaskConfig{subtaskCfg}, nil, []ha.Stage{validatorStage}) 598 c.Assert(err, IsNil) 599 600 // validator created 601 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 602 return getValidator() != nil 603 }), IsTrue) 604 605 subTask.units[len(subTask.units)-1] = unitBakup // restore unit 606 cancel1() 607 wg.Wait() 608 609 // test operate validator 610 err = w.operateValidatorStage(ha.Stage{IsDeleted: true}) 611 c.Assert(err, IsNil) 612 err = w.operateValidatorStage(ha.Stage{Expect: pb.Stage_Running, Task: "not-exist"}) 613 c.Assert(err, IsNil) 614 err = w.operateValidatorStage(ha.Stage{Expect: pb.Stage_Running, Task: subtaskCfg.Name}) 615 c.Assert(err, ErrorMatches, ".*failed to get subtask config.*") 616 err = w.operateValidatorStage(ha.Stage{Expect: pb.Stage_Running, Source: subtaskCfg.SourceID, Task: subtaskCfg.Name}) 617 c.Assert(err, IsNil) 618 } 619 620 func (t *testWorkerEtcdCompact) TestWatchRelayStageEtcdCompact(c *C) { 621 var ( 622 masterAddr = tempurl.Alloc()[len("http://"):] 623 keepAliveTTL = int64(1) 624 startRev = int64(1) 625 ) 626 etcdDir := c.MkDir() 627 ETCD, err := createMockETCD(etcdDir, "http://"+masterAddr) 628 c.Assert(err, IsNil) 629 defer ETCD.Close() 630 cfg := NewConfig() 631 c.Assert(cfg.Parse([]string{"-config=./dm-worker.toml"}), IsNil) 632 cfg.Join = masterAddr 633 cfg.KeepAliveTTL = keepAliveTTL 634 cfg.RelayKeepAliveTTL = keepAliveTTL 635 636 etcdCli, err := clientv3.New(clientv3.Config{ 637 Endpoints: GetJoinURLs(cfg.Join), 638 DialTimeout: dialTimeout, 639 DialKeepAliveTime: keepaliveTime, 640 DialKeepAliveTimeout: keepaliveTimeout, 641 }) 642 c.Assert(err, IsNil) 643 sourceCfg := loadSourceConfigWithoutPassword(c) 644 sourceCfg.EnableRelay = true 645 sourceCfg.RelayDir = c.MkDir() 646 sourceCfg.MetaDir = c.MkDir() 647 648 // step 1: start worker 649 w, err := NewSourceWorker(sourceCfg, etcdCli, "", "") 650 c.Assert(err, IsNil) 651 ctx, cancel := context.WithCancel(context.Background()) 652 defer cancel() 653 defer w.Stop(true) 654 go func() { 655 c.Assert(w.EnableRelay(false), IsNil) 656 w.Start() 657 }() 658 c.Assert(utils.WaitSomething(50, 100*time.Millisecond, func() bool { 659 return !w.closed.Load() 660 }), IsTrue) 661 // step 2: Put a relay stage to this source, then delete it 662 // put mysql config into relative etcd key adapter to trigger operation event 663 c.Assert(w.relayHolder, NotNil) 664 _, err = ha.PutSourceCfg(etcdCli, sourceCfg) 665 c.Assert(err, IsNil) 666 rev, err := ha.PutRelayStageRelayConfigSourceBound(etcdCli, ha.NewRelayStage(pb.Stage_Running, sourceCfg.SourceID), 667 ha.NewSourceBound(sourceCfg.SourceID, cfg.Name)) 668 c.Assert(err, IsNil) 669 // check relay stage, should be running 670 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 671 return w.relayHolder.Stage() == pb.Stage_Running 672 }), IsTrue) 673 // step 3: trigger etcd compaction and check whether we can receive it through watcher, then we delete relay stage 674 _, err = etcdCli.Compact(ctx, rev) 675 c.Assert(err, IsNil) 676 _, err = ha.DeleteSourceCfgRelayStageSourceBound(etcdCli, sourceCfg.SourceID, cfg.Name) 677 c.Assert(err, IsNil) 678 relayStageCh := make(chan ha.Stage, 10) 679 relayErrCh := make(chan error, 10) 680 ha.WatchRelayStage(ctx, etcdCli, cfg.Name, startRev, relayStageCh, relayErrCh) 681 select { 682 case err := <-relayErrCh: 683 c.Assert(errors.Cause(err), Equals, etcdErrCompacted) 684 case <-time.After(300 * time.Millisecond): 685 c.Fatal("fail to get etcd error compacted") 686 } 687 // step 4: should stop the running relay because see deletion after compaction 688 time.Sleep(time.Second) 689 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 690 return w.relayHolder.Stage() == pb.Stage_Stopped 691 }), IsTrue) 692 } 693 694 func (t *testServer) testSourceWorker(c *C) { 695 cfg := loadSourceConfigWithoutPassword(c) 696 697 dir := c.MkDir() 698 cfg.EnableRelay = true 699 cfg.RelayDir = dir 700 cfg.MetaDir = dir 701 702 var ( 703 masterAddr = tempurl.Alloc()[len("http://"):] 704 keepAliveTTL = int64(1) 705 ) 706 etcdDir := c.MkDir() 707 ETCD, err := createMockETCD(etcdDir, "http://"+masterAddr) 708 c.Assert(err, IsNil) 709 defer ETCD.Close() 710 workerCfg := NewConfig() 711 c.Assert(workerCfg.Parse([]string{"-config=./dm-worker.toml"}), IsNil) 712 workerCfg.Join = masterAddr 713 workerCfg.KeepAliveTTL = keepAliveTTL 714 workerCfg.RelayKeepAliveTTL = keepAliveTTL 715 716 etcdCli, err := clientv3.New(clientv3.Config{ 717 Endpoints: GetJoinURLs(workerCfg.Join), 718 DialTimeout: dialTimeout, 719 DialKeepAliveTime: keepaliveTime, 720 DialKeepAliveTimeout: keepaliveTimeout, 721 }) 722 c.Assert(err, IsNil) 723 724 NewRelayHolder = NewDummyRelayHolderWithInitError 725 defer func() { 726 NewRelayHolder = NewRealRelayHolder 727 }() 728 w, err := NewSourceWorker(cfg, etcdCli, "", "") 729 c.Assert(err, IsNil) 730 c.Assert(failpoint.Enable("github.com/pingcap/tiflow/dm/worker/MockGetSourceCfgFromETCD", `return(true)`), IsNil) 731 c.Assert(w.EnableRelay(false), ErrorMatches, "init error") 732 c.Assert(failpoint.Disable("github.com/pingcap/tiflow/dm/worker/MockGetSourceCfgFromETCD"), IsNil) 733 734 NewRelayHolder = NewDummyRelayHolder 735 w, err = NewSourceWorker(cfg, etcdCli, "", "") 736 c.Assert(err, IsNil) 737 c.Assert(w.GetUnitAndSourceStatusJSON("", nil), HasLen, emptyWorkerStatusInfoJSONLength) 738 739 // stop twice 740 w.Stop(true) 741 c.Assert(w.closed.Load(), IsTrue) 742 c.Assert(w.subTaskHolder.getAllSubTasks(), HasLen, 0) 743 w.Stop(true) 744 c.Assert(w.closed.Load(), IsTrue) 745 c.Assert(w.subTaskHolder.getAllSubTasks(), HasLen, 0) 746 c.Assert(w.closed.Load(), IsTrue) 747 748 c.Assert(w.StartSubTask(&config.SubTaskConfig{ 749 Name: "testStartTask", 750 }, pb.Stage_Running, pb.Stage_Stopped, true), IsNil) 751 task := w.subTaskHolder.findSubTask("testStartTask") 752 c.Assert(task, NotNil) 753 c.Assert(task.Result().String(), Matches, ".*worker already closed.*") 754 755 c.Assert(w.StartSubTask(&config.SubTaskConfig{ 756 Name: "testStartTask-in-stopped", 757 }, pb.Stage_Stopped, pb.Stage_Stopped, true), IsNil) 758 task = w.subTaskHolder.findSubTask("testStartTask-in-stopped") 759 c.Assert(task, NotNil) 760 c.Assert(task.Result().String(), Matches, ".*worker already closed.*") 761 762 err = w.UpdateSubTask(context.Background(), &config.SubTaskConfig{ 763 Name: "testStartTask", 764 }, true) 765 c.Assert(err, ErrorMatches, ".*worker already closed.*") 766 767 err = w.OperateSubTask("testSubTask", pb.TaskOp_Delete) 768 c.Assert(err, ErrorMatches, ".*worker already closed.*") 769 } 770 771 func (t *testServer) TestQueryValidator(c *C) { 772 cfg := loadSourceConfigWithoutPassword(c) 773 774 dir := c.MkDir() 775 cfg.EnableRelay = true 776 cfg.RelayDir = dir 777 cfg.MetaDir = dir 778 779 w, err := NewSourceWorker(cfg, nil, "", "") 780 w.closed.Store(false) 781 c.Assert(err, IsNil) 782 st := NewSubTaskWithStage(&config.SubTaskConfig{ 783 Name: "testQueryValidator", 784 ValidatorCfg: config.ValidatorConfig{ 785 Mode: config.ValidationFull, 786 }, 787 }, pb.Stage_Running, nil, "") 788 w.subTaskHolder.recordSubTask(st) 789 var ret *pb.ValidationStatus 790 ret, err = w.GetValidatorStatus("invalidTaskName") 791 c.Assert(ret, IsNil) 792 c.Assert(terror.ErrWorkerSubTaskNotFound.Equal(err), IsTrue) 793 } 794 795 func (t *testServer) setupValidator(c *C) *SourceWorker { 796 cfg := loadSourceConfigWithoutPassword(c) 797 798 dir := c.MkDir() 799 cfg.EnableRelay = true 800 cfg.RelayDir = dir 801 cfg.MetaDir = dir 802 st := NewSubTaskWithStage(&config.SubTaskConfig{ 803 Name: "testQueryValidator", 804 ValidatorCfg: config.ValidatorConfig{ 805 Mode: config.ValidationFull, 806 }, 807 }, pb.Stage_Running, nil, "") 808 w, err := NewSourceWorker(cfg, nil, "", "") 809 st.StartValidator(pb.Stage_Running, false) 810 w.subTaskHolder.recordSubTask(st) 811 w.closed.Store(false) 812 c.Assert(err, IsNil) 813 return w 814 } 815 816 func (t *testServer) TestGetWorkerValidatorErr(c *C) { 817 w := t.setupValidator(c) 818 // when subtask name not exists 819 // return empty array 820 errs, err := w.GetWorkerValidatorErr("invalidTask", pb.ValidateErrorState_InvalidErr) 821 c.Assert(terror.ErrWorkerSubTaskNotFound.Equal(err), IsTrue) 822 c.Assert(errs, IsNil) 823 } 824 825 func (t *testServer) TestOperateWorkerValidatorErr(c *C) { 826 w := t.setupValidator(c) 827 // when subtask name not exists 828 // return empty array 829 taskNotFound := terror.ErrWorkerSubTaskNotFound.Generate("invalidTask") 830 c.Assert(w.OperateWorkerValidatorErr("invalidTask", pb.ValidationErrOp_ClearErrOp, 0, true).Error(), Equals, taskNotFound.Error()) 831 } 832 833 func TestMasterBinlogOff(t *testing.T) { 834 ctx := context.Background() 835 cfg, err := config.SourceCfgFromYamlAndVerify(config.SampleSourceConfig) 836 require.NoError(t, err) 837 cfg.From.Password = "no need to connect" 838 839 w, err := NewSourceWorker(cfg, nil, "", "") 840 require.NoError(t, err) 841 w.closed.Store(false) 842 843 // start task 844 var subtaskCfg config.SubTaskConfig 845 require.NoError(t, subtaskCfg.Decode(config.SampleSubtaskConfig, true)) 846 require.NoError(t, w.StartSubTask(&subtaskCfg, pb.Stage_Running, pb.Stage_Stopped, true)) 847 848 _, mockDB, err := conn.InitMockDBFull() 849 require.NoError(t, err) 850 mockShowMasterStatusNoRows(mockDB) 851 status, _, err := w.QueryStatus(ctx, subtaskCfg.Name) 852 require.NoError(t, err) 853 require.Len(t, status, 1) 854 require.Equal(t, subtaskCfg.Name, status[0].Name) 855 }