// Source: github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/worker/server_test.go

// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package worker

import (
	"context"
	"io"
	"net/http"
	"net/url"
	"sync"
	"testing"
	"time"

	"github.com/go-mysql-org/go-mysql/mysql"
	. "github.com/pingcap/check"
	"github.com/pingcap/errors"
	"github.com/pingcap/failpoint"
	"github.com/pingcap/tiflow/dm/config"
	"github.com/pingcap/tiflow/dm/pb"
	"github.com/pingcap/tiflow/dm/pkg/binlog"
	"github.com/pingcap/tiflow/dm/pkg/gtid"
	"github.com/pingcap/tiflow/dm/pkg/ha"
	"github.com/pingcap/tiflow/dm/pkg/log"
	"github.com/pingcap/tiflow/dm/pkg/terror"
	"github.com/pingcap/tiflow/dm/pkg/utils"
	"github.com/pingcap/tiflow/dm/relay"
	"github.com/pingcap/tiflow/dm/unit"
	"github.com/stretchr/testify/require"
	"github.com/tikv/pd/pkg/utils/tempurl"
	v3rpc "go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
	clientv3 "go.etcd.io/etcd/client/v3"
	"go.etcd.io/etcd/server/v3/embed"
	"google.golang.org/grpc"
)

// do not forget to update this path if the file is removed/renamed.
48 const ( 49 mydumperPath = "../../bin/mydumper" 50 ) 51 52 var etcdErrCompacted = v3rpc.ErrCompacted 53 54 func TestServer(t *testing.T) { 55 TestingT(t) 56 } 57 58 type testServer struct{} 59 60 var _ = Suite(&testServer{}) 61 62 func (t *testServer) SetUpSuite(c *C) { 63 err := log.InitLogger(&log.Config{}) 64 c.Assert(err, IsNil) 65 getMinLocForSubTaskFunc = getFakeLocForSubTask 66 } 67 68 func (t *testServer) TearDownSuite(c *C) { 69 getMinLocForSubTaskFunc = getMinLocForSubTask 70 } 71 72 func createMockETCD(dir string, host string) (*embed.Etcd, error) { 73 cfg := embed.NewConfig() 74 cfg.Dir = dir 75 lcurl, _ := url.Parse(host) 76 cfg.ListenClientUrls = []url.URL{*lcurl} 77 cfg.AdvertiseClientUrls = []url.URL{*lcurl} 78 lpurl, _ := url.Parse(tempurl.Alloc()) 79 cfg.ListenPeerUrls = []url.URL{*lpurl} 80 cfg.AdvertisePeerUrls = []url.URL{*lpurl} 81 cfg.InitialCluster = "default=" + lpurl.String() 82 cfg.Logger = "zap" 83 metricsURL, _ := url.Parse(tempurl.Alloc()) 84 cfg.ListenMetricsUrls = []url.URL{*metricsURL} 85 ETCD, err := embed.StartEtcd(cfg) 86 if err != nil { 87 return nil, err 88 } 89 90 select { 91 case <-ETCD.Server.ReadyNotify(): 92 case <-time.After(5 * time.Second): 93 ETCD.Server.Stop() // trigger a shutdown 94 } 95 // embd.client = v3client.New(embd.ETCD.Server) 96 return ETCD, nil 97 } 98 99 func (t *testServer) TestServer(c *C) { 100 var ( 101 masterAddr = tempurl.Alloc()[len("http://"):] 102 workerAddr1 = "127.0.0.1:8262" 103 keepAliveTTL = int64(1) 104 ) 105 etcdDir := c.MkDir() 106 ETCD, err := createMockETCD(etcdDir, "http://"+masterAddr) 107 c.Assert(err, IsNil) 108 cfg := NewConfig() 109 c.Assert(cfg.Parse([]string{"-config=./dm-worker.toml"}), IsNil) 110 cfg.Join = masterAddr 111 cfg.KeepAliveTTL = keepAliveTTL 112 cfg.RelayKeepAliveTTL = keepAliveTTL 113 114 NewRelayHolder = NewDummyRelayHolder 115 NewSubTask = func(cfg *config.SubTaskConfig, etcdClient *clientv3.Client, worker string) *SubTask { 116 cfg.UseRelay = false 117 return 
NewRealSubTask(cfg, etcdClient, worker) 118 } 119 createUnits = func(cfg *config.SubTaskConfig, etcdClient *clientv3.Client, worker string, relay relay.Process) []unit.Unit { 120 mockDumper := NewMockUnit(pb.UnitType_Dump) 121 mockLoader := NewMockUnit(pb.UnitType_Load) 122 mockSync := NewMockUnit(pb.UnitType_Sync) 123 return []unit.Unit{mockDumper, mockLoader, mockSync} 124 } 125 defer func() { 126 NewRelayHolder = NewRealRelayHolder 127 NewSubTask = NewRealSubTask 128 createUnits = createRealUnits 129 }() 130 131 s := NewServer(cfg) 132 defer s.Close() 133 go func() { 134 err1 := s.Start() 135 c.Assert(err1, IsNil) 136 }() 137 138 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 139 return !s.closed.Load() 140 }), IsTrue) 141 dir := c.MkDir() 142 143 t.testOperateSourceBoundWithoutConfigInEtcd(c, s) 144 145 t.testOperateWorker(c, s, dir, true) 146 147 // check worker would retry connecting master rather than stop worker directly. 148 ETCD = t.testRetryConnectMaster(c, s, ETCD, etcdDir, masterAddr) 149 150 // resume contact with ETCD and start worker again 151 t.testOperateWorker(c, s, dir, true) 152 153 // test condition hub 154 t.testConidtionHub(c, s) 155 156 t.testHTTPInterface(c, "status") 157 t.testHTTPInterface(c, "metrics") 158 159 // create client 160 cli := t.createClient(c, workerAddr1) 161 162 // start task 163 subtaskCfg := config.SubTaskConfig{} 164 err = subtaskCfg.Decode(config.SampleSubtaskConfig, true) 165 c.Assert(err, IsNil) 166 subtaskCfg.MydumperPath = mydumperPath 167 168 sourceCfg := loadSourceConfigWithoutPassword(c) 169 _, err = ha.PutSubTaskCfgStage(s.etcdClient, []config.SubTaskConfig{subtaskCfg}, []ha.Stage{ha.NewSubTaskStage(pb.Stage_Running, sourceCfg.SourceID, subtaskCfg.Name)}, nil) 170 c.Assert(err, IsNil) 171 172 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 173 return checkSubTaskStatus(cli, pb.Stage_Running) 174 }), IsTrue) 175 176 t.testSubTaskRecover(c, s, dir) 177 178 // pause relay 
179 _, err = ha.PutRelayStage(s.etcdClient, ha.NewRelayStage(pb.Stage_Paused, sourceCfg.SourceID)) 180 c.Assert(err, IsNil) 181 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 182 return checkRelayStatus(cli, pb.Stage_Paused) 183 }), IsTrue) 184 // resume relay 185 _, err = ha.PutRelayStage(s.etcdClient, ha.NewRelayStage(pb.Stage_Running, sourceCfg.SourceID)) 186 c.Assert(err, IsNil) 187 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 188 return checkRelayStatus(cli, pb.Stage_Running) 189 }), IsTrue) 190 // pause task 191 _, err = ha.PutSubTaskStage(s.etcdClient, ha.NewSubTaskStage(pb.Stage_Paused, sourceCfg.SourceID, subtaskCfg.Name)) 192 c.Assert(err, IsNil) 193 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 194 return checkSubTaskStatus(cli, pb.Stage_Paused) 195 }), IsTrue) 196 197 // test refresh source cfg 198 sourceCfg.MetaDir = "new meta" 199 _, err = ha.PutSourceCfg(s.etcdClient, sourceCfg) 200 c.Assert(err, IsNil) 201 c.Assert(s.worker.refreshSourceCfg(), IsNil) 202 c.Assert(s.worker.cfg.MetaDir, Equals, sourceCfg.MetaDir) 203 204 // check update subtask cfg failed 205 tomlStr, tomlErr := subtaskCfg.Toml() 206 c.Assert(tomlErr, IsNil) 207 ctx := context.Background() 208 checkReq := &pb.CheckSubtasksCanUpdateRequest{SubtaskCfgTomlString: tomlStr} 209 checkResp, checkErr := s.CheckSubtasksCanUpdate(ctx, checkReq) 210 c.Assert(checkErr, IsNil) 211 c.Assert(checkResp.Success, IsFalse) 212 213 // test refresh subtask cfg 214 subtaskCfg.SyncerConfig.Batch = 111 215 _, err = ha.PutSubTaskCfgStage(s.etcdClient, []config.SubTaskConfig{subtaskCfg}, []ha.Stage{}, []ha.Stage{}) 216 c.Assert(err, IsNil) 217 subTask := s.worker.subTaskHolder.findSubTask(subtaskCfg.Name) 218 subTask.setCurrUnit(subTask.units[2]) // set to syncer unit 219 c.Assert(s.worker.tryRefreshSubTaskAndSourceConfig(subTask), IsNil) 220 subtaskCfgInWorker := s.worker.subTaskHolder.findSubTask(subtaskCfg.Name) 221 
c.Assert(subtaskCfgInWorker.cfg.SyncerConfig.Batch, Equals, subtaskCfg.SyncerConfig.Batch) 222 223 // resume task 224 _, err = ha.PutSubTaskStage(s.etcdClient, ha.NewSubTaskStage(pb.Stage_Running, sourceCfg.SourceID, subtaskCfg.Name)) 225 c.Assert(err, IsNil) 226 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 227 return checkSubTaskStatus(cli, pb.Stage_Running) 228 }), IsTrue) 229 230 // stop task 231 _, err = ha.DeleteSubTaskStage(s.etcdClient, ha.NewSubTaskStage(pb.Stage_Stopped, sourceCfg.SourceID, subtaskCfg.Name)) 232 c.Assert(err, IsNil) 233 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 234 return s.getSourceWorker(true).subTaskHolder.findSubTask(subtaskCfg.Name) == nil 235 }), IsTrue) 236 237 dupServer := NewServer(cfg) 238 err = dupServer.Start() 239 c.Assert(terror.ErrWorkerStartService.Equal(err), IsTrue) 240 c.Assert(err.Error(), Matches, ".*bind: address already in use.*") 241 242 t.testStopWorkerWhenLostConnect(c, s, ETCD) 243 s.Close() 244 245 c.Assert(utils.WaitSomething(30, 10*time.Millisecond, func() bool { 246 return s.closed.Load() 247 }), IsTrue) 248 249 // test source worker, just make sure testing sort 250 t.testSourceWorker(c) 251 } 252 253 func (t *testServer) TestHandleSourceBoundAfterError(c *C) { 254 var ( 255 masterAddr = tempurl.Alloc()[len("http://"):] 256 keepAliveTTL = int64(1) 257 ) 258 // start etcd server 259 etcdDir := c.MkDir() 260 ETCD, err := createMockETCD(etcdDir, "http://"+masterAddr) 261 c.Assert(err, IsNil) 262 defer ETCD.Close() 263 cfg := NewConfig() 264 c.Assert(cfg.Parse([]string{"-config=./dm-worker.toml"}), IsNil) 265 cfg.Join = masterAddr 266 cfg.KeepAliveTTL = keepAliveTTL 267 268 // new etcd client 269 etcdCli, err := clientv3.New(clientv3.Config{ 270 Endpoints: GetJoinURLs(cfg.Join), 271 DialTimeout: dialTimeout, 272 DialKeepAliveTime: keepaliveTime, 273 DialKeepAliveTimeout: keepaliveTimeout, 274 }) 275 c.Assert(err, IsNil) 276 277 // watch worker event(oneline or 
offline) 278 var ( 279 wg sync.WaitGroup 280 startRev int64 = 1 281 ) 282 workerEvCh := make(chan ha.WorkerEvent, 10) 283 workerErrCh := make(chan error, 10) 284 ctx, cancel := context.WithCancel(context.Background()) 285 wg.Add(1) 286 go func() { 287 defer func() { 288 close(workerEvCh) 289 close(workerErrCh) 290 wg.Done() 291 }() 292 ha.WatchWorkerEvent(ctx, etcdCli, startRev, workerEvCh, workerErrCh) 293 }() 294 295 // start worker server 296 s := NewServer(cfg) 297 defer s.Close() 298 go func() { 299 err1 := s.Start() 300 c.Assert(err1, IsNil) 301 }() 302 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 303 return !s.closed.Load() 304 }), IsTrue) 305 306 // check if the worker is online 307 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 308 select { 309 case ev := <-workerEvCh: 310 if !ev.IsDeleted { 311 return true 312 } 313 default: 314 } 315 return false 316 }), IsTrue) 317 318 // enable failpoint 319 c.Assert(failpoint.Enable("github.com/pingcap/tiflow/dm/pkg/ha/FailToGetSourceCfg", `return(true)`), IsNil) 320 sourceCfg := loadSourceConfigWithoutPassword(c) 321 sourceCfg.EnableRelay = false 322 _, err = ha.PutSourceCfg(etcdCli, sourceCfg) 323 c.Assert(err, IsNil) 324 sourceBound := ha.NewSourceBound(sourceCfg.SourceID, s.cfg.Name) 325 _, err = ha.PutSourceBound(etcdCli, sourceBound) 326 c.Assert(err, IsNil) 327 328 // do check until worker offline 329 c.Assert(utils.WaitSomething(50, 100*time.Millisecond, func() bool { 330 select { 331 case ev := <-workerEvCh: 332 if ev.IsDeleted { 333 return true 334 } 335 default: 336 } 337 return false 338 }), IsTrue) 339 340 // check if the worker is online 341 c.Assert(utils.WaitSomething(5, time.Duration(s.cfg.KeepAliveTTL)*time.Second, func() bool { 342 select { 343 case ev := <-workerEvCh: 344 if !ev.IsDeleted { 345 return true 346 } 347 default: 348 } 349 return false 350 }), IsTrue) 351 352 // stop watching and disable failpoint 353 cancel() 354 wg.Wait() 355 
c.Assert(failpoint.Disable("github.com/pingcap/tiflow/dm/pkg/ha/FailToGetSourceCfg"), IsNil) 356 357 _, err = ha.PutSourceBound(etcdCli, sourceBound) 358 c.Assert(err, IsNil) 359 _, err = ha.PutSourceCfg(etcdCli, sourceCfg) 360 c.Assert(err, IsNil) 361 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 362 return s.getSourceWorker(true) != nil 363 }), IsTrue) 364 365 _, err = ha.DeleteSourceBound(etcdCli, s.cfg.Name) 366 c.Assert(err, IsNil) 367 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 368 return s.getSourceWorker(true) == nil 369 }), IsTrue) 370 } 371 372 func (t *testServer) TestServerQueryValidator(c *C) { 373 var ( 374 masterAddr = tempurl.Alloc()[len("http://"):] 375 keepAliveTTL = int64(1) 376 ) 377 etcdDir := c.MkDir() 378 ETCD, err := createMockETCD(etcdDir, "http://"+masterAddr) 379 c.Assert(err, IsNil) 380 defer ETCD.Close() 381 cfg := NewConfig() 382 c.Assert(cfg.Parse([]string{"-config=./dm-worker.toml"}), IsNil) 383 cfg.Join = masterAddr 384 cfg.KeepAliveTTL = keepAliveTTL 385 cfg.RelayKeepAliveTTL = keepAliveTTL 386 387 s := NewServer(cfg) 388 resp, err := s.GetWorkerValidatorStatus(context.Background(), &pb.GetValidationStatusRequest{}) 389 c.Assert(err, IsNil) 390 c.Assert(resp.Result, IsFalse) 391 c.Assert(resp.Msg, Matches, ".*no mysql source is being handled in the worker.*") 392 } 393 394 func (t *testServer) TestServerQueryValidatorError(c *C) { 395 var ( 396 masterAddr = tempurl.Alloc()[len("http://"):] 397 keepAliveTTL = int64(1) 398 ) 399 etcdDir := c.MkDir() 400 ETCD, err := createMockETCD(etcdDir, "http://"+masterAddr) 401 c.Assert(err, IsNil) 402 defer ETCD.Close() 403 cfg := NewConfig() 404 c.Assert(cfg.Parse([]string{"-config=./dm-worker.toml"}), IsNil) 405 cfg.Join = masterAddr 406 cfg.KeepAliveTTL = keepAliveTTL 407 cfg.RelayKeepAliveTTL = keepAliveTTL 408 409 s := NewServer(cfg) 410 resp, err := s.GetValidatorError(context.Background(), &pb.GetValidationErrorRequest{}) 411 c.Assert(err, 
IsNil) 412 c.Assert(resp.Result, IsFalse) 413 c.Assert(resp.Msg, Matches, ".*no mysql source is being handled in the worker.*") 414 } 415 416 func (t *testServer) TestServerOperateValidatorError(c *C) { 417 var ( 418 masterAddr = tempurl.Alloc()[len("http://"):] 419 keepAliveTTL = int64(1) 420 ) 421 etcdDir := c.MkDir() 422 ETCD, err := createMockETCD(etcdDir, "http://"+masterAddr) 423 c.Assert(err, IsNil) 424 defer ETCD.Close() 425 cfg := NewConfig() 426 c.Assert(cfg.Parse([]string{"-config=./dm-worker.toml"}), IsNil) 427 cfg.Join = masterAddr 428 cfg.KeepAliveTTL = keepAliveTTL 429 cfg.RelayKeepAliveTTL = keepAliveTTL 430 431 s := NewServer(cfg) 432 resp, err := s.OperateValidatorError(context.Background(), &pb.OperateValidationErrorRequest{}) 433 c.Assert(err, IsNil) 434 c.Assert(resp.Result, IsFalse) 435 c.Assert(resp.Msg, Matches, ".*no mysql source is being handled in the worker.*") 436 } 437 438 func (t *testServer) TestWatchSourceBoundEtcdCompact(c *C) { 439 var ( 440 masterAddr = tempurl.Alloc()[len("http://"):] 441 keepAliveTTL = int64(1) 442 startRev = int64(1) 443 ) 444 etcdDir := c.MkDir() 445 ETCD, err := createMockETCD(etcdDir, "http://"+masterAddr) 446 c.Assert(err, IsNil) 447 defer ETCD.Close() 448 cfg := NewConfig() 449 c.Assert(cfg.Parse([]string{"-config=./dm-worker.toml"}), IsNil) 450 cfg.Join = masterAddr 451 cfg.KeepAliveTTL = keepAliveTTL 452 cfg.RelayKeepAliveTTL = keepAliveTTL 453 454 s := NewServer(cfg) 455 etcdCli, err := clientv3.New(clientv3.Config{ 456 Endpoints: GetJoinURLs(s.cfg.Join), 457 DialTimeout: dialTimeout, 458 DialKeepAliveTime: keepaliveTime, 459 DialKeepAliveTimeout: keepaliveTimeout, 460 }) 461 s.etcdClient = etcdCli 462 s.closed.Store(false) 463 c.Assert(err, IsNil) 464 sourceCfg := loadSourceConfigWithoutPassword(c) 465 sourceCfg.EnableRelay = false 466 467 ctx, cancel := context.WithCancel(context.Background()) 468 defer cancel() 469 470 // step 1: Put a source config and source bound to this worker, then delete it 
471 _, err = ha.PutSourceCfg(etcdCli, sourceCfg) 472 c.Assert(err, IsNil) 473 sourceBound := ha.NewSourceBound(sourceCfg.SourceID, cfg.Name) 474 _, err = ha.PutSourceBound(etcdCli, sourceBound) 475 c.Assert(err, IsNil) 476 rev, err := ha.DeleteSourceBound(etcdCli, cfg.Name) 477 c.Assert(err, IsNil) 478 // step 2: start source at this worker 479 w, err := s.getOrStartWorker(sourceCfg, true) 480 c.Assert(err, IsNil) 481 c.Assert(w.EnableHandleSubtasks(), IsNil) 482 // step 3: trigger etcd compaction and check whether we can receive it through watcher 483 _, err = etcdCli.Compact(ctx, rev) 484 c.Assert(err, IsNil) 485 sourceBoundCh := make(chan ha.SourceBound, 10) 486 sourceBoundErrCh := make(chan error, 10) 487 ha.WatchSourceBound(ctx, etcdCli, cfg.Name, startRev, sourceBoundCh, sourceBoundErrCh) 488 select { 489 case err = <-sourceBoundErrCh: 490 c.Assert(errors.Cause(err), Equals, etcdErrCompacted) 491 case <-time.After(300 * time.Millisecond): 492 c.Fatal("fail to get etcd error compacted") 493 } 494 // step 4: watch source bound from startRev 495 var wg sync.WaitGroup 496 ctx1, cancel1 := context.WithCancel(ctx) 497 wg.Add(1) 498 go func() { 499 defer wg.Done() 500 c.Assert(s.observeSourceBound(ctx1, startRev), IsNil) 501 }() 502 // step 4.1: should stop the running worker, source bound has been deleted, should stop this worker 503 c.Assert(utils.WaitSomething(20, 100*time.Millisecond, func() bool { 504 return s.getSourceWorker(true) == nil 505 }), IsTrue) 506 // step 4.2: put a new source bound, source should be started 507 _, err = ha.PutSourceBound(etcdCli, sourceBound) 508 c.Assert(err, IsNil) 509 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 510 return s.getSourceWorker(true) != nil 511 }), IsTrue) 512 cfg2 := s.getSourceWorker(true).cfg 513 c.Assert(cfg2, DeepEquals, sourceCfg) 514 cancel1() 515 wg.Wait() 516 c.Assert(s.stopSourceWorker(sourceCfg.SourceID, true, true), IsNil) 517 // step 5: start observeSourceBound from compacted 
revision again, should start worker 518 ctx2, cancel2 := context.WithCancel(ctx) 519 wg.Add(1) 520 go func() { 521 defer wg.Done() 522 c.Assert(s.observeSourceBound(ctx2, startRev), IsNil) 523 }() 524 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 525 return s.getSourceWorker(true) != nil 526 }), IsTrue) 527 cfg2 = s.getSourceWorker(true).cfg 528 c.Assert(cfg2, DeepEquals, sourceCfg) 529 cancel2() 530 wg.Wait() 531 } 532 533 func (t *testServer) testHTTPInterface(c *C, uri string) { 534 // nolint:noctx 535 resp, err := http.Get("http://127.0.0.1:8262/" + uri) 536 c.Assert(err, IsNil) 537 defer resp.Body.Close() 538 c.Assert(resp.StatusCode, Equals, 200) 539 _, err = io.ReadAll(resp.Body) 540 c.Assert(err, IsNil) 541 } 542 543 func (t *testServer) createClient(c *C, addr string) pb.WorkerClient { 544 //nolint:staticcheck 545 conn, err := grpc.Dial(addr, grpc.WithInsecure(), grpc.WithBackoffMaxDelay(3*time.Second)) 546 c.Assert(err, IsNil) 547 return pb.NewWorkerClient(conn) 548 } 549 550 func (t *testServer) testOperateSourceBoundWithoutConfigInEtcd(c *C, s *Server) { 551 err := s.operateSourceBound(ha.NewSourceBound("sourceWithoutConfigInEtcd", s.cfg.Name)) 552 c.Assert(terror.ErrWorkerFailToGetSourceConfigFromEtcd.Equal(err), IsTrue) 553 } 554 555 func (t *testServer) testOperateWorker(c *C, s *Server, dir string, start bool) { 556 // load sourceCfg 557 sourceCfg := loadSourceConfigWithoutPassword(c) 558 sourceCfg.EnableRelay = true 559 sourceCfg.RelayDir = dir 560 sourceCfg.MetaDir = c.MkDir() 561 562 if start { 563 // put mysql config into relative etcd key adapter to trigger operation event 564 _, err := ha.PutSourceCfg(s.etcdClient, sourceCfg) 565 c.Assert(err, IsNil) 566 _, err = ha.PutRelayStageRelayConfigSourceBound(s.etcdClient, ha.NewRelayStage(pb.Stage_Running, sourceCfg.SourceID), 567 ha.NewSourceBound(sourceCfg.SourceID, s.cfg.Name)) 568 c.Assert(err, IsNil) 569 // worker should be started and without error 570 
c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 571 w := s.getSourceWorker(true) 572 return w != nil && !w.closed.Load() 573 }), IsTrue) 574 c.Assert(s.getSourceStatus(true).Result, IsNil) 575 } else { 576 // worker should be started before stopped 577 w := s.getSourceWorker(true) 578 c.Assert(w, NotNil) 579 c.Assert(w.closed.Load(), IsFalse) 580 _, err := ha.DeleteRelayConfig(s.etcdClient, w.name) 581 c.Assert(err, IsNil) 582 _, err = ha.DeleteSourceCfgRelayStageSourceBound(s.etcdClient, sourceCfg.SourceID, s.cfg.Name) 583 c.Assert(err, IsNil) 584 // worker should be closed and without error 585 c.Assert(utils.WaitSomething(30, 100*time.Millisecond, func() bool { 586 currentWorker := s.getSourceWorker(true) 587 return currentWorker == nil && w.closed.Load() 588 }), IsTrue) 589 c.Assert(s.getSourceStatus(true).Result, IsNil) 590 } 591 } 592 593 func (t *testServer) testRetryConnectMaster(c *C, s *Server, etcd *embed.Etcd, dir string, hostName string) *embed.Etcd { 594 etcd.Close() 595 time.Sleep(6 * time.Second) 596 // When worker server fail to keepalive with etcd, server should close its worker 597 c.Assert(s.getSourceWorker(true), IsNil) 598 c.Assert(s.getSourceStatus(true).Result, IsNil) 599 ETCD, err := createMockETCD(dir, "http://"+hostName) 600 c.Assert(err, IsNil) 601 time.Sleep(3 * time.Second) 602 return ETCD 603 } 604 605 func (t *testServer) testSubTaskRecover(c *C, s *Server, dir string) { 606 workerCli := t.createClient(c, "127.0.0.1:8262") 607 t.testOperateWorker(c, s, dir, false) 608 609 status, err := workerCli.QueryStatus(context.Background(), &pb.QueryStatusRequest{Name: "sub-task-name"}) 610 c.Assert(err, IsNil) 611 c.Assert(status.Result, IsFalse) 612 c.Assert(status.Msg, Equals, terror.ErrWorkerNoStart.Error()) 613 614 t.testOperateWorker(c, s, dir, true) 615 616 // because we split starting worker and enabling handling subtasks into two parts, a query-status may occur between 617 // them, thus get a result of no subtask 
running 618 utils.WaitSomething(30, 100*time.Millisecond, func() bool { 619 status, err = workerCli.QueryStatus(context.Background(), &pb.QueryStatusRequest{Name: "sub-task-name"}) 620 if err != nil { 621 return false 622 } 623 if status.Result == false { 624 return false 625 } 626 if len(status.SubTaskStatus) == 0 || status.SubTaskStatus[0].Stage != pb.Stage_Running { 627 return false 628 } 629 return true 630 }) 631 632 status, err = workerCli.QueryStatus(context.Background(), &pb.QueryStatusRequest{Name: "sub-task-name"}) 633 c.Assert(err, IsNil) 634 c.Assert(status.Result, IsTrue) 635 c.Assert(status.SubTaskStatus, HasLen, 1) 636 c.Assert(status.SubTaskStatus[0].Stage, Equals, pb.Stage_Running) 637 } 638 639 func (t *testServer) testStopWorkerWhenLostConnect(c *C, s *Server, etcd *embed.Etcd) { 640 etcd.Close() 641 c.Assert(utils.WaitSomething(int(defaultKeepAliveTTL+3), time.Second, func() bool { 642 return s.getSourceWorker(true) == nil 643 }), IsTrue) 644 c.Assert(s.getSourceWorker(true), IsNil) 645 } 646 647 func (t *testServer) TestGetMinLocInAllSubTasks(c *C) { 648 subTaskCfg := map[string]config.SubTaskConfig{ 649 "test2": {Name: "test2"}, 650 "test3": {Name: "test3"}, 651 "test1": {Name: "test1"}, 652 } 653 minLoc, err := getMinLocInAllSubTasks(context.Background(), subTaskCfg) 654 c.Assert(err, IsNil) 655 c.Assert(minLoc.Position.Name, Equals, "mysql-binlog.00001") 656 c.Assert(minLoc.Position.Pos, Equals, uint32(12)) 657 658 for k, cfg := range subTaskCfg { 659 cfg.EnableGTID = true 660 subTaskCfg[k] = cfg 661 } 662 663 minLoc, err = getMinLocInAllSubTasks(context.Background(), subTaskCfg) 664 c.Assert(err, IsNil) 665 c.Assert(minLoc.Position.Name, Equals, "mysql-binlog.00001") 666 c.Assert(minLoc.Position.Pos, Equals, uint32(123)) 667 } 668 669 func getFakeLocForSubTask(ctx context.Context, subTaskCfg config.SubTaskConfig) (minLoc *binlog.Location, err error) { 670 gset1, _ := gtid.ParserGTID(mysql.MySQLFlavor, 
"ba8f633f-1f15-11eb-b1c7-0242ac110001:1-30") 671 gset2, _ := gtid.ParserGTID(mysql.MySQLFlavor, "ba8f633f-1f15-11eb-b1c7-0242ac110001:1-50") 672 gset3, _ := gtid.ParserGTID(mysql.MySQLFlavor, "ba8f633f-1f15-11eb-b1c7-0242ac110001:1-50,ba8f633f-1f15-11eb-b1c7-0242ac110002:1") 673 loc1 := binlog.NewLocation( 674 mysql.Position{ 675 Name: "mysql-binlog.00001", 676 Pos: 123, 677 }, 678 gset1, 679 ) 680 loc2 := binlog.NewLocation( 681 mysql.Position{ 682 Name: "mysql-binlog.00001", 683 Pos: 12, 684 }, 685 gset2, 686 ) 687 loc3 := binlog.NewLocation( 688 mysql.Position{ 689 Name: "mysql-binlog.00003", 690 }, 691 gset3, 692 ) 693 694 switch subTaskCfg.Name { 695 case "test1": 696 return &loc1, nil 697 case "test2": 698 return &loc2, nil 699 case "test3": 700 return &loc3, nil 701 default: 702 return nil, nil 703 } 704 } 705 706 func checkSubTaskStatus(cli pb.WorkerClient, expect pb.Stage) bool { 707 status, err := cli.QueryStatus(context.Background(), &pb.QueryStatusRequest{Name: "sub-task-name"}) 708 if err != nil { 709 return false 710 } 711 if status.Result == false { 712 return false 713 } 714 return len(status.SubTaskStatus) > 0 && status.SubTaskStatus[0].Stage == expect 715 } 716 717 func checkRelayStatus(cli pb.WorkerClient, expect pb.Stage) bool { 718 status, err := cli.QueryStatus(context.Background(), &pb.QueryStatusRequest{Name: "sub-task-name"}) 719 if err != nil { 720 return false 721 } 722 if status.Result == false { 723 return false 724 } 725 return status.SourceStatus.RelayStatus.Stage == expect 726 } 727 728 func loadSourceConfigWithoutPassword(c *C) *config.SourceConfig { 729 sourceCfg, err := config.SourceCfgFromYamlAndVerify(config.SampleSourceConfig) 730 c.Assert(err, IsNil) 731 sourceCfg.From.Password = "" // no password set 732 return sourceCfg 733 } 734 735 func (t *testServer) TestServerDataRace(c *C) { 736 var ( 737 masterAddr = tempurl.Alloc()[len("http://"):] 738 keepAliveTTL = int64(1) 739 ) 740 etcdDir := c.MkDir() 741 ETCD, err := 
createMockETCD(etcdDir, "http://"+masterAddr) 742 c.Assert(err, IsNil) 743 defer ETCD.Close() 744 cfg := NewConfig() 745 c.Assert(cfg.Parse([]string{"-config=./dm-worker.toml"}), IsNil) 746 cfg.Join = masterAddr 747 cfg.KeepAliveTTL = keepAliveTTL 748 cfg.RelayKeepAliveTTL = keepAliveTTL 749 750 s := NewServer(cfg) 751 defer s.Close() 752 753 var wg sync.WaitGroup 754 for i := 0; i < 20; i++ { 755 wg.Add(2) 756 go func() { 757 defer wg.Done() 758 err1 := s.Start() 759 c.Assert(err1 == nil || err1 == terror.ErrWorkerServerClosed, IsTrue) 760 }() 761 go func() { 762 defer wg.Done() 763 s.Close() 764 }() 765 wg.Wait() 766 } 767 } 768 769 func loadSourceConfigWithoutPassword2(t *testing.T) *config.SourceConfig { 770 t.Helper() 771 772 sourceCfg, err := config.SourceCfgFromYamlAndVerify(config.SampleSourceConfig) 773 require.NoError(t, err) 774 sourceCfg.From.Password = "" // no password set 775 return sourceCfg 776 }