github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/master/server_test.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package master 15 16 import ( 17 "bytes" 18 "context" 19 "fmt" 20 "io" 21 "io/ioutil" 22 "net" 23 "net/http" 24 "os" 25 "path/filepath" 26 "sort" 27 "strings" 28 "sync" 29 "testing" 30 "time" 31 32 "github.com/DATA-DOG/go-sqlmock" 33 "github.com/go-mysql-org/go-mysql/mysql" 34 "github.com/golang/mock/gomock" 35 "github.com/pingcap/errors" 36 "github.com/pingcap/failpoint" 37 tiddl "github.com/pingcap/tidb/pkg/ddl" 38 "github.com/pingcap/tidb/pkg/parser" 39 "github.com/pingcap/tidb/pkg/parser/ast" 40 "github.com/pingcap/tidb/pkg/parser/model" 41 "github.com/pingcap/tidb/pkg/sessionctx" 42 toolutils "github.com/pingcap/tidb/pkg/util" 43 tidbmock "github.com/pingcap/tidb/pkg/util/mock" 44 "github.com/pingcap/tiflow/dm/checker" 45 common2 "github.com/pingcap/tiflow/dm/common" 46 "github.com/pingcap/tiflow/dm/config" 47 "github.com/pingcap/tiflow/dm/config/dbconfig" 48 "github.com/pingcap/tiflow/dm/config/security" 49 "github.com/pingcap/tiflow/dm/ctl/common" 50 "github.com/pingcap/tiflow/dm/loader" 51 "github.com/pingcap/tiflow/dm/master/scheduler" 52 "github.com/pingcap/tiflow/dm/master/shardddl" 53 "github.com/pingcap/tiflow/dm/master/workerrpc" 54 "github.com/pingcap/tiflow/dm/openapi/fixtures" 55 "github.com/pingcap/tiflow/dm/pb" 56 "github.com/pingcap/tiflow/dm/pbmock" 57 "github.com/pingcap/tiflow/dm/pkg/conn" 58 "github.com/pingcap/tiflow/dm/pkg/cputil" 59 "github.com/pingcap/tiflow/dm/pkg/etcdutil" 60 "github.com/pingcap/tiflow/dm/pkg/ha" 61 "github.com/pingcap/tiflow/dm/pkg/log" 62 "github.com/pingcap/tiflow/dm/pkg/shardddl/optimism" 63 "github.com/pingcap/tiflow/dm/pkg/shardddl/pessimism" 64 "github.com/pingcap/tiflow/dm/pkg/terror" 65 "github.com/pingcap/tiflow/dm/pkg/utils" 66 "github.com/pingcap/tiflow/pkg/version" 67 "github.com/stretchr/testify/require" 68 "github.com/stretchr/testify/suite" 69 "github.com/tikv/pd/pkg/utils/tempurl" 70 clientv3 "go.etcd.io/etcd/client/v3" 71 "go.etcd.io/etcd/server/v3/verify" 72 "go.etcd.io/etcd/tests/v3/integration" 73 "google.golang.org/grpc" 74 ) 75 76 // use task config from integration test `sharding`. 77 var taskConfig = `--- 78 name: test 79 task-mode: all 80 is-sharding: true 81 shard-mode: "" 82 meta-schema: "dm_meta" 83 enable-heartbeat: true 84 ignore-checking-items: ["all"] 85 86 target-database: 87 host: "127.0.0.1" 88 port: 4000 89 user: "root" 90 password: "" 91 92 mysql-instances: 93 - source-id: "mysql-replica-01" 94 block-allow-list: "instance" 95 route-rules: ["sharding-route-rules-table", "sharding-route-rules-schema"] 96 mydumper-config-name: "global" 97 loader-config-name: "global" 98 syncer-config-name: "global" 99 100 - source-id: "mysql-replica-02" 101 block-allow-list: "instance" 102 route-rules: ["sharding-route-rules-table", "sharding-route-rules-schema"] 103 mydumper-config-name: "global" 104 loader-config-name: "global" 105 syncer-config-name: "global" 106 107 block-allow-list: 108 instance: 109 do-dbs: ["~^sharding[\\d]+"] 110 do-tables: 111 - db-name: "~^sharding[\\d]+" 112 tbl-name: "~^t[\\d]+" 113 114 routes: 115 sharding-route-rules-table: 116 schema-pattern: sharding* 117 table-pattern: t* 118 target-schema: db_target 119 target-table: t_target 120 121 sharding-route-rules-schema: 122 schema-pattern: sharding* 123 target-schema: db_target 124 125 mydumpers: 126 global: 127 threads: 4 128 chunk-filesize: 64 129 skip-tz-utc: true 130 extra-args: "--regex '^sharding.*'" 131 132 loaders: 133 global: 134 pool-size: 16 135 dir: "./dumped_data" 136 137 syncers: 138 global: 139 worker-count: 16 140 batch: 100 141 ` 142 143 var ( 144 errGRPCFailed = "test grpc request failed" 145 errGRPCFailedReg = fmt.Sprintf("(?m).*%s.*", errGRPCFailed) 146 errCheckSyncConfig = "(?m).*check sync config with error.*" 147 errCheckSyncConfigReg = fmt.Sprintf("(?m).*%s.*", errCheckSyncConfig) 148 keepAliveTTL = int64(10) 149 ) 150 151 type testMasterSuite struct { 152 suite.Suite 153 154 workerClients map[string]workerrpc.Client 155 saveMaxRetryNum int 156 electionTTLBackup int 157 158 testEtcdCluster *integration.ClusterV3 159 etcdTestCli *clientv3.Client 160 } 161 162 func TestMasterSuite(t *testing.T) { 163 suite.Run(t, new(testMasterSuite)) 164 } 165 166 var pwd string 167 168 func (t *testMasterSuite) SetupSuite() { 169 require.NoError(t.T(), log.InitLogger(&log.Config{})) 170 var err error 171 pwd, err = os.Getwd() 172 require.NoError(t.T(), err) 173 integration.BeforeTestExternal(t.T()) 174 t.workerClients = make(map[string]workerrpc.Client) 175 t.saveMaxRetryNum = maxRetryNum 176 t.electionTTLBackup = electionTTL 177 electionTTL = 3 178 maxRetryNum = 2 179 checkAndAdjustSourceConfigForDMCtlFunc = checkAndNoAdjustSourceConfigMock 180 } 181 182 func (t *testMasterSuite) TearDownSuite() { 183 maxRetryNum = t.saveMaxRetryNum 184 electionTTL = t.electionTTLBackup 185 checkAndAdjustSourceConfigForDMCtlFunc = checkAndAdjustSourceConfig 186 } 187 188 func (t *testMasterSuite) SetupTest() { 189 t.testEtcdCluster = integration.NewClusterV3(t.T(), &integration.ClusterConfig{Size: 1}) 190 t.etcdTestCli = t.testEtcdCluster.RandClient() 191 t.clearEtcdEnv() 192 } 193 194 func (t *testMasterSuite) TearDownTest() { 195 t.clearEtcdEnv() 196 t.testEtcdCluster.Terminate(t.T()) 197 } 198 199 func (t *testMasterSuite) clearEtcdEnv() { 200 require.NoError(t.T(), ha.ClearTestInfoOperation(t.etcdTestCli)) 201 } 202 203 func (t *testMasterSuite) clearSchedulerEnv(cancel context.CancelFunc, wg *sync.WaitGroup) { 204 cancel() 205 wg.Wait() 206 t.clearEtcdEnv() 207 } 208 209 func stageDeepEqualExcludeRev(t *testing.T, stage, expectStage ha.Stage) { 210 t.Helper() 211 212 expectStage.Revision = stage.Revision 213 require.Equal(t, expectStage, stage) 214 } 215 216 func mockRevelantWorkerClient(mockWorkerClient *pbmock.MockWorkerClient, taskName, sourceID string, masterReq interface{}) { 217 var expect pb.Stage 218 switch req := masterReq.(type) { 219 case *pb.OperateSourceRequest: 220 switch req.Op { 221 case pb.SourceOp_StartSource, pb.SourceOp_UpdateSource: 222 expect = pb.Stage_Running 223 case pb.SourceOp_StopSource: 224 expect = pb.Stage_Stopped 225 } 226 case *pb.StartTaskRequest, *pb.UpdateTaskRequest: 227 expect = pb.Stage_Running 228 case *pb.OperateTaskRequest: 229 switch req.Op { 230 case pb.TaskOp_Resume: 231 expect = pb.Stage_Running 232 case pb.TaskOp_Pause: 233 expect = pb.Stage_Paused 234 case pb.TaskOp_Delete: 235 } 236 case *pb.OperateWorkerRelayRequest: 237 switch req.Op { 238 case pb.RelayOp_ResumeRelay: 239 expect = pb.Stage_Running 240 case pb.RelayOp_PauseRelay: 241 expect = pb.Stage_Paused 242 case pb.RelayOp_StopRelay: 243 expect = pb.Stage_Stopped 244 } 245 } 246 queryResp := &pb.QueryStatusResponse{ 247 Result: true, 248 SourceStatus: &pb.SourceStatus{}, 249 } 250 251 switch masterReq.(type) { 252 case *pb.OperateSourceRequest: 253 switch expect { 254 case pb.Stage_Running: 255 queryResp.SourceStatus = &pb.SourceStatus{Source: sourceID} 256 case pb.Stage_Stopped: 257 queryResp.SourceStatus = &pb.SourceStatus{Source: ""} 258 } 259 case *pb.StartTaskRequest, *pb.UpdateTaskRequest, *pb.OperateTaskRequest: 260 queryResp.SubTaskStatus = []*pb.SubTaskStatus{{}} 261 if opTaskReq, ok := masterReq.(*pb.OperateTaskRequest); ok && opTaskReq.Op == pb.TaskOp_Delete { 262 queryResp.SubTaskStatus[0].Status = &pb.SubTaskStatus_Msg{ 263 Msg: fmt.Sprintf("no sub task with name %s has started", taskName), 264 } 265 } else { 266 queryResp.SubTaskStatus[0].Name = taskName 267 queryResp.SubTaskStatus[0].Stage = expect 268 } 269 case *pb.OperateWorkerRelayRequest: 270 queryResp.SourceStatus = &pb.SourceStatus{RelayStatus: &pb.RelayStatus{Stage: expect}} 271 } 272 273 mockWorkerClient.EXPECT().QueryStatus( 274 gomock.Any(), 275 &pb.QueryStatusRequest{ 276 Name: taskName, 277 }, 278 ).Return(queryResp, nil).MaxTimes(maxRetryNum) 279 } 280 281 func createTableInfo(t *testing.T, p *parser.Parser, se sessionctx.Context, tableID int64, sql string) *model.TableInfo { 282 t.Helper() 283 284 node, err := p.ParseOneStmt(sql, "utf8mb4", "utf8mb4_bin") 285 require.NoError(t, err) 286 createStmtNode, ok := node.(*ast.CreateTableStmt) 287 require.True(t, ok, "%s is not a CREATE TABLE statement", sql) 288 info, err := tiddl.MockTableInfo(se, createStmtNode, tableID) 289 require.NoError(t, err) 290 return info 291 } 292 293 func newMockRPCClient(client pb.WorkerClient) workerrpc.Client { 294 c, _ := workerrpc.NewGRPCClientWrap(nil, client) 295 return c 296 } 297 298 func defaultWorkerSource() ([]string, []string) { 299 return []string{ 300 "mysql-replica-01", 301 "mysql-replica-02", 302 }, []string{ 303 "127.0.0.1:8262", 304 "127.0.0.1:8263", 305 } 306 } 307 308 func makeNilWorkerClients(workers []string) map[string]workerrpc.Client { 309 nilWorkerClients := make(map[string]workerrpc.Client, len(workers)) 310 for _, worker := range workers { 311 nilWorkerClients[worker] = nil 312 } 313 return nilWorkerClients 314 } 315 316 func makeWorkerClientsForHandle(ctrl *gomock.Controller, taskName string, sources []string, workers []string, reqs ...interface{}) map[string]workerrpc.Client { 317 workerClients := make(map[string]workerrpc.Client, len(workers)) 318 for i := range workers { 319 mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) 320 for _, req := range reqs { 321 mockRevelantWorkerClient(mockWorkerClient, taskName, sources[i], req) 322 } 323 workerClients[workers[i]] = newMockRPCClient(mockWorkerClient) 324 } 325 return workerClients 326 } 327 328 func testDefaultMasterServer(t *testing.T) *Server { 329 t.Helper() 330 331 cfg := NewConfig() 332 err := cfg.FromContent(SampleConfig) 333 require.NoError(t, err) 334 cfg.DataDir = t.TempDir() 335 server := NewServer(cfg) 336 server.leader.Store(oneselfLeader) 337 go server.ap.Start(context.Background()) 338 339 return server 340 } 341 342 func (t *testMasterSuite) testMockScheduler( 343 ctx context.Context, 344 wg *sync.WaitGroup, 345 sources, workers []string, 346 password string, 347 workerClients map[string]workerrpc.Client, 348 ) (*scheduler.Scheduler, []context.CancelFunc) { 349 logger := log.L() 350 scheduler2 := scheduler.NewScheduler(&logger, security.Security{}) 351 err := scheduler2.Start(ctx, t.etcdTestCli) 352 require.NoError(t.T(), err) 353 cancels := make([]context.CancelFunc, 0, 2) 354 for i := range workers { 355 // add worker to scheduler's workers map 356 name := workers[i] 357 require.NoError(t.T(), scheduler2.AddWorker(name, workers[i])) 358 scheduler2.SetWorkerClientForTest(name, workerClients[workers[i]]) 359 // operate mysql config on this worker 360 cfg := config.NewSourceConfig() 361 cfg.SourceID = sources[i] 362 cfg.From.Password = password 363 require.NoError(t.T(), scheduler2.AddSourceCfg(cfg)) 364 wg.Add(1) 365 ctx1, cancel1 := context.WithCancel(ctx) 366 cancels = append(cancels, cancel1) 367 go func(ctx context.Context, workerName string) { 368 defer wg.Done() 369 require.NoError(t.T(), ha.KeepAlive(ctx, t.etcdTestCli, workerName, keepAliveTTL)) 370 }(ctx1, name) 371 idx := i 372 require.Eventually(t.T(), func() bool { 373 w := scheduler2.GetWorkerBySource(sources[idx]) 374 return w != nil && w.BaseInfo().Name == name 375 }, 3*time.Second, 100*time.Millisecond) 376 } 377 return scheduler2, cancels 378 } 379 380 func (t *testMasterSuite) testMockSchedulerForRelay( 381 ctx context.Context, 382 wg *sync.WaitGroup, 383 sources, workers []string, 384 password string, 385 workerClients map[string]workerrpc.Client, 386 ) (*scheduler.Scheduler, []context.CancelFunc) { 387 logger := log.L() 388 scheduler2 := scheduler.NewScheduler(&logger, security.Security{}) 389 err := scheduler2.Start(ctx, t.etcdTestCli) 390 require.NoError(t.T(), err) 391 cancels := make([]context.CancelFunc, 0, 2) 392 for i := range workers { 393 // add worker to scheduler's workers map 394 name := workers[i] 395 require.NoError(t.T(), scheduler2.AddWorker(name, workers[i])) 396 scheduler2.SetWorkerClientForTest(name, workerClients[workers[i]]) 397 // operate mysql config on this worker 398 cfg := config.NewSourceConfig() 399 cfg.SourceID = sources[i] 400 cfg.From.Password = password 401 require.NoError(t.T(), scheduler2.AddSourceCfg(cfg)) 402 wg.Add(1) 403 ctx1, cancel1 := context.WithCancel(ctx) 404 cancels = append(cancels, cancel1) 405 go func(ctx context.Context, workerName string) { 406 defer wg.Done() 407 require.NoError(t.T(), ha.KeepAlive(ctx, t.etcdTestCli, workerName, keepAliveTTL)) 408 }(ctx1, name) 409 410 // wait the mock worker has alive 411 require.Eventually(t.T(), func() bool { 412 resp, err2 := t.etcdTestCli.Get(ctx, common2.WorkerKeepAliveKeyAdapter.Encode(name)) 413 require.NoError(t.T(), err2) 414 return resp.Count == 1 415 }, 3*time.Second, 100*time.Millisecond) 416 417 require.NoError(t.T(), scheduler2.StartRelay(sources[i], []string{workers[i]})) 418 idx := i 419 require.Eventually(t.T(), func() bool { 420 relayWorkers, err2 := scheduler2.GetRelayWorkers(sources[idx]) 421 require.NoError(t.T(), err2) 422 return len(relayWorkers) == 1 && relayWorkers[0].BaseInfo().Name == name 423 }, 3*time.Second, 100*time.Millisecond) 424 } 425 return scheduler2, cancels 426 } 427 428 func generateServerConfig(t *testing.T, name string) *Config { 429 t.Helper() 430 431 // create a new cluster 432 cfg1 := NewConfig() 433 err := cfg1.FromContent(SampleConfig) 434 require.NoError(t, err) 435 cfg1.Name = name 436 cfg1.DataDir = t.TempDir() 437 cfg1.MasterAddr = tempurl.Alloc()[len("http://"):] 438 cfg1.AdvertiseAddr = cfg1.MasterAddr 439 cfg1.PeerUrls = tempurl.Alloc() 440 cfg1.AdvertisePeerUrls = cfg1.PeerUrls 441 cfg1.InitialCluster = fmt.Sprintf("%s=%s", cfg1.Name, cfg1.AdvertisePeerUrls) 442 return cfg1 443 } 444 445 func (t *testMasterSuite) TestQueryStatus() { 446 ctrl := gomock.NewController(t.T()) 447 defer ctrl.Finish() 448 449 server := testDefaultMasterServer(t.T()) 450 sources, workers := defaultWorkerSource() 451 var cancels []context.CancelFunc 452 453 // test query all workers 454 for _, worker := range workers { 455 mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) 456 mockWorkerClient.EXPECT().QueryStatus( 457 gomock.Any(), 458 &pb.QueryStatusRequest{}, 459 ).Return(&pb.QueryStatusResponse{ 460 Result: true, 461 SourceStatus: &pb.SourceStatus{}, 462 }, nil) 463 t.workerClients[worker] = newMockRPCClient(mockWorkerClient) 464 } 465 var wg sync.WaitGroup 466 ctx, cancel := context.WithCancel(context.Background()) 467 server.scheduler, cancels = t.testMockScheduler(ctx, &wg, sources, workers, "", t.workerClients) 468 for _, cancelFunc := range cancels { 469 defer cancelFunc() 470 } 471 resp, err := server.QueryStatus(context.Background(), &pb.QueryStatusListRequest{}) 472 require.NoError(t.T(), err) 473 require.True(t.T(), resp.Result) 474 t.clearSchedulerEnv(cancel, &wg) 475 476 // query specified sources 477 for _, worker := range workers { 478 mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) 479 mockWorkerClient.EXPECT().QueryStatus( 480 gomock.Any(), 481 &pb.QueryStatusRequest{}, 482 ).Return(&pb.QueryStatusResponse{ 483 Result: true, 484 SourceStatus: &pb.SourceStatus{}, 485 }, nil) 486 t.workerClients[worker] = newMockRPCClient(mockWorkerClient) 487 } 488 ctx, cancel = context.WithCancel(context.Background()) 489 server.scheduler, cancels = t.testMockSchedulerForRelay(ctx, &wg, sources, workers, "passwd", t.workerClients) 490 for _, cancelFunc := range cancels { 491 defer cancelFunc() 492 } 493 resp, err = server.QueryStatus(context.Background(), &pb.QueryStatusListRequest{ 494 Sources: sources, 495 }) 496 require.NoError(t.T(), err) 497 require.True(t.T(), resp.Result) 498 499 // query with invalid dm-worker[s] 500 resp, err = server.QueryStatus(context.Background(), &pb.QueryStatusListRequest{ 501 Sources: []string{"invalid-source1", "invalid-source2"}, 502 }) 503 require.NoError(t.T(), err) 504 require.False(t.T(), resp.Result) 505 require.Regexp(t.T(), "sources .* haven't been added", resp.Msg) 506 507 // query with invalid task name 508 resp, err = server.QueryStatus(context.Background(), &pb.QueryStatusListRequest{ 509 Name: "invalid-task-name", 510 }) 511 require.NoError(t.T(), err) 512 require.False(t.T(), resp.Result) 513 require.Regexp(t.T(), "task .* has no source or not exist", resp.Msg) 514 t.clearSchedulerEnv(cancel, &wg) 515 // TODO: test query with correct task name, this needs to add task first 516 } 517 518 func (t *testMasterSuite) TestWaitOperationOkRightResult() { 519 cases := []struct { 520 req interface{} 521 resp *pb.QueryStatusResponse 522 expectedOK bool 523 expectedEmptyMsg bool 524 }{ 525 { 526 &pb.OperateTaskRequest{ 527 Op: pb.TaskOp_Pause, 528 Name: "task-unittest", 529 }, 530 &pb.QueryStatusResponse{ 531 SubTaskStatus: []*pb.SubTaskStatus{ 532 {Stage: pb.Stage_Paused}, 533 }, 534 }, 535 true, 536 true, 537 }, 538 { 539 &pb.OperateTaskRequest{ 540 Op: pb.TaskOp_Pause, 541 Name: "task-unittest", 542 }, 543 &pb.QueryStatusResponse{ 544 SubTaskStatus: []*pb.SubTaskStatus{ 545 { 546 Stage: pb.Stage_Paused, 547 Result: &pb.ProcessResult{Errors: []*pb.ProcessError{{Message: "paused by previous error"}}}, 548 }, 549 }, 550 }, 551 true, 552 false, 553 }, 554 } 555 556 ctrl := gomock.NewController(t.T()) 557 defer ctrl.Finish() 558 ctx := context.Background() 559 duration, _ := time.ParseDuration("1s") 560 s := &Server{cfg: &Config{RPCTimeout: duration}} 561 for _, ca := range cases { 562 mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) 563 mockWorkerClient.EXPECT().QueryStatus( 564 gomock.Any(), 565 gomock.Any(), 566 ).Return(ca.resp, nil) 567 mockWorker := scheduler.NewMockWorker(newMockRPCClient(mockWorkerClient)) 568 569 ok, msg, _, err := s.waitOperationOk(ctx, mockWorker, "", "", ca.req) 570 require.NoError(t.T(), err) 571 require.Equal(t.T(), ca.expectedOK, ok) 572 if ca.expectedEmptyMsg { 573 require.Empty(t.T(), msg) 574 } else { 575 require.NotEmpty(t.T(), msg) 576 } 577 } 578 } 579 580 func (t *testMasterSuite) TestStopTaskWithExceptRight() { 581 taskName := "test-stop-task" 582 responeses := [][]*pb.QueryStatusResponse{{ 583 &pb.QueryStatusResponse{ 584 SubTaskStatus: []*pb.SubTaskStatus{ 585 { 586 Name: taskName, 587 Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{ 588 UnresolvedGroups: []*pb.ShardingGroup{{Target: "`db`.`tbl`", Unsynced: []string{"table1"}}}, 589 }}, 590 }, 591 }, 592 }, 593 &pb.QueryStatusResponse{SubTaskStatus: []*pb.SubTaskStatus{}}, 594 }, { 595 &pb.QueryStatusResponse{ 596 SubTaskStatus: []*pb.SubTaskStatus{ 597 { 598 Name: taskName, 599 Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{ 600 UnresolvedGroups: []*pb.ShardingGroup{{Target: "`db`.`tbl`", Unsynced: []string{"table1"}}}, 601 }}, 602 }, 603 }, 604 }, 605 &pb.QueryStatusResponse{SubTaskStatus: []*pb.SubTaskStatus{ 606 { 607 Name: taskName, 608 Status: &pb.SubTaskStatus_Msg{Msg: common2.NoSubTaskMsg(taskName)}, 609 }, 610 }}, 611 }} 612 req := &pb.OperateTaskRequest{ 613 Op: pb.TaskOp_Delete, 614 Name: taskName, 615 } 616 ctrl := gomock.NewController(t.T()) 617 defer ctrl.Finish() 618 ctx := context.Background() 619 s := &Server{cfg: &Config{RPCTimeout: time.Second}} 620 621 for _, item := range responeses { 622 mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) 623 mockWorkerClient.EXPECT().QueryStatus( 624 gomock.Any(), 625 gomock.Any(), 626 ).Return(item[0], nil).Return(item[1], nil).MaxTimes(2) 627 mockWorker := scheduler.NewMockWorker(newMockRPCClient(mockWorkerClient)) 628 ok, msg, _, err := s.waitOperationOk(ctx, mockWorker, taskName, "", req) 629 require.NoError(t.T(), err) 630 require.True(t.T(), ok) 631 require.Empty(t.T(), msg) 632 } 633 } 634 635 func (t *testMasterSuite) TestFillUnsyncedStatus() { 636 var ( 637 logger = log.L() 638 task1 = "task1" 639 task2 = "task2" 640 source1 = "source1" 641 source2 = "source2" 642 sources = []string{source1, source2} 643 ) 644 cases := []struct { 645 infos []pessimism.Info 646 input []*pb.QueryStatusResponse 647 expected []*pb.QueryStatusResponse 648 }{ 649 // test it could work 650 { 651 []pessimism.Info{ 652 { 653 Task: task1, 654 Source: source1, 655 Schema: "db", 656 Table: "tbl", 657 }, 658 }, 659 []*pb.QueryStatusResponse{ 660 { 661 SourceStatus: &pb.SourceStatus{ 662 Source: source1, 663 }, 664 SubTaskStatus: []*pb.SubTaskStatus{ 665 { 666 Name: task1, 667 Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{ 668 UnresolvedGroups: []*pb.ShardingGroup{{Target: "`db`.`tbl`", Unsynced: []string{"table1"}}}, 669 }}, 670 }, 671 { 672 Name: task2, 673 Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{}}, 674 }, 675 }, 676 }, { 677 SourceStatus: &pb.SourceStatus{ 678 Source: source2, 679 }, 680 SubTaskStatus: []*pb.SubTaskStatus{ 681 { 682 Name: task1, 683 Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{}}, 684 }, 685 { 686 Name: task2, 687 Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{}}, 688 }, 689 }, 690 }, 691 }, 692 []*pb.QueryStatusResponse{ 693 { 694 SourceStatus: &pb.SourceStatus{ 695 Source: source1, 696 }, 697 SubTaskStatus: []*pb.SubTaskStatus{ 698 { 699 Name: task1, 700 Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{ 701 UnresolvedGroups: []*pb.ShardingGroup{{Target: "`db`.`tbl`", Unsynced: []string{"table1"}}}, 702 }}, 703 }, 704 { 705 Name: task2, 706 Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{}}, 707 }, 708 }, 709 }, { 710 SourceStatus: &pb.SourceStatus{ 711 Source: source2, 712 }, 713 SubTaskStatus: []*pb.SubTaskStatus{ 714 { 715 Name: task1, 716 Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{ 717 UnresolvedGroups: []*pb.ShardingGroup{{Target: "`db`.`tbl`", Unsynced: []string{"this DM-worker doesn't receive any shard DDL of this group"}}}, 718 }}, 719 }, 720 { 721 Name: task2, 722 Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{}}, 723 }, 724 }, 725 }, 726 }, 727 }, 728 // test won't interfere not sync status 729 { 730 []pessimism.Info{ 731 { 732 Task: task1, 733 Source: source1, 734 Schema: "db", 735 Table: "tbl", 736 }, 737 }, 738 []*pb.QueryStatusResponse{ 739 { 740 SourceStatus: &pb.SourceStatus{ 741 Source: source1, 742 }, 743 SubTaskStatus: []*pb.SubTaskStatus{ 744 { 745 Name: task1, 746 Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{ 747 UnresolvedGroups: []*pb.ShardingGroup{{Target: "`db`.`tbl`", Unsynced: []string{"table1"}}}, 748 }}, 749 }, 750 }, 751 }, { 752 SourceStatus: &pb.SourceStatus{ 753 Source: source2, 754 }, 755 SubTaskStatus: []*pb.SubTaskStatus{ 756 { 757 Name: task1, 758 Status: &pb.SubTaskStatus_Load{Load: &pb.LoadStatus{}}, 759 }, 760 }, 761 }, 762 }, 763 []*pb.QueryStatusResponse{ 764 { 765 SourceStatus: &pb.SourceStatus{ 766 Source: source1, 767 }, 768 SubTaskStatus: []*pb.SubTaskStatus{ 769 { 770 Name: task1, 771 Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{ 772 UnresolvedGroups: []*pb.ShardingGroup{{Target: "`db`.`tbl`", Unsynced: []string{"table1"}}}, 773 }}, 774 }, 775 }, 776 }, { 777 SourceStatus: &pb.SourceStatus{ 778 Source: source2, 779 }, 780 SubTaskStatus: []*pb.SubTaskStatus{ 781 { 782 Name: task1, 783 Status: &pb.SubTaskStatus_Load{Load: &pb.LoadStatus{}}, 784 }, 785 }, 786 }, 787 }, 788 }, 789 } 790 791 // test pessimistic mode 792 for _, ca := range cases { 793 s := &Server{} 794 s.pessimist = shardddl.NewPessimist(&logger, func(task string) []string { return sources }) 795 require.NoError(t.T(), s.pessimist.Start(context.Background(), t.etcdTestCli)) 796 for _, i := range ca.infos { 797 _, err := pessimism.PutInfo(t.etcdTestCli, i) 798 require.NoError(t.T(), err) 799 } 800 if len(ca.infos) > 0 { 801 utils.WaitSomething(20, 100*time.Millisecond, func() bool { 802 return len(s.pessimist.ShowLocks("", nil)) > 0 803 }) 804 } 805 806 s.fillUnsyncedStatus(ca.input) 807 require.Equal(t.T(), ca.expected, ca.input) 808 _, err := pessimism.DeleteInfosOperations(t.etcdTestCli, ca.infos, nil) 809 require.NoError(t.T(), err) 810 } 811 } 812 813 func (t *testMasterSuite) TestCheckTask() { 814 ctrl := gomock.NewController(t.T()) 815 defer ctrl.Finish() 816 817 server := testDefaultMasterServer(t.T()) 818 sources, workers := defaultWorkerSource() 819 820 t.workerClients = makeNilWorkerClients(workers) 821 var wg sync.WaitGroup 822 ctx, cancel := context.WithCancel(context.Background()) 823 server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "", t.workerClients) 824 mock := conn.InitVersionDB() 825 defer func() { 826 conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{} 827 }() 828 mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}). 829 AddRow("version", "5.7.25-TiDB-v4.0.2")) 830 resp, err := server.CheckTask(context.Background(), &pb.CheckTaskRequest{ 831 Task: taskConfig, 832 }) 833 require.NoError(t.T(), err) 834 require.True(t.T(), resp.Result) 835 836 // decode task with error 837 resp, err = server.CheckTask(context.Background(), &pb.CheckTaskRequest{ 838 Task: "invalid toml config", 839 }) 840 require.NoError(t.T(), err) 841 require.False(t.T(), resp.Result) 842 t.clearSchedulerEnv(cancel, &wg) 843 844 // simulate invalid password returned from scheduler, but config was supported plaintext mysql password, so cfg.SubTaskConfigs will success 845 ctx, cancel = context.WithCancel(context.Background()) 846 server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "invalid-encrypt-password", t.workerClients) 847 mock = conn.InitVersionDB() 848 mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}). 849 AddRow("version", "5.7.25-TiDB-v4.0.2")) 850 resp, err = server.CheckTask(context.Background(), &pb.CheckTaskRequest{ 851 Task: taskConfig, 852 }) 853 require.NoError(t.T(), err) 854 require.True(t.T(), resp.Result) 855 t.clearSchedulerEnv(cancel, &wg) 856 } 857 858 func (t *testMasterSuite) TestStartTask() { 859 ctrl := gomock.NewController(t.T()) 860 defer ctrl.Finish() 861 862 server := testDefaultMasterServer(t.T()) 863 server.etcdClient = t.etcdTestCli 864 sources, workers := defaultWorkerSource() 865 866 // s.generateSubTask with error 867 resp, err := server.StartTask(context.Background(), &pb.StartTaskRequest{ 868 Task: "invalid toml config", 869 }) 870 require.NoError(t.T(), err) 871 require.False(t.T(), resp.Result) 872 873 // test start task successfully 874 var wg sync.WaitGroup 875 // taskName is relative to taskConfig 876 taskName := "test" 877 ctx, cancel := context.WithCancel(context.Background()) 878 req := &pb.StartTaskRequest{ 879 Task: taskConfig, 880 Sources: sources, 881 } 882 server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "", 883 makeWorkerClientsForHandle(ctrl, taskName, sources, workers, req)) 884 mock := conn.InitVersionDB() 885 defer func() { 886 conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{} 887 }() 888 mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}). 889 AddRow("version", "5.7.25-TiDB-v4.0.2")) 890 resp, err = server.StartTask(context.Background(), req) 891 require.NoError(t.T(), err) 892 require.True(t.T(), resp.Result) 893 for _, source := range sources { 894 t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running) 895 tcm, _, err2 := ha.GetSubTaskCfg(t.etcdTestCli, source, taskName, 0) 896 require.NoError(t.T(), err2) 897 require.Contains(t.T(), tcm, taskName) 898 require.Equal(t.T(), taskName, tcm[taskName].Name) 899 require.Equal(t.T(), source, tcm[taskName].SourceID) 900 } 901 902 // check start-task with an invalid source 903 invalidSource := "invalid-source" 904 mock = conn.InitVersionDB() 905 mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}). 906 AddRow("version", "5.7.25-TiDB-v4.0.2")) 907 resp, err = server.StartTask(context.Background(), &pb.StartTaskRequest{ 908 Task: taskConfig, 909 Sources: []string{invalidSource}, 910 }) 911 require.NoError(t.T(), err) 912 require.False(t.T(), resp.Result) 913 require.Len(t.T(), resp.Sources, 1) 914 require.False(t.T(), resp.Sources[0].Result) 915 require.Equal(t.T(), invalidSource, resp.Sources[0].Source) 916 917 // test start task, but the first step check-task fails 918 bakCheckSyncConfigFunc := checker.CheckSyncConfigFunc 919 checker.CheckSyncConfigFunc = func(_ context.Context, _ []*config.SubTaskConfig, _, _ int64) (string, error) { 920 return "", errors.New(errCheckSyncConfig) 921 } 922 defer func() { 923 checker.CheckSyncConfigFunc = bakCheckSyncConfigFunc 924 }() 925 mock = conn.InitVersionDB() 926 mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}). 927 AddRow("version", "5.7.25-TiDB-v4.0.2")) 928 resp, err = server.StartTask(context.Background(), &pb.StartTaskRequest{ 929 Task: taskConfig, 930 Sources: sources, 931 }) 932 require.NoError(t.T(), err) 933 require.False(t.T(), resp.Result) 934 require.Regexp(t.T(), errCheckSyncConfigReg, resp.CheckResult) 935 t.clearSchedulerEnv(cancel, &wg) 936 } 937 938 func (t *testMasterSuite) TestStartTaskWithRemoveMeta() { 939 ctrl := gomock.NewController(t.T()) 940 defer ctrl.Finish() 941 942 server := testDefaultMasterServer(t.T()) 943 sources, workers := defaultWorkerSource() 944 server.etcdClient = t.etcdTestCli 945 946 // test start task successfully 947 var wg sync.WaitGroup 948 // taskName is relative to taskConfig 949 cfg := config.NewTaskConfig() 950 err := cfg.FromYaml(taskConfig) 951 require.NoError(t.T(), err) 952 taskName := cfg.Name 953 ctx, cancel := context.WithCancel(context.Background()) 954 logger := log.L() 955 956 // test remove meta with pessimist 957 cfg.ShardMode = config.ShardPessimistic 958 req := &pb.StartTaskRequest{ 959 Task: strings.ReplaceAll(taskConfig, `shard-mode: ""`, fmt.Sprintf(`shard-mode: "%s"`, cfg.ShardMode)), 960 Sources: sources, 961 RemoveMeta: true, 962 } 963 server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "", 964 makeWorkerClientsForHandle(ctrl, taskName, sources, workers, req)) 965 server.pessimist = shardddl.NewPessimist(&logger, func(task string) []string { return sources }) 966 server.optimist = shardddl.NewOptimist(&logger, server.scheduler.GetDownstreamMetaByTask) 967 968 var ( 969 DDLs = []string{"ALTER TABLE bar ADD COLUMN c1 INT"} 970 schema, table = "foo", "bar" 971 ID = fmt.Sprintf("%s-`%s`.`%s`", taskName, schema, table) 972 i11 = pessimism.NewInfo(taskName, sources[0], schema, table, DDLs) 973 op2 = pessimism.NewOperation(ID, taskName, sources[0], DDLs, true, false) 974 ) 975 _, err = pessimism.PutInfo(t.etcdTestCli, i11) 976 require.NoError(t.T(), err) 977 _, succ, err := pessimism.PutOperations(t.etcdTestCli, false, op2) 978 require.True(t.T(), succ) 979 require.NoError(t.T(), err) 980 981 require.NoError(t.T(), server.pessimist.Start(ctx, t.etcdTestCli)) 982 require.NoError(t.T(), server.optimist.Start(ctx, t.etcdTestCli)) 983 984 verMock := conn.InitVersionDB() 985 defer func() { 986 conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{} 987 }() 988 verMock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}). 989 AddRow("version", "5.7.25-TiDB-v4.0.2")) 990 mock, err := conn.MockDefaultDBProvider() 991 require.NoError(t.T(), err) 992 mock.ExpectBegin() 993 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.LoaderCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 994 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.LightningCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 995 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.SyncerCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 996 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.SyncerShardMeta(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 997 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.SyncerOnlineDDL(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 998 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 999 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorPendingChange(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 1000 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorErrorChange(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 1001 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorTableStatus(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 1002 mock.ExpectExec(fmt.Sprintf("DROP DATABASE IF EXISTS `%s`", loader.GetTaskInfoSchemaName(cfg.MetaSchema, cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 1003 mock.ExpectCommit() 1004 require.Greater(t.T(), len(server.pessimist.Locks()), 0) 1005 1006 resp, err := server.StartTask(context.Background(), req) 1007 wg.Add(1) 1008 go func() { 1009 defer wg.Done() 1010 time.Sleep(10 * time.Microsecond) 1011 // start another same task at the same time, should get err 1012 verMock2 := conn.InitVersionDB() 1013 verMock2.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}). 1014 AddRow("version", "5.7.25-TiDB-v4.0.2")) 1015 resp1, err1 := server.StartTask(context.Background(), req) 1016 require.NoError(t.T(), err1) 1017 require.False(t.T(), resp1.Result) 1018 require.Equal(t.T(), terror.Annotate(terror.ErrSchedulerSubTaskExist.Generate(cfg.Name, sources), 1019 "while remove-meta is true").Error(), resp1.Msg) 1020 }() 1021 require.NoError(t.T(), err) 1022 require.True(t.T(), resp.Result, "start task failed: %s", resp.Msg) 1023 for _, source := range sources { 1024 t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running) 1025 tcm, _, err2 := ha.GetSubTaskCfg(t.etcdTestCli, source, taskName, 0) 1026 require.NoError(t.T(), err2) 1027 require.Contains(t.T(), tcm, taskName) 1028 require.Equal(t.T(), taskName, tcm[taskName].Name) 1029 require.Equal(t.T(), source, tcm[taskName].SourceID) 1030 } 1031 1032 require.Len(t.T(), server.pessimist.Locks(), 0) 1033 require.NoError(t.T(), mock.ExpectationsWereMet()) 1034 ifm, _, err := pessimism.GetAllInfo(t.etcdTestCli) 1035 require.NoError(t.T(), err) 1036 require.Len(t.T(), ifm, 0) 1037 opm, _, err := pessimism.GetAllOperations(t.etcdTestCli) 1038 require.NoError(t.T(), err) 1039 require.Len(t.T(), opm, 0) 1040 t.clearSchedulerEnv(cancel, &wg) 1041 1042 // test remove meta with optimist 1043 ctx, cancel = context.WithCancel(context.Background()) 1044 cfg.ShardMode = config.ShardOptimistic 1045 req = &pb.StartTaskRequest{ 1046 Task: strings.ReplaceAll(taskConfig, `shard-mode: ""`, fmt.Sprintf(`shard-mode: "%s"`, cfg.ShardMode)), 1047 Sources: sources, 1048 RemoveMeta: true, 1049 } 1050 server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "", 1051 makeWorkerClientsForHandle(ctrl, taskName, sources, workers, req)) 1052 server.pessimist = shardddl.NewPessimist(&logger, func(task string) []string { return sources }) 1053 server.optimist = shardddl.NewOptimist(&logger, server.scheduler.GetDownstreamMetaByTask) 1054 1055 var ( 1056 p = parser.New() 1057 se = tidbmock.NewContext() 1058 tblID int64 = 111 1059 1060 st1 = optimism.NewSourceTables(taskName, sources[0]) 1061 DDLs1 = []string{"ALTER TABLE bar ADD COLUMN c1 INT"} 1062 tiBefore = createTableInfo(t.T(), p, se, tblID, `CREATE TABLE bar (id INT PRIMARY KEY)`) 1063 tiAfter1 = createTableInfo(t.T(), p, se, tblID, `CREATE TABLE bar (id INT PRIMARY KEY, c1 TEXT)`) 1064 info1 = optimism.NewInfo(taskName, sources[0], "foo-1", "bar-1", schema, table, DDLs1, tiBefore, []*model.TableInfo{tiAfter1}) 1065 op1 = optimism.NewOperation(ID, taskName, sources[0], info1.UpSchema, info1.UpTable, DDLs1, optimism.ConflictNone, "", false, []string{}) 1066 ) 1067 1068 st1.AddTable("foo-1", "bar-1", schema, table) 1069 _, err = optimism.PutSourceTables(t.etcdTestCli, st1) 1070 require.NoError(t.T(), err) 1071 _, err = optimism.PutInfo(t.etcdTestCli, info1) 1072 require.NoError(t.T(), err) 1073 _, succ, err = optimism.PutOperation(t.etcdTestCli, false, op1, 0) 1074 require.True(t.T(), succ) 1075 require.NoError(t.T(), err) 1076 1077 err = server.pessimist.Start(ctx, t.etcdTestCli) 1078 require.NoError(t.T(), err) 1079 err = server.optimist.Start(ctx, t.etcdTestCli) 1080 require.NoError(t.T(), err) 1081 1082 verMock = conn.InitVersionDB() 1083 verMock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}). 1084 AddRow("version", "5.7.25-TiDB-v4.0.2")) 1085 mock, err = conn.MockDefaultDBProvider() 1086 require.NoError(t.T(), err) 1087 mock.ExpectBegin() 1088 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.LoaderCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 1089 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.LightningCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 1090 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.SyncerCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 1091 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.SyncerShardMeta(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 1092 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.SyncerOnlineDDL(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 1093 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 1094 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorPendingChange(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 1095 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorErrorChange(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 1096 mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorTableStatus(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 1097 mock.ExpectExec(fmt.Sprintf("DROP DATABASE IF EXISTS `%s`", loader.GetTaskInfoSchemaName(cfg.MetaSchema, cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1)) 1098 mock.ExpectCommit() 1099 require.Greater(t.T(), len(server.optimist.Locks()), 0) 1100 1101 resp, err = server.StartTask(context.Background(), req) 1102 wg.Add(1) 1103 go func() { 1104 defer wg.Done() 1105 time.Sleep(10 * time.Microsecond) 1106 // start another same task at the same time, should get err 1107 vermock2 := conn.InitVersionDB() 1108 vermock2.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}). 1109 AddRow("version", "5.7.25-TiDB-v4.0.2")) 1110 resp1, err1 := server.StartTask(context.Background(), req) 1111 require.NoError(t.T(), err1) 1112 require.False(t.T(), resp1.Result) 1113 require.Equal(t.T(), terror.Annotate(terror.ErrSchedulerSubTaskExist.Generate(cfg.Name, sources), 1114 "while remove-meta is true").Error(), resp1.Msg) 1115 }() 1116 require.NoError(t.T(), err) 1117 require.True(t.T(), resp.Result) 1118 for _, source := range sources { 1119 t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running) 1120 tcm, _, err2 := ha.GetSubTaskCfg(t.etcdTestCli, source, taskName, 0) 1121 require.NoError(t.T(), err2) 1122 require.Contains(t.T(), tcm, taskName) 1123 require.Equal(t.T(), taskName, tcm[taskName].Name) 1124 require.Equal(t.T(), source, tcm[taskName].SourceID) 1125 } 1126 1127 require.Len(t.T(), server.optimist.Locks(), 0) 1128 require.NoError(t.T(), mock.ExpectationsWereMet()) 1129 ifm2, _, err := optimism.GetAllInfo(t.etcdTestCli) 1130 require.NoError(t.T(), err) 1131 require.Len(t.T(), ifm2, 0) 1132 opm2, _, err := optimism.GetAllOperations(t.etcdTestCli) 1133 require.NoError(t.T(), err) 1134 require.Len(t.T(), opm2, 0) 1135 tbm, _, err := optimism.GetAllSourceTables(t.etcdTestCli) 1136 require.NoError(t.T(), err) 1137 require.Len(t.T(), tbm, 0) 1138 1139 t.clearSchedulerEnv(cancel, &wg) 1140 } 1141 1142 func (t *testMasterSuite) TestOperateTask() { 1143 var ( 1144 taskName = "unit-test-task" 1145 pauseOp = pb.TaskOp_Pause 1146 ) 1147 1148 ctrl := gomock.NewController(t.T()) 1149 defer ctrl.Finish() 1150 server := testDefaultMasterServer(t.T()) 1151 server.etcdClient = t.etcdTestCli 1152 sources, workers := defaultWorkerSource() 1153 1154 // test operate-task with invalid task name 1155 resp, err := server.OperateTask(context.Background(), &pb.OperateTaskRequest{ 1156 Op: pauseOp, 1157 Name: taskName, 1158 }) 1159 require.NoError(t.T(), err) 1160 require.False(t.T(), resp.Result) 1161 require.Equal(t.T(), fmt.Sprintf("task %s has no source or not exist, please check the task name and status", taskName), resp.Msg) 1162 1163 // 1. start task 1164 taskName = "test" 1165 var wg sync.WaitGroup 1166 ctx, cancel := context.WithCancel(context.Background()) 1167 startReq := &pb.StartTaskRequest{ 1168 Task: taskConfig, 1169 Sources: sources, 1170 } 1171 pauseReq := &pb.OperateTaskRequest{ 1172 Op: pauseOp, 1173 Name: taskName, 1174 } 1175 resumeReq := &pb.OperateTaskRequest{ 1176 Op: pb.TaskOp_Resume, 1177 Name: taskName, 1178 } 1179 stopReq1 := &pb.OperateTaskRequest{ 1180 Op: pb.TaskOp_Delete, 1181 Name: taskName, 1182 Sources: []string{sources[0]}, 1183 } 1184 stopReq2 := &pb.OperateTaskRequest{ 1185 Op: pb.TaskOp_Delete, 1186 Name: taskName, 1187 } 1188 sourceResps := []*pb.CommonWorkerResponse{{Result: true, Source: sources[0]}, {Result: true, Source: sources[1]}} 1189 server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "", 1190 makeWorkerClientsForHandle(ctrl, taskName, sources, workers, startReq, pauseReq, resumeReq, stopReq1, stopReq2)) 1191 mock := conn.InitVersionDB() 1192 defer func() { 1193 conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{} 1194 }() 1195 mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}). 1196 AddRow("version", "5.7.25-TiDB-v4.0.2")) 1197 stResp, err := server.StartTask(context.Background(), startReq) 1198 require.NoError(t.T(), err) 1199 require.True(t.T(), stResp.Result) 1200 for _, source := range sources { 1201 t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running) 1202 } 1203 1204 require.Equal(t.T(), sourceResps, stResp.Sources) 1205 // 2. pause task 1206 resp, err = server.OperateTask(context.Background(), pauseReq) 1207 require.NoError(t.T(), err) 1208 require.True(t.T(), resp.Result) 1209 for _, source := range sources { 1210 t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Paused) 1211 } 1212 1213 require.Equal(t.T(), sourceResps, resp.Sources) 1214 // 3. resume task 1215 resp, err = server.OperateTask(context.Background(), resumeReq) 1216 require.NoError(t.T(), err) 1217 require.True(t.T(), resp.Result) 1218 for _, source := range sources { 1219 t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running) 1220 } 1221 require.Equal(t.T(), sourceResps, resp.Sources) 1222 // 4. test stop task successfully, remove partial sources 1223 resp, err = server.OperateTask(context.Background(), stopReq1) 1224 require.NoError(t.T(), err) 1225 require.True(t.T(), resp.Result) 1226 require.Equal(t.T(), []string{sources[1]}, server.getTaskSourceNameList(taskName)) 1227 require.Equal(t.T(), []*pb.CommonWorkerResponse{{Result: true, Source: sources[0]}}, resp.Sources) 1228 // 5. test stop task successfully, remove all workers 1229 resp, err = server.OperateTask(context.Background(), stopReq2) 1230 require.NoError(t.T(), err) 1231 require.True(t.T(), resp.Result) 1232 require.Len(t.T(), server.getTaskSourceNameList(taskName), 0) 1233 require.Equal(t.T(), []*pb.CommonWorkerResponse{{Result: true, Source: sources[1]}}, resp.Sources) 1234 t.clearSchedulerEnv(cancel, &wg) 1235 } 1236 1237 func (t *testMasterSuite) TestPurgeWorkerRelay() { 1238 ctrl := gomock.NewController(t.T()) 1239 defer ctrl.Finish() 1240 1241 server := testDefaultMasterServer(t.T()) 1242 sources, workers := defaultWorkerSource() 1243 var ( 1244 now = time.Now().Unix() 1245 filename = "mysql-bin.000005" 1246 ) 1247 1248 // mock PurgeRelay request 1249 mockPurgeRelay := func(rpcSuccess bool) { 1250 for i, worker := range workers { 1251 rets := []interface{}{ 1252 nil, 1253 errors.New(errGRPCFailed), 1254 } 1255 if rpcSuccess { 1256 rets = []interface{}{ 1257 &pb.CommonWorkerResponse{ 1258 Result: true, 1259 Source: sources[i], 1260 }, 1261 nil, 1262 } 1263 } 1264 mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) 1265 mockWorkerClient.EXPECT().PurgeRelay( 1266 gomock.Any(), 1267 &pb.PurgeRelayRequest{ 1268 Time: now, 1269 Filename: filename, 1270 }, 1271 ).Return(rets...) 1272 t.workerClients[worker] = newMockRPCClient(mockWorkerClient) 1273 } 1274 } 1275 1276 var wg sync.WaitGroup 1277 ctx, cancel := context.WithCancel(context.Background()) 1278 server.scheduler, _ = t.testMockSchedulerForRelay(ctx, &wg, nil, nil, "", t.workerClients) 1279 1280 // test PurgeWorkerRelay with invalid dm-worker[s] 1281 resp, err := server.PurgeWorkerRelay(context.Background(), &pb.PurgeWorkerRelayRequest{ 1282 Sources: []string{"invalid-source1", "invalid-source2"}, 1283 Time: now, 1284 Filename: filename, 1285 }) 1286 require.NoError(t.T(), err) 1287 require.True(t.T(), resp.Result) 1288 require.Len(t.T(), resp.Sources, 2) 1289 for _, w := range resp.Sources { 1290 require.False(t.T(), w.Result) 1291 require.Regexp(t.T(), "relay worker for source .* not found.*", w.Msg) 1292 } 1293 t.clearSchedulerEnv(cancel, &wg) 1294 1295 ctx, cancel = context.WithCancel(context.Background()) 1296 // test PurgeWorkerRelay successfully 1297 mockPurgeRelay(true) 1298 server.scheduler, _ = t.testMockSchedulerForRelay(ctx, &wg, sources, workers, "", t.workerClients) 1299 resp, err = server.PurgeWorkerRelay(context.Background(), &pb.PurgeWorkerRelayRequest{ 1300 Sources: sources, 1301 Time: now, 1302 Filename: filename, 1303 }) 1304 require.NoError(t.T(), err) 1305 require.True(t.T(), resp.Result) 1306 require.Len(t.T(), resp.Sources, 2) 1307 for _, w := range resp.Sources { 1308 require.True(t.T(), w.Result) 1309 } 1310 t.clearSchedulerEnv(cancel, &wg) 1311 1312 ctx, cancel = context.WithCancel(context.Background()) 1313 // test PurgeWorkerRelay with error response 1314 mockPurgeRelay(false) 1315 server.scheduler, _ = t.testMockSchedulerForRelay(ctx, &wg, sources, workers, "", t.workerClients) 1316 resp, err = server.PurgeWorkerRelay(context.Background(), &pb.PurgeWorkerRelayRequest{ 1317 Sources: sources, 1318 Time: now, 1319 Filename: filename, 1320 }) 1321 require.NoError(t.T(), err) 1322 require.True(t.T(), resp.Result) 1323 require.Len(t.T(), resp.Sources, 2) 1324 for _, w := range resp.Sources { 1325 require.False(t.T(), w.Result) 1326 require.Regexp(t.T(), errGRPCFailedReg, w.Msg) 1327 } 1328 t.clearSchedulerEnv(cancel, &wg) 1329 } 1330 1331 func (t *testMasterSuite) TestOperateWorkerRelayTask() { 1332 ctrl := gomock.NewController(t.T()) 1333 defer ctrl.Finish() 1334 1335 server := testDefaultMasterServer(t.T()) 1336 sources, workers := defaultWorkerSource() 1337 var wg sync.WaitGroup 1338 ctx, cancel := context.WithCancel(context.Background()) 1339 pauseReq := &pb.OperateWorkerRelayRequest{ 1340 Sources: sources, 1341 Op: pb.RelayOp_PauseRelay, 1342 } 1343 resumeReq := &pb.OperateWorkerRelayRequest{ 1344 Sources: sources, 1345 Op: pb.RelayOp_ResumeRelay, 1346 } 1347 server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "", 1348 makeWorkerClientsForHandle(ctrl, "", sources, workers, pauseReq, resumeReq)) 1349 1350 // test OperateWorkerRelayTask with invalid dm-worker[s] 1351 resp, err := server.OperateWorkerRelayTask(context.Background(), &pb.OperateWorkerRelayRequest{ 1352 Sources: []string{"invalid-source1", "invalid-source2"}, 1353 Op: pb.RelayOp_PauseRelay, 1354 }) 1355 require.NoError(t.T(), err) 1356 require.False(t.T(), resp.Result) 1357 require.Contains(t.T(), resp.Msg, "need to update expectant relay stage not exist") 1358 1359 sourceResps := []*pb.CommonWorkerResponse{{Result: true, Source: sources[0]}, {Result: true, Source: sources[1]}} 1360 // 1. test pause-relay successfully 1361 resp, err = server.OperateWorkerRelayTask(context.Background(), pauseReq) 1362 require.NoError(t.T(), err) 1363 require.True(t.T(), resp.Result) 1364 for _, source := range sources { 1365 t.relayStageMatch(server.scheduler, source, pb.Stage_Paused) 1366 } 1367 require.Equal(t.T(), sourceResps, resp.Sources) 1368 // 2. test resume-relay successfully 1369 resp, err = server.OperateWorkerRelayTask(context.Background(), resumeReq) 1370 require.NoError(t.T(), err) 1371 require.True(t.T(), resp.Result) 1372 for _, source := range sources { 1373 t.relayStageMatch(server.scheduler, source, pb.Stage_Running) 1374 } 1375 require.Equal(t.T(), sourceResps, resp.Sources) 1376 t.clearSchedulerEnv(cancel, &wg) 1377 } 1378 1379 func (t *testMasterSuite) TestServer() { 1380 var err error 1381 cfg := NewConfig() 1382 require.NoError(t.T(), cfg.FromContent(SampleConfig)) 1383 cfg.PeerUrls = "http://127.0.0.1:8294" 1384 cfg.DataDir = t.T().TempDir() 1385 cfg.MasterAddr = tempurl.Alloc()[len("http://"):] 1386 cfg.AdvertiseAddr = cfg.MasterAddr 1387 1388 basicServiceCheck := func(cfg *Config) { 1389 t.testHTTPInterface(fmt.Sprintf("http://%s/status", cfg.AdvertiseAddr), []byte(version.GetRawInfo())) 1390 t.testHTTPInterface(fmt.Sprintf("http://%s/debug/pprof/", cfg.AdvertiseAddr), []byte("Types of profiles available")) 1391 // HTTP API in this unit test is unstable, but we test it in `http_apis` in integration test. 1392 // t.testHTTPInterface( fmt.Sprintf("http://%s/apis/v1alpha1/status/test-task", cfg.AdvertiseAddr), []byte("task test-task has no source or not exist")) 1393 } 1394 t.testNormalServerLifecycle(cfg, func(cfg *Config) { 1395 basicServiceCheck(cfg) 1396 1397 // try to start another server with the same address. Expect it to fail 1398 // unset an etcd variable because it will cause checking on exit, and block forever 1399 err = os.Unsetenv(verify.ENV_VERIFY) 1400 require.NoError(t.T(), err) 1401 1402 dupServer := NewServer(cfg) 1403 ctx, cancel := context.WithCancel(context.Background()) 1404 defer cancel() 1405 err1 := dupServer.Start(ctx) 1406 require.True(t.T(), terror.ErrMasterStartEmbedEtcdFail.Equal(err1)) 1407 require.Contains(t.T(), err1.Error(), "bind: address already in use") 1408 1409 err = os.Setenv(verify.ENV_VERIFY, verify.ENV_VERIFY_ALL_VALUE) 1410 require.NoError(t.T(), err) 1411 }) 1412 1413 // test the listen address is 0.0.0.0 1414 masterAddrStr := tempurl.Alloc()[len("http://"):] 1415 _, masterPort, err := net.SplitHostPort(masterAddrStr) 1416 require.NoError(t.T(), err) 1417 cfg2 := NewConfig() 1418 *cfg2 = *cfg 1419 cfg2.MasterAddr = fmt.Sprintf("0.0.0.0:%s", masterPort) 1420 cfg2.AdvertiseAddr = masterAddrStr 1421 t.testNormalServerLifecycle(cfg2, basicServiceCheck) 1422 } 1423 1424 func (t *testMasterSuite) TestMasterTLS() { 1425 var err error 1426 masterAddr := tempurl.Alloc()[len("http://"):] 1427 peerAddr := tempurl.Alloc()[len("http://"):] 1428 _, masterPort, err := net.SplitHostPort(masterAddr) 1429 require.NoError(t.T(), err) 1430 _, peerPort, err := net.SplitHostPort(peerAddr) 1431 require.NoError(t.T(), err) 1432 1433 caPath := pwd + "/tls_for_test/ca.pem" 1434 certPath := pwd + "/tls_for_test/dm.pem" 1435 keyPath := pwd + "/tls_for_test/dm.key" 1436 1437 // all with `https://` prefix 1438 cfg := NewConfig() 1439 err = cfg.Parse([]string{ 1440 "--name=master-tls", 1441 fmt.Sprintf("--data-dir=%s", t.T().TempDir()), 1442 fmt.Sprintf("--master-addr=https://%s", masterAddr), 1443 fmt.Sprintf("--advertise-addr=https://%s", masterAddr), 1444 fmt.Sprintf("--peer-urls=https://%s", peerAddr), 1445 fmt.Sprintf("--advertise-peer-urls=https://%s", peerAddr), 1446 fmt.Sprintf("--initial-cluster=master-tls=https://%s", peerAddr), 1447 "--ssl-ca=" + caPath, 1448 "--ssl-cert=" + certPath, 1449 "--ssl-key=" + keyPath, 1450 }) 1451 require.NoError(t.T(), err) 1452 t.testTLSPrefix(cfg) 1453 require.Equal(t.T(), masterAddr, cfg.MasterAddr) 1454 require.Equal(t.T(), masterAddr, cfg.AdvertiseAddr) 1455 require.Equal(t.T(), "https://"+peerAddr, cfg.PeerUrls) 1456 require.Equal(t.T(), "https://"+peerAddr, cfg.AdvertisePeerUrls) 1457 require.Equal(t.T(), "master-tls=https://"+peerAddr, cfg.InitialCluster) 1458 1459 // no `https://` prefix for `--master-addr` 1460 cfg = NewConfig() 1461 err = cfg.Parse([]string{ 1462 "--name=master-tls", 1463 fmt.Sprintf("--data-dir=%s", t.T().TempDir()), 1464 fmt.Sprintf("--master-addr=%s", masterAddr), 1465 fmt.Sprintf("--advertise-addr=https://%s", masterAddr), 1466 fmt.Sprintf("--peer-urls=https://%s", peerAddr), 1467 fmt.Sprintf("--advertise-peer-urls=https://%s", peerAddr), 1468 fmt.Sprintf("--initial-cluster=master-tls=https://%s", peerAddr), 1469 "--ssl-ca=" + caPath, 1470 "--ssl-cert=" + certPath, 1471 "--ssl-key=" + keyPath, 1472 }) 1473 require.NoError(t.T(), err) 1474 t.testTLSPrefix(cfg) 1475 1476 // no `https://` prefix for `--master-addr` and `--advertise-addr` 1477 cfg = NewConfig() 1478 err = cfg.Parse([]string{ 1479 "--name=master-tls", 1480 fmt.Sprintf("--data-dir=%s", t.T().TempDir()), 1481 fmt.Sprintf("--master-addr=%s", masterAddr), 1482 fmt.Sprintf("--advertise-addr=%s", masterAddr), 1483 fmt.Sprintf("--peer-urls=https://%s", peerAddr), 1484 fmt.Sprintf("--advertise-peer-urls=https://%s", peerAddr), 1485 fmt.Sprintf("--initial-cluster=master-tls=https://%s", peerAddr), 1486 "--ssl-ca=" + caPath, 1487 "--ssl-cert=" + certPath, 1488 "--ssl-key=" + keyPath, 1489 }) 1490 require.NoError(t.T(), err) 1491 t.testTLSPrefix(cfg) 1492 1493 // no `https://` prefix for `--master-addr`, `--advertise-addr` and `--peer-urls` 1494 cfg = NewConfig() 1495 err = cfg.Parse([]string{ 1496 "--name=master-tls", 1497 fmt.Sprintf("--data-dir=%s", t.T().TempDir()), 1498 fmt.Sprintf("--master-addr=%s", masterAddr), 1499 fmt.Sprintf("--advertise-addr=%s", masterAddr), 1500 fmt.Sprintf("--peer-urls=%s", peerAddr), 1501 fmt.Sprintf("--advertise-peer-urls=https://%s", peerAddr), 1502 fmt.Sprintf("--initial-cluster=master-tls=https://%s", peerAddr), 1503 "--ssl-ca=" + caPath, 1504 "--ssl-cert=" + certPath, 1505 "--ssl-key=" + keyPath, 1506 }) 1507 require.NoError(t.T(), err) 1508 t.testTLSPrefix(cfg) 1509 1510 // no `https://` prefix for `--master-addr`, `--advertise-addr`, `--peer-urls` and `--advertise-peer-urls` 1511 cfg = NewConfig() 1512 err = cfg.Parse([]string{ 1513 "--name=master-tls", 1514 fmt.Sprintf("--data-dir=%s", t.T().TempDir()), 1515 fmt.Sprintf("--master-addr=%s", masterAddr), 1516 fmt.Sprintf("--advertise-addr=%s", masterAddr), 1517 fmt.Sprintf("--peer-urls=%s", peerAddr), 1518 fmt.Sprintf("--advertise-peer-urls=%s", peerAddr), 1519 fmt.Sprintf("--initial-cluster=master-tls=https://%s", peerAddr), 1520 "--ssl-ca=" + caPath, 1521 "--ssl-cert=" + certPath, 1522 "--ssl-key=" + keyPath, 1523 }) 1524 require.NoError(t.T(), err) 1525 t.testTLSPrefix(cfg) 1526 1527 // all without `https://`/`http://` prefix 1528 cfg = NewConfig() 1529 err = cfg.Parse([]string{ 1530 "--name=master-tls", 1531 fmt.Sprintf("--data-dir=%s", t.T().TempDir()), 1532 fmt.Sprintf("--master-addr=%s", masterAddr), 1533 fmt.Sprintf("--advertise-addr=%s", masterAddr), 1534 fmt.Sprintf("--peer-urls=%s", peerAddr), 1535 fmt.Sprintf("--advertise-peer-urls=%s", peerAddr), 1536 fmt.Sprintf("--initial-cluster=master-tls=%s", peerAddr), 1537 "--ssl-ca=" + caPath, 1538 "--ssl-cert=" + certPath, 1539 "--ssl-key=" + keyPath, 1540 }) 1541 require.NoError(t.T(), err) 1542 t.testTLSPrefix(cfg) 1543 require.Equal(t.T(), masterAddr, cfg.MasterAddr) 1544 require.Equal(t.T(), masterAddr, cfg.AdvertiseAddr) 1545 require.Equal(t.T(), "https://"+peerAddr, cfg.PeerUrls) 1546 require.Equal(t.T(), "https://"+peerAddr, cfg.AdvertisePeerUrls) 1547 require.Equal(t.T(), "master-tls=https://"+peerAddr, cfg.InitialCluster) 1548 1549 // all with `http://` prefix, but with TLS enabled. 1550 cfg = NewConfig() 1551 err = cfg.Parse([]string{ 1552 "--name=master-tls", 1553 fmt.Sprintf("--data-dir=%s", t.T().TempDir()), 1554 fmt.Sprintf("--master-addr=http://%s", masterAddr), 1555 fmt.Sprintf("--advertise-addr=http://%s", masterAddr), 1556 fmt.Sprintf("--peer-urls=http://%s", peerAddr), 1557 fmt.Sprintf("--advertise-peer-urls=http://%s", peerAddr), 1558 fmt.Sprintf("--initial-cluster=master-tls=http://%s", peerAddr), 1559 "--ssl-ca=" + caPath, 1560 "--ssl-cert=" + certPath, 1561 "--ssl-key=" + keyPath, 1562 }) 1563 require.NoError(t.T(), err) 1564 require.Equal(t.T(), masterAddr, cfg.MasterAddr) 1565 require.Equal(t.T(), masterAddr, cfg.AdvertiseAddr) 1566 require.Equal(t.T(), "https://"+peerAddr, cfg.PeerUrls) 1567 require.Equal(t.T(), "https://"+peerAddr, cfg.AdvertisePeerUrls) 1568 require.Equal(t.T(), "master-tls=https://"+peerAddr, cfg.InitialCluster) 1569 1570 // different prefix for `--peer-urls` and `--initial-cluster` 1571 cfg = NewConfig() 1572 err = cfg.Parse([]string{ 1573 "--name=master-tls", 1574 fmt.Sprintf("--data-dir=%s", t.T().TempDir()), 1575 fmt.Sprintf("--master-addr=https://%s", masterAddr), 1576 fmt.Sprintf("--advertise-addr=https://%s", masterAddr), 1577 fmt.Sprintf("--peer-urls=https://%s", peerAddr), 1578 fmt.Sprintf("--advertise-peer-urls=https://%s", peerAddr), 1579 fmt.Sprintf("--initial-cluster=master-tls=http://%s", peerAddr), 1580 "--ssl-ca=" + caPath, 1581 "--ssl-cert=" + certPath, 1582 "--ssl-key=" + keyPath, 1583 }) 1584 require.NoError(t.T(), err) 1585 require.Equal(t.T(), masterAddr, cfg.MasterAddr) 1586 require.Equal(t.T(), masterAddr, cfg.AdvertiseAddr) 1587 require.Equal(t.T(), "https://"+peerAddr, cfg.PeerUrls) 1588 require.Equal(t.T(), "https://"+peerAddr, cfg.AdvertisePeerUrls) 1589 require.Equal(t.T(), "master-tls=https://"+peerAddr, cfg.InitialCluster) 1590 t.testTLSPrefix(cfg) 1591 1592 // listen address set to 0.0.0.0 1593 cfg = NewConfig() 1594 err = cfg.Parse([]string{ 1595 "--name=master-tls", 1596 fmt.Sprintf("--data-dir=%s", t.T().TempDir()), 1597 fmt.Sprintf("--master-addr=0.0.0.0:%s", masterPort), 1598 fmt.Sprintf("--advertise-addr=https://%s", masterAddr), 1599 fmt.Sprintf("--peer-urls=0.0.0.0:%s", peerPort), 1600 fmt.Sprintf("--advertise-peer-urls=https://%s", peerAddr), 1601 fmt.Sprintf("--initial-cluster=master-tls=https://%s", peerAddr), 1602 "--ssl-ca=" + caPath, 1603 "--ssl-cert=" + certPath, 1604 "--ssl-key=" + keyPath, 1605 }) 1606 require.NoError(t.T(), err) 1607 t.testTLSPrefix(cfg) 1608 } 1609 1610 func (t *testMasterSuite) testTLSPrefix(cfg *Config) { 1611 t.testNormalServerLifecycle(cfg, func(cfg *Config) { 1612 t.testHTTPInterface(fmt.Sprintf("https://%s/status", cfg.AdvertiseAddr), []byte(version.GetRawInfo())) 1613 t.testHTTPInterface(fmt.Sprintf("https://%s/debug/pprof/", cfg.AdvertiseAddr), []byte("Types of profiles available")) 1614 }) 1615 } 1616 1617 func (t *testMasterSuite) testNormalServerLifecycle(cfg *Config, checkLogic func(*Config)) { 1618 var err error 1619 s := NewServer(cfg) 1620 1621 ctx, cancel := context.WithCancel(context.Background()) 1622 err = s.Start(ctx) 1623 require.NoError(t.T(), err) 1624 1625 checkLogic(cfg) 1626 1627 // close 1628 cancel() 1629 s.Close() 1630 1631 require.Eventually(t.T(), func() bool { 1632 return s.closed.Load() 1633 }, 3*time.Second, 100*time.Millisecond) 1634 } 1635 1636 func (t *testMasterSuite) testHTTPInterface(url string, contain []byte) { 1637 // we use HTTPS in some test cases. 1638 tlsConfig, err := toolutils.NewTLSConfig( 1639 toolutils.WithCAPath(pwd+"/tls_for_test/ca.pem"), 1640 toolutils.WithCertAndKeyPath(pwd+"/tls_for_test/dm.pem", pwd+"/tls_for_test/dm.key"), 1641 ) 1642 require.NoError(t.T(), err) 1643 cli := toolutils.ClientWithTLS(tlsConfig) 1644 1645 // nolint:noctx 1646 resp, err := cli.Get(url) 1647 require.NoError(t.T(), err) 1648 defer resp.Body.Close() 1649 require.Equal(t.T(), http.StatusOK, resp.StatusCode) 1650 1651 body, err := io.ReadAll(resp.Body) 1652 require.NoError(t.T(), err) 1653 require.True(t.T(), bytes.Contains(body, contain)) 1654 } 1655 1656 func (t *testMasterSuite) TestJoinMember() { 1657 ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) 1658 1659 // create a new cluster 1660 cfg1 := NewConfig() 1661 require.NoError(t.T(), cfg1.FromContent(SampleConfig)) 1662 cfg1.Name = "dm-master-1" 1663 cfg1.DataDir = t.T().TempDir() 1664 cfg1.MasterAddr = tempurl.Alloc()[len("http://"):] 1665 cfg1.AdvertiseAddr = cfg1.MasterAddr 1666 cfg1.PeerUrls = tempurl.Alloc() 1667 cfg1.AdvertisePeerUrls = cfg1.PeerUrls 1668 cfg1.InitialCluster = fmt.Sprintf("%s=%s", cfg1.Name, cfg1.AdvertisePeerUrls) 1669 1670 s1 := NewServer(cfg1) 1671 require.NoError(t.T(), s1.Start(ctx)) 1672 defer s1.Close() 1673 1674 // wait the first one become the leader 1675 require.Eventually(t.T(), func() bool { 1676 return s1.election.IsLeader() 1677 }, 3*time.Second, 100*time.Millisecond) 1678 1679 // join to an existing cluster 1680 cfg2 := NewConfig() 1681 require.NoError(t.T(), cfg2.FromContent(SampleConfig)) 1682 cfg2.Name = "dm-master-2" 1683 cfg2.DataDir = t.T().TempDir() 1684 cfg2.MasterAddr = tempurl.Alloc()[len("http://"):] 1685 cfg2.AdvertiseAddr = cfg2.MasterAddr 1686 cfg2.PeerUrls = tempurl.Alloc() 1687 cfg2.AdvertisePeerUrls = cfg2.PeerUrls 1688 cfg2.Join = cfg1.MasterAddr // join to an existing cluster 1689 1690 s2 := NewServer(cfg2) 1691 require.NoError(t.T(), s2.Start(ctx)) 1692 defer s2.Close() 1693 1694 client, err := etcdutil.CreateClient(strings.Split(cfg1.AdvertisePeerUrls, ","), nil) 1695 require.NoError(t.T(), err) 1696 defer client.Close() 1697 1698 // verify members 1699 listResp, err := etcdutil.ListMembers(client) 1700 require.NoError(t.T(), err) 1701 require.Len(t.T(), listResp.Members, 2) 1702 names := make(map[string]struct{}, len(listResp.Members)) 1703 for _, m := range listResp.Members { 1704 names[m.Name] = struct{}{} 1705 } 1706 require.Contains(t.T(), names, cfg1.Name) 1707 require.Contains(t.T(), names, cfg2.Name) 1708 1709 // s1 is still the leader 1710 _, leaderID, _, err := s2.election.LeaderInfo(ctx) 1711 1712 require.NoError(t.T(), err) 1713 require.Equal(t.T(), leaderID, cfg1.Name) 1714 1715 cfg3 := NewConfig() 1716 require.NoError(t.T(), cfg3.FromContent(SampleConfig)) 1717 cfg3.Name = "dm-master-3" 1718 cfg3.DataDir = t.T().TempDir() 1719 cfg3.MasterAddr = tempurl.Alloc()[len("http://"):] 1720 cfg3.AdvertiseAddr = cfg3.MasterAddr 1721 cfg3.PeerUrls = tempurl.Alloc() 1722 cfg3.AdvertisePeerUrls = cfg3.PeerUrls 1723 cfg3.Join = cfg1.MasterAddr // join to an existing cluster 1724 1725 // mock join master without wal dir 1726 require.NoError(t.T(), os.Mkdir(filepath.Join(cfg3.DataDir, "member"), privateDirMode)) 1727 require.NoError(t.T(), os.Mkdir(filepath.Join(cfg3.DataDir, "member", "join"), privateDirMode)) 1728 s3 := NewServer(cfg3) 1729 // avoid join a unhealthy cluster 1730 require.Eventually(t.T(), func() bool { 1731 return s3.Start(ctx) == nil 1732 }, 30*time.Second, time.Second) 1733 defer s3.Close() 1734 1735 // verify members 1736 listResp, err = etcdutil.ListMembers(client) 1737 require.NoError(t.T(), err) 1738 require.Len(t.T(), listResp.Members, 3) 1739 names = make(map[string]struct{}, len(listResp.Members)) 1740 for _, m := range listResp.Members { 1741 names[m.Name] = struct{}{} 1742 } 1743 require.Contains(t.T(), names, cfg1.Name) 1744 require.Contains(t.T(), names, cfg2.Name) 1745 require.Contains(t.T(), names, cfg3.Name) 1746 1747 cancel() 1748 t.clearEtcdEnv() 1749 } 1750 1751 func (t *testMasterSuite) TestOperateSource() { 1752 ctx, cancel := context.WithCancel(context.Background()) 1753 defer cancel() 1754 ctrl := gomock.NewController(t.T()) 1755 defer ctrl.Finish() 1756 1757 // create a new cluster 1758 cfg1 := NewConfig() 1759 require.NoError(t.T(), cfg1.FromContent(SampleConfig)) 1760 cfg1.Name = "dm-master-1" 1761 cfg1.DataDir = t.T().TempDir() 1762 cfg1.MasterAddr = tempurl.Alloc()[len("http://"):] 1763 cfg1.AdvertiseAddr = cfg1.MasterAddr 1764 cfg1.PeerUrls = tempurl.Alloc() 1765 cfg1.AdvertisePeerUrls = cfg1.PeerUrls 1766 cfg1.InitialCluster = fmt.Sprintf("%s=%s", cfg1.Name, cfg1.AdvertisePeerUrls) 1767 1768 s1 := NewServer(cfg1) 1769 s1.leader.Store(oneselfLeader) 1770 require.NoError(t.T(), s1.Start(ctx)) 1771 defer s1.Close() 1772 mysqlCfg, err := config.SourceCfgFromYamlAndVerify(config.SampleSourceConfig) 1773 require.NoError(t.T(), err) 1774 mysqlCfg.From.Password = os.Getenv("MYSQL_PSWD") 1775 task, err := mysqlCfg.Yaml() 1776 require.NoError(t.T(), err) 1777 sourceID := mysqlCfg.SourceID 1778 // 1. wait for scheduler to start 1779 time.Sleep(3 * time.Second) 1780 1781 // 2. try to add a new mysql source 1782 req := &pb.OperateSourceRequest{Op: pb.SourceOp_StartSource, Config: []string{task}} 1783 resp, err := s1.OperateSource(ctx, req) 1784 require.NoError(t.T(), err) 1785 require.True(t.T(), resp.Result) 1786 require.Equal(t.T(), []*pb.CommonWorkerResponse{{ 1787 Result: true, 1788 Msg: "source is added but there is no free worker to bound", 1789 Source: sourceID, 1790 }}, resp.Sources) 1791 unBoundSources := s1.scheduler.UnboundSources() 1792 require.Len(t.T(), unBoundSources, 1) 1793 require.Equal(t.T(), sourceID, unBoundSources[0]) 1794 1795 // 3. try to add multiple source 1796 // 3.1 duplicated source id 1797 sourceID2 := "mysql-replica-02" 1798 mysqlCfg.SourceID = sourceID2 1799 task2, err := mysqlCfg.Yaml() 1800 require.NoError(t.T(), err) 1801 req = &pb.OperateSourceRequest{Op: pb.SourceOp_StartSource, Config: []string{task2, task2}} 1802 resp, err = s1.OperateSource(ctx, req) 1803 require.NoError(t.T(), err) 1804 require.False(t.T(), resp.Result) 1805 require.Contains(t.T(), resp.Msg, "source config with ID "+sourceID2+" already exists") 1806 // 3.2 run same command after correction 1807 sourceID3 := "mysql-replica-03" 1808 mysqlCfg.SourceID = sourceID3 1809 task3, err := mysqlCfg.Yaml() 1810 require.NoError(t.T(), err) 1811 req = &pb.OperateSourceRequest{Op: pb.SourceOp_StartSource, Config: []string{task2, task3}} 1812 resp, err = s1.OperateSource(ctx, req) 1813 require.NoError(t.T(), err) 1814 require.True(t.T(), resp.Result) 1815 sort.Slice(resp.Sources, func(i, j int) bool { 1816 return resp.Sources[i].Source < resp.Sources[j].Source 1817 }) 1818 require.Equal(t.T(), []*pb.CommonWorkerResponse{{ 1819 Result: true, 1820 Msg: "source is added but there is no free worker to bound", 1821 Source: sourceID2, 1822 }, { 1823 Result: true, 1824 Msg: "source is added but there is no free worker to bound", 1825 Source: sourceID3, 1826 }}, resp.Sources) 1827 unBoundSources = s1.scheduler.UnboundSources() 1828 require.Len(t.T(), unBoundSources, 3) 1829 require.Equal(t.T(), sourceID, unBoundSources[0]) 1830 require.Equal(t.T(), sourceID2, unBoundSources[1]) 1831 require.Equal(t.T(), sourceID3, unBoundSources[2]) 1832 1833 // 4. try to stop a non-exist-source 1834 req.Op = pb.SourceOp_StopSource 1835 mysqlCfg.SourceID = "not-exist-source" 1836 task4, err := mysqlCfg.Yaml() 1837 require.NoError(t.T(), err) 1838 req.Config = []string{task4} 1839 resp, err = s1.OperateSource(ctx, req) 1840 require.NoError(t.T(), err) 1841 require.False(t.T(), resp.Result) 1842 require.Contains(t.T(), resp.Msg, "source config with ID "+mysqlCfg.SourceID+" not exists") 1843 1844 // 5. start workers, the unbound sources should be bound 1845 var wg sync.WaitGroup 1846 workerName1 := "worker1" 1847 workerName2 := "worker2" 1848 workerName3 := "worker3" 1849 defer func() { 1850 t.clearSchedulerEnv(cancel, &wg) 1851 }() 1852 require.NoError(t.T(), s1.scheduler.AddWorker(workerName1, "172.16.10.72:8262")) 1853 wg.Add(1) 1854 go func(ctx context.Context, workerName string) { 1855 defer wg.Done() 1856 require.NoError(t.T(), ha.KeepAlive(ctx, s1.etcdClient, workerName, keepAliveTTL)) 1857 }(ctx, workerName1) 1858 require.NoError(t.T(), s1.scheduler.AddWorker(workerName2, "172.16.10.72:8263")) 1859 wg.Add(1) 1860 go func(ctx context.Context, workerName string) { 1861 defer wg.Done() 1862 require.NoError(t.T(), ha.KeepAlive(ctx, s1.etcdClient, workerName, keepAliveTTL)) 1863 }(ctx, workerName2) 1864 require.NoError(t.T(), s1.scheduler.AddWorker(workerName3, "172.16.10.72:8264")) 1865 wg.Add(1) 1866 go func(ctx context.Context, workerName string) { 1867 defer wg.Done() 1868 require.NoError(t.T(), ha.KeepAlive(ctx, s1.etcdClient, workerName, keepAliveTTL)) 1869 }(ctx, workerName3) 1870 require.Eventually(t.T(), func() bool { 1871 w := s1.scheduler.GetWorkerBySource(sourceID) 1872 return w != nil 1873 }, 3*time.Second, 100*time.Millisecond) 1874 1875 // 6. stop sources 1876 req.Config = []string{task, task2, task3} 1877 req.Op = pb.SourceOp_StopSource 1878 1879 mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) 1880 mockRevelantWorkerClient(mockWorkerClient, "", sourceID, req) 1881 s1.scheduler.SetWorkerClientForTest(workerName1, newMockRPCClient(mockWorkerClient)) 1882 mockWorkerClient2 := pbmock.NewMockWorkerClient(ctrl) 1883 mockRevelantWorkerClient(mockWorkerClient2, "", sourceID2, req) 1884 s1.scheduler.SetWorkerClientForTest(workerName2, newMockRPCClient(mockWorkerClient2)) 1885 mockWorkerClient3 := pbmock.NewMockWorkerClient(ctrl) 1886 mockRevelantWorkerClient(mockWorkerClient3, "", sourceID3, req) 1887 s1.scheduler.SetWorkerClientForTest(workerName3, newMockRPCClient(mockWorkerClient3)) 1888 resp, err = s1.OperateSource(ctx, req) 1889 require.NoError(t.T(), err) 1890 require.True(t.T(), resp.Result) 1891 require.Equal(t.T(), []*pb.CommonWorkerResponse{{ 1892 Result: true, 1893 Source: sourceID, 1894 }, { 1895 Result: true, 1896 Source: sourceID2, 1897 }, { 1898 Result: true, 1899 Source: sourceID3, 1900 }}, resp.Sources) 1901 scm, _, err := ha.GetSourceCfg(t.etcdTestCli, sourceID, 0) 1902 require.NoError(t.T(), err) 1903 require.Len(t.T(), scm, 0) 1904 t.clearSchedulerEnv(cancel, &wg) 1905 1906 cancel() 1907 } 1908 1909 func (t *testMasterSuite) TestOfflineMember() { 1910 ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) 1911 1912 cfg1 := generateServerConfig(t.T(), "dm-master-1") 1913 cfg2 := generateServerConfig(t.T(), "dm-master-2") 1914 cfg3 := generateServerConfig(t.T(), "dm-master-3") 1915 1916 initialCluster := fmt.Sprintf("%s=%s", cfg1.Name, cfg1.AdvertisePeerUrls) + "," + 1917 fmt.Sprintf("%s=%s", cfg2.Name, cfg2.AdvertisePeerUrls) + "," + 1918 fmt.Sprintf("%s=%s", cfg3.Name, cfg3.AdvertisePeerUrls) 1919 cfg1.InitialCluster = initialCluster 1920 cfg2.InitialCluster = initialCluster 1921 cfg3.InitialCluster = initialCluster 1922 1923 var wg sync.WaitGroup 1924 s1 := NewServer(cfg1) 1925 defer func() { 1926 cancel() 1927 s1.Close() 1928 }() 1929 wg.Add(1) 1930 go func() { 1931 require.NoError(t.T(), s1.Start(ctx)) 1932 wg.Done() 1933 }() 1934 1935 s2 := NewServer(cfg2) 1936 defer func() { 1937 cancel() 1938 s2.Close() 1939 }() 1940 wg.Add(1) 1941 go func() { 1942 require.NoError(t.T(), s2.Start(ctx)) 1943 wg.Done() 1944 }() 1945 1946 ctx3, cancel3 := context.WithCancel(ctx) 1947 s3 := NewServer(cfg3) 1948 require.NoError(t.T(), s3.Start(ctx3)) 1949 defer func() { 1950 cancel3() 1951 s3.Close() 1952 }() 1953 1954 wg.Wait() 1955 1956 var leaderID string 1957 // ensure s2 has got the right leader info, because it will be used to `OfflineMember`. 1958 require.Eventually(t.T(), func() bool { 1959 s2.RLock() 1960 leader := s2.leader.Load() 1961 s2.RUnlock() 1962 if leader == "" { 1963 return false 1964 } 1965 if leader == oneselfLeader { 1966 leaderID = s2.cfg.Name 1967 } else { 1968 leaderID = s2.leader.Load() 1969 } 1970 return true 1971 }, 3*time.Second, 100*time.Millisecond) 1972 1973 // master related operations 1974 req := &pb.OfflineMemberRequest{ 1975 Type: "masters", 1976 Name: "xixi", 1977 } 1978 // test offline member with wrong type 1979 resp, err := s2.OfflineMember(ctx, req) 1980 require.NoError(t.T(), err) 1981 require.False(t.T(), resp.Result) 1982 require.Contains(t.T(), resp.Msg, terror.ErrMasterInvalidOfflineType.Generate(req.Type).Error()) 1983 // test offline member with invalid master name 1984 req.Type = common.Master 1985 resp, err = s2.OfflineMember(ctx, req) 1986 require.NoError(t.T(), err) 1987 require.False(t.T(), resp.Result) 1988 require.Contains(t.T(), resp.Msg, `dm-master with name `+req.Name+` not exists`) 1989 // test offline member with correct master name 1990 cli := s2.etcdClient 1991 listResp, err := etcdutil.ListMembers(cli) 1992 require.NoError(t.T(), err) 1993 require.Len(t.T(), listResp.Members, 3) 1994 1995 // make sure s3 is not the leader, otherwise it will take some time to campaign a new leader after close s3, and it may cause timeout 1996 require.Eventually(t.T(), func() bool { 1997 _, leaderID, _, err = s1.election.LeaderInfo(ctx) 1998 if err != nil { 1999 return false 2000 } 2001 2002 if leaderID == s3.cfg.Name { 2003 _, err = s3.OperateLeader(ctx, &pb.OperateLeaderRequest{ 2004 Op: pb.LeaderOp_EvictLeaderOp, 2005 }) 2006 require.NoError(t.T(), err) 2007 } 2008 return leaderID != s3.cfg.Name 2009 }, 10*time.Second, 500*time.Millisecond) 2010 2011 cancel3() 2012 s3.Close() 2013 2014 req.Name = s3.cfg.Name 2015 resp, err = s2.OfflineMember(ctx, req) 2016 require.NoError(t.T(), err) 2017 require.Equal(t.T(), "", resp.Msg) 2018 require.True(t.T(), resp.Result) 2019 2020 listResp, err = etcdutil.ListMembers(cli) 2021 require.NoError(t.T(), err) 2022 require.Len(t.T(), listResp.Members, 2) 2023 if listResp.Members[0].Name == cfg2.Name { 2024 listResp.Members[0], listResp.Members[1] = listResp.Members[1], listResp.Members[0] 2025 } 2026 require.Equal(t.T(), cfg1.Name, listResp.Members[0].Name) 2027 require.Equal(t.T(), cfg2.Name, listResp.Members[1].Name) 2028 2029 _, leaderID2, _, err := s1.election.LeaderInfo(ctx) 2030 require.NoError(t.T(), err) 2031 2032 if leaderID == cfg3.Name { 2033 // s3 is leader before, leader should re-campaign 2034 require.False(t.T(), leaderID != leaderID2) 2035 } else { 2036 // s3 isn't leader before, leader should keep the same 2037 require.Equal(t.T(), leaderID, leaderID2) 2038 } 2039 2040 // worker related operations 2041 ectx, canc := context.WithTimeout(ctx, time.Second) 2042 defer canc() 2043 req1 := &pb.RegisterWorkerRequest{ 2044 Name: "xixi", 2045 Address: "127.0.0.1:1000", 2046 } 2047 regReq, err := s1.RegisterWorker(ectx, req1) 2048 require.NoError(t.T(), err) 2049 require.True(t.T(), regReq.Result) 2050 2051 req2 := &pb.OfflineMemberRequest{ 2052 Type: common.Worker, 2053 Name: "haha", 2054 } 2055 { 2056 res, err := s1.OfflineMember(ectx, req2) 2057 require.NoError(t.T(), err) 2058 require.False(t.T(), res.Result) 2059 require.Contains(t.T(), res.Msg, `dm-worker with name `+req2.Name+` not exists`) 2060 } 2061 { 2062 req2.Name = "xixi" 2063 res, err := s1.OfflineMember(ectx, req2) 2064 require.NoError(t.T(), err) 2065 require.True(t.T(), res.Result) 2066 } 2067 { 2068 // register offline worker again. TICASE-962, 963 2069 resp, err := s1.RegisterWorker(ectx, req1) 2070 require.NoError(t.T(), err) 2071 require.True(t.T(), resp.Result) 2072 } 2073 t.clearSchedulerEnv(cancel, &wg) 2074 } 2075 2076 func (t *testMasterSuite) TestGetCfg() { 2077 ctrl := gomock.NewController(t.T()) 2078 defer ctrl.Finish() 2079 2080 server := testDefaultMasterServer(t.T()) 2081 sources, workers := defaultWorkerSource() 2082 2083 var wg sync.WaitGroup 2084 taskName := "test" 2085 ctx, cancel := context.WithCancel(context.Background()) 2086 req := &pb.StartTaskRequest{ 2087 Task: taskConfig, 2088 Sources: sources, 2089 } 2090 server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "", 2091 makeWorkerClientsForHandle(ctrl, taskName, sources, workers, req)) 2092 server.etcdClient = t.etcdTestCli 2093 2094 // start task 2095 mock := conn.InitVersionDB() 2096 defer func() { 2097 conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{} 2098 }() 2099 mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}). 2100 AddRow("version", "5.7.25-TiDB-v4.0.2")) 2101 resp, err := server.StartTask(context.Background(), req) 2102 require.NoError(t.T(), err) 2103 require.True(t.T(), resp.Result) 2104 2105 // get task config 2106 req1 := &pb.GetCfgRequest{ 2107 Name: taskName, 2108 Type: pb.CfgType_TaskType, 2109 } 2110 resp1, err := server.GetCfg(context.Background(), req1) 2111 require.NoError(t.T(), err) 2112 require.True(t.T(), resp1.Result) 2113 require.Contains(t.T(), resp1.Cfg, "name: test") 2114 2115 // not exist task name 2116 taskName2 := "wrong" 2117 req2 := &pb.GetCfgRequest{ 2118 Name: taskName2, 2119 Type: pb.CfgType_TaskType, 2120 } 2121 resp2, err := server.GetCfg(context.Background(), req2) 2122 require.NoError(t.T(), err) 2123 require.False(t.T(), resp2.Result) 2124 require.Contains(t.T(), resp2.Msg, "task not found") 2125 2126 // generate a template named `wrong`, test get this task template 2127 openapiTask, err := fixtures.GenNoShardOpenAPITaskForTest() 2128 require.NoError(t.T(), err) 2129 openapiTask.Name = taskName2 2130 require.NoError(t.T(), ha.PutOpenAPITaskTemplate(t.etcdTestCli, openapiTask, true)) 2131 require.NoError(t.T(), failpoint.Enable("github.com/pingcap/tiflow/dm/master/MockSkipAdjustTargetDB", `return(true)`)) 2132 resp2, err = server.GetCfg(context.Background(), &pb.GetCfgRequest{Name: taskName2, Type: pb.CfgType_TaskTemplateType}) 2133 require.NoError(t.T(), failpoint.Disable("github.com/pingcap/tiflow/dm/master/MockSkipAdjustTargetDB")) 2134 require.NoError(t.T(), err) 2135 require.True(t.T(), resp2.Result) 2136 require.Contains(t.T(), resp2.Cfg, "name: "+taskName2) 2137 2138 // test restart master 2139 server.scheduler.Close() 2140 require.NoError(t.T(), server.scheduler.Start(ctx, t.etcdTestCli)) 2141 2142 resp3, err := server.GetCfg(context.Background(), req1) 2143 require.NoError(t.T(), err) 2144 require.True(t.T(), resp3.Result) 2145 require.Equal(t.T(), resp1.Cfg, resp3.Cfg) 2146 2147 req3 := &pb.GetCfgRequest{ 2148 Name: "dm-master", 2149 Type: pb.CfgType_MasterType, 2150 } 2151 resp4, err := server.GetCfg(context.Background(), req3) 2152 require.NoError(t.T(), err) 2153 require.True(t.T(), resp4.Result) 2154 require.Contains(t.T(), resp4.Cfg, `name = "dm-master"`) 2155 2156 req4 := &pb.GetCfgRequest{ 2157 Name: "haha", 2158 Type: pb.CfgType_MasterType, 2159 } 2160 resp5, err := server.GetCfg(context.Background(), req4) 2161 require.NoError(t.T(), err) 2162 require.False(t.T(), resp5.Result) 2163 require.Contains(t.T(), resp5.Msg, "master not found") 2164 2165 req5 := &pb.GetCfgRequest{ 2166 Name: "haha", 2167 Type: pb.CfgType_WorkerType, 2168 } 2169 resp6, err := server.GetCfg(context.Background(), req5) 2170 require.NoError(t.T(), err) 2171 require.False(t.T(), resp6.Result) 2172 require.Contains(t.T(), resp6.Msg, "worker not found") 2173 2174 req6 := &pb.GetCfgRequest{ 2175 Name: "mysql-replica-01", 2176 Type: pb.CfgType_SourceType, 2177 } 2178 resp7, err := server.GetCfg(context.Background(), req6) 2179 require.NoError(t.T(), err) 2180 require.True(t.T(), resp7.Result) 2181 require.Contains(t.T(), resp7.Cfg, `source-id: mysql-replica-01`) 2182 2183 req7 := &pb.GetCfgRequest{ 2184 Name: "haha", 2185 Type: pb.CfgType_SourceType, 2186 } 2187 resp8, err := server.GetCfg(context.Background(), req7) 2188 require.NoError(t.T(), err) 2189 require.False(t.T(), resp8.Result) 2190 require.Equal(t.T(), resp8.Msg, "source not found") 2191 2192 t.clearSchedulerEnv(cancel, &wg) 2193 } 2194 2195 func (t *testMasterSuite) relayStageMatch(s *scheduler.Scheduler, source string, expectStage pb.Stage) { 2196 stage := ha.NewRelayStage(expectStage, source) 2197 stageDeepEqualExcludeRev(t.T(), s.GetExpectRelayStage(source), stage) 2198 2199 eStage, _, err := ha.GetRelayStage(t.etcdTestCli, source) 2200 require.NoError(t.T(), err) 2201 switch expectStage { 2202 case pb.Stage_Running, pb.Stage_Paused: 2203 stageDeepEqualExcludeRev(t.T(), eStage, stage) 2204 } 2205 } 2206 2207 func (t *testMasterSuite) subTaskStageMatch(s *scheduler.Scheduler, task, source string, expectStage pb.Stage) { 2208 stage := ha.NewSubTaskStage(expectStage, source, task) 2209 require.Equal(t.T(), s.GetExpectSubTaskStage(task, source), stage) 2210 2211 eStageM, _, err := ha.GetSubTaskStage(t.etcdTestCli, source, task) 2212 require.NoError(t.T(), err) 2213 switch expectStage { 2214 case pb.Stage_Running, pb.Stage_Paused: 2215 require.Len(t.T(), eStageM, 1) 2216 stageDeepEqualExcludeRev(t.T(), eStageM[task], stage) 2217 default: 2218 require.Len(t.T(), eStageM, 0) 2219 } 2220 } 2221 2222 func (t *testMasterSuite) TestGRPCLongResponse() { 2223 require.NoError(t.T(), failpoint.Enable("github.com/pingcap/tiflow/dm/master/LongRPCResponse", `return()`)) 2224 //nolint:errcheck 2225 defer failpoint.Disable("github.com/pingcap/tiflow/dm/master/LongRPCResponse") 2226 require.NoError(t.T(), failpoint.Enable("github.com/pingcap/tiflow/dm/ctl/common/SkipUpdateMasterClient", `return()`)) 2227 //nolint:errcheck 2228 defer failpoint.Disable("github.com/pingcap/tiflow/dm/ctl/common/SkipUpdateMasterClient") 2229 2230 masterAddr := tempurl.Alloc()[len("http://"):] 2231 lis, err := net.Listen("tcp", masterAddr) 2232 require.NoError(t.T(), err) 2233 defer lis.Close() 2234 server := grpc.NewServer() 2235 pb.RegisterMasterServer(server, &Server{}) 2236 //nolint:errcheck 2237 go server.Serve(lis) 2238 2239 conn, err := grpc.Dial(utils.UnwrapScheme(masterAddr), 2240 grpc.WithInsecure(), 2241 grpc.WithBlock()) 2242 require.NoError(t.T(), err) 2243 defer conn.Close() 2244 2245 common.GlobalCtlClient.MasterClient = pb.NewMasterClient(conn) 2246 ctx := context.Background() 2247 resp := &pb.StartTaskResponse{} 2248 err = common.SendRequest(ctx, "StartTask", &pb.StartTaskRequest{}, &resp) 2249 require.NoError(t.T(), err) 2250 } 2251 2252 func (t *testMasterSuite) TestStartStopValidation() { 2253 var ( 2254 wg sync.WaitGroup 2255 taskName = "test" 2256 ) 2257 ctrl := gomock.NewController(t.T()) 2258 defer ctrl.Finish() 2259 server := testDefaultMasterServer(t.T()) 2260 server.etcdClient = t.etcdTestCli 2261 sources, workers := defaultWorkerSource() 2262 ctx, cancel := context.WithCancel(context.Background()) 2263 defer t.clearSchedulerEnv(cancel, &wg) 2264 // start task without validation 2265 startReq := &pb.StartTaskRequest{ 2266 Task: taskConfig, 2267 Sources: sources, 2268 } 2269 sourceResps := []*pb.CommonWorkerResponse{{Result: true, Source: sources[0]}, {Result: true, Source: sources[1]}} 2270 server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "", 2271 makeWorkerClientsForHandle(ctrl, taskName, sources, workers, startReq)) 2272 mock := conn.InitVersionDB() 2273 defer func() { 2274 conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{} 2275 }() 2276 mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}). 2277 AddRow("version", "5.7.25-TiDB-v4.0.2")) 2278 stResp, err := server.StartTask(context.Background(), startReq) 2279 require.NoError(t.T(), err) 2280 require.True(t.T(), stResp.Result) 2281 2282 for _, source := range sources { 2283 t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running) 2284 } 2285 require.Equal(t.T(), sourceResps, stResp.Sources) 2286 2287 // (fail) start all validator of the task with explicit but invalid mode 2288 validatorStartReq := &pb.StartValidationRequest{ 2289 Mode: &pb.StartValidationRequest_ModeValue{ModeValue: "invalid-mode"}, 2290 TaskName: taskName, 2291 } 2292 startResp, err := server.StartValidation(context.Background(), validatorStartReq) 2293 require.NoError(t.T(), err) 2294 require.False(t.T(), startResp.Result) 2295 require.Contains(t.T(), startResp.Msg, "validation mode should be either `full` or `fast`") 2296 t.validatorStageMatch(taskName, sources[0], pb.Stage_InvalidStage) 2297 t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage) 2298 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationNone, "") 2299 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "") 2300 2301 // (fail) start with explicit but invalid start-time 2302 validatorStartReq = &pb.StartValidationRequest{ 2303 StartTime: &pb.StartValidationRequest_StartTimeValue{StartTimeValue: "xxx"}, 2304 TaskName: taskName, 2305 } 2306 startResp, err = server.StartValidation(context.Background(), validatorStartReq) 2307 require.NoError(t.T(), err) 2308 require.False(t.T(), startResp.Result) 2309 require.Contains(t.T(), startResp.Msg, "start-time should be in the format like") 2310 t.validatorStageMatch(taskName, sources[0], pb.Stage_InvalidStage) 2311 t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage) 2312 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationNone, "") 2313 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "") 2314 2315 // (fail) start for non-existed subtask 2316 validatorStartReq = &pb.StartValidationRequest{ 2317 TaskName: "not-exist-name", 2318 } 2319 startResp, err = server.StartValidation(context.Background(), validatorStartReq) 2320 require.NoError(t.T(), err) 2321 require.False(t.T(), startResp.Result) 2322 require.Contains(t.T(), startResp.Msg, "cannot get subtask by task name") 2323 t.validatorStageMatch(taskName, sources[0], pb.Stage_InvalidStage) 2324 t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage) 2325 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationNone, "") 2326 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "") 2327 2328 // (fail) start for non-exist source 2329 validatorStartReq = &pb.StartValidationRequest{ 2330 Sources: []string{"xxx"}, 2331 } 2332 startResp, err = server.StartValidation(context.Background(), validatorStartReq) 2333 require.NoError(t.T(), err) 2334 require.False(t.T(), startResp.Result) 2335 require.Contains(t.T(), startResp.Msg, "cannot get subtask by sources") 2336 t.validatorStageMatch(taskName, sources[0], pb.Stage_InvalidStage) 2337 t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage) 2338 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationNone, "") 2339 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "") 2340 2341 // (success) start validation without explicit mode for source 0 2342 validatorStartReq = &pb.StartValidationRequest{ 2343 TaskName: taskName, 2344 Sources: []string{sources[0]}, 2345 } 2346 startResp, err = server.StartValidation(context.Background(), validatorStartReq) 2347 require.NoError(t.T(), err) 2348 require.True(t.T(), startResp.Result) 2349 t.validatorStageMatch(taskName, sources[0], pb.Stage_Running) 2350 t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage) 2351 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "") 2352 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "") 2353 2354 // (fail) start all validator with explicit mode 2355 validatorStartReq = &pb.StartValidationRequest{ 2356 Mode: &pb.StartValidationRequest_ModeValue{ModeValue: config.ValidationFull}, 2357 TaskName: taskName, 2358 } 2359 startResp, err = server.StartValidation(context.Background(), validatorStartReq) 2360 require.NoError(t.T(), err) 2361 require.False(t.T(), startResp.Result) 2362 require.Regexp(t.T(), ".*some of target validator.* has already enabled.*", startResp.Msg) 2363 t.validatorStageMatch(taskName, sources[0], pb.Stage_Running) 2364 t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage) 2365 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "") 2366 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "") 2367 2368 // (fail) start validation with explicit mode for source 0 again 2369 validatorStartReq = &pb.StartValidationRequest{ 2370 Mode: &pb.StartValidationRequest_ModeValue{ModeValue: config.ValidationFull}, 2371 TaskName: taskName, 2372 Sources: []string{sources[0]}, 2373 } 2374 startResp, err = server.StartValidation(context.Background(), validatorStartReq) 2375 require.NoError(t.T(), err) 2376 require.False(t.T(), startResp.Result) 2377 require.Contains(t.T(), startResp.Msg, "all target validator has enabled, cannot do 'validation start' with explicit mode or start-time") 2378 t.validatorStageMatch(taskName, sources[0], pb.Stage_Running) 2379 t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage) 2380 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "") 2381 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "") 2382 2383 // (fail) start all validator without explicit mode 2384 validatorStartReq = &pb.StartValidationRequest{ 2385 TaskName: taskName, 2386 } 2387 startResp, err = server.StartValidation(context.Background(), validatorStartReq) 2388 require.NoError(t.T(), err) 2389 require.False(t.T(), startResp.Result) 2390 require.Regexp(t.T(), ".*some of target validator.* has already enabled.*", startResp.Msg) 2391 t.validatorStageMatch(taskName, sources[0], pb.Stage_Running) 2392 t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage) 2393 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "") 2394 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "") 2395 2396 // (fail) stop validator of source 1 2397 validatorStopReq := &pb.StopValidationRequest{ 2398 TaskName: taskName, 2399 Sources: sources[1:], 2400 } 2401 stopResp, err := server.StopValidation(context.Background(), validatorStopReq) 2402 require.NoError(t.T(), err) 2403 require.False(t.T(), stopResp.Result) 2404 require.Regexp(t.T(), ".*some target validator.* is not enabled.*", stopResp.Msg) 2405 t.validatorStageMatch(taskName, sources[0], pb.Stage_Running) 2406 t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage) 2407 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "") 2408 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "") 2409 2410 // (fail) stop all validator 2411 validatorStopReq = &pb.StopValidationRequest{ 2412 TaskName: taskName, 2413 } 2414 stopResp, err = server.StopValidation(context.Background(), validatorStopReq) 2415 require.NoError(t.T(), err) 2416 require.False(t.T(), stopResp.Result) 2417 require.Regexp(t.T(), ".*some target validator.* is not enabled.*", stopResp.Msg) 2418 t.validatorStageMatch(taskName, sources[0], pb.Stage_Running) 2419 t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage) 2420 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "") 2421 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "") 2422 2423 // (success) start validation with fast mode and start-time for source 1 2424 validatorStartReq = &pb.StartValidationRequest{ 2425 Mode: &pb.StartValidationRequest_ModeValue{ModeValue: config.ValidationFast}, 2426 StartTime: &pb.StartValidationRequest_StartTimeValue{StartTimeValue: "2006-01-02 15:04:05"}, 2427 TaskName: taskName, 2428 Sources: []string{sources[1]}, 2429 } 2430 startResp, err = server.StartValidation(context.Background(), validatorStartReq) 2431 require.NoError(t.T(), err) 2432 require.True(t.T(), startResp.Result) 2433 t.validatorStageMatch(taskName, sources[0], pb.Stage_Running) 2434 t.validatorStageMatch(taskName, sources[1], pb.Stage_Running) 2435 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "") 2436 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05") 2437 2438 // now validator of the 2 subtask is enabled(running) 2439 2440 // (success) start all validator of the task without explicit param again, i.e. resuming 2441 validatorStartReq = &pb.StartValidationRequest{ 2442 TaskName: taskName, 2443 } 2444 startResp, err = server.StartValidation(context.Background(), validatorStartReq) 2445 require.NoError(t.T(), err) 2446 require.True(t.T(), startResp.Result) 2447 t.validatorStageMatch(taskName, sources[0], pb.Stage_Running) 2448 t.validatorStageMatch(taskName, sources[1], pb.Stage_Running) 2449 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "") 2450 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05") 2451 2452 // (fail) stop non-existed subtask's validator 2453 validatorStopReq = &pb.StopValidationRequest{ 2454 TaskName: "not-exist-name", 2455 } 2456 stopResp, err = server.StopValidation(context.Background(), validatorStopReq) 2457 require.NoError(t.T(), err) 2458 require.False(t.T(), stopResp.Result) 2459 require.Contains(t.T(), stopResp.Msg, "cannot get subtask by task name") 2460 t.validatorStageMatch(taskName, sources[0], pb.Stage_Running) 2461 t.validatorStageMatch(taskName, sources[1], pb.Stage_Running) 2462 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "") 2463 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05") 2464 2465 // (fail) stop all task but with non-exist source 2466 validatorStopReq = &pb.StopValidationRequest{ 2467 Sources: []string{"xxx"}, 2468 } 2469 stopResp, err = server.StopValidation(context.Background(), validatorStopReq) 2470 require.NoError(t.T(), err) 2471 require.False(t.T(), stopResp.Result) 2472 require.Contains(t.T(), stopResp.Msg, "cannot get subtask by source") 2473 t.validatorStageMatch(taskName, sources[0], pb.Stage_Running) 2474 t.validatorStageMatch(taskName, sources[1], pb.Stage_Running) 2475 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "") 2476 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05") 2477 2478 // (success) stop validation of source 0 2479 validatorStopReq = &pb.StopValidationRequest{ 2480 TaskName: taskName, 2481 Sources: []string{sources[0]}, 2482 } 2483 stopResp, err = server.StopValidation(context.Background(), validatorStopReq) 2484 require.NoError(t.T(), err) 2485 require.True(t.T(), stopResp.Result) 2486 t.validatorStageMatch(taskName, sources[0], pb.Stage_Stopped) 2487 t.validatorStageMatch(taskName, sources[1], pb.Stage_Running) 2488 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "") 2489 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05") 2490 2491 // (success) stop all 2492 validatorStopReq = &pb.StopValidationRequest{ 2493 TaskName: "", 2494 } 2495 stopResp, err = server.StopValidation(context.Background(), validatorStopReq) 2496 require.NoError(t.T(), err) 2497 require.True(t.T(), stopResp.Result) 2498 t.validatorStageMatch(taskName, sources[0], pb.Stage_Stopped) 2499 t.validatorStageMatch(taskName, sources[1], pb.Stage_Stopped) 2500 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "") 2501 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05") 2502 2503 // (success) stop all again 2504 validatorStopReq = &pb.StopValidationRequest{ 2505 TaskName: "", 2506 } 2507 stopResp, err = server.StopValidation(context.Background(), validatorStopReq) 2508 require.NoError(t.T(), err) 2509 require.True(t.T(), stopResp.Result) 2510 t.validatorStageMatch(taskName, sources[0], pb.Stage_Stopped) 2511 t.validatorStageMatch(taskName, sources[1], pb.Stage_Stopped) 2512 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "") 2513 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05") 2514 2515 // (success) start all tasks 2516 validatorStartReq = &pb.StartValidationRequest{ 2517 TaskName: "", 2518 } 2519 startResp, err = server.StartValidation(context.Background(), validatorStartReq) 2520 require.NoError(t.T(), err) 2521 require.True(t.T(), startResp.Result) 2522 t.validatorStageMatch(taskName, sources[0], pb.Stage_Running) 2523 t.validatorStageMatch(taskName, sources[1], pb.Stage_Running) 2524 t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "") 2525 t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05") 2526 } 2527 2528 //nolint:unparam 2529 func (t *testMasterSuite) validatorStageMatch(taskName, source string, expectStage pb.Stage) { 2530 stage := ha.NewValidatorStage(expectStage, source, taskName) 2531 2532 stageM, _, err := ha.GetValidatorStage(t.etcdTestCli, source, taskName, 0) 2533 require.NoError(t.T(), err) 2534 switch expectStage { 2535 case pb.Stage_Running, pb.Stage_Stopped: 2536 require.Len(t.T(), stageM, 1) 2537 stageDeepEqualExcludeRev(t.T(), stageM[taskName], stage) 2538 default: 2539 require.Len(t.T(), stageM, 0) 2540 } 2541 } 2542 2543 //nolint:unparam 2544 func (t *testMasterSuite) validatorModeMatch(s *scheduler.Scheduler, task, source string, 2545 expectMode, expectedStartTime string, 2546 ) { 2547 cfgs := s.GetSubTaskCfgsByTaskAndSource(task, []string{source}) 2548 v, ok := cfgs[task] 2549 require.True(t.T(), ok) 2550 cfg, ok := v[source] 2551 require.True(t.T(), ok) 2552 require.Equal(t.T(), expectMode, cfg.ValidatorCfg.Mode) 2553 require.Equal(t.T(), expectedStartTime, cfg.ValidatorCfg.StartTime) 2554 } 2555 2556 func (t *testMasterSuite) TestGetValidatorStatus() { 2557 var ( 2558 wg sync.WaitGroup 2559 taskName = "test" 2560 ) 2561 ctrl := gomock.NewController(t.T()) 2562 defer ctrl.Finish() 2563 server := testDefaultMasterServer(t.T()) 2564 server.etcdClient = t.etcdTestCli 2565 sources, workers := defaultWorkerSource() 2566 startReq := &pb.StartTaskRequest{ 2567 Task: taskConfig, 2568 Sources: sources, 2569 } 2570 // test query all workers 2571 for idx, worker := range workers { 2572 mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) 2573 mockWorkerClient.EXPECT().GetWorkerValidatorStatus( 2574 gomock.Any(), 2575 gomock.Any(), 2576 ).Return(&pb.GetValidationStatusResponse{ 2577 Result: true, 2578 TableStatuses: []*pb.ValidationTableStatus{ 2579 { 2580 SrcTable: "tbl1", 2581 }, 2582 }, 2583 }, nil) 2584 mockWorkerClient.EXPECT().GetWorkerValidatorStatus( 2585 gomock.Any(), 2586 gomock.Any(), 2587 ).Return(&pb.GetValidationStatusResponse{ 2588 Result: false, 2589 Msg: "something wrong in worker", 2590 }, nil) 2591 mockWorkerClient.EXPECT().GetWorkerValidatorStatus( 2592 gomock.Any(), 2593 gomock.Any(), 2594 ).Return(&pb.GetValidationStatusResponse{}, errors.New("grpc error")) 2595 mockRevelantWorkerClient(mockWorkerClient, taskName, sources[idx], startReq) 2596 t.workerClients[worker] = newMockRPCClient(mockWorkerClient) 2597 } 2598 ctx, cancel := context.WithCancel(context.Background()) 2599 defer t.clearSchedulerEnv(cancel, &wg) 2600 // start task without validation 2601 sourceResps := []*pb.CommonWorkerResponse{{Result: true, Source: sources[0]}, {Result: true, Source: sources[1]}} 2602 server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "", t.workerClients) 2603 mock := conn.InitVersionDB() 2604 defer func() { 2605 conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{} 2606 }() 2607 mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}). 2608 AddRow("version", "5.7.25-TiDB-v4.0.2")) 2609 stResp, err := server.StartTask(context.Background(), startReq) 2610 require.NoError(t.T(), err) 2611 require.True(t.T(), stResp.Result) 2612 2613 for _, source := range sources { 2614 t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running) 2615 } 2616 require.Equal(t.T(), sourceResps, stResp.Sources) 2617 // 1. query existing task's status 2618 statusReq := &pb.GetValidationStatusRequest{ 2619 TaskName: taskName, 2620 } 2621 resp, err := server.GetValidationStatus(context.Background(), statusReq) 2622 require.NoError(t.T(), err) 2623 require.Equal(t.T(), "", resp.Msg) 2624 require.True(t.T(), resp.Result) 2625 require.Equal(t.T(), 2, len(resp.TableStatuses)) 2626 // 2. query invalid task's status 2627 statusReq.TaskName = "invalid-task" 2628 resp, err = server.GetValidationStatus(context.Background(), statusReq) 2629 require.NoError(t.T(), err) 2630 require.Contains(t.T(), resp.Msg, "cannot get subtask by task name") 2631 require.False(t.T(), resp.Result) 2632 // 3. query invalid stage 2633 statusReq.TaskName = taskName 2634 statusReq.FilterStatus = pb.Stage_Paused // invalid stage 2635 resp, err = server.GetValidationStatus(context.Background(), statusReq) 2636 require.NoError(t.T(), err) 2637 require.Contains(t.T(), resp.Msg, "filtering stage should be either") 2638 require.False(t.T(), resp.Result) 2639 // 4. worker error 2640 statusReq.FilterStatus = pb.Stage_Running 2641 resp, err = server.GetValidationStatus(context.Background(), statusReq) 2642 require.NoError(t.T(), err) 2643 require.False(t.T(), resp.Result) 2644 require.Contains(t.T(), resp.Msg, "something wrong in worker") 2645 // 5. grpc error 2646 statusReq.FilterStatus = pb.Stage_Running 2647 resp, err = server.GetValidationStatus(context.Background(), statusReq) 2648 require.NoError(t.T(), err) 2649 require.False(t.T(), resp.Result) 2650 require.Contains(t.T(), resp.Msg, "grpc error") 2651 } 2652 2653 func (t *testMasterSuite) TestGetValidationError() { 2654 var ( 2655 wg sync.WaitGroup 2656 taskName = "test" 2657 ) 2658 ctrl := gomock.NewController(t.T()) 2659 defer ctrl.Finish() 2660 server := testDefaultMasterServer(t.T()) 2661 server.etcdClient = t.etcdTestCli 2662 sources, workers := defaultWorkerSource() 2663 startReq := &pb.StartTaskRequest{ 2664 Task: taskConfig, 2665 Sources: sources, 2666 } 2667 // test query all workers 2668 for idx, worker := range workers { 2669 mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) 2670 mockWorkerClient.EXPECT().GetValidatorError( 2671 gomock.Any(), 2672 gomock.Any(), 2673 ).Return(&pb.GetValidationErrorResponse{ 2674 Result: true, 2675 Error: []*pb.ValidationError{ 2676 { 2677 Id: "1", 2678 }, 2679 }, 2680 }, nil) 2681 mockWorkerClient.EXPECT().GetValidatorError( 2682 gomock.Any(), 2683 gomock.Any(), 2684 ).Return(&pb.GetValidationErrorResponse{ 2685 Result: false, 2686 Msg: "something wrong in worker", 2687 Error: []*pb.ValidationError{}, 2688 }, nil) 2689 mockWorkerClient.EXPECT().GetValidatorError( 2690 gomock.Any(), 2691 gomock.Any(), 2692 ).Return(&pb.GetValidationErrorResponse{}, errors.New("grpc error")) 2693 mockRevelantWorkerClient(mockWorkerClient, taskName, sources[idx], startReq) 2694 t.workerClients[worker] = newMockRPCClient(mockWorkerClient) 2695 } 2696 ctx, cancel := context.WithCancel(context.Background()) 2697 defer t.clearSchedulerEnv(cancel, &wg) 2698 // start task without validation 2699 sourceResps := []*pb.CommonWorkerResponse{{Result: true, Source: sources[0]}, {Result: true, Source: sources[1]}} 2700 server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "", t.workerClients) 2701 mock := conn.InitVersionDB() 2702 defer func() { 2703 conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{} 2704 }() 2705 mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}). 2706 AddRow("version", "5.7.25-TiDB-v4.0.2")) 2707 stResp, err := server.StartTask(context.Background(), startReq) 2708 require.NoError(t.T(), err) 2709 require.True(t.T(), stResp.Result) 2710 2711 for _, source := range sources { 2712 t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running) 2713 } 2714 require.Equal(t.T(), sourceResps, stResp.Sources) 2715 // 1. query existing task's error 2716 errReq := &pb.GetValidationErrorRequest{ 2717 TaskName: taskName, 2718 ErrState: pb.ValidateErrorState_InvalidErr, 2719 } 2720 resp, err := server.GetValidationError(context.Background(), errReq) 2721 require.NoError(t.T(), err) 2722 require.Equal(t.T(), "", resp.Msg) 2723 require.True(t.T(), resp.Result) 2724 require.Len(t.T(), resp.Error, 2) 2725 // 2. query invalid task's error 2726 errReq.TaskName = "invalid-task" 2727 resp, err = server.GetValidationError(context.Background(), errReq) 2728 require.NoError(t.T(), err) 2729 require.Contains(t.T(), resp.Msg, "cannot get subtask by task name") 2730 require.False(t.T(), resp.Result) 2731 // 3. query invalid state 2732 errReq.TaskName = taskName 2733 errReq.ErrState = pb.ValidateErrorState_ResolvedErr // invalid state 2734 resp, err = server.GetValidationError(context.Background(), errReq) 2735 require.NoError(t.T(), err) 2736 require.Contains(t.T(), resp.Msg, "only support querying `all`, `unprocessed`, and `ignored` error") 2737 require.False(t.T(), resp.Result) 2738 // 4. worker error 2739 errReq.TaskName = taskName 2740 errReq.ErrState = pb.ValidateErrorState_InvalidErr 2741 resp, err = server.GetValidationError(context.Background(), errReq) 2742 require.NoError(t.T(), err) 2743 require.False(t.T(), resp.Result) 2744 require.Contains(t.T(), resp.Msg, "something wrong in worker") 2745 // 5. grpc error 2746 resp, err = server.GetValidationError(context.Background(), errReq) 2747 require.NoError(t.T(), err) 2748 require.False(t.T(), resp.Result) 2749 require.Contains(t.T(), resp.Msg, "grpc error") 2750 } 2751 2752 func (t *testMasterSuite) TestOperateValidationError() { 2753 var ( 2754 wg sync.WaitGroup 2755 taskName = "test" 2756 ) 2757 ctrl := gomock.NewController(t.T()) 2758 defer ctrl.Finish() 2759 server := testDefaultMasterServer(t.T()) 2760 server.etcdClient = t.etcdTestCli 2761 sources, workers := defaultWorkerSource() 2762 startReq := &pb.StartTaskRequest{ 2763 Task: taskConfig, 2764 Sources: sources, 2765 } 2766 // test query all workers 2767 for idx, worker := range workers { 2768 mockWorkerClient := pbmock.NewMockWorkerClient(ctrl) 2769 mockWorkerClient.EXPECT().OperateValidatorError( 2770 gomock.Any(), 2771 gomock.Any(), 2772 ).Return(&pb.OperateValidationErrorResponse{ 2773 Result: true, 2774 Msg: "", 2775 }, nil) 2776 mockWorkerClient.EXPECT().OperateValidatorError( 2777 gomock.Any(), 2778 gomock.Any(), 2779 ).Return(&pb.OperateValidationErrorResponse{ 2780 Result: false, 2781 Msg: "something wrong in worker", 2782 }, nil) 2783 mockWorkerClient.EXPECT().OperateValidatorError( 2784 gomock.Any(), 2785 gomock.Any(), 2786 ).Return(&pb.OperateValidationErrorResponse{}, errors.New("grpc error")) 2787 mockRevelantWorkerClient(mockWorkerClient, taskName, sources[idx], startReq) 2788 t.workerClients[worker] = newMockRPCClient(mockWorkerClient) 2789 } 2790 ctx, cancel := context.WithCancel(context.Background()) 2791 defer t.clearSchedulerEnv(cancel, &wg) 2792 // start task without validation 2793 sourceResps := []*pb.CommonWorkerResponse{{Result: true, Source: sources[0]}, {Result: true, Source: sources[1]}} 2794 server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "", t.workerClients) 2795 mock := conn.InitVersionDB() 2796 defer func() { 2797 conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{} 2798 }() 2799 mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}). 2800 AddRow("version", "5.7.25-TiDB-v4.0.2")) 2801 stResp, err := server.StartTask(context.Background(), startReq) 2802 require.NoError(t.T(), err) 2803 require.True(t.T(), stResp.Result) 2804 2805 for _, source := range sources { 2806 t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running) 2807 } 2808 require.Equal(t.T(), sourceResps, stResp.Sources) 2809 // 1. query existing task's error 2810 opReq := &pb.OperateValidationErrorRequest{ 2811 TaskName: taskName, 2812 IsAllError: true, 2813 } 2814 resp, err := server.OperateValidationError(context.Background(), opReq) 2815 require.NoError(t.T(), err) 2816 require.Equal(t.T(), resp.Msg, "") 2817 require.True(t.T(), resp.Result) 2818 // 2. query invalid task's error 2819 opReq.TaskName = "invalid-task" 2820 resp, err = server.OperateValidationError(context.Background(), opReq) 2821 require.NoError(t.T(), err) 2822 require.Contains(t.T(), resp.Msg, "cannot get subtask by task name") 2823 require.False(t.T(), resp.Result) 2824 // 3. worker error 2825 opReq.TaskName = taskName 2826 resp, err = server.OperateValidationError(context.Background(), opReq) 2827 require.NoError(t.T(), err) 2828 require.False(t.T(), resp.Result) 2829 require.Contains(t.T(), resp.Msg, "something wrong in worker") 2830 // 4. grpc error 2831 opReq.TaskName = taskName 2832 resp, err = server.OperateValidationError(context.Background(), opReq) 2833 require.NoError(t.T(), err) 2834 require.False(t.T(), resp.Result) 2835 require.Contains(t.T(), resp.Msg, "grpc error") 2836 } 2837 2838 func (t *testMasterSuite) TestDashboardAddress() { 2839 // Temp file for test log output 2840 file, err := ioutil.TempFile(t.T().TempDir(), "*") 2841 require.NoError(t.T(), err) 2842 defer os.Remove(file.Name()) 2843 2844 cfg := NewConfig() 2845 err = cfg.FromContent(SampleConfig) 2846 require.NoError(t.T(), err) 2847 2848 err = log.InitLogger(&log.Config{ 2849 File: file.Name(), 2850 }) 2851 require.NoError(t.T(), err) 2852 defer func() { 2853 err = log.InitLogger(&log.Config{}) 2854 require.NoError(t.T(), err) 2855 }() 2856 2857 cfg.OpenAPI = true 2858 cfg.LogFile = file.Name() 2859 cfg.DataDir = t.T().TempDir() 2860 2861 server := NewServer(cfg) 2862 server.leader.Store(oneselfLeader) 2863 ctx, cancel := context.WithCancel(context.Background()) 2864 go server.ap.Start(ctx) 2865 go func() { 2866 err2 := server.Start(ctx) 2867 require.NoError(t.T(), err2) 2868 }() 2869 defer server.Close() 2870 defer cancel() 2871 2872 // Wait server bootstraped. 2873 time.Sleep(time.Second * 3) 2874 2875 content, err := ioutil.ReadFile(file.Name()) 2876 require.NoError(t.T(), err) 2877 require.Contains(t.T(), string(content), "Web UI enabled") 2878 } 2879 2880 func (t *testMasterSuite) TestGetLatestMeta() { 2881 _, mockDB, err := conn.InitMockDBFull() 2882 require.NoError(t.T(), err) 2883 getMasterStatusError := errors.New("failed to get master status") 2884 mockDB.ExpectQuery(`SHOW MASTER STATUS`).WillReturnError(getMasterStatusError) 2885 meta, err := GetLatestMeta(context.Background(), "", &dbconfig.DBConfig{}) 2886 require.Contains(t.T(), err.Error(), getMasterStatusError.Error()) 2887 require.Nil(t.T(), meta) 2888 2889 _, mockDB, err = conn.InitMockDBFull() 2890 require.NoError(t.T(), err) 2891 rows := mockDB.NewRows([]string{"File", "Position", "Binlog_Do_DB", "Binlog_Ignore_DB", "Executed_Gtid_Set"}) 2892 mockDB.ExpectQuery(`SHOW MASTER STATUS`).WillReturnRows(rows) 2893 meta, err = GetLatestMeta(context.Background(), "", &dbconfig.DBConfig{}) 2894 require.True(t.T(), terror.ErrNoMasterStatus.Equal(err)) 2895 require.Nil(t.T(), meta) 2896 2897 _, mockDB, err = conn.InitMockDBFull() 2898 require.NoError(t.T(), err) 2899 // 5 columns for MySQL 2900 rows = mockDB.NewRows([]string{"File", "Position", "Binlog_Do_DB", "Binlog_Ignore_DB", "Executed_Gtid_Set"}).AddRow( 2901 "mysql-bin.000009", 11232, "do_db", "ignore_db", "", 2902 ) 2903 mockDB.ExpectQuery(`SHOW MASTER STATUS`).WillReturnRows(rows) 2904 meta, err = GetLatestMeta(context.Background(), mysql.MySQLFlavor, &dbconfig.DBConfig{}) 2905 require.NoError(t.T(), err) 2906 require.Equal(t.T(), meta.BinLogName, "mysql-bin.000009") 2907 require.Equal(t.T(), meta.BinLogPos, uint32(11232)) 2908 require.Equal(t.T(), meta.BinLogGTID, "") 2909 2910 _, mockDB, err = conn.InitMockDBFull() 2911 require.NoError(t.T(), err) 2912 // 4 columns for MariaDB 2913 rows = mockDB.NewRows([]string{"File", "Position", "Binlog_Do_DB", "Binlog_Ignore_DB"}).AddRow( 2914 "mysql-bin.000009", 11232, "do_db", "ignore_db", 2915 ) 2916 mockDB.ExpectQuery(`SHOW MASTER STATUS`).WillReturnRows(rows) 2917 rows = mockDB.NewRows([]string{"Variable_name", "Value"}).AddRow("gtid_binlog_pos", "1-2-100") 2918 mockDB.ExpectQuery(`SHOW GLOBAL VARIABLES LIKE 'gtid_binlog_pos'`).WillReturnRows(rows) 2919 meta, err = GetLatestMeta(context.Background(), mysql.MariaDBFlavor, &dbconfig.DBConfig{}) 2920 require.NoError(t.T(), err) 2921 require.Equal(t.T(), meta.BinLogName, "mysql-bin.000009") 2922 require.Equal(t.T(), meta.BinLogPos, uint32(11232)) 2923 require.Equal(t.T(), meta.BinLogGTID, "1-2-100") 2924 }