github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/master/server_test.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package master
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  	"fmt"
    20  	"io"
    21  	"io/ioutil"
    22  	"net"
    23  	"net/http"
    24  	"os"
    25  	"path/filepath"
    26  	"sort"
    27  	"strings"
    28  	"sync"
    29  	"testing"
    30  	"time"
    31  
    32  	"github.com/DATA-DOG/go-sqlmock"
    33  	"github.com/go-mysql-org/go-mysql/mysql"
    34  	"github.com/golang/mock/gomock"
    35  	"github.com/pingcap/errors"
    36  	"github.com/pingcap/failpoint"
    37  	tiddl "github.com/pingcap/tidb/pkg/ddl"
    38  	"github.com/pingcap/tidb/pkg/parser"
    39  	"github.com/pingcap/tidb/pkg/parser/ast"
    40  	"github.com/pingcap/tidb/pkg/parser/model"
    41  	"github.com/pingcap/tidb/pkg/sessionctx"
    42  	toolutils "github.com/pingcap/tidb/pkg/util"
    43  	tidbmock "github.com/pingcap/tidb/pkg/util/mock"
    44  	"github.com/pingcap/tiflow/dm/checker"
    45  	common2 "github.com/pingcap/tiflow/dm/common"
    46  	"github.com/pingcap/tiflow/dm/config"
    47  	"github.com/pingcap/tiflow/dm/config/dbconfig"
    48  	"github.com/pingcap/tiflow/dm/config/security"
    49  	"github.com/pingcap/tiflow/dm/ctl/common"
    50  	"github.com/pingcap/tiflow/dm/loader"
    51  	"github.com/pingcap/tiflow/dm/master/scheduler"
    52  	"github.com/pingcap/tiflow/dm/master/shardddl"
    53  	"github.com/pingcap/tiflow/dm/master/workerrpc"
    54  	"github.com/pingcap/tiflow/dm/openapi/fixtures"
    55  	"github.com/pingcap/tiflow/dm/pb"
    56  	"github.com/pingcap/tiflow/dm/pbmock"
    57  	"github.com/pingcap/tiflow/dm/pkg/conn"
    58  	"github.com/pingcap/tiflow/dm/pkg/cputil"
    59  	"github.com/pingcap/tiflow/dm/pkg/etcdutil"
    60  	"github.com/pingcap/tiflow/dm/pkg/ha"
    61  	"github.com/pingcap/tiflow/dm/pkg/log"
    62  	"github.com/pingcap/tiflow/dm/pkg/shardddl/optimism"
    63  	"github.com/pingcap/tiflow/dm/pkg/shardddl/pessimism"
    64  	"github.com/pingcap/tiflow/dm/pkg/terror"
    65  	"github.com/pingcap/tiflow/dm/pkg/utils"
    66  	"github.com/pingcap/tiflow/pkg/version"
    67  	"github.com/stretchr/testify/require"
    68  	"github.com/stretchr/testify/suite"
    69  	"github.com/tikv/pd/pkg/utils/tempurl"
    70  	clientv3 "go.etcd.io/etcd/client/v3"
    71  	"go.etcd.io/etcd/server/v3/verify"
    72  	"go.etcd.io/etcd/tests/v3/integration"
    73  	"google.golang.org/grpc"
    74  )
    75  
// taskConfig is the YAML task configuration shared by the tests in this file.
// It is taken from the integration test `sharding` and describes a task named
// "test" that replicates from the two sources mysql-replica-01 and
// mysql-replica-02.
var taskConfig = `---
name: test
task-mode: all
is-sharding: true
shard-mode: ""
meta-schema: "dm_meta"
enable-heartbeat: true
ignore-checking-items: ["all"]

target-database:
  host: "127.0.0.1"
  port: 4000
  user: "root"
  password: ""

mysql-instances:
  - source-id: "mysql-replica-01"
    block-allow-list:  "instance"
    route-rules: ["sharding-route-rules-table", "sharding-route-rules-schema"]
    mydumper-config-name: "global"
    loader-config-name: "global"
    syncer-config-name: "global"

  - source-id: "mysql-replica-02"
    block-allow-list:  "instance"
    route-rules: ["sharding-route-rules-table", "sharding-route-rules-schema"]
    mydumper-config-name: "global"
    loader-config-name: "global"
    syncer-config-name: "global"

block-allow-list:
  instance:
    do-dbs: ["~^sharding[\\d]+"]
    do-tables:
    -  db-name: "~^sharding[\\d]+"
       tbl-name: "~^t[\\d]+"

routes:
  sharding-route-rules-table:
    schema-pattern: sharding*
    table-pattern: t*
    target-schema: db_target
    target-table: t_target

  sharding-route-rules-schema:
    schema-pattern: sharding*
    target-schema: db_target

mydumpers:
  global:
    threads: 4
    chunk-filesize: 64
    skip-tz-utc: true
    extra-args: "--regex '^sharding.*'"

loaders:
  global:
    pool-size: 16
    dir: "./dumped_data"

syncers:
  global:
    worker-count: 16
    batch: 100
`
   142  
// Shared message fragments, regexps and settings used by the tests.
//
// errGRPCFailed is a plain message and errGRPCFailedReg a multi-line regexp
// matching any text that contains it. keepAliveTTL is the TTL handed to
// ha.KeepAlive for the mocked workers.
//
// NOTE(review): errCheckSyncConfig is already written as a regexp, so
// errCheckSyncConfigReg ends up with a duplicated `(?m).*` prefix and `.*`
// suffix — confirm whether the double wrapping is intended.
var (
	errGRPCFailed         = "test grpc request failed"
	errGRPCFailedReg      = fmt.Sprintf("(?m).*%s.*", errGRPCFailed)
	errCheckSyncConfig    = "(?m).*check sync config with error.*"
	errCheckSyncConfigReg = fmt.Sprintf("(?m).*%s.*", errCheckSyncConfig)
	keepAliveTTL          = int64(10)
)
   150  
// testMasterSuite is the testify suite holding the state shared by the master
// server tests in this file.
type testMasterSuite struct {
	suite.Suite

	// workerClients is created in SetupSuite and (re)filled by individual tests.
	// The two backups hold the package-level maxRetryNum and electionTTL values
	// overridden in SetupSuite so that TearDownSuite can restore them.
	workerClients     map[string]workerrpc.Client
	saveMaxRetryNum   int
	electionTTLBackup int

	// A single-member etcd test cluster and a client for it, created in
	// SetupTest and terminated in TearDownTest.
	testEtcdCluster *integration.ClusterV3
	etcdTestCli     *clientv3.Client
}
   161  
   162  func TestMasterSuite(t *testing.T) {
   163  	suite.Run(t, new(testMasterSuite))
   164  }
   165  
// pwd is the working directory of the test process, captured in SetupSuite.
var pwd string
   167  
   168  func (t *testMasterSuite) SetupSuite() {
   169  	require.NoError(t.T(), log.InitLogger(&log.Config{}))
   170  	var err error
   171  	pwd, err = os.Getwd()
   172  	require.NoError(t.T(), err)
   173  	integration.BeforeTestExternal(t.T())
   174  	t.workerClients = make(map[string]workerrpc.Client)
   175  	t.saveMaxRetryNum = maxRetryNum
   176  	t.electionTTLBackup = electionTTL
   177  	electionTTL = 3
   178  	maxRetryNum = 2
   179  	checkAndAdjustSourceConfigForDMCtlFunc = checkAndNoAdjustSourceConfigMock
   180  }
   181  
   182  func (t *testMasterSuite) TearDownSuite() {
   183  	maxRetryNum = t.saveMaxRetryNum
   184  	electionTTL = t.electionTTLBackup
   185  	checkAndAdjustSourceConfigForDMCtlFunc = checkAndAdjustSourceConfig
   186  }
   187  
   188  func (t *testMasterSuite) SetupTest() {
   189  	t.testEtcdCluster = integration.NewClusterV3(t.T(), &integration.ClusterConfig{Size: 1})
   190  	t.etcdTestCli = t.testEtcdCluster.RandClient()
   191  	t.clearEtcdEnv()
   192  }
   193  
   194  func (t *testMasterSuite) TearDownTest() {
   195  	t.clearEtcdEnv()
   196  	t.testEtcdCluster.Terminate(t.T())
   197  }
   198  
   199  func (t *testMasterSuite) clearEtcdEnv() {
   200  	require.NoError(t.T(), ha.ClearTestInfoOperation(t.etcdTestCli))
   201  }
   202  
// clearSchedulerEnv calls cancel, waits for every goroutine tracked by wg to
// finish and then clears the test data from etcd. The order matters: the
// goroutines are expected to exit because of the cancellation before etcd is
// wiped.
func (t *testMasterSuite) clearSchedulerEnv(cancel context.CancelFunc, wg *sync.WaitGroup) {
	cancel()
	wg.Wait()
	t.clearEtcdEnv()
}
   208  
   209  func stageDeepEqualExcludeRev(t *testing.T, stage, expectStage ha.Stage) {
   210  	t.Helper()
   211  
   212  	expectStage.Revision = stage.Revision
   213  	require.Equal(t, expectStage, stage)
   214  }
   215  
   216  func mockRevelantWorkerClient(mockWorkerClient *pbmock.MockWorkerClient, taskName, sourceID string, masterReq interface{}) {
   217  	var expect pb.Stage
   218  	switch req := masterReq.(type) {
   219  	case *pb.OperateSourceRequest:
   220  		switch req.Op {
   221  		case pb.SourceOp_StartSource, pb.SourceOp_UpdateSource:
   222  			expect = pb.Stage_Running
   223  		case pb.SourceOp_StopSource:
   224  			expect = pb.Stage_Stopped
   225  		}
   226  	case *pb.StartTaskRequest, *pb.UpdateTaskRequest:
   227  		expect = pb.Stage_Running
   228  	case *pb.OperateTaskRequest:
   229  		switch req.Op {
   230  		case pb.TaskOp_Resume:
   231  			expect = pb.Stage_Running
   232  		case pb.TaskOp_Pause:
   233  			expect = pb.Stage_Paused
   234  		case pb.TaskOp_Delete:
   235  		}
   236  	case *pb.OperateWorkerRelayRequest:
   237  		switch req.Op {
   238  		case pb.RelayOp_ResumeRelay:
   239  			expect = pb.Stage_Running
   240  		case pb.RelayOp_PauseRelay:
   241  			expect = pb.Stage_Paused
   242  		case pb.RelayOp_StopRelay:
   243  			expect = pb.Stage_Stopped
   244  		}
   245  	}
   246  	queryResp := &pb.QueryStatusResponse{
   247  		Result:       true,
   248  		SourceStatus: &pb.SourceStatus{},
   249  	}
   250  
   251  	switch masterReq.(type) {
   252  	case *pb.OperateSourceRequest:
   253  		switch expect {
   254  		case pb.Stage_Running:
   255  			queryResp.SourceStatus = &pb.SourceStatus{Source: sourceID}
   256  		case pb.Stage_Stopped:
   257  			queryResp.SourceStatus = &pb.SourceStatus{Source: ""}
   258  		}
   259  	case *pb.StartTaskRequest, *pb.UpdateTaskRequest, *pb.OperateTaskRequest:
   260  		queryResp.SubTaskStatus = []*pb.SubTaskStatus{{}}
   261  		if opTaskReq, ok := masterReq.(*pb.OperateTaskRequest); ok && opTaskReq.Op == pb.TaskOp_Delete {
   262  			queryResp.SubTaskStatus[0].Status = &pb.SubTaskStatus_Msg{
   263  				Msg: fmt.Sprintf("no sub task with name %s has started", taskName),
   264  			}
   265  		} else {
   266  			queryResp.SubTaskStatus[0].Name = taskName
   267  			queryResp.SubTaskStatus[0].Stage = expect
   268  		}
   269  	case *pb.OperateWorkerRelayRequest:
   270  		queryResp.SourceStatus = &pb.SourceStatus{RelayStatus: &pb.RelayStatus{Stage: expect}}
   271  	}
   272  
   273  	mockWorkerClient.EXPECT().QueryStatus(
   274  		gomock.Any(),
   275  		&pb.QueryStatusRequest{
   276  			Name: taskName,
   277  		},
   278  	).Return(queryResp, nil).MaxTimes(maxRetryNum)
   279  }
   280  
   281  func createTableInfo(t *testing.T, p *parser.Parser, se sessionctx.Context, tableID int64, sql string) *model.TableInfo {
   282  	t.Helper()
   283  
   284  	node, err := p.ParseOneStmt(sql, "utf8mb4", "utf8mb4_bin")
   285  	require.NoError(t, err)
   286  	createStmtNode, ok := node.(*ast.CreateTableStmt)
   287  	require.True(t, ok, "%s is not a CREATE TABLE statement", sql)
   288  	info, err := tiddl.MockTableInfo(se, createStmtNode, tableID)
   289  	require.NoError(t, err)
   290  	return info
   291  }
   292  
   293  func newMockRPCClient(client pb.WorkerClient) workerrpc.Client {
   294  	c, _ := workerrpc.NewGRPCClientWrap(nil, client)
   295  	return c
   296  }
   297  
   298  func defaultWorkerSource() ([]string, []string) {
   299  	return []string{
   300  			"mysql-replica-01",
   301  			"mysql-replica-02",
   302  		}, []string{
   303  			"127.0.0.1:8262",
   304  			"127.0.0.1:8263",
   305  		}
   306  }
   307  
   308  func makeNilWorkerClients(workers []string) map[string]workerrpc.Client {
   309  	nilWorkerClients := make(map[string]workerrpc.Client, len(workers))
   310  	for _, worker := range workers {
   311  		nilWorkerClients[worker] = nil
   312  	}
   313  	return nilWorkerClients
   314  }
   315  
   316  func makeWorkerClientsForHandle(ctrl *gomock.Controller, taskName string, sources []string, workers []string, reqs ...interface{}) map[string]workerrpc.Client {
   317  	workerClients := make(map[string]workerrpc.Client, len(workers))
   318  	for i := range workers {
   319  		mockWorkerClient := pbmock.NewMockWorkerClient(ctrl)
   320  		for _, req := range reqs {
   321  			mockRevelantWorkerClient(mockWorkerClient, taskName, sources[i], req)
   322  		}
   323  		workerClients[workers[i]] = newMockRPCClient(mockWorkerClient)
   324  	}
   325  	return workerClients
   326  }
   327  
   328  func testDefaultMasterServer(t *testing.T) *Server {
   329  	t.Helper()
   330  
   331  	cfg := NewConfig()
   332  	err := cfg.FromContent(SampleConfig)
   333  	require.NoError(t, err)
   334  	cfg.DataDir = t.TempDir()
   335  	server := NewServer(cfg)
   336  	server.leader.Store(oneselfLeader)
   337  	go server.ap.Start(context.Background())
   338  
   339  	return server
   340  }
   341  
   342  func (t *testMasterSuite) testMockScheduler(
   343  	ctx context.Context,
   344  	wg *sync.WaitGroup,
   345  	sources, workers []string,
   346  	password string,
   347  	workerClients map[string]workerrpc.Client,
   348  ) (*scheduler.Scheduler, []context.CancelFunc) {
   349  	logger := log.L()
   350  	scheduler2 := scheduler.NewScheduler(&logger, security.Security{})
   351  	err := scheduler2.Start(ctx, t.etcdTestCli)
   352  	require.NoError(t.T(), err)
   353  	cancels := make([]context.CancelFunc, 0, 2)
   354  	for i := range workers {
   355  		// add worker to scheduler's workers map
   356  		name := workers[i]
   357  		require.NoError(t.T(), scheduler2.AddWorker(name, workers[i]))
   358  		scheduler2.SetWorkerClientForTest(name, workerClients[workers[i]])
   359  		// operate mysql config on this worker
   360  		cfg := config.NewSourceConfig()
   361  		cfg.SourceID = sources[i]
   362  		cfg.From.Password = password
   363  		require.NoError(t.T(), scheduler2.AddSourceCfg(cfg))
   364  		wg.Add(1)
   365  		ctx1, cancel1 := context.WithCancel(ctx)
   366  		cancels = append(cancels, cancel1)
   367  		go func(ctx context.Context, workerName string) {
   368  			defer wg.Done()
   369  			require.NoError(t.T(), ha.KeepAlive(ctx, t.etcdTestCli, workerName, keepAliveTTL))
   370  		}(ctx1, name)
   371  		idx := i
   372  		require.Eventually(t.T(), func() bool {
   373  			w := scheduler2.GetWorkerBySource(sources[idx])
   374  			return w != nil && w.BaseInfo().Name == name
   375  		}, 3*time.Second, 100*time.Millisecond)
   376  	}
   377  	return scheduler2, cancels
   378  }
   379  
   380  func (t *testMasterSuite) testMockSchedulerForRelay(
   381  	ctx context.Context,
   382  	wg *sync.WaitGroup,
   383  	sources, workers []string,
   384  	password string,
   385  	workerClients map[string]workerrpc.Client,
   386  ) (*scheduler.Scheduler, []context.CancelFunc) {
   387  	logger := log.L()
   388  	scheduler2 := scheduler.NewScheduler(&logger, security.Security{})
   389  	err := scheduler2.Start(ctx, t.etcdTestCli)
   390  	require.NoError(t.T(), err)
   391  	cancels := make([]context.CancelFunc, 0, 2)
   392  	for i := range workers {
   393  		// add worker to scheduler's workers map
   394  		name := workers[i]
   395  		require.NoError(t.T(), scheduler2.AddWorker(name, workers[i]))
   396  		scheduler2.SetWorkerClientForTest(name, workerClients[workers[i]])
   397  		// operate mysql config on this worker
   398  		cfg := config.NewSourceConfig()
   399  		cfg.SourceID = sources[i]
   400  		cfg.From.Password = password
   401  		require.NoError(t.T(), scheduler2.AddSourceCfg(cfg))
   402  		wg.Add(1)
   403  		ctx1, cancel1 := context.WithCancel(ctx)
   404  		cancels = append(cancels, cancel1)
   405  		go func(ctx context.Context, workerName string) {
   406  			defer wg.Done()
   407  			require.NoError(t.T(), ha.KeepAlive(ctx, t.etcdTestCli, workerName, keepAliveTTL))
   408  		}(ctx1, name)
   409  
   410  		// wait the mock worker has alive
   411  		require.Eventually(t.T(), func() bool {
   412  			resp, err2 := t.etcdTestCli.Get(ctx, common2.WorkerKeepAliveKeyAdapter.Encode(name))
   413  			require.NoError(t.T(), err2)
   414  			return resp.Count == 1
   415  		}, 3*time.Second, 100*time.Millisecond)
   416  
   417  		require.NoError(t.T(), scheduler2.StartRelay(sources[i], []string{workers[i]}))
   418  		idx := i
   419  		require.Eventually(t.T(), func() bool {
   420  			relayWorkers, err2 := scheduler2.GetRelayWorkers(sources[idx])
   421  			require.NoError(t.T(), err2)
   422  			return len(relayWorkers) == 1 && relayWorkers[0].BaseInfo().Name == name
   423  		}, 3*time.Second, 100*time.Millisecond)
   424  	}
   425  	return scheduler2, cancels
   426  }
   427  
   428  func generateServerConfig(t *testing.T, name string) *Config {
   429  	t.Helper()
   430  
   431  	// create a new cluster
   432  	cfg1 := NewConfig()
   433  	err := cfg1.FromContent(SampleConfig)
   434  	require.NoError(t, err)
   435  	cfg1.Name = name
   436  	cfg1.DataDir = t.TempDir()
   437  	cfg1.MasterAddr = tempurl.Alloc()[len("http://"):]
   438  	cfg1.AdvertiseAddr = cfg1.MasterAddr
   439  	cfg1.PeerUrls = tempurl.Alloc()
   440  	cfg1.AdvertisePeerUrls = cfg1.PeerUrls
   441  	cfg1.InitialCluster = fmt.Sprintf("%s=%s", cfg1.Name, cfg1.AdvertisePeerUrls)
   442  	return cfg1
   443  }
   444  
   445  func (t *testMasterSuite) TestQueryStatus() {
   446  	ctrl := gomock.NewController(t.T())
   447  	defer ctrl.Finish()
   448  
   449  	server := testDefaultMasterServer(t.T())
   450  	sources, workers := defaultWorkerSource()
   451  	var cancels []context.CancelFunc
   452  
   453  	// test query all workers
   454  	for _, worker := range workers {
   455  		mockWorkerClient := pbmock.NewMockWorkerClient(ctrl)
   456  		mockWorkerClient.EXPECT().QueryStatus(
   457  			gomock.Any(),
   458  			&pb.QueryStatusRequest{},
   459  		).Return(&pb.QueryStatusResponse{
   460  			Result:       true,
   461  			SourceStatus: &pb.SourceStatus{},
   462  		}, nil)
   463  		t.workerClients[worker] = newMockRPCClient(mockWorkerClient)
   464  	}
   465  	var wg sync.WaitGroup
   466  	ctx, cancel := context.WithCancel(context.Background())
   467  	server.scheduler, cancels = t.testMockScheduler(ctx, &wg, sources, workers, "", t.workerClients)
   468  	for _, cancelFunc := range cancels {
   469  		defer cancelFunc()
   470  	}
   471  	resp, err := server.QueryStatus(context.Background(), &pb.QueryStatusListRequest{})
   472  	require.NoError(t.T(), err)
   473  	require.True(t.T(), resp.Result)
   474  	t.clearSchedulerEnv(cancel, &wg)
   475  
   476  	// query specified sources
   477  	for _, worker := range workers {
   478  		mockWorkerClient := pbmock.NewMockWorkerClient(ctrl)
   479  		mockWorkerClient.EXPECT().QueryStatus(
   480  			gomock.Any(),
   481  			&pb.QueryStatusRequest{},
   482  		).Return(&pb.QueryStatusResponse{
   483  			Result:       true,
   484  			SourceStatus: &pb.SourceStatus{},
   485  		}, nil)
   486  		t.workerClients[worker] = newMockRPCClient(mockWorkerClient)
   487  	}
   488  	ctx, cancel = context.WithCancel(context.Background())
   489  	server.scheduler, cancels = t.testMockSchedulerForRelay(ctx, &wg, sources, workers, "passwd", t.workerClients)
   490  	for _, cancelFunc := range cancels {
   491  		defer cancelFunc()
   492  	}
   493  	resp, err = server.QueryStatus(context.Background(), &pb.QueryStatusListRequest{
   494  		Sources: sources,
   495  	})
   496  	require.NoError(t.T(), err)
   497  	require.True(t.T(), resp.Result)
   498  
   499  	// query with invalid dm-worker[s]
   500  	resp, err = server.QueryStatus(context.Background(), &pb.QueryStatusListRequest{
   501  		Sources: []string{"invalid-source1", "invalid-source2"},
   502  	})
   503  	require.NoError(t.T(), err)
   504  	require.False(t.T(), resp.Result)
   505  	require.Regexp(t.T(), "sources .* haven't been added", resp.Msg)
   506  
   507  	// query with invalid task name
   508  	resp, err = server.QueryStatus(context.Background(), &pb.QueryStatusListRequest{
   509  		Name: "invalid-task-name",
   510  	})
   511  	require.NoError(t.T(), err)
   512  	require.False(t.T(), resp.Result)
   513  	require.Regexp(t.T(), "task .* has no source or not exist", resp.Msg)
   514  	t.clearSchedulerEnv(cancel, &wg)
   515  	// TODO: test query with correct task name, this needs to add task first
   516  }
   517  
   518  func (t *testMasterSuite) TestWaitOperationOkRightResult() {
   519  	cases := []struct {
   520  		req              interface{}
   521  		resp             *pb.QueryStatusResponse
   522  		expectedOK       bool
   523  		expectedEmptyMsg bool
   524  	}{
   525  		{
   526  			&pb.OperateTaskRequest{
   527  				Op:   pb.TaskOp_Pause,
   528  				Name: "task-unittest",
   529  			},
   530  			&pb.QueryStatusResponse{
   531  				SubTaskStatus: []*pb.SubTaskStatus{
   532  					{Stage: pb.Stage_Paused},
   533  				},
   534  			},
   535  			true,
   536  			true,
   537  		},
   538  		{
   539  			&pb.OperateTaskRequest{
   540  				Op:   pb.TaskOp_Pause,
   541  				Name: "task-unittest",
   542  			},
   543  			&pb.QueryStatusResponse{
   544  				SubTaskStatus: []*pb.SubTaskStatus{
   545  					{
   546  						Stage:  pb.Stage_Paused,
   547  						Result: &pb.ProcessResult{Errors: []*pb.ProcessError{{Message: "paused by previous error"}}},
   548  					},
   549  				},
   550  			},
   551  			true,
   552  			false,
   553  		},
   554  	}
   555  
   556  	ctrl := gomock.NewController(t.T())
   557  	defer ctrl.Finish()
   558  	ctx := context.Background()
   559  	duration, _ := time.ParseDuration("1s")
   560  	s := &Server{cfg: &Config{RPCTimeout: duration}}
   561  	for _, ca := range cases {
   562  		mockWorkerClient := pbmock.NewMockWorkerClient(ctrl)
   563  		mockWorkerClient.EXPECT().QueryStatus(
   564  			gomock.Any(),
   565  			gomock.Any(),
   566  		).Return(ca.resp, nil)
   567  		mockWorker := scheduler.NewMockWorker(newMockRPCClient(mockWorkerClient))
   568  
   569  		ok, msg, _, err := s.waitOperationOk(ctx, mockWorker, "", "", ca.req)
   570  		require.NoError(t.T(), err)
   571  		require.Equal(t.T(), ca.expectedOK, ok)
   572  		if ca.expectedEmptyMsg {
   573  			require.Empty(t.T(), msg)
   574  		} else {
   575  			require.NotEmpty(t.T(), msg)
   576  		}
   577  	}
   578  }
   579  
   580  func (t *testMasterSuite) TestStopTaskWithExceptRight() {
   581  	taskName := "test-stop-task"
   582  	responeses := [][]*pb.QueryStatusResponse{{
   583  		&pb.QueryStatusResponse{
   584  			SubTaskStatus: []*pb.SubTaskStatus{
   585  				{
   586  					Name: taskName,
   587  					Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{
   588  						UnresolvedGroups: []*pb.ShardingGroup{{Target: "`db`.`tbl`", Unsynced: []string{"table1"}}},
   589  					}},
   590  				},
   591  			},
   592  		},
   593  		&pb.QueryStatusResponse{SubTaskStatus: []*pb.SubTaskStatus{}},
   594  	}, {
   595  		&pb.QueryStatusResponse{
   596  			SubTaskStatus: []*pb.SubTaskStatus{
   597  				{
   598  					Name: taskName,
   599  					Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{
   600  						UnresolvedGroups: []*pb.ShardingGroup{{Target: "`db`.`tbl`", Unsynced: []string{"table1"}}},
   601  					}},
   602  				},
   603  			},
   604  		},
   605  		&pb.QueryStatusResponse{SubTaskStatus: []*pb.SubTaskStatus{
   606  			{
   607  				Name:   taskName,
   608  				Status: &pb.SubTaskStatus_Msg{Msg: common2.NoSubTaskMsg(taskName)},
   609  			},
   610  		}},
   611  	}}
   612  	req := &pb.OperateTaskRequest{
   613  		Op:   pb.TaskOp_Delete,
   614  		Name: taskName,
   615  	}
   616  	ctrl := gomock.NewController(t.T())
   617  	defer ctrl.Finish()
   618  	ctx := context.Background()
   619  	s := &Server{cfg: &Config{RPCTimeout: time.Second}}
   620  
   621  	for _, item := range responeses {
   622  		mockWorkerClient := pbmock.NewMockWorkerClient(ctrl)
   623  		mockWorkerClient.EXPECT().QueryStatus(
   624  			gomock.Any(),
   625  			gomock.Any(),
   626  		).Return(item[0], nil).Return(item[1], nil).MaxTimes(2)
   627  		mockWorker := scheduler.NewMockWorker(newMockRPCClient(mockWorkerClient))
   628  		ok, msg, _, err := s.waitOperationOk(ctx, mockWorker, taskName, "", req)
   629  		require.NoError(t.T(), err)
   630  		require.True(t.T(), ok)
   631  		require.Empty(t.T(), msg)
   632  	}
   633  }
   634  
   635  func (t *testMasterSuite) TestFillUnsyncedStatus() {
   636  	var (
   637  		logger  = log.L()
   638  		task1   = "task1"
   639  		task2   = "task2"
   640  		source1 = "source1"
   641  		source2 = "source2"
   642  		sources = []string{source1, source2}
   643  	)
   644  	cases := []struct {
   645  		infos    []pessimism.Info
   646  		input    []*pb.QueryStatusResponse
   647  		expected []*pb.QueryStatusResponse
   648  	}{
   649  		// test it could work
   650  		{
   651  			[]pessimism.Info{
   652  				{
   653  					Task:   task1,
   654  					Source: source1,
   655  					Schema: "db",
   656  					Table:  "tbl",
   657  				},
   658  			},
   659  			[]*pb.QueryStatusResponse{
   660  				{
   661  					SourceStatus: &pb.SourceStatus{
   662  						Source: source1,
   663  					},
   664  					SubTaskStatus: []*pb.SubTaskStatus{
   665  						{
   666  							Name: task1,
   667  							Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{
   668  								UnresolvedGroups: []*pb.ShardingGroup{{Target: "`db`.`tbl`", Unsynced: []string{"table1"}}},
   669  							}},
   670  						},
   671  						{
   672  							Name:   task2,
   673  							Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{}},
   674  						},
   675  					},
   676  				}, {
   677  					SourceStatus: &pb.SourceStatus{
   678  						Source: source2,
   679  					},
   680  					SubTaskStatus: []*pb.SubTaskStatus{
   681  						{
   682  							Name:   task1,
   683  							Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{}},
   684  						},
   685  						{
   686  							Name:   task2,
   687  							Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{}},
   688  						},
   689  					},
   690  				},
   691  			},
   692  			[]*pb.QueryStatusResponse{
   693  				{
   694  					SourceStatus: &pb.SourceStatus{
   695  						Source: source1,
   696  					},
   697  					SubTaskStatus: []*pb.SubTaskStatus{
   698  						{
   699  							Name: task1,
   700  							Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{
   701  								UnresolvedGroups: []*pb.ShardingGroup{{Target: "`db`.`tbl`", Unsynced: []string{"table1"}}},
   702  							}},
   703  						},
   704  						{
   705  							Name:   task2,
   706  							Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{}},
   707  						},
   708  					},
   709  				}, {
   710  					SourceStatus: &pb.SourceStatus{
   711  						Source: source2,
   712  					},
   713  					SubTaskStatus: []*pb.SubTaskStatus{
   714  						{
   715  							Name: task1,
   716  							Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{
   717  								UnresolvedGroups: []*pb.ShardingGroup{{Target: "`db`.`tbl`", Unsynced: []string{"this DM-worker doesn't receive any shard DDL of this group"}}},
   718  							}},
   719  						},
   720  						{
   721  							Name:   task2,
   722  							Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{}},
   723  						},
   724  					},
   725  				},
   726  			},
   727  		},
   728  		// test won't interfere not sync status
   729  		{
   730  			[]pessimism.Info{
   731  				{
   732  					Task:   task1,
   733  					Source: source1,
   734  					Schema: "db",
   735  					Table:  "tbl",
   736  				},
   737  			},
   738  			[]*pb.QueryStatusResponse{
   739  				{
   740  					SourceStatus: &pb.SourceStatus{
   741  						Source: source1,
   742  					},
   743  					SubTaskStatus: []*pb.SubTaskStatus{
   744  						{
   745  							Name: task1,
   746  							Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{
   747  								UnresolvedGroups: []*pb.ShardingGroup{{Target: "`db`.`tbl`", Unsynced: []string{"table1"}}},
   748  							}},
   749  						},
   750  					},
   751  				}, {
   752  					SourceStatus: &pb.SourceStatus{
   753  						Source: source2,
   754  					},
   755  					SubTaskStatus: []*pb.SubTaskStatus{
   756  						{
   757  							Name:   task1,
   758  							Status: &pb.SubTaskStatus_Load{Load: &pb.LoadStatus{}},
   759  						},
   760  					},
   761  				},
   762  			},
   763  			[]*pb.QueryStatusResponse{
   764  				{
   765  					SourceStatus: &pb.SourceStatus{
   766  						Source: source1,
   767  					},
   768  					SubTaskStatus: []*pb.SubTaskStatus{
   769  						{
   770  							Name: task1,
   771  							Status: &pb.SubTaskStatus_Sync{Sync: &pb.SyncStatus{
   772  								UnresolvedGroups: []*pb.ShardingGroup{{Target: "`db`.`tbl`", Unsynced: []string{"table1"}}},
   773  							}},
   774  						},
   775  					},
   776  				}, {
   777  					SourceStatus: &pb.SourceStatus{
   778  						Source: source2,
   779  					},
   780  					SubTaskStatus: []*pb.SubTaskStatus{
   781  						{
   782  							Name:   task1,
   783  							Status: &pb.SubTaskStatus_Load{Load: &pb.LoadStatus{}},
   784  						},
   785  					},
   786  				},
   787  			},
   788  		},
   789  	}
   790  
   791  	// test pessimistic mode
   792  	for _, ca := range cases {
   793  		s := &Server{}
   794  		s.pessimist = shardddl.NewPessimist(&logger, func(task string) []string { return sources })
   795  		require.NoError(t.T(), s.pessimist.Start(context.Background(), t.etcdTestCli))
   796  		for _, i := range ca.infos {
   797  			_, err := pessimism.PutInfo(t.etcdTestCli, i)
   798  			require.NoError(t.T(), err)
   799  		}
   800  		if len(ca.infos) > 0 {
   801  			utils.WaitSomething(20, 100*time.Millisecond, func() bool {
   802  				return len(s.pessimist.ShowLocks("", nil)) > 0
   803  			})
   804  		}
   805  
   806  		s.fillUnsyncedStatus(ca.input)
   807  		require.Equal(t.T(), ca.expected, ca.input)
   808  		_, err := pessimism.DeleteInfosOperations(t.etcdTestCli, ca.infos, nil)
   809  		require.NoError(t.T(), err)
   810  	}
   811  }
   812  
   813  func (t *testMasterSuite) TestCheckTask() {
   814  	ctrl := gomock.NewController(t.T())
   815  	defer ctrl.Finish()
   816  
   817  	server := testDefaultMasterServer(t.T())
   818  	sources, workers := defaultWorkerSource()
   819  
   820  	t.workerClients = makeNilWorkerClients(workers)
   821  	var wg sync.WaitGroup
   822  	ctx, cancel := context.WithCancel(context.Background())
   823  	server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "", t.workerClients)
   824  	mock := conn.InitVersionDB()
   825  	defer func() {
   826  		conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{}
   827  	}()
   828  	mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).
   829  		AddRow("version", "5.7.25-TiDB-v4.0.2"))
   830  	resp, err := server.CheckTask(context.Background(), &pb.CheckTaskRequest{
   831  		Task: taskConfig,
   832  	})
   833  	require.NoError(t.T(), err)
   834  	require.True(t.T(), resp.Result)
   835  
   836  	// decode task with error
   837  	resp, err = server.CheckTask(context.Background(), &pb.CheckTaskRequest{
   838  		Task: "invalid toml config",
   839  	})
   840  	require.NoError(t.T(), err)
   841  	require.False(t.T(), resp.Result)
   842  	t.clearSchedulerEnv(cancel, &wg)
   843  
   844  	// simulate invalid password returned from scheduler, but config was supported plaintext mysql password, so cfg.SubTaskConfigs will success
   845  	ctx, cancel = context.WithCancel(context.Background())
   846  	server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "invalid-encrypt-password", t.workerClients)
   847  	mock = conn.InitVersionDB()
   848  	mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).
   849  		AddRow("version", "5.7.25-TiDB-v4.0.2"))
   850  	resp, err = server.CheckTask(context.Background(), &pb.CheckTaskRequest{
   851  		Task: taskConfig,
   852  	})
   853  	require.NoError(t.T(), err)
   854  	require.True(t.T(), resp.Result)
   855  	t.clearSchedulerEnv(cancel, &wg)
   856  }
   857  
   858  func (t *testMasterSuite) TestStartTask() {
   859  	ctrl := gomock.NewController(t.T())
   860  	defer ctrl.Finish()
   861  
   862  	server := testDefaultMasterServer(t.T())
   863  	server.etcdClient = t.etcdTestCli
   864  	sources, workers := defaultWorkerSource()
   865  
   866  	// s.generateSubTask with error
   867  	resp, err := server.StartTask(context.Background(), &pb.StartTaskRequest{
   868  		Task: "invalid toml config",
   869  	})
   870  	require.NoError(t.T(), err)
   871  	require.False(t.T(), resp.Result)
   872  
   873  	// test start task successfully
   874  	var wg sync.WaitGroup
   875  	// taskName is relative to taskConfig
   876  	taskName := "test"
   877  	ctx, cancel := context.WithCancel(context.Background())
   878  	req := &pb.StartTaskRequest{
   879  		Task:    taskConfig,
   880  		Sources: sources,
   881  	}
   882  	server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "",
   883  		makeWorkerClientsForHandle(ctrl, taskName, sources, workers, req))
   884  	mock := conn.InitVersionDB()
   885  	defer func() {
   886  		conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{}
   887  	}()
   888  	mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).
   889  		AddRow("version", "5.7.25-TiDB-v4.0.2"))
   890  	resp, err = server.StartTask(context.Background(), req)
   891  	require.NoError(t.T(), err)
   892  	require.True(t.T(), resp.Result)
   893  	for _, source := range sources {
   894  		t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running)
   895  		tcm, _, err2 := ha.GetSubTaskCfg(t.etcdTestCli, source, taskName, 0)
   896  		require.NoError(t.T(), err2)
   897  		require.Contains(t.T(), tcm, taskName)
   898  		require.Equal(t.T(), taskName, tcm[taskName].Name)
   899  		require.Equal(t.T(), source, tcm[taskName].SourceID)
   900  	}
   901  
   902  	// check start-task with an invalid source
   903  	invalidSource := "invalid-source"
   904  	mock = conn.InitVersionDB()
   905  	mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).
   906  		AddRow("version", "5.7.25-TiDB-v4.0.2"))
   907  	resp, err = server.StartTask(context.Background(), &pb.StartTaskRequest{
   908  		Task:    taskConfig,
   909  		Sources: []string{invalidSource},
   910  	})
   911  	require.NoError(t.T(), err)
   912  	require.False(t.T(), resp.Result)
   913  	require.Len(t.T(), resp.Sources, 1)
   914  	require.False(t.T(), resp.Sources[0].Result)
   915  	require.Equal(t.T(), invalidSource, resp.Sources[0].Source)
   916  
   917  	// test start task, but the first step check-task fails
   918  	bakCheckSyncConfigFunc := checker.CheckSyncConfigFunc
   919  	checker.CheckSyncConfigFunc = func(_ context.Context, _ []*config.SubTaskConfig, _, _ int64) (string, error) {
   920  		return "", errors.New(errCheckSyncConfig)
   921  	}
   922  	defer func() {
   923  		checker.CheckSyncConfigFunc = bakCheckSyncConfigFunc
   924  	}()
   925  	mock = conn.InitVersionDB()
   926  	mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).
   927  		AddRow("version", "5.7.25-TiDB-v4.0.2"))
   928  	resp, err = server.StartTask(context.Background(), &pb.StartTaskRequest{
   929  		Task:    taskConfig,
   930  		Sources: sources,
   931  	})
   932  	require.NoError(t.T(), err)
   933  	require.False(t.T(), resp.Result)
   934  	require.Regexp(t.T(), errCheckSyncConfigReg, resp.CheckResult)
   935  	t.clearSchedulerEnv(cancel, &wg)
   936  }
   937  
   938  func (t *testMasterSuite) TestStartTaskWithRemoveMeta() {
   939  	ctrl := gomock.NewController(t.T())
   940  	defer ctrl.Finish()
   941  
   942  	server := testDefaultMasterServer(t.T())
   943  	sources, workers := defaultWorkerSource()
   944  	server.etcdClient = t.etcdTestCli
   945  
   946  	// test start task successfully
   947  	var wg sync.WaitGroup
   948  	// taskName is relative to taskConfig
   949  	cfg := config.NewTaskConfig()
   950  	err := cfg.FromYaml(taskConfig)
   951  	require.NoError(t.T(), err)
   952  	taskName := cfg.Name
   953  	ctx, cancel := context.WithCancel(context.Background())
   954  	logger := log.L()
   955  
   956  	// test remove meta with pessimist
   957  	cfg.ShardMode = config.ShardPessimistic
   958  	req := &pb.StartTaskRequest{
   959  		Task:       strings.ReplaceAll(taskConfig, `shard-mode: ""`, fmt.Sprintf(`shard-mode: "%s"`, cfg.ShardMode)),
   960  		Sources:    sources,
   961  		RemoveMeta: true,
   962  	}
   963  	server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "",
   964  		makeWorkerClientsForHandle(ctrl, taskName, sources, workers, req))
   965  	server.pessimist = shardddl.NewPessimist(&logger, func(task string) []string { return sources })
   966  	server.optimist = shardddl.NewOptimist(&logger, server.scheduler.GetDownstreamMetaByTask)
   967  
   968  	var (
   969  		DDLs          = []string{"ALTER TABLE bar ADD COLUMN c1 INT"}
   970  		schema, table = "foo", "bar"
   971  		ID            = fmt.Sprintf("%s-`%s`.`%s`", taskName, schema, table)
   972  		i11           = pessimism.NewInfo(taskName, sources[0], schema, table, DDLs)
   973  		op2           = pessimism.NewOperation(ID, taskName, sources[0], DDLs, true, false)
   974  	)
   975  	_, err = pessimism.PutInfo(t.etcdTestCli, i11)
   976  	require.NoError(t.T(), err)
   977  	_, succ, err := pessimism.PutOperations(t.etcdTestCli, false, op2)
   978  	require.True(t.T(), succ)
   979  	require.NoError(t.T(), err)
   980  
   981  	require.NoError(t.T(), server.pessimist.Start(ctx, t.etcdTestCli))
   982  	require.NoError(t.T(), server.optimist.Start(ctx, t.etcdTestCli))
   983  
   984  	verMock := conn.InitVersionDB()
   985  	defer func() {
   986  		conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{}
   987  	}()
   988  	verMock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).
   989  		AddRow("version", "5.7.25-TiDB-v4.0.2"))
   990  	mock, err := conn.MockDefaultDBProvider()
   991  	require.NoError(t.T(), err)
   992  	mock.ExpectBegin()
   993  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.LoaderCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
   994  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.LightningCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
   995  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.SyncerCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
   996  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.SyncerShardMeta(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
   997  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.SyncerOnlineDDL(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
   998  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
   999  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorPendingChange(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
  1000  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorErrorChange(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
  1001  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorTableStatus(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
  1002  	mock.ExpectExec(fmt.Sprintf("DROP DATABASE IF EXISTS `%s`", loader.GetTaskInfoSchemaName(cfg.MetaSchema, cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
  1003  	mock.ExpectCommit()
  1004  	require.Greater(t.T(), len(server.pessimist.Locks()), 0)
  1005  
  1006  	resp, err := server.StartTask(context.Background(), req)
  1007  	wg.Add(1)
  1008  	go func() {
  1009  		defer wg.Done()
  1010  		time.Sleep(10 * time.Microsecond)
  1011  		// start another same task at the same time, should get err
  1012  		verMock2 := conn.InitVersionDB()
  1013  		verMock2.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).
  1014  			AddRow("version", "5.7.25-TiDB-v4.0.2"))
  1015  		resp1, err1 := server.StartTask(context.Background(), req)
  1016  		require.NoError(t.T(), err1)
  1017  		require.False(t.T(), resp1.Result)
  1018  		require.Equal(t.T(), terror.Annotate(terror.ErrSchedulerSubTaskExist.Generate(cfg.Name, sources),
  1019  			"while remove-meta is true").Error(), resp1.Msg)
  1020  	}()
  1021  	require.NoError(t.T(), err)
  1022  	require.True(t.T(), resp.Result, "start task failed: %s", resp.Msg)
  1023  	for _, source := range sources {
  1024  		t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running)
  1025  		tcm, _, err2 := ha.GetSubTaskCfg(t.etcdTestCli, source, taskName, 0)
  1026  		require.NoError(t.T(), err2)
  1027  		require.Contains(t.T(), tcm, taskName)
  1028  		require.Equal(t.T(), taskName, tcm[taskName].Name)
  1029  		require.Equal(t.T(), source, tcm[taskName].SourceID)
  1030  	}
  1031  
  1032  	require.Len(t.T(), server.pessimist.Locks(), 0)
  1033  	require.NoError(t.T(), mock.ExpectationsWereMet())
  1034  	ifm, _, err := pessimism.GetAllInfo(t.etcdTestCli)
  1035  	require.NoError(t.T(), err)
  1036  	require.Len(t.T(), ifm, 0)
  1037  	opm, _, err := pessimism.GetAllOperations(t.etcdTestCli)
  1038  	require.NoError(t.T(), err)
  1039  	require.Len(t.T(), opm, 0)
  1040  	t.clearSchedulerEnv(cancel, &wg)
  1041  
  1042  	// test remove meta with optimist
  1043  	ctx, cancel = context.WithCancel(context.Background())
  1044  	cfg.ShardMode = config.ShardOptimistic
  1045  	req = &pb.StartTaskRequest{
  1046  		Task:       strings.ReplaceAll(taskConfig, `shard-mode: ""`, fmt.Sprintf(`shard-mode: "%s"`, cfg.ShardMode)),
  1047  		Sources:    sources,
  1048  		RemoveMeta: true,
  1049  	}
  1050  	server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "",
  1051  		makeWorkerClientsForHandle(ctrl, taskName, sources, workers, req))
  1052  	server.pessimist = shardddl.NewPessimist(&logger, func(task string) []string { return sources })
  1053  	server.optimist = shardddl.NewOptimist(&logger, server.scheduler.GetDownstreamMetaByTask)
  1054  
  1055  	var (
  1056  		p           = parser.New()
  1057  		se          = tidbmock.NewContext()
  1058  		tblID int64 = 111
  1059  
  1060  		st1      = optimism.NewSourceTables(taskName, sources[0])
  1061  		DDLs1    = []string{"ALTER TABLE bar ADD COLUMN c1 INT"}
  1062  		tiBefore = createTableInfo(t.T(), p, se, tblID, `CREATE TABLE bar (id INT PRIMARY KEY)`)
  1063  		tiAfter1 = createTableInfo(t.T(), p, se, tblID, `CREATE TABLE bar (id INT PRIMARY KEY, c1 TEXT)`)
  1064  		info1    = optimism.NewInfo(taskName, sources[0], "foo-1", "bar-1", schema, table, DDLs1, tiBefore, []*model.TableInfo{tiAfter1})
  1065  		op1      = optimism.NewOperation(ID, taskName, sources[0], info1.UpSchema, info1.UpTable, DDLs1, optimism.ConflictNone, "", false, []string{})
  1066  	)
  1067  
  1068  	st1.AddTable("foo-1", "bar-1", schema, table)
  1069  	_, err = optimism.PutSourceTables(t.etcdTestCli, st1)
  1070  	require.NoError(t.T(), err)
  1071  	_, err = optimism.PutInfo(t.etcdTestCli, info1)
  1072  	require.NoError(t.T(), err)
  1073  	_, succ, err = optimism.PutOperation(t.etcdTestCli, false, op1, 0)
  1074  	require.True(t.T(), succ)
  1075  	require.NoError(t.T(), err)
  1076  
  1077  	err = server.pessimist.Start(ctx, t.etcdTestCli)
  1078  	require.NoError(t.T(), err)
  1079  	err = server.optimist.Start(ctx, t.etcdTestCli)
  1080  	require.NoError(t.T(), err)
  1081  
  1082  	verMock = conn.InitVersionDB()
  1083  	verMock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).
  1084  		AddRow("version", "5.7.25-TiDB-v4.0.2"))
  1085  	mock, err = conn.MockDefaultDBProvider()
  1086  	require.NoError(t.T(), err)
  1087  	mock.ExpectBegin()
  1088  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.LoaderCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
  1089  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.LightningCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
  1090  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.SyncerCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
  1091  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.SyncerShardMeta(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
  1092  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.SyncerOnlineDDL(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
  1093  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorCheckpoint(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
  1094  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorPendingChange(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
  1095  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorErrorChange(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
  1096  	mock.ExpectExec(fmt.Sprintf("DROP TABLE IF EXISTS `%s`.`%s`", cfg.MetaSchema, cputil.ValidatorTableStatus(cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
  1097  	mock.ExpectExec(fmt.Sprintf("DROP DATABASE IF EXISTS `%s`", loader.GetTaskInfoSchemaName(cfg.MetaSchema, cfg.Name))).WillReturnResult(sqlmock.NewResult(1, 1))
  1098  	mock.ExpectCommit()
  1099  	require.Greater(t.T(), len(server.optimist.Locks()), 0)
  1100  
  1101  	resp, err = server.StartTask(context.Background(), req)
  1102  	wg.Add(1)
  1103  	go func() {
  1104  		defer wg.Done()
  1105  		time.Sleep(10 * time.Microsecond)
  1106  		// start another same task at the same time, should get err
  1107  		vermock2 := conn.InitVersionDB()
  1108  		vermock2.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).
  1109  			AddRow("version", "5.7.25-TiDB-v4.0.2"))
  1110  		resp1, err1 := server.StartTask(context.Background(), req)
  1111  		require.NoError(t.T(), err1)
  1112  		require.False(t.T(), resp1.Result)
  1113  		require.Equal(t.T(), terror.Annotate(terror.ErrSchedulerSubTaskExist.Generate(cfg.Name, sources),
  1114  			"while remove-meta is true").Error(), resp1.Msg)
  1115  	}()
  1116  	require.NoError(t.T(), err)
  1117  	require.True(t.T(), resp.Result)
  1118  	for _, source := range sources {
  1119  		t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running)
  1120  		tcm, _, err2 := ha.GetSubTaskCfg(t.etcdTestCli, source, taskName, 0)
  1121  		require.NoError(t.T(), err2)
  1122  		require.Contains(t.T(), tcm, taskName)
  1123  		require.Equal(t.T(), taskName, tcm[taskName].Name)
  1124  		require.Equal(t.T(), source, tcm[taskName].SourceID)
  1125  	}
  1126  
  1127  	require.Len(t.T(), server.optimist.Locks(), 0)
  1128  	require.NoError(t.T(), mock.ExpectationsWereMet())
  1129  	ifm2, _, err := optimism.GetAllInfo(t.etcdTestCli)
  1130  	require.NoError(t.T(), err)
  1131  	require.Len(t.T(), ifm2, 0)
  1132  	opm2, _, err := optimism.GetAllOperations(t.etcdTestCli)
  1133  	require.NoError(t.T(), err)
  1134  	require.Len(t.T(), opm2, 0)
  1135  	tbm, _, err := optimism.GetAllSourceTables(t.etcdTestCli)
  1136  	require.NoError(t.T(), err)
  1137  	require.Len(t.T(), tbm, 0)
  1138  
  1139  	t.clearSchedulerEnv(cancel, &wg)
  1140  }
  1141  
  1142  func (t *testMasterSuite) TestOperateTask() {
  1143  	var (
  1144  		taskName = "unit-test-task"
  1145  		pauseOp  = pb.TaskOp_Pause
  1146  	)
  1147  
  1148  	ctrl := gomock.NewController(t.T())
  1149  	defer ctrl.Finish()
  1150  	server := testDefaultMasterServer(t.T())
  1151  	server.etcdClient = t.etcdTestCli
  1152  	sources, workers := defaultWorkerSource()
  1153  
  1154  	// test operate-task with invalid task name
  1155  	resp, err := server.OperateTask(context.Background(), &pb.OperateTaskRequest{
  1156  		Op:   pauseOp,
  1157  		Name: taskName,
  1158  	})
  1159  	require.NoError(t.T(), err)
  1160  	require.False(t.T(), resp.Result)
  1161  	require.Equal(t.T(), fmt.Sprintf("task %s has no source or not exist, please check the task name and status", taskName), resp.Msg)
  1162  
  1163  	// 1. start task
  1164  	taskName = "test"
  1165  	var wg sync.WaitGroup
  1166  	ctx, cancel := context.WithCancel(context.Background())
  1167  	startReq := &pb.StartTaskRequest{
  1168  		Task:    taskConfig,
  1169  		Sources: sources,
  1170  	}
  1171  	pauseReq := &pb.OperateTaskRequest{
  1172  		Op:   pauseOp,
  1173  		Name: taskName,
  1174  	}
  1175  	resumeReq := &pb.OperateTaskRequest{
  1176  		Op:   pb.TaskOp_Resume,
  1177  		Name: taskName,
  1178  	}
  1179  	stopReq1 := &pb.OperateTaskRequest{
  1180  		Op:      pb.TaskOp_Delete,
  1181  		Name:    taskName,
  1182  		Sources: []string{sources[0]},
  1183  	}
  1184  	stopReq2 := &pb.OperateTaskRequest{
  1185  		Op:   pb.TaskOp_Delete,
  1186  		Name: taskName,
  1187  	}
  1188  	sourceResps := []*pb.CommonWorkerResponse{{Result: true, Source: sources[0]}, {Result: true, Source: sources[1]}}
  1189  	server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "",
  1190  		makeWorkerClientsForHandle(ctrl, taskName, sources, workers, startReq, pauseReq, resumeReq, stopReq1, stopReq2))
  1191  	mock := conn.InitVersionDB()
  1192  	defer func() {
  1193  		conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{}
  1194  	}()
  1195  	mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).
  1196  		AddRow("version", "5.7.25-TiDB-v4.0.2"))
  1197  	stResp, err := server.StartTask(context.Background(), startReq)
  1198  	require.NoError(t.T(), err)
  1199  	require.True(t.T(), stResp.Result)
  1200  	for _, source := range sources {
  1201  		t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running)
  1202  	}
  1203  
  1204  	require.Equal(t.T(), sourceResps, stResp.Sources)
  1205  	// 2. pause task
  1206  	resp, err = server.OperateTask(context.Background(), pauseReq)
  1207  	require.NoError(t.T(), err)
  1208  	require.True(t.T(), resp.Result)
  1209  	for _, source := range sources {
  1210  		t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Paused)
  1211  	}
  1212  
  1213  	require.Equal(t.T(), sourceResps, resp.Sources)
  1214  	// 3. resume task
  1215  	resp, err = server.OperateTask(context.Background(), resumeReq)
  1216  	require.NoError(t.T(), err)
  1217  	require.True(t.T(), resp.Result)
  1218  	for _, source := range sources {
  1219  		t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running)
  1220  	}
  1221  	require.Equal(t.T(), sourceResps, resp.Sources)
  1222  	// 4. test stop task successfully, remove partial sources
  1223  	resp, err = server.OperateTask(context.Background(), stopReq1)
  1224  	require.NoError(t.T(), err)
  1225  	require.True(t.T(), resp.Result)
  1226  	require.Equal(t.T(), []string{sources[1]}, server.getTaskSourceNameList(taskName))
  1227  	require.Equal(t.T(), []*pb.CommonWorkerResponse{{Result: true, Source: sources[0]}}, resp.Sources)
  1228  	// 5. test stop task successfully, remove all workers
  1229  	resp, err = server.OperateTask(context.Background(), stopReq2)
  1230  	require.NoError(t.T(), err)
  1231  	require.True(t.T(), resp.Result)
  1232  	require.Len(t.T(), server.getTaskSourceNameList(taskName), 0)
  1233  	require.Equal(t.T(), []*pb.CommonWorkerResponse{{Result: true, Source: sources[1]}}, resp.Sources)
  1234  	t.clearSchedulerEnv(cancel, &wg)
  1235  }
  1236  
  1237  func (t *testMasterSuite) TestPurgeWorkerRelay() {
  1238  	ctrl := gomock.NewController(t.T())
  1239  	defer ctrl.Finish()
  1240  
  1241  	server := testDefaultMasterServer(t.T())
  1242  	sources, workers := defaultWorkerSource()
  1243  	var (
  1244  		now      = time.Now().Unix()
  1245  		filename = "mysql-bin.000005"
  1246  	)
  1247  
  1248  	// mock PurgeRelay request
  1249  	mockPurgeRelay := func(rpcSuccess bool) {
  1250  		for i, worker := range workers {
  1251  			rets := []interface{}{
  1252  				nil,
  1253  				errors.New(errGRPCFailed),
  1254  			}
  1255  			if rpcSuccess {
  1256  				rets = []interface{}{
  1257  					&pb.CommonWorkerResponse{
  1258  						Result: true,
  1259  						Source: sources[i],
  1260  					},
  1261  					nil,
  1262  				}
  1263  			}
  1264  			mockWorkerClient := pbmock.NewMockWorkerClient(ctrl)
  1265  			mockWorkerClient.EXPECT().PurgeRelay(
  1266  				gomock.Any(),
  1267  				&pb.PurgeRelayRequest{
  1268  					Time:     now,
  1269  					Filename: filename,
  1270  				},
  1271  			).Return(rets...)
  1272  			t.workerClients[worker] = newMockRPCClient(mockWorkerClient)
  1273  		}
  1274  	}
  1275  
  1276  	var wg sync.WaitGroup
  1277  	ctx, cancel := context.WithCancel(context.Background())
  1278  	server.scheduler, _ = t.testMockSchedulerForRelay(ctx, &wg, nil, nil, "", t.workerClients)
  1279  
  1280  	// test PurgeWorkerRelay with invalid dm-worker[s]
  1281  	resp, err := server.PurgeWorkerRelay(context.Background(), &pb.PurgeWorkerRelayRequest{
  1282  		Sources:  []string{"invalid-source1", "invalid-source2"},
  1283  		Time:     now,
  1284  		Filename: filename,
  1285  	})
  1286  	require.NoError(t.T(), err)
  1287  	require.True(t.T(), resp.Result)
  1288  	require.Len(t.T(), resp.Sources, 2)
  1289  	for _, w := range resp.Sources {
  1290  		require.False(t.T(), w.Result)
  1291  		require.Regexp(t.T(), "relay worker for source .* not found.*", w.Msg)
  1292  	}
  1293  	t.clearSchedulerEnv(cancel, &wg)
  1294  
  1295  	ctx, cancel = context.WithCancel(context.Background())
  1296  	// test PurgeWorkerRelay successfully
  1297  	mockPurgeRelay(true)
  1298  	server.scheduler, _ = t.testMockSchedulerForRelay(ctx, &wg, sources, workers, "", t.workerClients)
  1299  	resp, err = server.PurgeWorkerRelay(context.Background(), &pb.PurgeWorkerRelayRequest{
  1300  		Sources:  sources,
  1301  		Time:     now,
  1302  		Filename: filename,
  1303  	})
  1304  	require.NoError(t.T(), err)
  1305  	require.True(t.T(), resp.Result)
  1306  	require.Len(t.T(), resp.Sources, 2)
  1307  	for _, w := range resp.Sources {
  1308  		require.True(t.T(), w.Result)
  1309  	}
  1310  	t.clearSchedulerEnv(cancel, &wg)
  1311  
  1312  	ctx, cancel = context.WithCancel(context.Background())
  1313  	// test PurgeWorkerRelay with error response
  1314  	mockPurgeRelay(false)
  1315  	server.scheduler, _ = t.testMockSchedulerForRelay(ctx, &wg, sources, workers, "", t.workerClients)
  1316  	resp, err = server.PurgeWorkerRelay(context.Background(), &pb.PurgeWorkerRelayRequest{
  1317  		Sources:  sources,
  1318  		Time:     now,
  1319  		Filename: filename,
  1320  	})
  1321  	require.NoError(t.T(), err)
  1322  	require.True(t.T(), resp.Result)
  1323  	require.Len(t.T(), resp.Sources, 2)
  1324  	for _, w := range resp.Sources {
  1325  		require.False(t.T(), w.Result)
  1326  		require.Regexp(t.T(), errGRPCFailedReg, w.Msg)
  1327  	}
  1328  	t.clearSchedulerEnv(cancel, &wg)
  1329  }
  1330  
  1331  func (t *testMasterSuite) TestOperateWorkerRelayTask() {
  1332  	ctrl := gomock.NewController(t.T())
  1333  	defer ctrl.Finish()
  1334  
  1335  	server := testDefaultMasterServer(t.T())
  1336  	sources, workers := defaultWorkerSource()
  1337  	var wg sync.WaitGroup
  1338  	ctx, cancel := context.WithCancel(context.Background())
  1339  	pauseReq := &pb.OperateWorkerRelayRequest{
  1340  		Sources: sources,
  1341  		Op:      pb.RelayOp_PauseRelay,
  1342  	}
  1343  	resumeReq := &pb.OperateWorkerRelayRequest{
  1344  		Sources: sources,
  1345  		Op:      pb.RelayOp_ResumeRelay,
  1346  	}
  1347  	server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "",
  1348  		makeWorkerClientsForHandle(ctrl, "", sources, workers, pauseReq, resumeReq))
  1349  
  1350  	// test OperateWorkerRelayTask with invalid dm-worker[s]
  1351  	resp, err := server.OperateWorkerRelayTask(context.Background(), &pb.OperateWorkerRelayRequest{
  1352  		Sources: []string{"invalid-source1", "invalid-source2"},
  1353  		Op:      pb.RelayOp_PauseRelay,
  1354  	})
  1355  	require.NoError(t.T(), err)
  1356  	require.False(t.T(), resp.Result)
  1357  	require.Contains(t.T(), resp.Msg, "need to update expectant relay stage not exist")
  1358  
  1359  	sourceResps := []*pb.CommonWorkerResponse{{Result: true, Source: sources[0]}, {Result: true, Source: sources[1]}}
  1360  	// 1. test pause-relay successfully
  1361  	resp, err = server.OperateWorkerRelayTask(context.Background(), pauseReq)
  1362  	require.NoError(t.T(), err)
  1363  	require.True(t.T(), resp.Result)
  1364  	for _, source := range sources {
  1365  		t.relayStageMatch(server.scheduler, source, pb.Stage_Paused)
  1366  	}
  1367  	require.Equal(t.T(), sourceResps, resp.Sources)
  1368  	// 2. test resume-relay successfully
  1369  	resp, err = server.OperateWorkerRelayTask(context.Background(), resumeReq)
  1370  	require.NoError(t.T(), err)
  1371  	require.True(t.T(), resp.Result)
  1372  	for _, source := range sources {
  1373  		t.relayStageMatch(server.scheduler, source, pb.Stage_Running)
  1374  	}
  1375  	require.Equal(t.T(), sourceResps, resp.Sources)
  1376  	t.clearSchedulerEnv(cancel, &wg)
  1377  }
  1378  
  1379  func (t *testMasterSuite) TestServer() {
  1380  	var err error
  1381  	cfg := NewConfig()
  1382  	require.NoError(t.T(), cfg.FromContent(SampleConfig))
  1383  	cfg.PeerUrls = "http://127.0.0.1:8294"
  1384  	cfg.DataDir = t.T().TempDir()
  1385  	cfg.MasterAddr = tempurl.Alloc()[len("http://"):]
  1386  	cfg.AdvertiseAddr = cfg.MasterAddr
  1387  
  1388  	basicServiceCheck := func(cfg *Config) {
  1389  		t.testHTTPInterface(fmt.Sprintf("http://%s/status", cfg.AdvertiseAddr), []byte(version.GetRawInfo()))
  1390  		t.testHTTPInterface(fmt.Sprintf("http://%s/debug/pprof/", cfg.AdvertiseAddr), []byte("Types of profiles available"))
  1391  		// HTTP API in this unit test is unstable, but we test it in `http_apis` in integration test.
  1392  		// t.testHTTPInterface( fmt.Sprintf("http://%s/apis/v1alpha1/status/test-task", cfg.AdvertiseAddr), []byte("task test-task has no source or not exist"))
  1393  	}
  1394  	t.testNormalServerLifecycle(cfg, func(cfg *Config) {
  1395  		basicServiceCheck(cfg)
  1396  
  1397  		// try to start another server with the same address.  Expect it to fail
  1398  		// unset an etcd variable because it will cause checking on exit, and block forever
  1399  		err = os.Unsetenv(verify.ENV_VERIFY)
  1400  		require.NoError(t.T(), err)
  1401  
  1402  		dupServer := NewServer(cfg)
  1403  		ctx, cancel := context.WithCancel(context.Background())
  1404  		defer cancel()
  1405  		err1 := dupServer.Start(ctx)
  1406  		require.True(t.T(), terror.ErrMasterStartEmbedEtcdFail.Equal(err1))
  1407  		require.Contains(t.T(), err1.Error(), "bind: address already in use")
  1408  
  1409  		err = os.Setenv(verify.ENV_VERIFY, verify.ENV_VERIFY_ALL_VALUE)
  1410  		require.NoError(t.T(), err)
  1411  	})
  1412  
  1413  	// test the listen address is 0.0.0.0
  1414  	masterAddrStr := tempurl.Alloc()[len("http://"):]
  1415  	_, masterPort, err := net.SplitHostPort(masterAddrStr)
  1416  	require.NoError(t.T(), err)
  1417  	cfg2 := NewConfig()
  1418  	*cfg2 = *cfg
  1419  	cfg2.MasterAddr = fmt.Sprintf("0.0.0.0:%s", masterPort)
  1420  	cfg2.AdvertiseAddr = masterAddrStr
  1421  	t.testNormalServerLifecycle(cfg2, basicServiceCheck)
  1422  }
  1423  
  1424  func (t *testMasterSuite) TestMasterTLS() {
  1425  	var err error
  1426  	masterAddr := tempurl.Alloc()[len("http://"):]
  1427  	peerAddr := tempurl.Alloc()[len("http://"):]
  1428  	_, masterPort, err := net.SplitHostPort(masterAddr)
  1429  	require.NoError(t.T(), err)
  1430  	_, peerPort, err := net.SplitHostPort(peerAddr)
  1431  	require.NoError(t.T(), err)
  1432  
  1433  	caPath := pwd + "/tls_for_test/ca.pem"
  1434  	certPath := pwd + "/tls_for_test/dm.pem"
  1435  	keyPath := pwd + "/tls_for_test/dm.key"
  1436  
  1437  	// all with `https://` prefix
  1438  	cfg := NewConfig()
  1439  	err = cfg.Parse([]string{
  1440  		"--name=master-tls",
  1441  		fmt.Sprintf("--data-dir=%s", t.T().TempDir()),
  1442  		fmt.Sprintf("--master-addr=https://%s", masterAddr),
  1443  		fmt.Sprintf("--advertise-addr=https://%s", masterAddr),
  1444  		fmt.Sprintf("--peer-urls=https://%s", peerAddr),
  1445  		fmt.Sprintf("--advertise-peer-urls=https://%s", peerAddr),
  1446  		fmt.Sprintf("--initial-cluster=master-tls=https://%s", peerAddr),
  1447  		"--ssl-ca=" + caPath,
  1448  		"--ssl-cert=" + certPath,
  1449  		"--ssl-key=" + keyPath,
  1450  	})
  1451  	require.NoError(t.T(), err)
  1452  	t.testTLSPrefix(cfg)
  1453  	require.Equal(t.T(), masterAddr, cfg.MasterAddr)
  1454  	require.Equal(t.T(), masterAddr, cfg.AdvertiseAddr)
  1455  	require.Equal(t.T(), "https://"+peerAddr, cfg.PeerUrls)
  1456  	require.Equal(t.T(), "https://"+peerAddr, cfg.AdvertisePeerUrls)
  1457  	require.Equal(t.T(), "master-tls=https://"+peerAddr, cfg.InitialCluster)
  1458  
  1459  	// no `https://` prefix for `--master-addr`
  1460  	cfg = NewConfig()
  1461  	err = cfg.Parse([]string{
  1462  		"--name=master-tls",
  1463  		fmt.Sprintf("--data-dir=%s", t.T().TempDir()),
  1464  		fmt.Sprintf("--master-addr=%s", masterAddr),
  1465  		fmt.Sprintf("--advertise-addr=https://%s", masterAddr),
  1466  		fmt.Sprintf("--peer-urls=https://%s", peerAddr),
  1467  		fmt.Sprintf("--advertise-peer-urls=https://%s", peerAddr),
  1468  		fmt.Sprintf("--initial-cluster=master-tls=https://%s", peerAddr),
  1469  		"--ssl-ca=" + caPath,
  1470  		"--ssl-cert=" + certPath,
  1471  		"--ssl-key=" + keyPath,
  1472  	})
  1473  	require.NoError(t.T(), err)
  1474  	t.testTLSPrefix(cfg)
  1475  
  1476  	// no `https://` prefix for `--master-addr` and `--advertise-addr`
  1477  	cfg = NewConfig()
  1478  	err = cfg.Parse([]string{
  1479  		"--name=master-tls",
  1480  		fmt.Sprintf("--data-dir=%s", t.T().TempDir()),
  1481  		fmt.Sprintf("--master-addr=%s", masterAddr),
  1482  		fmt.Sprintf("--advertise-addr=%s", masterAddr),
  1483  		fmt.Sprintf("--peer-urls=https://%s", peerAddr),
  1484  		fmt.Sprintf("--advertise-peer-urls=https://%s", peerAddr),
  1485  		fmt.Sprintf("--initial-cluster=master-tls=https://%s", peerAddr),
  1486  		"--ssl-ca=" + caPath,
  1487  		"--ssl-cert=" + certPath,
  1488  		"--ssl-key=" + keyPath,
  1489  	})
  1490  	require.NoError(t.T(), err)
  1491  	t.testTLSPrefix(cfg)
  1492  
  1493  	// no `https://` prefix for `--master-addr`, `--advertise-addr` and `--peer-urls`
  1494  	cfg = NewConfig()
  1495  	err = cfg.Parse([]string{
  1496  		"--name=master-tls",
  1497  		fmt.Sprintf("--data-dir=%s", t.T().TempDir()),
  1498  		fmt.Sprintf("--master-addr=%s", masterAddr),
  1499  		fmt.Sprintf("--advertise-addr=%s", masterAddr),
  1500  		fmt.Sprintf("--peer-urls=%s", peerAddr),
  1501  		fmt.Sprintf("--advertise-peer-urls=https://%s", peerAddr),
  1502  		fmt.Sprintf("--initial-cluster=master-tls=https://%s", peerAddr),
  1503  		"--ssl-ca=" + caPath,
  1504  		"--ssl-cert=" + certPath,
  1505  		"--ssl-key=" + keyPath,
  1506  	})
  1507  	require.NoError(t.T(), err)
  1508  	t.testTLSPrefix(cfg)
  1509  
  1510  	// no `https://` prefix for `--master-addr`, `--advertise-addr`, `--peer-urls` and `--advertise-peer-urls`
  1511  	cfg = NewConfig()
  1512  	err = cfg.Parse([]string{
  1513  		"--name=master-tls",
  1514  		fmt.Sprintf("--data-dir=%s", t.T().TempDir()),
  1515  		fmt.Sprintf("--master-addr=%s", masterAddr),
  1516  		fmt.Sprintf("--advertise-addr=%s", masterAddr),
  1517  		fmt.Sprintf("--peer-urls=%s", peerAddr),
  1518  		fmt.Sprintf("--advertise-peer-urls=%s", peerAddr),
  1519  		fmt.Sprintf("--initial-cluster=master-tls=https://%s", peerAddr),
  1520  		"--ssl-ca=" + caPath,
  1521  		"--ssl-cert=" + certPath,
  1522  		"--ssl-key=" + keyPath,
  1523  	})
  1524  	require.NoError(t.T(), err)
  1525  	t.testTLSPrefix(cfg)
  1526  
  1527  	// all without `https://`/`http://` prefix
  1528  	cfg = NewConfig()
  1529  	err = cfg.Parse([]string{
  1530  		"--name=master-tls",
  1531  		fmt.Sprintf("--data-dir=%s", t.T().TempDir()),
  1532  		fmt.Sprintf("--master-addr=%s", masterAddr),
  1533  		fmt.Sprintf("--advertise-addr=%s", masterAddr),
  1534  		fmt.Sprintf("--peer-urls=%s", peerAddr),
  1535  		fmt.Sprintf("--advertise-peer-urls=%s", peerAddr),
  1536  		fmt.Sprintf("--initial-cluster=master-tls=%s", peerAddr),
  1537  		"--ssl-ca=" + caPath,
  1538  		"--ssl-cert=" + certPath,
  1539  		"--ssl-key=" + keyPath,
  1540  	})
  1541  	require.NoError(t.T(), err)
  1542  	t.testTLSPrefix(cfg)
  1543  	require.Equal(t.T(), masterAddr, cfg.MasterAddr)
  1544  	require.Equal(t.T(), masterAddr, cfg.AdvertiseAddr)
  1545  	require.Equal(t.T(), "https://"+peerAddr, cfg.PeerUrls)
  1546  	require.Equal(t.T(), "https://"+peerAddr, cfg.AdvertisePeerUrls)
  1547  	require.Equal(t.T(), "master-tls=https://"+peerAddr, cfg.InitialCluster)
  1548  
  1549  	// all with `http://` prefix, but with TLS enabled.
  1550  	cfg = NewConfig()
  1551  	err = cfg.Parse([]string{
  1552  		"--name=master-tls",
  1553  		fmt.Sprintf("--data-dir=%s", t.T().TempDir()),
  1554  		fmt.Sprintf("--master-addr=http://%s", masterAddr),
  1555  		fmt.Sprintf("--advertise-addr=http://%s", masterAddr),
  1556  		fmt.Sprintf("--peer-urls=http://%s", peerAddr),
  1557  		fmt.Sprintf("--advertise-peer-urls=http://%s", peerAddr),
  1558  		fmt.Sprintf("--initial-cluster=master-tls=http://%s", peerAddr),
  1559  		"--ssl-ca=" + caPath,
  1560  		"--ssl-cert=" + certPath,
  1561  		"--ssl-key=" + keyPath,
  1562  	})
  1563  	require.NoError(t.T(), err)
  1564  	require.Equal(t.T(), masterAddr, cfg.MasterAddr)
  1565  	require.Equal(t.T(), masterAddr, cfg.AdvertiseAddr)
  1566  	require.Equal(t.T(), "https://"+peerAddr, cfg.PeerUrls)
  1567  	require.Equal(t.T(), "https://"+peerAddr, cfg.AdvertisePeerUrls)
  1568  	require.Equal(t.T(), "master-tls=https://"+peerAddr, cfg.InitialCluster)
  1569  
  1570  	// different prefix for `--peer-urls` and `--initial-cluster`
  1571  	cfg = NewConfig()
  1572  	err = cfg.Parse([]string{
  1573  		"--name=master-tls",
  1574  		fmt.Sprintf("--data-dir=%s", t.T().TempDir()),
  1575  		fmt.Sprintf("--master-addr=https://%s", masterAddr),
  1576  		fmt.Sprintf("--advertise-addr=https://%s", masterAddr),
  1577  		fmt.Sprintf("--peer-urls=https://%s", peerAddr),
  1578  		fmt.Sprintf("--advertise-peer-urls=https://%s", peerAddr),
  1579  		fmt.Sprintf("--initial-cluster=master-tls=http://%s", peerAddr),
  1580  		"--ssl-ca=" + caPath,
  1581  		"--ssl-cert=" + certPath,
  1582  		"--ssl-key=" + keyPath,
  1583  	})
  1584  	require.NoError(t.T(), err)
  1585  	require.Equal(t.T(), masterAddr, cfg.MasterAddr)
  1586  	require.Equal(t.T(), masterAddr, cfg.AdvertiseAddr)
  1587  	require.Equal(t.T(), "https://"+peerAddr, cfg.PeerUrls)
  1588  	require.Equal(t.T(), "https://"+peerAddr, cfg.AdvertisePeerUrls)
  1589  	require.Equal(t.T(), "master-tls=https://"+peerAddr, cfg.InitialCluster)
  1590  	t.testTLSPrefix(cfg)
  1591  
  1592  	// listen address set to 0.0.0.0
  1593  	cfg = NewConfig()
  1594  	err = cfg.Parse([]string{
  1595  		"--name=master-tls",
  1596  		fmt.Sprintf("--data-dir=%s", t.T().TempDir()),
  1597  		fmt.Sprintf("--master-addr=0.0.0.0:%s", masterPort),
  1598  		fmt.Sprintf("--advertise-addr=https://%s", masterAddr),
  1599  		fmt.Sprintf("--peer-urls=0.0.0.0:%s", peerPort),
  1600  		fmt.Sprintf("--advertise-peer-urls=https://%s", peerAddr),
  1601  		fmt.Sprintf("--initial-cluster=master-tls=https://%s", peerAddr),
  1602  		"--ssl-ca=" + caPath,
  1603  		"--ssl-cert=" + certPath,
  1604  		"--ssl-key=" + keyPath,
  1605  	})
  1606  	require.NoError(t.T(), err)
  1607  	t.testTLSPrefix(cfg)
  1608  }
  1609  
  1610  func (t *testMasterSuite) testTLSPrefix(cfg *Config) {
  1611  	t.testNormalServerLifecycle(cfg, func(cfg *Config) {
  1612  		t.testHTTPInterface(fmt.Sprintf("https://%s/status", cfg.AdvertiseAddr), []byte(version.GetRawInfo()))
  1613  		t.testHTTPInterface(fmt.Sprintf("https://%s/debug/pprof/", cfg.AdvertiseAddr), []byte("Types of profiles available"))
  1614  	})
  1615  }
  1616  
  1617  func (t *testMasterSuite) testNormalServerLifecycle(cfg *Config, checkLogic func(*Config)) {
  1618  	var err error
  1619  	s := NewServer(cfg)
  1620  
  1621  	ctx, cancel := context.WithCancel(context.Background())
  1622  	err = s.Start(ctx)
  1623  	require.NoError(t.T(), err)
  1624  
  1625  	checkLogic(cfg)
  1626  
  1627  	// close
  1628  	cancel()
  1629  	s.Close()
  1630  
  1631  	require.Eventually(t.T(), func() bool {
  1632  		return s.closed.Load()
  1633  	}, 3*time.Second, 100*time.Millisecond)
  1634  }
  1635  
  1636  func (t *testMasterSuite) testHTTPInterface(url string, contain []byte) {
  1637  	// we use HTTPS in some test cases.
  1638  	tlsConfig, err := toolutils.NewTLSConfig(
  1639  		toolutils.WithCAPath(pwd+"/tls_for_test/ca.pem"),
  1640  		toolutils.WithCertAndKeyPath(pwd+"/tls_for_test/dm.pem", pwd+"/tls_for_test/dm.key"),
  1641  	)
  1642  	require.NoError(t.T(), err)
  1643  	cli := toolutils.ClientWithTLS(tlsConfig)
  1644  
  1645  	// nolint:noctx
  1646  	resp, err := cli.Get(url)
  1647  	require.NoError(t.T(), err)
  1648  	defer resp.Body.Close()
  1649  	require.Equal(t.T(), http.StatusOK, resp.StatusCode)
  1650  
  1651  	body, err := io.ReadAll(resp.Body)
  1652  	require.NoError(t.T(), err)
  1653  	require.True(t.T(), bytes.Contains(body, contain))
  1654  }
  1655  
  1656  func (t *testMasterSuite) TestJoinMember() {
  1657  	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
  1658  
  1659  	// create a new cluster
  1660  	cfg1 := NewConfig()
  1661  	require.NoError(t.T(), cfg1.FromContent(SampleConfig))
  1662  	cfg1.Name = "dm-master-1"
  1663  	cfg1.DataDir = t.T().TempDir()
  1664  	cfg1.MasterAddr = tempurl.Alloc()[len("http://"):]
  1665  	cfg1.AdvertiseAddr = cfg1.MasterAddr
  1666  	cfg1.PeerUrls = tempurl.Alloc()
  1667  	cfg1.AdvertisePeerUrls = cfg1.PeerUrls
  1668  	cfg1.InitialCluster = fmt.Sprintf("%s=%s", cfg1.Name, cfg1.AdvertisePeerUrls)
  1669  
  1670  	s1 := NewServer(cfg1)
  1671  	require.NoError(t.T(), s1.Start(ctx))
  1672  	defer s1.Close()
  1673  
  1674  	// wait the first one become the leader
  1675  	require.Eventually(t.T(), func() bool {
  1676  		return s1.election.IsLeader()
  1677  	}, 3*time.Second, 100*time.Millisecond)
  1678  
  1679  	// join to an existing cluster
  1680  	cfg2 := NewConfig()
  1681  	require.NoError(t.T(), cfg2.FromContent(SampleConfig))
  1682  	cfg2.Name = "dm-master-2"
  1683  	cfg2.DataDir = t.T().TempDir()
  1684  	cfg2.MasterAddr = tempurl.Alloc()[len("http://"):]
  1685  	cfg2.AdvertiseAddr = cfg2.MasterAddr
  1686  	cfg2.PeerUrls = tempurl.Alloc()
  1687  	cfg2.AdvertisePeerUrls = cfg2.PeerUrls
  1688  	cfg2.Join = cfg1.MasterAddr // join to an existing cluster
  1689  
  1690  	s2 := NewServer(cfg2)
  1691  	require.NoError(t.T(), s2.Start(ctx))
  1692  	defer s2.Close()
  1693  
  1694  	client, err := etcdutil.CreateClient(strings.Split(cfg1.AdvertisePeerUrls, ","), nil)
  1695  	require.NoError(t.T(), err)
  1696  	defer client.Close()
  1697  
  1698  	// verify members
  1699  	listResp, err := etcdutil.ListMembers(client)
  1700  	require.NoError(t.T(), err)
  1701  	require.Len(t.T(), listResp.Members, 2)
  1702  	names := make(map[string]struct{}, len(listResp.Members))
  1703  	for _, m := range listResp.Members {
  1704  		names[m.Name] = struct{}{}
  1705  	}
  1706  	require.Contains(t.T(), names, cfg1.Name)
  1707  	require.Contains(t.T(), names, cfg2.Name)
  1708  
  1709  	// s1 is still the leader
  1710  	_, leaderID, _, err := s2.election.LeaderInfo(ctx)
  1711  
  1712  	require.NoError(t.T(), err)
  1713  	require.Equal(t.T(), leaderID, cfg1.Name)
  1714  
  1715  	cfg3 := NewConfig()
  1716  	require.NoError(t.T(), cfg3.FromContent(SampleConfig))
  1717  	cfg3.Name = "dm-master-3"
  1718  	cfg3.DataDir = t.T().TempDir()
  1719  	cfg3.MasterAddr = tempurl.Alloc()[len("http://"):]
  1720  	cfg3.AdvertiseAddr = cfg3.MasterAddr
  1721  	cfg3.PeerUrls = tempurl.Alloc()
  1722  	cfg3.AdvertisePeerUrls = cfg3.PeerUrls
  1723  	cfg3.Join = cfg1.MasterAddr // join to an existing cluster
  1724  
  1725  	// mock join master without wal dir
  1726  	require.NoError(t.T(), os.Mkdir(filepath.Join(cfg3.DataDir, "member"), privateDirMode))
  1727  	require.NoError(t.T(), os.Mkdir(filepath.Join(cfg3.DataDir, "member", "join"), privateDirMode))
  1728  	s3 := NewServer(cfg3)
  1729  	// avoid join a unhealthy cluster
  1730  	require.Eventually(t.T(), func() bool {
  1731  		return s3.Start(ctx) == nil
  1732  	}, 30*time.Second, time.Second)
  1733  	defer s3.Close()
  1734  
  1735  	// verify members
  1736  	listResp, err = etcdutil.ListMembers(client)
  1737  	require.NoError(t.T(), err)
  1738  	require.Len(t.T(), listResp.Members, 3)
  1739  	names = make(map[string]struct{}, len(listResp.Members))
  1740  	for _, m := range listResp.Members {
  1741  		names[m.Name] = struct{}{}
  1742  	}
  1743  	require.Contains(t.T(), names, cfg1.Name)
  1744  	require.Contains(t.T(), names, cfg2.Name)
  1745  	require.Contains(t.T(), names, cfg3.Name)
  1746  
  1747  	cancel()
  1748  	t.clearEtcdEnv()
  1749  }
  1750  
// TestOperateSource exercises Server.OperateSource against a single started
// master: it adds one source, adds several sources in one request (first with
// a repeated source ID, then corrected), tries to stop a source that does not
// exist, and finally stops all three sources after three workers have been
// added to the scheduler and kept alive in etcd.
func (t *testMasterSuite) TestOperateSource() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	ctrl := gomock.NewController(t.T())
	defer ctrl.Finish()

	// create a new cluster
	cfg1 := NewConfig()
	require.NoError(t.T(), cfg1.FromContent(SampleConfig))
	cfg1.Name = "dm-master-1"
	cfg1.DataDir = t.T().TempDir()
	cfg1.MasterAddr = tempurl.Alloc()[len("http://"):]
	cfg1.AdvertiseAddr = cfg1.MasterAddr
	cfg1.PeerUrls = tempurl.Alloc()
	cfg1.AdvertisePeerUrls = cfg1.PeerUrls
	cfg1.InitialCluster = fmt.Sprintf("%s=%s", cfg1.Name, cfg1.AdvertisePeerUrls)

	s1 := NewServer(cfg1)
	// NOTE(review): presumably marks s1 as the leader up front so requests are
	// handled locally — confirm against the leader handling in the server.
	s1.leader.Store(oneselfLeader)
	require.NoError(t.T(), s1.Start(ctx))
	defer s1.Close()
	mysqlCfg, err := config.SourceCfgFromYamlAndVerify(config.SampleSourceConfig)
	require.NoError(t.T(), err)
	mysqlCfg.From.Password = os.Getenv("MYSQL_PSWD")
	// task holds the YAML of the source config (not a task config); it is the
	// payload sent in OperateSourceRequest.Config below.
	task, err := mysqlCfg.Yaml()
	require.NoError(t.T(), err)
	sourceID := mysqlCfg.SourceID
	// 1. wait for scheduler to start
	time.Sleep(3 * time.Second)

	// 2. try to add a new mysql source
	req := &pb.OperateSourceRequest{Op: pb.SourceOp_StartSource, Config: []string{task}}
	resp, err := s1.OperateSource(ctx, req)
	require.NoError(t.T(), err)
	require.True(t.T(), resp.Result)
	require.Equal(t.T(), []*pb.CommonWorkerResponse{{
		Result: true,
		Msg:    "source is added but there is no free worker to bound",
		Source: sourceID,
	}}, resp.Sources)
	unBoundSources := s1.scheduler.UnboundSources()
	require.Len(t.T(), unBoundSources, 1)
	require.Equal(t.T(), sourceID, unBoundSources[0])

	// 3. try to add multiple source
	// 3.1 duplicated source id
	sourceID2 := "mysql-replica-02"
	mysqlCfg.SourceID = sourceID2
	task2, err := mysqlCfg.Yaml()
	require.NoError(t.T(), err)
	// the same config is listed twice in one request, which must be rejected
	req = &pb.OperateSourceRequest{Op: pb.SourceOp_StartSource, Config: []string{task2, task2}}
	resp, err = s1.OperateSource(ctx, req)
	require.NoError(t.T(), err)
	require.False(t.T(), resp.Result)
	require.Contains(t.T(), resp.Msg, "source config with ID "+sourceID2+" already exists")
	// 3.2 run same command after correction
	sourceID3 := "mysql-replica-03"
	mysqlCfg.SourceID = sourceID3
	task3, err := mysqlCfg.Yaml()
	require.NoError(t.T(), err)
	req = &pb.OperateSourceRequest{Op: pb.SourceOp_StartSource, Config: []string{task2, task3}}
	resp, err = s1.OperateSource(ctx, req)
	require.NoError(t.T(), err)
	require.True(t.T(), resp.Result)
	// sort by source ID so the comparison below does not depend on response order
	sort.Slice(resp.Sources, func(i, j int) bool {
		return resp.Sources[i].Source < resp.Sources[j].Source
	})
	require.Equal(t.T(), []*pb.CommonWorkerResponse{{
		Result: true,
		Msg:    "source is added but there is no free worker to bound",
		Source: sourceID2,
	}, {
		Result: true,
		Msg:    "source is added but there is no free worker to bound",
		Source: sourceID3,
	}}, resp.Sources)
	unBoundSources = s1.scheduler.UnboundSources()
	require.Len(t.T(), unBoundSources, 3)
	require.Equal(t.T(), sourceID, unBoundSources[0])
	require.Equal(t.T(), sourceID2, unBoundSources[1])
	require.Equal(t.T(), sourceID3, unBoundSources[2])

	// 4. try to stop a non-exist-source
	req.Op = pb.SourceOp_StopSource
	mysqlCfg.SourceID = "not-exist-source"
	task4, err := mysqlCfg.Yaml()
	require.NoError(t.T(), err)
	req.Config = []string{task4}
	resp, err = s1.OperateSource(ctx, req)
	require.NoError(t.T(), err)
	require.False(t.T(), resp.Result)
	require.Contains(t.T(), resp.Msg, "source config with ID "+mysqlCfg.SourceID+" not exists")

	// 5. start workers, the unbound sources should be bound
	var wg sync.WaitGroup
	workerName1 := "worker1"
	workerName2 := "worker2"
	workerName3 := "worker3"
	// clearSchedulerEnv is registered here and is also called explicitly at the
	// end of the test.
	defer func() {
		t.clearSchedulerEnv(cancel, &wg)
	}()
	// For each worker: register it in the scheduler, then keep it alive in etcd
	// from a goroutine tracked by wg and bound to ctx.
	require.NoError(t.T(), s1.scheduler.AddWorker(workerName1, "172.16.10.72:8262"))
	wg.Add(1)
	go func(ctx context.Context, workerName string) {
		defer wg.Done()
		require.NoError(t.T(), ha.KeepAlive(ctx, s1.etcdClient, workerName, keepAliveTTL))
	}(ctx, workerName1)
	require.NoError(t.T(), s1.scheduler.AddWorker(workerName2, "172.16.10.72:8263"))
	wg.Add(1)
	go func(ctx context.Context, workerName string) {
		defer wg.Done()
		require.NoError(t.T(), ha.KeepAlive(ctx, s1.etcdClient, workerName, keepAliveTTL))
	}(ctx, workerName2)
	require.NoError(t.T(), s1.scheduler.AddWorker(workerName3, "172.16.10.72:8264"))
	wg.Add(1)
	go func(ctx context.Context, workerName string) {
		defer wg.Done()
		require.NoError(t.T(), ha.KeepAlive(ctx, s1.etcdClient, workerName, keepAliveTTL))
	}(ctx, workerName3)
	// wait until the first source has been bound to some worker
	require.Eventually(t.T(), func() bool {
		w := s1.scheduler.GetWorkerBySource(sourceID)
		return w != nil
	}, 3*time.Second, 100*time.Millisecond)

	// 6. stop sources
	req.Config = []string{task, task2, task3}
	req.Op = pb.SourceOp_StopSource

	// install one mocked worker client per worker, each prepared for one source
	mockWorkerClient := pbmock.NewMockWorkerClient(ctrl)
	mockRevelantWorkerClient(mockWorkerClient, "", sourceID, req)
	s1.scheduler.SetWorkerClientForTest(workerName1, newMockRPCClient(mockWorkerClient))
	mockWorkerClient2 := pbmock.NewMockWorkerClient(ctrl)
	mockRevelantWorkerClient(mockWorkerClient2, "", sourceID2, req)
	s1.scheduler.SetWorkerClientForTest(workerName2, newMockRPCClient(mockWorkerClient2))
	mockWorkerClient3 := pbmock.NewMockWorkerClient(ctrl)
	mockRevelantWorkerClient(mockWorkerClient3, "", sourceID3, req)
	s1.scheduler.SetWorkerClientForTest(workerName3, newMockRPCClient(mockWorkerClient3))
	resp, err = s1.OperateSource(ctx, req)
	require.NoError(t.T(), err)
	require.True(t.T(), resp.Result)
	require.Equal(t.T(), []*pb.CommonWorkerResponse{{
		Result: true,
		Source: sourceID,
	}, {
		Result: true,
		Source: sourceID2,
	}, {
		Result: true,
		Source: sourceID3,
	}}, resp.Sources)
	// the config of the first source must no longer be readable via the test etcd client
	scm, _, err := ha.GetSourceCfg(t.etcdTestCli, sourceID, 0)
	require.NoError(t.T(), err)
	require.Len(t.T(), scm, 0)
	t.clearSchedulerEnv(cancel, &wg)

	cancel()
}
  1908  
  1909  func (t *testMasterSuite) TestOfflineMember() {
  1910  	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
  1911  
  1912  	cfg1 := generateServerConfig(t.T(), "dm-master-1")
  1913  	cfg2 := generateServerConfig(t.T(), "dm-master-2")
  1914  	cfg3 := generateServerConfig(t.T(), "dm-master-3")
  1915  
  1916  	initialCluster := fmt.Sprintf("%s=%s", cfg1.Name, cfg1.AdvertisePeerUrls) + "," +
  1917  		fmt.Sprintf("%s=%s", cfg2.Name, cfg2.AdvertisePeerUrls) + "," +
  1918  		fmt.Sprintf("%s=%s", cfg3.Name, cfg3.AdvertisePeerUrls)
  1919  	cfg1.InitialCluster = initialCluster
  1920  	cfg2.InitialCluster = initialCluster
  1921  	cfg3.InitialCluster = initialCluster
  1922  
  1923  	var wg sync.WaitGroup
  1924  	s1 := NewServer(cfg1)
  1925  	defer func() {
  1926  		cancel()
  1927  		s1.Close()
  1928  	}()
  1929  	wg.Add(1)
  1930  	go func() {
  1931  		require.NoError(t.T(), s1.Start(ctx))
  1932  		wg.Done()
  1933  	}()
  1934  
  1935  	s2 := NewServer(cfg2)
  1936  	defer func() {
  1937  		cancel()
  1938  		s2.Close()
  1939  	}()
  1940  	wg.Add(1)
  1941  	go func() {
  1942  		require.NoError(t.T(), s2.Start(ctx))
  1943  		wg.Done()
  1944  	}()
  1945  
  1946  	ctx3, cancel3 := context.WithCancel(ctx)
  1947  	s3 := NewServer(cfg3)
  1948  	require.NoError(t.T(), s3.Start(ctx3))
  1949  	defer func() {
  1950  		cancel3()
  1951  		s3.Close()
  1952  	}()
  1953  
  1954  	wg.Wait()
  1955  
  1956  	var leaderID string
  1957  	// ensure s2 has got the right leader info, because it will be used to `OfflineMember`.
  1958  	require.Eventually(t.T(), func() bool {
  1959  		s2.RLock()
  1960  		leader := s2.leader.Load()
  1961  		s2.RUnlock()
  1962  		if leader == "" {
  1963  			return false
  1964  		}
  1965  		if leader == oneselfLeader {
  1966  			leaderID = s2.cfg.Name
  1967  		} else {
  1968  			leaderID = s2.leader.Load()
  1969  		}
  1970  		return true
  1971  	}, 3*time.Second, 100*time.Millisecond)
  1972  
  1973  	// master related operations
  1974  	req := &pb.OfflineMemberRequest{
  1975  		Type: "masters",
  1976  		Name: "xixi",
  1977  	}
  1978  	// test offline member with wrong type
  1979  	resp, err := s2.OfflineMember(ctx, req)
  1980  	require.NoError(t.T(), err)
  1981  	require.False(t.T(), resp.Result)
  1982  	require.Contains(t.T(), resp.Msg, terror.ErrMasterInvalidOfflineType.Generate(req.Type).Error())
  1983  	// test offline member with invalid master name
  1984  	req.Type = common.Master
  1985  	resp, err = s2.OfflineMember(ctx, req)
  1986  	require.NoError(t.T(), err)
  1987  	require.False(t.T(), resp.Result)
  1988  	require.Contains(t.T(), resp.Msg, `dm-master with name `+req.Name+` not exists`)
  1989  	// test offline member with correct master name
  1990  	cli := s2.etcdClient
  1991  	listResp, err := etcdutil.ListMembers(cli)
  1992  	require.NoError(t.T(), err)
  1993  	require.Len(t.T(), listResp.Members, 3)
  1994  
  1995  	// make sure s3 is not the leader, otherwise it will take some time to campaign a new leader after close s3, and it may cause timeout
  1996  	require.Eventually(t.T(), func() bool {
  1997  		_, leaderID, _, err = s1.election.LeaderInfo(ctx)
  1998  		if err != nil {
  1999  			return false
  2000  		}
  2001  
  2002  		if leaderID == s3.cfg.Name {
  2003  			_, err = s3.OperateLeader(ctx, &pb.OperateLeaderRequest{
  2004  				Op: pb.LeaderOp_EvictLeaderOp,
  2005  			})
  2006  			require.NoError(t.T(), err)
  2007  		}
  2008  		return leaderID != s3.cfg.Name
  2009  	}, 10*time.Second, 500*time.Millisecond)
  2010  
  2011  	cancel3()
  2012  	s3.Close()
  2013  
  2014  	req.Name = s3.cfg.Name
  2015  	resp, err = s2.OfflineMember(ctx, req)
  2016  	require.NoError(t.T(), err)
  2017  	require.Equal(t.T(), "", resp.Msg)
  2018  	require.True(t.T(), resp.Result)
  2019  
  2020  	listResp, err = etcdutil.ListMembers(cli)
  2021  	require.NoError(t.T(), err)
  2022  	require.Len(t.T(), listResp.Members, 2)
  2023  	if listResp.Members[0].Name == cfg2.Name {
  2024  		listResp.Members[0], listResp.Members[1] = listResp.Members[1], listResp.Members[0]
  2025  	}
  2026  	require.Equal(t.T(), cfg1.Name, listResp.Members[0].Name)
  2027  	require.Equal(t.T(), cfg2.Name, listResp.Members[1].Name)
  2028  
  2029  	_, leaderID2, _, err := s1.election.LeaderInfo(ctx)
  2030  	require.NoError(t.T(), err)
  2031  
  2032  	if leaderID == cfg3.Name {
  2033  		// s3 is leader before, leader should re-campaign
  2034  		require.False(t.T(), leaderID != leaderID2)
  2035  	} else {
  2036  		// s3 isn't leader before, leader should keep the same
  2037  		require.Equal(t.T(), leaderID, leaderID2)
  2038  	}
  2039  
  2040  	// worker related operations
  2041  	ectx, canc := context.WithTimeout(ctx, time.Second)
  2042  	defer canc()
  2043  	req1 := &pb.RegisterWorkerRequest{
  2044  		Name:    "xixi",
  2045  		Address: "127.0.0.1:1000",
  2046  	}
  2047  	regReq, err := s1.RegisterWorker(ectx, req1)
  2048  	require.NoError(t.T(), err)
  2049  	require.True(t.T(), regReq.Result)
  2050  
  2051  	req2 := &pb.OfflineMemberRequest{
  2052  		Type: common.Worker,
  2053  		Name: "haha",
  2054  	}
  2055  	{
  2056  		res, err := s1.OfflineMember(ectx, req2)
  2057  		require.NoError(t.T(), err)
  2058  		require.False(t.T(), res.Result)
  2059  		require.Contains(t.T(), res.Msg, `dm-worker with name `+req2.Name+` not exists`)
  2060  	}
  2061  	{
  2062  		req2.Name = "xixi"
  2063  		res, err := s1.OfflineMember(ectx, req2)
  2064  		require.NoError(t.T(), err)
  2065  		require.True(t.T(), res.Result)
  2066  	}
  2067  	{
  2068  		// register offline worker again. TICASE-962, 963
  2069  		resp, err := s1.RegisterWorker(ectx, req1)
  2070  		require.NoError(t.T(), err)
  2071  		require.True(t.T(), resp.Result)
  2072  	}
  2073  	t.clearSchedulerEnv(cancel, &wg)
  2074  }
  2075  
  2076  func (t *testMasterSuite) TestGetCfg() {
  2077  	ctrl := gomock.NewController(t.T())
  2078  	defer ctrl.Finish()
  2079  
  2080  	server := testDefaultMasterServer(t.T())
  2081  	sources, workers := defaultWorkerSource()
  2082  
  2083  	var wg sync.WaitGroup
  2084  	taskName := "test"
  2085  	ctx, cancel := context.WithCancel(context.Background())
  2086  	req := &pb.StartTaskRequest{
  2087  		Task:    taskConfig,
  2088  		Sources: sources,
  2089  	}
  2090  	server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "",
  2091  		makeWorkerClientsForHandle(ctrl, taskName, sources, workers, req))
  2092  	server.etcdClient = t.etcdTestCli
  2093  
  2094  	// start task
  2095  	mock := conn.InitVersionDB()
  2096  	defer func() {
  2097  		conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{}
  2098  	}()
  2099  	mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).
  2100  		AddRow("version", "5.7.25-TiDB-v4.0.2"))
  2101  	resp, err := server.StartTask(context.Background(), req)
  2102  	require.NoError(t.T(), err)
  2103  	require.True(t.T(), resp.Result)
  2104  
  2105  	// get task config
  2106  	req1 := &pb.GetCfgRequest{
  2107  		Name: taskName,
  2108  		Type: pb.CfgType_TaskType,
  2109  	}
  2110  	resp1, err := server.GetCfg(context.Background(), req1)
  2111  	require.NoError(t.T(), err)
  2112  	require.True(t.T(), resp1.Result)
  2113  	require.Contains(t.T(), resp1.Cfg, "name: test")
  2114  
  2115  	// not exist task name
  2116  	taskName2 := "wrong"
  2117  	req2 := &pb.GetCfgRequest{
  2118  		Name: taskName2,
  2119  		Type: pb.CfgType_TaskType,
  2120  	}
  2121  	resp2, err := server.GetCfg(context.Background(), req2)
  2122  	require.NoError(t.T(), err)
  2123  	require.False(t.T(), resp2.Result)
  2124  	require.Contains(t.T(), resp2.Msg, "task not found")
  2125  
  2126  	// generate a template named `wrong`, test get this task template
  2127  	openapiTask, err := fixtures.GenNoShardOpenAPITaskForTest()
  2128  	require.NoError(t.T(), err)
  2129  	openapiTask.Name = taskName2
  2130  	require.NoError(t.T(), ha.PutOpenAPITaskTemplate(t.etcdTestCli, openapiTask, true))
  2131  	require.NoError(t.T(), failpoint.Enable("github.com/pingcap/tiflow/dm/master/MockSkipAdjustTargetDB", `return(true)`))
  2132  	resp2, err = server.GetCfg(context.Background(), &pb.GetCfgRequest{Name: taskName2, Type: pb.CfgType_TaskTemplateType})
  2133  	require.NoError(t.T(), failpoint.Disable("github.com/pingcap/tiflow/dm/master/MockSkipAdjustTargetDB"))
  2134  	require.NoError(t.T(), err)
  2135  	require.True(t.T(), resp2.Result)
  2136  	require.Contains(t.T(), resp2.Cfg, "name: "+taskName2)
  2137  
  2138  	// test restart master
  2139  	server.scheduler.Close()
  2140  	require.NoError(t.T(), server.scheduler.Start(ctx, t.etcdTestCli))
  2141  
  2142  	resp3, err := server.GetCfg(context.Background(), req1)
  2143  	require.NoError(t.T(), err)
  2144  	require.True(t.T(), resp3.Result)
  2145  	require.Equal(t.T(), resp1.Cfg, resp3.Cfg)
  2146  
  2147  	req3 := &pb.GetCfgRequest{
  2148  		Name: "dm-master",
  2149  		Type: pb.CfgType_MasterType,
  2150  	}
  2151  	resp4, err := server.GetCfg(context.Background(), req3)
  2152  	require.NoError(t.T(), err)
  2153  	require.True(t.T(), resp4.Result)
  2154  	require.Contains(t.T(), resp4.Cfg, `name = "dm-master"`)
  2155  
  2156  	req4 := &pb.GetCfgRequest{
  2157  		Name: "haha",
  2158  		Type: pb.CfgType_MasterType,
  2159  	}
  2160  	resp5, err := server.GetCfg(context.Background(), req4)
  2161  	require.NoError(t.T(), err)
  2162  	require.False(t.T(), resp5.Result)
  2163  	require.Contains(t.T(), resp5.Msg, "master not found")
  2164  
  2165  	req5 := &pb.GetCfgRequest{
  2166  		Name: "haha",
  2167  		Type: pb.CfgType_WorkerType,
  2168  	}
  2169  	resp6, err := server.GetCfg(context.Background(), req5)
  2170  	require.NoError(t.T(), err)
  2171  	require.False(t.T(), resp6.Result)
  2172  	require.Contains(t.T(), resp6.Msg, "worker not found")
  2173  
  2174  	req6 := &pb.GetCfgRequest{
  2175  		Name: "mysql-replica-01",
  2176  		Type: pb.CfgType_SourceType,
  2177  	}
  2178  	resp7, err := server.GetCfg(context.Background(), req6)
  2179  	require.NoError(t.T(), err)
  2180  	require.True(t.T(), resp7.Result)
  2181  	require.Contains(t.T(), resp7.Cfg, `source-id: mysql-replica-01`)
  2182  
  2183  	req7 := &pb.GetCfgRequest{
  2184  		Name: "haha",
  2185  		Type: pb.CfgType_SourceType,
  2186  	}
  2187  	resp8, err := server.GetCfg(context.Background(), req7)
  2188  	require.NoError(t.T(), err)
  2189  	require.False(t.T(), resp8.Result)
  2190  	require.Equal(t.T(), resp8.Msg, "source not found")
  2191  
  2192  	t.clearSchedulerEnv(cancel, &wg)
  2193  }
  2194  
  2195  func (t *testMasterSuite) relayStageMatch(s *scheduler.Scheduler, source string, expectStage pb.Stage) {
  2196  	stage := ha.NewRelayStage(expectStage, source)
  2197  	stageDeepEqualExcludeRev(t.T(), s.GetExpectRelayStage(source), stage)
  2198  
  2199  	eStage, _, err := ha.GetRelayStage(t.etcdTestCli, source)
  2200  	require.NoError(t.T(), err)
  2201  	switch expectStage {
  2202  	case pb.Stage_Running, pb.Stage_Paused:
  2203  		stageDeepEqualExcludeRev(t.T(), eStage, stage)
  2204  	}
  2205  }
  2206  
  2207  func (t *testMasterSuite) subTaskStageMatch(s *scheduler.Scheduler, task, source string, expectStage pb.Stage) {
  2208  	stage := ha.NewSubTaskStage(expectStage, source, task)
  2209  	require.Equal(t.T(), s.GetExpectSubTaskStage(task, source), stage)
  2210  
  2211  	eStageM, _, err := ha.GetSubTaskStage(t.etcdTestCli, source, task)
  2212  	require.NoError(t.T(), err)
  2213  	switch expectStage {
  2214  	case pb.Stage_Running, pb.Stage_Paused:
  2215  		require.Len(t.T(), eStageM, 1)
  2216  		stageDeepEqualExcludeRev(t.T(), eStageM[task], stage)
  2217  	default:
  2218  		require.Len(t.T(), eStageM, 0)
  2219  	}
  2220  }
  2221  
  2222  func (t *testMasterSuite) TestGRPCLongResponse() {
  2223  	require.NoError(t.T(), failpoint.Enable("github.com/pingcap/tiflow/dm/master/LongRPCResponse", `return()`))
  2224  	//nolint:errcheck
  2225  	defer failpoint.Disable("github.com/pingcap/tiflow/dm/master/LongRPCResponse")
  2226  	require.NoError(t.T(), failpoint.Enable("github.com/pingcap/tiflow/dm/ctl/common/SkipUpdateMasterClient", `return()`))
  2227  	//nolint:errcheck
  2228  	defer failpoint.Disable("github.com/pingcap/tiflow/dm/ctl/common/SkipUpdateMasterClient")
  2229  
  2230  	masterAddr := tempurl.Alloc()[len("http://"):]
  2231  	lis, err := net.Listen("tcp", masterAddr)
  2232  	require.NoError(t.T(), err)
  2233  	defer lis.Close()
  2234  	server := grpc.NewServer()
  2235  	pb.RegisterMasterServer(server, &Server{})
  2236  	//nolint:errcheck
  2237  	go server.Serve(lis)
  2238  
  2239  	conn, err := grpc.Dial(utils.UnwrapScheme(masterAddr),
  2240  		grpc.WithInsecure(),
  2241  		grpc.WithBlock())
  2242  	require.NoError(t.T(), err)
  2243  	defer conn.Close()
  2244  
  2245  	common.GlobalCtlClient.MasterClient = pb.NewMasterClient(conn)
  2246  	ctx := context.Background()
  2247  	resp := &pb.StartTaskResponse{}
  2248  	err = common.SendRequest(ctx, "StartTask", &pb.StartTaskRequest{}, &resp)
  2249  	require.NoError(t.T(), err)
  2250  }
  2251  
  2252  func (t *testMasterSuite) TestStartStopValidation() {
  2253  	var (
  2254  		wg       sync.WaitGroup
  2255  		taskName = "test"
  2256  	)
  2257  	ctrl := gomock.NewController(t.T())
  2258  	defer ctrl.Finish()
  2259  	server := testDefaultMasterServer(t.T())
  2260  	server.etcdClient = t.etcdTestCli
  2261  	sources, workers := defaultWorkerSource()
  2262  	ctx, cancel := context.WithCancel(context.Background())
  2263  	defer t.clearSchedulerEnv(cancel, &wg)
  2264  	// start task without validation
  2265  	startReq := &pb.StartTaskRequest{
  2266  		Task:    taskConfig,
  2267  		Sources: sources,
  2268  	}
  2269  	sourceResps := []*pb.CommonWorkerResponse{{Result: true, Source: sources[0]}, {Result: true, Source: sources[1]}}
  2270  	server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "",
  2271  		makeWorkerClientsForHandle(ctrl, taskName, sources, workers, startReq))
  2272  	mock := conn.InitVersionDB()
  2273  	defer func() {
  2274  		conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{}
  2275  	}()
  2276  	mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).
  2277  		AddRow("version", "5.7.25-TiDB-v4.0.2"))
  2278  	stResp, err := server.StartTask(context.Background(), startReq)
  2279  	require.NoError(t.T(), err)
  2280  	require.True(t.T(), stResp.Result)
  2281  
  2282  	for _, source := range sources {
  2283  		t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running)
  2284  	}
  2285  	require.Equal(t.T(), sourceResps, stResp.Sources)
  2286  
  2287  	// (fail) start all validator of the task with explicit but invalid mode
  2288  	validatorStartReq := &pb.StartValidationRequest{
  2289  		Mode:     &pb.StartValidationRequest_ModeValue{ModeValue: "invalid-mode"},
  2290  		TaskName: taskName,
  2291  	}
  2292  	startResp, err := server.StartValidation(context.Background(), validatorStartReq)
  2293  	require.NoError(t.T(), err)
  2294  	require.False(t.T(), startResp.Result)
  2295  	require.Contains(t.T(), startResp.Msg, "validation mode should be either `full` or `fast`")
  2296  	t.validatorStageMatch(taskName, sources[0], pb.Stage_InvalidStage)
  2297  	t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage)
  2298  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationNone, "")
  2299  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "")
  2300  
  2301  	// (fail) start with explicit but invalid start-time
  2302  	validatorStartReq = &pb.StartValidationRequest{
  2303  		StartTime: &pb.StartValidationRequest_StartTimeValue{StartTimeValue: "xxx"},
  2304  		TaskName:  taskName,
  2305  	}
  2306  	startResp, err = server.StartValidation(context.Background(), validatorStartReq)
  2307  	require.NoError(t.T(), err)
  2308  	require.False(t.T(), startResp.Result)
  2309  	require.Contains(t.T(), startResp.Msg, "start-time should be in the format like")
  2310  	t.validatorStageMatch(taskName, sources[0], pb.Stage_InvalidStage)
  2311  	t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage)
  2312  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationNone, "")
  2313  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "")
  2314  
  2315  	// (fail) start for non-existed subtask
  2316  	validatorStartReq = &pb.StartValidationRequest{
  2317  		TaskName: "not-exist-name",
  2318  	}
  2319  	startResp, err = server.StartValidation(context.Background(), validatorStartReq)
  2320  	require.NoError(t.T(), err)
  2321  	require.False(t.T(), startResp.Result)
  2322  	require.Contains(t.T(), startResp.Msg, "cannot get subtask by task name")
  2323  	t.validatorStageMatch(taskName, sources[0], pb.Stage_InvalidStage)
  2324  	t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage)
  2325  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationNone, "")
  2326  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "")
  2327  
  2328  	// (fail) start for non-exist source
  2329  	validatorStartReq = &pb.StartValidationRequest{
  2330  		Sources: []string{"xxx"},
  2331  	}
  2332  	startResp, err = server.StartValidation(context.Background(), validatorStartReq)
  2333  	require.NoError(t.T(), err)
  2334  	require.False(t.T(), startResp.Result)
  2335  	require.Contains(t.T(), startResp.Msg, "cannot get subtask by sources")
  2336  	t.validatorStageMatch(taskName, sources[0], pb.Stage_InvalidStage)
  2337  	t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage)
  2338  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationNone, "")
  2339  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "")
  2340  
  2341  	// (success) start validation without explicit mode for source 0
  2342  	validatorStartReq = &pb.StartValidationRequest{
  2343  		TaskName: taskName,
  2344  		Sources:  []string{sources[0]},
  2345  	}
  2346  	startResp, err = server.StartValidation(context.Background(), validatorStartReq)
  2347  	require.NoError(t.T(), err)
  2348  	require.True(t.T(), startResp.Result)
  2349  	t.validatorStageMatch(taskName, sources[0], pb.Stage_Running)
  2350  	t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage)
  2351  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "")
  2352  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "")
  2353  
  2354  	// (fail) start all validator with explicit mode
  2355  	validatorStartReq = &pb.StartValidationRequest{
  2356  		Mode:     &pb.StartValidationRequest_ModeValue{ModeValue: config.ValidationFull},
  2357  		TaskName: taskName,
  2358  	}
  2359  	startResp, err = server.StartValidation(context.Background(), validatorStartReq)
  2360  	require.NoError(t.T(), err)
  2361  	require.False(t.T(), startResp.Result)
  2362  	require.Regexp(t.T(), ".*some of target validator.* has already enabled.*", startResp.Msg)
  2363  	t.validatorStageMatch(taskName, sources[0], pb.Stage_Running)
  2364  	t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage)
  2365  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "")
  2366  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "")
  2367  
  2368  	// (fail) start validation with explicit mode for source 0 again
  2369  	validatorStartReq = &pb.StartValidationRequest{
  2370  		Mode:     &pb.StartValidationRequest_ModeValue{ModeValue: config.ValidationFull},
  2371  		TaskName: taskName,
  2372  		Sources:  []string{sources[0]},
  2373  	}
  2374  	startResp, err = server.StartValidation(context.Background(), validatorStartReq)
  2375  	require.NoError(t.T(), err)
  2376  	require.False(t.T(), startResp.Result)
  2377  	require.Contains(t.T(), startResp.Msg, "all target validator has enabled, cannot do 'validation start' with explicit mode or start-time")
  2378  	t.validatorStageMatch(taskName, sources[0], pb.Stage_Running)
  2379  	t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage)
  2380  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "")
  2381  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "")
  2382  
  2383  	// (fail) start all validator without explicit mode
  2384  	validatorStartReq = &pb.StartValidationRequest{
  2385  		TaskName: taskName,
  2386  	}
  2387  	startResp, err = server.StartValidation(context.Background(), validatorStartReq)
  2388  	require.NoError(t.T(), err)
  2389  	require.False(t.T(), startResp.Result)
  2390  	require.Regexp(t.T(), ".*some of target validator.* has already enabled.*", startResp.Msg)
  2391  	t.validatorStageMatch(taskName, sources[0], pb.Stage_Running)
  2392  	t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage)
  2393  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "")
  2394  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "")
  2395  
  2396  	// (fail) stop validator of source 1
  2397  	validatorStopReq := &pb.StopValidationRequest{
  2398  		TaskName: taskName,
  2399  		Sources:  sources[1:],
  2400  	}
  2401  	stopResp, err := server.StopValidation(context.Background(), validatorStopReq)
  2402  	require.NoError(t.T(), err)
  2403  	require.False(t.T(), stopResp.Result)
  2404  	require.Regexp(t.T(), ".*some target validator.* is not enabled.*", stopResp.Msg)
  2405  	t.validatorStageMatch(taskName, sources[0], pb.Stage_Running)
  2406  	t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage)
  2407  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "")
  2408  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "")
  2409  
  2410  	// (fail) stop all validator
  2411  	validatorStopReq = &pb.StopValidationRequest{
  2412  		TaskName: taskName,
  2413  	}
  2414  	stopResp, err = server.StopValidation(context.Background(), validatorStopReq)
  2415  	require.NoError(t.T(), err)
  2416  	require.False(t.T(), stopResp.Result)
  2417  	require.Regexp(t.T(), ".*some target validator.* is not enabled.*", stopResp.Msg)
  2418  	t.validatorStageMatch(taskName, sources[0], pb.Stage_Running)
  2419  	t.validatorStageMatch(taskName, sources[1], pb.Stage_InvalidStage)
  2420  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "")
  2421  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationNone, "")
  2422  
  2423  	// (success) start validation with fast mode and start-time for source 1
  2424  	validatorStartReq = &pb.StartValidationRequest{
  2425  		Mode:      &pb.StartValidationRequest_ModeValue{ModeValue: config.ValidationFast},
  2426  		StartTime: &pb.StartValidationRequest_StartTimeValue{StartTimeValue: "2006-01-02 15:04:05"},
  2427  		TaskName:  taskName,
  2428  		Sources:   []string{sources[1]},
  2429  	}
  2430  	startResp, err = server.StartValidation(context.Background(), validatorStartReq)
  2431  	require.NoError(t.T(), err)
  2432  	require.True(t.T(), startResp.Result)
  2433  	t.validatorStageMatch(taskName, sources[0], pb.Stage_Running)
  2434  	t.validatorStageMatch(taskName, sources[1], pb.Stage_Running)
  2435  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "")
  2436  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05")
  2437  
  2438  	// now validator of the 2 subtask is enabled(running)
  2439  
  2440  	// (success) start all validator of the task without explicit param again, i.e. resuming
  2441  	validatorStartReq = &pb.StartValidationRequest{
  2442  		TaskName: taskName,
  2443  	}
  2444  	startResp, err = server.StartValidation(context.Background(), validatorStartReq)
  2445  	require.NoError(t.T(), err)
  2446  	require.True(t.T(), startResp.Result)
  2447  	t.validatorStageMatch(taskName, sources[0], pb.Stage_Running)
  2448  	t.validatorStageMatch(taskName, sources[1], pb.Stage_Running)
  2449  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "")
  2450  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05")
  2451  
  2452  	// (fail) stop non-existed subtask's validator
  2453  	validatorStopReq = &pb.StopValidationRequest{
  2454  		TaskName: "not-exist-name",
  2455  	}
  2456  	stopResp, err = server.StopValidation(context.Background(), validatorStopReq)
  2457  	require.NoError(t.T(), err)
  2458  	require.False(t.T(), stopResp.Result)
  2459  	require.Contains(t.T(), stopResp.Msg, "cannot get subtask by task name")
  2460  	t.validatorStageMatch(taskName, sources[0], pb.Stage_Running)
  2461  	t.validatorStageMatch(taskName, sources[1], pb.Stage_Running)
  2462  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "")
  2463  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05")
  2464  
  2465  	// (fail) stop all task but with non-exist source
  2466  	validatorStopReq = &pb.StopValidationRequest{
  2467  		Sources: []string{"xxx"},
  2468  	}
  2469  	stopResp, err = server.StopValidation(context.Background(), validatorStopReq)
  2470  	require.NoError(t.T(), err)
  2471  	require.False(t.T(), stopResp.Result)
  2472  	require.Contains(t.T(), stopResp.Msg, "cannot get subtask by source")
  2473  	t.validatorStageMatch(taskName, sources[0], pb.Stage_Running)
  2474  	t.validatorStageMatch(taskName, sources[1], pb.Stage_Running)
  2475  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "")
  2476  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05")
  2477  
  2478  	// (success) stop validation of source 0
  2479  	validatorStopReq = &pb.StopValidationRequest{
  2480  		TaskName: taskName,
  2481  		Sources:  []string{sources[0]},
  2482  	}
  2483  	stopResp, err = server.StopValidation(context.Background(), validatorStopReq)
  2484  	require.NoError(t.T(), err)
  2485  	require.True(t.T(), stopResp.Result)
  2486  	t.validatorStageMatch(taskName, sources[0], pb.Stage_Stopped)
  2487  	t.validatorStageMatch(taskName, sources[1], pb.Stage_Running)
  2488  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "")
  2489  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05")
  2490  
  2491  	// (success) stop all
  2492  	validatorStopReq = &pb.StopValidationRequest{
  2493  		TaskName: "",
  2494  	}
  2495  	stopResp, err = server.StopValidation(context.Background(), validatorStopReq)
  2496  	require.NoError(t.T(), err)
  2497  	require.True(t.T(), stopResp.Result)
  2498  	t.validatorStageMatch(taskName, sources[0], pb.Stage_Stopped)
  2499  	t.validatorStageMatch(taskName, sources[1], pb.Stage_Stopped)
  2500  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "")
  2501  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05")
  2502  
  2503  	// (success) stop all again
  2504  	validatorStopReq = &pb.StopValidationRequest{
  2505  		TaskName: "",
  2506  	}
  2507  	stopResp, err = server.StopValidation(context.Background(), validatorStopReq)
  2508  	require.NoError(t.T(), err)
  2509  	require.True(t.T(), stopResp.Result)
  2510  	t.validatorStageMatch(taskName, sources[0], pb.Stage_Stopped)
  2511  	t.validatorStageMatch(taskName, sources[1], pb.Stage_Stopped)
  2512  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "")
  2513  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05")
  2514  
  2515  	// (success) start all tasks
  2516  	validatorStartReq = &pb.StartValidationRequest{
  2517  		TaskName: "",
  2518  	}
  2519  	startResp, err = server.StartValidation(context.Background(), validatorStartReq)
  2520  	require.NoError(t.T(), err)
  2521  	require.True(t.T(), startResp.Result)
  2522  	t.validatorStageMatch(taskName, sources[0], pb.Stage_Running)
  2523  	t.validatorStageMatch(taskName, sources[1], pb.Stage_Running)
  2524  	t.validatorModeMatch(server.scheduler, taskName, sources[0], config.ValidationFull, "")
  2525  	t.validatorModeMatch(server.scheduler, taskName, sources[1], config.ValidationFast, "2006-01-02 15:04:05")
  2526  }
  2527  
  2528  //nolint:unparam
  2529  func (t *testMasterSuite) validatorStageMatch(taskName, source string, expectStage pb.Stage) {
  2530  	stage := ha.NewValidatorStage(expectStage, source, taskName)
  2531  
  2532  	stageM, _, err := ha.GetValidatorStage(t.etcdTestCli, source, taskName, 0)
  2533  	require.NoError(t.T(), err)
  2534  	switch expectStage {
  2535  	case pb.Stage_Running, pb.Stage_Stopped:
  2536  		require.Len(t.T(), stageM, 1)
  2537  		stageDeepEqualExcludeRev(t.T(), stageM[taskName], stage)
  2538  	default:
  2539  		require.Len(t.T(), stageM, 0)
  2540  	}
  2541  }
  2542  
  2543  //nolint:unparam
  2544  func (t *testMasterSuite) validatorModeMatch(s *scheduler.Scheduler, task, source string,
  2545  	expectMode, expectedStartTime string,
  2546  ) {
  2547  	cfgs := s.GetSubTaskCfgsByTaskAndSource(task, []string{source})
  2548  	v, ok := cfgs[task]
  2549  	require.True(t.T(), ok)
  2550  	cfg, ok := v[source]
  2551  	require.True(t.T(), ok)
  2552  	require.Equal(t.T(), expectMode, cfg.ValidatorCfg.Mode)
  2553  	require.Equal(t.T(), expectedStartTime, cfg.ValidatorCfg.StartTime)
  2554  }
  2555  
  2556  func (t *testMasterSuite) TestGetValidatorStatus() {
  2557  	var (
  2558  		wg       sync.WaitGroup
  2559  		taskName = "test"
  2560  	)
  2561  	ctrl := gomock.NewController(t.T())
  2562  	defer ctrl.Finish()
  2563  	server := testDefaultMasterServer(t.T())
  2564  	server.etcdClient = t.etcdTestCli
  2565  	sources, workers := defaultWorkerSource()
  2566  	startReq := &pb.StartTaskRequest{
  2567  		Task:    taskConfig,
  2568  		Sources: sources,
  2569  	}
  2570  	// test query all workers
  2571  	for idx, worker := range workers {
  2572  		mockWorkerClient := pbmock.NewMockWorkerClient(ctrl)
  2573  		mockWorkerClient.EXPECT().GetWorkerValidatorStatus(
  2574  			gomock.Any(),
  2575  			gomock.Any(),
  2576  		).Return(&pb.GetValidationStatusResponse{
  2577  			Result: true,
  2578  			TableStatuses: []*pb.ValidationTableStatus{
  2579  				{
  2580  					SrcTable: "tbl1",
  2581  				},
  2582  			},
  2583  		}, nil)
  2584  		mockWorkerClient.EXPECT().GetWorkerValidatorStatus(
  2585  			gomock.Any(),
  2586  			gomock.Any(),
  2587  		).Return(&pb.GetValidationStatusResponse{
  2588  			Result: false,
  2589  			Msg:    "something wrong in worker",
  2590  		}, nil)
  2591  		mockWorkerClient.EXPECT().GetWorkerValidatorStatus(
  2592  			gomock.Any(),
  2593  			gomock.Any(),
  2594  		).Return(&pb.GetValidationStatusResponse{}, errors.New("grpc error"))
  2595  		mockRevelantWorkerClient(mockWorkerClient, taskName, sources[idx], startReq)
  2596  		t.workerClients[worker] = newMockRPCClient(mockWorkerClient)
  2597  	}
  2598  	ctx, cancel := context.WithCancel(context.Background())
  2599  	defer t.clearSchedulerEnv(cancel, &wg)
  2600  	// start task without validation
  2601  	sourceResps := []*pb.CommonWorkerResponse{{Result: true, Source: sources[0]}, {Result: true, Source: sources[1]}}
  2602  	server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "", t.workerClients)
  2603  	mock := conn.InitVersionDB()
  2604  	defer func() {
  2605  		conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{}
  2606  	}()
  2607  	mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).
  2608  		AddRow("version", "5.7.25-TiDB-v4.0.2"))
  2609  	stResp, err := server.StartTask(context.Background(), startReq)
  2610  	require.NoError(t.T(), err)
  2611  	require.True(t.T(), stResp.Result)
  2612  
  2613  	for _, source := range sources {
  2614  		t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running)
  2615  	}
  2616  	require.Equal(t.T(), sourceResps, stResp.Sources)
  2617  	// 1. query existing task's status
  2618  	statusReq := &pb.GetValidationStatusRequest{
  2619  		TaskName: taskName,
  2620  	}
  2621  	resp, err := server.GetValidationStatus(context.Background(), statusReq)
  2622  	require.NoError(t.T(), err)
  2623  	require.Equal(t.T(), "", resp.Msg)
  2624  	require.True(t.T(), resp.Result)
  2625  	require.Equal(t.T(), 2, len(resp.TableStatuses))
  2626  	// 2. query invalid task's status
  2627  	statusReq.TaskName = "invalid-task"
  2628  	resp, err = server.GetValidationStatus(context.Background(), statusReq)
  2629  	require.NoError(t.T(), err)
  2630  	require.Contains(t.T(), resp.Msg, "cannot get subtask by task name")
  2631  	require.False(t.T(), resp.Result)
  2632  	// 3. query invalid stage
  2633  	statusReq.TaskName = taskName
  2634  	statusReq.FilterStatus = pb.Stage_Paused // invalid stage
  2635  	resp, err = server.GetValidationStatus(context.Background(), statusReq)
  2636  	require.NoError(t.T(), err)
  2637  	require.Contains(t.T(), resp.Msg, "filtering stage should be either")
  2638  	require.False(t.T(), resp.Result)
  2639  	// 4. worker error
  2640  	statusReq.FilterStatus = pb.Stage_Running
  2641  	resp, err = server.GetValidationStatus(context.Background(), statusReq)
  2642  	require.NoError(t.T(), err)
  2643  	require.False(t.T(), resp.Result)
  2644  	require.Contains(t.T(), resp.Msg, "something wrong in worker")
  2645  	// 5. grpc error
  2646  	statusReq.FilterStatus = pb.Stage_Running
  2647  	resp, err = server.GetValidationStatus(context.Background(), statusReq)
  2648  	require.NoError(t.T(), err)
  2649  	require.False(t.T(), resp.Result)
  2650  	require.Contains(t.T(), resp.Msg, "grpc error")
  2651  }
  2652  
  2653  func (t *testMasterSuite) TestGetValidationError() {
  2654  	var (
  2655  		wg       sync.WaitGroup
  2656  		taskName = "test"
  2657  	)
  2658  	ctrl := gomock.NewController(t.T())
  2659  	defer ctrl.Finish()
  2660  	server := testDefaultMasterServer(t.T())
  2661  	server.etcdClient = t.etcdTestCli
  2662  	sources, workers := defaultWorkerSource()
  2663  	startReq := &pb.StartTaskRequest{
  2664  		Task:    taskConfig,
  2665  		Sources: sources,
  2666  	}
  2667  	// test query all workers
  2668  	for idx, worker := range workers {
  2669  		mockWorkerClient := pbmock.NewMockWorkerClient(ctrl)
  2670  		mockWorkerClient.EXPECT().GetValidatorError(
  2671  			gomock.Any(),
  2672  			gomock.Any(),
  2673  		).Return(&pb.GetValidationErrorResponse{
  2674  			Result: true,
  2675  			Error: []*pb.ValidationError{
  2676  				{
  2677  					Id: "1",
  2678  				},
  2679  			},
  2680  		}, nil)
  2681  		mockWorkerClient.EXPECT().GetValidatorError(
  2682  			gomock.Any(),
  2683  			gomock.Any(),
  2684  		).Return(&pb.GetValidationErrorResponse{
  2685  			Result: false,
  2686  			Msg:    "something wrong in worker",
  2687  			Error:  []*pb.ValidationError{},
  2688  		}, nil)
  2689  		mockWorkerClient.EXPECT().GetValidatorError(
  2690  			gomock.Any(),
  2691  			gomock.Any(),
  2692  		).Return(&pb.GetValidationErrorResponse{}, errors.New("grpc error"))
  2693  		mockRevelantWorkerClient(mockWorkerClient, taskName, sources[idx], startReq)
  2694  		t.workerClients[worker] = newMockRPCClient(mockWorkerClient)
  2695  	}
  2696  	ctx, cancel := context.WithCancel(context.Background())
  2697  	defer t.clearSchedulerEnv(cancel, &wg)
  2698  	// start task without validation
  2699  	sourceResps := []*pb.CommonWorkerResponse{{Result: true, Source: sources[0]}, {Result: true, Source: sources[1]}}
  2700  	server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "", t.workerClients)
  2701  	mock := conn.InitVersionDB()
  2702  	defer func() {
  2703  		conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{}
  2704  	}()
  2705  	mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).
  2706  		AddRow("version", "5.7.25-TiDB-v4.0.2"))
  2707  	stResp, err := server.StartTask(context.Background(), startReq)
  2708  	require.NoError(t.T(), err)
  2709  	require.True(t.T(), stResp.Result)
  2710  
  2711  	for _, source := range sources {
  2712  		t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running)
  2713  	}
  2714  	require.Equal(t.T(), sourceResps, stResp.Sources)
  2715  	// 1. query existing task's error
  2716  	errReq := &pb.GetValidationErrorRequest{
  2717  		TaskName: taskName,
  2718  		ErrState: pb.ValidateErrorState_InvalidErr,
  2719  	}
  2720  	resp, err := server.GetValidationError(context.Background(), errReq)
  2721  	require.NoError(t.T(), err)
  2722  	require.Equal(t.T(), "", resp.Msg)
  2723  	require.True(t.T(), resp.Result)
  2724  	require.Len(t.T(), resp.Error, 2)
  2725  	// 2. query invalid task's error
  2726  	errReq.TaskName = "invalid-task"
  2727  	resp, err = server.GetValidationError(context.Background(), errReq)
  2728  	require.NoError(t.T(), err)
  2729  	require.Contains(t.T(), resp.Msg, "cannot get subtask by task name")
  2730  	require.False(t.T(), resp.Result)
  2731  	// 3. query invalid state
  2732  	errReq.TaskName = taskName
  2733  	errReq.ErrState = pb.ValidateErrorState_ResolvedErr // invalid state
  2734  	resp, err = server.GetValidationError(context.Background(), errReq)
  2735  	require.NoError(t.T(), err)
  2736  	require.Contains(t.T(), resp.Msg, "only support querying `all`, `unprocessed`, and `ignored` error")
  2737  	require.False(t.T(), resp.Result)
  2738  	// 4. worker error
  2739  	errReq.TaskName = taskName
  2740  	errReq.ErrState = pb.ValidateErrorState_InvalidErr
  2741  	resp, err = server.GetValidationError(context.Background(), errReq)
  2742  	require.NoError(t.T(), err)
  2743  	require.False(t.T(), resp.Result)
  2744  	require.Contains(t.T(), resp.Msg, "something wrong in worker")
  2745  	// 5. grpc error
  2746  	resp, err = server.GetValidationError(context.Background(), errReq)
  2747  	require.NoError(t.T(), err)
  2748  	require.False(t.T(), resp.Result)
  2749  	require.Contains(t.T(), resp.Msg, "grpc error")
  2750  }
  2751  
  2752  func (t *testMasterSuite) TestOperateValidationError() {
  2753  	var (
  2754  		wg       sync.WaitGroup
  2755  		taskName = "test"
  2756  	)
  2757  	ctrl := gomock.NewController(t.T())
  2758  	defer ctrl.Finish()
  2759  	server := testDefaultMasterServer(t.T())
  2760  	server.etcdClient = t.etcdTestCli
  2761  	sources, workers := defaultWorkerSource()
  2762  	startReq := &pb.StartTaskRequest{
  2763  		Task:    taskConfig,
  2764  		Sources: sources,
  2765  	}
  2766  	// test query all workers
  2767  	for idx, worker := range workers {
  2768  		mockWorkerClient := pbmock.NewMockWorkerClient(ctrl)
  2769  		mockWorkerClient.EXPECT().OperateValidatorError(
  2770  			gomock.Any(),
  2771  			gomock.Any(),
  2772  		).Return(&pb.OperateValidationErrorResponse{
  2773  			Result: true,
  2774  			Msg:    "",
  2775  		}, nil)
  2776  		mockWorkerClient.EXPECT().OperateValidatorError(
  2777  			gomock.Any(),
  2778  			gomock.Any(),
  2779  		).Return(&pb.OperateValidationErrorResponse{
  2780  			Result: false,
  2781  			Msg:    "something wrong in worker",
  2782  		}, nil)
  2783  		mockWorkerClient.EXPECT().OperateValidatorError(
  2784  			gomock.Any(),
  2785  			gomock.Any(),
  2786  		).Return(&pb.OperateValidationErrorResponse{}, errors.New("grpc error"))
  2787  		mockRevelantWorkerClient(mockWorkerClient, taskName, sources[idx], startReq)
  2788  		t.workerClients[worker] = newMockRPCClient(mockWorkerClient)
  2789  	}
  2790  	ctx, cancel := context.WithCancel(context.Background())
  2791  	defer t.clearSchedulerEnv(cancel, &wg)
  2792  	// start task without validation
  2793  	sourceResps := []*pb.CommonWorkerResponse{{Result: true, Source: sources[0]}, {Result: true, Source: sources[1]}}
  2794  	server.scheduler, _ = t.testMockScheduler(ctx, &wg, sources, workers, "", t.workerClients)
  2795  	mock := conn.InitVersionDB()
  2796  	defer func() {
  2797  		conn.DefaultDBProvider = &conn.DefaultDBProviderImpl{}
  2798  	}()
  2799  	mock.ExpectQuery("SHOW GLOBAL VARIABLES LIKE 'version'").WillReturnRows(sqlmock.NewRows([]string{"Variable_name", "Value"}).
  2800  		AddRow("version", "5.7.25-TiDB-v4.0.2"))
  2801  	stResp, err := server.StartTask(context.Background(), startReq)
  2802  	require.NoError(t.T(), err)
  2803  	require.True(t.T(), stResp.Result)
  2804  
  2805  	for _, source := range sources {
  2806  		t.subTaskStageMatch(server.scheduler, taskName, source, pb.Stage_Running)
  2807  	}
  2808  	require.Equal(t.T(), sourceResps, stResp.Sources)
  2809  	// 1. query existing task's error
  2810  	opReq := &pb.OperateValidationErrorRequest{
  2811  		TaskName:   taskName,
  2812  		IsAllError: true,
  2813  	}
  2814  	resp, err := server.OperateValidationError(context.Background(), opReq)
  2815  	require.NoError(t.T(), err)
  2816  	require.Equal(t.T(), resp.Msg, "")
  2817  	require.True(t.T(), resp.Result)
  2818  	// 2. query invalid task's error
  2819  	opReq.TaskName = "invalid-task"
  2820  	resp, err = server.OperateValidationError(context.Background(), opReq)
  2821  	require.NoError(t.T(), err)
  2822  	require.Contains(t.T(), resp.Msg, "cannot get subtask by task name")
  2823  	require.False(t.T(), resp.Result)
  2824  	// 3. worker error
  2825  	opReq.TaskName = taskName
  2826  	resp, err = server.OperateValidationError(context.Background(), opReq)
  2827  	require.NoError(t.T(), err)
  2828  	require.False(t.T(), resp.Result)
  2829  	require.Contains(t.T(), resp.Msg, "something wrong in worker")
  2830  	// 4. grpc error
  2831  	opReq.TaskName = taskName
  2832  	resp, err = server.OperateValidationError(context.Background(), opReq)
  2833  	require.NoError(t.T(), err)
  2834  	require.False(t.T(), resp.Result)
  2835  	require.Contains(t.T(), resp.Msg, "grpc error")
  2836  }
  2837  
  2838  func (t *testMasterSuite) TestDashboardAddress() {
  2839  	// Temp file for test log output
  2840  	file, err := ioutil.TempFile(t.T().TempDir(), "*")
  2841  	require.NoError(t.T(), err)
  2842  	defer os.Remove(file.Name())
  2843  
  2844  	cfg := NewConfig()
  2845  	err = cfg.FromContent(SampleConfig)
  2846  	require.NoError(t.T(), err)
  2847  
  2848  	err = log.InitLogger(&log.Config{
  2849  		File: file.Name(),
  2850  	})
  2851  	require.NoError(t.T(), err)
  2852  	defer func() {
  2853  		err = log.InitLogger(&log.Config{})
  2854  		require.NoError(t.T(), err)
  2855  	}()
  2856  
  2857  	cfg.OpenAPI = true
  2858  	cfg.LogFile = file.Name()
  2859  	cfg.DataDir = t.T().TempDir()
  2860  
  2861  	server := NewServer(cfg)
  2862  	server.leader.Store(oneselfLeader)
  2863  	ctx, cancel := context.WithCancel(context.Background())
  2864  	go server.ap.Start(ctx)
  2865  	go func() {
  2866  		err2 := server.Start(ctx)
  2867  		require.NoError(t.T(), err2)
  2868  	}()
  2869  	defer server.Close()
  2870  	defer cancel()
  2871  
  2872  	// Wait server bootstraped.
  2873  	time.Sleep(time.Second * 3)
  2874  
  2875  	content, err := ioutil.ReadFile(file.Name())
  2876  	require.NoError(t.T(), err)
  2877  	require.Contains(t.T(), string(content), "Web UI enabled")
  2878  }
  2879  
  2880  func (t *testMasterSuite) TestGetLatestMeta() {
  2881  	_, mockDB, err := conn.InitMockDBFull()
  2882  	require.NoError(t.T(), err)
  2883  	getMasterStatusError := errors.New("failed to get master status")
  2884  	mockDB.ExpectQuery(`SHOW MASTER STATUS`).WillReturnError(getMasterStatusError)
  2885  	meta, err := GetLatestMeta(context.Background(), "", &dbconfig.DBConfig{})
  2886  	require.Contains(t.T(), err.Error(), getMasterStatusError.Error())
  2887  	require.Nil(t.T(), meta)
  2888  
  2889  	_, mockDB, err = conn.InitMockDBFull()
  2890  	require.NoError(t.T(), err)
  2891  	rows := mockDB.NewRows([]string{"File", "Position", "Binlog_Do_DB", "Binlog_Ignore_DB", "Executed_Gtid_Set"})
  2892  	mockDB.ExpectQuery(`SHOW MASTER STATUS`).WillReturnRows(rows)
  2893  	meta, err = GetLatestMeta(context.Background(), "", &dbconfig.DBConfig{})
  2894  	require.True(t.T(), terror.ErrNoMasterStatus.Equal(err))
  2895  	require.Nil(t.T(), meta)
  2896  
  2897  	_, mockDB, err = conn.InitMockDBFull()
  2898  	require.NoError(t.T(), err)
  2899  	// 5 columns for MySQL
  2900  	rows = mockDB.NewRows([]string{"File", "Position", "Binlog_Do_DB", "Binlog_Ignore_DB", "Executed_Gtid_Set"}).AddRow(
  2901  		"mysql-bin.000009", 11232, "do_db", "ignore_db", "",
  2902  	)
  2903  	mockDB.ExpectQuery(`SHOW MASTER STATUS`).WillReturnRows(rows)
  2904  	meta, err = GetLatestMeta(context.Background(), mysql.MySQLFlavor, &dbconfig.DBConfig{})
  2905  	require.NoError(t.T(), err)
  2906  	require.Equal(t.T(), meta.BinLogName, "mysql-bin.000009")
  2907  	require.Equal(t.T(), meta.BinLogPos, uint32(11232))
  2908  	require.Equal(t.T(), meta.BinLogGTID, "")
  2909  
  2910  	_, mockDB, err = conn.InitMockDBFull()
  2911  	require.NoError(t.T(), err)
  2912  	// 4 columns for MariaDB
  2913  	rows = mockDB.NewRows([]string{"File", "Position", "Binlog_Do_DB", "Binlog_Ignore_DB"}).AddRow(
  2914  		"mysql-bin.000009", 11232, "do_db", "ignore_db",
  2915  	)
  2916  	mockDB.ExpectQuery(`SHOW MASTER STATUS`).WillReturnRows(rows)
  2917  	rows = mockDB.NewRows([]string{"Variable_name", "Value"}).AddRow("gtid_binlog_pos", "1-2-100")
  2918  	mockDB.ExpectQuery(`SHOW GLOBAL VARIABLES LIKE 'gtid_binlog_pos'`).WillReturnRows(rows)
  2919  	meta, err = GetLatestMeta(context.Background(), mysql.MariaDBFlavor, &dbconfig.DBConfig{})
  2920  	require.NoError(t.T(), err)
  2921  	require.Equal(t.T(), meta.BinLogName, "mysql-bin.000009")
  2922  	require.Equal(t.T(), meta.BinLogPos, uint32(11232))
  2923  	require.Equal(t.T(), meta.BinLogGTID, "1-2-100")
  2924  }