github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/master/bootstrap_test.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package master
    15  
    16  import (
    17  	"context"
    18  	"os"
    19  	"path/filepath"
    20  	"strconv"
    21  	"strings"
    22  	"testing"
    23  	"time"
    24  
    25  	"github.com/golang/mock/gomock"
    26  	. "github.com/pingcap/check"
    27  	"github.com/pingcap/tiflow/dm/config"
    28  	"github.com/pingcap/tiflow/dm/master/workerrpc"
    29  	"github.com/pingcap/tiflow/dm/pb"
    30  	"github.com/pingcap/tiflow/dm/pbmock"
    31  	tcontext "github.com/pingcap/tiflow/dm/pkg/context"
    32  	"github.com/pingcap/tiflow/dm/pkg/ha"
    33  	"github.com/pingcap/tiflow/dm/pkg/log"
    34  	"github.com/pingcap/tiflow/dm/pkg/terror"
    35  	filter "github.com/pingcap/tiflow/pkg/binlog-filter"
    36  	clientv3 "go.etcd.io/etcd/client/v3"
    37  	"go.etcd.io/etcd/tests/v3/integration"
    38  )
    39  
    40  type testMaster struct {
    41  	workerClients     map[string]workerrpc.Client
    42  	saveMaxRetryNum   int
    43  	electionTTLBackup int
    44  	testT             *testing.T
    45  
    46  	testEtcdCluster *integration.ClusterV3
    47  	etcdTestCli     *clientv3.Client
    48  }
    49  
    50  var testSuite = SerialSuites(&testMaster{})
    51  
    52  func TestMaster(t *testing.T) {
    53  	err := log.InitLogger(&log.Config{})
    54  	if err != nil {
    55  		t.Fatal(err)
    56  	}
    57  	pwd, err = os.Getwd()
    58  	if err != nil {
    59  		t.Fatal(err)
    60  	}
    61  	integration.BeforeTestExternal(t)
    62  	// inject *testing.T to testMaster
    63  	s := testSuite.(*testMaster)
    64  	s.testT = t
    65  
    66  	TestingT(t)
    67  }
    68  
    69  func (t *testMaster) SetUpSuite(c *C) {
    70  	c.Assert(log.InitLogger(&log.Config{}), IsNil)
    71  	t.workerClients = make(map[string]workerrpc.Client)
    72  	t.saveMaxRetryNum = maxRetryNum
    73  	t.electionTTLBackup = electionTTL
    74  	electionTTL = 3
    75  	maxRetryNum = 2
    76  	checkAndAdjustSourceConfigForDMCtlFunc = checkAndNoAdjustSourceConfigMock
    77  }
    78  
    79  func (t *testMaster) TearDownSuite(c *C) {
    80  	maxRetryNum = t.saveMaxRetryNum
    81  	electionTTL = t.electionTTLBackup
    82  	checkAndAdjustSourceConfigForDMCtlFunc = checkAndAdjustSourceConfig
    83  }
    84  
    85  func (t *testMaster) SetUpTest(c *C) {
    86  	t.testEtcdCluster = integration.NewClusterV3(t.testT, &integration.ClusterConfig{Size: 1})
    87  	t.etcdTestCli = t.testEtcdCluster.RandClient()
    88  	t.clearEtcdEnv(c)
    89  }
    90  
    91  func (t *testMaster) TearDownTest(c *C) {
    92  	t.clearEtcdEnv(c)
    93  	t.testEtcdCluster.Terminate(t.testT)
    94  }
    95  
    96  func (t *testMaster) clearEtcdEnv(c *C) {
    97  	c.Assert(ha.ClearTestInfoOperation(t.etcdTestCli), IsNil)
    98  }
    99  
   100  func testDefaultMasterServerWithC(c *C) *Server {
   101  	cfg := NewConfig()
   102  	err := cfg.FromContent(SampleConfig)
   103  	c.Assert(err, IsNil)
   104  	cfg.DataDir = c.MkDir()
   105  	server := NewServer(cfg)
   106  	server.leader.Store(oneselfLeader)
   107  	go server.ap.Start(context.Background())
   108  
   109  	return server
   110  }
   111  
   112  func (t *testMaster) TestCollectSourceConfigFilesV1Import(c *C) {
   113  	s := testDefaultMasterServerWithC(c)
   114  	defer s.Close()
   115  	s.cfg.V1SourcesPath = c.MkDir()
   116  
   117  	ctx, cancel := context.WithCancel(context.Background())
   118  	defer cancel()
   119  
   120  	tctx := tcontext.NewContext(ctx, log.L())
   121  
   122  	// no source file exist.
   123  	cfgs, err := s.collectSourceConfigFilesV1Import(tctx)
   124  	c.Assert(err, IsNil)
   125  	c.Assert(cfgs, HasLen, 0)
   126  
   127  	host := os.Getenv("MYSQL_HOST")
   128  	if host == "" {
   129  		host = "127.0.0.1"
   130  	}
   131  	port, _ := strconv.Atoi(os.Getenv("MYSQL_PORT"))
   132  	if port == 0 {
   133  		port = 3306
   134  	}
   135  	user := os.Getenv("MYSQL_USER")
   136  	if user == "" {
   137  		user = "root"
   138  	}
   139  	password := os.Getenv("MYSQL_PSWD")
   140  
   141  	cfg1, err := config.SourceCfgFromYaml(config.SampleSourceConfig)
   142  	c.Assert(err, IsNil)
   143  	// fix empty map after marshal/unmarshal becomes nil
   144  	cfg1.From.Adjust()
   145  	cfg1.Tracer = map[string]interface{}{}
   146  	cfg1.Filters = []*filter.BinlogEventRule{}
   147  	cfg1.From.Host = host
   148  	cfg1.From.Port = port
   149  	cfg1.From.User = user
   150  	cfg1.From.Password = password
   151  	cfg1.RelayDir = "relay-dir"
   152  	c.Assert(checkAndAdjustSourceConfigForDMCtlFunc(ctx, cfg1), IsNil) // adjust source config.
   153  	cfg2 := cfg1.Clone()
   154  	cfg2.SourceID = "mysql-replica-02"
   155  
   156  	// write into source files.
   157  	data1, err := cfg1.Yaml()
   158  	c.Assert(err, IsNil)
   159  	c.Assert(os.WriteFile(filepath.Join(s.cfg.V1SourcesPath, "source1.yaml"), []byte(data1), 0o644), IsNil)
   160  	data2, err := cfg2.Yaml()
   161  	c.Assert(err, IsNil)
   162  	c.Assert(os.WriteFile(filepath.Join(s.cfg.V1SourcesPath, "source2.yaml"), []byte(data2), 0o644), IsNil)
   163  
   164  	// collect again, two configs exist.
   165  	cfgs, err = s.collectSourceConfigFilesV1Import(tctx)
   166  	c.Assert(err, IsNil)
   167  	for _, cfg := range cfgs {
   168  		cfg.From.Session = nil
   169  	}
   170  	c.Assert(cfgs, HasLen, 2)
   171  	c.Assert(cfgs[cfg1.SourceID], DeepEquals, cfg1)
   172  	c.Assert(cfgs[cfg2.SourceID], DeepEquals, cfg2)
   173  
   174  	// put a invalid source file.
   175  	c.Assert(os.WriteFile(filepath.Join(s.cfg.V1SourcesPath, "invalid.yaml"), []byte("invalid-source-data"), 0o644), IsNil)
   176  	cfgs, err = s.collectSourceConfigFilesV1Import(tctx)
   177  	c.Assert(terror.ErrConfigYamlTransform.Equal(err), IsTrue)
   178  	c.Assert(cfgs, HasLen, 0)
   179  }
   180  
   181  func (t *testMaster) TestWaitWorkersReadyV1Import(c *C) {
   182  	oldWaitWorkerV1Timeout := waitWorkerV1Timeout
   183  	defer func() {
   184  		waitWorkerV1Timeout = oldWaitWorkerV1Timeout
   185  	}()
   186  	waitWorkerV1Timeout = 5 * time.Second
   187  
   188  	ctx, cancel := context.WithCancel(context.Background())
   189  	defer cancel()
   190  
   191  	tctx := tcontext.NewContext(ctx, log.L())
   192  
   193  	s := testDefaultMasterServerWithC(c)
   194  	defer s.Close()
   195  	s.cfg.V1SourcesPath = c.MkDir()
   196  	c.Assert(s.scheduler.Start(ctx, t.etcdTestCli), IsNil)
   197  
   198  	cfg1, err := config.SourceCfgFromYaml(config.SampleSourceConfig)
   199  	c.Assert(err, IsNil)
   200  	cfg2 := cfg1.Clone()
   201  	cfg2.SourceID = "mysql-replica-02"
   202  	cfgs := map[string]*config.SourceConfig{
   203  		cfg1.SourceID: cfg1,
   204  		cfg2.SourceID: cfg2,
   205  	}
   206  
   207  	// no worker registered, timeout.
   208  	err = s.waitWorkersReadyV1Import(tctx, cfgs)
   209  	c.Assert(err, ErrorMatches, ".*wait for DM-worker instances timeout.*")
   210  
   211  	// register one worker.
   212  	req1 := &pb.RegisterWorkerRequest{
   213  		Name:    "worker-1",
   214  		Address: "127.0.0.1:8262",
   215  	}
   216  	resp1, err := s.RegisterWorker(ctx, req1)
   217  	c.Assert(err, IsNil)
   218  	c.Assert(resp1.Result, IsTrue)
   219  
   220  	// still timeout because no enough workers.
   221  	err = s.waitWorkersReadyV1Import(tctx, cfgs)
   222  	c.Assert(err, ErrorMatches, ".*wait for DM-worker instances timeout.*")
   223  
   224  	// register another worker.
   225  	go func() {
   226  		time.Sleep(1500 * time.Millisecond)
   227  		req2 := &pb.RegisterWorkerRequest{
   228  			Name:    "worker-2",
   229  			Address: "127.0.0.1:8263",
   230  		}
   231  		resp2, err2 := s.RegisterWorker(ctx, req2)
   232  		c.Assert(err2, IsNil)
   233  		c.Assert(resp2.Result, IsTrue)
   234  	}()
   235  
   236  	err = s.waitWorkersReadyV1Import(tctx, cfgs)
   237  	c.Assert(err, IsNil)
   238  }
   239  
   240  func (t *testMaster) TestSubtaskCfgsStagesV1Import(c *C) {
   241  	var (
   242  		worker1Name = "worker-1"
   243  		worker1Addr = "127.0.0.1:8262"
   244  		worker2Name = "worker-2"
   245  		worker2Addr = "127.0.0.1:8263"
   246  		taskName1   = "task-1"
   247  		taskName2   = "task-2"
   248  		sourceID1   = "mysql-replica-01"
   249  		sourceID2   = "mysql-replica-02"
   250  	)
   251  
   252  	cfg11 := config.NewSubTaskConfig()
   253  	c.Assert(cfg11.Decode(config.SampleSubtaskConfig, true), IsNil)
   254  	cfg11.Dir = "./dump_data"
   255  	cfg11.ChunkFilesize = "64"
   256  	cfg11.Name = taskName1
   257  	cfg11.SourceID = sourceID1
   258  	c.Assert(cfg11.Adjust(true), IsNil) // adjust again after manually modified some items.
   259  	data11, err := cfg11.Toml()
   260  	c.Assert(err, IsNil)
   261  	data11 = strings.ReplaceAll(data11, `chunk-filesize = "64"`, `chunk-filesize = 64`) // different type between v1.0.x and v2.0.x.
   262  
   263  	cfg12, err := cfg11.Clone()
   264  	c.Assert(err, IsNil)
   265  	cfg12.SourceID = sourceID2
   266  	data12, err := cfg12.Toml()
   267  	c.Assert(err, IsNil)
   268  	data12 = strings.ReplaceAll(data12, `chunk-filesize = "64"`, `chunk-filesize = 64`)
   269  
   270  	cfg21, err := cfg11.Clone()
   271  	c.Assert(err, IsNil)
   272  	cfg21.Dir = "./dump_data"
   273  	cfg21.Name = taskName2
   274  	c.Assert(cfg21.Adjust(true), IsNil)
   275  	data21, err := cfg21.Toml()
   276  	c.Assert(err, IsNil)
   277  	data21 = strings.ReplaceAll(data21, `chunk-filesize = "64"`, `chunk-filesize = 64`)
   278  
   279  	cfg22, err := cfg21.Clone()
   280  	c.Assert(err, IsNil)
   281  	cfg22.SourceID = sourceID2
   282  	data22, err := cfg22.Toml()
   283  	c.Assert(err, IsNil)
   284  	data22 = strings.ReplaceAll(data22, `chunk-filesize = "64"`, `chunk-filesize = 64`)
   285  
   286  	ctx, cancel := context.WithCancel(context.Background())
   287  	defer cancel()
   288  	tctx := tcontext.NewContext(ctx, log.L())
   289  
   290  	s := testDefaultMasterServerWithC(c)
   291  	defer s.Close()
   292  	s.cfg.V1SourcesPath = c.MkDir()
   293  	c.Assert(s.scheduler.Start(ctx, t.etcdTestCli), IsNil)
   294  
   295  	// no workers exist, no config and status need to get.
   296  	cfgs, stages, err := s.getSubtaskCfgsStagesV1Import(tctx)
   297  	c.Assert(err, IsNil)
   298  	c.Assert(cfgs, HasLen, 0)
   299  	c.Assert(stages, HasLen, 0)
   300  
   301  	ctrl := gomock.NewController(c)
   302  	defer ctrl.Finish()
   303  	mockWCli1 := pbmock.NewMockWorkerClient(ctrl)
   304  	mockWCli2 := pbmock.NewMockWorkerClient(ctrl)
   305  	c.Assert(s.scheduler.AddWorker(worker1Name, worker1Addr), IsNil)
   306  	c.Assert(s.scheduler.AddWorker(worker2Name, worker2Addr), IsNil)
   307  	s.scheduler.SetWorkerClientForTest(worker1Name, newMockRPCClient(mockWCli1))
   308  	s.scheduler.SetWorkerClientForTest(worker2Name, newMockRPCClient(mockWCli2))
   309  
   310  	mockWCli1.EXPECT().OperateV1Meta(
   311  		gomock.Any(),
   312  		&pb.OperateV1MetaRequest{
   313  			Op: pb.V1MetaOp_GetV1Meta,
   314  		},
   315  	).Return(&pb.OperateV1MetaResponse{
   316  		Result: true,
   317  		Meta: map[string]*pb.V1SubTaskMeta{
   318  			taskName1: {
   319  				Op:    pb.TaskOp_Start,
   320  				Stage: pb.Stage_Running,
   321  				Name:  taskName1,
   322  				Task:  []byte(data11),
   323  			},
   324  			taskName2: {
   325  				Op:    pb.TaskOp_Pause,
   326  				Stage: pb.Stage_Paused,
   327  				Name:  taskName2,
   328  				Task:  []byte(data21),
   329  			},
   330  		},
   331  	}, nil)
   332  
   333  	mockWCli2.EXPECT().OperateV1Meta(
   334  		gomock.Any(),
   335  		&pb.OperateV1MetaRequest{
   336  			Op: pb.V1MetaOp_GetV1Meta,
   337  		},
   338  	).Return(&pb.OperateV1MetaResponse{
   339  		Result: true,
   340  		Meta: map[string]*pb.V1SubTaskMeta{
   341  			taskName1: {
   342  				Op:    pb.TaskOp_Resume,
   343  				Stage: pb.Stage_Running,
   344  				Name:  taskName1,
   345  				Task:  []byte(data12),
   346  			},
   347  			taskName2: {
   348  				Op:    pb.TaskOp_Start,
   349  				Stage: pb.Stage_Running,
   350  				Name:  taskName2,
   351  				Task:  []byte(data22),
   352  			},
   353  		},
   354  	}, nil)
   355  
   356  	// all workers return valid config and stage.
   357  	cfgs, stages, err = s.getSubtaskCfgsStagesV1Import(tctx)
   358  	c.Assert(err, IsNil)
   359  	c.Assert(cfgs, HasLen, 2)
   360  	c.Assert(stages, HasLen, 2)
   361  	c.Assert(cfgs[taskName1], HasLen, 2)
   362  	c.Assert(cfgs[taskName2], HasLen, 2)
   363  	c.Assert(cfgs[taskName1][sourceID1], DeepEquals, *cfg11)
   364  	c.Assert(cfgs[taskName1][sourceID2], DeepEquals, *cfg12)
   365  	c.Assert(cfgs[taskName2][sourceID1], DeepEquals, *cfg21)
   366  	c.Assert(cfgs[taskName2][sourceID2], DeepEquals, *cfg22)
   367  	c.Assert(stages[taskName1], HasLen, 2)
   368  	c.Assert(stages[taskName2], HasLen, 2)
   369  	c.Assert(stages[taskName1][sourceID1], Equals, pb.Stage_Running)
   370  	c.Assert(stages[taskName1][sourceID2], Equals, pb.Stage_Running)
   371  	c.Assert(stages[taskName2][sourceID1], Equals, pb.Stage_Paused)
   372  	c.Assert(stages[taskName2][sourceID2], Equals, pb.Stage_Running)
   373  
   374  	// one of workers return invalid config.
   375  	mockWCli1.EXPECT().OperateV1Meta(
   376  		gomock.Any(),
   377  		&pb.OperateV1MetaRequest{
   378  			Op: pb.V1MetaOp_GetV1Meta,
   379  		},
   380  	).Return(&pb.OperateV1MetaResponse{
   381  		Result: true,
   382  		Meta: map[string]*pb.V1SubTaskMeta{
   383  			taskName1: {
   384  				Op:    pb.TaskOp_Start,
   385  				Stage: pb.Stage_Running,
   386  				Name:  taskName1,
   387  				Task:  []byte(data11),
   388  			},
   389  			taskName2: {
   390  				Op:    pb.TaskOp_Pause,
   391  				Stage: pb.Stage_Paused,
   392  				Name:  taskName2,
   393  				Task:  []byte(data21),
   394  			},
   395  		},
   396  	}, nil)
   397  	mockWCli2.EXPECT().OperateV1Meta(
   398  		gomock.Any(),
   399  		&pb.OperateV1MetaRequest{
   400  			Op: pb.V1MetaOp_GetV1Meta,
   401  		},
   402  	).Return(&pb.OperateV1MetaResponse{
   403  		Result: true,
   404  		Meta: map[string]*pb.V1SubTaskMeta{
   405  			taskName1: {
   406  				Op:    pb.TaskOp_Resume,
   407  				Stage: pb.Stage_Running,
   408  				Name:  taskName1,
   409  				Task:  []byte("invalid subtask data"),
   410  			},
   411  			taskName2: {
   412  				Op:    pb.TaskOp_Start,
   413  				Stage: pb.Stage_Running,
   414  				Name:  taskName2,
   415  				Task:  []byte(data22),
   416  			},
   417  		},
   418  	}, nil)
   419  	cfgs, stages, err = s.getSubtaskCfgsStagesV1Import(tctx)
   420  	c.Assert(err, ErrorMatches, ".*fail to get subtask config and stage.*")
   421  	c.Assert(cfgs, HasLen, 0)
   422  	c.Assert(stages, HasLen, 0)
   423  }
   424  
   425  func checkAndNoAdjustSourceConfigMock(ctx context.Context, cfg *config.SourceConfig) error {
   426  	if _, err := cfg.Yaml(); err != nil {
   427  		return err
   428  	}
   429  	return cfg.Verify()
   430  }