github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/test/e2e/e2e_dm_test.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package e2e_test
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  	"database/sql"
    20  	"encoding/json"
    21  	"fmt"
    22  	"io"
    23  	"net/http"
    24  	"net/url"
    25  	"os"
    26  	"regexp"
    27  	"strings"
    28  	"sync"
    29  	"testing"
    30  	"time"
    31  
    32  	pb "github.com/pingcap/tiflow/engine/enginepb"
    33  	"github.com/pingcap/tiflow/engine/jobmaster/dm"
    34  	"github.com/pingcap/tiflow/engine/jobmaster/dm/metadata"
    35  	"github.com/pingcap/tiflow/engine/jobmaster/dm/openapi"
    36  	dmpkg "github.com/pingcap/tiflow/engine/pkg/dm"
    37  	"github.com/pingcap/tiflow/engine/test/e2e"
    38  	"github.com/pingcap/tiflow/pkg/httputil"
    39  	"github.com/pingcap/tiflow/tests/integration_tests/util"
    40  	"github.com/stretchr/testify/require"
    41  )
    42  
    43  const (
    44  	masterAddr = "127.0.0.1:10245"
    45  	baseURL    = "http://" + masterAddr + "/api/v1/jobs/%s"
    46  	tenantID   = "e2e-test"
    47  	projectID  = "project-dm"
    48  
    49  	envS3AccessKeyID     = "ENGINE_S3_ACCESS_KEY"
    50  	envS3SecretAccessKey = "ENGINE_S3_SECRET_KEY"
    51  	envS3Endpoint        = "ENGINE_S3_ENDPOINT"
    52  )
    53  
    54  func TestDMJob(t *testing.T) {
    55  	mysqlCfg := util.DBConfig{
    56  		Host:     "127.0.0.1",
    57  		Port:     3306,
    58  		User:     "root",
    59  		Password: "",
    60  	}
    61  	tidbCfg := util.DBConfig{
    62  		Host:     "127.0.0.1",
    63  		Port:     4000,
    64  		User:     "root",
    65  		Password: "",
    66  	}
    67  
    68  	mysql, err := util.CreateDB(mysqlCfg)
    69  	require.NoError(t, err)
    70  	defer func() {
    71  		require.NoError(t, util.CloseDB(mysql))
    72  	}()
    73  	tidb, err := util.CreateDB(tidbCfg)
    74  	require.NoError(t, err)
    75  	defer func() {
    76  		require.NoError(t, util.CloseDB(tidb))
    77  	}()
    78  
    79  	// clean up
    80  	_, err = tidb.Exec("drop database if exists dm_meta")
    81  	require.NoError(t, err)
    82  
    83  	var wg sync.WaitGroup
    84  	wg.Add(2)
    85  	go func() {
    86  		defer wg.Done()
    87  		testSimpleAllModeTask(t, mysql, tidb, "test1")
    88  	}()
    89  	go func() {
    90  		defer wg.Done()
    91  		testSimpleAllModeTask(t, mysql, tidb, "test2")
    92  	}()
    93  	wg.Wait()
    94  
    95  	// executor metrics
    96  	metricsURLs := []string{
    97  		"http://127.0.0.1:11241/metrics",
    98  		"http://127.0.0.1:11242/metrics",
    99  		"http://127.0.0.1:11243/metrics",
   100  	}
   101  
   102  	testMetrics := func(re *regexp.Regexp) {
   103  		jobIDs := map[string]struct{}{}
   104  
   105  		ctx := context.Background()
   106  		cli, err := httputil.NewClient(nil)
   107  		require.NoError(t, err)
   108  		for _, metricsURL := range metricsURLs {
   109  			resp, err := cli.Get(ctx, metricsURL)
   110  			require.NoError(t, err)
   111  			content, err := io.ReadAll(resp.Body)
   112  			require.NoError(t, err)
   113  			matched := re.FindAllSubmatch(content, -1)
   114  			for _, m := range matched {
   115  				jobIDs[string(m[1])] = struct{}{}
   116  			}
   117  			require.NoError(t, resp.Body.Close())
   118  		}
   119  		require.Equal(t, 2, len(jobIDs))
   120  	}
   121  
   122  	// test metrics for syncer
   123  	re := regexp.MustCompile(`syncer.*\{job_id="(.{36})"`)
   124  	testMetrics(re)
   125  	// test metrics for dm_worker_task_state: 2 running all job
   126  	re = regexp.MustCompile(`dm_worker_task_state.*\{job_id="(.{36})".*2`)
   127  	testMetrics(re)
   128  }
   129  
   130  func TestDMDumpSyncJob(t *testing.T) {
   131  	mysqlCfg := util.DBConfig{
   132  		Host:     "127.0.0.1",
   133  		Port:     3306,
   134  		User:     "root",
   135  		Password: "",
   136  	}
   137  	tidbCfg := util.DBConfig{
   138  		Host:     "127.0.0.1",
   139  		Port:     4000,
   140  		User:     "root",
   141  		Password: "",
   142  	}
   143  
   144  	mysql, err := util.CreateDB(mysqlCfg)
   145  	require.NoError(t, err)
   146  	defer func() {
   147  		require.NoError(t, util.CloseDB(mysql))
   148  	}()
   149  	tidb, err := util.CreateDB(tidbCfg)
   150  	require.NoError(t, err)
   151  	defer func() {
   152  		require.NoError(t, util.CloseDB(tidb))
   153  	}()
   154  
   155  	// clean up
   156  	_, err = tidb.Exec("drop database if exists dm_meta")
   157  	require.NoError(t, err)
   158  
   159  	var wg sync.WaitGroup
   160  	wg.Add(2)
   161  	go func() {
   162  		defer wg.Done()
   163  		testSimpleDumpSyncModeTask(t, mysql, tidb, "test1")
   164  	}()
   165  	go func() {
   166  		defer wg.Done()
   167  		testSimpleDumpSyncModeTask(t, mysql, tidb, "test2")
   168  	}()
   169  	wg.Wait()
   170  
   171  	// executor metrics
   172  	metricsURLs := []string{
   173  		"http://127.0.0.1:11241/metrics",
   174  		"http://127.0.0.1:11242/metrics",
   175  		"http://127.0.0.1:11243/metrics",
   176  	}
   177  
   178  	testMetrics := func(re *regexp.Regexp) {
   179  		jobIDs := map[string]struct{}{}
   180  
   181  		ctx := context.Background()
   182  		cli, err := httputil.NewClient(nil)
   183  		require.NoError(t, err)
   184  		for _, metricsURL := range metricsURLs {
   185  			resp, err := cli.Get(ctx, metricsURL)
   186  			require.NoError(t, err)
   187  			content, err := io.ReadAll(resp.Body)
   188  			require.NoError(t, err)
   189  			matched := re.FindAllSubmatch(content, -1)
   190  			for _, m := range matched {
   191  				jobIDs[string(m[1])] = struct{}{}
   192  			}
   193  			require.NoError(t, resp.Body.Close())
   194  		}
   195  		require.Equal(t, 2, len(jobIDs))
   196  	}
   197  
   198  	// test metrics for syncer
   199  	re := regexp.MustCompile(`syncer.*\{job_id="(.{36})"`)
   200  	testMetrics(re)
   201  	// test metrics for dm_worker_task_state: 2 running all job
   202  	re = regexp.MustCompile(`dm_worker_task_state.*\{job_id="(.{36})".*2`)
   203  	testMetrics(re)
   204  }
   205  
   206  // testSimpleAllModeTask extracts the common logic for a DM "all" mode task,
   207  // `db` should not contain special character.
   208  func testSimpleAllModeTask(
   209  	t *testing.T,
   210  	mysql, tidb *sql.DB,
   211  	db string,
   212  ) {
   213  	ctx := context.Background()
   214  	noError := func(_ interface{}, err error) {
   215  		require.NoError(t, err)
   216  	}
   217  
   218  	httpClient, err := httputil.NewClient(nil)
   219  	require.NoError(t, err)
   220  
   221  	noError(tidb.Exec("drop database if exists " + db))
   222  	noError(mysql.Exec("drop database if exists " + db))
   223  
   224  	// full phase
   225  	noError(mysql.Exec("create database " + db))
   226  	noError(mysql.Exec("create table " + db + ".t1(c int primary key)"))
   227  	noError(mysql.Exec("insert into " + db + ".t1 values(1)"))
   228  
   229  	dmJobCfg, err := os.ReadFile("./dm-job.yaml")
   230  	require.NoError(t, err)
   231  	// start full job
   232  	dmJobCfg = bytes.ReplaceAll(dmJobCfg, []byte("<placeholder>"), []byte(db))
   233  	dmJobCfg = bytes.ReplaceAll(dmJobCfg, []byte("task-mode: all"), []byte("task-mode: full"))
   234  	var jobID string
   235  	require.Eventually(t, func() bool {
   236  		var err error
   237  		jobID, err = e2e.CreateJobViaHTTP(ctx, masterAddr, tenantID, projectID,
   238  			pb.Job_DM, dmJobCfg)
   239  		return err == nil
   240  	}, time.Second*5, time.Millisecond*100)
   241  
   242  	// check full phase
   243  	waitRow := func(where string, db string) {
   244  		require.Eventually(t, func() bool {
   245  			//nolint:sqlclosecheck,rowserrcheck
   246  			rs, err := tidb.Query("select 1 from " + db + ".t1 where " + where)
   247  			if err != nil {
   248  				t.Logf("query error: %v", err)
   249  				return false
   250  			}
   251  			defer func(rs *sql.Rows) {
   252  				_ = rs.Close()
   253  			}(rs)
   254  			if !rs.Next() {
   255  				t.Log("no rows")
   256  				return false
   257  			}
   258  			if rs.Next() {
   259  				t.Log("more than one row")
   260  				return false
   261  			}
   262  			return true
   263  		}, 30*time.Second, 500*time.Millisecond)
   264  	}
   265  	waitRow("c = 1", db)
   266  
   267  	// load finished and job exits
   268  	// TODO: check load status after framework supports it
   269  	// TODO: check checkpoint deleted after frameworker support StopImpl
   270  	require.Eventually(t, func() bool {
   271  		job, err := e2e.QueryJobViaHTTP(ctx, masterAddr, tenantID, projectID, jobID)
   272  		return err == nil && job.State == pb.Job_Finished
   273  	}, time.Second*30, time.Millisecond*100)
   274  
   275  	source1 := "mysql-replica-01"
   276  	source2 := "mysql-replica-02"
   277  
   278  	// start incremental job
   279  	dmJobCfg = bytes.ReplaceAll(dmJobCfg, []byte("task-mode: full"), []byte("task-mode: incremental"))
   280  	require.Eventually(t, func() bool {
   281  		var err error
   282  		jobID, err = e2e.CreateJobViaHTTP(ctx, masterAddr, tenantID, projectID, pb.Job_DM, dmJobCfg)
   283  		return err == nil
   284  	}, time.Second*5, time.Millisecond*100)
   285  
   286  	var jobStatus *dm.JobStatus
   287  	// wait job online
   288  	require.Eventually(t, func() bool {
   289  		jobStatus, err = queryStatus(ctx, httpClient, jobID, []string{source1, source2})
   290  		return err == nil && jobStatus.JobID == jobID
   291  	}, time.Second*5, time.Millisecond*100)
   292  	require.Contains(t, string(jobStatus.TaskStatus[source1].Status.Status), "totalEvents")
   293  	require.Contains(t, jobStatus.TaskStatus[source2].Status.ErrorMsg, fmt.Sprintf("task %s for job not found", source2))
   294  
   295  	// incremental phase
   296  	noError(mysql.Exec("insert into " + db + ".t1 values(2)"))
   297  	waitRow("c = 2", db)
   298  
   299  	// imitate an error that can auto resume
   300  	noError(tidb.Exec("drop table " + db + ".t1"))
   301  	noError(mysql.Exec("insert into " + db + ".t1 values(3)"))
   302  	time.Sleep(time.Second)
   303  	noError(tidb.Exec("create table " + db + ".t1(c int primary key)"))
   304  	time.Sleep(time.Second)
   305  
   306  	// check auto resume
   307  	waitRow("c = 3", db)
   308  
   309  	// pause task
   310  	err = operateJob(ctx, httpClient, jobID, []string{source1}, dmpkg.Pause)
   311  	require.NoError(t, err)
   312  
   313  	// eventually paused
   314  	require.Eventually(t, func() bool {
   315  		jobStatus, err = queryStatus(ctx, httpClient, jobID, nil)
   316  		for _, task := range jobStatus.TaskStatus {
   317  			require.Greater(t, task.Status.IoTotalBytes, uint64(0))
   318  			require.Greater(t, task.Status.DumpIoTotalBytes, uint64(0))
   319  		}
   320  		require.NoError(t, err)
   321  		return jobStatus.TaskStatus[source1].Status.Stage == metadata.StagePaused
   322  	}, time.Second*10, time.Second)
   323  
   324  	// binlog schema list
   325  	binlogSchemaResp, err := getBinlogSchema(ctx, httpClient, jobID, source1, "", "")
   326  	require.NoError(t, err)
   327  	require.Len(t, binlogSchemaResp.Results, 1)
   328  	require.Equal(t, fmt.Sprintf(`["%s"]`, db), binlogSchemaResp.Results[source1].Msg)
   329  	require.Equal(t, "", binlogSchemaResp.Results[source1].ErrorMsg)
   330  
   331  	// resume task
   332  	err = operateJob(ctx, httpClient, jobID, nil, dmpkg.Resume)
   333  	require.NoError(t, err)
   334  
   335  	// eventually resumed
   336  	require.Eventually(t, func() bool {
   337  		jobStatus, err = queryStatus(ctx, httpClient, jobID, []string{source1})
   338  		require.NoError(t, err)
   339  		return jobStatus.TaskStatus[source1].Status.Stage == metadata.StageRunning
   340  	}, time.Second*10, time.Second)
   341  
   342  	// get job cfg
   343  	jobCfg, err := getJobCfg(ctx, httpClient, jobID)
   344  	require.NoError(t, err)
   345  	require.Contains(t, jobCfg, `flavor: mysql`)
   346  	require.Contains(t, jobCfg, `tidb_txn_mode: optimistic`)
   347  
   348  	noError(mysql.Exec("alter table " + db + ".t1 add column new_col int unique"))
   349  	noError(mysql.Exec("insert into " + db + ".t1 values(4,4)"))
   350  
   351  	// eventually error
   352  	require.Eventually(t, func() bool {
   353  		jobStatus, err = queryStatus(ctx, httpClient, jobID, nil)
   354  		require.NoError(t, err)
   355  		return jobStatus.TaskStatus[source1].Status.Stage == metadata.StageError &&
   356  			strings.Contains(jobStatus.TaskStatus[source1].Status.Result.Errors[0].RawCause,
   357  				`unsupported add column 'new_col' constraint UNIQUE KEY when altering`)
   358  	}, time.Second*10, time.Second)
   359  
   360  	// binlog replace
   361  	binlogReq := &openapi.SetBinlogOperatorRequest{
   362  		Op: openapi.SetBinlogOperatorRequestOpReplace,
   363  		Sqls: &[]string{
   364  			"alter table " + db + ".t1 add column new_col int;",
   365  			"alter table " + db + ".t1 add unique(new_col);",
   366  		},
   367  	}
   368  	binlogResp, err := setBinlogOperator(ctx, httpClient, jobID, source1, binlogReq)
   369  	require.NoError(t, err)
   370  	require.Equal(t, "", binlogResp.ErrorMsg)
   371  	require.Len(t, binlogResp.Results, 1)
   372  	require.Equal(t, "", binlogResp.Results[source1].ErrorMsg)
   373  	require.Equal(t, "", binlogResp.Results[source1].Msg)
   374  	waitRow("new_col = 4", db)
   375  
   376  	// binlog replace again
   377  	binlogResp, err = setBinlogOperator(ctx, httpClient, jobID, source1, binlogReq)
   378  	require.NoError(t, err)
   379  	require.Equal(t, "", binlogResp.ErrorMsg)
   380  	require.Len(t, binlogResp.Results, 1)
   381  	require.Equal(t, "", binlogResp.Results[source1].Msg)
   382  	require.Equal(t, fmt.Sprintf("source '%s' has no error", source1), binlogResp.Results[source1].ErrorMsg)
   383  
   384  	// binlog get
   385  	binlogResp, err = getBinlogOperator(ctx, httpClient, jobID, source1, "")
   386  	require.NoError(t, err)
   387  	require.Equal(t, "", binlogResp.ErrorMsg)
   388  	require.Len(t, binlogResp.Results, 1)
   389  	require.Equal(t, "", binlogResp.Results[source1].Msg)
   390  	require.Equal(t, fmt.Sprintf("source '%s' has no error", source1), binlogResp.Results[source1].ErrorMsg)
   391  
   392  	// binlog delete
   393  	binlogResp, err = deleteBinlogOperator(ctx, httpClient, jobID, source1)
   394  	require.NoError(t, err)
   395  	require.Equal(t, "", binlogResp.ErrorMsg)
   396  	require.Len(t, binlogResp.Results, 1)
   397  	require.Equal(t, "", binlogResp.Results[source1].Msg)
   398  	require.Equal(t, fmt.Sprintf("source '%s' has no error", source1), binlogResp.Results[source1].ErrorMsg)
   399  
   400  	// pause task
   401  	err = operateJob(ctx, httpClient, jobID, []string{source1}, dmpkg.Pause)
   402  	require.NoError(t, err)
   403  	// eventually paused
   404  	require.Eventually(t, func() bool {
   405  		jobStatus, err = queryStatus(ctx, httpClient, jobID, nil)
   406  		require.NoError(t, err)
   407  		return jobStatus.TaskStatus[source1].Status.Stage == metadata.StagePaused
   408  	}, time.Second*10, time.Second)
   409  	// set binlog schema
   410  	fromSource := true
   411  	binlogSchemaReq := &openapi.SetBinlogSchemaRequest{
   412  		Database:   db,
   413  		Table:      "t1",
   414  		FromSource: &fromSource,
   415  	}
   416  	binlogSchemaResp, err = setBinlogSchema(ctx, httpClient, jobID, source1, binlogSchemaReq)
   417  	require.NoError(t, err)
   418  	require.Equal(t, "", binlogSchemaResp.ErrorMsg)
   419  	require.Equal(t, "", binlogSchemaResp.Results[source1].ErrorMsg)
   420  	require.Equal(t, "", binlogSchemaResp.Results[source1].Msg)
   421  	// get new binlog schema
   422  	binlogSchemaResp, err = getBinlogSchema(ctx, httpClient, jobID, source1, db, "t1")
   423  	require.NoError(t, err)
   424  	require.Len(t, binlogSchemaResp.Results, 1)
   425  	require.Equal(t, "CREATE TABLE `t1` ( `c` int(11) NOT NULL, `new_col` int(11) DEFAULT NULL, PRIMARY KEY (`c`) /*T![clustered_index] CLUSTERED */, UNIQUE KEY `new_col` (`new_col`)) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin", binlogSchemaResp.Results[source1].Msg)
   426  	require.Equal(t, "", binlogSchemaResp.Results[source1].ErrorMsg)
   427  
   428  	// update with new balist
   429  	newDB := "new_" + db
   430  	dmJobCfg = bytes.ReplaceAll(dmJobCfg, []byte(fmt.Sprintf(`["%s"]`, db)), []byte(fmt.Sprintf(`["%s", "%s"]`, db, newDB)))
   431  	err = updateJobCfg(ctx, httpClient, jobID, string(dmJobCfg))
   432  	require.NoError(t, err)
   433  	// get new config
   434  	jobCfg, err = getJobCfg(ctx, httpClient, jobID)
   435  	require.NoError(t, err)
   436  	require.Contains(t, jobCfg, newDB)
   437  	require.Contains(t, jobCfg, `mod-revision: 1`)
   438  	// eventually apply new config, task still paused
   439  	require.Eventually(t, func() bool {
   440  		jobStatus, err = queryStatus(ctx, httpClient, jobID, nil)
   441  		return err == nil && !jobStatus.TaskStatus[source1].ConfigOutdated && jobStatus.TaskStatus[source1].Status.Stage == metadata.StagePaused
   442  	}, time.Second*30, time.Second)
   443  
   444  	// resume task
   445  	err = operateJob(ctx, httpClient, jobID, nil, dmpkg.Resume)
   446  	require.NoError(t, err)
   447  	// eventually resumed
   448  	require.Eventually(t, func() bool {
   449  		jobStatus, err = queryStatus(ctx, httpClient, jobID, []string{source1})
   450  		require.NoError(t, err)
   451  		return jobStatus.TaskStatus[source1].Status.Stage == metadata.StageRunning
   452  	}, time.Second*10, time.Second)
   453  
   454  	noError(mysql.Exec("create database " + newDB))
   455  	noError(mysql.Exec("create table " + newDB + ".t1(c int primary key)"))
   456  	noError(mysql.Exec("insert into " + newDB + ".t1 values(1)"))
   457  	waitRow("c = 1", newDB)
   458  }
   459  
   460  // testSimpleDumpSyncModeTask extracts the common logic for a DM "all" mode task,
   461  // `db` should not contain special character.
   462  func testSimpleDumpSyncModeTask(
   463  	t *testing.T,
   464  	mysql, tidb *sql.DB,
   465  	db string,
   466  ) {
   467  	ctx := context.Background()
   468  	noError := func(_ interface{}, err error) {
   469  		require.NoError(t, err)
   470  	}
   471  
   472  	httpClient, err := httputil.NewClient(nil)
   473  	require.NoError(t, err)
   474  
   475  	noError(tidb.Exec("drop database if exists " + db))
   476  	noError(mysql.Exec("drop database if exists " + db))
   477  
   478  	// full phase
   479  	noError(mysql.Exec("create database " + db))
   480  	noError(mysql.Exec("create table " + db + ".t1(c int primary key)"))
   481  	noError(mysql.Exec("insert into " + db + ".t1 values(1)"))
   482  
   483  	dmJobCfg, err := os.ReadFile("./dm-job.yaml")
   484  	require.NoError(t, err)
   485  	// start dump job
   486  	dmJobCfg = bytes.ReplaceAll(dmJobCfg, []byte("<placeholder>"), []byte(db))
   487  	dmJobCfg = bytes.ReplaceAll(dmJobCfg, []byte("task-mode: all"), []byte("task-mode: dump"))
   488  
   489  	endpoint := os.Getenv(envS3Endpoint)
   490  	require.Greater(t, len(endpoint), 0, "empty endpoint in env %s", envS3Endpoint)
   491  
   492  	accessKeyID := os.Getenv(envS3AccessKeyID)
   493  	require.Greater(t, len(accessKeyID), 0, "empty access key ID in env %s", envS3AccessKeyID)
   494  
   495  	secretAccessKey := os.Getenv(envS3SecretAccessKey)
   496  	require.Greater(t, len(secretAccessKey), 0, "empty secret access key in env %s", envS3SecretAccessKey)
   497  
   498  	dmJobCfg = bytes.ReplaceAll(dmJobCfg, []byte("#    dir: ./dumped_data"), []byte(fmt.Sprintf("    dir: s3://engine-it/dumped_data_%s?force-path-style=1&access-key=%s&secret-access-key=%s&endpoint=%s", db, accessKeyID, secretAccessKey, endpoint)))
   499  	var jobID string
   500  	require.Eventually(t, func() bool {
   501  		var err error
   502  		jobID, err = e2e.CreateJobViaHTTP(ctx, masterAddr, tenantID, projectID,
   503  			pb.Job_DM, dmJobCfg)
   504  		return err == nil
   505  	}, time.Second*5, time.Millisecond*100)
   506  
   507  	waitRow := func(where string, db string) {
   508  		require.Eventually(t, func() bool {
   509  			//nolint:sqlclosecheck,rowserrcheck
   510  			rs, err := tidb.Query("select 1 from " + db + ".t1 where " + where)
   511  			if err != nil {
   512  				t.Logf("query error: %v", err)
   513  				return false
   514  			}
   515  			defer func(rs *sql.Rows) {
   516  				_ = rs.Close()
   517  			}(rs)
   518  			if !rs.Next() {
   519  				t.Log("no rows")
   520  				return false
   521  			}
   522  			if rs.Next() {
   523  				t.Log("more than one row")
   524  				return false
   525  			}
   526  			return true
   527  		}, 30*time.Second, 500*time.Millisecond)
   528  	}
   529  
   530  	// dump finished and job exits
   531  	require.Eventually(t, func() bool {
   532  		job, err := e2e.QueryJobViaHTTP(ctx, masterAddr, tenantID, projectID, jobID)
   533  		return err == nil && job.State == pb.Job_Finished
   534  	}, time.Second*30, time.Millisecond*100)
   535  
   536  	source1 := "mysql-replica-01"
   537  	source2 := "mysql-replica-02"
   538  
   539  	// start incremental job
   540  	dmJobCfg = bytes.ReplaceAll(dmJobCfg, []byte("task-mode: dump"), []byte("task-mode: \"load&sync\""))
   541  	require.Eventually(t, func() bool {
   542  		var err error
   543  		jobID, err = e2e.CreateJobViaHTTP(ctx, masterAddr, tenantID, projectID, pb.Job_DM, dmJobCfg)
   544  		return err == nil
   545  	}, time.Second*5, time.Millisecond*100)
   546  
   547  	var jobStatus *dm.JobStatus
   548  	// wait job online
   549  	require.Eventually(t, func() bool {
   550  		jobStatus, err = queryStatus(ctx, httpClient, jobID, []string{source1, source2})
   551  		return err == nil && jobStatus.JobID == jobID
   552  	}, time.Second*5, time.Millisecond*100)
   553  	// wait sync job online
   554  	require.Eventually(t, func() bool {
   555  		jobStatus, err = queryStatus(ctx, httpClient, jobID, []string{source1, source2})
   556  		return err == nil && strings.Contains(string(jobStatus.TaskStatus[source1].Status.Status), "totalEvents")
   557  	}, time.Second*5, time.Millisecond*100)
   558  	require.Contains(t, jobStatus.TaskStatus[source2].Status.ErrorMsg, fmt.Sprintf("task %s for job not found", source2))
   559  
   560  	// check full phase
   561  	waitRow("c = 1", db)
   562  
   563  	// incremental phase
   564  	noError(mysql.Exec("insert into " + db + ".t1 values(2)"))
   565  	waitRow("c = 2", db)
   566  
   567  	// imitate an error that can auto resume
   568  	noError(tidb.Exec("drop table " + db + ".t1"))
   569  	noError(mysql.Exec("insert into " + db + ".t1 values(3)"))
   570  	time.Sleep(time.Second)
   571  	noError(tidb.Exec("create table " + db + ".t1(c int primary key)"))
   572  	time.Sleep(time.Second)
   573  
   574  	// check auto resume
   575  	waitRow("c = 3", db)
   576  
   577  	// pause task
   578  	err = operateJob(ctx, httpClient, jobID, []string{source1}, dmpkg.Pause)
   579  	require.NoError(t, err)
   580  
   581  	// eventually paused
   582  	require.Eventually(t, func() bool {
   583  		jobStatus, err = queryStatus(ctx, httpClient, jobID, nil)
   584  		for _, task := range jobStatus.TaskStatus {
   585  			require.Greater(t, task.Status.IoTotalBytes, uint64(0))
   586  			require.Greater(t, task.Status.DumpIoTotalBytes, uint64(0))
   587  		}
   588  		require.NoError(t, err)
   589  		return jobStatus.TaskStatus[source1].Status.Stage == metadata.StagePaused
   590  	}, time.Second*10, time.Second)
   591  
   592  	// binlog schema list
   593  	binlogSchemaResp, err := getBinlogSchema(ctx, httpClient, jobID, source1, "", "")
   594  	require.NoError(t, err)
   595  	require.Len(t, binlogSchemaResp.Results, 1)
   596  	require.Equal(t, fmt.Sprintf(`["%s"]`, db), binlogSchemaResp.Results[source1].Msg)
   597  	require.Equal(t, "", binlogSchemaResp.Results[source1].ErrorMsg)
   598  
   599  	// resume task
   600  	err = operateJob(ctx, httpClient, jobID, nil, dmpkg.Resume)
   601  	require.NoError(t, err)
   602  
   603  	// eventually resumed
   604  	require.Eventually(t, func() bool {
   605  		jobStatus, err = queryStatus(ctx, httpClient, jobID, []string{source1})
   606  		require.NoError(t, err)
   607  		return jobStatus.TaskStatus[source1].Status.Stage == metadata.StageRunning
   608  	}, time.Second*10, time.Second)
   609  
   610  	// get job cfg
   611  	jobCfg, err := getJobCfg(ctx, httpClient, jobID)
   612  	require.NoError(t, err)
   613  	require.Contains(t, jobCfg, `flavor: mysql`)
   614  	require.Contains(t, jobCfg, `tidb_txn_mode: optimistic`)
   615  
   616  	noError(mysql.Exec("alter table " + db + ".t1 add column new_col int unique"))
   617  	noError(mysql.Exec("insert into " + db + ".t1 values(4,4)"))
   618  
   619  	// eventually error
   620  	require.Eventually(t, func() bool {
   621  		jobStatus, err = queryStatus(ctx, httpClient, jobID, nil)
   622  		require.NoError(t, err)
   623  		return jobStatus.TaskStatus[source1].Status.Stage == metadata.StageError &&
   624  			strings.Contains(jobStatus.TaskStatus[source1].Status.Result.Errors[0].RawCause,
   625  				`unsupported add column 'new_col' constraint UNIQUE KEY when altering`)
   626  	}, time.Second*10, time.Second)
   627  
   628  	// binlog replace
   629  	binlogReq := &openapi.SetBinlogOperatorRequest{
   630  		Op: openapi.SetBinlogOperatorRequestOpReplace,
   631  		Sqls: &[]string{
   632  			"alter table " + db + ".t1 add column new_col int;",
   633  			"alter table " + db + ".t1 add unique(new_col);",
   634  		},
   635  	}
   636  	binlogResp, err := setBinlogOperator(ctx, httpClient, jobID, source1, binlogReq)
   637  	require.NoError(t, err)
   638  	require.Equal(t, "", binlogResp.ErrorMsg)
   639  	require.Len(t, binlogResp.Results, 1)
   640  	require.Equal(t, "", binlogResp.Results[source1].ErrorMsg)
   641  	require.Equal(t, "", binlogResp.Results[source1].Msg)
   642  	waitRow("new_col = 4", db)
   643  
   644  	// binlog replace again
   645  	binlogResp, err = setBinlogOperator(ctx, httpClient, jobID, source1, binlogReq)
   646  	require.NoError(t, err)
   647  	require.Equal(t, "", binlogResp.ErrorMsg)
   648  	require.Len(t, binlogResp.Results, 1)
   649  	require.Equal(t, "", binlogResp.Results[source1].Msg)
   650  	require.Equal(t, fmt.Sprintf("source '%s' has no error", source1), binlogResp.Results[source1].ErrorMsg)
   651  
   652  	// binlog get
   653  	binlogResp, err = getBinlogOperator(ctx, httpClient, jobID, source1, "")
   654  	require.NoError(t, err)
   655  	require.Equal(t, "", binlogResp.ErrorMsg)
   656  	require.Len(t, binlogResp.Results, 1)
   657  	require.Equal(t, "", binlogResp.Results[source1].Msg)
   658  	require.Equal(t, fmt.Sprintf("source '%s' has no error", source1), binlogResp.Results[source1].ErrorMsg)
   659  
   660  	// binlog delete
   661  	binlogResp, err = deleteBinlogOperator(ctx, httpClient, jobID, source1)
   662  	require.NoError(t, err)
   663  	require.Equal(t, "", binlogResp.ErrorMsg)
   664  	require.Len(t, binlogResp.Results, 1)
   665  	require.Equal(t, "", binlogResp.Results[source1].Msg)
   666  	require.Equal(t, fmt.Sprintf("source '%s' has no error", source1), binlogResp.Results[source1].ErrorMsg)
   667  
   668  	// pause task
   669  	err = operateJob(ctx, httpClient, jobID, []string{source1}, dmpkg.Pause)
   670  	require.NoError(t, err)
   671  	// eventually paused
   672  	require.Eventually(t, func() bool {
   673  		jobStatus, err = queryStatus(ctx, httpClient, jobID, nil)
   674  		require.NoError(t, err)
   675  		return jobStatus.TaskStatus[source1].Status.Stage == metadata.StagePaused
   676  	}, time.Second*10, time.Second)
   677  	// set binlog schema
   678  	fromSource := true
   679  	binlogSchemaReq := &openapi.SetBinlogSchemaRequest{
   680  		Database:   db,
   681  		Table:      "t1",
   682  		FromSource: &fromSource,
   683  	}
   684  	binlogSchemaResp, err = setBinlogSchema(ctx, httpClient, jobID, source1, binlogSchemaReq)
   685  	require.NoError(t, err)
   686  	require.Equal(t, "", binlogSchemaResp.ErrorMsg)
   687  	require.Equal(t, "", binlogSchemaResp.Results[source1].ErrorMsg)
   688  	require.Equal(t, "", binlogSchemaResp.Results[source1].Msg)
   689  	// get new binlog schema
   690  	binlogSchemaResp, err = getBinlogSchema(ctx, httpClient, jobID, source1, db, "t1")
   691  	require.NoError(t, err)
   692  	require.Len(t, binlogSchemaResp.Results, 1)
   693  	require.Equal(t, "CREATE TABLE `t1` ( `c` int(11) NOT NULL, `new_col` int(11) DEFAULT NULL, PRIMARY KEY (`c`) /*T![clustered_index] CLUSTERED */, UNIQUE KEY `new_col` (`new_col`)) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin", binlogSchemaResp.Results[source1].Msg)
   694  	require.Equal(t, "", binlogSchemaResp.Results[source1].ErrorMsg)
   695  
   696  	// update with new balist
   697  	newDB := "new_" + db
   698  	dmJobCfg = bytes.ReplaceAll(dmJobCfg, []byte(fmt.Sprintf(`["%s"]`, db)), []byte(fmt.Sprintf(`["%s", "%s"]`, db, newDB)))
   699  	err = updateJobCfg(ctx, httpClient, jobID, string(dmJobCfg))
   700  	require.NoError(t, err)
   701  	// get new config
   702  	jobCfg, err = getJobCfg(ctx, httpClient, jobID)
   703  	require.NoError(t, err)
   704  	require.Contains(t, jobCfg, newDB)
   705  	require.Contains(t, jobCfg, `mod-revision: 1`)
   706  	// eventually apply new config, task still paused
   707  	require.Eventually(t, func() bool {
   708  		jobStatus, err = queryStatus(ctx, httpClient, jobID, nil)
   709  		return err == nil && !jobStatus.TaskStatus[source1].ConfigOutdated && jobStatus.TaskStatus[source1].Status.Stage == metadata.StagePaused
   710  	}, time.Second*30, time.Second)
   711  
   712  	// resume task
   713  	err = operateJob(ctx, httpClient, jobID, nil, dmpkg.Resume)
   714  	require.NoError(t, err)
   715  	// eventually resumed
   716  	require.Eventually(t, func() bool {
   717  		jobStatus, err = queryStatus(ctx, httpClient, jobID, []string{source1})
   718  		require.NoError(t, err)
   719  		return jobStatus.TaskStatus[source1].Status.Stage == metadata.StageRunning
   720  	}, time.Second*10, time.Second)
   721  
   722  	noError(mysql.Exec("create database " + newDB))
   723  	noError(mysql.Exec("create table " + newDB + ".t1(c int primary key)"))
   724  	noError(mysql.Exec("insert into " + newDB + ".t1 values(1)"))
   725  	waitRow("c = 1", newDB)
   726  }
   727  
   728  func queryStatus(ctx context.Context, client *httputil.Client, jobID string, tasks []string) (*dm.JobStatus, error) {
   729  	ctx, cancel := context.WithTimeout(ctx, time.Second*3)
   730  	defer cancel()
   731  	u := fmt.Sprintf(baseURL+"/status", jobID)
   732  	v := url.Values{}
   733  	for _, task := range tasks {
   734  		v.Add("tasks", task)
   735  	}
   736  	u += "?" + v.Encode()
   737  	resp, err := client.Get(ctx, u)
   738  	if err != nil {
   739  		return nil, err
   740  	}
   741  
   742  	respBody, err := io.ReadAll(resp.Body)
   743  	if err != nil {
   744  		return nil, err
   745  	}
   746  	err = resp.Body.Close()
   747  	if err != nil {
   748  		return nil, err
   749  	}
   750  	if resp.StatusCode/100 != 2 {
   751  		return nil, fmt.Errorf("status code %d, body %s", resp.StatusCode, string(respBody))
   752  	}
   753  
   754  	var jobStatus dm.JobStatus
   755  	err = json.Unmarshal(respBody, &jobStatus)
   756  	return &jobStatus, err
   757  }
   758  
   759  func operateJob(
   760  	ctx context.Context, client *httputil.Client, jobID string, tasks []string,
   761  	op dmpkg.OperateType,
   762  ) error {
   763  	operateJobReq := &openapi.OperateJobRequest{
   764  		Tasks: &tasks,
   765  	}
   766  	switch op {
   767  	case dmpkg.Pause:
   768  		operateJobReq.Op = openapi.OperateJobRequestOpPause
   769  	case dmpkg.Resume:
   770  		operateJobReq.Op = openapi.OperateJobRequestOpResume
   771  	}
   772  
   773  	url := fmt.Sprintf(baseURL+"/status", jobID)
   774  	header := http.Header{"Content-Type": {"application/json"}}
   775  	bs, err := json.Marshal(operateJobReq)
   776  	if err != nil {
   777  		return err
   778  	}
   779  	_, err = client.DoRequest(ctx, url, http.MethodPut, header, bytes.NewReader(bs))
   780  	return err
   781  }
   782  
   783  func getJobCfg(ctx context.Context, client *httputil.Client, jobID string) (string, error) {
   784  	resp, err := client.Get(ctx, fmt.Sprintf(baseURL+"/config", jobID))
   785  	if err != nil {
   786  		return "", err
   787  	}
   788  
   789  	respBody, err := io.ReadAll(resp.Body)
   790  	if err != nil {
   791  		return "", err
   792  	}
   793  	err = resp.Body.Close()
   794  	if err != nil {
   795  		return "", err
   796  	}
   797  	var jobCfg string
   798  	err = json.Unmarshal(respBody, &jobCfg)
   799  	return jobCfg, err
   800  }
   801  
   802  func updateJobCfg(ctx context.Context, client *httputil.Client, jobID string, cfg string) error {
   803  	url := fmt.Sprintf(baseURL+"/config", jobID)
   804  	req := openapi.UpdateJobConfigRequest{
   805  		Config: cfg,
   806  	}
   807  	bs, err := json.Marshal(req)
   808  	if err != nil {
   809  		return err
   810  	}
   811  	header := http.Header{"Content-Type": {"application/json"}}
   812  	_, err = client.DoRequest(ctx, url, http.MethodPut, header, bytes.NewReader(bs))
   813  	return err
   814  }
   815  
   816  func getBinlogOperator(ctx context.Context, client *httputil.Client, jobID string,
   817  	task string, binlogPos string,
   818  ) (*dmpkg.BinlogResponse, error) {
   819  	u := fmt.Sprintf(baseURL+"/binlog/tasks/%s", jobID, task)
   820  	v := url.Values{}
   821  	if binlogPos != "" {
   822  		v.Add("binlog_pos", binlogPos)
   823  	}
   824  	u += "?" + v.Encode()
   825  	resp, err := client.Get(ctx, u)
   826  	if err != nil {
   827  		return nil, err
   828  	}
   829  
   830  	respBody, err := io.ReadAll(resp.Body)
   831  	if err != nil {
   832  		return nil, err
   833  	}
   834  	err = resp.Body.Close()
   835  	if err != nil {
   836  		return nil, err
   837  	}
   838  	var binlogResp dmpkg.BinlogResponse
   839  	err = json.Unmarshal(respBody, &binlogResp)
   840  	return &binlogResp, err
   841  }
   842  
   843  func setBinlogOperator(ctx context.Context, client *httputil.Client, jobID string,
   844  	task string, req *openapi.SetBinlogOperatorRequest,
   845  ) (*dmpkg.BinlogResponse, error) {
   846  	bs, err := json.Marshal(req)
   847  	if err != nil {
   848  		return nil, err
   849  	}
   850  	url := fmt.Sprintf(baseURL+"/binlog/tasks/%s", jobID, task)
   851  	header := http.Header{"Content-Type": {"application/json"}}
   852  	respBody, err := client.DoRequest(ctx, url, http.MethodPost, header, bytes.NewReader(bs))
   853  	if err != nil {
   854  		return nil, err
   855  	}
   856  
   857  	var binlogResp dmpkg.BinlogResponse
   858  	err = json.Unmarshal(respBody, &binlogResp)
   859  	return &binlogResp, err
   860  }
   861  
   862  func deleteBinlogOperator(ctx context.Context, client *httputil.Client, jobID string,
   863  	task string,
   864  ) (*dmpkg.BinlogResponse, error) {
   865  	url := fmt.Sprintf(baseURL+"/binlog/tasks/%s", jobID, task)
   866  	respBody, err := client.DoRequest(ctx, url, http.MethodDelete, nil, nil)
   867  	if err != nil {
   868  		return nil, err
   869  	}
   870  	var binlogResp dmpkg.BinlogResponse
   871  	err = json.Unmarshal(respBody, &binlogResp)
   872  	return &binlogResp, err
   873  }
   874  
   875  func getBinlogSchema(ctx context.Context, client *httputil.Client, jobID string,
   876  	task string, schema string, table string,
   877  ) (*dmpkg.BinlogSchemaResponse, error) {
   878  	u := fmt.Sprintf(baseURL+"/schema/tasks/%s", jobID, task)
   879  	v := url.Values{}
   880  	if schema != "" {
   881  		v.Add("database", schema)
   882  	}
   883  	if table != "" {
   884  		v.Add("table", table)
   885  	}
   886  	u += "?" + v.Encode()
   887  	resp, err := client.Get(ctx, u)
   888  	if err != nil {
   889  		return nil, err
   890  	}
   891  
   892  	respBody, err := io.ReadAll(resp.Body)
   893  	if err != nil {
   894  		return nil, err
   895  	}
   896  	err = resp.Body.Close()
   897  	if err != nil {
   898  		return nil, err
   899  	}
   900  	var binlogSchemaResp dmpkg.BinlogSchemaResponse
   901  	err = json.Unmarshal(respBody, &binlogSchemaResp)
   902  	return &binlogSchemaResp, err
   903  }
   904  
   905  func setBinlogSchema(
   906  	ctx context.Context, client *httputil.Client, jobID string, task string,
   907  	req *openapi.SetBinlogSchemaRequest,
   908  ) (*dmpkg.BinlogSchemaResponse, error) {
   909  	url := fmt.Sprintf(baseURL+"/schema/tasks/%s", jobID, task)
   910  	bs, err := json.Marshal(req)
   911  	if err != nil {
   912  		return nil, err
   913  	}
   914  
   915  	header := http.Header{"Content-Type": {"application/json"}}
   916  	respBody, err := client.DoRequest(ctx, url, http.MethodPut, header, bytes.NewReader(bs))
   917  	if err != nil {
   918  		return nil, err
   919  	}
   920  	var binlogSchemaResp dmpkg.BinlogSchemaResponse
   921  	err = json.Unmarshal(respBody, &binlogSchemaResp)
   922  	return &binlogSchemaResp, err
   923  }