github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/tests/lightning_load_task/run.sh (about)

     1  #!/bin/bash
     2  
     3  set -eu
     4  
     5  cur=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
     6  DATA_DIR=$cur/../load_task/data
     7  CONF_DIR=$cur/../load_task/conf
     8  source $cur/../_utils/test_prepare
     9  WORK_DIR=$TEST_DIR/$TEST_NAME
    10  API_VERSION="v1alpha1"
    11  WORKER1="worker1"
    12  WORKER2="worker2"
    13  WORKER3="worker3"
    14  
    15  function test_worker_restart() {
    16  	echo "test worker restart"
    17  	# worker1 offline
    18  	kill_process dm-worker1
    19  	check_port_offline $WORKER1_PORT 20
    20  
    21  	# source1 is bound to worker3
    22  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    23  		"list-member -w -n worker3" \
    24  		"\"stage\": \"bound\"" 1 \
    25  		"\"source\": \"mysql-replica-01\"" 1
    26  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    27  		"list-member -w -n worker1" \
    28  		"\"stage\": \"offline\"" 1
    29  
    30  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    31  		"query-status load_task1" \
    32  		"different worker in load stage, previous worker: $WORKER1, current worker: $WORKER3" 1 \
    33  		"Please check if the previous worker is online." 1
    34  
    35  	# worker1 online
    36  	export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/loader/LoadDataSlowDownByTask=return(\"load_task1\")"
    37  	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
    38  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
    39  
    40  	# transfer to worker1
    41  	check_log_contain_with_retry 'transfer source and worker.*worker1.*worker3.*mysql-replica-01' $WORK_DIR/master/log/dm-master.log
    42  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    43  		"list-member -w -n worker3" \
    44  		"\"stage\": \"free\"" 1
    45  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    46  		"list-member -w -n worker1" \
    47  		"\"stage\": \"bound\"" 1 \
    48  		"\"source\": \"mysql-replica-01\"" 1
    49  
    50  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    51  		"query-status load_task1" \
    52  		"\"unit\": \"Load\"" 1 \
    53  		"\"unit\": \"Sync\"" 1
    54  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    55  		"query-status load_task2" \
    56  		"\"unit\": \"Load\"" 1 \
    57  		"\"unit\": \"Sync\"" 1
    58  }
    59  
    60  # almost never happen since user hardly start a load task after another load task failed.
    61  function test_transfer_two_sources() {
    62  	echo "test_transfer_two_sources"
    63  	# worker2 offline
    64  	kill_process dm-worker2
    65  	check_port_offline $WORKER2_PORT 20
    66  
    67  	# source2 bound to worker3
    68  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    69  		"list-member -w -n worker3" \
    70  		"\"stage\": \"bound\"" 1 \
    71  		"\"source\": \"mysql-replica-02\"" 1
    72  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    73  		"query-status load_task2" \
    74  		"different worker in load stage, previous worker: $WORKER2, current worker: $WORKER3" 1
    75  
    76  	# start load task for worker3
    77  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    78  		"start-task $cur/conf/dm-task3.yaml --remove-meta" \
    79  		"\"result\": true" 2
    80  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    81  		"query-status load_task3" \
    82  		"\"unit\": \"Load\"" 1
    83  
    84  	# worker2 online
    85  	export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/loader/LoadDataSlowDown=sleep(15000)"
    86  	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
    87  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT
    88  
    89  	# worker2 free since (worker3, source2) has load task(load_task3)
    90  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    91  		"list-member -w -n worker2" \
    92  		"\"stage\": \"free\"" 1
    93  
    94  	# worker1 offline
    95  	kill_process dm-worker1
    96  	check_port_offline $WORKER1_PORT 20
    97  
    98  	# source1 is bound to worker2
    99  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   100  		"list-member -w -n worker2" \
   101  		"\"stage\": \"bound\"" 1 \
   102  		"\"source\": \"mysql-replica-01\"" 1
   103  
   104  	# start load_task4 on worker2
   105  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   106  		"start-task $cur/conf/dm-task4.yaml --remove-meta" \
   107  		"\"result\": true" 2
   108  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   109  		"query-status load_task4" \
   110  		"\"unit\": \"Load\"" 1
   111  
   112  	# worker1 online
   113  	export GO_FAILPOINTS=""
   114  	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
   115  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
   116  
   117  	# worker1 free since (worker2, source1) has load task(load_task4)
   118  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   119  		"list-member -w -n worker1" \
   120  		"\"stage\": \"free\"" 1
   121  
   122  	# now, worker2 waiting worker3 finish load_task3, worker1 waiting worker2 finish load_task4
   123  	# worker3 offline
   124  	kill_process dm-worker3
   125  	check_port_offline $WORKER3_PORT 20
   126  
   127  	# source2 is bound to worker1
   128  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   129  		"list-member -w -n worker1" \
   130  		"\"stage\": \"bound\"" 1 \
   131  		"\"source\": \"mysql-replica-02\"" 1
   132  
   133  	# (worker1, source2), (worker2, source1)
   134  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   135  		"query-status load_task1" \
   136  		"different worker in load stage, previous worker: $WORKER1, current worker: $WORKER2" 1
   137  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   138  		"query-status load_task2" \
   139  		"different worker in load stage, previous worker: $WORKER2, current worker: $WORKER1" 1
   140  
   141  	# worker2 finish load_task4
   142  	# master transfer (worker1, source2), (worker2, source1) to (worker1, source1), (worker2, source2)
   143  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   144  		"list-member -w -n worker1" \
   145  		"\"stage\": \"bound\"" 1 \
   146  		"\"source\": \"mysql-replica-01\"" 1
   147  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   148  		"list-member -w -n worker2" \
   149  		"\"stage\": \"bound\"" 1 \
   150  		"\"source\": \"mysql-replica-02\"" 1
   151  
   152  	# task1, 2, 4 running, task3 fail
   153  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   154  		"query-status" \
   155  		"\"taskStatus\": \"Running\"" 3 \
   156  		"taskStatus.*Error" 1
   157  
   158  	# worker3 online
   159  	export GO_FAILPOINTS=""
   160  	run_dm_worker $WORK_DIR/worker3 $WORKER3_PORT $cur/conf/dm-worker3.toml
   161  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER3_PORT
   162  
   163  	# source2 is bound to worker3 since load_task3
   164  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   165  		"list-member -w -n worker2" \
   166  		"\"stage\": \"bound\"" 1 \
   167  		"\"source\": \"mysql-replica-02\"" 1
   168  
   169  	# all task running
   170  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   171  		"query-status" \
   172  		"\"taskStatus\": \"Running\"" 4
   173  }
   174  
   175  function run() {
   176  	echo "import prepare data"
   177  	run_sql_file $cur/data/db1.prepare.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1
   178  	check_contains 'Query OK, 2 rows affected'
   179  	run_sql_file $cur/data/db2.prepare.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2
   180  	check_contains 'Query OK, 3 rows affected'
   181  
   182  	echo "start DM master, workers and sources"
   183  	run_dm_master $WORK_DIR/master $MASTER_PORT1 $cur/conf/dm-master.toml
   184  	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT1
   185  
   186  	# worker1 loading load_task1
   187  	export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/loader/LoadDataSlowDownByTask=return(\"load_task1\")"
   188  	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
   189  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
   190  	cp $cur/conf/source1.yaml $WORK_DIR/source1.yaml
   191  	sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker1/relay_log" $WORK_DIR/source1.yaml
   192  	dmctl_operate_source create $WORK_DIR/source1.yaml $SOURCE_ID1
   193  
   194  	# worker2 loading load_task2
   195  	export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/loader/LoadDataSlowDownByTask=return(\"load_task2\")"
   196  	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
   197  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT
   198  	cp $cur/conf/source2.yaml $WORK_DIR/source2.yaml
   199  	sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker2/relay_log" $WORK_DIR/source2.yaml
   200  	dmctl_operate_source create $WORK_DIR/source2.yaml $SOURCE_ID2
   201  
   202  	# worker3 loading load_task3
   203  	export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/loader/LoadDataSlowDownByTask=return(\"load_task3\")"
   204  	run_dm_worker $WORK_DIR/worker3 $WORKER3_PORT $cur/conf/dm-worker3.toml
   205  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER3_PORT
   206  
   207  	echo "start DM task"
   208  	dmctl_start_task "$cur/conf/dm-task.yaml" "--remove-meta"
   209  	dmctl_start_task "$cur/conf/dm-task2.yaml" "--remove-meta"
   210  
   211  	check_log_contain_with_retry 'inject failpoint LoadDataSlowDownByTask in lightning loader' $WORK_DIR/worker1/log/dm-worker.log
   212  	check_log_contain_with_retry 'inject failpoint LoadDataSlowDownByTask in lightning loader' $WORK_DIR/worker2/log/dm-worker.log
   213  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   214  		"query-status load_task1" \
   215  		"\"unit\": \"Load\"" 1 \
   216  		"\"unit\": \"Sync\"" 1
   217  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   218  		"query-status load_task2" \
   219  		"\"unit\": \"Load\"" 1 \
   220  		"\"unit\": \"Sync\"" 1
   221  
   222  	test_worker_restart
   223  
   224  	test_transfer_two_sources
   225  
   226  	run_sql_file $cur/data/db1.increment.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1
   227  	run_sql_file $cur/data/db2.increment.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2
   228  	check_sync_diff $WORK_DIR $cur/conf/diff_config1.toml
   229  	check_sync_diff $WORK_DIR $cur/conf/diff_config2.toml
   230  	check_sync_diff $WORK_DIR $cur/conf/diff_config3.toml
   231  	check_sync_diff $WORK_DIR $cur/conf/diff_config4.toml
   232  }
   233  
   234  cleanup_data load_task1
   235  cleanup_data load_task2
   236  cleanup_data load_task3
   237  cleanup_data load_task4
   238  # also cleanup dm processes in case of last run failed
   239  cleanup_process $*
   240  run $*
   241  cleanup_process $*
   242  
   243  echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>"