github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/tests/checkpoint_transaction/run.sh (about)

     1  #!/bin/bash
     2  
     3  set -eu
     4  
     5  cur=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
     6  source $cur/../_utils/test_prepare
     7  WORK_DIR=$TEST_DIR/$TEST_NAME
     8  
     9  function check_worker_ungraceful_stop_with_retry() {
    10  	for ((k = 0; k < 10; k++)); do
    11  		sleep 1
    12  		echo "start check_worker_ungraceful_stop_with_retry times: $k"
    13  
    14  		num=$(grep "kill unit" $WORK_DIR/worker1/log/dm-worker.log | wc -l)
    15  		if [ $num -lt 1 ]; then
    16  			continue
    17  		fi
    18  		num=$(grep "kill syncer without graceful" $WORK_DIR/worker1/log/dm-worker.log | wc -l)
    19  		if [ $num -lt 1 ]; then
    20  			continue
    21  		fi
    22  		num=$(grep "received ungraceful exit ctx, exit now" $WORK_DIR/worker1/log/dm-worker.log | wc -l)
    23  		if [ $num -lt 1 ]; then
    24  			continue
    25  		fi
    26  		echo "check_worker_ungraceful_stop_with_retry success after retry: $k"
    27  		return 0
    28  	done
    29  
    30  	echo "check_worker_ungraceful_stop_with_retry failed after retry"
    31  	exit 1
    32  }
    33  
    34  function run() {
    35  	export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/syncer/checkCheckpointInMiddleOfTransaction=return"
    36  
    37  	run_sql_file $cur/data/db1.prepare.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1
    38  	check_contains 'Query OK, 1 row affected'
    39  
    40  	# run dm master
    41  	run_dm_master $WORK_DIR/master $MASTER_PORT $cur/conf/dm-master.toml
    42  	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT
    43  	check_metric $MASTER_PORT 'start_leader_counter' 3 0 2
    44  
    45  	# bound source1 to worker1
    46  	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
    47  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
    48  	dmctl_operate_source create $cur/conf/source1.yaml $SOURCE_ID1
    49  
    50  	# start a task in all mode
    51  	dmctl_start_task_standalone $cur/conf/dm-task.yaml
    52  
    53  	# check diff
    54  	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
    55  
    56  	# test ungraceful stop, worker will not wait transaction finish
    57  	run_sql_file $cur/data/db1.increment1.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1
    58  	sleep 2
    59  	# kill dm-master 1 to make worker lost keep alive while a transaction is not finished
    60  	echo "kill dm-master1"
    61  	kill_dm_master
    62  	check_master_port_offline 1
    63  	sleep 1 # wait worker lost keep alive ttl is 1 second
    64  
    65  	# check dm-worker will exit quickly without waiting for the transaction to finish
    66  	check_worker_ungraceful_stop_with_retry
    67  
    68  	# test data in tidb less than source
    69  	dataCountSource=$(mysql -uroot -h$MYSQL_HOST1 -P$MYSQL_PORT1 -p$MYSQL_PASSWORD1 -se "select count(1) from checkpoint_transaction.t1")
    70  	dataCountInTiDB=$(mysql -uroot -h127.0.0.1 -P4000 -se "select count(1) from checkpoint_transaction.t1")
    71  	echo "after ungraceful exit data in source count: $dataCountSource data in tidb count: $dataCountInTiDB"
    72  	if [ "$dataCountInTiDB" -lt "$dataCountSource" ]; then
    73  		echo "ungraceful stop test success"
    74  	else
    75  		echo "ungraceful stop test failed"
    76  		exit 1
    77  	fi
    78  
    79  	# start dm-master again task will be resume, and data will be synced
    80  	run_dm_master $WORK_DIR/master $MASTER_PORT $cur/conf/dm-master.toml
    81  	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT
    82  	sleep 3
    83  	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
    84  	run_sql_file $cur/data/db1.increment1.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1
    85  	# wait transaction start
    86  	check_log_contain_with_retry "\[32,30,null\]" $WORK_DIR/worker1/log/dm-worker.log
    87  	echo "pause task and check status"
    88  	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    89  		"pause-task test" \
    90  		"\"result\": true" 2
    91  	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    92  		"query-status test" \
    93  		"\"stage\": \"Paused\"" 1
    94  	# check the point is the middle of checkpoint
    95  	num=$(grep "not receive xid job yet" $WORK_DIR/worker1/log/dm-worker.log | wc -l)
    96  
    97  	if [ "$num" -gt 0 ]; then
    98  		echo "graceful pause test success"
    99  	else
   100  		echo "graceful pause test failed"
   101  		exit 1
   102  	fi
   103  
   104  	echo "start check pause diff"
   105  	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
   106  
   107  	echo "resume task and check status"
   108  	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   109  		"resume-task test" \
   110  		"\"result\": true" 2
   111  	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   112  		"query-status test" \
   113  		"\"stage\": \"Running\"" 1
   114  
   115  	echo "kill dm-worker1"
   116  	kill_process dm-worker1
   117  	check_port_offline $WORKER1_PORT 20
   118  	rm -rf $WORK_DIR/worker1
   119  	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
   120  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
   121  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   122  		"query-status test" \
   123  		"\"stage\": \"Running\"" 1
   124  
   125  	run_sql_file $cur/data/db1.increment2.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1
   126  	# wait transaction start
   127  	check_log_contain_with_retry "\[62,null,30\]" $WORK_DIR/worker1/log/dm-worker.log
   128  	echo "stop task"
   129  	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   130  		"stop-task test" \
   131  		"\"result\": true" 2
   132  	# check the point is the middle of checkpoint
   133  	num=$(grep "not receive xid job yet" $WORK_DIR/worker1/log/dm-worker.log | wc -l)
   134  	if [ "$num" -gt 0 ]; then
   135  		echo "graceful stop test success"
   136  	else
   137  		echo "graceful stop test failed"
   138  		exit 1
   139  	fi
   140  
   141  	echo "start check stop diff"
   142  	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
   143  
   144  	export GO_FAILPOINTS=""
   145  }
   146  
   147  cleanup_data checkpoint_transaction
   148  # also cleanup dm processes in case of last run failed
   149  cleanup_process
   150  run
   151  cleanup_process
   152  
   153  echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>"