#!/bin/bash
#
# DM integration test case: dm/tests/checkpoint_transaction/run.sh
# (github.com/pingcap/tiflow).
#
# Exercises how a dm-worker treats an in-flight upstream transaction when it is
# stopped in three different ways:
#   1. ungraceful stop - dm-master is killed so the worker loses keepalive;
#   2. graceful pause  - `pause-task` issued while a transaction is running;
#   3. graceful stop   - `stop-task` issued while a transaction is running.
#
# All helper functions (run_sql_file, run_dm_master, run_dm_ctl, check_*, ...)
# and the TEST_DIR / TEST_NAME / MYSQL_* / *_PORT / SOURCE_ID1 variables are not
# defined here; presumably they come from the sourced _utils/test_prepare or
# from the test runner's environment - confirm there.

set -eu

cur=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
source "$cur/../_utils/test_prepare"
WORK_DIR=$TEST_DIR/$TEST_NAME

# Polls the worker1 log, once per second for at most 10 attempts, until all
# three "ungraceful exit" messages are present in it.
# Globals:  WORK_DIR (read)
# Outputs:  progress messages on stdout
# Returns:  0 as soon as every message has been seen; otherwise the whole
#           script is terminated with exit status 1 after the last attempt.
function check_worker_ungraceful_stop_with_retry() {
  local worker_log="$WORK_DIR/worker1/log/dm-worker.log"
  local -a expected_msgs=(
    "kill unit"
    "kill syncer without graceful"
    "received ungraceful exit ctx, exit now"
  )
  local k msg all_found

  for ((k = 0; k < 10; k++)); do
    sleep 1
    echo "start check_worker_ungraceful_stop_with_retry times: $k"

    all_found=1
    for msg in "${expected_msgs[@]}"; do
      # A missing message (or a log file that does not exist yet) just means
      # "try again"; grep inside `if` does not trip `set -e`.
      if ! grep -qF -- "$msg" "$worker_log"; then
        all_found=0
        break
      fi
    done

    if [ "$all_found" -eq 1 ]; then
      echo "check_worker_ungraceful_stop_with_retry success after retry: $k"
      return 0
    fi
  done

  echo "check_worker_ungraceful_stop_with_retry failed after retry"
  exit 1
}

# Runs the whole test scenario. Any failed check terminates the script with a
# non-zero status (either via an explicit `exit 1` or via `set -e`).
# Globals:  cur, WORK_DIR, MASTER_PORT, WORKER1_PORT, SOURCE_ID1,
#           MYSQL_HOST1, MYSQL_PORT1, MYSQL_PASSWORD1 (read);
#           GO_FAILPOINTS (exported, reset to "" at the end)
function run() {
  local worker_log="$WORK_DIR/worker1/log/dm-worker.log"
  local source_rows tidb_rows

  export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/syncer/checkCheckpointInMiddleOfTransaction=return"

  run_sql_file "$cur/data/db1.prepare.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1"
  check_contains 'Query OK, 1 row affected'

  # run dm master
  run_dm_master "$WORK_DIR/master" "$MASTER_PORT" "$cur/conf/dm-master.toml"
  check_rpc_alive "$cur/../bin/check_master_online" "127.0.0.1:$MASTER_PORT"
  check_metric "$MASTER_PORT" 'start_leader_counter' 3 0 2

  # bound source1 to worker1
  run_dm_worker "$WORK_DIR/worker1" "$WORKER1_PORT" "$cur/conf/dm-worker1.toml"
  check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER1_PORT"
  dmctl_operate_source create "$cur/conf/source1.yaml" "$SOURCE_ID1"

  # start a task in all mode
  dmctl_start_task_standalone "$cur/conf/dm-task.yaml"

  # check diff
  check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"

  # --- ungraceful stop: the worker must not wait for the transaction to finish
  run_sql_file "$cur/data/db1.increment1.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1"
  sleep 2
  # kill dm-master 1 so the worker loses keepalive while a transaction is unfinished
  echo "kill dm-master1"
  kill_dm_master
  check_master_port_offline 1
  sleep 1 # wait for the worker to lose keepalive; the TTL is 1 second

  # check dm-worker will exit quickly without waiting for the transaction to finish
  check_worker_ungraceful_stop_with_retry

  # the downstream (TiDB) must hold fewer rows than the upstream source
  source_rows=$(mysql -uroot -h"$MYSQL_HOST1" -P"$MYSQL_PORT1" -p"$MYSQL_PASSWORD1" -se "select count(1) from checkpoint_transaction.t1")
  tidb_rows=$(mysql -uroot -h127.0.0.1 -P4000 -se "select count(1) from checkpoint_transaction.t1")
  echo "after ungraceful exit data in source count: $source_rows data in tidb count: $tidb_rows"
  # Deliberately `[ ... ]` rather than `[[ ... ]]` / `(( ... ))`: those evaluate an
  # empty string as 0, which would let an empty query result pass this check.
  if [ "$tidb_rows" -lt "$source_rows" ]; then
    echo "ungraceful stop test success"
  else
    echo "ungraceful stop test failed"
    exit 1
  fi

  # start dm-master again; the task will be resumed and the data will be synced
  run_dm_master "$WORK_DIR/master" "$MASTER_PORT" "$cur/conf/dm-master.toml"
  check_rpc_alive "$cur/../bin/check_master_online" "127.0.0.1:$MASTER_PORT"
  sleep 3
  check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"

  # --- graceful pause
  run_sql_file "$cur/data/db1.increment1.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1"
  # wait for the transaction to start
  check_log_contain_with_retry "\[32,30,null\]" "$worker_log"
  echo "pause task and check status"
  run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "pause-task test" \
    "\"result\": true" 2
  run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "query-status test" \
    "\"stage\": \"Paused\"" 1
  # check that the pause happened in the middle of a transaction
  if grep -qF -- "not receive xid job yet" "$worker_log"; then
    echo "graceful pause test success"
  else
    echo "graceful pause test failed"
    exit 1
  fi

  echo "start check pause diff"
  check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"

  echo "resume task and check status"
  run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "resume-task test" \
    "\"result\": true" 2
  run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "query-status test" \
    "\"stage\": \"Running\"" 1

  # --- graceful stop
  # Restart worker1 from an empty directory; this also discards the old log, so
  # the grep further down only sees messages written after the restart.
  echo "kill dm-worker1"
  kill_process dm-worker1
  check_port_offline "$WORKER1_PORT" 20
  rm -rf -- "${WORK_DIR:?}/worker1"
  run_dm_worker "$WORK_DIR/worker1" "$WORKER1_PORT" "$cur/conf/dm-worker1.toml"
  check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER1_PORT"
  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "query-status test" \
    "\"stage\": \"Running\"" 1

  run_sql_file "$cur/data/db1.increment2.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1"
  # wait for the transaction to start
  check_log_contain_with_retry "\[62,null,30\]" "$worker_log"
  echo "stop task"
  run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "stop-task test" \
    "\"result\": true" 2
  # check that the stop happened in the middle of a transaction
  if grep -qF -- "not receive xid job yet" "$worker_log"; then
    echo "graceful stop test success"
  else
    echo "graceful stop test failed"
    exit 1
  fi

  echo "start check stop diff"
  check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"

  export GO_FAILPOINTS=""
}

cleanup_data checkpoint_transaction
# also clean up dm processes in case the last run failed
cleanup_process
run
cleanup_process

echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>"