#!/bin/bash
# Integration test: verify that retryable DB errors injected via failpoints keep
# a DM task in "Running" (retrying) state, and that stop-task cancels the retry
# loop promptly (< ~3s) in the load unit, checkpoint update, and binlog
# replication unit.
# TODO: this case can't run under new HA model, already remove from `other_integratin.txt`, add it back when supported.

set -eu

cur=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
source $cur/../_utils/test_prepare
WORK_DIR=$TEST_DIR/$TEST_NAME

function run() {
	run_sql_file $cur/data/db1.prepare.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1
	check_contains 'Query OK, 2 rows affected'
	run_sql_file $cur/data/db2.prepare.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2
	check_contains 'Query OK, 2 rows affected'

	# inject error for loading data
	export GO_FAILPOINTS='github.com/pingcap/tiflow/dm/pkg/conn/retryableError=return("retry_cancel")'

	run_dm_master $WORK_DIR/master $MASTER_PORT $cur/conf/dm-master.toml
	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT
	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT
	# operate mysql config to worker
	cp $cur/conf/source1.yaml $WORK_DIR/source1.yaml
	cp $cur/conf/source2.yaml $WORK_DIR/source2.yaml
	sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker1/relay_log" $WORK_DIR/source1.yaml
	sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker2/relay_log" $WORK_DIR/source2.yaml
	dmctl_operate_source create $WORK_DIR/source1.yaml $SOURCE_ID1
	dmctl_operate_source create $WORK_DIR/source2.yaml $SOURCE_ID2

	# start-task with retry_cancel enabled
	echo "1st time to start task"
	dmctl_start_task

	sleep 5 # should sleep > retryTimeout (now 3s)

	# query-task, it should still be running (retrying)
	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"query-status test" \
		"\"stage\": \"Running\"" 4

	# check log, retrying in load unit
	check_log_contains $WORK_DIR/worker1/log/dm-worker.log '\["execute statements"\] \[task=test\] \[unit=load\] \[retry=0\] \[queries="\[CREATE DATABASE `retry_cancel`;\]"\]'

	# stop-task, should not block too much time
	start_time=$(date +%s)
	dmctl_stop_task test
	duration=$(($(date +%s) - $start_time))
	if [[ $duration -gt 3 ]]; then
		echo "stop-task tasks for full import too long duration $duration"
		exit 1
	fi

	# stop DM-worker, then update failpoint for checkpoint
	kill_dm_worker
	export GO_FAILPOINTS='github.com/pingcap/tiflow/dm/pkg/conn/retryableError=return("UPDATE `dm_meta`.`test_loader_checkpoint`")'

	# start DM-worker again
	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT
	sleep 5 # wait gRPC from DM-master to DM-worker established again

	echo "2nd time to start task"
	dmctl_start_task

	sleep 5 # should sleep > retryTimeout (now 3s)

	# query-task, it should still be running (retrying)
	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"query-status test" \
		"\"stage\": \"Running\"" 4

	# check log, retrying in load unit
	check_log_contains $WORK_DIR/worker1/log/dm-worker.log 'Error 1213: failpoint inject retryable error for UPDATE `dm_meta`.`test_loader_checkpoint`'

	# stop-task, should not block too much time
	start_time=$(date +%s)
	dmctl_stop_task test
	duration=$(($(date +%s) - $start_time))
	if [[ $duration -gt 3 ]]; then
		echo "stop-task tasks for updating loader checkpoint too long duration $duration"
		exit 1
	fi

	# stop DM-worker, then disable failpoints
	kill_dm_worker
	export GO_FAILPOINTS=''

	# start DM-worker again
	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT
	sleep 5 # wait gRPC from DM-master to DM-worker established again

	# start-task with retry_cancel disabled
	echo "3rd time to start task"
	dmctl_start_task

	# use sync_diff_inspector to check full dump loader
	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml

	# ---------- test for incremental replication ----------
	# stop DM-worker, then enable failpoints
	kill_dm_worker
	export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/pkg/conn/retryableError=return(\"retry_cancel\")"

	# run sql files to trigger incremental replication
	run_sql_file $cur/data/db1.increment.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1
	run_sql_file $cur/data/db2.increment.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2

	# start DM-worker again
	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT

	sleep 5
	echo "start task for incremental replication"
	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"start-task $cur/conf/dm-task.yaml" \
		"\"result\": true" 1 \
		"start sub task test: sub task test already exists" 2

	sleep 5 # should sleep > retryTimeout (now 3s)

	# query-task, it should still be running (retrying)
	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"query-status test" \
		"\"stage\": \"Running\"" 4

	# check log, retrying in binlog replication unit
	check_log_contains $WORK_DIR/worker1/log/dm-worker.log '\["execute statements"\] \[task=test\] \[unit="binlog replication"\] \[retry=0\] \[queries="\[REPLACE INTO `retry_cancel`'

	# stop-task, should not block too much time
	start_time=$(date +%s)
	dmctl_stop_task test
	duration=$(($(date +%s) - $start_time))
	if [[ $duration -gt 3 ]]; then
		echo "stop-task tasks for incremental replication too long duration $duration"
		exit 1
	fi

	# stop DM-worker, then disable failpoints
	kill_dm_worker
	export GO_FAILPOINTS=''

	# start DM-worker again
	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT
	sleep 5 # wait gRPC from DM-master to DM-worker established again

	# start-task with retry_cancel disabled
	echo "5th time to start task"
	dmctl_start_task

	# use sync_diff_inspector to check data now!
	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
}

cleanup_data retry_cancel
# also cleanup dm processes in case of last run failed
cleanup_process $*
run $*
cleanup_process $*

echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>"