#!/bin/bash
#
# High-availability test cases (group 1) for DM.
#
# Brings up a cluster through the shared HA helpers, then checks that the
# "test" task keeps running and that data stays consistent (verified with
# sync_diff_inspector) while:
#   * a dm-master is killed (test_kill_master);
#   * dm-workers are killed, restarted and isolated
#     (test_kill_and_isolate_worker).
#
# Every helper invoked here (cleanup, start_cluster, run_dm_ctl, run_sql, ...)
# and every port/credential variable comes from the two files sourced below.

set -eu

cur=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
source "$cur/../_utils/test_prepare"
WORK_DIR=$TEST_DIR/$TEST_NAME
API_VERSION="v1alpha1"
# import helper functions
source "$cur/../_utils/ha_cases_lib.sh"

#######################################
# Start a fresh cluster and verify both the full and the incremental phase.
# Globals (read): WORK_DIR, cur, ha_test, MASTER_PORT3,
#                 MYSQL_HOST{1,2}, MYSQL_PORT{1,2}, MYSQL_PASSWORD{1,2}
# Arguments: none
#######################################
function test_running() {
  echo "[$(date)] <<<<<< start test_running >>>>>>"
  cleanup
  prepare_sql
  start_cluster

  # make sure the task has stepped into the "Sync" stage
  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT3" \
    "query-status test" \
    "\"stage\": \"Running\"" 2 \
    "\"unit\": \"Sync\"" 2

  echo "use sync_diff_inspector to check full dump loader"
  check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"

  echo "flush logs to force rotate binlog file"
  run_sql "flush logs;" "$MYSQL_PORT1" "$MYSQL_PASSWORD1"
  run_sql "flush logs;" "$MYSQL_PORT2" "$MYSQL_PASSWORD2"

  echo "apply increment data before restart dm-worker to ensure entering increment phase"
  run_sql_file_withdb "$cur/data/db1.increment.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1" "$ha_test"
  run_sql_file_withdb "$cur/data/db2.increment.sql" "$MYSQL_HOST2" "$MYSQL_PORT2" "$MYSQL_PASSWORD2" "$ha_test"

  sleep 3 # wait for flush checkpoint
  echo "use sync_diff_inspector to check increment data"
  check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"
  echo "[$(date)] <<<<<< finish test_running >>>>>>"
}

#######################################
# Kill dm-master1, wipe its data directory, and verify that the task is still
# reported as Running by master2 and master3 and that new increments are
# replicated.
# Globals (read): WORK_DIR, cur, ha_test, API_VERSION, MASTER_PORT2,
#                 MASTER_PORT3, MYSQL_HOST{1,2}, MYSQL_PORT{1,2},
#                 MYSQL_PASSWORD{1,2}
# Arguments: none
#######################################
function test_kill_master() {
  echo "[$(date)] <<<<<< start test_kill_master >>>>>>"
  test_running

  echo "kill dm-master1"
  kill_process dm-master1
  check_master_port_offline 1
  # ${WORK_DIR:?} aborts instead of deleting under "/" if WORK_DIR is empty.
  rm -rf -- "${WORK_DIR:?}/master1/default.master1"

  echo "waiting 5 seconds"
  sleep 5
  echo "check task is running"
  check_http_alive "127.0.0.1:$MASTER_PORT2/apis/${API_VERSION}/status/test" '"stage": "Running"' 10

  echo "check master2,3 are running"
  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT2" \
    "query-status test" \
    "\"stage\": \"Running\"" 2

  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT3" \
    "query-status test" \
    "\"stage\": \"Running\"" 2

  run_sql_file_withdb "$cur/data/db1.increment2.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1" "$ha_test"
  run_sql_file_withdb "$cur/data/db2.increment2.sql" "$MYSQL_HOST2" "$MYSQL_PORT2" "$MYSQL_PASSWORD2" "$ha_test"
  sleep 2

  echo "use sync_diff_inspector to check increment2 data now!"
  check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"
  echo "[$(date)] <<<<<< finish test_kill_master >>>>>>"
}

#######################################
# Kill, restart and isolate dm-workers (with relay enabled for SOURCE_ID2)
# and verify that the task keeps running and that new increments are
# replicated afterwards.
# Globals (read):    WORK_DIR, cur, ha_test, API_VERSION, SOURCE_ID2,
#                    MASTER_PORT, MASTER_PORT{1,2,3}, WORKER{2,3,4}_PORT,
#                    MYSQL_HOST{1,2}, MYSQL_PORT{1,2}, MYSQL_PASSWORD{1,2}
# Globals (written): GO_FAILPOINTS (exported; reset to "" on completion)
# Arguments: none
#######################################
function test_kill_and_isolate_worker() {
  local -a inject_points=(
    "github.com/pingcap/tiflow/dm/worker/defaultKeepAliveTTL=return(1)"
    "github.com/pingcap/tiflow/dm/worker/defaultRelayKeepAliveTTL=return(2)"
  )
  # Assign first, export second, so a failing join_string is not masked by
  # the exit status of `export`.
  local failpoints
  failpoints=$(join_string \; "${inject_points[@]}")
  export GO_FAILPOINTS="$failpoints"
  echo "[$(date)] <<<<<< start test_kill_and_isolate_worker >>>>>>"
  test_running

  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "start-relay -s $SOURCE_ID2 worker2" \
    "\"result\": true" 2

  echo "kill dm-worker2"
  kill_process dm-worker2
  check_port_offline "$WORKER2_PORT" 20
  rm -rf -- "${WORK_DIR:?}/worker2/relay_log"
  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT1" \
    "query-status test" \
    "\"result\": false" 1

  run_dm_worker "$WORK_DIR/worker3" "$WORKER3_PORT" "$cur/conf/dm-worker3.toml"
  check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER3_PORT"

  echo "wait and check task running"
  check_http_alive "127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test" '"stage": "Running"' 10

  run_dm_worker "$WORK_DIR/worker4" "$WORKER4_PORT" "$cur/conf/dm-worker4.toml"
  check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER4_PORT"

  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "start-relay -s $SOURCE_ID2 worker3 worker4" \
    "\"result\": true" 3

  echo "restart dm-worker3"
  kill_process dm-worker3
  check_port_offline "$WORKER3_PORT" 20
  rm -rf -- "${WORK_DIR:?}/worker3/relay_log"

  echo "wait and check task running"
  check_http_alive "127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test" '"stage": "Running"' 10

  run_dm_worker "$WORK_DIR/worker3" "$WORKER3_PORT" "$cur/conf/dm-worker3.toml"
  check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER3_PORT"

  echo "isolate dm-worker4"
  isolate_worker 4 "isolate"
  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT1" \
    "query-status test" \
    "\"stage\": \"Running\"" 3

  echo "isolate dm-worker3"
  isolate_worker 3 "isolate"
  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT1" \
    "query-status test" \
    "\"stage\": \"Running\"" 1 \
    "\"result\": false" 1

  echo "disable isolate dm-worker4"
  isolate_worker 4 "disable_isolate"
  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT1" \
    "query-status test" \
    "\"stage\": \"Running\"" 3

  echo "query-status from all dm-master"
  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT1" \
    "query-status test" \
    "\"stage\": \"Running\"" 3

  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT2" \
    "query-status test" \
    "\"stage\": \"Running\"" 3

  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT3" \
    "query-status test" \
    "\"stage\": \"Running\"" 3

  run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "pause-task test" \
    "\"result\": true" 3

  echo "restart worker4"
  kill_process dm-worker4
  check_port_offline "$WORKER4_PORT" 20
  rm -rf -- "${WORK_DIR:?}/worker4/relay_log"
  run_dm_worker "$WORK_DIR/worker4" "$WORKER4_PORT" "$cur/conf/dm-worker4.toml"
  check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER4_PORT"

  run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "resume-task test" \
    "\"result\": true" 3

  run_sql_file_withdb "$cur/data/db1.increment2.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1" "$ha_test"
  run_sql_file_withdb "$cur/data/db2.increment2.sql" "$MYSQL_HOST2" "$MYSQL_PORT2" "$MYSQL_PASSWORD2" "$ha_test"
  sleep 2

  echo "use sync_diff_inspector to check increment2 data now!"
  check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"
  echo "[$(date)] <<<<<< finish test_kill_and_isolate_worker >>>>>>"
  export GO_FAILPOINTS=""
}

#######################################
# Run every case of this group in order. Arguments are accepted but unused.
#######################################
function run() {
  test_kill_master             # TICASE-996, 958
  test_kill_and_isolate_worker # TICASE-968, 973, 1002, 975, 969, 972, 974, 970, 971, 976, 978, 988
}

cleanup_data "$ha_test"
cleanup_data "$ha_test2"
# also cleanup dm processes in case of last run failed
cleanup_process "$@"
run "$@"
cleanup_process "$@"

echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>"