#!/bin/bash
#
# DM high-availability integration test (dm/tests/ha_cases2/run.sh).
#
# Runs two migration tasks ("test" and "test2") and checks that
#   * both tasks can be paused and resumed while data is being written, and
#   * data stays consistent while dm-workers are killed and restarted.
#
# All cluster/SQL helpers (run_dm_ctl, check_sync_diff, load_data, cleanup, ...)
# and most variables (TEST_DIR, MASTER_PORT*, WORKER*_PORT, MYSQL_*, ha_test*)
# are expected to come from the two files sourced below.

set -eu

cur=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
source "$cur/../_utils/test_prepare"
WORK_DIR=$TEST_DIR/$TEST_NAME
API_VERSION="v1alpha1"
# import helper functions
source "$cur/../_utils/ha_cases_lib.sh"

#######################################
# Print the status of both tasks for debugging, then abort the test run.
#
# Each query asks run_dm_ctl to find the text "fail me!" exactly once.
# NOTE(review): presumably that text never appears, so run_dm_ctl reports a
# mismatch and shows the dmctl output - confirm against run_dm_ctl.
#
# Both queries always run: each call is isolated in a subshell and its
# failure is ignored, so a failing (or exiting) first query can no longer
# prevent the second dump or the final `exit 1`.
#
# Globals:   WORK_DIR, MASTER_PORT3 (read)
# Arguments: none
# Returns:   never returns; exits the script with status 1
#######################################
function print_debug_status() {
	local task
	for task in test test2; do
		(run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT3" \
			"query-status $task" \
			"fail me!" 1) || true
	done
	exit 1
}

#######################################
# Start the multi-task cluster and verify full and incremental data.
#
# Steps: cleanup, prepare data, start the cluster, wait until both tasks
# report Running/Sync, diff the full dump, rotate the binlog, apply the
# increment SQL files to both databases on both sources, diff again.
#
# Globals:   WORK_DIR, cur, MASTER_PORT3, MYSQL_HOST{1,2}, MYSQL_PORT{1,2},
#            MYSQL_PASSWORD{1,2}, ha_test, ha_test2 (read)
# Arguments: none
# Returns:   0 on success; the script exits on any failed check
#######################################
function test_multi_task_running() {
	echo "[$(date)] <<<<<< start test_multi_task_running >>>>>>"
	cleanup
	prepare_sql_multi_task
	start_multi_tasks_cluster

	# make sure both tasks have stepped into the "Sync" stage
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT3" \
		"query-status test" \
		"\"stage\": \"Running\"" 2 \
		"\"unit\": \"Sync\"" 2
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT3" \
		"query-status test2" \
		"\"stage\": \"Running\"" 2 \
		"\"unit\": \"Sync\"" 2

	echo "use sync_diff_inspector to check full dump loader"
	check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"
	check_sync_diff "$WORK_DIR" "$cur/conf/diff_config_multi_task.toml"

	echo "flush logs to force rotate binlog file"
	run_sql "flush logs;" "$MYSQL_PORT1" "$MYSQL_PASSWORD1"
	run_sql "flush logs;" "$MYSQL_PORT2" "$MYSQL_PASSWORD2"

	echo "apply increment data before restart dm-worker to ensure entering increment phase"
	run_sql_file_withdb "$cur/data/db1.increment.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1" "$ha_test"
	run_sql_file_withdb "$cur/data/db2.increment.sql" "$MYSQL_HOST2" "$MYSQL_PORT2" "$MYSQL_PASSWORD2" "$ha_test"
	run_sql_file_withdb "$cur/data/db1.increment.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1" "$ha_test2"
	run_sql_file_withdb "$cur/data/db2.increment.sql" "$MYSQL_HOST2" "$MYSQL_PORT2" "$MYSQL_PASSWORD2" "$ha_test2"

	sleep 5 # wait for flush checkpoint
	echo "use sync_diff_inspector to check increment data"
	# on a diff failure, dump both task statuses and abort
	check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml" 50 || print_debug_status
	check_sync_diff "$WORK_DIR" "$cur/conf/diff_config_multi_task.toml" 50 || print_debug_status
	echo "[$(date)] <<<<<< finish test_multi_task_running >>>>>>"
}

#######################################
# Pause and resume both tasks while data is loaded in the background.
#
# Each task is paused twice and resumed twice on purpose, so repeating the
# operation is exercised as well. After the background loaders finish, the
# upstream and downstream data are compared.
#
# Globals:   WORK_DIR, cur, MASTER_PORT, SOURCE_ID{1,2}, MYSQL_PORT{1,2},
#            MYSQL_PASSWORD{1,2} (read)
# Arguments: none
# Returns:   0 on success; the script exits on any failed check
#######################################
function test_pause_task() {
	echo "[$(date)] <<<<<< start test_pause_task >>>>>>"
	test_multi_task_running

	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"start-relay -s $SOURCE_ID1 worker1" \
		"\"result\": true" 2
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"start-relay -s $SOURCE_ID2 worker2" \
		"\"result\": true" 2

	echo "start dumping SQLs into source"
	load_data "$MYSQL_PORT1" "$MYSQL_PASSWORD1" "a" &
	load_data "$MYSQL_PORT2" "$MYSQL_PASSWORD2" "b" &

	local task_name=(test test2)
	local name
	for name in "${task_name[@]}"; do
		echo "pause tasks $name"

		# Some SQL may still be running (often removing a checkpoint record);
		# pausing makes that SQL fail, so `result` is not necessarily true and
		# the output is deliberately not checked here.
		run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
			"pause-task $name"

		# pause twice, just to test repeated pause along the way
		run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
			"pause-task $name"

		run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
			"query-status $name" \
			"\"stage\": \"Paused\"" 2
	done

	sleep 1

	for name in "${task_name[@]}"; do
		echo "resume tasks $name"
		run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
			"resume-task $name" \
			"\"result\": true" 3

		# resume twice, just to test repeated resume along the way
		run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
			"resume-task $name" \
			"\"result\": true" 3

		run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
			"query-status $name" \
			"\"stage\": \"Running\"" 4
	done

	# wait for the background load_data jobs, then give syncing a moment
	wait
	sleep 1

	echo "use sync_diff_inspector to check increment data"
	check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"
	check_sync_diff "$WORK_DIR" "$cur/conf/diff_config_multi_task.toml"
	echo "[$(date)] <<<<<< finish test_pause_task >>>>>>"
}

#######################################
# Restart the idle workers, then kill the in-use workers one by one.
#
# 1. Load data into both databases on both sources in the background.
# 2. Parse `query-status test` to learn which workers are in use.
# 3. Kill and restart every worker that is NOT in use.
# 4. Kill each in-use worker in turn; after each kill both tasks must still
#    report Running. After the first kill the data is also diffed.
#
# Globals:   WORK_DIR, cur, PWD, MASTER_PORT, MASTER_PORT2, WORKER{1..5}_PORT,
#            MYSQL_PORT{1,2}, MYSQL_PASSWORD{1,2}, ha_test2 (read)
# Arguments: none
# Returns:   0 on success; exits 1 if no in-use worker can be determined,
#            and the script exits on any failed check
#######################################
function test_multi_task_reduce_and_restart_worker() {
	echo "[$(date)] <<<<<< start test_multi_task_reduce_and_restart_worker >>>>>>"
	test_multi_task_running

	echo "start dumping SQLs into source"
	load_data "$MYSQL_PORT1" "$MYSQL_PASSWORD1" "a" &
	load_data "$MYSQL_PORT2" "$MYSQL_PASSWORD2" "b" &
	load_data "$MYSQL_PORT1" "$MYSQL_PASSWORD1" "a" "$ha_test2" &
	load_data "$MYSQL_PORT2" "$MYSQL_PASSWORD2" "b" "$ha_test2" &
	local worker_ports=("$WORKER1_PORT" "$WORKER2_PORT" "$WORKER3_PORT" "$WORKER4_PORT" "$WORKER5_PORT")

	# find which workers are in use
	local task_name=(test test2)
	local worker_inuse=() # such as ("worker1" "worker4")
	local status w
	status=$("$PWD/bin/dmctl.test" DEVEL --master-addr "127.0.0.1:$MASTER_PORT" query-status test |
		grep 'worker' | awk -F: '{print $2}')
	# shellcheck disable=SC2086 # deliberately unquoted: print all matches on one line
	echo $status
	# shellcheck disable=SC2086 # deliberately unquoted: one word per matched line
	for w in $status; do
		# keep the 7 characters that start 9 characters before the end of the
		# word, e.g. `"worker1",` -> `worker1`
		worker_inuse+=("${w:0-9:7}")
		echo "find workers: ${w:0-9:7} for task: test"
	done
	# Without a worker name the kill loop below would compute a bogus port
	# index and call kill_process with an empty name, so stop right here.
	if ((${#worker_inuse[@]} == 0)); then
		echo "no in-use worker found for task: test" >&2
		exit 1
	fi
	echo "find all workers: ${worker_inuse[*]} (total: ${#worker_inuse[@]})"

	local idx port
	for ((idx = 1; idx <= ${#worker_ports[@]}; idx++)); do
		if [[ ! " ${worker_inuse[*]} " =~ " worker${idx} " ]]; then
			port=${worker_ports[idx - 1]}
			echo "restart unuse worker${idx}"

			echo "try to kill worker port ${port}"
			kill_process "dm-worker${idx}"
			run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT2" "list-member --worker --name=worker$idx" '"stage": "offline"' 1

			echo "start dm-worker${idx}"
			run_dm_worker "$WORK_DIR/worker${idx}" "$port" "$cur/conf/dm-worker${idx}.toml"
			check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:${port}"
		fi
	done

	local i worker wk name
	for ((i = 0; i < ${#worker_inuse[@]}; i++)); do
		worker=${worker_inuse[i]}
		wk=${worker:0-1:1}            # worker id = last character, such as "1" or "4"
		port=${worker_ports[wk - 1]} # port of that worker
		echo "try to kill worker port ${port}"
		kill_process "dm-${worker}"
		check_port_offline "$port" 20
		# killing just one worker at a time should be safe
		echo "${worker} was killed"
		for name in "${task_name[@]}"; do
			run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
				"query-status $name" \
				"\"stage\": \"Running\"" 2
		done
		if ((i == 0)); then
			# wait for the background load_data jobs, then give syncing a moment
			wait
			sleep 2
			echo "use sync_diff_inspector to check increment data"
			check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"
			check_sync_diff "$WORK_DIR" "$cur/conf/diff_config_multi_task.toml"
			echo "data checked after one worker was killed"
		fi
	done
	echo "[$(date)] <<<<<< finish test_multi_task_reduce_and_restart_worker >>>>>>"
}

#######################################
# Run every test case of this script, in order.
# Arguments: accepted for symmetry with the caller but not used
#######################################
function run() {
	test_pause_task                           # TICASE-990
	test_multi_task_reduce_and_restart_worker # TICASE-968, 994, 995, 964, 966, 979, 981, 982, 985, 986, 989, 993
}

cleanup_data "$ha_test"
cleanup_data "$ha_test2"
# also clean up dm processes in case the last run failed
cleanup_process "$@"
run "$@"
cleanup_process "$@"

echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>"