#!/bin/bash
# Origin: github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/tests/ha/run.sh
#
# High-availability test case for DM: brings up several dm-master and
# dm-worker processes, starts a task, then kills/restarts workers and the
# initial masters and checks that the task keeps its stage and that
# upstream and downstream data still match.
#
# Every helper called below (run_dm_master, run_dm_worker, run_dm_ctl,
# check_metric, check_sync_diff, ...) and most variables (TEST_DIR,
# TEST_NAME, MASTER_PORT*, WORKER*_PORT, MYSQL_*, SOURCE_ID*) are not defined
# in this file; they are expected to come from the sourced
# _utils/test_prepare -- NOTE(review): confirm against that file.

set -eu

cur=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
source "$cur/../_utils/test_prepare"
WORK_DIR=$TEST_DIR/$TEST_NAME
API_VERSION="v1alpha1"

# Runs the whole HA scenario. Positional arguments are accepted but not used.
# Reads globals: cur, WORK_DIR, API_VERSION plus the ports, hosts, passwords
# and source ids listed in the file header.
function run() {
  echo "import prepare data"
  run_sql_file "$cur/data/db1.prepare.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1"
  check_contains 'Query OK, 2 rows affected'
  run_sql_file "$cur/data/db2.prepare.sql" "$MYSQL_HOST2" "$MYSQL_PORT2" "$MYSQL_PASSWORD2"
  check_contains 'Query OK, 3 rows affected'

  echo "start DM worker and master"
  run_dm_master "$WORK_DIR/master1" "$MASTER_PORT1" "$cur/conf/dm-master1.toml"
  run_dm_master "$WORK_DIR/master2" "$MASTER_PORT2" "$cur/conf/dm-master2.toml"
  check_rpc_alive "$cur/../bin/check_master_online" "127.0.0.1:$MASTER_PORT1"
  check_rpc_alive "$cur/../bin/check_master_online" "127.0.0.1:$MASTER_PORT2"

  # master1 or master2 join campaign; either one passing the metric check is enough
  check_metric "$MASTER_PORT2" 'start_leader_counter' 3 0 2 ||
    check_metric "$MASTER_PORT1" 'start_leader_counter' 3 0 2

  run_dm_worker "$WORK_DIR/worker1" "$WORKER1_PORT" "$cur/conf/dm-worker1.toml"
  check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER1_PORT"
  echo "operate mysql config to worker"
  cp "$cur/conf/source1.yaml" "$WORK_DIR/source1.yaml"
  cp "$cur/conf/source2.yaml" "$WORK_DIR/source2.yaml"
  # insert a relay-dir entry right before the relay-binlog-name line of each copied source config
  sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker1/relay_log" "$WORK_DIR/source1.yaml"
  sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker2/relay_log" "$WORK_DIR/source2.yaml"
  dmctl_operate_source create "$WORK_DIR/source1.yaml" "$SOURCE_ID1"

  run_dm_worker "$WORK_DIR/worker2" "$WORKER2_PORT" "$cur/conf/dm-worker2.toml"
  check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER2_PORT"
  dmctl_operate_source create "$WORK_DIR/source2.yaml" "$SOURCE_ID2"

  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "start-relay -s $SOURCE_ID2 worker2" \
    "\"result\": true" 2

  # join master3
  run_dm_master "$WORK_DIR/master3" "$MASTER_PORT3" "$cur/conf/dm-master3.toml"
  check_rpc_alive "$cur/../bin/check_master_online" "127.0.0.1:$MASTER_PORT3"
  check_metric "$MASTER_PORT3" 'start_leader_counter' 3 -1 1 # master3 is not leader

  # worker in running stage
  check_metric "$MASTER_PORT1" 'dm_master_worker_state{worker="worker1"}' 3 1 3 ||
    check_metric "$MASTER_PORT2" 'dm_master_worker_state{worker="worker1"}' 3 1 3

  echo "start DM task"
  dmctl_start_task

  echo "use sync_diff_inspector to check full dump loader"
  check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"

  echo "flush logs to force rotate binlog file"
  run_sql "flush logs;" "$MYSQL_PORT1" "$MYSQL_PASSWORD1"
  run_sql "flush logs;" "$MYSQL_PORT2" "$MYSQL_PASSWORD2"

  echo "apply increment data before restart dm-worker to ensure entering increment phase"
  run_sql_file "$cur/data/db1.increment.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1"
  run_sql_file "$cur/data/db2.increment.sql" "$MYSQL_HOST2" "$MYSQL_PORT2" "$MYSQL_PASSWORD2"

  echo "use sync_diff_inspector to check increment data"
  check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"

  echo "pause task before kill and restart dm-worker"
  run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "pause-task test" \
    "\"result\": true" 3

  echo "start dm-worker3 and kill dm-worker2"
  kill_process dm-worker2
  check_port_offline "$WORKER2_PORT" 20
  rm -rf "$WORK_DIR/worker2/relay_log"

  run_dm_worker "$WORK_DIR/worker3" "$WORKER3_PORT" "$cur/conf/dm-worker3.toml"
  check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER3_PORT"

  # dm-worker2 was killed just above, so relay has to be started on the
  # worker that was brought up in this step (worker3), not on worker2.
  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "start-relay -s $SOURCE_ID2 worker3" \
    "\"result\": true" 2

  sleep 8
  echo "wait for the task to be scheduled and keep paused"
  check_http_alive "127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test" '"stage": "Paused"' 10

  echo "resume task before kill and restart dm-worker"
  run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "resume-task test" \
    "\"result\": true" 3

  echo "start dm-worker2 and kill dm-worker3"
  kill_process dm-worker3
  check_port_offline "$WORKER3_PORT" 20
  rm -rf "$WORK_DIR/worker3/relay_log"

  run_dm_worker "$WORK_DIR/worker2" "$WORKER2_PORT" "$cur/conf/dm-worker2.toml"
  check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER2_PORT"

  sleep 8
  echo "wait and check task running"
  check_http_alive "127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test" '"stage": "Running"' 10

  # manually transfer an existing source to a newly started worker
  run_dm_worker "$WORK_DIR/worker3" "$WORKER3_PORT" "$cur/conf/dm-worker3.toml"
  check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER3_PORT"

  run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "transfer-source $SOURCE_ID1 worker3" \
    "\"result\": true" 1

  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
    "list-member --name worker3" \
    "$SOURCE_ID1" 1

  echo "query-status from all dm-master"
  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT1" \
    "query-status test" \
    "\"stage\": \"Running\"" 3

  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT2" \
    "query-status test" \
    "\"stage\": \"Running\"" 3

  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT3" \
    "query-status test" \
    "\"stage\": \"Running\"" 3

  echo "join new dm-master and query-status"
  run_dm_master "$WORK_DIR/master4" "$MASTER_PORT4" "$cur/conf/dm-master4.toml"
  check_rpc_alive "$cur/../bin/check_master_online" "127.0.0.1:$MASTER_PORT4"
  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT4" \
    "query-status test" \
    "\"stage\": \"Running\"" 3

  # Joining may fail with `fail to join embed etcd: add member http://127.0.0.1:8295: etcdserver: unhealthy cluster`,
  # in which case dm-master exits; so just sleep a few seconds between joins.
  sleep 5

  run_dm_master "$WORK_DIR/master5" "$MASTER_PORT5" "$cur/conf/dm-master5.toml"
  check_rpc_alive "$cur/../bin/check_master_online" "127.0.0.1:$MASTER_PORT5"
  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT5" \
    "query-status test" \
    "\"stage\": \"Running\"" 3
  sleep 5

  run_dm_master "$WORK_DIR/master6" "$MASTER_PORT6" "$cur/conf/dm-master6.toml"
  check_rpc_alive "$cur/../bin/check_master_online" "127.0.0.1:$MASTER_PORT6"
  run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT6" \
    "query-status test" \
    "\"stage\": \"Running\"" 3
  sleep 5

  echo "kill dm-master1"
  kill_process dm-master1
  check_master_port_offline 1
  echo "kill dm-master2"
  kill_process dm-master2
  check_master_port_offline 2

  echo "initial cluster of dm-masters have been killed"
  echo "now we will check whether joined masters can work normally"

  # we need some time for cluster to re-elect new available leader
  dmctl_stop_task_with_retry "test" "$MASTER_PORT5"

  run_sql_file "$cur/data/db1.increment2.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1"
  run_sql_file "$cur/data/db2.increment2.sql" "$MYSQL_HOST2" "$MYSQL_PORT2" "$MYSQL_PASSWORD2"
  sleep 2

  # leader needs some time to rebuild info
  # start-task is not retryable
  run_dm_ctl "$WORK_DIR" "127.0.0.1:$MASTER_PORT5" \
    "start-task $cur/conf/dm-task.yaml" \
    "\"result\": true" 3 \
    "\"source\": \"$SOURCE_ID1\"" 1 \
    "\"source\": \"$SOURCE_ID2\"" 1

  echo "use sync_diff_inspector to check increment2 data now!"
  check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"
}

cleanup_data ha_test
# also cleanup dm processes in case of last run failed
cleanup_process "$@"
run "$@"
cleanup_process "$@"

echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>"