#!/bin/bash
# dm/tests/ha_cases/run.sh
#
# Integration test cases that start dm-master / dm-worker processes, drive
# them through dmctl and verify the results. All helpers used here
# (run_dm_master, run_dm_worker, run_dm_ctl*, check_*, kill_process,
# cleanup*, prepare_sql, start_cluster, ...) and most variables ($TEST_DIR,
# $TEST_NAME, $MASTER_PORT*, $WORKER*_PORT, $MYSQL_*, $SOURCE_ID*, $ha_test*)
# come from the two sourced files below.

set -eu

cur=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
source $cur/../_utils/test_prepare
WORK_DIR=$TEST_DIR/$TEST_NAME
API_VERSION="v1alpha1"
# import helper functions
source $cur/../_utils/ha_cases_lib.sh

# Starts the cluster, waits for task "test" to reach the Sync unit, then
# verifies full and incremental data with sync_diff_inspector.
# Also used as the setup step of test_last_bound.
function test_running() {
	echo "[$(date)] <<<<<< start test_running >>>>>>"
	cleanup
	prepare_sql
	start_cluster

	# make sure the task has stepped into the "Sync" stage
	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT3" \
		"query-status test" \
		"\"stage\": \"Running\"" 2 \
		"\"unit\": \"Sync\"" 2

	echo "use sync_diff_inspector to check full dump loader"
	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml

	echo "flush logs to force rotate binlog file"
	run_sql "flush logs;" $MYSQL_PORT1 $MYSQL_PASSWORD1
	run_sql "flush logs;" $MYSQL_PORT2 $MYSQL_PASSWORD2

	echo "apply increment data before restart dm-worker to ensure entering increment phase"
	run_sql_file_withdb $cur/data/db1.increment.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 $ha_test
	run_sql_file_withdb $cur/data/db2.increment.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2 $ha_test

	sleep 3 # wait for flush checkpoint
	echo "use sync_diff_inspector to check increment data"
	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
	echo "[$(date)] <<<<<< finish test_running >>>>>>"
}

# Starts five masters one after another (dm-master-join1..5.toml), joins
# worker2, kills the first master, then joins worker1 and checks that
# master2 still answers queries.
function test_join_masters_and_worker() {
	echo "[$(date)] <<<<<< start test_join_masters_and_worker >>>>>>"
	cleanup

	run_dm_master $WORK_DIR/master-join1 $MASTER_PORT1 $cur/conf/dm-master-join1.toml
	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT1

	echo "query-status from unique master"
	run_dm_ctl_with_retry $WORK_DIR 127.0.0.1:$MASTER_PORT1 "query-status" '"result": true' 1

	run_dm_master $WORK_DIR/master-join2 $MASTER_PORT2 $cur/conf/dm-master-join2.toml
	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT2
	sleep 5
	run_dm_master $WORK_DIR/master-join3 $MASTER_PORT3 $cur/conf/dm-master-join3.toml
	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT3
	sleep 5
	run_dm_master $WORK_DIR/master-join4 $MASTER_PORT4 $cur/conf/dm-master-join4.toml
	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT4
	sleep 5
	run_dm_master $WORK_DIR/master-join5 $MASTER_PORT5 $cur/conf/dm-master-join5.toml
	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT5

	run_dm_ctl_with_retry $WORK_DIR 127.0.0.1:$MASTER_PORT2 "query-status" '"result": true' 1
	run_dm_ctl_with_retry $WORK_DIR 127.0.0.1:$MASTER_PORT3 "query-status" '"result": true' 1
	run_dm_ctl_with_retry $WORK_DIR 127.0.0.1:$MASTER_PORT4 "query-status" '"result": true' 1
	run_dm_ctl_with_retry $WORK_DIR 127.0.0.1:$MASTER_PORT5 "query-status" '"result": true' 1

	echo "join worker with dm-master1 endpoint"
	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker-join2.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT

	run_dm_ctl_with_retry $WORK_DIR 127.0.0.1:$MASTER_PORT2 "list-member --worker --name=worker2" '"stage": "free",' 1

	echo "kill dm-master-join1"
	kill_process dm-master-join1
	check_master_port_offline 1
	# NOTE(review): the master above was started under $WORK_DIR/master-join1,
	# while this removes a path under $WORK_DIR/master1. The data dir is set
	# in dm-master-join1.toml (not visible here) - confirm this path is right.
	rm -rf $WORK_DIR/master1/default.master1

	# worker2 must stay registered after the master it joined through is gone
	run_dm_ctl_with_retry $WORK_DIR 127.0.0.1:$MASTER_PORT2 "list-member --worker --name=worker2" '"stage": "free",' 1

	echo "join worker with 5 masters endpoint"
	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker-join1.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT

	echo "query-status from master2"
	run_dm_ctl_with_retry $WORK_DIR 127.0.0.1:$MASTER_PORT2 "query-status" '"result": true' 1

	echo "[$(date)] <<<<<< finish test_join_masters_and_worker >>>>>>"
}

# Starts a standalone cluster, adds a second source and task "test2", kills
# the worker that runs test2 and checks that task "test" keeps running while
# test2 cannot be restarted.
function test_standalone_running() {
	echo "[$(date)] <<<<<< start test_standalone_running >>>>>>"
	cleanup
	prepare_sql
	start_standalone_cluster

	echo "use sync_diff_inspector to check full dump loader"
	check_sync_diff $WORK_DIR $cur/conf/diff-standalone-config.toml

	echo "flush logs to force rotate binlog file"
	run_sql "flush logs;" $MYSQL_PORT1 $MYSQL_PASSWORD1

	echo "apply increment data before restart dm-worker to ensure entering increment phase"
	run_sql_file_withdb $cur/data/db1.increment.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 $ha_test

	echo "use sync_diff_inspector to check increment data"
	check_sync_diff $WORK_DIR $cur/conf/diff-standalone-config.toml

	cp $cur/conf/source2.yaml $WORK_DIR/source2.yaml
	dmctl_operate_source create $WORK_DIR/source2.yaml $SOURCE_ID2
	# the first start-task is expected to be rejected ("result": false);
	# worker2 has not been started yet at this point
	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"start-task $cur/conf/standalone-task2.yaml" \
		"\"result\": false" 1

	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT

	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"start-task $cur/conf/standalone-task2.yaml" \
		"\"result\": true" 2 \
		"\"source\": \"$SOURCE_ID2\"" 1

	# Extract the name of the worker running test2 from the query-status
	# output: take the value after the ':' on the line mentioning "worker",
	# then slice 7 characters starting 9 from the end (the worker name) and
	# its last character (the worker index).
	worker=$($PWD/bin/dmctl.test DEVEL --master-addr "127.0.0.1:$MASTER_PORT" query-status test2 |
		grep 'worker' | awk -F: '{print $2}')
	worker_name=${worker:0-9:7}
	worker_idx=${worker_name:0-1:1}
	# Index -> port lookup (index 0 is a placeholder). The values must be
	# expanded here: storing the bare names WORKER1_PORT / WORKER2_PORT would
	# hand check_port_offline a literal string instead of a port number.
	worker_ports=(0 "$WORKER1_PORT" "$WORKER2_PORT")

	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"query-status" \
		"\"taskStatus\": \"Running\"" 2

	echo "kill $worker_name"
	kill_process dm-worker${worker_idx}
	check_port_offline ${worker_ports[$worker_idx]} 20
	rm -rf $WORK_DIR/worker${worker_idx}/relay-dir

	# task test is still running, task test2 has lost its worker
	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"query-status" \
		"\"stage\": \"Running\"" 1 \
		"\"worker\": \"source not bound\"" 1

	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"stop-task test2" \
		"\"result\": true" 1

	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"start-task $cur/conf/standalone-task2.yaml" \
		"\"result\": false" 1

	# task test should still be running
	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"query-status test" \
		"\"stage\": \"Running\"" 1

	echo "[$(date)] <<<<<< finish test_standalone_running >>>>>>"
}

# Checks how masters and workers react to duplicated and unusual member
# names: a second master named "master1", a master name with special
# characters, a second worker named "worker1", and a worker named "master1".
function test_config_name() {
	echo "[$(date)] <<<<<< start test_config_name >>>>>>"

	# a second master reusing the name "master1" must be reported in its log
	cp $cur/conf/dm-master-join2.toml $WORK_DIR/dm-master-join2.toml
	sed -i "s/name = \"master2\"/name = \"master1\"/g" $WORK_DIR/dm-master-join2.toml
	run_dm_master $WORK_DIR/master-join1 $MASTER_PORT1 $cur/conf/dm-master-join1.toml
	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT1
	run_dm_master $WORK_DIR/master-join2 $MASTER_PORT2 $WORK_DIR/dm-master-join2.toml
	check_log_contain_with_retry "missing data or joining a duplicate member master1" $WORK_DIR/master-join2/log/dm-master.log

	# a master name containing special characters must come online
	TEST_CHAR="!@#$%^\&*()_+¥"
	cp $cur/conf/dm-master-join2.toml $WORK_DIR/dm-master-join2.toml
	sed -i "s/name = \"master2\"/name = \"test$TEST_CHAR\"/g" $WORK_DIR/dm-master-join2.toml
	run_dm_master $WORK_DIR/master-join2 $MASTER_PORT2 $WORK_DIR/dm-master-join2.toml
	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT2

	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT

	# a second worker reusing the name "worker1" must be reported in its log
	cp $cur/conf/dm-worker2.toml $WORK_DIR/dm-worker2.toml
	sed -i "s/name = \"worker2\"/name = \"worker1\"/g" $WORK_DIR/dm-worker2.toml
	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $WORK_DIR/dm-worker2.toml
	sleep 2

	check_log_contain_with_retry "[dm-worker with name {\"name\":\"worker1\",\"addr\":\"127.0.0.1:8262\"} already exists]" $WORK_DIR/worker2/log/dm-worker.log

	# a worker that shares its name with a master must come online
	cp $cur/conf/dm-worker2.toml $WORK_DIR/dm-worker2.toml
	sed -i "s/name = \"worker2\"/name = \"master1\"/g" $WORK_DIR/dm-worker2.toml
	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $WORK_DIR/dm-worker2.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT

	echo "[$(date)] <<<<<< finish test_config_name >>>>>>"
}

# Kills and restarts workers in different orders and checks, via check_bound
# and the count of "will try purge whole relay dir" log lines, which source
# each worker ends up with and whether its relay dir was purged.
#
# worker1bound / worker2bound are deliberately left as plain (global)
# variables; presumably check_bound in ha_cases_lib.sh reads them - verify
# before making them local.
function test_last_bound() {
	echo "[$(date)] <<<<<< start test_last_bound >>>>>>"
	test_running

	# now in start_cluster, we ensure source_i is bound to worker_i
	worker1bound=$($PWD/bin/dmctl.test DEVEL --master-addr "127.0.0.1:$MASTER_PORT1" list-member --name worker1 |
		grep 'source' | awk -F: '{print $2}')
	echo "worker1bound $worker1bound"
	worker2bound=$($PWD/bin/dmctl.test DEVEL --master-addr "127.0.0.1:$MASTER_PORT1" list-member --name worker2 |
		grep 'source' | awk -F: '{print $2}')
	echo "worker2bound $worker2bound"

	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"start-relay -s $SOURCE_ID1 worker1" \
		"\"result\": true" 2
	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"start-relay -s $SOURCE_ID2 worker2" \
		"\"result\": true" 2
	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"query-status test" \
		"\"stage\": \"Running\"" 4

	kill_2_worker_ensure_unbound 1 2

	# start 1 then 2
	start_2_worker_ensure_bound 1 2

	check_bound
	# only contains 1 "will try purge ..." which is printed the first time dm worker start
	check_log_contains $WORK_DIR/worker1/log/dm-worker.log "will try purge whole relay dir for new relay log" 1
	check_log_contains $WORK_DIR/worker2/log/dm-worker.log "will try purge whole relay dir for new relay log" 1

	kill_2_worker_ensure_unbound 1 2

	# start 2 then 1
	start_2_worker_ensure_bound 2 1

	check_bound
	check_log_contains $WORK_DIR/worker1/log/dm-worker.log "will try purge whole relay dir for new relay log" 1
	check_log_contains $WORK_DIR/worker2/log/dm-worker.log "will try purge whole relay dir for new relay log" 1

	# kill 12, start 34, kill 34
	kill_2_worker_ensure_unbound 1 2
	start_2_worker_ensure_bound 3 4
	worker3bound=$($PWD/bin/dmctl.test DEVEL --master-addr "127.0.0.1:$MASTER_PORT1" list-member --name worker3 |
		grep 'source' | awk -F: '{print $2}' | cut -d'"' -f 2)
	worker4bound=$($PWD/bin/dmctl.test DEVEL --master-addr "127.0.0.1:$MASTER_PORT1" list-member --name worker4 |
		grep 'source' | awk -F: '{print $2}' | cut -d'"' -f 2)
	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"start-relay -s $worker3bound worker3" \
		"\"result\": true" 2
	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"start-relay -s $worker4bound worker4" \
		"\"result\": true" 2
	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"query-status test" \
		"\"stage\": \"Running\"" 4

	# let workers other than 1 and 2 forward the syncer's progress
	run_sql_file_withdb $cur/data/db1.increment2.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 $ha_test
	run_sql "flush logs;" $MYSQL_PORT2 $MYSQL_PASSWORD2
	run_sql_file_withdb $cur/data/db2.increment2.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2 $ha_test
	# wait until the checkpoint is updated
	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
	kill_2_worker_ensure_unbound 3 4

	# start 1 then 2
	start_2_worker_ensure_bound 1 2

	# check
	check_bound
	# other workers have forwarded the sync progress; if it moved to a new
	# binlog file, the original relay log could be removed
	# (grep | wc -l is kept on purpose: grep -c exits non-zero on zero
	# matches, which would abort the script under `set -e`)
	num1=$(grep "will try purge whole relay dir for new relay log" $WORK_DIR/worker1/log/dm-worker.log | wc -l)
	num2=$(grep "will try purge whole relay dir for new relay log" $WORK_DIR/worker2/log/dm-worker.log | wc -l)
	echo "num1 $num1 num2 $num2"
	# exactly three purges are expected in total across both workers
	if ((num1 + num2 != 3)); then
		echo "expected 3 relay dir purges in total, got $((num1 + num2))"
		exit 1
	fi

	echo "[$(date)] <<<<<< finish test_last_bound >>>>>>"
}

# With relay started for source1 on both worker1 and worker2, a newly created
# source2 must be reported as having no free worker to bind to.
function test_exclusive_relay() {
	echo "[$(date)] <<<<<< start test_exclusive_relay >>>>>>"

	echo "start DM worker and master cluster"
	run_dm_master $WORK_DIR/master1 $MASTER_PORT1 $cur/conf/dm-master-standalone.toml
	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT1

	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
	cp $cur/conf/source1.yaml $WORK_DIR/source1.yaml
	cp $cur/conf/source2.yaml $WORK_DIR/source2.yaml
	# insert an explicit relay-dir line before the relay-binlog-name line
	sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker1/relay_log" $WORK_DIR/source1.yaml
	sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker2/relay_log" $WORK_DIR/source2.yaml
	dmctl_operate_source create $WORK_DIR/source1.yaml $SOURCE_ID1

	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT

	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"start-relay -s $SOURCE_ID1 worker1 worker2" \
		"\"result\": true" 3

	dmctl_operate_source create $WORK_DIR/source2.yaml $SOURCE_ID2

	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"list-member --worker" \
		"\"stage\": \"bound\"" 1 \
		"\"stage\": \"relay\"" 1

	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"operate-source show -s $SOURCE_ID2" \
		"\"msg\": \"source is added but there is no free worker to bound\"" 1

	cleanup
	echo "[$(date)] <<<<<< finish test_exclusive_relay >>>>>>"
}

# With relay enabled per source (source1 on worker1, source2 on worker2) and
# a third worker available, checks where sources are bound as worker1 and
# worker2 are killed and brought back.
function test_exclusive_relay_2() {
	echo "[$(date)] <<<<<< start test_exclusive_relay_2 >>>>>>"

	echo "start DM worker and master cluster"
	run_dm_master $WORK_DIR/master1 $MASTER_PORT1 $cur/conf/dm-master-standalone.toml
	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT1

	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
	cp $cur/conf/source1.yaml $WORK_DIR/source1.yaml
	cp $cur/conf/source2.yaml $WORK_DIR/source2.yaml
	# insert an explicit relay-dir line before the relay-binlog-name line
	sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker1/relay_log" $WORK_DIR/source1.yaml
	sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker2/relay_log" $WORK_DIR/source2.yaml
	dmctl_operate_source create $WORK_DIR/source1.yaml $SOURCE_ID1

	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT
	dmctl_operate_source create $WORK_DIR/source2.yaml $SOURCE_ID2

	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"start-relay -s $SOURCE_ID1 worker1" \
		"\"result\": true" 2

	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"start-relay -s $SOURCE_ID2 worker2" \
		"\"result\": true" 2

	run_dm_worker $WORK_DIR/worker3 $WORKER3_PORT $cur/conf/dm-worker3.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER3_PORT

	# kill worker1, source1 should be bound to worker3
	echo "kill dm-worker1"
	kill_process dm-worker1
	check_port_offline $WORKER1_PORT 20
	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \
		"list-member --name worker3" \
		"\"source\": \"mysql-replica-01\"" 1

	# worker1 comes back online: the binding stays on worker3 and worker1
	# is reported in the relay stage
	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \
		"list-member --name worker3" \
		"\"source\": \"mysql-replica-01\"" 1
	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \
		"list-member --name worker1" \
		"\"stage\": \"relay\"" 1

	# kill worker2, source2 should not be bound
	echo "kill dm-worker2"
	kill_process dm-worker2
	check_port_offline $WORKER2_PORT 20

	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
		"operate-source show -s $SOURCE_ID2" \
		"\"msg\": \"source is added but there is no free worker to bound\"" 1

	# worker2 comes back online and is bound to source2
	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT
	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \
		"list-member --name worker2" \
		"\"source\": \"mysql-replica-02\"" 1

	cleanup
	echo "[$(date)] <<<<<< finish test_exclusive_relay_2 >>>>>>"
}

# Runs every test case in order. Arguments are accepted but not used.
function run() {
	test_exclusive_relay
	test_exclusive_relay_2
	test_last_bound
	test_config_name             # TICASE-915, 916, 954, 955
	test_join_masters_and_worker # TICASE-928, 930, 931, 961, 932, 957
	test_standalone_running      # TICASE-929, 959, 960, 967, 977, 980, 983
}

cleanup_data $ha_test
cleanup_data $ha_test2
# also clean up dm processes in case the last run failed
# ("$@" forwards the script arguments unchanged, without re-splitting)
cleanup_process "$@"
run "$@"
cleanup_process "$@"

echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>"