github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/tests/ha_cases_1/run.sh (about)

     1  #!/bin/bash
     2  
     3  set -eu
     4  
     5  cur=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
     6  source $cur/../_utils/test_prepare
     7  WORK_DIR=$TEST_DIR/$TEST_NAME
     8  API_VERSION="v1alpha1"
     9  # import helper functions
    10  source $cur/../_utils/ha_cases_lib.sh
    11  
    12  function test_running() {
    13  	echo "[$(date)] <<<<<< start test_running >>>>>>"
    14  	cleanup
    15  	prepare_sql
    16  	start_cluster
    17  
    18  	# make sure task to step in "Sync" stage
    19  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT3" \
    20  		"query-status test" \
    21  		"\"stage\": \"Running\"" 2 \
    22  		"\"unit\": \"Sync\"" 2
    23  
    24  	echo "use sync_diff_inspector to check full dump loader"
    25  	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
    26  
    27  	echo "flush logs to force rotate binlog file"
    28  	run_sql "flush logs;" $MYSQL_PORT1 $MYSQL_PASSWORD1
    29  	run_sql "flush logs;" $MYSQL_PORT2 $MYSQL_PASSWORD2
    30  
    31  	echo "apply increment data before restart dm-worker to ensure entering increment phase"
    32  	run_sql_file_withdb $cur/data/db1.increment.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 $ha_test
    33  	run_sql_file_withdb $cur/data/db2.increment.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2 $ha_test
    34  
    35  	sleep 3 # wait for flush checkpoint
    36  	echo "use sync_diff_inspector to check increment data"
    37  	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
    38  	echo "[$(date)] <<<<<< finish test_running >>>>>>"
    39  }
    40  
    41  function test_kill_master() {
    42  	echo "[$(date)] <<<<<< start test_kill_master >>>>>>"
    43  	test_running
    44  
    45  	echo "kill dm-master1"
    46  	kill_process dm-master1
    47  	check_master_port_offline 1
    48  	rm -rf $WORK_DIR/master1/default.master1
    49  
    50  	echo "waiting 5 seconds"
    51  	sleep 5
    52  	echo "check task is running"
    53  	check_http_alive 127.0.0.1:$MASTER_PORT2/apis/${API_VERSION}/status/test '"stage": "Running"' 10
    54  
    55  	echo "check master2,3 are running"
    56  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT2" \
    57  		"query-status test" \
    58  		"\"stage\": \"Running\"" 2
    59  
    60  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT3" \
    61  		"query-status test" \
    62  		"\"stage\": \"Running\"" 2
    63  
    64  	run_sql_file_withdb $cur/data/db1.increment2.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 $ha_test
    65  	run_sql_file_withdb $cur/data/db2.increment2.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2 $ha_test
    66  	sleep 2
    67  
    68  	echo "use sync_diff_inspector to check increment2 data now!"
    69  	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
    70  	echo "[$(date)] <<<<<< finish test_kill_master >>>>>>"
    71  }
    72  
    73  function test_kill_and_isolate_worker() {
    74  	inject_points=("github.com/pingcap/tiflow/dm/worker/defaultKeepAliveTTL=return(1)"
    75  		"github.com/pingcap/tiflow/dm/worker/defaultRelayKeepAliveTTL=return(2)"
    76  	)
    77  	export GO_FAILPOINTS="$(join_string \; ${inject_points[@]})"
    78  	echo "[$(date)] <<<<<< start test_kill_and_isolate_worker >>>>>>"
    79  	test_running
    80  
    81  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    82  		"start-relay -s $SOURCE_ID2 worker2" \
    83  		"\"result\": true" 2
    84  
    85  	echo "kill dm-worker2"
    86  	kill_process dm-worker2
    87  	check_port_offline $WORKER2_PORT 20
    88  	rm -rf $WORK_DIR/worker2/relay_log
    89  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \
    90  		"query-status test" \
    91  		"\"result\": false" 1
    92  
    93  	run_dm_worker $WORK_DIR/worker3 $WORKER3_PORT $cur/conf/dm-worker3.toml
    94  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER3_PORT
    95  
    96  	echo "wait and check task running"
    97  	check_http_alive 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test '"stage": "Running"' 10
    98  
    99  	run_dm_worker $WORK_DIR/worker4 $WORKER4_PORT $cur/conf/dm-worker4.toml
   100  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER4_PORT
   101  
   102  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   103  		"start-relay -s $SOURCE_ID2 worker3 worker4" \
   104  		"\"result\": true" 3
   105  
   106  	echo "restart dm-worker3"
   107  	kill_process dm-worker3
   108  	check_port_offline $WORKER3_PORT 20
   109  	rm -rf $WORK_DIR/worker3/relay_log
   110  
   111  	echo "wait and check task running"
   112  	check_http_alive 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test '"stage": "Running"' 10
   113  
   114  	run_dm_worker $WORK_DIR/worker3 $WORKER3_PORT $cur/conf/dm-worker3.toml
   115  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER3_PORT
   116  
   117  	echo "isolate dm-worker4"
   118  	isolate_worker 4 "isolate"
   119  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \
   120  		"query-status test" \
   121  		"\"stage\": \"Running\"" 3
   122  
   123  	echo "isolate dm-worker3"
   124  	isolate_worker 3 "isolate"
   125  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \
   126  		"query-status test" \
   127  		"\"stage\": \"Running\"" 1 \
   128  		"\"result\": false" 1
   129  
   130  	echo "disable isolate dm-worker4"
   131  	isolate_worker 4 "disable_isolate"
   132  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \
   133  		"query-status test" \
   134  		"\"stage\": \"Running\"" 3
   135  
   136  	echo "query-status from all dm-master"
   137  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \
   138  		"query-status test" \
   139  		"\"stage\": \"Running\"" 3
   140  
   141  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT2" \
   142  		"query-status test" \
   143  		"\"stage\": \"Running\"" 3
   144  
   145  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT3" \
   146  		"query-status test" \
   147  		"\"stage\": \"Running\"" 3
   148  
   149  	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   150  		"pause-task test" \
   151  		"\"result\": true" 3
   152  
   153  	echo "restart worker4"
   154  	kill_process dm-worker4
   155  	check_port_offline $WORKER4_PORT 20
   156  	rm -rf $WORK_DIR/worker4/relay_log
   157  	run_dm_worker $WORK_DIR/worker4 $WORKER4_PORT $cur/conf/dm-worker4.toml
   158  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER4_PORT
   159  
   160  	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   161  		"resume-task test" \
   162  		"\"result\": true" 3
   163  
   164  	run_sql_file_withdb $cur/data/db1.increment2.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 $ha_test
   165  	run_sql_file_withdb $cur/data/db2.increment2.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2 $ha_test
   166  	sleep 2
   167  
   168  	echo "use sync_diff_inspector to check increment2 data now!"
   169  	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
   170  	echo "[$(date)] <<<<<< finish test_kill_and_isolate_worker >>>>>>"
   171  	export GO_FAILPOINTS=""
   172  }
   173  
   174  function run() {
   175  	test_kill_master             # TICASE-996, 958
   176  	test_kill_and_isolate_worker # TICASE-968, 973, 1002, 975, 969, 972, 974, 970, 971, 976, 978, 988
   177  }
   178  
   179  cleanup_data $ha_test
   180  cleanup_data $ha_test2
   181  # also cleanup dm processes in case of last run failed
   182  cleanup_process $*
   183  run $*
   184  cleanup_process $*
   185  
   186  echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>"