github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/tests/ha/run.sh (about)

     1  #!/bin/bash
     2  
     3  set -eu
     4  
     5  cur=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
     6  source $cur/../_utils/test_prepare
     7  WORK_DIR=$TEST_DIR/$TEST_NAME
     8  API_VERSION="v1alpha1"
     9  
    10  function run() {
    11  	echo "import prepare data"
    12  	run_sql_file $cur/data/db1.prepare.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1
    13  	check_contains 'Query OK, 2 rows affected'
    14  	run_sql_file $cur/data/db2.prepare.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2
    15  	check_contains 'Query OK, 3 rows affected'
    16  
    17  	echo "start DM worker and master"
    18  	run_dm_master $WORK_DIR/master1 $MASTER_PORT1 $cur/conf/dm-master1.toml
    19  	run_dm_master $WORK_DIR/master2 $MASTER_PORT2 $cur/conf/dm-master2.toml
    20  	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT1
    21  	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT2
    22  
    23  	# master1 or master2 join campaign
    24  	check_metric $MASTER_PORT2 'start_leader_counter' 3 0 2 || check_metric $MASTER_PORT1 'start_leader_counter' 3 0 2
    25  
    26  	run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
    27  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
    28  	echo "operate mysql config to worker"
    29  	cp $cur/conf/source1.yaml $WORK_DIR/source1.yaml
    30  	cp $cur/conf/source2.yaml $WORK_DIR/source2.yaml
    31  	sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker1/relay_log" $WORK_DIR/source1.yaml
    32  	sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker2/relay_log" $WORK_DIR/source2.yaml
    33  	dmctl_operate_source create $WORK_DIR/source1.yaml $SOURCE_ID1
    34  
    35  	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
    36  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT
    37  	dmctl_operate_source create $WORK_DIR/source2.yaml $SOURCE_ID2
    38  
    39  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    40  		"start-relay -s $SOURCE_ID2 worker2" \
    41  		"\"result\": true" 2
    42  
    43  	# join master3
    44  	run_dm_master $WORK_DIR/master3 $MASTER_PORT3 $cur/conf/dm-master3.toml
    45  	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT3
    46  	check_metric $MASTER_PORT3 'start_leader_counter' 3 -1 1 # master3 is not leader
    47  
    48  	# worker in running stage
    49  	check_metric $MASTER_PORT1 'dm_master_worker_state{worker="worker1"}' 3 1 3 || check_metric $MASTER_PORT2 'dm_master_worker_state{worker="worker1"}' 3 1 3
    50  
    51  	echo "start DM task"
    52  	dmctl_start_task
    53  
    54  	echo "use sync_diff_inspector to check full dump loader"
    55  	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
    56  
    57  	echo "flush logs to force rotate binlog file"
    58  	run_sql "flush logs;" $MYSQL_PORT1 $MYSQL_PASSWORD1
    59  	run_sql "flush logs;" $MYSQL_PORT2 $MYSQL_PASSWORD2
    60  
    61  	echo "apply increment data before restart dm-worker to ensure entering increment phase"
    62  	run_sql_file $cur/data/db1.increment.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1
    63  	run_sql_file $cur/data/db2.increment.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2
    64  
    65  	echo "use sync_diff_inspector to check increment data"
    66  	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
    67  
    68  	echo "pause task before kill and restart dm-worker"
    69  	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    70  		"pause-task test" \
    71  		"\"result\": true" 3
    72  
    73  	echo "start dm-worker3 and kill dm-worker2"
    74  	kill_process dm-worker2
    75  	check_port_offline $WORKER2_PORT 20
    76  	rm -rf $WORK_DIR/worker2/relay_log
    77  
    78  	run_dm_worker $WORK_DIR/worker3 $WORKER3_PORT $cur/conf/dm-worker3.toml
    79  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER3_PORT
    80  
    81  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    82  		"start-relay -s $SOURCE_ID2 worker2" \
    83  		"\"result\": true" 2
    84  
    85  	sleep 8
    86  	echo "wait for the task to be scheduled and keep paused"
    87  	check_http_alive 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test '"stage": "Paused"' 10
    88  
    89  	echo "resume task before kill and restart dm-worker"
    90  	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    91  		"resume-task test" \
    92  		"\"result\": true" 3
    93  
    94  	echo "start dm-worker2 and kill dm-worker3"
    95  	kill_process dm-worker3
    96  	check_port_offline $WORKER3_PORT 20
    97  	rm -rf $WORK_DIR/worker3/relay_log
    98  
    99  	run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
   100  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT
   101  
   102  	sleep 8
   103  	echo "wait and check task running"
   104  	check_http_alive 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test '"stage": "Running"' 10
   105  
   106  	# manually transfer a exist source to a newly started worker
   107  	run_dm_worker $WORK_DIR/worker3 $WORKER3_PORT $cur/conf/dm-worker3.toml
   108  	check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER3_PORT
   109  
   110  	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   111  		"transfer-source $SOURCE_ID1 worker3" \
   112  		"\"result\": true" 1
   113  
   114  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   115  		"list-member --name worker3" \
   116  		"$SOURCE_ID1" 1
   117  
   118  	echo "query-status from all dm-master"
   119  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT1" \
   120  		"query-status test" \
   121  		"\"stage\": \"Running\"" 3
   122  
   123  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT2" \
   124  		"query-status test" \
   125  		"\"stage\": \"Running\"" 3
   126  
   127  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT3" \
   128  		"query-status test" \
   129  		"\"stage\": \"Running\"" 3
   130  
   131  	echo "join new dm-master and query-status"
   132  	run_dm_master $WORK_DIR/master4 $MASTER_PORT4 $cur/conf/dm-master4.toml
   133  	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT4
   134  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT4" \
   135  		"query-status test" \
   136  		"\"stage\": \"Running\"" 3
   137  
   138  	# may join failed with error `fail to join embed etcd: add member http://127.0.0.1:8295: etcdserver: unhealthy cluster`, and dm-master will exit. so just sleep some seconds.
   139  	sleep 5
   140  
   141  	run_dm_master $WORK_DIR/master5 $MASTER_PORT5 $cur/conf/dm-master5.toml
   142  	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT5
   143  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT5" \
   144  		"query-status test" \
   145  		"\"stage\": \"Running\"" 3
   146  	sleep 5
   147  
   148  	run_dm_master $WORK_DIR/master6 $MASTER_PORT6 $cur/conf/dm-master6.toml
   149  	check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT6
   150  	run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT6" \
   151  		"query-status test" \
   152  		"\"stage\": \"Running\"" 3
   153  	sleep 5
   154  
   155  	echo "kill dm-master1"
   156  	kill_process dm-master1
   157  	check_master_port_offline 1
   158  	echo "kill dm-master2"
   159  	kill_process dm-master2
   160  	check_master_port_offline 2
   161  
   162  	echo "initial cluster of dm-masters have been killed"
   163  	echo "now we will check whether joined masters can work normally"
   164  
   165  	# we need some time for cluster to re-elect new available leader
   166  	dmctl_stop_task_with_retry "test" $MASTER_PORT5
   167  
   168  	run_sql_file $cur/data/db1.increment2.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1
   169  	run_sql_file $cur/data/db2.increment2.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2
   170  	sleep 2
   171  
   172  	# leader needs some time to rebuild info
   173  	# start-task is not retryable
   174  	run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT5" \
   175  		"start-task $cur/conf/dm-task.yaml" \
   176  		"\"result\": true" 3 \
   177  		"\"source\": \"$SOURCE_ID1\"" 1 \
   178  		"\"source\": \"$SOURCE_ID2\"" 1
   179  
   180  	echo "use sync_diff_inspector to check increment2 data now!"
   181  	check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
   182  }
   183  
   184  cleanup_data ha_test
   185  # also cleanup dm processes in case of last run failed
   186  cleanup_process $*
   187  run $*
   188  cleanup_process $*
   189  
   190  echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>"