#!/bin/bash
# Source path: github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/tests/integration_tests/capture_suicide_while_balance_table/run.sh

set -eu

# Absolute directory of this script; shared helpers and configs are located
# relative to it.
CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# Quoted so that a checkout path containing whitespace is not word-split.
source "$CUR/../_utils/test_prepare"
# OUT_DIR and TEST_NAME are not assigned in this file; presumably they are
# provided by test_prepare or by the caller's environment -- TODO confirm.
WORK_DIR=$OUT_DIR/$TEST_NAME
CDC_BINARY=cdc.test
# First positional argument selects the sink type. `${1?...}` aborts only when
# the argument is missing (same condition as `set -u`), but with a usage hint.
SINK_TYPE=${1?usage: $0 <sink-type>}

# This test mainly verifies that CDC can handle the following scenario:
# 1. There are two captures and capture-1 is the owner; each capture replicates
#    more than one table.
# 2. capture-2 replicates some DMLs but with some delay (for example because of
#    a large amount of incremental scan data, a blocked sink, etc.). We call
#    this slow table "table-slow".
# 3. Before the checkpoint ts of table-slow on capture-2 reaches the global
#    resolved ts, a rebalance operation is triggered, either by a manual
#    rebalance or by a new capture joining the cluster. A delete-table
#    operation is therefore dispatched to capture-2, and its boundary ts is the
#    global resolved ts. capture-2 keeps replicating table-slow until the
#    checkpoint ts reaches the boundary ts.
# 4. However, before the checkpoint ts of table-slow reaches the boundary ts,
#    capture-2 commits suicide because of some network issue or PD jitter.
# 5. After the cluster recovers, the data of table-slow in the downstream
#    should be consistent with the upstream.
#
# In this test, step 2 is achieved by failpoint injection, step 3 is triggered
# by a manual rebalance, and step 4 is achieved by revoking the lease of the
# capture key.
# Drives the test: starts the cluster and two CDC servers, creates a
# changefeed and four tables, inserts rows into one replicated table, asks the
# owner to move that table to the other capture, revokes the etcd lease of the
# capture listening on 127.0.0.1:8301, and finally checks upstream/downstream
# consistency with check_sync_diff.
#
# Globals read:    SINK_TYPE, WORK_DIR, CDC_BINARY, CUR, UP_PD_HOST_1,
#                  UP_PD_PORT_1, UP_TIDB_HOST, UP_TIDB_PORT, DOWN_TIDB_HOST,
#                  DOWN_TIDB_PORT
# Globals written: GO_FAILPOINTS (exported), SINK_URI and the lowercase
#                  working variables assigned below
# Arguments:       none are used
# Returns:         0 when the sink type is not "mysql" (test skipped) or when
#                  the scenario completes; 1 when a required id cannot be
#                  determined. Other failures abort if the caller has errexit
#                  enabled.
function run() {
	# test with mysql sink only
	if [ "$SINK_TYPE" != "mysql" ]; then
		return
	fi

	rm -rf "$WORK_DIR" && mkdir -p "$WORK_DIR"
	start_tidb_cluster --workdir "$WORK_DIR"
	cd "$WORK_DIR"

	pd_addr="http://$UP_PD_HOST_1:$UP_PD_PORT_1"
	run_cdc_server --workdir "$WORK_DIR" --binary "$CDC_BINARY" --pd "$pd_addr" --logsuffix 1 --addr "127.0.0.1:8300"
	# The failpoint is exported only after the first server has been launched,
	# so presumably only the second server (127.0.0.1:8301) inherits it.
	export GO_FAILPOINTS='github.com/pingcap/tiflow/cdc/sink/dmlsink/txn/mysql/MySQLSinkHangLongTime=1*return(true)'
	run_cdc_server --workdir "$WORK_DIR" --binary "$CDC_BINARY" --pd "$pd_addr" --logsuffix 2 --addr "127.0.0.1:8301"

	SINK_URI="mysql://normal:123456@127.0.0.1:3306/?max-txn-row=1"
	# The changefeed id is taken from the second field of the second-to-last
	# line of the CLI output.
	changefeed_id=$(cdc cli changefeed create --pd="$pd_addr" --sink-uri="$SINK_URI" 2>&1 | tail -n2 | head -n1 | awk '{print $2}')

	run_sql "CREATE DATABASE capture_suicide_while_balance_table;" "${UP_TIDB_HOST}" "${UP_TIDB_PORT}"
	for i in $(seq 1 4); do
		run_sql "CREATE table capture_suicide_while_balance_table.t$i (id int primary key auto_increment)" "${UP_TIDB_HOST}" "${UP_TIDB_PORT}"
	done

	for i in $(seq 1 4); do
		check_table_exists "capture_suicide_while_balance_table.t$i" "${DOWN_TIDB_HOST}" "${DOWN_TIDB_PORT}"
	done

	capture1_id=$(cdc cli capture list | jq -r '.[]|select(.address=="127.0.0.1:8300")|.id')
	capture2_id=$(cdc cli capture list | jq -r '.[]|select(.address=="127.0.0.1:8301")|.id')

	# Pick a table currently on capture 2 and move it to capture 1; if capture 2
	# reports no table, pick one from capture 1 and move it to capture 2.
	target_capture=$capture1_id
	one_table_id=$(cdc cli processor query -c "$changefeed_id" -p "$capture2_id" | jq -r '.status.tables|keys[0]')
	if [[ "$one_table_id" == "null" ]]; then
		target_capture=$capture2_id
		one_table_id=$(cdc cli processor query -c "$changefeed_id" -p "$capture1_id" | jq -r '.status.tables|keys[0]')
	fi
	# Fail with a clear message instead of issuing a query with a bogus id.
	if [[ -z "$one_table_id" || "$one_table_id" == "null" ]]; then
		echo "no replicated table found on either capture" >&2
		return 1
	fi

	table_query=$(mysql -h"${UP_TIDB_HOST}" -P"${UP_TIDB_PORT}" -uroot -e "select table_name from information_schema.tables where tidb_table_id = ${one_table_id}\G")
	# Quoted so the output keeps its line breaks and `tail -n 1` really selects
	# the last line; the table name is the last field of that line.
	table_name=$(printf '%s\n' "$table_query" | tail -n 1 | awk '{print $NF}')
	if [[ -z "$table_name" ]]; then
		echo "cannot resolve table name for table id ${one_table_id}" >&2
		return 1
	fi
	run_sql "insert into capture_suicide_while_balance_table.${table_name} values (),(),(),(),()"

	# sleep some time to wait for the global resolved ts to be forwarded
	sleep 2
	curl -X POST http://127.0.0.1:8300/capture/owner/move_table -d "cf-id=${changefeed_id}&target-cp-id=${target_capture}&table-id=${one_table_id}"
	# sleep some time to wait for the table balance job to be written to etcd
	sleep 2

	# revoke lease of etcd capture key to simulate etcd session done
	# NOTE(review): the lease id is extracted textually rather than with jq,
	# presumably because lease ids are 64-bit integers that some jq versions
	# round -- confirm before "simplifying" this.
	lease=$(ETCDCTL_API=3 etcdctl get "/tidb/cdc/default/__cdc_meta__/capture/${capture2_id}" -w json | grep -o 'lease":[0-9]*' | awk -F: '{print $2}')
	# An empty lease would be formatted as 0 and revoke the wrong (nonexistent)
	# lease, so stop here with an explicit error.
	if [[ -z "$lease" ]]; then
		echo "cannot find etcd lease for capture ${capture2_id}" >&2
		return 1
	fi
	# The decimal lease id is converted to hexadecimal before being passed to
	# `etcdctl lease revoke`.
	lease_hex=$(printf '%x\n' "$lease")
	ETCDCTL_API=3 etcdctl lease revoke "$lease_hex"

	check_sync_diff "$WORK_DIR" "$CUR/conf/diff_config.toml"
	export GO_FAILPOINTS=''
	cleanup_process "$CDC_BINARY"
}

# Always tear the cluster down, whichever way the script exits.
trap stop_tidb_cluster EXIT
# "$@" forwards each script argument as its own word (unlike unquoted $*).
run "$@"
check_logs "$WORK_DIR"
echo "[$(date)] <<<<<< run test case $TEST_NAME success! >>>>>>"