github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/tests/integration_tests/capture_suicide_while_balance_table/run.sh (about)

     1  #!/bin/bash
     2  
     3  set -eu
     4  
     5  CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
     6  source $CUR/../_utils/test_prepare
     7  WORK_DIR=$OUT_DIR/$TEST_NAME
     8  CDC_BINARY=cdc.test
     9  SINK_TYPE=$1
    10  
    11  # This test mainly verifies CDC can handle the following scenario
    12  # 1. Two captures, capture-1 is the owner, each capture replicates more than one table.
    13  # 2. capture-2 replicates some DMLs but has some delay, such as large amount of
    14  #    incremental scan data, sink block, etc, we name this slow table as table-slow.
#    Before the checkpoint ts of table-slow on capture-2 reaches the global resolved ts,
    16  #    a rebalance operation is triggered, either by manual rebalance or a new capture
    17  #    joins the cluster. So a delete table operation will be dispatched to capture-2,
    18  #    and the boundary ts is global resolved ts. capture-2 will continue to replicate
    19  #    table-slow until the checkpoint ts reaches the boundary ts.
    20  # 4. However, before the checkpoint ts of table-slow reaches boundary ts, capture-2
    21  #    suicides itself because of some network issue or PD jitter.
    22  # 5. After the cluster recovers, the data of table-slow in downstream should be
    23  #    consistent with upstream.
    24  #
    25  # In this test, step-2 is achieved by failpoint injection, step-3 is triggered
    26  # by manual rebalance, step-4 is achieved by revoking the lease of capture key.
function run() {
	# test with mysql sink only
	if [ "$SINK_TYPE" != "mysql" ]; then
		return
	fi

	rm -rf $WORK_DIR && mkdir -p $WORK_DIR
	start_tidb_cluster --workdir $WORK_DIR
	cd $WORK_DIR

	pd_addr="http://$UP_PD_HOST_1:$UP_PD_PORT_1"
	# Start capture-1 (8300) without failpoints, then export the failpoint
	# env var so only capture-2 (8301) gets MySQLSinkHangLongTime, which
	# makes its MySQL sink hang once — this realizes the "slow table" of
	# step-2 in the scenario description above.
	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --pd $pd_addr --logsuffix 1 --addr "127.0.0.1:8300"
	export GO_FAILPOINTS='github.com/pingcap/tiflow/cdc/sink/dmlsink/txn/mysql/MySQLSinkHangLongTime=1*return(true)'
	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --pd $pd_addr --logsuffix 2 --addr "127.0.0.1:8301"

	SINK_URI="mysql://normal:123456@127.0.0.1:3306/?max-txn-row=1"
	# Extract the changefeed id from the CLI output: second-to-last line,
	# second whitespace-separated field.
	# NOTE(review): this parsing is fragile — it depends on the exact output
	# format of `cdc cli changefeed create`.
	changefeed_id=$(cdc cli changefeed create --pd=$pd_addr --sink-uri="$SINK_URI" 2>&1 | tail -n2 | head -n1 | awk '{print $2}')

	# Create four tables so each of the two captures replicates more than
	# one table (step-1 of the scenario).
	run_sql "CREATE DATABASE capture_suicide_while_balance_table;" ${UP_TIDB_HOST} ${UP_TIDB_PORT}
	for i in $(seq 1 4); do
		run_sql "CREATE table capture_suicide_while_balance_table.t$i (id int primary key auto_increment)" ${UP_TIDB_HOST} ${UP_TIDB_PORT}
	done

	# Wait until the DDLs have replicated downstream before dispatching DML.
	for i in $(seq 1 4); do
		check_table_exists "capture_suicide_while_balance_table.t$i" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT}
	done

	# Map capture addresses to capture ids via the CLI's JSON output.
	capture1_id=$(cdc cli capture list | jq -r '.[]|select(.address=="127.0.0.1:8300")|.id')
	capture2_id=$(cdc cli capture list | jq -r '.[]|select(.address=="127.0.0.1:8301")|.id')

	# Pick one table currently replicated by capture-2 and plan to move it
	# to capture-1; if capture-2 holds no tables ("null"), flip direction
	# and take a table from capture-1 instead.
	target_capture=$capture1_id
	one_table_id=$(cdc cli processor query -c $changefeed_id -p $capture2_id | jq -r '.status.tables|keys[0]')
	if [[ $one_table_id == "null" ]]; then
		target_capture=$capture2_id
		one_table_id=$(cdc cli processor query -c $changefeed_id -p $capture1_id | jq -r '.status.tables|keys[0]')
	fi
	# Resolve the internal TiDB table id back to its table name so we can
	# insert rows into the chosen table.
	table_query=$(mysql -h${UP_TIDB_HOST} -P${UP_TIDB_PORT} -uroot -e "select table_name from information_schema.tables where tidb_table_id = ${one_table_id}\G")
	table_name=$(echo $table_query | tail -n 1 | awk '{print $(NF)}')
	run_sql "insert into capture_suicide_while_balance_table.${table_name} values (),(),(),(),()"

	# sleep some time to wait global resolved ts forwarded
	sleep 2
	# Trigger the manual rebalance of step-3 through the owner's HTTP API
	# on capture-1 (8300): move the chosen table to the target capture.
	curl -X POST http://127.0.0.1:8300/capture/owner/move_table -d "cf-id=${changefeed_id}&target-cp-id=${target_capture}&table-id=${one_table_id}"
	# sleep some time to wait table balance job is written to etcd
	sleep 2

	# revoke lease of etcd capture key to simulate etcd session done
	# (step-4: capture-2 "suicides"). The JSON output reports the lease id
	# in decimal, while `etcdctl lease revoke` expects it in hex, hence the
	# printf '%x' conversion.
	lease=$(ETCDCTL_API=3 etcdctl get /tidb/cdc/default/__cdc_meta__/capture/${capture2_id} -w json | grep -o 'lease":[0-9]*' | awk -F: '{print $2}')
	lease_hex=$(printf '%x\n' $lease)
	ETCDCTL_API=3 etcdctl lease revoke $lease_hex

	# Step-5: after recovery, upstream and downstream must be consistent.
	check_sync_diff $WORK_DIR $CUR/conf/diff_config.toml
	export GO_FAILPOINTS=''
	cleanup_process $CDC_BINARY
}
    82  
    83  trap stop_tidb_cluster EXIT
    84  run $*
    85  check_logs $WORK_DIR
    86  echo "[$(date)] <<<<<< run test case $TEST_NAME success! >>>>>>"