github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/tests/integration_tests/synced_status_with_redo/run.sh

github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/tests/integration_tests/synced_status_with_redo/run.sh (about)

     1  #!/bin/bash
     2  
     3  ## test the same logic as `sync_status`, but with redo mode
     4  
     5  #!/bin/bash
     6  
     7  # [DESCRIPTION]:
     8  #   This test is related to
     9  #   It will test the sync status request of cdc server in the following scenarios:
    10  #   1. The sync status request of cdc server when the upstream cluster is available
    11  #      1.1 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs < threshold
    12  #      1.2 pdNow - lastSyncedTs < threshold
    13  #      1.3 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs < threshold, resolvedTs - checkpointTs > threshold
    14  #   2. The sync status request of cdc server when the upstream pd is unavailable
    15  #      2.1 resolvedTs - checkpointTs < threshold
    16  #   3. The sync status request of cdc server when the upstream tikv is unavailable
    17  #      3.1 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs > threshold, resolvedTs - checkpointTs < threshold
    18  #      3.2 pdNow - lastSyncedTs < threshold
    19  #   4. The sync status request of cdc server when the downstream tidb is unavailable
    20  #      4.1 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs < threshold
    21  #      4.2 pdNow - lastSyncedTs < threshold
    22  # [STEP]:
    23  #   1. Create changefeed with synced-time-config = xx
    24  #   2. insert data to upstream cluster, and do the related actions for each scenarios
    25  #   3. do the query of synced status of cdc server
    26  #   4. check the info and status of query
    27  
    28  set -xeu
    29  
    30  CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
    31  source $CUR/../_utils/test_prepare
    32  WORK_DIR=$OUT_DIR/$TEST_NAME
    33  CDC_BINARY=cdc.test
    34  SINK_TYPE=$1
    35  
    36  CDC_COUNT=3
    37  DB_COUNT=4
    38  
    39  function kill_pd() {
    40  	info=$(ps aux | grep pd-server | grep $WORK_DIR) || true
    41  	$(ps aux | grep pd-server | grep $WORK_DIR | awk '{print $2}' | xargs kill -9 &>/dev/null) || true
    42  }
    43  
    44  function kill_tikv() {
    45  	info=$(ps aux | grep tikv-server | grep $WORK_DIR) || true
    46  	$(ps aux | grep tikv-server | grep $WORK_DIR | awk '{print $2}' | xargs kill -9 &>/dev/null) || true
    47  }
    48  
    49  function kill_tidb() {
    50  	info=$(ps aux | grep tidb-server | grep $WORK_DIR) || true
    51  	$(ps aux | grep tidb-server | grep $WORK_DIR | awk '{print $2}' | xargs kill -9 &>/dev/null) || true
    52  }
    53  
    54  function run_normal_case_and_unavailable_pd() {
    55  	rm -rf $WORK_DIR && mkdir -p $WORK_DIR
    56  
    57  	start_tidb_cluster --workdir $WORK_DIR
    58  
    59  	cd $WORK_DIR
    60  
    61  	start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1})
    62  	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY
    63  
    64  	config_path=$1
    65  
    66  	SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1"
    67  	run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path"
    68  
    69  	# case 1: test in available cluster
    70  	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
    71  
    72  	status=$(echo $synced_status | jq '.synced')
    73  	sink_checkpoint_ts=$(echo $synced_status | jq -r '.sink_checkpoint_ts')
    74  	puller_resolved_ts=$(echo $synced_status | jq -r '.puller_resolved_ts')
    75  	last_synced_ts=$(echo $synced_status | jq -r '.last_synced_ts')
    76  	if [ $status != true ]; then
    77  		echo "synced status isn't correct"
    78  		exit 1
    79  	fi
    80  	# the timestamp for puller_resolved_ts is 0 when do data insert
    81  	if [ "$puller_resolved_ts" != "1970-01-01 08:00:00.000" ]; then
    82  		echo "puller_resolved_ts is not 1970-01-01 08:00:00.000"
    83  		exit 1
    84  	fi
    85  	# the timestamp for last_synced_ts is 0 when do data insert
    86  	if [ "$last_synced_ts" != "1970-01-01 08:00:00.000" ]; then
    87  		echo "last_synced_ts is not 1970-01-01 08:00:00.000"
    88  		exit 1
    89  	fi
    90  
    91  	# compare sink_checkpoint_ts with current time
    92  	current=$(date +"%Y-%m-%d %H:%M:%S")
    93  	echo "sink_checkpoint_ts is "$sink_checkpoint_ts
    94  	checkpoint_timestamp=$(date -d "$sink_checkpoint_ts" +%s)
    95  	current_timestamp=$(date -d "$current" +%s)
    96  	if [ $(($current_timestamp - $checkpoint_timestamp)) -gt 300 ]; then # give a soft check
    97  		echo "sink_checkpoint_ts is not correct"
    98  		exit 1
    99  	fi
   100  
   101  	run_sql "USE TEST;Create table t1(a int primary key, b int);insert into t1 values(1,2);insert into t1 values(2,3);"
   102  	check_table_exists "test.t1" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT}
   103  
   104  	sleep 5 # wait data insert
   105  	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
   106  	status=$(echo $synced_status | jq '.synced')
   107  	if [ $status != false ]; then
   108  		echo "synced status isn't correct"
   109  		exit 1
   110  	fi
   111  	info=$(echo $synced_status | jq -r '.info')
   112  	if [ "$info" != "The data syncing is not finished, please wait" ]; then
   113  		echo "synced status info is not correct"
   114  		exit 1
   115  	fi
   116  
   117  	sleep 130 # wait enough time for pass synced-check-interval
   118  	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
   119  	status=$(echo $synced_status | jq '.synced')
   120  	if [ $status != true ]; then
   121  		echo "synced status isn't correct"
   122  		exit 1
   123  	fi
   124  
   125  	#==========
   126  	# case 2: test with unavailable pd, query will not get the available response
   127  	kill_pd
   128  
   129  	sleep 20
   130  
   131  	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
   132  	error_code=$(echo $synced_status | jq -r '.error_code')
   133  	cleanup_process $CDC_BINARY
   134  	stop_tidb_cluster
   135  }
   136  
   137  function run_case_with_unavailable_tikv() {
   138  	rm -rf $WORK_DIR && mkdir -p $WORK_DIR
   139  
   140  	start_tidb_cluster --workdir $WORK_DIR
   141  
   142  	cd $WORK_DIR
   143  
   144  	start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1})
   145  	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY
   146  
   147  	config_path=$1
   148  
   149  	SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1"
   150  	run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path"
   151  
   152  	# case 3: test in unavailable tikv cluster
   153  	run_sql "USE TEST;Create table t1(a int primary key, b int);insert into t1 values(1,2);insert into t1 values(2,3);"
   154  	check_table_exists "test.t1" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT}
   155  
   156  	sleep 5 # make data inserted into downstream
   157  	kill_tikv
   158  
   159  	# test the case when pdNow - lastSyncedTs < threshold
   160  	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
   161  	status=$(echo $synced_status | jq '.synced')
   162  	if [ $status != false ]; then
   163  		echo "synced status isn't correct"
   164  		exit 1
   165  	fi
   166  	info=$(echo $synced_status | jq -r '.info')
   167  	target_message="The data syncing is not finished, please wait"
   168  
   169  	if [ "$info" != "$target_message" ]; then
   170  		echo "synced status info is not correct"
   171  		exit 1
   172  	fi
   173  
   174  	sleep 130 # wait enough time for pass synced-check-interval
   175  	# test the case when pdNow - lastSyncedTs > threshold
   176  	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
   177  	status=$(echo $synced_status | jq '.synced')
   178  	if [ $status != false ]; then
   179  		echo "synced status isn't correct"
   180  		exit 1
   181  	fi
   182  	info=$(echo $synced_status | jq -r '.info')
   183  	target_message="Please check whether PD is online and TiKV Regions are all available. \
   184  If PD is offline or some TiKV regions are not available, it means that the data syncing process is complete. \
   185  To check whether TiKV regions are all available, you can view \
   186  'TiKV-Details' > 'Resolved-Ts' > 'Max Leader Resolved TS gap' on Grafana. \
   187  If the gap is large, such as a few minutes, it means that some regions in TiKV are unavailable. \
   188  Otherwise, if the gap is small and PD is online, it means the data syncing is incomplete, so please wait"
   189  
   190  	if [ "$info" != "$target_message" ]; then
   191  		echo "synced status info is not correct"
   192  		exit 1
   193  	fi
   194  
   195  	cleanup_process $CDC_BINARY
   196  	stop_tidb_cluster
   197  }
   198  
   199  function run_case_with_unavailable_tidb() {
   200  	rm -rf $WORK_DIR && mkdir -p $WORK_DIR
   201  
   202  	start_tidb_cluster --workdir $WORK_DIR
   203  
   204  	cd $WORK_DIR
   205  
   206  	start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1})
   207  	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY
   208  
   209  	config_path=$1
   210  
   211  	SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1"
   212  	run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path"
   213  
   214  	# case 3: test in unavailable tikv cluster
   215  	run_sql "USE TEST;Create table t1(a int primary key, b int);insert into t1 values(1,2);insert into t1 values(2,3);"
   216  	check_table_exists "test.t1" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT}
   217  
   218  	sleep 5 # make data inserted into downstream
   219  	kill_tidb
   220  
   221  	# test the case when pdNow - lastSyncedTs < threshold
   222  	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
   223  	status=$(echo $synced_status | jq '.synced')
   224  	if [ $status != false ]; then
   225  		echo "synced status isn't correct"
   226  		exit 1
   227  	fi
   228  	info=$(echo $synced_status | jq -r '.info')
   229  	target_message="The data syncing is not finished, please wait"
   230  
   231  	if [ "$info" != "$target_message" ]; then
   232  		echo "synced status info is not correct"
   233  		exit 1
   234  	fi
   235  
   236  	sleep 130 # wait enough time for pass synced-check-interval
   237  	# test the case when pdNow - lastSyncedTs > threshold
   238  	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
   239  	status=$(echo $synced_status | jq '.synced')
   240  	if [ $status != true ]; then
   241  		echo "synced status isn't correct"
   242  		exit 1
   243  	fi
   244  	info=$(echo $synced_status | jq -r '.info')
   245  	target_message="Data syncing is finished"
   246  
   247  	if [ "$info" != "$target_message" ]; then
   248  		echo "synced status info is not correct"
   249  		exit 1
   250  	fi
   251  
   252  	cleanup_process $CDC_BINARY
   253  	stop_tidb_cluster
   254  }
   255  
   256  function run_case_with_failpoint() {
   257  	rm -rf $WORK_DIR && mkdir -p $WORK_DIR
   258  
   259  	start_tidb_cluster --workdir $WORK_DIR
   260  
   261  	cd $WORK_DIR
   262  
   263  	# make failpoint to block checkpoint-ts
   264  	export GO_FAILPOINTS='github.com/pingcap/tiflow/cdc/owner/ChangefeedOwnerNotUpdateCheckpoint=return(true)'
   265  
   266  	start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1})
   267  	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY
   268  
   269  	config_path=$1
   270  
   271  	SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1"
   272  	run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path"
   273  
   274  	sleep 20 # wait enough time for pass checkpoint-check-interval
   275  	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
   276  	status=$(echo $synced_status | jq '.synced')
   277  	if [ $status != false ]; then
   278  		echo "synced status isn't correct"
   279  		exit 1
   280  	fi
   281  	info=$(echo $synced_status | jq -r '.info')
   282  	target_message="Please check whether PD is online and TiKV Regions are all available. \
   283  If PD is offline or some TiKV regions are not available, it means that the data syncing process is complete. \
   284  To check whether TiKV regions are all available, you can view \
   285  'TiKV-Details' > 'Resolved-Ts' > 'Max Leader Resolved TS gap' on Grafana. \
   286  If the gap is large, such as a few minutes, it means that some regions in TiKV are unavailable. \
   287  Otherwise, if the gap is small and PD is online, it means the data syncing is incomplete, so please wait"
   288  	if [ "$info" != "$target_message" ]; then
   289  		echo "synced status info is not correct"
   290  		exit 1
   291  	fi
   292  
   293  	export GO_FAILPOINTS=''
   294  
   295  	cleanup_process $CDC_BINARY
   296  	stop_tidb_cluster
   297  }
   298  
   299  trap stop_tidb_cluster EXIT
   300  
   301  # enable redo
   302  run_normal_case_and_unavailable_pd "conf/changefeed-redo.toml"
   303  run_case_with_unavailable_tikv "conf/changefeed-redo.toml"
   304  run_case_with_unavailable_tidb "conf/changefeed-redo.toml"
   305  run_case_with_failpoint "conf/changefeed-redo.toml"
   306  
   307  check_logs $WORK_DIR
   308  echo "[$(date)] <<<<<< run test case $TEST_NAME success! >>>>>>"