github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/tests/integration_tests/synced_status/run.sh

#!/bin/bash

# [DESCRIPTION]:
#   This test checks the synced status request of the cdc server in the following scenarios:
#   1. The synced status request of the cdc server when the upstream cluster is available
#      1.1 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs < threshold
#      1.2 pdNow - lastSyncedTs < threshold
#      1.3 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs < threshold, resolvedTs - checkpointTs > threshold
#   2. The synced status request of the cdc server when the upstream pd is unavailable
#      2.1 resolvedTs - checkpointTs < threshold
#   3. The synced status request of the cdc server when the upstream tikv is unavailable
#      3.1 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs > threshold, resolvedTs - checkpointTs < threshold
#      3.2 pdNow - lastSyncedTs < threshold
#   4. The synced status request of the cdc server when the downstream tidb is unavailable
#      4.1 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs < threshold
#      4.2 pdNow - lastSyncedTs < threshold
# [STEP]:
#   1. Create a changefeed with synced-time-config = xx (see the config sketch after this list)
#   2. Insert data into the upstream cluster and perform the related actions for each scenario
#   3. Query the synced status of the cdc server
#   4. Check the info and status fields of the response

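# Each case below is driven by conf/changefeed.toml. The exact contents are not shown in
# this file; the checks below suggest it carries a synced-status section roughly shaped
# like the sketch here (key names and values are assumptions inferred from the sleeps in
# this script, not copied from the config file):
#
#   [synced-status]
#   synced-check-interval = 120 # assumed; the cases sleep 130s to exceed it
#   checkpoint-interval = 20    # assumed; the failpoint case sleeps 20s to exceed it
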
set -xeu

CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
source $CUR/../_utils/test_prepare
WORK_DIR=$OUT_DIR/$TEST_NAME
CDC_BINARY=cdc.test
SINK_TYPE=$1

CDC_COUNT=3
DB_COUNT=4

# Kill the pd-server / tikv-server / tidb-server processes that belong to this test's
# workdir. The info assignment only surfaces the matching processes in the set -x trace,
# and the trailing "|| true" keeps set -e from aborting when nothing matches.
function kill_pd() {
	info=$(ps aux | grep pd-server | grep $WORK_DIR) || true
	$(ps aux | grep pd-server | grep $WORK_DIR | awk '{print $2}' | xargs kill -9 &>/dev/null) || true
}

function kill_tikv() {
	info=$(ps aux | grep tikv-server | grep $WORK_DIR) || true
	$(ps aux | grep tikv-server | grep $WORK_DIR | awk '{print $2}' | xargs kill -9 &>/dev/null) || true
}

function kill_tidb() {
	info=$(ps aux | grep tidb-server | grep $WORK_DIR) || true
	$(ps aux | grep tidb-server | grep $WORK_DIR | awk '{print $2}' | xargs kill -9 &>/dev/null) || true
}

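# The cases below all poll the synced-status API the same way: curl the endpoint and pull
# individual fields out of the JSON reply with jq. A minimal helper capturing that pattern
# is sketched here for readability; it is illustrative only (the cases keep their inlined
# curl/jq calls) and assumes the default cdc server address 127.0.0.1:8300 used throughout
# this script.
function query_synced_status() {
	local changefeed_id=$1
	# prints the raw JSON body of the synced-status API response
	curl -s -X GET "http://127.0.0.1:8300/api/v2/changefeeds/${changefeed_id}/synced"
}
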
function run_normal_case_and_unavailable_pd() {
	rm -rf $WORK_DIR && mkdir -p $WORK_DIR

	start_tidb_cluster --workdir $WORK_DIR

	cd $WORK_DIR

	start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1})
	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY

	config_path=$1

	SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1"
	run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path"

	# case 1: test in an available cluster
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)

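	# For reference, the reply parsed below is a JSON document whose fields this test reads:
	# synced, sink_checkpoint_ts, puller_resolved_ts, last_synced_ts, info and (when the
	# request fails) error_code. A rough example of the shape, with illustrative values
	# rather than captured output:
	#   {"synced": true, "sink_checkpoint_ts": "2024-05-20 12:00:00.000",
	#    "puller_resolved_ts": "1970-01-01 08:00:00.000",
	#    "last_synced_ts": "1970-01-01 08:00:00.000", "info": "Data syncing is finished"}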
	status=$(echo $synced_status | jq '.synced')
	sink_checkpoint_ts=$(echo $synced_status | jq -r '.sink_checkpoint_ts')
	puller_resolved_ts=$(echo $synced_status | jq -r '.puller_resolved_ts')
	last_synced_ts=$(echo $synced_status | jq -r '.last_synced_ts')
	if [ $status != true ]; then
		echo "synced status isn't correct"
		exit 1
	fi
	# puller_resolved_ts is still 0 (shown as 1970-01-01 08:00:00.000 in the +08:00 timezone) because no data has been inserted yet
	if [ "$puller_resolved_ts" != "1970-01-01 08:00:00.000" ]; then
		echo "puller_resolved_ts is not 1970-01-01 08:00:00.000"
		exit 1
	fi
	# last_synced_ts is still 0 (shown as 1970-01-01 08:00:00.000 in the +08:00 timezone) because no data has been inserted yet
	if [ "$last_synced_ts" != "1970-01-01 08:00:00.000" ]; then
		echo "last_synced_ts is not 1970-01-01 08:00:00.000"
		exit 1
	fi

	# compare sink_checkpoint_ts with the current time
	current=$(date +"%Y-%m-%d %H:%M:%S")
	echo "sink_checkpoint_ts is "$sink_checkpoint_ts
	checkpoint_timestamp=$(date -d "$sink_checkpoint_ts" +%s)
	current_timestamp=$(date -d "$current" +%s)
	if [ $(($current_timestamp - $checkpoint_timestamp)) -gt 300 ]; then # give a soft check
		echo "sink_checkpoint_ts is not correct"
		exit 1
	fi

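	# The block above converts the formatted sink_checkpoint_ts to epoch seconds with GNU
	# date and bounds its distance from "now". The same idea as a tiny reusable helper might
	# look like the sketch below; it is not used by this test and assumes GNU date semantics
	# for the -d flag:
	#   function ts_gap_seconds() {
	#   	# $1: a timestamp such as "1970-01-01 08:00:00.000"
	#   	echo $(($(date +%s) - $(date -d "$1" +%s)))
	#   }
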
	run_sql "USE TEST;Create table t1(a int primary key, b int);insert into t1 values(1,2);insert into t1 values(2,3);"
	check_table_exists "test.t1" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT}

	sleep 5 # wait for the inserted data to be replicated
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	status=$(echo $synced_status | jq '.synced')
	if [ $status != false ]; then
		echo "synced status isn't correct"
		exit 1
	fi
	info=$(echo $synced_status | jq -r '.info')
	if [ "$info" != "The data syncing is not finished, please wait" ]; then
		echo "synced status info is not correct"
		exit 1
	fi

	sleep 130 # wait long enough to exceed the synced-check-interval
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	status=$(echo $synced_status | jq '.synced')
	if [ $status != true ]; then
		echo "synced status isn't correct"
		exit 1
	fi
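	# After 130s with no further writes, pdNow - lastSyncedTs exceeds the configured interval
	# while the checkpoint keeps up with PD time, which is scenario 1.1 from the header: the
	# changefeed is expected to report synced = true, as the check above asserts.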

	#==========
	# case 2: test with an unavailable pd; the query will not get a normal synced response
	kill_pd

	sleep 20

	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	error_code=$(echo $synced_status | jq -r '.error_code')
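	# Only error_code is extracted here: with pd down the API is expected to answer with an
	# error payload instead of a synced status, and the case passes as long as the request
	# and the jq parse succeed under set -e. A stricter assertion could look like the
	# following sketch (kept commented out so the original behavior is unchanged):
	#   if [ -z "$error_code" ] || [ "$error_code" == "null" ]; then
	#   	echo "expected an error_code in the response"
	#   	exit 1
	#   fi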
	cleanup_process $CDC_BINARY
	stop_tidb_cluster
}

function run_case_with_unavailable_tikv() {
	rm -rf $WORK_DIR && mkdir -p $WORK_DIR

	start_tidb_cluster --workdir $WORK_DIR

	cd $WORK_DIR

	start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1})
	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY

	config_path=$1

	SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1"
	run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path"

	# case 3: test with an unavailable tikv cluster
	run_sql "USE TEST;Create table t1(a int primary key, b int);insert into t1 values(1,2);insert into t1 values(2,3);"
	check_table_exists "test.t1" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT}

	sleep 5 # make sure the data is inserted into the downstream
	kill_tikv

	# test the case when pdNow - lastSyncedTs < threshold
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	status=$(echo $synced_status | jq '.synced')
	if [ $status != false ]; then
		echo "synced status isn't correct"
		exit 1
	fi
	info=$(echo $synced_status | jq -r '.info')
	target_message="The data syncing is not finished, please wait"

	if [ "$info" != "$target_message" ]; then
		echo "synced status info is not correct"
		exit 1
	fi

	sleep 130 # wait long enough to exceed the synced-check-interval
	# test the case when pdNow - lastSyncedTs > threshold
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	status=$(echo $synced_status | jq '.synced')
	if [ $status != false ]; then
		echo "synced status isn't correct"
		exit 1
	fi
	info=$(echo $synced_status | jq -r '.info')
	target_message="Please check whether PD is online and TiKV Regions are all available. \
If PD is offline or some TiKV regions are not available, it means that the data syncing process is complete. \
To check whether TiKV regions are all available, you can view \
'TiKV-Details' > 'Resolved-Ts' > 'Max Leader Resolved TS gap' on Grafana. \
If the gap is large, such as a few minutes, it means that some regions in TiKV are unavailable. \
Otherwise, if the gap is small and PD is online, it means the data syncing is incomplete, so please wait"

	if [ "$info" != "$target_message" ]; then
		echo "synced status info is not correct"
		exit 1
	fi

	cleanup_process $CDC_BINARY
	stop_tidb_cluster
}

function run_case_with_unavailable_tidb() {
	rm -rf $WORK_DIR && mkdir -p $WORK_DIR

	start_tidb_cluster --workdir $WORK_DIR

	cd $WORK_DIR

	start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1})
	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY

	config_path=$1

	SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1"
	run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path"

	# case 4: test with an unavailable downstream tidb
	run_sql "USE TEST;Create table t1(a int primary key, b int);insert into t1 values(1,2);insert into t1 values(2,3);"
	check_table_exists "test.t1" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT}

	sleep 5 # make sure the data is inserted into the downstream
	kill_tidb

	# test the case when pdNow - lastSyncedTs < threshold
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	status=$(echo $synced_status | jq '.synced')
	if [ $status != false ]; then
		echo "synced status isn't correct"
		exit 1
	fi
	info=$(echo $synced_status | jq -r '.info')
	target_message="The data syncing is not finished, please wait"

	if [ "$info" != "$target_message" ]; then
		echo "synced status info is not correct"
		exit 1
	fi

	sleep 130 # wait long enough to exceed the synced-check-interval
	# test the case when pdNow - lastSyncedTs > threshold
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	status=$(echo $synced_status | jq '.synced')
	if [ $status != true ]; then
		echo "synced status isn't correct"
		exit 1
	fi
	info=$(echo $synced_status | jq -r '.info')
	target_message="Data syncing is finished"

	if [ "$info" != "$target_message" ]; then
		echo "synced status info is not correct"
		exit 1
	fi

	cleanup_process $CDC_BINARY
	stop_tidb_cluster
}

function run_case_with_failpoint() {
	rm -rf $WORK_DIR && mkdir -p $WORK_DIR

	start_tidb_cluster --workdir $WORK_DIR

	cd $WORK_DIR

	# use a failpoint to block checkpoint-ts from advancing
	export GO_FAILPOINTS='github.com/pingcap/tiflow/cdc/owner/ChangefeedOwnerNotUpdateCheckpoint=return(true)'
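	# GO_FAILPOINTS is read by the cdc binary started below; return(true) makes the
	# ChangefeedOwnerNotUpdateCheckpoint failpoint fire on every evaluation, so the owner
	# keeps the changefeed running but never advances its checkpoint-ts.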

	start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1})
	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY

	config_path=$1

	SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1"
	run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path"

	sleep 20 # wait long enough to exceed the checkpoint-check-interval
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	status=$(echo $synced_status | jq '.synced')
	if [ $status != false ]; then
		echo "synced status isn't correct"
		exit 1
	fi
	info=$(echo $synced_status | jq -r '.info')
	target_message="Please check whether PD is online and TiKV Regions are all available. \
If PD is offline or some TiKV regions are not available, it means that the data syncing process is complete. \
To check whether TiKV regions are all available, you can view \
'TiKV-Details' > 'Resolved-Ts' > 'Max Leader Resolved TS gap' on Grafana. \
If the gap is large, such as a few minutes, it means that some regions in TiKV are unavailable. \
Otherwise, if the gap is small and PD is online, it means the data syncing is incomplete, so please wait"
	if [ "$info" != "$target_message" ]; then
		echo "synced status info is not correct"
		exit 1
	fi

	export GO_FAILPOINTS=''

	cleanup_process $CDC_BINARY
	stop_tidb_cluster
}

trap stop_tidb_cluster EXIT
run_normal_case_and_unavailable_pd "conf/changefeed.toml"
run_case_with_unavailable_tikv "conf/changefeed.toml"
run_case_with_unavailable_tidb "conf/changefeed.toml"
run_case_with_failpoint "conf/changefeed.toml"

check_logs $WORK_DIR
echo "[$(date)] <<<<<< run test case $TEST_NAME success! >>>>>>"