github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/tests/integration_tests/synced_status/run.sh (about) 1 #!/bin/bash 2 3 # [DISCRIPTION]: 4 # This test is related to 5 # It will test the sync status request of cdc server in the following scenarios: 6 # 1. The sync status request of cdc server when the upstream cluster is available 7 # 1.1 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs < threshold 8 # 1.2 pdNow - lastSyncedTs < threshold 9 # 1.3 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs < threshold, resolvedTs - checkpointTs > threshold 10 # 2. The sync status request of cdc server when the upstream pd is unavailable 11 # 2.1 resolvedTs - checkpointTs < threshold 12 # 3. The sync status request of cdc server when the upstream tikv is unavailable 13 # 3.1 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs > threshold, resolvedTs - checkpointTs < threshold 14 # 3.2 pdNow - lastSyncedTs < threshold 15 # 4. The sync status request of cdc server when the downstream tidb is available 16 # 4.1 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs < threshold 17 # 4.2 pdNow - lastSyncedTs < threshold 18 # [STEP]: 19 # 1. Create changefeed with synced-time-config = xx 20 # 2. insert data to upstream cluster, and do the related actions for each scenarios 21 # 3. do the query of synced status of cdc server 22 # 4. check the info and status of query 23 24 set -xeu 25 26 CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) 27 source $CUR/../_utils/test_prepare 28 WORK_DIR=$OUT_DIR/$TEST_NAME 29 CDC_BINARY=cdc.test 30 SINK_TYPE=$1 31 32 CDC_COUNT=3 33 DB_COUNT=4 34 35 function kill_pd() { 36 info=$(ps aux | grep pd-server | grep $WORK_DIR) || true 37 $(ps aux | grep pd-server | grep $WORK_DIR | awk '{print $2}' | xargs kill -9 &>/dev/null) || true 38 } 39 40 function kill_tikv() { 41 info=$(ps aux | grep tikv-server | grep $WORK_DIR) || true 42 $(ps aux | grep tikv-server | grep $WORK_DIR | awk '{print $2}' | xargs kill -9 &>/dev/null) || true 43 } 44 45 function kill_tidb() { 46 info=$(ps aux | grep tidb-server | grep $WORK_DIR) || true 47 $(ps aux | grep tidb-server | grep $WORK_DIR | awk '{print $2}' | xargs kill -9 &>/dev/null) || true 48 } 49 50 function run_normal_case_and_unavailable_pd() { 51 rm -rf $WORK_DIR && mkdir -p $WORK_DIR 52 53 start_tidb_cluster --workdir $WORK_DIR 54 55 cd $WORK_DIR 56 57 start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1}) 58 run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY 59 60 config_path=$1 61 62 SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1" 63 run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path" 64 65 # case 1: test in available cluster 66 synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced) 67 68 status=$(echo $synced_status | jq '.synced') 69 sink_checkpoint_ts=$(echo $synced_status | jq -r '.sink_checkpoint_ts') 70 puller_resolved_ts=$(echo $synced_status | jq -r '.puller_resolved_ts') 71 last_synced_ts=$(echo $synced_status | jq -r '.last_synced_ts') 72 if [ $status != true ]; then 73 echo "synced status isn't correct" 74 exit 1 75 fi 76 # the timestamp for puller_resolved_ts is 0 when do data insert 77 if [ "$puller_resolved_ts" != "1970-01-01 08:00:00.000" ]; then 78 echo "puller_resolved_ts is not 1970-01-01 08:00:00.000" 79 exit 1 80 fi 81 # the timestamp for last_synced_ts is 0 when do data insert 82 if [ "$last_synced_ts" != "1970-01-01 08:00:00.000" ]; then 83 echo "last_synced_ts is not 1970-01-01 08:00:00.000" 84 exit 1 85 fi 86 87 # compare sink_checkpoint_ts with current time 88 current=$(date +"%Y-%m-%d %H:%M:%S") 89 echo "sink_checkpoint_ts is "$sink_checkpoint_ts 90 checkpoint_timestamp=$(date -d "$sink_checkpoint_ts" +%s) 91 current_timestamp=$(date -d "$current" +%s) 92 if [ $(($current_timestamp - $checkpoint_timestamp)) -gt 300 ]; then # give a soft check 93 echo "sink_checkpoint_ts is not correct" 94 exit 1 95 fi 96 97 run_sql "USE TEST;Create table t1(a int primary key, b int);insert into t1 values(1,2);insert into t1 values(2,3);" 98 check_table_exists "test.t1" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT} 99 100 sleep 5 # wait data insert 101 synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced) 102 status=$(echo $synced_status | jq '.synced') 103 if [ $status != false ]; then 104 echo "synced status isn't correct" 105 exit 1 106 fi 107 info=$(echo $synced_status | jq -r '.info') 108 if [ "$info" != "The data syncing is not finished, please wait" ]; then 109 echo "synced status info is not correct" 110 exit 1 111 fi 112 113 sleep 130 # wait enough time for pass synced-check-interval 114 synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced) 115 status=$(echo $synced_status | jq '.synced') 116 if [ $status != true ]; then 117 echo "synced status isn't correct" 118 exit 1 119 fi 120 121 #========== 122 # case 2: test with unavailable pd, query will not get the available response 123 kill_pd 124 125 sleep 20 126 127 synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced) 128 error_code=$(echo $synced_status | jq -r '.error_code') 129 cleanup_process $CDC_BINARY 130 stop_tidb_cluster 131 } 132 133 function run_case_with_unavailable_tikv() { 134 rm -rf $WORK_DIR && mkdir -p $WORK_DIR 135 136 start_tidb_cluster --workdir $WORK_DIR 137 138 cd $WORK_DIR 139 140 start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1}) 141 run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY 142 143 config_path=$1 144 145 SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1" 146 run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path" 147 148 # case 3: test in unavailable tikv cluster 149 run_sql "USE TEST;Create table t1(a int primary key, b int);insert into t1 values(1,2);insert into t1 values(2,3);" 150 check_table_exists "test.t1" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT} 151 152 sleep 5 # make data inserted into downstream 153 kill_tikv 154 155 # test the case when pdNow - lastSyncedTs < threshold 156 synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced) 157 status=$(echo $synced_status | jq '.synced') 158 if [ $status != false ]; then 159 echo "synced status isn't correct" 160 exit 1 161 fi 162 info=$(echo $synced_status | jq -r '.info') 163 target_message="The data syncing is not finished, please wait" 164 165 if [ "$info" != "$target_message" ]; then 166 echo "synced status info is not correct" 167 exit 1 168 fi 169 170 sleep 130 # wait enough time for pass synced-check-interval 171 # test the case when pdNow - lastSyncedTs > threshold 172 synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced) 173 status=$(echo $synced_status | jq '.synced') 174 if [ $status != false ]; then 175 echo "synced status isn't correct" 176 exit 1 177 fi 178 info=$(echo $synced_status | jq -r '.info') 179 target_message="Please check whether PD is online and TiKV Regions are all available. \ 180 If PD is offline or some TiKV regions are not available, it means that the data syncing process is complete. \ 181 To check whether TiKV regions are all available, you can view \ 182 'TiKV-Details' > 'Resolved-Ts' > 'Max Leader Resolved TS gap' on Grafana. \ 183 If the gap is large, such as a few minutes, it means that some regions in TiKV are unavailable. \ 184 Otherwise, if the gap is small and PD is online, it means the data syncing is incomplete, so please wait" 185 186 if [ "$info" != "$target_message" ]; then 187 echo "synced status info is not correct" 188 exit 1 189 fi 190 191 cleanup_process $CDC_BINARY 192 stop_tidb_cluster 193 } 194 195 function run_case_with_unavailable_tidb() { 196 rm -rf $WORK_DIR && mkdir -p $WORK_DIR 197 198 start_tidb_cluster --workdir $WORK_DIR 199 200 cd $WORK_DIR 201 202 start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1}) 203 run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY 204 205 config_path=$1 206 207 SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1" 208 run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path" 209 210 # case 3: test in unavailable tikv cluster 211 run_sql "USE TEST;Create table t1(a int primary key, b int);insert into t1 values(1,2);insert into t1 values(2,3);" 212 check_table_exists "test.t1" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT} 213 214 sleep 5 # make data inserted into downstream 215 kill_tidb 216 217 # test the case when pdNow - lastSyncedTs < threshold 218 synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced) 219 status=$(echo $synced_status | jq '.synced') 220 if [ $status != false ]; then 221 echo "synced status isn't correct" 222 exit 1 223 fi 224 info=$(echo $synced_status | jq -r '.info') 225 target_message="The data syncing is not finished, please wait" 226 227 if [ "$info" != "$target_message" ]; then 228 echo "synced status info is not correct" 229 exit 1 230 fi 231 232 sleep 130 # wait enough time for pass synced-check-interval 233 # test the case when pdNow - lastSyncedTs > threshold 234 synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced) 235 status=$(echo $synced_status | jq '.synced') 236 if [ $status != true ]; then 237 echo "synced status isn't correct" 238 exit 1 239 fi 240 info=$(echo $synced_status | jq -r '.info') 241 target_message="Data syncing is finished" 242 243 if [ "$info" != "$target_message" ]; then 244 echo "synced status info is not correct" 245 exit 1 246 fi 247 248 cleanup_process $CDC_BINARY 249 stop_tidb_cluster 250 } 251 252 function run_case_with_failpoint() { 253 rm -rf $WORK_DIR && mkdir -p $WORK_DIR 254 255 start_tidb_cluster --workdir $WORK_DIR 256 257 cd $WORK_DIR 258 259 # make failpoint to block checkpoint-ts 260 export GO_FAILPOINTS='github.com/pingcap/tiflow/cdc/owner/ChangefeedOwnerNotUpdateCheckpoint=return(true)' 261 262 start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1}) 263 run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY 264 265 config_path=$1 266 267 SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1" 268 run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path" 269 270 sleep 20 # wait enough time for pass checkpoint-check-interval 271 synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced) 272 status=$(echo $synced_status | jq '.synced') 273 if [ $status != false ]; then 274 echo "synced status isn't correct" 275 exit 1 276 fi 277 info=$(echo $synced_status | jq -r '.info') 278 target_message="Please check whether PD is online and TiKV Regions are all available. \ 279 If PD is offline or some TiKV regions are not available, it means that the data syncing process is complete. \ 280 To check whether TiKV regions are all available, you can view \ 281 'TiKV-Details' > 'Resolved-Ts' > 'Max Leader Resolved TS gap' on Grafana. \ 282 If the gap is large, such as a few minutes, it means that some regions in TiKV are unavailable. \ 283 Otherwise, if the gap is small and PD is online, it means the data syncing is incomplete, so please wait" 284 if [ "$info" != "$target_message" ]; then 285 echo "synced status info is not correct" 286 exit 1 287 fi 288 289 export GO_FAILPOINTS='' 290 291 cleanup_process $CDC_BINARY 292 stop_tidb_cluster 293 } 294 295 trap stop_tidb_cluster EXIT 296 run_normal_case_and_unavailable_pd "conf/changefeed.toml" 297 run_case_with_unavailable_tikv "conf/changefeed.toml" 298 run_case_with_unavailable_tidb "conf/changefeed.toml" 299 run_case_with_failpoint "conf/changefeed.toml" 300 301 check_logs $WORK_DIR 302 echo "[$(date)] <<<<<< run test case $TEST_NAME success! >>>>>>"