#!/bin/bash

## test the same logic as `synced_status`, but with redo mode

# [DESCRIPTION]:
#   This test is related to
#   It tests the synced status request of the cdc server in the following scenarios:
#   1. the synced status request of the cdc server when the upstream cluster is available
#     1.1 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs < threshold
#     1.2 pdNow - lastSyncedTs < threshold
#     1.3 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs < threshold, resolvedTs - checkpointTs > threshold
#   2. the synced status request of the cdc server when the upstream pd is unavailable
#     2.1 resolvedTs - checkpointTs < threshold
#   3. the synced status request of the cdc server when the upstream tikv is unavailable
#     3.1 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs > threshold, resolvedTs - checkpointTs < threshold
#     3.2 pdNow - lastSyncedTs < threshold
#   4. the synced status request of the cdc server when the downstream tidb is unavailable
#     4.1 pdNow - lastSyncedTs > threshold, pdNow - checkpointTs < threshold
#     4.2 pdNow - lastSyncedTs < threshold
# [STEP]:
#   1. create a changefeed with synced-time-config = xx
#   2. insert data into the upstream cluster, and perform the related actions for each scenario
#   3. query the synced status of the cdc server
#   4. check the info and status fields of the query result
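
# For reference, the /api/v2/changefeeds/{id}/synced endpoint queried throughout this
# script returns a JSON body that is parsed with jq. The shape below is only an
# illustrative sketch (values are made up); the field names are taken from the jq
# expressions used in this test:
#
#   {
#     "synced": true,
#     "sink_checkpoint_ts": "2024-05-20 12:00:00.000",
#     "puller_resolved_ts": "1970-01-01 08:00:00.000",
#     "last_synced_ts": "1970-01-01 08:00:00.000",
#     "info": "Data syncing is finished"
#   }
#
# When PD has been killed (case 2), the script only extracts an "error_code" field from
# the response and does not assert its value.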

set -xeu

CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
source $CUR/../_utils/test_prepare
WORK_DIR=$OUT_DIR/$TEST_NAME
CDC_BINARY=cdc.test
SINK_TYPE=$1

CDC_COUNT=3
DB_COUNT=4

# helpers that kill the pd/tikv/tidb processes started under $WORK_DIR
function kill_pd() {
	info=$(ps aux | grep pd-server | grep $WORK_DIR) || true
	$(ps aux | grep pd-server | grep $WORK_DIR | awk '{print $2}' | xargs kill -9 &>/dev/null) || true
}

function kill_tikv() {
	info=$(ps aux | grep tikv-server | grep $WORK_DIR) || true
	$(ps aux | grep tikv-server | grep $WORK_DIR | awk '{print $2}' | xargs kill -9 &>/dev/null) || true
}

function kill_tidb() {
	info=$(ps aux | grep tidb-server | grep $WORK_DIR) || true
	$(ps aux | grep tidb-server | grep $WORK_DIR | awk '{print $2}' | xargs kill -9 &>/dev/null) || true
}

function run_normal_case_and_unavailable_pd() {
	rm -rf $WORK_DIR && mkdir -p $WORK_DIR

	start_tidb_cluster --workdir $WORK_DIR

	cd $WORK_DIR

	start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1})
	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY

	config_path=$1

	SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1"
	run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path"

	# case 1: test in an available cluster
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)

	status=$(echo $synced_status | jq '.synced')
	sink_checkpoint_ts=$(echo $synced_status | jq -r '.sink_checkpoint_ts')
	puller_resolved_ts=$(echo $synced_status | jq -r '.puller_resolved_ts')
	last_synced_ts=$(echo $synced_status | jq -r '.last_synced_ts')
	if [ $status != true ]; then
		echo "synced status isn't correct"
		exit 1
	fi
	# puller_resolved_ts is still 0 (rendered as 1970-01-01 08:00:00.000) before any data is inserted
	if [ "$puller_resolved_ts" != "1970-01-01 08:00:00.000" ]; then
		echo "puller_resolved_ts is not 1970-01-01 08:00:00.000"
		exit 1
	fi
	# last_synced_ts is still 0 (rendered as 1970-01-01 08:00:00.000) before any data is inserted
	if [ "$last_synced_ts" != "1970-01-01 08:00:00.000" ]; then
		echo "last_synced_ts is not 1970-01-01 08:00:00.000"
		exit 1
	fi

	# compare sink_checkpoint_ts with the current time
	current=$(date +"%Y-%m-%d %H:%M:%S")
	echo "sink_checkpoint_ts is "$sink_checkpoint_ts
	checkpoint_timestamp=$(date -d "$sink_checkpoint_ts" +%s)
	current_timestamp=$(date -d "$current" +%s)
	if [ $(($current_timestamp - $checkpoint_timestamp)) -gt 300 ]; then # give a soft check
		echo "sink_checkpoint_ts is not correct"
		exit 1
	fi

	run_sql "USE TEST;Create table t1(a int primary key, b int);insert into t1 values(1,2);insert into t1 values(2,3);"
	check_table_exists "test.t1" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT}

	sleep 5 # wait for the inserted data to be processed
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	status=$(echo $synced_status | jq '.synced')
	if [ $status != false ]; then
		echo "synced status isn't correct"
		exit 1
	fi
	info=$(echo $synced_status | jq -r '.info')
	if [ "$info" != "The data syncing is not finished, please wait" ]; then
		echo "synced status info is not correct"
		exit 1
	fi

	sleep 130 # wait long enough to pass the synced-check-interval
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	status=$(echo $synced_status | jq '.synced')
	if [ $status != true ]; then
		echo "synced status isn't correct"
		exit 1
	fi

	#==========
	# case 2: test with an unavailable pd, the query will not get an available response
	kill_pd

	sleep 20

	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	error_code=$(echo $synced_status | jq -r '.error_code')
	cleanup_process $CDC_BINARY
	stop_tidb_cluster
}
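
# Every case in this test creates the changefeed with --config="$CUR/conf/changefeed-redo.toml".
# That file is not shown here; as a rough sketch (key names and values are assumptions
# inferred from the sleeps and comments in this script, not copied from the real file),
# a redo-enabled config for this test would look something like:
#
#   [synced-status]
#   synced-check-interval = 120 # assumed, matching the "sleep 130" waits below
#   checkpoint-interval = 15    # assumed, matching the "sleep 20" wait in the failpoint case
#
#   [consistency]
#   level = "eventual"
#   storage = "file:///tmp/..." # placeholder path, the real config sets its own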

function run_case_with_unavailable_tikv() {
	rm -rf $WORK_DIR && mkdir -p $WORK_DIR

	start_tidb_cluster --workdir $WORK_DIR

	cd $WORK_DIR

	start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1})
	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY

	config_path=$1

	SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1"
	run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path"

	# case 3: test in an unavailable tikv cluster
	run_sql "USE TEST;Create table t1(a int primary key, b int);insert into t1 values(1,2);insert into t1 values(2,3);"
	check_table_exists "test.t1" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT}

	sleep 5 # make sure the data is inserted into the downstream
	kill_tikv

	# test the case when pdNow - lastSyncedTs < threshold
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	status=$(echo $synced_status | jq '.synced')
	if [ $status != false ]; then
		echo "synced status isn't correct"
		exit 1
	fi
	info=$(echo $synced_status | jq -r '.info')
	target_message="The data syncing is not finished, please wait"

	if [ "$info" != "$target_message" ]; then
		echo "synced status info is not correct"
		exit 1
	fi

	sleep 130 # wait long enough to pass the synced-check-interval
	# test the case when pdNow - lastSyncedTs > threshold
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	status=$(echo $synced_status | jq '.synced')
	if [ $status != false ]; then
		echo "synced status isn't correct"
		exit 1
	fi
	info=$(echo $synced_status | jq -r '.info')
	target_message="Please check whether PD is online and TiKV Regions are all available. \
If PD is offline or some TiKV regions are not available, it means that the data syncing process is complete. \
To check whether TiKV regions are all available, you can view \
'TiKV-Details' > 'Resolved-Ts' > 'Max Leader Resolved TS gap' on Grafana. \
If the gap is large, such as a few minutes, it means that some regions in TiKV are unavailable. \
Otherwise, if the gap is small and PD is online, it means the data syncing is incomplete, so please wait"

	if [ "$info" != "$target_message" ]; then
		echo "synced status info is not correct"
		exit 1
	fi

	cleanup_process $CDC_BINARY
	stop_tidb_cluster
}

function run_case_with_unavailable_tidb() {
	rm -rf $WORK_DIR && mkdir -p $WORK_DIR

	start_tidb_cluster --workdir $WORK_DIR

	cd $WORK_DIR

	start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1})
	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY

	config_path=$1

	SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1"
	run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path"

	# case 4: test in an unavailable tidb cluster
	run_sql "USE TEST;Create table t1(a int primary key, b int);insert into t1 values(1,2);insert into t1 values(2,3);"
	check_table_exists "test.t1" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT}

	sleep 5 # make sure the data is inserted into the downstream
	kill_tidb

	# test the case when pdNow - lastSyncedTs < threshold
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	status=$(echo $synced_status | jq '.synced')
	if [ $status != false ]; then
		echo "synced status isn't correct"
		exit 1
	fi
	info=$(echo $synced_status | jq -r '.info')
	target_message="The data syncing is not finished, please wait"

	if [ "$info" != "$target_message" ]; then
		echo "synced status info is not correct"
		exit 1
	fi

	sleep 130 # wait long enough to pass the synced-check-interval
	# test the case when pdNow - lastSyncedTs > threshold
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	status=$(echo $synced_status | jq '.synced')
	if [ $status != true ]; then
		echo "synced status isn't correct"
		exit 1
	fi
	info=$(echo $synced_status | jq -r '.info')
	target_message="Data syncing is finished"

	if [ "$info" != "$target_message" ]; then
		echo "synced status info is not correct"
		exit 1
	fi

	cleanup_process $CDC_BINARY
	stop_tidb_cluster
}
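
# The failpoint case below relies on the pingcap/failpoint framework compiled into the cdc
# binary: setting GO_FAILPOINTS to "<failpoint-path>=<term>" before starting the server
# activates that failpoint. "return(true)" makes ChangefeedOwnerNotUpdateCheckpoint fire on
# every evaluation, so the owner never advances the checkpoint-ts, which is what drives the
# expected not-synced response in this case. (Other terms such as "1*return(true)" or
# "sleep(1000)" exist in the framework, but are not used here.)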

function run_case_with_failpoint() {
	rm -rf $WORK_DIR && mkdir -p $WORK_DIR

	start_tidb_cluster --workdir $WORK_DIR

	cd $WORK_DIR

	# enable the failpoint so that the owner stops advancing the checkpoint-ts
	export GO_FAILPOINTS='github.com/pingcap/tiflow/cdc/owner/ChangefeedOwnerNotUpdateCheckpoint=return(true)'

	start_ts=$(run_cdc_cli_tso_query ${UP_PD_HOST_1} ${UP_PD_PORT_1})
	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY

	config_path=$1

	SINK_URI="mysql://root@127.0.0.1:3306/?max-txn-row=1"
	run_cdc_cli changefeed create --start-ts=$start_ts --sink-uri="$SINK_URI" --changefeed-id="test-1" --config="$CUR/$config_path"

	sleep 20 # wait long enough to pass the checkpoint-check-interval
	synced_status=$(curl -X GET http://127.0.0.1:8300/api/v2/changefeeds/test-1/synced)
	status=$(echo $synced_status | jq '.synced')
	if [ $status != false ]; then
		echo "synced status isn't correct"
		exit 1
	fi
	info=$(echo $synced_status | jq -r '.info')
	target_message="Please check whether PD is online and TiKV Regions are all available. \
If PD is offline or some TiKV regions are not available, it means that the data syncing process is complete. \
To check whether TiKV regions are all available, you can view \
'TiKV-Details' > 'Resolved-Ts' > 'Max Leader Resolved TS gap' on Grafana. \
If the gap is large, such as a few minutes, it means that some regions in TiKV are unavailable. \
Otherwise, if the gap is small and PD is online, it means the data syncing is incomplete, so please wait"
	if [ "$info" != "$target_message" ]; then
		echo "synced status info is not correct"
		exit 1
	fi

	export GO_FAILPOINTS=''

	cleanup_process $CDC_BINARY
	stop_tidb_cluster
}

trap stop_tidb_cluster EXIT

# run all cases with redo enabled
run_normal_case_and_unavailable_pd "conf/changefeed-redo.toml"
run_case_with_unavailable_tikv "conf/changefeed-redo.toml"
run_case_with_unavailable_tidb "conf/changefeed-redo.toml"
run_case_with_failpoint "conf/changefeed-redo.toml"

check_logs $WORK_DIR
echo "[$(date)] <<<<<< run test case $TEST_NAME success! >>>>>>"