github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/tests/incremental_mode/run.sh (about) 1 #!/bin/bash 2 3 set -eu 4 5 cur=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) 6 source $cur/../_utils/test_prepare 7 WORK_DIR=$TEST_DIR/$TEST_NAME 8 TASK_NAME="test" 9 10 API_VERSION="v1alpha1" 11 12 function get_uuid() { 13 uuid=$(echo "show variables like '%server_uuid%';" | MYSQL_PWD=123456 mysql -uroot -h$1 -P$2 | awk 'FNR == 2 {print $2}') 14 echo $uuid 15 } 16 17 function get_binlog_name() { 18 binlog_name=$(echo "SHOW BINARY LOGS;" | MYSQL_PWD=123456 mysql -uroot -h127.0.0.1 -P3307 | awk 'FNR == 2 {print $1}') 19 echo $binlog_name 20 } 21 22 function get_latest_name() { 23 binlog_name=$(echo "SHOW BINARY LOGS;" | MYSQL_PWD=123456 mysql -uroot -h$1 -P$2 | awk 'END{print $1}') 24 echo $binlog_name 25 } 26 27 ###################################################### 28 # this test also used by binlog 999999 test 29 ###################################################### 30 function run() { 31 run_sql_file $cur/data/db1.prepare.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 32 check_contains 'Query OK, 2 rows affected' 33 run_sql_file $cur/data/db2.prepare.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2 34 check_contains 'Query OK, 3 rows affected' 35 uuid=($(get_uuid $MYSQL_HOST1 $MYSQL_PORT1)) 36 binlog_name=($(get_binlog_name $MYSQL_HOST2 $MYSQL_PORT2)) 37 38 export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/worker/defaultKeepAliveTTL=return(1)" 39 40 run_dm_master $WORK_DIR/master $MASTER_PORT $cur/conf/dm-master.toml 41 check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT 42 run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml 43 check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT 44 run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml 45 check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT 46 47 # test keepalive is changed by failpoint, so after 1 second DM master will know not alive 48 killall -9 dm-worker.test 49 sleep 3 50 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 51 "list-member" \ 52 "\"stage\": \"offline\"" 2 53 run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml 54 check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT 55 run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml 56 check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT 57 58 # operate mysql config to worker 59 cp $cur/conf/source1.yaml $WORK_DIR/source1.yaml 60 cp $cur/conf/source2.yaml $WORK_DIR/source2.yaml 61 sed -i "s/binlog-gtid-placeholder/$uuid:0/g" $WORK_DIR/source1.yaml 62 sed -i "s/binlog-name-placeholder/$binlog_name/g" $WORK_DIR/source2.yaml 63 sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker1/relay_log" $WORK_DIR/source1.yaml 64 sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker2/relay_log" $WORK_DIR/source2.yaml 65 dmctl_operate_source create $WORK_DIR/source1.yaml $SOURCE_ID1 66 dmctl_operate_source create $WORK_DIR/source2.yaml $SOURCE_ID2 67 68 worker1bound=$($PWD/bin/dmctl.test DEVEL --master-addr "127.0.0.1:$MASTER_PORT1" list-member --name worker1 | 69 grep 'source' | awk -F: '{print $2}' | cut -d'"' -f 2) 70 worker2bound=$($PWD/bin/dmctl.test DEVEL --master-addr "127.0.0.1:$MASTER_PORT1" list-member --name worker2 | 71 grep 'source' | awk -F: '{print $2}' | cut -d'"' -f 2) 72 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 73 "start-relay -s $worker1bound worker1" \ 74 "\"result\": true" 2 75 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 76 "start-relay -s $worker2bound worker2" \ 77 "\"result\": true" 2 78 79 # relay should be started after start-relay 80 sleep 2 81 # and now default keepalive TTL is 30 minutes 82 killall -9 dm-worker.test 83 sleep 3 84 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 85 "list-member" \ 86 "\"stage\": \"bound\"" 2 87 run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml 88 check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT 89 run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml 90 check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT 91 92 kill_dm_worker 93 check_port_offline $WORKER1_PORT 20 94 check_port_offline $WORKER2_PORT 20 95 96 # using account with limited privileges 97 run_sql_file $cur/data/db1.prepare.user.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 98 check_count 'Query OK, 0 rows affected' 7 99 run_sql_file $cur/data/db2.prepare.user.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2 100 check_count 'Query OK, 0 rows affected' 7 101 102 # update mysql config 103 sed -i "s/root/dm_incremental/g" $WORK_DIR/source1.yaml 104 sed -i "s/root/dm_incremental/g" $WORK_DIR/source2.yaml 105 106 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 107 "operate-source update $WORK_DIR/source1.yaml" \ 108 "Update worker config is not supported by dm-ha now" 1 109 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 110 "operate-source update $WORK_DIR/source2.yaml" \ 111 "Update worker config is not supported by dm-ha now" 1 112 # update mysql config is not supported by dm-ha now, so we stop and start source again to update source config 113 114 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 115 "stop-relay -s $worker1bound worker1" \ 116 "\"result\": true" 2 117 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 118 "stop-relay -s $worker2bound worker2" \ 119 "\"result\": true" 2 120 121 dmctl_operate_source stop $WORK_DIR/source1.yaml $SOURCE_ID1 122 dmctl_operate_source stop $WORK_DIR/source2.yaml $SOURCE_ID2 123 dmctl_operate_source create $WORK_DIR/source1.yaml $SOURCE_ID1 124 dmctl_operate_source create $WORK_DIR/source2.yaml $SOURCE_ID2 125 126 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 127 "start-relay -s $worker1bound worker1" \ 128 "\"result\": true" 2 129 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 130 "start-relay -s $worker2bound worker2" \ 131 "\"result\": true" 2 132 133 worker1_run_source_1=$(sed "s/$SOURCE_ID1/$SOURCE_ID1\n/g" $WORK_DIR/worker1/log/dm-worker.log | grep -c "$SOURCE_ID1") || true 134 echo "start task in incremental mode with zero gtid/pos" 135 sed "s/binlog-gtid-placeholder-1/$uuid:0/g" $cur/conf/dm-task.yaml >$WORK_DIR/dm-task.yaml 136 sed -i "s/binlog-name-placeholder-2/$binlog_name/g" $WORK_DIR/dm-task.yaml 137 sed -i "s/binlog-pos-placeholder-2/4/g" $WORK_DIR/dm-task.yaml 138 139 # test graceful display error 140 export GO_FAILPOINTS='github.com/pingcap/tiflow/dm/syncer/binlogstream/GetEventError=return' 141 run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml 142 check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT 143 run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml 144 check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT 145 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 146 "start-task $WORK_DIR/dm-task.yaml --remove-meta" 147 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 148 "query-status test" \ 149 "\"ErrCode\": 36069" 2 150 151 kill_dm_worker 152 check_port_offline $WORKER1_PORT 20 153 check_port_offline $WORKER2_PORT 20 154 155 # only mock pull binlog failed once 156 export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/syncer/WaitUserCancel=return(8);github.com/pingcap/tiflow/dm/syncer/binlogstream/GetEventError=1*return" 157 run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml 158 check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT 159 run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml 160 check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT 161 162 sleep 3 163 # check not specify binlog name could also update active relay log 164 if [ $worker1_run_source_1 -gt 0 ]; then 165 grep -E ".*current earliest active relay log.*$binlog_name" $WORK_DIR/worker2/log/dm-worker.log 166 else 167 grep -E ".*current earliest active relay log.*$binlog_name" $WORK_DIR/worker1/log/dm-worker.log 168 fi 169 170 run_sql_file $cur/data/db1.increment.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 171 run_sql_file $cur/data/db2.increment.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2 172 173 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 174 "query-status test" \ 175 "Running" 4 176 # check reset binlog puller success 177 grep -Fq "reset replication binlog puller" $WORK_DIR/worker1/log/dm-worker.log 178 grep -Fq "reset replication binlog puller" $WORK_DIR/worker2/log/dm-worker.log 179 180 check_log_contain_with_retry 'finish to handle ddls in normal mode.*create table t2' $WORK_DIR/worker1/log/dm-worker.log $WORK_DIR/worker2/log/dm-worker.log 181 182 # we use failpoint to let worker sleep 8 second when executeSQLs, to increase possibility of 183 # meeting an error of context cancel. 184 # when below check pass, it means we filter out that error, or that error doesn't happen. 185 # we only focus on fails, to find any unfiltered context cancel error. 186 # and should not contain errors like: 187 # - `driver: bad connection` 188 # - `sql: connection is already closed` 189 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 190 "pause-task test" \ 191 "\"result\": true" 3 192 193 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 194 "stop-task test" \ 195 "\"result\": true" 3 196 kill_dm_worker 197 check_port_offline $WORKER1_PORT 20 198 check_port_offline $WORKER2_PORT 20 199 200 export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/syncer/FlushCheckpointStage=return(100)" # for all stages 201 run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml 202 check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT 203 run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml 204 check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT 205 206 sleep 3 207 # start DM task. don't check error because it will meet injected error soon 208 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 209 "start-task $WORK_DIR/dm-task.yaml" 210 211 # the task should paused by `FlushCheckpointStage` failpoint before flush old checkpoint. 212 # `db2.increment.sql` has no DDL, so we check count of content as `1`. 213 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 214 "query-status test" \ 215 "failpoint error for FlushCheckpointStage before flush old checkpoint" 1 216 217 # resume-task to next stage 218 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 219 "resume-task test" 220 221 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 222 "query-status test" \ 223 "failpoint error for FlushCheckpointStage before track DDL" 1 224 225 # resume-task to next stage 226 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 227 "resume-task test" 228 229 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 230 "query-status test" \ 231 "failpoint error for FlushCheckpointStage before execute DDL" 1 232 233 # resume-task to next stage 234 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 235 "resume-task test" 236 237 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 238 "query-status test" \ 239 "failpoint error for FlushCheckpointStage before save checkpoint" 1 240 241 # resume-task to next stage 242 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 243 "resume-task test" 244 245 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 246 "query-status test" \ 247 "failpoint error for FlushCheckpointStage before flush checkpoint" 1 248 249 # resume-task to continue the sync 250 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 251 "resume-task test" \ 252 "\"result\": true" 3 253 254 check_sync_diff $WORK_DIR $cur/conf/diff_config.toml 255 256 # test rotate binlog, after rotate and ddl, master binlog should be equal to sync binlog 257 run_sql "flush logs;" $MYSQL_PORT1 $MYSQL_PASSWORD1 258 run_sql "truncate table incremental_mode.t1;" $MYSQL_PORT1 $MYSQL_PASSWORD1 259 260 sleep 2 261 curl -X GET 127.0.0.1:$MASTER_PORT/apis/${API_VERSION}/status/test >$WORK_DIR/status.log 262 SYNCER_BINLOG=$(cat $WORK_DIR/status.log | sed 's/.*mysql-replica-01.*\"syncerBinlog\":\"\(.*\)\",\"syncerBinlogGtid.*mysql-replica-02.*/\1/g') 263 MASTER_BINLOG=$(cat $WORK_DIR/status.log | sed 's/.*mysql-replica-01.*\"masterBinlog\":\"\(.*\)\",\"masterBinlogGtid.*mysql-replica-02.*/\1/g') 264 265 if [ "$MASTER_BINLOG" != "$SYNCER_BINLOG" ]; then 266 echo "master binlog is not equal to syncer binlog" 267 cat $WORK_DIR/status.log 268 exit 1 269 fi 270 271 export GO_FAILPOINTS='' 272 # stop DM task. 273 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 274 "stop-task $WORK_DIR/dm-task.yaml" 275 276 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 277 "stop-relay -s $worker1bound worker1" \ 278 "\"result\": true" 2 279 run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 280 "stop-relay -s $worker2bound worker2" \ 281 "\"result\": true" 2 282 283 # test rotate binlog, after rotate purge some binlogs. check whether dm can do precheck correctly 284 uuid=($(get_uuid $MYSQL_HOST1 $MYSQL_PORT1)) # get uuid before truncate 285 binlog_name=($(get_latest_name $MYSQL_HOST2 $MYSQL_PORT2)) 286 287 run_sql "flush logs;" $MYSQL_PORT1 $MYSQL_PASSWORD1 288 run_sql "flush logs;" $MYSQL_PORT2 $MYSQL_PASSWORD2 289 new_binlog_name1=($(get_latest_name $MYSQL_HOST1 $MYSQL_PORT1)) 290 new_binlog_name2=($(get_latest_name $MYSQL_HOST2 $MYSQL_PORT2)) 291 292 sed "s/binlog-gtid-placeholder-1/$uuid:0/g" $cur/conf/dm-task.yaml >$WORK_DIR/dm-task.yaml 293 sed -i "s/binlog-name-placeholder-2/$new_binlog_name2/g" $WORK_DIR/dm-task.yaml 294 sed -i "s/binlog-pos-placeholder-2/4/g" $WORK_DIR/dm-task.yaml 295 # precheck DM task. 296 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 297 "check-task $WORK_DIR/dm-task.yaml" \ 298 "\"name\": \"meta position check\"" 0 299 300 run_sql "purge binary logs to '$new_binlog_name2'" $MYSQL_PORT2 $MYSQL_PASSWORD2 301 run_sql "truncate table incremental_mode.t1;" $MYSQL_PORT1 $MYSQL_PASSWORD1 302 run_sql "truncate table incremental_mode.t2;" $MYSQL_PORT2 $MYSQL_PASSWORD2 303 304 sed "s/binlog-gtid-placeholder-1/$uuid:0/g" $cur/conf/dm-task.yaml >$WORK_DIR/dm-task.yaml 305 sed -i "s/binlog-name-placeholder-2/$binlog_name/g" $WORK_DIR/dm-task.yaml 306 sed -i "s/binlog-pos-placeholder-2/4/g" $WORK_DIR/dm-task.yaml 307 308 # precheck DM task. 309 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 310 "check-task $WORK_DIR/dm-task.yaml" \ 311 "\"name\": \"meta position check\"" 1 312 313 run_sql "purge binary logs to '$new_binlog_name1'" $MYSQL_PORT1 $MYSQL_PASSWORD1 314 sed "s/binlog-gtid-placeholder-1/$uuid:0/g" $cur/conf/dm-task.yaml >$WORK_DIR/dm-task.yaml 315 sed -i "s/binlog-name-placeholder-2/$new_binlog_name2/g" $WORK_DIR/dm-task.yaml 316 sed -i "s/binlog-pos-placeholder-2/4/g" $WORK_DIR/dm-task.yaml 317 318 # precheck DM task. 319 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 320 "check-task $WORK_DIR/dm-task.yaml" \ 321 "\"name\": \"meta position check\"" 1 322 323 # start DM task. 324 run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ 325 "start-task $WORK_DIR/dm-task.yaml --remove-meta" \ 326 "\"name\": \"meta position check\"" 1 327 } 328 329 cleanup_data $TEST_NAME 330 # also cleanup dm processes in case of last run failed 331 cleanup_process $* 332 run $* 333 cleanup_process $* 334 335 echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>"