#!/bin/bash
# DM integration test "print_status" (dm/tests/print_status/run.sh).
#
# Runs a full + incremental migration with failpoints that slow down the load
# and sync stages, stops the DM processes, and then checks that dm-worker.log
# contains the expected status lines for the dump, load and sync units.
#
# Helpers (run_sql_file, run_dm_master, check_sync_diff, cleanup_process, ...)
# and variables such as TEST_DIR, TEST_NAME, MYSQL_HOST1 or MASTER_PORT are not
# defined in this file; they are expected to be provided by
# _utils/test_prepare and the test environment.

# NOTE: pipefail is intentionally not enabled: the `grep | grep -vq` pipeline
# below may terminate its first stage early once the second one has matched.
set -eux

cur=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
source "$cur/../_utils/test_prepare"
WORK_DIR=$TEST_DIR/$TEST_NAME

#######################################
# Prepare upstream data, start dm-master / dm-worker with slow-down
# failpoints, run the task and verify full + incremental data via
# check_sync_diff.
# Globals:   cur, WORK_DIR, MYSQL_HOST1, MYSQL_PORT1, MYSQL_PASSWORD1,
#            MASTER_PORT, WORKER1_PORT, SOURCE_ID1 (read);
#            GO_FAILPOINTS (exported)
# Arguments: none are used
#######################################
function run() {
	run_sql_file "$cur/data/db.prepare.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1"

	# In the load stage, the dumped file is split into 14 insert segments, so
	# we slow down by 14 * 100 ms.
	# In the sync stage, there are 92 groups of binlog events, each containing
	# an XIDEvent, TableMapEvent, QueryEvent, GTIDEvent and one specific event,
	# so we slow down by 460 * 4 ms. Besides, the log may not be flushed to
	# disk immediately, so the checks need some retry mechanism.
	local inject_points=(
		"github.com/pingcap/tiflow/dm/worker/PrintStatusCheckSeconds=return(1)"
		"github.com/pingcap/tiflow/dm/loader/LoadDataSlowDown=sleep(100)"
		"github.com/pingcap/tiflow/dm/syncer/ProcessBinlogSlowDown=sleep(4)"
	)
	# Assign and export separately so a join_string failure is not masked by
	# the exit status of `export`.
	GO_FAILPOINTS="$(join_string \; "${inject_points[@]}")"
	export GO_FAILPOINTS

	cp "$cur/conf/dm-worker1.toml" "$WORK_DIR/dm-worker1.toml"
	sed -i "s%placeholder%$WORK_DIR/relay_by_worker%g" "$WORK_DIR/dm-worker1.toml"

	run_dm_master "$WORK_DIR/master" "$MASTER_PORT" "$cur/conf/dm-master.toml"
	check_rpc_alive "$cur/../bin/check_master_online" "127.0.0.1:$MASTER_PORT"
	run_dm_worker "$WORK_DIR/worker1" "$WORKER1_PORT" "$WORK_DIR/dm-worker1.toml"
	check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER1_PORT"

	# operate mysql config to worker
	cp "$cur/conf/source1.yaml" "$WORK_DIR/source1.yaml"
	sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker1/relay_log" "$WORK_DIR/source1.yaml"
	dmctl_operate_source create "$WORK_DIR/source1.yaml" "$SOURCE_ID1"

	# start DM task only
	dmctl_start_task_standalone

	# use sync_diff_inspector to check full dump loader
	check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"

	# Fails (and aborts under `set -e`) if the relay directory has no entries.
	ls "$WORK_DIR"/relay_by_worker/worker1/*

	run_sql_file "$cur/data/db.increment.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1"
	check_sync_diff "$WORK_DIR" "$cur/conf/diff_config.toml"
	check_log_contains "$WORK_DIR/worker1/log/dm-worker.log" 'enable safe-mode because of task initialization.*duration=1m0s'
}

#######################################
# Verify that dm-worker.log contains the status lines printed by the dump,
# load and sync units. Each bare `[ ... ]` / `grep` below acts as an
# assertion: under `set -e` a failure aborts the script.
# Globals:   WORK_DIR (read)
# Arguments: none are used
# Outputs:   progress messages on stdout; writes dump_status.log,
#            loader_status.log and syncer_status.log next to the worker log
# Returns:   exits 1 if the "dm-worker exit" line does not appear in time
#######################################
function check_print_status() {
	local worker_log=$WORK_DIR/worker1/log/dm-worker.log
	local max_attempts=3
	local i=0

	# wait for all dm-worker's log flushed to disk
	while [ "$i" -lt "$max_attempts" ]; do
		if grep -q "dm-worker exit" "$worker_log"; then
			break
		fi
		echo "wait for dm-worker exit log for the $i-th time"
		sleep 1
		# Advance the attempt counter; without this the loop never terminates
		# when the exit line is missing and the timeout below is unreachable.
		# ($((...)) is used instead of ((i++)), which returns non-zero for
		# i == 0 and would trip `set -e`.)
		i=$((i + 1))
	done
	if [ "$i" -ge "$max_attempts" ]; then
		echo "wait for dm-worker exit log timeout"
		exit 1
	fi

	echo "checking print status"

	# check dump unit print status
	local dump_status_file=$WORK_DIR/worker1/log/dump_status.log
	local dump_status_count
	grep -o "progress status of dumpling" "$worker_log" >"$dump_status_file"
	dump_status_count=$(wc -l "$dump_status_file" | awk '{print $1}')
	[ "$dump_status_count" -ge 1 ]
	# bps must not be zero (grep finding nothing is the expected outcome here,
	# hence the `|| true`)
	grep -o '\[bps=0' "$worker_log" >"$dump_status_file" || true
	dump_status_count=$(wc -l "$dump_status_file" | awk '{print $1}')
	[ "$dump_status_count" -eq 0 ]

	# check load unit print status
	local status_file=$WORK_DIR/worker1/log/loader_status.log
	local status_count
	grep -oP "\[unit=lightning-load\] \[IsCanceled=false\] \[finished_bytes=59674\] \[total_bytes=59674\] \[progress=.*\]" "$worker_log" >"$status_file"
	status_count=$(wc -l "$status_file" | awk '{print $1}')
	[ "$status_count" -eq 1 ]
	# must have a non-zero speed in log
	grep 'current speed (bytes / seconds)' "$worker_log" | grep -vq '"current speed (bytes / seconds)"=0'
	echo "check load unit print status success"

	# check sync unit print status
	local status_file2=$WORK_DIR/worker1/log/syncer_status.log
	local status_count2
	# Previous pattern, kept for reference:
	#grep -oP "syncer.*\Ktotal events = [0-9]+, total tps = [0-9]+, recent tps = [0-9]+, master-binlog = .*" $WORK_DIR/worker1/log/dm-worker.log > $status_file2
	grep -oP "\[total_rows=[0-9]+\] \[total_rps=[0-9]+\] \[rps=[0-9]+\] \[master_position=.*\]" "$worker_log" >"$status_file2"
	status_count2=$(wc -l "$status_file2" | awk '{print $1}')
	[ "$status_count2" -ge 1 ]
	echo "check sync unit print status success"
}

cleanup_data "$TEST_NAME"
# also cleanup dm processes in case of last run failed
cleanup_process "$@"
run "$@"
# Stop the DM processes before inspecting the log; check_print_status waits
# for the "dm-worker exit" line.
cleanup_process "$@"

check_print_status "$@"

echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>"