#!/bin/bash
# Origin: github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/tests/lightning_load_task/run.sh
#
# Integration test for DM load tasks: it checks how sources are re-bound
# between workers while load tasks are in progress.
#
# The helper commands used below (run_dm_ctl_with_retry, kill_process,
# check_port_offline, run_dm_worker, check_rpc_alive, ...) and variables such
# as TEST_DIR, TEST_NAME, MASTER_PORT and WORKERn_PORT are not defined in this
# file; presumably they come from the sourced _utils/test_prepare -- confirm.

set -eu

cur=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# DATA_DIR, CONF_DIR and API_VERSION are not referenced in this script itself;
# NOTE(review): possibly consumed by the sourced helpers -- confirm before removing.
DATA_DIR=$cur/../load_task/data
CONF_DIR=$cur/../load_task/conf
source "$cur/../_utils/test_prepare"
WORK_DIR=$TEST_DIR/$TEST_NAME
API_VERSION="v1alpha1"
WORKER1="worker1"
WORKER2="worker2"
WORKER3="worker3"

# Take worker1 offline while load_task1 is loading, verify that source1 moves
# to worker3 and that query-status reports the worker mismatch, then bring
# worker1 back and verify that source1 is transferred back to it.
# Globals:   WORK_DIR, cur, MASTER_PORT, WORKER1_PORT, WORKER1, WORKER3 (read),
#            GO_FAILPOINTS (exported)
# Arguments: none
function test_worker_restart() {
	echo "test worker restart"
	# worker1 offline
	kill_process dm-worker1
	check_port_offline "$WORKER1_PORT" 20

	# source1 is bound to worker3
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"list-member -w -n worker3" \
		"\"stage\": \"bound\"" 1 \
		"\"source\": \"mysql-replica-01\"" 1
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"list-member -w -n worker1" \
		"\"stage\": \"offline\"" 1

	# load_task1 must report that it is now on a different worker
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"query-status load_task1" \
		"different worker in load stage, previous worker: $WORKER1, current worker: $WORKER3" 1 \
		"Please check if the previous worker is online." 1

	# worker1 online (restarted with the load_task1 failpoint set again)
	export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/loader/LoadDataSlowDownByTask=return(\"load_task1\")"
	run_dm_worker "$WORK_DIR/worker1" "$WORKER1_PORT" "$cur/conf/dm-worker1.toml"
	check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER1_PORT"

	# transfer to worker1
	check_log_contain_with_retry 'transfer source and worker.*worker1.*worker3.*mysql-replica-01' "$WORK_DIR/master/log/dm-master.log"
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"list-member -w -n worker3" \
		"\"stage\": \"free\"" 1
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"list-member -w -n worker1" \
		"\"stage\": \"bound\"" 1 \
		"\"source\": \"mysql-replica-01\"" 1

	# both tasks are expected to show the Load and Sync units again
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"query-status load_task1" \
		"\"unit\": \"Load\"" 1 \
		"\"unit\": \"Sync\"" 1
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"query-status load_task2" \
		"\"unit\": \"Load\"" 1 \
		"\"unit\": \"Sync\"" 1
}

# This almost never happens in practice, since users rarely start a load task
# after another load task has failed.
# Drive two sources through a chain of worker outages while extra load tasks
# (load_task3, load_task4) are started on the temporarily bound workers, and
# verify at each step which worker every source is bound to.
# Globals:   WORK_DIR, cur, MASTER_PORT, WORKER1_PORT, WORKER2_PORT,
#            WORKER3_PORT, WORKER1, WORKER2, WORKER3 (read),
#            GO_FAILPOINTS (exported)
# Arguments: none
function test_transfer_two_sources() {
	echo "test_transfer_two_sources"
	# worker2 offline
	kill_process dm-worker2
	check_port_offline "$WORKER2_PORT" 20

	# source2 bound to worker3
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"list-member -w -n worker3" \
		"\"stage\": \"bound\"" 1 \
		"\"source\": \"mysql-replica-02\"" 1
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"query-status load_task2" \
		"different worker in load stage, previous worker: $WORKER2, current worker: $WORKER3" 1

	# start load task for worker3
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"start-task $cur/conf/dm-task3.yaml --remove-meta" \
		"\"result\": true" 2
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"query-status load_task3" \
		"\"unit\": \"Load\"" 1

	# worker2 online
	export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/loader/LoadDataSlowDown=sleep(15000)"
	run_dm_worker "$WORK_DIR/worker2" "$WORKER2_PORT" "$cur/conf/dm-worker2.toml"
	check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER2_PORT"

	# worker2 free since (worker3, source2) has load task(load_task3)
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"list-member -w -n worker2" \
		"\"stage\": \"free\"" 1

	# worker1 offline
	kill_process dm-worker1
	check_port_offline "$WORKER1_PORT" 20

	# source1 is bound to worker2
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"list-member -w -n worker2" \
		"\"stage\": \"bound\"" 1 \
		"\"source\": \"mysql-replica-01\"" 1

	# start load_task4 on worker2
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"start-task $cur/conf/dm-task4.yaml --remove-meta" \
		"\"result\": true" 2
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"query-status load_task4" \
		"\"unit\": \"Load\"" 1

	# worker1 online
	export GO_FAILPOINTS=""
	run_dm_worker "$WORK_DIR/worker1" "$WORKER1_PORT" "$cur/conf/dm-worker1.toml"
	check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER1_PORT"

	# worker1 free since (worker2, source1) has load task(load_task4)
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"list-member -w -n worker1" \
		"\"stage\": \"free\"" 1

	# now, worker2 is waiting for worker3 to finish load_task3, and worker1 is
	# waiting for worker2 to finish load_task4
	# worker3 offline
	kill_process dm-worker3
	check_port_offline "$WORKER3_PORT" 20

	# source2 is bound to worker1
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"list-member -w -n worker1" \
		"\"stage\": \"bound\"" 1 \
		"\"source\": \"mysql-replica-02\"" 1

	# (worker1, source2), (worker2, source1)
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"query-status load_task1" \
		"different worker in load stage, previous worker: $WORKER1, current worker: $WORKER2" 1
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"query-status load_task2" \
		"different worker in load stage, previous worker: $WORKER2, current worker: $WORKER1" 1

	# worker2 finishes load_task4
	# master transfers (worker1, source2), (worker2, source1) to (worker1, source1), (worker2, source2)
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"list-member -w -n worker1" \
		"\"stage\": \"bound\"" 1 \
		"\"source\": \"mysql-replica-01\"" 1
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"list-member -w -n worker2" \
		"\"stage\": \"bound\"" 1 \
		"\"source\": \"mysql-replica-02\"" 1

	# task1, 2, 4 running, task3 fail
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"query-status" \
		"\"taskStatus\": \"Running\"" 3 \
		"taskStatus.*Error" 1

	# worker3 online
	export GO_FAILPOINTS=""
	run_dm_worker "$WORK_DIR/worker3" "$WORKER3_PORT" "$cur/conf/dm-worker3.toml"
	check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER3_PORT"

	# source2 is bound to worker3 since load_task3
	# NOTE(review): the check below queries worker2, not worker3, which does not
	# match the comment above -- confirm which worker is intended.
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"list-member -w -n worker2" \
		"\"stage\": \"bound\"" 1 \
		"\"source\": \"mysql-replica-02\"" 1

	# all tasks running
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"query-status" \
		"\"taskStatus\": \"Running\"" 4
}

# Test entry point: load the prepare data, start the master and three workers
# (each with its own LoadDataSlowDownByTask failpoint), register the two
# sources, start load_task1/load_task2, run both scenarios above, then apply
# the incremental data and run the four sync-diff checks.
# Globals:   WORK_DIR, cur, MYSQL_HOSTn/MYSQL_PORTn/MYSQL_PASSWORDn,
#            MASTER_PORT, MASTER_PORT1, WORKERn_PORT, SOURCE_ID1, SOURCE_ID2
#            (read), GO_FAILPOINTS (exported)
# Arguments: accepted but not used by this function
function run() {
	echo "import prepare data"
	run_sql_file "$cur/data/db1.prepare.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1"
	check_contains 'Query OK, 2 rows affected'
	run_sql_file "$cur/data/db2.prepare.sql" "$MYSQL_HOST2" "$MYSQL_PORT2" "$MYSQL_PASSWORD2"
	check_contains 'Query OK, 3 rows affected'

	echo "start DM master, workers and sources"
	run_dm_master "$WORK_DIR/master" "$MASTER_PORT1" "$cur/conf/dm-master.toml"
	check_rpc_alive "$cur/../bin/check_master_online" "127.0.0.1:$MASTER_PORT1"

	# worker1 loading load_task1
	export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/loader/LoadDataSlowDownByTask=return(\"load_task1\")"
	run_dm_worker "$WORK_DIR/worker1" "$WORKER1_PORT" "$cur/conf/dm-worker1.toml"
	check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER1_PORT"
	cp "$cur/conf/source1.yaml" "$WORK_DIR/source1.yaml"
	# insert a relay-dir entry just before the relay-binlog-name line
	sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker1/relay_log" "$WORK_DIR/source1.yaml"
	dmctl_operate_source create "$WORK_DIR/source1.yaml" "$SOURCE_ID1"

	# worker2 loading load_task2
	export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/loader/LoadDataSlowDownByTask=return(\"load_task2\")"
	run_dm_worker "$WORK_DIR/worker2" "$WORKER2_PORT" "$cur/conf/dm-worker2.toml"
	check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER2_PORT"
	cp "$cur/conf/source2.yaml" "$WORK_DIR/source2.yaml"
	sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker2/relay_log" "$WORK_DIR/source2.yaml"
	dmctl_operate_source create "$WORK_DIR/source2.yaml" "$SOURCE_ID2"

	# worker3 loading load_task3
	export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/loader/LoadDataSlowDownByTask=return(\"load_task3\")"
	run_dm_worker "$WORK_DIR/worker3" "$WORKER3_PORT" "$cur/conf/dm-worker3.toml"
	check_rpc_alive "$cur/../bin/check_worker_online" "127.0.0.1:$WORKER3_PORT"

	echo "start DM task"
	dmctl_start_task "$cur/conf/dm-task.yaml" "--remove-meta"
	dmctl_start_task "$cur/conf/dm-task2.yaml" "--remove-meta"

	# wait until the failpoint has been hit on both loading workers
	check_log_contain_with_retry 'inject failpoint LoadDataSlowDownByTask in lightning loader' "$WORK_DIR/worker1/log/dm-worker.log"
	check_log_contain_with_retry 'inject failpoint LoadDataSlowDownByTask in lightning loader' "$WORK_DIR/worker2/log/dm-worker.log"
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"query-status load_task1" \
		"\"unit\": \"Load\"" 1 \
		"\"unit\": \"Sync\"" 1
	run_dm_ctl_with_retry "$WORK_DIR" "127.0.0.1:$MASTER_PORT" \
		"query-status load_task2" \
		"\"unit\": \"Load\"" 1 \
		"\"unit\": \"Sync\"" 1

	test_worker_restart

	test_transfer_two_sources

	run_sql_file "$cur/data/db1.increment.sql" "$MYSQL_HOST1" "$MYSQL_PORT1" "$MYSQL_PASSWORD1"
	run_sql_file "$cur/data/db2.increment.sql" "$MYSQL_HOST2" "$MYSQL_PORT2" "$MYSQL_PASSWORD2"
	check_sync_diff "$WORK_DIR" "$cur/conf/diff_config1.toml"
	check_sync_diff "$WORK_DIR" "$cur/conf/diff_config2.toml"
	check_sync_diff "$WORK_DIR" "$cur/conf/diff_config3.toml"
	check_sync_diff "$WORK_DIR" "$cur/conf/diff_config4.toml"
}

cleanup_data load_task1
cleanup_data load_task2
cleanup_data load_task3
cleanup_data load_task4
# also clean up dm processes in case the last run failed
# "$@" forwards the script arguments unchanged instead of re-splitting them
cleanup_process "$@"
run "$@"
cleanup_process "$@"

echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>"