github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/tests/retry_cancel/run.sh.todo (about)

     1  #!/bin/bash
     2  # TODO: this case can't run under new HA model, already remove from `other_integratin.txt`, add it back when supported.
     3  
     4  set -eu
     5  
     6  cur=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
     7  source $cur/../_utils/test_prepare
     8  WORK_DIR=$TEST_DIR/$TEST_NAME
     9  
    10  function run() {
    11      run_sql_file $cur/data/db1.prepare.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1
    12      check_contains 'Query OK, 2 rows affected'
    13      run_sql_file $cur/data/db2.prepare.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2
    14      check_contains 'Query OK, 2 rows affected'
    15  
    16      # inject error for loading data
    17      export GO_FAILPOINTS='github.com/pingcap/tiflow/dm/pkg/conn/retryableError=return("retry_cancel")'
    18  
    19      run_dm_master $WORK_DIR/master $MASTER_PORT $cur/conf/dm-master.toml
    20      check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT
    21      run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
    22      check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
    23      run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
    24      check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT
    25      # operate mysql config to worker
    26      cp $cur/conf/source1.yaml $WORK_DIR/source1.yaml
    27      cp $cur/conf/source2.yaml $WORK_DIR/source2.yaml
    28      sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker1/relay_log" $WORK_DIR/source1.yaml
    29      sed -i "/relay-binlog-name/i\relay-dir: $WORK_DIR/worker2/relay_log" $WORK_DIR/source2.yaml
    30      dmctl_operate_source create $WORK_DIR/source1.yaml $SOURCE_ID1
    31      dmctl_operate_source create $WORK_DIR/source2.yaml $SOURCE_ID2
    32  
    33      # start-task with retry_cancel enabled
    34      echo "1st time to start task"
    35      dmctl_start_task
    36  
    37      sleep 5 # should sleep > retryTimeout (now 3s)
    38  
    39      # query-task, it should still be running (retrying)
    40      run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    41          "query-status test" \
    42          "\"stage\": \"Running\"" 4
    43  
    44      # check log, retrying in load unit
    45      check_log_contains $WORK_DIR/worker1/log/dm-worker.log '\["execute statements"\] \[task=test\] \[unit=load\] \[retry=0\] \[queries="\[CREATE DATABASE `retry_cancel`;\]"\]'
    46  
    47      # stop-task, should not block too much time
    48      start_time=$(date +%s)
    49      dmctl_stop_task test
    50      duration=$(( $(date +%s)-$start_time ))
    51      if [[ $duration -gt 3 ]]; then
    52          echo "stop-task tasks for full import too long duration $duration"
    53          exit 1
    54      fi
    55  
    56      # stop DM-worker, then update failpoint for checkpoint
    57      kill_dm_worker
    58      export GO_FAILPOINTS='github.com/pingcap/tiflow/dm/pkg/conn/retryableError=return("UPDATE `dm_meta`.`test_loader_checkpoint`")'
    59  
    60      # start DM-worker again
    61      run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
    62      check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
    63      run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
    64      check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT
    65      sleep 5 # wait gRPC from DM-master to DM-worker established again
    66  
    67      echo "2nd time to start task"
    68      dmctl_start_task
    69  
    70      sleep 5 # should sleep > retryTimeout (now 3s)
    71  
    72      # query-task, it should still be running (retrying)
    73      run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
    74          "query-status test" \
    75          "\"stage\": \"Running\"" 4
    76  
    77      # check log, retrying in load unit
    78      check_log_contains $WORK_DIR/worker1/log/dm-worker.log 'Error 1213: failpoint inject retryable error for UPDATE `dm_meta`.`test_loader_checkpoint`'
    79  
    80      # stop-task, should not block too much time
    81      start_time=$(date +%s)
    82      dmctl_stop_task test
    83      duration=$(( $(date +%s)-$start_time ))
    84      if [[ $duration -gt 3 ]]; then
    85          echo "stop-task tasks for updating loader checkpoint too long duration $duration"
    86          exit 1
    87      fi
    88  
    89      # stop DM-worker, then disable failponits
    90      kill_dm_worker
    91      export GO_FAILPOINTS=''
    92  
    93      # start DM-worker again
    94      run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
    95      check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
    96      run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
    97      check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT
    98      sleep 5 # wait gRPC from DM-master to DM-worker established again
    99  
   100      # start-task with retry_cancel disabled
   101      echo "3st time to start task"
   102      dmctl_start_task
   103  
   104      # use sync_diff_inspector to check full dump loader
   105      check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
   106  
   107      # ---------- test for incremental replication ----------
   108      # stop DM-worker, then enable failponits
   109      kill_dm_worker
   110      export GO_FAILPOINTS="github.com/pingcap/tiflow/dm/pkg/conn/retryableError=return(\"retry_cancel\")"
   111  
   112      # run sql files to trigger incremental replication
   113      run_sql_file $cur/data/db1.increment.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1
   114      run_sql_file $cur/data/db2.increment.sql $MYSQL_HOST2 $MYSQL_PORT2 $MYSQL_PASSWORD2
   115  
   116      # start DM-worker again
   117      run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
   118      check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
   119      run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
   120      check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT
   121  
   122      sleep 5
   123      echo "start task for incremental replication"
   124      run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   125          "start-task $cur/conf/dm-task.yaml" \
   126          "\"result\": true" 1 \
   127          "start sub task test: sub task test already exists" 2
   128  
   129      sleep 5 # should sleep > retryTimeout (now 3s)
   130  
   131      # query-task, it should still be running (retrying)
   132      run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \
   133          "query-status test" \
   134          "\"stage\": \"Running\"" 4
   135  
   136      # check log, retrying in binlog replication unit
   137      check_log_contains $WORK_DIR/worker1/log/dm-worker.log '\["execute statements"\] \[task=test\] \[unit="binlog replication"\] \[retry=0\] \[queries="\[REPLACE INTO `retry_cancel`'
   138  
   139      # stop-task, should not block too much time
   140      start_time=$(date +%s)
   141      dmctl_stop_task test
   142      duration=$(( $(date +%s)-$start_time ))
   143      if [[ $duration -gt 3 ]]; then
   144          echo "stop-task tasks for incremental replication too long duration $duration"
   145          exit 1
   146      fi
   147  
   148      # stop DM-worker, then disable failponits
   149      kill_dm_worker
   150      export GO_FAILPOINTS=''
   151  
   152      # start DM-worker again
   153      run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml
   154      check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT
   155      run_dm_worker $WORK_DIR/worker2 $WORKER2_PORT $cur/conf/dm-worker2.toml
   156      check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER2_PORT
   157      sleep 5 # wait gRPC from DM-master to DM-worker established again
   158  
   159      # start-task with retry_cancel disabled
   160      echo "5th time to start task"
   161      dmctl_start_task
   162  
   163      # use sync_diff_inspector to check data now!
   164      check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
   165  }
   166  
   167  cleanup_data retry_cancel
   168  # also cleanup dm processes in case of last run failed
   169  cleanup_process $*
   170  run $*
   171  cleanup_process $*
   172  
   173  echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>"