github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/tests/availability/capture.sh (about)

     1  #!/bin/bash
     2  
     # Exit as soon as a command fails outside of a conditional context (errexit).
     set -e

     # Absolute directory of this script; used to locate the shared test helpers.
     CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
     # Quoted so that a checkout path containing whitespace or glob characters
     # still resolves to a single file name.
     source "$CUR/../_utils/test_prepare"
     # NOTE(review): OUT_DIR and TEST_NAME are not assigned in this file; presumably
     # test_prepare or the calling script provides them — confirm.
     WORK_DIR=$OUT_DIR/$TEST_NAME
     CDC_BINARY=cdc.test

     # Retry budget passed as the first argument to every `ensure` call in this file.
     MAX_RETRIES=50
    11  
    # sql_check verifies the final contents of test.availability1 downstream.
    # The steps run strictly in sequence and stop at the first failure, so once
    # an error happens none of the following checks is executed. The exit status
    # is that of the first failing step, or of the last check when all pass.
    sql_check() {
        local expected

        echo "run sql_check," ${DOWN_TIDB_HOST}

        # NOTE(review): host/port are expanded unquoted, exactly as before; quoting
        # them would pass empty arguments when they are unset — confirm how run_sql
        # treats that before changing it.
        run_sql "SELECT id, val FROM test.availability1;" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT} || return

        # Fragments that must be present (presumably matched against the output of
        # the run_sql call above — confirm against check_contains).
        for expected in "id: 1" "val: 1" "id: 2" "val: 22"; do
            check_contains "$expected" || return
        done

        # The row with id 3 must not be present.
        check_not_contains "id: 3"
    }
    # Exported so that a child bash process can invoke the function by name.
    export -f sql_check
    26  
    # check_result hands sql_check to `ensure` together with the MAX_RETRIES
    # budget and returns whatever status `ensure` reports.
    check_result() {
        ensure $MAX_RETRIES sql_check
    }
    30  
    # empty runs a SQL statement against the downstream TiDB and succeeds only
    # when run_sql succeeds and check_not_contains accepts "id:" (presumably
    # meaning the query returned no row — confirm against check_not_contains).
    # Arguments:
    #   $* - the words of the SQL statement; they are joined with the first
    #        character of IFS (a space by default) into a single statement.
    # Returns:
    #   the status of run_sql if it fails, otherwise that of check_not_contains.
    empty() {
        # Keep the statement local so a caller's own `sql` variable is not clobbered.
        local sql="$*"
        run_sql "$sql" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT} &&
            check_not_contains "id:"
    }
    36  
    # nonempty runs a SQL statement against the downstream TiDB and succeeds only
    # when run_sql succeeds and check_contains accepts "id:" (presumably meaning
    # the query returned at least one row — confirm against check_contains).
    # Arguments:
    #   $* - the words of the SQL statement; they are joined with the first
    #        character of IFS (a space by default) into a single statement.
    # Returns:
    #   the status of run_sql if it fails, otherwise that of check_contains.
    nonempty() {
        # Keep the statement local so a caller's own `sql` variable is not clobbered.
        local sql="$*"
        run_sql "$sql" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT} &&
            check_contains "id:"
    }
    42  
    # Export both row predicates so that child bash processes can call them by
    # name.
    export -f empty nonempty
    45  
    # test_capture_ha is the entry point of the capture availability suite: it
    # runs the three scenarios in a fixed order and then validates the final
    # downstream table contents with check_result.
    test_capture_ha() {
        local step
        for step in test_kill_capture test_hang_up_capture test_expire_capture check_result; do
            "$step"
        done
    }
    52  
    # test_kill_capture starts two captures and kills the owner with SIGKILL.
    # We expect the task to be rebalanced to the surviving capture and the data
    # to keep replicating.
    # Globals written: owner_pid, owner_id, capture_id.
    test_kill_capture() {
        echo "run test case test_kill_capture"
        # start one server
        run_cdc_server --workdir "$WORK_DIR" --binary "$CDC_BINARY" --logsuffix test_kill_capture.server1

        # ensure the server becomes the owner
        ensure "$MAX_RETRIES" "$CDC_BINARY cli capture list 2>&1 | grep '\"is-owner\": true'"
        owner_pid=$(ps -C "$CDC_BINARY" -o pid= | awk '{print $1}')
        # Match only the "id" key: a bare /id/ also selects every other line that
        # merely contains the substring "id" and would append its value to owner_id.
        owner_id=$("$CDC_BINARY" cli capture list 2>&1 | awk -F '"' '/"id"/{print $4}')
        echo "owner pid:" $owner_pid
        echo "owner id" $owner_id

        # wait for the tables to appear
        check_table_exists test.availability1 ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT} 20

        run_sql "INSERT INTO test.availability1(id, val) VALUES (1, 1);"
        ensure "$MAX_RETRIES" nonempty 'select id, val from test.availability1 where id=1 and val=1'

        # start the second capture and wait until an "id" entry other than the
        # owner's shows up in the capture list
        run_cdc_server --workdir "$WORK_DIR" --binary "$CDC_BINARY" --addr "127.0.0.1:8301" --logsuffix test_kill_capture.server2
        ensure "$MAX_RETRIES" "$CDC_BINARY cli capture list 2>&1 | grep -v \"$owner_id\" | grep '\"id\"'"
        # capture_id is not read again in this function; with errexit active the
        # assignment fails the run if no non-owner capture is listed.
        capture_id=$("$CDC_BINARY" cli capture list 2>&1 | awk -F '"' '/"id"/{print $4}' | grep -v "$owner_id")

        # kill the owner
        kill -9 $owner_pid

        # a row written after the kill must still reach the downstream
        run_sql "INSERT INTO test.availability1(id, val) VALUES (2, 2);"
        ensure "$MAX_RETRIES" nonempty 'select id, val from test.availability1 where id=2 and val=2'

        cleanup_process "$CDC_BINARY"
    }
    87  
    # test_hang_up_capture starts two captures and hangs up the owner by sending
    # it SIGSTOP.
    # We expect the task to be rebalanced to the live capture and the data to
    # keep replicating.
    # Globals written: owner_pid, owner_id, capture_id.
    test_hang_up_capture() {
        echo "run test case test_hang_up_capture"
        # start one server
        run_cdc_server --workdir "$WORK_DIR" --binary "$CDC_BINARY" --logsuffix test_hang_up_capture.server1

        # ensure the server becomes the owner
        ensure "$MAX_RETRIES" "$CDC_BINARY cli capture list 2>&1 | grep '\"is-owner\": true'"
        owner_pid=$(ps -C "$CDC_BINARY" -o pid= | awk '{print $1}')
        # Match only the "id" key: a bare /id/ also selects every other line that
        # merely contains the substring "id" and would append its value to owner_id.
        owner_id=$("$CDC_BINARY" cli capture list 2>&1 | awk -F '"' '/"id"/{print $4}')
        echo "owner pid:" $owner_pid
        echo "owner id" $owner_id

        # start the second capture and wait until an "id" entry other than the
        # owner's shows up in the capture list
        run_cdc_server --workdir "$WORK_DIR" --binary "$CDC_BINARY" --addr "127.0.0.1:8301" --logsuffix test_hang_up_capture.server2
        ensure "$MAX_RETRIES" "$CDC_BINARY cli capture list 2>&1 | grep -v \"$owner_id\" | grep '\"id\"'"
        # capture_id is not read again in this function; with errexit active the
        # assignment fails the run if no non-owner capture is listed.
        capture_id=$("$CDC_BINARY" cli capture list 2>&1 | awk -F '"' '/"id"/{print $4}' | grep -v "$owner_id")

        # freeze the owner, then check that a new row still reaches the downstream
        kill -STOP $owner_pid
        run_sql "INSERT INTO test.availability1(id, val) VALUES (3, 3);"
        ensure "$MAX_RETRIES" nonempty 'select id, val from test.availability1 where id=3 and val=3'
        # resume the frozen process before everything is torn down
        kill -CONT $owner_pid
        cleanup_process "$CDC_BINARY"
    }
   115  
   # test_expire_capture starts one server, stops it until the session expires,
   # and then resumes it.
   # We expect the capture to shut itself down and then recover. The data should
   # be replicated after the recovery.
   # Globals written: owner_pid, owner_id.
   test_expire_capture() {
       echo "run test case test_expire_capture"
       # start one server
       run_cdc_server --workdir "$WORK_DIR" --binary "$CDC_BINARY"

       # ensure the server becomes the owner
       ensure "$MAX_RETRIES" "$CDC_BINARY cli capture list 2>&1 | grep '\"is-owner\": true'"
       owner_pid=$(ps -C "$CDC_BINARY" -o pid= | awk '{print $1}')
       # Match only the "id" key: a bare /id/ also selects every other line that
       # merely contains the substring "id" and would append its value to owner_id.
       owner_id=$("$CDC_BINARY" cli capture list 2>&1 | awk -F '"' '/"id"/{print $4}')
       echo "owner pid:" $owner_pid
       echo "owner id" $owner_id

       # stop the owner
       kill -SIGSTOP $owner_pid
       echo "process status:" $(ps -h -p $owner_pid -o "s")

       # ensure the session has expired: wait until the capture list output
       # contains an empty list ("[]")
       ensure "$MAX_RETRIES" "$CDC_BINARY cli capture list 2>&1 | grep '\[\]'"

       # resume the owner
       kill -SIGCONT $owner_pid
       echo "process status:" $(ps -h -p $owner_pid -o "s")

       # changes made after the resume must reach the downstream
       run_sql "UPDATE test.availability1 set val = 22 where id = 2;"
       run_sql "DELETE from test.availability1 where id = 3;"
       ensure "$MAX_RETRIES" nonempty 'select id, val from test.availability1 where id=2 and val=22'
       cleanup_process "$CDC_BINARY"
   }
   147  }