github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/tests/integration_tests/availability/capture.sh (about)

     1  #!/bin/bash
     2  
     3  set -eu
     4  
     5  CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
     6  source $CUR/../_utils/test_prepare
     7  WORK_DIR=$OUT_DIR/$TEST_NAME
     8  CDC_BINARY=cdc.test
     9  
    10  MAX_RETRIES=50
    11  
    12  function sql_check() {
    13  	# run check in sequence and short circuit principle, if error hanppens,
    14  	# the following statement will be not executed
    15  
    16  	# check table availability.
    17  	echo "run sql_check", ${DOWN_TIDB_HOST}
    18  	run_sql "SELECT id, val FROM test.availability1;" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT} &&
    19  		check_contains "id: 1" &&
    20  		check_contains "val: 1" &&
    21  		check_contains "id: 2" &&
    22  		check_contains "val: 22" &&
    23  		check_not_contains "id: 3"
    24  }
    25  export -f sql_check
    26  
    27  function check_result() {
    28  	ensure $MAX_RETRIES sql_check
    29  }
    30  
    31  function empty() {
    32  	sql=$*
    33  	run_sql "$sql" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT} &&
    34  		check_not_contains "id:"
    35  }
    36  
    37  function nonempty() {
    38  	sql=$*
    39  	run_sql "$sql" ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT} &&
    40  		check_contains "id:"
    41  }
    42  
    43  export -f empty
    44  export -f nonempty
    45  
    46  function test_capture_ha() {
    47  	test_kill_capture
    48  	test_hang_up_capture
    49  	test_expire_capture
    50  	check_result
    51  }
    52  
    53  # test_kill_capture starts two servers and kills the working one
    54  # We expect the task is rebalanced to the live capture and the data
    55  # continues to replicate.
    56  function test_kill_capture() {
    57  	echo "run test case test_kill_capture"
    58  	# start one server
    59  	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix test_kill_capture.server1
    60  
    61  	# ensure the server become the owner
    62  	ensure $MAX_RETRIES "$CDC_BINARY cli capture list 2>&1 | grep '\"is-owner\": true'"
    63  	owner_pid=$(ps -C $CDC_BINARY -o pid= | awk '{print $1}')
    64  	owner_id=$($CDC_BINARY cli capture list 2>&1 | awk -F '"' '/\"id/{print $4}')
    65  	echo "owner pid:" $owner_pid
    66  	echo "owner id" $owner_id
    67  
    68  	# wait for the tables to appear
    69  	check_table_exists test.availability1 ${DOWN_TIDB_HOST} ${DOWN_TIDB_PORT} 20
    70  
    71  	run_sql "INSERT INTO test.availability1(id, val) VALUES (1, 1);"
    72  	ensure $MAX_RETRIES nonempty 'select id, val from test.availability1 where id=1 and val=1'
    73  
    74  	# start the second capture
    75  	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --addr "127.0.0.1:8301" --logsuffix test_kill_capture.server2
    76  	ensure $MAX_RETRIES "$CDC_BINARY cli capture list 2>&1 | grep -v \"$owner_id\" | grep id"
    77  	capture_id=$($CDC_BINARY cli capture list 2>&1 | awk -F '"' '/\"id/{print $4}' | grep -v "$owner_id")
    78  
    79  	# kill the owner
    80  	kill -9 $owner_pid
    81  
    82  	run_sql "INSERT INTO test.availability1(id, val) VALUES (2, 2);"
    83  	ensure $MAX_RETRIES nonempty 'select id, val from test.availability1 where id=2 and val=2'
    84  
    85  	cleanup_process $CDC_BINARY
    86  }
    87  
    88  # test_hang_up_caputre starts two captures and hang up the working one by
    89  # send SIGSTOP signal to the process.
    90  # We expect the task is rebalanced to the live capture and the data continues
    91  # to replicate.
    92  function test_hang_up_capture() {
    93  	echo "run test case test_hang_up_capture"
    94  	# start one server
    95  	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix test_hang_up_capture.server1
    96  
    97  	# ensure the server become the owner
    98  	ensure $MAX_RETRIES "$CDC_BINARY cli capture list 2>&1 | grep '\"is-owner\": true'"
    99  	owner_pid=$(ps -C $CDC_BINARY -o pid= | awk '{print $1}')
   100  	owner_id=$($CDC_BINARY cli capture list 2>&1 | awk -F '"' '/\"id/{print $4}')
   101  	echo "owner pid:" $owner_pid
   102  	echo "owner id" $owner_id
   103  
   104  	# start the second capture
   105  	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --addr "127.0.0.1:8301" --logsuffix test_hang_up_capture.server2
   106  	ensure $MAX_RETRIES "$CDC_BINARY cli capture list 2>&1 | grep -v \"$owner_id\" | grep id"
   107  	capture_id=$($CDC_BINARY cli capture list 2>&1 | awk -F '"' '/\"id/{print $4}' | grep -v "$owner_id")
   108  
   109  	kill -STOP $owner_pid
   110  	run_sql "INSERT INTO test.availability1(id, val) VALUES (3, 3);"
   111  	ensure $MAX_RETRIES nonempty 'select id, val from test.availability1 where id=3 and val=3'
   112  	kill -CONT $owner_pid
   113  	cleanup_process $CDC_BINARY
   114  }
   115  
   116  # test_expire_capture start one server and then stop it unitl
   117  # the session expires, and then resume the server.
   118  # We expect the capture suicides itself and then recovers. The data
   119  # should be replicated after recovering.
   120  function test_expire_capture() {
   121  	echo "run test case test_expire_capture"
   122  	# start one server
   123  	run_cdc_server --workdir $WORK_DIR --binary $CDC_BINARY --logsuffix test_expire_capture.server1
   124  
   125  	# ensure the server become the owner
   126  	ensure $MAX_RETRIES "$CDC_BINARY cli capture list 2>&1 | grep '\"is-owner\": true'"
   127  	owner_pid=$(ps -C $CDC_BINARY -o pid= | awk '{print $1}')
   128  	owner_id=$($CDC_BINARY cli capture list 2>&1 | awk -F '"' '/\"id/{print $4}')
   129  	echo "owner pid:" $owner_pid
   130  	echo "owner id" $owner_id
   131  
   132  	# stop the owner
   133  	kill -SIGSTOP $owner_pid
   134  	echo "process status:" $(ps -h -p $owner_pid -o "s")
   135  
   136  	# ensure the session has expired
   137  	ensure $MAX_RETRIES "ETCDCTL_API=3 etcdctl get /tidb/cdc/default/__cdc_meta__/owner --prefix | grep -v '$owner_id'"
   138  
   139  	# resume the owner
   140  	kill -SIGCONT $owner_pid
   141  	echo "process status:" $(ps -h -p $owner_pid -o "s")
   142  
   143  	run_sql "UPDATE test.availability1 set val = 22 where id = 2;"
   144  	run_sql "DELETE from test.availability1 where id = 3;"
   145  	ensure $MAX_RETRIES nonempty 'select id, val from test.availability1 where id=2 and val=22'
   146  	cleanup_process $CDC_BINARY
   147  }