#!/bin/bash
# Origin: github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/tests/integration_tests/availability/capture.sh
#
# Capture availability test cases. This file only defines functions and
# settings; nothing here invokes test_capture_ha (presumably a runner script
# sources this file and calls it -- verify against the caller).

set -eu

CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# OUT_DIR, TEST_NAME and the helper commands used below (run_sql, ensure,
# check_contains, ...) are not defined in this file; presumably they come from
# test_prepare or the test harness -- verify.
source "$CUR/../_utils/test_prepare"
WORK_DIR=$OUT_DIR/$TEST_NAME
CDC_BINARY=cdc.test

MAX_RETRIES=50

# sql_check queries the downstream table and verifies its final content:
# rows (id 1, val 1) and (id 2, val 22) are expected, and no row with id 3.
# Globals: DOWN_TIDB_HOST, DOWN_TIDB_PORT (read)
# Returns: 0 only if the query and every check succeed.
function sql_check() {
  # run check in sequence and short circuit principle, if error happens,
  # the following statement will be not executed

  # check table availability.
  echo "run sql_check", "${DOWN_TIDB_HOST}"
  run_sql "SELECT id, val FROM test.availability1;" "${DOWN_TIDB_HOST}" "${DOWN_TIDB_PORT}" &&
    check_contains "id: 1" &&
    check_contains "val: 1" &&
    check_contains "id: 2" &&
    check_contains "val: 22" &&
    check_not_contains "id: 3"
}
# Exported so that child bash processes can call it (presumably `ensure`
# runs its command in a child shell -- verify).
export -f sql_check

# check_result retries sql_check up to MAX_RETRIES times via `ensure`.
function check_result() {
  ensure "$MAX_RETRIES" sql_check
}

# empty runs the SQL statement formed by joining all arguments and succeeds
# when the query runs and check_not_contains "id:" passes.
# Globals:   DOWN_TIDB_HOST, DOWN_TIDB_PORT (read)
# Arguments: $* - the SQL statement (words are joined with spaces)
function empty() {
  local sql="$*"
  run_sql "$sql" "${DOWN_TIDB_HOST}" "${DOWN_TIDB_PORT}" &&
    check_not_contains "id:"
}

# nonempty runs the SQL statement formed by joining all arguments and succeeds
# when the query runs and check_contains "id:" passes.
# Globals:   DOWN_TIDB_HOST, DOWN_TIDB_PORT (read)
# Arguments: $* - the SQL statement (words are joined with spaces)
function nonempty() {
  local sql="$*"
  run_sql "$sql" "${DOWN_TIDB_HOST}" "${DOWN_TIDB_PORT}" &&
    check_contains "id:"
}

export -f empty
export -f nonempty

# test_capture_ha runs the three capture availability cases in order and then
# verifies the final downstream table content with check_result.
function test_capture_ha() {
  test_kill_capture
  test_hang_up_capture
  test_expire_capture
  check_result
}

# test_kill_capture starts two servers and kills the working one
# We expect the task is rebalanced to the live capture and the data
# continues to replicate.
function test_kill_capture() {
  # Globals read:    WORK_DIR, CDC_BINARY, MAX_RETRIES, DOWN_TIDB_HOST,
  #                  DOWN_TIDB_PORT
  # Globals written: owner_pid, owner_id, capture_id
  # Inserts row (1, 1) while only the first server runs, starts a second
  # server, kills the first with SIGKILL, then inserts row (2, 2) and waits
  # for it to show up downstream.
  echo "run test case test_kill_capture"
  # start one server
  run_cdc_server --workdir "$WORK_DIR" --binary "$CDC_BINARY" --logsuffix test_kill_capture.server1

  # ensure the server become the owner
  ensure "$MAX_RETRIES" "$CDC_BINARY cli capture list 2>&1 | grep '\"is-owner\": true'"
  owner_pid=$(ps -C "$CDC_BINARY" -o pid= | awk '{print $1}')
  # With '"' as the field separator, field 4 of a line containing "id is the
  # quoted value that follows the key.
  owner_id=$("$CDC_BINARY" cli capture list 2>&1 | awk -F '"' '/"id/{print $4}')
  echo "owner pid:" "$owner_pid"
  echo "owner id" "$owner_id"

  # wait for the tables to appear
  check_table_exists test.availability1 "${DOWN_TIDB_HOST}" "${DOWN_TIDB_PORT}" 20

  run_sql "INSERT INTO test.availability1(id, val) VALUES (1, 1);"
  ensure "$MAX_RETRIES" nonempty 'select id, val from test.availability1 where id=1 and val=1'

  # start the second capture
  run_cdc_server --workdir "$WORK_DIR" --binary "$CDC_BINARY" --addr "127.0.0.1:8301" --logsuffix test_kill_capture.server2
  # wait until the capture list shows an id line that is not the owner's
  ensure "$MAX_RETRIES" "$CDC_BINARY cli capture list 2>&1 | grep -v \"$owner_id\" | grep id"
  capture_id=$("$CDC_BINARY" cli capture list 2>&1 | awk -F '"' '/"id/{print $4}' | grep -v "$owner_id")

  # kill the owner
  # Left unquoted on purpose: if ps reported several PIDs they are still
  # split into separate arguments, as before.
  # shellcheck disable=SC2086
  kill -9 $owner_pid

  run_sql "INSERT INTO test.availability1(id, val) VALUES (2, 2);"
  ensure "$MAX_RETRIES" nonempty 'select id, val from test.availability1 where id=2 and val=2'

  cleanup_process "$CDC_BINARY"
}

# test_hang_up_capture starts two captures and hangs up the working one by
# sending the SIGSTOP signal to the process.
# We expect the task is rebalanced to the live capture and the data continues
# to replicate.
function test_hang_up_capture() {
  # Globals read:    WORK_DIR, CDC_BINARY, MAX_RETRIES
  # Globals written: owner_pid, owner_id, capture_id
  # Starts two servers, suspends the first one with SIGSTOP, inserts row
  # (3, 3) and waits for it to show up downstream, then resumes the
  # suspended process before cleaning up.
  echo "run test case test_hang_up_capture"
  # start one server
  run_cdc_server --workdir "$WORK_DIR" --binary "$CDC_BINARY" --logsuffix test_hang_up_capture.server1

  # ensure the server become the owner
  ensure "$MAX_RETRIES" "$CDC_BINARY cli capture list 2>&1 | grep '\"is-owner\": true'"
  owner_pid=$(ps -C "$CDC_BINARY" -o pid= | awk '{print $1}')
  # With '"' as the field separator, field 4 of a line containing "id is the
  # quoted value that follows the key.
  owner_id=$("$CDC_BINARY" cli capture list 2>&1 | awk -F '"' '/"id/{print $4}')
  echo "owner pid:" "$owner_pid"
  echo "owner id" "$owner_id"

  # start the second capture
  run_cdc_server --workdir "$WORK_DIR" --binary "$CDC_BINARY" --addr "127.0.0.1:8301" --logsuffix test_hang_up_capture.server2
  # wait until the capture list shows an id line that is not the owner's
  ensure "$MAX_RETRIES" "$CDC_BINARY cli capture list 2>&1 | grep -v \"$owner_id\" | grep id"
  capture_id=$("$CDC_BINARY" cli capture list 2>&1 | awk -F '"' '/"id/{print $4}' | grep -v "$owner_id")

  # $owner_pid is left unquoted on purpose in both kill calls: if ps reported
  # several PIDs they are still split into separate arguments, as before.
  # shellcheck disable=SC2086
  kill -STOP $owner_pid
  run_sql "INSERT INTO test.availability1(id, val) VALUES (3, 3);"
  ensure "$MAX_RETRIES" nonempty 'select id, val from test.availability1 where id=3 and val=3'
  # shellcheck disable=SC2086
  kill -CONT $owner_pid
  cleanup_process "$CDC_BINARY"
}

# test_expire_capture starts one server and then stops it until
# the session expires, and then resumes the server.
# We expect the capture to shut itself down and then recover. The data
# should be replicated after recovering.
function test_expire_capture() {
  # Globals read:    WORK_DIR, CDC_BINARY, MAX_RETRIES
  # Globals written: owner_pid, owner_id
  # Starts one server, suspends it with SIGSTOP, polls etcd, resumes it with
  # SIGCONT, then updates row id=2 to val=22, deletes row id=3 and waits for
  # the update to show up downstream.
  echo "run test case test_expire_capture"
  # start one server
  run_cdc_server --workdir "$WORK_DIR" --binary "$CDC_BINARY" --logsuffix test_expire_capture.server1

  # ensure the server become the owner
  ensure "$MAX_RETRIES" "$CDC_BINARY cli capture list 2>&1 | grep '\"is-owner\": true'"
  owner_pid=$(ps -C "$CDC_BINARY" -o pid= | awk '{print $1}')
  # With '"' as the field separator, field 4 of a line containing "id is the
  # quoted value that follows the key.
  owner_id=$("$CDC_BINARY" cli capture list 2>&1 | awk -F '"' '/"id/{print $4}')
  echo "owner pid:" "$owner_pid"
  echo "owner id" "$owner_id"

  # stop the owner
  # $owner_pid is left unquoted on purpose in the kill/ps calls below: if ps
  # reported several PIDs they are still split into separate arguments.
  # shellcheck disable=SC2086
  kill -SIGSTOP $owner_pid
  # shellcheck disable=SC2046,SC2086
  echo "process status:" $(ps -h -p $owner_pid -o "s")

  # ensure the session has expired
  # NOTE(review): grep -v succeeds as soon as any output line lacks the owner
  # id. If etcdctl prints the key on its own line, this may pass before the
  # session has actually expired -- confirm against real etcdctl output.
  ensure "$MAX_RETRIES" "ETCDCTL_API=3 etcdctl get /tidb/cdc/default/__cdc_meta__/owner --prefix | grep -v '$owner_id'"

  # resume the owner
  # shellcheck disable=SC2086
  kill -SIGCONT $owner_pid
  # shellcheck disable=SC2046,SC2086
  echo "process status:" $(ps -h -p $owner_pid -o "s")

  run_sql "UPDATE test.availability1 set val = 22 where id = 2;"
  run_sql "DELETE from test.availability1 where id = 3;"
  ensure "$MAX_RETRIES" nonempty 'select id, val from test.availability1 where id=2 and val=22'
  cleanup_process "$CDC_BINARY"
}