# Integration test for `juju enable-ha`: scale the controller model up to
# 3 HA machines, verify ha-enabled status, then tear HA back down to a
# single controller machine.
#
# Provided by the sourcing test harness (not defined here):
#   SHORT_TIMEOUT, TEST_DIR, green, ensure, wait_for, idle_condition,
#   skip, set_verbosity, run, destroy_model.

# Print the number of machines in the controller model whose juju-status
# "current" field equals ${1} (e.g. "started", "stopped", "error").
# The state is passed to jq with --arg so it is never interpolated into
# the filter text.
controller_machines_in_state() {
	state=${1}

	juju machines -m controller --format=json |
		jq -r --arg state "${state}" '.machines | .[] | .["juju-status"] | select(.current == $state) | .current' |
		wc -l
}

# Block until exactly ${1} machines in the controller model report
# juju-status "started". Polls every SHORT_TIMEOUT seconds; exits 1
# after 200 attempts.
#
# NOTE: the count is compared numerically with -eq. The previous
# `wc -l | grep "${amount}"` substring match would also accept e.g. 13
# or 30 when waiting for 3 machines.
wait_for_controller_machines() {
	amount=${1}

	attempt=0
	until [[ "$(controller_machines_in_state "started")" -eq "${amount}" ]]; do
		echo "[+] (attempt ${attempt}) polling machines"
		juju machines -m controller 2>&1 | sed 's/^/ | /g' || true
		sleep "${SHORT_TIMEOUT}"
		attempt=$((attempt + 1))

		# Wait for roughly 16 minutes for an enable-ha. In the field it's known
		# that enable-ha can take this long.
		if [[ ${attempt} -gt 200 ]]; then
			echo "enable-ha failed waiting for machines to start"
			exit 1
		fi
	done

	if [[ ${attempt} -gt 0 ]]; then
		echo "[+] $(green 'Completed polling machines')"
		juju machines -m controller 2>&1 | sed 's/^/ | /g'

		sleep "${SHORT_TIMEOUT}"
	fi
}

# Block until the controller model is back down to exactly ${1} started
# machines and no machines remain in "stopped", then fail hard if any
# machine ended up in "error". Used after `juju remove-machine` removes
# the extra HA controllers.
#
# NOTE: counts are compared numerically with -eq. The previous
# `wc -l | grep 0` substring match for the stopped-machine check would
# also accept counts like 10 or 20.
wait_for_controller_machines_tear_down() {
	amount=${1}

	attempt=0
	until [[ "$(controller_machines_in_state "started")" -eq "${amount}" ]]; do
		echo "[+] (attempt ${attempt}) polling started machines during ha tear down"
		juju machines -m controller 2>&1 | sed 's/^/ | /g' || true
		sleep "${SHORT_TIMEOUT}"
		attempt=$((attempt + 1))

		if [[ ${attempt} -gt 25 ]]; then
			echo "enable-ha failed waiting for only 1 started machine"
			exit 1
		fi
	done

	attempt=0
	until [[ "$(controller_machines_in_state "stopped")" -eq 0 ]]; do
		echo "[+] (attempt ${attempt}) polling stopped machines during ha tear down"
		juju machines -m controller 2>&1 | sed 's/^/ | /g' || true
		sleep "${SHORT_TIMEOUT}"
		attempt=$((attempt + 1))

		if [[ ${attempt} -gt 25 ]]; then
			echo "enable-ha failed waiting for machines to tear down"
			exit 1
		fi
	done

	if [[ "$(controller_machines_in_state "error")" -gt 0 ]]; then
		echo "machine in controller model with error during ha tear down"
		juju machines -m controller 2>&1 | sed 's/^/ | /g' || true
		exit 1
	fi

	if [[ ${attempt} -gt 0 ]]; then
		echo "[+] $(green 'Completed polling machines')"
		juju machines -m controller 2>&1 | sed 's/^/ | /g'

		sleep "${SHORT_TIMEOUT}"
	fi
}

# Block until exactly ${1} controller machines report ha-status
# "ha-enabled" in `juju show-controller`. Exits 1 after 100 attempts.
#
# NOTE: compared numerically with -eq; the previous `wc -l | grep`
# substring match would also accept e.g. 13 when waiting for 3.
wait_for_ha() {
	amount=${1}

	attempt=0
	until [[ "$(juju show-controller --format=json | jq -r '.[] | .["controller-machines"] | .[] | select(.["ha-status"] == "ha-enabled") | .["instance-id"]' | wc -l)" -eq "${amount}" ]]; do
		echo "[+] (attempt ${attempt}) polling ha"
		juju show-controller 2>&1 | sed 's/^/ | /g'
		sleep "${SHORT_TIMEOUT}"
		attempt=$((attempt + 1))

		# Wait for roughly 16 minutes for an enable-ha. In the field it's known
		# that enable-ha can take this long.
		if [[ ${attempt} -gt 100 ]]; then
			echo "enable-ha failed waiting for machines to start"
			exit 1
		fi
	done

	if [[ ${attempt} -gt 0 ]]; then
		echo "[+] $(green 'Completed polling ha')"
		juju show-controller 2>&1 | sed 's/^/ | /g'

		sleep "${SHORT_TIMEOUT}"
	fi
}

wait_for_controller_leader() {
	# Since the institution of Dqlite for leases, we need to wait until the
	# backstop workflow has run before we are functional with a single
	# controller.
	# A proxy for this is leadership determination. The command below will
	# sometimes block for extended periods, other times we will be told that
	# leadership can not be determined, so there is no fixed number of attempts
	# that we can rely on.
	#
	# No sleep here on purpose: `juju exec` itself provides the back-pressure
	# by blocking; there is no bounded attempt count for the same reason.
	# shellcheck disable=SC2143
	until [[ "$(juju exec -m controller --unit controller/leader uptime | grep load)" ]]; do
		echo "[+] waiting for controller leadership"
	done
}

# Exercise enable-ha end to end: bootstrap a model, scale the controller
# to 3 HA machines, wait for all controller units to settle, then remove
# the extra machines and verify the tear-down leaves a healthy single
# controller with a determinable leader.
run_enable_ha() {
	echo

	file="${TEST_DIR}/enable_ha.log"

	ensure "enable-ha" "${file}"

	juju deploy jameinel-ubuntu-lite

	juju enable-ha

	wait_for_controller_machines 3
	wait_for_ha 3

	# Ensure all the units are fully deployed before trying to
	# tear down HA. There is a window between when wait_for_ha
	# returns and the controller units are fully deployed when
	# remove-machine will fail. Wait for the config to be
	# settled before trying to tear down.
	juju switch controller
	wait_for "controller" "$(idle_condition "controller" 0 0)"
	wait_for "controller" "$(idle_condition "controller" 0 1)"
	wait_for "controller" "$(idle_condition "controller" 0 2)"

	juju switch enable-ha
	juju remove-machine -m controller 1
	juju remove-machine -m controller 2

	wait_for_controller_machines_tear_down 1

	# Ensure that we have no ha enabled machines. The jq reduce counts
	# machines with a null instance-id; the trailing grep's exit status
	# fails the test (via the harness's errexit) unless the count is 0.
	juju show-controller --format=json | jq -r '.[] | .["controller-machines"] | reduce(.[] | select(.["instance-id"] == null)) as $i (0;.+=1)' | grep 0

	wait_for_controller_leader

	destroy_model "enable-ha"
}

# Harness entry point: run the enable-ha test unless skipped.
test_enable_ha() {
	if [ -n "$(skip 'test_enable_ha')" ]; then
		echo "==> SKIP: Asked to skip controller enable-ha tests"
		return
	fi

	(
		set_verbosity

		cd .. || exit

		run "run_enable_ha"
	)
}