github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/tests/suites/controller/enable_ha.sh (about)

     1  wait_for_controller_machines() {
     2  	amount=${1}
     3  
     4  	attempt=0
     5  	# shellcheck disable=SC2143
     6  	until [[ "$(juju machines -m controller --format=json | jq -r '.machines | .[] | .["juju-status"] | select(.current == "started") | .current' | wc -l | grep "${amount}")" ]]; do
     7  		echo "[+] (attempt ${attempt}) polling machines"
     8  		juju machines -m controller 2>&1 | sed 's/^/    | /g' || true
     9  		sleep "${SHORT_TIMEOUT}"
    10  		attempt=$((attempt + 1))
    11  
    12  		# Wait for roughly 16 minutes for a enable-ha. In the field it's known
    13  		# that enable-ha can take this long.
    14  		if [[ ${attempt} -gt 200 ]]; then
    15  			echo "enable-ha failed waiting for machines to start"
    16  			exit 1
    17  		fi
    18  	done
    19  
    20  	if [[ ${attempt} -gt 0 ]]; then
    21  		echo "[+] $(green 'Completed polling machines')"
    22  		juju machines -m controller 2>&1 | sed 's/^/    | /g'
    23  
    24  		sleep "${SHORT_TIMEOUT}"
    25  	fi
    26  }
    27  
    28  wait_for_controller_machines_tear_down() {
    29  	amount=${1}
    30  
    31  	attempt=0
    32  	# shellcheck disable=SC2143
    33  	until [[ "$(juju machines -m controller --format=json | jq -r '.machines | .[] | .["juju-status"] | select(.current == "started") | .current' | wc -l | grep "${amount}")" ]]; do
    34  		echo "[+] (attempt ${attempt}) polling started machines during ha tear down"
    35  		juju machines -m controller 2>&1 | sed 's/^/    | /g' || true
    36  		sleep "${SHORT_TIMEOUT}"
    37  		attempt=$((attempt + 1))
    38  
    39  		if [[ ${attempt} -gt 25 ]]; then
    40  			echo "enable-ha failed waiting for only 1 started machine"
    41  			exit 1
    42  		fi
    43  	done
    44  
    45  	attempt=0
    46  	# shellcheck disable=SC2143
    47  	until [[ "$(juju machines -m controller --format=json | jq -r '.machines | .[] | .["juju-status"] | select(.current == "stopped") | .current' | wc -l | grep 0)" ]]; do
    48  		echo "[+] (attempt ${attempt}) polling stopped machines during ha tear down"
    49  		juju machines -m controller 2>&1 | sed 's/^/    | /g' || true
    50  		sleep "${SHORT_TIMEOUT}"
    51  		attempt=$((attempt + 1))
    52  
    53  		if [[ ${attempt} -gt 25 ]]; then
    54  			echo "enable-ha failed waiting for machines to tear down"
    55  			exit 1
    56  		fi
    57  	done
    58  
    59  	if [[ "$(juju machines -m controller --format=json | jq -r '.machines | .[] | .["juju-status"] | select(.current == "error") | .current' | wc -l)" -gt 0 ]]; then
    60  		echo "machine in controller model with error during ha tear down"
    61  		juju machines -m controller 2>&1 | sed 's/^/    | /g' || true
    62  		exit 1
    63  	fi
    64  
    65  	if [[ ${attempt} -gt 0 ]]; then
    66  		echo "[+] $(green 'Completed polling machines')"
    67  		juju machines -m controller 2>&1 | sed 's/^/    | /g'
    68  
    69  		sleep "${SHORT_TIMEOUT}"
    70  	fi
    71  }
    72  
    73  wait_for_ha() {
    74  	amount=${1}
    75  
    76  	attempt=0
    77  	# shellcheck disable=SC2143
    78  	until [[ "$(juju show-controller --format=json | jq -r '.[] | .["controller-machines"] | .[] | select(.["ha-status"] == "ha-enabled") | .["instance-id"]' | wc -l | grep "${amount}")" ]]; do
    79  		echo "[+] (attempt ${attempt}) polling ha"
    80  		juju show-controller 2>&1 | sed 's/^/    | /g'
    81  		sleep "${SHORT_TIMEOUT}"
    82  		attempt=$((attempt + 1))
    83  
    84  		# Wait for roughly 16 minutes for a enable-ha. In the field it's known
    85  		# that enable-ha can take this long.
    86  		if [[ ${attempt} -gt 100 ]]; then
    87  			echo "enable-ha failed waiting for machines to start"
    88  			exit 1
    89  		fi
    90  	done
    91  
    92  	if [[ ${attempt} -gt 0 ]]; then
    93  		echo "[+] $(green 'Completed polling ha')"
    94  		juju show-controller 2>&1 | sed 's/^/    | /g'
    95  
    96  		sleep "${SHORT_TIMEOUT}"
    97  	fi
    98  }
    99  
   100  wait_for_controller_leader() {
   101  	# Since the institution of Dqlite for leases, we need to wait until the
   102  	# backstop workflow has run before we are functional with a single
   103  	# controller.
   104  	# A proxy for this is leadership determination. The command below will
   105  	# sometimes block for extended periods, other times we will be told that
   106  	# leadership can not be determined, so there is no fixed number of attempts
   107  	# that we can rely on.
   108  	# shellcheck disable=SC2143
   109  	until [[ "$(juju exec -m controller --unit controller/leader uptime | grep load)" ]]; do
   110  		echo "[+] waiting for controller leadership"
   111  	done
   112  }
   113  
   114  run_enable_ha() {
   115  	echo
   116  
   117  	file="${TEST_DIR}/enable_ha.log"
   118  
   119  	ensure "enable-ha" "${file}"
   120  
   121  	juju deploy jameinel-ubuntu-lite
   122  
   123  	juju enable-ha
   124  
   125  	wait_for_controller_machines 3
   126  	wait_for_ha 3
   127  
   128  	# Ensure all the units are fully deployed before trying to
   129  	# tear down HA. There is a window between when wait_for_ha
   130  	# returns and the controller units are fully deployed when
   131  	# remove-machine will fail. Wait for the config to be
   132  	# settled before trying to tear down.
   133  	juju switch controller
   134  	wait_for "controller" "$(idle_condition "controller" 0 0)"
   135  	wait_for "controller" "$(idle_condition "controller" 0 1)"
   136  	wait_for "controller" "$(idle_condition "controller" 0 2)"
   137  
   138  	juju switch enable-ha
   139  	juju remove-machine -m controller 1
   140  	juju remove-machine -m controller 2
   141  
   142  	wait_for_controller_machines_tear_down 1
   143  
   144  	# Ensure that we have no ha enabled machines.
   145  	juju show-controller --format=json | jq -r '.[] | .["controller-machines"] |  reduce(.[] | select(.["instance-id"] == null)) as $i (0;.+=1)' | grep 0
   146  
   147  	wait_for_controller_leader
   148  
   149  	destroy_model "enable-ha"
   150  }
   151  
   152  test_enable_ha() {
   153  	if [ -n "$(skip 'test_enable_ha')" ]; then
   154  		echo "==> SKIP: Asked to skip controller enable-ha tests"
   155  		return
   156  	fi
   157  
   158  	(
   159  		set_verbosity
   160  
   161  		cd .. || exit
   162  
   163  		run "run_enable_ha"
   164  	)
   165  }