github.com/percona/percona-xtradb-cluster-operator@v1.14.0/e2e-tests/operator-self-healing-chaos/run (about)

     1  #!/bin/bash
     2  
     3  set -o errexit
     4  
     5  test_dir=$(realpath $(dirname $0))
     6  . ${test_dir}/../functions
     7  cluster="operator-chaos"
     8  
     9  set_debug
    10  
    11  fail_pod() {
    12  	local init_pod=$(get_operator_pod)
    13  	local restart_count_before=$(kubectl get pod ${init_pod} --namespace="${OPERATOR_NS:-$namespace}" -ojsonpath='{.status.containerStatuses[0].restartCount}')
    14  
    15  	yq eval '
    16  		.metadata.name = "chaos-operator-pod-failure" |
    17  		del(.spec.selector.pods.test-namespace) |
    18  		.spec.selector.pods.'$test_namespace'[0] = "'$init_pod'"' $conf_dir/chaos-pod-failure.yml \
    19  		| kubectl apply --namespace $test_namespace -f -
    20  	sleep 10
    21  
    22  	desc 'check if operator works fine: scale down from 5 to 3'
    23  	kubectl scale --replicas=3 pxc/$cluster
    24  	sleep 60
    25  
    26  	local pod=$(get_operator_pod)
    27  	local restart_count_after=$(kubectl get pod ${pod} --namespace="${OPERATOR_NS:-$namespace}" -ojsonpath='{.status.containerStatuses[0].restartCount}')
    28  	if [ "$init_pod" != "$pod" ]; then
    29  		echo "Operator pod was killed, when it should have just been restarted."
    30  		echo "Pod name before: $init_pod , pod name after test: $pod"
    31  		return 1
    32  	elif [ $restart_count_before -eq $restart_count_after ]; then
    33  		echo "Seems operator pod was not restarted when it should have been."
    34  		echo "Pod: $pod , restarts before: $restart_count_before , restarts after test: $restart_count_after"
    35  		return 1
    36  	fi
    37  
    38  	if [ -n "$OPERATOR_NS" ]; then
    39  		kubectl config set-context $(kubectl config current-context) --namespace="$OPERATOR_NS"
    40  	fi
    41  	desc "check if Pod started"
    42  	wait_pod $pod
    43  	kubectl config set-context $(kubectl config current-context) --namespace="$namespace"
    44  
    45  	desc "check scale down"
    46  	wait_for_delete pod/$cluster-pxc-3
    47  }
    48  
    49  network_loss() {
    50  	local pod=$(get_operator_pod)
    51  
    52  	yq eval '
    53  		.metadata.name = "chaos-operator-network" |
    54  		del(.spec.selector.pods.test-namespace) |
    55  		.spec.selector.pods.'$test_namespace'[0] = "'$pod'"' $conf_dir/chaos-network-loss.yml \
    56  		| kubectl apply --namespace $test_namespace -f -
    57  	sleep 10
    58  
    59  	desc 'check if operator works fine: scale up from 3 to 5'
    60  	kubectl scale --replicas=5 pxc/$cluster
    61  	sleep 10
    62  
    63  	if [ -n "$OPERATOR_NS" ]; then
    64  		kubectl config set-context $(kubectl config current-context) --namespace="$OPERATOR_NS"
    65  	fi
    66  	desc "check if Pod started"
    67  	wait_pod $pod
    68  	kubectl config set-context $(kubectl config current-context) --namespace="$namespace"
    69  
    70  	desc "check scale up"
    71  	wait_for_running "$cluster-pxc" 5
    72  }
    73  
    74  kill_pod() {
    75  	local init_pod=$(get_operator_pod)
    76  
    77  	yq eval '
    78  		.metadata.name = "chaos-operator-pod-kill" |
    79  		del(.spec.selector.pods.test-namespace) |
    80  		.spec.selector.pods.'$test_namespace'[0] = "'$init_pod'"' $conf_dir/chaos-pod-kill.yml \
    81  		| kubectl apply --namespace $test_namespace -f -
    82  	sleep 10
    83  
    84  	local pod=$(get_operator_pod)
    85  	if [ "$init_pod" == "$pod" ]; then
    86  		echo "operator pod was not killed! something went wrong."
    87  		return 1
    88  	fi
    89  
    90  	desc 'check if operator works fine: scale up from 3 to 5'
    91  	kubectl scale --replicas=5 pxc/$cluster
    92  	sleep 10
    93  
    94  	if [ -n "$OPERATOR_NS" ]; then
    95  		kubectl config set-context $(kubectl config current-context) --namespace="$OPERATOR_NS"
    96  	fi
    97  	desc "check if Pod started"
    98  	wait_pod $pod
    99  	kubectl config set-context $(kubectl config current-context) --namespace="$namespace"
   100  
   101  	desc "check scale up"
   102  	wait_for_running "$cluster-pxc" 5
   103  }
   104  
   105  main() {
   106  	create_infra $namespace
   107  
   108  	test_namespace=$namespace
   109  	if [ -n "$OPERATOR_NS" ]; then
   110  		kubectl patch clusterrole percona-xtradb-cluster-operator --type=json -p '[{"op":"remove","path":"/rules/1"}]'
   111  		kubectl delete validatingwebhookconfigurations.admissionregistration.k8s.io percona-xtradbcluster-webhook
   112  		test_namespace=$OPERATOR_NS
   113  	fi
   114  	deploy_chaos_mesh $test_namespace
   115  
   116  	desc 'create PXC cluster'
   117  	spinup_pxc "$cluster" "$test_dir/conf/$cluster.yml"
   118  
   119  	desc 'kill operator'
   120  	kill_pod
   121  
   122  	desc 'fail operator pod for 60s'
   123  	fail_pod
   124  
   125  	desc 'emulate bad network for 60s'
   126  	network_loss
   127  
   128  	destroy_chaos_mesh
   129  	destroy $namespace
   130  	desc "test passed"
   131  }
   132  
   133  main