#!/bin/bash
#
# e2e-tests/operator-self-healing-chaos/run
#
# Checks that the operator keeps reconciling a PXC cluster while chaos
# experiments are applied to the operator pod itself: pod kill, pod failure
# and network loss.
#
# Names used here but not defined in this file (namespace, conf_dir,
# OPERATOR_NS, desc, get_operator_pod, wait_pod, wait_for_delete,
# wait_for_running, create_infra, deploy_chaos_mesh, destroy_chaos_mesh,
# spinup_pxc, destroy, set_debug) are expected to come from the sourced
# ../functions file or from the environment.

set -o errexit

test_dir=$(realpath "$(dirname "$0")")
. "${test_dir}/../functions"
cluster="operator-chaos"

set_debug

# Renders a chaos manifest so that it targets a single operator pod and
# applies it in $test_namespace.
# Globals:   conf_dir, test_namespace (read)
# Arguments: $1 - value for .metadata.name of the chaos object
#            $2 - manifest file name inside $conf_dir
#            $3 - name of the pod to target
_chaos_apply_to_operator() {
	local chaos_name=$1
	local manifest=$2
	local target_pod=$3

	# The manifest's placeholder "test-namespace" selector key is removed and
	# replaced with a key named after the real namespace.
	yq eval '
		.metadata.name = "'"$chaos_name"'" |
		del(.spec.selector.pods.test-namespace) |
		.spec.selector.pods.'"$test_namespace"'[0] = "'"$target_pod"'"' "$conf_dir/$manifest" \
		| kubectl apply --namespace "$test_namespace" -f -
}

# Waits for the given operator pod via wait_pod. When OPERATOR_NS is set, the
# current kubectl context is pointed at it for the duration of the wait; the
# context is always set back to $namespace afterwards.
# Globals:   OPERATOR_NS, namespace (read)
# Arguments: $1 - operator pod name
_chaos_wait_operator_pod() {
	local operator_pod=$1

	if [ -n "$OPERATOR_NS" ]; then
		kubectl config set-context "$(kubectl config current-context)" --namespace="$OPERATOR_NS"
	fi
	desc "check if Pod started"
	wait_pod "$operator_pod"
	kubectl config set-context "$(kubectl config current-context)" --namespace="$namespace"
}

# Applies the pod-failure chaos to the operator pod, scales the cluster down
# to 3 replicas and verifies that the same pod is still there but its first
# container's restartCount has changed.
# Returns: 1 if the pod was replaced, if restart counts cannot be read as
#          integers, or if the restart count did not change.
fail_pod() {
	# NOTE(review): `local var=$(cmd)` hides cmd's exit status from errexit.
	# Kept as in the original so the points where this test aborts do not move.
	local init_pod=$(get_operator_pod)
	local restart_count_before=$(kubectl get pod "${init_pod}" --namespace="${OPERATOR_NS:-$namespace}" -ojsonpath='{.status.containerStatuses[0].restartCount}')

	_chaos_apply_to_operator "chaos-operator-pod-failure" "chaos-pod-failure.yml" "$init_pod"
	sleep 10

	desc 'check if operator works fine: scale down from 5 to 3'
	kubectl scale --replicas=3 "pxc/$cluster"
	sleep 60

	local pod=$(get_operator_pod)
	local restart_count_after=$(kubectl get pod "${pod}" --namespace="${OPERATOR_NS:-$namespace}" -ojsonpath='{.status.containerStatuses[0].restartCount}')
	if [ "$init_pod" != "$pod" ]; then
		echo "Operator pod was killed, when it should have just been restarted."
		echo "Pod name before: $init_pod , pod name after test: $pod"
		return 1
	elif ! [[ $restart_count_before =~ ^[0-9]+$ && $restart_count_after =~ ^[0-9]+$ ]]; then
		# Without this guard an empty/non-numeric count makes `[ ... -eq ... ]`
		# error out inside the condition, and the restart check is silently
		# skipped.
		echo "Could not read operator pod restart count." >&2
		echo "Pod: $pod , restarts before: '$restart_count_before' , restarts after test: '$restart_count_after'" >&2
		return 1
	elif [ "$restart_count_before" -eq "$restart_count_after" ]; then
		echo "Seems operator pod was not restarted when it should have been."
		echo "Pod: $pod , restarts before: $restart_count_before , restarts after test: $restart_count_after"
		return 1
	fi

	_chaos_wait_operator_pod "$pod"

	desc "check scale down"
	wait_for_delete "pod/$cluster-pxc-3"
}

# Applies the network-loss chaos to the operator pod, scales the cluster up
# to 5 replicas and waits for the operator pod and all 5 PXC pods.
network_loss() {
	local pod=$(get_operator_pod)

	_chaos_apply_to_operator "chaos-operator-network" "chaos-network-loss.yml" "$pod"
	sleep 10

	desc 'check if operator works fine: scale up from 3 to 5'
	kubectl scale --replicas=5 "pxc/$cluster"
	sleep 10

	_chaos_wait_operator_pod "$pod"

	desc "check scale up"
	wait_for_running "$cluster-pxc" 5
}

# Applies the pod-kill chaos to the operator pod, verifies that a different
# operator pod name is reported afterwards, then scales the cluster up to 5
# replicas and waits for the new operator pod and all 5 PXC pods.
# Returns: 1 if the operator pod name did not change.
kill_pod() {
	local init_pod=$(get_operator_pod)

	_chaos_apply_to_operator "chaos-operator-pod-kill" "chaos-pod-kill.yml" "$init_pod"
	sleep 10

	local pod=$(get_operator_pod)
	if [ "$init_pod" == "$pod" ]; then
		echo "operator pod was not killed! something went wrong."
		return 1
	fi

	desc 'check if operator works fine: scale up from 3 to 5'
	kubectl scale --replicas=5 "pxc/$cluster"
	sleep 10

	_chaos_wait_operator_pod "$pod"

	desc "check scale up"
	wait_for_running "$cluster-pxc" 5
}

# Test entry point: prepares infrastructure and chaos-mesh, creates the
# cluster, runs the three chaos scenarios in order, then cleans up.
main() {
	create_infra "$namespace"

	# Chaos objects go to the namespace the operator runs in: OPERATOR_NS when
	# it is set, otherwise the test namespace. Intentionally not local: the
	# scenario functions read test_namespace.
	test_namespace=$namespace
	if [ -n "$OPERATOR_NS" ]; then
		kubectl patch clusterrole percona-xtradb-cluster-operator --type=json -p '[{"op":"remove","path":"/rules/1"}]'
		kubectl delete validatingwebhookconfigurations.admissionregistration.k8s.io percona-xtradbcluster-webhook
		test_namespace=$OPERATOR_NS
	fi
	deploy_chaos_mesh "$test_namespace"

	desc 'create PXC cluster'
	spinup_pxc "$cluster" "$test_dir/conf/$cluster.yml"

	desc 'kill operator'
	kill_pod

	desc 'fail operator pod for 60s'
	fail_pod

	desc 'emulate bad network for 60s'
	network_loss

	destroy_chaos_mesh
	destroy "$namespace"
	desc "test passed"
}

main