#!/bin/bash
#
# Copyright (c) 2017-2018, 2020 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Source: github.com/dmaizel/tests@v0.0.0-20210728163746-cae6a2d9cee8/integration/stability/soak_parallel_rm.sh
#
# This test will run a number of parallel containers, and then try to
# 'rm -f' them all at the same time. It will check after each run and
# rm that we have the expected number of containers, shims,
# qemus and runtimes active.
# The goals are twofold:
# - spot any stuck or non-started components
# - catch any hang ups
#
# Helpers used but not defined here (expected to come from the sourced
# common.bash): die, random_name, extract_kata_env, show_system_ctr_state.
#
# NOTE: 'set -e' is deliberately not enabled: '((x++))' returns non-zero
# when x was 0, which would abort the script on the first increment.

cidir=$(dirname "$0")
source "${cidir}/../../metrics/lib/common.bash"
source "/etc/os-release" || source "/usr/lib/os-release"

# How many times will we run the test loop...
ITERATIONS="${ITERATIONS:-5}"

# The system 'free available' level (in bytes) where we stop running the
# tests, as otherwise the system can crawl to a halt, and/or start refusing
# to launch new VMs anyway.
# We choose 2G, as that is one of the default VM sizes for Kata.
# The default is evaluated to a plain number so log messages show bytes
# rather than an unevaluated expression.
MEM_CUTOFF="${MEM_CUTOFF:-$((2 * 1024 * 1024 * 1024))}"

# The command the container payload runs (passed to 'sh -c').
COMMAND="${COMMAND:-tail -f /dev/null}"

# Runtime path
RUNTIME_PATH=$(command -v "${RUNTIME}")

# The place where virtcontainers keeps its active pod info.
# This is ultimately what 'kata-runtime list' uses to get its info, but
# we can also check it for sanity directly.
VC_POD_DIR="${VC_POD_DIR:-/run/vc/sbs}"

# Let's cap the test. If you want to run until you hit the memory limit
# then just set this to a very large number.
MAX_CONTAINERS="${MAX_CONTAINERS:-110}"

KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}"

#######################################
# Report whether vsock is both configured and supported, according to
# the 'kata-env' output of the runtime.
# Globals:   RUNTIME_PATH (read)
# Returns:   0 if both UseVSock and SupportVSock report "true", else 1
#######################################
check_vsock_active() {
  local kata_env vsock_configured vsock_supported

  # Query the runtime once and pick both fields out of the same output.
  kata_env=$("${RUNTIME_PATH}" kata-env)
  vsock_configured=$(awk '/UseVSock/ {print $3}' <<<"${kata_env}")
  vsock_supported=$(awk '/SupportVSock/ {print $3}' <<<"${kata_env}")

  [[ "${vsock_configured}" == true && "${vsock_supported}" == true ]]
}

#######################################
# Print the number of containers listed by 'ctr c list'.
# Outputs:   container count on stdout
#######################################
count_containers() {
  sudo ctr c list -q | wc -l
}

#######################################
# Verify that the number of containers (and, for Kata runtimes, shims,
# hypervisor processes and virtcontainers pods) matches expectations.
# Calls 'die' after dumping state if any count is wrong.
# Globals:   how_many, check_kata_components, SHIM_PATH, HYPERVISOR_PATH,
#            KATA_HYPERVISOR, RUNTIME, KATA_RUNTIME_NAME, VC_POD_DIR (read)
#######################################
check_all_running() {
  local goterror=0
  # Locals remain visible to show_system_ctr_state/die (dynamic scoping).
  local how_many_running how_many_shims how_many_vms num_vc_pods

  echo "Checking ${how_many} containers have all relevant components"

  # check what containerd (via ctr) thinks
  how_many_running=$(count_containers)

  if ((how_many_running != how_many)); then
    echo "Wrong number of containers running (${how_many_running} != ${how_many}) - stopping"
    ((goterror++))
  fi

  # Only check for Kata components if we are using a Kata runtime
  if ((check_kata_components)); then

    # check we have the right number of shims
    how_many_shims=$(pgrep -a -f "${SHIM_PATH}" | grep -c containerd.sock)
    # one shim process per container...
    if ((how_many_running != how_many_shims)); then
      echo "Wrong number of shims running (${how_many_running} != ${how_many_shims}) - stopping"
      ((goterror++))
    fi

    # check we have the right number of vm's; the process name matched is
    # the hypervisor binary's basename up to its first '-'
    how_many_vms=$(pgrep -a "$(basename "${HYPERVISOR_PATH}" | cut -d '-' -f1)" | wc -l)
    if ((how_many_running != how_many_vms)); then
      echo "Wrong number of $KATA_HYPERVISOR running (${how_many_running} != ${how_many_vms}) - stopping"
      ((goterror++))
    fi

    # if this is kata-runtime, check how many pods virtcontainers thinks we have
    if [[ "$RUNTIME" == "$KATA_RUNTIME_NAME" ]]; then
      num_vc_pods=$(sudo ls -1 "${VC_POD_DIR}" | wc -l)

      if ((how_many_running != num_vc_pods)); then
        echo "Wrong number of pods in $VC_POD_DIR (${how_many_running} != ${num_vc_pods}) - stopping)"
        ((goterror++))
      fi
    fi
  fi

  if ((goterror != 0)); then
    show_system_ctr_state
    die "Got $goterror errors, quitting"
  fi
}

#######################################
# Print the 7th column of the second line of 'free -b'
# (the reported system 'available' memory).
# Outputs:   value on stdout
#######################################
get_system_avail() {
  free -b | awk 'NR == 2 {print $7}'
}

#######################################
# Launch detached containers until MAX_CONTAINERS have been started or
# the available memory drops below MEM_CUTOFF, whichever comes first.
# Globals:   how_many (written), containers (appended),
#            MAX_CONTAINERS, MEM_CUTOFF, CONTAINERD_RUNTIME, nginx_image,
#            COMMAND (read)
#######################################
go() {
  # 'i' must be local: spin() drives its own loop with a variable of the
  # same name, and a shared global would end that loop prematurely.
  local i name how_much

  echo "Running..."

  how_many=0

  # Sanity check the starting state (expects zero containers).
  check_all_running

  for ((i = 1; i <= MAX_CONTAINERS; i++)); do
    name=$(random_name)
    containers+=("${name}")
    # COMMAND is quoted so that 'sh -c' receives it as one command string.
    sudo ctr run --runtime="${CONTAINERD_RUNTIME}" -d "${nginx_image}" "${name}" sh -c "${COMMAND}"
    ((how_many++))

    # Stop launching once memory runs low. Skip the check if 'free' gave
    # us something that is not a plain number.
    how_much=$(get_system_avail)
    if [[ "${how_much}" =~ ^[0-9]+$ ]] && ((how_much < MEM_CUTOFF)); then
      echo "And we are out of memory on container ${how_many} (${how_much} < ${MEM_CUTOFF})"
      return
    fi
  done

  echo "And we have hit the max ${how_many} containers"
}

#######################################
# Kill and remove all tasks, then remove all containers, as listed by ctr.
# Each ctr command is skipped when its list is empty so that it is never
# invoked without arguments.
#######################################
kill_all_containers() {
  local present
  local -a task_ids=() container_ids=()

  present=$(sudo ctr c list -q | wc -l)
  if ((present)); then
    mapfile -t task_ids < <(sudo ctr task ls -q)
    if ((${#task_ids[@]})); then
      sudo ctr tasks kill "${task_ids[@]}"
    fi

    # Re-list before removal, as the original flow did.
    mapfile -t task_ids < <(sudo ctr task list -q)
    if ((${#task_ids[@]})); then
      sudo ctr tasks rm -f "${task_ids[@]}"
    fi

    mapfile -t container_ids < <(sudo ctr c list -q)
    if ((${#container_ids[@]})); then
      sudo ctr c rm "${container_ids[@]}"
    fi
  fi
}

#######################################
# Print the number of lines output by 'mount'.
# Outputs:   mount count on stdout
#######################################
count_mounts() {
  mount | wc -l
}

#######################################
# Report (without failing) if the current mount count differs from the
# count recorded by init().
# Globals:   initial_mount_count (read)
#######################################
check_mounts() {
  local final_mount_count
  final_mount_count=$(count_mounts)

  # Numeric comparison: '[[ a < b ]]' would compare the counts as strings.
  if ((final_mount_count != initial_mount_count)); then
    echo "Final mount count does not match initial count (${final_mount_count} != ${initial_mount_count})"
  fi
}

#######################################
# Prepare the system for the test: restart containerd, load the Kata
# environment, clear existing containers, record the mount count and
# pull the nginx image.
# Globals:   initial_mount_count, check_kata_components, nginx_image
#            (written); RUNTIME, KATA_RUNTIME_NAME, GOPATH, cidir (read)
#######################################
init() {
  local versions_file nginx_version

  sudo systemctl restart containerd
  extract_kata_env
  kill_all_containers

  # remember how many mount points we had before we do anything
  # and then sanity check we end up with no new ones dangling at the end
  initial_mount_count=$(count_mounts)

  # Only check Kata items if we are using a Kata runtime
  if [[ "$RUNTIME" == "$KATA_RUNTIME_NAME" ]]; then
    echo "Checking Kata runtime $RUNTIME"
    check_kata_components=1
  else
    echo "Not a Kata runtime, not checking for Kata components"
    check_kata_components=0
  fi

  versions_file="${cidir}/../../versions.yaml"
  nginx_version=$("${GOPATH}/bin/yq" read "$versions_file" "docker_images.nginx.version")
  nginx_image="docker.io/library/nginx:$nginx_version"

  # Pull nginx image
  if ! sudo ctr image pull "${nginx_image}"; then
    die "Unable to retrieve docker image ${nginx_image}"
  fi
}

#######################################
# Run the launch / verify / kill / verify cycle ITERATIONS times.
# Globals:   ITERATIONS (read), how_many (written)
#######################################
spin() {
  # Keep the counter local so functions called from the loop body cannot
  # disturb it.
  local i

  for ((i = 1; i <= ITERATIONS; i++)); do
    echo "Start iteration $i of $ITERATIONS"
    # spin them up
    go
    # check we are in a sane state
    check_all_running
    # shut them all down
    kill_all_containers
    # Note there should be none running
    how_many=0
    # and check they all died
    check_all_running
    # and that we have no dangling mounts
    check_mounts
  done
}

init
spin