github.com/dmaizel/tests@v0.0.0-20210728163746-cae6a2d9cee8/integration/stability/soak_parallel_rm.sh (about)

     1  #!/bin/bash
     2  #
     3  # Copyright (c) 2017-2018, 2020 Intel Corporation
     4  #
     5  # SPDX-License-Identifier: Apache-2.0
     6  #
     7  # This test will run a number of parallel containers, and then try to
     8  # 'rm -f' them all at the same time. It will check after each run and
     9  # rm that we have the expected number of containers, shims,
    10  # qemus and runtimes active
    11  # The goals are two fold:
    12  # - spot any stuck or non-started components
    13  # - catch any hang ups
    14  
    15  cidir=$(dirname "$0")
    16  source "${cidir}/../../metrics/lib/common.bash"
    17  source "/etc/os-release" || source "/usr/lib/os-release"
    18  
    19  # How many times will we run the test loop...
    20  ITERATIONS="${ITERATIONS:-5}"
    21  
    22  # the system 'free available' level where we stop running the tests, as otherwise
    23  #  the system can crawl to a halt, and/or start refusing to launch new VMs anyway
    24  # We choose 2G, as that is one of the default VM sizes for Kata
    25  MEM_CUTOFF="${MEM_CUTOFF:-(2*1024*1024*1024)}"
    26  
    27  # do we need a command argument for this payload?
    28  COMMAND="${COMMAND:-tail -f /dev/null}"
    29  
    30  # Runtime path
    31  RUNTIME_PATH=$(command -v $RUNTIME)
    32  
    33  # The place where virtcontainers keeps its active pod info
    34  # This is ultimately what 'kata-runtime list' uses to get its info, but
    35  # we can also check it for sanity directly
    36  VC_POD_DIR="${VC_POD_DIR:-/run/vc/sbs}"
    37  
    38  # let's cap the test. If you want to run until you hit the memory limit
    39  # then just set this to a very large number
    40  MAX_CONTAINERS="${MAX_CONTAINERS:-110}"
    41  
    42  KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}"
    43  
    44  check_vsock_active() {
    45  	vsock_configured=$($RUNTIME_PATH kata-env | awk '/UseVSock/ {print $3}')
    46  	vsock_supported=$($RUNTIME_PATH kata-env | awk '/SupportVSock/ {print $3}')
    47  	if [ "$vsock_configured" == true ] && [ "$vsock_supported" == true ]; then
    48  		return 0
    49  	else
    50  		return 1
    51  	fi
    52  }
    53  
    54  count_containers() {
    55  	sudo ctr c list -q | wc -l
    56  }
    57  
    58  check_all_running() {
    59  	local goterror=0
    60  
    61  	echo "Checking ${how_many} containers have all relevant components"
    62  
    63  	# check what docker thinks
    64  	how_many_running=$(count_containers)
    65  
    66  	if (( ${how_many_running} != ${how_many} )); then
    67  		echo "Wrong number of containers running (${how_many_running} != ${how_many}) - stopping"
    68  		((goterror++))
    69  	fi
    70  
    71  	# Only check for Kata components if we are using a Kata runtime
    72  	if (( $check_kata_components )); then
    73  
    74  		# check we have the right number of shims
    75  		how_many_shims=$(pgrep -a -f ${SHIM_PATH} | grep containerd.sock | wc -l)
    76  		# one shim process per container...
    77  		if (( ${how_many_running} != ${how_many_shims} )); then
    78  			echo "Wrong number of shims running (${how_many_running} != ${how_many_shims}) - stopping"
    79  			((goterror++))
    80  		fi
    81  
    82  		# check we have the right number of vm's
    83  		how_many_vms=$(pgrep -a $(basename ${HYPERVISOR_PATH} | cut -d '-' -f1) | wc -l)
    84  		if (( ${how_many_running} != ${how_many_vms} )); then
    85  			echo "Wrong number of $KATA_HYPERVISOR running (${how_many_running} != ${how_many_vms}) - stopping"
    86  			((goterror++))
    87  		fi
    88  
    89  		# if this is kata-runtime, check how many pods virtcontainers thinks we have
    90  		if [[ "$RUNTIME" == "$KATA_RUNTIME_NAME" ]]; then
    91  			num_vc_pods=$(sudo ls -1 ${VC_POD_DIR} | wc -l)
    92  
    93  			if (( ${how_many_running} != ${num_vc_pods} )); then
    94  				echo "Wrong number of pods in $VC_POD_DIR (${how_many_running} != ${num_vc_pods}) - stopping)"
    95  				((goterror++))
    96  			fi
    97  		fi
    98  	fi
    99  
   100  	if (( goterror != 0 )); then
   101  		show_system_ctr_state
   102  		die "Got $goterror errors, quitting"
   103  	fi
   104  }
   105  
   106  # reported system 'available' memory
   107  get_system_avail() {
   108  	echo $(free -b | head -2 | tail -1 | awk '{print $7}')
   109  }
   110  
   111  go() {
   112  	echo "Running..."
   113  
   114  	how_many=0
   115  
   116  	while true; do {
   117  		check_all_running
   118  
   119  		for ((i=1; i<= ${MAX_CONTAINERS}; i++)); do
   120  			containers+=($(random_name))
   121  			sudo ctr run --runtime=${CONTAINERD_RUNTIME} -d ${nginx_image} ${containers[-1]} sh -c ${COMMAND}
   122  			((how_many++))
   123  		done
   124  
   125  		if (( ${how_many} >= ${MAX_CONTAINERS} )); then
   126  			echo "And we have hit the max ${how_many} containers"
   127  			return
   128  		fi
   129  
   130  		how_much=$(get_system_avail)
   131  		if (( ${how_much} < ${MEM_CUTOFF} )); then
   132  			echo "And we are out of memory on container ${how_many} (${how_much} < ${MEM_CUTOFF})"
   133  			return
   134  		fi
   135  	}
   136  	done
   137  }
   138  
   139  kill_all_containers() {
   140  	present=$(sudo ctr c list -q | wc -l)
   141  	if ((${present})); then
   142  		sudo ctr tasks kill $(sudo ctr task ls -q)
   143  		sudo ctr tasks rm -f $(sudo ctr task list -q)
   144  		sudo ctr c rm $(sudo ctr c list -q)
   145  	fi
   146  }
   147  
   148  count_mounts() {
   149  	echo $(mount | wc -l)
   150  }
   151  
   152  check_mounts() {
   153  	final_mount_count=$(count_mounts)
   154  
   155  	if [[ $final_mount_count < $initial_mount_count ]]; then
   156  		echo "Final mount count does not match initial count (${final_mount_count} != ${initial_mount_count})"
   157  	fi
   158  }
   159  
   160  init() {
   161  	sudo systemctl restart containerd
   162  	extract_kata_env
   163  	kill_all_containers
   164  
   165  	# remember how many mount points we had before we do anything
   166  	# and then sanity check we end up with no new ones dangling at the end
   167  	initial_mount_count=$(count_mounts)
   168  
   169  	# Only check Kata items if we are using a Kata runtime
   170  	if [[ "$RUNTIME" == "$KATA_RUNTIME_NAME" ]]; then
   171  		echo "Checking Kata runtime $RUNTIME"
   172  		check_kata_components=1
   173  	else
   174  		echo "Not a Kata runtime, not checking for Kata components"
   175  		check_kata_components=0
   176  	fi
   177  
   178  	versions_file="${cidir}/../../versions.yaml"
   179  	nginx_version=$("${GOPATH}/bin/yq" read "$versions_file" "docker_images.nginx.version")
   180  	nginx_image="docker.io/library/nginx:$nginx_version"
   181  
   182  	# Pull nginx image
   183  	sudo ctr image pull ${nginx_image}
   184  	if [ $? != 0 ]; then
   185  		die "Unable to retry docker image ${nginx_image}"
   186  	fi
   187  }
   188  
   189  spin() {
   190  	for ((i=1; i<= ITERATIONS; i++)); do {
   191  		echo "Start iteration $i of $ITERATIONS"
   192  		#spin them up
   193  		go
   194  		#check we are in a sane state
   195  		check_all_running
   196  		#shut them all down
   197  		kill_all_containers
   198  		#Note there should be none running
   199  		how_many=0
   200  		#and check they all died
   201  		check_all_running
   202  		#and that we have no dangling mounts
   203  		check_mounts
   204  	}
   205  	done
   206  
   207  }
   208  
   209  init
   210  spin