github.com/kata-containers/tests@v0.0.0-20240307153542-772105b56064/metrics/density/fast_footprint.sh (about)

     1  #!/bin/bash
     2  # Copyright (c) 2017-2018, 2021 Intel Corporation
     3  #
     4  # SPDX-License-Identifier: Apache-2.0
     5  #
     6  # A script to gather memory 'footprint' information as we launch more
     7  # and more containers
     8  #
     9  # The script gathers information about both user and kernel space consumption
    10  # Output is into a .json file, named using some of the config component names
    11  # (such as footprint-busybox.json)
    12  
    13  # Pull in some common, useful, items
    14  SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
    15  source "${SCRIPT_PATH}/../lib/common.bash"
    16  
    17  # Note that all vars that can be set from outside the script (that is,
    18  # passed in the ENV), use the ':-' setting to allow being over-ridden
    19  
    20  # Default sleep, in seconds, to let containers come up and finish their
    21  # initialisation before we take the measures. Some of the larger
    22  # containers can take a number of seconds to get running.
    23  PAYLOAD_SLEEP="${PAYLOAD_SLEEP:-10}"
    24  
    25  # How long, in seconds, do we wait for KSM to 'settle down', before we
    26  # timeout and just continue anyway.
    27  KSM_WAIT_TIME="${KSM_WAIT_TIME:-300}"
    28  
    29  # How long, in seconds, do we poll for ctr to complete launching all the
    30  # containers?
    31  CTR_POLL_TIMEOUT="${CTR_POLL_TIMEOUT:-300}"
    32  
    33  # How many containers do we launch in parallel before taking the PAYLOAD_SLEEP
    34  # nap
    35  PARALLELISM="${PARALLELISM:-10}"
    36  
    37  ### The default config - run a small busybox image
    38  # Define what we will be running (app under test)
    39  #  Default is we run busybox, as a 'small' workload
    40  PAYLOAD="${PAYLOAD:-quay.io/prometheus/busybox:latest}"
    41  PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}"
    42  
    43  ###
    44  # which RUNTIME we use is picked up from the env in
    45  # common.bash. You can over-ride by setting RUNTIME in your env
    46  
    47  ###
    48  # Define the cutoff checks for when we stop running the test
    49    # Run up to this many containers
    50  NUM_CONTAINERS="${NUM_CONTAINERS:-100}"
    51    # Run until we have consumed this much memory (from MemFree)
    52  MAX_MEMORY_CONSUMED="${MAX_MEMORY_CONSUMED:-256*1024*1024*1024}"
    53    # Run until we have this much MemFree left
    54  MIN_MEMORY_FREE="${MIN_MEMORY_FREE:-2*1024*1024*1024}"
    55  
    56  # Tools we need to have installed in order to operate
    57  REQUIRED_COMMANDS="smem awk"
    58  
    59  # If we 'dump' the system caches before we measure then we get less
    60  # noise in the results - they show more what our un-reclaimable footprint is
    61  DUMP_CACHES="${DUMP_CACHES:-1}"
    62  
    63  # Affects the name of the file to store the results in
    64  TEST_NAME="${TEST_NAME:-fast-footprint-busybox}"
    65  
    66  ############# end of configurable items ###################
    67  
    68  # vars to remember where we started so we can calc diffs
    69  base_mem_avail=0
    70  base_mem_free=0
    71  
    72  # dump the kernel caches, so we get a more precise (or just different)
    73  # view of what our footprint really is.
    74  function dump_caches() {
    75  	sudo bash -c "echo 3 > /proc/sys/vm/drop_caches"
    76  }
    77  
    78  function init() {
    79  	restart_containerd_service
    80  
    81  	check_cmds $REQUIRED_COMMANDS
    82  	sudo -E "${CTR_EXE}" image pull "$PAYLOAD"
    83  
    84  	# Modify the test name if running with KSM enabled
    85  	check_for_ksm
    86  
    87  	# Use the common init func to get to a known state
    88  	init_env
    89  
    90  	# Prepare to start storing results
    91  	metrics_json_init
    92  
    93  	# Store up baseline measures
    94  	base_mem_avail=$(free -b | head -2 | tail -1 | awk '{print $7}')
    95  	base_mem_free=$(get_memfree)
    96  
    97  	# Store our configuration for this run
    98  	save_config
    99  }
   100  
   101  save_config(){
   102  	metrics_json_start_array
   103  
   104  	local json="$(cat << EOF
   105  	{
   106  		"testname": "${TEST_NAME}",
   107  		"payload": "${PAYLOAD}",
   108  		"payload_args": "${PAYLOAD_ARGS}",
   109  		"payload_sleep": ${PAYLOAD_SLEEP},
   110  		"ksm_settle_time": ${KSM_WAIT_TIME},
   111  		"num_containers": ${NUM_CONTAINERS},
   112  		"parallelism": ${PARALLELISM},
   113  		"max_memory_consumed": "${MAX_MEMORY_CONSUMED}",
   114  		"min_memory_free": "${MIN_MEMORY_FREE}",
   115  		"dump_caches": "${DUMP_CACHES}"
   116  	}
   117  EOF
   118  )"
   119  	metrics_json_add_array_element "$json"
   120  	metrics_json_end_array "Config"
   121  }
   122  
   123  function cleanup() {
   124  	# Finish storing the results
   125  	metrics_json_save
   126  
   127  	clean_env_ctr
   128  }
   129  
   130  # helper function to get USS of process in arg1
   131  function get_proc_uss() {
   132  	item=$(sudo smem -t -P "^$1" | tail -1 | awk '{print $4}')
   133  	((item*=1024))
   134  	echo $item
   135  }
   136  
   137  # helper function to get PSS of process in arg1
   138  function get_proc_pss() {
   139  	item=$(sudo smem -t -P "^$1" | tail -1 | awk '{print $5}')
   140  	((item*=1024))
   141  	echo $item
   142  }
   143  
   144  # Get the PSS for the whole of userspace (all processes)
   145  #  This allows us to see if we had any impact on the rest of the system, for instance
   146  #  dockerd grows as we launch containers, so we should account for that in our total
   147  #  memory breakdown
   148  function grab_all_pss() {
   149  	item=$(sudo smem -t | tail -1 | awk '{print $5}')
   150  	((item*=1024))
   151  
   152  	local json="$(cat << EOF
   153  		"all_pss": {
   154  			"pss": $item,
   155  			"Units": "KB"
   156  		}
   157  EOF
   158  )"
   159  
   160  	metrics_json_add_array_fragment "$json"
   161  }
   162  
   163  function grab_user_smem() {
   164  	# userspace
   165  	item=$(sudo smem -w | head -5 | tail -1 | awk '{print $3}')
   166  	((item*=1024))
   167  
   168  	local json="$(cat << EOF
   169  		"user_smem": {
   170  			"userspace": $item,
   171  			"Units": "KB"
   172  		}
   173  EOF
   174  )"
   175  
   176  	metrics_json_add_array_fragment "$json"
   177  }
   178  
   179  function grab_slab() {
   180  	# Grabbing slab total from meminfo is easier than doing the math
   181  	# on slabinfo
   182  	item=$(fgrep "Slab:" /proc/meminfo | awk '{print $2}')
   183  	((item*=1024))
   184  
   185  	local json="$(cat << EOF
   186  		"slab": {
   187  			"slab": $item,
   188  			"Units": "KB"
   189  		}
   190  EOF
   191  )"
   192  
   193  	metrics_json_add_array_fragment "$json"
   194  }
   195  
   196  function get_memfree() {
   197  	mem_free=$(sudo smem -w | head -6 | tail -1 | awk '{print $4}')
   198  	((mem_free*=1024))
   199  	echo $mem_free
   200  }
   201  
   202  function grab_system() {
   203  
   204  	# avail memory, from 'free'
   205  	local avail=$(free -b | head -2 | tail -1 | awk '{print $7}')
   206  	local avail_decr=$((base_mem_avail-avail))
   207  
   208  	# cached memory, from 'free'
   209  	local cached=$(free -b | head -2 | tail -1 | awk '{print $6}')
   210  
   211  	# free memory from smem
   212  	local smem_free=$(get_memfree)
   213  	local free_decr=$((base_mem_free-item))
   214  
   215  	# Anon pages
   216  	local anon=$(fgrep "AnonPages:" /proc/meminfo | awk '{print $2}')
   217  	((anon*=1024))
   218  
   219  	# Mapped pages
   220  	local mapped=$(egrep "^Mapped:" /proc/meminfo | awk '{print $2}')
   221  	((mapped*=1024))
   222  
   223  	# Cached
   224  	local meminfo_cached=$(grep "^Cached:" /proc/meminfo | awk '{print $2}')
   225  	((meminfo_cached*=1024))
   226  
   227  	local json="$(cat << EOF
   228  		"system": {
   229  			"avail": $avail,
   230  			"avail_decr": $avail_decr,
   231  			"cached": $cached,
   232  			"smem_free": $smem_free,
   233  			"free_decr": $free_decr,
   234  			"anon": $anon,
   235  			"mapped": $mapped,
   236  			"meminfo_cached": $meminfo_cached,
   237  			"Units": "KB"
   238  		}
   239  EOF
   240  )"
   241  
   242  	metrics_json_add_array_fragment "$json"
   243  }
   244  
   245  function grab_stats() {
   246  	# If configured, dump the caches so we get a more stable
   247  	# view of what our static footprint really is
   248  	if [[ "$DUMP_CACHES" ]] ; then
   249  		dump_caches
   250  	fi
   251  
   252  	# user space data
   253  		# PSS taken all userspace
   254  	grab_all_pss
   255  		# user as reported by smem
   256  	grab_user_smem
   257  
   258  	# System overview data
   259  		# System free and cached
   260  	grab_system
   261  
   262  	# kernel data
   263  		# The 'total kernel space taken' we can work out as:
   264  		# ktotal = ((free-avail)-user)
   265  		# So, we don't grab that number from smem, as that is what it does
   266  		# internally anyhow.
   267  		# Still try to grab any finer kernel details that we can though
   268  
   269  		# totals from slabinfo
   270  	grab_slab
   271  
   272  	metrics_json_close_array_element
   273  }
   274  
   275  function check_limits() {
   276  	mem_free=$(get_memfree)
   277  	if ((mem_free <= MIN_MEMORY_FREE)); then
   278  		echo 1
   279  		return
   280  	fi
   281  
   282  	mem_consumed=$((base_mem_avail-mem_free))
   283  	if ((mem_consumed >= MAX_MEMORY_CONSUMED)); then
   284  		echo 1
   285  		return
   286  	fi
   287  
   288  	echo 0
   289  }
   290  
   291  launch_containers() {
   292  	local parloops leftovers
   293  
   294  	(( parloops=${NUM_CONTAINERS}/${PARALLELISM} ))
   295  	(( leftovers=${NUM_CONTAINERS} - (${parloops}*${PARALLELISM}) ))
   296  
   297  	echo "Launching ${parloops}x${PARALLELISM} containers + ${leftovers} etras"
   298  
   299  	containers=()
   300  
   301  	local iter n
   302  	for iter in $(seq 1 $parloops); do
   303  		echo "Launch iteration ${iter}"
   304  		for n in $(seq 1 $PARALLELISM); do
   305  			containers+=($(random_name))
   306  			sudo -E "${CTR_EXE}" run -d --runtime=$CTR_RUNTIME $PAYLOAD ${containers[-1]} sh -c $PAYLOAD_ARGS &
   307  		done
   308  
   309  		if [[ $PAYLOAD_SLEEP ]]; then
   310  			sleep $PAYLOAD_SLEEP
   311  		fi
   312  
   313  		# check if we have hit one of our limits and need to wrap up the tests
   314  		if (($(check_limits))); then
   315  			echo "Ran out of resources, check_limits failed"
   316  			return
   317  		fi
   318  	done
   319  
   320  	for n in $(seq 1 $leftovers); do
   321  		containers+=($(random_name))
   322  		sudo -E "${CTR_EXE}" run -d --runtime=$CTR_RUNTIME $PAYLOAD ${containers[-1]} sh -c $PAYLOAD_ARGS &
   323  	done
   324  }
   325  
   326  wait_containers() {
   327  	local t numcontainers
   328  	# nap 3s between checks
   329  	local step=3
   330  
   331  	for ((t=0; t<${CTR_POLL_TIMEOUT}; t+=step)); do
   332  
   333  		numcontainers=$(sudo -E "${CTR_EXE}" c list -q | wc -l)
   334  
   335  		if (( numcontainers >=  ${NUM_CONTAINERS} )); then
   336  			echo "All containers now launched (${t}s)"
   337  				return
   338  		else
   339  			echo "Waiting for containers to launch (${numcontainers} at ${t}s)"
   340  		fi
   341  		sleep ${step}
   342  	done
   343  
   344  	echo "Timed out waiting for containers to launch (${t}s)"
   345  	cleanup
   346  	die "Timed out waiting for containers to launch (${t}s)"
   347  }
   348  
   349  function go() {
   350  	# Init the json cycle for this save
   351  	metrics_json_start_array
   352  
   353  	# Grab the first set of stats before we run any containers.
   354  	grab_stats
   355  
   356  	launch_containers
   357  	wait_containers
   358  
   359  	if [ $ksm_on == "1" ]; then
   360  		echo "Wating for KSM to settle..."
   361  		wait_ksm_settle ${KSM_WAIT_TIME}
   362  	fi
   363  
   364  	grab_stats
   365  
   366  	# Wrap up the results array
   367  	metrics_json_end_array "Results"
   368  }
   369  
   370  function show_vars()
   371  {
   372  	echo -e "\nEvironment variables:"
   373  	echo -e "\tName (default)"
   374  	echo -e "\t\tDescription"
   375  	echo -e "\tPAYLOAD (${PAYLOAD})"
   376  	echo -e "\t\tThe ctr image to run"
   377  	echo -e "\tPAYLOAD_ARGS (${PAYLOAD_ARGS})"
   378  	echo -e "\t\tAny extra arguments passed into the docker 'run' command"
   379  	echo -e "\tPAYLOAD_SLEEP (${PAYLOAD_SLEEP})"
   380  	echo -e "\t\tSeconds to sleep between launch and measurement, to allow settling"
   381  	echo -e "\tKSM_WAIT_TIME (${KSM_WAIT_TIME})"
   382  	echo -e "\t\tSeconds to wait for KSM to settle before we take the final measure"
   383  	echo -e "\tCTR_POLL_TIMEOUT (${CTR_POLL_TIMEOUT})"
   384  	echo -e "\t\tSeconds to poll for ctr to finish launching containers"
   385  	echo -e "\tPARALLELISM (${PARALLELISM})"
   386  	echo -e "\t\tNumber of containers we launch in parallel"
   387  	echo -e "\tNUM_CONTAINERS (${NUM_CONTAINERS})"
   388  	echo -e "\t\tThe total number of containers to run"
   389  	echo -e "\tMAX_MEMORY_CONSUMED (${MAX_MEMORY_CONSUMED})"
   390  	echo -e "\t\tThe maximum amount of memory to be consumed before terminating"
   391  	echo -e "\tMIN_MEMORY_FREE (${MIN_MEMORY_FREE})"
   392  	echo -e "\t\tThe minimum amount of memory allowed to be free before terminating"
   393  	echo -e "\tDUMP_CACHES (${DUMP_CACHES})"
   394  	echo -e "\t\tA flag to note if the system caches should be dumped before capturing stats"
   395  	echo -e "\tTEST_NAME (${TEST_NAME})"
   396  	echo -e "\t\tCan be set to over-ride the default JSON results filename"
   397  
   398  }
   399  
   400  function help()
   401  {
   402  	usage=$(cat << EOF
   403  Usage: $0 [-h] [options]
   404     Description:
   405  	Launch a series of workloads and take memory metric measurements after
   406  	each launch.
   407     Options:
   408          -h,    Help page.
   409  EOF
   410  )
   411  	echo "$usage"
   412  	show_vars
   413  }
   414  
   415  function main() {
   416  
   417  	local OPTIND
   418  	while getopts "h" opt;do
   419  		case ${opt} in
   420  		h)
   421  		    help
   422  		    exit 0;
   423  		    ;;
   424  		esac
   425  	done
   426  	shift $((OPTIND-1))
   427  
   428  	init
   429  	go
   430  	cleanup
   431  }
   432  
   433  main "$@"