github.com/dmaizel/tests@v0.0.0-20210728163746-cae6a2d9cee8/metrics/density/fast_footprint.sh

github.com/dmaizel/tests@v0.0.0-20210728163746-cae6a2d9cee8/metrics/density/fast_footprint.sh (about)

     1  #!/bin/bash
     2  # Copyright (c) 2017-2018, 2021 Intel Corporation
     3  #
     4  # SPDX-License-Identifier: Apache-2.0
     5  #
     6  # A script to gather memory 'footprint' information as we launch more
     7  # and more containers
     8  #
     9  # The script gathers information about both user and kernel space consumption
    10  # Output is into a .json file, named using some of the config component names
    11  # (such as footprint-busybox.json)
    12  
    13  # Pull in some common, useful, items
    14  SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
    15  source "${SCRIPT_PATH}/../lib/common.bash"
    16  
    17  # Note that all vars that can be set from outside the script (that is,
    18  # passed in the ENV), use the ':-' setting to allow being over-ridden
    19  
    20  # Default sleep, in seconds, to let containers come up and finish their
    21  # initialisation before we take the measures. Some of the larger
    22  # containers can take a number of seconds to get running.
    23  PAYLOAD_SLEEP="${PAYLOAD_SLEEP:-10}"
    24  
    25  # How long, in seconds, do we wait for KSM to 'settle down', before we
    26  # timeout and just continue anyway.
    27  KSM_WAIT_TIME="${KSM_WAIT_TIME:-300}"
    28  
    29  # How long, in seconds, do we poll for ctr to complete launching all the
    30  # containers?
    31  CTR_POLL_TIMEOUT="${CTR_POLL_TIMEOUT:-300}"
    32  
    33  # How many containers do we launch in parallel before taking the PAYLOAD_SLEEP
    34  # nap
    35  PARALLELISM="${PARALLELISM:-10}"
    36  
    37  ### The default config - run a small busybox image
    38  # Define what we will be running (app under test)
    39  #  Default is we run busybox, as a 'small' workload
    40  PAYLOAD="${PAYLOAD:-quay.io/prometheus/busybox:latest}"
    41  PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}"
    42  PAYLOAD_RUNTIME_ARGS="${PAYLOAD_RUNTIME_ARGS:---memory-limit 5120}"
    43  
    44  ###
    45  # which RUNTIME we use is picked up from the env in
    46  # common.bash. You can over-ride by setting RUNTIME in your env
    47  
    48  ###
    49  # Define the cutoff checks for when we stop running the test
    50    # Run up to this many containers
    51  NUM_CONTAINERS="${NUM_CONTAINERS:-200}"
    52    # Run until we have consumed this much memory (from MemFree)
    53  MAX_MEMORY_CONSUMED="${MAX_MEMORY_CONSUMED:-256*1024*1024*1024}"
    54    # Run until we have this much MemFree left
    55  MIN_MEMORY_FREE="${MIN_MEMORY_FREE:-2*1024*1024*1024}"
    56  
    57  # Tools we need to have installed in order to operate
    58  REQUIRED_COMMANDS="smem awk"
    59  
    60  # If we 'dump' the system caches before we measure then we get less
    61  # noise in the results - they show more what our un-reclaimable footprint is
    62  DUMP_CACHES="${DUMP_CACHES:-1}"
    63  
    64  # Affects the name of the file to store the results in
    65  TEST_NAME="${TEST_NAME:-fast-footprint-busybox}"
    66  
    67  ############# end of configurable items ###################
    68  
    69  # vars to remember where we started so we can calc diffs
    70  base_mem_avail=0
    71  base_mem_free=0
    72  
    73  # dump the kernel caches, so we get a more precise (or just different)
    74  # view of what our footprint really is.
    75  function dump_caches() {
    76  	sudo bash -c "echo 3 > /proc/sys/vm/drop_caches"
    77  }
    78  
    79  function init() {
    80  	sudo systemctl restart containerd
    81  	clean_env_ctr
    82  
    83  	CONTAINERD_RUNTIME="io.containerd.kata.v2"
    84  	check_cmds $REQUIRED_COMMANDS
    85  	sudo ctr image pull "$PAYLOAD"
    86  
    87  	# Modify the test name if running with KSM enabled
    88  	check_for_ksm
    89  
    90  	# Use the common init func to get to a known state
    91  	init_env
    92  
    93  	# Prepare to start storing results
    94  	metrics_json_init
    95  
    96  	# Store up baseline measures
    97  	base_mem_avail=$(free -b | head -2 | tail -1 | awk '{print $7}')
    98  	base_mem_free=$(get_memfree)
    99  
   100  	# Store our configuration for this run
   101  	save_config
   102  }
   103  
   104  save_config(){
   105  	metrics_json_start_array
   106  
   107  	local json="$(cat << EOF
   108  	{
   109  		"testname": "${TEST_NAME}",
   110  		"payload": "${PAYLOAD}",
   111  		"payload_args": "${PAYLOAD_ARGS}",
   112  		"payload_runtime_args": "${PAYLOAD_RUNTIME_ARGS}",
   113  		"payload_sleep": ${PAYLOAD_SLEEP},
   114  		"ksm_settle_time": ${KSM_WAIT_TIME},
   115  		"num_containers": ${NUM_CONTAINERS},
   116  		"parallelism": ${PARALLELISM},
   117  		"max_memory_consumed": "${MAX_MEMORY_CONSUMED}",
   118  		"min_memory_free": "${MIN_MEMORY_FREE}",
   119  		"dump_caches": "${DUMP_CACHES}"
   120  	}
   121  EOF
   122  )"
   123  	metrics_json_add_array_element "$json"
   124  	metrics_json_end_array "Config"
   125  }
   126  
   127  function cleanup() {
   128  	# Finish storing the results
   129  	metrics_json_save
   130  
   131  	clean_env_ctr
   132  }
   133  
   134  # helper function to get USS of process in arg1
   135  function get_proc_uss() {
   136  	item=$(sudo smem -t -P "^$1" | tail -1 | awk '{print $4}')
   137  	((item*=1024))
   138  	echo $item
   139  }
   140  
   141  # helper function to get PSS of process in arg1
   142  function get_proc_pss() {
   143  	item=$(sudo smem -t -P "^$1" | tail -1 | awk '{print $5}')
   144  	((item*=1024))
   145  	echo $item
   146  }
   147  
   148  # Get the PSS for the whole of userspace (all processes)
   149  #  This allows us to see if we had any impact on the rest of the system, for instance
   150  #  dockerd grows as we launch containers, so we should account for that in our total
   151  #  memory breakdown
   152  function grab_all_pss() {
   153  	item=$(sudo smem -t | tail -1 | awk '{print $5}')
   154  	((item*=1024))
   155  
   156  	local json="$(cat << EOF
   157  		"all_pss": {
   158  			"pss": $item,
   159  			"Units": "KB"
   160  		}
   161  EOF
   162  )"
   163  
   164  	metrics_json_add_array_fragment "$json"
   165  }
   166  
   167  function grab_user_smem() {
   168  	# userspace
   169  	item=$(sudo smem -w | head -5 | tail -1 | awk '{print $3}')
   170  	((item*=1024))
   171  
   172  	local json="$(cat << EOF
   173  		"user_smem": {
   174  			"userspace": $item,
   175  			"Units": "KB"
   176  		}
   177  EOF
   178  )"
   179  
   180  	metrics_json_add_array_fragment "$json"
   181  }
   182  
   183  function grab_slab() {
   184  	# Grabbing slab total from meminfo is easier than doing the math
   185  	# on slabinfo
   186  	item=$(fgrep "Slab:" /proc/meminfo | awk '{print $2}')
   187  	((item*=1024))
   188  
   189  	local json="$(cat << EOF
   190  		"slab": {
   191  			"slab": $item,
   192  			"Units": "KB"
   193  		}
   194  EOF
   195  )"
   196  
   197  	metrics_json_add_array_fragment "$json"
   198  }
   199  
   200  function get_memfree() {
   201  	mem_free=$(sudo smem -w | head -6 | tail -1 | awk '{print $4}')
   202  	((mem_free*=1024))
   203  	echo $mem_free
   204  }
   205  
   206  function grab_system() {
   207  
   208  	# avail memory, from 'free'
   209  	local avail=$(free -b | head -2 | tail -1 | awk '{print $7}')
   210  	local avail_decr=$((base_mem_avail-avail))
   211  
   212  	# cached memory, from 'free'
   213  	local cached=$(free -b | head -2 | tail -1 | awk '{print $6}')
   214  
   215  	# free memory from smem
   216  	local smem_free=$(get_memfree)
   217  	local free_decr=$((base_mem_free-item))
   218  
   219  	# Anon pages
   220  	local anon=$(fgrep "AnonPages:" /proc/meminfo | awk '{print $2}')
   221  	((anon*=1024))
   222  
   223  	# Mapped pages
   224  	local mapped=$(egrep "^Mapped:" /proc/meminfo | awk '{print $2}')
   225  	((mapped*=1024))
   226  
   227  	# Cached
   228  	local meminfo_cached=$(grep "^Cached:" /proc/meminfo | awk '{print $2}')
   229  	((meminfo_cached*=1024))
   230  
   231  	local json="$(cat << EOF
   232  		"system": {
   233  			"avail": $avail,
   234  			"avail_decr": $avail_decr,
   235  			"cached": $cached,
   236  			"smem_free": $smem_free,
   237  			"free_decr": $free_decr,
   238  			"anon": $anon,
   239  			"mapped": $mapped,
   240  			"meminfo_cached": $meminfo_cached,
   241  			"Units": "KB"
   242  		}
   243  EOF
   244  )"
   245  
   246  	metrics_json_add_array_fragment "$json"
   247  }
   248  
   249  function grab_stats() {
   250  	# If configured, dump the caches so we get a more stable
   251  	# view of what our static footprint really is
   252  	if [[ "$DUMP_CACHES" ]] ; then
   253  		dump_caches
   254  	fi
   255  
   256  	# user space data
   257  		# PSS taken all userspace
   258  	grab_all_pss
   259  		# user as reported by smem
   260  	grab_user_smem
   261  
   262  	# System overview data
   263  		# System free and cached
   264  	grab_system
   265  
   266  	# kernel data
   267  		# The 'total kernel space taken' we can work out as:
   268  		# ktotal = ((free-avail)-user)
   269  		# So, we don't grab that number from smem, as that is what it does
   270  		# internally anyhow.
   271  		# Still try to grab any finer kernel details that we can though
   272  
   273  		# totals from slabinfo
   274  	grab_slab
   275  
   276  	metrics_json_close_array_element
   277  }
   278  
   279  function check_limits() {
   280  	mem_free=$(get_memfree)
   281  	if ((mem_free <= MIN_MEMORY_FREE)); then
   282  		echo 1
   283  		return
   284  	fi
   285  
   286  	mem_consumed=$((base_mem_avail-mem_free))
   287  	if ((mem_consumed >= MAX_MEMORY_CONSUMED)); then
   288  		echo 1
   289  		return
   290  	fi
   291  
   292  	echo 0
   293  }
   294  
   295  launch_containers() {
   296  	local parloops leftovers
   297  
   298  	(( parloops=${NUM_CONTAINERS}/${PARALLELISM} ))
   299  	(( leftovers=${NUM_CONTAINERS} - (${parloops}*${PARALLELISM}) ))
   300  
   301  	echo "Launching ${parloops}x${PARALLELISM} containers + ${leftovers} etras"
   302  
   303  	containers=()
   304  
   305  	local iter n
   306  	for iter in $(seq 1 $parloops); do
   307  		echo "Launch iteration ${iter}"
   308  		for n in $(seq 1 $PARALLELISM); do
   309  			containers+=($(random_name))
   310  			sudo ctr run $PAYLOAD_RUNTIME_ARGS -d --runtime=$CONTAINERD_RUNTIME $PAYLOAD ${containers[-1]} sh -c $PAYLOAD_ARGS &
   311  		done
   312  
   313  		if [[ $PAYLOAD_SLEEP ]]; then
   314  			sleep $PAYLOAD_SLEEP
   315  		fi
   316  
   317  		# check if we have hit one of our limits and need to wrap up the tests
   318  		if (($(check_limits))); then
   319  			echo "Ran out of resources, check_limits failed"
   320  			return
   321  		fi
   322  	done
   323  
   324  	for n in $(seq 1 $leftovers); do
   325  		containers+=($(random_name))
   326  		sudo ctr run $PAYLOAD_RUNTIME_ARGS -d --runtime=$CONTAINERD_RUNTIME $PAYLOAD ${containers[-1]} sh -c $PAYLOAD_ARGS &
   327  	done
   328  }
   329  
   330  wait_containers() {
   331  	local t numcontainers
   332  	# nap 3s between checks
   333  	local step=3
   334  
   335  	for ((t=0; t<${CTR_POLL_TIMEOUT}; t+=step)); do
   336  
   337  		numcontainers=$(sudo ctr c list -q | wc -l)
   338  
   339  		if (( numcontainers >=  ${NUM_CONTAINERS} )); then
   340  			echo "All containers now launched (${t}s)"
   341  				return
   342  		else
   343  			echo "Waiting for containers to launch (${numcontainers} at ${t}s)"
   344  		fi
   345  		sleep ${step}
   346  	done
   347  
   348  	echo "Timed out waiting for containers to launch (${t}s)"
   349  	cleanup
   350  	die "Timed out waiting for containers to launch (${t}s)"
   351  }
   352  
   353  function go() {
   354  	# Init the json cycle for this save
   355  	metrics_json_start_array
   356  
   357  	# Grab the first set of stats before we run any containers.
   358  	grab_stats
   359  
   360  	launch_containers
   361  	wait_containers
   362  
   363  	if [ $ksm_on == "1" ]; then
   364  		echo "Wating for KSM to settle..."
   365  		wait_ksm_settle ${KSM_WAIT_TIME}
   366  	fi
   367  
   368  	grab_stats
   369  
   370  	# Wrap up the results array
   371  	metrics_json_end_array "Results"
   372  }
   373  
   374  function show_vars()
   375  {
   376  	echo -e "\nEvironment variables:"
   377  	echo -e "\tName (default)"
   378  	echo -e "\t\tDescription"
   379  	echo -e "\tPAYLOAD (${PAYLOAD})"
   380  	echo -e "\t\tThe ctr image to run"
   381  	echo -e "\tPAYLOAD_ARGS (${PAYLOAD_ARGS})"
   382  	echo -e "\t\tAny arguments passed into the ctr image"
   383  	echo -e "\tPAYLOAD_RUNTIME_ARGS (${PAYLOAD_RUNTIME_ARGS})"
   384  	echo -e "\t\tAny extra arguments passed into the docker 'run' command"
   385  	echo -e "\tPAYLOAD_SLEEP (${PAYLOAD_SLEEP})"
   386  	echo -e "\t\tSeconds to sleep between launch and measurement, to allow settling"
   387  	echo -e "\tKSM_WAIT_TIME (${KSM_WAIT_TIME})"
   388  	echo -e "\t\tSeconds to wait for KSM to settle before we take the final measure"
   389  	echo -e "\tCTR_POLL_TIMEOUT (${CTR_POLL_TIMEOUT})"
   390  	echo -e "\t\tSeconds to poll for ctr to finish launching containers"
   391  	echo -e "\tPARALLELISM (${PARALLELISM})"
   392  	echo -e "\t\tNumber of containers we launch in parallel"
   393  	echo -e "\tNUM_CONTAINERS (${NUM_CONTAINERS})"
   394  	echo -e "\t\tThe total number of containers to run"
   395  	echo -e "\tMAX_MEMORY_CONSUMED (${MAX_MEMORY_CONSUMED})"
   396  	echo -e "\t\tThe maximum amount of memory to be consumed before terminating"
   397  	echo -e "\tMIN_MEMORY_FREE (${MIN_MEMORY_FREE})"
   398  	echo -e "\t\tThe minimum amount of memory allowed to be free before terminating"
   399  	echo -e "\tDUMP_CACHES (${DUMP_CACHES})"
   400  	echo -e "\t\tA flag to note if the system caches should be dumped before capturing stats"
   401  	echo -e "\tTEST_NAME (${TEST_NAME})"
   402  	echo -e "\t\tCan be set to over-ride the default JSON results filename"
   403  
   404  }
   405  
   406  function help()
   407  {
   408  	usage=$(cat << EOF
   409  Usage: $0 [-h] [options]
   410     Description:
   411  	Launch a series of workloads and take memory metric measurements after
   412  	each launch.
   413     Options:
   414          -h,    Help page.
   415  EOF
   416  )
   417  	echo "$usage"
   418  	show_vars
   419  }
   420  
   421  function main() {
   422  
   423  	local OPTIND
   424  	while getopts "h" opt;do
   425  		case ${opt} in
   426  		h)
   427  		    help
   428  		    exit 0;
   429  		    ;;
   430  		esac
   431  	done
   432  	shift $((OPTIND-1))
   433  
   434  	init
   435  	go
   436  	cleanup
   437  }
   438  
   439  main "$@"