github.com/kata-containers/tests@v0.0.0-20240307153542-772105b56064/metrics/density/fast_footprint.sh

#!/bin/bash
# Copyright (c) 2017-2018, 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# A script to gather memory 'footprint' information as we launch more
# and more containers
#
# The script gathers information about both user and kernel space consumption
# Output is written to a .json file, named using some of the config component
# names (such as footprint-busybox.json)

# Pull in some common, useful, items
SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../lib/common.bash"

# Note that all vars that can be set from outside the script (that is,
# passed in the ENV), use the ':-' setting to allow being over-ridden

# Default sleep, in seconds, to let containers come up and finish their
# initialisation before we take the measures. Some of the larger
# containers can take a number of seconds to get running.
PAYLOAD_SLEEP="${PAYLOAD_SLEEP:-10}"

# How long, in seconds, do we wait for KSM to 'settle down', before we
# timeout and just continue anyway.
KSM_WAIT_TIME="${KSM_WAIT_TIME:-300}"

# How long, in seconds, do we poll for ctr to complete launching all the
# containers?
CTR_POLL_TIMEOUT="${CTR_POLL_TIMEOUT:-300}"

# How many containers do we launch in parallel before taking the PAYLOAD_SLEEP
# nap
PARALLELISM="${PARALLELISM:-10}"

### The default config - run a small busybox image
# Define what we will be running (app under test)
# Default is we run busybox, as a 'small' workload
PAYLOAD="${PAYLOAD:-quay.io/prometheus/busybox:latest}"
PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}"

###
# Which RUNTIME we use is picked up from the env in
# common.bash. You can over-ride it by setting RUNTIME in your env

###
# Define the cutoff checks for when we stop running the test
# Run up to this many containers
NUM_CONTAINERS="${NUM_CONTAINERS:-100}"
# Run until we have consumed this much memory (from MemFree)
MAX_MEMORY_CONSUMED="${MAX_MEMORY_CONSUMED:-256*1024*1024*1024}"
# Run until we have this much MemFree left
MIN_MEMORY_FREE="${MIN_MEMORY_FREE:-2*1024*1024*1024}"

# Tools we need to have installed in order to operate
REQUIRED_COMMANDS="smem awk"

# If we 'dump' the system caches before we measure then we get less
# noise in the results - they show more what our un-reclaimable footprint is
DUMP_CACHES="${DUMP_CACHES:-1}"

# Affects the name of the file to store the results in
TEST_NAME="${TEST_NAME:-fast-footprint-busybox}"

############# end of configurable items ###################
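
# Example invocation (illustrative only - the values and test name below are
# arbitrary, not recommendations). All of the settings above are plain ENV
# overrides, so a smaller, quicker run could look like:
#
#   NUM_CONTAINERS=20 PARALLELISM=5 PAYLOAD_SLEEP=5 \
#   TEST_NAME="fast-footprint-busybox-small" ./fast_footprint.sh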

# vars to remember where we started so we can calc diffs
base_mem_avail=0
base_mem_free=0

# dump the kernel caches, so we get a more precise (or just different)
# view of what our footprint really is.
function dump_caches() {
    sudo bash -c "echo 3 > /proc/sys/vm/drop_caches"
}

function init() {
    restart_containerd_service

    check_cmds $REQUIRED_COMMANDS
    sudo -E "${CTR_EXE}" image pull "$PAYLOAD"

    # Modify the test name if running with KSM enabled
    check_for_ksm

    # Use the common init func to get to a known state
    init_env

    # Prepare to start storing results
    metrics_json_init

    # Store up baseline measures
    base_mem_avail=$(free -b | head -2 | tail -1 | awk '{print $7}')
    base_mem_free=$(get_memfree)

    # Store our configuration for this run
    save_config
}

save_config(){
    metrics_json_start_array

    local json="$(cat << EOF
    {
        "testname": "${TEST_NAME}",
        "payload": "${PAYLOAD}",
        "payload_args": "${PAYLOAD_ARGS}",
        "payload_sleep": ${PAYLOAD_SLEEP},
        "ksm_settle_time": ${KSM_WAIT_TIME},
        "num_containers": ${NUM_CONTAINERS},
        "parallelism": ${PARALLELISM},
        "max_memory_consumed": "${MAX_MEMORY_CONSUMED}",
        "min_memory_free": "${MIN_MEMORY_FREE}",
        "dump_caches": "${DUMP_CACHES}"
    }
EOF
)"
    metrics_json_add_array_element "$json"
    metrics_json_end_array "Config"
}

function cleanup() {
    # Finish storing the results
    metrics_json_save

    clean_env_ctr
}

# helper function to get USS of process in arg1
function get_proc_uss() {
    item=$(sudo smem -t -P "^$1" | tail -1 | awk '{print $4}')
    ((item*=1024))
    echo $item
}

# helper function to get PSS of process in arg1
function get_proc_pss() {
    item=$(sudo smem -t -P "^$1" | tail -1 | awk '{print $5}')
    ((item*=1024))
    echo $item
}

# Get the PSS for the whole of userspace (all processes)
# This allows us to see if we had any impact on the rest of the system, for
# instance dockerd grows as we launch containers, so we should account for
# that in our total memory breakdown
function grab_all_pss() {
    item=$(sudo smem -t | tail -1 | awk '{print $5}')
    ((item*=1024))

    local json="$(cat << EOF
    "all_pss": {
        "pss": $item,
        "Units": "KB"
    }
EOF
)"

    metrics_json_add_array_fragment "$json"
}

function grab_user_smem() {
    # userspace
    item=$(sudo smem -w | head -5 | tail -1 | awk '{print $3}')
    ((item*=1024))

    local json="$(cat << EOF
    "user_smem": {
        "userspace": $item,
        "Units": "KB"
    }
EOF
)"

    metrics_json_add_array_fragment "$json"
}

function grab_slab() {
    # Grabbing slab total from meminfo is easier than doing the math
    # on slabinfo
    item=$(fgrep "Slab:" /proc/meminfo | awk '{print $2}')
    ((item*=1024))

    local json="$(cat << EOF
    "slab": {
        "slab": $item,
        "Units": "KB"
    }
EOF
)"

    metrics_json_add_array_fragment "$json"
}

function get_memfree() {
    mem_free=$(sudo smem -w | head -6 | tail -1 | awk '{print $4}')
    ((mem_free*=1024))
    echo $mem_free
}
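
# Example usage of the per-process helpers above (illustrative only - they are
# defined for convenience and not called elsewhere in this script). smem's -P
# option filters by a process-name regex, and the helpers anchor the pattern
# with '^', so the USS/PSS of all processes whose command starts with "qemu"
# (a hypothetical pattern) could be read as:
#
#   get_proc_uss "qemu"
#   get_proc_pss "qemu"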

function grab_system() {

    # avail memory, from 'free'
    local avail=$(free -b | head -2 | tail -1 | awk '{print $7}')
    local avail_decr=$((base_mem_avail-avail))

    # cached memory, from 'free'
    local cached=$(free -b | head -2 | tail -1 | awk '{print $6}')

    # free memory from smem
    local smem_free=$(get_memfree)
    local free_decr=$((base_mem_free-smem_free))

    # Anon pages
    local anon=$(fgrep "AnonPages:" /proc/meminfo | awk '{print $2}')
    ((anon*=1024))

    # Mapped pages
    local mapped=$(egrep "^Mapped:" /proc/meminfo | awk '{print $2}')
    ((mapped*=1024))

    # Cached
    local meminfo_cached=$(grep "^Cached:" /proc/meminfo | awk '{print $2}')
    ((meminfo_cached*=1024))

    local json="$(cat << EOF
    "system": {
        "avail": $avail,
        "avail_decr": $avail_decr,
        "cached": $cached,
        "smem_free": $smem_free,
        "free_decr": $free_decr,
        "anon": $anon,
        "mapped": $mapped,
        "meminfo_cached": $meminfo_cached,
        "Units": "KB"
    }
EOF
)"

    metrics_json_add_array_fragment "$json"
}

function grab_stats() {
    # If configured, dump the caches so we get a more stable
    # view of what our static footprint really is
    if (( DUMP_CACHES )); then
        dump_caches
    fi

    # user space data
    # PSS taken across all of userspace
    grab_all_pss
    # user as reported by smem
    grab_user_smem

    # System overview data
    # System free and cached
    grab_system

    # kernel data
    # The 'total kernel space taken' we can work out as:
    #  ktotal = ((free-avail)-user)
    # So, we don't grab that number from smem, as that is what it does
    # internally anyhow.
    # Still try to grab any finer kernel details that we can though

    # totals from slabinfo
    grab_slab

    metrics_json_close_array_element
}

function check_limits() {
    mem_free=$(get_memfree)
    if ((mem_free <= MIN_MEMORY_FREE)); then
        echo 1
        return
    fi

    mem_consumed=$((base_mem_avail-mem_free))
    if ((mem_consumed >= MAX_MEMORY_CONSUMED)); then
        echo 1
        return
    fi

    echo 0
}

launch_containers() {
    local parloops leftovers

    (( parloops=${NUM_CONTAINERS}/${PARALLELISM} ))
    (( leftovers=${NUM_CONTAINERS} - (${parloops}*${PARALLELISM}) ))

    echo "Launching ${parloops}x${PARALLELISM} containers + ${leftovers} extras"

    containers=()

    local iter n
    for iter in $(seq 1 $parloops); do
        echo "Launch iteration ${iter}"
        for n in $(seq 1 $PARALLELISM); do
            containers+=($(random_name))
            sudo -E "${CTR_EXE}" run -d --runtime=$CTR_RUNTIME $PAYLOAD ${containers[-1]} sh -c "$PAYLOAD_ARGS" &
        done

        if [[ $PAYLOAD_SLEEP ]]; then
            sleep $PAYLOAD_SLEEP
        fi

        # check if we have hit one of our limits and need to wrap up the tests
        if (($(check_limits))); then
            echo "Ran out of resources, check_limits failed"
            return
        fi
    done

    for n in $(seq 1 $leftovers); do
        containers+=($(random_name))
        sudo -E "${CTR_EXE}" run -d --runtime=$CTR_RUNTIME $PAYLOAD ${containers[-1]} sh -c "$PAYLOAD_ARGS" &
    done
}

wait_containers() {
    local t numcontainers
    # nap 3s between checks
    local step=3

    for ((t=0; t<${CTR_POLL_TIMEOUT}; t+=step)); do

        numcontainers=$(sudo -E "${CTR_EXE}" c list -q | wc -l)

        if (( numcontainers >= ${NUM_CONTAINERS} )); then
            echo "All containers now launched (${t}s)"
            return
        else
            echo "Waiting for containers to launch (${numcontainers} at ${t}s)"
        fi
        sleep ${step}
    done

    echo "Timed out waiting for containers to launch (${t}s)"
    cleanup
    die "Timed out waiting for containers to launch (${t}s)"
}
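
# For reference, the check that wait_containers() polls on can also be run by
# hand while the test is in flight (assuming CTR_EXE resolves to the 'ctr'
# binary, as set up by the common metrics library):
#
#   sudo ctr c list -q | wc -l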

function go() {
    # Init the json cycle for this save
    metrics_json_start_array

    # Grab the first set of stats before we run any containers.
    grab_stats

    launch_containers
    wait_containers

    if [ "$ksm_on" == "1" ]; then
        echo "Waiting for KSM to settle..."
        wait_ksm_settle ${KSM_WAIT_TIME}
    fi

    grab_stats

    # Wrap up the results array
    metrics_json_end_array "Results"
}

function show_vars()
{
    echo -e "\nEnvironment variables:"
    echo -e "\tName (default)"
    echo -e "\t\tDescription"
    echo -e "\tPAYLOAD (${PAYLOAD})"
    echo -e "\t\tThe ctr image to run"
    echo -e "\tPAYLOAD_ARGS (${PAYLOAD_ARGS})"
    echo -e "\t\tAny extra arguments passed into the ctr 'run' command"
    echo -e "\tPAYLOAD_SLEEP (${PAYLOAD_SLEEP})"
    echo -e "\t\tSeconds to sleep between launch and measurement, to allow settling"
    echo -e "\tKSM_WAIT_TIME (${KSM_WAIT_TIME})"
    echo -e "\t\tSeconds to wait for KSM to settle before we take the final measure"
    echo -e "\tCTR_POLL_TIMEOUT (${CTR_POLL_TIMEOUT})"
    echo -e "\t\tSeconds to poll for ctr to finish launching containers"
    echo -e "\tPARALLELISM (${PARALLELISM})"
    echo -e "\t\tNumber of containers we launch in parallel"
    echo -e "\tNUM_CONTAINERS (${NUM_CONTAINERS})"
    echo -e "\t\tThe total number of containers to run"
    echo -e "\tMAX_MEMORY_CONSUMED (${MAX_MEMORY_CONSUMED})"
    echo -e "\t\tThe maximum amount of memory to be consumed before terminating"
    echo -e "\tMIN_MEMORY_FREE (${MIN_MEMORY_FREE})"
    echo -e "\t\tThe minimum amount of memory allowed to be free before terminating"
    echo -e "\tDUMP_CACHES (${DUMP_CACHES})"
    echo -e "\t\tA flag to note if the system caches should be dumped before capturing stats"
    echo -e "\tTEST_NAME (${TEST_NAME})"
    echo -e "\t\tCan be set to over-ride the default JSON results filename"
}

function help()
{
    usage=$(cat << EOF
Usage: $0 [-h] [options]
Description:
    Launch a series of workloads and take memory metric measurements after
    each launch.
Options:
    -h, Help page.
EOF
)
    echo "$usage"
    show_vars
}

function main() {
    local OPTIND
    while getopts "h" opt; do
        case ${opt} in
            h)
                help
                exit 0
                ;;
        esac
    done
    shift $((OPTIND-1))

    init
    go
    cleanup
}

main "$@"