github.com/dmaizel/tests@v0.0.0-20210728163746-cae6a2d9cee8/metrics/density/fast_footprint.sh

#!/bin/bash
# Copyright (c) 2017-2018, 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# A script to gather memory 'footprint' information as we launch more
# and more containers
#
# The script gathers information about both user and kernel space consumption
# Output is into a .json file, named using some of the config component names
# (such as footprint-busybox.json)

# Pull in some common, useful, items
SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../lib/common.bash"

# Note that all vars that can be set from outside the script (that is,
# passed in the ENV), use the ':-' setting to allow being over-ridden

# Default sleep, in seconds, to let containers come up and finish their
# initialisation before we take the measures. Some of the larger
# containers can take a number of seconds to get running.
PAYLOAD_SLEEP="${PAYLOAD_SLEEP:-10}"

# How long, in seconds, do we wait for KSM to 'settle down', before we
# timeout and just continue anyway.
KSM_WAIT_TIME="${KSM_WAIT_TIME:-300}"

# How long, in seconds, do we poll for ctr to complete launching all the
# containers?
CTR_POLL_TIMEOUT="${CTR_POLL_TIMEOUT:-300}"

# How many containers do we launch in parallel before taking the PAYLOAD_SLEEP
# nap
PARALLELISM="${PARALLELISM:-10}"

### The default config - run a small busybox image
# Define what we will be running (app under test)
# Default is we run busybox, as a 'small' workload
PAYLOAD="${PAYLOAD:-quay.io/prometheus/busybox:latest}"
PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}"
PAYLOAD_RUNTIME_ARGS="${PAYLOAD_RUNTIME_ARGS:---memory-limit 5120}"

###
# which RUNTIME we use is picked up from the env in
# common.bash. You can over-ride by setting RUNTIME in your env

###
# Define the cutoff checks for when we stop running the test
# Run up to this many containers
NUM_CONTAINERS="${NUM_CONTAINERS:-200}"
# Run until we have consumed this much memory (from MemFree)
MAX_MEMORY_CONSUMED="${MAX_MEMORY_CONSUMED:-256*1024*1024*1024}"
# Run until we have this much MemFree left
MIN_MEMORY_FREE="${MIN_MEMORY_FREE:-2*1024*1024*1024}"

# Tools we need to have installed in order to operate
REQUIRED_COMMANDS="smem awk"

# If we 'dump' the system caches before we measure then we get less
# noise in the results - they show more what our un-reclaimable footprint is
DUMP_CACHES="${DUMP_CACHES:-1}"

# Affects the name of the file to store the results in
TEST_NAME="${TEST_NAME:-fast-footprint-busybox}"

############# end of configurable items ###################
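
# Illustrative invocation (a sketch, not part of the test flow itself): any of
# the settings above can be over-ridden from the environment, for example:
#   NUM_CONTAINERS=50 PARALLELISM=5 PAYLOAD_SLEEP=5 ./fast_footprint.sh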

# vars to remember where we started so we can calc diffs
base_mem_avail=0
base_mem_free=0

# dump the kernel caches, so we get a more precise (or just different)
# view of what our footprint really is.
function dump_caches() {
	sudo bash -c "echo 3 > /proc/sys/vm/drop_caches"
}

function init() {
	sudo systemctl restart containerd
	clean_env_ctr

	CONTAINERD_RUNTIME="io.containerd.kata.v2"
	check_cmds $REQUIRED_COMMANDS
	sudo ctr image pull "$PAYLOAD"

	# Modify the test name if running with KSM enabled
	check_for_ksm

	# Use the common init func to get to a known state
	init_env

	# Prepare to start storing results
	metrics_json_init

	# Store up baseline measures
	base_mem_avail=$(free -b | head -2 | tail -1 | awk '{print $7}')
	base_mem_free=$(get_memfree)

	# Store our configuration for this run
	save_config
}

function save_config() {
	metrics_json_start_array

	local json="$(cat << EOF
	{
		"testname": "${TEST_NAME}",
		"payload": "${PAYLOAD}",
		"payload_args": "${PAYLOAD_ARGS}",
		"payload_runtime_args": "${PAYLOAD_RUNTIME_ARGS}",
		"payload_sleep": ${PAYLOAD_SLEEP},
		"ksm_settle_time": ${KSM_WAIT_TIME},
		"num_containers": ${NUM_CONTAINERS},
		"parallelism": ${PARALLELISM},
		"max_memory_consumed": "${MAX_MEMORY_CONSUMED}",
		"min_memory_free": "${MIN_MEMORY_FREE}",
		"dump_caches": "${DUMP_CACHES}"
	}
EOF
)"
	metrics_json_add_array_element "$json"
	metrics_json_end_array "Config"
}

function cleanup() {
	# Finish storing the results
	metrics_json_save

	clean_env_ctr
}

# helper function to get USS of process in arg1
function get_proc_uss() {
	item=$(sudo smem -t -P "^$1" | tail -1 | awk '{print $4}')
	((item*=1024))
	echo $item
}

# helper function to get PSS of process in arg1
function get_proc_pss() {
	item=$(sudo smem -t -P "^$1" | tail -1 | awk '{print $5}')
	((item*=1024))
	echo $item
}

# Get the PSS for the whole of userspace (all processes)
# This allows us to see if we had any impact on the rest of the system, for instance
# dockerd grows as we launch containers, so we should account for that in our total
# memory breakdown
function grab_all_pss() {
	item=$(sudo smem -t | tail -1 | awk '{print $5}')
	((item*=1024))

	local json="$(cat << EOF
	"all_pss": {
		"pss": $item,
		"Units": "KB"
	}
EOF
)"

	metrics_json_add_array_fragment "$json"
}

function grab_user_smem() {
	# userspace
	item=$(sudo smem -w | head -5 | tail -1 | awk '{print $3}')
	((item*=1024))

	local json="$(cat << EOF
	"user_smem": {
		"userspace": $item,
		"Units": "KB"
	}
EOF
)"

	metrics_json_add_array_fragment "$json"
}

function grab_slab() {
	# Grabbing slab total from meminfo is easier than doing the math
	# on slabinfo
	item=$(fgrep "Slab:" /proc/meminfo | awk '{print $2}')
	((item*=1024))

	local json="$(cat << EOF
	"slab": {
		"slab": $item,
		"Units": "KB"
	}
EOF
)"

	metrics_json_add_array_fragment "$json"
}

function get_memfree() {
	mem_free=$(sudo smem -w | head -6 | tail -1 | awk '{print $4}')
	((mem_free*=1024))
	echo $mem_free
}
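
# Illustrative use of the per-process helpers above (a sketch, not called by
# this test): get_proc_uss/get_proc_pss pass "^$1" to 'smem -P', so the
# argument is a regex anchored to the start of the process command, e.g. for a
# hypothetical process called "myproc":
#   myproc_pss=$(get_proc_pss "myproc")
#   echo "myproc PSS: ${myproc_pss}"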

function grab_system() {

	# avail memory, from 'free'
	local avail=$(free -b | head -2 | tail -1 | awk '{print $7}')
	local avail_decr=$((base_mem_avail-avail))

	# cached memory, from 'free'
	local cached=$(free -b | head -2 | tail -1 | awk '{print $6}')

	# free memory from smem
	local smem_free=$(get_memfree)
	local free_decr=$((base_mem_free-smem_free))

	# Anon pages
	local anon=$(fgrep "AnonPages:" /proc/meminfo | awk '{print $2}')
	((anon*=1024))

	# Mapped pages
	local mapped=$(egrep "^Mapped:" /proc/meminfo | awk '{print $2}')
	((mapped*=1024))

	# Cached
	local meminfo_cached=$(grep "^Cached:" /proc/meminfo | awk '{print $2}')
	((meminfo_cached*=1024))

	local json="$(cat << EOF
	"system": {
		"avail": $avail,
		"avail_decr": $avail_decr,
		"cached": $cached,
		"smem_free": $smem_free,
		"free_decr": $free_decr,
		"anon": $anon,
		"mapped": $mapped,
		"meminfo_cached": $meminfo_cached,
		"Units": "KB"
	}
EOF
)"

	metrics_json_add_array_fragment "$json"
}

function grab_stats() {
	# If configured, dump the caches so we get a more stable
	# view of what our static footprint really is
	if (( DUMP_CACHES )); then
		dump_caches
	fi

	# user space data
	# PSS taken all userspace
	grab_all_pss
	# user as reported by smem
	grab_user_smem

	# System overview data
	# System free and cached
	grab_system

	# kernel data
	# The 'total kernel space taken' we can work out as:
	# ktotal = ((free-avail)-user)
	# So, we don't grab that number from smem, as that is what it does
	# internally anyhow.
	# Still try to grab any finer kernel details that we can though

	# totals from slabinfo
	grab_slab

	metrics_json_close_array_element
}

function check_limits() {
	mem_free=$(get_memfree)
	if ((mem_free <= MIN_MEMORY_FREE)); then
		echo 1
		return
	fi

	mem_consumed=$((base_mem_avail-mem_free))
	if ((mem_consumed >= MAX_MEMORY_CONSUMED)); then
		echo 1
		return
	fi

	echo 0
}

# Launch the containers in batches of PARALLELISM, napping for PAYLOAD_SLEEP
# between batches, with any remainder launched as a final batch of 'leftovers'.
function launch_containers() {
	local parloops leftovers

	(( parloops=${NUM_CONTAINERS}/${PARALLELISM} ))
	(( leftovers=${NUM_CONTAINERS} - (${parloops}*${PARALLELISM}) ))

	echo "Launching ${parloops}x${PARALLELISM} containers + ${leftovers} extras"

	containers=()

	local iter n
	for iter in $(seq 1 $parloops); do
		echo "Launch iteration ${iter}"
		for n in $(seq 1 $PARALLELISM); do
			containers+=($(random_name))
			sudo ctr run $PAYLOAD_RUNTIME_ARGS -d --runtime=$CONTAINERD_RUNTIME $PAYLOAD ${containers[-1]} sh -c "$PAYLOAD_ARGS" &
		done

		if [[ $PAYLOAD_SLEEP ]]; then
			sleep $PAYLOAD_SLEEP
		fi

		# check if we have hit one of our limits and need to wrap up the tests
		if (($(check_limits))); then
			echo "Ran out of resources, check_limits failed"
			return
		fi
	done

	for n in $(seq 1 $leftovers); do
		containers+=($(random_name))
		sudo ctr run $PAYLOAD_RUNTIME_ARGS -d --runtime=$CONTAINERD_RUNTIME $PAYLOAD ${containers[-1]} sh -c "$PAYLOAD_ARGS" &
	done
}

# Poll until ctr reports all NUM_CONTAINERS containers, or die if we hit
# CTR_POLL_TIMEOUT first.
function wait_containers() {
	local t numcontainers
	# nap 3s between checks
	local step=3

	for ((t=0; t<${CTR_POLL_TIMEOUT}; t+=step)); do

		numcontainers=$(sudo ctr c list -q | wc -l)

		if (( numcontainers >= ${NUM_CONTAINERS} )); then
			echo "All containers now launched (${t}s)"
			return
		else
			echo "Waiting for containers to launch (${numcontainers} at ${t}s)"
		fi
		sleep ${step}
	done

	echo "Timed out waiting for containers to launch (${t}s)"
	cleanup
	die "Timed out waiting for containers to launch (${t}s)"
}
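
# Top level test flow: take a baseline measurement, launch all the containers,
# optionally wait for KSM to settle, and then take the final measurement.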
function go() {
	# Init the json cycle for this save
	metrics_json_start_array

	# Grab the first set of stats before we run any containers.
	grab_stats

	launch_containers
	wait_containers

	if [ "$ksm_on" == "1" ]; then
		echo "Waiting for KSM to settle..."
		wait_ksm_settle ${KSM_WAIT_TIME}
	fi

	grab_stats

	# Wrap up the results array
	metrics_json_end_array "Results"
}

function show_vars()
{
	echo -e "\nEnvironment variables:"
	echo -e "\tName (default)"
	echo -e "\t\tDescription"
	echo -e "\tPAYLOAD (${PAYLOAD})"
	echo -e "\t\tThe ctr image to run"
	echo -e "\tPAYLOAD_ARGS (${PAYLOAD_ARGS})"
	echo -e "\t\tAny arguments passed into the ctr image"
	echo -e "\tPAYLOAD_RUNTIME_ARGS (${PAYLOAD_RUNTIME_ARGS})"
	echo -e "\t\tAny extra arguments passed into the ctr 'run' command"
	echo -e "\tPAYLOAD_SLEEP (${PAYLOAD_SLEEP})"
	echo -e "\t\tSeconds to sleep between launch and measurement, to allow settling"
	echo -e "\tKSM_WAIT_TIME (${KSM_WAIT_TIME})"
	echo -e "\t\tSeconds to wait for KSM to settle before we take the final measure"
	echo -e "\tCTR_POLL_TIMEOUT (${CTR_POLL_TIMEOUT})"
	echo -e "\t\tSeconds to poll for ctr to finish launching containers"
	echo -e "\tPARALLELISM (${PARALLELISM})"
	echo -e "\t\tNumber of containers we launch in parallel"
	echo -e "\tNUM_CONTAINERS (${NUM_CONTAINERS})"
	echo -e "\t\tThe total number of containers to run"
	echo -e "\tMAX_MEMORY_CONSUMED (${MAX_MEMORY_CONSUMED})"
	echo -e "\t\tThe maximum amount of memory to be consumed before terminating"
	echo -e "\tMIN_MEMORY_FREE (${MIN_MEMORY_FREE})"
	echo -e "\t\tThe minimum amount of memory allowed to be free before terminating"
	echo -e "\tDUMP_CACHES (${DUMP_CACHES})"
	echo -e "\t\tA flag to note if the system caches should be dumped before capturing stats"
	echo -e "\tTEST_NAME (${TEST_NAME})"
	echo -e "\t\tCan be set to over-ride the default JSON results filename"
}

function help()
{
	usage=$(cat << EOF
	Usage: $0 [-h] [options]
	Description:
		Launch a series of workloads and take memory metric measurements after
		each launch.
	Options:
		-h,	Help page.
EOF
)
	echo "$usage"
	show_vars
}

function main() {

	local OPTIND
	while getopts "h" opt;do
		case ${opt} in
		h)
			help
			exit 0
			;;
		esac
	done
	shift $((OPTIND-1))

	init
	go
	cleanup
}

main "$@"