#!/bin/bash
# Copyright (c) 2017-2022 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Description of the test:
# This test takes a number of time measurements through the complete
# launch/shutdown cycle of a single container.
# From those measurements it derives a number of time measures, such as:
# - time to payload execution
# - time to get to VM kernel
# - time in VM kernel boot
# - time to quit
# - total time (from launch to finished)
#
# Note, the <image> used for this test must support the full 'date' command
# syntax - the date from busybox for instance *does not* support this, so
# will not work with this test.
#
# Note, this test launches a single container at a time, that quits - thus,
# this test measures times for the 'first container' only. This test does
# not look for any scalability slowdowns as the number of running containers
# increases for instance - that is handled in other tests

set -e

SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../lib/common.bash"

# Calculating the kernel time from dmesg stamps only really works for VM
# based runtimes - we dynamically enable it if we find we are using a known
# VM runtime
CALCULATE_KERNEL=

REQUIRED_CMDS=("bc" "awk")

# Total number of digits kept after the decimal point when formatting
# calculation results
CALC_SCALE=4

# The date command format we use to ensure we capture the ns timings.
# Note the no-0-padding - 0 padding the results breaks bc in some cases
DATECMD="date -u +%-s:%-N"

# The modern Linux RNG is extremely fast at generating entropy on demand
# and does not need to have as large a store of entropy anymore as the value
# of 256 was found to work well with common cryptographic algorithms
entropy_level="256"
# Number of successful (valid) iterations performed so far
num_iters=0

# Maximum number of extra repetitions allowed for failed (invalid) runs
MAX_REPETITIONS=3

# The individual results are stored in arrays, one entry per valid iteration
declare -a total_result_ds
declare -a to_workload_ds
declare -a in_kernel_ds
declare -a to_kernel_ds
declare -a to_quit_ds
# data_is_valid value 1 represents not valid
# data_is_valid value 0 represents valid
data_is_valid=0

# Wait (up to 'retries' seconds) for the host entropy pool to reach
# ${entropy_level}; die if it never does.
check_entropy_level() {
	local retries="10"
	local entropy_avail
	for i in $(seq 1 "$retries"); do
		entropy_avail=$(cat /proc/sys/kernel/random/entropy_avail)
		if [ "$entropy_avail" -ge "$entropy_level" ]; then
			break
		fi
		sleep 1
	done
	if [ "$(cat /proc/sys/kernel/random/entropy_avail)" -lt "$entropy_level" ]; then
		die "Not enough entropy level to run this test"
	fi
}

# convert a 'seconds:nanoseconds' string ($1) into nanoseconds on stdout
sn_to_ns() {
	local s ns
	# Remove leading 0's, otherwise printf would interpret the numbers
	# as octal. If a field was all zeros, sed leaves it empty - default
	# such a field back to 0 so printf still gets a valid number.
	s=$(echo "${1%:*}" | sed 's/^0*//g')
	ns=$(echo "${1##*:}" | sed 's/^0*//g')
	# print the two fields as a single 0-padded ns value
	printf "%d%09d" "${s:-0}" "${ns:-0}"
}

# convert 'nanoseconds' (since epoch, $1) into 'float' seconds on stdout,
# formatted with the global CALC_SCALE
ns_to_s() {
	printf "%.0${CALC_SCALE}f" "$(bc <<< "scale=$CALC_SCALE; $1 / 1000000000")"
}

# Launch one container, take all the time measurements, validate them and
# (if valid) append them to the per-metric result arrays.
# Sets: data_is_valid (0 = valid run, 1 = invalid run)
run_workload() {
	# L_CALC_SCALE gives 'bc' enough significant decimal digits for the
	# in-kernel period estimation
	local L_CALC_SCALE=13
	start_time=$($DATECMD)

	# Check entropy level of the host
	check_entropy_level

	# Run the image and command and capture the results into an array...
	declare workload_result
	readarray -n 0 workload_result < <(sudo -E "${CTR_EXE}" run --rm --runtime="${CTR_RUNTIME}" "${IMAGE}" test bash -c "$DATECMD $DMESGCMD")

	end_time=$($DATECMD)

	# Delay these conversions until after we have run - we do not want
	# to measure them in the results
	start_time=$(sn_to_ns "$start_time")
	end_time=$(sn_to_ns "$end_time")

	# Extract the 'date' info from the first line of the log.
	# This script assumes the VM clock is in sync with the host clock...
	workload_time=${workload_result[0]}
	workload_time=$(echo "$workload_time" | tr -d '\r')
	workload_time=$(sn_to_ns "$workload_time")

	# How long did the whole launch/quit take
	total_period=$((end_time-start_time))
	# How long did it take to get to the workload
	workload_period=$((workload_time-start_time))
	# How long did it take to quit
	shutdown_period=$((end_time-workload_time))

	if [ -n "$CALCULATE_KERNEL" ]; then
		# Grab the last kernel dmesg time.
		# We need the last real kernel line before the systemd lines
		# begin; the last 'Freeing unused kernel' line is a reasonable
		# 'last in kernel line' to look for.
		# We presume that, as this is a cold-boot VM kernel, the first
		# dmesg stamp is at '0 seconds', so the timestamp of that last
		# line is the length of time spent in the kernel.
		kernel_last_line=$(printf "%s\n" "${workload_result[@]}" | grep -F "Freeing unused kernel" | tail -1)

		if [ -z "$kernel_last_line" ]; then
			echo "No kernel last line"
			for l in "${workload_result[@]}"; do
				echo ">: [$l]"
			done
			die "No kernel last line"
		fi

		# dmesg lines look like '[    1.234567] message' - field 2 is
		# the timestamp, with a trailing ']' to strip
		kernel_period=$(echo "$kernel_last_line" | awk '{print $2}' | tr -d "]")

		# Time to get to the kernel = time to workload - time in kernel
		to_kernel_period=$(printf "%f" "$(bc <<<"scale=$L_CALC_SCALE; $(ns_to_s "$workload_period") - $kernel_period")")
	else
		kernel_period="0.0"
		to_kernel_period="0.0"
	fi

	total_result="$(ns_to_s "$total_period")"
	to_workload="$(ns_to_s "$workload_period")"
	in_kernel=$kernel_period
	to_kernel=$to_kernel_period
	to_quit=$(ns_to_s "$shutdown_period")

	# Sanity check - none of the derived times should be <= 0
	tr_is_neg=$(echo "${total_result}<=0.0" | bc -l)
	tw_is_neg=$(echo "${to_workload}<=0.0" | bc -l)
	ik_is_neg=$(echo "${in_kernel}<=0.0" | bc -l)
	tk_is_neg=$(echo "${to_kernel}<=0.0" | bc -l)
	tq_is_neg=$(echo "${to_quit}<=0.0" | bc -l)

	data_is_valid=0
	if [ "$tr_is_neg" -eq 1 ] || [ "$tw_is_neg" -eq 1 ] || [ "$ik_is_neg" -eq 1 ] || [ "$tk_is_neg" -eq 1 ] || [ "$tq_is_neg" -eq 1 ]; then
		data_is_valid=1
	else
		# Insert results individually
		total_result_ds+=("$total_result")
		to_workload_ds+=("$to_workload")
		in_kernel_ds+=("$in_kernel")
		to_kernel_ds+=("$to_kernel")
		to_quit_ds+=("$to_quit")
	fi

	num_iters=$((num_iters+1))

	# If we are doing an (optional) scaling test, then we launch a permanent
	# container between each of our 'test' containers. The aim being to see
	# if our launch times are linear with the number of running containers
	# or not.
	# NOTE(review): this re-uses the container name 'test' on every
	# iteration - presumably ctr tolerates/replaces it; verify if scaling
	# runs beyond the first iteration are required.
	if [ -n "$SCALING" ]; then
		sudo -E "${CTR_EXE}" run --runtime="${CTR_RUNTIME}" -d "${IMAGE}" test bash -c "tail -f /dev/null"
	fi
}

# Writes a JSON array element (via metrics_json_add_array_element) with the
# measurement results of each individual execution
write_individual_results() {
	for i in "${!total_result_ds[@]}"; do
		local json="$(cat << EOF
	{
		"total": {
			"Result": ${total_result_ds[i]},
			"Units": "s"
		},
		"to-workload": {
			"Result": ${to_workload_ds[i]},
			"Units": "s"
		},
		"in-kernel": {
			"Result": ${in_kernel_ds[i]},
			"Units": "s"
		},
		"to-kernel": {
			"Result": ${to_kernel_ds[i]},
			"Units": "s"
		},
		"to-quit": {
			"Result": ${to_quit_ds[i]},
			"Units": "s"
		}
	}
EOF
)"
		metrics_json_add_array_element "$json"
	done
}

# Set up the test name/args, decide whether dmesg/kernel-time calculation is
# needed for this runtime, and prepare a clean environment.
init () {
	TEST_ARGS="image=${IMAGE} runtime=${CTR_RUNTIME} units=seconds"

	# We set the generic name here, but we save the different time results
	# separately, and append the actual detail to the name at the time of
	# saving...
	TEST_NAME="boot times"

	# If we are scaling, note that in the name
	[ -n "$SCALING" ] && TEST_NAME="${TEST_NAME} scaling"

	echo "Executing test: ${TEST_NAME} ${TEST_ARGS}"
	check_cmds "${REQUIRED_CMDS[@]}"

	# For non-VM runtimes, we don't use the output of dmesg, and
	# we have seen it cause some test instabilities, so do not invoke
	# it if not needed.
	if [ "${CTR_RUNTIME}" == "io.containerd.runc.v2" ]; then
		DMESGCMD=""
	else
		CALCULATE_KERNEL=1
		DMESGCMD="; dmesg"
	fi

	# Start from a fairly clean environment
	init_env
	check_images "$IMAGE"
}

# Computes the average of the data passed as arguments, printed with the
# global CALC_SCALE decimal digits.
calc_avg_array() {
	data=("$@")
	avg=0
	LSCALE=6
	size="${#data[@]}"

	[ -z "$data" ] && die "List of results was not passed to the calc_avg_array() function when trying to calculate the average result."
	[ "$size" -eq 0 ] && die "Division by zero: The number of items is 0 when trying to calculate the average result."

	sum=$(IFS='+'; echo "scale=4; ${data[*]}" | bc)
	avg=$(echo "scale=$LSCALE; $sum / $size" | bc)
	printf "%.0${CALC_SCALE}f" "$avg"
}

# Computes the standard deviation of the data passed as arguments, using
# the sum-of-squares formula: sqrt((sum(x^2) - (sum(x))^2/n) / n).
# NOTE(review): this (and calc_cov_array) deliberately mutates the global
# CALC_SCALE - later calc_avg_array calls pick that up; preserved as-is.
calc_sd_array() {
	data=("$@")
	sum_sqr_n=0
	size=${#data[@]}

	# LSCALE is the scale used for intermediate calculations
	# CALC_SCALE is the scale used for the result
	LSCALE=13
	CALC_SCALE=6

	[ -z "$data" ] && die "List results was not passed to the calc_sd_result() function when trying to calculate the standard deviation result."
	[ "$size" -eq 0 ] && die "Division by zero: The number of items is 0 when trying to calculate the standard deviation result."

	# [1] sum data
	sum_data=$(IFS='+'; echo "scale=$LSCALE; ${data[*]}" | bc)

	# [2] square the sum of data
	pow_2_sum_data=$(echo "scale=$LSCALE; $sum_data ^ 2" | bc)

	# [3] divide the square of data by the num of items
	div_sqr_n=$(echo "scale=$LSCALE; $pow_2_sum_data / $size" | bc)

	# [4] Sum of the sqr of each item
	for i in "${data[@]}"; do
		sqr_n=$(echo "scale=$LSCALE; $i ^ 2" | bc)
		sum_sqr_n=$(echo "scale=$LSCALE; $sqr_n + $sum_sqr_n" | bc)
	done

	# substract [3] from [4]
	subs=$(echo "scale=$LSCALE; $sum_sqr_n - $div_sqr_n" | bc)

	# get variance
	var=$(echo "scale=$LSCALE; $subs / $size" | bc)

	# get standard deviation
	sd=$(echo "scale=$LSCALE; sqrt($var)" | bc)

	# if sd is zero, limit the decimal scale to 1 digit
	sd_is_zero=$(echo "${sd}==0.0" | bc -l)
	[ "$sd_is_zero" -eq 1 ] && CALC_SCALE=1

	printf "%.0${CALC_SCALE}f" "$sd"
}

# Computes the Coefficient of variation (sd $1 / mean $2).
# The result is given as a percentage.
calc_cov_array() {
	sd=$1
	mean=$2

	# LSCALE used to consider more decimal digits than usual in cov estimation.
	# CALC_SCALE is the scale used to return the result.
	LSCALE=13
	CALC_SCALE=6

	mean_is_zero=$(echo "${mean}==0.0" | bc -l)

	[ -z "$sd" ] && die "Standard deviation was not passed to the calc_cov_array() function when trying to calculate the CoV result."
	[ -z "$mean" ] && die "Mean was not passed to the calc_cov_array() function when trying to calculate the CoV result."
	[ "$mean_is_zero" -eq 1 ] && die "Division by zero: Mean value passed is 0 when trying to get CoV result."

	cov=$(echo "scale=$LSCALE; $sd / $mean" | bc)
	cov=$(echo "scale=$LSCALE; $cov * 100" | bc)

	# if cov is zero, limit the decimal scale to 1 digit
	cov_is_zero=$(echo "${cov}==0.0" | bc -l)
	[ "$cov_is_zero" -eq 1 ] && CALC_SCALE=1

	printf "%.0${CALC_SCALE}f" "$cov"
}

# Writes a JSON array element with the statistics (avg/sd/cov) for each
# launch time metric
write_stats_results() {
	size="${#total_result_ds[@]}"
	avg_total_result=$(calc_avg_array "${total_result_ds[@]}")
	avg_to_workload=$(calc_avg_array "${to_workload_ds[@]}")
	avg_in_kernel=$(calc_avg_array "${in_kernel_ds[@]}")
	avg_to_kernel=$(calc_avg_array "${to_kernel_ds[@]}")
	avg_to_quit=$(calc_avg_array "${to_quit_ds[@]}")

	sd_total_result=$(calc_sd_array "${total_result_ds[@]}")
	sd_to_workload=$(calc_sd_array "${to_workload_ds[@]}")
	sd_in_kernel=$(calc_sd_array "${in_kernel_ds[@]}")
	sd_to_kernel=$(calc_sd_array "${to_kernel_ds[@]}")
	sd_to_quit=$(calc_sd_array "${to_quit_ds[@]}")

	cov_total_result=$(calc_cov_array "${sd_total_result}" "${avg_total_result}")
	cov_to_workload=$(calc_cov_array "${sd_to_workload}" "${avg_to_workload}")
	cov_in_kernel=$(calc_cov_array "${sd_in_kernel}" "${avg_in_kernel}")
	cov_to_kernel=$(calc_cov_array "${sd_to_kernel}" "${avg_to_kernel}")
	cov_to_quit=$(calc_cov_array "${sd_to_quit}" "${avg_to_quit}")

	# NOTE(review): 'to-kernel_avg' is inconsistent with the other keys
	# ('to-kernel' would match) but is kept as-is, since downstream
	# report consumers may depend on the existing key name.
	local json="$(cat << EOF
	{
		"size": $size,
		"total": {
			"avg": $avg_total_result,
			"sd": $sd_total_result,
			"cov": $cov_total_result
		},
		"to-workload": {
			"avg": $avg_to_workload,
			"sd": $sd_to_workload,
			"cov": $cov_to_workload
		},
		"in-kernel": {
			"avg": $avg_in_kernel,
			"sd": $sd_in_kernel,
			"cov": $cov_in_kernel
		},
		"to-kernel_avg": {
			"avg": $avg_to_kernel,
			"sd": $sd_to_kernel,
			"cov": $cov_to_kernel
		},
		"to-quit": {
			"avg": $avg_to_quit,
			"sd": $sd_to_quit,
			"cov": $cov_to_quit
		}
	}
EOF
)"

	metrics_json_add_array_element "$json"
}

# Print usage information
help() {
	usage=$(cat << EOF
Usage: $0 [-h] [options]
   Description:
	This script takes time measurements for different
	stages of a boot/run/rm cycle
   Options:
	-h,	Help
	-i <name>,	Image name (mandatory)
	-n <n>,	Number of containers to run (mandatory)
	-s,	Enable scaling (keep containers running)
EOF
)
	echo "$usage"
}

main() {
	local OPTIND
	# Note: a dead 'd' flag was previously accepted here with no handler;
	# unknown flags (including -d) now fall through to the '?' fail path.
	while getopts "hi:n:s" opt;do
		case ${opt} in
		h)
			help
			exit 0
			;;
		i)
			IMAGE="${OPTARG}"
			;;
		n)
			TIMES="${OPTARG}"
			;;
		s)
			SCALING=true
			;;
		?)
			# parse failure
			help
			die "Failed to parse arguments"
			;;
		esac
	done
	shift $((OPTIND-1))

	[ -z "$IMAGE" ] && help && die "Mandatory IMAGE name not supplied"
	[ -z "$TIMES" ] && help && die "Mandatory number of containers not supplied"
	# Although this is mandatory, the 'lib/common.bash' environment can set
	# it, so we may not fail if it is not set on the command line...
	# NOTE(review): this checks RUNTIME while the rest of the script uses
	# CTR_RUNTIME - presumably common.bash derives one from the other;
	# verify against lib/common.bash.
	[ -z "$RUNTIME" ] && help && die "Mandatory runtime argument not supplied"

	init
	j=0
	max_reps=$MAX_REPETITIONS

	while [ "$j" -lt "$TIMES" ]; do

		echo " run $num_iters"
		run_workload

		if [ "$data_is_valid" -eq 0 ]; then
			j=$(( j + 1 ))
			# if valid result then reset 'max_reps' to initial value
			max_reps=$MAX_REPETITIONS
			continue
		fi

		echo "Skipping run due to invalid result"
		# Do not use '((max_reps-=1))' here: when the result of the
		# arithmetic reaches 0, (( )) returns a non-zero status and
		# 'set -e' would abort the script before the die below runs.
		max_reps=$((max_reps - 1))

		if [ "$max_reps" -lt 0 ]; then
			die "Max. num of repetitions reached for run: $j"
		fi
	done

	metrics_json_init
	metrics_json_start_array
	write_stats_results
	metrics_json_end_array "Statistics"
	metrics_json_start_array
	write_individual_results
	metrics_json_end_array "Results"
	metrics_json_save
	clean_env_ctr
}

main "$@"