github.com/kata-containers/tests@v0.0.0-20240307153542-772105b56064/metrics/time/launch_times.sh (about)

     1  #!/bin/bash
     2  # Copyright (c) 2017-2022 Intel Corporation
     3  #
     4  # SPDX-License-Identifier: Apache-2.0
     5  #
     6  #  Description of the test:
     7  #  This test takes a number of time measurements through the complete
     8  #  launch/shutdown cycle of a single container.
     9  #  From those measurements it derives a number of time measures, such as:
    10  #   - time to payload execution
    11  #   - time to get to VM kernel
    12  #   - time in VM kernel boot
    13  #   - time to quit
    14  #   - total time (from launch to finished)
    15  #
    16  # Note, the <image> used for this test must support the full 'date' command
    17  # syntax - the date from busybox for instance *does not* support this, so
    18  # will not work with this test.
    19  #
    20  # Note, this test launches a single container at a time, that quits - thus,
    21  # this test measures times for the 'first container' only. This test does
    22  # not look for any scalability slowdowns as the number of running containers
    23  # increases for instance - that is handled in other tests
    24  
set -e

SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../lib/common.bash"

# Calculating the kernel time from dmesg stamps only really works for VM
# based runtimes - we dynamically enable it if we find we are using a known
# VM runtime
CALCULATE_KERNEL=

# Host tools this test requires (checked by check_cmds in init())
REQUIRED_CMDS=("bc" "awk")

# set the total number of decimal digits after the decimal point
# for representing the calculations results
CALC_SCALE=4

# The date command format we use to ensure we capture the ns timings
# Note the no-0-padding - 0 padding the results breaks bc in some cases
DATECMD="date -u +%-s:%-N"

# The modern Linux RNG is extremely fast at generating entropy on demand
# and does not need to have as large a store of entropy anymore as the value
# of 256 was found to work well with common cryptographic algorithms
entropy_level="256"

# Grabs the number of iterations performed
num_iters=0

# sets to this max number of repetitions for failed runs
MAX_REPETITIONS=3

# The individual results are stored in an array
declare -a total_result_ds
declare -a to_workload_ds
declare -a in_kernel_ds
declare -a to_kernel_ds
declare -a to_quit_ds
# data_is_valid flags the outcome of the most recent run_workload call:
# 0 means the timing data was valid (and was appended to the arrays above),
# 1 means the data was invalid (e.g. a negative period) and must be discarded
data_is_valid=0
    65  
    66  check_entropy_level() {
    67  	retries="10"
    68  	for i in $(seq 1 "$retries"); do
    69  		if [ $(cat /proc/sys/kernel/random/entropy_avail) -ge ${entropy_level} ]; then
    70  			break;
    71  		fi
    72  		sleep 1
    73  	done
    74  	if [ $(cat /proc/sys/kernel/random/entropy_avail) -lt ${entropy_level} ]; then
    75  		die "Not enough entropy level to run this test"
    76  	fi
    77  }
    78  
    79  # convert a 'seconds:nanoseconds' string into nanoseconds
    80  sn_to_ns() {
    81  	# !!: Remove 0's from beginning otherwise the number will be converted to octal
    82  	s=$(echo ${1%:*} | sed 's/^0*//g')
    83  	ns=$(echo ${1##*:} | sed 's/^0*//g')
    84  	# use shell magic to strip out the 's' and 'ns' fields and print
    85  	# them as a 0-padded ns string...
    86  	printf "%d%09d" ${s} ${ns}
    87  }
    88  
    89  # convert 'nanoseconds' (since epoch) into a 'float' seconds
    90  ns_to_s() {
    91  	printf "%.0${CALC_SCALE}f" $(bc <<< "scale=$CALC_SCALE; $1 / 1000000000")
    92  }
    93  
# Launch a single container that prints the in-guest date (and, for VM
# runtimes, dmesg), then derive the launch/shutdown timing measures from it.
#
# Reads globals:  DATECMD, DMESGCMD, CTR_EXE, CTR_RUNTIME, IMAGE,
#                 CALCULATE_KERNEL, SCALING, CALC_SCALE
# Writes globals: data_is_valid, num_iters; appends to the *_ds result
#                 arrays when the run produced valid (positive) periods.
run_workload() {
	# L_CALC_SCALE gives 'bc' a generous number of decimal digits for the
	# intermediate kernel period estimation below.
	L_CALC_SCALE=13
	start_time=$($DATECMD)

	# Check entropy level of the host
	check_entropy_level

	# Run the image and command and capture the results into an array...
	declare workload_result
	readarray -n 0 workload_result < <(sudo -E "${CTR_EXE}" run --rm --runtime=${CTR_RUNTIME} ${IMAGE} test bash -c "$DATECMD $DMESGCMD")

	end_time=$($DATECMD)

	# Delay this calculation until after we have run - do not want
	# to measure it in the results
	start_time=$(sn_to_ns $start_time)
	end_time=$(sn_to_ns $end_time)

	# Extract the 'date' info from the first line of the log
	# This script assumes the VM clock is in sync with the host clock...
	workload_time=${workload_result[0]}
	# Strip any carriage return that may have been captured with the line
	workload_time=$(echo $workload_time | tr -d '\r')
	workload_time=$(sn_to_ns $workload_time)

	# How long did the whole launch/quit take
	total_period=$((end_time-start_time))
	# How long did it take to get to the workload
	workload_period=$((workload_time-start_time))
	# How long did it take to quit
	shutdown_period=$((end_time-workload_time))

	if [ -n "$CALCULATE_KERNEL" ]; then
		# Grab the last kernel dmesg time
		# In our case, we need to find the last real kernel line before
		# the systemd lines begin. The last:
		# 'Freeing unused kernel' line is a reasonable
		# 'last in kernel line' to look for.
		# We make a presumption here that as we are in a cold-boot VM
		# kernel, the first dmesg is at '0 seconds', so the timestamp
		# of that last line is the length of time in the kernel.
		kernel_last_line=$( (fgrep "Freeing unused kernel" <<- EOF
				${workload_result[@]}
			EOF
			) | tail -1 )

		if [ -z "$kernel_last_line" ]; then
			echo "No kernel last line"
			for l in "${workload_result[@]}"; do
				echo ">: [$l]"
			done
			die "No kernel last line"
		fi

		# dmesg timestamps look like '[    1.234567]' - field 2 holds
		# the time once the trailing ']' is removed.
		kernel_period=$(echo $kernel_last_line | awk '{print $2}' | tr -d "]")

		# And we can then work out how much time it took to get to the kernel
		to_kernel_period=$(printf "%f" $(bc <<<"scale=$L_CALC_SCALE; $(ns_to_s $workload_period) - $kernel_period"))
	else
		kernel_period="0.0"
		to_kernel_period="0.0"
	fi

	total_result="$(ns_to_s $total_period)"
	to_workload="$(ns_to_s $workload_period)"
	in_kernel=$kernel_period
	to_kernel=$to_kernel_period
	to_quit=$(ns_to_s $shutdown_period)

	# Sanity check: host/guest clock skew can yield non-positive periods;
	# if any measure is <= 0 the whole run is flagged invalid below.
	tr_is_neg=$(echo $total_result'<='0.0 | bc -l)
	tw_is_neg=$(echo $to_workload'<='0.0 | bc -l)
	ik_is_neg=$(echo $in_kernel'<='0.0 | bc -l)
	tk_is_neg=$(echo $to_kernel'<='0.0 | bc -l)
	tq_is_neg=$(echo $to_quit'<='0.0 | bc -l)

	data_is_valid=0
	if [ $tr_is_neg -eq 1 ] || [ $tw_is_neg -eq 1 ] || [ $ik_is_neg -eq 1 ] || [ $tk_is_neg -eq 1 ] || [ $tq_is_neg -eq 1 ]; then
		data_is_valid=1
	else
		# Insert results individually
		total_result_ds+=($total_result)
		to_workload_ds+=($to_workload)
		in_kernel_ds+=($in_kernel)
		to_kernel_ds+=($to_kernel)
		to_quit_ds+=($to_quit)
	fi

	((num_iters+=1))

	# If we are doing an (optional) scaling test, then we launch a permanent container
	# between each of our 'test' containers. The aim being to see if our launch times
	# are linear with the number of running containers or not
	if [ -n "$SCALING" ]; then
		sudo -E "${CTR_EXE}" run --runtime=${CTR_RUNTIME} -d ${IMAGE} test bash -c "tail -f /dev/null"
	fi
}
   192  
# Writes a JSON fragment with the measurement results of each individual
# execution, one array element per run.
# Note: the heredoc below is NOT '<<-', so its leading whitespace is part
# of the emitted JSON string - do not reindent it.
write_individual_results() {
	for i in "${!total_result_ds[@]}"; do
		local json="$(cat << EOF
	{
		"total": {
			"Result": ${total_result_ds[i]},
			"Units": "s"
		},
		"to-workload": {
			"Result": ${to_workload_ds[i]},
			"Units": "s"
		},
		"in-kernel": {
			"Result": ${in_kernel_ds[i]},
			"Units": "s"
		},
		"to-kernel": {
			"Result": ${to_kernel_ds[i]},
			"Units": "s"
		},
		"to-quit": {
			"Result": ${to_quit_ds[i]},
			"Units": "s"
		}
	}
EOF
)"
		metrics_json_add_array_element "$json"
	done
}
   225  
# One-time test setup: derive the test name/args, verify required host
# commands and the image, and decide whether kernel-time calculation
# (dmesg based) applies to this runtime.
init () {
	TEST_ARGS="image=${IMAGE} runtime=${CTR_RUNTIME} units=seconds"

	# We set the generic name here, but we save the different time results separately,
	# and append the actual detail to the name at the time of saving...
	TEST_NAME="boot times"

	# If we are scaling, note that in the name
	[ -n "$SCALING" ] && TEST_NAME="${TEST_NAME} scaling"

	echo "Executing test: ${TEST_NAME} ${TEST_ARGS}"
	check_cmds "${REQUIRED_CMDS[@]}"

	# For non-VM runtimes, we don't use the output of dmesg, and
	# we have seen it cause some test instabilities, so do not invoke
	# it if not needed.
	if [ "${CTR_RUNTIME}" == "io.containerd.runc.v2" ]; then
		DMESGCMD=""
	else
		CALCULATE_KERNEL=1
		DMESGCMD="; dmesg"
	fi

	# Start from a fairly clean environment
	init_env
	check_images "$IMAGE"
}
   253  
   254  # Computes the average of the data
   255  calc_avg_array() {
   256  	data=("$@")
   257  	avg=0
   258  	LSCALE=6
   259  	size="${#data[@]}"
   260  
   261  	[ -z "$data" ] && die "List of results was not passed to the calc_avg_array() function when trying to calculate the average result."
   262  	[ $size -eq 0 ] && die "Division by zero: The number of items is 0 when trying to calculate the average result."
   263  
   264  	sum=$(IFS='+'; echo "scale=4; ${data[*]}" | bc)
   265  	avg=$(echo "scale=$LSCALE; $sum / $size" | bc)
   266  	printf "%.0${CALC_SCALE}f" $avg
   267  }
   268  
   269  
   270  # Computes the standard deviation of the data
   271  calc_sd_array() {
   272  	data=("$@")
   273  	sum_sqr_n=0
   274  	size=${#data[@]}
   275  
   276  	# LSCALE is the scale used for calculations in the middle
   277  	# CALC_SCALE is the scale used for the result
   278  	LSCALE=13
   279  	CALC_SCALE=6
   280  
   281  	[ -z "$data" ] && die "List results was not passed to the calc_sd_result() function when trying to calculate the standard deviation result."
   282  	[ $size -eq 0 ] && die "Division by zero: The number of items is 0 when trying to calculate the standard deviation result."
   283  
   284  
   285  	# [1] sum data
   286  	sum_data=$(IFS='+'; echo "scale=$LSCALE; ${data[*]}" | bc)
   287  
   288  	# [2] square the sum of data
   289  	pow_2_sum_data=$(echo "scale=$LSCALE; $sum_data ^ 2" | bc)
   290  
   291  	# [3] divide the square of data by the num of items
   292  	div_sqr_n=$(echo "scale=$LSCALE; $pow_2_sum_data / $size" | bc)
   293  
   294  	# [4] Sum of the sqr of each item
   295  	for i in "${data[@]}"; do
   296  		sqr_n=$(echo "scale=$LSCALE; $i ^ 2" | bc)
   297  		sum_sqr_n=$(echo "scale=$LSCALE; $sqr_n + $sum_sqr_n" | bc)
   298  	done
   299  
   300  	# substract [4] from [3]
   301  	subs=$(echo "scale=$LSCALE; $sum_sqr_n - $div_sqr_n" | bc)
   302  
   303  	# get variance
   304  	var=$(echo "scale=$LSCALE; $subs / $size" | bc)
   305  
   306  	# get standard deviation
   307  	sd=$(echo "scale=$LSCALE; sqrt($var)" | bc)
   308  
   309  	# if sd is zero, limit the decimal scale to 1 digit
   310  	sd_is_zero=$(echo $sd'=='0.0 | bc -l)
   311  	[ $sd_is_zero -eq 1 ] && CALC_SCALE=1
   312  
   313  	printf "%.0${CALC_SCALE}f" $sd
   314  }
   315  
   316  # Computes the Coefficient of variation.
   317  # The result is given as percentage.
   318  calc_cov_array() {
   319  	sd=$1
   320  	mean=$2
   321  
   322  	# LSCALE used for consider more decimals digits than usual in cov estimation.
   323  	# CALC_SCALE is the scale used to return the result.
   324  	LSCALE=13
   325  	CALC_SCALE=6
   326  
   327  	mean_is_zero=$(echo $mean'=='0.0 | bc -l)
   328  
   329  	[ -z "$sd" ] && die "Standard deviation was not passed to the calc_cov_array() function when trying to calculate the CoV result."
   330  	[ -z "$mean" ] && die "Mean was not passed to the calc_cov_array() function when trying to calculate the CoV result."
   331  	[ $mean_is_zero -eq 1 ] && die "Division by zero: Mean value passed is 0 when trying to get CoV result."
   332  
   333  	cov=$(echo "scale=$LSCALE; $sd / $mean" | bc)
   334  	cov=$(echo "scale=$LSCALE; $cov * 100" | bc)
   335  
   336  	# if cov is zero, limit the decimal scale to 1 digit
   337  	cov_is_zero=$(echo $cov'=='0.0 | bc -l)
   338  	[ $cov_is_zero -eq 1 ] && CALC_SCALE=1
   339  
   340  	printf "%.0${CALC_SCALE}f" $cov
   341  }
   342  
# Writes a JSON fragment with the statistics (average, standard deviation
# and coefficient of variation) for each launch time metric.
# Note: the heredoc is NOT '<<-', so its leading whitespace is part of the
# emitted JSON string - do not reindent it.
write_stats_results() {
	size="${#total_result_ds[@]}"
	avg_total_result=$(calc_avg_array "${total_result_ds[@]}")
	avg_to_workload=$(calc_avg_array "${to_workload_ds[@]}")
	avg_in_kernel=$(calc_avg_array "${in_kernel_ds[@]}")
	avg_to_kernel=$(calc_avg_array "${to_kernel_ds[@]}")
	avg_to_quit=$(calc_avg_array "${to_quit_ds[@]}")

	sd_total_result=$(calc_sd_array "${total_result_ds[@]}")
	sd_to_workload=$(calc_sd_array "${to_workload_ds[@]}")
	sd_in_kernel=$(calc_sd_array "${in_kernel_ds[@]}")
	sd_to_kernel=$(calc_sd_array "${to_kernel_ds[@]}")
	sd_to_quit=$(calc_sd_array "${to_quit_ds[@]}")

	# The sd/avg values are plain numbers, so the unquoted expansion here
	# is harmless (each expands to a single word).
	cov_total_result=$(calc_cov_array ${sd_total_result} ${avg_total_result})
	cov_to_workload=$(calc_cov_array ${sd_to_workload} ${avg_to_workload})
	cov_in_kernel=$(calc_cov_array ${sd_in_kernel} ${avg_in_kernel})
	cov_to_kernel=$(calc_cov_array ${sd_to_kernel} ${avg_to_kernel})
	cov_to_quit=$(calc_cov_array ${sd_to_quit} ${avg_to_quit})

	# NOTE(review): the key "to-kernel_avg" below is inconsistent with the
	# other keys here and with "to-kernel" in the per-run JSON - it looks
	# like a typo, but downstream consumers may depend on it; confirm
	# before renaming.
	local json="$(cat << EOF
	{
	"size": $size,
	"total": {
		"avg": $avg_total_result,
		"sd": $sd_total_result,
		"cov": $cov_total_result
	},
	"to-workload": {
		"avg": $avg_to_workload,
		"sd": $sd_to_workload,
		"cov": $cov_to_workload
	},
	"in-kernel": {
		"avg": $avg_in_kernel,
		"sd": $sd_in_kernel,
		"cov": $cov_in_kernel
	},
	"to-kernel_avg": {
		"avg": $avg_to_kernel,
		"sd": $sd_to_kernel,
		"cov": $cov_to_kernel
	},
	"to-quit": {
		"avg": $avg_to_quit,
		"sd": $sd_to_quit,
		"cov": $cov_to_quit
	}
	}
EOF
)"
	metrics_json_add_array_element "$json"
}
   398  
   399  help() {
   400  	usage=$(cat << EOF
   401  Usage: $0 [-h] [options]
   402     Description:
   403          This script takes time measurements for different
   404  	stages of a boot/run/rm cycle
   405     Options:
   406          -h,         Help
   407          -i <name>,  Image name (mandatory)
   408          -n <n>,     Number of containers to run (mandatory)
   409          -s,         Enable scaling (keep containers running)
   410  EOF
   411  )
   412  	echo "$usage"
   413  }
   414  
   415  main() {
   416  	local OPTIND
   417  	while getopts "dhi:n:s" opt;do
   418  		case ${opt} in
   419  		h)
   420  		    help
   421  		    exit 0;
   422  		    ;;
   423  		i)
   424  		    IMAGE="${OPTARG}"
   425  		    ;;
   426  		n)
   427  		    TIMES="${OPTARG}"
   428  		    ;;
   429  		s)
   430  		    SCALING=true
   431  		    ;;
   432  		?)
   433  		    # parse failure
   434  		    help
   435  		    die "Failed to parse arguments"
   436  		    ;;
   437  		esac
   438  	done
   439  	shift $((OPTIND-1))
   440  
   441  	[ -z "$IMAGE" ] && help && die "Mandatory IMAGE name not supplied"
   442  	[ -z "$TIMES" ] && help && die "Mandatory nunmber of containers not supplied"
   443  	# Although this is mandatory, the 'lib/common.bash' environment can set
   444  	# it, so we may not fail if it is not set on the command line...
   445  	[ -z "$RUNTIME" ] && help && die "Mandatory runtime argument not supplied"
   446  
   447  	init
   448  	j=0
   449  	max_reps=$MAX_REPETITIONS
   450  
   451  	while [ $j -lt $TIMES ]; do
   452  
   453  		echo " run $num_iters"
   454  		run_workload
   455  
   456  		if [ $data_is_valid -eq 0 ]; then
   457  			j=$(( j + 1 ))
   458  			# if valid result then reset 'max_reps' to initial value
   459  			max_reps=$MAX_REPETITIONS
   460  			continue
   461  		fi
   462  
   463  		echo "Skipping run due to invalid result"
   464  		((max_reps-=1))
   465  
   466  		if [ $max_reps -lt 0 ]; then
   467  			die "Max. num of repetitions reached for run: $j"
   468  		fi
   469  	done
   470  
   471  	metrics_json_init
   472  	metrics_json_start_array
   473  	write_stats_results
   474  	metrics_json_end_array "Statistics"
   475  	metrics_json_start_array
   476  	write_individual_results
   477  	metrics_json_end_array "Results"
   478  	metrics_json_save
   479  	clean_env_ctr
   480  }
   481  
   482  main "$@"