github.com/dmaizel/tests@v0.0.0-20210728163746-cae6a2d9cee8/metrics/time/launch_times.sh

github.com/dmaizel/tests@v0.0.0-20210728163746-cae6a2d9cee8/metrics/time/launch_times.sh (about)

     1  #!/bin/bash
     2  # Copyright (c) 2017-2021 Intel Corporation
     3  #
     4  # SPDX-License-Identifier: Apache-2.0
     5  #
     6  #  Description of the test:
     7  #  This test takes a number of time measurements through the complete
     8  #  launch/shutdown cycle of a single container.
     9  #  From those measurements it derives a number of time measures, such as:
    10  #   - time to payload execution
    11  #   - time to get to VM kernel
    12  #   - time in VM kernel boot
    13  #   - time to quit
    14  #   - total time (from launch to finished)
    15  #
    16  # Note, the <image> used for this test must support the full 'date' command
    17  # syntax - the date from busybox for instance *does not* support this, so
    18  # will not work with this test.
    19  #
    20  # Note, this test launches a single container at a time, that quits - thus,
    21  # this test measures times for the 'first container' only. This test does
    22  # not look for any scalability slowdowns as the number of running containers
    23  # increases for instance - that is handled in other tests
    24  
    25  set -e
    26  
    27  SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
    28  source "${SCRIPT_PATH}/../lib/common.bash"
    29  
    30  # Calculating the kernel time from dmesg stamps only really works for VM
    31  # based runtimes - we dynamically enable it if we find we are using a known
    32  # VM runtime
    33  CALCULATE_KERNEL=
    34  
    35  REQUIRED_CMDS=("bc" "awk")
    36  
    37  # The date command format we use to ensure we capture the ns timings
    38  # Note the no-0-padding - 0 padding the results breaks bc in some cases
    39  DATECMD="date -u +%-s:%-N"
    40  
    41  # This the minimum entropy level produced
    42  # by haveged is 1000 see https://wiki.archlinux.org/index.php/Haveged
    43  # Less than 1000 could potentially slow down cryptographic
    44  # applications see https://www.suse.com/support/kb/doc/?id=7011351
    45  entropy_level="1000"
    46  
    47  check_entropy_level() {
    48  	retries="10"
    49  	for i in $(seq 1 "$retries"); do
    50  		if [ $(cat /proc/sys/kernel/random/entropy_avail) -ge ${entropy_level} ]; then
    51  			break;
    52  		fi
    53  		sleep 1
    54  	done
    55  	if [ $(cat /proc/sys/kernel/random/entropy_avail) -le ${entropy_level} ]; then
    56  		die "Not enough entropy level to run this test"
    57  	fi
    58  }
    59  
    60  # convert a 'seconds:nanoseconds' string into nanoseconds
    61  sn_to_ns() {
    62  	# !!: Remove 0's from beginning otherwise the number will be converted to octal
    63  	s=$(echo ${1%:*} | sed 's/^0*//g')
    64  	ns=$(echo ${1##*:} | sed 's/^0*//g')
    65  	# use shell magic to strip out the 's' and 'ns' fields and print
    66  	# them as a 0-padded ns string...
    67  	printf "%d%09d" ${s} ${ns}
    68  }
    69  
    70  # convert 'nanoseconds' (since epoch) into a 'float' seconds
    71  ns_to_s() {
    72  	printf "%.03f" $(bc <<< "scale=3; $1 / 1000000000")
    73  }
    74  
    75  run_workload() {
    76  	start_time=$($DATECMD)
    77  
    78  	# Check entropy level of the host
    79  	check_entropy_level
    80  
    81  	# Run the image and command and capture the results into an array...
    82  	declare workload_result
    83  	readarray -n 0 workload_result < <(ctr run --rm --runtime=${CTR_RUNTIME} ${IMAGE} test bash -c "$DATECMD $DMESGCMD")
    84  
    85  	end_time=$($DATECMD)
    86  
    87  	# Delay this calculation until after we have run - do not want
    88  	# to measure it in the results
    89  	start_time=$(sn_to_ns $start_time)
    90  	end_time=$(sn_to_ns $end_time)
    91  
    92  	# Extract the 'date' info from the first line of the log
    93  	# This script assumes the VM clock is in sync with the host clock...
    94  	workload_time=${workload_result[0]}
    95  	workload_time=$(echo $workload_time | tr -d '\r')
    96  	workload_time=$(sn_to_ns $workload_time)
    97  
    98  	# How long did the whole launch/quit take
    99  	total_period=$((end_time-start_time))
   100  	# How long did it take to get to the workload
   101  	workload_period=$((workload_time-start_time))
   102  	# How long did it take to quit
   103  	shutdown_period=$((end_time-workload_time))
   104  
   105  	if [ -n "$CALCULATE_KERNEL" ]; then
   106  		# Grab the last kernel dmesg time
   107  		# In our case, we need to find the last real kernel line before
   108  		# the systemd lines begin. The last:
   109  		# 'Freeing unused kernel' line is a reasonable
   110  		# 'last in kernel line' to look for.
   111  		# We make a presumption here that as we are in a cold-boot VM
   112  		# kernel, the first dmesg is at '0 seconds', so the timestamp
   113  		# of that last line is the length of time in the kernel.
   114  		kernel_last_line=$( (fgrep "Freeing unused kernel" <<- EOF
   115  				${workload_result[@]}
   116  			EOF
   117  			) | tail -1 )
   118  
   119  		if [ -z "$kernel_last_line" ]; then
   120  			echo "No kernel last line"
   121  			for l in "${workload_result[@]}"; do
   122  				echo ">: [$l]"
   123  			done
   124  			die "No kernel last line"
   125  		fi
   126  
   127  
   128  		kernel_period=$(echo $kernel_last_line | awk '{print $2}' | tr -d "]")
   129  
   130  		# And we can then work out how much time it took to get to the kernel
   131  		to_kernel_period=$(printf "%0f" $(bc <<<"scale=3; $(ns_to_s $workload_period) - $kernel_period"))
   132  	else
   133  		kernel_period="0.0"
   134  		to_kernel_period="0.0"
   135  	fi
   136  
   137  	# And store the results...
   138  	local json="$(cat << EOF
   139  	{
   140  		"total": {
   141  			"Result": $(ns_to_s $total_period),
   142  			"Units" : "s"
   143  		},
   144  		"to-workload": {
   145  			"Result": $(ns_to_s $workload_period),
   146  			"Units" : "s"
   147  		},
   148  		"in-kernel": {
   149  			"Result": $kernel_period,
   150  			"Units" : "s"
   151  		},
   152  		"to-kernel": {
   153  			"Result": $to_kernel_period,
   154  			"Units" : "s"
   155  		},
   156  		"to-quit": {
   157  			"Result": $(ns_to_s $shutdown_period),
   158  			"Units" : "s"
   159  		}
   160  	}
   161  EOF
   162  )"
   163  	metrics_json_add_array_element "$json"
   164  
   165  	# If we are doing an (optional) scaling test, then we launch a permanent container
   166  	# between each of our 'test' containers. The aim being to see if our launch times
   167  	# are linear with the number of running containers or not
   168  	if [ -n "$SCALING" ]; then
   169  		ctr run --runtime=${CTR_RUNTIME} -d ${IMAGE} test bash -c "tail -f /dev/null"
   170  	fi
   171  }
   172  
   173  init () {
   174  	TEST_ARGS="image=${IMAGE} runtime=${CTR_RUNTIME} units=seconds"
   175  
   176  	# We set the generic name here, but we save the different time results separately,
   177  	# and append the actual detail to the name at the time of saving...
   178  	TEST_NAME="boot times"
   179  
   180  	# If we are scaling, note that in the name
   181  	[ -n "$SCALING" ] && TEST_NAME="${TEST_NAME} scaling"
   182  
   183  	echo "Executing test: ${TEST_NAME} ${TEST_ARGS}"
   184  	check_cmds "${REQUIRED_CMDS[@]}"
   185  
   186  	# Only try to grab a dmesg boot time if we are pretty sure we are running a
   187  	# Kata runtime
   188  	local iskata=$(is_a_kata_runtime "$RUNTIME")
   189  	if [ "$iskata" == "1" ]; then
   190  		CALCULATE_KERNEL=1
   191  		DMESGCMD="; dmesg"
   192  	else
   193  		# For non-VM runtimes, we don't use the output of dmesg, and
   194  		# we have seen it cause some test instabilities, so do not invoke
   195  		# it if not needed.
   196  		DMESGCMD=""
   197  	fi
   198  
   199  	# Start from a fairly clean environment
   200  	init_env
   201  	check_images "$IMAGE"
   202  }
   203  
   204  help() {
   205  	usage=$(cat << EOF
   206  Usage: $0 [-h] [options]
   207     Description:
   208          This script takes time measurements for different
   209  	stages of a boot/run/rm cycle
   210     Options:
   211          -h,         Help
   212          -i <name>,  Image name (mandatory)
   213          -n <n>,     Number of containers to run (mandatory)
   214          -s,         Enable scaling (keep containers running)
   215  EOF
   216  )
   217  	echo "$usage"
   218  }
   219  
   220  main() {
   221  	local OPTIND
   222  	while getopts "dhi:n:s" opt;do
   223  		case ${opt} in
   224  		h)
   225  		    help
   226  		    exit 0;
   227  		    ;;
   228  		i)
   229  		    IMAGE="${OPTARG}"
   230  		    ;;
   231  		n)
   232  		    TIMES="${OPTARG}"
   233  		    ;;
   234  		s)
   235  		    SCALING=true
   236  		    ;;
   237  		?)
   238  		    # parse failure
   239  		    help
   240  		    die "Failed to parse arguments"
   241  		    ;;
   242  		esac
   243  	done
   244  	shift $((OPTIND-1))
   245  
   246  	[ -z "$IMAGE" ] && help && die "Mandatory IMAGE name not supplied"
   247  	[ -z "$TIMES" ] && help && die "Mandatory nunmber of containers not supplied"
   248  	# Although this is mandatory, the 'lib/common.bash' environment can set
   249  	# it, so we may not fail if it is not set on the command line...
   250  	[ -z "$RUNTIME" ] && help && die "Mandatory runtime argument not supplied"
   251  
   252  	init
   253  	metrics_json_init
   254  	metrics_json_start_array
   255  	for i in $(seq 1 "$TIMES"); do
   256  		echo " run $i"
   257  		run_workload
   258  	done
   259  	metrics_json_end_array "Results"
   260  	metrics_json_save
   261  	clean_env
   262  }
   263  
   264  main "$@"