github.com/kata-containers/tests@v0.0.0-20240307153542-772105b56064/metrics/machine_learning/tensorflow.sh

#!/bin/bash
#
# Copyright (c) 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

set -e

# General env
SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../lib/common.bash"

IMAGE="docker.io/library/tensorflow:latest"
DOCKERFILE="${SCRIPT_PATH}/tensorflow_dockerfile/Dockerfile"
BATCH_SIZE="512"
NUM_BATCHES="300"
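# Commands run inside each container: CMD_RESULT dumps the benchmark output and
# CMD_FILE counts the 'total images' summary lines in it (the count becomes 1
# once a benchmark run has finished writing its result file).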
CMD_RESULT="cd benchmarks/scripts/tf_cnn_benchmarks/ && cat result"
CMD_FILE="cat benchmarks/scripts/tf_cnn_benchmarks/result | grep 'total images' | wc -l"
tensorflow_file=$(mktemp tensorflowresults.XXXXXXXXXX)
NUM_CONTAINERS="$1"
TIMEOUT="$2"
TEST_NAME="tensorflow"
PAYLOAD_ARGS="tail -f /dev/null"

function remove_tmp_file() {
	rm -rf "${tensorflow_file}"
}

trap remove_tmp_file EXIT

function help() {
cat << EOF
Usage: $0 <count> <timeout>
	Description:
		This script launches <count> containers from a TensorFlow image
		and runs the tf_cnn_benchmarks suite in each of them.
	Options:
		<count> : Number of containers to run.
		<timeout> : Time in seconds to wait for all containers to be running.
EOF
}
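
# Example invocation (illustrative values): run the benchmarks in 2 containers,
# waiting up to 60 seconds for them to reach the RUNNING state:
#   ./tensorflow.sh 2 60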

function resnet50_test() {
	local CMD_RUN="cd benchmarks/scripts/tf_cnn_benchmarks/ && python tf_cnn_benchmarks.py --data_format=NHWC --device cpu --batch_size=${BATCH_SIZE} --num_batches=${NUM_BATCHES} > result"
	info "Running Resnet50 Tensorflow test"
	for i in "${containers[@]}"; do
		sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_RUN}"
	done

	# Poll each container until its result file contains the final
	# 'total images/sec' summary line.
	for i in "${containers[@]}"; do
		retries="200"
		for j in $(seq 1 "${retries}"); do
			check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_FILE}")
			[ "${check_file}" -eq 1 ] && break
			sleep 1
		done
	done

	for i in "${containers[@]}"; do
		sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESULT}" >> "${tensorflow_file}"
	done

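	# The per-container throughputs are collected as a comma-separated list
	# "r1,r2,...,rN"; the sed/bc pipeline below rewrites that as
	# "(r1+r2+...+rN)/NUM_CONTAINERS" and evaluates it. With illustrative
	# values "310.5,305.3" and NUM_CONTAINERS=2 this gives (310.5+305.3)/2 = 307.9.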
	local resnet50_results=$(cat "${tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//')
	local average_resnet50=$(echo "${resnet50_results}" | sed "s/,/+/g;s/.*/(&)\/$NUM_CONTAINERS/g" | bc -l)

	local json="$(cat << EOF
	{
		"Resnet50": {
			"Result": "${resnet50_results}",
			"Average": "${average_resnet50}",
			"Units": "images/s"
		}
	}
EOF
)"
	metrics_json_add_array_element "$json"
}

function alexnet_test() {
	local CMD_RUN="cd benchmarks/scripts/tf_cnn_benchmarks/ && python tf_cnn_benchmarks.py --num_batches=${NUM_BATCHES} --device=cpu --batch_size=${BATCH_SIZE} --forward_only=true --model=alexnet --data_format=NHWC > result"
	info "Running AlexNet Tensorflow test"
	for i in "${containers[@]}"; do
		sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_RUN}"
	done

	# Poll each container until its result file contains the final
	# 'total images/sec' summary line.
	for i in "${containers[@]}"; do
		retries="200"
		for j in $(seq 1 "${retries}"); do
			check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_FILE}")
			[ "${check_file}" -eq 1 ] && break
			sleep 1
		done
	done

	# Clear the host-side results file so the ResNet50 lines collected above
	# are not parsed again as part of this test.
	: > "${tensorflow_file}"

	for i in "${containers[@]}"; do
		sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESULT}" >> "${tensorflow_file}"
	done

	# Same join-and-average computation as in resnet50_test.
	local alexnet_results=$(cat "${tensorflow_file}" | grep "total images/sec" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//')
	local average_alexnet=$(echo "${alexnet_results}" | sed "s/,/+/g;s/.*/(&)\/$NUM_CONTAINERS/g" | bc -l)

	local json="$(cat << EOF
	{
		"AlexNet": {
			"Result": "${alexnet_results}",
			"Average": "${average_alexnet}",
			"Units": "images/s"
		}
	}
EOF
)"
	metrics_json_add_array_element "$json"
	metrics_json_end_array "Results"
}

function check_containers_are_up() {
	local containers_launched=0
	for i in $(seq "${TIMEOUT}"); do
		info "Verify that the containers are running"
		containers_launched="$(sudo ${CTR_EXE} t list | grep -c "RUNNING")"
		[ "${containers_launched}" -eq "${NUM_CONTAINERS}" ] && break
		sleep 1
		[ "${i}" == "${TIMEOUT}" ] && return 1
	done
}
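
# main() validates the arguments, checks the required host tools and the
# container image, launches NUM_CONTAINERS detached containers running an idle
# payload ("tail -f /dev/null"), waits for them to be RUNNING, runs the
# ResNet50 and AlexNet benchmarks, saves the metrics JSON report, and finally
# cleans up the test containers.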

function main() {
	# Verify the expected number of arguments
	if [ "$#" -ne 2 ]; then
		echo >&2 "error: expected 2 arguments, got $# [$*]"
		help
		exit 1
	fi

	local i=0
	local containers=()
	local not_started_count="${NUM_CONTAINERS}"

	# Check tools/commands dependencies
	cmds=("awk" "docker" "bc")
	check_cmds "${cmds[@]}"
	check_ctr_images "${IMAGE}" "${DOCKERFILE}"

	init_env
	info "Creating ${NUM_CONTAINERS} containers"

	for ((i=1; i<= "${NUM_CONTAINERS}"; i++)); do
		containers+=($(random_name))
		sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}"
		((not_started_count--))
		info "${not_started_count} containers remaining to start"
	done

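	# Initialize the metrics JSON report and open the results array that the
	# test functions append to (closed with metrics_json_end_array "Results"
	# in alexnet_test).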
	metrics_json_init
	metrics_json_start_array

	# Check that the requested number of containers are running
	check_containers_are_up

	resnet50_test

	alexnet_test

	metrics_json_save

	clean_env_ctr
}
main "$@"