#!/bin/bash
#
# Copyright (c) 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Launches <count> Tensorflow containers, runs the tf_cnn_benchmarks
# ResNet50 and AlexNet workloads in all of them and stores the reported
# "total images/sec" figures (plus their average) through the metrics
# JSON helpers provided by ../lib/common.bash.

set -e

# General env
SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../lib/common.bash"

IMAGE="docker.io/library/tensorflow:latest"
DOCKERFILE="${SCRIPT_PATH}/tensorflow_dockerfile/Dockerfile"
BATCH_SIZE="512"
NUM_BATCHES="300"
# Prints the benchmark output file of a container.
CMD_RESULT="cd benchmarks/scripts/tf_cnn_benchmarks/ && cat result"
# Prints 1 once the benchmark has written its final "total images/sec" line.
CMD_FILE="cat benchmarks/scripts/tf_cnn_benchmarks/result | grep 'total images' | wc -l"
# Removes the output of a previous benchmark run inside a container.
CMD_CLEAN="rm -f benchmarks/scripts/tf_cnn_benchmarks/result"
tensorflow_file=$(mktemp tensorflowresults.XXXXXXXXXX)
NUM_CONTAINERS="$1"
TIMEOUT="$2"
TEST_NAME="tensorflow"
PAYLOAD_ARGS="tail -f /dev/null"

# Deletes the host-side temporary results file (installed as EXIT trap).
function remove_tmp_file() {
	rm -rf "${tensorflow_file}"
}

trap remove_tmp_file EXIT

# Prints the usage message.
function help() {
cat << EOF
Usage: $0 <count> <timeout>
   Description:
	This script launches n number of containers
	to run the tf cnn benchmarks using a Tensorflow
	container.
   Options:
	<count>   : Number of containers to run.
	<timeout> : Timeout to launch the containers.
EOF
}

# Starts a benchmark command, detached, in every container.
# Globals:   containers (read, set by main), CTR_EXE, CMD_CLEAN
# Arguments: $1 - shell command line to run inside each container
function launch_benchmark() {
	local cmd_run="$1"
	local i

	for i in "${containers[@]}"; do
		# Both benchmarks write to the same in-container file. Drop any
		# previous output first so that a stale "total images" line cannot
		# be mistaken for the completion of the run that is about to start.
		sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_CLEAN}"
		sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${cmd_run}"
	done
}

# Polls every container until its benchmark has reported a result.
# The check is re-executed on each retry; once the retries for a
# container are exhausted the function moves on to the next container.
# Globals:   containers (read, set by main), CTR_EXE, CMD_FILE
function wait_for_results() {
	local retries="200"
	local check_file
	local i
	local j

	for i in "${containers[@]}"; do
		check_file=0
		for j in $(seq 1 "${retries}"); do
			check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_FILE}")
			[ "${check_file:-0}" -eq 1 ] && break
			sleep 1
		done
		if [ "${check_file:-0}" -ne 1 ]; then
			info "No result found in container ${i} after ${retries} retries"
		fi
	done
}

# Copies the benchmark output of every container into ${tensorflow_file}.
# The file is emptied first, so it only ever holds the output of the
# benchmark that has just finished.
# Globals:   containers (read, set by main), CTR_EXE, CMD_RESULT,
#            tensorflow_file (written)
function collect_results() {
	local i

	: > "${tensorflow_file}"
	for i in "${containers[@]}"; do
		sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESULT}" >> "${tensorflow_file}"
	done
}

# Prints the comma separated "total images/sec" values found in
# ${tensorflow_file} (one value per container).
function parse_results() {
	grep "total images/sec" "${tensorflow_file}" | cut -d ":" -f2 | sed -e 's/^[ \t]*//' | tr '\n' ',' | sed 's/.$//'
}

# Prints the sum of a comma separated list of values divided by
# NUM_CONTAINERS.
# Arguments: $1 - comma separated list of numbers
function average_results() {
	echo "$1" | sed "s/,/+/g;s/.*/(&)\/$NUM_CONTAINERS/g" | bc -l
}

# Runs the ResNet50 benchmark in all the containers and adds its
# results to the metrics JSON array.
function resnet50_test() {
	local CMD_RUN="cd benchmarks/scripts/tf_cnn_benchmarks/ && python tf_cnn_benchmarks.py -data_format=NHWC --device cpu --batch_size=${BATCH_SIZE} --num_batches=${NUM_BATCHES} > result"
	info "Running Resnet50 Tensorflow test"
	launch_benchmark "${CMD_RUN}"
	wait_for_results
	collect_results

	local resnet50_results
	resnet50_results=$(parse_results)
	local average_resnet50
	average_resnet50=$(average_results "${resnet50_results}")

	# NOTE(review): the values are images/sec although "Units" says "s";
	# left unchanged to keep the emitted JSON stable - confirm with consumers.
	local json="$(cat << EOF
	{
		"Resnet50": {
			"Result": "${resnet50_results}",
			"Average": "${average_resnet50}",
			"Units": "s"
		}
	}
EOF
)"
	metrics_json_add_array_element "$json"
}

# Runs the AlexNet (forward only) benchmark in all the containers, adds
# its results to the metrics JSON array and closes the "Results" array.
function axelnet_test() {
	local CMD_RUN="cd benchmarks/scripts/tf_cnn_benchmarks/ && python tf_cnn_benchmarks.py --num_batches=${NUM_BATCHES} --device=cpu --batch_size=${BATCH_SIZE} --forward_only=true --model=alexnet --data_format=NHWC > result"
	info "Running AxelNet Tensorflow test"
	launch_benchmark "${CMD_RUN}"
	wait_for_results
	collect_results

	local axelnet_results
	axelnet_results=$(parse_results)
	local average_axelnet
	average_axelnet=$(average_results "${axelnet_results}")

	# NOTE(review): the values are images/sec although "Units" says "s";
	# left unchanged to keep the emitted JSON stable - confirm with consumers.
	local json="$(cat << EOF
	{
		"AxelNet": {
			"Result": "${axelnet_results}",
			"Average": "${average_axelnet}",
			"Units": "s"
		}
	}
EOF
)"
	metrics_json_add_array_element "$json"
	metrics_json_end_array "Results"
}

# Waits, polling once per second for up to TIMEOUT iterations, until
# NUM_CONTAINERS tasks are listed as RUNNING.
# Returns: 1 when the last iteration is reached without success.
function check_containers_are_up() {
	local containers_launched=0
	local i

	for i in $(seq "${TIMEOUT}") ; do
		info "Verify that the containers are running"
		# grep -c exits with 1 when it counts zero matches; without the
		# "|| true" set -e would abort the script on the first poll
		# instead of retrying.
		containers_launched="$(sudo "${CTR_EXE}" t list | grep -c "RUNNING" || true)"
		[ "${containers_launched}" -eq "${NUM_CONTAINERS}" ] && break
		sleep 1
		[ "${i}" == "${TIMEOUT}" ] && return 1
	done
}

# Entry point: validates the arguments, creates the containers, runs
# both benchmarks, saves the metrics JSON and cleans the environment.
# Arguments: $1 - number of containers, $2 - launch timeout
function main() {
	# Verify enough arguments
	if [ $# != 2 ]; then
		echo >&2 "error: Not enough arguments [$*]"
		help
		exit 1
	fi

	local i=0
	# Read by the benchmark helpers through bash dynamic scoping.
	local containers=()
	local not_started_count="${NUM_CONTAINERS}"

	# Check tools/commands dependencies
	local cmds=("awk" "docker" "bc")
	check_cmds "${cmds[@]}"
	check_ctr_images "${IMAGE}" "${DOCKERFILE}"

	init_env
	info "Creating ${NUM_CONTAINERS} containers"

	for ((i=1; i<= NUM_CONTAINERS; i++)); do
		containers+=("$(random_name)")
		sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}"
		# Plain assignment: unlike ((var--)) it can never return a
		# non-zero status and trip set -e.
		not_started_count=$((not_started_count - 1))
		info "$not_started_count remaining containers"
	done

	metrics_json_init
	metrics_json_start_array

	# Check that the requested number of containers are running
	check_containers_are_up

	resnet50_test

	axelnet_test

	metrics_json_save

	clean_env_ctr
}
main "$@"