github.com/kata-containers/tests@v0.0.0-20240307153542-772105b56064/metrics/machine_learning/pytorch.sh (about) 1 #!/bin/bash 2 # 3 # Copyright (c) 2023 Intel Corporation 4 # 5 # SPDX-License-Identifier: Apache-2.0 6 7 set -e 8 9 # General env 10 SCRIPT_PATH=$(dirname "$(readlink -f "$0")") 11 source "${SCRIPT_PATH}/../lib/common.bash" 12 13 IMAGE="docker.io/library/pytorch:latest" 14 DOCKERFILE="${SCRIPT_PATH}/pytorch_dockerfile/Dockerfile" 15 equation_pytorch_file=$(mktemp pytorchresults.XXXXXXXXXX) 16 isoneural_pytorch_file=$(mktemp pytorchresults.XXXXXXXXXX) 17 NUM_CONTAINERS="$1" 18 TIMEOUT="$2" 19 TEST_NAME="pytorch" 20 CMD_RUN="cd pyhpc-benchmarks-3.0 && python run.py benchmarks/equation_of_state --burnin 20 --device cpu -b pytorch -s 524288 > LOG" 21 CMD_RUN_ISONEURAL="cd pyhpc-benchmarks-3.0 && python run.py benchmarks/isoneutral_mixing --burnin 20 --device cpu -b pytorch -s 524288 > LOG" 22 CMD_RESULT="cd pyhpc-benchmarks-3.0 && cat LOG" 23 CMD_FILE="cat pyhpc-benchmarks-3.0/LOG | grep 'seconds' | wc -l" 24 PAYLOAD_ARGS="tail -f /dev/null" 25 26 function remove_tmp_file() { 27 rm -rf "${equation_pytorch_file}" "${isoneural_pytorch_file}" 28 } 29 30 trap remove_tmp_file EXIT 31 32 function check_containers_are_up() { 33 local containers_launched=0 34 for i in $(seq "${TIMEOUT}") ; do 35 info "Verify that the containers are running" 36 containers_launched="$(sudo ${CTR_EXE} t list | grep -c "RUNNING")" 37 [ "${containers_launched}" -eq "${NUM_CONTAINERS}" ] && break 38 sleep 1 39 [ "${i}" == "${TIMEOUT}" ] && return 1 40 done 41 } 42 43 function equation_of_state_pytorch_test() { 44 info "Running Equation of State Pytorch test" 45 for i in "${containers[@]}"; do 46 sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_RUN}" 47 done 48 49 for i in "${containers[@]}"; do 50 check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_FILE}") 51 retries="200" 52 for j in $(seq 1 "${retries}"); do 53 [ "${check_file}" -eq 1 ] && break 54 sleep 1 55 done 56 done 57 58 for i in "${containers[@]}"; do 59 sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESULT}" >> "${equation_pytorch_file}" 60 done 61 62 local equation_pytorch_results=$(cat "${equation_pytorch_file}" | grep pytorch | sed '/Using pytorch version/d' | awk '{print $4}' | tr '\n' ',' | sed 's/.$//') 63 local equation_average_pytorch=$(echo "${equation_pytorch_results}" | sed "s/,/+/g;s/.*/(&)\/$NUM_CONTAINERS/g" | bc -l) 64 65 local json="$(cat << EOF 66 { 67 "Pytorch Equation of State": { 68 "Result": "${equation_pytorch_results}", 69 "Average": "${equation_average_pytorch}", 70 "Units": "s" 71 } 72 } 73 EOF 74 )" 75 metrics_json_add_array_element "$json" 76 77 } 78 79 function isoneural_pytorch_test() { 80 info "Running Isoneural Pytorch test" 81 for i in "${containers[@]}"; do 82 sudo -E "${CTR_EXE}" t exec -d --exec-id "$(random_name)" "${i}" sh -c "${CMD_RUN_ISONEURAL}" 83 done 84 85 for i in "${containers[@]}"; do 86 check_file=$(sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_FILE}") 87 retries="200" 88 for j in $(seq 1 "${retries}"); do 89 [ "${check_file}" -eq 1 ] && break 90 sleep 1 91 done 92 done 93 94 for i in "${containers[@]}"; do 95 sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${i}" sh -c "${CMD_RESULT}" >> "${isoneural_pytorch_file}" 96 done 97 98 local isoneural_pytorch_results=$(cat "${isoneural_pytorch_file}" | grep pytorch | sed '/Using pytorch version/d' | awk '{print $4}' | tr '\n' ',' | sed 's/.$//') 99 local isoneural_average_pytorch=$(echo "${isoneural_pytorch_results}" | sed "s/,/+/g;s/.*/(&)\/$NUM_CONTAINERS/g" | bc -l) 100 101 local json="$(cat << EOF 102 { 103 "Pytorch Isoneural": { 104 "Result": "${isoneural_pytorch_results}", 105 "Average": "${isoneural_average_pytorch}", 106 "Units": "s" 107 } 108 } 109 EOF 110 )" 111 metrics_json_add_array_element "$json" 112 metrics_json_end_array "Results" 113 114 } 115 116 117 function main() { 118 # Verify enough arguments 119 if [ $# != 2 ]; then 120 echo >&2 "error: Not enough arguments [$@]" 121 help 122 exit 1 123 fi 124 125 local i=0 126 local containers=() 127 local not_started_count="${NUM_CONTAINERS}" 128 129 # Check tools/commands dependencies 130 cmds=("awk" "docker" "bc") 131 check_cmds "${cmds[@]}" 132 check_ctr_images "${IMAGE}" "${DOCKERFILE}" 133 134 init_env 135 info "Creating ${NUM_CONTAINERS} containers" 136 137 for ((i=1; i<= "${NUM_CONTAINERS}"; i++)); do 138 containers+=($(random_name)) 139 sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}" 140 ((not_started_count--)) 141 info "$not_started_count remaining containers" 142 done 143 144 metrics_json_init 145 metrics_json_start_array 146 147 148 # Check that the requested number of containers are running 149 check_containers_are_up 150 151 equation_of_state_pytorch_test 152 153 isoneural_pytorch_test 154 155 metrics_json_save 156 157 clean_env_ctr 158 159 } 160 main "$@"