#!/bin/bash
#
# Copyright (c) 2022 Red Hat
#
# SPDX-License-Identifier: Apache-2.0
#
# Origin: github.com/kata-containers/tests@v0.0.0-20240307153542-772105b56064/functional/kata-monitor/run.sh
#
# This test file exercises kata-monitor for basic functionality (retrieving
# kata sandboxes and their metrics).
# It assumes an environment where:
# - a CRI container manager (container engine) is up and running
# - crictl is installed and configured
# - the kata-monitor binary is available on the host
#
# Environment variables read (all optional):
#   BAREMETAL              "true" skips the whole test (default: "false")
#   CRI_RUNTIME            "crio" makes kata-monitor use the CRI-O socket
#   CRICTL_RUNTIME         runtime handler passed to `crictl runp -r` (default: "kata")
#   KATA_MONITOR_BIN       path of the kata-monitor binary (default: looked up in PATH)
#   CACHE_UPD_TIMEOUT_SEC  seconds to poll the kata-monitor cache (default: 20)
#   DEBUG                  non-empty enables xtrace
#

set -o errexit
set -o nounset
set -o pipefail

source "/etc/os-release" || source "/usr/lib/os-release"

[ -n "${BASH_VERSION:-}" ] && set -o errtrace
[ -n "${DEBUG:-}" ] && set -o xtrace

readonly MONITOR_HTTP_ENDPOINT="127.0.0.1:8090"
# we should collect a few hundred metrics, let's put a reasonable minimum
readonly MONITOR_MIN_METRICS_NUM=200
BAREMETAL=${BAREMETAL:-"false"}
CRI_RUNTIME=${CRI_RUNTIME:-"crio"}
CRICTL_RUNTIME=${CRICTL_RUNTIME:-"kata"}
KATA_MONITOR_BIN="${KATA_MONITOR_BIN:-$(command -v kata-monitor || true)}"
KATA_MONITOR_PID=""
IAM=$(whoami)
TMPATH=$(mktemp -d -t kata-monitor-test-XXXXXXXXX)
METRICS_FILE="${TMPATH}/metrics.txt"
MONITOR_LOG_FILE="${TMPATH}/kata-monitor.log"
CACHE_UPD_TIMEOUT_SEC=${CACHE_UPD_TIMEOUT_SEC:-20}
POD_ID=""
CID=""
RUNC_POD_ID=""
RUNC_CID=""
CURRENT_TASK=""

# Shell-style booleans used as function return codes.
FALSE=1
TRUE=0

# Any unhandled failure is reported as "cannot $CURRENT_TASK".
trap error_with_msg ERR

# Print a step banner.
# Arguments: $1 - step description
title() {
  local step="$1"

  printf '\n* STEP: %s\n' "$step"
}

# Print a success line.
# Arguments: $1 - message
echo_ok() {
  local msg="$1"

  printf 'OK: %s\n' "$msg"
}

# quiet crictl: run crictl with the given arguments, discarding stdout
qcrictl() {
  crictl "$@" > /dev/null
}

# Print a unique-ish identifier: a hash of the current time (+ nanoseconds).
gen_unique_id() {
  date +%T:%N | md5sum | cut -d ' ' -f 1
}

# Report a failure, dump the kata-monitor log if present, clean up and exit 1.
# Also installed as the ERR trap handler (called without arguments).
# Globals:   CURRENT_TASK, MONITOR_LOG_FILE (read)
# Arguments: $1 - optional message (default: "cannot $CURRENT_TASK")
error_with_msg() {
  local msg=${1:-"cannot $CURRENT_TASK"}

  # Drop the trap so a failure while reporting/cleaning up does not re-enter.
  trap - ERR
  printf '\nERROR: %s\n' "$msg"
  if [ -f "$MONITOR_LOG_FILE" ]; then
    printf '\nkata-monitor logs:\n----------------\n'
    cat "$MONITOR_LOG_FILE"
  fi
  printf '\nkata-monitor testing: FAILED!\n'
  cleanup
  exit 1
}

# Remove both workloads, kill kata-monitor if it is still running and delete
# the temporary directory.
# Globals: RUNC_CID, RUNC_POD_ID, KATA_MONITOR_PID, TMPATH (read)
cleanup() {
  stop_workload
  stop_workload "$RUNC_CID" "$RUNC_POD_ID"

  if [ -n "$KATA_MONITOR_PID" ] && [ -d "/proc/$KATA_MONITOR_PID" ]; then
    kill -9 "$KATA_MONITOR_PID"
  fi

  rm -rf -- "${TMPATH:?}"
}

# Write a pod sandbox config with a unique name under TMPATH.
# Outputs: path of the generated file on stdout
create_sandbox_json() {
  local uid_name_suffix
  local sbfile

  # Assigned separately from `local` so a failure is not masked.
  uid_name_suffix="$(gen_unique_id)"
  sbfile="$TMPATH/sandbox-$uid_name_suffix.json"

  cat <<EOF >"$sbfile"
{
  "metadata": {
    "name": "nginx-$uid_name_suffix",
    "namespace": "default",
    "attempt": 1
  },
  "logDirectory": "/tmp",
  "linux": {
  }
}
EOF
  echo "$sbfile"
}

# Write a busybox container config (running `top`) under TMPATH.
# Outputs: path of the generated file on stdout
create_container_json() {
  local uid_name_suffix
  local cntfile

  uid_name_suffix="$(gen_unique_id)"
  cntfile="$TMPATH/container-$uid_name_suffix.json"

  cat <<EOF >"$cntfile"
{
  "metadata": {
    "name": "busybox"
  },
  "image":{
    "image": "busybox"
  },
  "command": [
    "top"
  ],
  "log_path":"busybox.log",
  "linux": {
  }
}
EOF
  echo "$cntfile"
}

# Create a pod sandbox and start a container in it.
# Globals:   POD_ID, CID (written)
# Arguments: $1 - optional runtime handler for `crictl runp -r`; when empty
#                 no -r option is passed
start_workload() {
  local runtime=${1:-}
  local sbfile=""
  local cntfile=""
  local -a runp_cmd=(crictl runp)

  if [ -n "$runtime" ]; then
    runp_cmd+=(-r "$runtime")
  fi

  sbfile="$(create_sandbox_json)"
  cntfile="$(create_container_json)"
  runp_cmd+=("$sbfile")

  # Forget the previous workload first: if a step below fails, cleanup must
  # not pair the new pod with the previous workload's container ID.
  POD_ID=""
  CID=""

  POD_ID=$("${runp_cmd[@]}")
  CID=$(crictl create "$POD_ID" "$cntfile" "$sbfile")
  qcrictl start "$CID"
}

# Stop and remove a container and its pod sandbox, if the pod still exists.
# Globals:   CID, POD_ID (read, as defaults)
# Arguments: $1 - container ID (default: $CID)
#            $2 - pod ID (default: $POD_ID)
stop_workload() {
  local cid="${1:-$CID}"
  local pod_id="${2:-$POD_ID}"
  local check

  if [ -z "$pod_id" ]; then
    return 0
  fi

  check=$(crictl pods -q --id "$pod_id")
  if [ -z "$check" ]; then
    return 0
  fi

  # The pod may exist without a container (container creation failed).
  if [ -n "$cid" ]; then
    qcrictl stop "$cid"
    qcrictl rm "$cid"
  fi

  qcrictl stopp "$pod_id"
  qcrictl rmp "$pod_id"
}

# Check whether kata-monitor currently lists the given sandbox.
# Globals:   MONITOR_HTTP_ENDPOINT (read)
# Arguments: $1 - pod ID
# Returns:   $TRUE if the ID is in the /sandboxes output, $FALSE otherwise
#            (including when the endpoint cannot be reached)
is_sandbox_there() {
  local podid=${1}
  local sbs s

  sbs=$(curl -s "${MONITOR_HTTP_ENDPOINT}/sandboxes") || return "$FALSE"

  # Intentional word splitting: the reply is scanned as whitespace-separated IDs.
  # shellcheck disable=SC2086
  for s in $sbs; do
    if [ "$s" = "$podid" ]; then
      return "$TRUE"
    fi
  done

  return "$FALSE"
}

# Poll once per second, up to CACHE_UPD_TIMEOUT_SEC times, until the sandbox
# shows up in kata-monitor.
# Arguments: $1 - pod ID
# Returns:   $TRUE as soon as the sandbox is listed, $FALSE on timeout
is_sandbox_there_iterate() {
  local podid=${1}
  local i

  for ((i = 1; i <= CACHE_UPD_TIMEOUT_SEC; i++)); do
    if is_sandbox_there "$podid"; then
      return "$TRUE"
    fi
    echo -n "."
    sleep 1
  done

  return "$FALSE"
}

# Poll once per second, up to CACHE_UPD_TIMEOUT_SEC times, until the sandbox
# is no longer listed by kata-monitor.
# Arguments: $1 - pod ID
# Returns:   $TRUE as soon as the sandbox is not listed, $FALSE on timeout
is_sandbox_missing_iterate() {
  local podid=${1}
  local i

  for ((i = 1; i <= CACHE_UPD_TIMEOUT_SEC; i++)); do
    if ! is_sandbox_there "$podid"; then
      return "$TRUE"
    fi
    echo -n "."
    sleep 1
  done

  return "$FALSE"
}

# Test driver: start one workload without a runtime handler and one with
# CRICTL_RUNTIME, start kata-monitor, then check its sandbox cache and metrics.
main() {
  # Always non-empty, so the expansion is safe under nounset on any bash.
  local -a monitor_args=(--log-level trace)
  local metrics_count

  # Our baremetal CI enforces cleanups of the environment (e.g., cni plugins):
  # here we want a ready environment to just do a few quick checks. So, let's
  # skip baremetal environments for now.
  # (kata-containers-2.0-tests-ubuntu-ARM-PR would fail)
  if [ "$BAREMETAL" = true ]; then
    echo "INFO: baremetal environment - skip kata-monitor tests"
    # Nothing else was created yet; just drop the temporary directory.
    rm -rf -- "${TMPATH:?}"
    exit 0
  fi

  ###########################
  title "pre-checks"

  if [ "$IAM" != "root" ]; then
    error_with_msg "run this script as root please (user is \"${IAM}\")"
  fi

  CURRENT_TASK="connect to the container engine"
  qcrictl pods
  echo_ok "$CURRENT_TASK"

  ###########################
  title "create workloads"

  CURRENT_TASK="start workload (runc)"
  start_workload
  RUNC_POD_ID="$POD_ID"
  RUNC_CID="$CID"
  echo_ok "$CURRENT_TASK - POD ID:$POD_ID, CID:$CID"

  CURRENT_TASK="start workload ($CRICTL_RUNTIME)"
  start_workload "$CRICTL_RUNTIME"
  echo_ok "$CURRENT_TASK - POD ID:$POD_ID, CID:$CID"

  ###########################
  title "start kata-monitor"

  if [ ! -x "$KATA_MONITOR_BIN" ]; then
    error_with_msg "kata-monitor binary not found"
  fi

  if [ "$CRI_RUNTIME" = "crio" ]; then
    monitor_args+=(--runtime-endpoint /run/crio/crio.sock)
  fi

  CURRENT_TASK="start kata-monitor"
  "$KATA_MONITOR_BIN" "${monitor_args[@]}" > "$MONITOR_LOG_FILE" 2>&1 &
  KATA_MONITOR_PID="$!"
  echo_ok "$CURRENT_TASK ($KATA_MONITOR_PID)"

  ###########################
  title "kata-monitor cache update checks"

  CURRENT_TASK="retrieve $POD_ID in kata-monitor cache"
  if ! is_sandbox_there_iterate "$POD_ID"; then
    error_with_msg
  fi
  echo_ok "$CURRENT_TASK"

  CURRENT_TASK="look for runc pod $RUNC_POD_ID in kata-monitor cache"
  # Success here means the pod never showed up for the whole timeout.
  if is_sandbox_there_iterate "$RUNC_POD_ID"; then
    error_with_msg "cache: got runc pod $RUNC_POD_ID"
  fi
  echo_ok "runc pod $RUNC_POD_ID skipped from kata-monitor cache"

  ###########################
  title "kata-monitor metrics retrieval"

  CURRENT_TASK="retrieve metrics from kata-monitor"
  curl -s "${MONITOR_HTTP_ENDPOINT}/metrics" > "$METRICS_FILE"
  echo_ok "$CURRENT_TASK"

  CURRENT_TASK="retrieve metrics for pod $POD_ID"
  # grep -c exits 1 when the count is 0; tolerate it so the explicit
  # "too few metrics" check below reports the failure.
  metrics_count=$(grep -c -F -- "$POD_ID" "$METRICS_FILE" || true)
  metrics_count=${metrics_count:-0}
  if [ "$metrics_count" -lt "$MONITOR_MIN_METRICS_NUM" ]; then
    error_with_msg "got too few metrics (#${metrics_count})"
  fi
  echo_ok "$CURRENT_TASK - found #${metrics_count} metrics"

  ###########################
  title "remove kata workload"

  CURRENT_TASK="stop workload ($CRICTL_RUNTIME)"
  stop_workload
  echo_ok "$CURRENT_TASK"

  ###########################
  title "kata-monitor cache update checks (removal)"

  CURRENT_TASK="verify removal of $POD_ID from kata-monitor cache"
  if ! is_sandbox_missing_iterate "$POD_ID"; then
    error_with_msg "pod $POD_ID was not removed"
  fi
  echo_ok "$CURRENT_TASK"

  ###########################
  CURRENT_TASK="cleanup"
  cleanup

  printf '\nkata-monitor testing: PASSED!\n\n'
}

main "$@"