github.com/kata-containers/tests@v0.0.0-20240307153542-772105b56064/functional/kata-monitor/run.sh (about)

     1  #!/bin/bash
     2  #
     3  # Copyright (c) 2022 Red Hat
     4  #
     5  # SPDX-License-Identifier: Apache-2.0
     6  #
     7  # This test file will test kata-monitor for basic functionality (retrieve kata sandboxes)
     8  # It will assume an environment where:
     9  # - a CRI container manager (container engine) will be up and running
    10  # - crictl is installed and configured
    11  # - the kata-monitor binary is available on the host
    12  #
    13  
    14  set -o errexit
    15  set -o nounset
    16  set -o pipefail
    17  
    18  source "/etc/os-release" || source "/usr/lib/os-release"
    19  
    20  [ -n "${BASH_VERSION:-}" ] && set -o errtrace
    21  [ -n "${DEBUG:-}" ] && set -o xtrace
    22  
    23  readonly MONITOR_HTTP_ENDPOINT="127.0.0.1:8090"
    24  # we should collect few hundred metrics, let's put a reasonable minimum
    25  readonly MONITOR_MIN_METRICS_NUM=200
    26  BAREMETAL=${BAREMETAL:-"false"}
    27  CRI_RUNTIME=${CRI_RUNTIME:-"crio"}
    28  CRICTL_RUNTIME=${CRICTL_RUNTIME:-"kata"}
    29  KATA_MONITOR_BIN="${KATA_MONITOR_BIN:-$(command -v kata-monitor || true)}"
    30  KATA_MONITOR_PID=""
    31  IAM=$(whoami)
    32  TMPATH=$(mktemp -d -t kata-monitor-test-XXXXXXXXX)
    33  METRICS_FILE="${TMPATH}/metrics.txt"
    34  MONITOR_LOG_FILE="${TMPATH}/kata-monitor.log"
    35  CACHE_UPD_TIMEOUT_SEC=${CACHE_UPD_TIMEOUT_SEC:-20}
    36  POD_ID=""
    37  CID=""
    38  RUNC_POD_ID=""
    39  RUNC_CID=""
    40  CURRENT_TASK=""
    41  
    42  FALSE=1
    43  TRUE=0
    44  
    45  trap error_with_msg ERR
    46  
    47  title() {
    48  	local step="$1"
    49  	echo -e "\n* STEP: $step"
    50  }
    51  
    52  echo_ok() {
    53  	local msg="$1"
    54  
    55  	echo "OK: $msg"
    56  }
    57  
    58  # quiet crictrl
    59  qcrictl() {
    60  	crictl "$@" > /dev/null
    61  }
    62  
    63  # this is just an hash of current date (+ nanoseconds)
    64  gen_unique_id() {
    65  	date +%T:%N | md5sum | cut -d ' ' -f 1
    66  }
    67  
    68  error_with_msg() {
    69  	local msg=${1:-"cannot $CURRENT_TASK"}
    70  
    71  	trap - ERR
    72  	echo -e "\nERROR: $msg"
    73  	if [ -f "$MONITOR_LOG_FILE" ]; then
    74  		echo -e "\nkata-monitor logs:\n----------------"
    75  		cat "$MONITOR_LOG_FILE"
    76  	fi
    77  	echo -e "\nkata-monitor testing: FAILED!"
    78  	cleanup
    79  	exit 1
    80  }
    81  
    82  cleanup() {
    83  	stop_workload
    84  	stop_workload "$RUNC_CID" "$RUNC_POD_ID"
    85  
    86  	[ -n "$KATA_MONITOR_PID" ] \
    87  		&& [ -d "/proc/$KATA_MONITOR_PID" ] \
    88  		&& kill -9 "$KATA_MONITOR_PID"
    89  
    90  	rm -rf "$TMPATH"
    91  }
    92  
    93  create_sandbox_json() {
    94  	local uid_name_suffix="$(gen_unique_id)"
    95  	local sbfile="$TMPATH/sandbox-$uid_name_suffix.json"
    96  
    97  	cat <<EOF >$sbfile
    98  {
    99  	"metadata": {
   100  		"name": "nginx-$uid_name_suffix",
   101  		"namespace": "default",
   102  		"attempt": 1
   103  	},
   104  	"logDirectory": "/tmp",
   105  	"linux": {
   106  	}
   107  }
   108  EOF
   109  	echo "$sbfile"
   110  }
   111  
   112  create_container_json() {
   113  	local uid_name_suffix="$(gen_unique_id)"
   114  	local cntfile="$TMPATH/container-$uid_name_suffix.json"
   115  
   116  	cat <<EOF >$cntfile
   117  {
   118  	"metadata": {
   119  		"name": "busybox"
   120  	},
   121  	"image":{
   122  		"image": "busybox"
   123  	},
   124  	"command": [
   125  		"top"
   126  	],
   127  	"log_path":"busybox.log",
   128  	"linux": {
   129  	}
   130  }
   131  EOF
   132  	echo "$cntfile"
   133  }
   134  
   135  start_workload() {
   136  	local runtime=${1:-}
   137  	local args=""
   138  	local sbfile=""
   139  	local cntfile=""
   140  
   141  	[ -n "$runtime" ] && args="-r $runtime"
   142  
   143  	sbfile="$(create_sandbox_json)"
   144  	cntfile="$(create_container_json)"
   145  
   146  	POD_ID=$(crictl runp $args $sbfile)
   147  	CID=$(crictl create $POD_ID $cntfile $sbfile)
   148  	qcrictl start $CID
   149  }
   150  
   151  stop_workload() {
   152  	local cid="${1:-$CID}"
   153  	local pod_id="${2:-$POD_ID}"
   154  	local check
   155  
   156  	[ -z "$pod_id" ] && return
   157  	check=$(crictl pods -q -id $pod_id)
   158  	[ -z "$check" ] && return
   159  
   160  	qcrictl stop $cid
   161  	qcrictl rm $cid
   162  
   163  	qcrictl stopp $pod_id
   164  	qcrictl rmp $pod_id
   165  }
   166  
   167  is_sandbox_there() {
   168  	local podid=${1}
   169  	local sbs s
   170  
   171  	sbs=$(curl -s ${MONITOR_HTTP_ENDPOINT}/sandboxes)
   172  	if [ -n "$sbs" ]; then
   173  		for s in $sbs; do
   174  			if [ "$s" = "$podid" ]; then
   175  				return $TRUE
   176  				break
   177  			fi
   178  		done
   179  	fi
   180  	return $FALSE
   181  }
   182  
   183  is_sandbox_there_iterate() {
   184  	local podid=${1}
   185  
   186  	for i in $(seq 1 $CACHE_UPD_TIMEOUT_SEC); do
   187  		is_sandbox_there "$podid" && return $TRUE
   188  		echo -n "."
   189  		sleep 1
   190  		continue
   191  	done
   192  
   193  	return $FALSE
   194  }
   195  
   196  is_sandbox_missing_iterate() {
   197  	local podid=${1}
   198  
   199  	for i in $(seq 1 $CACHE_UPD_TIMEOUT_SEC); do
   200  		is_sandbox_there "$podid" || return $TRUE
   201  		echo -n "."
   202  		sleep 1
   203  		continue
   204  	done
   205  
   206  	return $FALSE
   207  }
   208  
   209  main() {
   210  	local args=""
   211  
   212  	# Our baremetal CI enforces cleanups of the environment (e.g., cni plugins):
   213  	# we here want a ready environment to just do few quick checks. So, let's skip
   214  	# baremetal environments for now.
   215  	# (kata-containers-2.0-tests-ubuntu-ARM-PR would fail)
   216  	if [ "$BAREMETAL" = true ]; then
   217  		echo "INFO: baremetal environment - skip kata-monitor tests"
   218  		exit 0
   219  	fi
   220  
   221  	###########################
   222  	title "pre-checks"
   223  
   224  	[ "$IAM" != "root" ] &&
   225  		error_with_msg "run this script as root please (user is \"${IAM}\")"
   226  
   227  	CURRENT_TASK="connect to the container engine"
   228  	qcrictl pods
   229  	echo_ok "$CURRENT_TASK"
   230  
   231  	###########################
   232  	title "create workloads"
   233  
   234  	CURRENT_TASK="start workload (runc)"
   235  	start_workload
   236  	RUNC_POD_ID="$POD_ID"
   237  	RUNC_CID="$CID"
   238  	echo_ok "$CURRENT_TASK - POD ID:$POD_ID, CID:$CID"
   239  
   240  	CURRENT_TASK="start workload ($CRICTL_RUNTIME)"
   241  	start_workload "$CRICTL_RUNTIME"
   242  	echo_ok "$CURRENT_TASK - POD ID:$POD_ID, CID:$CID"
   243  
   244  	###########################
   245  	title "start kata-monitor"
   246  
   247  	[ ! -x "$KATA_MONITOR_BIN" ] && error_with_msg "kata-monitor binary not found"
   248  
   249  	[ "$CRI_RUNTIME" = "crio" ] && args="--runtime-endpoint /run/crio/crio.sock"
   250  
   251  	CURRENT_TASK="start kata-monitor"
   252  	$KATA_MONITOR_BIN $args --log-level trace > "$MONITOR_LOG_FILE" 2>&1 &
   253  	KATA_MONITOR_PID="$!"
   254  	echo_ok "$CURRENT_TASK ($KATA_MONITOR_PID)"
   255  
   256  	###########################
   257  	title "kata-monitor cache update checks"
   258  
   259  	CURRENT_TASK="retrieve $POD_ID in kata-monitor cache"
   260  	is_sandbox_there_iterate "$POD_ID" || error_with_msg
   261  	echo_ok "$CURRENT_TASK"
   262  
   263  	CURRENT_TASK="look for runc pod $RUNC_POD_ID in kata-monitor cache"
   264  	is_sandbox_there_iterate "$RUNC_POD_ID" && error_with_msg "cache: got runc pod $RUNC_POD_ID"
   265  	echo_ok "runc pod $RUNC_POD_ID skipped from kata-monitor cache"
   266  
   267  	###########################
   268  	title "kata-monitor metrics retrieval"
   269  
   270  	CURRENT_TASK="retrieve metrics from kata-monitor"
   271  	curl -s ${MONITOR_HTTP_ENDPOINT}/metrics > "$METRICS_FILE"
   272  	echo_ok "$CURRENT_TASK"
   273  
   274  	CURRENT_TASK="retrieve metrics for pod $POD_ID"
   275  	METRICS_COUNT=$(grep -c "$POD_ID" "$METRICS_FILE")
   276  	[ ${METRICS_COUNT} -lt ${MONITOR_MIN_METRICS_NUM} ] \
   277  		&& error_with_msg "got too few metrics (#${METRICS_COUNT})"
   278  	echo_ok "$CURRENT_TASK - found #${METRICS_COUNT} metrics"
   279  
   280  	###########################
   281  	title "remove kata workload"
   282  
   283  	CURRENT_TASK="stop workload ($CRICTL_RUNTIME)"
   284  	stop_workload
   285  	echo_ok "$CURRENT_TASK"
   286  
   287  	###########################
   288  	title "kata-monitor cache update checks (removal)"
   289  
   290  	CURRENT_TASK="verify removal of $POD_ID from kata-monitor cache"
   291  	is_sandbox_missing_iterate "$POD_ID" || error_with_msg "pod $POD_ID was not removed"
   292  	echo_ok "$CURRENT_TASK"
   293  
   294  	###########################
   295  	CURRENT_TASK="cleanup"
   296  	cleanup
   297  
   298  	echo -e "\nkata-monitor testing: PASSED!\n"
   299  }
   300  
   301  main "@"