github.com/kata-containers/tests@v0.0.0-20240307153542-772105b56064/functional/vfio/run.sh (about)

     1  #!/bin/bash
     2  #
     3  # Copyright (c) 2021 Intel Corporation
     4  #
     5  # SPDX-License-Identifier: Apache-2.0
     6  #
     7  
     8  set -x
     9  set -o errexit
    10  set -o nounset
    11  set -o pipefail
    12  set -o errtrace
    13  
    14  script_path=$(dirname "$0")
    15  source "${script_path}/../../lib/common.bash"
    16  
    17  addr=
    18  tmp_data_dir="$(mktemp -d)"
    19  rootfs_tar="${tmp_data_dir}/rootfs.tar"
    20  trap cleanup EXIT
    21  
    22  # kata-runtime options
    23  SANDBOX_CGROUP_ONLY=""
    24  HYPERVISOR=
    25  MACHINE_TYPE=
    26  IMAGE_TYPE=
    27  
    28  cleanup() {
    29  	clean_env_ctr
    30  	sudo rm -rf "${tmp_data_dir}"
    31  
    32  	[ -n "${host_pci}" ] && sudo driverctl unset-override "${host_pci}"
    33  }
    34  
    35  host_pci_addr() {
    36  	lspci -D | grep "Ethernet controller" | grep "Virtio.*network device" | tail -1 | cut -d' ' -f1
    37  }
    38  
    39  get_vfio_path() {
    40  	local addr="$1"
    41  	echo "/dev/vfio/$(basename $(realpath /sys/bus/pci/drivers/vfio-pci/${host_pci}/iommu_group))"
    42  }
    43  
    44  pull_rootfs() {
    45  	# pull and export busybox image in tar file
    46  	local image="quay.io/prometheus/busybox:latest"
    47  	sudo -E ctr i pull ${image}
    48  	sudo -E ctr i export "${rootfs_tar}" "${image}"
    49  	sudo chown ${USER}:${USER} "${rootfs_tar}"
    50  	sync
    51  }
    52  
    53  create_bundle() {
    54  	local bundle_dir="$1"
    55  	mkdir -p "${bundle_dir}"
    56  
    57  	# extract busybox rootfs
    58  	local rootfs_dir="${bundle_dir}/rootfs"
    59  	mkdir -p "${rootfs_dir}"
    60  	local layers_dir="$(mktemp -d)"
    61  	tar -C "${layers_dir}" -pxf "${rootfs_tar}"
    62  	for ((i=0;i<$(cat ${layers_dir}/manifest.json | jq -r ".[].Layers | length");i++)); do
    63  		tar -C ${rootfs_dir} -xf ${layers_dir}/$(cat ${layers_dir}/manifest.json | jq -r ".[].Layers[${i}]")
    64  	done
    65  	sync
    66  
    67  	# Copy config.json
    68  	cp -a "${script_path}/config.json" "${bundle_dir}/config.json"
    69  }
    70  
    71  run_container() {
    72  	local container_id="$1"
    73  	local bundle_dir="$2"
    74  
    75  	sudo -E ctr run -d --runtime io.containerd.kata.v2 --config "${bundle_dir}/config.json" "${container_id}"
    76  }
    77  
    78  
    79  get_ctr_cmd_output() {
    80  	local container_id="$1"
    81  	shift
    82  	sudo -E ctr t exec --exec-id 2 "${container_id}" "${@}"
    83  }
    84  
    85  check_guest_kernel() {
    86  	local container_id="$1"
    87  	# For vfio_mode=guest-kernel, the device should be bound to
    88  	# the guest kernel's native driver.  To check this has worked,
    89  	# we look for an ethernet device named 'eth*'
    90  	get_ctr_cmd_output "${container_id}" ip a | grep "eth" || die "Missing VFIO network interface"
    91  }
    92  
    93  check_vfio() {
    94  	local cid="$1"
    95  	# For vfio_mode=vfio, the device should be bound to the guest
    96  	# vfio-pci driver.
    97  
    98  	# Check the control device is visible
    99  	get_ctr_cmd_output "${cid}" ls /dev/vfio/vfio || die "Couldn't find VFIO control device in container"
   100  
   101  	# The device should *not* cause an ethernet interface to appear
   102  	! get_ctr_cmd_output "${cid}" ip a | grep "eth" || die "Unexpected network interface"
   103  
   104  	# There should be exactly one VFIO group device (there might
   105  	# be multiple IOMMU groups in the VM, but only one device
   106  	# should be bound to the VFIO driver, so there should still
   107  	# only be one VFIO device
   108  	group="$(get_ctr_cmd_output "${cid}" ls /dev/vfio | grep -v vfio)"
   109  	if [ $(echo "${group}" | wc -w) != "1" ] ; then
   110  	    die "Expected exactly one VFIO group got: ${group}"
   111  	fi
   112  
   113  	# There should be two devices in the IOMMU group: the ethernet
   114  	# device we care about, plus the PCIe to PCI bridge device
   115  	devs="$(get_ctr_cmd_output "${cid}" ls /sys/kernel/iommu_groups/"${group}"/devices)"
   116  	if [ $(echo "${devs}" | wc -w) != "2" ] ; then
   117  	    die "Expected exactly two devices got: ${devs}"
   118  	fi
   119  
   120  	# The bridge device will always sort first, because it is on
   121  	# bus zero, whereas the NIC will be on a non-zero bus
   122  	guest_pci=$(echo "${devs}" | tail -1)
   123  
   124  	# This is a roundabout way of getting the environment
   125  	# variable, but to use the more obvious "echo $PCIDEVICE_..."
   126  	# we would have to escape the '$' enough to not be expanded
   127  	# before it's injected into the container, but not so much
   128  	# that it *is* expanded by the shell within the container.
   129  	# Doing that with another shell function in between is very
   130  	# fragile, so do it this way instead.
   131  	guest_env="$(get_ctr_cmd_output "${cid}" env | grep ^PCIDEVICE_VIRTIO_NET | sed s/^[^=]*=//)"
   132  	if [ "${guest_env}" != "${guest_pci}" ]; then
   133  	    die "PCIDEVICE variable was \"${guest_env}\" instead of \"${guest_pci}\""
   134  	fi
   135  }
   136  
   137  get_dmesg() {
   138  	local container_id="$1"
   139  	get_ctr_cmd_output "${container_id}" dmesg
   140  }
   141  
   142  # Show help about this script
   143  help(){
   144  cat << EOF
   145  Usage: $0 [-h] [options]
   146      Description:
   147          This script runs a kata container and passthrough a vfio device
   148      Options:
   149          -h,          Help
   150          -i <string>, Specify initrd or image
   151          -m <string>, Specify kata-runtime machine type for qemu hypervisor
   152          -p <string>, Specify kata-runtime hypervisor
   153          -s <value>,  Set sandbox_cgroup_only in the configuration file
   154  EOF
   155  }
   156  
   157  setup_configuration_file() {
   158  	local qemu_config_file="configuration-qemu.toml"
   159  	local clh_config_file="configuration-clh.toml"
   160  	local image_file="/opt/kata/share/kata-containers/kata-containers.img"
   161  	local initrd_file="/opt/kata/share/kata-containers/kata-containers-initrd.img"
   162  	local kata_config_file=""
   163  
   164  	for file in $(kata-runtime --kata-show-default-config-paths); do
   165  		if [ ! -f "${file}" ]; then
   166  			continue
   167  		fi
   168  
   169  		kata_config_file="${file}"
   170  		config_dir=$(dirname ${file})
   171  		config_filename=""
   172  
   173  		if [ "$HYPERVISOR" = "qemu" ]; then
   174  			config_filename="${qemu_config_file}"
   175  		elif [ "$HYPERVISOR" = "clh" ]; then
   176  			config_filename="${clh_config_file}"
   177  		fi
   178  
   179  		config_file="${config_dir}/${config_filename}"
   180  		if [ -f "${config_file}" ]; then
   181  			rm -f "${kata_config_file}"
   182  			cp -a $(realpath "${config_file}") "${kata_config_file}"
   183  			break
   184  		fi
   185  	done
   186  
   187  	# machine type applies to configuration.toml and configuration-qemu.toml
   188  	if [ -n "$MACHINE_TYPE" ]; then
   189  		if [ "$HYPERVISOR" = "qemu" ]; then
   190  			sed -i 's|^machine_type.*|machine_type = "'${MACHINE_TYPE}'"|g' "${kata_config_file}"
   191  			# Make sure we have set hot_plug_vfio to a reasonable value
   192  			sudo sed -i -e 's|^#hot_plug_vfio =.*$|hot_plug_vfio = "bridge-port"|' -e 's|^hot_plug_vfio = .*$|hot_plug_vfio = "bridge-port"|' "${kata_config_file}"
   193  		else
   194  			warn "Variable machine_type only applies to qemu. It will be ignored"
   195  		fi
   196  	fi
   197  
   198  	if [ -n "${SANDBOX_CGROUP_ONLY}" ]; then
   199  	   sed -i 's|^sandbox_cgroup_only.*|sandbox_cgroup_only='${SANDBOX_CGROUP_ONLY}'|g' "${kata_config_file}"
   200  	fi
   201  
   202  	# Change to initrd or image depending on user input.
   203  	# Non-default configs must be changed to specify either initrd or image, image is default.
   204  	if [ "$IMAGE_TYPE" = "initrd" ]; then
   205  		if $(grep -q "^image.*" ${kata_config_file}); then
   206  			if $(grep -q "^initrd.*" ${kata_config_file}); then
   207  				sed -i '/^image.*/d' "${kata_config_file}"
   208  			else
   209  				sed -i 's|^image.*|initrd = "'${initrd_file}'"|g' "${kata_config_file}"
   210  			fi
   211  		fi
   212  	else
   213  		if $(grep -q "^initrd.*" ${kata_config_file}); then
   214  			if $(grep -q "^image.*" ${kata_config_file}); then
   215  				sed -i '/^initrd.*/d' "${kata_config_file}"
   216  			else
   217  				sed -i 's|^initrd.*|image = "'${image_file}'"|g' "${kata_config_file}"
   218  			fi
   219  		fi
   220  	fi
   221  
   222  	# enable debug
   223  	sed -i -e 's/^#\(enable_debug\).*=.*$/\1 = true/g' \
   224  	       -e 's/^#\(debug_console_enabled\).*=.*$/\1 = true/g' \
   225  	       -e 's/^kernel_params = "\(.*\)"/kernel_params = "\1 agent.log=debug"/g' \
   226  	       "${kata_config_file}"
   227  
   228  	# enable VFIO relevant hypervisor annotations
   229  	sed -i -e 's/^\(enable_annotations\).*=.*$/\1 = ["enable_iommu"]/' \
   230  		"${kata_config_file}"
   231  }
   232  
   233  run_test_container() {
   234  	local container_id="$1"
   235  	local bundle_dir="$2"
   236  	local config_json_in="$3"
   237  	local host_pci="$4"
   238  
   239  	# generate final config.json
   240  	sed -e '/^#.*/d' \
   241  	    -e 's|@VFIO_PATH@|'"${vfio_device}"'|g' \
   242  	    -e 's|@VFIO_MAJOR@|'"${vfio_major}"'|g' \
   243  	    -e 's|@VFIO_MINOR@|'"${vfio_minor}"'|g' \
   244  	    -e 's|@VFIO_CTL_MAJOR@|'"${vfio_ctl_major}"'|g' \
   245  	    -e 's|@VFIO_CTL_MINOR@|'"${vfio_ctl_minor}"'|g' \
   246  	    -e 's|@ROOTFS@|'"${bundle_dir}/rootfs"'|g' \
   247  	    -e 's|@HOST_PCI@|'"${host_pci}"'|g' \
   248  	    "${config_json_in}" > "${script_path}/config.json"
   249  
   250  	create_bundle "${bundle_dir}"
   251  
   252  	# run container
   253  	run_container "${container_id}" "${bundle_dir}"
   254  
   255  	# output VM dmesg
   256  	get_dmesg "${container_id}"
   257  }
   258  
   259  main() {
   260  	local OPTIND
   261  	while getopts "hi:m:p:s:" opt;do
   262  		case ${opt} in
   263  		h)
   264  		    help
   265  		    exit 0;
   266  		    ;;
   267  		i)
   268  		    IMAGE_TYPE="${OPTARG}"
   269  		    ;;
   270  		m)
   271  		    MACHINE_TYPE="${OPTARG}"
   272  		    ;;
   273  		p)
   274  		    HYPERVISOR="${OPTARG}"
   275  		    ;;
   276  		s)
   277  		    SANDBOX_CGROUP_ONLY="${OPTARG}"
   278  		    ;;
   279  		?)
   280  		    # parse failure
   281  		    help
   282  		    die "Failed to parse arguments"
   283  		    ;;
   284  		esac
   285  	done
   286  	shift $((OPTIND-1))
   287  
   288  	#
   289  	# Get the device ready on the host
   290  	#
   291  	setup_configuration_file
   292  
   293  	restart_containerd_service
   294  	sudo modprobe vfio
   295  	sudo modprobe vfio-pci
   296  
   297  	host_pci=$(host_pci_addr)
   298  	[ -n "${host_pci}" ] || die "virtio ethernet controller PCI address not found"
   299  
   300  	cat /proc/cmdline | grep -q "intel_iommu=on" || \
   301  		die "intel_iommu=on not found in kernel cmdline"
   302  
   303  	sudo driverctl set-override "${host_pci}" vfio-pci
   304  
   305  	vfio_device="$(get_vfio_path "${host_pci}")"
   306  	[ -n "${vfio_device}" ] || die "vfio device not found"
   307  	vfio_major="$(printf '%d' $(stat -c '0x%t' ${vfio_device}))"
   308  	vfio_minor="$(printf '%d' $(stat -c '0x%T' ${vfio_device}))"
   309  
   310  	[ -n "/dev/vfio/vfio" ] || die "vfio control device not found"
   311  	vfio_ctl_major="$(printf '%d' $(stat -c '0x%t' /dev/vfio/vfio))"
   312  	vfio_ctl_minor="$(printf '%d' $(stat -c '0x%T' /dev/vfio/vfio))"
   313  
   314  	# Get the rootfs we'll use for all tests
   315  	pull_rootfs
   316  
   317  	#
   318  	# Run the tests
   319  	#
   320  
   321  	# test for guest-kernel mode
   322  	guest_kernel_cid="vfio-guest-kernel-${RANDOM}"
   323  	run_test_container "${guest_kernel_cid}" \
   324  			   "${tmp_data_dir}/vfio-guest-kernel" \
   325  			   "${script_path}/guest-kernel.json.in" \
   326  			   "${host_pci}"
   327  	check_guest_kernel "${guest_kernel_cid}"
   328  
   329  	# Remove the container so we can re-use the device for the next test
   330  	clean_env_ctr
   331  
   332  	# test for vfio mode
   333  	vfio_cid="vfio-vfio-${RANDOM}"
   334  	run_test_container "${vfio_cid}" \
   335  			   "${tmp_data_dir}/vfio-vfio" \
   336  			   "${script_path}/vfio.json.in" \
   337  			   "${host_pci}"
   338  	check_vfio "${vfio_cid}"
   339  }
   340  
   341  main $@