github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/tests/integration/checkpoint.bats (about)

     1  #!/usr/bin/env bats
     2  
     3  load helpers
     4  
     5  function setup() {
     6  	# XXX: currently criu require root containers.
     7  	requires criu root
     8  
     9  	setup_busybox
    10  }
    11  
    12  function teardown() {
    13  	teardown_bundle
    14  }
    15  
    16  function setup_pipes() {
    17  	# The changes to 'terminal' are needed for running in detached mode
    18  	# shellcheck disable=SC2016
    19  	update_config ' (.. | select(.terminal? != null)) .terminal |= false
    20  			| (.. | select(.[]? == "sh")) += ["-c", "for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"]'
    21  
    22  	# Create three sets of pipes for __runc run.
    23  	# for stderr
    24  	exec {pipe}<> <(:)
    25  	exec {err_r}</proc/self/fd/$pipe
    26  	exec {err_w}>/proc/self/fd/$pipe
    27  	exec {pipe}>&-
    28  	# for stdout
    29  	exec {pipe}<> <(:)
    30  	exec {out_r}</proc/self/fd/$pipe
    31  	exec {out_w}>/proc/self/fd/$pipe
    32  	exec {pipe}>&-
    33  	# for stdin
    34  	exec {pipe}<> <(:)
    35  	exec {in_r}</proc/self/fd/$pipe
    36  	exec {in_w}>/proc/self/fd/$pipe
    37  	exec {pipe}>&-
    38  }
    39  
    40  function check_pipes() {
    41  	local output stderr
    42  
    43  	echo Ping >&${in_w}
    44  	exec {in_w}>&-
    45  	exec {out_w}>&-
    46  	exec {err_w}>&-
    47  
    48  	exec {in_r}>&-
    49  	output=$(cat <&${out_r})
    50  	exec {out_r}>&-
    51  	stderr=$(cat <&${err_r})
    52  	exec {err_r}>&-
    53  
    54  	[[ "${output}" == *"ponG Ping"* ]]
    55  	if [ -n "$stderr" ]; then
    56  		fail "runc stderr: $stderr"
    57  	fi
    58  }
    59  
    60  # Usage: runc_run_with_pipes container-name
    61  function runc_run_with_pipes() {
    62  	# Start a container to be checkpointed, with stdin/stdout redirected
    63  	# so that check_pipes can be used to check it's working fine.
    64  	# We have to redirect stderr as well because otherwise it is
    65  	# redirected to a bats log file, which is not accessible to CRIU
    66  	# (i.e. outside of container) so checkpointing will fail.
    67  	ret=0
    68  	__runc run -d "$1" <&${in_r} >&${out_w} 2>&${err_w} || ret=$?
    69  	if [ "$ret" -ne 0 ]; then
    70  		echo "runc run -d $1 (status: $ret):"
    71  		exec {err_w}>&-
    72  		cat <&${err_r}
    73  		fail "runc run failed"
    74  	fi
    75  
    76  	testcontainer "$1" running
    77  }
    78  
    79  # Usage: runc_restore_with_pipes work-dir container-name [optional-arguments ...]
    80  function runc_restore_with_pipes() {
    81  	workdir="$1"
    82  	shift
    83  	name="$1"
    84  	shift
    85  
    86  	ret=0
    87  	__runc restore -d --work-path "$workdir" --image-path ./image-dir "$@" "$name" <&${in_r} >&${out_w} 2>&${err_w} || ret=$?
    88  	if [ "$ret" -ne 0 ]; then
    89  		echo "__runc restore $name failed (status: $ret)"
    90  		exec {err_w}>&-
    91  		cat <&${err_r}
    92  		fail "runc restore failed"
    93  	fi
    94  
    95  	testcontainer "$name" running
    96  
    97  	runc exec --cwd /bin "$name" echo ok
    98  	[ "$status" -eq 0 ]
    99  	[ "$output" = "ok" ]
   100  }
   101  
   102  function simple_cr() {
   103  	runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
   104  	[ "$status" -eq 0 ]
   105  
   106  	testcontainer test_busybox running
   107  
   108  	for _ in $(seq 2); do
   109  		# checkpoint the running container
   110  		runc "$@" checkpoint --work-path ./work-dir test_busybox
   111  		[ "$status" -eq 0 ]
   112  
   113  		# after checkpoint busybox is no longer running
   114  		testcontainer test_busybox checkpointed
   115  
   116  		# restore from checkpoint
   117  		runc "$@" restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox
   118  		[ "$status" -eq 0 ]
   119  
   120  		# busybox should be back up and running
   121  		testcontainer test_busybox running
   122  	done
   123  }
   124  
   125  @test "checkpoint and restore" {
   126  	simple_cr
   127  }
   128  
   129  @test "checkpoint and restore (bind mount, destination is symlink)" {
   130  	mkdir -p rootfs/real/conf
   131  	ln -s /real/conf rootfs/conf
   132  	update_config '	  .mounts += [{
   133  					source: ".",
   134  					destination: "/conf",
   135  					options: ["bind"]
   136  				}]'
   137  	simple_cr
   138  }
   139  
   140  @test "checkpoint and restore (with --debug)" {
   141  	simple_cr --debug
   142  }
   143  
   144  @test "checkpoint and restore (cgroupns)" {
   145  	# cgroupv2 already enables cgroupns so this case was tested above already
   146  	requires cgroups_v1 cgroupns
   147  
   148  	# enable CGROUPNS
   149  	update_config '.linux.namespaces += [{"type": "cgroup"}]'
   150  
   151  	simple_cr
   152  }
   153  
   154  @test "checkpoint --pre-dump (bad --parent-path)" {
   155  	runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
   156  	[ "$status" -eq 0 ]
   157  
   158  	testcontainer test_busybox running
   159  
   160  	# runc should fail with absolute parent image path.
   161  	runc checkpoint --parent-path "$(pwd)"/parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox
   162  	[[ "${output}" == *"--parent-path"* ]]
   163  	[ "$status" -ne 0 ]
   164  
   165  	# runc should fail with invalid parent image path.
   166  	runc checkpoint --parent-path ./parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox
   167  	[[ "${output}" == *"--parent-path"* ]]
   168  	[ "$status" -ne 0 ]
   169  }
   170  
   171  @test "checkpoint --pre-dump and restore" {
   172  	# Requires kernel dirty memory tracking (missing on ARM, see
   173  	# https://github.com/checkpoint-restore/criu/issues/1729).
   174  	requires criu_feature_mem_dirty_track
   175  
   176  	setup_pipes
   177  	runc_run_with_pipes test_busybox
   178  
   179  	#test checkpoint pre-dump
   180  	mkdir parent-dir
   181  	runc checkpoint --pre-dump --image-path ./parent-dir test_busybox
   182  	[ "$status" -eq 0 ]
   183  
   184  	# busybox should still be running
   185  	testcontainer test_busybox running
   186  
   187  	# checkpoint the running container
   188  	mkdir image-dir
   189  	mkdir work-dir
   190  	runc checkpoint --parent-path ../parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox
   191  	[ "$status" -eq 0 ]
   192  
   193  	# check parent path is valid
   194  	[ -e ./image-dir/parent ]
   195  
   196  	# after checkpoint busybox is no longer running
   197  	testcontainer test_busybox checkpointed
   198  
   199  	runc_restore_with_pipes ./work-dir test_busybox
   200  	check_pipes
   201  }
   202  
   203  @test "checkpoint --lazy-pages and restore" {
   204  	# Requires lazy-pages support.
   205  	requires criu_feature_uffd-noncoop
   206  
   207  	setup_pipes
   208  	runc_run_with_pipes test_busybox
   209  
   210  	# checkpoint the running container
   211  	mkdir image-dir
   212  	mkdir work-dir
   213  
   214  	# For lazy migration we need to know when CRIU is ready to serve
   215  	# the memory pages via TCP.
   216  	exec {pipe}<> <(:)
   217  	# shellcheck disable=SC2094
   218  	exec {lazy_r}</proc/self/fd/$pipe {lazy_w}>/proc/self/fd/$pipe
   219  	exec {pipe}>&-
   220  
   221  	# TCP port for lazy migration
   222  	port=27277
   223  
   224  	__runc checkpoint \
   225  		--lazy-pages \
   226  		--page-server 0.0.0.0:${port} \
   227  		--status-fd ${lazy_w} \
   228  		--manage-cgroups-mode=ignore \
   229  		--work-path ./work-dir \
   230  		--image-path ./image-dir \
   231  		test_busybox &
   232  	cpt_pid=$!
   233  
   234  	# wait for lazy page server to be ready
   235  	out=$(timeout 2 dd if=/proc/self/fd/${lazy_r} bs=1 count=1 2>/dev/null | od)
   236  	exec {lazy_r}>&-
   237  	exec {lazy_w}>&-
   238  	# shellcheck disable=SC2116,SC2086
   239  	out=$(echo $out) # rm newlines
   240  	# expecting \0 which od prints as
   241  	[ "$out" = "0000000 000000 0000001" ]
   242  
   243  	# Check if inventory.img was written
   244  	[ -e image-dir/inventory.img ]
   245  
   246  	# Start CRIU in lazy-daemon mode
   247  	criu lazy-pages --page-server --address 127.0.0.1 --port ${port} -D image-dir &
   248  	lp_pid=$!
   249  
   250  	# Restore lazily from checkpoint.
   251  	#
   252  	# The restored container needs a different name and a different cgroup
   253  	# (and a different systemd unit name, in case systemd cgroup driver is
   254  	# used) as the checkpointed container is not yet destroyed. It is only
   255  	# destroyed at that point in time when the last page is lazily
   256  	# transferred to the destination.
   257  	#
   258  	# Killing the CRIU on the checkpoint side will let the container
   259  	# continue to run if the migration failed at some point.
   260  	runc_restore_with_pipes ./image-dir test_busybox_restore \
   261  		--lazy-pages \
   262  		--manage-cgroups-mode=ignore
   263  
   264  	wait $cpt_pid
   265  
   266  	wait $lp_pid
   267  
   268  	check_pipes
   269  }
   270  
   271  @test "checkpoint and restore in external network namespace" {
   272  	# Requires external network namespaces (criu >= 3.10).
   273  	requires criu_feature_external_net_ns
   274  
   275  	# create a temporary name for the test network namespace
   276  	tmp=$(mktemp)
   277  	rm -f "$tmp"
   278  	ns_name=$(basename "$tmp")
   279  	# create network namespace
   280  	ip netns add "$ns_name"
   281  	ns_path=$(ip netns add "$ns_name" 2>&1 | sed -e 's/.*"\(.*\)".*/\1/')
   282  	# shellcheck disable=SC2012
   283  	ns_inode=$(ls -iL "$ns_path" | awk '{ print $1 }')
   284  
   285  	# tell runc which network namespace to use
   286  	update_config '(.. | select(.type? == "network")) .path |= "'"$ns_path"'"'
   287  
   288  	runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
   289  	[ "$status" -eq 0 ]
   290  
   291  	testcontainer test_busybox running
   292  
   293  	for _ in $(seq 2); do
   294  		# checkpoint the running container; this automatically tells CRIU to
   295  		# handle the network namespace defined in config.json as an external
   296  		runc checkpoint --work-path ./work-dir test_busybox
   297  		[ "$status" -eq 0 ]
   298  
   299  		# after checkpoint busybox is no longer running
   300  		testcontainer test_busybox checkpointed
   301  
   302  		# restore from checkpoint; this should restore the container into the existing network namespace
   303  		runc restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox
   304  		[ "$status" -eq 0 ]
   305  
   306  		# busybox should be back up and running
   307  		testcontainer test_busybox running
   308  
   309  		# container should be running in same network namespace as before
   310  		pid=$(__runc state test_busybox | jq '.pid')
   311  		ns_inode_new=$(readlink /proc/"$pid"/ns/net | sed -e 's/.*\[\(.*\)\]/\1/')
   312  		echo "old network namespace inode $ns_inode"
   313  		echo "new network namespace inode $ns_inode_new"
   314  		[ "$ns_inode" -eq "$ns_inode_new" ]
   315  	done
   316  	ip netns del "$ns_name"
   317  }
   318  
   319  @test "checkpoint and restore with container specific CRIU config" {
   320  	tmp=$(mktemp /tmp/runc-criu-XXXXXX.conf)
   321  	# This is the file we write to /etc/criu/default.conf
   322  	tmplog1=$(mktemp /tmp/runc-criu-log-XXXXXX.log)
   323  	unlink "$tmplog1"
   324  	tmplog1=$(basename "$tmplog1")
   325  	# That is the actual configuration file to be used
   326  	tmplog2=$(mktemp /tmp/runc-criu-log-XXXXXX.log)
   327  	unlink "$tmplog2"
   328  	tmplog2=$(basename "$tmplog2")
   329  	# This adds the annotation 'org.criu.config' to set a container
   330  	# specific CRIU config file.
   331  	update_config '.annotations += {"org.criu.config": "'"$tmp"'"}'
   332  
   333  	# Tell CRIU to use another configuration file
   334  	mkdir -p /etc/criu
   335  	echo "log-file=$tmplog1" >/etc/criu/default.conf
   336  	# Make sure the RPC defined configuration file overwrites the previous
   337  	echo "log-file=$tmplog2" >"$tmp"
   338  
   339  	runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
   340  	[ "$status" -eq 0 ]
   341  
   342  	testcontainer test_busybox running
   343  
   344  	# checkpoint the running container
   345  	runc checkpoint --work-path ./work-dir test_busybox
   346  	[ "$status" -eq 0 ]
   347  	run ! test -f ./work-dir/"$tmplog1"
   348  	test -f ./work-dir/"$tmplog2"
   349  
   350  	# after checkpoint busybox is no longer running
   351  	testcontainer test_busybox checkpointed
   352  
   353  	test -f ./work-dir/"$tmplog2" && unlink ./work-dir/"$tmplog2"
   354  	# restore from checkpoint
   355  	runc restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox
   356  	[ "$status" -eq 0 ]
   357  	run ! test -f ./work-dir/"$tmplog1"
   358  	test -f ./work-dir/"$tmplog2"
   359  
   360  	# busybox should be back up and running
   361  	testcontainer test_busybox running
   362  	unlink "$tmp"
   363  	test -f ./work-dir/"$tmplog2" && unlink ./work-dir/"$tmplog2"
   364  }
   365  
   366  @test "checkpoint and restore with nested bind mounts" {
   367  	bind1=$(mktemp -d -p .)
   368  	bind2=$(mktemp -d -p .)
   369  	update_config '	  .mounts += [{
   370  					type: "bind",
   371  					source: "'"$bind1"'",
   372  					destination: "/test",
   373  					options: ["rw", "bind"]
   374  				},
   375  	                        {
   376  					type: "bind",
   377  					source: "'"$bind2"'",
   378  					destination: "/test/for/nested",
   379  					options: ["rw", "bind"]
   380  				}]'
   381  
   382  	runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
   383  	[ "$status" -eq 0 ]
   384  
   385  	testcontainer test_busybox running
   386  
   387  	# checkpoint the running container
   388  	runc checkpoint --work-path ./work-dir test_busybox
   389  	[ "$status" -eq 0 ]
   390  
   391  	# after checkpoint busybox is no longer running
   392  	testcontainer test_busybox checkpointed
   393  
   394  	# cleanup mountpoints created by runc during creation
   395  	# the mountpoints should be recreated during restore - that is the actual thing tested here
   396  	rm -rf "${bind1:?}"/*
   397  
   398  	# restore from checkpoint
   399  	runc restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox
   400  	[ "$status" -eq 0 ]
   401  
   402  	# busybox should be back up and running
   403  	testcontainer test_busybox running
   404  }
   405  
   406  @test "checkpoint then restore into a different cgroup (via --manage-cgroups-mode ignore)" {
   407  	set_resources_limit
   408  	set_cgroups_path
   409  	runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
   410  	[ "$status" -eq 0 ]
   411  	testcontainer test_busybox running
   412  
   413  	local orig_path
   414  	orig_path=$(get_cgroup_path "pids")
   415  	# Check that the cgroup exists.
   416  	test -d "$orig_path"
   417  
   418  	runc checkpoint --work-path ./work-dir --manage-cgroups-mode ignore test_busybox
   419  	[ "$status" -eq 0 ]
   420  	testcontainer test_busybox checkpointed
   421  	# Check that the cgroup is gone.
   422  	run ! test -d "$orig_path"
   423  
   424  	# Restore into a different cgroup.
   425  	set_cgroups_path # Changes the path.
   426  	runc restore -d --manage-cgroups-mode ignore --pid-file pid \
   427  		--work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox
   428  	[ "$status" -eq 0 ]
   429  	testcontainer test_busybox running
   430  
   431  	# Check that the old cgroup path doesn't exist.
   432  	run ! test -d "$orig_path"
   433  
   434  	# Check that the new path exists.
   435  	local new_path
   436  	new_path=$(get_cgroup_path "pids")
   437  	test -d "$new_path"
   438  
   439  	# Check that container's init is in the new cgroup.
   440  	local pid
   441  	pid=$(cat "pid")
   442  	grep -q "${REL_CGROUPS_PATH}$" "/proc/$pid/cgroup"
   443  }