github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/tests/integration/checkpoint.bats (about) 1 #!/usr/bin/env bats 2 3 load helpers 4 5 function setup() { 6 # XXX: currently criu require root containers. 7 requires criu root 8 9 setup_busybox 10 } 11 12 function teardown() { 13 teardown_bundle 14 } 15 16 function setup_pipes() { 17 # The changes to 'terminal' are needed for running in detached mode 18 # shellcheck disable=SC2016 19 update_config ' (.. | select(.terminal? != null)) .terminal |= false 20 | (.. | select(.[]? == "sh")) += ["-c", "for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"]' 21 22 # Create three sets of pipes for __runc run. 23 # for stderr 24 exec {pipe}<> <(:) 25 exec {err_r}</proc/self/fd/$pipe 26 exec {err_w}>/proc/self/fd/$pipe 27 exec {pipe}>&- 28 # for stdout 29 exec {pipe}<> <(:) 30 exec {out_r}</proc/self/fd/$pipe 31 exec {out_w}>/proc/self/fd/$pipe 32 exec {pipe}>&- 33 # for stdin 34 exec {pipe}<> <(:) 35 exec {in_r}</proc/self/fd/$pipe 36 exec {in_w}>/proc/self/fd/$pipe 37 exec {pipe}>&- 38 } 39 40 function check_pipes() { 41 local output stderr 42 43 echo Ping >&${in_w} 44 exec {in_w}>&- 45 exec {out_w}>&- 46 exec {err_w}>&- 47 48 exec {in_r}>&- 49 output=$(cat <&${out_r}) 50 exec {out_r}>&- 51 stderr=$(cat <&${err_r}) 52 exec {err_r}>&- 53 54 [[ "${output}" == *"ponG Ping"* ]] 55 if [ -n "$stderr" ]; then 56 fail "runc stderr: $stderr" 57 fi 58 } 59 60 # Usage: runc_run_with_pipes container-name 61 function runc_run_with_pipes() { 62 # Start a container to be checkpointed, with stdin/stdout redirected 63 # so that check_pipes can be used to check it's working fine. 64 # We have to redirect stderr as well because otherwise it is 65 # redirected to a bats log file, which is not accessible to CRIU 66 # (i.e. outside of container) so checkpointing will fail. 67 ret=0 68 __runc run -d "$1" <&${in_r} >&${out_w} 2>&${err_w} || ret=$? 69 if [ "$ret" -ne 0 ]; then 70 echo "runc run -d $1 (status: $ret):" 71 exec {err_w}>&- 72 cat <&${err_r} 73 fail "runc run failed" 74 fi 75 76 testcontainer "$1" running 77 } 78 79 # Usage: runc_restore_with_pipes work-dir container-name [optional-arguments ...] 80 function runc_restore_with_pipes() { 81 workdir="$1" 82 shift 83 name="$1" 84 shift 85 86 ret=0 87 __runc restore -d --work-path "$workdir" --image-path ./image-dir "$@" "$name" <&${in_r} >&${out_w} 2>&${err_w} || ret=$? 88 if [ "$ret" -ne 0 ]; then 89 echo "__runc restore $name failed (status: $ret)" 90 exec {err_w}>&- 91 cat <&${err_r} 92 fail "runc restore failed" 93 fi 94 95 testcontainer "$name" running 96 97 runc exec --cwd /bin "$name" echo ok 98 [ "$status" -eq 0 ] 99 [ "$output" = "ok" ] 100 } 101 102 function simple_cr() { 103 runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox 104 [ "$status" -eq 0 ] 105 106 testcontainer test_busybox running 107 108 for _ in $(seq 2); do 109 # checkpoint the running container 110 runc "$@" checkpoint --work-path ./work-dir test_busybox 111 [ "$status" -eq 0 ] 112 113 # after checkpoint busybox is no longer running 114 testcontainer test_busybox checkpointed 115 116 # restore from checkpoint 117 runc "$@" restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox 118 [ "$status" -eq 0 ] 119 120 # busybox should be back up and running 121 testcontainer test_busybox running 122 done 123 } 124 125 @test "checkpoint and restore" { 126 simple_cr 127 } 128 129 @test "checkpoint and restore (bind mount, destination is symlink)" { 130 mkdir -p rootfs/real/conf 131 ln -s /real/conf rootfs/conf 132 update_config ' .mounts += [{ 133 source: ".", 134 destination: "/conf", 135 options: ["bind"] 136 }]' 137 simple_cr 138 } 139 140 @test "checkpoint and restore (with --debug)" { 141 simple_cr --debug 142 } 143 144 @test "checkpoint and restore (cgroupns)" { 145 # cgroupv2 already enables cgroupns so this case was tested above already 146 requires cgroups_v1 cgroupns 147 148 # enable CGROUPNS 149 update_config '.linux.namespaces += [{"type": "cgroup"}]' 150 151 simple_cr 152 } 153 154 @test "checkpoint --pre-dump (bad --parent-path)" { 155 runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox 156 [ "$status" -eq 0 ] 157 158 testcontainer test_busybox running 159 160 # runc should fail with absolute parent image path. 161 runc checkpoint --parent-path "$(pwd)"/parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox 162 [[ "${output}" == *"--parent-path"* ]] 163 [ "$status" -ne 0 ] 164 165 # runc should fail with invalid parent image path. 166 runc checkpoint --parent-path ./parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox 167 [[ "${output}" == *"--parent-path"* ]] 168 [ "$status" -ne 0 ] 169 } 170 171 @test "checkpoint --pre-dump and restore" { 172 # Requires kernel dirty memory tracking (missing on ARM, see 173 # https://github.com/checkpoint-restore/criu/issues/1729). 174 requires criu_feature_mem_dirty_track 175 176 setup_pipes 177 runc_run_with_pipes test_busybox 178 179 #test checkpoint pre-dump 180 mkdir parent-dir 181 runc checkpoint --pre-dump --image-path ./parent-dir test_busybox 182 [ "$status" -eq 0 ] 183 184 # busybox should still be running 185 testcontainer test_busybox running 186 187 # checkpoint the running container 188 mkdir image-dir 189 mkdir work-dir 190 runc checkpoint --parent-path ../parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox 191 [ "$status" -eq 0 ] 192 193 # check parent path is valid 194 [ -e ./image-dir/parent ] 195 196 # after checkpoint busybox is no longer running 197 testcontainer test_busybox checkpointed 198 199 runc_restore_with_pipes ./work-dir test_busybox 200 check_pipes 201 } 202 203 @test "checkpoint --lazy-pages and restore" { 204 # Requires lazy-pages support. 205 requires criu_feature_uffd-noncoop 206 207 setup_pipes 208 runc_run_with_pipes test_busybox 209 210 # checkpoint the running container 211 mkdir image-dir 212 mkdir work-dir 213 214 # For lazy migration we need to know when CRIU is ready to serve 215 # the memory pages via TCP. 216 exec {pipe}<> <(:) 217 # shellcheck disable=SC2094 218 exec {lazy_r}</proc/self/fd/$pipe {lazy_w}>/proc/self/fd/$pipe 219 exec {pipe}>&- 220 221 # TCP port for lazy migration 222 port=27277 223 224 __runc checkpoint \ 225 --lazy-pages \ 226 --page-server 0.0.0.0:${port} \ 227 --status-fd ${lazy_w} \ 228 --manage-cgroups-mode=ignore \ 229 --work-path ./work-dir \ 230 --image-path ./image-dir \ 231 test_busybox & 232 cpt_pid=$! 233 234 # wait for lazy page server to be ready 235 out=$(timeout 2 dd if=/proc/self/fd/${lazy_r} bs=1 count=1 2>/dev/null | od) 236 exec {lazy_r}>&- 237 exec {lazy_w}>&- 238 # shellcheck disable=SC2116,SC2086 239 out=$(echo $out) # rm newlines 240 # expecting \0 which od prints as 241 [ "$out" = "0000000 000000 0000001" ] 242 243 # Check if inventory.img was written 244 [ -e image-dir/inventory.img ] 245 246 # Start CRIU in lazy-daemon mode 247 criu lazy-pages --page-server --address 127.0.0.1 --port ${port} -D image-dir & 248 lp_pid=$! 249 250 # Restore lazily from checkpoint. 251 # 252 # The restored container needs a different name and a different cgroup 253 # (and a different systemd unit name, in case systemd cgroup driver is 254 # used) as the checkpointed container is not yet destroyed. It is only 255 # destroyed at that point in time when the last page is lazily 256 # transferred to the destination. 257 # 258 # Killing the CRIU on the checkpoint side will let the container 259 # continue to run if the migration failed at some point. 260 runc_restore_with_pipes ./image-dir test_busybox_restore \ 261 --lazy-pages \ 262 --manage-cgroups-mode=ignore 263 264 wait $cpt_pid 265 266 wait $lp_pid 267 268 check_pipes 269 } 270 271 @test "checkpoint and restore in external network namespace" { 272 # Requires external network namespaces (criu >= 3.10). 273 requires criu_feature_external_net_ns 274 275 # create a temporary name for the test network namespace 276 tmp=$(mktemp) 277 rm -f "$tmp" 278 ns_name=$(basename "$tmp") 279 # create network namespace 280 ip netns add "$ns_name" 281 ns_path=$(ip netns add "$ns_name" 2>&1 | sed -e 's/.*"\(.*\)".*/\1/') 282 # shellcheck disable=SC2012 283 ns_inode=$(ls -iL "$ns_path" | awk '{ print $1 }') 284 285 # tell runc which network namespace to use 286 update_config '(.. | select(.type? == "network")) .path |= "'"$ns_path"'"' 287 288 runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox 289 [ "$status" -eq 0 ] 290 291 testcontainer test_busybox running 292 293 for _ in $(seq 2); do 294 # checkpoint the running container; this automatically tells CRIU to 295 # handle the network namespace defined in config.json as an external 296 runc checkpoint --work-path ./work-dir test_busybox 297 [ "$status" -eq 0 ] 298 299 # after checkpoint busybox is no longer running 300 testcontainer test_busybox checkpointed 301 302 # restore from checkpoint; this should restore the container into the existing network namespace 303 runc restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox 304 [ "$status" -eq 0 ] 305 306 # busybox should be back up and running 307 testcontainer test_busybox running 308 309 # container should be running in same network namespace as before 310 pid=$(__runc state test_busybox | jq '.pid') 311 ns_inode_new=$(readlink /proc/"$pid"/ns/net | sed -e 's/.*\[\(.*\)\]/\1/') 312 echo "old network namespace inode $ns_inode" 313 echo "new network namespace inode $ns_inode_new" 314 [ "$ns_inode" -eq "$ns_inode_new" ] 315 done 316 ip netns del "$ns_name" 317 } 318 319 @test "checkpoint and restore with container specific CRIU config" { 320 tmp=$(mktemp /tmp/runc-criu-XXXXXX.conf) 321 # This is the file we write to /etc/criu/default.conf 322 tmplog1=$(mktemp /tmp/runc-criu-log-XXXXXX.log) 323 unlink "$tmplog1" 324 tmplog1=$(basename "$tmplog1") 325 # That is the actual configuration file to be used 326 tmplog2=$(mktemp /tmp/runc-criu-log-XXXXXX.log) 327 unlink "$tmplog2" 328 tmplog2=$(basename "$tmplog2") 329 # This adds the annotation 'org.criu.config' to set a container 330 # specific CRIU config file. 331 update_config '.annotations += {"org.criu.config": "'"$tmp"'"}' 332 333 # Tell CRIU to use another configuration file 334 mkdir -p /etc/criu 335 echo "log-file=$tmplog1" >/etc/criu/default.conf 336 # Make sure the RPC defined configuration file overwrites the previous 337 echo "log-file=$tmplog2" >"$tmp" 338 339 runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox 340 [ "$status" -eq 0 ] 341 342 testcontainer test_busybox running 343 344 # checkpoint the running container 345 runc checkpoint --work-path ./work-dir test_busybox 346 [ "$status" -eq 0 ] 347 run ! test -f ./work-dir/"$tmplog1" 348 test -f ./work-dir/"$tmplog2" 349 350 # after checkpoint busybox is no longer running 351 testcontainer test_busybox checkpointed 352 353 test -f ./work-dir/"$tmplog2" && unlink ./work-dir/"$tmplog2" 354 # restore from checkpoint 355 runc restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox 356 [ "$status" -eq 0 ] 357 run ! test -f ./work-dir/"$tmplog1" 358 test -f ./work-dir/"$tmplog2" 359 360 # busybox should be back up and running 361 testcontainer test_busybox running 362 unlink "$tmp" 363 test -f ./work-dir/"$tmplog2" && unlink ./work-dir/"$tmplog2" 364 } 365 366 @test "checkpoint and restore with nested bind mounts" { 367 bind1=$(mktemp -d -p .) 368 bind2=$(mktemp -d -p .) 369 update_config ' .mounts += [{ 370 type: "bind", 371 source: "'"$bind1"'", 372 destination: "/test", 373 options: ["rw", "bind"] 374 }, 375 { 376 type: "bind", 377 source: "'"$bind2"'", 378 destination: "/test/for/nested", 379 options: ["rw", "bind"] 380 }]' 381 382 runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox 383 [ "$status" -eq 0 ] 384 385 testcontainer test_busybox running 386 387 # checkpoint the running container 388 runc checkpoint --work-path ./work-dir test_busybox 389 [ "$status" -eq 0 ] 390 391 # after checkpoint busybox is no longer running 392 testcontainer test_busybox checkpointed 393 394 # cleanup mountpoints created by runc during creation 395 # the mountpoints should be recreated during restore - that is the actual thing tested here 396 rm -rf "${bind1:?}"/* 397 398 # restore from checkpoint 399 runc restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox 400 [ "$status" -eq 0 ] 401 402 # busybox should be back up and running 403 testcontainer test_busybox running 404 } 405 406 @test "checkpoint then restore into a different cgroup (via --manage-cgroups-mode ignore)" { 407 set_resources_limit 408 set_cgroups_path 409 runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox 410 [ "$status" -eq 0 ] 411 testcontainer test_busybox running 412 413 local orig_path 414 orig_path=$(get_cgroup_path "pids") 415 # Check that the cgroup exists. 416 test -d "$orig_path" 417 418 runc checkpoint --work-path ./work-dir --manage-cgroups-mode ignore test_busybox 419 [ "$status" -eq 0 ] 420 testcontainer test_busybox checkpointed 421 # Check that the cgroup is gone. 422 run ! test -d "$orig_path" 423 424 # Restore into a different cgroup. 425 set_cgroups_path # Changes the path. 426 runc restore -d --manage-cgroups-mode ignore --pid-file pid \ 427 --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox 428 [ "$status" -eq 0 ] 429 testcontainer test_busybox running 430 431 # Check that the old cgroup path doesn't exist. 432 run ! test -d "$orig_path" 433 434 # Check that the new path exists. 435 local new_path 436 new_path=$(get_cgroup_path "pids") 437 test -d "$new_path" 438 439 # Check that container's init is in the new cgroup. 440 local pid 441 pid=$(cat "pid") 442 grep -q "${REL_CGROUPS_PATH}$" "/proc/$pid/cgroup" 443 }