github.com/rigado/snapd@v2.42.5-go-mod+incompatible/cmd/snap-confine/snap-confine.c (about) 1 /* 2 * Copyright (C) 2015-2018 Canonical Ltd 3 * 4 * This program is free software: you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License version 3 as 6 * published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * GNU General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 * 16 */ 17 #ifdef HAVE_CONFIG_H 18 #include "config.h" 19 #endif 20 21 #include <errno.h> 22 #include <fcntl.h> 23 #include <glob.h> 24 #include <sched.h> 25 #include <signal.h> 26 #include <stdbool.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <sys/capability.h> 31 #include <sys/stat.h> 32 #include <sys/types.h> 33 #include <unistd.h> 34 35 #include "../libsnap-confine-private/apparmor-support.h" 36 #include "../libsnap-confine-private/cgroup-freezer-support.h" 37 #include "../libsnap-confine-private/cgroup-pids-support.h" 38 #include "../libsnap-confine-private/cgroup-support.h" 39 #include "../libsnap-confine-private/classic.h" 40 #include "../libsnap-confine-private/cleanup-funcs.h" 41 #include "../libsnap-confine-private/feature.h" 42 #include "../libsnap-confine-private/locking.h" 43 #include "../libsnap-confine-private/secure-getenv.h" 44 #include "../libsnap-confine-private/snap.h" 45 #include "../libsnap-confine-private/string-utils.h" 46 #include "../libsnap-confine-private/tool.h" 47 #include "../libsnap-confine-private/utils.h" 48 #include "cookie-support.h" 49 #include "mount-support.h" 50 #include "ns-support.h" 51 #include "seccomp-support.h" 52 #include "snap-confine-args.h" 53 #include "snap-confine-invocation.h" 54 #include "udev-support.h" 55 #include "user-support.h" 56 #ifdef HAVE_SELINUX 57 #include "selinux-support.h" 58 #endif 59 60 // sc_maybe_fixup_permissions fixes incorrect permissions 61 // inside the mount namespace for /var/lib. Before 1ccce4 62 // this directory was created with permissions 1777. 63 static void sc_maybe_fixup_permissions(void) 64 { 65 struct stat buf; 66 if (stat("/var/lib", &buf) != 0) { 67 die("cannot stat /var/lib"); 68 } 69 if ((buf.st_mode & 0777) == 0777) { 70 if (chmod("/var/lib", 0755) != 0) { 71 die("cannot chmod /var/lib"); 72 } 73 if (chown("/var/lib", 0, 0) != 0) { 74 die("cannot chown /var/lib"); 75 } 76 } 77 } 78 79 // sc_maybe_fixup_udev will remove incorrectly created udev tags 80 // that cause libudev on 16.04 to fail with "udev_enumerate_scan failed". 81 // See also: 82 // https://forum.snapcraft.io/t/weird-udev-enumerate-error/2360/17 83 static void sc_maybe_fixup_udev(void) 84 { 85 glob_t glob_res SC_CLEANUP(globfree) = { 86 .gl_pathv = NULL,.gl_pathc = 0,.gl_offs = 0, 87 }; 88 const char *glob_pattern = "/run/udev/tags/snap_*/*nvidia*"; 89 int err = glob(glob_pattern, 0, NULL, &glob_res); 90 if (err == GLOB_NOMATCH) { 91 return; 92 } 93 if (err != 0) { 94 die("cannot search using glob pattern %s: %d", 95 glob_pattern, err); 96 } 97 // kill bogus udev tags for nvidia. They confuse udev, this 98 // undoes the damage from github.com/snapcore/snapd/pull/3671. 99 // 100 // The udev tagging of nvidia got reverted in: 101 // https://github.com/snapcore/snapd/pull/4022 102 // but leftover files need to get removed or apps won't start 103 for (size_t i = 0; i < glob_res.gl_pathc; ++i) { 104 unlink(glob_res.gl_pathv[i]); 105 } 106 } 107 108 /** 109 * sc_preserved_process_state remembers clobbered state to restore. 110 * 111 * The umask is preserved and restored to ensure consistent permissions for 112 * runtime system. The value is preserved and restored perfectly. 113 **/ 114 typedef struct sc_preserved_process_state { 115 mode_t orig_umask; 116 int orig_cwd_fd; 117 struct stat file_info_orig_cwd; 118 } sc_preserved_process_state; 119 120 /** 121 * sc_preserve_and_sanitize_process_state sanitizes process state. 122 * 123 * The following process state is sanitised: 124 * - the umask is set to 0 125 * - the current working directory is set to / 126 * 127 * The original values are stored to be restored later. Currently only the 128 * umask is altered. It is set to zero to make the ownership of created files 129 * and directories more predictable. 130 **/ 131 static void sc_preserve_and_sanitize_process_state(sc_preserved_process_state * 132 proc_state) 133 { 134 /* Reset umask to zero, storing the old value. */ 135 proc_state->orig_umask = umask(0); 136 debug("umask reset, old umask was %#4o", proc_state->orig_umask); 137 /* Remember a file descriptor corresponding to the original working 138 * directory. This is an O_PATH file descriptor. The descriptor is 139 * used as explained below. */ 140 proc_state->orig_cwd_fd = 141 openat(AT_FDCWD, ".", 142 O_PATH | O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW); 143 if (proc_state->orig_cwd_fd < 0) { 144 die("cannot open path of the current working directory"); 145 } 146 if (fstat(proc_state->orig_cwd_fd, &proc_state->file_info_orig_cwd) < 0) { 147 die("cannot stat path of the current working directory"); 148 } 149 /* Move to the root directory. */ 150 if (chdir("/") < 0) { 151 die("cannot move to /"); 152 } 153 } 154 155 /** 156 * sc_restore_process_state restores values stored earlier. 157 **/ 158 static void sc_restore_process_state(const sc_preserved_process_state * 159 proc_state) 160 { 161 /* Restore original umask */ 162 umask(proc_state->orig_umask); 163 debug("umask restored to %#4o", proc_state->orig_umask); 164 165 /* Restore original current working directory. 166 * 167 * This part is more involved for the following reasons. While we hold an 168 * O_PATH file descriptor that still points to the original working 169 * directory, that directory may not be representable in the target mount 170 * namespace. A quick example may be /custom that exists on the host but 171 * not in the base snap of the application. 172 * 173 * Also consider when the path of the original working directory now 174 * maps to a different inode we cannot use fchdir(2). One example of 175 * that is the /tmp directory, which exists in both the host mount 176 * namespace and the per-snap mount namespace but actually represents a 177 * different directory. 178 **/ 179 180 /* Read the target of symlink at /proc/self/fd/<fd-of-orig-cwd> */ 181 char fd_path[PATH_MAX]; 182 char orig_cwd[PATH_MAX]; 183 ssize_t nread; 184 /* If the original working directory cannot be used for whatever reason then 185 * move the process to a special void directory. */ 186 const char *sc_void_dir = "/var/lib/snapd/void"; 187 int void_dir_fd SC_CLEANUP(sc_cleanup_close) = -1; 188 189 sc_must_snprintf(fd_path, sizeof fd_path, "/proc/self/fd/%d", 190 proc_state->orig_cwd_fd); 191 nread = readlink(fd_path, orig_cwd, sizeof orig_cwd); 192 if (nread < 0) { 193 die("cannot read symbolic link target %s", fd_path); 194 } 195 if (nread == sizeof orig_cwd) { 196 die("cannot fit symbolic link target %s", fd_path); 197 } 198 199 /* Open path corresponding to the original working directory in the 200 * execution environment. This may normally fail if the path no longer 201 * exists here, this is not a fatal error. It may also fail if we don't 202 * have permissions to view that path, that is not a fatal error either. */ 203 int inner_cwd_fd SC_CLEANUP(sc_cleanup_close) = -1; 204 inner_cwd_fd = 205 open(orig_cwd, O_PATH | O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW); 206 if (inner_cwd_fd < 0) { 207 if (errno == EPERM || errno == EACCES || errno == ENOENT) { 208 debug 209 ("cannot open path of the original working directory %s", 210 orig_cwd); 211 goto the_void; 212 } 213 /* Any error other than the three above is unexpected. */ 214 die("cannot open path of the original working directory %s", 215 orig_cwd); 216 } 217 218 /* The original working directory exists in the execution environment 219 * which lets us check if it points to the same inode as before. */ 220 struct stat file_info_inner; 221 if (fstat(inner_cwd_fd, &file_info_inner) < 0) { 222 die("cannot stat path of working directory in the execution environment"); 223 } 224 225 /* Note that we cannot use proc_state->orig_cwd_fd as that points to the 226 * directory but in another mount namespace and using that causes 227 * weird and undesired effects. 228 * 229 * By the time this code runs we are already running as the 230 * designated user so UNIX permissions are in effect. */ 231 if (fchdir(inner_cwd_fd) < 0) { 232 if (errno == EPERM || errno == EACCES) { 233 debug("cannot access original working directory %s", 234 orig_cwd); 235 goto the_void; 236 } 237 die("cannot restore original working directory via path"); 238 } 239 /* The distinction below is only logged and not acted upon. Perhaps someday 240 * this will be somehow communicated to cooperating applications that can 241 * instruct the user and avoid potential confusion. This mostly applies to 242 * tools that are invoked from /tmp. */ 243 if (proc_state->file_info_orig_cwd.st_dev == 244 file_info_inner.st_dev 245 && proc_state->file_info_orig_cwd.st_ino == 246 file_info_inner.st_ino) { 247 /* The path of the original working directory points to the same 248 * inode as before. */ 249 debug("working directory restored to %s", orig_cwd); 250 } else { 251 /* The path of the original working directory points to a different 252 * inode inside inside the execution environment than the host 253 * environment. */ 254 debug("working directory re-interpreted to %s", orig_cwd); 255 } 256 return; 257 the_void: 258 /* The void directory may be absent. On core18 system, and other 259 * systems using bootable base snap coupled with snapd snap, the 260 * /var/lib/snapd directory structure is not provided with packages but 261 * created on demand. */ 262 void_dir_fd = open(sc_void_dir, 263 O_DIRECTORY | O_PATH | O_NOFOLLOW | O_CLOEXEC); 264 if (void_dir_fd < 0 && errno == ENOENT) { 265 if (mkdir(sc_void_dir, 0111) < 0) { 266 die("cannot create void directory: %s", sc_void_dir); 267 } 268 if (lchown(sc_void_dir, 0, 0) < 0) { 269 die("cannot change ownership of void directory %s", 270 sc_void_dir); 271 } 272 void_dir_fd = open(sc_void_dir, 273 O_DIRECTORY | O_PATH | O_NOFOLLOW | 274 O_CLOEXEC); 275 } 276 if (void_dir_fd < 0) { 277 die("cannot open the void directory %s", sc_void_dir); 278 } 279 if (fchdir(void_dir_fd) < 0) { 280 die("cannot move to void directory %s", sc_void_dir); 281 } 282 debug("the process has been placed in the special void directory"); 283 } 284 285 /** 286 * sc_cleanup_preserved_process_state releases system resources. 287 **/ 288 static void sc_cleanup_preserved_process_state(sc_preserved_process_state * 289 proc_state) 290 { 291 sc_cleanup_close(&proc_state->orig_cwd_fd); 292 } 293 294 static void enter_classic_execution_environment(void); 295 static void enter_non_classic_execution_environment(sc_invocation * inv, 296 struct sc_apparmor *aa, 297 uid_t real_uid, 298 gid_t real_gid, 299 gid_t saved_gid); 300 301 int main(int argc, char **argv) 302 { 303 // Use our super-defensive parser to figure out what we've been asked to do. 304 sc_error *err = NULL; 305 struct sc_args *args SC_CLEANUP(sc_cleanup_args) = NULL; 306 sc_preserved_process_state proc_state 307 SC_CLEANUP(sc_cleanup_preserved_process_state) = { 308 .orig_umask = 0,.orig_cwd_fd = -1 309 }; 310 args = sc_nonfatal_parse_args(&argc, &argv, &err); 311 sc_die_on_error(err); 312 313 // Remember certain properties of the process that are clobbered by 314 // snap-confine during execution. Those are restored just before calling 315 // execv. 316 sc_preserve_and_sanitize_process_state(&proc_state); 317 318 // We've been asked to print the version string so let's just do that. 319 if (sc_args_is_version_query(args)) { 320 printf("%s %s\n", PACKAGE, PACKAGE_VERSION); 321 return 0; 322 } 323 324 /* Collect all invocation parameters. This gives us authoritative 325 * information about what needs to be invoked and how. The data comes 326 * from either the environment or from command line arguments */ 327 sc_invocation SC_CLEANUP(sc_cleanup_invocation) invocation; 328 const char *snap_instance_name_env = getenv("SNAP_INSTANCE_NAME"); 329 if (snap_instance_name_env == NULL) { 330 die("SNAP_INSTANCE_NAME is not set"); 331 } 332 sc_init_invocation(&invocation, args, snap_instance_name_env); 333 334 // Who are we? 335 uid_t real_uid, effective_uid, saved_uid; 336 gid_t real_gid, effective_gid, saved_gid; 337 if (getresuid(&real_uid, &effective_uid, &saved_uid) != 0) { 338 die("getresuid failed"); 339 } 340 if (getresgid(&real_gid, &effective_gid, &saved_gid) != 0) { 341 die("getresgid failed"); 342 } 343 debug("ruid: %d, euid: %d, suid: %d", 344 real_uid, effective_uid, saved_uid); 345 debug("rgid: %d, egid: %d, sgid: %d", 346 real_gid, effective_gid, saved_gid); 347 348 // snap-confine runs as both setuid root and setgid root. 349 // Temporarily drop group privileges here and reraise later 350 // as needed. 351 if (effective_gid == 0 && real_gid != 0) { 352 if (setegid(real_gid) != 0) { 353 die("cannot set effective group id to %d", real_gid); 354 } 355 } 356 #ifndef CAPS_OVER_SETUID 357 // this code always needs to run as root for the cgroup/udev setup, 358 // however for the tests we allow it to run as non-root 359 if (geteuid() != 0 && secure_getenv("SNAP_CONFINE_NO_ROOT") == NULL) { 360 die("need to run as root or suid"); 361 } 362 #endif 363 364 char *snap_context SC_CLEANUP(sc_cleanup_string) = NULL; 365 // Do no get snap context value if running a hook (we don't want to overwrite hook's SNAP_COOKIE) 366 if (!sc_is_hook_security_tag(invocation.security_tag)) { 367 sc_error *err SC_CLEANUP(sc_cleanup_error) = NULL; 368 snap_context = 369 sc_cookie_get_from_snapd(invocation.snap_instance, &err); 370 /* While the cookie is normally present due to various protection 371 * mechanisms ensuring its creation from snapd, we are not considering 372 * it a critical error for snap-confine in the case it is absent. When 373 * absent snaps attempting to utilize snapctl to interact with snapd 374 * will fail but it is more important to run a little than break 375 * entirely in case snapd-side code is incorrect. Therefore error 376 * information is collected but discarded. */ 377 } 378 379 struct sc_apparmor apparmor; 380 sc_init_apparmor_support(&apparmor); 381 if (!apparmor.is_confined && apparmor.mode != SC_AA_NOT_APPLICABLE 382 && getuid() != 0 && geteuid() == 0) { 383 // Refuse to run when this process is running unconfined on a system 384 // that supports AppArmor when the effective uid is root and the real 385 // id is non-root. This protects against, for example, unprivileged 386 // users trying to leverage the snap-confine in the core snap to 387 // escalate privileges. 388 die("snap-confine has elevated permissions and is not confined" 389 " but should be. Refusing to continue to avoid" 390 " permission escalation attacks"); 391 } 392 // TODO: check for similar situation and linux capabilities. 393 if (geteuid() == 0) { 394 if (invocation.classic_confinement) { 395 enter_classic_execution_environment(); 396 } else { 397 enter_non_classic_execution_environment(&invocation, 398 &apparmor, 399 real_uid, 400 real_gid, 401 saved_gid); 402 } 403 // The rest does not so temporarily drop privs back to calling 404 // user (we'll permanently drop after loading seccomp) 405 if (setegid(real_gid) != 0) 406 die("setegid failed"); 407 if (seteuid(real_uid) != 0) 408 die("seteuid failed"); 409 410 if (real_gid != 0 && geteuid() == 0) 411 die("dropping privs did not work"); 412 if (real_uid != 0 && getegid() == 0) 413 die("dropping privs did not work"); 414 } 415 // Ensure that the user data path exists. 416 setup_user_data(); 417 #if 0 418 setup_user_xdg_runtime_dir(); 419 #endif 420 // https://wiki.ubuntu.com/SecurityTeam/Specifications/SnappyConfinement 421 sc_maybe_aa_change_onexec(&apparmor, invocation.security_tag); 422 #ifdef HAVE_SELINUX 423 // For classic and confined snaps 424 sc_selinux_set_snap_execcon(); 425 #endif 426 if (snap_context != NULL) { 427 setenv("SNAP_COOKIE", snap_context, 1); 428 // for compatibility, if facing older snapd. 429 setenv("SNAP_CONTEXT", snap_context, 1); 430 } 431 // Normally setuid/setgid not only permanently drops the UID/GID, but 432 // also clears the capabilities bounding sets (see "Effect of user ID 433 // changes on capabilities" in 'man capabilities'). To load a seccomp 434 // profile, we need either CAP_SYS_ADMIN or PR_SET_NO_NEW_PRIVS. Since 435 // NNP causes issues with AppArmor and exec transitions in certain 436 // snapd interfaces, keep CAP_SYS_ADMIN temporarily when we are 437 // permanently dropping privileges. 438 if (getresuid(&real_uid, &effective_uid, &saved_uid) != 0) { 439 die("getresuid failed"); 440 } 441 debug("ruid: %d, euid: %d, suid: %d", 442 real_uid, effective_uid, saved_uid); 443 struct __user_cap_header_struct hdr = 444 { _LINUX_CAPABILITY_VERSION_3, 0 }; 445 struct __user_cap_data_struct cap_data[2] = { {0} }; 446 447 // At this point in time, if we are going to permanently drop our 448 // effective_uid will not be '0' but our saved_uid will be '0'. Detect 449 // and save when we are in the this state so know when to setup the 450 // capabilities bounding set, regain CAP_SYS_ADMIN and later drop it. 451 bool keep_sys_admin = effective_uid != 0 && saved_uid == 0; 452 if (keep_sys_admin) { 453 debug("setting capabilities bounding set"); 454 // clear all 32 bit caps but SYS_ADMIN, with none inheritable 455 cap_data[0].effective = CAP_TO_MASK(CAP_SYS_ADMIN); 456 cap_data[0].permitted = cap_data[0].effective; 457 cap_data[0].inheritable = 0; 458 // clear all 64 bit caps 459 cap_data[1].effective = 0; 460 cap_data[1].permitted = 0; 461 cap_data[1].inheritable = 0; 462 if (capset(&hdr, cap_data) != 0) { 463 die("capset failed"); 464 } 465 } 466 // Permanently drop if not root 467 if (effective_uid == 0) { 468 // Note that we do not call setgroups() here because its ok 469 // that the user keeps the groups he already belongs to 470 if (setgid(real_gid) != 0) 471 die("setgid failed"); 472 if (setuid(real_uid) != 0) 473 die("setuid failed"); 474 475 if (real_gid != 0 && (getuid() == 0 || geteuid() == 0)) 476 die("permanently dropping privs did not work"); 477 if (real_uid != 0 && (getgid() == 0 || getegid() == 0)) 478 die("permanently dropping privs did not work"); 479 } 480 // Now that we've permanently dropped, regain SYS_ADMIN 481 if (keep_sys_admin) { 482 debug("regaining SYS_ADMIN"); 483 cap_data[0].effective = CAP_TO_MASK(CAP_SYS_ADMIN); 484 cap_data[0].permitted = cap_data[0].effective; 485 if (capset(&hdr, cap_data) != 0) { 486 die("capset regain failed"); 487 } 488 } 489 // Now that we've dropped and regained SYS_ADMIN, we can load the 490 // seccomp profiles. 491 if (sc_apply_seccomp_profile_for_security_tag(invocation.security_tag)) { 492 // If the process is not explicitly unconfined then load the 493 // global profile as well. 494 sc_apply_global_seccomp_profile(); 495 } 496 // Even though we set inheritable to 0, let's clear SYS_ADMIN 497 // explicitly 498 if (keep_sys_admin) { 499 debug("clearing SYS_ADMIN"); 500 cap_data[0].effective = 0; 501 cap_data[0].permitted = cap_data[0].effective; 502 if (capset(&hdr, cap_data) != 0) { 503 die("capset clear failed"); 504 } 505 } 506 // and exec the new executable 507 argv[0] = (char *)invocation.executable; 508 debug("execv(%s, %s...)", invocation.executable, argv[0]); 509 for (int i = 1; i < argc; ++i) { 510 debug(" argv[%i] = %s", i, argv[i]); 511 } 512 // Restore process state that was recorded earlier. 513 sc_restore_process_state(&proc_state); 514 execv(invocation.executable, (char *const *)&argv[0]); 515 perror("execv failed"); 516 return 1; 517 } 518 519 static void enter_classic_execution_environment(void) 520 { 521 /* 'classic confinement' is designed to run without the sandbox inside the 522 * shared namespace. Specifically: 523 * - snap-confine skips using the snap-specific mount namespace 524 * - snap-confine skips using device cgroups 525 * - snapd sets up a lenient AppArmor profile for snap-confine to use 526 * - snapd sets up a lenient seccomp profile for snap-confine to use 527 */ 528 debug("skipping sandbox setup, classic confinement in use"); 529 } 530 531 static void enter_non_classic_execution_environment(sc_invocation * inv, 532 struct sc_apparmor *aa, 533 uid_t real_uid, 534 gid_t real_gid, 535 gid_t saved_gid) 536 { 537 /* snap-confine uses privately-shared /run/snapd/ns to store bind-mounted 538 * mount namespaces of each snap. In the case that snap-confine is invoked 539 * from the mount namespace it typically constructs, the said directory 540 * does not contain mount entries for preserved namespaces as those are 541 * only visible in the main, outer namespace. 542 * 543 * In order to operate in such an environment snap-confine must first 544 * re-associate its own process with another namespace in which the 545 * /run/snapd/ns directory is visible. The most obvious candidate is pid 546 * one, which definitely doesn't run in a snap-specific namespace, has a 547 * predictable PID and is long lived. 548 */ 549 sc_reassociate_with_pid1_mount_ns(); 550 // Do global initialization: 551 int global_lock_fd = sc_lock_global(); 552 // ensure that "/" or "/snap" is mounted with the 553 // "shared" option, see LP:#1668659 554 debug("ensuring that snap mount directory is shared"); 555 sc_ensure_shared_snap_mount(); 556 debug("unsharing snap namespace directory"); 557 sc_initialize_mount_ns(); 558 sc_unlock(global_lock_fd); 559 560 // Find and open snap-update-ns and snap-discard-ns from the same 561 // path as where we (snap-confine) were called. 562 int snap_update_ns_fd SC_CLEANUP(sc_cleanup_close) = -1; 563 snap_update_ns_fd = sc_open_snap_update_ns(); 564 int snap_discard_ns_fd SC_CLEANUP(sc_cleanup_close) = -1; 565 snap_discard_ns_fd = sc_open_snap_discard_ns(); 566 567 // Do per-snap initialization. 568 int snap_lock_fd = sc_lock_snap(inv->snap_instance); 569 debug("initializing mount namespace: %s", inv->snap_instance); 570 struct sc_mount_ns *group = NULL; 571 group = sc_open_mount_ns(inv->snap_instance); 572 573 // Init and check rootfs_dir, apply any fallback behaviors. 574 sc_check_rootfs_dir(inv); 575 576 /** Populate and join the device control group. */ 577 struct snappy_udev udev_s; 578 if (snappy_udev_init(inv->security_tag, &udev_s) == 0) { 579 if (!sc_cgroup_is_v2()) { 580 setup_devices_cgroup(inv->security_tag, &udev_s); 581 } 582 } 583 snappy_udev_cleanup(&udev_s); 584 585 /** 586 * is_normal_mode controls if we should pivot into the base snap. 587 * 588 * There are two modes of execution for snaps that are not using classic 589 * confinement: normal and legacy. The normal mode is where snap-confine 590 * sets up a rootfs and then pivots into it using pivot_root(2). The legacy 591 * mode is when snap-confine just unshares the initial mount namespace, 592 * makes some extra changes but largely runs with what was presented to it 593 * initially. 594 * 595 * Historically the ubuntu-core distribution used the now-legacy mode. This 596 * was sensible then since snaps already (kind of) have the right root 597 * file-system and just need some privacy and isolation features applied. 598 * With the introduction of snaps to classic distributions as well as the 599 * introduction of bases, where each snap can use a different root 600 * filesystem, this lost sensibility and thus became legacy. 601 * 602 * For compatibility with current installations of ubuntu-core 603 * distributions the legacy mode is used when: the distribution is 604 * SC_DISTRO_CORE16 or when the base snap name is not "core" or 605 * "ubuntu-core". 606 * 607 * The SC_DISTRO_CORE16 is applied to systems that boot with the "core", 608 * "ubuntu-core" or "core16" snap. Systems using the "core18" base snap do 609 * not qualify for that classification. 610 **/ 611 sc_distro distro = sc_classify_distro(); 612 inv->is_normal_mode = distro != SC_DISTRO_CORE16 || 613 !sc_streq(inv->orig_base_snap_name, "core"); 614 615 /* Stale mount namespace discarded or no mount namespace to 616 join. We need to construct a new mount namespace ourselves. 617 To capture it we will need a helper process so make one. */ 618 sc_fork_helper(group, aa); 619 int retval = sc_join_preserved_ns(group, aa, inv, snap_discard_ns_fd); 620 if (retval == ESRCH) { 621 /* Create and populate the mount namespace. This performs all 622 of the bootstrapping mounts, pivots into the new root filesystem and 623 applies the per-snap mount profile using snap-update-ns. */ 624 debug("unsharing the mount namespace (per-snap)"); 625 if (unshare(CLONE_NEWNS) < 0) { 626 die("cannot unshare the mount namespace"); 627 } 628 sc_populate_mount_ns(aa, snap_update_ns_fd, inv); 629 sc_store_ns_info(inv); 630 631 /* Preserve the mount namespace. */ 632 sc_preserve_populated_mount_ns(group); 633 } 634 635 /* Older versions of snap-confine created incorrect 777 permissions 636 for /var/lib and we need to fixup for systems that had their NS created 637 with an old version. */ 638 sc_maybe_fixup_permissions(); 639 sc_maybe_fixup_udev(); 640 641 /* User mount profiles do not apply to non-root users. */ 642 if (real_uid != 0) { 643 debug("joining preserved per-user mount namespace"); 644 retval = 645 sc_join_preserved_per_user_ns(group, inv->snap_instance); 646 if (retval == ESRCH) { 647 debug("unsharing the mount namespace (per-user)"); 648 if (unshare(CLONE_NEWNS) < 0) { 649 die("cannot unshare the mount namespace"); 650 } 651 sc_setup_user_mounts(aa, snap_update_ns_fd, 652 inv->snap_instance); 653 /* Preserve the mount per-user namespace. But only if the 654 * experimental feature is enabled. This way if the feature is 655 * disabled user mount namespaces will still exist but will be 656 * entirely ephemeral. In addition the call 657 * sc_join_preserved_user_ns() will never find a preserved mount 658 * namespace and will always enter this code branch. */ 659 if (sc_feature_enabled 660 (SC_FEATURE_PER_USER_MOUNT_NAMESPACE)) { 661 sc_preserve_populated_per_user_mount_ns(group); 662 } else { 663 debug 664 ("NOT preserving per-user mount namespace"); 665 } 666 } 667 } 668 // Associate each snap process with a dedicated snap freezer cgroup and 669 // snap pids cgroup. All snap processes belonging to one snap share the 670 // freezer cgroup. All snap processes belonging to one app or one hook 671 // share the pids cgroup. 672 // 673 // This simplifies testing if any processes belonging to a given snap are 674 // still alive as well as to properly account for each application and 675 // service. 676 if (getegid() != 0 && saved_gid == 0) { 677 // Temporarily raise egid so we can chown the freezer cgroup under LXD. 678 if (setegid(0) != 0) { 679 die("cannot set effective group id to root"); 680 } 681 } 682 if (!sc_cgroup_is_v2()) { 683 sc_cgroup_freezer_join(inv->snap_instance, getpid()); 684 if (sc_feature_enabled(SC_FEATURE_REFRESH_APP_AWARENESS)) { 685 sc_cgroup_pids_join(inv->security_tag, getpid()); 686 } 687 } 688 if (geteuid() == 0 && real_gid != 0) { 689 if (setegid(real_gid) != 0) { 690 die("cannot set effective group id to %d", real_gid); 691 } 692 } 693 694 sc_unlock(snap_lock_fd); 695 696 sc_close_mount_ns(group); 697 698 // Reset path as we cannot rely on the path from the host OS to make sense. 699 // The classic distribution may use any PATH that makes sense but we cannot 700 // assume it makes sense for the core snap layout. Note that the /usr/local 701 // directories are explicitly left out as they are not part of the core 702 // snap. 703 debug("resetting PATH to values in sync with core snap"); 704 setenv("PATH", 705 "/usr/local/sbin:" 706 "/usr/local/bin:" 707 "/usr/sbin:" 708 "/usr/bin:" 709 "/sbin:" "/bin:" "/usr/games:" "/usr/local/games", 1); 710 // Ensure we set the various TMPDIRs to /tmp. One of the parts of setting 711 // up the mount namespace is to create a private /tmp directory (this is 712 // done in sc_populate_mount_ns() above). The host environment may point to 713 // a directory not accessible by snaps so we need to reset it here. 714 const char *tmpd[] = { "TMPDIR", "TEMPDIR", NULL }; 715 int i; 716 for (i = 0; tmpd[i] != NULL; i++) { 717 if (setenv(tmpd[i], "/tmp", 1) != 0) { 718 die("cannot set environment variable '%s'", tmpd[i]); 719 } 720 } 721 }