github.com/meulengracht/snapd@v0.0.0-20210719210640-8bde69bcc84e/cmd/snap-confine/snap-confine.c (about) 1 /* 2 * Copyright (C) 2015-2018 Canonical Ltd 3 * 4 * This program is free software: you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License version 3 as 6 * published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * GNU General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 * 16 */ 17 #ifdef HAVE_CONFIG_H 18 #include "config.h" 19 #endif 20 21 #include <errno.h> 22 #include <fcntl.h> 23 #include <glob.h> 24 #include <sched.h> 25 #include <signal.h> 26 #include <stdbool.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <sys/capability.h> 31 #include <sys/stat.h> 32 #include <sys/types.h> 33 #include <unistd.h> 34 35 #include "../libsnap-confine-private/apparmor-support.h" 36 #include "../libsnap-confine-private/cgroup-freezer-support.h" 37 #include "../libsnap-confine-private/cgroup-support.h" 38 #include "../libsnap-confine-private/classic.h" 39 #include "../libsnap-confine-private/cleanup-funcs.h" 40 #include "../libsnap-confine-private/feature.h" 41 #include "../libsnap-confine-private/locking.h" 42 #include "../libsnap-confine-private/secure-getenv.h" 43 #include "../libsnap-confine-private/snap.h" 44 #include "../libsnap-confine-private/string-utils.h" 45 #include "../libsnap-confine-private/tool.h" 46 #include "../libsnap-confine-private/utils.h" 47 #include "cookie-support.h" 48 #include "mount-support.h" 49 #include "ns-support.h" 50 #include "seccomp-support.h" 51 #include "snap-confine-args.h" 52 #include "snap-confine-invocation.h" 53 #include "udev-support.h" 54 #include "user-support.h" 55 #ifdef HAVE_SELINUX 56 #include "selinux-support.h" 57 #endif 58 59 // sc_maybe_fixup_permissions fixes incorrect permissions 60 // inside the mount namespace for /var/lib. Before 1ccce4 61 // this directory was created with permissions 1777. 62 static void sc_maybe_fixup_permissions(void) 63 { 64 int fd SC_CLEANUP(sc_cleanup_close) = -1; 65 struct stat buf; 66 fd = open("/var/lib", O_PATH | O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW); 67 if (fd < 0) { 68 die("cannot open /var/lib"); 69 } 70 if (fstat(fd, &buf) < 0) { 71 die("cannot stat /var/lib"); 72 } 73 if ((buf.st_mode & 0777) == 0777) { 74 if (fchmod(fd, 0755) != 0) { 75 die("cannot chmod /var/lib"); 76 } 77 if (fchown(fd, 0, 0) != 0) { 78 die("cannot chown /var/lib"); 79 } 80 } 81 } 82 83 // sc_maybe_fixup_udev will remove incorrectly created udev tags 84 // that cause libudev on 16.04 to fail with "udev_enumerate_scan failed". 85 // See also: 86 // https://forum.snapcraft.io/t/weird-udev-enumerate-error/2360/17 87 static void sc_maybe_fixup_udev(void) 88 { 89 glob_t glob_res SC_CLEANUP(globfree) = { 90 .gl_pathv = NULL,.gl_pathc = 0,.gl_offs = 0, 91 }; 92 const char *glob_pattern = "/run/udev/tags/snap_*/*nvidia*"; 93 int err = glob(glob_pattern, 0, NULL, &glob_res); 94 if (err == GLOB_NOMATCH) { 95 return; 96 } 97 if (err != 0) { 98 die("cannot search using glob pattern %s: %d", 99 glob_pattern, err); 100 } 101 // kill bogus udev tags for nvidia. They confuse udev, this 102 // undoes the damage from github.com/snapcore/snapd/pull/3671. 103 // 104 // The udev tagging of nvidia got reverted in: 105 // https://github.com/snapcore/snapd/pull/4022 106 // but leftover files need to get removed or apps won't start 107 for (size_t i = 0; i < glob_res.gl_pathc; ++i) { 108 unlink(glob_res.gl_pathv[i]); 109 } 110 } 111 112 /** 113 * sc_preserved_process_state remembers clobbered state to restore. 114 * 115 * The umask is preserved and restored to ensure consistent permissions for 116 * runtime system. The value is preserved and restored perfectly. 117 **/ 118 typedef struct sc_preserved_process_state { 119 mode_t orig_umask; 120 int orig_cwd_fd; 121 struct stat file_info_orig_cwd; 122 } sc_preserved_process_state; 123 124 /** 125 * sc_preserve_and_sanitize_process_state sanitizes process state. 126 * 127 * The following process state is sanitized: 128 * - the umask is set to 0 129 * - the current working directory is set to / 130 * 131 * The original values are stored to be restored later. Currently only the 132 * umask is altered. It is set to zero to make the ownership of created files 133 * and directories more predictable. 134 **/ 135 static void sc_preserve_and_sanitize_process_state(sc_preserved_process_state * 136 proc_state) 137 { 138 /* Reset umask to zero, storing the old value. */ 139 proc_state->orig_umask = umask(0); 140 debug("umask reset, old umask was %#4o", proc_state->orig_umask); 141 /* Remember a file descriptor corresponding to the original working 142 * directory. This is an O_PATH file descriptor. The descriptor is 143 * used as explained below. */ 144 proc_state->orig_cwd_fd = 145 openat(AT_FDCWD, ".", 146 O_PATH | O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW); 147 if (proc_state->orig_cwd_fd < 0) { 148 die("cannot open path of the current working directory"); 149 } 150 if (fstat(proc_state->orig_cwd_fd, &proc_state->file_info_orig_cwd) < 0) { 151 die("cannot stat path of the current working directory"); 152 } 153 /* Move to the root directory. */ 154 if (chdir("/") < 0) { 155 die("cannot move to /"); 156 } 157 } 158 159 /** 160 * sc_restore_process_state restores values stored earlier. 161 **/ 162 static void sc_restore_process_state(const sc_preserved_process_state * 163 proc_state) 164 { 165 /* Restore original umask */ 166 umask(proc_state->orig_umask); 167 debug("umask restored to %#4o", proc_state->orig_umask); 168 169 /* Restore original current working directory. 170 * 171 * This part is more involved for the following reasons. While we hold an 172 * O_PATH file descriptor that still points to the original working 173 * directory, that directory may not be representable in the target mount 174 * namespace. A quick example may be /custom that exists on the host but 175 * not in the base snap of the application. 176 * 177 * Also consider when the path of the original working directory now 178 * maps to a different inode we cannot use fchdir(2). One example of 179 * that is the /tmp directory, which exists in both the host mount 180 * namespace and the per-snap mount namespace but actually represents a 181 * different directory. 182 **/ 183 184 /* Read the target of symlink at /proc/self/fd/<fd-of-orig-cwd> */ 185 char fd_path[PATH_MAX]; 186 char orig_cwd[PATH_MAX]; 187 ssize_t nread; 188 /* If the original working directory cannot be used for whatever reason then 189 * move the process to a special void directory. */ 190 const char *sc_void_dir = "/var/lib/snapd/void"; 191 int void_dir_fd SC_CLEANUP(sc_cleanup_close) = -1; 192 193 sc_must_snprintf(fd_path, sizeof fd_path, "/proc/self/fd/%d", 194 proc_state->orig_cwd_fd); 195 nread = readlink(fd_path, orig_cwd, sizeof orig_cwd); 196 if (nread < 0) { 197 die("cannot read symbolic link target %s", fd_path); 198 } 199 if (nread == sizeof orig_cwd) { 200 die("cannot fit symbolic link target %s", fd_path); 201 } 202 203 /* Open path corresponding to the original working directory in the 204 * execution environment. This may normally fail if the path no longer 205 * exists here, this is not a fatal error. It may also fail if we don't 206 * have permissions to view that path, that is not a fatal error either. */ 207 int inner_cwd_fd SC_CLEANUP(sc_cleanup_close) = -1; 208 inner_cwd_fd = 209 open(orig_cwd, O_PATH | O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW); 210 if (inner_cwd_fd < 0) { 211 if (errno == EPERM || errno == EACCES || errno == ENOENT) { 212 debug 213 ("cannot open path of the original working directory %s", 214 orig_cwd); 215 goto the_void; 216 } 217 /* Any error other than the three above is unexpected. */ 218 die("cannot open path of the original working directory %s", 219 orig_cwd); 220 } 221 222 /* The original working directory exists in the execution environment 223 * which lets us check if it points to the same inode as before. */ 224 struct stat file_info_inner; 225 if (fstat(inner_cwd_fd, &file_info_inner) < 0) { 226 die("cannot stat path of working directory in the execution environment"); 227 } 228 229 /* Note that we cannot use proc_state->orig_cwd_fd as that points to the 230 * directory but in another mount namespace and using that causes 231 * weird and undesired effects. 232 * 233 * By the time this code runs we are already running as the 234 * designated user so UNIX permissions are in effect. */ 235 if (fchdir(inner_cwd_fd) < 0) { 236 if (errno == EPERM || errno == EACCES) { 237 debug("cannot access original working directory %s", 238 orig_cwd); 239 goto the_void; 240 } 241 die("cannot restore original working directory via path"); 242 } 243 /* The distinction below is only logged and not acted upon. Perhaps someday 244 * this will be somehow communicated to cooperating applications that can 245 * instruct the user and avoid potential confusion. This mostly applies to 246 * tools that are invoked from /tmp. */ 247 if (proc_state->file_info_orig_cwd.st_dev == 248 file_info_inner.st_dev 249 && proc_state->file_info_orig_cwd.st_ino == 250 file_info_inner.st_ino) { 251 /* The path of the original working directory points to the same 252 * inode as before. */ 253 debug("working directory restored to %s", orig_cwd); 254 } else { 255 /* The path of the original working directory points to a different 256 * inode inside inside the execution environment than the host 257 * environment. */ 258 debug("working directory re-interpreted to %s", orig_cwd); 259 } 260 return; 261 the_void: 262 /* The void directory may be absent. On core18 system, and other 263 * systems using bootable base snap coupled with snapd snap, the 264 * /var/lib/snapd directory structure is not provided with packages but 265 * created on demand. */ 266 void_dir_fd = open(sc_void_dir, 267 O_DIRECTORY | O_PATH | O_NOFOLLOW | O_CLOEXEC); 268 if (void_dir_fd < 0 && errno == ENOENT) { 269 if (mkdir(sc_void_dir, 0111) < 0) { 270 die("cannot create void directory: %s", sc_void_dir); 271 } 272 if (lchown(sc_void_dir, 0, 0) < 0) { 273 die("cannot change ownership of void directory %s", 274 sc_void_dir); 275 } 276 void_dir_fd = open(sc_void_dir, 277 O_DIRECTORY | O_PATH | O_NOFOLLOW | 278 O_CLOEXEC); 279 } 280 if (void_dir_fd < 0) { 281 die("cannot open the void directory %s", sc_void_dir); 282 } 283 if (fchdir(void_dir_fd) < 0) { 284 die("cannot move to void directory %s", sc_void_dir); 285 } 286 debug("the process has been placed in the special void directory"); 287 } 288 289 /** 290 * sc_cleanup_preserved_process_state releases system resources. 291 **/ 292 static void sc_cleanup_preserved_process_state(sc_preserved_process_state * 293 proc_state) 294 { 295 sc_cleanup_close(&proc_state->orig_cwd_fd); 296 } 297 298 static void enter_classic_execution_environment(const sc_invocation * inv, 299 gid_t real_gid, 300 gid_t saved_gid); 301 static void enter_non_classic_execution_environment(sc_invocation * inv, 302 struct sc_apparmor *aa, 303 uid_t real_uid, 304 gid_t real_gid, 305 gid_t saved_gid); 306 307 int main(int argc, char **argv) 308 { 309 // Use our super-defensive parser to figure out what we've been asked to do. 310 sc_error *err = NULL; 311 struct sc_args *args SC_CLEANUP(sc_cleanup_args) = NULL; 312 sc_preserved_process_state proc_state 313 SC_CLEANUP(sc_cleanup_preserved_process_state) = { 314 .orig_umask = 0,.orig_cwd_fd = -1 315 }; 316 args = sc_nonfatal_parse_args(&argc, &argv, &err); 317 sc_die_on_error(err); 318 319 // Remember certain properties of the process that are clobbered by 320 // snap-confine during execution. Those are restored just before calling 321 // execv. 322 sc_preserve_and_sanitize_process_state(&proc_state); 323 324 // We've been asked to print the version string so let's just do that. 325 if (sc_args_is_version_query(args)) { 326 printf("%s %s\n", PACKAGE, PACKAGE_VERSION); 327 return 0; 328 } 329 330 /* Collect all invocation parameters. This gives us authoritative 331 * information about what needs to be invoked and how. The data comes 332 * from either the environment or from command line arguments */ 333 sc_invocation SC_CLEANUP(sc_cleanup_invocation) invocation; 334 const char *snap_instance_name_env = getenv("SNAP_INSTANCE_NAME"); 335 if (snap_instance_name_env == NULL) { 336 die("SNAP_INSTANCE_NAME is not set"); 337 } 338 sc_init_invocation(&invocation, args, snap_instance_name_env); 339 340 // Who are we? 341 uid_t real_uid, effective_uid, saved_uid; 342 gid_t real_gid, effective_gid, saved_gid; 343 if (getresuid(&real_uid, &effective_uid, &saved_uid) != 0) { 344 die("getresuid failed"); 345 } 346 if (getresgid(&real_gid, &effective_gid, &saved_gid) != 0) { 347 die("getresgid failed"); 348 } 349 debug("ruid: %d, euid: %d, suid: %d", 350 real_uid, effective_uid, saved_uid); 351 debug("rgid: %d, egid: %d, sgid: %d", 352 real_gid, effective_gid, saved_gid); 353 354 // snap-confine needs to run as root for cgroup/udev/mount/apparmor/etc setup. 355 if (effective_uid != 0) { 356 die("need to run as root or suid"); 357 } 358 359 char *snap_context SC_CLEANUP(sc_cleanup_string) = NULL; 360 // Do no get snap context value if running a hook (we don't want to overwrite hook's SNAP_COOKIE) 361 if (!sc_is_hook_security_tag(invocation.security_tag)) { 362 sc_error *err SC_CLEANUP(sc_cleanup_error) = NULL; 363 snap_context = 364 sc_cookie_get_from_snapd(invocation.snap_instance, &err); 365 /* While the cookie is normally present due to various protection 366 * mechanisms ensuring its creation from snapd, we are not considering 367 * it a critical error for snap-confine in the case it is absent. When 368 * absent snaps attempting to utilize snapctl to interact with snapd 369 * will fail but it is more important to run a little than break 370 * entirely in case snapd-side code is incorrect. Therefore error 371 * information is collected but discarded. */ 372 } 373 374 struct sc_apparmor apparmor; 375 sc_init_apparmor_support(&apparmor); 376 if (!apparmor.is_confined && apparmor.mode != SC_AA_NOT_APPLICABLE 377 && getuid() != 0 && geteuid() == 0) { 378 // Refuse to run when this process is running unconfined on a system 379 // that supports AppArmor when the effective uid is root and the real 380 // id is non-root. This protects against, for example, unprivileged 381 // users trying to leverage the snap-confine in the core snap to 382 // escalate privileges. 383 die("snap-confine has elevated permissions and is not confined" 384 " but should be. Refusing to continue to avoid" 385 " permission escalation attacks"); 386 } 387 388 /* perform global initialization of mount namespace support for non-classic 389 * snaps or both classic and non-classic when parallel-instances feature is 390 * enabled */ 391 if (!invocation.classic_confinement || 392 sc_feature_enabled(SC_FEATURE_PARALLEL_INSTANCES)) { 393 394 /* snap-confine uses privately-shared /run/snapd/ns to store bind-mounted 395 * mount namespaces of each snap. In the case that snap-confine is invoked 396 * from the mount namespace it typically constructs, the said directory 397 * does not contain mount entries for preserved namespaces as those are 398 * only visible in the main, outer namespace. 399 * 400 * In order to operate in such an environment snap-confine must first 401 * re-associate its own process with another namespace in which the 402 * /run/snapd/ns directory is visible. The most obvious candidate is pid 403 * one, which definitely doesn't run in a snap-specific namespace, has a 404 * predictable PID and is long lived. 405 */ 406 sc_reassociate_with_pid1_mount_ns(); 407 // Do global initialization: 408 int global_lock_fd = sc_lock_global(); 409 // Ensure that "/" or "/snap" is mounted with the 410 // "shared" option on legacy systems, see LP:#1668659 411 debug("ensuring that snap mount directory is shared"); 412 sc_ensure_shared_snap_mount(); 413 unsigned int experimental_features = 0; 414 if (sc_feature_enabled(SC_FEATURE_PARALLEL_INSTANCES)) { 415 experimental_features |= SC_FEATURE_PARALLEL_INSTANCES; 416 } 417 sc_initialize_mount_ns(experimental_features); 418 sc_unlock(global_lock_fd); 419 } 420 421 if (invocation.classic_confinement) { 422 enter_classic_execution_environment(&invocation, real_gid, 423 saved_gid); 424 } else { 425 enter_non_classic_execution_environment(&invocation, 426 &apparmor, 427 real_uid, 428 real_gid, saved_gid); 429 } 430 // Temporarily drop privileges back to the calling user until we can 431 // permanently drop (which we can't do just yet due to seccomp, see 432 // below). 433 sc_identity real_user_identity = { 434 .uid = real_uid, 435 .gid = real_gid, 436 .change_uid = 1, 437 .change_gid = 1, 438 }; 439 sc_set_effective_identity(real_user_identity); 440 // Ensure that the user data path exists. When creating it use the identity 441 // of the calling user (by using real user and group identifiers). This 442 // allows the creation of directories inside ~/ on NFS with root_squash 443 // attribute. 444 setup_user_data(); 445 #if 0 446 setup_user_xdg_runtime_dir(); 447 #endif 448 // https://wiki.ubuntu.com/SecurityTeam/Specifications/SnappyConfinement 449 sc_maybe_aa_change_onexec(&apparmor, invocation.security_tag); 450 #ifdef HAVE_SELINUX 451 // For classic and confined snaps 452 sc_selinux_set_snap_execcon(); 453 #endif 454 if (snap_context != NULL) { 455 setenv("SNAP_COOKIE", snap_context, 1); 456 // for compatibility, if facing older snapd. 457 setenv("SNAP_CONTEXT", snap_context, 1); 458 } 459 // Normally setuid/setgid not only permanently drops the UID/GID, but 460 // also clears the capabilities bounding sets (see "Effect of user ID 461 // changes on capabilities" in 'man capabilities'). To load a seccomp 462 // profile, we need either CAP_SYS_ADMIN or PR_SET_NO_NEW_PRIVS. Since 463 // NNP causes issues with AppArmor and exec transitions in certain 464 // snapd interfaces, keep CAP_SYS_ADMIN temporarily when we are 465 // permanently dropping privileges. 466 if (getresuid(&real_uid, &effective_uid, &saved_uid) != 0) { 467 die("getresuid failed"); 468 } 469 debug("ruid: %d, euid: %d, suid: %d", 470 real_uid, effective_uid, saved_uid); 471 struct __user_cap_header_struct hdr = 472 { _LINUX_CAPABILITY_VERSION_3, 0 }; 473 struct __user_cap_data_struct cap_data[2] = { {0} }; 474 475 // At this point in time, if we are going to permanently drop our 476 // effective_uid will not be '0' but our saved_uid will be '0'. Detect 477 // and save when we are in the this state so know when to setup the 478 // capabilities bounding set, regain CAP_SYS_ADMIN and later drop it. 479 bool keep_sys_admin = effective_uid != 0 && saved_uid == 0; 480 if (keep_sys_admin) { 481 debug("setting capabilities bounding set"); 482 // clear all 32 bit caps but SYS_ADMIN, with none inheritable 483 cap_data[0].effective = CAP_TO_MASK(CAP_SYS_ADMIN); 484 cap_data[0].permitted = cap_data[0].effective; 485 cap_data[0].inheritable = 0; 486 // clear all 64 bit caps 487 cap_data[1].effective = 0; 488 cap_data[1].permitted = 0; 489 cap_data[1].inheritable = 0; 490 if (capset(&hdr, cap_data) != 0) { 491 die("capset failed"); 492 } 493 } 494 // Permanently drop if not root 495 if (effective_uid == 0) { 496 // Note that we do not call setgroups() here because its ok 497 // that the user keeps the groups he already belongs to 498 if (setgid(real_gid) != 0) 499 die("setgid failed"); 500 if (setuid(real_uid) != 0) 501 die("setuid failed"); 502 503 if (real_gid != 0 && (getuid() == 0 || geteuid() == 0)) 504 die("permanently dropping privs did not work"); 505 if (real_uid != 0 && (getgid() == 0 || getegid() == 0)) 506 die("permanently dropping privs did not work"); 507 } 508 // Now that we've permanently dropped, regain SYS_ADMIN 509 if (keep_sys_admin) { 510 debug("regaining SYS_ADMIN"); 511 cap_data[0].effective = CAP_TO_MASK(CAP_SYS_ADMIN); 512 cap_data[0].permitted = cap_data[0].effective; 513 if (capset(&hdr, cap_data) != 0) { 514 die("capset regain failed"); 515 } 516 } 517 // Now that we've dropped and regained SYS_ADMIN, we can load the 518 // seccomp profiles. 519 if (sc_apply_seccomp_profile_for_security_tag(invocation.security_tag)) { 520 // If the process is not explicitly unconfined then load the 521 // global profile as well. 522 sc_apply_global_seccomp_profile(); 523 } 524 // Even though we set inheritable to 0, let's clear SYS_ADMIN 525 // explicitly 526 if (keep_sys_admin) { 527 debug("clearing SYS_ADMIN"); 528 cap_data[0].effective = 0; 529 cap_data[0].permitted = cap_data[0].effective; 530 if (capset(&hdr, cap_data) != 0) { 531 die("capset clear failed"); 532 } 533 } 534 // and exec the new executable 535 argv[0] = (char *)invocation.executable; 536 debug("execv(%s, %s...)", invocation.executable, argv[0]); 537 for (int i = 1; i < argc; ++i) { 538 debug(" argv[%i] = %s", i, argv[i]); 539 } 540 // Restore process state that was recorded earlier. 541 sc_restore_process_state(&proc_state); 542 execv(invocation.executable, (char *const *)&argv[0]); 543 perror("execv failed"); 544 return 1; 545 } 546 547 static void enter_classic_execution_environment(const sc_invocation * inv, 548 gid_t real_gid, gid_t saved_gid) 549 { 550 /* with parallel-instances enabled, main() reassociated with the mount ns of 551 * PID 1 to make /run/snapd/ns visible */ 552 553 /* 'classic confinement' is designed to run without the sandbox inside the 554 * shared namespace. Specifically: 555 * - snap-confine skips using the snap-specific, private, mount namespace 556 * - snap-confine skips using device cgroups 557 * - snapd sets up a lenient AppArmor profile for snap-confine to use 558 * - snapd sets up a lenient seccomp profile for snap-confine to use 559 */ 560 debug("preparing classic execution environment"); 561 562 if (!sc_feature_enabled(SC_FEATURE_PARALLEL_INSTANCES)) { 563 return; 564 } 565 566 /* all of the following code is experimental and part of parallel instances 567 * of classic snaps support */ 568 569 debug 570 ("(experimental) unsharing the mount namespace (per-classic-snap)"); 571 572 /* Construct a mount namespace where the snap instance directories are 573 * visible under the regular snap name. In order to do that we will: 574 * 575 * - convert SNAP_MOUNT_DIR into a mount point (global init) 576 * - convert /var/snap into a mount point (global init) 577 * - always create a new mount namespace 578 * - for snaps with non empty instance key: 579 * - set slave propagation recursively on SNAP_MOUNT_DIR and /var/snap 580 * - recursively bind mount SNAP_MOUNT_DIR/<snap>_<key> on top of SNAP_MOUNT_DIR/<snap> 581 * - recursively bind mount /var/snap/<snap>_<key> on top of /var/snap/<snap> 582 * 583 * The destination directories /var/snap/<snap> and SNAP_MOUNT_DIR/<snap> 584 * are guaranteed to exist and were created during installation of a given 585 * instance. 586 */ 587 588 if (unshare(CLONE_NEWNS) < 0) { 589 die("cannot unshare the mount namespace for parallel installed classic snap"); 590 } 591 592 /* Parallel installed classic snap get special handling */ 593 if (!sc_streq(inv->snap_instance, inv->snap_name)) { 594 debug 595 ("(experimental) setting up environment for classic snap instance %s", 596 inv->snap_instance); 597 598 /* set up mappings for snap and data directories */ 599 sc_setup_parallel_instance_classic_mounts(inv->snap_name, 600 inv->snap_instance); 601 } 602 } 603 604 static void enter_non_classic_execution_environment(sc_invocation * inv, 605 struct sc_apparmor *aa, 606 uid_t real_uid, 607 gid_t real_gid, 608 gid_t saved_gid) 609 { 610 // main() reassociated with the mount ns of PID 1 to make /run/snapd/ns 611 // visible 612 613 // Find and open snap-update-ns and snap-discard-ns from the same 614 // path as where we (snap-confine) were called. 615 int snap_update_ns_fd SC_CLEANUP(sc_cleanup_close) = -1; 616 snap_update_ns_fd = sc_open_snap_update_ns(); 617 int snap_discard_ns_fd SC_CLEANUP(sc_cleanup_close) = -1; 618 snap_discard_ns_fd = sc_open_snap_discard_ns(); 619 620 // Do per-snap initialization. 621 int snap_lock_fd = sc_lock_snap(inv->snap_instance); 622 debug("initializing mount namespace: %s", inv->snap_instance); 623 struct sc_mount_ns *group = NULL; 624 group = sc_open_mount_ns(inv->snap_instance); 625 626 // Init and check rootfs_dir, apply any fallback behaviors. 627 sc_check_rootfs_dir(inv); 628 629 /** Conditionally create, populate and join the device cgroup. */ 630 sc_setup_device_cgroup(inv->security_tag); 631 632 /** 633 * is_normal_mode controls if we should pivot into the base snap. 634 * 635 * There are two modes of execution for snaps that are not using classic 636 * confinement: normal and legacy. The normal mode is where snap-confine 637 * sets up a rootfs and then pivots into it using pivot_root(2). The legacy 638 * mode is when snap-confine just unshares the initial mount namespace, 639 * makes some extra changes but largely runs with what was presented to it 640 * initially. 641 * 642 * Historically the ubuntu-core distribution used the now-legacy mode. This 643 * was sensible then since snaps already (kind of) have the right root 644 * file-system and just need some privacy and isolation features applied. 645 * With the introduction of snaps to classic distributions as well as the 646 * introduction of bases, where each snap can use a different root 647 * filesystem, this lost sensibility and thus became legacy. 648 * 649 * For compatibility with current installations of ubuntu-core 650 * distributions the legacy mode is used when: the distribution is 651 * SC_DISTRO_CORE16 or when the base snap name is not "core" or 652 * "ubuntu-core". 653 * 654 * The SC_DISTRO_CORE16 is applied to systems that boot with the "core", 655 * "ubuntu-core" or "core16" snap. Systems using the "core18" base snap do 656 * not qualify for that classification. 657 **/ 658 sc_distro distro = sc_classify_distro(); 659 inv->is_normal_mode = distro != SC_DISTRO_CORE16 || 660 !sc_streq(inv->orig_base_snap_name, "core"); 661 662 /* Stale mount namespace discarded or no mount namespace to 663 join. We need to construct a new mount namespace ourselves. 664 To capture it we will need a helper process so make one. */ 665 sc_fork_helper(group, aa); 666 int retval = sc_join_preserved_ns(group, aa, inv, snap_discard_ns_fd); 667 if (retval == ESRCH) { 668 /* Create and populate the mount namespace. This performs all 669 of the bootstrapping mounts, pivots into the new root filesystem and 670 applies the per-snap mount profile using snap-update-ns. */ 671 debug("unsharing the mount namespace (per-snap)"); 672 if (unshare(CLONE_NEWNS) < 0) { 673 die("cannot unshare the mount namespace"); 674 } 675 sc_populate_mount_ns(aa, snap_update_ns_fd, inv, real_gid, 676 saved_gid); 677 sc_store_ns_info(inv); 678 679 /* Preserve the mount namespace. */ 680 sc_preserve_populated_mount_ns(group); 681 } 682 683 /* Older versions of snap-confine created incorrect 777 permissions 684 for /var/lib and we need to fixup for systems that had their NS created 685 with an old version. */ 686 sc_maybe_fixup_permissions(); 687 sc_maybe_fixup_udev(); 688 689 /* User mount profiles do not apply to non-root users. */ 690 if (real_uid != 0) { 691 debug("joining preserved per-user mount namespace"); 692 retval = 693 sc_join_preserved_per_user_ns(group, inv->snap_instance); 694 if (retval == ESRCH) { 695 debug("unsharing the mount namespace (per-user)"); 696 if (unshare(CLONE_NEWNS) < 0) { 697 die("cannot unshare the mount namespace"); 698 } 699 sc_setup_user_mounts(aa, snap_update_ns_fd, 700 inv->snap_instance); 701 /* Preserve the mount per-user namespace. But only if the 702 * experimental feature is enabled. This way if the feature is 703 * disabled user mount namespaces will still exist but will be 704 * entirely ephemeral. In addition the call 705 * sc_join_preserved_user_ns() will never find a preserved mount 706 * namespace and will always enter this code branch. */ 707 if (sc_feature_enabled 708 (SC_FEATURE_PER_USER_MOUNT_NAMESPACE)) { 709 sc_preserve_populated_per_user_mount_ns(group); 710 } else { 711 debug 712 ("NOT preserving per-user mount namespace"); 713 } 714 } 715 } 716 // With cgroups v1, associate each snap process with a dedicated 717 // snap freezer cgroup and snap pids cgroup. All snap processes 718 // belonging to one snap share the freezer cgroup. All snap 719 // processes belonging to one app or one hook share the pids cgroup. 720 // 721 // This simplifies testing if any processes belonging to a given snap are 722 // still alive as well as to properly account for each application and 723 // service. 724 // 725 // Note that with cgroups v2 there is no separate freeezer controller, 726 // but the freezer is associated with each group. The call chain when 727 // starting the snap application has already ensure that the process has 728 // been put in a dedicated group. 729 if (!sc_cgroup_is_v2()) { 730 sc_cgroup_freezer_join(inv->snap_instance, getpid()); 731 } 732 733 sc_unlock(snap_lock_fd); 734 735 sc_close_mount_ns(group); 736 737 // Reset path as we cannot rely on the path from the host OS to make sense. 738 // The classic distribution may use any PATH that makes sense but we cannot 739 // assume it makes sense for the core snap layout. Note that the /usr/local 740 // directories are explicitly left out as they are not part of the core 741 // snap. 742 debug("resetting PATH to values in sync with core snap"); 743 setenv("PATH", 744 "/usr/local/sbin:" 745 "/usr/local/bin:" 746 "/usr/sbin:" 747 "/usr/bin:" 748 "/sbin:" "/bin:" "/usr/games:" "/usr/local/games", 1); 749 // Ensure we set the various TMPDIRs to /tmp. One of the parts of setting 750 // up the mount namespace is to create a private /tmp directory (this is 751 // done in sc_populate_mount_ns() above). The host environment may point to 752 // a directory not accessible by snaps so we need to reset it here. 753 const char *tmpd[] = { "TMPDIR", "TEMPDIR", NULL }; 754 int i; 755 for (i = 0; tmpd[i] != NULL; i++) { 756 if (setenv(tmpd[i], "/tmp", 1) != 0) { 757 die("cannot set environment variable '%s'", tmpd[i]); 758 } 759 } 760 }