github.com/stulluk/snapd@v0.0.0-20210611110309-f6d5d5bd24b0/cmd/snap-confine/udev-support.c (about)

     1  /*
     2   * Copyright (C) 2015-2020 Canonical Ltd
     3   *
     4   * This program is free software: you can redistribute it and/or modify
     5   * it under the terms of the GNU General Public License version 3 as
     6   * published by the Free Software Foundation.
     7   *
     8   * This program is distributed in the hope that it will be useful,
     9   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    10   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    11   * GNU General Public License for more details.
    12   *
    13   * You should have received a copy of the GNU General Public License
    14   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    15   *
    16   */
    17  #include "config.h"
    18  
    19  #include <ctype.h>
    20  #include <errno.h>
    21  #include <fcntl.h>
    22  #include <string.h>
    23  #include <sys/stat.h>
    24  #include <sys/sysmacros.h>
    25  #include <sys/types.h>
    26  #include <unistd.h>
    27  
    28  #include <libudev.h>
    29  
    30  #include "../libsnap-confine-private/cleanup-funcs.h"
    31  #include "../libsnap-confine-private/snap.h"
    32  #include "../libsnap-confine-private/string-utils.h"
    33  #include "../libsnap-confine-private/cgroup-support.h"
    34  #include "../libsnap-confine-private/utils.h"
    35  #include "udev-support.h"
    36  
    37  __attribute__((format(printf, 2, 3)))
    38  static void sc_dprintf(int fd, const char *format, ...);
    39  
    40  static void sc_dprintf(int fd, const char *format, ...)
    41  {
    42  	va_list ap1;
    43  	va_list ap2;
    44  	int n_expected, n_actual;
    45  
    46  	va_start(ap1, format);
    47  	va_copy(ap2, ap1);
    48  	n_expected = vsnprintf(NULL, 0, format, ap2);
    49  	n_actual = vdprintf(fd, format, ap1);
    50  	if (n_actual == -1 || n_expected != n_actual) {
    51  		die("cannot write to fd %d", fd);
    52  	}
    53  	va_end(ap2);
    54  	va_end(ap1);
    55  }
    56  
    57  /* Allow access to common devices. */
    58  static void sc_udev_allow_common(int devices_allow_fd)
    59  {
    60  	/* The devices we add here have static number allocation.
    61  	 * https://www.kernel.org/doc/html/v4.11/admin-guide/devices.html */
    62  	sc_dprintf(devices_allow_fd, "c 1:3 rwm\n");	// /dev/null
    63  	sc_dprintf(devices_allow_fd, "c 1:5 rwm\n");	// /dev/zero
    64  	sc_dprintf(devices_allow_fd, "c 1:7 rwm\n");	// /dev/full
    65  	sc_dprintf(devices_allow_fd, "c 1:8 rwm\n");	// /dev/random
    66  	sc_dprintf(devices_allow_fd, "c 1:9 rwm\n");	// /dev/urandom
    67  	sc_dprintf(devices_allow_fd, "c 5:0 rwm\n");	// /dev/tty
    68  	sc_dprintf(devices_allow_fd, "c 5:1 rwm\n");	// /dev/console
    69  	sc_dprintf(devices_allow_fd, "c 5:2 rwm\n");	// /dev/ptmx
    70  }
    71  
    72  /** Allow access to current and future PTY slaves.
    73   *
    74   * We unconditionally add them since we use a devpts newinstance. Unix98 PTY
    75   * slaves major are 136-143.
    76   *
    77   * See also:
    78   * https://www.kernel.org/doc/Documentation/admin-guide/devices.txt
    79   **/
    80  static void sc_udev_allow_pty_slaves(int devices_allow_fd)
    81  {
    82  	for (unsigned pty_major = 136; pty_major <= 143; pty_major++) {
    83  		sc_dprintf(devices_allow_fd, "c %u:* rwm\n", pty_major);
    84  	}
    85  }
    86  
    87  /** Allow access to Nvidia devices.
    88   *
    89   * Nvidia modules are proprietary and therefore aren't in sysfs and can't be
    90   * udev tagged. For now, just add existing nvidia devices to the cgroup
    91   * unconditionally (AppArmor will still mediate the access).  We'll want to
    92   * rethink this if snapd needs to mediate access to other proprietary devices.
    93   *
    94   * Device major and minor numbers are described in (though nvidia-uvm currently
    95   * isn't listed):
    96   *
    97   * https://www.kernel.org/doc/Documentation/admin-guide/devices.txt
    98   **/
    99  static void sc_udev_allow_nvidia(int devices_allow_fd)
   100  {
   101  	struct stat sbuf;
   102  
   103  	/* Allow access to /dev/nvidia0 through /dev/nvidia254 */
   104  	for (unsigned nv_minor = 0; nv_minor < 255; nv_minor++) {
   105  		char nv_path[15] = { 0 };	// /dev/nvidiaXXX
   106  		sc_must_snprintf(nv_path, sizeof(nv_path), "/dev/nvidia%u",
   107  				 nv_minor);
   108  
   109  		/* Stop trying to find devices after one is not found. In this manner,
   110  		 * we'll add /dev/nvidia0 and /dev/nvidia1 but stop trying to find
   111  		 * nvidia3 - nvidia254 if nvidia2 is not found. */
   112  		if (stat(nv_path, &sbuf) < 0) {
   113  			break;
   114  		}
   115  		sc_dprintf(devices_allow_fd, "c %u:%u rwm\n",
   116  			   major(sbuf.st_rdev), minor(sbuf.st_rdev));
   117  	}
   118  
   119  	if (stat("/dev/nvidiactl", &sbuf) == 0) {
   120  		sc_dprintf(devices_allow_fd, "c %u:%u rwm\n",
   121  			   major(sbuf.st_rdev), minor(sbuf.st_rdev));
   122  	}
   123  	if (stat("/dev/nvidia-uvm", &sbuf) == 0) {
   124  		sc_dprintf(devices_allow_fd, "c %u:%u rwm\n",
   125  			   major(sbuf.st_rdev), minor(sbuf.st_rdev));
   126  	}
   127  	if (stat("/dev/nvidia-modeset", &sbuf) == 0) {
   128  		sc_dprintf(devices_allow_fd, "c %u:%u rwm\n",
   129  			   major(sbuf.st_rdev), minor(sbuf.st_rdev));
   130  	}
   131  }
   132  
   133  /**
   134   * Allow access to /dev/uhid.
   135   *
   136   * Currently /dev/uhid isn't represented in sysfs, so add it to the device
   137   * cgroup if it exists and let AppArmor handle the mediation.
   138   **/
   139  static void sc_udev_allow_uhid(int devices_allow_fd)
   140  {
   141  	struct stat sbuf;
   142  
   143  	if (stat("/dev/uhid", &sbuf) == 0) {
   144  		sc_dprintf(devices_allow_fd, "c %u:%u rwm\n",
   145  			   major(sbuf.st_rdev), minor(sbuf.st_rdev));
   146  	}
   147  }
   148  
   149  /**
   150   * Allow access to /dev/net/tun
   151   *
   152   * When CONFIG_TUN=m, /dev/net/tun will exist but using it doesn't
   153   * autoload the tun module but also /dev/net/tun isn't udev tagged
   154   * until it is loaded. To work around this, if /dev/net/tun exists, add
   155   * it unconditionally to the cgroup and rely on AppArmor to mediate the
   156   * access. LP: #1859084
   157   **/
   158  static void sc_udev_allow_dev_net_tun(int devices_allow_fd)
   159  {
   160  	struct stat sbuf;
   161  
   162  	if (stat("/dev/net/tun", &sbuf) == 0) {
   163  		sc_dprintf(devices_allow_fd, "c %u:%u rwm\n",
   164  			   major(sbuf.st_rdev), minor(sbuf.st_rdev));
   165  	}
   166  }
   167  
   168  /**
   169   * Allow access to assigned devices.
   170   *
   171   * The snapd udev security backend uses udev rules to tag matching devices with
   172   * tags corresponding to snap applications. Here we interrogate udev and allow
   173   * access to all assigned devices.
   174   **/
   175  static void sc_udev_allow_assigned(int devices_allow_fd, struct udev *udev,
   176  				   struct udev_list_entry *assigned)
   177  {
   178  	for (struct udev_list_entry * entry = assigned; entry != NULL;
   179  	     entry = udev_list_entry_get_next(entry)) {
   180  		const char *path = udev_list_entry_get_name(entry);
   181  		if (path == NULL) {
   182  			die("udev_list_entry_get_name failed");
   183  		}
   184  		struct udev_device *device =
   185  		    udev_device_new_from_syspath(udev, path);
   186  		/** This is a non-fatal error as devices can disappear asynchronously
   187  		 * and on slow devices we may indeed observe a device that no longer
   188  		 * exists.
   189  		 *
   190  		 * Similar debug + continue pattern repeats in all the udev calls in
   191  		 * this function. Related to LP: #1881209 */
   192  		if (device == NULL) {
   193  			debug("cannot find device from syspath %s", path);
   194  			continue;
   195  		}
   196  		dev_t devnum = udev_device_get_devnum(device);
   197  		unsigned int major = major(devnum);
   198  		unsigned int minor = minor(devnum);
   199  		/* The manual page of udev_device_get_devnum says:
   200  		 * > On success, udev_device_get_devnum() returns the device type of
   201  		 * > the passed device. On failure, a device type with minor and major
   202  		 * > number set to 0 is returned. */
   203  		if (major == 0 && minor == 0) {
   204  			debug("cannot get major/minor numbers for syspath %s",
   205  			      path);
   206  			continue;
   207  		}
   208  		/* devnode is bound to the lifetime of the device and we cannot release
   209  		 * it separately. */
   210  		const char *devnode = udev_device_get_devnode(device);
   211  		if (devnode == NULL) {
   212  			debug("cannot find /dev node from udev device");
   213  			continue;
   214  		}
   215  		debug("inspecting type of device: %s", devnode);
   216  		struct stat file_info;
   217  		if (stat(devnode, &file_info) < 0) {
   218  			debug("cannot stat %s", devnode);
   219  			continue;
   220  		}
   221  		switch (file_info.st_mode & S_IFMT) {
   222  		case S_IFBLK:
   223  			dprintf(devices_allow_fd, "b %u:%u rwm\n", major,
   224  				minor);
   225  			break;
   226  		case S_IFCHR:
   227  			dprintf(devices_allow_fd, "c %u:%u rwm\n", major,
   228  				minor);
   229  			break;
   230  		default:
   231  			/* Not a device, ignore it. */
   232  			break;
   233  		}
   234  		udev_device_unref(device);
   235  	}
   236  }
   237  
   238  static void sc_udev_setup_acls(int devices_allow_fd, int devices_deny_fd,
   239  			       struct udev *udev,
   240  			       struct udev_list_entry *assigned)
   241  {
   242  	/* Deny device access by default.
   243  	 *
   244  	 * Write 'a' to devices.deny to remove all existing devices that were added
   245  	 * in previous launcher invocations, then add the static and assigned
   246  	 * devices. This ensures that at application launch the cgroup only has
   247  	 * what is currently assigned. */
   248  	sc_dprintf(devices_deny_fd, "a");
   249  
   250  	/* Allow access to various devices. */
   251  	sc_udev_allow_common(devices_allow_fd);
   252  	sc_udev_allow_pty_slaves(devices_allow_fd);
   253  	sc_udev_allow_nvidia(devices_allow_fd);
   254  	sc_udev_allow_uhid(devices_allow_fd);
   255  	sc_udev_allow_dev_net_tun(devices_allow_fd);
   256  	sc_udev_allow_assigned(devices_allow_fd, udev, assigned);
   257  }
   258  
   259  static char *sc_security_to_udev_tag(const char *security_tag)
   260  {
   261  	char *udev_tag = sc_strdup(security_tag);
   262  	for (char *c = strchr(udev_tag, '.'); c != NULL; c = strchr(c, '.')) {
   263  		*c = '_';
   264  	}
   265  	return udev_tag;
   266  }
   267  
   268  static void sc_cleanup_udev(struct udev **udev)
   269  {
   270  	if (udev != NULL && *udev != NULL) {
   271  		udev_unref(*udev);
   272  		*udev = NULL;
   273  	}
   274  }
   275  
   276  static void sc_cleanup_udev_enumerate(struct udev_enumerate **enumerate)
   277  {
   278  	if (enumerate != NULL && *enumerate != NULL) {
   279  		udev_enumerate_unref(*enumerate);
   280  		*enumerate = NULL;
   281  	}
   282  }
   283  
/**
 * Set of open file descriptors used to manage one device cgroup.
 *
 * A value of -1 in any field means "no descriptor"; sc_cleanup_cgroup_fds()
 * passes each field to sc_cleanup_close().
 **/
typedef struct sc_cgroup_fds {
	/* Descriptor of the "devices.allow" file of the cgroup. */
	int devices_allow_fd;
	/* Descriptor of the "devices.deny" file of the cgroup. */
	int devices_deny_fd;
	/* Descriptor of the "cgroup.procs" file of the cgroup. */
	int cgroup_procs_fd;
} sc_cgroup_fds;
   289  
   290  static sc_cgroup_fds sc_udev_open_cgroup_v1(const char *security_tag)
   291  {
   292  	/* Note that -1 is the neutral value for a file descriptor.
   293  	 * This is relevant as a cleanup handler for sc_cgroup_fds,
   294  	 * closes all file descriptors that are not -1. */
   295  	sc_cgroup_fds fds = { -1, -1, -1 };
   296  
   297  	/* Open /sys/fs/cgroup */
   298  	const char *cgroup_path = "/sys/fs/cgroup";
   299  	int SC_CLEANUP(sc_cleanup_close) cgroup_fd = -1;
   300  	cgroup_fd = open(cgroup_path,
   301  			 O_PATH | O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW);
   302  	if (cgroup_fd < 0) {
   303  		die("cannot open %s", cgroup_path);
   304  	}
   305  
   306  	/* Open devices relative to /sys/fs/cgroup */
   307  	const char *devices_relpath = "devices";
   308  	int SC_CLEANUP(sc_cleanup_close) devices_fd = -1;
   309  	devices_fd = openat(cgroup_fd, devices_relpath,
   310  			    O_PATH | O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW);
   311  	if (devices_fd < 0) {
   312  		die("cannot open %s/%s", cgroup_path, devices_relpath);
   313  	}
   314  
   315  	/* Open snap.$SNAP_NAME.$APP_NAME relative to /sys/fs/cgroup/devices,
   316  	 * creating the directory if necessary. Note that we always chown the
   317  	 * resulting directory to root:root. */
   318  	const char *security_tag_relpath = security_tag;
   319  	sc_identity old = sc_set_effective_identity(sc_root_group_identity());
   320  	if (mkdirat(devices_fd, security_tag_relpath, 0755) < 0) {
   321  		if (errno != EEXIST) {
   322  			die("cannot create directory %s/%s/%s", cgroup_path,
   323  			    devices_relpath, security_tag_relpath);
   324  		}
   325  	}
   326  	(void)sc_set_effective_identity(old);
   327  
   328  	int SC_CLEANUP(sc_cleanup_close) security_tag_fd = -1;
   329  	security_tag_fd = openat(devices_fd, security_tag_relpath,
   330  				 O_RDONLY | O_DIRECTORY | O_CLOEXEC |
   331  				 O_NOFOLLOW);
   332  	if (security_tag_fd < 0) {
   333  		die("cannot open %s/%s/%s", cgroup_path, devices_relpath,
   334  		    security_tag_relpath);
   335  	}
   336  
   337  	/* Open devices.allow relative to /sys/fs/cgroup/devices/snap.$SNAP_NAME.$APP_NAME */
   338  	const char *devices_allow_relpath = "devices.allow";
   339  	int SC_CLEANUP(sc_cleanup_close) devices_allow_fd = -1;
   340  	devices_allow_fd = openat(security_tag_fd, devices_allow_relpath,
   341  				  O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
   342  	if (devices_allow_fd < 0) {
   343  		die("cannot open %s/%s/%s/%s", cgroup_path, devices_relpath,
   344  		    security_tag_relpath, devices_allow_relpath);
   345  	}
   346  
   347  	/* Open devices.deny relative to /sys/fs/cgroup/devices/snap.$SNAP_NAME.$APP_NAME */
   348  	const char *devices_deny_relpath = "devices.deny";
   349  	int SC_CLEANUP(sc_cleanup_close) devices_deny_fd = -1;
   350  	devices_deny_fd = openat(security_tag_fd, devices_deny_relpath,
   351  				 O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
   352  	if (devices_deny_fd < 0) {
   353  		die("cannot open %s/%s/%s/%s", cgroup_path, devices_relpath,
   354  		    security_tag_relpath, devices_deny_relpath);
   355  	}
   356  
   357  	/* Open cgroup.procs relative to /sys/fs/cgroup/devices/snap.$SNAP_NAME.$APP_NAME */
   358  	const char *cgroup_procs_relpath = "cgroup.procs";
   359  	int SC_CLEANUP(sc_cleanup_close) cgroup_procs_fd = -1;
   360  	cgroup_procs_fd = openat(security_tag_fd, cgroup_procs_relpath,
   361  				 O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
   362  	if (cgroup_procs_fd < 0) {
   363  		die("cannot open %s/%s/%s/%s", cgroup_path, devices_relpath,
   364  		    security_tag_relpath, cgroup_procs_relpath);
   365  	}
   366  
   367  	/* Everything worked so pack the result and "move" the descriptors over so
   368  	 * that they are not closed by the cleanup functions associated with the
   369  	 * individual variables. */
   370  	fds.devices_allow_fd = devices_allow_fd;
   371  	fds.devices_deny_fd = devices_deny_fd;
   372  	fds.cgroup_procs_fd = cgroup_procs_fd;
   373  	/* Reset the locals so that they are not closed by the cleanup handlers. */
   374  	devices_allow_fd = -1;
   375  	devices_deny_fd = -1;
   376  	cgroup_procs_fd = -1;
   377  	return fds;
   378  }
   379  
   380  static void sc_cleanup_cgroup_fds(sc_cgroup_fds * fds)
   381  {
   382  	if (fds != NULL) {
   383  		sc_cleanup_close(&fds->devices_allow_fd);
   384  		sc_cleanup_close(&fds->devices_deny_fd);
   385  		sc_cleanup_close(&fds->cgroup_procs_fd);
   386  	}
   387  }
   388  
   389  void sc_setup_device_cgroup(const char *security_tag)
   390  {
   391  	debug("setting up device cgroup");
   392  	if (sc_cgroup_is_v2()) {
   393  		/* TODO: add support for v2 mode. This is coming but needs several more
   394  		 * rounds of iteration. */
   395  		return;
   396  	}
   397  
   398  	/* Derive the udev tag from the snap security tag.
   399  	 *
   400  	 * Because udev does not allow for dots in tag names, those are replaced by
   401  	 * underscores in snapd. We just match that behavior. */
   402  	char *udev_tag SC_CLEANUP(sc_cleanup_string) = NULL;
   403  	udev_tag = sc_security_to_udev_tag(security_tag);
   404  
   405  	/* Use udev APIs to talk to udev-the-daemon to determine the list of
   406  	 * "devices" with that tag assigned. The list may be empty, in which case
   407  	 * there's no udev tagging in effect and we must refrain from constructing
   408  	 * the cgroup as it would interfere with the execution of a program. */
   409  	struct udev SC_CLEANUP(sc_cleanup_udev) * udev = NULL;
   410  	udev = udev_new();
   411  	if (udev == NULL) {
   412  		die("cannot connect to udev");
   413  	}
   414  	struct udev_enumerate SC_CLEANUP(sc_cleanup_udev_enumerate) * devices =
   415  	    NULL;
   416  	devices = udev_enumerate_new(udev);
   417  	if (devices == NULL) {
   418  		die("cannot create udev device enumeration");
   419  	}
   420  	if (udev_enumerate_add_match_tag(devices, udev_tag) < 0) {
   421  		die("cannot add tag match to udev device enumeration");
   422  	}
   423  	if (udev_enumerate_scan_devices(devices) < 0) {
   424  		die("cannot enumerate udev devices");
   425  	}
   426  	/* NOTE: udev_list_entry is bound to life-cycle of the used udev_enumerate */
   427  	struct udev_list_entry *assigned;
   428  	assigned = udev_enumerate_get_list_entry(devices);
   429  	if (assigned == NULL) {
   430  		/* NOTE: Nothing is assigned, don't create or use the device cgroup. */
   431  		debug("no devices tagged with %s, skipping device cgroup setup",
   432  		      udev_tag);
   433  		return;
   434  	}
   435  
   436  	/* Note that -1 is the neutral value for a file descriptor.
   437  	 * The cleanup function associated with this variable closes
   438  	 * descriptors other than -1. */
   439  	sc_cgroup_fds SC_CLEANUP(sc_cleanup_cgroup_fds) fds = { -1, -1, -1 };
   440  	fds = sc_udev_open_cgroup_v1(security_tag);
   441  	if (fds.cgroup_procs_fd < 0) {
   442  		die("cannot prepare cgroup v1 device hierarchy");
   443  		return;
   444  	}
   445  	/* Setup the device group access control list */
   446  	sc_udev_setup_acls(fds.devices_allow_fd, fds.devices_deny_fd,
   447  			   udev, assigned);
   448  
   449  	/* Move ourselves to the device cgroup */
   450  	sc_dprintf(fds.cgroup_procs_fd, "%i\n", getpid());
   451  	debug("associated snap application process %i with device cgroup %s",
   452  	      getpid(), security_tag);
   453  }