github.com/stulluk/snapd@v0.0.0-20210611110309-f6d5d5bd24b0/cmd/snap-confine/udev-support.c (about) 1 /* 2 * Copyright (C) 2015-2020 Canonical Ltd 3 * 4 * This program is free software: you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License version 3 as 6 * published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * GNU General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 * 16 */ 17 #include "config.h" 18 19 #include <ctype.h> 20 #include <errno.h> 21 #include <fcntl.h> 22 #include <string.h> 23 #include <sys/stat.h> 24 #include <sys/sysmacros.h> 25 #include <sys/types.h> 26 #include <unistd.h> 27 28 #include <libudev.h> 29 30 #include "../libsnap-confine-private/cleanup-funcs.h" 31 #include "../libsnap-confine-private/snap.h" 32 #include "../libsnap-confine-private/string-utils.h" 33 #include "../libsnap-confine-private/cgroup-support.h" 34 #include "../libsnap-confine-private/utils.h" 35 #include "udev-support.h" 36 37 __attribute__((format(printf, 2, 3))) 38 static void sc_dprintf(int fd, const char *format, ...); 39 40 static void sc_dprintf(int fd, const char *format, ...) 41 { 42 va_list ap1; 43 va_list ap2; 44 int n_expected, n_actual; 45 46 va_start(ap1, format); 47 va_copy(ap2, ap1); 48 n_expected = vsnprintf(NULL, 0, format, ap2); 49 n_actual = vdprintf(fd, format, ap1); 50 if (n_actual == -1 || n_expected != n_actual) { 51 die("cannot write to fd %d", fd); 52 } 53 va_end(ap2); 54 va_end(ap1); 55 } 56 57 /* Allow access to common devices. */ 58 static void sc_udev_allow_common(int devices_allow_fd) 59 { 60 /* The devices we add here have static number allocation. 61 * https://www.kernel.org/doc/html/v4.11/admin-guide/devices.html */ 62 sc_dprintf(devices_allow_fd, "c 1:3 rwm\n"); // /dev/null 63 sc_dprintf(devices_allow_fd, "c 1:5 rwm\n"); // /dev/zero 64 sc_dprintf(devices_allow_fd, "c 1:7 rwm\n"); // /dev/full 65 sc_dprintf(devices_allow_fd, "c 1:8 rwm\n"); // /dev/random 66 sc_dprintf(devices_allow_fd, "c 1:9 rwm\n"); // /dev/urandom 67 sc_dprintf(devices_allow_fd, "c 5:0 rwm\n"); // /dev/tty 68 sc_dprintf(devices_allow_fd, "c 5:1 rwm\n"); // /dev/console 69 sc_dprintf(devices_allow_fd, "c 5:2 rwm\n"); // /dev/ptmx 70 } 71 72 /** Allow access to current and future PTY slaves. 73 * 74 * We unconditionally add them since we use a devpts newinstance. Unix98 PTY 75 * slaves major are 136-143. 76 * 77 * See also: 78 * https://www.kernel.org/doc/Documentation/admin-guide/devices.txt 79 **/ 80 static void sc_udev_allow_pty_slaves(int devices_allow_fd) 81 { 82 for (unsigned pty_major = 136; pty_major <= 143; pty_major++) { 83 sc_dprintf(devices_allow_fd, "c %u:* rwm\n", pty_major); 84 } 85 } 86 87 /** Allow access to Nvidia devices. 88 * 89 * Nvidia modules are proprietary and therefore aren't in sysfs and can't be 90 * udev tagged. For now, just add existing nvidia devices to the cgroup 91 * unconditionally (AppArmor will still mediate the access). We'll want to 92 * rethink this if snapd needs to mediate access to other proprietary devices. 93 * 94 * Device major and minor numbers are described in (though nvidia-uvm currently 95 * isn't listed): 96 * 97 * https://www.kernel.org/doc/Documentation/admin-guide/devices.txt 98 **/ 99 static void sc_udev_allow_nvidia(int devices_allow_fd) 100 { 101 struct stat sbuf; 102 103 /* Allow access to /dev/nvidia0 through /dev/nvidia254 */ 104 for (unsigned nv_minor = 0; nv_minor < 255; nv_minor++) { 105 char nv_path[15] = { 0 }; // /dev/nvidiaXXX 106 sc_must_snprintf(nv_path, sizeof(nv_path), "/dev/nvidia%u", 107 nv_minor); 108 109 /* Stop trying to find devices after one is not found. In this manner, 110 * we'll add /dev/nvidia0 and /dev/nvidia1 but stop trying to find 111 * nvidia3 - nvidia254 if nvidia2 is not found. */ 112 if (stat(nv_path, &sbuf) < 0) { 113 break; 114 } 115 sc_dprintf(devices_allow_fd, "c %u:%u rwm\n", 116 major(sbuf.st_rdev), minor(sbuf.st_rdev)); 117 } 118 119 if (stat("/dev/nvidiactl", &sbuf) == 0) { 120 sc_dprintf(devices_allow_fd, "c %u:%u rwm\n", 121 major(sbuf.st_rdev), minor(sbuf.st_rdev)); 122 } 123 if (stat("/dev/nvidia-uvm", &sbuf) == 0) { 124 sc_dprintf(devices_allow_fd, "c %u:%u rwm\n", 125 major(sbuf.st_rdev), minor(sbuf.st_rdev)); 126 } 127 if (stat("/dev/nvidia-modeset", &sbuf) == 0) { 128 sc_dprintf(devices_allow_fd, "c %u:%u rwm\n", 129 major(sbuf.st_rdev), minor(sbuf.st_rdev)); 130 } 131 } 132 133 /** 134 * Allow access to /dev/uhid. 135 * 136 * Currently /dev/uhid isn't represented in sysfs, so add it to the device 137 * cgroup if it exists and let AppArmor handle the mediation. 138 **/ 139 static void sc_udev_allow_uhid(int devices_allow_fd) 140 { 141 struct stat sbuf; 142 143 if (stat("/dev/uhid", &sbuf) == 0) { 144 sc_dprintf(devices_allow_fd, "c %u:%u rwm\n", 145 major(sbuf.st_rdev), minor(sbuf.st_rdev)); 146 } 147 } 148 149 /** 150 * Allow access to /dev/net/tun 151 * 152 * When CONFIG_TUN=m, /dev/net/tun will exist but using it doesn't 153 * autoload the tun module but also /dev/net/tun isn't udev tagged 154 * until it is loaded. To work around this, if /dev/net/tun exists, add 155 * it unconditionally to the cgroup and rely on AppArmor to mediate the 156 * access. LP: #1859084 157 **/ 158 static void sc_udev_allow_dev_net_tun(int devices_allow_fd) 159 { 160 struct stat sbuf; 161 162 if (stat("/dev/net/tun", &sbuf) == 0) { 163 sc_dprintf(devices_allow_fd, "c %u:%u rwm\n", 164 major(sbuf.st_rdev), minor(sbuf.st_rdev)); 165 } 166 } 167 168 /** 169 * Allow access to assigned devices. 170 * 171 * The snapd udev security backend uses udev rules to tag matching devices with 172 * tags corresponding to snap applications. Here we interrogate udev and allow 173 * access to all assigned devices. 174 **/ 175 static void sc_udev_allow_assigned(int devices_allow_fd, struct udev *udev, 176 struct udev_list_entry *assigned) 177 { 178 for (struct udev_list_entry * entry = assigned; entry != NULL; 179 entry = udev_list_entry_get_next(entry)) { 180 const char *path = udev_list_entry_get_name(entry); 181 if (path == NULL) { 182 die("udev_list_entry_get_name failed"); 183 } 184 struct udev_device *device = 185 udev_device_new_from_syspath(udev, path); 186 /** This is a non-fatal error as devices can disappear asynchronously 187 * and on slow devices we may indeed observe a device that no longer 188 * exists. 189 * 190 * Similar debug + continue pattern repeats in all the udev calls in 191 * this function. Related to LP: #1881209 */ 192 if (device == NULL) { 193 debug("cannot find device from syspath %s", path); 194 continue; 195 } 196 dev_t devnum = udev_device_get_devnum(device); 197 unsigned int major = major(devnum); 198 unsigned int minor = minor(devnum); 199 /* The manual page of udev_device_get_devnum says: 200 * > On success, udev_device_get_devnum() returns the device type of 201 * > the passed device. On failure, a device type with minor and major 202 * > number set to 0 is returned. */ 203 if (major == 0 && minor == 0) { 204 debug("cannot get major/minor numbers for syspath %s", 205 path); 206 continue; 207 } 208 /* devnode is bound to the lifetime of the device and we cannot release 209 * it separately. */ 210 const char *devnode = udev_device_get_devnode(device); 211 if (devnode == NULL) { 212 debug("cannot find /dev node from udev device"); 213 continue; 214 } 215 debug("inspecting type of device: %s", devnode); 216 struct stat file_info; 217 if (stat(devnode, &file_info) < 0) { 218 debug("cannot stat %s", devnode); 219 continue; 220 } 221 switch (file_info.st_mode & S_IFMT) { 222 case S_IFBLK: 223 dprintf(devices_allow_fd, "b %u:%u rwm\n", major, 224 minor); 225 break; 226 case S_IFCHR: 227 dprintf(devices_allow_fd, "c %u:%u rwm\n", major, 228 minor); 229 break; 230 default: 231 /* Not a device, ignore it. */ 232 break; 233 } 234 udev_device_unref(device); 235 } 236 } 237 238 static void sc_udev_setup_acls(int devices_allow_fd, int devices_deny_fd, 239 struct udev *udev, 240 struct udev_list_entry *assigned) 241 { 242 /* Deny device access by default. 243 * 244 * Write 'a' to devices.deny to remove all existing devices that were added 245 * in previous launcher invocations, then add the static and assigned 246 * devices. This ensures that at application launch the cgroup only has 247 * what is currently assigned. */ 248 sc_dprintf(devices_deny_fd, "a"); 249 250 /* Allow access to various devices. */ 251 sc_udev_allow_common(devices_allow_fd); 252 sc_udev_allow_pty_slaves(devices_allow_fd); 253 sc_udev_allow_nvidia(devices_allow_fd); 254 sc_udev_allow_uhid(devices_allow_fd); 255 sc_udev_allow_dev_net_tun(devices_allow_fd); 256 sc_udev_allow_assigned(devices_allow_fd, udev, assigned); 257 } 258 259 static char *sc_security_to_udev_tag(const char *security_tag) 260 { 261 char *udev_tag = sc_strdup(security_tag); 262 for (char *c = strchr(udev_tag, '.'); c != NULL; c = strchr(c, '.')) { 263 *c = '_'; 264 } 265 return udev_tag; 266 } 267 268 static void sc_cleanup_udev(struct udev **udev) 269 { 270 if (udev != NULL && *udev != NULL) { 271 udev_unref(*udev); 272 *udev = NULL; 273 } 274 } 275 276 static void sc_cleanup_udev_enumerate(struct udev_enumerate **enumerate) 277 { 278 if (enumerate != NULL && *enumerate != NULL) { 279 udev_enumerate_unref(*enumerate); 280 *enumerate = NULL; 281 } 282 } 283 284 typedef struct sc_cgroup_fds { 285 int devices_allow_fd; 286 int devices_deny_fd; 287 int cgroup_procs_fd; 288 } sc_cgroup_fds; 289 290 static sc_cgroup_fds sc_udev_open_cgroup_v1(const char *security_tag) 291 { 292 /* Note that -1 is the neutral value for a file descriptor. 293 * This is relevant as a cleanup handler for sc_cgroup_fds, 294 * closes all file descriptors that are not -1. */ 295 sc_cgroup_fds fds = { -1, -1, -1 }; 296 297 /* Open /sys/fs/cgroup */ 298 const char *cgroup_path = "/sys/fs/cgroup"; 299 int SC_CLEANUP(sc_cleanup_close) cgroup_fd = -1; 300 cgroup_fd = open(cgroup_path, 301 O_PATH | O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW); 302 if (cgroup_fd < 0) { 303 die("cannot open %s", cgroup_path); 304 } 305 306 /* Open devices relative to /sys/fs/cgroup */ 307 const char *devices_relpath = "devices"; 308 int SC_CLEANUP(sc_cleanup_close) devices_fd = -1; 309 devices_fd = openat(cgroup_fd, devices_relpath, 310 O_PATH | O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW); 311 if (devices_fd < 0) { 312 die("cannot open %s/%s", cgroup_path, devices_relpath); 313 } 314 315 /* Open snap.$SNAP_NAME.$APP_NAME relative to /sys/fs/cgroup/devices, 316 * creating the directory if necessary. Note that we always chown the 317 * resulting directory to root:root. */ 318 const char *security_tag_relpath = security_tag; 319 sc_identity old = sc_set_effective_identity(sc_root_group_identity()); 320 if (mkdirat(devices_fd, security_tag_relpath, 0755) < 0) { 321 if (errno != EEXIST) { 322 die("cannot create directory %s/%s/%s", cgroup_path, 323 devices_relpath, security_tag_relpath); 324 } 325 } 326 (void)sc_set_effective_identity(old); 327 328 int SC_CLEANUP(sc_cleanup_close) security_tag_fd = -1; 329 security_tag_fd = openat(devices_fd, security_tag_relpath, 330 O_RDONLY | O_DIRECTORY | O_CLOEXEC | 331 O_NOFOLLOW); 332 if (security_tag_fd < 0) { 333 die("cannot open %s/%s/%s", cgroup_path, devices_relpath, 334 security_tag_relpath); 335 } 336 337 /* Open devices.allow relative to /sys/fs/cgroup/devices/snap.$SNAP_NAME.$APP_NAME */ 338 const char *devices_allow_relpath = "devices.allow"; 339 int SC_CLEANUP(sc_cleanup_close) devices_allow_fd = -1; 340 devices_allow_fd = openat(security_tag_fd, devices_allow_relpath, 341 O_WRONLY | O_CLOEXEC | O_NOFOLLOW); 342 if (devices_allow_fd < 0) { 343 die("cannot open %s/%s/%s/%s", cgroup_path, devices_relpath, 344 security_tag_relpath, devices_allow_relpath); 345 } 346 347 /* Open devices.deny relative to /sys/fs/cgroup/devices/snap.$SNAP_NAME.$APP_NAME */ 348 const char *devices_deny_relpath = "devices.deny"; 349 int SC_CLEANUP(sc_cleanup_close) devices_deny_fd = -1; 350 devices_deny_fd = openat(security_tag_fd, devices_deny_relpath, 351 O_WRONLY | O_CLOEXEC | O_NOFOLLOW); 352 if (devices_deny_fd < 0) { 353 die("cannot open %s/%s/%s/%s", cgroup_path, devices_relpath, 354 security_tag_relpath, devices_deny_relpath); 355 } 356 357 /* Open cgroup.procs relative to /sys/fs/cgroup/devices/snap.$SNAP_NAME.$APP_NAME */ 358 const char *cgroup_procs_relpath = "cgroup.procs"; 359 int SC_CLEANUP(sc_cleanup_close) cgroup_procs_fd = -1; 360 cgroup_procs_fd = openat(security_tag_fd, cgroup_procs_relpath, 361 O_WRONLY | O_CLOEXEC | O_NOFOLLOW); 362 if (cgroup_procs_fd < 0) { 363 die("cannot open %s/%s/%s/%s", cgroup_path, devices_relpath, 364 security_tag_relpath, cgroup_procs_relpath); 365 } 366 367 /* Everything worked so pack the result and "move" the descriptors over so 368 * that they are not closed by the cleanup functions associated with the 369 * individual variables. */ 370 fds.devices_allow_fd = devices_allow_fd; 371 fds.devices_deny_fd = devices_deny_fd; 372 fds.cgroup_procs_fd = cgroup_procs_fd; 373 /* Reset the locals so that they are not closed by the cleanup handlers. */ 374 devices_allow_fd = -1; 375 devices_deny_fd = -1; 376 cgroup_procs_fd = -1; 377 return fds; 378 } 379 380 static void sc_cleanup_cgroup_fds(sc_cgroup_fds * fds) 381 { 382 if (fds != NULL) { 383 sc_cleanup_close(&fds->devices_allow_fd); 384 sc_cleanup_close(&fds->devices_deny_fd); 385 sc_cleanup_close(&fds->cgroup_procs_fd); 386 } 387 } 388 389 void sc_setup_device_cgroup(const char *security_tag) 390 { 391 debug("setting up device cgroup"); 392 if (sc_cgroup_is_v2()) { 393 /* TODO: add support for v2 mode. This is coming but needs several more 394 * rounds of iteration. */ 395 return; 396 } 397 398 /* Derive the udev tag from the snap security tag. 399 * 400 * Because udev does not allow for dots in tag names, those are replaced by 401 * underscores in snapd. We just match that behavior. */ 402 char *udev_tag SC_CLEANUP(sc_cleanup_string) = NULL; 403 udev_tag = sc_security_to_udev_tag(security_tag); 404 405 /* Use udev APIs to talk to udev-the-daemon to determine the list of 406 * "devices" with that tag assigned. The list may be empty, in which case 407 * there's no udev tagging in effect and we must refrain from constructing 408 * the cgroup as it would interfere with the execution of a program. */ 409 struct udev SC_CLEANUP(sc_cleanup_udev) * udev = NULL; 410 udev = udev_new(); 411 if (udev == NULL) { 412 die("cannot connect to udev"); 413 } 414 struct udev_enumerate SC_CLEANUP(sc_cleanup_udev_enumerate) * devices = 415 NULL; 416 devices = udev_enumerate_new(udev); 417 if (devices == NULL) { 418 die("cannot create udev device enumeration"); 419 } 420 if (udev_enumerate_add_match_tag(devices, udev_tag) < 0) { 421 die("cannot add tag match to udev device enumeration"); 422 } 423 if (udev_enumerate_scan_devices(devices) < 0) { 424 die("cannot enumerate udev devices"); 425 } 426 /* NOTE: udev_list_entry is bound to life-cycle of the used udev_enumerate */ 427 struct udev_list_entry *assigned; 428 assigned = udev_enumerate_get_list_entry(devices); 429 if (assigned == NULL) { 430 /* NOTE: Nothing is assigned, don't create or use the device cgroup. */ 431 debug("no devices tagged with %s, skipping device cgroup setup", 432 udev_tag); 433 return; 434 } 435 436 /* Note that -1 is the neutral value for a file descriptor. 437 * The cleanup function associated with this variable closes 438 * descriptors other than -1. */ 439 sc_cgroup_fds SC_CLEANUP(sc_cleanup_cgroup_fds) fds = { -1, -1, -1 }; 440 fds = sc_udev_open_cgroup_v1(security_tag); 441 if (fds.cgroup_procs_fd < 0) { 442 die("cannot prepare cgroup v1 device hierarchy"); 443 return; 444 } 445 /* Setup the device group access control list */ 446 sc_udev_setup_acls(fds.devices_allow_fd, fds.devices_deny_fd, 447 udev, assigned); 448 449 /* Move ourselves to the device cgroup */ 450 sc_dprintf(fds.cgroup_procs_fd, "%i\n", getpid()); 451 debug("associated snap application process %i with device cgroup %s", 452 getpid(), security_tag); 453 }