github.com/anonymouse64/snapd@v0.0.0-20210824153203-04c4c42d842d/cmd/libsnap-confine-private/cgroup-support.c (about) 1 /* 2 * Copyright (C) 2019-2021 Canonical Ltd 3 * 4 * This program is free software: you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License version 3 as 6 * published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * GNU General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 * 16 */ 17 18 #define _GNU_SOURCE 19 20 #include "cgroup-support.h" 21 22 #include <dirent.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <stdio.h> 26 #include <string.h> 27 #include <sys/stat.h> 28 #include <sys/types.h> 29 #include <sys/vfs.h> 30 #include <unistd.h> 31 32 #include "cleanup-funcs.h" 33 #include "string-utils.h" 34 #include "utils.h" 35 36 void sc_cgroup_create_and_join(const char *parent, const char *name, pid_t pid) { 37 int parent_fd SC_CLEANUP(sc_cleanup_close) = -1; 38 parent_fd = open(parent, O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC); 39 if (parent_fd < 0) { 40 die("cannot open cgroup hierarchy %s", parent); 41 } 42 // Since we may be running from a setuid but not setgid executable, switch 43 // to the effective group to root so that the mkdirat call creates a cgroup 44 // that is always owned by root.root. 45 sc_identity old = sc_set_effective_identity(sc_root_group_identity()); 46 if (mkdirat(parent_fd, name, 0755) < 0 && errno != EEXIST) { 47 die("cannot create cgroup hierarchy %s/%s", parent, name); 48 } 49 (void)sc_set_effective_identity(old); 50 int hierarchy_fd SC_CLEANUP(sc_cleanup_close) = -1; 51 hierarchy_fd = openat(parent_fd, name, O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC); 52 if (hierarchy_fd < 0) { 53 die("cannot open cgroup hierarchy %s/%s", parent, name); 54 } 55 // Open the cgroup.procs file. 56 int procs_fd SC_CLEANUP(sc_cleanup_close) = -1; 57 procs_fd = openat(hierarchy_fd, "cgroup.procs", O_WRONLY | O_NOFOLLOW | O_CLOEXEC); 58 if (procs_fd < 0) { 59 die("cannot open file %s/%s/cgroup.procs", parent, name); 60 } 61 // Write the process (task) number to the procs file. Linux task IDs are 62 // limited to 2^29 so a long int is enough to represent it. 63 // See include/linux/threads.h in the kernel source tree for details. 64 char buf[22] = {0}; // 2^64 base10 + 2 for NUL and '-' for long 65 int n = sc_must_snprintf(buf, sizeof buf, "%ld", (long)pid); 66 if (write(procs_fd, buf, n) < n) { 67 die("cannot move process %ld to cgroup hierarchy %s/%s", (long)pid, parent, name); 68 } 69 debug("moved process %ld to cgroup hierarchy %s/%s", (long)pid, parent, name); 70 } 71 72 static const char *cgroup_dir = "/sys/fs/cgroup"; 73 74 // from statfs(2) 75 #ifndef CGROUP2_SUPER_MAGIC 76 #define CGROUP2_SUPER_MAGIC 0x63677270 77 #endif 78 79 // Detect if we are running in cgroup v2 unified mode (as opposed to 80 // hybrid or legacy) The algorithm is described in 81 // https://systemd.io/CGROUP_DELEGATION/ 82 bool sc_cgroup_is_v2(void) { 83 bool hide_warning = getenv_bool("SNAPD_HIDE_CGROUPV2_WARNING", false); 84 static bool did_warn = false; 85 struct statfs buf; 86 87 if (statfs(cgroup_dir, &buf) != 0) { 88 if (errno == ENOENT) { 89 return false; 90 } 91 die("cannot statfs %s", cgroup_dir); 92 } 93 if (buf.f_type == CGROUP2_SUPER_MAGIC) { 94 if (!did_warn && !hide_warning) { 95 fprintf(stderr, "WARNING: cgroup v2 is not fully supported yet, proceeding with partial confinement\n"); 96 did_warn = true; 97 } 98 return true; 99 } 100 return false; 101 } 102 103 static const size_t max_traversal_depth = 32; 104 105 static bool traverse_looking_for_prefix_in_dir(DIR *root, const char *prefix, const char *skip, size_t depth) { 106 if (depth > max_traversal_depth) { 107 die("cannot traverse cgroups hierarchy deeper than %zu levels", max_traversal_depth); 108 } 109 while (true) { 110 errno = 0; 111 struct dirent *ent = readdir(root); 112 if (ent == NULL) { 113 // is this an error? 114 if (errno != 0) { 115 if (errno == ENOENT) { 116 // the processes may exit and the group entries may go away at 117 // any time 118 // the entries may go away at any time 119 break; 120 } 121 die("cannot read directory entry"); 122 } 123 break; 124 } 125 if (ent->d_type != DT_DIR) { 126 continue; 127 } 128 if (sc_streq(ent->d_name, "..") || sc_streq(ent->d_name, ".")) { 129 // we don't want to go up or process the current directory again 130 continue; 131 } 132 if (sc_streq(ent->d_name, skip)) { 133 // we were asked to skip this group 134 continue; 135 } 136 if (sc_startswith(ent->d_name, prefix)) { 137 debug("found matching prefix in \"%s\"", ent->d_name); 138 // the directory starts with our prefix 139 return true; 140 } 141 // entfd is consumed by fdopendir() and freed with closedir() 142 int entfd = openat(dirfd(root), ent->d_name, O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC); 143 if (entfd == -1) { 144 if (errno == ENOENT) { 145 // the processes may exit and the group entries may go away at 146 // any time 147 return false; 148 } 149 die("cannot open directory entry \"%s\"", ent->d_name); 150 } 151 // takes ownership of the file descriptor 152 DIR *entdir SC_CLEANUP(sc_cleanup_closedir) = fdopendir(entfd); 153 if (entdir == NULL) { 154 // we have the fd, so ENOENT isn't possible here 155 die("cannot fdopendir directory \"%s\"", ent->d_name); 156 } 157 bool found = traverse_looking_for_prefix_in_dir(entdir, prefix, skip, depth + 1); 158 if (found == true) { 159 return true; 160 } 161 } 162 return false; 163 } 164 165 bool sc_cgroup_v2_is_tracking_snap(const char *snap_instance) { 166 debug("is cgroup tracking snap %s?", snap_instance); 167 char tracking_group_name[PATH_MAX] = {0}; 168 // tracking groups created by snap run chain have a format: 169 // snap.<name>.<app>.<uuid>.scope, while the groups corresponding to snap 170 // services created by systemd are named like this: 171 // snap.<name>.<svc>.service 172 sc_must_snprintf(tracking_group_name, sizeof tracking_group_name, "snap.%s.", snap_instance); 173 174 // when running with cgroup v2, the snap run chain or systemd would create a 175 // tracking cgroup which the current process would execute in and would 176 // match the pattern we are looking for, thus it needs to be skipped 177 char *own_group SC_CLEANUP(sc_cleanup_string) = sc_cgroup_v2_own_path_full(); 178 if (own_group == NULL) { 179 die("cannot obtain own cgroup v2 group path"); 180 } 181 debug("own group: %s", own_group); 182 char *just_leaf = strrchr(own_group, '/'); 183 if (just_leaf == NULL) { 184 die("cannot obtain the leaf group path"); 185 } 186 // pointing at /, advance to the next char 187 just_leaf += 1; 188 189 // this would otherwise be inherently racy, but the caller is expected to 190 // keep the snap instance lock, thus preventing new apps of that snap from 191 // starting; note that we can still return false positive if the currently 192 // running process exits but we look at the hierarchy before systemd has 193 // cleaned up the group 194 195 debug("opening cgroup root dir at %s", cgroup_dir); 196 DIR *root SC_CLEANUP(sc_cleanup_closedir) = opendir(cgroup_dir); 197 if (root == NULL) { 198 if (errno == ENOENT) { 199 return false; 200 } 201 die("cannot open cgroup root dir"); 202 } 203 // traverse the cgroup hierarchy tree looking for other groups that 204 // correspond to the snap (i.e. their name matches the pattern), but skip 205 // our own group in the process 206 return traverse_looking_for_prefix_in_dir(root, tracking_group_name, just_leaf, 1); 207 } 208 209 static const char *self_cgroup = "/proc/self/cgroup"; 210 211 char *sc_cgroup_v2_own_path_full(void) { 212 FILE *in SC_CLEANUP(sc_cleanup_file) = fopen(self_cgroup, "r"); 213 if (in == NULL) { 214 die("cannot open %s", self_cgroup); 215 } 216 217 char *own_group = NULL; 218 219 while (true) { 220 char *line SC_CLEANUP(sc_cleanup_string) = NULL; 221 size_t linesz = 0; 222 ssize_t sz = getline(&line, &linesz, in); 223 if (sz < 0 && errno != 0) { 224 die("cannot read line from %s", self_cgroup); 225 } 226 if (sz < 0) { 227 // end of file 228 break; 229 } 230 if (!sc_startswith(line, "0::")) { 231 continue; 232 } 233 size_t len = strlen(line); 234 if (len <= 3) { 235 die("unexpected content of group entry %s", line); 236 } 237 // \n does not normally appear inside the group path, but if it did, it 238 // would be escaped anyway 239 char *newline = strchr(line, '\n'); 240 if (newline != NULL) { 241 *newline = '\0'; 242 } 243 own_group = sc_strdup(line + 3); 244 break; 245 } 246 return own_group; 247 }