github.com/tompreston/snapd@v0.0.0-20210817193607-954edfcb9611/cmd/libsnap-confine-private/cgroup-support.c (about)

     1  /*
     2   * Copyright (C) 2019-2021 Canonical Ltd
     3   *
     4   * This program is free software: you can redistribute it and/or modify
     5   * it under the terms of the GNU General Public License version 3 as
     6   * published by the Free Software Foundation.
     7   *
     8   * This program is distributed in the hope that it will be useful,
     9   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    10   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    11   * GNU General Public License for more details.
    12   *
    13   * You should have received a copy of the GNU General Public License
    14   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    15   *
    16   */
    17  
    18  #define _GNU_SOURCE
    19  
    20  #include "cgroup-support.h"
    21  
    22  #include <dirent.h>
    23  #include <errno.h>
    24  #include <fcntl.h>
    25  #include <stdio.h>
    26  #include <string.h>
    27  #include <sys/stat.h>
    28  #include <sys/types.h>
    29  #include <sys/vfs.h>
    30  #include <unistd.h>
    31  
    32  #include "cleanup-funcs.h"
    33  #include "string-utils.h"
    34  #include "utils.h"
    35  
    36  void sc_cgroup_create_and_join(const char *parent, const char *name, pid_t pid) {
    37      int parent_fd SC_CLEANUP(sc_cleanup_close) = -1;
    38      parent_fd = open(parent, O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC);
    39      if (parent_fd < 0) {
    40          die("cannot open cgroup hierarchy %s", parent);
    41      }
    42      // Since we may be running from a setuid but not setgid executable, switch
    43      // to the effective group to root so that the mkdirat call creates a cgroup
    44      // that is always owned by root.root.
    45      sc_identity old = sc_set_effective_identity(sc_root_group_identity());
    46      if (mkdirat(parent_fd, name, 0755) < 0 && errno != EEXIST) {
    47          die("cannot create cgroup hierarchy %s/%s", parent, name);
    48      }
    49      (void)sc_set_effective_identity(old);
    50      int hierarchy_fd SC_CLEANUP(sc_cleanup_close) = -1;
    51      hierarchy_fd = openat(parent_fd, name, O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC);
    52      if (hierarchy_fd < 0) {
    53          die("cannot open cgroup hierarchy %s/%s", parent, name);
    54      }
    55      // Open the cgroup.procs file.
    56      int procs_fd SC_CLEANUP(sc_cleanup_close) = -1;
    57      procs_fd = openat(hierarchy_fd, "cgroup.procs", O_WRONLY | O_NOFOLLOW | O_CLOEXEC);
    58      if (procs_fd < 0) {
    59          die("cannot open file %s/%s/cgroup.procs", parent, name);
    60      }
    61      // Write the process (task) number to the procs file. Linux task IDs are
    62      // limited to 2^29 so a long int is enough to represent it.
    63      // See include/linux/threads.h in the kernel source tree for details.
    64      char buf[22] = {0};  // 2^64 base10 + 2 for NUL and '-' for long
    65      int n = sc_must_snprintf(buf, sizeof buf, "%ld", (long)pid);
    66      if (write(procs_fd, buf, n) < n) {
    67          die("cannot move process %ld to cgroup hierarchy %s/%s", (long)pid, parent, name);
    68      }
    69      debug("moved process %ld to cgroup hierarchy %s/%s", (long)pid, parent, name);
    70  }
    71  
    72  static const char *cgroup_dir = "/sys/fs/cgroup";
    73  
    74  // from statfs(2)
    75  #ifndef CGROUP2_SUPER_MAGIC
    76  #define CGROUP2_SUPER_MAGIC 0x63677270
    77  #endif
    78  
    79  // Detect if we are running in cgroup v2 unified mode (as opposed to
    80  // hybrid or legacy) The algorithm is described in
    81  // https://systemd.io/CGROUP_DELEGATION/
    82  bool sc_cgroup_is_v2(void) {
    83      static bool did_warn = false;
    84      struct statfs buf;
    85  
    86      if (statfs(cgroup_dir, &buf) != 0) {
    87          if (errno == ENOENT) {
    88              return false;
    89          }
    90          die("cannot statfs %s", cgroup_dir);
    91      }
    92      if (buf.f_type == CGROUP2_SUPER_MAGIC) {
    93          if (!did_warn) {
    94              fprintf(stderr, "WARNING: cgroup v2 is not fully supported yet, proceeding with partial confinement\n");
    95              did_warn = true;
    96          }
    97          return true;
    98      }
    99      return false;
   100  }
   101  
   102  static const size_t max_traversal_depth = 32;
   103  
   104  static bool traverse_looking_for_prefix_in_dir(DIR *root, const char *prefix, const char *skip, size_t depth) {
   105      if (depth > max_traversal_depth) {
   106          die("cannot traverse cgroups hierarchy deeper than %zu levels", max_traversal_depth);
   107      }
   108      while (true) {
   109          errno = 0;
   110          struct dirent *ent = readdir(root);
   111          if (ent == NULL) {
   112              // is this an error?
   113              if (errno != 0) {
   114                  if (errno == ENOENT) {
   115                      // the processes may exit and the group entries may go away at
   116                      // any time
   117                      // the entries may go away at any time
   118                      break;
   119                  }
   120                  die("cannot read directory entry");
   121              }
   122              break;
   123          }
   124          if (ent->d_type != DT_DIR) {
   125              continue;
   126          }
   127          if (sc_streq(ent->d_name, "..") || sc_streq(ent->d_name, ".")) {
   128              // we don't want to go up or process the current directory again
   129              continue;
   130          }
   131          if (sc_streq(ent->d_name, skip)) {
   132              // we were asked to skip this group
   133              continue;
   134          }
   135          if (sc_startswith(ent->d_name, prefix)) {
   136              debug("found matching prefix in \"%s\"", ent->d_name);
   137              // the directory starts with our prefix
   138              return true;
   139          }
   140          // entfd is consumed by fdopendir() and freed with closedir()
   141          int entfd = openat(dirfd(root), ent->d_name, O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC);
   142          if (entfd == -1) {
   143              if (errno == ENOENT) {
   144                  // the processes may exit and the group entries may go away at
   145                  // any time
   146                  return false;
   147              }
   148              die("cannot open directory entry \"%s\"", ent->d_name);
   149          }
   150          // takes ownership of the file descriptor
   151          DIR *entdir SC_CLEANUP(sc_cleanup_closedir) = fdopendir(entfd);
   152          if (entdir == NULL) {
   153              // we have the fd, so ENOENT isn't possible here
   154              die("cannot fdopendir directory \"%s\"", ent->d_name);
   155          }
   156          bool found = traverse_looking_for_prefix_in_dir(entdir, prefix, skip, depth + 1);
   157          if (found == true) {
   158              return true;
   159          }
   160      }
   161      return false;
   162  }
   163  
   164  bool sc_cgroup_v2_is_tracking_snap(const char *snap_instance) {
   165      debug("is cgroup tracking snap %s?", snap_instance);
   166      char tracking_group_name[PATH_MAX] = {0};
   167      // tracking groups created by snap run chain have a format:
   168      // snap.<name>.<app>.<uuid>.scope, while the groups corresponding to snap
   169      // services created by systemd are named like this:
   170      // snap.<name>.<svc>.service
   171      sc_must_snprintf(tracking_group_name, sizeof tracking_group_name, "snap.%s.", snap_instance);
   172  
   173      // when running with cgroup v2, the snap run chain or systemd would create a
   174      // tracking cgroup which the current process would execute in and would
   175      // match the pattern we are looking for, thus it needs to be skipped
   176      char *own_group SC_CLEANUP(sc_cleanup_string) = sc_cgroup_v2_own_path_full();
   177      if (own_group == NULL) {
   178          die("cannot obtain own cgroup v2 group path");
   179      }
   180      debug("own group: %s", own_group);
   181      char *just_leaf = strrchr(own_group, '/');
   182      if (just_leaf == NULL) {
   183          die("cannot obtain the leaf group path");
   184      }
   185      // pointing at /, advance to the next char
   186      just_leaf += 1;
   187  
   188      // this would otherwise be inherently racy, but the caller is expected to
   189      // keep the snap instance lock, thus preventing new apps of that snap from
   190      // starting; note that we can still return false positive if the currently
   191      // running process exits but we look at the hierarchy before systemd has
   192      // cleaned up the group
   193  
   194      debug("opening cgroup root dir at %s", cgroup_dir);
   195      DIR *root SC_CLEANUP(sc_cleanup_closedir) = opendir(cgroup_dir);
   196      if (root == NULL) {
   197          if (errno == ENOENT) {
   198              return false;
   199          }
   200          die("cannot open cgroup root dir");
   201      }
   202      // traverse the cgroup hierarchy tree looking for other groups that
   203      // correspond to the snap (i.e. their name matches the pattern), but skip
   204      // our own group in the process
   205      return traverse_looking_for_prefix_in_dir(root, tracking_group_name, just_leaf, 1);
   206  }
   207  
   208  static const char *self_cgroup = "/proc/self/cgroup";
   209  
   210  char *sc_cgroup_v2_own_path_full(void) {
   211      FILE *in SC_CLEANUP(sc_cleanup_file) = fopen(self_cgroup, "r");
   212      if (in == NULL) {
   213          die("cannot open %s", self_cgroup);
   214      }
   215  
   216      char *own_group = NULL;
   217  
   218      while (true) {
   219          char *line SC_CLEANUP(sc_cleanup_string) = NULL;
   220          size_t linesz = 0;
   221          ssize_t sz = getline(&line, &linesz, in);
   222          if (sz < 0 && errno != 0) {
   223              die("cannot read line from %s", self_cgroup);
   224          }
   225          if (sz < 0) {
   226              // end of file
   227              break;
   228          }
   229          if (!sc_startswith(line, "0::")) {
   230              continue;
   231          }
   232          size_t len = strlen(line);
   233          if (len <= 3) {
   234              die("unexpected content of group entry %s", line);
   235          }
   236          // \n does not normally appear inside the group path, but if it did, it
   237          // would be escaped anyway
   238          char *newline = strchr(line, '\n');
   239          if (newline != NULL) {
   240              *newline = '\0';
   241          }
   242          own_group = sc_strdup(line + 3);
   243          break;
   244      }
   245      return own_group;
   246  }