github.com/anonymouse64/snapd@v0.0.0-20210824153203-04c4c42d842d/cmd/libsnap-confine-private/cgroup-support.c (about)

     1  /*
     2   * Copyright (C) 2019-2021 Canonical Ltd
     3   *
     4   * This program is free software: you can redistribute it and/or modify
     5   * it under the terms of the GNU General Public License version 3 as
     6   * published by the Free Software Foundation.
     7   *
     8   * This program is distributed in the hope that it will be useful,
     9   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    10   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    11   * GNU General Public License for more details.
    12   *
    13   * You should have received a copy of the GNU General Public License
    14   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    15   *
    16   */
    17  
    18  #define _GNU_SOURCE
    19  
    20  #include "cgroup-support.h"
    21  
    22  #include <dirent.h>
    23  #include <errno.h>
    24  #include <fcntl.h>
    25  #include <stdio.h>
    26  #include <string.h>
    27  #include <sys/stat.h>
    28  #include <sys/types.h>
    29  #include <sys/vfs.h>
    30  #include <unistd.h>
    31  
    32  #include "cleanup-funcs.h"
    33  #include "string-utils.h"
    34  #include "utils.h"
    35  
    36  void sc_cgroup_create_and_join(const char *parent, const char *name, pid_t pid) {
    37      int parent_fd SC_CLEANUP(sc_cleanup_close) = -1;
    38      parent_fd = open(parent, O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC);
    39      if (parent_fd < 0) {
    40          die("cannot open cgroup hierarchy %s", parent);
    41      }
    42      // Since we may be running from a setuid but not setgid executable, switch
    43      // to the effective group to root so that the mkdirat call creates a cgroup
    44      // that is always owned by root.root.
    45      sc_identity old = sc_set_effective_identity(sc_root_group_identity());
    46      if (mkdirat(parent_fd, name, 0755) < 0 && errno != EEXIST) {
    47          die("cannot create cgroup hierarchy %s/%s", parent, name);
    48      }
    49      (void)sc_set_effective_identity(old);
    50      int hierarchy_fd SC_CLEANUP(sc_cleanup_close) = -1;
    51      hierarchy_fd = openat(parent_fd, name, O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC);
    52      if (hierarchy_fd < 0) {
    53          die("cannot open cgroup hierarchy %s/%s", parent, name);
    54      }
    55      // Open the cgroup.procs file.
    56      int procs_fd SC_CLEANUP(sc_cleanup_close) = -1;
    57      procs_fd = openat(hierarchy_fd, "cgroup.procs", O_WRONLY | O_NOFOLLOW | O_CLOEXEC);
    58      if (procs_fd < 0) {
    59          die("cannot open file %s/%s/cgroup.procs", parent, name);
    60      }
    61      // Write the process (task) number to the procs file. Linux task IDs are
    62      // limited to 2^29 so a long int is enough to represent it.
    63      // See include/linux/threads.h in the kernel source tree for details.
    64      char buf[22] = {0};  // 2^64 base10 + 2 for NUL and '-' for long
    65      int n = sc_must_snprintf(buf, sizeof buf, "%ld", (long)pid);
    66      if (write(procs_fd, buf, n) < n) {
    67          die("cannot move process %ld to cgroup hierarchy %s/%s", (long)pid, parent, name);
    68      }
    69      debug("moved process %ld to cgroup hierarchy %s/%s", (long)pid, parent, name);
    70  }
    71  
    72  static const char *cgroup_dir = "/sys/fs/cgroup";
    73  
    74  // from statfs(2)
    75  #ifndef CGROUP2_SUPER_MAGIC
    76  #define CGROUP2_SUPER_MAGIC 0x63677270
    77  #endif
    78  
    79  // Detect if we are running in cgroup v2 unified mode (as opposed to
    80  // hybrid or legacy) The algorithm is described in
    81  // https://systemd.io/CGROUP_DELEGATION/
    82  bool sc_cgroup_is_v2(void) {
    83      bool hide_warning = getenv_bool("SNAPD_HIDE_CGROUPV2_WARNING", false);
    84      static bool did_warn = false;
    85      struct statfs buf;
    86  
    87      if (statfs(cgroup_dir, &buf) != 0) {
    88          if (errno == ENOENT) {
    89              return false;
    90          }
    91          die("cannot statfs %s", cgroup_dir);
    92      }
    93      if (buf.f_type == CGROUP2_SUPER_MAGIC) {
    94          if (!did_warn && !hide_warning) {
    95              fprintf(stderr, "WARNING: cgroup v2 is not fully supported yet, proceeding with partial confinement\n");
    96              did_warn = true;
    97          }
    98          return true;
    99      }
   100      return false;
   101  }
   102  
   103  static const size_t max_traversal_depth = 32;
   104  
   105  static bool traverse_looking_for_prefix_in_dir(DIR *root, const char *prefix, const char *skip, size_t depth) {
   106      if (depth > max_traversal_depth) {
   107          die("cannot traverse cgroups hierarchy deeper than %zu levels", max_traversal_depth);
   108      }
   109      while (true) {
   110          errno = 0;
   111          struct dirent *ent = readdir(root);
   112          if (ent == NULL) {
   113              // is this an error?
   114              if (errno != 0) {
   115                  if (errno == ENOENT) {
   116                      // the processes may exit and the group entries may go away at
   117                      // any time
   118                      // the entries may go away at any time
   119                      break;
   120                  }
   121                  die("cannot read directory entry");
   122              }
   123              break;
   124          }
   125          if (ent->d_type != DT_DIR) {
   126              continue;
   127          }
   128          if (sc_streq(ent->d_name, "..") || sc_streq(ent->d_name, ".")) {
   129              // we don't want to go up or process the current directory again
   130              continue;
   131          }
   132          if (sc_streq(ent->d_name, skip)) {
   133              // we were asked to skip this group
   134              continue;
   135          }
   136          if (sc_startswith(ent->d_name, prefix)) {
   137              debug("found matching prefix in \"%s\"", ent->d_name);
   138              // the directory starts with our prefix
   139              return true;
   140          }
   141          // entfd is consumed by fdopendir() and freed with closedir()
   142          int entfd = openat(dirfd(root), ent->d_name, O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC);
   143          if (entfd == -1) {
   144              if (errno == ENOENT) {
   145                  // the processes may exit and the group entries may go away at
   146                  // any time
   147                  return false;
   148              }
   149              die("cannot open directory entry \"%s\"", ent->d_name);
   150          }
   151          // takes ownership of the file descriptor
   152          DIR *entdir SC_CLEANUP(sc_cleanup_closedir) = fdopendir(entfd);
   153          if (entdir == NULL) {
   154              // we have the fd, so ENOENT isn't possible here
   155              die("cannot fdopendir directory \"%s\"", ent->d_name);
   156          }
   157          bool found = traverse_looking_for_prefix_in_dir(entdir, prefix, skip, depth + 1);
   158          if (found == true) {
   159              return true;
   160          }
   161      }
   162      return false;
   163  }
   164  
   165  bool sc_cgroup_v2_is_tracking_snap(const char *snap_instance) {
   166      debug("is cgroup tracking snap %s?", snap_instance);
   167      char tracking_group_name[PATH_MAX] = {0};
   168      // tracking groups created by snap run chain have a format:
   169      // snap.<name>.<app>.<uuid>.scope, while the groups corresponding to snap
   170      // services created by systemd are named like this:
   171      // snap.<name>.<svc>.service
   172      sc_must_snprintf(tracking_group_name, sizeof tracking_group_name, "snap.%s.", snap_instance);
   173  
   174      // when running with cgroup v2, the snap run chain or systemd would create a
   175      // tracking cgroup which the current process would execute in and would
   176      // match the pattern we are looking for, thus it needs to be skipped
   177      char *own_group SC_CLEANUP(sc_cleanup_string) = sc_cgroup_v2_own_path_full();
   178      if (own_group == NULL) {
   179          die("cannot obtain own cgroup v2 group path");
   180      }
   181      debug("own group: %s", own_group);
   182      char *just_leaf = strrchr(own_group, '/');
   183      if (just_leaf == NULL) {
   184          die("cannot obtain the leaf group path");
   185      }
   186      // pointing at /, advance to the next char
   187      just_leaf += 1;
   188  
   189      // this would otherwise be inherently racy, but the caller is expected to
   190      // keep the snap instance lock, thus preventing new apps of that snap from
   191      // starting; note that we can still return false positive if the currently
   192      // running process exits but we look at the hierarchy before systemd has
   193      // cleaned up the group
   194  
   195      debug("opening cgroup root dir at %s", cgroup_dir);
   196      DIR *root SC_CLEANUP(sc_cleanup_closedir) = opendir(cgroup_dir);
   197      if (root == NULL) {
   198          if (errno == ENOENT) {
   199              return false;
   200          }
   201          die("cannot open cgroup root dir");
   202      }
   203      // traverse the cgroup hierarchy tree looking for other groups that
   204      // correspond to the snap (i.e. their name matches the pattern), but skip
   205      // our own group in the process
   206      return traverse_looking_for_prefix_in_dir(root, tracking_group_name, just_leaf, 1);
   207  }
   208  
   209  static const char *self_cgroup = "/proc/self/cgroup";
   210  
   211  char *sc_cgroup_v2_own_path_full(void) {
   212      FILE *in SC_CLEANUP(sc_cleanup_file) = fopen(self_cgroup, "r");
   213      if (in == NULL) {
   214          die("cannot open %s", self_cgroup);
   215      }
   216  
   217      char *own_group = NULL;
   218  
   219      while (true) {
   220          char *line SC_CLEANUP(sc_cleanup_string) = NULL;
   221          size_t linesz = 0;
   222          ssize_t sz = getline(&line, &linesz, in);
   223          if (sz < 0 && errno != 0) {
   224              die("cannot read line from %s", self_cgroup);
   225          }
   226          if (sz < 0) {
   227              // end of file
   228              break;
   229          }
   230          if (!sc_startswith(line, "0::")) {
   231              continue;
   232          }
   233          size_t len = strlen(line);
   234          if (len <= 3) {
   235              die("unexpected content of group entry %s", line);
   236          }
   237          // \n does not normally appear inside the group path, but if it did, it
   238          // would be escaped anyway
   239          char *newline = strchr(line, '\n');
   240          if (newline != NULL) {
   241              *newline = '\0';
   242          }
   243          own_group = sc_strdup(line + 3);
   244          break;
   245      }
   246      return own_group;
   247  }