gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/test/syscalls/linux/proc.cc (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include <elf.h>
    16  #include <errno.h>
    17  #include <fcntl.h>
    18  #include <limits.h>
    19  #include <linux/magic.h>
    20  #include <sched.h>
    21  #include <signal.h>
    22  #include <stddef.h>
    23  #include <stdint.h>
    24  #include <stdio.h>
    25  #include <stdlib.h>
    26  #include <string.h>
    27  #include <sys/mman.h>
    28  #include <sys/prctl.h>
    29  #include <sys/ptrace.h>
    30  #include <sys/stat.h>
    31  #include <sys/statfs.h>
    32  #include <sys/utsname.h>
    33  #include <syscall.h>
    34  #include <unistd.h>
    35  
    36  #include <algorithm>
    37  #include <atomic>
    38  #include <functional>
    39  #include <iostream>
    40  #include <iterator>
    41  #include <map>
    42  #include <memory>
    43  #include <ostream>
    44  #include <regex>
    45  #include <string>
    46  #include <unordered_set>
    47  #include <utility>
    48  #include <vector>
    49  
    50  #include "gmock/gmock.h"
    51  #include "gtest/gtest.h"
    52  #include "absl/algorithm/container.h"
    53  #include "absl/container/btree_map.h"
    54  #include "absl/container/flat_hash_set.h"
    55  #include "absl/container/node_hash_set.h"
    56  #include "absl/flags/flag.h"
    57  #include "absl/strings/ascii.h"
    58  #include "absl/strings/match.h"
    59  #include "absl/strings/numbers.h"
    60  #include "absl/strings/str_cat.h"
    61  #include "absl/strings/str_format.h"
    62  #include "absl/strings/str_join.h"
    63  #include "absl/strings/str_split.h"
    64  #include "absl/strings/string_view.h"
    65  #include "absl/synchronization/mutex.h"
    66  #include "absl/synchronization/notification.h"
    67  #include "absl/time/clock.h"
    68  #include "absl/time/time.h"
    69  #include "test/util/capability_util.h"
    70  #include "test/util/cleanup.h"
    71  #include "test/util/eventfd_util.h"
    72  #include "test/util/file_descriptor.h"
    73  #include "test/util/fs_util.h"
    74  #include "test/util/memory_util.h"
    75  #include "test/util/mount_util.h"
    76  #include "test/util/multiprocess_util.h"
    77  #include "test/util/posix_error.h"
    78  #include "test/util/proc_util.h"
    79  #include "test/util/temp_path.h"
    80  #include "test/util/test_util.h"
    81  #include "test/util/thread_util.h"
    82  #include "test/util/time_util.h"
    83  #include "test/util/timer_util.h"
    84  
// NOTE(magi): No, this isn't really a syscall, but this is a really simple
// way to get it tested on gVisor, PTrace and Linux alike.
    87  
    88  using ::testing::AllOf;
    89  using ::testing::AnyOf;
    90  using ::testing::ContainerEq;
    91  using ::testing::Contains;
    92  using ::testing::ContainsRegex;
    93  using ::testing::Eq;
    94  using ::testing::Gt;
    95  using ::testing::HasSubstr;
    96  using ::testing::IsSupersetOf;
    97  using ::testing::Pair;
    98  using ::testing::StartsWith;
    99  using ::testing::UnorderedElementsAre;
   100  using ::testing::UnorderedElementsAreArray;
   101  
   102  // Exported by glibc.
   103  extern char** environ;
   104  
   105  ABSL_FLAG(bool, proc_pid_reuse_child, false,
   106            "If true, run the Proc_PidReuse child workload.");
   107  
   108  namespace gvisor {
   109  namespace testing {
   110  namespace {
   111  
   112  #ifndef SUID_DUMP_DISABLE
   113  #define SUID_DUMP_DISABLE 0
   114  #endif /* SUID_DUMP_DISABLE */
   115  #ifndef SUID_DUMP_USER
   116  #define SUID_DUMP_USER 1
   117  #endif /* SUID_DUMP_USER */
   118  #ifndef SUID_DUMP_ROOT
   119  #define SUID_DUMP_ROOT 2
   120  #endif /* SUID_DUMP_ROOT */
   121  
   122  #if defined(__x86_64__) || defined(__i386__)
   123  // This list of "required" fields is taken from reading the file
   124  // arch/x86/kernel/cpu/proc.c and seeing which fields will be unconditionally
   125  // printed by the kernel.
   126  static const char* required_fields[] = {
   127      "processor",
   128      "vendor_id",
   129      "cpu family",
   130      "model\t\t:",
   131      "model name",
   132      "stepping",
   133      "cpu MHz",
   134      "physical id",
   135      "siblings",
   136      "core id",
   137      "cpu cores",
   138      "apicid\t\t:",
   139      "initial apicid",
   140      "fpu\t\t:",
   141      "fpu_exception",
   142      "cpuid level",
   143      "wp",
   144      "bogomips",
   145      "clflush size",
   146      "cache_alignment",
   147      "address sizes",
   148      "power management",
   149  };
   150  #elif defined(__aarch64__)
   151  // This list of "required" fields is taken from reading the file
   152  // arch/arm64/kernel/cpuinfo.c and seeing which fields will be unconditionally
   153  // printed by the kernel.
   154  static const char* required_fields[] = {
   155      "processor",        "BogoMIPS",    "Features", "CPU implementer",
   156      "CPU architecture", "CPU variant", "CPU part", "CPU revision",
   157  };
   158  #elif defined(__riscv)
   159  // This list of "required" fields is taken from reading the file
   160  // arch/riscv/kernel/cpu.c and seeing which fields will be unconditionally
   161  // printed by the kernel.
   162  static const char* required_fields[] = {
   163      "processor",
   164      "hart",
   165  };
   166  #else
   167  #error "Unknown architecture"
   168  #endif
   169  
// Callback invoked by WithSubprocess; its single int argument is the pid of
// the forked subprocess.
// If it returns !OK, WithSubprocess stops and returns that error.
using SubprocessCallback = std::function<PosixError(int)>;

// NOTE(review): presumably a saved copy of the test binary's argv; neither the
// code that fills it nor the code that reads it is visible in this part of the
// file -- confirm before relying on it.
std::vector<std::string> saved_argv;  // NOLINT
   175  
   176  // Helper function to dump /proc/{pid}/status and check the
   177  // state data. State should = "Z" for zombied or "RSD" for
   178  // running, interruptible sleeping (S), or uninterruptible sleep
   179  // (D).
   180  void CompareProcessState(absl::string_view state, int pid) {
   181    auto status_file = ASSERT_NO_ERRNO_AND_VALUE(
   182        GetContents(absl::StrCat("/proc/", pid, "/status")));
   183    // N.B. POSIX extended regexes don't support shorthand character classes (\w)
   184    // inside of brackets.
   185    EXPECT_THAT(status_file,
   186                ContainsRegex(absl::StrCat("State:.[", state,
   187                                           R"EOL(]\s+\([a-zA-Z ]+\))EOL")));
   188  }
   189  
   190  // Run callbacks while a subprocess is running, zombied, and/or exited.
   191  PosixError WithSubprocess(SubprocessCallback const& running,
   192                            SubprocessCallback const& zombied,
   193                            SubprocessCallback const& exited) {
   194    int pipe_fds[2] = {};
   195    if (pipe(pipe_fds) < 0) {
   196      return PosixError(errno, "pipe");
   197    }
   198  
   199    int child_pid = fork();
   200    if (child_pid < 0) {
   201      return PosixError(errno, "fork");
   202    }
   203  
   204    if (child_pid == 0) {
   205      close(pipe_fds[0]);    // Close the read end.
   206      const DisableSave ds;  // Timing issues.
   207  
   208      // Write to the pipe to tell it we're ready.
   209      char buf = 'a';
   210      int res = 0;
   211      res = WriteFd(pipe_fds[1], &buf, sizeof(buf));
   212      TEST_CHECK_MSG(res == sizeof(buf), "Write failure in subprocess");
   213  
   214      while (true) {
   215        SleepSafe(absl::Milliseconds(100));
   216      }
   217    }
   218  
   219    close(pipe_fds[1]);  // Close the write end.
   220  
   221    int status = 0;
   222    auto wait_cleanup = Cleanup([child_pid, &status] {
   223      EXPECT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
   224    });
   225    auto kill_cleanup = Cleanup([child_pid] {
   226      EXPECT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds());
   227    });
   228  
   229    // Wait for the child.
   230    char buf = 0;
   231    int res = ReadFd(pipe_fds[0], &buf, sizeof(buf));
   232    if (res < 0) {
   233      return PosixError(errno, "Read from pipe");
   234    } else if (res == 0) {
   235      return PosixError(EPIPE, "Unable to read from pipe: EOF");
   236    }
   237  
   238    if (running) {
   239      // The first arg, RSD, refers to a "running process", or a process with a
   240      // state of Running (R), Interruptable Sleep (S) or Uninterruptable
   241      // Sleep (D).
   242      CompareProcessState("RSD", child_pid);
   243      RETURN_IF_ERRNO(running(child_pid));
   244    }
   245  
   246    // Kill the process.
   247    kill_cleanup.Release()();
   248    siginfo_t info;
   249    // Wait until the child process has exited (WEXITED flag) but don't
   250    // reap the child (WNOWAIT flag).
   251    EXPECT_THAT(waitid(P_PID, child_pid, &info, WNOWAIT | WEXITED),
   252                SyscallSucceeds());
   253  
   254    if (zombied) {
   255      // Arg of "Z" refers to a Zombied Process.
   256      CompareProcessState("Z", child_pid);
   257      RETURN_IF_ERRNO(zombied(child_pid));
   258    }
   259  
   260    // Wait on the process.
   261    wait_cleanup.Release()();
   262    // If the process is reaped, then then this should return
   263    // with ECHILD.
   264    EXPECT_THAT(waitpid(child_pid, &status, WNOHANG),
   265                SyscallFailsWithErrno(ECHILD));
   266  
   267    if (exited) {
   268      RETURN_IF_ERRNO(exited(child_pid));
   269    }
   270  
   271    return NoError();
   272  }
   273  
   274  // Access the file returned by name when a subprocess is running.
   275  PosixError AccessWhileRunning(std::function<std::string(int pid)> name,
   276                                int flags, std::function<void(int fd)> access) {
   277    FileDescriptor fd;
   278    return WithSubprocess(
   279        [&](int pid) -> PosixError {
   280          // Running.
   281          ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags));
   282  
   283          access(fd.get());
   284          return NoError();
   285        },
   286        nullptr, nullptr);
   287  }
   288  
   289  // Access the file returned by name when the a subprocess is zombied.
   290  PosixError AccessWhileZombied(std::function<std::string(int pid)> name,
   291                                int flags, std::function<void(int fd)> access) {
   292    FileDescriptor fd;
   293    return WithSubprocess(
   294        [&](int pid) -> PosixError {
   295          // Running.
   296          ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags));
   297          return NoError();
   298        },
   299        [&](int pid) -> PosixError {
   300          // Zombied.
   301          access(fd.get());
   302          return NoError();
   303        },
   304        nullptr);
   305  }
   306  
   307  // Access the file returned by name when the a subprocess is exited.
   308  PosixError AccessWhileExited(std::function<std::string(int pid)> name,
   309                               int flags, std::function<void(int fd)> access) {
   310    FileDescriptor fd;
   311    return WithSubprocess(
   312        [&](int pid) -> PosixError {
   313          // Running.
   314          ASSIGN_OR_RETURN_ERRNO(fd, Open(name(pid), flags));
   315          return NoError();
   316        },
   317        nullptr,
   318        [&](int pid) -> PosixError {
   319          // Exited.
   320          access(fd.get());
   321          return NoError();
   322        });
   323  }
   324  
   325  // ReadFd(fd=/proc/PID/basename) while PID is running.
   326  int ReadWhileRunning(std::string const& basename, void* buf, size_t count) {
   327    int ret = 0;
   328    int err = 0;
   329    EXPECT_NO_ERRNO(AccessWhileRunning(
   330        [&](int pid) -> std::string {
   331          return absl::StrCat("/proc/", pid, "/", basename);
   332        },
   333        O_RDONLY,
   334        [&](int fd) {
   335          ret = ReadFd(fd, buf, count);
   336          err = errno;
   337        }));
   338    errno = err;
   339    return ret;
   340  }
   341  
   342  // ReadFd(fd=/proc/PID/basename) while PID is zombied.
   343  int ReadWhileZombied(std::string const& basename, void* buf, size_t count) {
   344    int ret = 0;
   345    int err = 0;
   346    EXPECT_NO_ERRNO(AccessWhileZombied(
   347        [&](int pid) -> std::string {
   348          return absl::StrCat("/proc/", pid, "/", basename);
   349        },
   350        O_RDONLY,
   351        [&](int fd) {
   352          ret = ReadFd(fd, buf, count);
   353          err = errno;
   354        }));
   355    errno = err;
   356    return ret;
   357  }
   358  
   359  // ReadFd(fd=/proc/PID/basename) while PID is exited.
   360  int ReadWhileExited(std::string const& basename, void* buf, size_t count) {
   361    int ret = 0;
   362    int err = 0;
   363    EXPECT_NO_ERRNO(AccessWhileExited(
   364        [&](int pid) -> std::string {
   365          return absl::StrCat("/proc/", pid, "/", basename);
   366        },
   367        O_RDONLY,
   368        [&](int fd) {
   369          ret = ReadFd(fd, buf, count);
   370          err = errno;
   371        }));
   372    errno = err;
   373    return ret;
   374  }
   375  
   376  // readlinkat(fd=/proc/PID/, basename) while PID is running.
   377  int ReadlinkWhileRunning(std::string const& basename, char* buf, size_t count) {
   378    int ret = 0;
   379    int err = 0;
   380    EXPECT_NO_ERRNO(AccessWhileRunning(
   381        [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); },
   382        O_DIRECTORY,
   383        [&](int fd) {
   384          ret = readlinkat(fd, basename.c_str(), buf, count);
   385          err = errno;
   386        }));
   387    errno = err;
   388    return ret;
   389  }
   390  
   391  // readlinkat(fd=/proc/PID/, basename) while PID is zombied.
   392  int ReadlinkWhileZombied(std::string const& basename, char* buf, size_t count) {
   393    int ret = 0;
   394    int err = 0;
   395    EXPECT_NO_ERRNO(AccessWhileZombied(
   396        [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); },
   397        O_DIRECTORY,
   398        [&](int fd) {
   399          ret = readlinkat(fd, basename.c_str(), buf, count);
   400          err = errno;
   401        }));
   402    errno = err;
   403    return ret;
   404  }
   405  
   406  // readlinkat(fd=/proc/PID/, basename) while PID is exited.
   407  int ReadlinkWhileExited(std::string const& basename, char* buf, size_t count) {
   408    int ret = 0;
   409    int err = 0;
   410    EXPECT_NO_ERRNO(AccessWhileExited(
   411        [&](int pid) -> std::string { return absl::StrCat("/proc/", pid, "/"); },
   412        O_DIRECTORY,
   413        [&](int fd) {
   414          ret = readlinkat(fd, basename.c_str(), buf, count);
   415          err = errno;
   416        }));
   417    errno = err;
   418    return ret;
   419  }
   420  
   421  void RemoveUnstableCPUInfoFields(std::vector<std::string>& cpu_info_fields) {
   422    const std::vector<std::string> unstable_fields{"cpu MHz", "bogomips"};
   423    auto it = cpu_info_fields.begin();
   424    while (it != cpu_info_fields.end()) {
   425      bool found = false;
   426      for (const std::string& unstable_field : unstable_fields) {
   427        if (it->find(unstable_field) != std::string::npos) {
   428          found = true;
   429          break;
   430        }
   431      }
   432      if (found) {
   433        it = cpu_info_fields.erase(it);
   434      } else {
   435        ++it;
   436      }
   437    }
   438  }
   439  
   440  TEST(ProcTest, NotFoundInRoot) {
   441    struct stat s;
   442    EXPECT_THAT(stat("/proc/foobar", &s), SyscallFailsWithErrno(ENOENT));
   443  }
   444  
   445  TEST(ProcSelfTest, IsThreadGroupLeader) {
   446    ScopedThread([] {
   447      const pid_t tgid = getpid();
   448      const pid_t tid = syscall(SYS_gettid);
   449      EXPECT_NE(tgid, tid);
   450      auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self"));
   451      EXPECT_EQ(link, absl::StrCat(tgid));
   452    });
   453  }
   454  
   455  TEST(ProcThreadSelfTest, Basic) {
   456    const pid_t tgid = getpid();
   457    const pid_t tid = syscall(SYS_gettid);
   458    EXPECT_EQ(tgid, tid);
   459    auto link_threadself =
   460        ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self"));
   461    EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid));
   462    // Just read one file inside thread-self to ensure that the link is valid.
   463    auto link_threadself_exe =
   464        ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe"));
   465    auto link_procself_exe =
   466        ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe"));
   467    EXPECT_EQ(link_threadself_exe, link_procself_exe);
   468  }
   469  
   470  TEST(ProcThreadSelfTest, Thread) {
   471    ScopedThread([] {
   472      const pid_t tgid = getpid();
   473      const pid_t tid = syscall(SYS_gettid);
   474      EXPECT_NE(tgid, tid);
   475      auto link_threadself =
   476          ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self"));
   477  
   478      EXPECT_EQ(link_threadself, absl::StrCat(tgid, "/task/", tid));
   479      // Just read one file inside thread-self to ensure that the link is valid.
   480      auto link_threadself_exe =
   481          ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/thread-self/exe"));
   482      auto link_procself_exe =
   483          ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe"));
   484      EXPECT_EQ(link_threadself_exe, link_procself_exe);
   485      // A thread should not have "/proc/<tid>/task".
   486      struct stat s;
   487      EXPECT_THAT(stat("/proc/thread-self/task", &s),
   488                  SyscallFailsWithErrno(ENOENT));
   489    });
   490  }
   491  
   492  // Returns the /proc/PID/maps entry for the MAP_PRIVATE | MAP_ANONYMOUS mapping
   493  // m with start address addr and length len.
   494  std::string AnonymousMapsEntry(uintptr_t addr, size_t len, int prot) {
   495    return absl::StrCat(absl::Hex(addr, absl::PadSpec::kZeroPad8), "-",
   496                        absl::Hex(addr + len, absl::PadSpec::kZeroPad8), " ",
   497                        prot & PROT_READ ? "r" : "-",
   498                        prot & PROT_WRITE ? "w" : "-",
   499                        prot & PROT_EXEC ? "x" : "-", "p 00000000 00:00 0 ");
   500  }
   501  
   502  std::string AnonymousMapsEntryForMapping(const Mapping& m, int prot) {
   503    return AnonymousMapsEntry(m.addr(), m.len(), prot);
   504  }
   505  
   506  PosixErrorOr<std::map<uint64_t, uint64_t>> ReadProcSelfAuxv() {
   507    std::string auxv_file;
   508    RETURN_IF_ERRNO(GetContents("/proc/self/auxv", &auxv_file));
   509    const Elf64_auxv_t* auxv_data =
   510        reinterpret_cast<const Elf64_auxv_t*>(auxv_file.data());
   511    std::map<uint64_t, uint64_t> auxv_entries;
   512    for (int i = 0; auxv_data[i].a_type != AT_NULL; i++) {
   513      auto a_type = auxv_data[i].a_type;
   514      EXPECT_EQ(0, auxv_entries.count(a_type)) << "a_type: " << a_type;
   515      auxv_entries.emplace(a_type, auxv_data[i].a_un.a_val);
   516    }
   517    return auxv_entries;
   518  }
   519  
   520  TEST(ProcSelfAuxv, EntryPresence) {
   521    auto auxv_entries = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv());
   522  
   523    EXPECT_EQ(auxv_entries.count(AT_ENTRY), 1);
   524    EXPECT_EQ(auxv_entries.count(AT_PHDR), 1);
   525    EXPECT_EQ(auxv_entries.count(AT_PHENT), 1);
   526    EXPECT_EQ(auxv_entries.count(AT_PHNUM), 1);
   527    EXPECT_EQ(auxv_entries.count(AT_BASE), 1);
   528    EXPECT_EQ(auxv_entries.count(AT_UID), 1);
   529    EXPECT_EQ(auxv_entries.count(AT_EUID), 1);
   530    EXPECT_EQ(auxv_entries.count(AT_GID), 1);
   531    EXPECT_EQ(auxv_entries.count(AT_EGID), 1);
   532    EXPECT_EQ(auxv_entries.count(AT_SECURE), 1);
   533    EXPECT_EQ(auxv_entries.count(AT_CLKTCK), 1);
   534    EXPECT_EQ(auxv_entries.count(AT_RANDOM), 1);
   535    EXPECT_EQ(auxv_entries.count(AT_EXECFN), 1);
   536    EXPECT_EQ(auxv_entries.count(AT_PAGESZ), 1);
   537    EXPECT_EQ(auxv_entries.count(AT_SYSINFO_EHDR), 1);
   538  }
   539  
   540  TEST(ProcSelfAuxv, EntryValues) {
   541    auto proc_auxv = ASSERT_NO_ERRNO_AND_VALUE(ReadProcSelfAuxv());
   542  
   543    // We need to find the ELF auxiliary vector. The section of memory pointed to
   544    // by envp contains some pointers to non-null pointers, followed by a single
   545    // pointer to a null pointer, followed by the auxiliary vector.
   546    char** envpi = environ;
   547    while (*envpi) {
   548      ++envpi;
   549    }
   550  
   551    const Elf64_auxv_t* envp_auxv =
   552        reinterpret_cast<const Elf64_auxv_t*>(envpi + 1);
   553    int i;
   554    for (i = 0; envp_auxv[i].a_type != AT_NULL; i++) {
   555      auto a_type = envp_auxv[i].a_type;
   556      EXPECT_EQ(proc_auxv.count(a_type), 1);
   557      EXPECT_EQ(proc_auxv[a_type], envp_auxv[i].a_un.a_val)
   558          << "a_type: " << a_type;
   559    }
   560    EXPECT_EQ(i, proc_auxv.size());
   561  }
   562  
   563  // Just open and read a part of /proc/self/mem, check that we can read an item.
   564  TEST(ProcPidMem, Read) {
   565    auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY));
   566    char input[] = "hello-world";
   567    char output[sizeof(input)];
   568    ASSERT_THAT(pread(memfd.get(), output, sizeof(output),
   569                      reinterpret_cast<off_t>(input)),
   570                SyscallSucceedsWithValue(sizeof(input)));
   571    ASSERT_STREQ(input, output);
   572  }
   573  
   574  // Perform read on an unmapped region.
   575  TEST(ProcPidMem, Unmapped) {
   576    // Strategy: map then unmap, so we have a guaranteed unmapped region
   577    auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY));
   578    Mapping mapping = ASSERT_NO_ERRNO_AND_VALUE(
   579        MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
   580    // Fill it with things
   581    memset(mapping.ptr(), 'x', mapping.len());
   582    char expected = 'x', output;
   583    ASSERT_THAT(pread(memfd.get(), &output, sizeof(output),
   584                      reinterpret_cast<off_t>(mapping.ptr())),
   585                SyscallSucceedsWithValue(sizeof(output)));
   586    ASSERT_EQ(expected, output);
   587  
   588    const auto rest = [&] {
   589      // This is a new process, so we need to re-open /proc/self/mem.
   590      int memfd = open("/proc/self/mem", O_RDONLY);
   591      TEST_PCHECK_MSG(memfd >= 0, "open failed");
   592      // Unmap region again
   593      TEST_PCHECK_MSG(MunmapSafe(mapping.ptr(), mapping.len()) == 0,
   594                      "munmap failed");
   595      // Now we want EIO error
   596      TEST_CHECK(pread(memfd, &output, sizeof(output),
   597                       reinterpret_cast<off_t>(mapping.ptr())) == -1);
   598      TEST_PCHECK_MSG(errno == EIO, "pread failed with unexpected errno");
   599    };
   600  
   601    EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
   602  }
   603  
   604  // Perform read repeatedly to verify offset change.
   605  TEST(ProcPidMem, RepeatedRead) {
   606    auto const num_reads = 3;
   607    char expected[] = "01234567890abcdefghijkl";
   608    char output[sizeof(expected) / num_reads];
   609  
   610    auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY));
   611    ASSERT_THAT(lseek(memfd.get(), reinterpret_cast<off_t>(&expected), SEEK_SET),
   612                SyscallSucceedsWithValue(reinterpret_cast<off_t>(&expected)));
   613    for (auto i = 0; i < num_reads; i++) {
   614      ASSERT_THAT(read(memfd.get(), &output, sizeof(output)),
   615                  SyscallSucceedsWithValue(sizeof(output)));
   616      ASSERT_EQ(strncmp(&expected[i * sizeof(output)], output, sizeof(output)),
   617                0);
   618    }
   619  }
   620  
   621  // Perform seek operations repeatedly.
   622  TEST(ProcPidMem, RepeatedSeek) {
   623    auto const num_reads = 3;
   624    char expected[] = "01234567890abcdefghijkl";
   625    char output[sizeof(expected) / num_reads];
   626  
   627    auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY));
   628    ASSERT_THAT(lseek(memfd.get(), reinterpret_cast<off_t>(&expected), SEEK_SET),
   629                SyscallSucceedsWithValue(reinterpret_cast<off_t>(&expected)));
   630    // Read from start
   631    ASSERT_THAT(read(memfd.get(), &output, sizeof(output)),
   632                SyscallSucceedsWithValue(sizeof(output)));
   633    ASSERT_EQ(strncmp(&expected[0 * sizeof(output)], output, sizeof(output)), 0);
   634    // Skip ahead one read
   635    ASSERT_THAT(lseek(memfd.get(), sizeof(output), SEEK_CUR),
   636                SyscallSucceedsWithValue(reinterpret_cast<off_t>(&expected) +
   637                                         sizeof(output) * 2));
   638    // Do read again
   639    ASSERT_THAT(read(memfd.get(), &output, sizeof(output)),
   640                SyscallSucceedsWithValue(sizeof(output)));
   641    ASSERT_EQ(strncmp(&expected[2 * sizeof(output)], output, sizeof(output)), 0);
   642    // Skip back three reads
   643    ASSERT_THAT(lseek(memfd.get(), -3 * sizeof(output), SEEK_CUR),
   644                SyscallSucceedsWithValue(reinterpret_cast<off_t>(&expected)));
   645    // Do read again
   646    ASSERT_THAT(read(memfd.get(), &output, sizeof(output)),
   647                SyscallSucceedsWithValue(sizeof(output)));
   648    ASSERT_EQ(strncmp(&expected[0 * sizeof(output)], output, sizeof(output)), 0);
   649    // Check that SEEK_END does not work
   650    ASSERT_THAT(lseek(memfd.get(), 0, SEEK_END), SyscallFailsWithErrno(EINVAL));
   651  }
   652  
   653  // Perform read past an allocated memory region.
   654  TEST(ProcPidMem, PartialRead) {
   655    // Reserve 2 pages.
   656    Mapping mapping = ASSERT_NO_ERRNO_AND_VALUE(
   657        MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
   658  
   659    // Fill the first page with data.
   660    memset(mapping.ptr(), 'x', kPageSize);
   661  
   662    char expected[] = {'x'};
   663    std::unique_ptr<char[]> output(new char[kPageSize]);
   664    off_t read_offset = reinterpret_cast<off_t>(mapping.ptr()) + kPageSize - 1;
   665    const auto rest = [&] {
   666      int memfd = open("/proc/self/mem", O_RDONLY);
   667      TEST_PCHECK_MSG(memfd >= 0, "open failed");
   668      // Unmap the second page.
   669      TEST_PCHECK_MSG(
   670          MunmapSafe(reinterpret_cast<void*>(mapping.addr() + kPageSize),
   671                     kPageSize) == 0,
   672          "munmap failed");
   673      // Expect to read up to the end of the first page without getting EIO.
   674      TEST_PCHECK_MSG(
   675          pread(memfd, output.get(), kPageSize, read_offset) == sizeof(expected),
   676          "pread failed");
   677      TEST_CHECK(expected[0] == output.get()[0]);
   678    };
   679  
   680    EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
   681  }
   682  
// Perform read on /proc/[pid]/mem after exit.
//
// A forked child signals readiness over pfd1 and then blocks on pfd2. The
// parent opens /proc/<child>/mem and reads `expected` out of the child's
// memory while the child is alive, lets the child exit, reaps it, and then
// checks that the same read through the still-open descriptor returns 0 bytes.
TEST(ProcPidMem, AfterExit) {
  // pfd1: child -> parent ("I am running"). pfd2: parent -> child ("you may
  // exit").
  int pfd1[2] = {};
  int pfd2[2] = {};

  // Declared before fork(), so the child holds a copy of this buffer at the
  // same address; the parent uses &expected as the offset into the child's
  // memory.
  char expected[] = "hello-world";

  ASSERT_THAT(pipe(pfd1), SyscallSucceeds());
  ASSERT_THAT(pipe(pfd2), SyscallSucceeds());

  // Create child process
  pid_t const child_pid = fork();
  if (child_pid == 0) {
    // Close reading end of first pipe
    close(pfd1[0]);

    // Tell parent that the child is up and running
    char ok = 1;
    TEST_CHECK(WriteFd(pfd1[1], &ok, sizeof(ok)) == sizeof(ok));
    TEST_PCHECK(close(pfd1[1]) == 0);

    // Close writing end of second pipe
    TEST_PCHECK(close(pfd2[1]) == 0);

    // Await parent OK to die
    ok = 0;
    TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok));

    // Close the remaining pipe end and exit without running atexit handlers
    TEST_PCHECK(close(pfd2[0]) == 0);
    _exit(0);
  }

  // In parent process. A negative child_pid means fork() failed.
  ASSERT_THAT(child_pid, SyscallSucceeds());

  // Close writing end of first pipe
  EXPECT_THAT(close(pfd1[1]), SyscallSucceeds());

  // Wait for child to be alive and well
  char ok = 0;
  EXPECT_THAT(ReadFd(pfd1[0], &ok, sizeof(ok)),
              SyscallSucceedsWithValue(sizeof(ok)));
  // Close reading end of first pipe
  EXPECT_THAT(close(pfd1[0]), SyscallSucceeds());

  // Open /proc/pid/mem fd
  std::string mempath = absl::StrCat("/proc/", child_pid, "/mem");
  auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open(mempath, O_RDONLY));

  // Expect that we can read the child's copy of `expected` while it is alive
  char output[sizeof(expected)];
  EXPECT_THAT(pread(memfd.get(), &output, sizeof(output),
                    reinterpret_cast<off_t>(&expected)),
              SyscallSucceedsWithValue(sizeof(output)));
  EXPECT_STREQ(expected, output);

  // Tell the child it is OK to exit
  EXPECT_THAT(close(pfd2[0]), SyscallSucceeds());
  ok = 1;
  EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)),
              SyscallSucceedsWithValue(sizeof(ok)));
  EXPECT_THAT(close(pfd2[1]), SyscallSucceeds());

  // Expect termination (this also reaps the child)
  int status;
  ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());

  // Expect that we can't read anymore: the read succeeds but returns 0 bytes
  EXPECT_THAT(pread(memfd.get(), &output, sizeof(output),
                    reinterpret_cast<off_t>(&expected)),
              SyscallSucceedsWithValue(0));
}
   756  
   757  // Read from /proc/[pid]/mem with different UID/GID and attached state.
   758  TEST(ProcPidMem, DifferentUserAttached) {
   759    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
   760    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_DAC_OVERRIDE)));
   761    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_PTRACE)));
   762  
   763    int pfd1[2] = {};
   764    int pfd2[2] = {};
   765  
   766    ASSERT_THAT(pipe(pfd1), SyscallSucceeds());
   767    ASSERT_THAT(pipe(pfd2), SyscallSucceeds());
   768  
   769    // Create child process
   770    pid_t const child_pid = fork();
   771    if (child_pid == 0) {
   772      // Close reading end of first pipe
   773      close(pfd1[0]);
   774  
   775      // Tell parent about location of input
   776      char input[] = "hello-world";
   777      off_t input_location = reinterpret_cast<off_t>(input);
   778      TEST_CHECK(WriteFd(pfd1[1], &input_location, sizeof(input_location)) ==
   779                 sizeof(input_location));
   780      TEST_PCHECK(close(pfd1[1]) == 0);
   781  
   782      // Close writing end of second pipe
   783      TEST_PCHECK(close(pfd2[1]) == 0);
   784  
   785      // Await parent OK to die
   786      char ok = 0;
   787      TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok));
   788  
   789      // Close rest pipes
   790      TEST_PCHECK(close(pfd2[0]) == 0);
   791      _exit(0);
   792    }
   793  
   794    // In parent process.
   795    ASSERT_THAT(child_pid, SyscallSucceeds());
   796  
   797    // Close writing end of first pipe
   798    EXPECT_THAT(close(pfd1[1]), SyscallSucceeds());
   799  
   800    // Read target location from child
   801    off_t target_location;
   802    EXPECT_THAT(ReadFd(pfd1[0], &target_location, sizeof(target_location)),
   803                SyscallSucceedsWithValue(sizeof(target_location)));
   804    // Close reading end of first pipe
   805    EXPECT_THAT(close(pfd1[0]), SyscallSucceeds());
   806  
   807    ScopedThread([&] {
   808      // Attach to child subprocess without stopping it
   809      EXPECT_THAT(ptrace(PTRACE_SEIZE, child_pid, NULL, NULL), SyscallSucceeds());
   810  
   811      // Keep capabilities after setuid
   812      EXPECT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds());
   813      constexpr int kNobody = 65534;
   814      EXPECT_THAT(syscall(SYS_setuid, kNobody), SyscallSucceeds());
   815  
   816      // Only restore CAP_SYS_PTRACE and CAP_DAC_OVERRIDE
   817      EXPECT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, true));
   818      EXPECT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, true));
   819  
   820      // Open /proc/pid/mem fd
   821      std::string mempath = absl::StrCat("/proc/", child_pid, "/mem");
   822      auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open(mempath, O_RDONLY));
   823      char expected[] = "hello-world";
   824      char output[sizeof(expected)];
   825      EXPECT_THAT(pread(memfd.get(), output, sizeof(output),
   826                        reinterpret_cast<off_t>(target_location)),
   827                  SyscallSucceedsWithValue(sizeof(output)));
   828      EXPECT_STREQ(expected, output);
   829  
   830      // Tell proc its ok to go
   831      EXPECT_THAT(close(pfd2[0]), SyscallSucceeds());
   832      char ok = 1;
   833      EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)),
   834                  SyscallSucceedsWithValue(sizeof(ok)));
   835      EXPECT_THAT(close(pfd2[1]), SyscallSucceeds());
   836  
   837      // Expect termination
   838      int status;
   839      ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
   840      EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
   841          << " status " << status;
   842    });
   843  }
   844  
   845  // Attempt to read from /proc/[pid]/mem with different UID/GID.
   846  TEST(ProcPidMem, DifferentUser) {
   847    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
   848  
   849    int pfd1[2] = {};
   850    int pfd2[2] = {};
   851  
   852    ASSERT_THAT(pipe(pfd1), SyscallSucceeds());
   853    ASSERT_THAT(pipe(pfd2), SyscallSucceeds());
   854  
   855    // Create child process
   856    pid_t const child_pid = fork();
   857    if (child_pid == 0) {
   858      // Close reading end of first pipe
   859      close(pfd1[0]);
   860  
   861      // Tell parent about location of input
   862      char input[] = "hello-world";
   863      off_t input_location = reinterpret_cast<off_t>(input);
   864      TEST_CHECK(WriteFd(pfd1[1], &input_location, sizeof(input_location)) ==
   865                 sizeof(input_location));
   866      TEST_PCHECK(close(pfd1[1]) == 0);
   867  
   868      // Close writing end of second pipe
   869      TEST_PCHECK(close(pfd2[1]) == 0);
   870  
   871      // Await parent OK to die
   872      char ok = 0;
   873      TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok));
   874  
   875      // Close rest pipes
   876      TEST_PCHECK(close(pfd2[0]) == 0);
   877      _exit(0);
   878    }
   879  
   880    // In parent process.
   881    ASSERT_THAT(child_pid, SyscallSucceeds());
   882  
   883    // Close writing end of first pipe
   884    EXPECT_THAT(close(pfd1[1]), SyscallSucceeds());
   885  
   886    // Read target location from child
   887    off_t target_location;
   888    EXPECT_THAT(ReadFd(pfd1[0], &target_location, sizeof(target_location)),
   889                SyscallSucceedsWithValue(sizeof(target_location)));
   890    // Close reading end of first pipe
   891    EXPECT_THAT(close(pfd1[0]), SyscallSucceeds());
   892  
   893    ScopedThread([&] {
   894      constexpr int kNobody = 65534;
   895      EXPECT_THAT(syscall(SYS_setuid, kNobody), SyscallSucceeds());
   896  
   897      // Attempt to open /proc/[child_pid]/mem
   898      std::string mempath = absl::StrCat("/proc/", child_pid, "/mem");
   899      EXPECT_THAT(open(mempath.c_str(), O_RDONLY), SyscallFailsWithErrno(EACCES));
   900  
   901      // Tell proc its ok to go
   902      EXPECT_THAT(close(pfd2[0]), SyscallSucceeds());
   903      char ok = 1;
   904      EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)),
   905                  SyscallSucceedsWithValue(sizeof(ok)));
   906      EXPECT_THAT(close(pfd2[1]), SyscallSucceeds());
   907  
   908      // Expect termination
   909      int status;
   910      ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
   911    });
   912  }
   913  
   914  // Perform read on /proc/[pid]/mem with same UID/GID.
   915  TEST(ProcPidMem, SameUser) {
   916    int pfd1[2] = {};
   917    int pfd2[2] = {};
   918  
   919    ASSERT_THAT(pipe(pfd1), SyscallSucceeds());
   920    ASSERT_THAT(pipe(pfd2), SyscallSucceeds());
   921  
   922    // Create child process
   923    pid_t const child_pid = fork();
   924    if (child_pid == 0) {
   925      // Close reading end of first pipe
   926      close(pfd1[0]);
   927  
   928      // Tell parent about location of input
   929      char input[] = "hello-world";
   930      off_t input_location = reinterpret_cast<off_t>(input);
   931      TEST_CHECK(WriteFd(pfd1[1], &input_location, sizeof(input_location)) ==
   932                 sizeof(input_location));
   933      TEST_PCHECK(close(pfd1[1]) == 0);
   934  
   935      // Close writing end of second pipe
   936      TEST_PCHECK(close(pfd2[1]) == 0);
   937  
   938      // Await parent OK to die
   939      char ok = 0;
   940      TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok));
   941  
   942      // Close rest pipes
   943      TEST_PCHECK(close(pfd2[0]) == 0);
   944      _exit(0);
   945    }
   946    // In parent process.
   947    ASSERT_THAT(child_pid, SyscallSucceeds());
   948  
   949    // Close writing end of first pipe
   950    EXPECT_THAT(close(pfd1[1]), SyscallSucceeds());
   951  
   952    // Read target location from child
   953    off_t target_location;
   954    EXPECT_THAT(ReadFd(pfd1[0], &target_location, sizeof(target_location)),
   955                SyscallSucceedsWithValue(sizeof(target_location)));
   956    // Close reading end of first pipe
   957    EXPECT_THAT(close(pfd1[0]), SyscallSucceeds());
   958  
   959    // Open /proc/pid/mem fd
   960    std::string mempath = absl::StrCat("/proc/", child_pid, "/mem");
   961    auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open(mempath, O_RDONLY));
   962    char expected[] = "hello-world";
   963    char output[sizeof(expected)];
   964    EXPECT_THAT(pread(memfd.get(), output, sizeof(output),
   965                      reinterpret_cast<off_t>(target_location)),
   966                SyscallSucceedsWithValue(sizeof(output)));
   967    EXPECT_STREQ(expected, output);
   968  
   969    // Tell proc its ok to go
   970    EXPECT_THAT(close(pfd2[0]), SyscallSucceeds());
   971    char ok = 1;
   972    EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)),
   973                SyscallSucceedsWithValue(sizeof(ok)));
   974    EXPECT_THAT(close(pfd2[1]), SyscallSucceeds());
   975  
   976    // Expect termination
   977    int status;
   978    ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
   979  }
   980  
   981  // Just open and read /proc/self/maps, check that we can find [stack]
   982  TEST(ProcSelfMaps, Basic) {
   983    auto proc_self_maps =
   984        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
   985  
   986    std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n');
   987    std::vector<std::string> stacks;
   988    // Make sure there's a stack in there.
   989    for (const auto& str : strings) {
   990      if (str.find("[stack]") != std::string::npos) {
   991        stacks.push_back(str);
   992      }
   993    }
   994    ASSERT_EQ(1, stacks.size()) << "[stack] not found in: " << proc_self_maps;
   995    // Linux pads to 73 characters then we add 7.
   996    EXPECT_EQ(80, stacks[0].length());
   997  }
   998  
   999  TEST(ProcSelfMaps, Map1) {
  1000    Mapping mapping =
  1001        ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_READ, MAP_PRIVATE));
  1002    auto proc_self_maps =
  1003        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
  1004    std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n');
  1005    std::vector<std::string> addrs;
  1006    // Make sure if is listed.
  1007    for (const auto& str : strings) {
  1008      if (str == AnonymousMapsEntryForMapping(mapping, PROT_READ)) {
  1009        addrs.push_back(str);
  1010      }
  1011    }
  1012    ASSERT_EQ(1, addrs.size());
  1013  }
  1014  
  1015  TEST(ProcSelfMaps, Map2) {
  1016    // NOTE(magi): The permissions must be different or the pages will get merged.
  1017    Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE(
  1018        MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE));
  1019    Mapping map2 =
  1020        ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE));
  1021  
  1022    auto proc_self_maps =
  1023        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
  1024    std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n');
  1025    std::vector<std::string> addrs;
  1026    // Make sure if is listed.
  1027    for (const auto& str : strings) {
  1028      if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) {
  1029        addrs.push_back(str);
  1030      }
  1031    }
  1032    ASSERT_EQ(1, addrs.size());
  1033    addrs.clear();
  1034    for (const auto& str : strings) {
  1035      if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) {
  1036        addrs.push_back(str);
  1037      }
  1038    }
  1039    ASSERT_EQ(1, addrs.size());
  1040  }
  1041  
  1042  TEST(ProcSelfMaps, MapUnmap) {
  1043    Mapping map1 = ASSERT_NO_ERRNO_AND_VALUE(
  1044        MmapAnon(kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE));
  1045    Mapping map2 =
  1046        ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_WRITE, MAP_PRIVATE));
  1047  
  1048    auto proc_self_maps =
  1049        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
  1050    std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n');
  1051    std::vector<std::string> addrs;
  1052    // Make sure if is listed.
  1053    for (const auto& str : strings) {
  1054      if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) {
  1055        addrs.push_back(str);
  1056      }
  1057    }
  1058    ASSERT_EQ(1, addrs.size()) << proc_self_maps;
  1059    addrs.clear();
  1060    for (const auto& str : strings) {
  1061      if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) {
  1062        addrs.push_back(str);
  1063      }
  1064    }
  1065    ASSERT_EQ(1, addrs.size());
  1066  
  1067    map2.reset();
  1068  
  1069    // Read it again.
  1070    proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
  1071    strings = absl::StrSplit(proc_self_maps, '\n');
  1072    // First entry should be there.
  1073    addrs.clear();
  1074    for (const auto& str : strings) {
  1075      if (str == AnonymousMapsEntryForMapping(map1, PROT_READ | PROT_EXEC)) {
  1076        addrs.push_back(str);
  1077      }
  1078    }
  1079    ASSERT_EQ(1, addrs.size());
  1080    addrs.clear();
  1081    // But not the second.
  1082    for (const auto& str : strings) {
  1083      if (str == AnonymousMapsEntryForMapping(map2, PROT_WRITE)) {
  1084        addrs.push_back(str);
  1085      }
  1086    }
  1087    ASSERT_EQ(0, addrs.size());
  1088  }
  1089  
  1090  TEST(ProcSelfMaps, Mprotect) {
  1091    // FIXME(jamieliu): Linux's mprotect() sometimes fails to merge VMAs in this
  1092    // case.
  1093    SKIP_IF(!IsRunningOnGvisor());
  1094  
  1095    // Reserve 5 pages of address space.
  1096    Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
  1097        MmapAnon(5 * kPageSize, PROT_NONE, MAP_PRIVATE));
  1098  
  1099    // Change the permissions on the middle 3 pages. (The first and last pages may
  1100    // be merged with other vmas on either side, so they aren't tested directly;
  1101    // they just ensure that the middle 3 pages are bracketed by VMAs with
  1102    // incompatible permissions.)
  1103    ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + kPageSize),
  1104                         3 * kPageSize, PROT_READ),
  1105                SyscallSucceeds());
  1106  
  1107    // Check that the middle 3 pages make up a single VMA.
  1108    auto proc_self_maps =
  1109        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
  1110    std::vector<std::string> strings = absl::StrSplit(proc_self_maps, '\n');
  1111    EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize,
  1112                                                     3 * kPageSize, PROT_READ)));
  1113  
  1114    // Change the permissions on the middle page only.
  1115    ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize),
  1116                         kPageSize, PROT_READ | PROT_WRITE),
  1117                SyscallSucceeds());
  1118  
  1119    // Check that the single VMA has been split into 3 VMAs.
  1120    proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
  1121    strings = absl::StrSplit(proc_self_maps, '\n');
  1122    EXPECT_THAT(
  1123        strings,
  1124        IsSupersetOf(
  1125            {AnonymousMapsEntry(m.addr() + kPageSize, kPageSize, PROT_READ),
  1126             AnonymousMapsEntry(m.addr() + 2 * kPageSize, kPageSize,
  1127                                PROT_READ | PROT_WRITE),
  1128             AnonymousMapsEntry(m.addr() + 3 * kPageSize, kPageSize,
  1129                                PROT_READ)}));
  1130  
  1131    // Change the permissions on the middle page back.
  1132    ASSERT_THAT(mprotect(reinterpret_cast<void*>(m.addr() + 2 * kPageSize),
  1133                         kPageSize, PROT_READ),
  1134                SyscallSucceeds());
  1135  
  1136    // Check that the 3 VMAs have been merged back into a single VMA.
  1137    proc_self_maps = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
  1138    strings = absl::StrSplit(proc_self_maps, '\n');
  1139    EXPECT_THAT(strings, Contains(AnonymousMapsEntry(m.addr() + kPageSize,
  1140                                                     3 * kPageSize, PROT_READ)));
  1141  }
  1142  
  1143  TEST(ProcSelfMaps, SharedAnon) {
  1144    const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
  1145        MmapAnon(kPageSize, PROT_READ, MAP_SHARED | MAP_ANONYMOUS));
  1146  
  1147    const auto proc_self_maps =
  1148        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/maps"));
  1149    for (const auto& line : absl::StrSplit(proc_self_maps, '\n')) {
  1150      const auto entry = ASSERT_NO_ERRNO_AND_VALUE(ParseProcMapsLine(line));
  1151      if (entry.start <= m.addr() && m.addr() < entry.end) {
  1152        // cf. proc(5), "/proc/[pid]/map_files/"
  1153        EXPECT_EQ(entry.filename, "/dev/zero (deleted)");
  1154        return;
  1155      }
  1156    }
  1157    FAIL() << "no maps entry containing mapping at " << m.ptr();
  1158  }
  1159  
  1160  TEST(ProcSelfFd, OpenFd) {
  1161    int pipe_fds[2];
  1162    ASSERT_THAT(pipe2(pipe_fds, O_CLOEXEC), SyscallSucceeds());
  1163  
  1164    // Reopen the write end.
  1165    const std::string path = absl::StrCat("/proc/self/fd/", pipe_fds[1]);
  1166    const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_WRONLY));
  1167  
  1168    // Ensure that a read/write works.
  1169    const std::string data = "hello";
  1170    std::unique_ptr<char[]> buffer(new char[data.size()]);
  1171    EXPECT_THAT(write(fd.get(), data.c_str(), data.size()),
  1172                SyscallSucceedsWithValue(5));
  1173    EXPECT_THAT(read(pipe_fds[0], buffer.get(), data.size()),
  1174                SyscallSucceedsWithValue(5));
  1175    EXPECT_EQ(strncmp(buffer.get(), data.c_str(), data.size()), 0);
  1176  
  1177    // Cleanup.
  1178    ASSERT_THAT(close(pipe_fds[0]), SyscallSucceeds());
  1179    ASSERT_THAT(close(pipe_fds[1]), SyscallSucceeds());
  1180  }
  1181  
  1182  static void CheckFdDirGetdentsDuplicates(const std::string& path) {
  1183    const FileDescriptor fd =
  1184        ASSERT_NO_ERRNO_AND_VALUE(Open(path.c_str(), O_RDONLY | O_DIRECTORY));
  1185    // Open a FD whose value is supposed to be much larger than
  1186    // the number of FDs opened by current process.
  1187    auto newfd = fcntl(fd.get(), F_DUPFD, 1024);
  1188    EXPECT_GE(newfd, 1024);
  1189    auto fd_closer = Cleanup([newfd]() { close(newfd); });
  1190    auto fd_files = ASSERT_NO_ERRNO_AND_VALUE(ListDir(path.c_str(), false));
  1191    absl::flat_hash_set<std::string> fd_files_dedup(fd_files.begin(),
  1192                                                    fd_files.end());
  1193    EXPECT_EQ(fd_files.size(), fd_files_dedup.size());
  1194  }
  1195  
  1196  // This is a regression test for gvisor.dev/issues/3894
  1197  TEST(ProcSelfFd, GetdentsDuplicates) {
  1198    CheckFdDirGetdentsDuplicates("/proc/self/fd");
  1199  }
  1200  
  1201  // This is a regression test for gvisor.dev/issues/3894
  1202  TEST(ProcSelfFdInfo, GetdentsDuplicates) {
  1203    CheckFdDirGetdentsDuplicates("/proc/self/fdinfo");
  1204  }
  1205  
  1206  TEST(ProcSelfFdInfo, CorrectFds) {
  1207    // Make sure there is at least one open file.
  1208    auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
  1209    const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(f.path(), O_RDONLY));
  1210  
  1211    // Get files in /proc/self/fd.
  1212    auto fd_files = ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fd", false));
  1213  
  1214    // Get files in /proc/self/fdinfo.
  1215    auto fdinfo_files =
  1216        ASSERT_NO_ERRNO_AND_VALUE(ListDir("/proc/self/fdinfo", false));
  1217  
  1218    // They should contain the same fds.
  1219    EXPECT_THAT(fd_files, UnorderedElementsAreArray(fdinfo_files));
  1220  
  1221    // Both should contain fd.
  1222    auto fd_s = absl::StrCat(fd.get());
  1223    EXPECT_THAT(fd_files, Contains(fd_s));
  1224  }
  1225  
  1226  TEST(ProcSelfFdInfo, Flags) {
  1227    std::string path = NewTempAbsPath();
  1228  
  1229    // Create file here with O_CREAT to test that O_CREAT does not appear in
  1230    // fdinfo flags.
  1231    int flags = O_CREAT | O_RDWR | O_APPEND | O_CLOEXEC;
  1232    const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, flags, 0644));
  1233  
  1234    // Automatically delete path.
  1235    TempPath temp_path(path);
  1236  
  1237    // O_CREAT does not appear in fdinfo flags.
  1238    flags &= ~O_CREAT;
  1239  
  1240    // O_LARGEFILE always appears (on x86_64).
  1241    flags |= kOLargeFile;
  1242  
  1243    auto fd_info = ASSERT_NO_ERRNO_AND_VALUE(
  1244        GetContents(absl::StrCat("/proc/self/fdinfo/", fd.get())));
  1245    EXPECT_THAT(fd_info, HasSubstr(absl::StrFormat("flags:\t%#o", flags)));
  1246  }
  1247  
  1248  TEST(ProcSelfExe, Absolute) {
  1249    auto exe = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe"));
  1250    EXPECT_EQ(exe[0], '/');
  1251  }
  1252  
  1253  TEST(ProcSelfCwd, Absolute) {
  1254    auto exe = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/cwd"));
  1255    EXPECT_EQ(exe[0], '/');
  1256  }
  1257  
  1258  TEST(ProcSelfRoot, IsRoot) {
  1259    auto exe = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/root"));
  1260    EXPECT_EQ(exe, "/");
  1261  }
  1262  
  1263  // Sanity check that /proc/cmdline is present.
  1264  TEST(ProcCmdline, IsPresent) {
  1265    std::string proc_cmdline =
  1266        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cmdline"));
  1267    ASSERT_FALSE(proc_cmdline.empty());
  1268  }
  1269  
  1270  // Sanity check for /proc/cpuinfo fields that must be present.
  1271  TEST(ProcCpuinfo, RequiredFieldsArePresent) {
  1272    std::string proc_cpuinfo =
  1273        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/cpuinfo"));
  1274    ASSERT_FALSE(proc_cpuinfo.empty());
  1275  
  1276    // Check that the usual fields are there. We don't really care about the
  1277    // contents.
  1278    for (const char* field : required_fields) {
  1279      EXPECT_THAT(proc_cpuinfo, HasSubstr(field));
  1280    }
  1281  }
  1282  
  1283  TEST(ProcCpuinfo, DeniesWriteNonRoot) {
  1284    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER)));
  1285  
  1286    // Do setuid in a separate thread so that after finishing this test, the
  1287    // process can still open files the test harness created before starting this
  1288    // test. Otherwise, the files are created by root (UID before the test), but
  1289    // cannot be opened by the `uid` set below after the test. After calling
  1290    // setuid(non-zero-UID), there is no way to get root privileges back.
  1291    ScopedThread([&] {
  1292      // Use syscall instead of glibc setuid wrapper because we want this setuid
  1293      // call to only apply to this task. POSIX threads, however, require that all
  1294      // threads have the same UIDs, so using the setuid wrapper sets all threads'
  1295      // real UID.
  1296      // Also drops capabilities.
  1297      constexpr int kNobody = 65534;
  1298      EXPECT_THAT(syscall(SYS_setuid, kNobody), SyscallSucceeds());
  1299      EXPECT_THAT(open("/proc/cpuinfo", O_WRONLY), SyscallFailsWithErrno(EACCES));
  1300      EXPECT_THAT(truncate("/proc/cpuinfo", 123), SyscallFailsWithErrno(EACCES));
  1301    });
  1302  }
  1303  
  1304  // With root privileges, it is possible to open /proc/cpuinfo with write mode,
  1305  // but all write operations should fail.
  1306  TEST(ProcCpuinfo, DeniesWriteRoot) {
  1307    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_FOWNER)));
  1308  
  1309    int fd;
  1310    EXPECT_THAT(fd = open("/proc/cpuinfo", O_WRONLY), SyscallSucceeds());
  1311    if (fd > 0) {
  1312      // Truncate is not tested--it may succeed on some kernels without doing
  1313      // anything.
  1314      EXPECT_THAT(write(fd, "x", 1), SyscallFails());
  1315      EXPECT_THAT(pwrite(fd, "x", 1, 123), SyscallFails());
  1316    }
  1317  }
  1318  
  1319  // Cpuinfo should not change across save/restore.
  1320  TEST(ProcCpuinfo, Stable) {
  1321    std::string output_before;
  1322    ASSERT_NO_ERRNO(GetContents("/proc/cpuinfo", &output_before));
  1323    MaybeSave();
  1324    std::string output_after;
  1325    ASSERT_NO_ERRNO(GetContents("/proc/cpuinfo", &output_after));
  1326  
  1327    std::vector<std::string> before_fields = absl::StrSplit(output_before, '\n');
  1328    std::vector<std::string> after_fields = absl::StrSplit(output_before, '\n');
  1329    RemoveUnstableCPUInfoFields(before_fields);
  1330    RemoveUnstableCPUInfoFields(after_fields);
  1331  
  1332    EXPECT_THAT(absl::StrJoin(before_fields, "\n"),
  1333                Eq(absl::StrJoin(after_fields, "\n")));
  1334  }
  1335  
  1336  // Sanity checks that uptime is present.
  1337  TEST(ProcUptime, IsPresent) {
  1338    std::string proc_uptime =
  1339        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime"));
  1340    ASSERT_FALSE(proc_uptime.empty());
  1341    std::vector<std::string> uptime_parts = absl::StrSplit(proc_uptime, ' ');
  1342  
  1343    // Parse once.
  1344    double uptime0, uptime1, idletime0, idletime1;
  1345    ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime0));
  1346    ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime0));
  1347  
  1348    // Sleep for one second.
  1349    absl::SleepFor(absl::Seconds(1));
  1350  
  1351    // Parse again.
  1352    proc_uptime = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/uptime"));
  1353    ASSERT_FALSE(proc_uptime.empty());
  1354    uptime_parts = absl::StrSplit(proc_uptime, ' ');
  1355    ASSERT_TRUE(absl::SimpleAtod(uptime_parts[0], &uptime1));
  1356    ASSERT_TRUE(absl::SimpleAtod(uptime_parts[1], &idletime1));
  1357  
  1358    // Sanity check.
  1359    //
  1360    // We assert that between 0.99 and 59.99 seconds have passed. If more than a
  1361    // minute has passed, then we must be executing really, really slowly.
  1362    EXPECT_GE(uptime0, 0.0);
  1363    EXPECT_GE(idletime0, 0.0);
  1364    EXPECT_GT(uptime1, uptime0);
  1365    EXPECT_GE(uptime1, uptime0 + 0.99);
  1366    EXPECT_LE(uptime1, uptime0 + 59.99);
  1367    EXPECT_GE(idletime1, idletime0);
  1368  }
  1369  
  1370  TEST(ProcMeminfo, ContainsBasicFields) {
  1371    std::string proc_meminfo =
  1372        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/meminfo"));
  1373    EXPECT_THAT(proc_meminfo, AllOf(ContainsRegex(R"(MemTotal:\s+[0-9]+ kB)"),
  1374                                    ContainsRegex(R"(MemFree:\s+[0-9]+ kB)")));
  1375  }
  1376  
  1377  TEST(ProcSentryMeminfo, ContainsFieldsAndEndsWithNewline) {
  1378    SKIP_IF(!IsRunningOnGvisor());
  1379  
  1380    std::string proc_sentry_meminfo =
  1381        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sentry-meminfo"));
  1382  
  1383    // Assert that all expected fields are present.
  1384    EXPECT_THAT(proc_sentry_meminfo,
  1385                AllOf(ContainsRegex(R"(Alloc:\s+[0-9]+ kB)"),
  1386                      ContainsRegex(R"(TotalAlloc:\s+[0-9]+ kB)"),
  1387                      ContainsRegex(R"(Sys:\s+[0-9]+ kB)"),
  1388                      ContainsRegex(R"(Mallocs:\s+[0-9]+)"),
  1389                      ContainsRegex(R"(Frees:\s+[0-9]+)"),
  1390                      ContainsRegex(R"(Live Objects:\s+[0-9]+)"),
  1391                      ContainsRegex(R"(HeapAlloc:\s+[0-9]+ kB)"),
  1392                      ContainsRegex(R"(HeapSys:\s+[0-9]+ kB)"),
  1393                      ContainsRegex(R"(HeapObjects:\s+[0-9]+)")));
  1394  
  1395    // Assert that /proc/sentry-meminfo ends with a new line.
  1396    EXPECT_EQ(proc_sentry_meminfo.back(), '\n');
  1397  }
  1398  
  1399  TEST(ProcStat, ContainsBasicFields) {
  1400    std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat"));
  1401  
  1402    std::vector<std::string> names;
  1403    for (auto const& line : absl::StrSplit(proc_stat, '\n')) {
  1404      std::vector<std::string> fields =
  1405          absl::StrSplit(line, ' ', absl::SkipWhitespace());
  1406      if (fields.empty()) {
  1407        continue;
  1408      }
  1409      names.push_back(fields[0]);
  1410    }
  1411  
  1412    EXPECT_THAT(names,
  1413                IsSupersetOf({"cpu", "intr", "ctxt", "btime", "processes",
  1414                              "procs_running", "procs_blocked", "softirq"}));
  1415  }
  1416  
  1417  TEST(ProcStat, EndsWithNewline) {
  1418    std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat"));
  1419    EXPECT_EQ(proc_stat.back(), '\n');
  1420  }
  1421  
  1422  TEST(ProcStat, Fields) {
  1423    std::string proc_stat = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/stat"));
  1424  
  1425    std::vector<std::string> names;
  1426    for (auto const& line : absl::StrSplit(proc_stat, '\n')) {
  1427      std::vector<std::string> fields =
  1428          absl::StrSplit(line, ' ', absl::SkipWhitespace());
  1429      if (fields.empty()) {
  1430        continue;
  1431      }
  1432  
  1433      if (absl::StartsWith(fields[0], "cpu")) {
  1434        // As of Linux 3.11, each CPU entry has 10 fields, plus the name.
  1435        EXPECT_GE(fields.size(), 11) << proc_stat;
  1436      } else if (fields[0] == "ctxt") {
  1437        // Single field.
  1438        EXPECT_EQ(fields.size(), 2) << proc_stat;
  1439      } else if (fields[0] == "btime") {
  1440        // Single field.
  1441        EXPECT_EQ(fields.size(), 2) << proc_stat;
  1442      } else if (fields[0] == "itime") {
  1443        // Single field.
  1444        ASSERT_EQ(fields.size(), 2) << proc_stat;
  1445        // This is the only floating point field.
  1446        double val;
  1447        EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_stat;
  1448        continue;
  1449      } else if (fields[0] == "processes") {
  1450        // Single field.
  1451        EXPECT_EQ(fields.size(), 2) << proc_stat;
  1452      } else if (fields[0] == "procs_running") {
  1453        // Single field.
  1454        EXPECT_EQ(fields.size(), 2) << proc_stat;
  1455      } else if (fields[0] == "procs_blocked") {
  1456        // Single field.
  1457        EXPECT_EQ(fields.size(), 2) << proc_stat;
  1458      } else if (fields[0] == "softirq") {
  1459        // As of Linux 3.11, there are 10 softirqs. 12 fields for name + total.
  1460        EXPECT_GE(fields.size(), 12) << proc_stat;
  1461      }
  1462  
  1463      // All fields besides itime are valid base 10 numbers.
  1464      for (size_t i = 1; i < fields.size(); i++) {
  1465        uint64_t val;
  1466        EXPECT_TRUE(absl::SimpleAtoi(fields[i], &val)) << proc_stat;
  1467      }
  1468    }
  1469  }
  1470  
  1471  TEST(ProcLoadavg, EndsWithNewline) {
  1472    std::string proc_loadvg =
  1473        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg"));
  1474    EXPECT_EQ(proc_loadvg.back(), '\n');
  1475  }
  1476  
  1477  TEST(ProcLoadavg, Fields) {
  1478    std::string proc_loadvg =
  1479        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/loadavg"));
  1480    std::vector<std::string> lines = absl::StrSplit(proc_loadvg, '\n');
  1481  
  1482    // Single line.
  1483    EXPECT_EQ(lines.size(), 2) << proc_loadvg;
  1484  
  1485    std::vector<std::string> fields =
  1486        absl::StrSplit(lines[0], absl::ByAnyChar(" /"), absl::SkipWhitespace());
  1487  
  1488    // Six fields.
  1489    EXPECT_EQ(fields.size(), 6) << proc_loadvg;
  1490  
  1491    double val;
  1492    uint64_t val2;
  1493    // First three fields are floating point numbers.
  1494    EXPECT_TRUE(absl::SimpleAtod(fields[0], &val)) << proc_loadvg;
  1495    EXPECT_TRUE(absl::SimpleAtod(fields[1], &val)) << proc_loadvg;
  1496    EXPECT_TRUE(absl::SimpleAtod(fields[2], &val)) << proc_loadvg;
  1497    // Rest of the fields are valid base 10 numbers.
  1498    EXPECT_TRUE(absl::SimpleAtoi(fields[3], &val2)) << proc_loadvg;
  1499    EXPECT_TRUE(absl::SimpleAtoi(fields[4], &val2)) << proc_loadvg;
  1500    EXPECT_TRUE(absl::SimpleAtoi(fields[5], &val2)) << proc_loadvg;
  1501  }
  1502  
  1503  // NOTE: Tests in priority.cc also check certain priority related fields in
  1504  // /proc/self/stat.
  1505  
// Fixture for the /proc/<pid>/stat tests. The string parameter is the path
// component placed between "/proc/" and "/stat".
class ProcPidStatTest : public ::testing::TestWithParam<std::string> {};
  1507  
  1508  // Parses /proc/<pid>/stat output to a vector of string. We need a more
  1509  // complicated approach than absl::StrSplit because COMM can contain spaces.
  1510  PosixErrorOr<std::vector<std::string>> ParseProcPidStat(
  1511      absl::string_view proc_pid_stat) {
  1512    auto comm_start = proc_pid_stat.find('(');
  1513    auto comm_end = proc_pid_stat.rfind(')');
  1514    if (comm_start == proc_pid_stat.npos || comm_end == proc_pid_stat.npos) {
  1515      return PosixError(EINVAL, absl::StrCat("Invalid /proc/<pid>/stat"));
  1516    }
  1517    std::vector<std::string> fields =
  1518        absl::StrSplit(proc_pid_stat.substr(0, comm_start - 1), ' ');
  1519    fields.push_back(std::string{proc_pid_stat.substr(comm_start, comm_end + 1)});
  1520    absl::c_transform(absl::StrSplit(proc_pid_stat.substr(comm_end + 2), ' '),
  1521                      std::back_inserter(fields),
  1522                      [](auto sv) { return std::string{sv}; });
  1523    return fields;
  1524  }
  1525  
  1526  TEST_P(ProcPidStatTest, HasBasicFields) {
  1527    std::string proc_pid_stat = ASSERT_NO_ERRNO_AND_VALUE(
  1528        GetContents(absl::StrCat("/proc/", GetParam(), "/stat")));
  1529  
  1530    ASSERT_FALSE(proc_pid_stat.empty());
  1531    std::vector<std::string> fields =
  1532        ASSERT_NO_ERRNO_AND_VALUE(ParseProcPidStat(proc_pid_stat));
  1533  
  1534    ASSERT_GE(fields.size(), 24);
  1535    EXPECT_EQ(absl::StrCat(getpid()), fields[0]);
  1536    // fields[1] is the thread name.
  1537    EXPECT_EQ("R", fields[2]);  // task state
  1538    EXPECT_EQ(absl::StrCat(getppid()), fields[3]);
  1539  
  1540    // If the test starts up quickly, then the process start time and the kernel
  1541    // boot time will be very close, and the proc starttime field (which is the
  1542    // delta of the two times) will be 0.  For that unfortunate reason, we can
  1543    // only check that starttime >= 0, and not that it is strictly > 0.
  1544    uint64_t starttime;
  1545    ASSERT_TRUE(absl::SimpleAtoi(fields[21], &starttime));
  1546    EXPECT_GE(starttime, 0);
  1547  
  1548    uint64_t vss;
  1549    ASSERT_TRUE(absl::SimpleAtoi(fields[22], &vss));
  1550    EXPECT_GT(vss, 0);
  1551  
  1552    uint64_t rss;
  1553    ASSERT_TRUE(absl::SimpleAtoi(fields[23], &rss));
  1554    EXPECT_GT(rss, 0);
  1555  
  1556    uint64_t rsslim;
  1557    ASSERT_TRUE(absl::SimpleAtoi(fields[24], &rsslim));
  1558    EXPECT_GT(rsslim, 0);
  1559  }
  1560  
  1561  INSTANTIATE_TEST_SUITE_P(SelfAndNumericPid, ProcPidStatTest,
  1562                           ::testing::Values("self", absl::StrCat(getpid())));
  1563  
  1564  using ProcPidStatmTest = ::testing::TestWithParam<std::string>;
  1565  
  1566  TEST_P(ProcPidStatmTest, HasBasicFields) {
  1567    std::string proc_pid_statm = ASSERT_NO_ERRNO_AND_VALUE(
  1568        GetContents(absl::StrCat("/proc/", GetParam(), "/statm")));
  1569    ASSERT_FALSE(proc_pid_statm.empty());
  1570    std::vector<std::string> fields = absl::StrSplit(proc_pid_statm, ' ');
  1571    ASSERT_GE(fields.size(), 7);
  1572  
  1573    uint64_t vss;
  1574    ASSERT_TRUE(absl::SimpleAtoi(fields[0], &vss));
  1575    EXPECT_GT(vss, 0);
  1576  
  1577    uint64_t rss;
  1578    ASSERT_TRUE(absl::SimpleAtoi(fields[1], &rss));
  1579    EXPECT_GT(rss, 0);
  1580  }
  1581  
  1582  INSTANTIATE_TEST_SUITE_P(SelfAndNumericPid, ProcPidStatmTest,
  1583                           ::testing::Values("self", absl::StrCat(getpid())));
  1584  
  1585  PosixErrorOr<uint64_t> CurrentRSS() {
  1586    ASSIGN_OR_RETURN_ERRNO(auto proc_self_stat, GetContents("/proc/self/stat"));
  1587    if (proc_self_stat.empty()) {
  1588      return PosixError(EINVAL, "empty /proc/self/stat");
  1589    }
  1590  
  1591    ASSIGN_OR_RETURN_ERRNO(std::vector<std::string> fields,
  1592                           ParseProcPidStat(proc_self_stat));
  1593    if (fields.size() < 24) {
  1594      return PosixError(
  1595          EINVAL,
  1596          absl::StrCat("/proc/self/stat has too few fields: ", proc_self_stat));
  1597    }
  1598  
  1599    uint64_t rss;
  1600    if (!absl::SimpleAtoi(fields[23], &rss)) {
  1601      return PosixError(
  1602          EINVAL, absl::StrCat("/proc/self/stat RSS field is not a number: ",
  1603                               fields[23]));
  1604    }
  1605  
  1606    // RSS is given in number of pages.
  1607    return rss * kPageSize;
  1608  }
  1609  
  // Size in bytes (100 MiB) of the mapping created by MapPopulateRSS.
  constexpr uint64_t kMappingSize = uint64_t{100} << 20;

  // Slack in bytes (10 MiB) allowed on RSS comparisons, to account for
  // background thread mappings, reclaimed pages, newly faulted pages, etc.
  constexpr uint64_t kRSSTolerance = uint64_t{10} << 20;
  1616  
  1617  // Capture RSS before and after an anonymous mapping with passed prot.
  1618  void MapPopulateRSS(int prot, uint64_t* before, uint64_t* after) {
  1619    *before = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS());
  1620  
  1621    // N.B. The kernel asynchronously accumulates per-task RSS counters into the
  1622    // mm RSS, which is exposed by /proc/PID/stat. Task exit is a synchronization
  1623    // point (kernel/exit.c:do_exit -> sync_mm_rss), so perform the mapping on
  1624    // another thread to ensure it is reflected in RSS after the thread exits.
  1625    Mapping mapping;
  1626    ScopedThread t([&mapping, prot] {
  1627      mapping = ASSERT_NO_ERRNO_AND_VALUE(
  1628          MmapAnon(kMappingSize, prot, MAP_PRIVATE | MAP_POPULATE));
  1629    });
  1630    t.Join();
  1631  
  1632    *after = ASSERT_NO_ERRNO_AND_VALUE(CurrentRSS());
  1633  }
  1634  
  1635  // TODO(b/73896574): Test for PROT_READ + MAP_POPULATE anonymous mappings. Their
  1636  // semantics are more subtle:
  1637  //
  1638  // Small pages -> Zero page mapped, not counted in RSS
  1639  // (mm/memory.c:do_anonymous_page).
  1640  //
  1641  // Huge pages (THP enabled, use_zero_page=0) -> Pages committed
  1642  // (mm/memory.c:__handle_mm_fault -> create_huge_pmd).
  1643  //
  1644  // Huge pages (THP enabled, use_zero_page=1) -> Zero page mapped, not counted in
  1645  // RSS (mm/huge_memory.c:do_huge_pmd_anonymous_page).
  1646  
  1647  // PROT_WRITE + MAP_POPULATE anonymous mappings are always committed.
  1648  TEST(ProcSelfStat, PopulateWriteRSS) {
  1649    uint64_t before, after;
  1650    MapPopulateRSS(PROT_READ | PROT_WRITE, &before, &after);
  1651  
  1652    // Mapping is committed.
  1653    EXPECT_NEAR(before + kMappingSize, after, kRSSTolerance);
  1654  }
  1655  
  1656  // PROT_NONE + MAP_POPULATE anonymous mappings are never committed.
  1657  TEST(ProcSelfStat, PopulateNoneRSS) {
  1658    uint64_t before, after;
  1659    MapPopulateRSS(PROT_NONE, &before, &after);
  1660  
  1661    // Mapping not committed.
  1662    EXPECT_NEAR(before, after, kRSSTolerance);
  1663  }
  1664  
  1665  // Returns the calling thread's name.
  1666  PosixErrorOr<std::string> ThreadName() {
  1667    // "The buffer should allow space for up to 16 bytes; the returned std::string
  1668    // will be null-terminated if it is shorter than that." - prctl(2). But we
  1669    // always want the thread name to be null-terminated.
  1670    char thread_name[17];
  1671    int rc = prctl(PR_GET_NAME, thread_name, 0, 0, 0);
  1672    MaybeSave();
  1673    if (rc < 0) {
  1674      return PosixError(errno, "prctl(PR_GET_NAME)");
  1675    }
  1676    thread_name[16] = '\0';
  1677    return std::string(thread_name);
  1678  }
  1679  
  1680  // Parses the contents of a /proc/[pid]/status file into a collection of
  1681  // key-value pairs.
  1682  PosixErrorOr<absl::btree_map<std::string, std::string>> ParseProcStatus(
  1683      absl::string_view status_str) {
  1684    absl::btree_map<std::string, std::string> fields;
  1685    for (absl::string_view const line :
  1686         absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) {
  1687      const std::pair<absl::string_view, absl::string_view> kv =
  1688          absl::StrSplit(line, absl::MaxSplits(":\t", 1));
  1689      if (kv.first.empty()) {
  1690        return PosixError(
  1691            EINVAL, absl::StrCat("failed to parse key in line \"", line, "\""));
  1692      }
  1693      std::string key(kv.first);
  1694      if (fields.count(key)) {
  1695        return PosixError(EINVAL,
  1696                          absl::StrCat("duplicate key \"", kv.first, "\""));
  1697      }
  1698      std::string value(kv.second);
  1699      absl::StripLeadingAsciiWhitespace(&value);
  1700      fields.emplace(std::move(key), std::move(value));
  1701    }
  1702    return fields;
  1703  }
  1704  
  1705  TEST(ParseProcStatusTest, ParsesSimpleStatusFileWithMixedWhitespaceCorrectly) {
  1706    EXPECT_THAT(
  1707        ParseProcStatus(
  1708            "Name:\tinit\nState:\tS (sleeping)\nCapEff:\t 0000001fffffffff\n"),
  1709        IsPosixErrorOkAndHolds(UnorderedElementsAre(
  1710            Pair("Name", "init"), Pair("State", "S (sleeping)"),
  1711            Pair("CapEff", "0000001fffffffff"))));
  1712  }
  1713  
  1714  TEST(ParseProcStatusTest, DetectsDuplicateKeys) {
  1715    auto proc_status_or = ParseProcStatus("Name:\tfoo\nName:\tfoo\n");
  1716    EXPECT_THAT(proc_status_or,
  1717                PosixErrorIs(EINVAL, ::testing::StrEq("duplicate key \"Name\"")));
  1718  }
  1719  
  1720  TEST(ParseProcStatusTest, DetectsMissingTabs) {
  1721    EXPECT_THAT(ParseProcStatus("Name:foo\nPid: 1\n"),
  1722                IsPosixErrorOkAndHolds(UnorderedElementsAre(Pair("Name:foo", ""),
  1723                                                            Pair("Pid: 1", ""))));
  1724  }
  1725  
  1726  TEST(ProcPidStatusTest, HasBasicFields) {
  1727    // Do this on a separate thread since we want tgid != tid.
  1728    ScopedThread([] {
  1729      const pid_t tgid = getpid();
  1730      const pid_t tid = syscall(SYS_gettid);
  1731      EXPECT_NE(tgid, tid);
  1732      const auto thread_name = ASSERT_NO_ERRNO_AND_VALUE(ThreadName());
  1733  
  1734      std::string status_str = ASSERT_NO_ERRNO_AND_VALUE(
  1735          GetContents(absl::StrCat("/proc/", tid, "/status")));
  1736  
  1737      ASSERT_FALSE(status_str.empty());
  1738      const auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str));
  1739      EXPECT_THAT(status, IsSupersetOf({
  1740                              Pair("Name", thread_name),
  1741                              Pair("Tgid", absl::StrCat(tgid)),
  1742                              Pair("Pid", absl::StrCat(tid)),
  1743                              Pair("PPid", absl::StrCat(getppid())),
  1744                          }));
  1745  
  1746      uid_t ruid, euid, suid;
  1747      ASSERT_THAT(getresuid(&ruid, &euid, &suid), SyscallSucceeds());
  1748      gid_t rgid, egid, sgid;
  1749      ASSERT_THAT(getresgid(&rgid, &egid, &sgid), SyscallSucceeds());
  1750      std::vector<gid_t> supplementary_gids;
  1751      int ngids = getgroups(0, nullptr);
  1752      supplementary_gids.resize(ngids);
  1753      ASSERT_THAT(getgroups(ngids, supplementary_gids.data()), SyscallSucceeds());
  1754  
  1755      EXPECT_THAT(
  1756          status,
  1757          IsSupersetOf(std::vector<
  1758                       ::testing::Matcher<std::pair<std::string, std::string>>>{
  1759              // gVisor doesn't support fsuid/gid, and even if it did there is
  1760              // no getfsuid/getfsgid().
  1761              Pair("Uid",
  1762                   StartsWith(absl::StrFormat("%d\t%d\t%d\t", ruid, euid, suid))),
  1763              Pair("Gid",
  1764                   StartsWith(absl::StrFormat("%d\t%d\t%d\t", rgid, egid, sgid))),
  1765              // ParseProcStatus strips leading whitespace for each value,
  1766              // so if the Groups line is empty then the trailing space is
  1767              // stripped.
  1768              Pair("Groups", StartsWith(absl::StrJoin(supplementary_gids, " "))),
  1769          }));
  1770    });
  1771  }
  1772  
  1773  TEST(ProcPidStatusTest, StateRunning) {
  1774    // Task must be running when reading the file.
  1775    const pid_t tid = syscall(SYS_gettid);
  1776    std::string status_str = ASSERT_NO_ERRNO_AND_VALUE(
  1777        GetContents(absl::StrCat("/proc/", tid, "/status")));
  1778  
  1779    EXPECT_THAT(ParseProcStatus(status_str),
  1780                IsPosixErrorOkAndHolds(Contains(Pair("State", "R (running)"))));
  1781  }
  1782  
  1783  TEST(ProcPidStatusTest, StateSleeping) {
  1784    // Starts a child process that blocks and checks that State is sleeping.
  1785    auto res = WithSubprocess(
  1786        [&](int pid) -> PosixError {
  1787          // Because this test is timing based we will disable cooperative saving
  1788          // and the test itself also has random saving disabled.
  1789          const DisableSave ds;
  1790          // Try multiple times in case the child isn't sleeping when status file
  1791          // is read.
  1792          MonotonicTimer timer;
  1793          timer.Start();
  1794          for (;;) {
  1795            ASSIGN_OR_RETURN_ERRNO(
  1796                std::string status_str,
  1797                GetContents(absl::StrCat("/proc/", pid, "/status")));
  1798            ASSIGN_OR_RETURN_ERRNO(auto map, ParseProcStatus(status_str));
  1799            if (map["State"] == std::string("S (sleeping)")) {
  1800              // Test passed!
  1801              return NoError();
  1802            }
  1803            if (timer.Duration() > absl::Seconds(10)) {
  1804              return PosixError(ETIMEDOUT, "Timeout waiting for child to sleep");
  1805            }
  1806            absl::SleepFor(absl::Milliseconds(10));
  1807          }
  1808        },
  1809        nullptr, nullptr);
  1810    ASSERT_NO_ERRNO(res);
  1811  }
  1812  
  1813  TEST(ProcPidStatusTest, ValuesAreTabDelimited) {
  1814    std::string status_str =
  1815        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status"));
  1816    ASSERT_FALSE(status_str.empty());
  1817    for (absl::string_view const line :
  1818         absl::StrSplit(status_str, '\n', absl::SkipWhitespace())) {
  1819      EXPECT_NE(std::string::npos, line.find(":\t"));
  1820    }
  1821  }
  1822  
  1823  // Threads properly counts running threads.
  1824  //
  1825  // TODO(mpratt): Test zombied threads while the thread group leader is still
  1826  // running with generalized fork and clone children from the wait test.
  1827  TEST(ProcPidStatusTest, Threads) {
  1828    char buf[4096] = {};
  1829    EXPECT_THAT(ReadWhileRunning("status", buf, sizeof(buf) - 1),
  1830                SyscallSucceedsWithValue(Gt(0)));
  1831  
  1832    auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf));
  1833    auto it = status.find("Threads");
  1834    ASSERT_NE(it, status.end());
  1835    int threads = -1;
  1836    EXPECT_TRUE(absl::SimpleAtoi(it->second, &threads))
  1837        << "Threads value " << it->second << " is not a number";
  1838    // Don't make assumptions about the exact number of threads, as it may not be
  1839    // constant.
  1840    EXPECT_GE(threads, 1);
  1841  
  1842    memset(buf, 0, sizeof(buf));
  1843    EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf) - 1),
  1844                SyscallSucceedsWithValue(Gt(0)));
  1845  
  1846    status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(buf));
  1847    it = status.find("Threads");
  1848    ASSERT_NE(it, status.end());
  1849    threads = -1;
  1850    EXPECT_TRUE(absl::SimpleAtoi(it->second, &threads))
  1851        << "Threads value " << it->second << " is not a number";
  1852    // There must be only the thread group leader remaining, zombied.
  1853    EXPECT_EQ(threads, 1);
  1854  }
  1855  
  1856  // Returns true if all characters in s are digits.
  1857  bool IsDigits(absl::string_view s) {
  1858    return std::all_of(s.begin(), s.end(), absl::ascii_isdigit);
  1859  }
  1860  
  1861  TEST(ProcPidStatTest, VmStats) {
  1862    std::string status_str =
  1863        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/status"));
  1864    ASSERT_FALSE(status_str.empty());
  1865    auto status = ASSERT_NO_ERRNO_AND_VALUE(ParseProcStatus(status_str));
  1866  
  1867    const auto vss_it = status.find("VmSize");
  1868    ASSERT_NE(vss_it, status.end());
  1869  
  1870    absl::string_view vss_str(vss_it->second);
  1871  
  1872    // Room for the " kB" suffix plus at least one digit.
  1873    ASSERT_GT(vss_str.length(), 3);
  1874    EXPECT_TRUE(absl::EndsWith(vss_str, " kB"));
  1875    // Everything else is part of a number.
  1876    EXPECT_TRUE(IsDigits(vss_str.substr(0, vss_str.length() - 3))) << vss_str;
  1877    // ... which is not 0.
  1878    EXPECT_NE('0', vss_str[0]);
  1879  
  1880    const auto rss_it = status.find("VmRSS");
  1881    ASSERT_NE(rss_it, status.end());
  1882  
  1883    absl::string_view rss_str(rss_it->second);
  1884  
  1885    // Room for the " kB" suffix plus at least one digit.
  1886    ASSERT_GT(rss_str.length(), 3);
  1887    EXPECT_TRUE(absl::EndsWith(rss_str, " kB"));
  1888    // Everything else is part of a number.
  1889    EXPECT_TRUE(IsDigits(rss_str.substr(0, rss_str.length() - 3))) << rss_str;
  1890    // ... which is not 0.
  1891    EXPECT_NE('0', rss_str[0]);
  1892  
  1893    const auto data_it = status.find("VmData");
  1894    ASSERT_NE(data_it, status.end());
  1895  
  1896    absl::string_view data_str(data_it->second);
  1897  
  1898    // Room for the " kB" suffix plus at least one digit.
  1899    ASSERT_GT(data_str.length(), 3);
  1900    EXPECT_TRUE(absl::EndsWith(data_str, " kB"));
  1901    // Everything else is part of a number.
  1902    EXPECT_TRUE(IsDigits(data_str.substr(0, data_str.length() - 3))) << data_str;
  1903    // ... which is not 0.
  1904    EXPECT_NE('0', data_str[0]);
  1905  }
  1906  
  1907  // Parse an array of NUL-terminated char* arrays, returning a vector of
  1908  // strings.
  1909  std::vector<std::string> ParseNulTerminatedStrings(std::string contents) {
  1910    EXPECT_EQ('\0', contents.back());
  1911    // The split will leave an empty string if the NUL-byte remains, so pop
  1912    // it.
  1913    contents.pop_back();
  1914  
  1915    return absl::StrSplit(contents, '\0');
  1916  }
  1917  
  1918  TEST(ProcPidCmdline, MatchesArgv) {
  1919    std::vector<std::string> proc_cmdline = ParseNulTerminatedStrings(
  1920        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline")));
  1921    EXPECT_THAT(saved_argv, ContainerEq(proc_cmdline));
  1922  }
  1923  
  1924  TEST(ProcPidEnviron, MatchesEnviron) {
  1925    std::vector<std::string> proc_environ = ParseNulTerminatedStrings(
  1926        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/environ")));
  1927    // Get the environment from the environ variable, which we will compare with
  1928    // /proc/self/environ.
  1929    std::vector<std::string> env;
  1930    for (char** v = environ; *v; v++) {
  1931      env.push_back(*v);
  1932    }
  1933    EXPECT_THAT(env, ContainerEq(proc_environ));
  1934  }
  1935  
  1936  TEST(ProcPidCmdline, SubprocessForkSameCmdline) {
  1937    std::vector<std::string> proc_cmdline_parent;
  1938    std::vector<std::string> proc_cmdline;
  1939    proc_cmdline_parent = ParseNulTerminatedStrings(
  1940        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/cmdline")));
  1941    auto res = WithSubprocess(
  1942        [&](int pid) -> PosixError {
  1943          ASSIGN_OR_RETURN_ERRNO(
  1944              auto raw_cmdline,
  1945              GetContents(absl::StrCat("/proc/", pid, "/cmdline")));
  1946          proc_cmdline = ParseNulTerminatedStrings(raw_cmdline);
  1947          return NoError();
  1948        },
  1949        nullptr, nullptr);
  1950    ASSERT_NO_ERRNO(res);
  1951  
  1952    for (size_t i = 0; i < proc_cmdline_parent.size(); i++) {
  1953      EXPECT_EQ(proc_cmdline_parent[i], proc_cmdline[i]);
  1954    }
  1955  }
  1956  
  1957  TEST(ProcPidCmdline, SubprocessSeekCmdline) {
  1958    FileDescriptor fd;
  1959    ASSERT_NO_ERRNO(WithSubprocess(
  1960        [&](int pid) -> PosixError {
  1961          // Running. Open /proc/pid/cmdline.
  1962          ASSIGN_OR_RETURN_ERRNO(
  1963              fd, Open(absl::StrCat("/proc/", pid, "/cmdline"), O_RDONLY));
  1964          return NoError();
  1965        },
  1966        [&](int pid) -> PosixError {
  1967          // Zombie, but seek should still succeed.
  1968          int ret = lseek(fd.get(), 0x801, 0);
  1969          if (ret < 0) {
  1970            return PosixError(errno);
  1971          }
  1972          return NoError();
  1973        },
  1974        [&](int pid) -> PosixError {
  1975          // Exited.
  1976          int ret = lseek(fd.get(), 0x801, 0);
  1977          if (ret < 0) {
  1978            return PosixError(errno);
  1979          }
  1980          return NoError();
  1981        }));
  1982  }
  1983  
  1984  // Test whether /proc/PID/ symlinks can be read for a running process.
  1985  TEST(ProcPidSymlink, SubprocessRunning) {
  1986    char buf[1];
  1987  
  1988    EXPECT_THAT(ReadlinkWhileRunning("exe", buf, sizeof(buf)),
  1989                SyscallSucceedsWithValue(sizeof(buf)));
  1990  
  1991    EXPECT_THAT(ReadlinkWhileRunning("ns/net", buf, sizeof(buf)),
  1992                SyscallSucceedsWithValue(sizeof(buf)));
  1993  
  1994    EXPECT_THAT(ReadlinkWhileRunning("ns/pid", buf, sizeof(buf)),
  1995                SyscallSucceedsWithValue(sizeof(buf)));
  1996  
  1997    EXPECT_THAT(ReadlinkWhileRunning("ns/user", buf, sizeof(buf)),
  1998                SyscallSucceedsWithValue(sizeof(buf)));
  1999  }
  2000  
  2001  TEST(ProcPidSymlink, SubprocessZombied) {
  2002    AutoCapability cap1(CAP_DAC_OVERRIDE, false);
  2003    AutoCapability cap2(CAP_DAC_READ_SEARCH, false);
  2004  
  2005    char buf[1];
  2006  
  2007    int want = EACCES;
  2008    if (!IsRunningOnGvisor()) {
  2009      auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion());
  2010      if (version.major > 4 || (version.major == 4 && version.minor > 3)) {
  2011        want = ENOENT;
  2012      }
  2013    }
  2014  
  2015    EXPECT_THAT(ReadlinkWhileZombied("exe", buf, sizeof(buf)),
  2016                SyscallFailsWithErrno(want));
  2017  
  2018    if (!IsRunningOnGvisor()) {
  2019      EXPECT_THAT(ReadlinkWhileZombied("ns/net", buf, sizeof(buf)),
  2020                  SyscallFailsWithErrno(want));
  2021    }
  2022  
  2023    // FIXME(gvisor.dev/issue/164): Inconsistent behavior between linux on proc
  2024    // files.
  2025    //
  2026    // ~4.3: Syscall fails with EACCES.
  2027    // 4.17: Syscall succeeds and returns 1.
  2028    //
  2029    if (!IsRunningOnGvisor()) {
  2030      return;
  2031    }
  2032  
  2033    EXPECT_THAT(ReadlinkWhileZombied("ns/pid", buf, sizeof(buf)),
  2034                SyscallFailsWithErrno(want));
  2035  
  2036    EXPECT_THAT(ReadlinkWhileZombied("ns/user", buf, sizeof(buf)),
  2037                SyscallFailsWithErrno(want));
  2038  }
  2039  
  2040  // Test whether /proc/PID/ symlinks can be read for an exited process.
  2041  TEST(ProcPidSymlink, SubprocessExited) {
  2042    char buf[1];
  2043  
  2044    EXPECT_THAT(ReadlinkWhileExited("exe", buf, sizeof(buf)),
  2045                SyscallFailsWithErrno(ESRCH));
  2046  
  2047    EXPECT_THAT(ReadlinkWhileExited("ns/net", buf, sizeof(buf)),
  2048                SyscallFailsWithErrno(ESRCH));
  2049  
  2050    EXPECT_THAT(ReadlinkWhileExited("ns/pid", buf, sizeof(buf)),
  2051                SyscallFailsWithErrno(ESRCH));
  2052  
  2053    EXPECT_THAT(ReadlinkWhileExited("ns/user", buf, sizeof(buf)),
  2054                SyscallFailsWithErrno(ESRCH));
  2055  }
  2056  
  2057  // /proc/PID/exe points to the correct binary.
  2058  TEST(ProcPidExe, Subprocess) {
  2059    auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/exe"));
  2060    auto expected_absolute_path =
  2061        ASSERT_NO_ERRNO_AND_VALUE(MakeAbsolute(link, ""));
  2062  
  2063    char actual[PATH_MAX + 1] = {};
  2064    ASSERT_THAT(ReadlinkWhileRunning("exe", actual, sizeof(actual)),
  2065                SyscallSucceedsWithValue(Gt(0)));
  2066    EXPECT_EQ(actual, expected_absolute_path);
  2067  }
  2068  
  2069  // /proc/PID/cwd points to the correct directory.
  2070  TEST(ProcPidCwd, Subprocess) {
  2071    auto want = ASSERT_NO_ERRNO_AND_VALUE(GetCWD());
  2072  
  2073    char got[PATH_MAX + 1] = {};
  2074    ASSERT_THAT(ReadlinkWhileRunning("cwd", got, sizeof(got)),
  2075                SyscallSucceedsWithValue(Gt(0)));
  2076    EXPECT_EQ(got, want);
  2077  }
  2078  
  2079  // /proc/PID/root points to the correct directory.
  2080  TEST(ProcPidRoot, Subprocess) {
  2081    char got[PATH_MAX + 1] = {};
  2082    ASSERT_THAT(ReadlinkWhileRunning("root", got, sizeof(got)),
  2083                SyscallSucceedsWithValue(Gt(0)));
  2084    EXPECT_STREQ(got, "/");
  2085  }
  2086  
  2087  // Test whether /proc/PID/ files can be read for a running process.
  2088  TEST(ProcPidFile, SubprocessRunning) {
  2089    char buf[1];
  2090  
  2091    EXPECT_THAT(ReadWhileRunning("auxv", buf, sizeof(buf)),
  2092                SyscallSucceedsWithValue(sizeof(buf)));
  2093  
  2094    EXPECT_THAT(ReadWhileRunning("cmdline", buf, sizeof(buf)),
  2095                SyscallSucceedsWithValue(sizeof(buf)));
  2096  
  2097    EXPECT_THAT(ReadWhileRunning("comm", buf, sizeof(buf)),
  2098                SyscallSucceedsWithValue(sizeof(buf)));
  2099  
  2100    EXPECT_THAT(ReadWhileRunning("gid_map", buf, sizeof(buf)),
  2101                SyscallSucceedsWithValue(sizeof(buf)));
  2102  
  2103    EXPECT_THAT(ReadWhileRunning("io", buf, sizeof(buf)),
  2104                SyscallSucceedsWithValue(sizeof(buf)));
  2105  
  2106    EXPECT_THAT(ReadWhileRunning("maps", buf, sizeof(buf)),
  2107                SyscallSucceedsWithValue(sizeof(buf)));
  2108  
  2109    EXPECT_THAT(ReadWhileRunning("stat", buf, sizeof(buf)),
  2110                SyscallSucceedsWithValue(sizeof(buf)));
  2111  
  2112    EXPECT_THAT(ReadWhileRunning("status", buf, sizeof(buf)),
  2113                SyscallSucceedsWithValue(sizeof(buf)));
  2114  
  2115    EXPECT_THAT(ReadWhileRunning("uid_map", buf, sizeof(buf)),
  2116                SyscallSucceedsWithValue(sizeof(buf)));
  2117  
  2118    EXPECT_THAT(ReadWhileRunning("oom_score", buf, sizeof(buf)),
  2119                SyscallSucceedsWithValue(sizeof(buf)));
  2120  
  2121    EXPECT_THAT(ReadWhileRunning("oom_score_adj", buf, sizeof(buf)),
  2122                SyscallSucceedsWithValue(sizeof(buf)));
  2123  }
  2124  
  2125  // Test whether /proc/PID/ files can be read for a zombie process.
  2126  TEST(ProcPidFile, SubprocessZombie) {
  2127    char buf[1];
  2128  
  2129    // FIXME(gvisor.dev/issue/164): Loosen requirement due to inconsistent
  2130    // behavior on different kernels.
  2131    //
  2132    // ~4.3: Succeds and returns 0.
  2133    // 4.17: Succeeds and returns 1.
  2134    // gVisor: Succeeds and returns 0.
  2135    EXPECT_THAT(ReadWhileZombied("auxv", buf, sizeof(buf)), SyscallSucceeds());
  2136  
  2137    EXPECT_THAT(ReadWhileZombied("cmdline", buf, sizeof(buf)),
  2138                SyscallSucceedsWithValue(0));
  2139  
  2140    EXPECT_THAT(ReadWhileZombied("comm", buf, sizeof(buf)),
  2141                SyscallSucceedsWithValue(sizeof(buf)));
  2142  
  2143    EXPECT_THAT(ReadWhileZombied("gid_map", buf, sizeof(buf)),
  2144                SyscallSucceedsWithValue(sizeof(buf)));
  2145  
  2146    EXPECT_THAT(ReadWhileZombied("maps", buf, sizeof(buf)),
  2147                SyscallSucceedsWithValue(0));
  2148  
  2149    EXPECT_THAT(ReadWhileZombied("stat", buf, sizeof(buf)),
  2150                SyscallSucceedsWithValue(sizeof(buf)));
  2151  
  2152    EXPECT_THAT(ReadWhileZombied("status", buf, sizeof(buf)),
  2153                SyscallSucceedsWithValue(sizeof(buf)));
  2154  
  2155    EXPECT_THAT(ReadWhileZombied("uid_map", buf, sizeof(buf)),
  2156                SyscallSucceedsWithValue(sizeof(buf)));
  2157  
  2158    EXPECT_THAT(ReadWhileZombied("oom_score", buf, sizeof(buf)),
  2159                SyscallSucceedsWithValue(sizeof(buf)));
  2160  
  2161    EXPECT_THAT(ReadWhileZombied("oom_score_adj", buf, sizeof(buf)),
  2162                SyscallSucceedsWithValue(sizeof(buf)));
  2163  
  2164    // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux
  2165    // on proc files.
  2166    //
  2167    // ~4.3: Fails and returns EACCES.
  2168    // gVisor & 4.17: Succeeds and returns 1.
  2169    //
  2170    // EXPECT_THAT(ReadWhileZombied("io", buf, sizeof(buf)),
  2171    //          SyscallFailsWithErrno(EACCES));
  2172  }
  2173  
  2174  // Test whether /proc/PID/ files can be read for an exited process.
  2175  TEST(ProcPidFile, SubprocessExited) {
  2176    char buf[1];
  2177  
  2178    // FIXME(gvisor.dev/issue/164): Inconsistent behavior between kernels.
  2179    //
  2180    // ~4.3: Fails and returns ESRCH.
  2181    // gVisor: Fails with ESRCH.
  2182    // 4.17: Succeeds and returns 1.
  2183    //
  2184    // EXPECT_THAT(ReadWhileExited("auxv", buf, sizeof(buf)),
  2185    //            SyscallFailsWithErrno(ESRCH));
  2186  
  2187    EXPECT_THAT(ReadWhileExited("cmdline", buf, sizeof(buf)),
  2188                SyscallFailsWithErrno(ESRCH));
  2189  
  2190    if (!IsRunningOnGvisor()) {
  2191      // FIXME(gvisor.dev/issue/164): Succeeds on gVisor.
  2192      EXPECT_THAT(ReadWhileExited("comm", buf, sizeof(buf)),
  2193                  SyscallFailsWithErrno(ESRCH));
  2194    }
  2195  
  2196    EXPECT_THAT(ReadWhileExited("gid_map", buf, sizeof(buf)),
  2197                SyscallSucceedsWithValue(sizeof(buf)));
  2198  
  2199    if (!IsRunningOnGvisor()) {
  2200      // FIXME(gvisor.dev/issue/164): Succeeds on gVisor.
  2201      EXPECT_THAT(ReadWhileExited("io", buf, sizeof(buf)),
  2202                  SyscallFailsWithErrno(ESRCH));
  2203    }
  2204  
  2205    if (!IsRunningOnGvisor()) {
  2206      // FIXME(gvisor.dev/issue/164): Returns EOF on gVisor.
  2207      EXPECT_THAT(ReadWhileExited("maps", buf, sizeof(buf)),
  2208                  SyscallFailsWithErrno(ESRCH));
  2209    }
  2210  
  2211    if (!IsRunningOnGvisor()) {
  2212      // FIXME(gvisor.dev/issue/164): Succeeds on gVisor.
  2213      EXPECT_THAT(ReadWhileExited("stat", buf, sizeof(buf)),
  2214                  SyscallFailsWithErrno(ESRCH));
  2215    }
  2216  
  2217    if (!IsRunningOnGvisor()) {
  2218      // FIXME(gvisor.dev/issue/164): Succeeds on gVisor.
  2219      EXPECT_THAT(ReadWhileExited("status", buf, sizeof(buf)),
  2220                  SyscallFailsWithErrno(ESRCH));
  2221    }
  2222  
  2223    EXPECT_THAT(ReadWhileExited("uid_map", buf, sizeof(buf)),
  2224                SyscallSucceedsWithValue(sizeof(buf)));
  2225  
  2226    if (!IsRunningOnGvisor()) {
  2227      // FIXME(gvisor.dev/issue/164): Succeeds on gVisor.
  2228      EXPECT_THAT(ReadWhileExited("oom_score", buf, sizeof(buf)),
  2229                  SyscallFailsWithErrno(ESRCH));
  2230    }
  2231  
  2232    EXPECT_THAT(ReadWhileExited("oom_score_adj", buf, sizeof(buf)),
  2233                SyscallFailsWithErrno(ESRCH));
  2234  }
  2235  
  2236  PosixError DirContains(absl::string_view path,
  2237                         const std::vector<std::string>& expect,
  2238                         const std::vector<std::string>& exclude) {
  2239    ASSIGN_OR_RETURN_ERRNO(auto listing, ListDir(path, false));
  2240  
  2241    for (auto& expected_entry : expect) {
  2242      auto cursor = std::find(listing.begin(), listing.end(), expected_entry);
  2243      if (cursor == listing.end()) {
  2244        return PosixError(
  2245            ENOENT,
  2246            absl::StrCat("Failed to find one or more paths in '", path, "'"));
  2247      }
  2248    }
  2249    for (auto& excluded_entry : exclude) {
  2250      auto cursor = std::find(listing.begin(), listing.end(), excluded_entry);
  2251      if (cursor != listing.end()) {
  2252        return PosixError(ENOENT, absl::StrCat("File '", excluded_entry,
  2253                                               "' found in path '", path, "'"));
  2254      }
  2255    }
  2256    return NoError();
  2257  }
  2258  
  2259  PosixError EventuallyDirContains(absl::string_view path,
  2260                                   const std::vector<std::string>& expect,
  2261                                   const std::vector<std::string>& exclude) {
  2262    constexpr int kRetryCount = 100;
  2263    const absl::Duration kRetryDelay = absl::Milliseconds(100);
  2264  
  2265    for (int i = 0; i < kRetryCount; ++i) {
  2266      auto res = DirContains(path, expect, exclude);
  2267      if (res.ok()) {
  2268        return res;
  2269      } else if (i < kRetryCount - 1) {
  2270        // Sleep if this isn't the final iteration.
  2271        absl::SleepFor(kRetryDelay);
  2272      }
  2273    }
  2274    return PosixError(ETIMEDOUT,
  2275                      "Timed out while waiting for directory to contain files ");
  2276  }
  2277  
  2278  std::vector<std::string> TaskFiles(const std::vector<pid_t>& pids) {
  2279    return ApplyVec<std::string>([](const pid_t p) { return absl::StrCat(p); },
  2280                                 pids);
  2281  }
  2282  
  2283  TEST(ProcTask, Basic) {
  2284    EXPECT_NO_ERRNO(
  2285        DirContains("/proc/self/task", {".", "..", absl::StrCat(getpid())}, {}));
  2286  }
  2287  
  2288  // Helper class for creating a new task in the current thread group.
  2289  class BlockingChild {
  2290   public:
  2291    BlockingChild() : thread_([=] { Start(); }) {}
  2292    ~BlockingChild() { Join(); }
  2293  
  2294    pid_t Tid() const {
  2295      absl::MutexLock ml(&mu_);
  2296      mu_.Await(absl::Condition(&tid_ready_));
  2297      return tid_;
  2298    }
  2299  
  2300    void Join() {
  2301      {
  2302        absl::MutexLock ml(&mu_);
  2303        stop_ = true;
  2304      }
  2305      thread_.Join();
  2306    }
  2307  
  2308   private:
  2309    void Start() {
  2310      absl::MutexLock ml(&mu_);
  2311      tid_ = syscall(__NR_gettid);
  2312      tid_ready_ = true;
  2313      mu_.Await(absl::Condition(&stop_));
  2314    }
  2315  
  2316    mutable absl::Mutex mu_;
  2317    bool stop_ ABSL_GUARDED_BY(mu_) = false;
  2318    pid_t tid_;
  2319    bool tid_ready_ ABSL_GUARDED_BY(mu_) = false;
  2320  
  2321    // Must be last to ensure that the destructor for the thread is run before
  2322    // any other member of the object is destroyed.
  2323    ScopedThread thread_;
  2324  };
  2325  
  2326  TEST(ProcTask, NewThreadAppears) {
  2327    BlockingChild child1;
  2328    EXPECT_NO_ERRNO(
  2329        DirContains("/proc/self/task", TaskFiles({child1.Tid()}), {}));
  2330  }
  2331  
  2332  TEST(ProcTask, KilledThreadsDisappear) {
  2333    BlockingChild child1;
  2334    EXPECT_NO_ERRNO(
  2335        DirContains("/proc/self/task", TaskFiles({child1.Tid()}), {}));
  2336  
  2337    // Stat child1's task file. Regression test for b/32097707.
  2338    struct stat statbuf;
  2339    const std::string child1_task_file =
  2340        absl::StrCat("/proc/self/task/", child1.Tid());
  2341    EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf), SyscallSucceeds());
  2342  
  2343    BlockingChild child2;
  2344    EXPECT_NO_ERRNO(DirContains("/proc/self/task",
  2345                                TaskFiles({child1.Tid(), child2.Tid()}), {}));
  2346  
  2347    BlockingChild child3;
  2348    BlockingChild child4;
  2349    BlockingChild child5;
  2350    EXPECT_NO_ERRNO(
  2351        DirContains("/proc/self/task",
  2352                    TaskFiles({child1.Tid(), child2.Tid(), child3.Tid(),
  2353                               child4.Tid(), child5.Tid()}),
  2354                    {}));
  2355  
  2356    child2.Join();
  2357    EXPECT_NO_ERRNO(EventuallyDirContains(
  2358        "/proc/self/task",
  2359        TaskFiles({child1.Tid(), child3.Tid(), child4.Tid(), child5.Tid()}),
  2360        TaskFiles({child2.Tid()})));
  2361  
  2362    child1.Join();
  2363    child4.Join();
  2364    EXPECT_NO_ERRNO(EventuallyDirContains(
  2365        "/proc/self/task", TaskFiles({child3.Tid(), child5.Tid()}),
  2366        TaskFiles({child2.Tid(), child1.Tid(), child4.Tid()})));
  2367  
  2368    // Stat child1's task file again.  This time it should fail. See b/32097707.
  2369    EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf),
  2370                SyscallFailsWithErrno(ENOENT));
  2371  
  2372    child3.Join();
  2373    child5.Join();
  2374    EXPECT_NO_ERRNO(
  2375        EventuallyDirContains("/proc/self/task", {},
  2376                              TaskFiles({child2.Tid(), child1.Tid(), child4.Tid(),
  2377                                         child3.Tid(), child5.Tid()})));
  2378  }
  2379  
  2380  TEST(ProcTask, ChildTaskDir) {
  2381    BlockingChild child1;
  2382    EXPECT_NO_ERRNO(
  2383        DirContains("/proc/self/task", TaskFiles({child1.Tid()}), {}));
  2384    EXPECT_NO_ERRNO(DirContains(absl::StrCat("/proc/", child1.Tid(), "/task"),
  2385                                TaskFiles({child1.Tid()}), {}));
  2386  }
  2387  
  2388  PosixError VerifyPidDir(std::string path) {
  2389    return DirContains(path, {"exe", "fd", "io", "maps", "ns", "stat", "status"},
  2390                       {});
  2391  }
  2392  
  2393  TEST(ProcTask, VerifyTaskDir) {
  2394    EXPECT_NO_ERRNO(VerifyPidDir("/proc/self"));
  2395  
  2396    EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", getpid())));
  2397    BlockingChild child1;
  2398    EXPECT_NO_ERRNO(VerifyPidDir(absl::StrCat("/proc/self/task/", child1.Tid())));
  2399  
  2400    // Only the first level of task directories should contain the 'task'
  2401    // directory. That is:
  2402    //
  2403    // /proc/1234/task           <- should exist
  2404    // /proc/1234/task/1234/task <- should not exist
  2405    // /proc/1234/task/1235/task <- should not exist (where 1235 is in the same
  2406    //                                                thread group as 1234).
  2407    EXPECT_NO_ERRNO(
  2408        DirContains(absl::StrCat("/proc/self/task/", getpid()), {}, {"task"}));
  2409  }
  2410  
  2411  TEST(ProcTask, VerifyTaskChildren) {
  2412    auto path = JoinPath("/proc", absl::StrCat(getpid()), "task",
  2413                         absl::StrCat(gettid()), "children");
  2414    EXPECT_THAT(access(path.c_str(), F_OK), SyscallSucceeds());
  2415  
  2416    int pid1 = -1, status1 = -1;
  2417    auto cleanup1 =
  2418        ForkAndExec("/bin/sleep", {"sleep", "100"}, {}, nullptr, &pid1, &status1);
  2419    ASSERT_GT(pid1, 0);
  2420    ASSERT_EQ(status1, 0);
  2421  
  2422    auto proc_children_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents(path));
  2423    EXPECT_EQ(absl::StrCat(pid1, " "), proc_children_file);
  2424  
  2425    int pid2 = -1, status2 = -1;
  2426    auto cleanup2 =
  2427        ForkAndExec("/bin/sleep", {"sleep", "100"}, {}, nullptr, &pid2, &status2);
  2428    ASSERT_GT(pid2, 0);
  2429    ASSERT_EQ(status2, 0);
  2430  
  2431    proc_children_file = ASSERT_NO_ERRNO_AND_VALUE(GetContents(path));
  2432  
  2433    // /children contains space-separated sorted list of thread Ids of children.
  2434    std::string expectedContent;
  2435    if (pid1 < pid2) {
  2436      expectedContent = absl::StrCat(pid1, " ", pid2, " ");
  2437    } else {
  2438      expectedContent = absl::StrCat(pid2, " ", pid1, " ");
  2439    }
  2440    EXPECT_EQ(expectedContent, proc_children_file);
  2441  }
  2442  
  2443  TEST(ProcTask, TaskDirCannotBeDeleted) {
  2444    // Drop capabilities that allow us to override file and directory permissions.
  2445    AutoCapability cap(CAP_DAC_OVERRIDE, false);
  2446  
  2447    EXPECT_THAT(rmdir("/proc/self/task"), SyscallFails());
  2448    EXPECT_THAT(rmdir(absl::StrCat("/proc/self/task/", getpid()).c_str()),
  2449                SyscallFailsWithErrno(EACCES));
  2450  }
  2451  
  2452  TEST(ProcTask, TaskDirHasCorrectMetadata) {
  2453    struct stat st;
  2454    EXPECT_THAT(stat("/proc/self/task", &st), SyscallSucceeds());
  2455    EXPECT_TRUE(S_ISDIR(st.st_mode));
  2456  
  2457    // Verify file is readable and executable by everyone.
  2458    mode_t expected_permissions =
  2459        S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
  2460    mode_t permissions = st.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
  2461    EXPECT_EQ(expected_permissions, permissions);
  2462  }
  2463  
  2464  TEST(ProcTask, TaskDirCanSeekToEnd) {
  2465    const FileDescriptor dirfd =
  2466        ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/task", O_RDONLY));
  2467    EXPECT_THAT(lseek(dirfd.get(), 0, SEEK_END), SyscallSucceeds());
  2468  }
  2469  
  2470  TEST(ProcTask, VerifyTaskDirNlinks) {
  2471    const auto fn = [] {
  2472      // A task directory will have 3 links if the taskgroup has a single
  2473      // thread. For example, the following shows where the links to
  2474      // '/proc/12345/task' comes from for a single threaded process with pid
  2475      // 12345:
  2476      //
  2477      //   /proc/12345/task  <-- 1 link for the directory itself
  2478      //     .               <-- link from "."
  2479      //     ..
  2480      //     12345
  2481      //       .
  2482      //       ..            <-- link from ".." to parent.
  2483      //       <other contents of a task dir>
  2484      //
  2485      // We can't assert an absolute number of links since we don't control how
  2486      // many threads the test framework spawns. Instead, we'll ensure creating a
  2487      // new thread increases the number of links as expected.
  2488  
  2489      // Once we reach the test body, we can count on the thread count being
  2490      // stable unless we spawn a new one.
  2491      const uint64_t initial_links =
  2492          TEST_CHECK_NO_ERRNO_AND_VALUE(Links("/proc/self/task"));
  2493      TEST_CHECK(initial_links >= 3);
  2494  
  2495      // For each new subtask, we should gain a new link.
  2496      BlockingChild child1;
  2497      uint64_t links = TEST_CHECK_NO_ERRNO_AND_VALUE(Links("/proc/self/task"));
  2498      TEST_CHECK(links == initial_links + 1);
  2499  
  2500      BlockingChild child2;
  2501      links = TEST_CHECK_NO_ERRNO_AND_VALUE(Links("/proc/self/task"));
  2502      TEST_CHECK(links == initial_links + 2);
  2503    };
  2504    // Run as a forked process to prevent terminating tasks from other tests to
  2505    // show up here and race with the count.
  2506    EXPECT_THAT(InForkedProcess(fn), IsPosixErrorOkAndHolds(0));
  2507  }
  2508  
  2509  TEST(ProcTask, CommContainsThreadNameAndTrailingNewline) {
  2510    constexpr char kThreadName[] = "TestThread12345";
  2511    ASSERT_THAT(prctl(PR_SET_NAME, kThreadName), SyscallSucceeds());
  2512  
  2513    auto thread_name = ASSERT_NO_ERRNO_AND_VALUE(
  2514        GetContents(JoinPath("/proc", absl::StrCat(getpid()), "task",
  2515                             absl::StrCat(syscall(SYS_gettid)), "comm")));
  2516    EXPECT_EQ(absl::StrCat(kThreadName, "\n"), thread_name);
  2517  }
  2518  
  2519  TEST(ProcTask, CommCanSetSelfThreadName) {
  2520    auto path = JoinPath("/proc", absl::StrCat(getpid()), "task",
  2521                         absl::StrCat(syscall(SYS_gettid)), "comm");
  2522    constexpr char kThreadName[] = "TestThread12345";
  2523    ASSERT_NO_ERRNO(SetContents(path, kThreadName));
  2524  
  2525    auto got_thread_name = ASSERT_NO_ERRNO_AND_VALUE(GetContents(path));
  2526    EXPECT_EQ(absl::StrCat(kThreadName, "\n"), got_thread_name);
  2527  }
  2528  
  2529  TEST(ProcTask, CommCanSetPeerThreadName) {
  2530    constexpr char kThreadName[] = "TestThread12345";
  2531  
  2532    // Path correspond to *this* thread's tid. We will changed it from the new
  2533    // thread created below.
  2534    auto path = JoinPath("/proc", absl::StrCat(getpid()), "task",
  2535                         absl::StrCat(syscall(SYS_gettid)), "comm");
  2536  
  2537    // Start a thread that will set this parent threads name.
  2538    ScopedThread peer_thread(
  2539        [&]() { ASSERT_NO_ERRNO(SetContents(path, kThreadName)); });
  2540  
  2541    peer_thread.Join();
  2542  
  2543    // Our thread name should have been updated.
  2544    auto got_thread_name = ASSERT_NO_ERRNO_AND_VALUE(GetContents(path));
  2545    EXPECT_EQ(absl::StrCat(kThreadName, "\n"), got_thread_name);
  2546  }
  2547  
  2548  TEST(ProcTask, CommCannotSetAnotherProcessThreadName) {
  2549    // Path correspond to *this* thread's pid and tid.
  2550    auto path = JoinPath("/proc", absl::StrCat(getpid()), "task",
  2551                         absl::StrCat(syscall(SYS_gettid)), "comm");
  2552  
  2553    auto rest = [&] {
  2554      // New process is allowed to open the file, even for writing, since the
  2555      // owning user is the same.
  2556      int fd;
  2557      TEST_CHECK_SUCCESS(fd = open(path.c_str(), O_WRONLY));
  2558  
  2559      // Write gets EINVAL since the thread group is different. See Linux
  2560      // fs/proc/base.c:comm_write.
  2561      TEST_CHECK_ERRNO(write(fd, "x", 1), EINVAL);
  2562    };
  2563  
  2564    EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
  2565  }
  2566  
  2567  TEST(ProcTask, CommLenLimited) {
  2568    auto path = JoinPath("/proc", absl::StrCat(getpid()), "task",
  2569                         absl::StrCat(syscall(SYS_gettid)), "comm");
  2570    // comm is limited by 15 symbols (TASK_COMM_LEN).
  2571    constexpr char kThreadName[] = "0123456789abcde";
  2572    ASSERT_NO_ERRNO(SetContents(path, absl::StrCat(kThreadName, "XYZ")));
  2573  
  2574    auto got_thread_name = ASSERT_NO_ERRNO_AND_VALUE(GetContents(path));
  2575    EXPECT_EQ(absl::StrCat(kThreadName, "\n"), got_thread_name);
  2576  }
  2577  
  2578  TEST(ProcTaskNs, NsDirExistsAndHasCorrectMetadata) {
  2579    EXPECT_NO_ERRNO(DirContains("/proc/self/ns", {"net", "pid", "user"}, {}));
  2580  
  2581    // Let's just test the 'pid' entry, all of them are very similar.
  2582    struct stat st;
  2583    EXPECT_THAT(lstat("/proc/self/ns/pid", &st), SyscallSucceeds());
  2584    EXPECT_TRUE(S_ISLNK(st.st_mode));
  2585  
  2586    auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/self/ns/pid"));
  2587    EXPECT_THAT(link, ::testing::StartsWith("pid:["));
  2588  }
  2589  
  2590  TEST(ProcTaskNs, AccessOnNsNodeSucceeds) {
  2591    EXPECT_THAT(access("/proc/self/ns/pid", F_OK), SyscallSucceeds());
  2592  }
  2593  
  2594  TEST(ProcSysKernelHostname, Exists) {
  2595    EXPECT_THAT(open("/proc/sys/kernel/hostname", O_RDONLY), SyscallSucceeds());
  2596  }
  2597  
  2598  TEST(ProcSysKernelHostname, MatchesUname) {
  2599    struct utsname buf;
  2600    EXPECT_THAT(uname(&buf), SyscallSucceeds());
  2601    const std::string hostname = absl::StrCat(buf.nodename, "\n");
  2602    auto procfs_hostname =
  2603        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/hostname"));
  2604    EXPECT_EQ(procfs_hostname, hostname);
  2605  }
  2606  
  2607  TEST(ProcSysVmMaxmapCount, HasNumericValue) {
  2608    const std::string val_str =
  2609        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/max_map_count"));
  2610    int32_t val;
  2611    EXPECT_TRUE(absl::SimpleAtoi(val_str, &val))
  2612        << "/proc/sys/vm/max_map_count does not contain a numeric value: "
  2613        << val_str;
  2614  }
  2615  
  2616  TEST(ProcSysVmMmapMinAddr, HasNumericValue) {
  2617    const std::string mmap_min_addr_str =
  2618        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/mmap_min_addr"));
  2619    uintptr_t mmap_min_addr;
  2620    EXPECT_TRUE(absl::SimpleAtoi(mmap_min_addr_str, &mmap_min_addr))
  2621        << "/proc/sys/vm/mmap_min_addr does not contain a numeric value: "
  2622        << mmap_min_addr_str;
  2623  }
  2624  
  2625  TEST(ProcSysVmOvercommitMemory, HasNumericValue) {
  2626    const std::string overcommit_memory_str =
  2627        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/vm/overcommit_memory"));
  2628    uintptr_t overcommit_memory;
  2629    EXPECT_TRUE(absl::SimpleAtoi(overcommit_memory_str, &overcommit_memory))
  2630        << "/proc/sys/vm/overcommit_memory does not contain a numeric value: "
  2631        << overcommit_memory;
  2632  }
  2633  
  2634  // Check that link for proc fd entries point the target node, not the
  2635  // symlink itself. Regression test for b/31155070.
  2636  TEST(ProcTaskFd, FstatatFollowsSymlink) {
  2637    const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
  2638    const FileDescriptor fd =
  2639        ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDONLY));
  2640  
  2641    struct stat sproc = {};
  2642    EXPECT_THAT(
  2643        fstatat(-1, absl::StrCat("/proc/self/fd/", fd.get()).c_str(), &sproc, 0),
  2644        SyscallSucceeds());
  2645  
  2646    struct stat sfile = {};
  2647    EXPECT_THAT(fstatat(-1, file.path().c_str(), &sfile, 0), SyscallSucceeds());
  2648  
  2649    // If fstatat follows the fd symlink, the device and inode numbers should
  2650    // match at a minimum.
  2651    EXPECT_EQ(sproc.st_dev, sfile.st_dev);
  2652    EXPECT_EQ(sproc.st_ino, sfile.st_ino);
  2653    EXPECT_EQ(0, memcmp(&sfile, &sproc, sizeof(sfile)));
  2654  }
  2655  
  2656  TEST(ProcFilesystems, Bug65172365) {
  2657    std::string proc_filesystems =
  2658        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/filesystems"));
  2659    ASSERT_FALSE(proc_filesystems.empty());
  2660  }
  2661  
  2662  // Check that /proc/mounts is a symlink to self/mounts.
  2663  TEST(ProcMounts, IsSymlink) {
  2664    auto link = ASSERT_NO_ERRNO_AND_VALUE(ReadLink("/proc/mounts"));
  2665    EXPECT_EQ(link, "self/mounts");
  2666  }
  2667  
  2668  TEST(ProcSelfMountinfo, RequiredFieldsArePresent) {
  2669    auto mountinfo =
  2670        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mountinfo"));
  2671    EXPECT_THAT(
  2672        mountinfo,
  2673        AllOf(
  2674            // Root mount.
  2675            ContainsRegex(
  2676                R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ /\S* / (rw|ro).*- \S+ \S+ (rw|ro)\S*)"),
  2677            // Proc mount - always rw.
  2678            ContainsRegex(
  2679                R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ / /proc rw.*- \S+ \S+ rw\S*)")));
  2680  }
  2681  
  2682  TEST(ProcSelfMountinfo, ContainsProcfsEntry) {
  2683    const std::vector<ProcMountInfoEntry> entries =
  2684        ASSERT_NO_ERRNO_AND_VALUE(ProcSelfMountInfoEntries());
  2685    bool found = false;
  2686    for (const auto& e : entries) {
  2687      if (e.fstype == "proc") {
  2688        found = true;
  2689        break;
  2690      }
  2691    }
  2692    EXPECT_TRUE(found);
  2693  }
  2694  
  2695  // Check that /proc/self/mounts looks something like a real mounts file.
  2696  TEST(ProcSelfMounts, RequiredFieldsArePresent) {
  2697    auto mounts = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mounts"));
  2698    EXPECT_THAT(mounts,
  2699                AllOf(
  2700                    // Root mount.
  2701                    ContainsRegex(R"(\S+ / \S+ (rw|ro)\S* [0-9]+ [0-9]+\s)"),
  2702                    // Root mount.
  2703                    ContainsRegex(R"(\S+ /proc \S+ rw\S* [0-9]+ [0-9]+\s)")));
  2704  }
  2705  
  2706  TEST(ProcSelfMounts, ContainsProcfsEntry) {
  2707    const std::vector<ProcMountsEntry> entries =
  2708        ASSERT_NO_ERRNO_AND_VALUE(ProcSelfMountsEntries());
  2709    bool found = false;
  2710    for (const auto& e : entries) {
  2711      if (e.fstype == "proc") {
  2712        found = true;
  2713        break;
  2714      }
  2715    }
  2716    EXPECT_TRUE(found);
  2717  }
  2718  
  2719  void CheckDuplicatesRecursively(std::string path) {
  2720    std::vector<std::string> child_dirs;
  2721  
  2722    // There is the known issue of the linux procfs, that two consequent calls of
  2723    // readdir can return the same entry twice if between these calls one or more
  2724    // entries have been removed from this directory.
  2725    int max_attempts = 5;
  2726    for (int i = 0; i < max_attempts; i++) {
  2727      child_dirs.clear();
  2728      errno = 0;
  2729      bool success = true;
  2730      DIR* dir = opendir(path.c_str());
  2731      if (dir == nullptr) {
  2732        // Ignore any directories we can't read or missing directories as the
  2733        // directory could have been deleted/mutated from the time the parent
  2734        // directory contents were read.
  2735        return;
  2736      }
  2737      auto dir_closer = Cleanup([&dir]() { closedir(dir); });
  2738      absl::flat_hash_set<std::string> children;
  2739      while (true) {
  2740        // Readdir(3): If the end of the directory stream is reached, NULL is
  2741        // returned and errno is not changed.  If an error occurs, NULL is
  2742        // returned and errno is set appropriately.  To distinguish end of stream
  2743        // and from an error, set errno to zero before calling readdir() and then
  2744        // check the value of errno if NULL is returned.
  2745        errno = 0;
  2746        struct dirent* dp = readdir(dir);
  2747        if (dp == nullptr) {
  2748          // Linux will return EINVAL when calling getdents on a /proc/tid/net
  2749          // file corresponding to a zombie task.
  2750          // See fs/proc/proc_net.c:proc_tgid_net_readdir().
  2751          //
  2752          // We just ignore the directory in this case.
  2753          if (errno == EINVAL && absl::StartsWith(path, "/proc/") &&
  2754              absl::EndsWith(path, "/net")) {
  2755            break;
  2756          }
  2757          // We may also see permission failures traversing some files.
  2758          if (errno == EACCES && absl::StartsWith(path, "/proc/")) {
  2759            break;
  2760          }
  2761  
  2762          // Otherwise, no errors are allowed.
  2763          ASSERT_EQ(errno, 0) << path;
  2764          break;  // We're done.
  2765        }
  2766  
  2767        const std::string name = dp->d_name;
  2768  
  2769        if (name == "." || name == "..") {
  2770          continue;
  2771        }
  2772  
  2773        // Ignore a duplicate entry if it isn't the last attempt.
  2774        if (i == max_attempts - 1) {
  2775          ASSERT_EQ(children.find(name), children.end())
  2776              << absl::StrCat(path, "/", name);
  2777        } else if (children.find(name) != children.end()) {
  2778          std::cerr << "Duplicate entry: " << i << ":"
  2779                    << absl::StrCat(path, "/", name) << std::endl;
  2780          success = false;
  2781          break;
  2782        }
  2783        children.insert(name);
  2784  
  2785        if (dp->d_type == DT_DIR) {
  2786          child_dirs.push_back(name);
  2787        }
  2788      }
  2789      if (success) {
  2790        break;
  2791      }
  2792    }
  2793    for (auto dname = child_dirs.begin(); dname != child_dirs.end(); dname++) {
  2794      CheckDuplicatesRecursively(absl::StrCat(path, "/", *dname));
  2795    }
  2796  }
  2797  
  2798  TEST(Proc, NoDuplicates) { CheckDuplicatesRecursively("/proc"); }
  2799  
  2800  // Most /proc/PID files are owned by the task user with SUID_DUMP_USER.
  2801  TEST(ProcPid, UserDumpableOwner) {
  2802    int before;
  2803    ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds());
  2804    auto cleanup = Cleanup([before] {
  2805      ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds());
  2806    });
  2807  
  2808    EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_USER), SyscallSucceeds());
  2809  
  2810    // This applies to the task directory itself and files inside.
  2811    struct stat st;
  2812    ASSERT_THAT(stat("/proc/self/", &st), SyscallSucceeds());
  2813    EXPECT_EQ(st.st_uid, geteuid());
  2814    EXPECT_EQ(st.st_gid, getegid());
  2815  
  2816    ASSERT_THAT(stat("/proc/self/stat", &st), SyscallSucceeds());
  2817    EXPECT_EQ(st.st_uid, geteuid());
  2818    EXPECT_EQ(st.st_gid, getegid());
  2819  }
  2820  
  2821  // /proc/PID files are owned by root with SUID_DUMP_DISABLE.
  2822  TEST(ProcPid, RootDumpableOwner) {
  2823    int before;
  2824    ASSERT_THAT(before = prctl(PR_GET_DUMPABLE), SyscallSucceeds());
  2825    auto cleanup = Cleanup([before] {
  2826      ASSERT_THAT(prctl(PR_SET_DUMPABLE, before), SyscallSucceeds());
  2827    });
  2828  
  2829    EXPECT_THAT(prctl(PR_SET_DUMPABLE, SUID_DUMP_DISABLE), SyscallSucceeds());
  2830  
  2831    // This *does not* applies to the task directory itself (or other 0555
  2832    // directories), but does to files inside.
  2833    struct stat st;
  2834    ASSERT_THAT(stat("/proc/self/", &st), SyscallSucceeds());
  2835    EXPECT_EQ(st.st_uid, geteuid());
  2836    EXPECT_EQ(st.st_gid, getegid());
  2837  
  2838    // This file is owned by root. Also allow nobody in case this test is running
  2839    // in a userns without root mapped.
  2840    ASSERT_THAT(stat("/proc/self/stat", &st), SyscallSucceeds());
  2841    EXPECT_THAT(st.st_uid, AnyOf(Eq(0), Eq(65534)));
  2842    EXPECT_THAT(st.st_gid, AnyOf(Eq(0), Eq(65534)));
  2843  }
  2844  
  2845  TEST(Proc, GetdentsEnoent) {
  2846    FileDescriptor fd;
  2847    ASSERT_NO_ERRNO(WithSubprocess(
  2848        [&](int pid) -> PosixError {
  2849          // Running.
  2850          ASSIGN_OR_RETURN_ERRNO(fd, Open(absl::StrCat("/proc/", pid, "/task"),
  2851                                          O_RDONLY | O_DIRECTORY));
  2852  
  2853          return NoError();
  2854        },
  2855        nullptr, nullptr));
  2856    char buf[1024];
  2857    ASSERT_THAT(syscall(SYS_getdents64, fd.get(), buf, sizeof(buf)),
  2858                SyscallFailsWithErrno(ENOENT));
  2859  }
  2860  
  2861  void CheckSyscwFromIOFile(const std::string& path, const std::string& regex) {
  2862    std::string output;
  2863    ASSERT_NO_ERRNO(GetContents(path, &output));
  2864    ASSERT_THAT(output, ContainsRegex(absl::StrCat("syscw:\\s+", regex, "\n")));
  2865  }
  2866  
  2867  // Checks that there is variable accounting of IO between threads/tasks.
  2868  TEST(Proc, PidTidIOAccounting) {
  2869    absl::Notification notification;
  2870  
  2871    // Run a thread with a bunch of writes. Check that io account records exactly
  2872    // the number of write calls. File open/close is there to prevent buffering.
  2873    ScopedThread writer([&notification] {
  2874      const int num_writes = 100;
  2875      for (int i = 0; i < num_writes; i++) {
  2876        auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
  2877        ASSERT_NO_ERRNO(SetContents(path.path(), "a"));
  2878      }
  2879      notification.Notify();
  2880      const std::string& writer_dir =
  2881          absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io");
  2882  
  2883      CheckSyscwFromIOFile(writer_dir, std::to_string(num_writes));
  2884    });
  2885  
  2886    // Run a thread and do no writes. Check that no writes are recorded.
  2887    ScopedThread noop([&notification] {
  2888      notification.WaitForNotification();
  2889      const std::string& noop_dir =
  2890          absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io");
  2891  
  2892      CheckSyscwFromIOFile(noop_dir, "0");
  2893    });
  2894  
  2895    writer.Join();
  2896    noop.Join();
  2897  }
  2898  
  2899  TEST(Proc, Statfs) {
  2900    struct statfs st;
  2901    EXPECT_THAT(statfs("/proc", &st), SyscallSucceeds());
  2902    EXPECT_EQ(st.f_type, PROC_SUPER_MAGIC);
  2903    EXPECT_EQ(st.f_bsize, getpagesize());
  2904    EXPECT_EQ(st.f_namelen, NAME_MAX);
  2905  }
  2906  
  2907  // Tests that /proc/[pid]/fd/[num] can resolve to a path inside /proc.
  2908  TEST(Proc, ResolveSymlinkToProc) {
  2909    const auto proc = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/cmdline", 0));
  2910    const auto path = JoinPath("/proc/self/fd/", absl::StrCat(proc.get()));
  2911    const auto target = ASSERT_NO_ERRNO_AND_VALUE(ReadLink(path));
  2912    EXPECT_EQ(target, JoinPath("/proc/", absl::StrCat(getpid()), "/cmdline"));
  2913  }
  2914  
  2915  // NOTE(b/236035339): Tests that opening /proc/[pid]/fd/[eventFDNum] with
  2916  // O_DIRECTORY leads to ENOTDIR.
  2917  TEST(Proc, RegressionTestB236035339) {
  2918    FileDescriptor efd =
  2919        ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_CLOEXEC));
  2920    const auto path = JoinPath("/proc/self/fd/", absl::StrCat(efd.get()));
  2921    EXPECT_THAT(open(path.c_str(), O_RDONLY | O_CLOEXEC | O_DIRECTORY),
  2922                SyscallFailsWithErrno(ENOTDIR));
  2923  }
  2924  
  2925  // NOTE(b/338393279): Tests that after execve() from a non-leader thread
  2926  // changes which thread owns the thread group ID, the new thread group leader
  2927  // can access its /proc/self.
  2928  TEST(Proc, PidReuse) {
  2929    const ExecveArray owned_child_argv = {"/proc/self/exe",
  2930                                          "--proc_pid_reuse_child"};
  2931    char* const* const child_argv = owned_child_argv.get();
  2932  
  2933    const auto rest = [child_argv] {
  2934      struct stat statbuf;
  2935      TEST_PCHECK(stat("/proc/self/cwd", &statbuf) == 0);
  2936  
  2937      ScopedThread([child_argv] {
  2938        execve(child_argv[0], child_argv, /* envp = */ nullptr);
  2939        TEST_PCHECK_MSG(false, "Survived execve to test child");
  2940      });
  2941    };
  2942    EXPECT_THAT(InForkedProcess(rest), IsPosixErrorOkAndHolds(0));
  2943  }
  2944  
  2945  [[noreturn]] void RunProcPidReuseChild() {
  2946    struct stat statbuf;
  2947    TEST_PCHECK(stat("/proc/self/cwd", &statbuf) == 0);
  2948    _exit(0);
  2949  }
  2950  
  2951  TEST(ProcFilesystems, ReadCapLastCap) {
  2952    std::string lastCapStr =
  2953        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/cap_last_cap"));
  2954  
  2955    uint64_t lastCap;
  2956    ASSERT_TRUE(absl::SimpleAtoi(lastCapStr, &lastCap));
  2957    EXPECT_TRUE(lastCap > 32 && lastCap < 64);
  2958  }
  2959  
  2960  TEST(ProcFilesystems, OverflowID) {
  2961    std::string overflowGidStr =
  2962        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/overflowgid"));
  2963    std::string overflowUidStr =
  2964        ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/sys/kernel/overflowuid"));
  2965    uint64_t overflowGid, overflowUid;
  2966    ASSERT_TRUE(absl::SimpleAtoi(overflowGidStr, &overflowGid));
  2967    ASSERT_TRUE(absl::SimpleAtoi(overflowUidStr, &overflowUid));
  2968  
  2969    const uint64_t defaultOverflowID = 65534;
  2970    EXPECT_EQ(overflowGid, defaultOverflowID);
  2971    EXPECT_EQ(overflowUid, defaultOverflowID);
  2972  }
  2973  
  2974  }  // namespace
  2975  }  // namespace testing
  2976  }  // namespace gvisor
  2977  
  2978  int main(int argc, char** argv) {
  2979    for (int i = 0; i < argc; ++i) {
  2980      gvisor::testing::saved_argv.emplace_back(std::string(argv[i]));
  2981    }
  2982  
  2983    gvisor::testing::TestInit(&argc, &argv);
  2984  
  2985    if (absl::GetFlag(FLAGS_proc_pid_reuse_child)) {
  2986      gvisor::testing::RunProcPidReuseChild();
  2987    }
  2988  
  2989    return gvisor::testing::RunAllTests();
  2990  }